# 2010-09-16 14:45:30 -07:00
# -*- coding: utf-8 -*-
"""
mirrorcheck command
Poll every active mirror URL we have in the database , grab the ' lastsync ' file ,
and record details about how long it took and how up to date the mirror is . If
we encounter errors , record those as well .
Usage : . / manage . py mirrorcheck
"""
from django . core . management . base import NoArgsCommand
from datetime import datetime , timedelta
import logging
import re
import socket
import sys
import time
from threading import Thread
# 2010-10-07 06:07:03 -07:00
import types
# 2010-09-16 14:45:30 -07:00
from Queue import Queue , Empty
import urllib2
# 2010-10-13 16:44:17 -07:00
from mirrors . models import MirrorUrl , MirrorLog
# 2010-09-16 14:45:30 -07:00
# Configure the root logger once at import time; the management command
# adjusts the level afterwards based on its --verbosity option.
logging.basicConfig(
        level=logging.WARNING,
        format='%(asctime)s -> %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        stream=sys.stderr)
logger = logging.getLogger()
class Command(NoArgsCommand):
    help = "Runs a check on all known mirror URLs to determine their up-to-date status."

    def handle_noargs(self, **options):
        """Entry point: map --verbosity onto the logger level, run the check.

        Returns the exit status of check_current_mirrors() (0 on success).
        """
        v = int(options.get('verbosity', 0))
        if v == 0:
            logger.level = logging.ERROR
        elif v == 1:
            logger.level = logging.WARNING
        elif v == 2:
            logger.level = logging.DEBUG

        # Allow a stack dump to be requested from a running check via
        # SIGQUIT or SIGUSR1 -- useful for debugging hung mirror threads.
        import signal, traceback
        handler = lambda sig, stack: traceback.print_stack(stack)
        signal.signal(signal.SIGQUIT, handler)
        signal.signal(signal.SIGUSR1, handler)

        return check_current_mirrors()
# 2010-12-13 11:40:01 -08:00
def parse_rfc3339_datetime(time_string):
    """Parse an RFC-3339 timestamp, e.g. '2010-09-02 11:05:06+02:00'.

    Returns a naive datetime normalized to UTC, or None if time_string
    does not look like an RFC-3339 value.
    """
    # raw strings so the \d escapes reach the regex engine intact
    m = re.match(r'^(\d{4})-(\d{2})-(\d{2}) '
                 r'(\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time_string)
    if m is None:
        return None
    vals = m.groups()
    parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]),
                      int(vals[3]), int(vals[4]), int(vals[5]))
    # now account for time zone offset; applying the sign to both fields
    # keeps hours and minutes consistent for negative offsets
    sign = vals[6]
    offset = timedelta(hours=int(sign + vals[7]),
                       minutes=int(sign + vals[8]))
    # subtract the offset, e.g. '-04:00' should be moved up 4 hours
    return parsed - offset
def check_mirror_url(mirror_url):
    """Fetch the 'lastsync' file from one mirror URL and build a MirrorLog.

    Records the fetch duration and the parsed last-sync time; on failure
    records is_success=False plus a human-readable error. The MirrorLog is
    returned unsaved so the caller can persist it (e.g. from one thread).
    """
    url = mirror_url.url + 'lastsync'
    logger.info("checking URL %s", url)
    log = MirrorLog(url=mirror_url, check_time=datetime.utcnow())
    try:
        start = time.time()
        result = urllib2.urlopen(url, timeout=10)
        data = result.read()
        result.close()
        end = time.time()
        # lastsync should be an epoch value, but some mirrors
        # are creating their own in RFC-3339 format:
        # '2010-09-02 11:05:06+02:00'
        parsed_time = None
        try:
            parsed_time = datetime.utcfromtimestamp(int(data))
        except ValueError:
            # it is bad news to try logging the lastsync value;
            # sometimes we get a crazy-encoded web page.
            logger.info("attempting to parse generated lastsync file"
                    " from mirror %s", url)
            parsed_time = parse_rfc3339_datetime(data)
        log.last_sync = parsed_time
        # if we couldn't parse a time, this is a failure
        if parsed_time is None:
            log.error = "Could not parse time from lastsync"
            log.is_success = False
        log.duration = end - start
        logger.debug("success: %s, %.2f", url, log.duration)
    except urllib2.HTTPError as e:
        if e.code == 404:
            # we have a duration, just not a success
            end = time.time()
            log.duration = end - start
        log.is_success = False
        log.error = str(e)
        logger.debug("failed: %s, %s", url, log.error)
    except urllib2.URLError as e:
        log.is_success = False
        log.error = e.reason
        # FTP '550 No such file' is the moral equivalent of a 404;
        # still record a duration in that case
        if isinstance(e.reason, types.StringTypes) and \
                re.search(r'550.*No such file', e.reason):
            end = time.time()
            log.duration = end - start
        if isinstance(e.reason, socket.timeout):
            log.error = "Connection timed out."
        elif isinstance(e.reason, socket.error):
            # args[1] is the strerror portion of the socket error
            log.error = e.reason.args[1]
        logger.debug("failed: %s, %s", url, log.error)
    except socket.timeout as e:
        log.is_success = False
        log.error = "Connection timed out."
        logger.debug("failed: %s, %s", url, log.error)
    return log
# 2010-12-13 11:40:01 -08:00
def mirror_url_worker(work, output):
    """Thread body: drain mirror URLs from 'work', pushing logs to 'output'.

    Runs until the work queue is empty, then returns 0.
    """
    while True:
        try:
            item = work.get_nowait()
        except Empty:
            # no work left; this worker is finished
            return 0
        try:
            output.put(check_mirror_url(item))
        finally:
            # always mark the task done so queue.join() can complete,
            # even if the check itself blew up
            work.task_done()
class MirrorCheckPool(object):
    """A fixed-size pool of daemon threads that check mirror URLs.

    Work items are queued at construction time; run() starts the threads,
    waits for every check to finish, then saves the resulting MirrorLog
    entries from the main thread.
    """
    def __init__(self, work, num_threads=10):
        self.tasks = Queue()
        self.logs = Queue()
        for i in list(work):
            self.tasks.put(i)
        self.threads = []
        for i in range(num_threads):
            thread = Thread(target=mirror_url_worker,
                    args=(self.tasks, self.logs))
            # daemon threads so a hung check cannot block interpreter exit
            thread.daemon = True
            self.threads.append(thread)

    def run(self):
        """Run all checks to completion and persist the log entries."""
        logger.debug("starting threads")
        for t in self.threads:
            t.start()
        logger.debug("joining on all threads")
        self.tasks.join()
        # save on the main thread; workers only produce unsaved MirrorLogs
        logger.debug("processing log entries")
        try:
            while True:
                log = self.logs.get(block=False)
                log.save()
                self.logs.task_done()
        except Empty:
            logger.debug("all log items saved to database")
# 2010-09-16 14:45:30 -07:00
def check_current_mirrors():
    """Check every public, active, downloadable mirror URL; returns 0."""
    eligible = MirrorUrl.objects.filter(
            protocol__is_download=True,
            mirror__active=True, mirror__public=True)
    MirrorCheckPool(eligible).run()
    return 0
# For lack of a better place to put it, here is a query to get latest check
# result joined with mirror details:
# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1;
# vim: set ts=4 sw=4 et: