2010-09-16 14:45:30 -07:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
mirrorcheck command
|
|
|
|
|
|
|
|
Poll every active mirror URL we have in the database, grab the 'lastsync' file,
|
|
|
|
and record details about how long it took and how up to date the mirror is. If
|
|
|
|
we encounter errors, record those as well.
|
|
|
|
|
|
|
|
Usage: ./manage.py mirrorcheck
|
|
|
|
"""
|
|
|
|
|
2011-02-16 19:51:23 -08:00
|
|
|
from collections import deque
|
2011-04-18 13:10:20 -07:00
|
|
|
from datetime import datetime
|
2012-07-08 18:36:51 -07:00
|
|
|
from httplib import HTTPException
|
2010-09-16 14:45:30 -07:00
|
|
|
import logging
|
2012-05-13 18:15:00 -07:00
|
|
|
import os
|
|
|
|
from optparse import make_option
|
|
|
|
from pytz import utc
|
2010-09-16 14:45:30 -07:00
|
|
|
import re
|
|
|
|
import socket
|
2012-05-13 18:15:00 -07:00
|
|
|
import subprocess
|
2010-09-16 14:45:30 -07:00
|
|
|
import sys
|
|
|
|
import time
|
2012-05-13 18:15:00 -07:00
|
|
|
import tempfile
|
2010-09-16 14:45:30 -07:00
|
|
|
from threading import Thread
|
2010-10-07 06:07:03 -07:00
|
|
|
import types
|
2010-09-16 14:45:30 -07:00
|
|
|
from Queue import Queue, Empty
|
|
|
|
import urllib2
|
|
|
|
|
2012-05-13 18:15:00 -07:00
|
|
|
from django.core.management.base import NoArgsCommand
|
|
|
|
from django.db import transaction
|
2012-07-24 07:19:48 -07:00
|
|
|
from django.utils.timezone import now
|
2012-05-13 18:15:00 -07:00
|
|
|
|
2013-03-06 18:58:09 -08:00
|
|
|
from mirrors.models import MirrorUrl, MirrorLog, CheckLocation
|
|
|
|
|
2010-09-16 14:45:30 -07:00
|
|
|
|
|
|
|
logging.basicConfig(
|
2010-10-13 16:44:17 -07:00
|
|
|
level=logging.WARNING,
|
2010-09-16 14:45:30 -07:00
|
|
|
format='%(asctime)s -> %(levelname)s: %(message)s',
|
|
|
|
datefmt='%Y-%m-%d %H:%M:%S',
|
|
|
|
stream=sys.stderr)
|
|
|
|
logger = logging.getLogger()
|
|
|
|
|
2013-03-12 18:18:51 -07:00
|
|
|
|
2010-09-16 14:45:30 -07:00
|
|
|
class Command(NoArgsCommand):
    option_list = NoArgsCommand.option_list + (
        make_option('-t', '--timeout', dest='timeout', type='float', default=10.0,
            help='Timeout value for connecting to URL'),
        make_option('-l', '--location', dest='location', type='int',
            help='ID of CheckLocation object to use for this run'),
    )
    help = "Runs a check on all known mirror URLs to determine their up-to-date status."

    # Map the management-command verbosity flag onto a root logger level;
    # any other verbosity value leaves the configured level untouched.
    _LOG_LEVELS = {
        0: logging.ERROR,
        1: logging.WARNING,
        2: logging.DEBUG,
    }

    def handle_noargs(self, **options):
        """Check every active, public mirror URL and record the results.

        Honors the --timeout and --location options; returns 0 on completion.
        """
        verbosity = int(options.get('verbosity', 0))
        if verbosity in self._LOG_LEVELS:
            logger.level = self._LOG_LEVELS[verbosity]

        timeout = options.get('timeout')

        urls = MirrorUrl.objects.select_related('protocol').filter(
            active=True, mirror__active=True, mirror__public=True)

        location = options.get('location', None)
        if location:
            location = CheckLocation.objects.get(id=location)
            # Pin every outgoing connection to this location's address
            # family, and only check URLs reachable over that family.
            monkeypatch_getaddrinfo(location.family)
            if location.family == socket.AF_INET6:
                urls = urls.filter(has_ipv6=True)
            elif location.family == socket.AF_INET:
                urls = urls.filter(has_ipv4=True)

        checker = MirrorCheckPool(urls, location, timeout)
        checker.run()
        return 0
|
2010-09-16 14:45:30 -07:00
|
|
|
|
2012-04-29 19:26:23 -07:00
|
|
|
|
2013-03-06 18:58:09 -08:00
|
|
|
def monkeypatch_getaddrinfo(force_family=socket.AF_INET):
    """Globally pin socket address resolution to one address family.

    Replaces socket.getaddrinfo with a wrapper that ignores the caller's
    requested family and always resolves using *force_family* (e.g.
    socket.AF_INET or socket.AF_INET6). Affects the whole process.
    """
    real_getaddrinfo = socket.getaddrinfo

    def forced_family_getaddrinfo(host, port, family=0, socktype=0,
            proto=0, flags=0):
        # Drop the requested family on the floor; everything else passes
        # through to the real implementation unchanged.
        return real_getaddrinfo(host, port, force_family, socktype,
                proto, flags)

    socket.getaddrinfo = forced_family_getaddrinfo
|
|
|
|
|
|
|
|
|
2012-05-13 18:15:00 -07:00
|
|
|
def parse_lastsync(log, data):
    """Record the mirror's last-sync time on *log* from raw lastsync data.

    The lastsync file is expected to hold a Unix epoch value that we wrote
    ourselves; anything unparseable marks the check as a failure.
    """
    try:
        stamp = int(data)
        log.last_sync = datetime.utcfromtimestamp(stamp).replace(tzinfo=utc)
    except (TypeError, ValueError):
        # Deliberately don't log the raw value; sometimes we get back a
        # crazy-encoded web page instead of an epoch. No parseable time
        # means the check failed.
        log.last_sync = None
        log.error = "Could not parse time from lastsync"
        log.is_success = False
|
|
|
|
|
|
|
|
|
2013-03-06 18:58:09 -08:00
|
|
|
def check_mirror_url(mirror_url, location, timeout):
    """Fetch <mirror_url>/lastsync over HTTP/FTP and record the outcome.

    Returns an unsaved MirrorLog capturing duration, parsed last-sync time,
    and any error encountered. Each failure mode below decides separately
    whether a duration is meaningful enough to record.
    """
    url = mirror_url.url + 'lastsync'
    logger.info("checking URL %s", url)
    log = MirrorLog(url=mirror_url, check_time=now(), location=location)
    headers = {'User-Agent': 'archweb/1.0'}
    req = urllib2.Request(url, None, headers)
    # duration measurements all start here, before the request is issued
    start = time.time()
    try:
        result = urllib2.urlopen(req, timeout=timeout)
        data = result.read()
        result.close()
        end = time.time()
        parse_lastsync(log, data)
        log.duration = end - start
        logger.debug("success: %s, %.2f", url, log.duration)
    except urllib2.HTTPError as e:
        if e.code == 404:
            # we have a duration, just not a success; the server responded,
            # it just doesn't have the file
            end = time.time()
            log.duration = end - start
        log.is_success = False
        log.error = str(e)
        logger.debug("failed: %s, %s", url, log.error)
    except urllib2.URLError as e:
        log.is_success = False
        # e.reason may be a string (FTP errors) or a nested exception;
        # both cases are refined below
        log.error = e.reason
        if isinstance(e.reason, types.StringTypes) and \
                re.search(r'550.*No such file', e.reason):
            # FTP 550 "no such file" — similar to the 404 case above,
            # the server answered, so still record a duration
            end = time.time()
            log.duration = end - start
        if isinstance(e.reason, socket.timeout):
            log.error = "Connection timed out."
        elif isinstance(e.reason, socket.error):
            # last element of args is the human-readable message
            log.error = e.reason.args[-1]
        logger.debug("failed: %s, %s", url, log.error)
    except HTTPException:
        # e.g., BadStatusLine from a server speaking garbage HTTP
        log.is_success = False
        log.error = "Exception in processing HTTP request."
        logger.debug("failed: %s, %s", url, log.error)
    except socket.timeout:
        # timeout raised directly (e.g. mid-read) rather than wrapped
        # in a URLError
        log.is_success = False
        log.error = "Connection timed out."
        logger.debug("failed: %s, %s", url, log.error)
    except socket.error as e:
        log.is_success = False
        log.error = str(e)
        logger.debug("failed: %s, %s", url, log.error)

    return log
|
|
|
|
|
|
|
|
|
2013-03-06 18:58:09 -08:00
|
|
|
def check_rsync_url(mirror_url, location, timeout):
    """Fetch <mirror_url>/lastsync via the rsync binary and record the outcome.

    Downloads the file into a private temp directory with a subprocess,
    then parses it. Returns an unsaved MirrorLog. The temp directory is
    always cleaned up, even on failure.
    """
    url = mirror_url.url + 'lastsync'
    logger.info("checking URL %s", url)
    log = MirrorLog(url=mirror_url, check_time=now(), location=location)

    tempdir = tempfile.mkdtemp()
    # force rsync onto the check location's address family, if any
    ipopt = ''
    if location:
        if location.family == socket.AF_INET6:
            ipopt = '--ipv6'
        elif location.family == socket.AF_INET:
            ipopt = '--ipv4'
    lastsync_path = os.path.join(tempdir, 'lastsync')
    # --contimeout/--timeout take integer seconds; timeout is truncated
    rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout,
            "--timeout=%d" % timeout]
    if ipopt:
        rsync_cmd.append(ipopt)
    rsync_cmd.append(url)
    rsync_cmd.append(lastsync_path)
    try:
        with open(os.devnull, 'w') as devnull:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("rsync cmd: %s", ' '.join(rsync_cmd))
            start = time.time()
            proc = subprocess.Popen(rsync_cmd, stdout=devnull,
                    stderr=subprocess.PIPE)
            _, errdata = proc.communicate()
            end = time.time()
        log.duration = end - start
        if proc.returncode != 0:
            logger.debug("error: %s, %s", url, errdata)
            log.is_success = False
            log.error = errdata.strip()
            # look at rsync error code- if we had a command error or timed out,
            # don't record a duration as it is misleading
            # (1 = syntax/usage, 30 = I/O timeout, 35 = connect timeout)
            if proc.returncode in (1, 30, 35):
                log.duration = None
        else:
            logger.debug("success: %s, %.2f", url, log.duration)
            if os.path.exists(lastsync_path):
                with open(lastsync_path, 'r') as lastsync:
                    parse_lastsync(log, lastsync.read())
            else:
                # rsync "succeeded" but produced no file; treat as a
                # parse failure
                parse_lastsync(log, None)
    finally:
        if os.path.exists(lastsync_path):
            os.unlink(lastsync_path)
        os.rmdir(tempdir)

    return log
|
|
|
|
|
2012-04-29 19:26:23 -07:00
|
|
|
|
2013-03-06 18:58:09 -08:00
|
|
|
def mirror_url_worker(work, output, location, timeout):
    """Thread target: drain mirror URLs from *work* and append MirrorLogs.

    Pulls URLs off the queue without blocking, dispatches each to the
    protocol-appropriate checker, and collects any resulting log entries
    into *output*. Returns 0 once the queue is empty.
    """
    while True:
        try:
            mirror_url = work.get(block=False)
            try:
                protocol = mirror_url.protocol.protocol
                if protocol == 'rsync':
                    entry = check_rsync_url(mirror_url, location, timeout)
                elif (protocol == 'ftp' and location and
                        location.family == socket.AF_INET6):
                    # IPv6 + FTP don't work; skip checking completely
                    entry = None
                else:
                    entry = check_mirror_url(mirror_url, location, timeout)
                if entry:
                    output.append(entry)
            finally:
                # always mark the task done, even if a checker blew up,
                # so Queue.join() can't deadlock
                work.task_done()
        except Empty:
            return 0
|
|
|
|
|
2012-04-29 19:26:23 -07:00
|
|
|
|
2010-09-16 14:45:30 -07:00
|
|
|
class MirrorCheckPool(object):
    """A fixed-size pool of daemon threads that check a batch of mirror URLs
    and bulk-save the resulting log entries."""

    def __init__(self, urls, location, timeout=10, num_threads=10):
        self.tasks = Queue()
        # deque: thread-safe appends from the workers, no locking needed
        self.logs = deque()
        for mirror_url in list(urls):
            self.tasks.put(mirror_url)
        worker_args = (self.tasks, self.logs, location, timeout)
        self.threads = []
        for _ in range(num_threads):
            worker = Thread(target=mirror_url_worker, args=worker_args)
            worker.daemon = True
            self.threads.append(worker)

    @transaction.atomic
    def run(self):
        """Start all workers, wait for the queue to drain, then persist
        every collected MirrorLog in a single bulk insert."""
        logger.debug("starting threads")
        for worker in self.threads:
            worker.start()
        logger.debug("joining on all threads")
        self.tasks.join()
        logger.debug("processing %d log entries", len(self.logs))
        MirrorLog.objects.bulk_create(self.logs)
        logger.debug("log entries saved")
|
2010-09-16 14:45:30 -07:00
|
|
|
|
|
|
|
# vim: set ts=4 sw=4 et:
|