evorepo/mirrors/utils.py
Dan McGee 76516cae45 Don't limit protocols returned by mirror status function
If results weren't available for certain URLs, they won't show up anyway
in this list, and if we start to check rsync URLs, then we want their
values to come back in this status list.

Signed-off-by: Dan McGee <dan@archlinux.org>
2012-05-13 19:45:57 -05:00

132 lines
4.9 KiB
Python

from datetime import timedelta
from django.db.models import Avg, Count, Max, Min, StdDev
from django_countries.fields import Country
from main.utils import cache_function, utc_now
from .models import MirrorLog, MirrorProtocol, MirrorUrl
default_cutoff = timedelta(hours=24)
def annotate_url(url, delays):
'''Given a MirrorURL object, add a few more attributes to it regarding
status, including completion_pct, delay, and score.'''
url.completion_pct = float(url.success_count) / url.check_count
if url.id in delays:
url_delays = delays[url.id]
url.delay = sum(url_delays, timedelta()) / len(url_delays)
hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0
if url.completion_pct > 0:
divisor = url.completion_pct
else:
# arbitrary small value
divisor = 0.005
url.score = (hours + url.duration_avg + url.duration_stddev) / divisor
else:
url.delay = None
url.score = None
@cache_function(123)
def get_mirror_statuses(cutoff=default_cutoff):
cutoff_time = utc_now() - cutoff
# I swear, this actually has decent performance...
urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
mirror__active=True, mirror__public=True,
logs__check_time__gte=cutoff_time).annotate(
check_count=Count('logs'),
success_count=Count('logs__duration'),
last_sync=Max('logs__last_sync'),
last_check=Max('logs__check_time'),
duration_avg=Avg('logs__duration'),
duration_stddev=StdDev('logs__duration'))
# The Django ORM makes it really hard to get actual average delay in the
# above query, so run a seperate query for it and we will process the
# results here.
times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
check_time__gte=cutoff_time)
delays = {}
for log in times:
delay = log.check_time - log.last_sync
delays.setdefault(log.url_id, []).append(delay)
if urls:
last_check = max([u.last_check for u in urls])
num_checks = max([u.check_count for u in urls])
check_info = MirrorLog.objects.filter(
check_time__gte=cutoff_time).aggregate(
mn=Min('check_time'), mx=Max('check_time'))
if num_checks > 1:
check_frequency = (check_info['mx'] - check_info['mn']) \
/ (num_checks - 1)
else:
check_frequency = None
else:
last_check = None
num_checks = 0
check_frequency = None
for url in urls:
annotate_url(url, delays)
return {
'cutoff': cutoff,
'last_check': last_check,
'num_checks': num_checks,
'check_frequency': check_frequency,
'urls': urls,
}
@cache_function(117)
def get_mirror_errors(cutoff=default_cutoff):
cutoff_time = utc_now() - cutoff
errors = MirrorLog.objects.filter(
is_success=False, check_time__gte=cutoff_time,
url__mirror__active=True, url__mirror__public=True).values(
'url__url', 'url__country', 'url__protocol__protocol',
'url__mirror__country', 'error').annotate(
error_count=Count('error'), last_occurred=Max('check_time')
).order_by('-last_occurred', '-error_count')
errors = list(errors)
for err in errors:
ctry_code = err['url__country'] or err['url__mirror__country']
err['country'] = Country(ctry_code)
return errors
@cache_function(295)
def get_mirror_url_for_download(cutoff=default_cutoff):
'''Find a good mirror URL to use for package downloads. If we have mirror
status data available, it is used to determine a good choice by looking at
the last batch of status rows.'''
cutoff_time = utc_now() - cutoff
status_data = MirrorLog.objects.filter(
check_time__gte=cutoff_time).aggregate(
Max('check_time'), Max('last_sync'))
if status_data['check_time__max'] is not None:
min_check_time = status_data['check_time__max'] - timedelta(minutes=5)
min_sync_time = status_data['last_sync__max'] - timedelta(minutes=20)
best_logs = MirrorLog.objects.filter(is_success=True,
check_time__gte=min_check_time, last_sync__gte=min_sync_time,
url__mirror__public=True, url__mirror__active=True,
url__protocol__default=True).order_by(
'duration')[:1]
if best_logs:
return MirrorUrl.objects.get(id=best_logs[0].url_id)
mirror_urls = MirrorUrl.objects.filter(
mirror__public=True, mirror__active=True, protocol__default=True)
# look first for a country-agnostic URL, then fall back to any HTTP URL
filtered_urls = mirror_urls.filter(mirror__country='')[:1]
if not filtered_urls:
filtered_urls = mirror_urls[:1]
if not filtered_urls:
return None
return filtered_urls[0]
# vim: set ts=4 sw=4 et: