76516cae45
If results weren't available for certain URLs, they won't show up anyway in this list, and if we start to check rsync URLs, then we want their values to come back in this status list. Signed-off-by: Dan McGee <dan@archlinux.org>
132 lines
4.9 KiB
Python
132 lines
4.9 KiB
Python
from datetime import timedelta
|
|
|
|
from django.db.models import Avg, Count, Max, Min, StdDev
|
|
from django_countries.fields import Country
|
|
|
|
from main.utils import cache_function, utc_now
|
|
from .models import MirrorLog, MirrorProtocol, MirrorUrl
|
|
|
|
|
|
default_cutoff = timedelta(hours=24)
|
|
|
|
def annotate_url(url, delays):
|
|
'''Given a MirrorURL object, add a few more attributes to it regarding
|
|
status, including completion_pct, delay, and score.'''
|
|
url.completion_pct = float(url.success_count) / url.check_count
|
|
if url.id in delays:
|
|
url_delays = delays[url.id]
|
|
url.delay = sum(url_delays, timedelta()) / len(url_delays)
|
|
hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0
|
|
|
|
if url.completion_pct > 0:
|
|
divisor = url.completion_pct
|
|
else:
|
|
# arbitrary small value
|
|
divisor = 0.005
|
|
url.score = (hours + url.duration_avg + url.duration_stddev) / divisor
|
|
else:
|
|
url.delay = None
|
|
url.score = None
|
|
|
|
|
|
@cache_function(123)
|
|
def get_mirror_statuses(cutoff=default_cutoff):
|
|
cutoff_time = utc_now() - cutoff
|
|
# I swear, this actually has decent performance...
|
|
urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
|
|
mirror__active=True, mirror__public=True,
|
|
logs__check_time__gte=cutoff_time).annotate(
|
|
check_count=Count('logs'),
|
|
success_count=Count('logs__duration'),
|
|
last_sync=Max('logs__last_sync'),
|
|
last_check=Max('logs__check_time'),
|
|
duration_avg=Avg('logs__duration'),
|
|
duration_stddev=StdDev('logs__duration'))
|
|
|
|
# The Django ORM makes it really hard to get actual average delay in the
|
|
# above query, so run a seperate query for it and we will process the
|
|
# results here.
|
|
times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
|
|
check_time__gte=cutoff_time)
|
|
delays = {}
|
|
for log in times:
|
|
delay = log.check_time - log.last_sync
|
|
delays.setdefault(log.url_id, []).append(delay)
|
|
|
|
if urls:
|
|
last_check = max([u.last_check for u in urls])
|
|
num_checks = max([u.check_count for u in urls])
|
|
check_info = MirrorLog.objects.filter(
|
|
check_time__gte=cutoff_time).aggregate(
|
|
mn=Min('check_time'), mx=Max('check_time'))
|
|
if num_checks > 1:
|
|
check_frequency = (check_info['mx'] - check_info['mn']) \
|
|
/ (num_checks - 1)
|
|
else:
|
|
check_frequency = None
|
|
else:
|
|
last_check = None
|
|
num_checks = 0
|
|
check_frequency = None
|
|
|
|
for url in urls:
|
|
annotate_url(url, delays)
|
|
|
|
return {
|
|
'cutoff': cutoff,
|
|
'last_check': last_check,
|
|
'num_checks': num_checks,
|
|
'check_frequency': check_frequency,
|
|
'urls': urls,
|
|
}
|
|
|
|
|
|
@cache_function(117)
|
|
def get_mirror_errors(cutoff=default_cutoff):
|
|
cutoff_time = utc_now() - cutoff
|
|
errors = MirrorLog.objects.filter(
|
|
is_success=False, check_time__gte=cutoff_time,
|
|
url__mirror__active=True, url__mirror__public=True).values(
|
|
'url__url', 'url__country', 'url__protocol__protocol',
|
|
'url__mirror__country', 'error').annotate(
|
|
error_count=Count('error'), last_occurred=Max('check_time')
|
|
).order_by('-last_occurred', '-error_count')
|
|
errors = list(errors)
|
|
for err in errors:
|
|
ctry_code = err['url__country'] or err['url__mirror__country']
|
|
err['country'] = Country(ctry_code)
|
|
return errors
|
|
|
|
|
|
@cache_function(295)
|
|
def get_mirror_url_for_download(cutoff=default_cutoff):
|
|
'''Find a good mirror URL to use for package downloads. If we have mirror
|
|
status data available, it is used to determine a good choice by looking at
|
|
the last batch of status rows.'''
|
|
cutoff_time = utc_now() - cutoff
|
|
status_data = MirrorLog.objects.filter(
|
|
check_time__gte=cutoff_time).aggregate(
|
|
Max('check_time'), Max('last_sync'))
|
|
if status_data['check_time__max'] is not None:
|
|
min_check_time = status_data['check_time__max'] - timedelta(minutes=5)
|
|
min_sync_time = status_data['last_sync__max'] - timedelta(minutes=20)
|
|
best_logs = MirrorLog.objects.filter(is_success=True,
|
|
check_time__gte=min_check_time, last_sync__gte=min_sync_time,
|
|
url__mirror__public=True, url__mirror__active=True,
|
|
url__protocol__default=True).order_by(
|
|
'duration')[:1]
|
|
if best_logs:
|
|
return MirrorUrl.objects.get(id=best_logs[0].url_id)
|
|
|
|
mirror_urls = MirrorUrl.objects.filter(
|
|
mirror__public=True, mirror__active=True, protocol__default=True)
|
|
# look first for a country-agnostic URL, then fall back to any HTTP URL
|
|
filtered_urls = mirror_urls.filter(mirror__country='')[:1]
|
|
if not filtered_urls:
|
|
filtered_urls = mirror_urls[:1]
|
|
if not filtered_urls:
|
|
return None
|
|
return filtered_urls[0]
|
|
|
|
# vim: set ts=4 sw=4 et:
|