evorepo/mirrors/utils.py

from datetime import timedelta

from django.db.models import Avg, Count, Max, Min, StdDev

from main.utils import cache_function, utc_now
from .models import MirrorLog, MirrorProtocol, MirrorUrl


# Default look-back window (24 hours) used as the `cutoff` argument of the
# mirror status and mirror error queries below.
default_cutoff = timedelta(hours=24)

def annotate_url(url, delays):
    '''Given a MirrorURL object, add a few more attributes to it regarding
    status, including completion_pct, delay, and score.

    `url` must already carry the aggregate attributes `success_count`,
    `check_count`, `duration_avg` and `duration_stddev`. `delays` maps a URL
    id to a list of timedelta objects (check time minus last sync time).

    Attributes set on `url`:
      completion_pct -- success_count / check_count as a float, or 0.0 when
                        no checks were recorded.
      delay          -- mean of the URL's delays, or None when it has none.
      score          -- (delay in hours + duration_avg + duration_stddev)
                        divided by completion_pct; None when either the delay
                        or the average duration is unavailable.

    Nothing is returned; the object is modified in place.
    '''
    # Guard against a URL with no recorded checks instead of raising
    # ZeroDivisionError.
    if url.check_count:
        url.completion_pct = float(url.success_count) / url.check_count
    else:
        url.completion_pct = 0.0

    # A missing key and an empty list both mean "no delay data".
    url_delays = delays.get(url.id)
    if not url_delays:
        url.delay = None
        url.score = None
        return

    url.delay = sum(url_delays, timedelta()) / len(url_delays)

    # Without an average duration there is nothing sensible to score.
    if url.duration_avg is None:
        url.score = None
        return

    hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0

    if url.completion_pct > 0:
        divisor = url.completion_pct
    else:
        # arbitrary small value so a never-successful URL gets a huge score
        # rather than a division by zero
        divisor = 0.005

    # The stddev aggregate can come back as None; count it as no spread.
    stddev = url.duration_stddev or 0.0
    url.score = (hours + url.duration_avg + stddev) / divisor

@cache_function(123)
def get_mirror_statuses(cutoff=default_cutoff):
    '''Collect status information for every active, public mirror URL that
    has at least one log entry newer than now minus `cutoff`.

    Returns a dict with these keys:
      cutoff          -- the `cutoff` timedelta that was passed in.
      last_check      -- most recent check time across the URLs, or None.
      num_checks      -- largest per-URL check count, or 0 with no URLs.
      check_frequency -- time between the oldest and newest log entry in the
                         window divided by (num_checks - 1); None when fewer
                         than two checks exist.
      urls            -- the MirrorUrl queryset, each object run through
                         annotate_url().
    '''
    since = utc_now() - cutoff
    download_protocols = list(MirrorProtocol.objects.filter(is_download=True))

    # Per-URL aggregates over the log entries inside the window.
    urls = MirrorUrl.objects.select_related('mirror', 'protocol')
    urls = urls.filter(
            mirror__active=True, mirror__public=True,
            protocol__in=download_protocols,
            logs__check_time__gte=since)
    urls = urls.annotate(
            check_count=Count('logs'),
            success_count=Count('logs__duration'),
            last_sync=Max('logs__last_sync'),
            last_check=Max('logs__check_time'),
            duration_avg=Avg('logs__duration'),
            duration_stddev=StdDev('logs__duration'))
    urls = urls.order_by('-last_sync', '-duration_avg')

    # Average delay is awkward to express in the aggregate query above, so
    # fetch the successful log entries separately and bucket each one's
    # (check_time - last_sync) by URL id.
    synced_logs = MirrorLog.objects.filter(
            is_success=True, last_sync__isnull=False,
            check_time__gte=since)
    delays = {}
    for entry in synced_logs:
        delays.setdefault(entry.url_id, []).append(
                entry.check_time - entry.last_sync)

    # Summary values; these defaults stand when no URL matched.
    last_check = None
    num_checks = 0
    check_frequency = None
    if urls:
        last_check = max(u.last_check for u in urls)
        num_checks = max(u.check_count for u in urls)
        window = MirrorLog.objects.filter(check_time__gte=since).aggregate(
                mn=Min('check_time'), mx=Max('check_time'))
        if num_checks > 1:
            # N checks span N - 1 intervals.
            check_frequency = (window['mx'] - window['mn']) / (num_checks - 1)

    for mirror_url in urls:
        annotate_url(mirror_url, delays)

    return {
        'cutoff': cutoff,
        'last_check': last_check,
        'num_checks': num_checks,
        'check_frequency': check_frequency,
        'urls': urls,
    }

@cache_function(117)
def get_mirror_errors(cutoff=default_cutoff):
    '''Return a list of dicts describing failed checks of active, public
    mirrors newer than now minus `cutoff`.

    Rows are grouped by URL, URL country, protocol, mirror country and error
    text. Each dict carries those values plus `error_count`, `last_occurred`
    and a `country` key that holds the URL's own country when set, otherwise
    the mirror's country. Most recent and most frequent errors come first.
    '''
    since = utc_now() - cutoff

    failures = MirrorLog.objects.filter(
            is_success=False, check_time__gte=since,
            url__mirror__active=True, url__mirror__public=True)
    grouped = failures.values(
            'url__url', 'url__country', 'url__protocol__protocol',
            'url__mirror__country', 'error')
    grouped = grouped.annotate(
            error_count=Count('error'), last_occurred=Max('check_time'))
    grouped = grouped.order_by('-last_occurred', '-error_count')

    rows = list(grouped)
    for row in rows:
        # a per-URL country overrides the mirror-wide one
        row['country'] = row['url__country'] or row['url__mirror__country']
    return rows

# vim: set ts=4 sw=4 et:
Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`from datetime import timedelta`

Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`from django.db.models import Avg, Count, Max, Min, StdDev`

Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`from main.utils import cache_function, utc_now`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`from .models import MirrorLog, MirrorProtocol, MirrorUrl`


Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`default_cutoff = timedelta(hours=24)`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00
mirrors: pylint discovered cleanups Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-18 13:10:20 -07:00			`def annotate_url(url, delays):`
			`'''Given a MirrorURL object, add a few more attributes to it regarding`
			`status, including completion_pct, delay, and score.'''`
			`url.completion_pct = float(url.success_count) / url.check_count`
			`if url.id in delays:`
			`url_delays = delays[url.id]`
Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`url.delay = sum(url_delays, timedelta()) / len(url_delays)`
mirrors: pylint discovered cleanups Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-18 13:10:20 -07:00			`hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0`

			`if url.completion_pct > 0:`
			`divisor = url.completion_pct`
			`else:`
			`# arbitrary small value`
			`divisor = 0.005`
			`url.score = (hours + url.duration_avg + url.duration_stddev) / divisor`
			`else:`
			`url.delay = None`
			`url.score = None`

Adjust page and content caching lengths and decorators Remove never_cache from many places now that we don't actually need it since we aren't caching by default. Adjust our cache_function decorator times to be shorter values, and also randomize them a bit to make cache invalidations not all line up. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-01-05 11:03:00 -08:00			`@cache_function(123)`
Give more information about mirror check runs and frequency Show how many times the check has run in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`def get_mirror_statuses(cutoff=default_cutoff):`
Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`cutoff_time = utc_now() - cutoff`
Use new is_download field Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-30 12:32:54 -07:00			`protocols = list(MirrorProtocol.objects.filter(is_download=True))`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`# I swear, this actually has decent performance...`
Mirror status query refinements Only show errors for active and public mirrors, and collapse two filter calls into just one for our normal status query. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-24 11:26:08 -07:00			`urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`mirror__active=True, mirror__public=True,`
Mirror status query refinements Only show errors for active and public mirrors, and collapse two filter calls into just one for our normal status query. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-24 11:26:08 -07:00			`protocol__in=protocols,`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`logs__check_time__gte=cutoff_time).annotate(`
Mirror status improvements * Fix sorting issues. '', 'unknown', and '∞' should now always sort after anything else in the list. * Add a completion percentage column; this will tell you at a glance if a mirror is sometimes unresponsive. This should probably be incorporated into the mirror score. * Make a few more things dynamic in the template, like the time back the page reflects. * Add some additional template tags for formatting things. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-30 10:47:30 -07:00			`check_count=Count('logs'),`
			`success_count=Count('logs__duration'),`
			`last_sync=Max('logs__last_sync'),`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`last_check=Max('logs__check_time'),`
Switch mirror status delay display to average delay This takes a bit more work to compute, but since we cache all of this anyway it isn't too big of a deal. Using average delay instead of last delay will be a bit more fair on mirrors that have odd syncing schedules, as well as exposing those that only sync once a day. Also fix an issue that will arise with cutoff_time being calculated once, and adjust mirror score to treat hours delay as a float rather than an integer. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-22 11:28:12 -07:00			`duration_avg=Avg('logs__duration'),`
Switch back to using standard deviation in mirror check page This got checked in by default, whoops. Signed-off-by: Dan McGee <dan@archlinux.org> 2011-12-11 17:43:24 -08:00			`duration_stddev=StdDev('logs__duration')`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`).order_by('-last_sync', '-duration_avg')`

Switch mirror status delay display to average delay This takes a bit more work to compute, but since we cache all of this anyway it isn't too big of deal. Using average delay instead of last delay will be a bit more fair on mirrors that have odd syncing schedules, as well as exposing those that only sync once a day. Also fix an issue that will arise with cutoff_time being calculated once, and adjust mirror score to treat hours delay as a float rather than an integer. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-22 11:28:12 -07:00			`# The Django ORM makes it really hard to get actual average delay in the`
			`# above query, so run a separate query for it and we will process the`
			`# results here.`
			`times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,`
			`check_time__gte=cutoff_time)`
			`delays = {}`
			`for log in times:`
mirrors: pylint discovered cleanups Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-18 13:10:20 -07:00			`delay = log.check_time - log.last_sync`
			`delays.setdefault(log.url_id, []).append(delay)`
Switch mirror status delay display to average delay This takes a bit more work to compute, but since we cache all of this anyway it isn't too big of deal. Using average delay instead of last delay will be a bit more fair on mirrors that have odd syncing schedules, as well as exposing those that only sync once a day. Also fix an issue that will arise with cutoff_time being calculated once, and adjust mirror score to treat hours delay as a float rather than an integer. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-22 11:28:12 -07:00
Give more information about mirror check runs and frequency Show how many times the check has ran in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`if urls:`
			`last_check = max([u.last_check for u in urls])`
			`num_checks = max([u.check_count for u in urls])`
			`check_info = MirrorLog.objects.filter(`
			`check_time__gte=cutoff_time).aggregate(`
			`mn=Min('check_time'), mx=Max('check_time'))`
Fix an off by one error in math for check interval Because we are averaging the interval and not the value, we need to subtract one from the total we are dividing by. Whoops. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-10-01 16:08:54 -07:00			`if num_checks > 1:`
			`check_frequency = (check_info['mx'] - check_info['mn']) \`
			`/ (num_checks - 1)`
			`else:`
mirrors: pylint discovered cleanups Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-18 13:10:20 -07:00			`check_frequency = None`
Give more information about mirror check runs and frequency Show how many times the check has ran in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`else:`
			`last_check = None`
			`num_checks = 0`
			`check_frequency = None`

Mirror status improvements * Fix sorting issues. '', 'unknown', and '∞' should now always sort after anything else in the list. * Add a completion percentage column; this will tell you at a glance if a mirror is sometimes unresponsive. This should probably be incorporated into the mirror score. * Make a few more things dynamic in the template, like the time back the page reflects. * Add some additional template tags for formatting things. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-30 10:47:30 -07:00			`for url in urls:`
mirrors: pylint discovered cleanups Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-18 13:10:20 -07:00			`annotate_url(url, delays)`
Mirror status improvements * Fix sorting issues. '', 'unknown', and '∞' should now always sort after anything else in the list. * Add a completion percentage column; this will tell you at a glance if a mirror is sometimes unresponsive. This should probably be incorporated into the mirror score. * Make a few more things dynamic in the template, like the time back the page reflects. * Add some additional template tags for formatting things. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-30 10:47:30 -07:00
Give more information about mirror check runs and frequency Show how many times the check has ran in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`return {`
Mirror status improvements * Fix sorting issues. '', 'unknown', and '∞' should now always sort after anything else in the list. * Add a completion percentage column; this will tell you at a glance if a mirror is sometimes unresponsive. This should probably be incorporated into the mirror score. * Make a few more things dynamic in the template, like the time back the page reflects. * Add some additional template tags for formatting things. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-30 10:47:30 -07:00			`'cutoff': cutoff,`
Give more information about mirror check runs and frequency Show how many times the check has ran in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`'last_check': last_check,`
			`'num_checks': num_checks,`
			`'check_frequency': check_frequency,`
			`'urls': urls,`
			`}`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00
Adjust page and content caching lengths and decorators Remove never_cache from many places now that we don't actually need it since we aren't caching by default. Adjust our cache_function decorator times be shorter values, and also randomize them a bit to make cache invalidations not all line up. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-01-05 11:03:00 -08:00			`@cache_function(117)`
Give more information about mirror check runs and frequency Show how many times the check has ran in the last 24 hours, as well as the average interval between checks. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-23 08:33:32 -07:00			`def get_mirror_errors(cutoff=default_cutoff):`
Make all datetime objects fully timezone aware This is most of the transition to Django 1.4 `USE_TZ = True`. We need to ensure we don't mix aware and non-aware datetime objects when dealing with datetimes in the code. Add a utc_now() helper method that we can use most places, and ensure there is always a timezone attached when necessary. Signed-off-by: Dan McGee <dan@archlinux.org> 2012-03-23 17:29:40 -07:00			`cutoff_time = utc_now() - cutoff`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`errors = MirrorLog.objects.filter(`
Mirror status query refinements Only show errors for active and public mirrors, and collapse two filter calls into just one for our normal status query. Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-24 11:26:08 -07:00			`is_success=False, check_time__gte=cutoff_time,`
			`url__mirror__active=True, url__mirror__public=True).values(`
Add optional country override for individual mirror URLs This allows a named top-level mirror to have geographically distributed URLs, e.g. kernel.org and the geo-DNS setup. Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-12 16:36:20 -07:00			`'url__url', 'url__country', 'url__protocol__protocol',`
			`'url__mirror__country', 'error').annotate(`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00			`error_count=Count('error'), last_occurred=Max('check_time')`
			`).order_by('-last_occurred', '-error_count')`
Add optional country override for individual mirror URLs This allows a named top-level mirror to have geographically distributed URLs, e.g. kernel.org and the geo-DNS setup. Signed-off-by: Dan McGee <dan@archlinux.org> 2011-04-12 16:36:20 -07:00			`errors = list(errors)`
			`for err in errors:`
			`err['country'] = err['url__country'] or err['url__mirror__country']`
			`return errors`
Allow caching of mirror status info Signed-off-by: Dan McGee <dan@archlinux.org> 2010-09-21 16:31:26 -07:00
			`# vim: set ts=4 sw=4 et:`