# HG changeset patch
# User Brian Neal
# Date 1426645485 18000
# Node ID d3f6e9cb1f39d3dbad2dc2dbdcdf1d3a4366d91d
# Parent 90acb29478e9121f0bc7c81ae3ece938b1b10331
First take at an oembed refresh command.

diff -r 90acb29478e9 -r d3f6e9cb1f39 oembed/management/commands/oembed_refresh.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/oembed/management/commands/oembed_refresh.py	Tue Mar 17 21:24:45 2015 -0500
@@ -0,0 +1,119 @@
+"""oembed_refresh -
+A custom management command to refresh the oEmbed HTML for the oEmbed objects in
+our database.
+This command is currently designed to fix up SSL mixed content issues. In the
+future it would be nice to rework this command to refresh oEmbed objects if they
+are older than some date.
+"""
+import datetime
+import logging
+import os.path
+import re
+import urllib2
+
+from django.core.management.base import NoArgsCommand
+from django.conf import settings
+from django.utils.html import escape
+
+from oembed.core import get_oembed
+from oembed.models import Oembed
+from oembed.models import Provider
+
+
+LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'oembed_refresh.log')
+logger = logging.getLogger(__name__)
+
+
+def _setup_logging():
+    """Write this module's log records to LOGFILE.
+
+    Propagation is switched off so the records are not also passed to the
+    handlers of ancestor loggers.
+    """
+    logger.setLevel(logging.DEBUG)
+    logger.propagate = False
+    handler = logging.FileHandler(filename=LOGFILE, encoding='utf-8')
+    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+
+def error_html(oembed):
+    """Returns a string of HTML to be used when we can't retrieve the oEmbed
+    data.
+
+    The result is a link to oembed.url. The URL and the link text are escaped
+    before being placed in the markup.
+    """
+    # An empty title would render an invisible link; fall back to the URL.
+    title = oembed.title or oembed.url
+    return u'<p><a href="{url}">{title}</a></p>'.format(
+        url=escape(oembed.url), title=escape(title))
+
+
+def refresh_html(providers, oembed):
+    """Refresh the oEmbed HTML for the given oembed object.
+
+    providers is an iterable of Provider objects; the first one whose
+    url_regex matches oembed.url supplies the API endpoint. The object is
+    saved only when replacement HTML was obtained: HTML from the provider,
+    or the error_html() fallback after a 4xx response.
+    """
+
+    # Find provider
+    for p in providers:
+        if re.match(p.url_regex, oembed.url):
+            endpoint = p.api_endpoint
+            break
+    else:
+        logger.error("No provider found for %s", oembed)
+        return
+
+    html = None
+    try:
+        result = get_oembed(endpoint, oembed.url, fmt='json',
+                            maxwidth=settings.OEMBED_MAXWIDTH,
+                            maxheight=settings.OEMBED_MAXHEIGHT,
+                            scheme='https')
+    except urllib2.HTTPError as ex:
+        if 400 <= ex.code < 500:
+            # Client error: store the fallback link instead.
+            logger.error("Server could not handle request for %s: %d", oembed, ex.code)
+            html = error_html(oembed)
+        else:
+            # Server-side failure: leave the stored HTML untouched.
+            logger.critical("Server error during request for %s: %d", oembed, ex.code)
+
+    except urllib2.URLError as ex:
+        logger.critical("Failed to reach provider for %s: %s", oembed, ex.reason)
+    else:
+        # Not every oEmbed response type carries an "html" member.
+        html = result.get('html')
+        if not html:
+            logger.error("No HTML in response for %s", oembed)
+
+    if html:
+        logger.info("Updating %s", oembed)
+        oembed.html = html
+        oembed.save()
+
+
+class Command(NoArgsCommand):
+    help = "Refresh oEmbed objects by requesting new data from providers"
+
+    def handle_noargs(self, **options):
+        """Refresh each Oembed whose stored HTML contains no 'https:'."""
+        time_started = datetime.datetime.now()
+        _setup_logging()
+        logger.info("Starting; arguments received: %s", options)
+
+        providers = list(Provider.objects.all())
+
+        qs = Oembed.objects.all()
+        for oembed in qs.iterator():
+            if 'https:' not in oembed.html:
+                refresh_html(providers, oembed)
+
+        time_finished = datetime.datetime.now()
+        elapsed = time_finished - time_started
+        logger.info("Finished; elapsed time: %s", elapsed)
diff -r 90acb29478e9 -r d3f6e9cb1f39 oembed/models.py
--- a/oembed/models.py	Mon Mar 16 21:05:22 2015 -0500
+++ b/oembed/models.py	Tue Mar 17 21:24:45 2015 -0500
@@ -47,7 +47,8 @@
     date_added = models.DateTimeField()
 
     def __unicode__(self):
-        return self.title or self.url
+        desc = self.title if self.title else self.url
+        return u"<Oembed {}: {}>".format(self.pk, desc)
 
     def save(self, *args, **kwargs):
         if not self.pk: