bgneal@911: """oembed_refresh -
bgneal@911: A custom management command to refresh the oEmbed HTML for the oEmbed objects in
bgneal@911: our database.
bgneal@911: This command is currently designed to fix up SSL mixed content issues. In the
bgneal@911: future it would be nice to rework this command to refresh oEmbed objects if they
bgneal@911: are older than some date.
bgneal@911: """
bgneal@911: import datetime
bgneal@911: import logging
bgneal@911: import os.path
bgneal@911: import re
bgneal@911: import urllib2
bgneal@911: 
bgneal@911: from django.core.management.base import NoArgsCommand
bgneal@911: from django.conf import settings
bgneal@911: 
bgneal@911: from oembed.core import get_oembed
bgneal@911: from oembed.models import Oembed
bgneal@911: from oembed.models import Provider
bgneal@911: 
bgneal@911: 
# Path of the log file this command writes to:
# <settings.PROJECT_PATH>/logs/oembed_refresh.log
LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'oembed_refresh.log')
# Module-level logger; its file handler is attached by _setup_logging().
logger = logging.getLogger(__name__)
bgneal@911: 
bgneal@911: 
def _setup_logging():
    """Configure the module logger to write DEBUG and above to LOGFILE.

    Propagation is disabled so records are not also passed to the handlers of
    ancestor loggers. The function is safe to call more than once: if a file
    handler for LOGFILE is already attached, no second one is added, so log
    lines are never written twice.
    """
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    # FileHandler stores the absolute path of its file in `baseFilename`;
    # use it to detect a handler attached by an earlier call in this process.
    target = os.path.abspath(LOGFILE)
    for existing in logger.handlers:
        if getattr(existing, 'baseFilename', None) == target:
            return

    handler = logging.FileHandler(filename=LOGFILE, encoding='utf-8')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
bgneal@911: 
bgneal@911: 
def error_html(oembed):
    """Return fallback HTML for when the oEmbed data can't be retrieved.

    The result is a paragraph containing a plain link to ``oembed.url`` whose
    link text is ``oembed.title``. Both values are HTML-escaped so that
    characters such as ``&``, ``<``, ``>`` and ``"`` cannot break out of the
    attribute or inject markup.
    """
    # Local import keeps this block self-contained; stdlib on Python 2 and 3.
    from xml.sax.saxutils import escape

    # Coerce to text first so non-string values format as they did before,
    # then escape. The double quote must also be escaped inside the href
    # attribute value.
    url = escape(u'{0}'.format(oembed.url), {u'"': u'&quot;'})
    title = escape(u'{0}'.format(oembed.title))
    return u'<p>Video: <a href="{url}">{title}</a></p>'.format(
                url=url, title=title)
bgneal@911: 
bgneal@911: 
def refresh_html(providers, oembed):
    """Refresh the oEmbed HTML for the given oembed object.

    providers -- iterable of provider objects exposing `url_regex` and
                 `api_endpoint`; the first one whose regex matches
                 `oembed.url` is used.
    oembed    -- the object to refresh; its `html` attribute is replaced and
                 the object saved when new HTML is obtained.

    Outcomes:
    * no matching provider: logged as an error, object left untouched;
    * HTTP 4xx from the provider: logged, and the HTML is replaced by the
      fallback link produced by error_html();
    * HTTP 5xx or an unreachable provider: logged as critical, object left
      untouched;
    * response without an 'html' field: logged as an error, object left
      untouched;
    * success: the returned HTML is stored and the object saved.
    """

    # Find provider
    for p in providers:
        if re.match(p.url_regex, oembed.url):
            endpoint = p.api_endpoint
            break
    else:
        logger.error(u"No provider found for %s", oembed)
        return

    html = None
    try:
        result = get_oembed(endpoint, oembed.url, fmt='json',
                            maxwidth=settings.OEMBED_MAXWIDTH,
                            maxheight=settings.OEMBED_MAXHEIGHT,
                            scheme='https')
    except urllib2.HTTPError as ex:
        # HTTPError must be handled before URLError.
        if 400 <= ex.code < 500:
            logger.error(u"Server could not handle request for %s: %d", oembed, ex.code)
            html = error_html(oembed)
        else:
            logger.critical(u"Server error during request for %s: %d", oembed, ex.code)

    except urllib2.URLError as ex:
        logger.critical(u"Failed to reach provider for %s: %s", oembed, ex.reason)
    else:
        # Not every oEmbed response type carries an 'html' field; a missing
        # key must not abort the caller's loop over all objects.
        try:
            html = result['html']
        except KeyError:
            logger.error(u"Response for %s contained no 'html' field", oembed)

    if html:
        logger.info(u"Updating %s", oembed)
        oembed.html = html
        oembed.save()
bgneal@911: 
bgneal@911: 
class Command(NoArgsCommand):
    """Management command that refreshes stored oEmbed HTML.

    Only objects whose current HTML does not contain the text 'https:' are
    refreshed.
    """
    help = "Refresh oEmbed objects by requesting new data from providers"

    def handle_noargs(self, **options):
        """Refresh every qualifying Oembed object and log the elapsed time.

        options -- keyword options passed in by the management framework;
                   they are only logged.
        """
        time_started = datetime.datetime.now()
        _setup_logging()
        logger.info("Starting; arguments received: %s", options)

        # Load the providers once so the per-object lookup in refresh_html()
        # does not query them again for every object.
        providers = list(Provider.objects.all())

        for oembed in Oembed.objects.all().iterator():
            if 'https:' not in oembed.html:
                refresh_html(providers, oembed)

        elapsed = datetime.datetime.now() - time_started
        logger.info("Finished; elapsed time: %s", elapsed)