comparison oembed/management/commands/oembed_refresh.py @ 911:d3f6e9cb1f39

First take at an oembed refresh command.
author Brian Neal <bgneal@gmail.com>
date Tue, 17 Mar 2015 21:24:45 -0500
parents
children 602639621257
comparison
equal deleted inserted replaced
910:90acb29478e9 911:d3f6e9cb1f39
1 """oembed_refresh -
2 A custom management command to refresh the oEmbed HTML for the oEmbed objects in
3 our database.
4 This command is currently designed to fix up SSL mixed content issues. In the
5 future it would be nice to rework this command to refresh oEmbed objects if they
6 are older than some date.
7 """
8 import datetime
9 import logging
10 import os.path
11 import re
12 import urllib2
13
14 from django.core.management.base import NoArgsCommand
15 from django.conf import settings
16
17 from oembed.core import get_oembed
18 from oembed.models import Oembed
19 from oembed.models import Provider
20
21
# Module-level logger for this command; _setup_logging() attaches its handler.
logger = logging.getLogger(__name__)

# File this command logs to: <PROJECT_PATH>/logs/oembed_refresh.log
LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'oembed_refresh.log')
24
25
def _setup_logging():
    """Point the module logger at LOGFILE.

    The logger is set to DEBUG level and stops propagating records to its
    ancestors, then a UTF-8 file handler with a
    "timestamp level message" format is attached to it.
    """
    logger.propagate = False
    logger.setLevel(logging.DEBUG)

    file_handler = logging.FileHandler(filename=LOGFILE, encoding='utf-8')
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(file_handler)
33
34
def error_html(oembed):
    """Build the fallback HTML used when oEmbed data can't be retrieved.

    Returns a unicode string holding a paragraph with a single link whose
    target is ``oembed.url`` and whose text is ``oembed.title``.
    """
    # NOTE(review): both values are interpolated as-is, with no HTML
    # escaping -- confirm they are trusted before this markup is rendered.
    template = u'<p><a href="{url}">{title}</a></p>'
    link_url = oembed.url
    link_text = oembed.title
    return template.format(url=link_url, title=link_text)
40
41
def refresh_html(providers, oembed):
    """Re-request the embed HTML for ``oembed`` and store the result.

    ``providers`` is an iterable of provider objects. The first one whose
    ``url_regex`` matches ``oembed.url`` supplies the API endpoint to query;
    if none matches, an error is logged and the object is left alone.

    Outcome of the request:
      * success -- the object's html becomes the response's 'html' value;
      * HTTP 4xx -- the object's html becomes the error_html() fallback link;
      * HTTP 5xx or URLError -- logged as critical, object left unchanged.

    The object is only saved when there is non-empty HTML to store.
    """
    # Pick the first provider whose pattern matches this object's URL.
    provider = next(
        (p for p in providers if re.match(p.url_regex, oembed.url)), None)
    if provider is None:
        logger.error("No provider found for %s", oembed)
        return
    endpoint = provider.api_endpoint

    new_html = None
    try:
        result = get_oembed(
            endpoint,
            oembed.url,
            fmt='json',
            maxwidth=settings.OEMBED_MAXWIDTH,
            maxheight=settings.OEMBED_MAXHEIGHT,
            scheme='https',
        )
    except urllib2.HTTPError as ex:
        # HTTPError must be handled before URLError: it is a subclass of it.
        if 400 <= ex.code < 500:
            logger.error("Server could not handle request for %s: %d", oembed, ex.code)
            new_html = error_html(oembed)
        else:
            logger.critical("Server error during request for %s: %d", oembed, ex.code)
    except urllib2.URLError as ex:
        logger.critical("Failed to reach provider for %s: %s", oembed, ex.reason)
    else:
        new_html = result['html']

    if not new_html:
        return

    logger.info("Updating %s", oembed)
    oembed.html = new_html
    oembed.save()
76
77
class Command(NoArgsCommand):
    """Management command that refreshes stored oEmbed HTML from providers.

    Only objects whose current HTML does not contain the text 'https:' are
    refreshed; everything else is skipped.
    """
    # The original literal ended with a stray pair of quotes ("...""" parses
    # as the string followed by an empty string); the value is unchanged.
    help = "Refresh oEmbed objects by requesting new data from providers"

    def handle_noargs(self, **options):
        """Refresh every Oembed object whose HTML lacks 'https:'.

        Sets up file logging, logs the options received, processes the
        objects, and finally logs the total elapsed time.
        """
        time_started = datetime.datetime.now()
        _setup_logging()
        logger.info("Starting; arguments received: %s", options)

        # Load the providers once; refresh_html() scans this list for each
        # object it processes.
        providers = list(Provider.objects.all())

        for oembed in Oembed.objects.all().iterator():
            if 'https:' not in oembed.html:
                refresh_html(providers, oembed)

        elapsed = datetime.datetime.now() - time_started
        logger.info("Finished; elapsed time: %s", elapsed)