# HG changeset patch # User Brian Neal # Date 1460784666 18000 # Node ID 5bba39fafad8acbbd637c8bdbf45bbe95c7788cc # Parent 650ab160cbb9d0a58dd449b112d7e58491603c46 Added mgmt command to fix news story video embeds. These embeds stopped working when we went to https. diff -r 650ab160cbb9 -r 5bba39fafad8 news/management/commands/fix_news_embeds.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/news/management/commands/fix_news_embeds.py Sat Apr 16 00:31:06 2016 -0500 @@ -0,0 +1,98 @@ +""" +Custom management command to find and fix the old YouTube embeds in the +news stories. +""" +from optparse import make_option +import urlparse +from urllib2 import HTTPError + +from django.core.management.base import NoArgsCommand +import lxml.html + +from news.models import Story +from oembed.core import get_oembed + + +class Command(NoArgsCommand): + help = "Rewrite news stories that have old flash YouTube embeds" + option_list = NoArgsCommand.option_list + ( + make_option('-p', '--preview', + action='store_true', + default=False, + help="find and print old embeds but don't update"), + ) + + def handle_noargs(self, **options): + self.preview = options['preview'] + + qs = Story.objects.all() + for story in qs.iterator(): + self._process_story(story) + + def _process_story(self, story): + r1 = self._process_html(story, 'short_text') + r2 = self._process_html(story, 'long_text') + r3 = self._process_html(story, 'admin_content') + if not self.preview and (r1 or r2 or r3): + print "Updating", story.title + story.save() + + def _process_html(self, story, field): + html = getattr(story, field) + s = html.strip() + if not s: + return False + + root = lxml.html.fragment_fromstring(s, create_parent=True) + for obj in root.iter('object'): + if story.version != 0: + print "*" * 5, story.title, "bad version!" + continue + for param in obj.iter('param'): + value = param.get('value') + if value and value.startswith('http'): + r = urlparse.urlparse(value) + if (r.hostname != 'www.youtube.com' and + r.hostname != 'www.youtube-nocookie.com'): + print "Unknown source hostname:", r.hostname, ";", story.title + continue + try: + new_embed = self._process_path(story, r.path) + except HTTPError as ex: + print "*" * 5, story.title, ex + continue + if not new_embed: + continue + parent = obj.getparent() + new_child = lxml.html.fragment_fromstring(new_embed) + parent.replace(obj, new_child) + new_html = lxml.html.tostring(root)[5:-6] + if self.preview: + print story.title + print new_html, "\n" * 3 + return False + + setattr(story, field, new_html) + return True + + + def _process_path(self, story, path): + if path.startswith('/v/'): + video_id = strip_query(path[3:]) + url = "https://www.youtube.com/watch?v={}".format(video_id) + elif path.startswith('/p/'): + playlist_id = strip_query(path[3:]) + url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id) + else: + print "Unknown YouTube path:", path, ";", story.title + return None + + oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https') + return oembed['html'] + + +def strip_query(path): + n = path.find('&') + if n != -1: + return path[:n] + return path