changeset 1075:5bba39fafad8

Added mgmt command to fix news story video embeds. These embeds stopped working when we went to https.
author Brian Neal <bgneal@gmail.com>
date Sat, 16 Apr 2016 00:31:06 -0500
parents 650ab160cbb9
children b24708086bfc
files news/management/__init__.py news/management/commands/__init__.py news/management/commands/fix_news_embeds.py
diffstat 1 files changed, 98 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/news/management/commands/fix_news_embeds.py	Sat Apr 16 00:31:06 2016 -0500
@@ -0,0 +1,98 @@
+"""
+Custom management command to find and fix the old YouTube <object> embeds in the
+news stories.
+"""
+from optparse import make_option
+import urlparse
+from urllib2 import HTTPError
+
+from django.core.management.base import NoArgsCommand
+import lxml.html
+
+from news.models import Story
+from oembed.core import get_oembed
+
+
+class Command(NoArgsCommand):
+    help = "Rewrite news stories that have old flash YouTube embeds"
+    option_list = NoArgsCommand.option_list + (
+        make_option('-p', '--preview',
+                    action='store_true',
+                    default=False,
+                    help="find and print old embeds but don't update"),
+        )
+
+    def handle_noargs(self, **options):
+        self.preview = options['preview']
+
+        qs = Story.objects.all()
+        for story in qs.iterator():
+            self._process_story(story)
+
+    def _process_story(self, story):
+        r1 = self._process_html(story, 'short_text')
+        r2 = self._process_html(story, 'long_text')
+        r3 = self._process_html(story, 'admin_content')
+        if not self.preview and (r1 or r2 or r3):
+            print "Updating", story.title
+            story.save()
+
+    def _process_html(self, story, field):
+        html = getattr(story, field)
+        s = html.strip()
+        if not s:
+            return False
+
+        root = lxml.html.fragment_fromstring(s, create_parent=True)
+        for obj in root.iter('object'):
+            if story.version != 0:
+                print "*" * 5, story.title, "bad version!"
+                continue
+            for param in obj.iter('param'):
+                value = param.get('value')
+                if value and value.startswith('http'):
+                    r = urlparse.urlparse(value)
+                    if (r.hostname != 'www.youtube.com' and
+                        r.hostname != 'www.youtube-nocookie.com'):
+                        print "Unknown source hostname:", r.hostname, ";", story.title
+                        continue
+                    try:
+                        new_embed = self._process_path(story, r.path)
+                    except HTTPError as ex:
+                        print "*" * 5, story.title, ex
+                        continue
+                    if not new_embed:
+                        continue
+                    parent = obj.getparent()
+                    new_child = lxml.html.fragment_fromstring(new_embed)
+                    parent.replace(obj, new_child)
+                    new_html = lxml.html.tostring(root)[5:-6]
+                    if self.preview:
+                        print story.title
+                        print new_html, "\n" * 3
+                        return False
+
+                    setattr(story, field, new_html)
+                    return True
+
+
+    def _process_path(self, story, path):
+        if path.startswith('/v/'):
+            video_id = strip_query(path[3:])
+            url = "https://www.youtube.com/watch?v={}".format(video_id)
+        elif path.startswith('/p/'):
+            playlist_id = strip_query(path[3:])
+            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
+        else:
+            print "Unknown YouTube path:", path, ";", story.title
+            return None
+
+        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
+        return oembed['html']
+
+
+def strip_query(path):
+    n = path.find('&')
+    if n != -1:
+        return path[:n]
+    return path