annotate news/management/commands/fix_news_embeds.py @ 1075:5bba39fafad8

Added mgmt command to fix news story video embeds. These embeds stopped working when we went to https.
author Brian Neal <bgneal@gmail.com>
date Sat, 16 Apr 2016 00:31:06 -0500
parents
children b24708086bfc
rev   line source
bgneal@1075 1 """
bgneal@1075 2 Custom management command to find and fix the old YouTube <object> embeds in the
bgneal@1075 3 news stories.
bgneal@1075 4 """
bgneal@1075 5 from optparse import make_option
bgneal@1075 6 import urlparse
bgneal@1075 7 from urllib2 import HTTPError
bgneal@1075 8
bgneal@1075 9 from django.core.management.base import NoArgsCommand
bgneal@1075 10 import lxml.html
bgneal@1075 11
bgneal@1075 12 from news.models import Story
bgneal@1075 13 from oembed.core import get_oembed
bgneal@1075 14
bgneal@1075 15
class Command(NoArgsCommand):
    """Replace old flash YouTube ``<object>`` embeds in news stories.

    Each story's HTML fields are parsed, every ``<object>`` element that
    points at a known YouTube host is swapped for the markup returned by the
    oEmbed lookup, and the story is saved if anything changed. With
    ``--preview`` the rewritten HTML is only printed; nothing is saved.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
            action='store_true',
            default=False,
            help="find and print old embeds but don't update"),
    )

    # Hostnames whose embeds this command knows how to convert.
    YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

    # Story attributes that are scanned for embed markup.
    HTML_FIELDS = ('short_text', 'long_text', 'admin_content')

    def handle_noargs(self, **options):
        """Entry point: walk every story and process it."""
        self.preview = options['preview']

        for story in Story.objects.all().iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Process all HTML fields of *story*; save it if any field changed."""
        # Build the complete list first so that every field is processed;
        # feeding a generator to any() would stop at the first True.
        results = [self._process_html(story, field)
                   for field in self.HTML_FIELDS]
        if not self.preview and any(results):
            self.stdout.write("Updating %s" % story.title)
            story.save()

    def _process_html(self, story, field):
        """Rewrite the old embeds found in one field of *story*.

        Returns True when the field was modified on the story object (the
        caller is then responsible for saving). Returns False when the field
        is empty, contains nothing to replace, the story's version is not 0,
        or the command runs in preview mode.
        """
        html = getattr(story, field)
        s = (html or '').strip()
        if not s:
            return False

        root = lxml.html.fragment_fromstring(s, create_parent=True)

        # Take a snapshot: the tree is modified below, and modifying a tree
        # while iterating over it gives undefined results.
        objects = list(root.iter('object'))
        if not objects:
            return False

        # Only stories whose version is 0 are rewritten.
        if story.version != 0:
            self.stdout.write("%s %s bad version!" % ("*" * 5, story.title))
            return False

        changed = False
        for obj in objects:
            new_embed = self._find_replacement(story, obj)
            if not new_embed:
                continue
            new_child = lxml.html.fragment_fromstring(new_embed)
            # The tail (text following the element) travels with the old
            # element on replace(); copy it so that text is not lost.
            new_child.tail = obj.tail
            obj.getparent().replace(obj, new_child)
            changed = True

        if not changed:
            # Nothing was replaced: leave the stored HTML untouched.
            return False

        # Strip the "<div>" / "</div>" wrapper added by create_parent=True.
        new_html = lxml.html.tostring(root)[5:-6]
        if self.preview:
            self.stdout.write(story.title)
            self.stdout.write("%s\n\n\n" % new_html)
            return False

        setattr(story, field, new_html)
        return True

    def _find_replacement(self, story, obj):
        """Return replacement embed HTML for one ``<object>``, or None.

        The first ``<param>`` whose value is an http(s) URL on a known
        YouTube host and for which a new embed can be obtained wins.
        Unknown hosts and HTTP errors are reported and skipped.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not value or not value.startswith('http'):
                continue
            parsed = urlparse.urlparse(value)
            if parsed.hostname not in self.YOUTUBE_HOSTS:
                self.stdout.write("Unknown source hostname: %s ; %s" % (
                    parsed.hostname, story.title))
                continue
            try:
                new_embed = self._process_path(story, parsed.path)
            except HTTPError as ex:
                self.stdout.write("%s %s %s" % ("*" * 5, story.title, ex))
                continue
            if new_embed:
                return new_embed
        return None

    def _process_path(self, story, path):
        """Turn an old embed URL path into new embed HTML via oEmbed.

        ``/v/<id>`` paths become a watch URL and ``/p/<id>`` paths become a
        playlist URL (the id is prefixed with "PL"). Any other path is
        reported and None is returned. Exceptions raised by get_oembed are
        passed through to the caller.
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
        else:
            self.stdout.write("Unknown YouTube path: %s ; %s" % (
                path, story.title))
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
        return oembed['html']
bgneal@1075 92
bgneal@1075 93
def strip_query(path):
    """Return *path* cut off at its first ``&``.

    Everything from the first ampersand onward is dropped; a path that
    contains no ampersand is returned unchanged.
    """
    head, _sep, _rest = path.partition('&')
    return head