bgneal@1075: """
bgneal@1075: Custom management command to find and fix the old YouTube <object> embeds in the
bgneal@1075: news stories.
bgneal@1075: """
bgneal@1075: from optparse import make_option
bgneal@1075: import urlparse
bgneal@1075: from urllib2 import HTTPError
bgneal@1075: 
bgneal@1075: from django.core.management.base import NoArgsCommand
bgneal@1075: import lxml.html
bgneal@1075: 
bgneal@1075: from news.models import Story
bgneal@1075: from oembed.core import get_oembed
bgneal@1075: 
bgneal@1075: 
class Command(NoArgsCommand):
    """Rewrite old flash YouTube ``<object>`` embeds found in news stories.

    Every ``Story`` is scanned. For each ``<object>`` element that has a
    ``<param>`` whose value is a URL on a known YouTube host, replacement
    markup is requested through oEmbed and substituted for the ``<object>``.
    With ``--preview`` the rewritten HTML is printed and nothing is saved.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
                    action='store_true',
                    default=False,
                    help="find and print old embeds but don't update"),
        )

    # Story attributes that hold HTML and are scanned for embeds.
    HTML_FIELDS = ('short_text', 'long_text', 'admin_content')

    # Hostnames accepted as the source of an old embed.
    YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

    def handle_noargs(self, **options):
        """Command entry point: process every story in the database."""
        self.preview = options['preview']

        qs = Story.objects.all()
        for story in qs.iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Rewrite all HTML fields of ``story`` and save it if any changed."""
        # Build the full list first (no short-circuit) so every field is
        # processed even when an earlier one was already modified.
        results = [self._process_html(story, field)
                   for field in self.HTML_FIELDS]
        if not self.preview and any(results):
            print("Updating %s" % story.title)
            story.save()

    def _process_html(self, story, field):
        """Replace every old YouTube ``<object>`` embed in one story field.

        Returns True when the attribute ``field`` on ``story`` was updated
        with new HTML (the caller is responsible for saving). Returns False
        when nothing was replaced, or when running in preview mode, in which
        case the rewritten HTML is only printed.
        """
        html = getattr(story, field)
        s = html.strip() if html else ''
        if not s:
            return False

        # The fragment is wrapped in a synthetic <div> parent so that content
        # with several top-level nodes still parses into a single tree.
        root = lxml.html.fragment_fromstring(s, create_parent=True)

        # Report (but do not modify) iframes that still use a plain http src.
        for iframe in root.iter('iframe'):
            src = iframe.get('src')
            if src and src.startswith('http:'):
                print("%s iframe with http src - %s" % ("*" * 5, story.title))

        replaced = False
        # Snapshot the matches: the tree is modified inside the loop.
        for obj in list(root.iter('object')):
            if story.version != 0:
                print("%s %s bad version!" % ("*" * 5, story.title))
                continue
            if not self._is_attached(obj, root):
                # Nested inside an <object> that has already been replaced.
                continue
            new_embed = self._find_new_embed(story, obj)
            if not new_embed:
                continue
            new_child = lxml.html.fragment_fromstring(new_embed)
            # lxml keeps tail text with the element being swapped out, so
            # carry it over or any text following the embed would be lost.
            new_child.tail = obj.tail
            obj.getparent().replace(obj, new_child)
            replaced = True

        if not replaced:
            return False

        # tostring() renders the synthetic wrapper as "<div>...</div>"; slice
        # off the 5-character opening tag and the 6-character closing tag.
        new_html = lxml.html.tostring(root)[5:-6]
        if self.preview:
            print(story.title)
            print("%s %s" % (new_html, "\n" * 3))
            return False

        setattr(story, field, new_html)
        return True

    @staticmethod
    def _is_attached(element, root):
        """Return True if ``element`` is still a descendant of ``root``."""
        return any(ancestor is root for ancestor in element.iterancestors())

    def _find_new_embed(self, story, obj):
        """Return replacement HTML for the ``<object>`` element, or None.

        Each ``<param>`` under ``obj`` whose value starts with ``http`` is
        tried in turn; the first one that yields oEmbed markup wins. Unknown
        hosts and HTTP errors are reported and the next param is tried.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not (value and value.startswith('http')):
                continue
            r = urlparse.urlparse(value)
            if r.hostname not in self.YOUTUBE_HOSTS:
                print("Unknown source hostname: %s ; %s" %
                      (r.hostname, story.title))
                continue
            try:
                new_embed = self._process_path(story, r.path)
            except HTTPError as ex:
                print("%s %s %s" % ("*" * 5, story.title, str(ex)))
                continue
            if new_embed:
                return new_embed
        return None

    def _process_path(self, story, path):
        """Map an old embed URL path to new embed HTML fetched via oEmbed.

        ``/v/<id>`` paths are treated as single videos and ``/p/<id>`` paths
        as playlists. Any other path is reported and None is returned.
        Exceptions raised by ``get_oembed`` propagate to the caller.
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
        else:
            print("Unknown YouTube path: %s ; %s" % (path, story.title))
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
        return oembed['html']
bgneal@1075: 
bgneal@1075: 
def strip_query(path):
    """Return ``path`` cut off just before its first ``&``.

    When ``path`` contains no ``&`` it is returned unchanged.
    """
    head, _sep, _rest = path.partition('&')
    return head