bgneal@1075: """ bgneal@1075: Custom management command to find and fix the old YouTube embeds in the bgneal@1075: news stories. bgneal@1075: """ bgneal@1075: from optparse import make_option bgneal@1075: import urlparse bgneal@1075: from urllib2 import HTTPError bgneal@1075: bgneal@1075: from django.core.management.base import NoArgsCommand bgneal@1075: import lxml.html bgneal@1075: bgneal@1075: from news.models import Story bgneal@1075: from oembed.core import get_oembed bgneal@1075: bgneal@1075: bgneal@1075: class Command(NoArgsCommand): bgneal@1075: help = "Rewrite news stories that have old flash YouTube embeds" bgneal@1075: option_list = NoArgsCommand.option_list + ( bgneal@1075: make_option('-p', '--preview', bgneal@1075: action='store_true', bgneal@1075: default=False, bgneal@1075: help="find and print old embeds but don't update"), bgneal@1075: ) bgneal@1075: bgneal@1075: def handle_noargs(self, **options): bgneal@1075: self.preview = options['preview'] bgneal@1075: bgneal@1075: qs = Story.objects.all() bgneal@1075: for story in qs.iterator(): bgneal@1075: self._process_story(story) bgneal@1075: bgneal@1075: def _process_story(self, story): bgneal@1075: r1 = self._process_html(story, 'short_text') bgneal@1075: r2 = self._process_html(story, 'long_text') bgneal@1075: r3 = self._process_html(story, 'admin_content') bgneal@1075: if not self.preview and (r1 or r2 or r3): bgneal@1075: print "Updating", story.title bgneal@1075: story.save() bgneal@1075: bgneal@1075: def _process_html(self, story, field): bgneal@1075: html = getattr(story, field) bgneal@1075: s = html.strip() bgneal@1075: if not s: bgneal@1075: return False bgneal@1075: bgneal@1075: root = lxml.html.fragment_fromstring(s, create_parent=True) bgneal@1076: bgneal@1076: for iframe in root.iter('iframe'): bgneal@1076: src = iframe.get('src') bgneal@1076: if src and src.startswith('http:'): bgneal@1076: print "*" * 5, "iframe with http src -", story.title bgneal@1076: bgneal@1075: for obj in root.iter('object'): bgneal@1075: if story.version != 0: bgneal@1075: print "*" * 5, story.title, "bad version!" bgneal@1075: continue bgneal@1075: for param in obj.iter('param'): bgneal@1075: value = param.get('value') bgneal@1075: if value and value.startswith('http'): bgneal@1075: r = urlparse.urlparse(value) bgneal@1075: if (r.hostname != 'www.youtube.com' and bgneal@1075: r.hostname != 'www.youtube-nocookie.com'): bgneal@1075: print "Unknown source hostname:", r.hostname, ";", story.title bgneal@1075: continue bgneal@1075: try: bgneal@1075: new_embed = self._process_path(story, r.path) bgneal@1075: except HTTPError as ex: bgneal@1075: print "*" * 5, story.title, ex bgneal@1075: continue bgneal@1075: if not new_embed: bgneal@1075: continue bgneal@1075: parent = obj.getparent() bgneal@1075: new_child = lxml.html.fragment_fromstring(new_embed) bgneal@1075: parent.replace(obj, new_child) bgneal@1075: new_html = lxml.html.tostring(root)[5:-6] bgneal@1075: if self.preview: bgneal@1075: print story.title bgneal@1075: print new_html, "\n" * 3 bgneal@1075: return False bgneal@1075: bgneal@1075: setattr(story, field, new_html) bgneal@1075: return True bgneal@1075: bgneal@1075: bgneal@1075: def _process_path(self, story, path): bgneal@1075: if path.startswith('/v/'): bgneal@1075: video_id = strip_query(path[3:]) bgneal@1075: url = "https://www.youtube.com/watch?v={}".format(video_id) bgneal@1075: elif path.startswith('/p/'): bgneal@1075: playlist_id = strip_query(path[3:]) bgneal@1075: url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id) bgneal@1075: else: bgneal@1075: print "Unknown YouTube path:", path, ";", story.title bgneal@1075: return None bgneal@1075: bgneal@1075: oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https') bgneal@1075: return oembed['html'] bgneal@1075: bgneal@1075: bgneal@1075: def strip_query(path): bgneal@1075: n = path.find('&') bgneal@1075: if n != -1: bgneal@1075: return path[:n] bgneal@1075: return path