Mercurial > public > sg101
view news/management/commands/fix_news_embeds.py @ 1174:ba3230aba90c
Fix unicode error with wiki cookie processing
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Thu, 07 Jun 2018 19:53:13 -0500 |
parents | b24708086bfc |
children |
line wrap: on
line source
"""
Custom management command to find and fix the old YouTube <object> embeds in
the news stories.

"""
from __future__ import print_function

from optparse import make_option
import urlparse
from urllib2 import HTTPError

from django.core.management.base import NoArgsCommand
import lxml.html

from news.models import Story
from oembed.core import get_oembed


# Hostnames that an old flash embed is allowed to point at; any other host is
# reported and left untouched.
_YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

# Story fields that are scanned for embeds.
_HTML_FIELDS = ('short_text', 'long_text', 'admin_content')


class Command(NoArgsCommand):
    """Replace old YouTube <object> embeds in news stories with oEmbed HTML.

    With -p/--preview the rewritten HTML is printed and nothing is saved.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
                    action='store_true',
                    default=False,
                    help="find and print old embeds but don't update"),
    )

    def handle_noargs(self, **options):
        """Process every Story, honouring the --preview option."""
        self.preview = options['preview']

        for story in Story.objects.all().iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Rewrite each HTML field of `story`; save it if any field changed."""
        # Build the full list first so that every field is processed; a
        # short-circuiting `or` chain would skip the remaining fields as soon
        # as one of them reported a change.
        results = [self._process_html(story, field) for field in _HTML_FIELDS]

        if not self.preview and any(results):
            print("Updating", story.title)
            story.save()

    def _process_html(self, story, field):
        """Replace the old embeds found in one field of `story`.

        Returns True when the field was modified on the story object (the
        caller is responsible for saving). Returns False when the field is
        empty, when nothing was replaced, or when running in preview mode.
        """
        s = (getattr(story, field) or '').strip()
        if not s:
            return False

        # create_parent=True wraps the fragment in a <div>, which is stripped
        # off again when serializing below.
        root = lxml.html.fragment_fromstring(s, create_parent=True)

        # Report (but do not alter) iframes that still use a plain http src.
        for iframe in root.iter('iframe'):
            src = iframe.get('src')
            if src and src.startswith('http:'):
                print("*" * 5, "iframe with http src -", story.title)

        changed = False

        # Snapshot the matches: the tree is modified inside the loop, and
        # mutating it while a live iterator walks it can skip elements.
        for obj in list(root.iter('object')):
            if story.version != 0:
                print("*" * 5, story.title, "bad version!")
                continue

            new_embed = self._find_replacement(story, obj)
            if not new_embed:
                continue

            parent = obj.getparent()
            if parent is None:
                continue

            new_child = lxml.html.fragment_fromstring(new_embed)
            # lxml keeps the text that follows an element in that element's
            # `tail`, and replace() takes the tail away together with the old
            # node. Carry it over so text after </object> is not lost.
            new_child.tail = obj.tail
            parent.replace(obj, new_child)
            changed = True

        if not changed:
            return False

        # Drop the leading "<div>" and trailing "</div>" wrapper.
        new_html = lxml.html.tostring(root)[5:-6]

        if self.preview:
            print(story.title)
            print(new_html, "\n" * 3)
            return False

        setattr(story, field, new_html)
        return True

    def _find_replacement(self, story, obj):
        """Return replacement HTML for the <object> element `obj`, or None.

        The <param> children are scanned for a value that is an http(s) URL
        on a known YouTube host. The first one that yields embed HTML wins;
        params that cannot be handled are reported and skipped.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not value or not value.startswith('http'):
                continue

            r = urlparse.urlparse(value)
            if r.hostname not in _YOUTUBE_HOSTS:
                print("Unknown source hostname:", r.hostname, ";", story.title)
                continue

            try:
                new_embed = self._process_path(story, r.path)
            except HTTPError as ex:
                # NOTE(review): presumably raised by get_oembed when the
                # oEmbed request fails -- confirm against oembed.core.
                print("*" * 5, story.title, ex)
                continue

            if new_embed:
                return new_embed

        return None

    def _process_path(self, story, path):
        """Return oEmbed HTML for an old embed URL path, or None.

        Paths starting with /v/ are treated as a video id and paths starting
        with /p/ as a playlist id; anything else is reported and None is
        returned. `story` is only used for the diagnostic message.
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
        else:
            print("Unknown YouTube path:", path, ";", story.title)
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
        return oembed['html']


def strip_query(path):
    """Return `path` truncated just before its first '&', if it has one."""
    return path.partition('&')[0]