Mercurial > public > sg101
comparison news/management/commands/fix_news_embeds.py @ 1075:5bba39fafad8
Added mgmt command to fix news story video embeds.
These embeds stopped working when we went to https.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 16 Apr 2016 00:31:06 -0500 |
parents | |
children | b24708086bfc |
comparison
equal
deleted
inserted
replaced
1074:650ab160cbb9 | 1075:5bba39fafad8 |
---|---|
1 """ | |
2 Custom management command to find and fix the old YouTube <object> embeds in the | |
3 news stories. | |
4 """ | |
5 from optparse import make_option | |
6 import urlparse | |
7 from urllib2 import HTTPError | |
8 | |
9 from django.core.management.base import NoArgsCommand | |
10 import lxml.html | |
11 | |
12 from news.models import Story | |
13 from oembed.core import get_oembed | |
14 | |
15 | |
class Command(NoArgsCommand):
    """Find old Flash YouTube ``<object>`` embeds in news stories and replace
    them with the embed HTML obtained through ``get_oembed``.

    With ``--preview`` the rewritten HTML is only printed; no story is saved.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
            action='store_true',
            default=False,
            help="find and print old embeds but don't update"),
    )

    # Hostnames an old embed URL must have to be considered for rewriting.
    _YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

    # Story attributes whose HTML is scanned for old embeds.
    _HTML_FIELDS = ('short_text', 'long_text', 'admin_content')

    def handle_noargs(self, **options):
        """Run the rewrite over every story.

        Reads the ``preview`` option and stores it on the instance so the
        helper methods can consult it.
        """
        self.preview = options['preview']

        for story in Story.objects.all().iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Rewrite each HTML field of ``story`` and save it if any changed."""
        # A list (not a chained ``or``) so that every field is processed even
        # when an earlier one already reported a change.
        results = [self._process_html(story, field)
                   for field in self._HTML_FIELDS]
        if not self.preview and any(results):
            self.stdout.write(u"Updating %s" % story.title)
            story.save()

    def _process_html(self, story, field):
        """Replace the old embeds found in ``getattr(story, field)``.

        Returns True when the attribute was updated with new HTML (the caller
        is then expected to save the story). Returns False when the field is
        empty, when nothing was replaced, or in preview mode, where the new
        HTML is only printed.
        """
        html = getattr(story, field)
        s = (html or u'').strip()
        if not s:
            return False

        # create_parent=True wraps the fragment in a plain <div> so that
        # markup with several top-level nodes can be parsed.
        root = lxml.html.fragment_fromstring(s, create_parent=True)

        changed = False
        # Snapshot the matches first: the tree is modified inside the loop.
        for obj in list(root.iter('object')):
            if story.version != 0:
                self.stdout.write(
                    u"%s %s bad version!" % ("*" * 5, story.title))
                continue

            new_embed = self._find_replacement(story, obj)
            if not new_embed:
                continue

            new_child = lxml.html.fragment_fromstring(new_embed)
            # replace() takes the old element's tail away with it; carry the
            # tail over so text following the <object> is not lost.
            new_child.tail = obj.tail
            obj.getparent().replace(obj, new_child)
            changed = True

        if not changed:
            return False

        # Serialize as text and strip the wrapper <div> ... </div> that was
        # added by create_parent=True above.
        serialized = lxml.html.tostring(root, encoding='unicode')
        new_html = serialized[len('<div>'):-len('</div>')]

        if self.preview:
            self.stdout.write(story.title)
            self.stdout.write(u"%s %s" % (new_html, "\n" * 3))
            return False

        setattr(story, field, new_html)
        return True

    def _find_replacement(self, story, obj):
        """Return replacement embed HTML for one ``<object>`` element.

        Looks at the ``value`` of each ``<param>`` under ``obj`` and uses the
        first http(s) URL on a known YouTube host for which
        ``_process_path`` yields HTML. Returns None when no parameter
        qualifies; problems are reported on stdout.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not (value and value.startswith('http')):
                continue

            r = urlparse.urlparse(value)
            if r.hostname not in self._YOUTUBE_HOSTS:
                self.stdout.write(u"Unknown source hostname: %s ; %s" %
                                  (r.hostname, story.title))
                continue

            try:
                new_embed = self._process_path(story, r.path)
            except HTTPError as ex:
                self.stdout.write(
                    u"%s %s %s" % ("*" * 5, story.title, str(ex)))
                continue

            if new_embed:
                return new_embed

        return None

    def _process_path(self, story, path):
        """Map an old embed URL path to new embed HTML.

        ``/v/<id>`` paths are treated as videos and ``/p/<id>`` paths as
        playlists; any other path is reported and None is returned. The
        resulting watch/playlist URL is handed to ``get_oembed`` and the
        ``'html'`` entry of its result is returned.

        The caller handles ``HTTPError`` coming out of this method
        (presumably raised by ``get_oembed`` -- confirm against oembed.core).
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(
                playlist_id)
        else:
            self.stdout.write(u"Unknown YouTube path: %s ; %s" %
                              (path, story.title))
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url,
                            scheme='https')
        return oembed['html']
92 | |
93 | |
def strip_query(path):
    """Return ``path`` cut off just before its first ``'&'``.

    When ``path`` contains no ``'&'`` it is returned unchanged.
    """
    head, _sep, _rest = path.partition('&')
    return head