comparison news/management/commands/fix_news_embeds.py @ 1075:5bba39fafad8

Added mgmt command to fix news story video embeds. These embeds stopped working when we went to https.
author Brian Neal <bgneal@gmail.com>
date Sat, 16 Apr 2016 00:31:06 -0500
parents
children b24708086bfc
comparison
equal deleted inserted replaced
1074:650ab160cbb9 1075:5bba39fafad8
1 """
2 Custom management command to find and fix the old YouTube <object> embeds in the
3 news stories.
4 """
5 from optparse import make_option
6 import urlparse
7 from urllib2 import HTTPError
8
9 from django.core.management.base import NoArgsCommand
10 import lxml.html
11
12 from news.models import Story
13 from oembed.core import get_oembed
14
15
class Command(NoArgsCommand):
    """Find old YouTube <object> embeds in news stories and replace them.

    Every Story is scanned. In each of its HTML fields, <object> elements
    whose <param> value points at a known YouTube host are swapped for the
    HTML returned by the oEmbed lookup. With --preview the rewritten HTML is
    only printed and nothing is saved.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
            action='store_true',
            default=False,
            help="find and print old embeds but don't update"),
    )

    # Hostnames accepted as the source of an old embed.
    _YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

    # Story fields that hold HTML and are scanned for embeds.
    _HTML_FIELDS = ('short_text', 'long_text', 'admin_content')

    # NOTE: print is always called with a single pre-formatted argument so the
    # output is the same under the Python 2 print statement this file targets.

    def handle_noargs(self, **options):
        """Run every story in the database through _process_story()."""
        self.preview = options['preview']

        qs = Story.objects.all()
        for story in qs.iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Rewrite all HTML fields of `story`; save it if any field changed."""
        # Build the full list first (no short-circuit): every field must be
        # processed before the single save below.
        results = [self._process_html(story, field)
                   for field in self._HTML_FIELDS]
        if not self.preview and any(results):
            print("Updating %s" % story.title)
            story.save()

    def _process_html(self, story, field):
        """Replace the old embeds found in one HTML field of `story`.

        Returns True only when the field's value was actually rewritten on
        the story object. Returns False when the field is empty, when nothing
        was replaced, or when running in preview mode (the new HTML is
        printed instead of stored).
        """
        html = getattr(story, field)
        s = html.strip() if html else ''
        if not s:
            return False

        root = lxml.html.fragment_fromstring(s, create_parent=True)
        changed = False
        # Iterate over a snapshot: _replace_embed() mutates the tree, and a
        # live iterator positioned on a detached node would skip the objects
        # that follow it.
        for obj in list(root.iter('object')):
            if story.version != 0:
                print("%s %s bad version!" % ("*" * 5, story.title))
                continue
            if self._replace_embed(story, obj):
                changed = True

        if not changed:
            # Nothing was replaced, so leave the stored HTML untouched.
            return False

        # create_parent=True wrapped the fragment in a <div>; slice off the
        # leading "<div>" and trailing "</div>" again.
        new_html = lxml.html.tostring(root)[5:-6]
        if self.preview:
            print(story.title)
            print("%s %s" % (new_html, "\n" * 3))
            return False

        setattr(story, field, new_html)
        return True

    def _replace_embed(self, story, obj):
        """Swap one <object> element for its new embed markup.

        The object's <param> values are scanned for a URL on a known YouTube
        host. The first one that yields replacement HTML wins. Returns True
        if `obj` was replaced in its parent, False otherwise.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not (value and value.startswith('http')):
                continue

            r = urlparse.urlparse(value)
            if r.hostname not in self._YOUTUBE_HOSTS:
                print("Unknown source hostname: %s ; %s" % (
                    r.hostname, story.title))
                continue

            try:
                new_embed = self._process_path(story, r.path)
            except HTTPError as ex:
                print("%s %s %s" % ("*" * 5, story.title, ex))
                continue
            if not new_embed:
                continue

            new_child = lxml.html.fragment_fromstring(new_embed)
            # lxml keeps the text that follows an element in that element's
            # .tail; carry it over so it is not lost with the old <object>.
            new_child.tail = obj.tail
            obj.getparent().replace(obj, new_child)
            # The object is now detached from the tree; stop scanning its
            # params (obj.getparent() would be None from here on).
            return True
        return False

    def _process_path(self, story, path):
        """Return new embed HTML for an old embed URL path, or None.

        '/v/<id>' paths map to a watch URL and '/p/<id>' paths to a playlist
        URL. Any other path is reported and None is returned. The resulting
        URL is handed to get_oembed() and the 'html' entry of its result is
        returned. HTTPError is not handled here; the caller deals with it.
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
        else:
            print("Unknown YouTube path: %s ; %s" % (path, story.title))
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
        return oembed['html']
92
93
def strip_query(path):
    """Return `path` cut off at its first '&'.

    Everything from the first '&' onward is dropped. A path that contains
    no '&' is returned unchanged.
    """
    head, _sep, _rest = path.partition('&')
    return head