bgneal@1075
|
1 """
|
bgneal@1075
|
2 Custom management command to find and fix the old YouTube <object> embeds in the
|
bgneal@1075
|
3 news stories.
|
bgneal@1075
|
4 """
|
bgneal@1075
|
5 from optparse import make_option
|
bgneal@1075
|
6 import urlparse
|
bgneal@1075
|
7 from urllib2 import HTTPError
|
bgneal@1075
|
8
|
bgneal@1075
|
9 from django.core.management.base import NoArgsCommand
|
bgneal@1075
|
10 import lxml.html
|
bgneal@1075
|
11
|
bgneal@1075
|
12 from news.models import Story
|
bgneal@1075
|
13 from oembed.core import get_oembed
|
bgneal@1075
|
14
|
bgneal@1075
|
15
|
bgneal@1075
|
class Command(NoArgsCommand):
    """Find old Flash YouTube ``<object>`` embeds in news stories and replace them.

    Every ``Story`` is scanned field by field. ``<object>`` elements whose
    ``<param>`` values point at YouTube are swapped for the HTML returned by
    YouTube's oEmbed endpoint. With ``--preview`` the rewritten HTML is only
    printed and nothing is saved.

    Messages are emitted with single-argument ``print(...)`` calls so the
    output is the same whether ``print`` is a statement or a function.
    """
    help = "Rewrite news stories that have old flash YouTube embeds"
    option_list = NoArgsCommand.option_list + (
        make_option('-p', '--preview',
            action='store_true',
            default=False,
            help="find and print old embeds but don't update"),
    )

    # Story attributes that are scanned for embeds.
    HTML_FIELDS = ('short_text', 'long_text', 'admin_content')

    # Hostnames accepted as the source of an old embed.
    YOUTUBE_HOSTS = ('www.youtube.com', 'www.youtube-nocookie.com')

    def handle_noargs(self, **options):
        """Run the command over every story.

        ``options['preview']`` selects the print-only mode.
        """
        self.preview = options['preview']

        qs = Story.objects.all()
        for story in qs.iterator():
            self._process_story(story)

    def _process_story(self, story):
        """Rewrite each HTML field of ``story`` and save it if any field changed."""
        # Build the full list first so every field is processed; any() on a
        # generator would stop at the first changed field.
        results = [self._process_html(story, field)
                   for field in self.HTML_FIELDS]
        if not self.preview and any(results):
            print("Updating %s" % story.title)
            story.save()

    def _process_html(self, story, field):
        """Replace old embeds in one field of ``story``.

        Returns True only when at least one ``<object>`` was replaced and the
        new HTML was stored on ``story`` (the caller is responsible for
        saving). In preview mode the new HTML is printed and False is
        returned, so nothing gets saved.
        """
        s = (getattr(story, field) or '').strip()
        if not s:
            return False

        root = lxml.html.fragment_fromstring(s, create_parent=True)

        # Report (but do not modify) iframes that still use plain http.
        for iframe in root.iter('iframe'):
            src = iframe.get('src')
            if src and src.startswith('http:'):
                print("***** iframe with http src - %s" % story.title)

        changed = False
        # Snapshot the matches: the tree is modified inside the loop.
        for obj in list(root.iter('object')):
            if root not in obj.iterancestors():
                # Was nested inside an <object> that has already been replaced.
                continue
            if story.version != 0:
                print("***** %s bad version!" % story.title)
                continue
            new_embed = self._find_replacement(story, obj)
            if not new_embed:
                continue
            new_child = lxml.html.fragment_fromstring(new_embed)
            # The tail text belongs to the element being removed; carry it
            # over so text following </object> is not lost.
            new_child.tail = obj.tail
            obj.getparent().replace(obj, new_child)
            changed = True

        if not changed:
            return False

        # create_parent=True wrapped the fragment in a <div>; slice off the
        # leading "<div>" and trailing "</div>" of the serialized tree.
        new_html = lxml.html.tostring(root)[5:-6]
        if self.preview:
            print(story.title)
            print("%s %s" % (new_html, "\n" * 3))
            return False

        setattr(story, field, new_html)
        return True

    def _find_replacement(self, story, obj):
        """Return replacement embed HTML for an ``<object>`` element, or None.

        The ``<param>`` children are tried in order; the first one whose value
        is a YouTube URL that can be converted supplies the result. Problems
        with individual params are printed and the next param is tried.
        """
        for param in obj.iter('param'):
            value = param.get('value')
            if not (value and value.startswith('http')):
                continue
            r = urlparse.urlparse(value)
            if r.hostname not in self.YOUTUBE_HOSTS:
                print("Unknown source hostname: %s ; %s" %
                      (r.hostname, story.title))
                continue
            try:
                new_embed = self._process_path(story, r.path)
            except HTTPError as ex:
                print("***** %s %s" % (story.title, ex))
                continue
            if new_embed:
                return new_embed
        return None

    def _process_path(self, story, path):
        """Return oEmbed HTML for the video or playlist named by ``path``.

        ``path`` is the path component of an old embed URL: ``/v/<id>`` for a
        video or ``/p/<id>`` for a playlist. Any other path is reported and
        None is returned. Exceptions raised by ``get_oembed`` are not caught
        here.
        """
        if path.startswith('/v/'):
            video_id = strip_query(path[3:])
            url = "https://www.youtube.com/watch?v={}".format(video_id)
        elif path.startswith('/p/'):
            playlist_id = strip_query(path[3:])
            url = "https://www.youtube.com/playlist?list=PL{}".format(playlist_id)
        else:
            print("Unknown YouTube path: %s ; %s" % (path, story.title))
            return None

        oembed = get_oembed("http://www.youtube.com/oembed", url, scheme='https')
        return oembed['html']
|
bgneal@1075
|
98
|
bgneal@1075
|
99
|
bgneal@1075
|
def strip_query(path):
    """Return ``path`` truncated at its first ``&``.

    Old embed URLs append their options directly to the path with ``&``
    (no ``?``), so everything from the first ``&`` onward is dropped. A path
    without ``&`` is returned unchanged.
    """
    return path.partition('&')[0]
|