Mercurial > public > sg101
comparison core/management/commands/ssl_images.py @ 987:76525f5ac2b1
Modify ssl_images to update news models.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Wed, 28 Oct 2015 21:06:13 -0500 |
parents | 26de15fb5a80 |
children | 65b2bc9cb3cc |
comparison
equal
deleted
inserted
replaced
986:26de15fb5a80 | 987:76525f5ac2b1 |
---|---|
18 import uuid | 18 import uuid |
19 | 19 |
20 from django.core.management.base import NoArgsCommand, CommandError | 20 from django.core.management.base import NoArgsCommand, CommandError |
21 from django.conf import settings | 21 from django.conf import settings |
22 from lxml import etree | 22 from lxml import etree |
| | 23 import lxml.html |
23 import markdown.inlinepatterns | 24 import markdown.inlinepatterns |
24 from PIL import Image | 25 from PIL import Image |
25 import requests | 26 import requests |
26 | 27 |
27 from comments.models import Comment | 28 from comments.models import Comment |
28 from forums.models import Post | 29 from forums.models import Post |
29 from core.download import download_file | 30 from core.download import download_file |
30 from core.functions import remove_file | 31 from core.functions import remove_file |
31 from core.s3 import S3Bucket | 32 from core.s3 import S3Bucket |
| | 33 from news.models import Story |
32 | 34 |
33 | 35 |
34 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') | 36 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') |
35 logger = logging.getLogger(__name__) | 37 logger = logging.getLogger(__name__) |
36 | 38 |
39 IMAGE_REF_RE = re.compile(markdown.inlinepatterns.IMAGE_REFERENCE_RE, | 41 IMAGE_REF_RE = re.compile(markdown.inlinepatterns.IMAGE_REFERENCE_RE, |
40 re.DOTALL | re.UNICODE) | 42 re.DOTALL | re.UNICODE) |
41 | 43 |
42 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) | 44 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) |
43 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) | 45 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) |
44 MODEL_CHOICES = ['comments', 'posts'] | 46 MODEL_CHOICES = ['comments', 'posts', 'news'] |
45 | 47 |
46 PHOTO_MAX_SIZE = (660, 720) | 48 PHOTO_MAX_SIZE = (660, 720) |
47 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL | 49 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL |
48 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET | 50 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET |
49 | 51 |
235 links, getting rid of plain old http sources; either converting to https | 237 links, getting rid of plain old http sources; either converting to https |
236 or relative style links (if the link is to SG101). | 238 or relative style links (if the link is to SG101). |
237 | 239 |
238 """ | 240 """ |
239 return IMAGE_LINK_RE.sub(replace_image_markup, text) | 241 return IMAGE_LINK_RE.sub(replace_image_markup, text) |
| | 242 |
| | 243 |
| | 244 def process_html(html): |
| | 245 """Process the html fragment, converting to https where needed.""" |
| | 246 s = html.strip() |
| | 247 if not s: |
| | 248 return s |
| | 249 |
| | 250 changed = False |
| | 251 root = lxml.html.fragment_fromstring(s, create_parent=True) |
| | 252 for img in root.iter('img'): |
| | 253 src = img.get('src') |
| | 254 src = src.strip() if src else '' |
| | 255 if src: |
| | 256 try: |
| | 257 r = urlparse.urlparse(src) |
| | 258 except ValueError: |
| | 259 logger.warning("Bad url? Should not happen; skipping...") |
| | 260 continue |
| | 261 |
| | 262 new_src = None |
| | 263 if r.hostname in SG101_HOSTS: |
| | 264 new_src = r.path # convert to relative path |
| | 265 elif ((r.scheme == 'http') or |
| | 266 (r.scheme == 'https' and r.hostname not in WHITELIST_HOSTS)): |
| | 267 new_src = convert_to_ssl(r) |
| | 268 if not new_src: |
| | 269 # failed to convert to https; convert to a link |
| | 270 tail = img.tail |
| | 271 img.clear() |
| | 272 img.tag = 'a' |
| | 273 img.set('href', src) |
| | 274 img.text = 'Image' |
| | 275 img.tail = tail |
| | 276 changed = True |
| | 277 |
| | 278 if new_src: |
| | 279 img.set('src', new_src) |
| | 280 changed = True |
| | 281 |
| | 282 if changed: |
| | 283 result = lxml.html.tostring(root, encoding='utf-8') |
| | 284 return result[5:-6] # strip off parent div we added |
| | 285 return html |
240 | 286 |
241 | 287 |
242 def html_check(html): | 288 def html_check(html): |
243 """Return True if the given HTML fragment has <img> tags with src attributes | 289 """Return True if the given HTML fragment has <img> tags with src attributes |
244 that use http, and False otherwise. | 290 that use http, and False otherwise. |
281 if options['model'] not in MODEL_CHOICES: | 327 if options['model'] not in MODEL_CHOICES: |
282 raise CommandError('Please choose a --model option') | 328 raise CommandError('Please choose a --model option') |
283 | 329 |
284 if options['model'] == 'comments': | 330 if options['model'] == 'comments': |
285 qs = Comment.objects.all() | 331 qs = Comment.objects.all() |
286 text_attr = 'comment' | 332 text_attrs = ['comment'] |
287 model_name = 'Comment' | 333 model_name = 'Comment' |
| | 334 elif options['model'] == 'posts': |
| | 335 qs = Post.objects.all() |
| | 336 text_attrs = ['body'] |
| | 337 model_name = 'Post' |
288 else: | 338 else: |
289 qs = Post.objects.all() | 339 qs = Story.objects.all() |
290 text_attr = 'body' | 340 text_attrs = ['short_text', 'long_text'] |
291 model_name = 'Post' | 341 model_name = 'Story' |
| | 342 |
| | 343 html_based = options['model'] == 'news' |
292 | 344 |
293 i, j = options['i'], options['j'] | 345 i, j = options['i'], options['j'] |
294 | 346 |
295 if i is not None and i < 0: | 347 if i is not None and i < 0: |
296 raise CommandError("-i must be >= 0") | 348 raise CommandError("-i must be >= 0") |
331 for n, model in enumerate(qs.iterator()): | 383 for n, model in enumerate(qs.iterator()): |
332 if quit_flag: | 384 if quit_flag: |
333 logger.warning("SIGINT received, exiting") | 385 logger.warning("SIGINT received, exiting") |
334 break | 386 break |
335 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk) | 387 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk) |
336 txt = getattr(model, text_attr) | 388 save_flag = False |
337 warn_if_image_refs(txt, model_name, model.pk) | 389 for text_attr in text_attrs: |
338 new_txt = process_post(txt) | 390 txt = getattr(model, text_attr) |
339 if txt != new_txt: | 391 |
340 logger.info("Content changed on %s #%d (pk = %d)", | 392 if html_based: |
341 model_name, n + i, model.pk) | 393 new_txt = process_html(txt) |
342 logger.debug("original: %s", txt) | 394 else: |
343 logger.debug("changed: %s", new_txt) | 395 new_txt = process_post(txt) |
344 setattr(model, text_attr, new_txt) | 396 warn_if_image_refs(txt, model_name, model.pk) |
345 model.save() | 397 |
346 elif html_check(model.html): | 398 if txt != new_txt: |
347 # Check for content generated with older smiley code that used | 399 logger.info("Content changed on %s #%d (pk = %d)", |
348 # absolute URLs for the smiley images. If True, then just save | 400 model_name, n + i, model.pk) |
349 # the model again to force updated HTML to be created. | 401 logger.debug(u"original: %s", txt) |
350 logger.info("Older Smiley HTML detected, forcing a save") | 402 logger.debug(u"changed: %s", new_txt) |
| | 403 setattr(model, text_attr, new_txt) |
| | 404 save_flag = True |
| | 405 elif not html_based and html_check(model.html): |
| | 406 # Check for content generated with older smiley code that used |
| | 407 # absolute URLs for the smiley images. If True, then just save |
| | 408 # the model again to force updated HTML to be created. |
| | 409 logger.info("Older Smiley HTML detected, forcing a save") |
| | 410 save_flag = True |
| | 411 |
| | 412 if save_flag: |
351 model.save() | 413 model.save() |
352 count += 1 | 414 count += 1 |
353 | 415 |
354 time_finished = datetime.datetime.now() | 416 time_finished = datetime.datetime.now() |
355 elapsed = time_finished - time_started | 417 elapsed = time_finished - time_started |