comparison core/management/commands/ssl_images.py @ 987:76525f5ac2b1

Modify ssl_images to update news models.
author Brian Neal <bgneal@gmail.com>
date Wed, 28 Oct 2015 21:06:13 -0500
parents 26de15fb5a80
children 65b2bc9cb3cc
comparison
equal deleted inserted replaced
986:26de15fb5a80 987:76525f5ac2b1
18 import uuid 18 import uuid
19 19
20 from django.core.management.base import NoArgsCommand, CommandError 20 from django.core.management.base import NoArgsCommand, CommandError
21 from django.conf import settings 21 from django.conf import settings
22 from lxml import etree 22 from lxml import etree
23 import lxml.html
23 import markdown.inlinepatterns 24 import markdown.inlinepatterns
24 from PIL import Image 25 from PIL import Image
25 import requests 26 import requests
26 27
27 from comments.models import Comment 28 from comments.models import Comment
28 from forums.models import Post 29 from forums.models import Post
29 from core.download import download_file 30 from core.download import download_file
30 from core.functions import remove_file 31 from core.functions import remove_file
31 from core.s3 import S3Bucket 32 from core.s3 import S3Bucket
33 from news.models import Story
32 34
33 35
34 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') 36 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log')
35 logger = logging.getLogger(__name__) 37 logger = logging.getLogger(__name__)
36 38
39 IMAGE_REF_RE = re.compile(markdown.inlinepatterns.IMAGE_REFERENCE_RE, 41 IMAGE_REF_RE = re.compile(markdown.inlinepatterns.IMAGE_REFERENCE_RE,
40 re.DOTALL | re.UNICODE) 42 re.DOTALL | re.UNICODE)
41 43
42 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) 44 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com'])
43 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) 45 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES)
44 MODEL_CHOICES = ['comments', 'posts'] 46 MODEL_CHOICES = ['comments', 'posts', 'news']
45 47
46 PHOTO_MAX_SIZE = (660, 720) 48 PHOTO_MAX_SIZE = (660, 720)
47 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL 49 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL
48 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET 50 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET
49 51
235 links, getting rid of plain old http sources; either converting to https 237 links, getting rid of plain old http sources; either converting to https
236 or relative style links (if the link is to SG101). 238 or relative style links (if the link is to SG101).
237 239
238 """ 240 """
239 return IMAGE_LINK_RE.sub(replace_image_markup, text) 241 return IMAGE_LINK_RE.sub(replace_image_markup, text)
242
243
def process_html(html):
    """Process the html fragment, converting to https where needed.

    Every <img> element with a non-empty ``src`` attribute is examined:

    * If the URL's host is in SG101_HOSTS, the src is rewritten as a
      site-relative URL (scheme and host removed).
    * Otherwise, if the scheme is http, or the scheme is https but the host
      is not in WHITELIST_HOSTS, the URL is handed to convert_to_ssl(). A
      truthy result becomes the new src; a falsy result means conversion
      failed, and the <img> is replaced in place by an
      ``<a href="...">Image</a>`` link to the original URL.

    Returns the stripped input if it is empty or whitespace only, the
    original ``html`` object if nothing needed to change, and otherwise the
    re-serialized fragment as a text (unicode) string.

    """
    s = html.strip()
    if not s:
        return s

    changed = False
    # create_parent=True wraps the fragment in a <div> so that leading text
    # and multiple top-level elements can be parsed; it is stripped again
    # before returning.
    root = lxml.html.fragment_fromstring(s, create_parent=True)

    # Take a snapshot of the matching elements up front: the loop body may
    # retag an <img> as an <a>, and mutating the tree while walking a live
    # iterator is not safe.
    for img in list(root.iter('img')):
        src = img.get('src')
        src = src.strip() if src else ''
        if not src:
            continue

        try:
            r = urlparse.urlparse(src)
        except ValueError:
            logger.warning("Bad url? Should not happen; skipping...")
            continue

        new_src = None
        if r.hostname in SG101_HOSTS:
            # Convert to a site-relative URL. Keep params, query string and
            # fragment; using r.path alone would silently drop them. An
            # empty path is left untouched, since there is nothing to make
            # relative.
            if r.path:
                new_src = urlparse.urlunparse(
                    ('', '', r.path, r.params, r.query, r.fragment))
        elif ((r.scheme == 'http') or
              (r.scheme == 'https' and r.hostname not in WHITELIST_HOSTS)):
            new_src = convert_to_ssl(r)
            if not new_src:
                # failed to convert to https; convert to a link
                tail = img.tail     # clear() wipes the tail text too
                img.clear()
                img.tag = 'a'
                img.set('href', src)
                img.text = 'Image'
                img.tail = tail
                changed = True

        if new_src:
            img.set('src', new_src)
            changed = True

    if changed:
        # Serialize to text rather than UTF-8 bytes so that the return type
        # matches the unchanged path (which returns the caller's own string)
        # and the result can be safely logged and compared by the caller.
        result = lxml.html.tostring(root, encoding='unicode')
        return result[5:-6]  # strip off parent div we added
    return html
240 286
241 287
242 def html_check(html): 288 def html_check(html):
243 """Return True if the given HTML fragment has <img> tags with src attributes 289 """Return True if the given HTML fragment has <img> tags with src attributes
244 that use http, and False otherwise. 290 that use http, and False otherwise.
281 if options['model'] not in MODEL_CHOICES: 327 if options['model'] not in MODEL_CHOICES:
282 raise CommandError('Please choose a --model option') 328 raise CommandError('Please choose a --model option')
283 329
284 if options['model'] == 'comments': 330 if options['model'] == 'comments':
285 qs = Comment.objects.all() 331 qs = Comment.objects.all()
286 text_attr = 'comment' 332 text_attrs = ['comment']
287 model_name = 'Comment' 333 model_name = 'Comment'
334 elif options['model'] == 'posts':
335 qs = Post.objects.all()
336 text_attrs = ['body']
337 model_name = 'Post'
288 else: 338 else:
289 qs = Post.objects.all() 339 qs = Story.objects.all()
290 text_attr = 'body' 340 text_attrs = ['short_text', 'long_text']
291 model_name = 'Post' 341 model_name = 'Story'
342
343 html_based = options['model'] == 'news'
292 344
293 i, j = options['i'], options['j'] 345 i, j = options['i'], options['j']
294 346
295 if i is not None and i < 0: 347 if i is not None and i < 0:
296 raise CommandError("-i must be >= 0") 348 raise CommandError("-i must be >= 0")
331 for n, model in enumerate(qs.iterator()): 383 for n, model in enumerate(qs.iterator()):
332 if quit_flag: 384 if quit_flag:
333 logger.warning("SIGINT received, exiting") 385 logger.warning("SIGINT received, exiting")
334 break 386 break
335 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk) 387 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk)
336 txt = getattr(model, text_attr) 388 save_flag = False
337 warn_if_image_refs(txt, model_name, model.pk) 389 for text_attr in text_attrs:
338 new_txt = process_post(txt) 390 txt = getattr(model, text_attr)
339 if txt != new_txt: 391
340 logger.info("Content changed on %s #%d (pk = %d)", 392 if html_based:
341 model_name, n + i, model.pk) 393 new_txt = process_html(txt)
342 logger.debug("original: %s", txt) 394 else:
343 logger.debug("changed: %s", new_txt) 395 new_txt = process_post(txt)
344 setattr(model, text_attr, new_txt) 396 warn_if_image_refs(txt, model_name, model.pk)
345 model.save() 397
346 elif html_check(model.html): 398 if txt != new_txt:
347 # Check for content generated with older smiley code that used 399 logger.info("Content changed on %s #%d (pk = %d)",
348 # absolute URLs for the smiley images. If True, then just save 400 model_name, n + i, model.pk)
349 # the model again to force updated HTML to be created. 401 logger.debug(u"original: %s", txt)
350 logger.info("Older Smiley HTML detected, forcing a save") 402 logger.debug(u"changed: %s", new_txt)
403 setattr(model, text_attr, new_txt)
404 save_flag = True
405 elif not html_based and html_check(model.html):
406 # Check for content generated with older smiley code that used
407 # absolute URLs for the smiley images. If True, then just save
408 # the model again to force updated HTML to be created.
409 logger.info("Older Smiley HTML detected, forcing a save")
410 save_flag = True
411
412 if save_flag:
351 model.save() 413 model.save()
352 count += 1 414 count += 1
353 415
354 time_finished = datetime.datetime.now() 416 time_finished = datetime.datetime.now()
355 elapsed = time_finished - time_started 417 elapsed = time_finished - time_started