sg101: core/management/commands/ssl_images.py

comparison core/management/commands/ssl_images.py @ 987:76525f5ac2b1

Modify ssl_images to update news models.

author	Brian Neal <bgneal@gmail.com>
date	Wed, 28 Oct 2015 21:06:13 -0500
parents	26de15fb5a80
children	65b2bc9cb3cc

comparison

equal deleted inserted replaced

-:26de15fb5a80
+:76525f5ac2b1
 import uuid
 from django.core.management.base import NoArgsCommand, CommandError
 from django.conf import settings
 from lxml import etree
+import lxml.html
 import markdown.inlinepatterns
 from PIL import Image
 import requests
 from comments.models import Comment
 from forums.models import Post
 from core.download import download_file
 from core.functions import remove_file
 from core.s3 import S3Bucket
+from news.models import Story
 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log')
 logger = logging.getLogger(__name__)
 IMAGE_REF_RE = re.compile(markdown.inlinepatterns.IMAGE_REFERENCE_RE,
 re.DOTALL | re.UNICODE)
 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com'])
 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES)
-MODEL_CHOICES = ['comments', 'posts']
+MODEL_CHOICES = ['comments', 'posts', 'news']
 PHOTO_MAX_SIZE = (660, 720)
 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL
 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET
 links, getting rid of plain old http sources; either converting to https
 or relative style links (if the link is to SG101).
 """
 return IMAGE_LINK_RE.sub(replace_image_markup, text)
def process_html(html):
    """Process the html fragment, converting to https where needed.

    Each <img> element with a non-empty src attribute is examined:

    * If the src points at one of SG101_HOSTS it is rewritten as a
      site-relative URL (path plus any params, query string and fragment).
    * Otherwise, if the src uses plain http, or uses https on a host that
      is not in WHITELIST_HOSTS, it is handed to convert_to_ssl(). When
      that yields no replacement URL the <img> is turned into an
      <a href="...">Image</a> link to the original src.
    * Anything else (e.g. whitelisted https, relative URLs) is left alone.

    Returns the re-serialized fragment if anything was changed; otherwise
    the original html argument is returned untouched. If the input is
    empty after stripping whitespace, the stripped (empty) string is
    returned.
    """
    s = html.strip()
    if not s:
        return s

    changed = False
    # Wrap the fragment in a parent <div> so there is always a single root.
    root = lxml.html.fragment_fromstring(s, create_parent=True)
    for img in root.iter('img'):
        src = img.get('src')
        src = src.strip() if src else ''
        if not src:
            continue

        try:
            r = urlparse.urlparse(src)
        except ValueError:
            logger.warning("Bad url? Should not happen; skipping...")
            continue

        new_src = None
        if r.hostname in SG101_HOSTS:
            # Convert to a site-relative URL. Drop only the scheme and host;
            # keeping params/query/fragment avoids breaking links such as
            # /image.php?id=5. An empty path is normalized to '/' so the
            # result is never an empty (or document-relative) reference.
            new_src = urlparse.urlunparse(
                ('', '', r.path or '/', r.params, r.query, r.fragment))
        elif ((r.scheme == 'http') or
              (r.scheme == 'https' and r.hostname not in WHITELIST_HOSTS)):
            new_src = convert_to_ssl(r)
            if not new_src:
                # failed to convert to https; convert to a link
                tail = img.tail     # clear() wipes the tail text; keep it
                img.clear()
                img.tag = 'a'
                img.set('href', src)
                img.text = 'Image'
                img.tail = tail
                changed = True

        if new_src:
            img.set('src', new_src)
            changed = True

    if changed:
        # NOTE(review): with encoding='utf-8' this presumably returns an
        # encoded byte string rather than unicode text -- confirm callers
        # cope with that when the content is non-ASCII.
        result = lxml.html.tostring(root, encoding='utf-8')
        return result[5:-6]     # strip off parent div we added
    return html
 def html_check(html):
 """Return True if the given HTML fragment has <img> tags with src attributes
 that use http, and False otherwise.
 if options['model'] not in MODEL_CHOICES:
 raise CommandError('Please choose a --model option')
 if options['model'] == 'comments':
 qs = Comment.objects.all()
-text_attr = 'comment'
+text_attrs = ['comment']
 model_name = 'Comment'
+elif options['model'] == 'posts':
+qs = Post.objects.all()
+text_attrs = ['body']
+model_name = 'Post'
 else:
-qs = Post.objects.all()
+qs = Story.objects.all()
-text_attr = 'body'
+text_attrs = ['short_text', 'long_text']
-model_name = 'Post'
+model_name = 'Story'
+html_based = options['model'] == 'news'
 i, j = options['i'], options['j']
 if i is not None and i < 0:
 raise CommandError("-i must be >= 0")
 for n, model in enumerate(qs.iterator()):
 if quit_flag:
 logger.warning("SIGINT received, exiting")
 break
 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk)
-txt = getattr(model, text_attr)
+save_flag = False
-warn_if_image_refs(txt, model_name, model.pk)
+for text_attr in text_attrs:
-new_txt = process_post(txt)
+txt = getattr(model, text_attr)
-if txt != new_txt:
-logger.info("Content changed on %s #%d (pk = %d)",
+if html_based:
-model_name, n + i, model.pk)
+new_txt = process_html(txt)
-logger.debug("original: %s", txt)
+else:
-logger.debug("changed:  %s", new_txt)
+new_txt = process_post(txt)
-setattr(model, text_attr, new_txt)
+warn_if_image_refs(txt, model_name, model.pk)
-model.save()
-elif html_check(model.html):
+if txt != new_txt:
-# Check for content generated with older smiley code that used
+logger.info("Content changed on %s #%d (pk = %d)",
-# absolute URLs for the smiley images. If True, then just save
+model_name, n + i, model.pk)
-# the model again to force updated HTML to be created.
+logger.debug(u"original: %s", txt)
-logger.info("Older Smiley HTML detected, forcing a save")
+logger.debug(u"changed:  %s", new_txt)
+setattr(model, text_attr, new_txt)
+save_flag = True
+elif not html_based and html_check(model.html):
+# Check for content generated with older smiley code that used
+# absolute URLs for the smiley images. If True, then just save
+# the model again to force updated HTML to be created.
+logger.info("Older Smiley HTML detected, forcing a save")
+save_flag = True
+if save_flag:
 model.save()
 count += 1
 time_finished = datetime.datetime.now()
 elapsed = time_finished - time_started

Mercurial > public > sg101

comparison core/management/commands/ssl_images.py @ 987:76525f5ac2b1