changeset 894:101728976f9c

Check html for <img src="http:...">. Older Smiley code generated absolute URLs for smiley images. Check for this and if found, save the model to force regeneration of HTML.
author Brian Neal <bgneal@gmail.com>
date Wed, 18 Feb 2015 21:20:31 -0600
parents 3aecf9058130
children e7c549e4dbf7
files core/management/commands/ssl_images.py core/tests/test_ssl_images.py
diffstat 2 files changed, 71 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/core/management/commands/ssl_images.py	Tue Feb 17 18:59:06 2015 -0600
+++ b/core/management/commands/ssl_images.py	Wed Feb 18 21:20:31 2015 -0600
@@ -20,6 +20,7 @@
 
 from django.core.management.base import NoArgsCommand, CommandError
 from django.conf import settings
+from lxml import etree
 import markdown.inlinepatterns
 from PIL import Image
 
@@ -280,6 +281,21 @@
     return IMAGE_LINK_RE.sub(replace_image_markup, text)
 
 
+def html_check(html):
+    """Return True if the given HTML fragment has an <img> tag whose src
+    attribute uses plain http (case-insensitive); False otherwise, including
+    for empty, whitespace-only, or element-free input.
+    """
+    if not html or not html.strip():
+        return False
+
+    root = etree.HTML(html)
+    if root is None:  # lxml yields None when no element could be parsed
+        return False
+    sources = (img.get('src') for img in root.iter('img'))
+    return any(src and src.lower().startswith('http:') for src in sources)
+
+
 class Command(NoArgsCommand):
     help = "Rewrite forum posts and comments to not use http for images"
     option_list = NoArgsCommand.option_list + (
@@ -362,5 +378,11 @@
                 logger.debug("changed:  %s", new_txt)
                 setattr(model, text_attr, new_txt)
                 model.save()
+            elif html_check(model.html):
+                # Check for content generated with older smiley code that used
+                # absolute URLs for the smiley images. If True, then just save
+                # the model again to force updated HTML to be created.
+                logger.info("Older Smiley HTML detected, forcing a save")
+                model.save()
 
         logger.info("ssl_images exiting")
--- a/core/tests/test_ssl_images.py	Tue Feb 17 18:59:06 2015 -0600
+++ b/core/tests/test_ssl_images.py	Wed Feb 18 21:20:31 2015 -0600
@@ -5,6 +5,7 @@
 
 import mock
 
+from core.management.commands.ssl_images import html_check
 from core.management.commands.ssl_images import process_post
 import core.management.commands.ssl_images
 
@@ -265,3 +266,51 @@
         self.assertEqual(expected, result)
         expected_args = [mock.call(urlparse(c)) for c in old_src]
         self.assertEqual(check_https_mock.call_args_list, expected_args)
+
+
+class HtmlCheckTestCase(unittest.TestCase):  # unit tests for html_check()
+
+    def test_empty(self):  # an empty string is reported as clean
+        self.assertFalse(html_check(''))
+
+    def test_no_images(self):  # markup without any <img> tag is clean
+        self.assertFalse(html_check('<p>Hi there!</p>'))
+        self.assertFalse(html_check('<p>Hi <b>there</b>!</p>'))
+
+    def test_safe_image(self):  # https (any case) or empty src is clean
+        self.assertFalse(html_check('<img src="https://a.jpg" />'))
+        self.assertFalse(html_check('<img src="" alt="stuff" />'))
+        self.assertFalse(html_check('<img src="HTTPS://a.jpg" />'))
+        self.assertFalse(html_check("""
+            <div>
+            <p>Look: <img src="https://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="https://b.jpg" alt="b" /></p>
+            </div>
+            """))
+
+    def test_one_image(self):  # a single http src is flagged, bare or nested
+        self.assertTrue(html_check('<img src="http://a.jpg" alt="a" />'))
+        self.assertTrue(html_check(
+            '<p>Look: <img src="http://a.jpg" alt="a" /></p>'))
+
+    def test_two_images(self):  # one http src among several is enough
+        self.assertTrue(html_check("""
+            <p>Look: <img src="https://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            """))
+        self.assertTrue(html_check("""
+            <p>Look: <img src="http://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            """))
+        self.assertTrue(html_check("""
+            <div>
+            <p>Look: <img src="http://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            </div>
+            """))
+        self.assertTrue(html_check("""
+            <div>
+            <p>Look: <img src="http://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="https://b.jpg" alt="b" /></p>
+            </div>
+            """))