# HG changeset patch # User Brian Neal # Date 1424316031 21600 # Node ID 101728976f9c26a900e3b8ba26cd3c6fbdab380c # Parent 3aecf9058130496b62910181b03954f95d56a115 Check html for <img> tags with http: src attributes. Older Smiley code generated absolute URLs for smiley images. Check for this and if found, save the model to force regeneration of HTML. diff -r 3aecf9058130 -r 101728976f9c core/management/commands/ssl_images.py --- a/core/management/commands/ssl_images.py Tue Feb 17 18:59:06 2015 -0600 +++ b/core/management/commands/ssl_images.py Wed Feb 18 21:20:31 2015 -0600 @@ -20,6 +20,7 @@ from django.core.management.base import NoArgsCommand, CommandError from django.conf import settings +from lxml import etree import markdown.inlinepatterns from PIL import Image @@ -280,6 +281,21 @@ return IMAGE_LINK_RE.sub(replace_image_markup, text) +def html_check(html): + """Return True if the given HTML fragment has <img> tags with src attributes + that use http, and False otherwise. + """ + if not html: + return False + + root = etree.HTML(html) + for img in root.iter('img'): + src = img.get('src') + if src and src.lower().startswith('http:'): + return True + return False + + class Command(NoArgsCommand): help = "Rewrite forum posts and comments to not use http for images" option_list = NoArgsCommand.option_list + ( @@ -362,5 +378,11 @@ logger.debug("changed: %s", new_txt) setattr(model, text_attr, new_txt) model.save() + elif html_check(model.html): + # Check for content generated with older smiley code that used + # absolute URLs for the smiley images. If True, then just save + # the model again to force updated HTML to be created. 
+ logger.info("Older Smiley HTML detected, forcing a save") + model.save() logger.info("ssl_images exiting") diff -r 3aecf9058130 -r 101728976f9c core/tests/test_ssl_images.py --- a/core/tests/test_ssl_images.py Tue Feb 17 18:59:06 2015 -0600 +++ b/core/tests/test_ssl_images.py Wed Feb 18 21:20:31 2015 -0600 @@ -5,6 +5,7 @@ import mock +from core.management.commands.ssl_images import html_check from core.management.commands.ssl_images import process_post import core.management.commands.ssl_images @@ -265,3 +266,51 @@ self.assertEqual(expected, result) expected_args = [mock.call(urlparse(c)) for c in old_src] self.assertEqual(check_https_mock.call_args_list, expected_args) + + +class HtmlCheckTestCase(unittest.TestCase): + + def test_empty(self): + self.assertFalse(html_check('')) + + def test_no_images(self): + self.assertFalse(html_check('

Hi there!

')) + self.assertFalse(html_check('

Hi there!

')) + + def test_safe_image(self): + self.assertFalse(html_check('')) + self.assertFalse(html_check('stuff')) + self.assertFalse(html_check('')) + self.assertFalse(html_check(""" +
+

Look: a

+

Look again: b

+
+ """)) + + def test_one_image(self): + self.assertTrue(html_check('a')) + self.assertTrue(html_check( + '

Look: a

')) + + def test_two_images(self): + self.assertTrue(html_check(""" +

Look: a

+

Look again: b

+ """)) + self.assertTrue(html_check(""" +

Look: a

+

Look again: b

+ """)) + self.assertTrue(html_check(""" +
+

Look: a

+

Look again: b

+
+ """)) + self.assertTrue(html_check(""" +
+

Look: a

+

Look again: b

+
+ """))