Mercurial > public > sg101
changeset 894:101728976f9c
Check html for <img src="http:...">.
Older Smiley code generated absolute URLs for smiley images. Check for this and
if found, save the model to force regeneration of HTML.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Wed, 18 Feb 2015 21:20:31 -0600 (2015-02-19) |
parents | 3aecf9058130 |
children | e7c549e4dbf7 |
files | core/management/commands/ssl_images.py core/tests/test_ssl_images.py |
diffstat | 2 files changed, 71 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/core/management/commands/ssl_images.py Tue Feb 17 18:59:06 2015 -0600
+++ b/core/management/commands/ssl_images.py Wed Feb 18 21:20:31 2015 -0600
@@ -20,6 +20,7 @@
 
 from django.core.management.base import NoArgsCommand, CommandError
 from django.conf import settings
+from lxml import etree
 import markdown.inlinepatterns
 from PIL import Image
 
@@ -280,6 +281,21 @@
     return IMAGE_LINK_RE.sub(replace_image_markup, text)
 
 
+def html_check(html):
+    """Return True if the given HTML fragment has <img> tags with src attributes
+    that use http, and False otherwise.
+    """
+    if not html:
+        return False
+
+    root = etree.HTML(html)
+    for img in root.iter('img'):
+        src = img.get('src')
+        if src and src.lower().startswith('http:'):
+            return True
+    return False
+
+
 class Command(NoArgsCommand):
     help = "Rewrite forum posts and comments to not use http for images"
     option_list = NoArgsCommand.option_list + (
@@ -362,5 +378,11 @@
                 logger.debug("changed: %s", new_txt)
                 setattr(model, text_attr, new_txt)
                 model.save()
+            elif html_check(model.html):
+                # Check for content generated with older smiley code that used
+                # absolute URLs for the smiley images. If True, then just save
+                # the model again to force updated HTML to be created.
+                logger.info("Older Smiley HTML detected, forcing a save")
+                model.save()
 
         logger.info("ssl_images exiting")
--- a/core/tests/test_ssl_images.py Tue Feb 17 18:59:06 2015 -0600
+++ b/core/tests/test_ssl_images.py Wed Feb 18 21:20:31 2015 -0600
@@ -5,6 +5,7 @@
 
 import mock
 
+from core.management.commands.ssl_images import html_check
 from core.management.commands.ssl_images import process_post
 import core.management.commands.ssl_images
 
@@ -265,3 +266,51 @@
         self.assertEqual(expected, result)
         expected_args = [mock.call(urlparse(c)) for c in old_src]
         self.assertEqual(check_https_mock.call_args_list, expected_args)
+
+
+class HtmlCheckTestCase(unittest.TestCase):
+
+    def test_empty(self):
+        self.assertFalse(html_check(''))
+
+    def test_no_images(self):
+        self.assertFalse(html_check('<p>Hi there!</p>'))
+        self.assertFalse(html_check('<p>Hi <b>there</b>!</p>'))
+
+    def test_safe_image(self):
+        self.assertFalse(html_check('<img src="https://a.jpg" />'))
+        self.assertFalse(html_check('<img src="" alt="stuff" />'))
+        self.assertFalse(html_check('<img src="HTTPS://a.jpg" />'))
+        self.assertFalse(html_check("""
+            <div>
+                <p>Look: <img src="https://a.jpg" alt="a" /></p>
+                <p>Look again: <img src="https://b.jpg" alt="b" /></p>
+            </div>
+            """))
+
+    def test_one_image(self):
+        self.assertTrue(html_check('<img src="http://a.jpg" alt="a" />'))
+        self.assertTrue(html_check(
+            '<p>Look: <img src="http://a.jpg" alt="a" /></p>'))
+
+    def test_two_images(self):
+        self.assertTrue(html_check("""
+            <p>Look: <img src="https://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            """))
+        self.assertTrue(html_check("""
+            <p>Look: <img src="http://a.jpg" alt="a" /></p>
+            <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            """))
+        self.assertTrue(html_check("""
+            <div>
+                <p>Look: <img src="http://a.jpg" alt="a" /></p>
+                <p>Look again: <img src="http://b.jpg" alt="b" /></p>
+            </div>
+            """))
+        self.assertTrue(html_check("""
+            <div>
+                <p>Look: <img src="http://a.jpg" alt="a" /></p>
+                <p>Look again: <img src="https://b.jpg" alt="b" /></p>
+            </div>
+            """))