# HG changeset patch # User Brian Neal # Date 1441157620 18000 # Node ID 4619290d171dac447ef58cd983788c97c48e092b # Parent 10e7570a3aab6bfc8fdfd71826721ecb0400e2d2 Whitelist hot-linked image sources. diff -r 10e7570a3aab -r 4619290d171d comments/forms.py --- a/comments/forms.py Tue Aug 04 16:58:17 2015 -0500 +++ b/comments/forms.py Tue Sep 01 20:33:40 2015 -0500 @@ -7,6 +7,10 @@ from django.contrib.contenttypes.models import ContentType from comments.models import Comment +from core.html import ImageCheckError +from core.html import image_check +from core.markup import site_markup + COMMENT_MAX_LENGTH = getattr(settings, 'COMMENT_MAX_LENGTH', 3000) @@ -64,6 +68,18 @@ return new + def clean_comment(self): + comment = self.cleaned_data['comment'] + self.comment_html = None + if comment: + self.comment_html = site_markup(comment) + try: + image_check(self.comment_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return comment + class Media: css = { 'all': (settings.GPP_THIRD_PARTY_CSS['markitup'] + diff -r 10e7570a3aab -r 4619290d171d comments/models.py --- a/comments/models.py Tue Aug 04 16:58:17 2015 -0500 +++ b/comments/models.py Tue Sep 01 20:33:40 2015 -0500 @@ -57,7 +57,9 @@ if not self.id: self.creation_date = datetime.datetime.now() - self.html = site_markup(self.comment) + self.html = kwargs.pop('html', None) + if self.html is None: + self.html = site_markup(self.comment) super(Comment, self).save(*args, **kwargs) def get_absolute_url(self): diff -r 10e7570a3aab -r 4619290d171d comments/views.py --- a/comments/views.py Tue Aug 04 16:58:17 2015 -0500 +++ b/comments/views.py Tue Sep 01 20:33:40 2015 -0500 @@ -67,7 +67,12 @@ form = CommentForm(target, request.POST) if not form.is_valid(): - return HttpResponseBadRequest('Invalid comment; missing parameters?') + # The client side javascript is pretty simplistic right now and we don't + # want to change it yet. It is expecting a single error string. Just grab + # the first error message and use that. + errors = form.errors.as_data() + msg = errors.values()[0][0].message if errors else 'Unknown error' + return HttpResponseBadRequest(msg) comment = form.get_comment_object(request.user, request.META.get("REMOTE_ADDR", None)) @@ -76,7 +81,7 @@ if antispam.utils.spam_check(request, comment.comment): return HttpResponseForbidden(antispam.BUSTED_MESSAGE) - comment.save() + comment.save(html=form.comment_html) # return the rendered comment return render_to_response('comments/comment.html', { diff -r 10e7570a3aab -r 4619290d171d core/html.py --- a/core/html.py Tue Aug 04 16:58:17 2015 -0500 +++ b/core/html.py Tue Sep 01 20:33:40 2015 -0500 @@ -1,5 +1,10 @@ """Common HTML related functions""" +from urlparse import urlparse + import bleach +from lxml import etree + +from django.conf import settings # Each entry in the _CLEAN_PROFILES dict is a profile name -> 3-tuple pair. The @@ -47,3 +52,47 @@ return bleach.clean(text, tags=tags, attributes=attrs, styles=styles, strip=True, strip_comments=True) + + +class ImageCheckError(Exception): + """Exception for the image_check() function""" + + +ALLOWED_HOSTS = set(settings.USER_IMAGES_SOURCES) + + +def image_check(html, allowed_hosts=None): + """Returns true if all image tags in the given html come from hosts + specified in the allowed_hosts container using https. + + An ImageCheckError is raised if the following problems are detected: + * the image src is missing altogether + * the scheme is missing or not https + * the hostname is missing + * the hostname is not in allowed_hosts + + If allowed_hosts is not None, it will be used as the whitelist of allowed + hosts. If None, USER_IMAGES_SOURCES from settings will be used as the + whitelist. + """ + if not allowed_hosts: + allowed_hosts = ALLOWED_HOSTS + + root = etree.HTML(html) + for img in root.iter('img'): + src = img.get('src') + if not src: + raise ImageCheckError("Missing image source") + r = urlparse(src) + + if not r.scheme and not r.hostname: + # relative URL is ok + continue + if r.scheme != 'https': + raise ImageCheckError("Image must be accessed via https") + if not r.hostname: + raise ImageCheckError("Missing image hostname") + if r.hostname not in allowed_hosts: + raise ImageCheckError("Invalid image source") + + return True diff -r 10e7570a3aab -r 4619290d171d core/management/commands/ssl_images.py --- a/core/management/commands/ssl_images.py Tue Aug 04 16:58:17 2015 -0500 +++ b/core/management/commands/ssl_images.py Tue Sep 01 20:33:40 2015 -0500 @@ -39,6 +39,7 @@ re.DOTALL | re.UNICODE) SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) +WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) MODEL_CHOICES = ['comments', 'posts'] PHOTO_MAX_SIZE = (660, 720) @@ -224,7 +225,10 @@ # Try a few things to get this on ssl: new_src = convert_to_ssl(r) elif r.scheme == 'https': - new_src = src # already https, accept it as-is + if r.hostname in WHITELIST_HOSTS: + new_src = src # already in whitelist + else: + new_src = convert_to_ssl(r) if new_src: if title: diff -r 10e7570a3aab -r 4619290d171d core/tests/test_ssl_images.py --- a/core/tests/test_ssl_images.py Tue Aug 04 16:58:17 2015 -0500 +++ b/core/tests/test_ssl_images.py Tue Sep 01 20:33:40 2015 -0500 @@ -4,6 +4,7 @@ from urlparse import urlparse import mock +from django.conf import settings from core.management.commands.ssl_images import html_check from core.management.commands.ssl_images import process_post @@ -14,6 +15,10 @@ SG101_RE = re.compile(r'http://(?:www\.)?surfguitar101.com/', re.I) + def setUp(self): + self.assertTrue(len(settings.USER_IMAGES_SOURCES) > 0) + self.safe_host = settings.USER_IMAGES_SOURCES[0] + def tearDown(self): core.management.commands.ssl_images.url_cache = {} @@ -50,9 +55,9 @@ def test_https_already(self): test_str = """An image that is already using https: - ![flyer](https://example.com/zzz.png) + ![flyer](https://{}/zzz.png) It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(test_str, result) @@ -70,19 +75,19 @@ def test_multiple_non_http(self): test_str = """An image: ![image](http://www.surfguitar101.com/img.jpg) - And another: ![pic](HTTPS://example.com/foo/bar/img.png). - More stuff here.""" + And another: ![pic](HTTPS://{}/foo/bar/img.png). + More stuff here.""".format(self.safe_host) expected = """An image: ![image](/img.jpg) - And another: ![pic](HTTPS://example.com/foo/bar/img.png). - More stuff here.""" + And another: ![pic](HTTPS://{}/foo/bar/img.png). + More stuff here.""".format(self.safe_host) result = process_post(test_str) self.assertEqual(expected, result) def test_https_already_with_title(self): test_str = """An image that is already using https: - ![flyer](https://example.com/zzz.png "the title") + ![flyer](https://{}/zzz.png "the title") It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(test_str, result) @@ -112,13 +117,13 @@ def test_https_already_brackets(self): test_str = """An image that is already using https: - ![flyer]() + ![flyer]() It's cool. - """ + """.format(self.safe_host) expected = """An image that is already using https: - ![flyer](https://example.com/zzz.png) + ![flyer](https://{}/zzz.png) It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(expected, result) @@ -172,13 +177,13 @@ def test_multiple_replacement_2(self, upload_mock): old_src = [ 'http://example.com/images/my_image.jpg', - 'https://example.com/static/wow.gif', + 'https://{}/static/wow.gif'.format(self.safe_host), 'http://www.surfguitar101.com/media/a/b/c/pic.png', 'http://surfguitar101.com/media/a/b/c/pic2.png', ] new_src = [ 'https://cloud.com/some/path/012345.jpg', - 'https://example.com/static/wow.gif', + 'https://{}/static/wow.gif'.format(self.safe_host), '/media/a/b/c/pic.png', '/media/a/b/c/pic2.png', ] diff -r 10e7570a3aab -r 4619290d171d forums/forms.py --- a/forums/forms.py Tue Aug 04 16:58:17 2015 -0500 +++ b/forums/forms.py Tue Sep 01 20:33:40 2015 -0500 @@ -13,6 +13,9 @@ from forums.attachments import AttachmentProcessor import forums.permissions as perms from forums.signals import notify_new_topic, notify_new_post +from core.html import ImageCheckError +from core.html import image_check +from core.markup import site_markup FORUMS_FORM_CSS = { @@ -44,17 +47,26 @@ self.attach_proc = AttachmentProcessor(attachments) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): - raise forms.ValidationError("This field is required.") - return data + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): + raise forms.ValidationError("Please enter some text") + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def clean_topic_id(self): id = self.cleaned_data['topic_id'] try: self.topic = Topic.objects.select_related().get(pk=id) except Topic.DoesNotExist: - raise forms.ValidationError('invalid topic') + raise forms.ValidationError('Invalid topic') return id def save(self, user, ip=None): @@ -63,7 +75,7 @@ """ post = Post(topic=self.topic, user=user, body=self.cleaned_data['body'], user_ip=ip) - post.save() + post.save(html=self.body_html) self.attach_proc.save_attachments(post) notify_new_post(post) return post @@ -112,10 +124,19 @@ choices=[(v, v) for v in pks]) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): raise forms.ValidationError("This field is required.") - return data + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def save(self, ip=None): """ @@ -133,7 +154,7 @@ user=self.user, body=self.cleaned_data['body'], user_ip=ip) - post.save() + post.save(html=self.body_html) self.attach_proc.save_attachments(post) @@ -189,10 +210,19 @@ widget=forms.HiddenInput(attrs={'value': post.id})) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): raise forms.ValidationError('This field is required.') - return data + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def save(self, *args, **kwargs): commit = kwargs.get('commit', False) diff -r 10e7570a3aab -r 4619290d171d forums/models.py --- a/forums/models.py Tue Aug 04 16:58:17 2015 -0500 +++ b/forums/models.py Tue Sep 01 20:33:40 2015 -0500 @@ -311,7 +311,9 @@ self.creation_date = datetime.datetime.now() self.update_date = self.creation_date - self.html = site_markup(self.body) + self.html = kwargs.pop('html', None) + if self.html is None: + self.html = site_markup(self.body) super(Post, self).save(*args, **kwargs) def delete(self, *args, **kwargs): diff -r 10e7570a3aab -r 4619290d171d forums/views/main.py --- a/forums/views/main.py Tue Aug 04 16:58:17 2015 -0500 +++ b/forums/views/main.py Tue Sep 01 20:33:40 2015 -0500 @@ -359,7 +359,12 @@ }, context_instance=RequestContext(request)) - return HttpResponseBadRequest("Oops, did you forget some text?"); + # The client side javascript is pretty simplistic right now and we don't + # want to change it yet. It is expecting a single error string. Just grab + # the first error message and use that. + errors = form.errors.as_data() + msg = errors.values()[0][0].message if errors else 'Unknown error' + return HttpResponseBadRequest(msg) def _goto_post(post): @@ -439,7 +444,7 @@ return HttpResponseRedirect(reverse('antispam-suspended')) post = form.save(commit=False) post.touch() - post.save() + post.save(html=form.body_html) notify_updated_post(post) # if we are editing a first post, save the parent topic as well @@ -589,7 +594,7 @@ post.topic = topic post.user = request.user post.user_ip = request.META.get("REMOTE_ADDR", "") - post.save() + post.save(html=form.body_html) notify_new_post(post) # Save any attachments diff -r 10e7570a3aab -r 4619290d171d sg101/settings/base.py --- a/sg101/settings/base.py Tue Aug 04 16:58:17 2015 -0500 +++ b/sg101/settings/base.py Tue Sep 01 20:33:40 2015 -0500 @@ -296,6 +296,14 @@ USER_PHOTOS_THUMB_SIZE = (120, 120) USER_PHOTOS_RATE_LIMIT = (50, 86400) # number / seconds +USER_IMAGES_SOURCES = [ + 'dl.dropboxusercontent.com', + 's3.amazonaws.com', + 's3-us-west-1.amazonaws.com', + 'scontent-ord1-1.xx.fbcdn.net', + 'lh3.googleusercontent.com', +] + # If this flag is False, the queued_search queue will not be processed. This is # useful when we are rebuilding the search index. SEARCH_QUEUE_ENABLED = True