# HG changeset patch # User Brian Neal # Date 1441245044 18000 # Node ID 734a4a350bd5ce067ced01b15e2b40a250f76147 # Parent 51a2051588f5f7e31e5bb6bb9a25a3f12e985381# Parent 2aaf9fa5867bb4512e92ce2fa7e78d57bfe39249 Merge with upstream. diff -r 2aaf9fa5867b -r 734a4a350bd5 comments/forms.py --- a/comments/forms.py Mon Aug 31 17:08:12 2015 -0500 +++ b/comments/forms.py Wed Sep 02 20:50:44 2015 -0500 @@ -7,6 +7,10 @@ from django.contrib.contenttypes.models import ContentType from comments.models import Comment +from core.html import ImageCheckError +from core.html import image_check +from core.markup import site_markup + COMMENT_MAX_LENGTH = getattr(settings, 'COMMENT_MAX_LENGTH', 3000) @@ -64,6 +68,18 @@ return new + def clean_comment(self): + comment = self.cleaned_data['comment'] + self.comment_html = None + if comment: + self.comment_html = site_markup(comment) + try: + image_check(self.comment_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return comment + class Media: css = { 'all': (settings.GPP_THIRD_PARTY_CSS['markitup'] + diff -r 2aaf9fa5867b -r 734a4a350bd5 comments/models.py --- a/comments/models.py Mon Aug 31 17:08:12 2015 -0500 +++ b/comments/models.py Wed Sep 02 20:50:44 2015 -0500 @@ -57,7 +57,9 @@ if not self.id: self.creation_date = datetime.datetime.now() - self.html = site_markup(self.comment) + self.html = kwargs.pop('html', None) + if self.html is None: + self.html = site_markup(self.comment) super(Comment, self).save(*args, **kwargs) def get_absolute_url(self): diff -r 2aaf9fa5867b -r 734a4a350bd5 comments/views.py --- a/comments/views.py Mon Aug 31 17:08:12 2015 -0500 +++ b/comments/views.py Wed Sep 02 20:50:44 2015 -0500 @@ -67,7 +67,12 @@ form = CommentForm(target, request.POST) if not form.is_valid(): - return HttpResponseBadRequest('Invalid comment; missing parameters?') + # The client side javascript is pretty simplistic right now and we don't + # want to change it yet. It is expecting a single error string. Just grab + # the first error message and use that. + errors = form.errors.as_data() + msg = errors.values()[0][0].message if errors else 'Unknown error' + return HttpResponseBadRequest(msg) comment = form.get_comment_object(request.user, request.META.get("REMOTE_ADDR", None)) @@ -76,7 +81,7 @@ if antispam.utils.spam_check(request, comment.comment): return HttpResponseForbidden(antispam.BUSTED_MESSAGE) - comment.save() + comment.save(html=form.comment_html) # return the rendered comment return render_to_response('comments/comment.html', { diff -r 2aaf9fa5867b -r 734a4a350bd5 core/functions.py --- a/core/functions.py Mon Aug 31 17:08:12 2015 -0500 +++ b/core/functions.py Wed Sep 02 20:50:44 2015 -0500 @@ -1,9 +1,9 @@ """This file houses various core utility functions""" -from contextlib import contextmanager import datetime import logging import os import re +import tempfile from django.contrib.sites.models import Site from django.conf import settings @@ -12,15 +12,23 @@ import core.tasks -@contextmanager -def temp_open(path, mode): - """A context manager for closing and removing temporary files.""" - fp = open(path, mode) - try: - yield fp - finally: - fp.close() - os.remove(path) +class TemporaryFile(object): + """Context manager class for working with temporary files. + + The file will be closed and removed when the context is exited. + """ + def __init__(self, **kwargs): + """kwargs will be passed to mkstemp.""" + self.kwargs = kwargs + + def __enter__(self): + self.fd, self.filename = tempfile.mkstemp(**self.kwargs) + self.file = os.fdopen(self.fd, 'w+b') + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.file.close() + os.remove(self.filename) def send_mail(subject, message, from_email, recipient_list, reply_to=None, defer=True): diff -r 2aaf9fa5867b -r 734a4a350bd5 core/html.py --- a/core/html.py Mon Aug 31 17:08:12 2015 -0500 +++ b/core/html.py Wed Sep 02 20:50:44 2015 -0500 @@ -1,5 +1,10 @@ """Common HTML related functions""" +from urlparse import urlparse + import bleach +from lxml import etree + +from django.conf import settings # Each entry in the _CLEAN_PROFILES dict is a profile name -> 3-tuple pair. The @@ -47,3 +52,47 @@ return bleach.clean(text, tags=tags, attributes=attrs, styles=styles, strip=True, strip_comments=True) + + +class ImageCheckError(Exception): + """Exception for the image_check() function""" + + +ALLOWED_HOSTS = set(settings.USER_IMAGES_SOURCES) + + +def image_check(html, allowed_hosts=None): + """Returns true if all image tags in the given html come from hosts + specified in the allowed_hosts container using https. + + An ImageCheckError is raised if the following problems are detected: + * the image src is missing altogether + * the scheme is missing or not https + * the hostname is missing + * the hostname is not in allowed_hosts + + If allowed_hosts is not None, it will be used as the whitelist of allowed + hosts. If None, USER_IMAGES_SOURCES from settings will be used as the + whitelist. + """ + if not allowed_hosts: + allowed_hosts = ALLOWED_HOSTS + + root = etree.HTML(html) + for img in root.iter('img'): + src = img.get('src') + if not src: + raise ImageCheckError("Missing image source") + r = urlparse(src) + + if not r.scheme and not r.hostname: + # relative URL is ok + continue + if r.scheme != 'https': + raise ImageCheckError("Image must be accessed via https") + if not r.hostname: + raise ImageCheckError("Missing image hostname") + if r.hostname not in allowed_hosts: + raise ImageCheckError("Invalid image source") + + return True diff -r 2aaf9fa5867b -r 734a4a350bd5 core/image_uploader.py --- a/core/image_uploader.py Mon Aug 31 17:08:12 2015 -0500 +++ b/core/image_uploader.py Wed Sep 02 20:50:44 2015 -0500 @@ -7,12 +7,10 @@ import logging from io import BytesIO import os.path -import tempfile import uuid from PIL import Image -from core.functions import temp_open from core.image import orient_image @@ -24,17 +22,16 @@ return b64encode(uuid.uuid4().bytes, '-_').rstrip('=') -def upload(fp, bucket, metadata=None, new_size=None, thumb_size=None): +def upload(filename, bucket, metadata=None, new_size=None, thumb_size=None): """Upload an image file to a given S3Bucket. The image can optionally be resized and a thumbnail can be generated and uploaded as well. Parameters: - fp - The image file to process. This is expected to be an instance of - Django's UploadedFile class. The file must have a name attribute and - we expect the name to have an extension. This is extension is - used for the uploaded image and thumbnail names. + filename - The path to the file to process. The filename should have an + extension, and this is used for the uploaded image & thumbnail + names. bucket - A core.s3.S3Bucket instance to upload to. metadata - If not None, must be a dictionary of metadata to apply to the uploaded file and thumbnail. @@ -51,52 +48,38 @@ a thumbnail was not requested. """ - filename = fp.name logger.info('Processing image file: %s', filename) - # Trying to use PIL (or Pillow) on a Django UploadedFile is often - # problematic because the file is often an in-memory file if it is under - # a certain size. This complicates matters and many of the operations we try - # to perform on it fail if this is the case. To get around these issues, - # we make a copy of the file on the file system and operate on the copy. - # First generate a unique name and temporary file path. unique_key = make_key() - ext = os.path.splitext(fp.name)[1] - temp_name = os.path.join(tempfile.gettempdir(), unique_key + ext) + ext = os.path.splitext(filename)[1] - # Write the UploadedFile to a temporary file on disk - with temp_open(temp_name, 'wb') as temp_file: - for chunk in fp.chunks(): - temp_file.write(chunk) - temp_file.close() + # Re-orient if necessary + image = Image.open(filename) + changed, image = orient_image(image) + if changed: + image.save(filename) - # Re-orient if necessary - image = Image.open(temp_name) - changed, image = orient_image(image) - if changed: - image.save(temp_name) + # Resize image if necessary + if new_size: + image = Image.open(filename) + if image.size > new_size: + logger.debug('Resizing from {} to {}'.format(image.size, new_size)) + image.thumbnail(new_size, Image.ANTIALIAS) + image.save(filename) - # Resize image if necessary - if new_size: - image = Image.open(temp_name) - if image.size > new_size: - logger.debug('Resizing from {} to {}'.format(image.size, new_size)) - image.thumbnail(new_size, Image.ANTIALIAS) - image.save(temp_name) + # Create thumbnail if necessary + thumb = None + if thumb_size: + logger.debug('Creating thumbnail {}'.format(thumb_size)) + image = Image.open(filename) + image.thumbnail(thumb_size, Image.ANTIALIAS) + thumb = BytesIO() + image.save(thumb, format=image.format) - # Create thumbnail if necessary - thumb = None - if thumb_size: - logger.debug('Creating thumbnail {}'.format(thumb_size)) - image = Image.open(temp_name) - image.thumbnail(thumb_size, Image.ANTIALIAS) - thumb = BytesIO() - image.save(thumb, format=image.format) - - # Upload images to S3 - file_key = unique_key + ext - logging.debug('Uploading image') - image_url = bucket.upload_from_filename(file_key, temp_name, metadata) + # Upload images to S3 + file_key = unique_key + ext + logging.debug('Uploading image') + image_url = bucket.upload_from_filename(file_key, filename, metadata) thumb_url = None if thumb: diff -r 2aaf9fa5867b -r 734a4a350bd5 core/management/commands/ssl_images.py --- a/core/management/commands/ssl_images.py Mon Aug 31 17:08:12 2015 -0500 +++ b/core/management/commands/ssl_images.py Wed Sep 02 20:50:44 2015 -0500 @@ -39,6 +39,7 @@ re.DOTALL | re.UNICODE) SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) +WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) MODEL_CHOICES = ['comments', 'posts'] PHOTO_MAX_SIZE = (660, 720) @@ -224,7 +225,10 @@ # Try a few things to get this on ssl: new_src = convert_to_ssl(r) elif r.scheme == 'https': - new_src = src # already https, accept it as-is + if r.hostname in WHITELIST_HOSTS: + new_src = src # already in whitelist + else: + new_src = convert_to_ssl(r) if new_src: if title: diff -r 2aaf9fa5867b -r 734a4a350bd5 core/tests/test_ssl_images.py --- a/core/tests/test_ssl_images.py Mon Aug 31 17:08:12 2015 -0500 +++ b/core/tests/test_ssl_images.py Wed Sep 02 20:50:44 2015 -0500 @@ -4,6 +4,7 @@ from urlparse import urlparse import mock +from django.conf import settings from core.management.commands.ssl_images import html_check from core.management.commands.ssl_images import process_post @@ -14,6 +15,10 @@ SG101_RE = re.compile(r'http://(?:www\.)?surfguitar101.com/', re.I) + def setUp(self): + self.assertTrue(len(settings.USER_IMAGES_SOURCES) > 0) + self.safe_host = settings.USER_IMAGES_SOURCES[0] + def tearDown(self): core.management.commands.ssl_images.url_cache = {} @@ -50,9 +55,9 @@ def test_https_already(self): test_str = """An image that is already using https: - ![flyer](https://example.com/zzz.png) + ![flyer](https://{}/zzz.png) It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(test_str, result) @@ -70,19 +75,19 @@ def test_multiple_non_http(self): test_str = """An image: ![image](http://www.surfguitar101.com/img.jpg) - And another: ![pic](HTTPS://example.com/foo/bar/img.png). - More stuff here.""" + And another: ![pic](HTTPS://{}/foo/bar/img.png). + More stuff here.""".format(self.safe_host) expected = """An image: ![image](/img.jpg) - And another: ![pic](HTTPS://example.com/foo/bar/img.png). - More stuff here.""" + And another: ![pic](HTTPS://{}/foo/bar/img.png). + More stuff here.""".format(self.safe_host) result = process_post(test_str) self.assertEqual(expected, result) def test_https_already_with_title(self): test_str = """An image that is already using https: - ![flyer](https://example.com/zzz.png "the title") + ![flyer](https://{}/zzz.png "the title") It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(test_str, result) @@ -112,13 +117,13 @@ def test_https_already_brackets(self): test_str = """An image that is already using https: - ![flyer]() + ![flyer]() It's cool. - """ + """.format(self.safe_host) expected = """An image that is already using https: - ![flyer](https://example.com/zzz.png) + ![flyer](https://{}/zzz.png) It's cool. - """ + """.format(self.safe_host) result = process_post(test_str) self.assertEqual(expected, result) @@ -172,13 +177,13 @@ def test_multiple_replacement_2(self, upload_mock): old_src = [ 'http://example.com/images/my_image.jpg', - 'https://example.com/static/wow.gif', + 'https://{}/static/wow.gif'.format(self.safe_host), 'http://www.surfguitar101.com/media/a/b/c/pic.png', 'http://surfguitar101.com/media/a/b/c/pic2.png', ] new_src = [ 'https://cloud.com/some/path/012345.jpg', - 'https://example.com/static/wow.gif', + 'https://{}/static/wow.gif'.format(self.safe_host), '/media/a/b/c/pic.png', '/media/a/b/c/pic2.png', ] diff -r 2aaf9fa5867b -r 734a4a350bd5 forums/forms.py --- a/forums/forms.py Mon Aug 31 17:08:12 2015 -0500 +++ b/forums/forms.py Wed Sep 02 20:50:44 2015 -0500 @@ -13,6 +13,9 @@ from forums.attachments import AttachmentProcessor import forums.permissions as perms from forums.signals import notify_new_topic, notify_new_post +from core.html import ImageCheckError +from core.html import image_check +from core.markup import site_markup FORUMS_FORM_CSS = { @@ -44,17 +47,26 @@ self.attach_proc = AttachmentProcessor(attachments) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): - raise forms.ValidationError("This field is required.") - return data + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): + raise forms.ValidationError("Please enter some text") + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def clean_topic_id(self): id = self.cleaned_data['topic_id'] try: self.topic = Topic.objects.select_related().get(pk=id) except Topic.DoesNotExist: - raise forms.ValidationError('invalid topic') + raise forms.ValidationError('Invalid topic') return id def save(self, user, ip=None): @@ -63,7 +75,7 @@ """ post = Post(topic=self.topic, user=user, body=self.cleaned_data['body'], user_ip=ip) - post.save() + post.save(html=self.body_html) self.attach_proc.save_attachments(post) notify_new_post(post) return post @@ -112,10 +124,19 @@ choices=[(v, v) for v in pks]) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): raise forms.ValidationError("This field is required.") - return data + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def save(self, ip=None): """ @@ -133,7 +154,7 @@ user=self.user, body=self.cleaned_data['body'], user_ip=ip) - post.save() + post.save(html=self.body_html) self.attach_proc.save_attachments(post) @@ -189,10 +210,19 @@ widget=forms.HiddenInput(attrs={'value': post.id})) def clean_body(self): - data = self.cleaned_data['body'] - if not data and not self.attach_proc.has_attachments(): + body = self.cleaned_data['body'] + self.body_html = None + if not body and not self.attach_proc.has_attachments(): raise forms.ValidationError('This field is required.') - return data + + if body: + self.body_html = site_markup(body) + try: + image_check(self.body_html) + except ImageCheckError as ex: + raise forms.ValidationError(str(ex)) + + return body def save(self, *args, **kwargs): commit = kwargs.get('commit', False) diff -r 2aaf9fa5867b -r 734a4a350bd5 forums/models.py --- a/forums/models.py Mon Aug 31 17:08:12 2015 -0500 +++ b/forums/models.py Wed Sep 02 20:50:44 2015 -0500 @@ -311,7 +311,9 @@ self.creation_date = datetime.datetime.now() self.update_date = self.creation_date - self.html = site_markup(self.body) + self.html = kwargs.pop('html', None) + if self.html is None: + self.html = site_markup(self.body) super(Post, self).save(*args, **kwargs) def delete(self, *args, **kwargs): diff -r 2aaf9fa5867b -r 734a4a350bd5 forums/views/main.py --- a/forums/views/main.py Mon Aug 31 17:08:12 2015 -0500 +++ b/forums/views/main.py Wed Sep 02 20:50:44 2015 -0500 @@ -359,7 +359,12 @@ }, context_instance=RequestContext(request)) - return HttpResponseBadRequest("Oops, did you forget some text?"); + # The client side javascript is pretty simplistic right now and we don't + # want to change it yet. It is expecting a single error string. Just grab + # the first error message and use that. + errors = form.errors.as_data() + msg = errors.values()[0][0].message if errors else 'Unknown error' + return HttpResponseBadRequest(msg) def _goto_post(post): @@ -439,7 +444,7 @@ return HttpResponseRedirect(reverse('antispam-suspended')) post = form.save(commit=False) post.touch() - post.save() + post.save(html=form.body_html) notify_updated_post(post) # if we are editing a first post, save the parent topic as well @@ -589,7 +594,7 @@ post.topic = topic post.user = request.user post.user_ip = request.META.get("REMOTE_ADDR", "") - post.save() + post.save(html=form.body_html) notify_new_post(post) # Save any attachments diff -r 2aaf9fa5867b -r 734a4a350bd5 sg101/settings/base.py --- a/sg101/settings/base.py Mon Aug 31 17:08:12 2015 -0500 +++ b/sg101/settings/base.py Wed Sep 02 20:50:44 2015 -0500 @@ -296,6 +296,14 @@ USER_PHOTOS_THUMB_SIZE = (120, 120) USER_PHOTOS_RATE_LIMIT = (50, 86400) # number / seconds +USER_IMAGES_SOURCES = [ + 'dl.dropboxusercontent.com', + 's3.amazonaws.com', + 's3-us-west-1.amazonaws.com', + 'scontent-ord1-1.xx.fbcdn.net', + 'lh3.googleusercontent.com', +] + # If this flag is False, the queued_search queue will not be processed. This is # useful when we are rebuilding the search index. SEARCH_QUEUE_ENABLED = True diff -r 2aaf9fa5867b -r 734a4a350bd5 user_photos/forms.py --- a/user_photos/forms.py Mon Aug 31 17:08:12 2015 -0500 +++ b/user_photos/forms.py Wed Sep 02 20:50:44 2015 -0500 @@ -1,12 +1,14 @@ """Forms for the user_photos application.""" import datetime import hashlib +import os.path from django import forms from django.conf import settings from core.s3 import S3Bucket from core.image_uploader import upload +from core.functions import TemporaryFile from core.services import get_redis_connection from user_photos.models import Photo @@ -70,14 +72,29 @@ base_url=settings.USER_PHOTOS_BASE_URL, bucket_name=settings.USER_PHOTOS_BUCKET) - now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - metadata = {'user': self.user.username, 'date': now} + # Trying to use PIL (or Pillow) on a Django UploadedFile is often + # problematic because the file is often an in-memory file if it is under + # a certain size. This complicates matters and many of the operations we try + # to perform on it fail if this is the case. To get around these issues, + # we make a copy of the file on the file system and operate on the copy. - url, thumb_url = upload(fp=self.cleaned_data['image_file'], - bucket=bucket, - metadata=metadata, - new_size=settings.USER_PHOTOS_MAX_SIZE, - thumb_size=settings.USER_PHOTOS_THUMB_SIZE) + fp = self.cleaned_data['image_file'] + ext = os.path.splitext(fp.name)[1] + + # Write the UploadedFile to a temporary file on disk + with TemporaryFile(suffix=ext) as t: + for chunk in fp.chunks(): + t.file.write(chunk) + t.file.close() + + now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + metadata = {'user': self.user.username, 'date': now} + + url, thumb_url = upload(filename=t.filename, + bucket=bucket, + metadata=metadata, + new_size=settings.USER_PHOTOS_MAX_SIZE, + thumb_size=settings.USER_PHOTOS_THUMB_SIZE) photo = Photo(user=self.user, url=url, thumb_url=thumb_url, signature=signature)