changeset 970:bd594bcba5eb

Merge with upstream.
author Brian Neal <bgneal@gmail.com>
date Sun, 13 Sep 2015 14:51:33 -0500
parents 734a4a350bd5 (diff) 69c72ed469d3 (current diff)
children 4f265f61874b
files
diffstat 13 files changed, 232 insertions(+), 98 deletions(-) [+]
line wrap: on
line diff
--- a/comments/forms.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/comments/forms.py	Sun Sep 13 14:51:33 2015 -0500
@@ -7,6 +7,10 @@
 from django.contrib.contenttypes.models import ContentType
 
 from comments.models import Comment
+from core.html import ImageCheckError
+from core.html import image_check
+from core.markup import site_markup
+
 
 COMMENT_MAX_LENGTH = getattr(settings, 'COMMENT_MAX_LENGTH', 3000)
 
@@ -64,6 +68,18 @@
 
         return new
 
+    def clean_comment(self):
+        comment = self.cleaned_data['comment']
+        self.comment_html = None
+        if comment:
+            self.comment_html = site_markup(comment)
+            try:
+                image_check(self.comment_html)
+            except ImageCheckError as ex:
+                raise forms.ValidationError(str(ex))
+
+        return comment
+
     class Media:
         css = {
             'all': (settings.GPP_THIRD_PARTY_CSS['markitup'] +
--- a/comments/models.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/comments/models.py	Sun Sep 13 14:51:33 2015 -0500
@@ -57,7 +57,9 @@
         if not self.id:
             self.creation_date = datetime.datetime.now()
 
-        self.html = site_markup(self.comment)
+        self.html = kwargs.pop('html', None)
+        if self.html is None:
+            self.html = site_markup(self.comment)
         super(Comment, self).save(*args, **kwargs)
 
     def get_absolute_url(self):
--- a/comments/views.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/comments/views.py	Sun Sep 13 14:51:33 2015 -0500
@@ -67,7 +67,12 @@
 
     form = CommentForm(target, request.POST)
     if not form.is_valid():
-        return HttpResponseBadRequest('Invalid comment; missing parameters?')
+        # The client side javascript is pretty simplistic right now and we don't
+        # want to change it yet. It is expecting a single error string. Just grab
+        # the first error message and use that.
+        errors = form.errors.as_data()
+        msg = errors.values()[0][0].message if errors else 'Unknown error'
+        return HttpResponseBadRequest(msg)
 
     comment = form.get_comment_object(request.user, request.META.get("REMOTE_ADDR", None))
 
@@ -76,7 +81,7 @@
     if antispam.utils.spam_check(request, comment.comment):
         return HttpResponseForbidden(antispam.BUSTED_MESSAGE)
 
-    comment.save()
+    comment.save(html=form.comment_html)
 
     # return the rendered comment
     return render_to_response('comments/comment.html', {
--- a/core/functions.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/core/functions.py	Sun Sep 13 14:51:33 2015 -0500
@@ -1,9 +1,9 @@
 """This file houses various core utility functions"""
-from contextlib import contextmanager
 import datetime
 import logging
 import os
 import re
+import tempfile
 
 from django.contrib.sites.models import Site
 from django.conf import settings
@@ -12,15 +12,23 @@
 import core.tasks
 
 
-@contextmanager
-def temp_open(path, mode):
-    """A context manager for closing and removing temporary files."""
-    fp = open(path, mode)
-    try:
-        yield fp
-    finally:
-        fp.close()
-        os.remove(path)
+class TemporaryFile(object):
+    """Context manager class for working with temporary files.
+
+    The file will be closed and removed when the context is exited.
+    """
+    def __init__(self, **kwargs):
+        """kwargs will be passed to mkstemp."""
+        self.kwargs = kwargs
+
+    def __enter__(self):
+        self.fd, self.filename = tempfile.mkstemp(**self.kwargs)
+        self.file = os.fdopen(self.fd, 'w+b')
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.file.close()
+        os.remove(self.filename)
 
 
 def send_mail(subject, message, from_email, recipient_list, reply_to=None, defer=True):
--- a/core/html.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/core/html.py	Sun Sep 13 14:51:33 2015 -0500
@@ -1,5 +1,10 @@
 """Common HTML related functions"""
+from urlparse import urlparse
+
 import bleach
+from lxml import etree
+
+from django.conf import settings
 
 
 # Each entry in the _CLEAN_PROFILES dict is a profile name -> 3-tuple pair. The
@@ -47,3 +52,47 @@
 
     return bleach.clean(text, tags=tags, attributes=attrs, styles=styles,
         strip=True, strip_comments=True)
+
+
+class ImageCheckError(Exception):
+    """Exception for the image_check() function"""
+
+
+ALLOWED_HOSTS = set(settings.USER_IMAGES_SOURCES)
+
+
+def image_check(html, allowed_hosts=None):
+    """Returns true if all image tags in the given html come from hosts
+    specified in the allowed_hosts container using https.
+
+    An ImageCheckError is raised if the following problems are detected:
+        * the image src is missing altogether
+        * the scheme is missing or not https
+        * the hostname is missing
+        * the hostname is not in allowed_hosts
+
+    If allowed_hosts is not None, it will be used as the whitelist of allowed
+    hosts. If None, USER_IMAGES_SOURCES from settings will be used as the
+    whitelist.
+    """
+    if not allowed_hosts:
+        allowed_hosts = ALLOWED_HOSTS
+
+    root = etree.HTML(html)
+    for img in root.iter('img'):
+        src = img.get('src')
+        if not src:
+            raise ImageCheckError("Missing image source")
+        r = urlparse(src)
+
+        if not r.scheme and not r.hostname:
+            # relative URL is ok
+            continue
+        if r.scheme != 'https':
+            raise ImageCheckError("Image must be accessed via https")
+        if not r.hostname:
+            raise ImageCheckError("Missing image hostname")
+        if r.hostname not in allowed_hosts:
+            raise ImageCheckError("Invalid image source")
+
+    return True
--- a/core/image_uploader.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/core/image_uploader.py	Sun Sep 13 14:51:33 2015 -0500
@@ -7,12 +7,10 @@
 import logging
 from io import BytesIO
 import os.path
-import tempfile
 import uuid
 
 from PIL import Image
 
-from core.functions import temp_open
 from core.image import orient_image
 
 
@@ -24,17 +22,16 @@
     return b64encode(uuid.uuid4().bytes, '-_').rstrip('=')
 
 
-def upload(fp, bucket, metadata=None, new_size=None, thumb_size=None):
+def upload(filename, bucket, metadata=None, new_size=None, thumb_size=None):
     """Upload an image file to a given S3Bucket.
 
     The image can optionally be resized and a thumbnail can be generated and
     uploaded as well.
 
     Parameters:
-        fp - The image file to process. This is expected to be an instance of
-             Django's UploadedFile class. The file must have a name attribute and
-             we expect the name to have an extension. This is extension is
-             used for the uploaded image and thumbnail names.
+        filename - The path to the file to process. The filename should have an
+                   extension, and this is used for the uploaded image & thumbnail
+                   names.
         bucket - A core.s3.S3Bucket instance to upload to.
         metadata - If not None, must be a dictionary of metadata to apply to the
                    uploaded file and thumbnail.
@@ -51,52 +48,38 @@
     a thumbnail was not requested.
     """
 
-    filename = fp.name
     logger.info('Processing image file: %s', filename)
 
-    # Trying to use PIL (or Pillow) on a Django UploadedFile is often
-    # problematic because the file is often an in-memory file if it is under
-    # a certain size. This complicates matters and many of the operations we try
-    # to perform on it fail if this is the case. To get around these issues,
-    # we make a copy of the file on the file system and operate on the copy.
-    # First generate a unique name and temporary file path.
     unique_key = make_key()
-    ext = os.path.splitext(fp.name)[1]
-    temp_name = os.path.join(tempfile.gettempdir(), unique_key + ext)
+    ext = os.path.splitext(filename)[1]
 
-    # Write the UploadedFile to a temporary file on disk
-    with temp_open(temp_name, 'wb') as temp_file:
-        for chunk in fp.chunks():
-            temp_file.write(chunk)
-        temp_file.close()
+    # Re-orient if necessary
+    image = Image.open(filename)
+    changed, image = orient_image(image)
+    if changed:
+        image.save(filename)
 
-        # Re-orient if necessary
-        image = Image.open(temp_name)
-        changed, image = orient_image(image)
-        if changed:
-            image.save(temp_name)
+    # Resize image if necessary
+    if new_size:
+        image = Image.open(filename)
+        if image.size > new_size:
+            logger.debug('Resizing from {} to {}'.format(image.size, new_size))
+            image.thumbnail(new_size, Image.ANTIALIAS)
+            image.save(filename)
 
-        # Resize image if necessary
-        if new_size:
-            image = Image.open(temp_name)
-            if image.size > new_size:
-                logger.debug('Resizing from {} to {}'.format(image.size, new_size))
-                image.thumbnail(new_size, Image.ANTIALIAS)
-                image.save(temp_name)
+    # Create thumbnail if necessary
+    thumb = None
+    if thumb_size:
+        logger.debug('Creating thumbnail {}'.format(thumb_size))
+        image = Image.open(filename)
+        image.thumbnail(thumb_size, Image.ANTIALIAS)
+        thumb = BytesIO()
+        image.save(thumb, format=image.format)
 
-        # Create thumbnail if necessary
-        thumb = None
-        if thumb_size:
-            logger.debug('Creating thumbnail {}'.format(thumb_size))
-            image = Image.open(temp_name)
-            image.thumbnail(thumb_size, Image.ANTIALIAS)
-            thumb = BytesIO()
-            image.save(thumb, format=image.format)
-
-        # Upload images to S3
-        file_key = unique_key + ext
-        logging.debug('Uploading image')
-        image_url = bucket.upload_from_filename(file_key, temp_name, metadata)
+    # Upload images to S3
+    file_key = unique_key + ext
+    logging.debug('Uploading image')
+    image_url = bucket.upload_from_filename(file_key, filename, metadata)
 
     thumb_url = None
     if thumb:
--- a/core/management/commands/ssl_images.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/core/management/commands/ssl_images.py	Sun Sep 13 14:51:33 2015 -0500
@@ -39,6 +39,7 @@
                           re.DOTALL | re.UNICODE)
 
 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com'])
+WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES)
 MODEL_CHOICES = ['comments', 'posts']
 
 PHOTO_MAX_SIZE = (660, 720)
@@ -224,7 +225,10 @@
             # Try a few things to get this on ssl:
             new_src = convert_to_ssl(r)
         elif r.scheme == 'https':
-            new_src = src       # already https, accept it as-is
+            if r.hostname in WHITELIST_HOSTS:
+                new_src = src   # already in whitelist
+            else:
+                new_src = convert_to_ssl(r)
 
     if new_src:
         if title:
--- a/core/tests/test_ssl_images.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/core/tests/test_ssl_images.py	Sun Sep 13 14:51:33 2015 -0500
@@ -4,6 +4,7 @@
 from urlparse import urlparse
 
 import mock
+from django.conf import settings
 
 from core.management.commands.ssl_images import html_check
 from core.management.commands.ssl_images import process_post
@@ -14,6 +15,10 @@
 
     SG101_RE = re.compile(r'http://(?:www\.)?surfguitar101.com/', re.I)
 
+    def setUp(self):
+        self.assertTrue(len(settings.USER_IMAGES_SOURCES) > 0)
+        self.safe_host = settings.USER_IMAGES_SOURCES[0]
+
     def tearDown(self):
         core.management.commands.ssl_images.url_cache = {}
 
@@ -50,9 +55,9 @@
 
     def test_https_already(self):
         test_str = """An image that is already using https:
-            ![flyer](https://example.com/zzz.png)
+            ![flyer](https://{}/zzz.png)
             It's cool.
-            """
+            """.format(self.safe_host)
         result = process_post(test_str)
         self.assertEqual(test_str, result)
 
@@ -70,19 +75,19 @@
 
     def test_multiple_non_http(self):
         test_str = """An image: ![image](http://www.surfguitar101.com/img.jpg)
-        And another: ![pic](HTTPS://example.com/foo/bar/img.png).
-        More stuff here."""
+        And another: ![pic](HTTPS://{}/foo/bar/img.png).
+        More stuff here.""".format(self.safe_host)
         expected = """An image: ![image](/img.jpg)
-        And another: ![pic](HTTPS://example.com/foo/bar/img.png).
-        More stuff here."""
+        And another: ![pic](HTTPS://{}/foo/bar/img.png).
+        More stuff here.""".format(self.safe_host)
         result = process_post(test_str)
         self.assertEqual(expected, result)
 
     def test_https_already_with_title(self):
         test_str = """An image that is already using https:
-            ![flyer](https://example.com/zzz.png "the title")
+            ![flyer](https://{}/zzz.png "the title")
             It's cool.
-            """
+            """.format(self.safe_host)
         result = process_post(test_str)
         self.assertEqual(test_str, result)
 
@@ -112,13 +117,13 @@
 
     def test_https_already_brackets(self):
         test_str = """An image that is already using https:
-            ![flyer](<https://example.com/zzz.png>)
+            ![flyer](<https://{}/zzz.png>)
             It's cool.
-            """
+            """.format(self.safe_host)
         expected = """An image that is already using https:
-            ![flyer](https://example.com/zzz.png)
+            ![flyer](https://{}/zzz.png)
             It's cool.
-            """
+            """.format(self.safe_host)
         result = process_post(test_str)
         self.assertEqual(expected, result)
 
@@ -172,13 +177,13 @@
     def test_multiple_replacement_2(self, upload_mock):
         old_src = [
             'http://example.com/images/my_image.jpg',
-            'https://example.com/static/wow.gif',
+            'https://{}/static/wow.gif'.format(self.safe_host),
             'http://www.surfguitar101.com/media/a/b/c/pic.png',
             'http://surfguitar101.com/media/a/b/c/pic2.png',
         ]
         new_src = [
             'https://cloud.com/some/path/012345.jpg',
-            'https://example.com/static/wow.gif',
+            'https://{}/static/wow.gif'.format(self.safe_host),
             '/media/a/b/c/pic.png',
             '/media/a/b/c/pic2.png',
         ]
--- a/forums/forms.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/forums/forms.py	Sun Sep 13 14:51:33 2015 -0500
@@ -13,6 +13,9 @@
 from forums.attachments import AttachmentProcessor
 import forums.permissions as perms
 from forums.signals import notify_new_topic, notify_new_post
+from core.html import ImageCheckError
+from core.html import image_check
+from core.markup import site_markup
 
 
 FORUMS_FORM_CSS = {
@@ -44,17 +47,26 @@
         self.attach_proc = AttachmentProcessor(attachments)
 
     def clean_body(self):
-        data = self.cleaned_data['body']
-        if not data and not self.attach_proc.has_attachments():
-            raise forms.ValidationError("This field is required.")
-        return data
+        body = self.cleaned_data['body']
+        self.body_html = None
+        if not body and not self.attach_proc.has_attachments():
+            raise forms.ValidationError("Please enter some text")
+
+        if body:
+            self.body_html = site_markup(body)
+            try:
+                image_check(self.body_html)
+            except ImageCheckError as ex:
+                raise forms.ValidationError(str(ex))
+
+        return body
 
     def clean_topic_id(self):
         id = self.cleaned_data['topic_id']
         try:
             self.topic = Topic.objects.select_related().get(pk=id)
         except Topic.DoesNotExist:
-            raise forms.ValidationError('invalid topic')
+            raise forms.ValidationError('Invalid topic')
         return id
 
     def save(self, user, ip=None):
@@ -63,7 +75,7 @@
         """
         post = Post(topic=self.topic, user=user, body=self.cleaned_data['body'],
                 user_ip=ip)
-        post.save()
+        post.save(html=self.body_html)
         self.attach_proc.save_attachments(post)
         notify_new_post(post)
         return post
@@ -112,10 +124,19 @@
                     choices=[(v, v) for v in pks])
 
     def clean_body(self):
-        data = self.cleaned_data['body']
-        if not data and not self.attach_proc.has_attachments():
+        body = self.cleaned_data['body']
+        self.body_html = None
+        if not body and not self.attach_proc.has_attachments():
             raise forms.ValidationError("This field is required.")
-        return data
+
+        if body:
+            self.body_html = site_markup(body)
+            try:
+                image_check(self.body_html)
+            except ImageCheckError as ex:
+                raise forms.ValidationError(str(ex))
+
+        return body
 
     def save(self, ip=None):
         """
@@ -133,7 +154,7 @@
                 user=self.user,
                 body=self.cleaned_data['body'],
                 user_ip=ip)
-        post.save()
+        post.save(html=self.body_html)
 
         self.attach_proc.save_attachments(post)
 
@@ -189,10 +210,19 @@
                         widget=forms.HiddenInput(attrs={'value': post.id}))
 
     def clean_body(self):
-        data = self.cleaned_data['body']
-        if not data and not self.attach_proc.has_attachments():
+        body = self.cleaned_data['body']
+        self.body_html = None
+        if not body and not self.attach_proc.has_attachments():
             raise forms.ValidationError('This field is required.')
-        return data
+
+        if body:
+            self.body_html = site_markup(body)
+            try:
+                image_check(self.body_html)
+            except ImageCheckError as ex:
+                raise forms.ValidationError(str(ex))
+
+        return body
 
     def save(self, *args, **kwargs):
         commit = kwargs.get('commit', False)
--- a/forums/models.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/forums/models.py	Sun Sep 13 14:51:33 2015 -0500
@@ -311,7 +311,9 @@
             self.creation_date = datetime.datetime.now()
             self.update_date = self.creation_date
 
-        self.html = site_markup(self.body)
+        self.html = kwargs.pop('html', None)
+        if self.html is None:
+            self.html = site_markup(self.body)
         super(Post, self).save(*args, **kwargs)
 
     def delete(self, *args, **kwargs):
--- a/forums/views/main.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/forums/views/main.py	Sun Sep 13 14:51:33 2015 -0500
@@ -359,7 +359,12 @@
             },
             context_instance=RequestContext(request))
 
-    return HttpResponseBadRequest("Oops, did you forget some text?");
+    # The client side javascript is pretty simplistic right now and we don't
+    # want to change it yet. It is expecting a single error string. Just grab
+    # the first error message and use that.
+    errors = form.errors.as_data()
+    msg = errors.values()[0][0].message if errors else 'Unknown error'
+    return HttpResponseBadRequest(msg)
 
 
 def _goto_post(post):
@@ -439,7 +444,7 @@
                 return HttpResponseRedirect(reverse('antispam-suspended'))
             post = form.save(commit=False)
             post.touch()
-            post.save()
+            post.save(html=form.body_html)
             notify_updated_post(post)
 
             # if we are editing a first post, save the parent topic as well
@@ -589,7 +594,7 @@
                 post.topic = topic
                 post.user = request.user
                 post.user_ip = request.META.get("REMOTE_ADDR", "")
-                post.save()
+                post.save(html=form.body_html)
                 notify_new_post(post)
 
                 # Save any attachments
--- a/sg101/settings/base.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/sg101/settings/base.py	Sun Sep 13 14:51:33 2015 -0500
@@ -296,6 +296,14 @@
 USER_PHOTOS_THUMB_SIZE = (120, 120)
 USER_PHOTOS_RATE_LIMIT = (50, 86400)   # number / seconds
 
+USER_IMAGES_SOURCES = [
+    'dl.dropboxusercontent.com',
+    's3.amazonaws.com',
+    's3-us-west-1.amazonaws.com',
+    'scontent-ord1-1.xx.fbcdn.net',
+    'lh3.googleusercontent.com',
+]
+
 # If this flag is False, the queued_search queue will not be processed. This is
 # useful when we are rebuilding the search index.
 SEARCH_QUEUE_ENABLED = True
--- a/user_photos/forms.py	Tue Sep 08 19:13:24 2015 -0500
+++ b/user_photos/forms.py	Sun Sep 13 14:51:33 2015 -0500
@@ -1,12 +1,14 @@
 """Forms for the user_photos application."""
 import datetime
 import hashlib
+import os.path
 
 from django import forms
 from django.conf import settings
 
 from core.s3 import S3Bucket
 from core.image_uploader import upload
+from core.functions import TemporaryFile
 from core.services import get_redis_connection
 from user_photos.models import Photo
 
@@ -70,14 +72,29 @@
                           base_url=settings.USER_PHOTOS_BASE_URL,
                           bucket_name=settings.USER_PHOTOS_BUCKET)
 
-        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        metadata = {'user': self.user.username, 'date': now}
+        # Trying to use PIL (or Pillow) on a Django UploadedFile is often
+        # problematic because the file is often an in-memory file if it is under
+        # a certain size. This complicates matters and many of the operations we try
+        # to perform on it fail if this is the case. To get around these issues,
+        # we make a copy of the file on the file system and operate on the copy.
 
-        url, thumb_url = upload(fp=self.cleaned_data['image_file'],
-                                bucket=bucket,
-                                metadata=metadata,
-                                new_size=settings.USER_PHOTOS_MAX_SIZE,
-                                thumb_size=settings.USER_PHOTOS_THUMB_SIZE)
+        fp = self.cleaned_data['image_file']
+        ext = os.path.splitext(fp.name)[1]
+
+        # Write the UploadedFile to a temporary file on disk
+        with TemporaryFile(suffix=ext) as t:
+            for chunk in fp.chunks():
+                t.file.write(chunk)
+            t.file.close()
+
+            now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            metadata = {'user': self.user.username, 'date': now}
+
+            url, thumb_url = upload(filename=t.filename,
+                                    bucket=bucket,
+                                    metadata=metadata,
+                                    new_size=settings.USER_PHOTOS_MAX_SIZE,
+                                    thumb_size=settings.USER_PHOTOS_THUMB_SIZE)
 
         photo = Photo(user=self.user, url=url, thumb_url=thumb_url,
                 signature=signature)