changeset 979:a6331579ff43

Convert ssl_images to use download_file.
author Brian Neal <bgneal@gmail.com>
date Fri, 09 Oct 2015 22:20:32 -0500
parents a828e80223d2
children 3ebde23a59d0
files core/download.py core/management/commands/ssl_images.py
diffstat 2 files changed, 37 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/core/download.py	Mon Oct 05 20:26:14 2015 -0500
+++ b/core/download.py	Fri Oct 09 22:20:32 2015 -0500
@@ -40,6 +40,8 @@
         suffix = mimetypes.guess_extension(content_type) if content_type else ''
         if suffix == '.jpe':
             suffix = '.jpg'
+        elif suffix is None:
+            suffix = ''
         fd, path = tempfile.mkstemp(suffix=suffix)
         os.close(fd)
 
--- a/core/management/commands/ssl_images.py	Mon Oct 05 20:26:14 2015 -0500
+++ b/core/management/commands/ssl_images.py	Fri Oct 09 22:20:32 2015 -0500
@@ -15,7 +15,6 @@
 import re
 import signal
 import socket
-import urllib
 import urlparse
 import uuid
 
@@ -24,9 +23,12 @@
 from lxml import etree
 import markdown.inlinepatterns
 from PIL import Image
+import requests
 
 from comments.models import Comment
 from forums.models import Post
+from core.download import download_file
+from core.functions import remove_file
 from core.s3 import S3Bucket
 
 
@@ -43,13 +45,12 @@
 MODEL_CHOICES = ['comments', 'posts']
 
 PHOTO_MAX_SIZE = (660, 720)
-PHOTO_BASE_URL = 'https://s3.amazonaws.com/'
-PHOTO_BUCKET_NAME = 'sg101.forum.photos'
+PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL
+PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET
 
 CACHE_FILENAME = 'ssl_images_cache.json'
 
 quit_flag = False
-opener = None
 bucket = None
 url_cache = {}
 bad_hosts = set()
@@ -70,79 +71,25 @@
     logger.addHandler(handler)
 
 
-class ImageURLopener(urllib.FancyURLopener):
-    """Our URL opener. Handles redirects as per FancyURLopener. But all other
-    errors and authentication requests will raise an IOError.
+def resize_image(img_path):
+    """Resizes the image found at img_path if necessary.
+
+    Returns True if the image was resized or resizing wasn't necessary.
+    Returns False if the image could not be read or processed.
     """
-    HANDLED_ERRORS = set([302, 301, 303, 307])
+    try:
+        image = Image.open(img_path)
+    except IOError as ex:
+        logger.error("Error opening %s: %s", img_path, ex)
+        return False
 
-    def http_error_default(self, url, fp, errcode, errmsg, headers):
-        return urllib.URLopener.http_error_default(self, url, fp, errcode,
-                                                   errmsg, headers)
-
-    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
-        """Handle http errors.
-        We let FancyURLopener handle the redirects, but any other error we want
-        to let fail.
-        """
-        if errcode in self.HANDLED_ERRORS:
-            name = 'http_error_%d' % errcode
-            method = getattr(self, name)
-            if data is None:
-                result = method(url, fp, errcode, errmsg, headers)
-            else:
-                result = method(url, fp, errcode, errmsg, headers, data)
-            if result:
-                return result
-        return self.http_error_default(url, fp, errcode, errmsg, headers)
-
-
-def download_image(parsed_url):
-    """Downloads the image file from the given source URL.
-
-    If successful returns the path to the downloaded file. Otherwise None is
-    returned.
-    """
-    src = parsed_url.geturl()
-    logger.info("Retrieving %s", src)
-    try:
-        fn, hdrs = opener.retrieve(src)
-    except IOError as ex:
-        args = ex.args if ex.args else []
-        if len(args) == 4 and args[0] == 'http error':
-            logger.error("http error: %d - %s", args[1], args[2])
-        elif len(args) == 2 and isinstance(args[1], socket.gaierror):
-            logger.error("gaierror, ignoring host %s", parsed_url.hostname)
-            bad_hosts.add(parsed_url.hostname)
-        else:
-            logger.error("%s", ex)
-        return None
-
-    # Does it look like an image?
-    content_type = hdrs.get('content-type')
-    if not content_type:
-        logger.error("No content-type header found")
-        return None
-
-    file_size = os.stat(fn).st_size
-    logger.info("Retrieved: %s bytes; content-type: %s", file_size, content_type)
-
-    parts = content_type.split('/')
-    if len(parts) < 2 or parts[0] != 'image':
-        logger.error("Unknown content-type: %s", content_type)
-        return None
-
-    return fn
-
-
-def resize_image(img_path):
-    """Resizes the image found at img_path if necessary."""
-    image = Image.open(img_path)
     if image.size > PHOTO_MAX_SIZE:
         logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE)
         image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS)
         image.save(img_path)
 
+    return True
+
 
 def gen_key():
     """Return a random key."""
@@ -195,10 +142,22 @@
 
     Returns the new URL or None if unsuccessful.
     """
-    fn = download_image(parsed_url)
+    url = parsed_url.geturl()
+    fn = None
+    try:
+        fn = download_file(url)
+    except requests.ConnectionError as ex:
+        logger.error("ConnectionError, ignoring host %s", parsed_url.hostname)
+        bad_hosts.add(parsed_url.hostname)
+    except requests.RequestException as ex:
+        logger.error("%s", ex)
+    except Exception as ex:
+        logger.exception("%s", ex)
+
     if fn:
-        resize_image(fn)
-        return upload_image(fn)
+        with remove_file(fn):
+            if resize_image(fn):
+                return upload_image(fn)
     return None
 
 
@@ -292,7 +251,8 @@
                 help="optional second slice index; the j in [i:j]"),
             make_option('-t', '--timeout',
                 type='int',
-                help="optional socket timeout (secs)"),
+                help="optional socket timeout (secs)",
+                default=30),
             )
 
     def handle_noargs(self, **options):
@@ -329,17 +289,13 @@
             qs = qs[:j]
 
         # Set global socket timeout
-        timeout = options.get('timeout', 30)
+        timeout = options.get('timeout')
         logger.info("Setting socket timeout to %d", timeout)
         socket.setdefaulttimeout(timeout)
 
         # Install signal handler for ctrl-c
         signal.signal(signal.SIGINT, signal_handler)
 
-        # Create URL opener to download photos
-        global opener
-        opener = ImageURLopener()
-
         # Create bucket to upload photos
         global bucket
         bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY,