comparison core/management/commands/ssl_images.py @ 979:a6331579ff43

Convert ssl_images to use download_file.
author Brian Neal <bgneal@gmail.com>
date Fri, 09 Oct 2015 22:20:32 -0500
parents 4619290d171d
children 3ebde23a59d0
comparison legend: equal | deleted | inserted | replaced
left column: 978:a828e80223d2 | right column: 979:a6331579ff43
13 from optparse import make_option 13 from optparse import make_option
14 import os 14 import os
15 import re 15 import re
16 import signal 16 import signal
17 import socket 17 import socket
18 import urllib
19 import urlparse 18 import urlparse
20 import uuid 19 import uuid
21 20
22 from django.core.management.base import NoArgsCommand, CommandError 21 from django.core.management.base import NoArgsCommand, CommandError
23 from django.conf import settings 22 from django.conf import settings
24 from lxml import etree 23 from lxml import etree
25 import markdown.inlinepatterns 24 import markdown.inlinepatterns
26 from PIL import Image 25 from PIL import Image
26 import requests
27 27
28 from comments.models import Comment 28 from comments.models import Comment
29 from forums.models import Post 29 from forums.models import Post
30 from core.download import download_file
31 from core.functions import remove_file
30 from core.s3 import S3Bucket 32 from core.s3 import S3Bucket
31 33
32 34
33 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') 35 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log')
34 logger = logging.getLogger(__name__) 36 logger = logging.getLogger(__name__)
41 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) 43 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com'])
42 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) 44 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES)
43 MODEL_CHOICES = ['comments', 'posts'] 45 MODEL_CHOICES = ['comments', 'posts']
44 46
45 PHOTO_MAX_SIZE = (660, 720) 47 PHOTO_MAX_SIZE = (660, 720)
46 PHOTO_BASE_URL = 'https://s3.amazonaws.com/' 48 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL
47 PHOTO_BUCKET_NAME = 'sg101.forum.photos' 49 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET
48 50
49 CACHE_FILENAME = 'ssl_images_cache.json' 51 CACHE_FILENAME = 'ssl_images_cache.json'
50 52
51 quit_flag = False 53 quit_flag = False
52 opener = None
53 bucket = None 54 bucket = None
54 url_cache = {} 55 url_cache = {}
55 bad_hosts = set() 56 bad_hosts = set()
56 57
57 58
68 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 69 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
69 handler.setFormatter(formatter) 70 handler.setFormatter(formatter)
70 logger.addHandler(handler) 71 logger.addHandler(handler)
71 72
72 73
73 class ImageURLopener(urllib.FancyURLopener): 74 def resize_image(img_path):
74 """Our URL opener. Handles redirects as per FancyURLopener. But all other 75 """Resizes the image found at img_path if necessary.
75 errors and authentication requests will raise an IOError. 76
76 """ 77 Returns True if the image was resized or resizing wasn't necessary.
77 HANDLED_ERRORS = set([302, 301, 303, 307]) 78 Returns False if the image could not be read or processed.
78 79 """
79 def http_error_default(self, url, fp, errcode, errmsg, headers): 80 try:
80 return urllib.URLopener.http_error_default(self, url, fp, errcode, 81 image = Image.open(img_path)
81 errmsg, headers)
82
83 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
84 """Handle http errors.
85 We let FancyURLopener handle the redirects, but any other error we want
86 to let fail.
87 """
88 if errcode in self.HANDLED_ERRORS:
89 name = 'http_error_%d' % errcode
90 method = getattr(self, name)
91 if data is None:
92 result = method(url, fp, errcode, errmsg, headers)
93 else:
94 result = method(url, fp, errcode, errmsg, headers, data)
95 if result:
96 return result
97 return self.http_error_default(url, fp, errcode, errmsg, headers)
98
99
100 def download_image(parsed_url):
101 """Downloads the image file from the given source URL.
102
103 If successful returns the path to the downloaded file. Otherwise None is
104 returned.
105 """
106 src = parsed_url.geturl()
107 logger.info("Retrieving %s", src)
108 try:
109 fn, hdrs = opener.retrieve(src)
110 except IOError as ex: 82 except IOError as ex:
111 args = ex.args if ex.args else [] 83 logger.error("Error opening %s: %s", img_path, ex)
112 if len(args) == 4 and args[0] == 'http error': 84 return False
113 logger.error("http error: %d - %s", args[1], args[2]) 85
114 elif len(args) == 2 and isinstance(args[1], socket.gaierror):
115 logger.error("gaierror, ignoring host %s", parsed_url.hostname)
116 bad_hosts.add(parsed_url.hostname)
117 else:
118 logger.error("%s", ex)
119 return None
120
121 # Does it look like an image?
122 content_type = hdrs.get('content-type')
123 if not content_type:
124 logger.error("No content-type header found")
125 return None
126
127 file_size = os.stat(fn).st_size
128 logger.info("Retrieved: %s bytes; content-type: %s", file_size, content_type)
129
130 parts = content_type.split('/')
131 if len(parts) < 2 or parts[0] != 'image':
132 logger.error("Unknown content-type: %s", content_type)
133 return None
134
135 return fn
136
137
138 def resize_image(img_path):
139 """Resizes the image found at img_path if necessary."""
140 image = Image.open(img_path)
141 if image.size > PHOTO_MAX_SIZE: 86 if image.size > PHOTO_MAX_SIZE:
142 logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE) 87 logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE)
143 image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS) 88 image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS)
144 image.save(img_path) 89 image.save(img_path)
90
91 return True
145 92
146 93
147 def gen_key(): 94 def gen_key():
148 """Return a random key.""" 95 """Return a random key."""
149 return base64.b64encode(uuid.uuid4().bytes, '-_').rstrip('=') 96 return base64.b64encode(uuid.uuid4().bytes, '-_').rstrip('=')
193 def save_image_to_cloud(parsed_url): 140 def save_image_to_cloud(parsed_url):
194 """Downloads an image at a given source URL. Uploads it to cloud storage. 141 """Downloads an image at a given source URL. Uploads it to cloud storage.
195 142
196 Returns the new URL or None if unsuccessful. 143 Returns the new URL or None if unsuccessful.
197 """ 144 """
198 fn = download_image(parsed_url) 145 url = parsed_url.geturl()
146 fn = None
147 try:
148 fn = download_file(url)
149 except requests.ConnectionError as ex:
150 logger.error("ConnectionError, ignoring host %s", parsed_url.hostname)
151 bad_hosts.add(parsed_url.hostname)
152 except requests.RequestException as ex:
153 logger.error("%s", ex)
154 except Exception as ex:
155 logger.exception("%s", ex)
156
199 if fn: 157 if fn:
200 resize_image(fn) 158 with remove_file(fn):
201 return upload_image(fn) 159 if resize_image(fn):
160 return upload_image(fn)
202 return None 161 return None
203 162
204 163
205 def replace_image_markup(match): 164 def replace_image_markup(match):
206 src_parts = match.group(8).split() 165 src_parts = match.group(8).split()
290 make_option('-j', '--j', 249 make_option('-j', '--j',
291 type='int', 250 type='int',
292 help="optional second slice index; the j in [i:j]"), 251 help="optional second slice index; the j in [i:j]"),
293 make_option('-t', '--timeout', 252 make_option('-t', '--timeout',
294 type='int', 253 type='int',
295 help="optional socket timeout (secs)"), 254 help="optional socket timeout (secs)",
255 default=30),
296 ) 256 )
297 257
298 def handle_noargs(self, **options): 258 def handle_noargs(self, **options):
299 time_started = datetime.datetime.now() 259 time_started = datetime.datetime.now()
300 _setup_logging() 260 _setup_logging()
327 qs = qs[i:] 287 qs = qs[i:]
328 elif i is None and j is not None: 288 elif i is None and j is not None:
329 qs = qs[:j] 289 qs = qs[:j]
330 290
331 # Set global socket timeout 291 # Set global socket timeout
332 timeout = options.get('timeout', 30) 292 timeout = options.get('timeout')
333 logger.info("Setting socket timeout to %d", timeout) 293 logger.info("Setting socket timeout to %d", timeout)
334 socket.setdefaulttimeout(timeout) 294 socket.setdefaulttimeout(timeout)
335 295
336 # Install signal handler for ctrl-c 296 # Install signal handler for ctrl-c
337 signal.signal(signal.SIGINT, signal_handler) 297 signal.signal(signal.SIGINT, signal_handler)
338
339 # Create URL opener to download photos
340 global opener
341 opener = ImageURLopener()
342 298
343 # Create bucket to upload photos 299 # Create bucket to upload photos
344 global bucket 300 global bucket
345 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY, 301 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY,
346 secret_key=settings.USER_PHOTOS_SECRET_KEY, 302 secret_key=settings.USER_PHOTOS_SECRET_KEY,