Mercurial > public > sg101
comparison core/management/commands/ssl_images.py @ 979:a6331579ff43
Convert ssl_images to use download_file.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Fri, 09 Oct 2015 22:20:32 -0500 |
parents | 4619290d171d |
children | 3ebde23a59d0 |
comparison
equal
deleted
inserted
replaced
978:a828e80223d2 | 979:a6331579ff43 |
---|---|
13 from optparse import make_option | 13 from optparse import make_option |
14 import os | 14 import os |
15 import re | 15 import re |
16 import signal | 16 import signal |
17 import socket | 17 import socket |
18 import urllib | |
19 import urlparse | 18 import urlparse |
20 import uuid | 19 import uuid |
21 | 20 |
22 from django.core.management.base import NoArgsCommand, CommandError | 21 from django.core.management.base import NoArgsCommand, CommandError |
23 from django.conf import settings | 22 from django.conf import settings |
24 from lxml import etree | 23 from lxml import etree |
25 import markdown.inlinepatterns | 24 import markdown.inlinepatterns |
26 from PIL import Image | 25 from PIL import Image |
| | 26 import requests |
27 | 27 |
28 from comments.models import Comment | 28 from comments.models import Comment |
29 from forums.models import Post | 29 from forums.models import Post |
| | 30 from core.download import download_file |
| | 31 from core.functions import remove_file |
30 from core.s3 import S3Bucket | 32 from core.s3 import S3Bucket |
31 | 33 |
32 | 34 |
33 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') | 35 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') |
34 logger = logging.getLogger(__name__) | 36 logger = logging.getLogger(__name__) |
41 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) | 43 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) |
42 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) | 44 WHITELIST_HOSTS = set(settings.USER_IMAGES_SOURCES) |
43 MODEL_CHOICES = ['comments', 'posts'] | 45 MODEL_CHOICES = ['comments', 'posts'] |
44 | 46 |
45 PHOTO_MAX_SIZE = (660, 720) | 47 PHOTO_MAX_SIZE = (660, 720) |
46 PHOTO_BASE_URL = 'https://s3.amazonaws.com/' | 48 PHOTO_BASE_URL = settings.HOT_LINK_PHOTOS_BASE_URL |
47 PHOTO_BUCKET_NAME = 'sg101.forum.photos' | 49 PHOTO_BUCKET_NAME = settings.HOT_LINK_PHOTOS_BUCKET |
48 | 50 |
49 CACHE_FILENAME = 'ssl_images_cache.json' | 51 CACHE_FILENAME = 'ssl_images_cache.json' |
50 | 52 |
51 quit_flag = False | 53 quit_flag = False |
52 opener = None | |
53 bucket = None | 54 bucket = None |
54 url_cache = {} | 55 url_cache = {} |
55 bad_hosts = set() | 56 bad_hosts = set() |
56 | 57 |
57 | 58 |
68 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') | 69 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') |
69 handler.setFormatter(formatter) | 70 handler.setFormatter(formatter) |
70 logger.addHandler(handler) | 71 logger.addHandler(handler) |
71 | 72 |
72 | 73 |
73 class ImageURLopener(urllib.FancyURLopener): | 74 def resize_image(img_path): |
74 """Our URL opener. Handles redirects as per FancyURLopener. But all other | 75 """Resizes the image found at img_path if necessary. |
75 errors and authentication requests will raise an IOError. | 76 |
76 """ | 77 Returns True if the image was resized or resizing wasn't necessary. |
77 HANDLED_ERRORS = set([302, 301, 303, 307]) | 78 Returns False if the image could not be read or processed. |
78 | 79 """ |
79 def http_error_default(self, url, fp, errcode, errmsg, headers): | 80 try: |
80 return urllib.URLopener.http_error_default(self, url, fp, errcode, | 81 image = Image.open(img_path) |
81 errmsg, headers) | |
82 | |
83 def http_error(self, url, fp, errcode, errmsg, headers, data=None): | |
84 """Handle http errors. | |
85 We let FancyURLopener handle the redirects, but any other error we want | |
86 to let fail. | |
87 """ | |
88 if errcode in self.HANDLED_ERRORS: | |
89 name = 'http_error_%d' % errcode | |
90 method = getattr(self, name) | |
91 if data is None: | |
92 result = method(url, fp, errcode, errmsg, headers) | |
93 else: | |
94 result = method(url, fp, errcode, errmsg, headers, data) | |
95 if result: | |
96 return result | |
97 return self.http_error_default(url, fp, errcode, errmsg, headers) | |
98 | |
99 | |
100 def download_image(parsed_url): | |
101 """Downloads the image file from the given source URL. | |
102 | |
103 If successful returns the path to the downloaded file. Otherwise None is | |
104 returned. | |
105 """ | |
106 src = parsed_url.geturl() | |
107 logger.info("Retrieving %s", src) | |
108 try: | |
109 fn, hdrs = opener.retrieve(src) | |
110 except IOError as ex: | 82 except IOError as ex: |
111 args = ex.args if ex.args else [] | 83 logger.error("Error opening %s: %s", img_path, ex) |
112 if len(args) == 4 and args[0] == 'http error': | 84 return False |
113 logger.error("http error: %d - %s", args[1], args[2]) | 85 |
114 elif len(args) == 2 and isinstance(args[1], socket.gaierror): | |
115 logger.error("gaierror, ignoring host %s", parsed_url.hostname) | |
116 bad_hosts.add(parsed_url.hostname) | |
117 else: | |
118 logger.error("%s", ex) | |
119 return None | |
120 | |
121 # Does it look like an image? | |
122 content_type = hdrs.get('content-type') | |
123 if not content_type: | |
124 logger.error("No content-type header found") | |
125 return None | |
126 | |
127 file_size = os.stat(fn).st_size | |
128 logger.info("Retrieved: %s bytes; content-type: %s", file_size, content_type) | |
129 | |
130 parts = content_type.split('/') | |
131 if len(parts) < 2 or parts[0] != 'image': | |
132 logger.error("Unknown content-type: %s", content_type) | |
133 return None | |
134 | |
135 return fn | |
136 | |
137 | |
138 def resize_image(img_path): | |
139 """Resizes the image found at img_path if necessary.""" | |
140 image = Image.open(img_path) | |
141 if image.size > PHOTO_MAX_SIZE: | 86 if image.size > PHOTO_MAX_SIZE: |
142 logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE) | 87 logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE) |
143 image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS) | 88 image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS) |
144 image.save(img_path) | 89 image.save(img_path) |
| | 90 |
| | 91 return True |
145 | 92 |
146 | 93 |
147 def gen_key(): | 94 def gen_key(): |
148 """Return a random key.""" | 95 """Return a random key.""" |
149 return base64.b64encode(uuid.uuid4().bytes, '-_').rstrip('=') | 96 return base64.b64encode(uuid.uuid4().bytes, '-_').rstrip('=') |
193 def save_image_to_cloud(parsed_url): | 140 def save_image_to_cloud(parsed_url): |
194 """Downloads an image at a given source URL. Uploads it to cloud storage. | 141 """Downloads an image at a given source URL. Uploads it to cloud storage. |
195 | 142 |
196 Returns the new URL or None if unsuccessful. | 143 Returns the new URL or None if unsuccessful. |
197 """ | 144 """ |
198 fn = download_image(parsed_url) | 145 url = parsed_url.geturl() |
| | 146 fn = None |
| | 147 try: |
| | 148 fn = download_file(url) |
| | 149 except requests.ConnectionError as ex: |
| | 150 logger.error("ConnectionError, ignoring host %s", parsed_url.hostname) |
| | 151 bad_hosts.add(parsed_url.hostname) |
| | 152 except requests.RequestException as ex: |
| | 153 logger.error("%s", ex) |
| | 154 except Exception as ex: |
| | 155 logger.exception("%s", ex) |
| | 156 |
199 if fn: | 157 if fn: |
200 resize_image(fn) | 158 with remove_file(fn): |
201 return upload_image(fn) | 159 if resize_image(fn): |
| | 160 return upload_image(fn) |
202 return None | 161 return None |
203 | 162 |
204 | 163 |
205 def replace_image_markup(match): | 164 def replace_image_markup(match): |
206 src_parts = match.group(8).split() | 165 src_parts = match.group(8).split() |
290 make_option('-j', '--j', | 249 make_option('-j', '--j', |
291 type='int', | 250 type='int', |
292 help="optional second slice index; the j in [i:j]"), | 251 help="optional second slice index; the j in [i:j]"), |
293 make_option('-t', '--timeout', | 252 make_option('-t', '--timeout', |
294 type='int', | 253 type='int', |
295 help="optional socket timeout (secs)"), | 254 help="optional socket timeout (secs)", |
| | 255 default=30), |
296 ) | 256 ) |
297 | 257 |
298 def handle_noargs(self, **options): | 258 def handle_noargs(self, **options): |
299 time_started = datetime.datetime.now() | 259 time_started = datetime.datetime.now() |
300 _setup_logging() | 260 _setup_logging() |
327 qs = qs[i:] | 287 qs = qs[i:] |
328 elif i is None and j is not None: | 288 elif i is None and j is not None: |
329 qs = qs[:j] | 289 qs = qs[:j] |
330 | 290 |
331 # Set global socket timeout | 291 # Set global socket timeout |
332 timeout = options.get('timeout', 30) | 292 timeout = options.get('timeout') |
333 logger.info("Setting socket timeout to %d", timeout) | 293 logger.info("Setting socket timeout to %d", timeout) |
334 socket.setdefaulttimeout(timeout) | 294 socket.setdefaulttimeout(timeout) |
335 | 295 |
336 # Install signal handler for ctrl-c | 296 # Install signal handler for ctrl-c |
337 signal.signal(signal.SIGINT, signal_handler) | 297 signal.signal(signal.SIGINT, signal_handler) |
338 | |
339 # Create URL opener to download photos | |
340 global opener | |
341 opener = ImageURLopener() | |
342 | 298 |
343 # Create bucket to upload photos | 299 # Create bucket to upload photos |
344 global bucket | 300 global bucket |
345 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY, | 301 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY, |
346 secret_key=settings.USER_PHOTOS_SECRET_KEY, | 302 secret_key=settings.USER_PHOTOS_SECRET_KEY, |