comparison core/management/commands/ssl_images.py @ 881:13f2d4393ec4

More work on ssl_images command. Uploading now works.
author Brian Neal <bgneal@gmail.com>
date Thu, 15 Jan 2015 21:05:05 -0600
parents 6900040df0f8
children 9a3019f2c7dc
comparison
equal deleted inserted replaced
880:bab6b1eac1e2 881:13f2d4393ec4
9 import logging 9 import logging
10 from optparse import make_option 10 from optparse import make_option
11 import os.path 11 import os.path
12 import re 12 import re
13 import signal 13 import signal
14 import socket
15 import urllib
14 import urlparse 16 import urlparse
17 import uuid
15 18
16 from django.core.management.base import NoArgsCommand, CommandError 19 from django.core.management.base import NoArgsCommand, CommandError
17 from django.conf import settings 20 from django.conf import settings
18 import markdown.inlinepatterns 21 import markdown.inlinepatterns
22 from PIL import Image
19 23
20 from comments.models import Comment 24 from comments.models import Comment
21 from forums.models import Post 25 from forums.models import Post
26 from core.s3 import S3Bucket
22 27
23 28
24 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log') 29 LOGFILE = os.path.join(settings.PROJECT_PATH, 'logs', 'ssl_images.log')
25 logger = logging.getLogger(__name__) 30 logger = logging.getLogger(__name__)
26 31
30 re.DOTALL | re.UNICODE) 35 re.DOTALL | re.UNICODE)
31 36
32 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com']) 37 SG101_HOSTS = set(['www.surfguitar101.com', 'surfguitar101.com'])
33 MODEL_CHOICES = ['comments', 'posts'] 38 MODEL_CHOICES = ['comments', 'posts']
34 39
40 PHOTO_MAX_SIZE = (660, 720)
41 PHOTO_BASE_URL = 'https://s3.amazonaws.com/'
42 PHOTO_BUCKET_NAME = 'sg101.forum.photos'
43
35 quit_flag = False 44 quit_flag = False
45 opener = None
46 bucket = None
47 url_cache = {}
36 48
37 49
38 def signal_handler(signum, frame): 50 def signal_handler(signum, frame):
39 """SIGINT signal handler""" 51 """SIGINT signal handler"""
40 global quit_flag 52 global quit_flag
48 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 60 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
49 handler.setFormatter(formatter) 61 handler.setFormatter(formatter)
50 logger.addHandler(handler) 62 logger.addHandler(handler)
51 63
52 64
class ImageURLopener(urllib.FancyURLopener):
    """URL opener used to fetch remote images.

    Redirect responses are delegated to the FancyURLopener redirect handlers.
    Every other HTTP error, including authentication requests, is routed to
    URLopener's default handler so that it fails with an IOError instead of
    being handled leniently.
    """
    # HTTP status codes (redirects) that FancyURLopener is allowed to handle.
    HANDLED_ERRORS = set([301, 302, 303, 307])

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Defer to the strict URLopener default handler, skipping the
        FancyURLopener version.
        """
        return urllib.URLopener.http_error_default(
            self, url, fp, errcode, errmsg, headers)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Dispatch an HTTP error response.

        Redirect codes listed in HANDLED_ERRORS are passed to the matching
        http_error_<code> method. Anything else, or a redirect handler that
        produces no result, falls through to http_error_default.
        """
        if errcode not in self.HANDLED_ERRORS:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

        handler = getattr(self, 'http_error_%d' % errcode)
        handler_args = [url, fp, errcode, errmsg, headers]
        # Only forward the request body when the caller supplied one.
        if data is not None:
            handler_args.append(data)

        outcome = handler(*handler_args)
        if outcome:
            return outcome
        return self.http_error_default(url, fp, errcode, errmsg, headers)
90
91
def download_image(src):
    """Retrieve the image at the URL src using the module-level opener.

    Returns the path of the downloaded file on success. Returns None when
    the retrieval raises an IOError, when the response lacks a content-length
    or content-type header, or when the content-type is not an image type.
    Every failure is written to the log.
    """
    logger.info("Retrieving %s", src)
    try:
        fn, hdrs = opener.retrieve(src)
    except IOError as ex:
        details = ex.args
        # HTTP failures arrive as a 4-tuple whose first item is 'http error';
        # log those with the status code and message broken out.
        is_http_error = len(details) == 4 and details[0] == 'http error'
        if is_http_error:
            logger.error("http error: %d - %s", details[1], details[2])
        else:
            logger.error("%s", ex)
        return None

    # After an error or a timeout the content-length header is sometimes
    # missing, so treat its absence as a failed download.
    content_length = hdrs.get('content-length')
    if not content_length:
        logger.error("Bad content-length: %s", content_length)
        return None

    # A response without a content-type cannot be checked for being an image.
    content_type = hdrs.get('content-type')
    if not content_type:
        logger.error("No content-type header found")
        return None

    logger.info("Retrieved: %s bytes; content-type: %s", content_length,
                content_type)

    # Accept only "image/<subtype>" style content types.
    major_type, slash, _ = content_type.partition('/')
    if not slash or major_type != 'image':
        logger.error("Unknown content-type: %s", content_type)
        return None

    return fn
131
132
def resize_image(img_path):
    """Shrink the image stored at img_path, in place, if it is too large.

    The image is resized when either its width or its height exceeds the
    corresponding PHOTO_MAX_SIZE limit, and the result is saved back to
    img_path. Images already within both limits are left untouched.
    """
    image = Image.open(img_path)
    width, height = image.size
    max_width, max_height = PHOTO_MAX_SIZE

    # Check each dimension on its own. Comparing the (width, height) tuples
    # directly is a lexicographic comparison, so an image narrower than the
    # maximum width but taller than the maximum height was never resized.
    if width > max_width or height > max_height:
        logger.info('Resizing from %s to %s', image.size, PHOTO_MAX_SIZE)
        image.thumbnail(PHOTO_MAX_SIZE, Image.ANTIALIAS)
        image.save(img_path)
140
141
def upload_image(img_path):
    """Send the image file at img_path to the S3 bucket.

    The file is stored under a freshly generated unique key that keeps the
    extension of img_path. Returns the value produced by the bucket upload
    (the URL of the image in the bucket), or None if the upload raises an
    IOError.
    """
    logger.info("upload_image starting")

    # Build a unique bucket key: random hex name plus the file's extension.
    extension = os.path.splitext(img_path)[1]
    file_key = uuid.uuid4().hex + extension

    try:
        url = bucket.upload_from_filename(file_key, img_path, public=True)
    except IOError as ex:
        logger.error("Error uploading file: %s", ex)
        url = None
    return url
157
158
def save_image_to_cloud(src):
    """Copy the image at the source URL src into cloud storage.

    The image is downloaded, resized if necessary, and uploaded. Successful
    results are remembered in url_cache so a source URL seen again is not
    processed a second time.

    Returns the new URL, or None if the download or the upload failed.
    """
    # A previously processed source URL is answered from the cache.
    cached_url = url_cache.get(src)
    if cached_url:
        return cached_url

    local_path = download_image(src)
    if not local_path:
        return None

    resize_image(local_path)
    uploaded_url = upload_image(local_path)
    if not uploaded_url:
        return None

    url_cache[src] = uploaded_url
    return uploaded_url
56 177
57 178
58 def replace_image_markup(match): 179 def replace_image_markup(match):
59 src_parts = match.group(8).split() 180 src_parts = match.group(8).split()
60 if src_parts: 181 if src_parts:
128 raise CommandError('Please choose a --model option') 249 raise CommandError('Please choose a --model option')
129 250
130 if options['model'] == 'comments': 251 if options['model'] == 'comments':
131 qs = Comment.objects.all() 252 qs = Comment.objects.all()
132 text_attr = 'comment' 253 text_attr = 'comment'
254 model_name = 'Comment'
133 else: 255 else:
134 qs = Post.objects.all() 256 qs = Post.objects.all()
135 text_attr = 'body' 257 text_attr = 'body'
258 model_name = 'Post'
136 259
137 i, j = options['i'], options['j'] 260 i, j = options['i'], options['j']
138 261
139 if i is not None and i < 0: 262 if i is not None and i < 0:
140 raise CommandError("-i must be >= 0") 263 raise CommandError("-i must be >= 0")
148 elif i is not None and j is None: 271 elif i is not None and j is None:
149 qs = qs[i:] 272 qs = qs[i:]
150 elif i is None and j is not None: 273 elif i is None and j is not None:
151 qs = qs[:j] 274 qs = qs[:j]
152 275
276 # Set global socket timeout
277 socket.setdefaulttimeout(30)
278
153 # Install signal handler for ctrl-c 279 # Install signal handler for ctrl-c
154 signal.signal(signal.SIGINT, signal_handler) 280 signal.signal(signal.SIGINT, signal_handler)
155 281
282 # Create URL opener to download photos
283 global opener
284 opener = ImageURLopener()
285
286 # Create bucket to upload photos
287 global bucket
288 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY,
289 secret_key=settings.USER_PHOTOS_SECRET_KEY,
290 base_url=PHOTO_BASE_URL,
291 bucket_name=PHOTO_BUCKET_NAME)
156 s = [] 292 s = []
157 for model in qs.iterator(): 293 for n, model in enumerate(qs.iterator()):
158 if quit_flag: 294 if quit_flag:
159 logger.warning("SIGINT received, exiting") 295 logger.warning("SIGINT received, exiting")
296 break
297 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk)
160 txt = getattr(model, text_attr) 298 txt = getattr(model, text_attr)
161 new_txt = process_post(txt) 299 new_txt = process_post(txt)
300 if txt != new_txt:
301 logger.debug("content changed")
302 logger.debug("original: %s", txt)
303 logger.debug("changed: %s", new_txt)
162 s.append(new_txt) 304 s.append(new_txt)
163 305
164 import pprint 306 import pprint
165 pprint.pprint(s) 307 pprint.pprint(s)