Mercurial > public > sg101
comparison core/management/commands/ssl_images.py @ 887:9a15f7c27526
Actually save model object upon change.
This commit was tested on the comments model.
Additional logging added.
Added check for Markdown image references.
Added TODOs after observing behavior on comments.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Tue, 03 Feb 2015 21:09:44 -0600 |
parents | 9a3019f2c7dc |
children | deef1536a54a |
comparison
equal
deleted
inserted
replaced
886:3d635fd53ef0 | 887:9a15f7c27526 |
---|---|
105 logger.error("http error: %d - %s", args[1], args[2]) | 105 logger.error("http error: %d - %s", args[1], args[2]) |
106 else: | 106 else: |
107 logger.error("%s", ex) | 107 logger.error("%s", ex) |
108 return None | 108 return None |
109 | 109 |
110 # TODO: This code below is not right. content-length is optional and will | |
111 # not appear when using chunked encoding, for example. Remove this check. If | |
112 # we want to log the size of the file, use stat() on it or something. | |
113 # | |
110 # If there is an error or timeout, sometimes there is no content-length | 114 # If there is an error or timeout, sometimes there is no content-length |
111 # header. | 115 # header. |
112 content_length = hdrs.get('content-length') | 116 content_length = hdrs.get('content-length') |
113 if not content_length: | 117 if not content_length: |
114 logger.error("Bad content-length: %s", content_length) | 118 logger.error("Bad content-length: %s", content_length) |
199 if src: | 203 if src: |
200 r = urlparse.urlparse(src) | 204 r = urlparse.urlparse(src) |
201 if r.hostname in SG101_HOSTS: | 205 if r.hostname in SG101_HOSTS: |
202 new_src = r.path # convert to relative path | 206 new_src = r.path # convert to relative path |
203 elif r.scheme == 'http': | 207 elif r.scheme == 'http': |
208 # TODO: it has been observed that at least 2 different services | |
209 # serve up the same image on https: with the URL otherwise the same. | |
210 # Add code to see if the image is available at https (maybe do | |
211 # a HEAD request?) and if so just change the protocol to https in | |
212 # the original URL. | |
204 new_src = save_image_to_cloud(src) | 213 new_src = save_image_to_cloud(src) |
205 elif r.scheme == 'https': | 214 elif r.scheme == 'https': |
206 new_src = src # already https, accept it as-is | 215 new_src = src # already https, accept it as-is |
207 | 216 |
208 if new_src: | 217 if new_src: |
215 s = u'[{alt}]({src})'.format(alt=alt, src=src) | 224 s = u'[{alt}]({src})'.format(alt=alt, src=src) |
216 | 225 |
217 return s | 226 return s |
218 | 227 |
219 | 228 |
229 def warn_if_image_refs(text, model_name, pk): | |
230 """Search text for Markdown image reference markup. | |
231 | |
232 We aren't expecting these, but we will log something if we see any. | |
233 """ | |
234 if IMAGE_REF_RE.search(text): | |
235 logger.warning("Image reference found in %s pk = #%d", model_name, pk) | |
236 | |
237 | |
220 def process_post(text): | 238 def process_post(text): |
221 """Process the post object: | 239 """Process the post object: |
222 | 240 |
223 A regex substitution is run on the post's text field. This fixes up image | 241 A regex substitution is run on the post's text field. This fixes up image |
224 links, getting rid of plain old http sources; either converting to https | 242 links, getting rid of plain old http sources; either converting to https |
225 or relative style links (if the link is to SG101). | 243 or relative style links (if the link is to SG101). |
226 | |
227 We also do a search for Markdown image reference markup. We aren't expecting | |
228 these, but we will log something if we see any. | |
229 | 244 |
230 """ | 245 """ |
231 return IMAGE_LINK_RE.sub(replace_image_markup, text) | 246 return IMAGE_LINK_RE.sub(replace_image_markup, text) |
232 | 247 |
233 | 248 |
292 global bucket | 307 global bucket |
293 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY, | 308 bucket = S3Bucket(access_key=settings.USER_PHOTOS_ACCESS_KEY, |
294 secret_key=settings.USER_PHOTOS_SECRET_KEY, | 309 secret_key=settings.USER_PHOTOS_SECRET_KEY, |
295 base_url=PHOTO_BASE_URL, | 310 base_url=PHOTO_BASE_URL, |
296 bucket_name=PHOTO_BUCKET_NAME) | 311 bucket_name=PHOTO_BUCKET_NAME) |
297 s = [] | 312 |
313 if i is None: | |
314 i = 0 | |
315 | |
298 for n, model in enumerate(qs.iterator()): | 316 for n, model in enumerate(qs.iterator()): |
299 if quit_flag: | 317 if quit_flag: |
300 logger.warning("SIGINT received, exiting") | 318 logger.warning("SIGINT received, exiting") |
301 break | 319 break |
302 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk) | 320 logger.info("Processing %s #%d (pk = %d)", model_name, n + i, model.pk) |
303 txt = getattr(model, text_attr) | 321 txt = getattr(model, text_attr) |
322 warn_if_image_refs(txt, model_name, model.pk) | |
304 new_txt = process_post(txt) | 323 new_txt = process_post(txt) |
305 if txt != new_txt: | 324 if txt != new_txt: |
306 logger.debug("content changed") | 325 logger.info("Content changed on %s #%d (pk= %d)", |
326 model_name, n + i, model.pk) | |
307 logger.debug("original: %s", txt) | 327 logger.debug("original: %s", txt) |
308 logger.debug("changed: %s", new_txt) | 328 logger.debug("changed: %s", new_txt) |
309 s.append(new_txt) | 329 setattr(model, text_attr, new_txt) |
310 | 330 model.save() |
311 import pprint | 331 |
312 pprint.pprint(s) | 332 logger.info("ssl_images exiting") |