Mercurial > public > sg101
comparison core/management/commands/ssl_images.py @ 888:deef1536a54a
See if the file is available over https: by doing a HEAD request.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Mon, 09 Feb 2015 20:25:13 -0600 |
parents | 9a15f7c27526 |
children | ae146e30d588 |
comparison
equal
deleted
inserted
replaced
887:9a15f7c27526 | 888:deef1536a54a |
---|---|
5 /something. | 5 /something. |
6 - Non SG101 images that use http: are downloaded, resized, and uploaded to | 6 - Non SG101 images that use http: are downloaded, resized, and uploaded to |
7 an S3 bucket. The src attribute is replaced with the new S3 URL. | 7 an S3 bucket. The src attribute is replaced with the new S3 URL. |
8 """ | 8 """ |
9 import base64 | 9 import base64 |
10 import httplib | |
10 import logging | 11 import logging |
11 from optparse import make_option | 12 from optparse import make_option |
12 import os.path | 13 import os |
13 import re | 14 import re |
14 import signal | 15 import signal |
15 import socket | 16 import socket |
16 import urllib | 17 import urllib |
17 import urlparse | 18 import urlparse |
105 logger.error("http error: %d - %s", args[1], args[2]) | 106 logger.error("http error: %d - %s", args[1], args[2]) |
106 else: | 107 else: |
107 logger.error("%s", ex) | 108 logger.error("%s", ex) |
108 return None | 109 return None |
109 | 110 |
110 # TODO: This code below is not right. content-length is optional and will | |
111 # not appear when using chunked encoding, for example. Remove this check. If | |
112 # we want to log the size of the file, use stat() on it or something. | |
113 # | |
114 # If there is an error or timeout, sometimes there is no content-length | |
115 # header. | |
116 content_length = hdrs.get('content-length') | |
117 if not content_length: | |
118 logger.error("Bad content-length: %s", content_length) | |
119 return None | |
120 | |
121 # Does it look like an image? | 111 # Does it look like an image? |
122 content_type = hdrs.get('content-type') | 112 content_type = hdrs.get('content-type') |
123 if not content_type: | 113 if not content_type: |
124 logger.error("No content-type header found") | 114 logger.error("No content-type header found") |
125 return None | 115 return None |
126 | 116 |
127 logger.info("Retrieved: %s bytes; content-type: %s", content_length, | 117 file_size = os.stat(fn).st_size |
128 content_type) | 118 logger.info("Retrieved: %s bytes; content-type: %s", file_size, content_type) |
129 | 119 |
130 parts = content_type.split('/') | 120 parts = content_type.split('/') |
131 if len(parts) < 2 or parts[0] != 'image': | 121 if len(parts) < 2 or parts[0] != 'image': |
132 logger.error("Unknown content-type: %s", content_type) | 122 logger.error("Unknown content-type: %s", content_type) |
133 return None | 123 return None |
163 except IOError as ex: | 153 except IOError as ex: |
164 logger.error("Error uploading file: %s", ex) | 154 logger.error("Error uploading file: %s", ex) |
165 return None | 155 return None |
166 | 156 |
167 | 157 |
168 def save_image_to_cloud(src): | 158 def convert_to_ssl(parsed_url): |
169 """Downloads an image at a given source URL. Uploads it to cloud storage. | 159 """Top-level function for moving an image to SSL.""" |
170 | 160 |
171 Returns the new URL or None if unsuccessful. | 161 src = parsed_url.geturl() |
172 """ | 162 |
173 # Check the cache first | 163 # Check the cache first |
174 new_url = url_cache.get(src) | 164 new_url = url_cache.get(src) |
175 if new_url: | 165 if new_url: |
166 logger.info("Found URL in cache: %s => %s", src, new_url) | |
176 return new_url | 167 return new_url |
177 | 168 |
169 # It has been observed that at least 2 different services | |
170 # serve up the same image on https: with the URL otherwise the same. | |
171 # Check to see if the image is available via https first. | |
172 new_url = check_https_availability(parsed_url) | |
173 if new_url: | |
174 return new_url | |
175 | |
176 # If none of the above worked, try to download and upload to our S3 bucket | |
177 return save_image_to_cloud(src) | |
178 | |
179 | |
180 def check_https_availability(parsed_url): | |
181 """Given a urlparse.urlparse() result, perform a HEAD request over https | |
182 using the same net location and path. If we get a response that indicates an | |
183 image is available, return the url of the image over https. Otherwise return | |
184 None. | |
185 """ | |
186 logger.info("Checking https availability for %s", parsed_url.geturl()) | |
187 con = httplib.HTTPSConnection(parsed_url.netloc) | |
188 try: | |
189 con.request('HEAD', parsed_url.path) | |
190 except (httplib.HTTPException, socket.timeout) as ex: | |
191 logger.info("https HEAD request failed: %s", ex) | |
192 return None | |
193 | |
194 content_type = None | |
195 response = con.getresponse() | |
196 if response.status == 200: | |
197 content_type = response.getheader('content-type') | |
198 if content_type: | |
199 parts = content_type.split('/') | |
200 if len(parts) >= 2 and parts[0] == 'image': | |
201 url = urlparse.urlunparse(('https', ) + parsed_url[1:]) | |
202 logger.info("Image is available at %s", url) | |
203 return url | |
204 | |
205 logger.info('https HEAD request failed; status = %d, content-type = %s', | |
206 response.status, content_type) | |
207 return None | |
208 | |
209 | |
210 def save_image_to_cloud(src): | |
211 """Downloads an image at a given source URL. Uploads it to cloud storage. | |
212 | |
213 Returns the new URL or None if unsuccessful. | |
214 """ | |
178 fn = download_image(src) | 215 fn = download_image(src) |
179 if fn: | 216 if fn: |
180 resize_image(fn) | 217 resize_image(fn) |
181 new_url = upload_image(fn) | 218 new_url = upload_image(fn) |
182 if new_url: | 219 if new_url: |
203 if src: | 240 if src: |
204 r = urlparse.urlparse(src) | 241 r = urlparse.urlparse(src) |
205 if r.hostname in SG101_HOSTS: | 242 if r.hostname in SG101_HOSTS: |
206 new_src = r.path # convert to relative path | 243 new_src = r.path # convert to relative path |
207 elif r.scheme == 'http': | 244 elif r.scheme == 'http': |
208 # TODO: it has been observed that at least 2 different services | 245 # Try a few things to get this on ssl: |
209 # serve up the same image on https: with the URL otherwise the same. | 246 new_src = convert_to_ssl(r) |
210 # Add code to see if the image is available at https (maybe do | |
211 # a HEAD request?) and if so just change the protocol to https in | |
212 # the original URL. | |
213 new_src = save_image_to_cloud(src) | |
214 elif r.scheme == 'https': | 247 elif r.scheme == 'https': |
215 new_src = src # already https, accept it as-is | 248 new_src = src # already https, accept it as-is |
216 | 249 |
217 if new_src: | 250 if new_src: |
218 if title: | 251 if title: |