comparison core/management/commands/ssl_images.py @ 896:0054a4a88c1c

Remove checking for https availability. This seems to take quite a while, plus Python doesn't validate the cert, so we could end up with dodgy sites.
author Brian Neal <bgneal@gmail.com>
date Wed, 25 Feb 2015 21:09:41 -0600
parents e7c549e4dbf7
children 49ebeb54990a
comparison
equal deleted inserted replaced
895:e7c549e4dbf7 896:0054a4a88c1c
6 - Non SG101 images that use http: are downloaded, resized, and uploaded to 6 - Non SG101 images that use http: are downloaded, resized, and uploaded to
7 an S3 bucket. The src attribute is replaced with the new S3 URL. 7 an S3 bucket. The src attribute is replaced with the new S3 URL.
8 """ 8 """
9 import base64 9 import base64
10 import datetime 10 import datetime
11 import httplib
12 import logging 11 import logging
13 from optparse import make_option 12 from optparse import make_option
14 import os 13 import os
15 import re 14 import re
16 import signal 15 import signal
166 new_url = url_cache.get(src) 165 new_url = url_cache.get(src)
167 if new_url: 166 if new_url:
168 logger.info("Found URL in cache: %s => %s", src, new_url) 167 logger.info("Found URL in cache: %s => %s", src, new_url)
169 return new_url 168 return new_url
170 169
171 # It has been observed that at least 2 different services 170 # Try to download and upload to our S3 bucket
172 # serve up the same image on https: with the URL otherwise the same.
173 # Check to see if the image is available via https first.
174 new_url = check_https_availability(parsed_url)
175 if new_url:
176 url_cache[src] = new_url
177 return new_url
178
179 # If none of the above worked, try to download and upload to our S3 bucket
180 new_url = save_image_to_cloud(src) 171 new_url = save_image_to_cloud(src)
181 if new_url: 172 if new_url:
182 url_cache[src] = new_url 173 url_cache[src] = new_url
183 return new_url 174 return new_url
184
185
def check_https_availability(parsed_url):
    """Probe whether an image is also reachable over https.

    Given a urlparse.urlparse() result, perform a HEAD request over https
    using the same net location and path. If the response has status 200 and
    an ``image/*`` content type, return the url of the image over https
    (the original URL with only the scheme replaced). Otherwise return None.

    Network and protocol failures, both while sending the request and while
    reading the response, are logged and reported as None rather than being
    raised to the caller.
    """
    logger.info("Checking https availability for %s", parsed_url.geturl())
    # NOTE(review): depending on the Python version in use, HTTPSConnection
    # may not verify the server certificate -- confirm before trusting a
    # positive result from this probe.
    con = httplib.HTTPSConnection(parsed_url.netloc)
    try:
        try:
            con.request('HEAD', parsed_url.path)
            # Reading the response can fail just like sending the request
            # (bad status line, timeout), so it belongs in the same try.
            response = con.getresponse()
        except (httplib.HTTPException, socket.error) as ex:
            # socket.error is the base class of socket.timeout, so this also
            # covers refused connections and name resolution failures.
            logger.info("https HEAD request failed: %s", ex)
            return None

        content_type = None
        if response.status == 200:
            content_type = response.getheader('content-type')
            if content_type:
                parts = content_type.split('/')
                if len(parts) >= 2 and parts[0] == 'image':
                    # Same URL as the input, with only the scheme swapped.
                    url = urlparse.urlunparse(('https', ) + parsed_url[1:])
                    logger.info("Image is available at %s", url)
                    return url

        logger.info('https HEAD request failed; status = %d, content-type = %s',
                    response.status, content_type)
        return None
    finally:
        # Always release the socket, whichever path returned above.
        con.close()
214 175
215 176
216 def save_image_to_cloud(src): 177 def save_image_to_cloud(src):
217 """Downloads an image at a given source URL. Uploads it to cloud storage. 178 """Downloads an image at a given source URL. Uploads it to cloud storage.
218 179