# HG changeset patch # User Brian Neal # Date 1425153166 21600 # Node ID 49ebeb54990adac368b178c46f6b12a947d4d45b # Parent 0054a4a88c1c7e266f593b9210fa27904c42f763 Record if an image could not be retrieved. Added some additional stats at the end. diff -r 0054a4a88c1c -r 49ebeb54990a core/management/commands/ssl_images.py --- a/core/management/commands/ssl_images.py Wed Feb 25 21:09:41 2015 -0600 +++ b/core/management/commands/ssl_images.py Sat Feb 28 13:52:46 2015 -0600 @@ -162,15 +162,18 @@ src = parsed_url.geturl() # Check the cache first - new_url = url_cache.get(src) - if new_url: - logger.info("Found URL in cache: %s => %s", src, new_url) - return new_url + try: + new_url = url_cache[src] + except KeyError: + # cache miss, try to get the file + new_url = save_image_to_cloud(src) + url_cache[src] = new_url + else: + if new_url: + logger.info("Found URL in cache: %s => %s", src, new_url) + else: + logger.info("URL known to be bad, skipping: %s", src) - # Try to download and upload to our S3 bucket - new_url = save_image_to_cloud(src) - if new_url: - url_cache[src] = new_url return new_url @@ -354,3 +357,14 @@ elapsed = time_finished - time_started logger.info("ssl_images exiting; number of objects: %d; elapsed: %s", count, elapsed) + + http_images = len(url_cache) + https_images = sum(1 for v in url_cache.itervalues() if v) + bad_images = http_images - https_images + if http_images > 0: + pct_saved = float(https_images) / http_images * 100.0 + else: + pct_saved = 0.0 + + logger.info("Summary: http: %d; https: %d; lost: %d; saved: %3.1f %%", + http_images, https_images, bad_images, pct_saved)