changeset 897:49ebeb54990a

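Record when an image could not be retrieved, so that a URL known to be bad is skipped instead of being fetched again. Add summary statistics (http, https, lost, and percent saved) to the log at the end of the run.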
author Brian Neal <bgneal@gmail.com>
date Sat, 28 Feb 2015 13:52:46 -0600 (2015-02-28)
parents 0054a4a88c1c
children 8fcd278d8987
files core/management/commands/ssl_images.py
diffstat 1 files changed, 22 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/core/management/commands/ssl_images.py	Wed Feb 25 21:09:41 2015 -0600
+++ b/core/management/commands/ssl_images.py	Sat Feb 28 13:52:46 2015 -0600
@@ -162,15 +162,18 @@
     src = parsed_url.geturl()
 
     # Check the cache first
-    new_url = url_cache.get(src)
-    if new_url:
-        logger.info("Found URL in cache: %s => %s", src, new_url)
-        return new_url
+    try:
+        new_url = url_cache[src]
+    except KeyError:
+        # cache miss, try to get the file
+        new_url = save_image_to_cloud(src)
+        url_cache[src] = new_url
+    else:
+        if new_url:
+            logger.info("Found URL in cache: %s => %s", src, new_url)
+        else:
+            logger.info("URL known to be bad, skipping: %s", src)
 
-    # Try to download and upload to our S3 bucket
-    new_url = save_image_to_cloud(src)
-    if new_url:
-        url_cache[src] = new_url
     return new_url
 
 
@@ -354,3 +357,14 @@
         elapsed = time_finished - time_started
         logger.info("ssl_images exiting; number of objects: %d; elapsed: %s",
                     count, elapsed)
+
+        http_images = len(url_cache)
+        https_images = sum(1 for v in url_cache.itervalues() if v)
+        bad_images = http_images - https_images
+        if http_images > 0:
+            pct_saved = float(https_images) / http_images * 100.0
+        else:
+            pct_saved = 0.0
+
+        logger.info("Summary: http: %d; https: %d; lost: %d; saved: %3.1f %%",
+                    http_images, https_images, bad_images, pct_saved)