changeset 897:49ebeb54990a
Record if an image could not be retrieved.
Added some additional stats at the end.
| author | Brian Neal <bgneal@gmail.com> |
|----------|------------------------------------------------------|
| date | Sat, 28 Feb 2015 13:52:46 -0600 |
| parents | 0054a4a88c1c |
| children | 8fcd278d8987 |
| files | core/management/commands/ssl_images.py |
| diffstat | 1 files changed, 22 insertions(+), 8 deletions(-) |
```diff
--- a/core/management/commands/ssl_images.py	Wed Feb 25 21:09:41 2015 -0600
+++ b/core/management/commands/ssl_images.py	Sat Feb 28 13:52:46 2015 -0600
@@ -162,15 +162,18 @@
     src = parsed_url.geturl()
 
     # Check the cache first
-    new_url = url_cache.get(src)
-    if new_url:
-        logger.info("Found URL in cache: %s => %s", src, new_url)
-        return new_url
+    try:
+        new_url = url_cache[src]
+    except KeyError:
+        # cache miss, try to get the file
+        new_url = save_image_to_cloud(src)
+        url_cache[src] = new_url
+    else:
+        if new_url:
+            logger.info("Found URL in cache: %s => %s", src, new_url)
+        else:
+            logger.info("URL known to be bad, skipping: %s", src)
 
-    # Try to download and upload to our S3 bucket
-    new_url = save_image_to_cloud(src)
-    if new_url:
-        url_cache[src] = new_url
     return new_url
 
 
@@ -354,3 +357,14 @@
     elapsed = time_finished - time_started
     logger.info("ssl_images exiting; number of objects: %d; elapsed: %s",
                 count, elapsed)
+
+    http_images = len(url_cache)
+    https_images = sum(1 for v in url_cache.itervalues() if v)
+    bad_images = http_images - https_images
+    if http_images > 0:
+        pct_saved = float(https_images) / http_images * 100.0
+    else:
+        pct_saved = 0.0
+
+    logger.info("Summary: http: %d; https: %d; lost: %d; saved: %3.1f %%",
+                http_images, https_images, bad_images, pct_saved)
```
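The change works by caching failed retrievals as well: `url_cache[src]` is set to the result of `save_image_to_cloud()` even when that result is falsy, so a known-bad URL is skipped on later occurrences instead of being re-fetched, and the summary counts at the end are derived directly from the cache. Below is a minimal, self-contained sketch of that negative-caching pattern; `download_and_upload()`, `check_url()`, and the sample URLs are hypothetical stand-ins, not the actual ssl_images.py helpers.

```python
# A minimal sketch of the negative-caching pattern introduced above.
# Assumptions: url_cache is a plain dict mapping http URLs to their https
# replacements (or None when retrieval failed); download_and_upload() is a
# hypothetical stand-in for the command's real save_image_to_cloud().
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

url_cache = {}  # http URL -> https URL, or None if the image could not be fetched


def download_and_upload(src):
    """Pretend downloader: return a new https URL, or None on failure."""
    return None if src.endswith("bad.gif") else src.replace("http://", "https://", 1)


def check_url(src):
    try:
        new_url = url_cache[src]
    except KeyError:
        # Cache miss: attempt the download and record the result either way,
        # so a URL that could not be retrieved is not retried next time.
        new_url = download_and_upload(src)
        url_cache[src] = new_url
    else:
        if new_url:
            logger.info("Found URL in cache: %s => %s", src, new_url)
        else:
            logger.info("URL known to be bad, skipping: %s", src)
    return new_url


if __name__ == "__main__":
    for url in ["http://example.com/ok.png", "http://example.com/bad.gif",
                "http://example.com/ok.png", "http://example.com/bad.gif"]:
        check_url(url)

    # Summary stats in the same spirit as the new code at the end of the command.
    http_images = len(url_cache)
    https_images = sum(1 for v in url_cache.values() if v)
    bad_images = http_images - https_images
    pct_saved = float(https_images) / http_images * 100.0 if http_images else 0.0
    logger.info("Summary: http: %d; https: %d; lost: %d; saved: %3.1f %%",
                http_images, https_images, bad_images, pct_saved)
```

Running the sketch processes two distinct URLs twice each: the second pass logs one cache hit and one "known to be bad" skip, and the summary reports http: 2; https: 1; lost: 1; saved: 50.0 %.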