"""This module contains routines for downloading files."""

import logging
import mimetypes
import os
import shutil
import tempfile

import requests


logger = logging.getLogger(__name__)


def _guess_suffix(content_type):
    """Return a filename extension for a Content-Type header value.

    Returns an empty string when the header is missing or the type is not
    known to the mimetypes module, so the result is always safe to hand to
    tempfile.mkstemp().
    """
    if not content_type:
        return ''

    # Drop any parameters (e.g. "; charset=utf-8"); mimetypes only knows the
    # bare "type/subtype" form.
    mime_type = content_type.split(';', 1)[0].strip()

    # guess_extension() returns None for unknown types; older Python versions
    # do not accept None as a mkstemp() suffix, so normalize it to ''.
    suffix = mimetypes.guess_extension(mime_type) or ''
    if suffix == '.jpe':
        suffix = '.jpg'
    return suffix


def download_file(url, path=None):
    """Downloads the image file from the given source URL and stores it in the
    filename given by path. If path is None, a temporary file will be created,
    with an extension guessed from the response's Content-Type header.

    If successful returns the path to the downloaded file. If the server
    answers with a status code other than 200, None is returned.

    This function may raise various exceptions from the requests library.
    Errors raised while the body is being streamed to disk are logged, the
    partially written file is removed, and the error is re-raised unchanged.
    """
    logger.info("download_file from %s; path=%s", url, path)

    try:
        r = requests.get(url, stream=True)
    except requests.RequestException:
        logger.exception("download_file requests.get('%s') exception", url)
        raise

    # With stream=True the connection is held until the body is consumed or
    # the response is closed, so always close it on the way out.
    try:
        if r.status_code != 200:
            logger.error("download_file from %s: error code %d", url, r.status_code)
            return None

        # Save file data

        if not path:
            suffix = _guess_suffix(r.headers.get('content-type'))
            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)

        # Open outside the cleanup scope: if the destination cannot be opened
        # we have written nothing and must not delete whatever is there.
        fp = open(path, 'wb')
        try:
            with fp:
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, fp)
        except Exception:
            # Reading r.raw bypasses requests' exception wrapping, so failures
            # here are typically urllib3 or I/O errors rather than
            # requests.RequestException; clean up for all of them.
            logger.exception("download_file download exception")
            if os.path.exists(path):
                os.remove(path)
            raise
    finally:
        r.close()

    file_size = os.stat(path).st_size
    logger.info("download_file retrieved %s bytes from %s; saved to %s", file_size, url, path)
    return path


if __name__ == '__main__':
    import sys
    s = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=s)
    logging.info("argument is %s", sys.argv[1])
    result = download_file(sys.argv[1])
    if result:
        # Parenthesized single-argument form prints identically on Python 2
        # and Python 3.
        print(result)