bgneal@972: """This module contains routines for downloading files."""
bgneal@972: 
bgneal@972: import logging
bgneal@973: import mimetypes
bgneal@972: import os
bgneal@972: import shutil
bgneal@972: import tempfile
bgneal@981: from urlparse import urlparse
bgneal@972: 
bgneal@972: import requests
bgneal@972: 
bgneal@972: 
bgneal@972: logger = logging.getLogger(__name__)
bgneal@972: 
bgneal@972: 
def _guess_suffix(content_type, url):
    """Return a filename suffix (such as '.jpg') for a downloaded file.

    content_type is the raw Content-Type header value, or None if the response
    did not include one. url is the source URL, used as a fallback when the
    content type yields nothing.

    Returns the suffix including its leading dot, or '' if none can be
    determined.
    """
    suffix = None
    if content_type:
        # The header may carry parameters, e.g. "image/png; charset=binary";
        # only the bare "type/subtype" part is looked up.
        mime_type = content_type.split(';', 1)[0].strip()
        suffix = mimetypes.guess_extension(mime_type)

    # mimetypes currently returns '.jpe' for jpeg; so fix that up here...
    if suffix == '.jpe':
        return '.jpg'
    if suffix:
        return suffix

    # No usable content-type so guess based on the URL's extension if we can
    return os.path.splitext(urlparse(url).path)[1]


def _remove_quietly(path):
    """Delete path, logging (rather than raising) if it cannot be removed.

    This lives in its own function so that an OSError handled here cannot
    replace the exception the caller is about to re-raise with a bare
    ``raise`` (a hazard on Python 2).
    """
    try:
        os.remove(path)
    except OSError:
        logger.warning("download_file could not remove %s", path)


def download_file(url, path=None, timeout=None):
    """Downloads the image file from the given source URL and stores it in the
    filename given by path. If path is None, a temporary file will be created;
    its suffix is guessed from the response's Content-Type header, falling
    back to the extension found in the URL.

    timeout is passed through to requests.get().

    If successful returns the path to the downloaded file. If the server
    answers with a status code other than 200, None is returned.

    This function may raise various exceptions from the requests library.
    Any exception raised while the body is being saved is logged and re-raised
    unchanged, after the partially written file has been removed.
    """
    logger.info("download_file from %s; path=%s", url, path)

    try:
        r = requests.get(url, stream=True, timeout=timeout)
    except requests.RequestException:
        logger.exception("download_file requests.get('%s') exception", url)
        raise

    # With stream=True the connection stays checked out until the response is
    # closed, so make sure that happens on every exit path.
    try:
        if r.status_code != 200:
            logger.error("download_file from %s: error code %d", url, r.status_code)
            return None

        # Save file data

        if not path:
            suffix = _guess_suffix(r.headers.get('content-type'), url)
            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)

        try:
            with open(path, 'wb') as fp:
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, fp)
        except Exception:
            # Failures here are not limited to requests.RequestException
            # (e.g. an IOError while writing), so catch broadly to avoid
            # leaving a truncated file behind, then re-raise unchanged.
            logger.exception("download_file download exception")
            _remove_quietly(path)
            raise
    finally:
        r.close()

    file_size = os.stat(path).st_size
    logger.info("download_file retrieved %s bytes from %s; saved to %s", file_size, url, path)
    return path
bgneal@972: 
bgneal@972: 
if __name__ == '__main__':
    import sys

    # Manual test driver: download the URL given as the first command line
    # argument and print the path the file was saved to.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of an IndexError traceback.
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(2)

    log_format = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    logging.info("argument is %s", sys.argv[1])
    result = download_file(sys.argv[1])
    if result:
        # The parenthesized single-argument form prints the same on Python 2.
        print(result)