annotate core/download.py @ 1201:fe10aea76cbd

Add 2023 MP3 compilation links
author Brian Neal <bgneal@gmail.com>
date Sun, 24 Mar 2024 14:50:23 -0500
parents ef1558941bc9
children
rev   line source
bgneal@972 1 """This module contains routines for downloading files."""
bgneal@972 2
bgneal@972 3 import logging
bgneal@973 4 import mimetypes
bgneal@972 5 import os
bgneal@972 6 import shutil
bgneal@972 7 import tempfile
bgneal@981 8 from urlparse import urlparse
bgneal@972 9
bgneal@972 10 import requests
bgneal@972 11
bgneal@972 12
bgneal@972 13 logger = logging.getLogger(__name__)
bgneal@972 14
bgneal@972 15
def download_file(url, path=None, timeout=None):
    """Download the file at the given source URL and store it on disk.

    Arguments:
        url -- the URL to fetch with an HTTP GET.
        path -- filename to write the body to. If it is None (or empty), a
            temporary file is created; its suffix is guessed from the
            response's Content-Type header, falling back to the extension
            found in the URL's path.
        timeout -- passed through unchanged to requests.get().

    If successful returns the path to the downloaded file. If the server
    answers with any status other than 200, None is returned.

    This function may raise various exceptions from the requests library (and
    from the underlying transport or file system while the body is being
    saved). In that case the partially written file is removed before the
    exception is re-raised.
    """
    logger.info("download_file from %s; path=%s", url, path)

    try:
        r = requests.get(url, stream=True, timeout=timeout)
    except requests.RequestException:
        logger.exception("download_file requests.get('%s') exception", url)
        raise

    # With stream=True the connection is only handed back to the pool once the
    # body has been fully consumed or the response is closed, so make sure we
    # close it on every exit path (including the early return below).
    try:
        if r.status_code != 200:
            logger.error("download_file from %s: error code %d", url, r.status_code)
            return None

        # Save file data

        # Only delete the destination on failure if we created it (temp file)
        # or managed to open it for writing; never touch a caller's file that
        # we could not even open.
        remove_on_error = False

        if not path:
            content_type = r.headers.get('content-type')
            suffix = ''
            if content_type:
                # Drop parameters such as "; charset=utf-8" -- mimetypes only
                # recognizes the bare "type/subtype" form.
                mime_type = content_type.split(';', 1)[0].strip()
                suffix = mimetypes.guess_extension(mime_type)

            # mimetypes currently returns '.jpe' for jpeg; so fix that up here...
            if suffix == '.jpe':
                suffix = '.jpg'
            elif not suffix:
                # No usable content-type so guess based on the URL's extension
                # if we can
                p = urlparse(url)
                suffix = os.path.splitext(p.path)[1]

            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)
            remove_on_error = True

        try:
            with open(path, 'wb') as fp:
                remove_on_error = True
                # Have urllib3 undo gzip/deflate transfer encodings for us
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, fp)
        except Exception:
            # Reading from r.raw raises urllib3 / socket / IO errors rather
            # than requests.RequestException, so catch broadly here; the
            # exception is always re-raised after cleaning up.
            logger.exception("download_file download exception")
            if remove_on_error:
                os.remove(path)
            raise
    finally:
        r.close()

    file_size = os.stat(path).st_size
    logger.info("download_file retrieved %s bytes from %s; saved to %s", file_size, url, path)
    return path
bgneal@972 66
bgneal@972 67
if __name__ == '__main__':
    # Manual test driver: download the URL given as the first command-line
    # argument and print the path of the file it was saved to.
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(2)

    s = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=s)
    logging.info("argument is %s", sys.argv[1])
    result = download_file(sys.argv[1])
    if result:
        # Parenthesized single-argument form prints identically on Python 2
        # and Python 3.
        print(result)