bgneal@972
|
1 """This module contains routines for downloading files."""
|
bgneal@972
|
2
|
bgneal@972
|
3 import logging
|
bgneal@973
|
4 import mimetypes
|
bgneal@972
|
5 import os
|
bgneal@972
|
6 import shutil
|
bgneal@972
|
7 import tempfile
|
bgneal@972
|
8
|
bgneal@972
|
9 import requests
|
bgneal@972
|
10
|
bgneal@972
|
11
|
bgneal@972
|
12 logger = logging.getLogger(__name__)
|
bgneal@972
|
13
|
bgneal@972
|
14
|
bgneal@972
|
def download_file(url, path=None, timeout=None):
    """Download the file at the given URL and store it on disk.

    Arguments:
        url -- the source URL to fetch with an HTTP GET.
        path -- filename to write to. If it is None (or empty), a temporary
            file is created with tempfile.mkstemp(); its suffix is guessed
            from the response's Content-Type header.
        timeout -- optional timeout handed straight to requests.get(). The
            default of None applies no timeout, as before.

    If successful returns the path to the downloaded file. If the server
    answers with a status code other than 200, None is returned.

    Exceptions from the requests library raised by the initial request are
    logged and re-raised. Any exception raised while the body is being written
    is logged, the partially written file is removed, and the exception is
    re-raised unchanged.
    """
    logger.info("download_file from %s; path=%s", url, path)

    try:
        r = requests.get(url, stream=True, timeout=timeout)
    except requests.RequestException:
        logger.exception("download_file requests.get('%s') exception", url)
        raise

    # With stream=True the body has not been read yet, so the response must be
    # closed explicitly on every exit path, including the early return below.
    try:
        if r.status_code != 200:
            logger.error("download_file from %s: error code %d", url, r.status_code)
            return None

        # Save file data

        if not path:
            suffix = _guess_suffix(r.headers.get('content-type'))
            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)

        try:
            with open(path, 'wb') as fp:
                # Have the raw stream undo any gzip/deflate transfer encoding.
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, fp)
        except Exception:
            # Reading r.raw bypasses requests' own exception wrapping, so
            # failures here are not limited to requests.RequestException.
            # Clean up on any error and let the original exception propagate.
            logger.exception("download_file download exception")
            _remove_quietly(path)
            raise
    finally:
        r.close()

    file_size = os.stat(path).st_size
    logger.info("download_file retrieved %s bytes from %s; saved to %s", file_size, url, path)
    return path


def _guess_suffix(content_type):
    """Return a filename suffix for a Content-Type header value, or ''."""
    if not content_type:
        return ''

    # Strip parameters such as "; charset=utf-8" so that only the bare
    # type/subtype is looked up.
    mime_type = content_type.split(';', 1)[0].strip()
    suffix = mimetypes.guess_extension(mime_type)
    if suffix == '.jpe':
        return '.jpg'
    return suffix or ''


def _remove_quietly(path):
    """Remove path, logging (not raising) if the removal itself fails."""
    # Done in its own function so a failed removal cannot replace the
    # exception the caller is about to re-raise.
    try:
        os.remove(path)
    except OSError:
        logger.warning("download_file could not remove %s", path)
|
bgneal@972
|
60
|
bgneal@972
|
61
|
bgneal@972
|
if __name__ == '__main__':
    # Manual test driver: download the URL given as the first command line
    # argument and print the path of the saved file.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit("usage: %s URL" % sys.argv[0])

    s = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=s)
    logging.info("argument is %s", sys.argv[1])
    result = download_file(sys.argv[1])
    if result:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3.
        print(result)
|