bgneal@972
|
1 """This module contains routines for downloading files."""
|
bgneal@972
|
2
|
bgneal@972
|
3 import logging
|
bgneal@973
|
4 import mimetypes
|
bgneal@972
|
5 import os
|
bgneal@972
|
6 import shutil
|
bgneal@972
|
7 import tempfile
|
bgneal@981
|
8 from urlparse import urlparse
|
bgneal@972
|
9
|
bgneal@972
|
10 import requests
|
bgneal@972
|
11
|
bgneal@972
|
12
|
bgneal@972
|
13 logger = logging.getLogger(__name__)
|
bgneal@972
|
14
|
bgneal@972
|
15
|
bgneal@980
|
def _guess_suffix(content_type, url):
    """Return a file name suffix (e.g. '.jpg') for a download, or ''.

    The suffix is derived from the Content-Type header value when one is
    given and recognized; otherwise it falls back to the extension found in
    the path component of url.
    """
    suffix = ''
    if content_type:
        # Drop any parameters ("image/jpeg; charset=binary") before the
        # lookup; mimetypes only recognizes the bare type/subtype.
        mime_type = content_type.split(';', 1)[0].strip()
        suffix = mimetypes.guess_extension(mime_type) or ''

    # mimetypes may return '.jpe' for jpeg; so fix that up here...
    if suffix == '.jpe':
        return '.jpg'

    if not suffix:
        # No usable content-type so guess based on the URL's extension
        suffix = os.path.splitext(urlparse(url).path)[1]
    return suffix


def download_file(url, path=None, timeout=None):
    """Downloads the file from the given source URL and stores it in the
    filename given by path. If path is None (or empty), a temporary file is
    created; its suffix is guessed from the response's content-type header or,
    failing that, from the extension in the URL.

    timeout is passed straight through to requests.get().

    If successful returns the path to the downloaded file. If the server
    responds with a status code other than 200, None is returned.

    This function may raise various exceptions from the requests library.
    Any exception raised while the body is being saved is logged and re-raised
    after the partially written file has been removed.
    """
    logger.info("download_file from %s; path=%s", url, path)

    try:
        r = requests.get(url, stream=True, timeout=timeout)
    except requests.RequestException:
        logger.exception("download_file requests.get('%s') exception", url)
        raise

    # The response was opened with stream=True, so it has to be closed
    # explicitly on every exit path to release the underlying connection.
    try:
        if r.status_code != 200:
            logger.error("download_file from %s: error code %d", url, r.status_code)
            return None

        # Save file data

        if not path:
            suffix = _guess_suffix(r.headers.get('content-type'), url)
            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)

        try:
            with open(path, 'wb') as fp:
                # Have the raw stream undo any gzip/deflate transfer encoding
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, fp)
        except Exception:
            # Catch everything, not just requests exceptions: reading r.raw
            # directly can surface lower-level errors (network layer, file
            # system), and a partial file must not be left behind then either.
            logger.exception("download_file download exception")
            # Check for existence rather than wrapping os.remove() in its own
            # try/except: handling a second exception here would change what
            # the bare raise below re-raises under Python 2.
            if os.path.exists(path):
                os.remove(path)
            raise
    finally:
        r.close()

    file_size = os.stat(path).st_size
    logger.info("download_file retrieved %s bytes from %s; saved to %s", file_size, url, path)
    return path
|
bgneal@972
|
66
|
bgneal@972
|
67
|
bgneal@972
|
if __name__ == '__main__':
    # Minimal command-line driver: download the URL given as the only
    # argument and print the path of the saved file.
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(1)

    s = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=s)
    logging.info("argument is %s", sys.argv[1])
    result = download_file(sys.argv[1])
    if result:
        # Parenthesized single-argument form prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print(result)
|