From 1a5097b0105520b905dcab857976ab403d736a69 Mon Sep 17 00:00:00 2001
From: Kentaro Wada
Date: Tue, 15 Jan 2019 18:30:04 +0000
Subject: [PATCH] Add gdown.cached_download

---
 gdown/__init__.py        |  3 ++
 gdown/cached_download.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 setup.py                 |  2 +-
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 gdown/cached_download.py

diff --git a/gdown/__init__.py b/gdown/__init__.py
index f2021605..3bdb96af 100644
--- a/gdown/__init__.py
+++ b/gdown/__init__.py
@@ -4,6 +4,9 @@
 
 from .download import download
 
+from .cached_download import cached_download
+from .cached_download import md5sum
+
 
 __author__ = 'Kentaro Wada '
 __version__ = pkg_resources.get_distribution('gdown').version
diff --git a/gdown/cached_download.py b/gdown/cached_download.py
new file mode 100644
index 00000000..99002793
--- /dev/null
+++ b/gdown/cached_download.py
@@ -0,0 +1,99 @@
+from __future__ import print_function
+
+import hashlib
+import os
+import os.path as osp
+import shutil
+import sys
+import tempfile
+
+import filelock
+
+from .download import download
+
+
+cache_root = osp.join(osp.expanduser('~'), '.cache/gdown')
+
+
+def md5sum(file, blocksize=65536):
+    """Return the hexadecimal MD5 digest of the file at path ``file``.
+
+    The file is hashed ``blocksize`` bytes at a time so that large
+    downloads do not have to fit in memory.
+    """
+    hasher = hashlib.md5()
+    with open(file, 'rb') as f:
+        for block in iter(lambda: f.read(blocksize), b''):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def _makedirs(dirname):
+    """Create ``dirname`` and its parents unless it already exists."""
+    if not dirname or osp.isdir(dirname):
+        return
+    try:
+        os.makedirs(dirname)
+    except OSError:
+        # Another process may have created it in the meantime.
+        if not osp.isdir(dirname):
+            raise
+
+
+def cached_download(url, path=None, md5=None, quiet=False, postprocess=None):
+    """Download ``url`` to ``path`` unless a usable copy is already there.
+
+    Args:
+        url: URL passed to ``download``.
+        path: Destination file.  Defaults to a file under ``cache_root``
+            whose name is derived from ``url``.
+        md5: Expected MD5 hex digest.  When given, an existing file is
+            reused only if its digest matches; otherwise any existing file
+            is reused.
+        quiet: Suppress the messages printed here; also passed through
+            to ``download``.
+        postprocess: Optional callable invoked with ``path`` after a fresh
+            download (not when an existing file is reused).
+
+    Returns:
+        The path of the downloaded or reused file.
+    """
+    if path is None:
+        path = url.replace('/', '-SLASH-')\
+                  .replace(':', '-COLON-')\
+                  .replace('=', '-EQUAL-')\
+                  .replace('?', '-QUESTION-')
+        path = osp.join(cache_root, path)
+
+    # check existence
+    if osp.exists(path):
+        if not md5:
+            if not quiet:
+                print('[%s] File exists.' % path)
+            return path
+        if md5sum(path) == md5:
+            return path
+
+    # download
+    # cache_root holds the lock file and the temporary directory, so it
+    # must exist even when ``path`` points somewhere else.
+    _makedirs(cache_root)
+    _makedirs(osp.dirname(path))
+    lock_path = osp.join(cache_root, '_dl_lock')
+    temp_root = tempfile.mkdtemp(dir=cache_root)
+    try:
+        temp_path = osp.join(temp_root, 'dl')
+        download(url, temp_path, quiet=quiet)
+        with filelock.FileLock(lock_path):
+            shutil.move(temp_path, path)
+    finally:
+        # Remove the scratch directory on success as well as on failure.
+        shutil.rmtree(temp_root, ignore_errors=True)
+    if not quiet:
+        print('Saved to: {}'.format(path), file=sys.stderr)
+
+    # postprocess
+    if postprocess is not None:
+        postprocess(path)
+
+    return path
diff --git a/setup.py b/setup.py
index 4f7f5f94..6ba781ab 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@
     name='gdown',
     version=version,
     packages=find_packages(),
-    install_requires=['requests', 'six', 'tqdm'],
+    install_requires=['filelock', 'requests', 'six', 'tqdm'],
     description='Google Drive direct download of big files.',
     long_description=long_description,
     long_description_content_type='text/markdown',