Skip to content

Commit

Permalink
Added file existence check and option for force redownload to CorpusDownloader - GH #125
Browse files Browse the repository at this point in the history
  • Loading branch information
cdkini authored and ynop committed Jul 5, 2020
1 parent 7c29c23 commit 143c21d
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
14 changes: 13 additions & 1 deletion audiomate/corpus/io/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import abc
import os
import shutil

from audiomate.utils import jsonfile

Expand All @@ -17,14 +18,25 @@ class CorpusDownloader(metaclass=abc.ABCMeta):
implementation.
"""

def download(self, target_path):
def download(self, target_path, force_redownload=False):
    """
    Downloads the data of the corpus and saves it to the given path.
    The data has to be saved in a way, so that the corresponding ``CorpusReader`` can load the corpus.

    A target path that does not exist yet, or that is an empty directory,
    is handed to ``_download`` untouched.

    Args:
        target_path (str): The path to save the data to.
        force_redownload (bool, optional): If ``True``, delete whatever already exists
                                           at the target path and redownload the corpus.

    Returns:
        Whatever the ``_download`` implementation of the downloader returns.

    Raises:
        IOError: When the target path is a non-empty directory or an existing file
                 and ``force_redownload`` is not set to ``True``.
    """
    if os.path.isdir(target_path):
        # An empty directory is not considered a finished download.
        if os.listdir(target_path):
            if not force_redownload:
                raise IOError('Corpus already downloaded at {}.'.format(target_path))
            shutil.rmtree(target_path)
    elif os.path.exists(target_path):
        # A regular file occupies the path. ``os.listdir`` and ``shutil.rmtree``
        # only work on directories, so this case is handled explicitly instead
        # of failing with ``NotADirectoryError``.
        if not force_redownload:
            raise IOError('Target path {} exists and is not a directory.'.format(target_path))
        os.remove(target_path)

    return self._download(target_path)

@classmethod
Expand Down
42 changes: 42 additions & 0 deletions tests/corpus/io/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os

from audiomate.corpus.io import base

import pytest


def create_mock_corpus_downloader():
    """
    Create an instance of a minimal ``CorpusDownloader`` subclass for testing.

    Returns:
        MockCorpusDownloader: A downloader of type ``'mock'`` whose ``_download``
        only creates the nested path ``subfolder/a.txt`` below the target path.
    """

    class MockCorpusDownloader(base.CorpusDownloader):

        def _download(self, target_path):
            # NOTE: ``os.makedirs`` creates every path component as a directory,
            # so ``a.txt`` ends up being a directory, not a regular file.
            created_path = os.path.join(target_path, 'subfolder', 'a.txt')
            os.makedirs(created_path)

        @classmethod
        def type(cls):
            return 'mock'

    downloader = MockCorpusDownloader()
    return downloader


class TestCorpusDownloader:
    """Tests for the existence check and ``force_redownload`` option of ``CorpusDownloader.download``."""

    def test_force_redownload_overwrites_existing_directory(self, tmpdir):
        target_folder = tmpdir.strpath
        corpus_dl = create_mock_corpus_downloader()

        # ``join`` alone only builds a path object; ``write`` actually creates
        # the stale file that the forced redownload is expected to remove.
        tmpdir.mkdir('subfolder').join('b.txt').write('stale')
        assert os.path.exists(os.path.join(target_folder, 'subfolder', 'b.txt'))

        corpus_dl.download(target_folder, force_redownload=True)

        assert len(os.listdir(target_folder)) == 1
        assert os.path.exists(os.path.join(target_folder, 'subfolder', 'a.txt'))
        assert not os.path.exists(os.path.join(target_folder, 'subfolder', 'b.txt'))

    def test_existing_directory_forces_io_error(self, tmpdir):
        target_folder = tmpdir.strpath
        corpus_dl = create_mock_corpus_downloader()

        # Create real content so the target folder counts as already downloaded.
        tmpdir.mkdir('subfolder').join('a.txt').write('existing')

        with pytest.raises(IOError):
            corpus_dl.download(target_folder, force_redownload=False)

        # Without force, the existing data must be left untouched.
        assert os.path.isfile(os.path.join(target_folder, 'subfolder', 'a.txt'))

0 comments on commit 143c21d

Please sign in to comment.