Skip to content

Commit

Permalink
Merge branch 'gtzan-downloader'
Browse files Browse the repository at this point in the history
  • Loading branch information
ynop committed Oct 5, 2018
2 parents 184378b + 9dea04b commit 14e932c
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 5 deletions.
2 changes: 1 addition & 1 deletion audiomate/corpus/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .base import CorpusDownloader, CorpusReader, CorpusWriter
from .broadcast import BroadcastReader # noqa: F401
from .default import DefaultReader, DefaultWriter # noqa: F401
from .gtzan import GtzanReader # noqa: F401
from .gtzan import GtzanDownloader, GtzanReader # noqa: F401
from .kaldi import KaldiReader, KaldiWriter # noqa: F401
from .musan import MusanDownloader, MusanReader # noqa: F401
from .speech_commands import SpeechCommandsReader # noqa: F401
Expand Down
35 changes: 35 additions & 0 deletions audiomate/corpus/io/gtzan.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,46 @@

import audiomate
from audiomate.corpus import assets
from audiomate.utils import download
from audiomate.utils import files
from . import base

DOWNLOAD_URL = 'http://opihi.cs.uvic.ca/sound/music_speech.tar.gz'
DIRECTORIES = {'music_wav': 'music', 'speech_wav': 'speech'}


class GtzanDownloader(base.CorpusDownloader):
    """
    Downloader for the GTZAN Corpus.

    Args:
        url (str): The url to download the dataset from. If not given the default URL is used.
                   It is expected to be a tar.gz file.
    """

    def __init__(self, url=None):
        # Fall back to the module-level default when no explicit URL is given.
        self.url = DOWNLOAD_URL if url is None else url

    @classmethod
    def type(cls):
        """Return the identifier of this downloader (``'gtzan'``)."""
        return 'gtzan'

    def _download(self, target_path):
        """
        Download the archive from ``self.url`` and unpack it into ``target_path``.

        The archive is stored temporarily as ``tmp_ark.tar.gz`` inside ``target_path``,
        extracted there, and the content of the extracted subfolders is then brought up
        into ``target_path`` itself. The temporary archive is removed afterwards, also
        when the download or the extraction fails.

        Args:
            target_path (str): Folder to store the corpus in. It is created if it does not exist.
        """
        os.makedirs(target_path, exist_ok=True)
        tmp_file = os.path.join(target_path, 'tmp_ark.tar.gz')

        try:
            download.download_file(self.url, tmp_file)
            download.extract_tar(tmp_file, target_path)

            # Copy instead of move: the subfolders in the archive are read-only,
            # so moving items out of them raises a permission error.
            files.move_all_files_from_subfolders_to_top(target_path, delete_subfolders=True, copy=True)
        finally:
            # Never leave a (possibly partial) archive behind in the target folder.
            # The file may not exist if the download failed before creating it.
            if os.path.isfile(tmp_file):
                os.remove(tmp_file)


class GtzanReader(base.CorpusReader):
"""
Reader for the GTZAN music/speech corpus. The corpus consists of 64 music and 64 speech tracks that are each 30
Expand Down
12 changes: 10 additions & 2 deletions audiomate/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import shutil


def move_all_files_from_subfolders_to_top(folder_path, delete_subfolders=False):
def move_all_files_from_subfolders_to_top(folder_path, delete_subfolders=False, copy=False):
"""
Move all files/folder from all subfolders of `folder_path` on top into `folder_path`.
Args:
folder_path (str): Path of the folder.
delete_subfolders (bool): If True the subfolders are deleted after all items are moved out of it.
copy (bool): If True copies the files instead of moving. (default False)
"""
for item in os.listdir(folder_path):
sub_path = os.path.join(folder_path, item)
Expand All @@ -18,7 +19,14 @@ def move_all_files_from_subfolders_to_top(folder_path, delete_subfolders=False):
for sub_item in os.listdir(sub_path):
src = os.path.join(sub_path, sub_item)
target = os.path.join(folder_path, sub_item)
shutil.move(src, target)

if copy:
if os.path.isfile(src):
shutil.copy(src, target)
else:
shutil.copytree(src, target)
else:
shutil.move(src, target)

if delete_subfolders:
shutil.rmtree(sub_path)
5 changes: 4 additions & 1 deletion docs/notes/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ Next Version
But it only works for the prepared corpus.

* Added function (:func:`audiomate.corpus.utils.label_cleaning.merge_consecutive_labels_with_same_values`)
for merging consecutive labels with the same value.
for merging consecutive labels with the same value.

* Added downloader (:class:`audiomate.corpus.io.GtzanDownloader`) for the
`GTZAN Music/Speech <https://marsyasweb.appspot.com/download/data_sets/>`_ corpus.

**Fixes**

Expand Down
5 changes: 4 additions & 1 deletion docs/reference/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Implementations
Free-Spoken-Digit-Dataset x x
Folder x
Google Speech Commands x
GTZAN x
GTZAN x x
Kaldi x x
Mozilla DeepSpeech x
MUSAN x x
Expand Down Expand Up @@ -109,6 +109,9 @@ Google Speech Commands

GTZAN
^^^^^
.. autoclass:: GtzanDownloader
:members:

.. autoclass:: GtzanReader
:members:

Expand Down
25 changes: 25 additions & 0 deletions tests/corpus/io/test_gtzan.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os

import pytest
import requests_mock

from audiomate import corpus
from audiomate.corpus import io
from audiomate.corpus.io import gtzan
from tests import resources


Expand All @@ -17,6 +19,29 @@ def data_path():
return resources.sample_corpus_path('gtzan')


@pytest.fixture()
def tar_data():
    """Raw bytes of the sample archive ``cv_corpus_v1.tar.gz`` from the test resources."""
    archive_path = resources.get_resource_path(['sample_files', 'cv_corpus_v1.tar.gz'])

    with open(archive_path, 'rb') as archive:
        content = archive.read()

    return content


class TestGtzanDownloader:
    """Tests for the GTZAN corpus downloader."""

    def test_download(self, tar_data, tmpdir):
        """The archive content ends up directly in the target folder after downloading."""
        destination = tmpdir.strpath
        gtzan_downloader = io.GtzanDownloader()

        with requests_mock.Mocker() as http_mock:
            # Serve the sample archive for the default download URL.
            http_mock.get(gtzan.DOWNLOAD_URL, content=tar_data)

            gtzan_downloader.download(destination)

        # Each subset of the sample archive consists of a csv file and a folder of the same name.
        for subset in ('cv-valid-dev', 'cv-valid-train'):
            assert os.path.isfile(os.path.join(destination, subset + '.csv'))
            assert os.path.isdir(os.path.join(destination, subset))


class TestGtzanReader:

def test_load_files(self, reader, data_path):
Expand Down

0 comments on commit 14e932c

Please sign in to comment.