Goal:
- Extract all audio, split it into fixed-length chunks, and save each chunk as its own .npy file

In [1]:
import os

import numpy as np
import torch
import torchaudio

from tqdm import tqdm
import matplotlib.pyplot as plt

In [4]:
# Generator that turns a directory of .wav files into fixed-length chunks
def chunk_audio(dir, seconds):
    """Yield fixed-length audio chunks from the ``.wav`` files in a directory.

    Files are read in sorted filename order. Each loaded waveform is flattened
    to 1-D and appended to a running buffer, and every complete chunk is
    yielded as soon as enough samples are available, so a chunk may straddle
    the boundary between two consecutive files. Samples left over after the
    last file (fewer than one chunk) are dropped.

    Args:
        dir: Directory to scan (non-recursively) for ``.wav`` files. The name
            shadows the builtin but is kept so existing callers still work.
        seconds: Chunk duration in seconds. Must be positive and span at
            least one sample at the file's sample rate.

    Yields:
        1-D ``torch.Tensor`` holding ``int(sample_rate * seconds)`` samples.

    Raises:
        ValueError: If ``seconds`` is not positive or is shorter than one
            sample. Because this is a generator, the error surfaces on the
            first iteration rather than at call time.
        AssertionError: If a file is not sampled at 24 kHz.
    """
    # A zero-length chunk would make the draining loop below spin forever
    # (every buffer is "long enough"), so reject it up front.
    if not seconds > 0:
        raise ValueError(f"seconds must be positive, got {seconds!r}")

    wav_names = sorted(name for name in os.listdir(dir) if name.endswith(".wav"))
    leftover = torch.zeros(0)
    for name in tqdm(wav_names):
        waveform, sample_rate = torchaudio.load(os.path.join(dir, name))
        assert sample_rate == 24000, (
            f"expected 24 kHz audio, got {sample_rate} Hz in {name}"
        )

        chunk_size = int(sample_rate * seconds)
        if chunk_size < 1:
            raise ValueError(
                f"seconds={seconds!r} is shorter than one sample at {sample_rate} Hz"
            )

        # NOTE(review): flatten() presumably sees mono audio here; for a
        # multi-channel file it would lay the channels end to end - confirm.
        buffer = torch.cat([leftover, waveform.flatten()])

        # Emit every complete chunk; carry the tail over to the next file.
        n_full = len(buffer) // chunk_size
        for k in range(n_full):
            yield buffer[k * chunk_size:(k + 1) * chunk_size]
        leftover = buffer[n_full * chunk_size:]

In [5]:
root_dir = "./data/LibriTTS/train-clean-100"
chunk_seconds = 0.2

# Chunks are written to a sibling directory named after the chunk length,
# e.g. "./data/LibriTTS/train-clean-100-0.2s".
out_dir = f"{root_dir}-{chunk_seconds}s"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

# Keep only real directories so stray files in the dataset root cannot
# crash the os.listdir call below.
dirs = sorted(
    name for name in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, name))
)

for top_dir in dirs:
    top_path = os.path.join(root_dir, top_dir)
    for subdir in sorted(os.listdir(top_path)):
        sub_path = os.path.join(top_path, subdir)
        if not os.path.isdir(sub_path):
            continue

        # Output files are named <top>_<sub>_<index>.npy, zero-padded on the left.
        prefix = f"{out_dir}/{top_dir:>04}_{subdir:>06}"

        # Resume support: skip this sequence if its first chunk already exists.
        # NOTE: only chunk 0 is checked, so a sequence that was interrupted
        # part-way through a previous run is treated as complete.
        if os.path.exists(f"{prefix}_{0:>05}.npy"):
            continue

        for i, chunk in enumerate(chunk_audio(sub_path, seconds=chunk_seconds)):
            np.save(f"{prefix}_{i:>05}.npy", chunk.numpy())

  0%|          | 0/66 [00:00<?, ?it/s]

100%|██████████| 66/66 [00:03<00:00, 17.64it/s]
100%|██████████| 30/30 [00:01<00:00, 16.74it/s]
100%|██████████| 134/134 [00:06<00:00, 19.60it/s]
100%|██████████| 108/108 [00:06<00:00, 15.48it/s]
100%|██████████| 4/4 [00:00<00:00,  8.03it/s]
100%|██████████| 8/8 [00:00<00:00, 56.37it/s]
100%|██████████| 127/127 [00:10<00:00, 11.91it/s]
100%|██████████| 163/163 [00:08<00:00, 19.22it/s]
100%|██████████| 71/71 [00:02<00:00, 27.32it/s]
100%|██████████| 121/121 [00:06<00:00, 18.16it/s]
100%|██████████| 68/68 [00:03<00:00, 20.87it/s]
100%|██████████| 56/56 [00:03<00:00, 17.32it/s]
100%|██████████| 112/112 [00:07<00:00, 15.53it/s]
100%|██████████| 2/2 [00:00<00:00, 16.16it/s]
100%|██████████| 13/13 [00:00<00:00, 36.15it/s]
100%|██████████| 208/208 [00:09<00:00, 22.99it/s]
100%|██████████| 38/38 [00:00<00:00, 68.16it/s]
100%|██████████| 35/35 [00:00<00:00, 37.10it/s]
100%|██████████| 56/56 [00:02<00:00, 25.09it/s]
100%|██████████| 64/64 [00:02<00:00, 27.32it/s]
100%|██████████| 5/5 [00:00<00:0