# Prepare trainer

In [1]:
import os
import time
import logging
import argparse

from utils.trainer import trainer
from utils.hparams import HParam
from utils.writer import MyWriter
from datasets.dataloader import create_dataloader


# Command-line interface of the training script, reproduced here so the
# notebook can build the same `args` namespace the script would receive.
parser = argparse.ArgumentParser()
parser.add_argument('-b', '--base_dir', type=str, default='.',
                    help="Root directory of run.")
parser.add_argument('-c', '--config', type=str, required=True,
                    help="yaml file for configuration")
parser.add_argument('-e', '--embedder_path', type=str, required=True,
                    help="path of embedder model pt file")
parser.add_argument('--checkpoint_path', type=str, default=None,
                    help="path of checkpoint pt file")
parser.add_argument('-m', '--model', type=str, required=True,
                    help="Name of the model. Used for both logging and saving checkpoints.")
# The help text here used to be a copy of the --model help text.
parser.add_argument('-d', '--data_dir', type=str, required=True,
                    help="Directory containing the generated training/test data.")

# A notebook has no real command line, so the arguments are passed explicitly.
args = parser.parse_args([
    "-c", "config.yaml",
    "-e", "embedder.pt",
    "-m", "test_coldload",
    "-d", "tmp_coldload",
])

# Parse the hyper-parameters and also keep the raw YAML text around, since
# the trainer is handed both the parsed object and the string form.
hp = HParam(args.config)
with open(args.config, 'r') as f:
    hp_str = f.read()

# Per-model checkpoint directory.
pt_dir = os.path.join(args.base_dir, hp.log.chkpt_dir, args.model)
os.makedirs(pt_dir, exist_ok=True)

# Per-model log directory (also receives the text log file below).
log_dir = os.path.join(args.base_dir, hp.log.log_dir, args.model)
os.makedirs(log_dir, exist_ok=True)

# None when no checkpoint was given on the command line.
chkpt_path = args.checkpoint_path

# Log to a timestamped file inside log_dir and to the console.
log_file = os.path.join(log_dir, '%s-%d.log' % (args.model, time.time()))
log_handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)
logger = logging.getLogger()

# Refuse to continue when the config leaves either data directory blank.
data_dir_missing = hp.data.train_dir == '' or hp.data.test_dir == ''
if data_dir_missing:
    logger.error("train_dir, test_dir cannot be empty.")
    raise Exception("Please specify directories of data in %s" % args.config)

writer = MyWriter(hp, log_dir)

  for doc in docs:


# Test coldloader (from original repo)

In [2]:
%%time
# Cold-load baseline: run generator.py first, then train from its output.
# NOTE(review): `-o tmp_coldload` matches the `-d tmp_coldload` data_dir parsed
# into `args`; presumably -o is the generator's output directory - confirm
# against generator.py.
!python generator.py -c config.yaml -o tmp_coldload -d datasets/LibriSpeech --train_amt 4
# Here create_dataloader is the one imported from datasets.dataloader.
trainloader = create_dataloader(hp, args, train=True)
testloader = create_dataloader(hp, args, train=False)

# Hand everything prepared in the setup cell to the training entry point.
trainer(args, pt_dir, chkpt_path, trainloader, testloader, writer, logger, hp, hp_str, log_dir)

  for doc in docs:
100%|████████████████████████████████████| 10000/10000 [01:11<00:00, 140.01it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 110.98it/s]


2022-01-30 17:10:31,940 - INFO - Starting new training run
2022-01-30 17:10:33,354 - INFO - Wrote summary at step 1
2022-01-30 17:10:34,237 - INFO - Wrote summary at step 2
2022-01-30 17:10:34,977 - INFO - Wrote summary at step 3
2022-01-30 17:10:35,721 - INFO - Wrote summary at step 4
2022-01-30 17:10:36,459 - INFO - Wrote summary at step 5
2022-01-30 17:10:37,198 - INFO - Wrote summary at step 6
2022-01-30 17:10:37,936 - INFO - Wrote summary at step 7
2022-01-30 17:10:38,678 - INFO - Wrote summary at step 8
2022-01-30 17:10:39,416 - INFO - Wrote summary at step 9
2022-01-30 17:10:40,158 - INFO - Wrote summary at step 10
2022-01-30 17:10:40,895 - INFO - Wrote summary at step 11
2022-01-30 17:10:41,635 - INFO - Wrote summary at step 12
2022-01-30 17:10:42,376 - INFO - Wrote summary at step 13
2022-01-30 17:10:43,119 - INFO - Wrote summary at step 14
2022-01-30 17:10:43,858 - INFO - Wrote summary at step 15
2022-01-30 17:10:44,610 - INFO - Wrote summary at step 16
2022-01-30 17:10:45,35

CPU times: user 37min 16s, sys: 14.3 s, total: 37min 30s
Wall time: 38min 52s


Traceback (most recent call last):
  File "/root/voicefilter/utils/train.py", line 116, in train
    drive.Upload(save_path, "1sWAUt5vfyD97Cq85J8_zuwMeX4tmfEiZ")
AttributeError: 'GDrive' object has no attribute 'Upload'


# Test hotloader (JIT loader)

In [6]:
from types import SimpleNamespace

In [29]:
import os
import glob
import torch
import librosa
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader

from utils.audio import Audio

def vad_merge(w):
    """Return `w` with the gaps between non-silent intervals removed.

    The intervals come from `librosa.effects.split` with a 20 dB threshold;
    the corresponding slices of `w` are joined back-to-back.

    Args:
        w: waveform array accepted by `librosa.effects.split`.

    Returns:
        A flattened numpy array holding only the retained slices.
    """
    voiced_spans = librosa.effects.split(w, top_db=20)
    voiced_chunks = [w[start:end] for start, end in voiced_spans]
    # axis=None makes concatenate flatten the chunks into one 1-D array.
    return np.concatenate(voiced_chunks, axis=None)

def _train_collate_fn(batch):
    """Collate training samples of (dvec_mel, target_mag, mixed_mag).

    The d-vector mels stay in a plain list; the two magnitude spectrograms
    are stacked along a new leading batch dimension.
    """
    dvec_list = [dvec_mel for dvec_mel, _, _ in batch]
    target_mag = torch.stack([target for _, target, _ in batch], dim=0)
    mixed_mag = torch.stack([mixed for _, _, mixed in batch], dim=0)
    return dvec_list, target_mag, mixed_mag


def _test_collate_fn(batch):
    """Return the test batch untouched (a list of per-sample tuples)."""
    return batch


def _speaker_dirs(*path_parts):
    """List the sub-directories of os.path.join(*path_parts), sorted by path."""
    candidates = glob.glob(os.path.join(*path_parts, '*'))
    return sorted(path for path in candidates if os.path.isdir(path))


def _utterances_per_speaker(speaker_dirs, pattern):
    """Glob every speaker directory recursively; drop speakers with < 2 files."""
    per_speaker = [glob.glob(os.path.join(spk, '**', pattern), recursive=True)
                   for spk in speaker_dirs]
    return [utterances for utterances in per_speaker if len(utterances) >= 2]


def create_dataloader(hp, args, train):
    """Build a DataLoader that mixes utterances on the fly via VFDataset.

    Args:
        hp: hyper-parameter object; reads `hp.form.input` (file glob pattern)
            and, for training, `hp.train.batch_size` / `hp.train.num_workers`.
        args: namespace-like object. The optional attributes `libri_dir`,
            `voxceleb_dir`, `out_dir` and `vad` are honoured when present;
            missing ones fall back to the notebook defaults
            ("datasets/LibriSpeech", None, "tmp", 0). Previously the argument
            was ignored and these defaults were always used.
        train: True for the shuffled training loader, False for the
            one-sample-per-batch test loader.

    Returns:
        torch.utils.data.DataLoader wrapping a VFDataset.

    Raises:
        Exception: if both `libri_dir` and `voxceleb_dir` resolve to None.
    """
    # Local import: SimpleNamespace was only imported in a different cell.
    from types import SimpleNamespace

    voxceleb_dir = getattr(args, "voxceleb_dir", None)
    # Only default to LibriSpeech when the caller did not ask for VoxCeleb.
    libri_default = "datasets/LibriSpeech" if voxceleb_dir is None else None
    args = SimpleNamespace(
        libri_dir=getattr(args, "libri_dir", libri_default),
        voxceleb_dir=voxceleb_dir,
        out_dir=getattr(args, "out_dir", "tmp"),
        vad=getattr(args, "vad", 0),
    )

    if args.libri_dir is None and args.voxceleb_dir is None:
        raise Exception("Please provide directory of data")

    # Resolve the speaker folders of the requested split only, so building
    # one loader does not recursively scan the other split.
    if args.libri_dir is not None:
        if train:
            folders = (_speaker_dirs(args.libri_dir, 'train-clean-100')
                       + _speaker_dirs(args.libri_dir, 'train-clean-360'))
        else:
            # Same isdir filtering as the training splits.
            folders = _speaker_dirs(args.libri_dir, 'dev-clean')
    else:
        # Sorted listing keeps the last-20-speakers hold-out stable across
        # runs; raw glob order is not guaranteed.
        all_folders = _speaker_dirs(args.voxceleb_dir)
        folders = all_folders[:-20] if train else all_folders[-20:]

    speakers = _utterances_per_speaker(folders, hp.form.input)

    if train:
        return DataLoader(dataset=VFDataset(hp, args, speakers=speakers, train=True),
                          batch_size=hp.train.batch_size,
                          shuffle=True,
                          num_workers=hp.train.num_workers,
                          collate_fn=_train_collate_fn,
                          pin_memory=True,
                          drop_last=True,
                          sampler=None)

    return DataLoader(dataset=VFDataset(hp, args, speakers=speakers, train=False),
                      collate_fn=_test_collate_fn,
                      batch_size=1, shuffle=False, num_workers=0)


class VFDataset(Dataset):
    """Dataset that builds two-speaker mixtures on the fly.

    Every item is drawn at random, so `idx` is ignored: two speakers are
    sampled, two utterances of the first become the d-vector reference and
    the target, and one utterance of the second is the interferer.

    Args:
        hp: hyper-parameters; reads `audio.sample_rate`, `audio.hop_length`,
            `embedder.window`, `data.audio_len`, `data.train_dir` and
            `data.test_dir`.
        args: stored on the instance as `self.args`; not read by this class.
        speakers: list of per-speaker lists of audio file paths. At least two
            speakers, and at least two files for whichever speaker is drawn
            first, are needed by the `random.sample` calls.
        train: selects the item layout returned by `__getitem__`.
    """

    # Nominal epoch length; items are random, so this only bounds an epoch.
    EPOCH_LEN = 10**5
    # Threshold in dB passed to librosa.effects.trim.
    TRIM_TOP_DB = 20

    def __init__(self, hp, args, speakers, train):
        self.sr = hp.audio.sample_rate
        self.hp = hp
        self.args = args
        self.speakers = speakers
        self.train = train
        self.data_dir = hp.data.train_dir if train else hp.data.test_dir

        self.audio = Audio(hp)

    def __len__(self):
        return self.EPOCH_LEN

    def _load_trimmed(self, path):
        """Load `path` at the dataset sample rate and trim it with librosa."""
        wav, _ = librosa.load(path, sr=self.sr)
        assert len(wav.shape) == 1, 'wav files must be mono, not stereo'
        wav, _ = librosa.effects.trim(wav, top_db=self.TRIM_TOP_DB)
        return wav

    def _try_make_sample(self):
        """Draw one random mixture; return None when the draw is unusable.

        Returns:
            None, or the tuple
            (dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase).
        """
        # Random 2 speakers: two utterances of the first, one of the second.
        spk1, spk2 = random.sample(self.speakers, 2)
        s1_dvec, s1_target = random.sample(spk1, 2)
        s2 = random.choice(spk2)

        # Check the reference first so a rejected draw costs one file load
        # instead of three.
        d = self._load_trimmed(s1_dvec)
        # if reference for d-vector is too short, discard it
        if d.shape[0] < 1.1 * self.hp.embedder.window * self.hp.audio.hop_length:
            return None

        # LibriSpeech has many silent intervals that could be vad-merged, but
        # the VoiceFilter paper did not do that; to measure SDR the same way,
        # no vad-merge is applied here.

        # Fit audio to `hp.data.audio_len` seconds; discard shorter clips.
        L = int(self.sr * self.hp.data.audio_len)
        w1 = self._load_trimmed(s1_target)
        if w1.shape[0] < L:
            return None
        w2 = self._load_trimmed(s2)
        if w2.shape[0] < L:
            return None

        w1, w2 = w1[:L], w2[:L]
        mixed = w1 + w2

        peak = np.max(np.abs(mixed))
        # A silent (or NaN) mixture would make the division below yield NaN
        # spectrograms, so treat it as an unusable draw.
        if not peak > 0:
            return None
        norm = peak * 1.1
        w1, w2, mixed = w1/norm, w2/norm, mixed/norm

        dvec_mel = self.audio.get_mel(d)
        dvec_mel = torch.from_numpy(dvec_mel).float()

        # magnitude spectrograms (plus the phase of the mixture)
        target_mag, _ = self.audio.wav2spec(w1)
        mixed_mag, phase = self.audio.wav2spec(mixed)

        return dvec_mel, w1, mixed, torch.from_numpy(target_mag), torch.from_numpy(mixed_mag), phase

    def __getitem__(self, idx):
        """Return a freshly mixed sample; `idx` is not used.

        Returns:
            train: (dvec_mel, target_mag, mixed_mag)
            test:  (dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag,
                    mixed_phase)
        """
        # Redraw until a usable sample comes up. NOTE: this never terminates
        # if no combination of files in `self.speakers` is usable.
        sample = self._try_make_sample()
        while sample is None:
            sample = self._try_make_sample()

        dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase = sample
        if self.train:
            return dvec_mel, target_mag, mixed_mag
        return dvec_mel, target_wav, mixed_wav, target_mag, mixed_mag, mixed_phase

In [32]:
%%time
# Hot-load (JIT) run: no generator.py step. The loaders come from the
# create_dataloader defined in this notebook, which mixes audio per item.
trainloader = create_dataloader(hp, args, train=True)
testloader = create_dataloader(hp, args, train=False)

# Same trainer call as the cold-load run; only the loaders differ.
trainer(args, pt_dir, chkpt_path, trainloader, testloader, writer, logger, hp, hp_str, log_dir)

2022-01-31 05:36:47,286 - INFO - access_token is expired. Now: 2022-01-31 05:36:47.286663, token_expiry: 2022-01-30 18:02:50
2022-01-31 05:36:47,287 - INFO - Refreshing access_token
2022-01-31 05:36:47,440 - INFO - Starting new training run
2022-01-31 05:36:49,255 - INFO - Wrote summary at step 1
2022-01-31 05:36:49,955 - INFO - Wrote summary at step 2
2022-01-31 05:36:50,657 - INFO - Wrote summary at step 3
2022-01-31 05:36:51,359 - INFO - Wrote summary at step 4
2022-01-31 05:36:52,056 - INFO - Wrote summary at step 5
2022-01-31 05:36:52,756 - INFO - Wrote summary at step 6
2022-01-31 05:36:53,457 - INFO - Wrote summary at step 7
2022-01-31 05:36:54,157 - INFO - Wrote summary at step 8
2022-01-31 05:36:54,858 - INFO - Wrote summary at step 9
2022-01-31 05:36:55,559 - INFO - Wrote summary at step 10
2022-01-31 05:36:56,260 - INFO - Wrote summary at step 11
2022-01-31 05:36:56,964 - INFO - Wrote summary at step 12
2022-01-31 05:36:57,664 - INFO - Wrote summary at step 13
2022-01-31 05:

CPU times: user 34min 56s, sys: 12.3 s, total: 35min 8s
Wall time: 35min 44s


Traceback (most recent call last):
  File "/root/voicefilter/utils/trainer.py", line 116, in trainer
    os.system(f'zip -j ./tensorboard.zip ./{log_dir}/*')
AttributeError: 'GDrive' object has no attribute 'Upload'
