<a href="https://colab.research.google.com/github/yashwardhan-gautam/Grid2.0/blob/master/VoiceFilter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Mount drive

In [None]:
# Mount Google Drive inside the Colab VM at '/content/drive'; later cells copy archives from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Basic necessary definitions

#### Hyper Parameters

In [None]:
class HyperParams:
  """Static configuration shared by the audio helpers, the speaker embedder and the model.

  All values are plain class attributes, so they can be read either from the
  class itself or from an instance (`HyperParams()`).
  """
  # --- STFT / spectrogram settings (passed to librosa by the Audio helpers) ---
  n_fft = 1200
  num_freq = n_fft // 2 + 1  # 601; derived so it cannot drift from n_fft
  sample_rate = 16000
  hop_length = 160
  win_length = 400
  # dB range used when normalising spectrograms to [0, 1]
  min_level_db = -100.0
  ref_level_db = 20.0
  # NOTE(review): preemphasis and power are not referenced by the visible cells.
  preemphasis = 0.97
  power = 0.30
  # Length, in seconds, to which primary / secondary utterances are clipped
  data_audio_len = 3.0

  # --- pre-trained speaker embedder (d-vector) settings ---
  embedder_num_mels = 40
  embedder_lstm_hidden = 768
  embedder_emb_dim = 256
  embedder_lstm_layers = 3
  embedder_window = 80  # frames per sliding window (was defined twice with the same value)
  embedder_stride = 40  # hop between consecutive windows, in frames

  # --- separation model layer sizes ---
  model_lstm_dim = 400
  model_fc1_dim = 600
  model_fc2_dim = num_freq  # 601

#### Audio related helper functions

In [None]:
import librosa
import numpy as np  

In [None]:
class Audio:
  """Waveform <-> spectrogram conversions driven by a hyper-parameter object.

  The hyper-parameter object must expose sample_rate, n_fft, hop_length,
  win_length, embedder_num_mels, min_level_db and ref_level_db.
  """

  def __init__(self,hyper_params):
    """Store the configuration and pre-compute the mel filter bank."""
    self.hyper_params = hyper_params
    # NOTE(review): the mel filter bank is built with the same n_fft as the
    # separation spectrograms; confirm this matches the pre-trained embedder.
    self.mel_basis_matrix = librosa.filters.mel(
        sr=hyper_params.sample_rate,
        n_fft=hyper_params.n_fft,
        n_mels=hyper_params.embedder_num_mels)

  def _stft(self, wave, **extra):
    """Run librosa's STFT with the configured n_fft / hop / window length."""
    hp = self.hyper_params
    return librosa.core.stft(y=wave, n_fft=hp.n_fft,
                             hop_length=hp.hop_length,
                             win_length=hp.win_length,
                             **extra)

  def get_mel_spec(self,wave):
    """Return log10 mel power features of `wave` (mel filter bank applied to |STFT|^2)."""
    stft_matrix = self._stft(wave, window='hann')
    power_spec = np.abs(stft_matrix) ** 2
    mel_energy = np.dot(self.mel_basis_matrix, power_spec)
    # small offset keeps log10 finite for empty bins
    return np.log10(mel_energy + 1e-6)

  def wave2spec(self,wave):
    """Return (normalised dB magnitude, phase) of `wave`, both transposed relative to the STFT output."""
    stft_matrix = self._stft(wave)
    magnitude_db = self.amp2db(np.abs(stft_matrix))
    return self.normalize(magnitude_db).T, np.angle(stft_matrix).T

  def spec2wave(self,spec_db_norm,phase):
    """Invert `wave2spec`: rebuild the complex spectrogram and run the inverse STFT."""
    hp = self.hyper_params
    magnitude = self.db2amp(self.denormalize(spec_db_norm.T))
    complex_spec = magnitude * np.exp(1j*phase.T)
    return librosa.core.istft(complex_spec,
                              hop_length=hp.hop_length,
                              win_length=hp.win_length)

  def amp2db(self,mat):
    """Convert amplitudes to dB relative to ref_level_db; amplitudes are floored at 1e-5."""
    floored = np.maximum(1e-5,mat)
    return 20.0 * np.log10(floored) - self.hyper_params.ref_level_db

  def db2amp(self,mat):
    """Inverse of `amp2db` (without the amplitude floor)."""
    shifted_db = mat+self.hyper_params.ref_level_db
    return np.power(10.0, shifted_db*0.05)

  def normalize(self,mat):
    """Map dB values from [min_level_db, 0] onto [0, 1], clipping anything outside."""
    floor_db = self.hyper_params.min_level_db
    return np.clip((mat-floor_db)/-floor_db, 0.0 , 1.0)

  def denormalize(self, mat):
    """Map values in [0, 1] (clipped first) back onto [min_level_db, 0] dB."""
    floor_db = self.hyper_params.min_level_db
    return np.clip(mat,0.0,1.0)*(-floor_db)+floor_db

In [None]:
# Module-level instances: later helpers (e.g. get_dvector, create_dataset) read these globals directly.
hyper_params = HyperParams()
audio = Audio(hyper_params)

#### Define paths and create folders

In [None]:
import os

In [None]:
# dataset_path = os.path.join('drive','MyDrive','CS753 ASR Project','LibriSpeech Dataset');
# The working copy of the dataset lives in the current directory instead of the Drive path above.
dataset_path = './LibriSpeech Dataset'
# os.makedirs(..., exist_ok=True) replaces `!mkdir`, which reports an error when the
# folder already exists (i.e. every time this cell is re-run).
os.makedirs(dataset_path, exist_ok=True)
# Maps a dataset split name to its root folder; only 'train' is used in this notebook.
path = {}
path['train'] = os.path.join(dataset_path ,'LibriSpeech Train Dataset')

In [None]:
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv'
# !rm -r 'LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data'

In [None]:
# Ensure each split has one folder per stored artefact
# (mixture/target spectrograms, their phases, and the d-vectors).
for dataset in ['train']:
  for subfolder in ('input_spec','output_spec','input_phase','output_phase','dvec'):
    os.makedirs(os.path.join(path[dataset],subfolder),exist_ok=True)
print('Directories created')

Directories created


In [None]:
# create 8 separate directories for training dataset to avoid issues with gdrive
def create_folders(i):
  """Create the five artefact folders of the train split that carry the suffix `i`.

  `i` must be a string: it is concatenated onto each folder prefix
  (e.g. 'input_spec_' + '3'). Existing folders are left untouched.
  """
  for prefix in ('input_spec_','output_spec_','input_phase_','output_phase_','dvec_'):
    os.makedirs(os.path.join(path['train'],prefix+i),exist_ok=True)

# Suffixes '0'..'7'; the index is converted to str because create_folders concatenates it onto the folder prefix.
for i in range(8):
  create_folders(str(i))
print('Directories created')

Directories created


### Unzip LibriSpeech dataset ( Execute just once )
ALREADY DONE IN DATA PREPARATION. DO NOT REPEAT.

ALSO, USE LINUX COMMANDS TO FIRST EXTRACT DATASETS IN VM, AND THEN MOVE TO DRIVE INSTEAD OF USING SHUTIL TO DIRECTLY EXTRACT IN DRIVE.

In [None]:
# import shutil

In [None]:
# shutil.unpack_archive(dataset_path+'/train-clean-100.tar.gz',dataset_path)
# Rename the extracted folder LibriSpeech to LibriSpeech Train Dataset

In [None]:
# shutil.unpack_archive(dataset_path+'/dev-clean.tar.gz',dataset_path)
# Rename the extracted folder LibriSpeech to LibriSpeech Dev Dataset

In [None]:
# shutil.unpack_archive(dataset_path+'/test-clean.tar.gz',dataset_path)
# Rename the extracted folder LibriSpeech to LibriSpeech Test Dataset

In [None]:
# !rm -r "drive/MyDrive/LibriSpeech Dataset/"

In [None]:
# !mkdir "drive/MyDrive/LibriSpeech Dataset/"
# !mkdir "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Test Dataset"
# !mkdir "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Dev Dataset"
# !mkdir "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# %ls "drive/MyDrive/LibriSpeech Dataset/"

# %cd "LibriSpeech Dataset/"

# !wget https://www.openslr.org/resources/12/test-clean.tar.gz
# !time cp "test-clean.tar.gz" "../drive/MyDrive/LibriSpeech Dataset/"
# !tar -xf "test-clean.tar.gz"
# !mv LibriSpeech "LibriSpeech Test Dataset"

# !wget https://www.openslr.org/resources/12/dev-clean.tar.gz
# !time cp "dev-clean.tar.gz" "../drive/MyDrive/LibriSpeech Dataset/"
# !tar -xf "dev-clean.tar.gz"
# !mv LibriSpeech "LibriSpeech Dev Dataset"

# !wget https://www.openslr.org/resources/12/train-clean-100.tar.gz
# !time cp "train-clean-100.tar.gz" "../drive/MyDrive/LibriSpeech Dataset/" # approx 4 mins
# !tar -xf "train-clean-100.tar.gz"
# !mv LibriSpeech "LibriSpeech Train Dataset"

# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1YFmhmUok-W76JkrfA0fzQt3c-ZsfiwfL' -O embedder.pt
# !cp embedder.pt "../drive/MyDrive/LibriSpeech Dataset/"

# %cd ..

In [None]:
# takes approx 6 mins
# Copy the training archive from Drive into the local dataset folder and extract it there.
%cd "LibriSpeech Dataset/"
!time cp "../drive/MyDrive/LibriSpeech Dataset/train-clean-100.tar.gz" .
!time tar -xf train-clean-100.tar.gz
# NOTE(review): when "LibriSpeech Train Dataset" already exists, mv places LibriSpeech inside it,
# which is consistent with the 'LibriSpeech Train Dataset/LibriSpeech/train-clean-100' path used later — confirm.
!mv LibriSpeech "LibriSpeech Train Dataset"
!rm train-clean-100.tar.gz

# Copy the pre-trained embedder checkpoint next to the dataset.
!time cp "../drive/MyDrive/LibriSpeech Dataset/embedder.pt" .
%cd ..

/content/LibriSpeech Dataset

real	3m10.948s
user	0m0.023s
sys	0m8.008s

real	1m48.763s
user	0m40.370s
sys	0m20.359s

real	0m11.335s
user	0m0.002s
sys	0m0.054s
/content


In [None]:
# ## takes approx 35 mins
# ## copy train dataset
# %cd "LibriSpeech Dataset/LibriSpeech Train Dataset/"

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec5.tar" .
# !time tar -xf dvec5.tar
# !mv "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" .
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm dvec5.tar
# print("dvec done")

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase5.tar" .
# !time tar -xf input_phase5.tar
# !mv "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" .
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_phase5.tar
# print("input_phase done")

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase5.tar" .
# !time tar -xf output_phase5.tar
# !mv "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" .
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_phase5.tar
# print("output_phase done")

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec5.tar" .
# !time tar -xf input_spec5.tar
# !mv "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" .
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_spec5.tar
# print("input_spec done")

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec5.tar" .
# !time tar -xf output_spec5.tar
# !mv "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" .
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_spec5.tar
# print("output_spec done")

# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches5.data" train_speeches.data
# !time cp "../../drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame5.csv" data_frame.csv
# print("ALL done")

# %cd ../..

# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

### Create and store speech collection

In [None]:
import glob
import pickle

In [None]:
#### Run this cell only the first time ####
# dev_base_path = os.path.join(path['dev'],'LibriSpeech','dev-clean')
# test_base_path = os.path.join(path['test'],'LibriSpeech','test-clean')
# Root folder that is scanned for per-speaker sub-folders of the training set.
train_base_path = os.path.join(path['train'],'LibriSpeech','train-clean-100')

In [None]:
#### Run this cell only the first time ####
# dev_spks = os.listdir(dev_base_path)
# dev_speeches = [glob.glob(os.path.join(dev_base_path,spk,'*','*.flac'),recursive=True) for spk in dev_spks]
# dev_speeches = [speeches for speeches in dev_speeches if len(speeches)>=2]
# test_spks = os.listdir(test_base_path)
# test_speeches = [glob.glob(os.path.join(test_base_path,spk,'*','*.flac'),recursive=True) for spk in test_spks]
# test_speeches = [speeches for speeches in test_speeches if len(speeches)>=2]

# List of all speaker folders. Sorted because os.listdir returns entries in arbitrary
# order, and speaker positions in this list are later used as speaker indices.
train_spks = sorted(os.listdir(train_base_path))
# List of lists: one list of .flac paths per speaker (files sit two levels below the
# speaker folder). Sorted for the same reproducibility reason; the former
# recursive=True flag had no effect because the pattern contains no '**'.
train_speeches = [sorted(glob.glob(os.path.join(train_base_path,spk,'*','*.flac'))) for spk in train_spks]
# Keep only speakers that have at least two utterances.
train_speeches = [speeches for speeches in train_speeches if len(speeches)>=2]

In [None]:
#### Run this cell only the first time #####
# with open(os.path.join(path['dev'],'dev_speeches.data'),'wb') as f:
#   pickle.dump(dev_speeches,f)
# with open(os.path.join(path['test'],'test_speeches.data'),'wb') as f:
#   pickle.dump(test_speeches,f)
# Persist the per-speaker utterance lists so they can be reloaded without re-scanning the dataset.
# Note: this overwrites any existing train_speeches.data.
with open(os.path.join(path['train'],'train_speeches.data'),'wb') as f:
  pickle.dump(train_speeches,f)

In [None]:
# with open(os.path.join(path['dev'],'dev_speeches.data'),'rb') as f:
#   dev_speeches = pickle.load(f)
# with open(os.path.join(path['test'],'test_speeches.data'),'rb') as f:
#   test_speeches = pickle.load(f)
# Reload the pickled per-speaker utterance lists (list of lists of .flac paths).
# pickle.load executes arbitrary code from the file, so only load files written by this notebook.
with open(os.path.join(path['train'],'train_speeches.data'),'rb') as f:
  train_speeches = pickle.load(f)

### Use a pre-trained model to obtain speaker embeddings (d-vectors)

In [None]:
import torch
import torch.nn as nn

class LinearNorm(nn.Module):
    """Linear projection from the embedder's LSTM hidden size to the embedding size.

    Despite the name, no normalisation happens in this module: `forward`
    only applies the linear layer.
    """

    def __init__(self, hp):
        super().__init__()
        in_features = hp.embedder_lstm_hidden
        out_features = hp.embedder_emb_dim
        self.linear_layer = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Apply the projection to `x` and return the result."""
        projected = self.linear_layer(x)
        return projected


class SpeechEmbedder(nn.Module):
    """LSTM speaker encoder that turns a mel spectrogram into one fixed-size embedding.

    The spectrogram is cut into overlapping windows, each window is run
    through the LSTM, the last time step is projected and L2-normalised,
    and the per-window vectors are averaged.
    """

    def __init__(self, hp):
        super().__init__()
        self.lstm = nn.LSTM(input_size=hp.embedder_num_mels,
                            hidden_size=hp.embedder_lstm_hidden,
                            num_layers=hp.embedder_lstm_layers,
                            batch_first=True)
        self.proj = LinearNorm(hp)
        self.hp = hp

    def forward(self, mel):
        """Embed `mel`, a tensor laid out as (num_mels, T); returns a vector of size emb_dim."""
        window = self.hp.embedder_window
        stride = self.hp.embedder_stride
        # (num_mels, T) -> (num_mels, T', window) -> (T', window, num_mels)
        windows = mel.unfold(1, window, stride).permute(1, 2, 0)
        hidden_states, _ = self.lstm(windows)  # (T', window, lstm_hidden)
        last_step = hidden_states[:, -1, :]  # (T', lstm_hidden): keep the final frame only
        embeddings = self.proj(last_step)  # (T', emb_dim)
        embeddings = embeddings / torch.norm(embeddings, p=2, dim=1, keepdim=True)
        # average pooling over the T' windows -> (emb_dim)
        return embeddings.sum(0) / embeddings.size(0)

In [None]:
# Embedder downloaded from https://drive.google.com/file/d/1YFmhmUok-W76JkrfA0fzQt3c-ZsfiwfL/view (https://github.com/mindslab-ai/voicefilter)
embedder_path = os.path.join(dataset_path,"embedder.pt")
# map_location keeps all tensors on the CPU, so no GPU is needed to load the checkpoint.
# torch.load unpickles the file: only load checkpoints from a trusted source.
embedder_pt = torch.load(embedder_path,map_location=torch.device('cpu'))
embedder = SpeechEmbedder(hyper_params)
embedder.load_state_dict(embedder_pt)
# Switch to inference mode; as the cell's last expression this also displays the module summary.
embedder.eval()

SpeechEmbedder(
  (lstm): LSTM(40, 768, num_layers=3, batch_first=True)
  (proj): LinearNorm(
    (linear_layer): Linear(in_features=768, out_features=256, bias=True)
  )
)

### Prepare dataset

In [None]:
import random
import pandas as pd

In [None]:
# returns dvec for the input wave using pre trained embedder model
def get_dvector(wave):
  """Return the speaker embedding (d-vector) of `wave` as a NumPy array.

  Uses the module-level `audio` helper to compute the mel features and the
  module-level pre-trained `embedder` to turn them into an embedding.
  """
  mel_spec = audio.get_mel_spec(wave)
  mel_tensor = torch.from_numpy(mel_spec).float()
  # Inference only: skip autograd bookkeeping instead of building a graph and detaching afterwards.
  with torch.no_grad():
    dvec = embedder(mel_tensor)
  return dvec.numpy()

In [None]:
# pre process waves and store spectrogram, phase and dvector in their respective folders
def create_example(target_dir, hyper_params, idx, ref_speech, pri_speech, sec_speech):
  """Build one example from three audio files and store its arrays under `target_dir`.

  `ref_speech` provides the d-vector, `pri_speech` is the target and
  `sec_speech` is the interfering utterance mixed with it.

  Returns a row [idx, ref_speech, pri_speech, sec_speech, input_spec_path,
  output_spec_path, input_phase_path, output_phase_path, dvec_path], or None
  when the trimmed reference is shorter than 1.1 * embedder_window * hop_length
  samples or either of the other two is shorter than data_audio_len seconds.
  """
  sample_rate = hyper_params.sample_rate
  # Load the three files at the target sample rate (reference, primary, secondary).
  loaded = [librosa.load(speech_file, sr=sample_rate)[0]
            for speech_file in (ref_speech, pri_speech, sec_speech)]
  assert all(len(wave.shape) == 1 for wave in loaded), 'wave files must be mono and not stereo'
  # Strip leading / trailing silence from every wave.
  ref_wave, pri_wave, sec_wave = [librosa.effects.trim(wave, top_db = 20)[0] for wave in loaded]

  min_ref_samples = 1.1 * hyper_params.embedder_window * hyper_params.hop_length
  if ref_wave.shape[0] < min_ref_samples:
    return None
  length_wave = int(sample_rate * hyper_params.data_audio_len)
  if min(pri_wave.shape[0], sec_wave.shape[0]) < length_wave:
    return None

  # Clip both waves to the fixed length, then mix them.
  pri_wave = pri_wave[:length_wave]
  sec_wave = sec_wave[:length_wave]
  mix_wave = pri_wave + sec_wave
  # Scale target and mixture by 1.1 * the mixture's peak absolute amplitude.
  norm = np.max(np.abs(mix_wave)) * 1.1
  pri_wave = pri_wave/norm
  mix_wave = mix_wave/norm
  pri_spec, pri_phase = audio.wave2spec(pri_wave)
  mix_spec, mix_phase = audio.wave2spec(mix_wave)
  dvec = get_dvector(ref_wave)

  # Every artefact of this example shares the same zero-padded file name.
  file_name = '%06d.npy'%idx
  pri_spec_path = os.path.join(target_dir,'output_spec',file_name)
  pri_phase_path = os.path.join(target_dir,'output_phase',file_name)
  mix_spec_path = os.path.join(target_dir, 'input_spec',file_name)
  mix_phase_path = os.path.join(target_dir,'input_phase',file_name)
  dvec_path = os.path.join(target_dir,'dvec',file_name)
  for array_path, array in ((pri_spec_path, pri_spec),
                            (mix_spec_path, mix_spec),
                            (mix_phase_path, mix_phase),
                            (pri_phase_path, pri_phase),
                            (dvec_path, dvec)):
    np.save(array_path, array)

  return [idx, ref_speech, pri_speech, sec_speech,
          mix_spec_path, pri_spec_path, mix_phase_path, pri_phase_path, dvec_path]

In [None]:
columns=['key','ref_speech','pri_speech','sec_speech','input_spec_path','output_spec_path','input_phase_path','output_phase_path','dvec_path']

In [None]:
### to be run just once ####
# Create an empty index CSV (header only) for each split. The existence check makes the
# cell safe to re-run: an index that already holds rows is no longer overwritten.
# Delete data_frame.csv by hand to start over.
sample_data_frame = pd.DataFrame(data = [], columns=columns)
for dataset in ['train']:
  index_csv_path = os.path.join(path[dataset],'data_frame.csv')
  if not os.path.exists(index_csv_path):
    sample_data_frame.to_csv(index_csv_path,index=False)

In [None]:
def create_dataset(i):
  """Create every example that pairs speaker `i` (1-based) with its partner speakers.

  Reads the module-level globals `n`, `parity`, `speeches`, `dataset`, `path`
  and `hyper_params`, and appends the produced rows to the global `data`.

  Partners are speakers i+1..n, or speakers 1..i-1 when `parity` is 1. For each
  pair the two speakers are ordered and one of them (`first`) supplies the
  reference and primary utterances while the other (`sec`) supplies the
  interfering one; utterance indices are derived deterministically from the
  two speaker numbers. The example key is n*(i-1) + j.

  Returns the list of rows created for this speaker (rows for skipped
  examples are omitted).
  """
  partners = range(1,i) if parity == 1 else range(i+1,n+1)
  batch = []
  for j in partners:
    first, sec = min(i,j), max(i,j)
    if (sec-first)%2 == parity:
      first, sec = sec, first
    first_speeches = speeches[first-1] # -1 accounts for zero based indexing
    sec_speeches = speeches[sec-1] # -1 accounts for zero based indexing
    n1, n2 = len(first_speeches), len(sec_speeches)
    pri_idx = (first+sec-1)%n1
    # Python's % already yields a value in [0, n1) for negative operands, so no manual wrap-around is needed.
    ref_idx = (first-sec-1)%n1
    # Both formulas pick the same utterance whenever 2*sec is a multiple of n1.
    # Speakers are filtered to have at least two utterances, which suggests the
    # reference is meant to differ from the primary, so move it to the next one.
    if ref_idx == pri_idx and n1 > 1:
      ref_idx = (ref_idx+1)%n1
    ref_speech = first_speeches[ref_idx]
    pri_speech = first_speeches[pri_idx]
    sec_speech = sec_speeches[first%n2]
    row = create_example( path[dataset], hyper_params , n*(i-1) + j, ref_speech, pri_speech, sec_speech)
    if row is not None:
      batch.append(row)
  print(i)
  data.extend(batch)
  return batch

In [None]:
def save_batch(dataset,data):
  """Append the rows in `data` to the index CSV of the given dataset split.

  `dataset` is a key of the module-level `path` dict and `data` is a list of
  rows laid out like the module-level `columns`. The CSV is read, extended
  and rewritten in full.
  """
  df_path = os.path.join(path[dataset],'data_frame.csv')
  df = pd.read_csv(df_path)
  df_batch = pd.DataFrame(data = data, columns = columns)
  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
  # Empty frames are left out so they cannot influence the resulting dtypes.
  frames = [frame for frame in (df, df_batch) if not frame.empty]
  if frames:
    df = pd.concat(frames, ignore_index=True)
  df.to_csv(df_path,index=False)

In [None]:
import os
import time
from multiprocessing import Pool
# Number of CPUs this process may run on. os.sched_getaffinity does not exist on every
# platform, so fall back to the machine's CPU count (or 1 when that is unknown).
cpu_num = len(os.sched_getaffinity(0)) if hasattr(os, 'sched_getaffinity') else (os.cpu_count() or 1)

In [None]:
print("Number of cpu available : ",cpu_num)

Number of cpu available :  40


#### Train set

In [None]:
dataset = 'train' # important global variable
speeches = train_speeches # important global variable
# n = len(train_speeches) # important global variable
# Only the first 200 speakers are used, to speed up train dataset creation; clamped so
# that a shorter speaker list can never be indexed out of range.
n = min(200, len(train_speeches))
print("number of speakers(train set) : ",n)
# Shuffle the speeches of every used speaker. Sorting first and using a dedicated seeded
# generator makes the order identical across sessions and across re-runs of this cell.
shuffle_rng = random.Random(0)
for i in range(n):
  train_speeches[i].sort()
  shuffle_rng.shuffle(train_speeches[i])
arr = list(range(1,n+1))  # create a list for all speakers (1-based speaker ids)

number of speakers(train set) :  200


##### Speakers 1–100 (`arr[0:100]`)

In [None]:
# data = []
# parity = 0 # important global variable
# x = time.time()
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[0:25] , 4)
# for batch in res:
#   if len(batch) > 0:
#     data.extend(batch)
# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# !time tar -cf dvec1.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
# !time cp dvec1.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm dvec1.tar

# !time tar -cf input_phase1.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
# !time cp input_phase1.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_phase1.tar

# !time tar -cf output_phase1.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
# !time cp output_phase1.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_phase1.tar

# !time tar -cf input_spec1.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
# !time cp input_spec1.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_spec1.tar

# !time tar -cf output_spec1.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
# !time cp output_spec1.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_spec1.tar

# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches1.data"
# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame1.csv"

# %ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec1.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase1.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase1.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec1.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec1.tar" | wc -l
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches1.data"
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame1.csv"

In [None]:
# data = []
# parity = 0 # important global variable
# x = time.time()
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[25:50] , 4)
# for batch in res:
#   if len(batch) > 0:
#     data.extend(batch)
# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# !time tar -cf dvec2.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
# !time cp dvec2.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm dvec2.tar

# !time tar -cf input_phase2.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
# !time cp input_phase2.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_phase2.tar

# !time tar -cf output_phase2.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
# !time cp output_phase2.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_phase2.tar

# !time tar -cf input_spec2.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
# !time cp input_spec2.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_spec2.tar

# !time tar -cf output_spec2.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
# !time cp output_spec2.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_spec2.tar

# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches2.data"
# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame2.csv"

# %ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec2.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase2.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase2.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec2.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec2.tar" | wc -l
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches2.data"
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame2.csv"

In [None]:
# data = []
# parity = 0 # important global variable
# x = time.time()
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[50:75] , 4)
# for batch in res:
#   if len(batch) > 0:
#     data.extend(batch)
# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# !time tar -cf dvec3.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
# !time cp dvec3.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm dvec3.tar

# !time tar -cf input_phase3.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
# !time cp input_phase3.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_phase3.tar

# !time tar -cf output_phase3.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
# !time cp output_phase3.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_phase3.tar

# !time tar -cf input_spec3.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
# !time cp input_spec3.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_spec3.tar

# !time tar -cf output_spec3.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
# !time cp output_spec3.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_spec3.tar

# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches3.data"
# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame3.csv"

# # %ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec3.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase3.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase3.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec3.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec3.tar" | wc -l
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches3.data"
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame3.csv"

In [None]:
# data = []
# parity = 0 # important global variable
# x = time.time()
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[75:100] , 4)
# for batch in res:
#   if len(batch) > 0:
#     data.extend(batch)
# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# !rm -r "LibriSpeech Dataset/LibriSpeech Dev Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Test Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/LibriSpeech/"

# %ls "LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !time tar -cf dvec4.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
# !time cp dvec4.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm dvec4.tar

# !time tar -cf input_phase4.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
# !time cp input_phase4.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_phase4.tar

# !time tar -cf output_phase4.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
# !time cp output_phase4.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_phase4.tar

# !time tar -cf input_spec4.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
# !time cp input_spec4.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm input_spec4.tar

# !time tar -cf output_spec4.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
# !time cp output_spec4.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
# !rm output_spec4.tar

# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches4.data"
# !time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame4.csv"

# %ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec4.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase4.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase4.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec4.tar" | wc -l
# !tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec4.tar" | wc -l
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches4.data"
# !wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame4.csv"

##### Speakers 101–200 (`arr[100:200]`)

In [None]:
# data = []
# parity = 0 # important global variable
# x = time.time()
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[100:125] , 4)
# for batch in res:
#   if len(batch) > 0:
#     data.extend(batch)
# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
import threading
# create_dataset appends its rows to the global `data` and reads the global `parity`,
# so both are (re)set before the worker threads start.
data = [] # important global variable
parity = 0 # important global variable
x = time.time()

# Thread handles are stored at the speaker's own index; 250 slots exceed the largest id used here.
t = [0]*250

# arr holds 1-based speaker ids, so arr[125:150] covers speakers 126-150 (one thread each).
for i in arr[125:150]:
    t[i] = threading.Thread(target=create_dataset, args=(i,))
    t[i].start()

# Wait for every worker before the collected rows are written to the index CSV.
for i in arr[125:150]:
    t[i].join()

# Earlier sequential / multiprocessing variants, kept for reference:
# for i in range(125,150):
    # batch = create_dataset(i)
# with Pool(cpu_num) as p:
#   res = p.map(create_dataset, arr[125:150])
# batch = create_dataset(125)
# for batch in res:
#   if len(batch) > 0:
    # data.extend(batch)
y = time.time()
print(y-x)  # elapsed wall-clock seconds
save_batch('train',data)

145
150
149
148
147
133
142
146
143
141
144
131
138
140
139
129
136
135
137
134
127
126
128
130
132
521.9960467815399


In [None]:
# Sanity check: the five artefact folders should report the same number of entries, matching the index CSV.
# NOTE(review): `ls -l` output presumably includes a leading "total" line, so each folder count would be files + 1 — confirm.
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

1462
1462
1462
1462
1462
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
1462 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv


In [None]:
import threading

# Reset the shared state for this batch.
data = [] # important global variable (handed to save_batch below; presumably filled by create_dataset - TODO confirm)
parity = 0 # important global variable (not used in this cell; presumably read by create_dataset - TODO confirm)
x = time.time()

# One worker thread per entry of this slice of `arr`. The threads are kept in
# a dense list instead of a fixed-size table indexed by the entry's value, so
# no hard-coded table size has to stay in sync with the values stored in `arr`.
threads = [threading.Thread(target=create_dataset, args=(i,)) for i in arr[150:175]]
for worker in threads:
    worker.start()

# Wait for every worker before timing and saving.
for worker in threads:
    worker.join()

y = time.time()
print(y-x) # elapsed wall-clock seconds for the whole batch
save_batch('train',data)

175
170
173
172
174
169
167
163
171
165
168
162
166
164
157
155
159
161
160
156
153
158
151
152
154
305.95789551734924


In [None]:
# Count the listing lines of each generated artifact folder and the lines of
# the two index files, so the numbers can be compared after the batch above.
# `ls -l` prints a leading "total" line, so each folder count is one more than
# the number of directory entries; data_frame.csv presumably has a header row
# as well - TODO confirm.
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

2338
2338
2338
2338
2338
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
2338 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv


In [None]:
import threading

# Reset the shared state for this batch.
data = [] # important global variable (handed to save_batch below; presumably filled by create_dataset - TODO confirm)
parity = 0 # important global variable (not used in this cell; presumably read by create_dataset - TODO confirm)
x = time.time()

# One worker thread per entry of this slice of `arr`. The threads are kept in
# a dense list instead of a fixed-size table indexed by the entry's value, so
# no hard-coded table size has to stay in sync with the values stored in `arr`.
threads = [threading.Thread(target=create_dataset, args=(i,)) for i in arr[175:200]]
for worker in threads:
    worker.start()

# Wait for every worker before timing and saving.
for worker in threads:
    worker.join()

y = time.time()
print(y-x) # elapsed wall-clock seconds for the whole batch
save_batch('train',data)

200
199
198
196
197
195
190
194
191
192
193
188
189
186
187
185
181
183
184
182
178
180
177
179
176
102.64650416374207


In [None]:
# Count the listing lines of each generated artifact folder and the lines of
# the two index files, so the numbers can be compared after the batch above.
# `ls -l` prints a leading "total" line, so each folder count is one more than
# the number of directory entries; data_frame.csv presumably has a header row
# as well - TODO confirm.
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

2611
2611
2611
2611
2611
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
2611 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv


In [None]:
# Checkpoint 8: archive the locally generated artifacts and copy them to the
# mounted Drive folder.
#
# Step 1 - local counts before archiving (`ls -l` adds a leading "total" line
# to every folder count).
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# Optional clean-up of data that is no longer needed locally (kept disabled).
# !rm -r "LibriSpeech Dataset/LibriSpeech Dev Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Test Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/LibriSpeech/"
# !rm -r "LibriSpeech Dataset/train-clean-100.tar.gz"

# Step 2 - for each artifact folder: build an uncompressed tar, copy it to
# Drive, then delete the local tar.
# NOTE(review): every `!` line runs independently, so the local tar is removed
# even if the preceding `cp` failed - check the counts printed in step 4.
%ls "LibriSpeech Dataset/LibriSpeech Train Dataset/"
!time tar -cf dvec8.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
!time cp dvec8.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm dvec8.tar

!time tar -cf input_phase8.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
!time cp input_phase8.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm input_phase8.tar

!time tar -cf output_phase8.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
!time cp output_phase8.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm output_phase8.tar

!time tar -cf input_spec8.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
!time cp input_spec8.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm input_spec8.tar

!time tar -cf output_spec8.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
!time cp output_spec8.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm output_spec8.tar

# Step 3 - copy the two index files to Drive under checkpoint-suffixed names.
!time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches8.data"
!time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame8.csv"

%ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# Step 4 - count the entries of the archives and the lines of the index files
# now stored on Drive, for comparison with the local counts from step 1.
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec8.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase8.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase8.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec8.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec8.tar" | wc -l
!wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches8.data"
!wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame8.csv"

2611
2611
2611
2611
2611
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
2611 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv
data_frame.csv  [0m[01;34minput_phase_1[0m/  [01;34minput_spec_4[0m/    [01;34moutput_phase_6[0m/
[01;34mdvec[0m/           [01;34minput_phase_2[0m/  [01;34minput_spec_5[0m/    [01;34moutput_phase_7[0m/
[01;34mdvec_0[0m/         [01;34minput_phase_3[0m/  [01;34minput_spec_6[0m/    [01;34moutput_spec[0m/
[01;34mdvec_1[0m/         [01;34minput_phase_4[0m/  [01;34minput_spec_7[0m/    [01;34moutput_spec_0[0m/
[01;34mdvec_2[0m/         [01;34minput_phase_5[0m/  [01;34mLibriSpeech[0m/     [01;34moutput_spec_1[0m/
[01;34mdvec_3[0m/         [01;34minput_phase_6[0m/  [01;34moutput_phase[0m/    [01;34moutput_spec_2[0m/
[01;34mdvec_4[0m/         [01;34minput_phase_7[0m/  [01;34moutput_phase_0[0m/  [01;34moutput_spec_3[0m/
[01;34mdvec_5[0m/         [01;34minput_spec[0m/     [01;34

##### Speakers 200-251

In [None]:
import threading

# Re-create the state the worker code relies on.
dataset = 'train' # important global variable
speeches = train_speeches # important global variable
n = len(train_speeches) # important global variable
print("number of speakers(train set) : ",n)
for speaker_speeches in train_speeches:
  random.shuffle(speaker_speeches)  # shuffle the speeches of all speakers (in place)
arr = list(range(1,n+1))  # create a list for all speakers: 1..n

data = [] # important global variable (handed to save_batch below; presumably filled by create_dataset - TODO confirm)
parity = 1 # important global variable (not used in this cell; presumably read by create_dataset - TODO confirm)
x = time.time()

# One worker thread per entry of this slice of `arr`. The threads are kept in
# a dense list instead of a fixed-size table indexed by the entry's value, so
# no hard-coded table size has to stay in sync with the values stored in `arr`.
threads = [threading.Thread(target=create_dataset, args=(i,)) for i in arr[200:225]]
for worker in threads:
    worker.start()

# Wait for every worker before timing and saving.
for worker in threads:
    worker.join()

y = time.time()
print(y-x) # elapsed wall-clock seconds for the whole batch
save_batch('train',data)

number of speakers(train set) :  251
203
205
206
201
202
204
207
223
209
211
210
216
217
215
213
220
212
219
224
214
221
225
208
222
218
1792.4984476566315


In [None]:
# Count the listing lines of each generated artifact folder and the lines of
# the two index files, so the numbers can be compared after the batch above.
# `ls -l` prints a leading "total" line, so each folder count is one more than
# the number of directory entries; data_frame.csv presumably has a header row
# as well - TODO confirm.
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

7594
7594
7594
7594
7594
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
7594 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv


In [None]:
# Checkpoint 9: archive the locally generated artifacts and copy them to the
# mounted Drive folder.
#
# Step 1 - local counts before archiving (`ls -l` adds a leading "total" line
# to every folder count).
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
%ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
!wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

# Optional clean-up of data that is no longer needed locally (kept disabled).
# !rm -r "LibriSpeech Dataset/LibriSpeech Dev Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Test Dataset/"
# !rm -r "LibriSpeech Dataset/LibriSpeech Train Dataset/LibriSpeech/"
# !rm -r "LibriSpeech Dataset/train-clean-100.tar.gz"

# Step 2 - for each artifact folder: build an uncompressed tar, copy it to
# Drive, then delete the local tar.
# NOTE(review): every `!` line runs independently, so the local tar is removed
# even if the preceding `cp` failed - check the counts printed in step 4.
%ls "LibriSpeech Dataset/LibriSpeech Train Dataset/"
!time tar -cf dvec9.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/"
!time cp dvec9.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm dvec9.tar

!time tar -cf input_phase9.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/"
!time cp input_phase9.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm input_phase9.tar

!time tar -cf output_phase9.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/"
!time cp output_phase9.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm output_phase9.tar

!time tar -cf input_spec9.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/"
!time cp input_spec9.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm input_spec9.tar

!time tar -cf output_spec9.tar "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/"
!time cp output_spec9.tar "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/"
!rm output_spec9.tar

# Step 3 - copy the two index files to Drive under checkpoint-suffixed names.
!time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches9.data"
!time cp "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv" "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame9.csv"

%ls "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset"

# Step 4 - count the entries of the archives and the lines of the index files
# now stored on Drive, for comparison with the local counts from step 1.
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/dvec9.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase9.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase9.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec9.tar" | wc -l
!tar -tvf "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec9.tar" | wc -l
!wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches9.data"
!wc -l "drive/MyDrive/LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame9.csv"

7594
7594
7594
7594
7594
369 LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data
7594 LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv
data_frame.csv  [0m[01;34minput_phase_1[0m/  [01;34minput_spec_4[0m/    [01;34moutput_phase_6[0m/
[01;34mdvec[0m/           [01;34minput_phase_2[0m/  [01;34minput_spec_5[0m/    [01;34moutput_phase_7[0m/
[01;34mdvec_0[0m/         [01;34minput_phase_3[0m/  [01;34minput_spec_6[0m/    [01;34moutput_spec[0m/
[01;34mdvec_1[0m/         [01;34minput_phase_4[0m/  [01;34minput_spec_7[0m/    [01;34moutput_spec_0[0m/
[01;34mdvec_2[0m/         [01;34minput_phase_5[0m/  [01;34mLibriSpeech[0m/     [01;34moutput_spec_1[0m/
[01;34mdvec_3[0m/         [01;34minput_phase_6[0m/  [01;34moutput_phase[0m/    [01;34moutput_spec_2[0m/
[01;34mdvec_4[0m/         [01;34minput_phase_7[0m/  [01;34moutput_phase_0[0m/  [01;34moutput_spec_3[0m/
[01;34mdvec_5[0m/         [01;34minput_spec[0m/     [01;34

In [None]:
# import threading
# data = [] # important global variable
# parity = 1 # important global variable
# x = time.time()

# t = [0]*251

# for i in arr[225:250]:
#     t[i] = threading.Thread(target=create_dataset, args=(i,))
#     t[i].start()

# for i in arr[225:250]:
#     t[i].join()

# y = time.time()
# print(y-x)
# save_batch('train',data)

In [None]:
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/dvec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_phase/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/input_spec/" | wc -l
# %ls -l "LibriSpeech Dataset/LibriSpeech Train Dataset/output_spec/" | wc -l
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/train_speeches.data"
# !wc -l "LibriSpeech Dataset/LibriSpeech Train Dataset/data_frame.csv"

#### Grouping data into fraction folders to avoid Google Drive timeouts

In [None]:
import shutil
import os
import pandas as pd

In [None]:
# move files in the dataframe from start_index to end_index to folder for fraction j
def move_files(start_index,end_index,j):
  """Move the artifact files of rows [start_index, end_index) into fraction folder j.

  For every row in the range and every ``*_path`` column, the file is moved to
  ``<path['train']>/<column name without '_path'>_<j>/<file name>`` and the
  column is updated to the new location. ``data_frame.csv`` in
  ``path['train']`` is read at the start and rewritten at the end.

  Args:
    start_index: first row (inclusive) of the data frame to process.
    end_index: last row (exclusive) of the data frame to process.
    j: fraction number used as the folder suffix.

  Raises:
    OSError: propagated from ``shutil.move`` if a source file cannot be moved.
      The CSV is still rewritten first, so every file that was already moved
      is recorded at its new location.
  """
  csv_path = os.path.join(path['train'],'data_frame.csv')
  df_train = pd.read_csv(csv_path)
  path_columns = ['dvec_path','input_phase_path','input_spec_path','output_phase_path','output_spec_path']
  try:
    for i in range(start_index,end_index):
      for col in path_columns:
        old_path = df_train.loc[i,col]
        # 'input_spec_path' -> folder 'input_spec_<j>'
        new_dir = os.path.join(path['train'],col.rsplit('_',1)[0]+"_"+str(j))
        # shutil.move does not create missing parent directories
        os.makedirs(new_dir,exist_ok=True)
        new_path = os.path.join(new_dir,old_path.split('/')[-1])
        shutil.move(old_path,new_path)
        # only record the new location once the move has succeeded
        df_train.loc[i,col]=new_path
  finally:
    # persist even on failure so the CSV never points at files that were already moved away
    df_train.to_csv(csv_path,index=False)

In [None]:
# Load the training index and work out how many rows go into each fraction.
dataset_train = 'train'
df_train = pd.read_csv(os.path.join(path[dataset_train],'data_frame.csv'))
num_samples = len(df_train)

num_fractions = 8
# Split as evenly as possible: the first `remainder` fractions get one extra row.
base_size, remainder = divmod(num_samples, num_fractions)
fraction_sizes = [base_size + 1 if k < remainder else base_size for k in range(num_fractions)]
print(fraction_sizes)

In [None]:
# Walk the data frame in consecutive [start, end) row windows, one per
# fraction, and move each window's files into that fraction's folders.
window_start = 0
for fraction_id in range(num_fractions):
  window_end = window_start + fraction_sizes[fraction_id]
  move_files(window_start, window_end, fraction_id)
  window_start = window_end

### Data Analysis

In [None]:
import pandas as pd

In [None]:
def print_stats(dataset):
  """Print summary statistics for the samples listed in a dataset's data_frame.csv.

  Reads ``data_frame.csv`` from ``path[dataset]`` and prints:
    * the number of distinct speech files (compared by file name) that appear
      as reference, primary or secondary speech, and what share that is of the
      speeches available for the split;
    * the number and share of samples whose primary and reference speech are
      the same file;
    * a note when every ordered (primary, secondary) speaker pair occurs once.
      The speaker is taken as the third-from-last component of the speech path.

  Args:
    dataset: key into the global ``path`` dict. 'train' and 'dev' use
      ``train_speeches`` / ``dev_speeches``; any other value uses ``test_speeches``.

  Percentages are printed as 0.00 % when their denominator is zero (empty
  data frame or no available speeches) instead of raising ZeroDivisionError.
  """
  def percent(part, whole):
    # guard against an empty data frame / empty speech list
    return (part/whole)*100 if whole else 0.0

  df = pd.read_csv(os.path.join(path[dataset],'data_frame.csv'))
  num_samples = df.shape[0]
  cnt = 0 # number of samples whose primary speech is the same file as the reference speech
  pairs = {} # occurrences of each ordered (primary speaker, secondary speaker) pair
  used_waves = set() # file names of every speech used in any role
  for ref, pri, sec in zip(df['ref_speech'], df['pri_speech'], df['sec_speech']):
    ref_wave = ref.split('/')[-1]
    pri_wave = pri.split('/')[-1]
    sec_wave = sec.split('/')[-1]
    used_waves.update((ref_wave, pri_wave, sec_wave))
    spk_pair = (pri.split('/')[-3], sec.split('/')[-3])
    pairs[spk_pair] = pairs.get(spk_pair, 0) + 1
    if pri_wave == ref_wave:
      cnt += 1
  waves = len(used_waves)
  # resolved lazily so only the speech list of the requested split has to exist
  if dataset == 'train':
    speeches = train_speeches
  elif dataset == 'dev':
    speeches = dev_speeches
  else :
    speeches = test_speeches
  total_speeches = sum(len(spk) for spk in speeches)
  print("====================",dataset,"dataset statistics ====================")
  print("Total no. of unique speeches available in LibriSpeech",dataset,"dataset :",total_speeches)
  print("No. of unique speeches used :",waves)
  print("Percentage of total speeches used : {:.2f} %".format(percent(waves,total_speeches)))
  print("------------------------------------------------------------")
  print("Total no. of samples prepared :",num_samples)
  print("No. of samples with same primary and reference speech :",cnt)
  print("Fraction of such samples as a part of the entire dataset : {:.2f} %".format(percent(cnt,num_samples)))
  print("-------------------------------------------------------------")
  if all(val == 1 for val in pairs.values()):
    print("Note: All ordered pairs of primary and secondary speakers are unique")

In [None]:
# Print the usage statistics for the samples listed in the training split's data_frame.csv.
print_stats('train')

Total no. of unique speeches available in LibriSpeech train dataset : 28539
No. of unique speeches used : 12723
Percentage of total speeches used : 44.58 %
------------------------------------------------------------
Total no. of samples prepared : 7593
No. of samples with same primary and reference speech : 89
Fraction of such samples as a part of the entire dataset : 1.17 %
-------------------------------------------------------------
Note: All ordered pairs of primary and secondary speakers are unique
