# Vocal Pitch Modulator NN Training 
This is the notebook used to train the Vocal Pitch Modulator.

In [None]:
%load_ext autoreload
%autoreload 1

import os
import csv

import scipy.io as sio
from scipy.io import wavfile
from scipy.io.wavfile import write
import scipy.signal as sis
import scipy.fftpack as fftpack

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots

%aimport VPM
from VPM import *
%aimport Utils
from Utils import *

In [None]:
# Dataset dimensions. These are tied to the dataset itself and should only
# be edited when the dataset changes.
n_pitches, n_vowels, n_people = 16, 12, 3

# Vowel label -> example word. A word's position in the list is its label.
label_to_vowel = dict(enumerate([
    "bed", "bird", "boat", "book",
    "cat", "dog",  "feet", "law",
    "moo", "nut",  "pig",  "say",
]))

# Example word -> vowel label: the exact inverse of label_to_vowel.
vowel_to_label = {word: label for label, word in label_to_vowel.items()}

# Note index -> pitch name, one semitone per step from A2 up to C4.
noteidx_to_pitch = dict(enumerate([
    "A2",  "Bb2", "B2", "C3",
    "Db3", "D3",  "Eb3", "E3",
    "F3",  "Gb3", "G3",  "Ab3",
    "A3",  "Bb3", "B3",  "C4",
]))

## Getting data references
Read the reference CSV into the relevant data structures.

In [None]:
#################################################################
# The list of filenames in the dataset in a 3d array format.
# A specific file is accessed with
# data_ref_list[vowel_idx][pitch_idx][person_idx]
#################################################################
# e.g. data_ref_list[vowel_to_label["dog"]][5][1]
data_ref_list = create_data_ref_list(os.path.join("Data", 'dataset_files.csv'),
                            n_pitches, n_vowels, n_people)

#################################################################
# The list of filenames in the dataset as a 1d array.
# To access a specific file, use
# flat_data_ref_list[flat_idx(vowel, pitch, person)]
#################################################################
# e.g. flat_data_ref_list[flat_idx(3, 1, 2)]
# Flattening order: vowel is the outermost axis, person the innermost.
flat_data_ref_list = [ data_ref_list[i][j][k]
                       for i in range(n_vowels)
                       for j in range(n_pitches)
                       for k in range(n_people) ]


def flat_idx(vowel, pitch, people):
    """Convert (vowel, pitch, person) indices to a flat_data_ref_list index.

    Thin wrapper that forwards to `flat_array_idx` (defined outside this
    notebook) with the dataset dimensions n_vowels, n_pitches and n_people
    filled in.
    """
    return flat_array_idx(vowel, pitch, people, n_vowels, n_pitches, n_people)


def nd_idx(idx):
    """Convert a flat_data_ref_list index back to its multi-dimensional form.

    Thin wrapper that forwards to `nd_array_idx` (defined outside this
    notebook) with the dataset dimensions filled in. Used elsewhere in this
    notebook as `vowel, pitch, person = nd_idx(idx)`.
    """
    return nd_array_idx(idx, n_vowels, n_pitches, n_people)


# Print a count and a short preview rather than every filename, so the cell
# output stays readable.
print(len(flat_data_ref_list), "files; first entries:", flat_data_ref_list[:5])

## Data-label Pitch Index pairs
Generate the data-label pitch index pairs

In [None]:
# Build the data-label pitch index pairs for all n_pitches pitches.
# Each element of data_label_pairs is a 3-element entry of
#     [shift_amt, input_pitch_idx, label_pitch_idx]
# create_data_label_pairs returns two values; only the first is used here.
data_label_pairs, _ = create_data_label_pairs(n_pitches)
print(f"Total data-label pairs: {len(data_label_pairs)} - {data_label_pairs}")

## Get All .wav Data
Get the wav file data into a single matrix

In [None]:
# Load the wav content for every file listed in flat_data_ref_list from the
# Data/dataset directory. Element `idx` of the result corresponds to
# flat_data_ref_list[idx]; recover its 3d indices with
#     vowel, pitch, person = nd_idx(idx)
# NOTE(review): described as a matrix, which presumes load_wav_files returns
# equal-length waveforms - confirm against its implementation.
all_wav_data = load_wav_files(
    os.path.join("Data", "dataset"),
    flat_data_ref_list,
)

## Create all spectrograms
Compute the spectrogram of each waveform in `all_wav_data`.

In [None]:
# Apply stft to every waveform and stack the results into a single array,
# keeping the same ordering as all_wav_data.
all_spectrograms = np.array(list(map(stft, all_wav_data)))

In [None]:
# Sanity check: report the dimensions of the stacked spectrogram array.
print(np.shape(all_spectrograms))