In [1]:
import os
from os import listdir
import csv

# Import statements
import scipy.io as sio
from scipy.io import wavfile
from scipy.io.wavfile import write
import scipy.signal as sis
import scipy.fftpack as fftpack

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots

# Dataset dimensions: number of recorded pitches, number of vowel words,
# and number of speakers, in that order.
n_pitches, n_words, n_people = 16, 12, 3

In [2]:
# Word label -> vowel word; a word's position in the list is its label.
label_to_vowel = dict(enumerate([
    "bed", "bird", "boat", "book",
    "cat", "dog",  "feet", "law",
    "moo", "nut",  "pig",  "say",
]))
# Vowel word -> word label, derived as the exact inverse of label_to_vowel.
vowel_to_label = {vowel: label for label, vowel in label_to_vowel.items()}
# Pitch index -> note name, ascending one semitone per step from A2 to C4.
noteidx_to_pitch = dict(enumerate([
    "A2",  "Bb2", "B2",  "C3",
    "Db3", "D3",  "Eb3", "E3",
    "F3",  "Gb3", "G3",  "Ab3",
    "A3",  "Bb3", "B3",  "C4",
]))

# Create the reference csv

In [24]:
def create_reference_csv(dataset_dir="dataset", csv_path="dataset_files.csv"):
    """Write a csv index of the recordings found in ``dataset_dir``.

    Filenames follow the format
    ``<personNum>_<wordIdx>-<wordVowel>_<pitchIdx>-<pitch>.wav``. Each
    recording becomes one row ``filename, wordIdx, pitchIdx, personNum``
    below a header row with those column names.

    The function only writes the file; it must not call itself. Invoke it
    from a cell whenever the csv needs to be (re)generated.

    Args:
        dataset_dir: Directory to scan for recordings.
        csv_path: Destination csv file (overwritten if it already exists).

    Raises:
        ValueError: If a ``.wav`` filename does not split into the five
            fields of the naming scheme.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the csv
    # reproducible and keeps the speakers in the same relative order for
    # every (word, pitch) combination.
    filenames = sorted(os.listdir(dataset_dir))

    # The with-block closes the file even if a filename fails to parse.
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)

        # Write the headers
        writer.writerow(["filename", "wordIdx", "pitchIdx", "personNum"])

        # Write one row per recording
        for filename in filenames:
            # Skip entries that are not recordings (e.g. .DS_Store)
            if not filename.lower().endswith(".wav"):
                continue

            fields = filename.replace('-', '_').split("_")
            if len(fields) != 5:
                raise ValueError(
                    "Unexpected dataset filename %r; expected "
                    "<personNum>_<wordIdx>-<wordVowel>_<pitchIdx>-<pitch>.wav"
                    % filename)
            personNum, wordIdx, _wordVowel, pitchIdx, _pitch = fields
            writer.writerow([filename, wordIdx, pitchIdx, personNum])

# Read the reference csv into the relevant data structure

In [3]:
def createDataList(csv_path='dataset_files.csv'):
    """Load the reference csv into a nested list of filenames.

    The result has dimension [n_words][n_pitches][n_people]: use
    ``dataList[wordIdx][pitchIdx]`` to get the list of filenames recorded
    for that word at that pitch, in the order the rows appear in the csv.
    ``n_words`` and ``n_pitches`` are read from the module-level constants.

    Args:
        csv_path: csv file with a header row followed by rows of
            ``filename, wordIdx, pitchIdx, personNum``.

    Returns:
        The nested list described above; buckets with no rows stay empty.
    """
    dataList = [[[] for _ in range(n_pitches)] for _ in range(n_words)]

    # The csv module expects files to be opened with newline=''.
    with open(csv_path, newline='') as dataset_csv:
        reader = csv.reader(dataset_csv, delimiter=',')
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            if not row:
                continue
            filename, wordIdx, pitchIdx, personNum = row
            dataList[int(wordIdx)][int(pitchIdx)].append(filename)
    return dataList

# Build the [wordIdx][pitchIdx] filename table and echo it as the cell output.
dataList = createDataList()
dataList  # index it, e.g. [vowel_to_label["dog"]][5], to inspect a single bucket

[[['0_0-bed_0-A2.wav', '2_0-bed_0-A2.wav', '3_0-bed_0-A2.wav'],
  ['0_0-bed_1-Bb2.wav', '2_0-bed_1-Bb2.wav', '3_0-bed_1-Bb2.wav'],
  ['0_0-bed_2-B2.wav', '2_0-bed_2-B2.wav', '3_0-bed_2-B2.wav'],
  ['0_0-bed_3-C3.wav', '2_0-bed_3-C3.wav', '3_0-bed_3-C3.wav'],
  ['0_0-bed_4-Db3.wav', '2_0-bed_4-Db3.wav', '3_0-bed_4-Db3.wav'],
  ['0_0-bed_5-D3.wav', '2_0-bed_5-D3.wav', '3_0-bed_5-D3.wav'],
  ['0_0-bed_6-Eb3.wav', '2_0-bed_6-Eb3.wav', '3_0-bed_6-Eb3.wav'],
  ['0_0-bed_7-E3.wav', '2_0-bed_7-E3.wav', '3_0-bed_7-E3.wav'],
  ['0_0-bed_8-F3.wav', '2_0-bed_8-F3.wav', '3_0-bed_8-F3.wav'],
  ['0_0-bed_9-Gb3.wav', '2_0-bed_9-Gb3.wav', '3_0-bed_9-Gb3.wav'],
  ['0_0-bed_10-G3.wav', '2_0-bed_10-G3.wav', '3_0-bed_10-G3.wav'],
  ['0_0-bed_11-Ab3.wav', '2_0-bed_11-Ab3.wav', '3_0-bed_11-Ab3.wav'],
  ['0_0-bed_12-A3.wav', '2_0-bed_12-A3.wav', '3_0-bed_12-A3.wav'],
  ['0_0-bed_13-Bb3.wav', '2_0-bed_13-Bb3.wav', '3_0-bed_13-Bb3.wav'],
  ['0_0-bed_14-B3.wav', '2_0-bed_14-B3.wav', '3_0-bed_14-B3.wav'],
  ['0_0

In [5]:
def create_data_label_pairs(purelyIndices):
    """Enumerate the (input, label) pairings for every possible pitch shift.

    Pitch indices range from 0 to n_pitches - 1, so a shift can be anything
    from -(n_pitches - 1) to n_pitches - 1 (a shift of 0 pairs a recording
    with itself). For each shift, every word is paired at every starting
    pitch for which the shifted pitch still exists.

    Reads the module-level ``n_pitches``, ``n_words``, ``n_people`` and, when
    ``purelyIndices`` is falsy, ``dataList``.

    Args:
        purelyIndices: If truthy, entries hold indices only; otherwise they
            hold the filenames looked up in ``dataList``.

    Returns:
        A dict keyed by pitch shift. Each value is a list of entries:
          * purelyIndices truthy:
            ``[pitchShift, wordIdx, inputPitchIdx, labelPitchIdx]``,
            one per (word, starting pitch);
          * otherwise: ``[pitchShift, inputFilename, labelFilename]``,
            one per (word, starting pitch, person), where both filenames
            sit at the same position of their ``dataList`` bucket.
    """

    def append_pair(pitchShift, wordIdx, pitchIdx):
        # [pitchShift, wordIdx, inputPitchIdx, labelPitchIdx]
        if purelyIndices:
            data_label_pairs[pitchShift].append(
                [pitchShift, wordIdx, pitchIdx, pitchIdx + pitchShift])
        # [pitchShift, filename, filename]
        else:
            for peopleIdx in range(n_people):
                data_label_pairs[pitchShift].append(
                    [pitchShift,
                     dataList[wordIdx][pitchIdx][peopleIdx],
                     dataList[wordIdx][pitchIdx + pitchShift][peopleIdx]])

    data_label_pairs = {
        pitchShift: [] for pitchShift in range(-n_pitches + 1, n_pitches)
    }

    for pitchShift in data_label_pairs:
        # Starting pitches whose shifted pitch stays within 0 .. n_pitches - 1.
        # Upward shifts walk the starting pitches in ascending order and
        # downward shifts in descending order.
        if pitchShift >= 0:
            startingPitches = range(0, n_pitches - pitchShift)
        else:
            startingPitches = range(n_pitches - 1, -1 - pitchShift, -1)

        # n_words rather than a literal 12, so the word count has one source
        for wordIdx in range(n_words):
            for pitchIdx in startingPitches:
                append_pair(pitchShift, wordIdx, pitchIdx)

    return data_label_pairs

# Build the filename-based pairs (purelyIndices=False) and echo them as the cell output.
data_label_pairs = create_data_label_pairs(False)
data_label_pairs

{-15: [[-15, '0_0-bed_15-C4.wav', '0_0-bed_0-A2.wav'],
  [-15, '2_0-bed_15-C4.wav', '2_0-bed_0-A2.wav'],
  [-15, '3_0-bed_15-C4.wav', '3_0-bed_0-A2.wav'],
  [-15, '0_1-bird_15-C4.wav', '0_1-bird_0-A2.wav'],
  [-15, '2_1-bird_15-C4.wav', '2_1-bird_0-A2.wav'],
  [-15, '3_1-bird_15-C4.wav', '3_1-bird_0-A2.wav'],
  [-15, '0_2-boat_15-C4.wav', '0_2-boat_0-A2.wav'],
  [-15, '2_2-boat_15-C4.wav', '2_2-boat_0-A2.wav'],
  [-15, '3_2-boat_15-C4.wav', '3_2-boat_0-A2.wav'],
  [-15, '0_3-book_15-C4.wav', '0_3-book_0-A2.wav'],
  [-15, '2_3-book_15-C4.wav', '2_3-book_0-A2.wav'],
  [-15, '3_3-book_15-C4.wav', '3_3-book_0-A2.wav'],
  [-15, '0_4-cat_15-C4.wav', '0_4-cat_0-A2.wav'],
  [-15, '2_4-cat_15-C4.wav', '2_4-cat_0-A2.wav'],
  [-15, '3_4-cat_15-C4.wav', '3_4-cat_0-A2.wav'],
  [-15, '0_5-dog_15-C4.wav', '0_5-dog_0-A2.wav'],
  [-15, '2_5-dog_15-C4.wav', '2_5-dog_0-A2.wav'],
  [-15, '3_5-dog_15-C4.wav', '3_5-dog_0-A2.wav'],
  [-15, '0_6-feet_15-C4.wav', '0_6-feet_0-A2.wav'],
  [-15, '2_6-feet_15-C4.w

In [8]:
# Count the pairs whose pitch shift is zero or upward (keys 0 .. n_pitches - 1).
# The bound comes from n_pitches instead of a literal 16.
total = sum(len(data_label_pairs[pitchShift]) for pitchShift in range(n_pitches))

In [9]:
# With 12 words, 3 people and 16 pitches: 12 * 3 * (16 + 15 + ... + 1) = 4896
total

4896