In [1]:
# modules
import os
import pandas as pd
import numpy as np
from tqdm import  tqdm
from os.path import basename as bn, join, split as sp

from sklearn.utils import shuffle

# DATASET Path and Constants

In [2]:
# Read paths
# NOTE(review): machine-specific absolute path; no cell visible in this notebook reads it.
ROOT_TIMIT_DATA_PATH = "/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT"

# Write paths
ALL_EXP_FOLDER = "./exports/"
# Create the export folder when it is missing. `exist_ok=True` makes this safe to
# re-run and avoids the check-then-create race of `os.path.exists` + `os.mkdir`.
os.makedirs(ALL_EXP_FOLDER, exist_ok=True)

# Input CSV holding the info of every TIMIT vowel (read with pd.read_csv further down)
ALL_TIMIT_VOWELS_IMP_FILENAME = "a_all-timit_vowels.csv"
ALL_TIMIT_VOWELS_IMP_FILEPATH = join(ALL_EXP_FOLDER, ALL_TIMIT_VOWELS_IMP_FILENAME)

# Output CSV for the vowel subset; the literal "LIM" placeholder in the name is
# replaced with the per-vowel limit at export time.
SUBSET_TIMIT_VOWELS_EXP_FILENAME = "b_subset-timit_vowels_vowlimLIM.csv"
SUBSET_TIMIT_VOWELS_EXP_FILEPATH = join(ALL_EXP_FOLDER, SUBSET_TIMIT_VOWELS_EXP_FILENAME)

# TIMIT sampling rate in Hz
TIMIT_AUDIO_FS = 16000

### Load the full TIMIT vowel-info DataFrame

In [6]:
# Load the table of all TIMIT vowel segments from the CSV in the export folder.
ALL_TIMIT_VOWELS_DF = pd.read_csv(ALL_TIMIT_VOWELS_IMP_FILEPATH)

# Distinct vowel labels, in order of first appearance in the table.
vowel_name_col = ALL_TIMIT_VOWELS_DF["vowel_name"]
ALL_TIMIT_VOWEL_LIST = vowel_name_col.unique()

# Number of segments per vowel (printed), then numeric summary of the table (displayed).
print(vowel_name_col.value_counts())
ALL_TIMIT_VOWELS_DF.describe()


ix      11587
iy       9663
ih       6760
ae       5404
eh       5293
ax       4956
axr      4790
aa       4197
ao       4096
ay       3242
ah       3185
ey       3088
ow       2913
er       2846
ux       2488
oy        947
aw        945
uh        756
uw        725
ax-h      493
Name: vowel_name, dtype: int64


Unnamed: 0,index,start_sample,end_sample,duration_sample,start_second,end_second,duration_second
count,78374.0,78374.0,78374.0,78374.0,78374.0,78374.0,78374.0
mean,6.265688,24477.171166,26012.632455,1535.461288,1.529823,1.62579,0.095966
std,4.480745,15701.519448,15768.373059,778.745234,0.981345,0.985523,0.048672
min,0.0,417.0,793.0,74.0,0.026062,0.049563,0.004625
25%,3.0,11770.0,13284.0,960.0,0.735625,0.83025,0.06
50%,6.0,22360.0,23853.0,1387.0,1.3975,1.490813,0.086688
75%,9.0,34440.0,36040.0,1963.0,2.1525,2.2525,0.122688
max,24.0,112600.0,114980.0,7735.0,7.0375,7.18625,0.483438


### Apply filters: minimum duration threshold

In [10]:
# Keep only segments strictly longer than MIN_SAMPLE_DUR samples, i.e. at least
# 1024 samples (64 ms at the 16 kHz rate stored in TIMIT_AUDIO_FS).
# NOTE(review): presumably chosen to fit a 1024-sample analysis window - confirm.
MIN_SAMPLE_DUR = 1023

dur_filt = ALL_TIMIT_VOWELS_DF["duration_sample"].gt(MIN_SAMPLE_DUR)
FILTERED_ALL_TIMIT_VOWELS_DF = ALL_TIMIT_VOWELS_DF[dur_filt]

FILTERED_ALL_TIMIT_VOWELS_DF

Unnamed: 0,index,audio_filepath,wav_file,person_id,sex,start_sample,end_sample,duration_sample,start_second,end_second,duration_second,vowel_name
0,0,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,11240,12783,1543,0.702500,0.798937,0.096437,iy
1,1,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,14078,16157,2079,0.879875,1.009813,0.129938,ae
2,2,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,17587,18760,1173,1.099187,1.172500,0.073313,er
3,3,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,19962,21514,1552,1.247625,1.344625,0.097000,aa
4,4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,26280,28591,2311,1.642500,1.786938,0.144437,uw
...,...,...,...,...,...,...,...,...,...,...,...,...
78367,4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,17511,18920,1409,1.094437,1.182500,0.088063,iy
78368,5,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,19720,20929,1209,1.232500,1.308062,0.075563,ix
78371,8,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,26843,28490,1647,1.677687,1.780625,0.102938,ay
78372,9,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,30310,31707,1397,1.894375,1.981688,0.087313,ih


## FUNCTION: Make Vowel info Subset

In [14]:
def make_vowel_subset(vowel: str, df: pd.DataFrame, limit: int) -> pd.DataFrame:
    """Return up to ``limit`` male and ``limit`` female rows for one vowel.

    Rows of ``df`` whose ``vowel_name`` equals ``vowel`` are split on the ``sex``
    column ("M" / "F"). Each group is shuffled with its own fixed seed and cut to
    its first ``limit`` rows; a group with fewer rows is kept whole.

    Parameters
    ----------
    vowel : str
        Value to match in the ``vowel_name`` column.
    df : pd.DataFrame
        Table with at least the ``vowel_name`` and ``sex`` columns.
    limit : int
        Maximum number of rows kept per sex; must be non-negative.

    Returns
    -------
    pd.DataFrame
        The selected male rows followed by the selected female rows, keeping
        the row labels they had in ``df``.

    Raises
    ------
    ValueError
        If ``limit`` is negative (a negative slice bound would silently drop
        rows from the end of each group instead of capping it).
    """
    if limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    vowel_rows = df[df["vowel_name"] == vowel]

    # Fixed per-sex seeds keep the drawn subset identical from run to run.
    seed_by_sex = (("M", 6), ("F", 8))

    # `.iloc[:limit]` already stops at the end of a short group, so no explicit
    # min(limit, len(group)) clamp is needed.
    capped_groups = [
        shuffle(vowel_rows[vowel_rows["sex"] == sex], random_state=seed).iloc[:limit]
        for sex, seed in seed_by_sex
    ]

    return pd.concat(capped_groups)
    
# Preview the helper on one vowel. The pick uses a locally seeded generator so
# that Restart & Run All shows the same preview table every time.
_demo_rng = np.random.RandomState(0)
make_vowel_subset(
    vowel=ALL_TIMIT_VOWEL_LIST[_demo_rng.randint(0, len(ALL_TIMIT_VOWEL_LIST))],
    df=FILTERED_ALL_TIMIT_VOWELS_DF,
    limit=20,
)

Unnamed: 0,index,audio_filepath,wav_file,person_id,sex,start_sample,end_sample,duration_sample,start_second,end_second,duration_second,vowel_name
65279,7,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX353.WAV,MSAT1,M,28230,29660,1430,1.764375,1.85375,0.089375,eh
61093,2,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX398.WAV,MTMT0,M,10200,11864,1664,0.6375,0.7415,0.104,eh
6559,8,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX388.WAV,MKCH0,M,23280,24396,1116,1.455,1.52475,0.06975,eh
56556,1,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX21.WAV,MDAS0,M,5892,7399,1507,0.36825,0.462438,0.094187,eh
18216,12,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI820.WAV,MGRT0,M,62440,64280,1840,3.9025,4.0175,0.115,eh
42270,4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA2.WAV,MRBC0,M,18760,19843,1083,1.1725,1.240187,0.067687,eh
49067,12,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI761.WAV,MJPM1,M,52146,53960,1814,3.259125,3.3725,0.113375,eh
59429,9,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX15.WAV,MRAM0,M,28560,29800,1240,1.785,1.8625,0.0775,eh
49470,6,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX243.WAV,MJWS0,M,19926,22182,2256,1.245375,1.386375,0.141,eh
46960,17,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI688.WAV,MCSS0,M,62600,64920,2320,3.9125,4.0575,0.145,eh


### Create and export Vowel Info SUBSET dataframe

In [19]:
# Maximum rows kept per vowel AND per sex, so each vowel contributes up to
# 2 * VOWEL_LIMIT rows to the subset.
VOWEL_LIMIT = 100
vowel_df = FILTERED_ALL_TIMIT_VOWELS_DF

# One capped subset per vowel, stacked in the order of ALL_TIMIT_VOWEL_LIST.
per_vowel_subsets = [
    make_vowel_subset(vowel, limit=VOWEL_LIMIT, df=vowel_df)
    for vowel in tqdm(ALL_TIMIT_VOWEL_LIST)
]

# Build the final frame without in-place mutation:
#  - reset_index(drop=True): discard the source row labels and renumber from 0
#    (avoids relying on pandas naming the old labels "level_0");
#  - drop "index": remove the column of that name carried over from the input
#    table (errors="ignore" keeps the cell working if the column is absent);
#  - rename_axis("index"): name the new row index so the CSV header carries it.
SUBSET_TIMIT_VOWELS_DF = (
    pd.concat(per_vowel_subsets)
    .reset_index(drop=True)
    .drop(columns=["index"], errors="ignore")
    .rename_axis("index")
)

# Fill the "LIM" placeholder of the export path with the limit actually used.
subset_csv_path = SUBSET_TIMIT_VOWELS_EXP_FILEPATH.replace("LIM", f"{VOWEL_LIMIT}")
SUBSET_TIMIT_VOWELS_DF.to_csv(subset_csv_path, index=True)

SUBSET_TIMIT_VOWELS_DF

100%|██████████| 20/20 [00:00<00:00, 82.31it/s]


Unnamed: 0_level_0,audio_filepath,wav_file,person_id,sex,start_sample,end_sample,duration_sample,start_second,end_second,duration_second,vowel_name
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA2.WAV,MWGR0,M,8482,9539,1057,0.530125,0.596187,0.066062,iy
1,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX394.WAV,MJES0,M,23410,24600,1190,1.463125,1.537500,0.074375,iy
2,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2222.WAV,MSTK0,M,56557,57800,1243,3.534813,3.612500,0.077688,iy
3,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI1340.WAV,MJDM0,M,43947,45891,1944,2.746688,2.868187,0.121500,iy
4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI1239.WAV,MLBC0,M,39979,41357,1378,2.498687,2.584812,0.086125,iy
...,...,...,...,...,...,...,...,...,...,...,...
3816,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI1448.WAV,MRJM3,M,48800,50168,1368,3.050000,3.135500,0.085500,ax-h
3817,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI1923.WAV,MAPV0,M,39477,40600,1123,2.467312,2.537500,0.070187,ax-h
3818,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX92.WAV,MSFV0,M,40647,43446,2799,2.540437,2.715375,0.174937,ax-h
3819,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2241.WAV,FLJG0,F,30033,31206,1173,1.877063,1.950375,0.073313,ax-h


### Stats

In [20]:
# Rows per vowel in the exported subset. A vowel reaches 2 * VOWEL_LIMIT only if
# both sexes have at least VOWEL_LIMIT rows after the duration filter; rarer
# vowels (ax-h in the recorded output) come out smaller.
subset_vowel_counts = SUBSET_TIMIT_VOWELS_DF["vowel_name"].value_counts()
subset_vowel_counts

iy      200
ae      200
uh      200
ey      200
ah      200
aw      200
ux      200
ax      200
ay      200
oy      200
eh      200
ix      200
ow      200
axr     200
ao      200
ih      200
uw      200
aa      200
er      200
ax-h     21
Name: vowel_name, dtype: int64