In [1]:
# modules
from tqdm import  tqdm
from glob import glob
import pandas as pd
import numpy as np
import os
from os import listdir
from os.path import basename as bn, join, split as sp

# Dataset Paths and Constants

In [2]:
# Read paths
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
ROOT_TIMIT_DATA_PATH = "/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT"

# Write paths
ALL_EXP_FOLDER = "./exports/"
# Create the export folder if it is missing. exist_ok=True keeps the cell
# idempotent on re-runs and avoids the check-then-create race of a manual
# os.path.exists() test; it still raises if the path exists but is not a folder.
os.makedirs(ALL_EXP_FOLDER, exist_ok=True)

# Vowel info Export CSV filename
ALL_TIMIT_VOWELS_EXP_FILENAME = "a_all-timit_vowels.csv"
ALL_TIMIT_VOWELS_EXP_FILEPATH = join(ALL_EXP_FOLDER, ALL_TIMIT_VOWELS_EXP_FILENAME)

# TIMIT SAMPLING RATE (samples per second; used to convert sample offsets to seconds)
TIMIT_AUDIO_FS = 16000

# TIMIT VOWEL INFO
# Whitespace-aligned table, one vowel per line. The first token on each line is
# the vowel label that is matched against the last field of the .PHN annotation
# lines; the remaining tokens look like an example word followed by its phone
# sequence and are not parsed by this notebook.
TIMIT_VOWEL_INFO = """  iy         beet          bcl b IY tcl t
                        ih         bit           bcl b IH tcl t 
                        eh         bet           bcl b EH tcl t
                        ey         bait          bcl b EY tcl t
                        ae         bat           bcl b AE tcl t
                        aa         bott          bcl b AA tcl t
                        aw         bout          bcl b AW tcl t
                        ay         bite          bcl b AY tcl t
                        ah         but           bcl b AH tcl t
                        ao         bought        bcl b AO tcl t
                        oy         boy           bcl b OY
                        ow         boat          bcl b OW tcl t
                        uh         book          bcl b UH kcl k
                        uw         boot          bcl b UW tcl t
                        ux         toot          tcl t UX tcl t
                        er         bird          bcl b ER dcl d
                        ax         about         AX bcl b aw tcl t
                        ix         debit         dcl d eh bcl b IX tcl t
                        axr        butter        bcl b ah dx AXR
                        ax-h       suspect       s AX-H s pcl p eh kcl k tcl t"""

# 1. Vowels of TIMIT Dataset

### TIMIT Audio Paths

In [3]:
# Glob pattern for every .WAV file three directory levels below the corpus root
# (e.g. TEST/DR1/FAKS0/SA1.WAV).
DATA_LOCATION_PATHS = f"{ROOT_TIMIT_DATA_PATH}/T*/*/*/*.WAV"

# glob() returns matches in arbitrary order; sort them so indices are stable
# between runs.
ALL_TIMIT_AUDIO_PATHS = sorted(glob(DATA_LOCATION_PATHS))

_n_audio_files = len(ALL_TIMIT_AUDIO_PATHS)
print(_n_audio_files, "audio files exists.")
print(ALL_TIMIT_AUDIO_PATHS[:3])

6300 audio files exists.
['/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT/TEST/DR1/FAKS0/SA1.WAV', '/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT/TEST/DR1/FAKS0/SA2.WAV', '/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT/TEST/DR1/FAKS0/SI1573.WAV']


### TIMIT Vowel List

In [4]:
# One entry per line of the vowel table.
timit_vowels = TIMIT_VOWEL_INFO.split("\n")

# The vowel label is the first whitespace-delimited token of each line.
# str.split() with no argument already collapses runs of whitespace, so no
# join/re-split round-trip is needed; blank lines are skipped so they cannot
# add an empty-string "vowel" to the list.
ALL_TIMIT_VOWEL_LIST = [
    _fields[0]
    for _fields in (_line.split() for _line in timit_vowels)
    if _fields
]

print(len(ALL_TIMIT_VOWEL_LIST), "types of vowels are present")
print(ALL_TIMIT_VOWEL_LIST)

20 types of vowels are present
['iy', 'ih', 'eh', 'ey', 'ae', 'aa', 'aw', 'ay', 'ah', 'ao', 'oy', 'ow', 'uh', 'uw', 'ux', 'er', 'ax', 'ix', 'axr', 'ax-h']


## FUNCTION: Extract Vowel info from TIMIT Audio File

In [5]:
def get_vowel_info_from_file(audio_path: str, anot_seperator: str=" ",
    vowels: list=ALL_TIMIT_VOWEL_LIST,
    sr: int=TIMIT_AUDIO_FS) -> pd.DataFrame:
    """Collect the vowel segments annotated for one TIMIT audio file.

    The phone annotation is read from the file that sits next to the audio file
    and has the same name with a ``.PHN`` extension. Each annotation line holds
    ``<start-sample> <end-sample> <phone>`` separated by ``anot_seperator``.

    Args:
        audio_path: Path of the audio file, e.g. ``.../TIMIT/TEST/DR1/FAKS0/SA1.WAV``.
        anot_seperator: Field separator used inside the annotation file.
        vowels: Phone labels to keep; every other phone is ignored.
        sr: Sampling rate used to convert sample offsets to seconds.

    Returns:
        A DataFrame with one row per matching phone and the columns
        ``audio_filepath``, ``wav_file``, ``person_id``, ``sex``,
        ``start_sample``, ``end_sample``, ``duration_sample``,
        ``start_second``, ``end_second``, ``duration_second`` and
        ``vowel_name``. The frame is empty (no columns) when nothing matches.

    Raises:
        OSError: If the annotation file cannot be opened.
        ValueError: If a matching line has non-integer sample offsets.
    """
    # Swap only the extension. A plain str.replace(".WAV", ".PHN") would also
    # rewrite any ".WAV" that happens to occur in a directory name.
    _anotation_path: str = os.path.splitext(audio_path)[0] + ".PHN"

    # Values shared by every row of this file.
    _speaker_dir, _wav_file = sp(audio_path)
    _person_id = bn(_speaker_dir)  # Ex. .../TIMIT/TEST/DR1/FAKS0/SA1.WAV -> FAKS0
    _sex = _person_id[:1]          # first character of the speaker id, e.g. "F"

    # Set membership is O(1) per annotation line instead of scanning the list.
    _vowel_set = set(vowels)

    _vow_rows: list = []
    with open(_anotation_path, mode="r") as _ph_file:
        for _line in _ph_file:
            _ph = _line.strip().split(anot_seperator)
            if _ph[-1] not in _vowel_set:
                continue

            # Parse the sample offsets once and reuse them for all derived fields.
            _start, _end = int(_ph[0]), int(_ph[1])
            _duration = _end - _start

            _vow_rows.append(
                {
                    "audio_filepath": audio_path,
                    "wav_file": _wav_file,
                    "person_id": _person_id,
                    "sex": _sex,

                    "start_sample": _start,
                    "end_sample": _end,
                    "duration_sample": _duration,

                    "start_second": _start / sr,
                    "end_second": _end / sr,
                    "duration_second": _duration / sr,

                    "vowel_name": _ph[-1],
                }
            )

    return pd.DataFrame(_vow_rows)
    

# Try the extractor on one randomly chosen recording and display the result.
# randint(n) draws from [0, n), i.e. any valid index into the path list.
get_vowel_info_from_file(
    ALL_TIMIT_AUDIO_PATHS[np.random.randint(len(ALL_TIMIT_AUDIO_PATHS))],
    anot_seperator=" ",
    vowels=ALL_TIMIT_VOWEL_LIST,
    sr=TIMIT_AUDIO_FS,
)

Unnamed: 0,audio_filepath,wav_file,person_id,sex,start_sample,end_sample,duration_sample,start_second,end_second,duration_second,vowel_name
0,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,3171,4511,1340,0.198187,0.281938,0.08375,ih
1,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,7227,8647,1420,0.451688,0.540438,0.08875,ow
2,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,9800,12546,2746,0.6125,0.784125,0.171625,ao
3,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,14719,15582,863,0.919937,0.973875,0.053937,ih
4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,16292,17022,730,1.01825,1.063875,0.045625,ax
5,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,18214,19718,1504,1.138375,1.232375,0.094,ah
6,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,20720,21827,1107,1.295,1.364188,0.069187,er
7,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SI2002.WAV,FLMC0,F,22915,26504,3589,1.432187,1.6565,0.224312,ey


### Create and export Vowel Info dataframe

In [7]:

# Build the vowel table once and cache it as CSV. When the cache already exists
# (e.g. after a kernel restart) load it instead, so ALL_TIMIT_VOWELS_DF is
# defined on every run and the cells below do not raise NameError.
csv_path = ALL_TIMIT_VOWELS_EXP_FILEPATH
if os.path.exists(csv_path):
    # The cache was written with the index stored in a column named "index".
    ALL_TIMIT_VOWELS_DF = pd.read_csv(csv_path, index_col="index")
else:
    # Index values are the per-file row numbers, so they repeat across files.
    ALL_TIMIT_VOWELS_DF = pd.concat([get_vowel_info_from_file(fp) for fp in tqdm(ALL_TIMIT_AUDIO_PATHS)])
    ALL_TIMIT_VOWELS_DF.index.name = "index"
    ALL_TIMIT_VOWELS_DF.to_csv(csv_path, index=True)

ALL_TIMIT_VOWELS_DF

Unnamed: 0_level_0,audio_filepath,wav_file,person_id,sex,start_sample,end_sample,duration_sample,start_second,end_second,duration_second,vowel_name
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,11240,12783,1543,0.702500,0.798937,0.096437,iy
1,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,14078,16157,2079,0.879875,1.009813,0.129938,ae
2,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,17587,18760,1173,1.099187,1.172500,0.073313,er
3,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,19962,21514,1552,1.247625,1.344625,0.097000,aa
4,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SA1.WAV,FAKS0,F,26280,28591,2311,1.642500,1.786938,0.144437,uw
...,...,...,...,...,...,...,...,...,...,...,...
6,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,22751,23586,835,1.421938,1.474125,0.052187,ao
7,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,24520,25386,866,1.532500,1.586625,0.054125,ih
8,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,26843,28490,1647,1.677687,1.780625,0.102938,ay
9,/home/jeevan/datasets/TIMIT Acoustic-Phonetic ...,SX82.WAV,MTCS0,M,30310,31707,1397,1.894375,1.981688,0.087313,ih


In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric
# sample/second offset and duration columns across all extracted vowels.
ALL_TIMIT_VOWELS_DF.describe()

Unnamed: 0,start_sample,end_sample,duration_sample,start_second,end_second,duration_second
count,78374.0,78374.0,78374.0,78374.0,78374.0,78374.0
mean,24477.171166,26012.632455,1535.461288,1.529823,1.62579,0.095966
std,15701.519448,15768.373059,778.745234,0.981345,0.985523,0.048672
min,417.0,793.0,74.0,0.026062,0.049563,0.004625
25%,11770.0,13284.0,960.0,0.735625,0.83025,0.06
50%,22360.0,23853.0,1387.0,1.3975,1.490813,0.086688
75%,34440.0,36040.0,1963.0,2.1525,2.2525,0.122688
max,112600.0,114980.0,7735.0,7.0375,7.18625,0.483438
