In [None]:
import os

import librosa
import numpy as np
from keras.layers import Dense, Input
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm.notebook import tqdm

In [None]:
# Download the TIMIT dataset
!wget cls.ru.nl/~ltenbosch/TIMIT.tar.gz
!tar -zxvf /content/TIMIT.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
wavfiles16kHz/TEST/DR6MJFC0SA1.WAV
wavfiles16kHz/TEST/DR6MJFC0SA2.WAV
wavfiles16kHz/TEST/DR6MJFC0SI1033.WAV
wavfiles16kHz/TEST/DR6MJFC0SI1663.WAV
wavfiles16kHz/TEST/DR6MJFC0SI2293.WAV
wavfiles16kHz/TEST/DR6MJFC0SX133.WAV
wavfiles16kHz/TEST/DR6MJFC0SX223.WAV
wavfiles16kHz/TEST/DR6MJFC0SX313.WAV
wavfiles16kHz/TEST/DR6MJFC0SX43.WAV
wavfiles16kHz/TEST/DR6MPAM1SX36.WAV
wavfiles16kHz/TEST/DR6MPAM1SA1.WAV
wavfiles16kHz/TEST/DR6MPAM1SA2.WAV
wavfiles16kHz/TEST/DR6MPAM1SI1029.WAV
wavfiles16kHz/TEST/DR6MPAM1SI1836.WAV
wavfiles16kHz/TEST/DR6MPAM1SI576.WAV
wavfiles16kHz/TEST/DR6MPAM1SX126.WAV
wavfiles16kHz/TEST/DR6MPAM1SX216.WAV
wavfiles16kHz/TEST/DR6MPAM1SX306.WAV
wavfiles16kHz/TEST/DR6MPAM1SX396.WAV
wavfiles16kHz/TEST/DR6MRJR0SX282.WAV
wavfiles16kHz/TEST/DR6MRJR0SA1.WAV
wavfiles16kHz/TEST/DR6MRJR0SA2.WAV
wavfiles16kHz/TEST/DR6MRJR0SI1182.WAV
wavfiles16kHz/TEST/DR6MRJR0SI1812.WAV
wavfiles16kHz/TEST/DR6MRJR0SI2313.WAV
wavfiles16kHz/TE

In [None]:
# Root directories of the extracted train and test splits
root_dir_train = 'wavfiles16kHz/TRAIN'
root_dir_test = 'wavfiles16kHz/TEST'


def extract_mean_mfccs(root_dir):
    """Return one time-averaged MFCC vector per '.WAV' file in ``root_dir``.

    Files are visited in plain ``os.listdir`` order on purpose: the labels
    are later built by listing the same directories the same way, and
    features and labels are paired purely by position.

    Parameters
    ----------
    root_dir : str
        Directory that directly contains the '.WAV' files.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per file: the mean of each MFCC coefficient over all
        frames of that file.
    """
    features = []
    for filename in tqdm(os.listdir(root_dir)):
        if not filename.endswith('.WAV'):
            continue
        file_path = os.path.join(root_dir, filename)
        # sr=None keeps the file's native sampling rate. librosa's default
        # (sr=22050) would resample every clip before feature extraction.
        audio, sr = librosa.load(file_path, sr=None)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr)
        # Collapse the time axis so every file yields a fixed-length vector
        features.append(np.mean(mfcc, axis=1))
    return features


mfcc_train = extract_mean_mfccs(root_dir_train)
mfcc_test = extract_mean_mfccs(root_dir_test)

  0%|          | 0/4620 [00:00<?, ?it/s]

  0%|          | 0/1680 [00:00<?, ?it/s]

In [None]:
def labels_from_filenames(root_dir):
    """Return the label character of every '.WAV' file in ``root_dir``.

    The label is the 4th character of the file name (index 3).
    NOTE(review): in names like 'DR6MJFC0SA1.WAV' this looks like the
    speaker-sex marker of the TIMIT speaker id - confirm against the dataset docs.

    Files are read in plain ``os.listdir`` order so that the labels line up
    position-by-position with the MFCC features extracted from the same
    directory listing.

    Parameters
    ----------
    root_dir : str
        Directory that directly contains the '.WAV' files.

    Returns
    -------
    list of str
        One single-character label per '.WAV' file.
    """
    return [
        filename[3]
        for filename in os.listdir(root_dir)
        if filename.endswith('.WAV')
    ]


labels_train = labels_from_filenames('wavfiles16kHz/TRAIN')
labels_test = labels_from_filenames('wavfiles16kHz/TEST')

# Map the string labels to integer class ids. The encoder is fitted on the
# train labels only, and the same mapping is reused for the test labels.
encoder = LabelEncoder()
encoder.fit(labels_train)
encoded_labels_train = encoder.transform(labels_train)
encoded_labels_test = encoder.transform(labels_test)

# One-hot encode with an explicit, shared width. Without num_classes,
# to_categorical sizes each matrix from the largest id it sees, so the test
# matrix could come out narrower than the train matrix.
num_classes = len(encoder.classes_)
one_hot_labels_train = to_categorical(encoded_labels_train, num_classes=num_classes)
one_hot_labels_test = to_categorical(encoded_labels_test, num_classes=num_classes)

In [None]:
# Small fully connected classifier over the per-file mean-MFCC vectors.
# The input size is declared with an explicit Input layer: passing
# `input_dim` to the first Dense layer is deprecated in Keras 3 and emits a
# warning when the layer is constructed.
n_features = mfcc_train[0].shape[0]
n_classes = one_hot_labels_train.shape[1]

model = Sequential()
model.add(Input(shape=(n_features,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
# One softmax unit per class, matching the one-hot label width
model.add(Dense(n_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Stack each list of per-file feature vectors into a single NumPy array
# (one row per file); no reshaping is performed.
mfcc_train, mfcc_test = (
    np.array(feature_list) for feature_list in (mfcc_train, mfcc_test)
)

In [None]:
# Fit the classifier on the training features; 20% of the training data is
# held out by Keras for per-epoch validation.
model.fit(
    x=mfcc_train,
    y=one_hot_labels_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 54ms/step - accuracy: 0.6703 - loss: 3.9659 - val_accuracy: 0.9297 - val_loss: 0.1869
Epoch 2/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9220 - loss: 0.1971 - val_accuracy: 0.9545 - val_loss: 0.1302
Epoch 3/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9419 - loss: 0.1468 - val_accuracy: 0.9643 - val_loss: 0.0954
Epoch 4/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9511 - loss: 0.1161 - val_accuracy: 0.9675 - val_loss: 0.0784
Epoch 5/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9577 - loss: 0.0987 - val_accuracy: 0.9654 - val_loss: 0.0888
Epoch 6/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9466 - loss: 0.1363 - val_accuracy: 0.9697 - val_loss: 0.0816
Epoch 7/10
[1m58/58[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x794a173393c0>

In [None]:
# Evaluate on the TEST split. The model is compiled with
# metrics=['accuracy'], so `score` holds the loss followed by the accuracy.
score = model.evaluate(
    x=mfcc_test,
    y=one_hot_labels_test,
)
print("Accuracy:", score[1])

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9346 - loss: 0.1894
Accuracy: 0.9375
