In [3]:
import os
import librosa
import numpy as np

import tensorflow as tf

def extract_mfcc(file_path, n_mfcc=13):
    y, sr = librosa.load(file_path, res_type='kaiser_fast')
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfcc.T

def load_data(dataset_dir, n_mfcc=13):
    data = []
    labels = []
    accents = sorted(os.listdir(dataset_dir))
    for accent in accents:
        accent_dir = os.path.join(dataset_dir, accent)
        for file in os.listdir(accent_dir):
            file_path = os.path.join(accent_dir, file)
            mfcc = extract_mfcc(file_path, n_mfcc)
            data.append(mfcc)
            labels.append(accent)
    return np.array(data), np.array(labels)

dataset_dir = 'files/wav/'
mfccs, labels = load_data(dataset_dir)

num_classes = len(sorted(set(labels)))
labels = tf.keras.utils.to_categorical(labels, num_classes)

NotADirectoryError: [Errno 20] Not a directory: 'files/wav/common_voice_en_37285574.wav'

In [None]:
import tensorflow as tf

tdnn_units = 128
delay_length = 3

def tdnn_block(inputs, activation='relu', conv_filter=3, pool_size=2):
    x = inputs
    x = tf.keras.layers.ZeroPadding2D(padding=((1, 1)))(x)
    x = tf.keras.layers.Conv2D(filters=tdnn_units, kernel_size=(1, conv_filter), padding='valid', activation=activation)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ZeroPadding2D(padding=((1, 1)))(x)
    x = tf.keras.layers.Conv2D(filters=tdnn_units, kernel_size=(conv_filter, 1), padding='valid', activation=activation)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.MaxPooling2D(pool_size=(pool_size, 1), strides=(pool_size, 1))(x)
    x = tf.keras.layers.ZeroPadding2D(padding=((0, 1)))(x)
    x = tf.keras.layers.Conv2D(filters=tdnn_units, kernel_size=(1, delay_length), padding='valid', activation=activation)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    return x

def tdnn(input_shape, num_classes):
    input_layer = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.layers.Reshape((1, num_mfcc_features, 1))(input_layer)
    x = tdnn_block(x)
    x = tdnn_block(x)
    x = tdnn_block(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(units=tdnn_units, activation=activation)(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    output_layer = tf.keras.layers.Dense(units=num_classes, activation='softmax')(x)
    model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)
    return model