In [4]:
import pandas as pd
import numpy as np
from dataset import LABELS, silence_detection_training, silence_detection_validation, silence_detection_full, TensorflowDataset
from models import Lstm
from sklearn.preprocessing import LabelEncoder
import random
from sklearn.utils import resample
import tensorflow as tf


In [61]:
# Build the silence / non-silence LSTM classifier and train it on the
# `silence_detection_full` dataset imported from `dataset`.
model = Lstm(
    # architecture
    input_shape=(39, 44),
    num_classes=2,
    lstm_units=64,
    dropout_rate=0.2,
    # optimisation
    learning_rate=0.001,
    batch_size=32,
    epoch=4,
    # output location handed to the model
    model_path='models\\lstm_silence.h5',
)
model.train(silence_detection_full)

In [5]:
# Build a class-balanced, shuffled training set for silence detection.
#
# Every observation is accessed as obs[0] (features) and obs[1] (label);
# the labels are rewritten in place, so observations must be mutable.
# NOTE(review): the backslash separators make these paths Windows-specific.
train = pd.read_pickle('extracted_features\\silence_detection_training.pkl')
val = pd.read_pickle('extracted_features\\silence_detection_validation.pkl')

# Fixed seed so the oversampling and the shuffle below produce the same
# training set on every "Restart Kernel -> Run All".
SEED = 42

# Separate 'silence' and 'non-silence' observations
train_silence = [obs for obs in train if obs[1] == 'silence']
train_non_silence = [obs for obs in train if obs[1] != 'silence']

# Oversample 'silence' (with replacement) until it matches the number of
# 'non-silence' observations
train_silence_resampled = resample(
    train_silence,
    replace=True,
    n_samples=len(train_non_silence),
    random_state=SEED,
)

# Combine resampled 'silence' and 'non-silence' observations into the final training set
train_final = train_non_silence + train_silence_resampled

# Collapse every label other than 'silence' into the single class
# 'non-silence', for both the training and the validation split
for split in (train_final, val):
    for obs in split:
        if obs[1] != 'silence':
            obs[1] = 'non-silence'

# Shuffle the training set with a private, seeded generator so the global
# `random` state is left untouched
random.Random(SEED).shuffle(train_final)


In [6]:
# Split every observation into parallel arrays: index 0 feeds the X array,
# index 1 feeds the y array, for the training and validation sets alike.
X_train, y_train = (np.array([obs[field] for obs in train_final]) for field in (0, 1))
X_val, y_val = (np.array([obs[field] for obs in val]) for field in (0, 1))

In [12]:
# Report how many training labels fall in each of the two classes.
print('non-silence observations:', np.count_nonzero(y_train != 'silence'))
print('silence observations:', np.count_nonzero(y_train == 'silence'))

non-silence observations: 6835
silence observations: 6835


In [None]:
# Wrap each (features, labels) array pair in a tf.data.Dataset that yields
# one (x, y) element per observation.
training, validation = (
    tf.data.Dataset.from_tensor_slices(arrays)
    for arrays in ((X_train, y_train), (X_val, y_val))
)

In [None]:
# Hyperparameter sweep: train one Lstm per combination of the values listed
# in the loops below and collect one summary row per run in `results`.
#
# For every Keras-style history metric two values are stored:
#   <metric>      - value after the last epoch
#   <metric>_max  - maximum value seen over all epochs
result_columns = [
    'lstm_units', 'dropout_rate', 'learning_rate', 'epoch', 'batch',
    'loss', 'loss_max', 'accuracy', 'accuracy_max',
    'val_loss', 'val_loss_max', 'val_accuracy', 'val_accuracy_max',
]
tracked_metrics = ('loss', 'accuracy', 'val_loss', 'val_accuracy')

# Loop-invariant settings derived from the data
num_classes = len(np.unique(y_train))  # Number of unique classes in your dataset
input_shape = (39, 44)

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end, instead of growing a DataFrame inside the loop.
records = []

# Set your model's hyperparameters
for lstm_units in [64]:
    for dropout_rate in [0.2]:
        for lr in [0.001]:
            for epoch in [4]:
                for batch in [32]:
                    model = Lstm(
                        lstm_units=lstm_units,
                        dropout_rate=dropout_rate,
                        learning_rate=lr,
                        input_shape=input_shape,
                        num_classes=num_classes,
                        batch_size=batch,
                        epoch=epoch,
                    )
                    model.train(training, validation)

                    history = model.history.history
                    record = {
                        'lstm_units': lstm_units,
                        'dropout_rate': dropout_rate,
                        'learning_rate': lr,
                        'epoch': epoch,
                        'batch': batch,
                    }
                    for metric in tracked_metrics:
                        record[metric] = history[metric][-1]
                        record[metric + '_max'] = max(history[metric])
                    records.append(record)

results = pd.DataFrame(records, columns=result_columns)


In [39]:
# Save the object currently bound to `model` to an HDF5 file.
# NOTE(review): the backslash separator makes this path Windows-specific.
model.save('models\\lstm_silence_model.h5')