Training with oversampling and tuning for AUC

In [1]:
import os
os.chdir('../')

In [2]:
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import AUC
import numpy as np

X_train_path = 'artifacts/data_transformation/X_train.npy'
X_test_path = 'artifacts/data_transformation/X_test.npy'
y_train_path = 'artifacts/data_transformation/y_train.npy'
y_test_path = 'artifacts/data_transformation/y_test.npy'

X_train = np.load(X_train_path)
X_test = np.load(X_test_path)
y_train = np.load(y_train_path)
y_test = np.load(y_test_path)

base_model = MobileNetV3Large(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False
    
upd_model = Sequential([
                base_model,
                Flatten(),
                Dropout(0.5),
                Dense(32, activation='relu'),
                Dense(1, activation = 'sigmoid')
            ])
upd_model.compile(optimizer = Adam(learning_rate = 0.001), loss = 'binary_crossentropy', metrics = ['accuracy', AUC(name='auc')])
upd_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 MobilenetV3large (Function  (None, 7, 7, 960)         2996352   
 al)                                                             
                                                                 
 flatten (Flatten)           (None, 47040)             0         
                                                                 
 dropout (Dropout)           (None, 47040)             0         
                                                                 
 dense (Dense)               (None, 32)                1505312   
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 4501697 (17.17 MB)
Trainable params: 1505345 (5.74 MB)
Non-trainable params: 2996352 (11.43 MB)
____________

In [3]:
early_stopping = EarlyStopping(monitor='val_auc', patience=3, restore_best_weights=True)
upd_model.fit(X_train, y_train, epochs = 100, batch_size = 32, validation_data = [X_test, y_test], callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


<keras.src.callbacks.History at 0x2076e6b9820>

In [4]:
import librosa

# Load and preprocess the audio files
def load_audio_files(audio_directory):
    audio_files = []
    folders = os.listdir(audio_directory)
    folders.reverse()
    for label, folder in enumerate(folders):
        folder_path = os.path.join(audio_directory, folder)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav') or filename.endswith('.mp3'):
                filepath = os.path.join(folder_path, filename)
                audio_files.append((filepath, label))
    return audio_files

def split_audio_into_segments(audio, sr, segment_length=30):
    segment_length = segment_length * sr
    num_segments = len(audio) // segment_length
    segments = [audio[i * segment_length:(i + 1) * segment_length] for i in range(num_segments)]
    return segments

def create_spectrogram(audio, sr, n_fft=2048, hop_length=512, n_mels=128):
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram

def random_crop(spectrogram, crop_size):
    pad_width_x = max(crop_size - spectrogram.shape[1], 0)
    pad_width_y = max(crop_size - spectrogram.shape[0], 0)
    spectrogram = np.pad(spectrogram, ((0, pad_width_y), (0, pad_width_x)), mode='constant')
    max_x = spectrogram.shape[1] - crop_size
    max_y = spectrogram.shape[0] - crop_size
    x = np.random.randint(0, max_x + 1)
    y = np.random.randint(0, max_y + 1)
    cropped_spectrogram = spectrogram[y:y + crop_size, x:x + crop_size]
    return cropped_spectrogram

def time_shift(spectrogram, max_shift):
    shift_amount = np.random.randint(-max_shift, max_shift)
    shifted_spectrogram = np.roll(spectrogram, shift_amount, axis=1)
    return shifted_spectrogram

def add_noise(spectrogram, noise_factor=0.005):
    noise = np.random.randn(*spectrogram.shape) * noise_factor
    noisy_spectrogram = spectrogram + noise
    return noisy_spectrogram

def augment_data(spectrogram):
    spectrogram = random_crop(spectrogram, crop_size=224)
    spectrogram = time_shift(spectrogram, max_shift=10)
    spectrogram = add_noise(spectrogram, noise_factor=0.005)
    return spectrogram

def load_test_file(file_path):
    audio, sr = librosa.load(file_path, sr=16000)
    return audio, sr

def extract_segment(audio, sr, segment_length=10):
    segment_length = segment_length * sr
    if len(audio) < segment_length:
        pad_width = segment_length - len(audio)
        audio = np.pad(audio, (0, pad_width), mode='constant')
    start = np.random.randint(0, len(audio) - segment_length + 1)
    segment = audio[start:start + segment_length]
    return segment

def create_test_set_from_audio(file_path):
    audio, sr = load_test_file(file_path)
    segment = extract_segment(audio, sr, segment_length=10)
    spectrogram = create_spectrogram(segment, sr)
    augmented_spectrogram = augment_data(spectrogram)
    augmented_spectrogram = np.expand_dims(augmented_spectrogram, axis=-1)
    augmented_spectrogram = np.repeat(augmented_spectrogram, 3, axis=-1)
    return augmented_spectrogram

In [5]:
# Testing on a new file
test_file_path = 'artifacts/data_ingestion/DEMONSTRATION/DEMONSTRATION/linus-original-DEMO.mp3'
test_set = create_test_set_from_audio(test_file_path)
test_set = np.expand_dims(test_set, axis=0)

# Make predictions
prediction = upd_model.predict(test_set)

# Output the prediction
print(prediction)

[[0.00020698]]


In [6]:
from sklearn.metrics import classification_report

y_pred = upd_model.predict(X_test)
y_pred = (y_test > 0.5).astype(int)
print(classification_report(y_test, y_pred, target_names=['Real', 'Fake']))

              precision    recall  f1-score   support

        Real       1.00      1.00      1.00        75
        Fake       1.00      1.00      1.00       522

    accuracy                           1.00       597
   macro avg       1.00      1.00      1.00       597
weighted avg       1.00      1.00      1.00       597



In [7]:
import pandas as pd
from scipy import stats

In [8]:
test_file_path = 'artifacts/data_ingestion/DEMONSTRATION/DEMONSTRATION/'
files = os.listdir(test_file_path)
actuals = [1,0,1,0]
a = list(zip(actuals, files))
results = []

def predict_with_models(test_set, model):
    prediction = (model.predict(test_set) > 0.5).astype(int)
    return prediction[0][0]

for filenum, file in enumerate(a):
    for i in range(10): 
        # Make predictions
        test_set1 = create_test_set_from_audio(os.path.join(test_file_path, file[1]))
        test_set1 = np.expand_dims(test_set1, axis=0)
        prediction1 = predict_with_models(test_set1, upd_model)
        
        test_set2 = create_test_set_from_audio(os.path.join(test_file_path, file[1]))
        test_set2 = np.expand_dims(test_set2, axis=0)
        prediction2 = predict_with_models(test_set2, upd_model)
        
        test_set3 = create_test_set_from_audio(os.path.join(test_file_path, file[1]))
        test_set3 = np.expand_dims(test_set3, axis=0)
        prediction3 = predict_with_models(test_set3, upd_model)
        
        test_set4 = create_test_set_from_audio(os.path.join(test_file_path, file[1]))
        test_set4 = np.expand_dims(test_set4, axis=0)
        prediction4 = predict_with_models(test_set4, upd_model)
        
        test_set5 = create_test_set_from_audio(os.path.join(test_file_path, file[1]))
        test_set5 = np.expand_dims(test_set5, axis=0)
        prediction5 = predict_with_models(test_set5, upd_model)
        
        final_pred = stats.mode([prediction1, prediction2, prediction3, prediction4, prediction5])
        
        results.append({
                'File': filenum,
                'Loop': i,
                'Actual': file[0],
                'Final Prediction': final_pred[0][0]
            })    

df_results = pd.DataFrame(results)
display(df_results)
#df_results.to_csv('compare.csv')



  final_pred = stats.mode([prediction1, prediction2, prediction3, prediction4, prediction5])




Unnamed: 0,File,Loop,Actual,Final Prediction
0,0,0,1,0
1,0,1,1,1
2,0,2,1,1
3,0,3,1,1
4,0,4,1,1
5,0,5,1,1
6,0,6,1,0
7,0,7,1,1
8,0,8,1,1
9,0,9,1,1


In [9]:
# Example usage
test_file_path = 'artifacts/data_ingestion/DEMONSTRATION/DEMONSTRATION/matthew_mcconaughey_winning_best_actor__86th_oscars_2014-cut.mp3'
results = []
probs = []
for i in range(5):
    test_set = create_test_set_from_audio(test_file_path)
    test_set = np.expand_dims(test_set, axis=0)
    # Make predictions
    prob = upd_model.predict(test_set)[0][0]
    prediction = (prob > 0.5).astype(int)
    probs.append(prob)
    results.append(prediction)

print(results)
print(probs)
final_pred = stats.mode(results)[0][0]

# Output the prediction
print(final_pred)

[0, 0, 0, 0, 0]
[0.012057306, 0.03437051, 0.0055038943, 0.39036554, 0.028225122]
0


  final_pred = stats.mode(results)[0][0]


In [10]:
#upd_model.save('best_model5.keras')