In [19]:
import sounddevice as sd
import wavio

def record_audio(filename, duration, samplerate=44100, channels=2):
    """Record audio with ``sounddevice`` and save it as a 16-bit WAV file.

    Args:
        filename: Destination path of the WAV file.
        duration: Length of the recording in seconds; must be positive.
        samplerate: Sampling rate in Hz; must be positive.
        channels: Number of input channels to capture (default 2, as before).

    Raises:
        ValueError: If ``duration``, ``samplerate`` or ``channels`` is not
            positive, or if the requested duration is shorter than one sample.
    """
    # Validate before printing or touching the audio device, so bad arguments
    # fail fast with a clear message instead of deep inside the audio backend.
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")
    if samplerate <= 0:
        raise ValueError(f"samplerate must be positive, got {samplerate!r}")
    if channels < 1:
        raise ValueError(f"channels must be at least 1, got {channels!r}")

    frame_count = int(duration * samplerate)
    if frame_count < 1:
        raise ValueError("duration is shorter than a single sample")

    print("Recording...")
    recording = sd.rec(frame_count, samplerate=samplerate, channels=channels)
    # sd.rec returns immediately; block here until the buffer has been filled.
    sd.wait()
    print("Recording finished.")
    # sampwidth=2 -> two bytes (16 bits) per sample.
    wavio.write(filename, recording, samplerate, sampwidth=2)
    print(f"Audio saved to {filename}")

# Record 10 seconds into test.wav; the following cells read this file by name.
record_audio('test.wav', duration=10)


Recording...
Recording finished.
Audio saved to test.wav


In [20]:
from modules.pre_processing.scripts.pre_processing_data import AudioDataPreProcessor
import numpy as np
from modules.extracting_feature.scripts.extract_statistic_features import StatisticFeatureExtraction
# Both constructor arguments are "./"; their meaning is defined in the project
# module (presumably input/output directories — TODO confirm).
audio_data_processor = AudioDataPreProcessor("./", "./")
# Per the saved cell output this call returns './test.wav'.
# NOTE(review): that is the same path as the input recording, so the raw
# file may be overwritten by the processed one — confirm.
audio_data_processor.process_file("test.wav", './')


'./test.wav'

In [21]:
# 26 is presumably the number of MFCCs (the scaler/model file names used later
# contain "26_n_mfcc") — TODO confirm against StatisticFeatureExtraction.
statistic_feature_extractor = StatisticFeatureExtraction(26)
# NOTE(review): this calls an underscore-prefixed (private) method; prefer a
# public entry point if the class offers one.
features = statistic_feature_extractor._feature_engineering_for_file('test.wav')
# Collect the mapping's values in iteration order and lay them out as a single
# row, i.e. shape (1, N), ready for scaler.transform.
feature_arr = np.array(list(features.values())).reshape(1, -1)

In [22]:
# Display the raw (unscaled) feature row for inspection.
feature_arr

array([[-4.73648773e+02,  1.09435361e+04, -2.61865204e+02,
        -6.35029114e+02, -4.52641357e+02, -5.84783066e+02,
        -3.83976616e+02,  3.73163910e+02, -2.44200632e-01,
        -1.23798692e+00,  5.31747960e+07,  1.51727661e+02,
         2.70947632e+03,  2.52443512e+02,  7.23837357e+01,
         1.58725616e+02,  9.35849342e+01,  1.94682873e+02,
         1.80059784e+02, -8.49626784e-04, -1.28684580e+00,
         5.81515300e+06,  7.83324385e+00,  1.08773047e+03,
         8.67350159e+01, -7.95908890e+01,  1.07277889e+01,
        -1.46160710e+01,  2.69455237e+01,  1.66325897e+02,
        -1.69113010e-01, -2.97439098e-02,  2.59694391e+05,
        -8.63058949e+00,  8.45875488e+02,  4.06371651e+01,
        -1.03748756e+02, -8.88125420e-01, -2.35766072e+01,
         1.08146417e+01,  1.44385925e+02, -8.02585185e-01,
         1.39176846e-01,  2.08001938e+05,  7.60221100e+00,
         2.17695618e+02,  4.26838760e+01, -4.04512672e+01,
         1.07618475e+01,  3.85809496e-01,  1.73855596e+0

In [23]:
import joblib
def load_scaler(scaler_file):
    """Load and return the object serialized in ``scaler_file``.

    Thin wrapper around ``joblib.load``. joblib files are pickle-based, so
    only point this at files from a trusted source.
    """
    fitted_scaler = joblib.load(scaler_file)
    return fitted_scaler

# Load the serialized scaler from the models/ directory (path is relative to
# the notebook's working directory).
scaler = load_scaler("models/scaler_fine_tuned_cnn_model_using_feature_statistics_with_26_n_mfcc.pkl")

In [24]:
# Apply the loaded scaler to the single feature row.
scaled_arr = scaler.transform(feature_arr)



In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Saved Keras model, addressed relative to the notebook's working directory.
model_path = 'models/fine_tuned_model_using_feature_statistics_with_26_n_mfccs.keras'
model = load_model(model_path)

In [26]:
from sklearn.preprocessing import StandardScaler

In [27]:
# Add the trailing singleton axis so the array is (samples, features, 1).
# A reshape is used instead of np.expand_dims so that this cell is idempotent:
# re-running it keeps the shape at (samples, features, 1) rather than appending
# another axis on every execution.
scaled_arr = np.reshape(scaled_arr, (np.shape(scaled_arr)[0], -1, 1))

In [28]:
# Sanity check of the input shape; the saved output shows (1, 310, 1).
scaled_arr.shape

(1, 310, 1)

In [29]:
# Display the scaled values for inspection (this prints every row of the array).
scaled_arr

array([[[-1.63580532],
        [-0.91754304],
        [-2.09567821],
        [-0.69873507],
        [-1.7926423 ],
        [-0.79912875],
        [-2.20051792],
        [-0.53726041],
        [ 0.24315972],
        [-0.33326038],
        [ 9.29495906],
        [ 2.00698314],
        [ 0.47807849],
        [ 2.54942541],
        [ 2.1896117 ],
        [ 1.77413462],
        [ 1.06889205],
        [ 2.12980129],
        [ 1.15454618],
        [ 0.42707388],
        [-0.46198303],
        [11.28256214],
        [ 0.98908644],
        [ 0.92213041],
        [ 2.55967668],
        [-0.26303518],
        [ 0.97764728],
        [ 0.49419986],
        [ 1.25484298],
        [ 2.06607009],
        [ 0.46559603],
        [-0.05318571],
        [ 3.25361857],
        [-1.18301363],
        [ 2.03789542],
        [-0.48218398],
        [-3.76378132],
        [-0.49523182],
        [-1.27662718],
        [-0.50755729],
        [ 3.06491618],
        [-1.46184231],
        [ 0.08876439],
        [ 6

In [30]:
# Print the loaded model's layer summary.
model.summary()

In [31]:
# Score the prepared sample and report its three highest-scoring classes,
# best first.
# NOTE(review): the label order below is assumed to match the class indices
# the model was trained with — confirm against the training code.
emotion_classes = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Surprised']
predictions = model.predict(scaled_arr)
# argsort is ascending: reverse it, then keep the first three indices.
top_3_indices = np.argsort(predictions[0])[::-1][:3]
top_3_emotions = [
    (emotion_classes[class_idx], predictions[0][class_idx])
    for class_idx in top_3_indices
]
for emotion, score in top_3_emotions:
    print(f'Emotion: {emotion}, Score: {score:.4f}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
Emotion: Disgusted, Score: 0.9724
Emotion: Sad, Score: 0.0143
Emotion: Happy, Score: 0.0124
