In [1]:
from audio_preprocessing.cut_audio_segment_from_audio_file import AudioProcessor
from feature_extraction.feature_extractor import FeatureExtractor
from feature_extraction.mfcc_images_extractor import MelSpectrogramImageExtractor
from data_processing.english_data_processing import ModelEvaluator
from itertools import combinations
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

In [2]:
def extract_mfcc_images(n_mfcc, figsize, input_folder, output_folder):
    """Run the mel-spectrogram image extractor over a folder.

    Builds a ``MelSpectrogramImageExtractor`` from ``n_mfcc`` and ``figsize``
    and invokes its ``extract_from_folders`` method with the two folders.

    Args:
        n_mfcc: First constructor argument of the extractor.
        figsize: Second constructor argument of the extractor.
        input_folder: Forwarded as the first argument of
            ``extract_from_folders``.
        output_folder: Forwarded as the second argument of
            ``extract_from_folders``.

    Returns:
        None. Whatever ``extract_from_folders`` returns is discarded.
    """
    MelSpectrogramImageExtractor(n_mfcc, figsize).extract_from_folders(
        input_folder, output_folder
    )

In [3]:
def cut_audio_segment_from_audio_file(frame_size, hop_size, threshold, input_folder, output_folder):
    """Process a folder of audio files with ``AudioProcessor``.

    Args:
        frame_size: First constructor argument of ``AudioProcessor``.
        hop_size: Second constructor argument of ``AudioProcessor``.
        threshold: Third constructor argument of ``AudioProcessor``.
        input_folder: Forwarded as the first argument of ``process_folders``.
        output_folder: Forwarded as the second argument of ``process_folders``.

    Returns:
        None. Whatever ``process_folders`` returns is discarded.
    """
    AudioProcessor(frame_size, hop_size, threshold).process_folders(
        input_folder, output_folder
    )

In [4]:
def extract_feature(n_mfcc, input_folder, output_folder, selected_features):
    """Extract the selected features from a folder via ``FeatureExtractor``.

    Args:
        n_mfcc: Constructor argument of ``FeatureExtractor``.
        input_folder: Forwarded as the first argument of ``process_folder``.
        output_folder: Forwarded as the second argument of ``process_folder``.
            Despite the name, ``process_data`` in this notebook passes a CSV
            file path here.
        selected_features: Feature names forwarded to ``process_folder``,
            e.g. ``['mfcc', 'mfcc_statistics', 'zcr', 'pitch', 'rms']``.

    Returns:
        None. Whatever ``process_folder`` returns is discarded.
    """
    FeatureExtractor(n_mfcc).process_folder(
        input_folder, output_folder, selected_features
    )

In [5]:
def model(featured_data):
    """Evaluate the candidate classifiers on a feature CSV.

    The CSV is loaded, its rows are shuffled with a fixed seed
    (``random_state=42``) and re-indexed from zero, and the shuffled frame is
    handed to ``ModelEvaluator.evaluate_models``. The candidates are the
    ``RandomForestClassifier`` and ``ExtraTreesClassifier`` classes.

    Args:
        featured_data: Path (or anything ``pd.read_csv`` accepts) of the
            feature table to evaluate.

    Returns:
        A 3-tuple ``(best_model, best_scaler, best_avg_acc)``, exactly as
        unpacked from ``evaluate_models``.
    """
    shuffled = (
        pd.read_csv(featured_data)
        .sample(frac=1, random_state=42)  # fixed seed keeps the row order reproducible
        .reset_index(drop=True)
    )

    candidates = [RandomForestClassifier, ExtraTreesClassifier]
    evaluator = ModelEvaluator(candidates)

    winner, winner_scaler, winner_accuracy = evaluator.evaluate_models(shuffled)
    return winner, winner_scaler, winner_accuracy

In [6]:
def process_data(frame_size, hop_size, threshold, raw_folder, cutted_folder, n_mfcc_list, featured_data_path, n_mfcc_for_images, images_folder, features):
    """Rank every (n_mfcc, feature subset) combination by model accuracy.

    For each ``n_mfcc`` in ``n_mfcc_list`` and each non-empty subset of
    ``features``, the features are re-extracted from ``cutted_folder`` into
    ``featured_data_path`` (the same path is reused on every iteration) and
    evaluated with ``model``. One result row is printed and recorded per
    combination.

    Args:
        frame_size: Not used by this function (kept for interface
            compatibility).
        hop_size: Not used by this function.
        threshold: Not used by this function.
        raw_folder: Not used by this function.
        cutted_folder: Folder passed to ``extract_feature`` as its input.
        n_mfcc_list: Iterable of ``n_mfcc`` values to try.
        featured_data_path: Path passed to ``extract_feature`` as its output
            and then read back by ``model``.
        n_mfcc_for_images: Not used by this function.
        images_folder: Not used by this function.
        features: Sequence of feature names; every non-empty subset is tried.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Scaler``, ``Model``,
        ``N_MFCC``, ``Selected Features`` and ``Average Accuracy``, sorted by
        ``Average Accuracy`` in descending order. When nothing is evaluated
        (empty ``n_mfcc_list`` or empty ``features``) an empty frame with
        those columns is returned instead of raising ``KeyError``.
    """
    result_columns = ['Scaler', 'Model', 'N_MFCC', 'Selected Features', 'Average Accuracy']
    n = len(features)

    results = []

    for n_mfcc in n_mfcc_list:
        # n_dropped goes from 0 (keep everything) to n - 1 (keep one feature),
        # so the empty feature set is never evaluated.
        for n_dropped in range(n):
            for dropped in combinations(range(n), n_dropped):
                selected_features = [name for i, name in enumerate(features) if i not in dropped]
                extract_feature(n_mfcc, cutted_folder, featured_data_path, selected_features)
                current_model, current_scaler, current_avg_acc = model(featured_data_path)
                print(f"Scaler: {current_scaler}, Model: {current_model}, n_mfcc: {n_mfcc}, Selected features: {selected_features}, Accuracy: {current_avg_acc}")
                results.append({'Scaler': current_scaler, 'Model': current_model, 'N_MFCC': n_mfcc, 'Selected Features': selected_features, 'Average Accuracy': current_avg_acc})

    # Explicit columns keep the sort key present even when `results` is empty.
    results_df = pd.DataFrame(results, columns=result_columns)
    ranked_results = results_df.sort_values(by='Average Accuracy', ascending=False)

    return ranked_results

In [7]:
# Full grid search: 2 n_mfcc values x 31 non-empty subsets of 5 features.
# The captured log shows roughly ten minutes per combination, so this cell
# runs for hours.
# NOTE(review): the absolute D:/ paths tie this cell to one machine.
ranked_results = process_data(
    frame_size=256,
    hop_size=128,
    threshold=0.0005,
    raw_folder='D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/combined_data',
    cutted_folder='D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/cleaned_data',
    n_mfcc_list=[26, 40],
    featured_data_path='D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv',
    n_mfcc_for_images=100,
    images_folder='D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/images',
    features=['mfcc', 'mfcc_statistics', 'zcr', 'pitch', 'rms'],
)

[32m2024-05-23 03:52:37.422[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m148[0m - [1mProcessing input folder: D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/cleaned_data[0m
[32m2024-05-23 03:55:05.322[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 03:55:05.487[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 03:56:34.659[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6278300718355214[0m
[32m2024-05-23 03:56:52.054[0m | [1mINFO    [0m | [36md

Scaler: MaxAbsScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc', 'mfcc_statistics', 'zcr', 'pitch', 'rms'], Accuracy: 0.6306498224754357


[32m2024-05-23 04:07:36.411[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 04:07:36.542[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 04:08:57.360[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6246504830319545[0m
[32m2024-05-23 04:09:14.417[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6226506481710842[0m
[32m2024-05-23 04:10:36.975[0m | [1mINFO    [0m 

Scaler: MaxAbsScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc_statistics', 'zcr', 'pitch', 'rms'], Accuracy: 0.6295577574106185


[32m2024-05-23 04:18:36.004[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 04:18:36.049[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 04:19:21.026[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6012925439682932[0m
[32m2024-05-23 04:19:32.108[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6031999009165221[0m
[32m2024-05-23 04:20:16.886[0m | [1mINFO    [0m 

Scaler: StandardScaler, Model: <class 'sklearn.ensemble._forest.ExtraTreesClassifier'>, n_mfcc: 26, Selected features: ['mfcc', 'zcr', 'pitch', 'rms'], Accuracy: 0.6102014697382545


[32m2024-05-23 04:26:00.944[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 04:26:01.105[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 04:27:30.341[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6252852778465858[0m
[32m2024-05-23 04:27:48.561[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6246508133102138[0m
[32m2024-05-23 04:29:18.010[0m | [1mINFO    [0m 

Scaler: RobustScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc', 'mfcc_statistics', 'pitch', 'rms'], Accuracy: 0.6293768474940137


[32m2024-05-23 04:38:13.862[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 04:38:14.029[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 04:39:43.146[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6253765172157543[0m
[32m2024-05-23 04:40:00.803[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6191063495995375[0m
[32m2024-05-23 04:41:29.918[0m | [1mINFO    [0m 

Scaler: MinMaxScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc', 'mfcc_statistics', 'zcr', 'rms'], Accuracy: 0.6263775906200975


[32m2024-05-23 04:50:39.960[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 04:50:40.122[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 04:52:08.363[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6260126331434233[0m
[32m2024-05-23 04:52:25.833[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6226503178928247[0m
[32m2024-05-23 04:53:55.098[0m | [1mINFO    [0m 

Scaler: None, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc', 'mfcc_statistics', 'zcr', 'pitch'], Accuracy: 0.6260126331434233


[32m2024-05-23 05:02:00.741[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 05:02:00.762[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 05:02:22.686[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.5224060771199737[0m
[32m2024-05-23 05:02:30.852[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.5223126909421187[0m
[32m2024-05-23 05:02:53.006[0m | [1mINFO    [0m 

Scaler: MaxAbsScaler, Model: <class 'sklearn.ensemble._forest.ExtraTreesClassifier'>, n_mfcc: 26, Selected features: ['zcr', 'pitch', 'rms'], Accuracy: 0.5264942614152422


[32m2024-05-23 05:06:20.025[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 05:06:20.163[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 05:07:42.414[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6200142845347205[0m
[32m2024-05-23 05:07:59.306[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.6213775906200975[0m
[32m2024-05-23 05:09:21.439[0m | [1mINFO    [0m 

Scaler: MinMaxScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc_statistics', 'pitch', 'rms'], Accuracy: 0.6269230451655521


[32m2024-05-23 05:17:45.903[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 05:17:46.068[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m
[32m2024-05-23 05:19:13.778[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.RandomForestClassifier'> - scaler: None, accuracy: 0.6196513912971678[0m
[32m2024-05-23 05:19:30.290[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m74[0m - [1mModel <class 'sklearn.ensemble._forest.ExtraTreesClassifier'> - scaler: None, accuracy: 0.623558583106267[0m
[32m2024-05-23 05:20:52.387[0m | [1mINFO    [0m |

Scaler: RobustScaler, Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, n_mfcc: 26, Selected features: ['mfcc_statistics', 'zcr', 'rms'], Accuracy: 0.625558087688878


[32m2024-05-23 05:34:32.362[0m | [1mINFO    [0m | [36mfeature_extraction.feature_extractor[0m:[36mprocess_folder[0m:[36m158[0m - [1mFeature extraction completed. Data saved to: D:/data_analysis/speech_emotion_recognition/notebooks/feature0.csv[0m
[32m2024-05-23 05:34:33.002[0m | [1mINFO    [0m | [36mdata_processing.english_data_processing[0m:[36mevaluate_models[0m:[36m53[0m - [1mFinding best model....[0m


In [None]:
# End the cell with the DataFrame itself so Jupyter renders it as a rich
# table (sorted by 'Average Accuracy', best first) instead of plain text.
ranked_results