This script follows DS_1_Feature_Extraction.py and generates all the datasets needed for modelling, based on the following factors:

*- MFCC features: Yes, No

*- Data Scaling: Yes, No

*- Anomaly treatment: Yes, No

*- Hilbert Transformed: Yes, No

*- Features: All Features, Mean features and mean subset

*- Sound sample duration: Whole sound (30 seconds), 5 seconds, 1 second

In [1]:
# import necessary libraries

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.options.mode.chained_assignment = None

# utility functions to build the datasets

def scale_dataset(data_df):
    """Min-max scale every numeric column of *data_df* into the range [0, 1].

    Missing numeric values are replaced by 0 before scaling. Non-numeric
    columns are passed through untouched and placed first in the result,
    followed by the scaled numeric columns, each group in its original order.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Dataset holding the numeric feature columns to scale.

    Returns
    -------
    pandas.DataFrame
        Scaled copy of the dataset with a fresh 0..n-1 index.
    """
    numeric_col_df = data_df.select_dtypes(include='number').fillna(0)
    numeric_col_names = set(numeric_col_df.columns)
    # A list (not a set) keeps the column order deterministic and is the only
    # kind of collection newer pandas versions accept as a column indexer.
    non_numeric_cols = [col for col in data_df.columns if col not in numeric_col_names]

    if numeric_col_df.shape[0] == 0 or numeric_col_df.shape[1] == 0:
        # Nothing to scale: the scaler cannot be fitted on an empty array.
        untouched_df = pd.concat([data_df[non_numeric_cols], numeric_col_df], axis=1)
        return untouched_df.reset_index(drop=True)

    scaler_obj = MinMaxScaler(feature_range=(0, 1))
    scaled_values = scaler_obj.fit_transform(numeric_col_df)
    # Re-attach the original index so the column-wise concat below pairs each
    # scaled row with its own non-numeric values even when the input index is
    # not a plain 0..n-1 range.
    numeric_col_df_scaled = pd.DataFrame(
        scaled_values,
        columns=numeric_col_df.columns,
        index=numeric_col_df.index,
    )
    data_df_scaled = pd.concat([data_df[non_numeric_cols], numeric_col_df_scaled], axis=1)
    return data_df_scaled.reset_index(drop=True)

def ignore_columns_in_dataset(data_df, str_pattern=""):
    """Return *data_df* without the columns whose name contains *str_pattern*.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Dataset to filter.
    str_pattern : str, optional
        Pattern searched for in every column name (interpreted as a regular
        expression by ``str.contains``). Surrounding whitespace is stripped.
        An empty pattern ignores nothing.

    Returns
    -------
    pandas.DataFrame
        Dataset restricted to the columns that do not match the pattern.
    """
    pattern = str_pattern.strip()
    if not pattern:
        # An empty pattern matches every column name, which would silently
        # drop the whole dataset; treat it as "nothing to ignore" instead.
        return data_df.copy()
    # na=False: column labels that are not strings never match the pattern.
    matches_pattern = data_df.columns.str.contains(pattern, na=False)
    return data_df.loc[:, ~matches_pattern]

def include_columns_in_dataset(data_df, class_var_name, str_pattern=""):
    """Return the class column(s) plus every column whose name contains *str_pattern*.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Dataset to filter.
    class_var_name : str or list of str
        Name(s) of the class column(s); always kept and placed first.
    str_pattern : str, optional
        Pattern searched for in every column name (interpreted as a regular
        expression by ``str.contains``). Surrounding whitespace is stripped.
        The default empty pattern matches every column.

    Returns
    -------
    pandas.DataFrame
        The class column(s) followed by the matching feature columns.
    """
    class_cols = class_var_name if isinstance(class_var_name, list) else [class_var_name]
    # na=False: column labels that are not strings never match the pattern.
    matches_pattern = data_df.columns.str.contains(str_pattern.strip(), na=False)
    # The class column is added explicitly below; leaving it out of the pattern
    # selection avoids a duplicated column when the pattern also matches it.
    is_class_col = data_df.columns.isin(class_cols)
    data_df_include = data_df.loc[:, matches_pattern & ~is_class_col]
    return pd.concat([data_df[class_var_name], data_df_include], axis=1)

def eliminate_irrelevant_columns_in_dataset(data_df):
    """Drop numeric columns that carry (almost) no information.

    A numeric column is dropped when, after replacing missing values by 0,
    it is constant (minimum equals maximum) or its number of distinct values
    is below 5% of the number of rows. Non-numeric columns are always kept
    and placed first in the result.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Dataset to clean.

    Returns
    -------
    tuple
        ``(dressed_df, cols_to_ignore)``: the cleaned dataset, with remaining
        missing values replaced by 0, and the list of dropped column names.
    """
    data_size = data_df.shape[0]
    numeric_df = data_df.select_dtypes(include='number').fillna(0)
    numeric_col_names = set(numeric_df.columns)
    # A list (not a set) keeps the column order deterministic and is the only
    # kind of collection newer pandas versions accept as a column indexer.
    non_numeric_cols = [col for col in data_df.columns if col not in numeric_col_names]
    col_df = data_df[non_numeric_cols]

    cols_to_ignore = []
    for col_name in numeric_df.columns:
        column = numeric_df[col_name]
        # A column without rows has no spread either; checking the size first
        # also keeps the ratio below from dividing by zero.
        if data_size == 0 or column.min() == column.max():
            cols_to_ignore.append(col_name)
            continue
        # Uniqueness is measured on the dataset that was passed in, never on
        # some other module-level frame.
        uniqueness_ratio = 1 - (column.nunique() / data_size)
        if uniqueness_ratio > 0.95:
            cols_to_ignore.append(col_name)

    kept_numeric_df = numeric_df.loc[:, ~numeric_df.columns.isin(cols_to_ignore)]
    dressed_df = pd.concat([col_df, kept_numeric_df], axis=1).fillna(0)
    return dressed_df, cols_to_ignore

def standardize_data(x):
    """Return the z-scores of *x*: its deviations from the mean divided by its standard deviation."""
    centre = np.mean(x)
    spread = np.std(x)
    deviations = x - centre
    return deviations / spread

def treat_anomaly(data_df, class_col="sound_file_class", z_threshold=3):
    """Replace per-class outliers in every numeric column by the class mean.

    The rows of each class are processed separately. Within a class, every
    numeric column is standardised (population standard deviation) and any
    value whose absolute z-score is not below *z_threshold* is replaced by the
    mean of that column for the class. A column that is constant within the
    class has an undefined z-score and is therefore replaced by its mean as
    well, which leaves its values unchanged.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Dataset containing *class_col* and the numeric feature columns.
    class_col : str, optional
        Name of the column holding the class label.
    z_threshold : float, optional
        Absolute z-score from which a value is considered an anomaly.

    Returns
    -------
    pandas.DataFrame
        The treated rows grouped by class (classes in order of first
        appearance), non-numeric columns first. Original index labels are kept
        and rows without a class label are left out.
    """
    treated_parts = []
    # unique() keeps first-appearance order, so the row order of the result is
    # reproducible from run to run.
    for class_name in data_df[class_col].dropna().unique():
        class_df = data_df[data_df[class_col] == class_name]
        numeric_df = class_df.select_dtypes(include='number').fillna(0)
        numeric_col_names = set(numeric_df.columns)
        non_numeric_cols = [col for col in class_df.columns if col not in numeric_col_names]

        treated_cols = {}
        for col_name in numeric_df.columns:
            column = numeric_df[col_name]
            mean_val = column.mean()
            # Same standardisation as standardize_data(): population std (ddof=0).
            z_scores = (column - mean_val) / column.std(ddof=0)
            # "not below the threshold" also flags undefined (NaN) z-scores.
            is_anomaly = ~(z_scores.abs() < z_threshold)
            treated_cols[col_name] = column.mask(is_anomaly, mean_val)

        treated_numeric_df = pd.DataFrame(treated_cols, index=class_df.index)
        treated_parts.append(
            pd.concat([class_df[non_numeric_cols], treated_numeric_df], axis=1)
        )

    if not treated_parts:
        return pd.DataFrame()
    return pd.concat(treated_parts, axis=0)

In [2]:
# Build every "All Feature" modelling dataset (4 classes) from the extracted-feature CSVs.
#
# Six source files are processed: whole sound signal (30 seconds), 5-second samples and
# 1-second samples, each with and without the Hilbert transform. For every source file
# the eight combinations of
#     MFCC included   : yMFCC / nMFCC
#     Scale data      : ys / ns
#     Anomaly treated : yat / nat
# are written to
#     Data/Model Datasets/DS_1_All_Feature_<duration>_<mfcc>_<hilbert>_<scale>_<anomaly>.csv
import os

# os.path.join keeps the paths valid on every platform; the former "Data\..." literals
# relied on Windows separators and on unrecognised backslash escapes.
DATA_DIR = "Data"
MODEL_DATASET_DIR = os.path.join(DATA_DIR, "Model Datasets")
MODEL_DATASET_NAME = "DS_1_All_Feature_{duration}_{mfcc}_{hilbert}_{scale}_{anomaly}.csv"

# (source feature file, sample-duration tag, Hilbert-transform tag)
FEATURE_SOURCES = [
    ("DS_1_Feature_MFCC_hilbert_trans.csv", "30_sec", "yht"),
    ("DS_1_Feature_MFCC_no_hilbert_trans.csv", "30_sec", "nht"),
    ("DS_1_Feature_MFCC_hilbert_trans_ms_5sec.csv", "5_sec", "yht"),
    ("DS_1_Feature_MFCC_no_hilbert_trans_ms_5sec.csv", "5_sec", "nht"),
    ("DS_1_Feature_MFCC_hilbert_trans_ms_1sec.csv", "1_sec", "yht"),
    ("DS_1_Feature_MFCC_no_hilbert_trans_ms_1sec.csv", "1_sec", "nht"),
]

# (scale the data?, treat anomalies?) -- scaling is always applied before anomaly treatment
TREATMENTS = [(False, False), (False, True), (True, False), (True, True)]

os.makedirs(MODEL_DATASET_DIR, exist_ok=True)

for source_file, duration_tag, hilbert_tag in FEATURE_SOURCES:
    # NOTE: base_df is assigned at module level on purpose, so any code that reads the
    # global `base_df` keeps seeing the dataset that is currently being processed.
    base_df = pd.read_csv(os.path.join(DATA_DIR, source_file))
    base_df, col_ignored = eliminate_irrelevant_columns_in_dataset(base_df)
    no_mfcc_base_df = ignore_columns_in_dataset(base_df, str_pattern="mfcc_")

    for mfcc_tag, source_df in (("yMFCC", base_df), ("nMFCC", no_mfcc_base_df)):
        for scale_data, treat_anomalies in TREATMENTS:
            variant_df = source_df
            if scale_data:
                variant_df = scale_dataset(variant_df)
            if treat_anomalies:
                variant_df = treat_anomaly(variant_df)

            # The untreated MFCC dataset is base_df itself, which was already cleaned
            # right after loading; every other variant is cleaned once more because
            # dropping, scaling or imputing can leave further uninformative columns.
            is_plain_base = mfcc_tag == "yMFCC" and not scale_data and not treat_anomalies
            if not is_plain_base:
                variant_df, col_ignored = eliminate_irrelevant_columns_in_dataset(variant_df)

            file_name = MODEL_DATASET_NAME.format(
                duration=duration_tag,
                mfcc=mfcc_tag,
                hilbert=hilbert_tag,
                scale="ys" if scale_data else "ns",
                anomaly="yat" if treat_anomalies else "nat",
            )
            variant_df.to_csv(os.path.join(MODEL_DATASET_DIR, file_name), index=False)
