In [5]:
from loguru import logger
import os
import librosa
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from scripts.mfcc_extractor import MfccExtractor
from scripts.pitch_extractor import PitchExtractor
from scripts.rms_extractor import RmsExtractor
from scripts.zcr_extractor import ZcrExtractor
import pandas as pd
import concurrent.futures
import logging

class StatisticFeatureExtraction:
    """Build one row of summary-statistic audio features per audio file.

    For every file the class combines the statistics reported by the project
    extractors (MFCC, zero-crossing rate, pitch and RMS) into a single flat
    dictionary, and ``process_folder`` writes one such row per file to a CSV.
    """

    # Framing passed to the ZCR and RMS extractors. Files with fewer samples
    # than FRAME_SIZE are skipped.
    FRAME_SIZE = 2048
    HOP_SIZE = 512

    # Per-coefficient MFCC statistics, in the order their columns are emitted.
    MFCC_STATISTICS = (
        'mean',
        'variance',
        'max',
        'min',
        'median',
        '25th_percentile',
        '75th_percentile',
        'range',
        'skewness',
        'kurtosis',
        'energy',
    )

    def __init__(self, n_mfcc=20):
        """Store the number of MFCC coefficients requested from MfccExtractor."""
        self.n_mfcc = n_mfcc

    def extract_mfcc_statistics(self, signal, sample_rate):
        """Flatten the per-coefficient MFCC statistics into one dictionary.

        Args:
            signal: Audio samples handed to ``MfccExtractor``.
            sample_rate: Sampling rate handed to ``MfccExtractor``.

        Returns:
            dict: Keys of the form ``mfcc_<statistic>_<k>`` with ``k`` starting
            at 1. All statistics of coefficient 1 come first, then all
            statistics of coefficient 2, and so on.
        """
        mfcc_extractor = MfccExtractor(signal, sample_rate, self.n_mfcc)
        features = mfcc_extractor.compute_mfccs_statistics()

        # One sequence per statistic; zip(*...) then walks them coefficient by
        # coefficient and stops at the shortest sequence.
        per_statistic = [features[f'mfcc_{name}'] for name in self.MFCC_STATISTICS]

        feature_data = {}
        for coefficient, values in enumerate(zip(*per_statistic), start=1):
            for name, value in zip(self.MFCC_STATISTICS, values):
                feature_data[f'mfcc_{name}_{coefficient}'] = value
        return feature_data

    def extract_pitch_statistics(self, signal, sample_rate):
        """Return whatever ``PitchExtractor.compute_pitch_statistics`` reports."""
        pitch_extractor = PitchExtractor(signal, sample_rate)
        return pitch_extractor.compute_pitch_statistics()

    def extract_zcr_statistics(self, signal, frame_size, hop_size):
        """Return whatever ``ZcrExtractor.compute_zcr_statistics`` reports."""
        zcr_extractor = ZcrExtractor(signal, frame_size, hop_size)
        return zcr_extractor.compute_zcr_statistics()

    def extract_rms_statistics(self, signal, frame_size, hop_size):
        """Return whatever ``RmsExtractor.compute_rms_statistics`` reports."""
        rms_extractor = RmsExtractor(signal, frame_size, hop_size)
        return rms_extractor.compute_rms_statistics()

    def feature_engineering_for_file(self, audio_file, target_seconds=180):
        """Compute the full statistic feature row for one audio file.

        Args:
            audio_file: Path passed to ``librosa.load`` (native sample rate).
            target_seconds: Accepted for backward compatibility; not used.

        Returns:
            dict | None: MFCC columns followed by ``zcr_*``, ``pitch_*`` and
            ``rms_*`` columns, or ``None`` when the file has fewer than
            ``FRAME_SIZE`` samples or any step raises.
        """
        try:
            signal, sample_rate = librosa.load(audio_file, sr=None)
            if len(signal) < self.FRAME_SIZE:
                # Previously these files were dropped without any trace.
                logger.warning(
                    f"Skipping file {audio_file}: {len(signal)} samples is shorter "
                    f"than one frame ({self.FRAME_SIZE})"
                )
                return None

            # MFCC features (already carry their final column names)
            features = dict(self.extract_mfcc_statistics(signal, sample_rate))

            # ZCR features
            zcr_stats = self.extract_zcr_statistics(
                signal, frame_size=self.FRAME_SIZE, hop_size=self.HOP_SIZE)
            features.update({f'zcr_{key}': value for key, value in zcr_stats.items()})

            # Pitch features
            pitch_stats = self.extract_pitch_statistics(signal, sample_rate)
            features.update({f'pitch_{key}': value for key, value in pitch_stats.items()})

            # RMS features
            rms_stats = self.extract_rms_statistics(
                signal, frame_size=self.FRAME_SIZE, hop_size=self.HOP_SIZE)
            features.update({f'rms_{key}': value for key, value in rms_stats.items()})

            return features
        except Exception as e:
            # NOTE(review): a recorded run logged "'tuple' object has no
            # attribute 'items'" for some files, so one of the extractors can
            # apparently return a tuple instead of a mapping. Logging the
            # traceback shows which one.
            logger.exception(f"Error processing file {audio_file}: {e}")
        return None

    @staticmethod
    def _read_listing(listing_path):
        """Load the file/label table: ``.csv`` via read_csv, anything else via read_excel."""
        extension = os.path.splitext(str(listing_path))[1].lower()
        if extension == '.csv':
            return pd.read_csv(listing_path)
        return pd.read_excel(listing_path)

    def process_folder(self, input_folder, csv_file,
                       output_csv='train_add_feature_data.csv', checkpoint_every=100):
        """Extract features for every listed file and write them to a CSV.

        Args:
            input_folder: Not used; file paths are taken from the listing.
            csv_file: Listing with ``cleaned_file_path`` and ``label`` columns.
                A ``.csv`` path is read as CSV, anything else as an Excel
                workbook.
            output_csv: Destination CSV, rewritten at every checkpoint and
                once more at the end.
            checkpoint_every: Number of listed files between checkpoints.
        """
        file_and_label_df = self._read_listing(csv_file)

        # Collect plain dicts and build the DataFrame only when writing;
        # concatenating a one-row frame per file is quadratic.
        records = []
        for position, (_, row) in enumerate(file_and_label_df.iterrows()):
            file_path = row['cleaned_file_path']
            features = self.feature_engineering_for_file(file_path)

            if features is not None:
                features['file_path'] = file_path
                features['label'] = row['label']
                records.append(features)

            # Checkpoint after the current row is stored so the partial file
            # matches the progress that is logged.
            if position % checkpoint_every == 0:
                logger.info(f'Processed {position} file!')
                pd.DataFrame(records).to_csv(output_csv, index=False)

        # Save the aggregated table to the CSV file
        pd.DataFrame(records).to_csv(output_csv, index=False)


# Build the statistic feature table for the training split.
feature_extractor = StatisticFeatureExtraction()
feature_extractor.process_folder(
    input_folder=r'D:\data_analysis\speech_emotion_recognition\data\EnglishDataset\train_test_splited_data\cleaned_data\train',
    csv_file=r"D:\data_analysis\speech_emotion_recognition\src\feature_extraction\Book1.xlsx",
)

[32m2024-05-30 05:45:41.299[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m99[0m - [1mProcessed 0 file![0m
[32m2024-05-30 05:46:47.085[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m99[0m - [1mProcessed 100 file![0m
[32m2024-05-30 05:46:48.897[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mfeature_engineering_for_file[0m:[36m86[0m - [31m[1mError processing file D:\data_analysis\speech_emotion_recognition\data\EnglishDataset\train_test_splited_data\cleaned_data\train\1027_IEO_NEU_XX.wav: 'tuple' object has no attribute 'items'[0m
[32m2024-05-30 05:46:49.549[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mfeature_engineering_for_file[0m:[36m86[0m - [31m[1mError processing file D:\data_analysis\speech_emotion_recognition\data\EnglishDataset\train_test_splited_data\cleaned_data\train\noisy_1027_IEO_NEU_XX.wav: 'tuple' object has no attribute 'items'[0m
[32m2024-05-30 05:47:47.408[0m | [1mINFO    [0m | [

In [12]:
import pandas as pd

# Load the two feature tables written by earlier extraction runs.
train_feature_data = pd.read_csv('train_feature_data.csv')
train_add_feature_data = pd.read_csv('train_add_feature_data.csv')

# Stack the rows of both tables. ignore_index=True gives the merged frame a
# fresh 0..n-1 index instead of repeating each input's row labels, so later
# label-based lookups on merged_data are unambiguous.
merged_data = pd.concat(
    [train_feature_data, train_add_feature_data],
    ignore_index=True,
)

# Save the merged DataFrame to a new CSV file (row labels are not written).
merged_data.to_csv('merged_train_feature_data.csv', index=False)


In [1]:
from loguru import logger
import os
import librosa
import csv
import pandas as pd
import numpy as np
from scripts.mfcc_extractor import MfccExtractor
from scripts.pitch_extractor import PitchExtractor
from scripts.rms_extractor import RmsExtractor
from scripts.zcr_extractor import ZcrExtractor

class FeatureExtractor:
    """Build one fixed-length frame-level feature vector per audio file.

    Each file's ZCR, RMS, pitch and MFCC tracks are padded or trimmed to the
    number of frames implied by a target duration and then concatenated into
    a single 1-D vector. ``process_folder`` writes one such vector per file
    to a CSV.
    """

    def __init__(self, n_mfcc=20):
        """Store the number of MFCC coefficients requested from MfccExtractor."""
        self.n_mfcc = n_mfcc

    def extract_original_mfcc(self, signal, sample_rate, frame_size, hop_size):
        """Return the rows of the MFCC matrix as a list of vectors.

        One list entry is produced per row of the matrix returned by
        ``MfccExtractor.compute_mfccs``.
        """
        mfcc_extractor = MfccExtractor(signal, sample_rate, self.n_mfcc, frame_size, hop_size)
        mfcc_matrix = mfcc_extractor.compute_mfccs()
        return [mfcc_matrix[row, :] for row in range(mfcc_matrix.shape[0])]

    def extract_pitch(self, signal, sample_rate, frame_size, hop_size):
        """Return whatever ``PitchExtractor.compute_pitch`` reports."""
        pitch_extractor = PitchExtractor(signal, sample_rate, frame_size, hop_size)
        return pitch_extractor.compute_pitch()

    def extract_original_zcr(self, signal, frame_size, hop_size):
        """Return whatever ``ZcrExtractor.compute_zcr`` reports."""
        zcr_extractor = ZcrExtractor(signal, frame_size, hop_size)
        return zcr_extractor.compute_zcr()

    def extract_original_rms(self, signal, frame_size, hop_size):
        """Return whatever ``RmsExtractor.compute_rms`` reports."""
        rms_extractor = RmsExtractor(signal, frame_size, hop_size)
        return rms_extractor.compute_rms()

    def pad_or_trim(self, zcr, target_length):
        """Force a feature track to exactly ``target_length`` entries.

        Shorter inputs are zero-padded at the end; longer inputs are cut.
        Despite the parameter name this is used for every feature track.
        NOTE(review): assumes a 1-D input -- confirm against the extractors.
        """
        if len(zcr) < target_length:
            return np.pad(zcr, (0, target_length - len(zcr)), 'constant')
        return zcr[:target_length]

    def calculate_number_of_frames(self, audio_length, sample_rate, frame_length, hop_length):
        """Compute how many frames a signal of the given duration yields.

        Args:
            audio_length: Duration in seconds.
            sample_rate: Samples per second.
            frame_length: Frame size in samples.
            hop_length: Hop size in samples.

        Returns:
            int: ``1 + (samples - frame_length) // hop_length``, clamped to 0
            when the signal is shorter than one frame.
        """
        # Signal length expressed in samples
        signal_length = int(audio_length * sample_rate)

        # Number of frames
        num_frames = 1 + (signal_length - frame_length) // hop_length

        # A negative count would make pad_or_trim slice from the end of the
        # track instead of producing a fixed length.
        return max(num_frames, 0)

    def feature_engineering_for_file(self, audio_file, target_seconds=3, frame_size=2048, hop_size=512):
        """Compute the concatenated fixed-length feature vector for one file.

        Args:
            audio_file: Path passed to ``librosa.load`` (native sample rate).
            target_seconds: Duration that fixes the per-track frame count.
            frame_size: Frame size in samples for all extractors.
            hop_size: Hop size in samples for all extractors.

        Returns:
            numpy.ndarray | None: ZCR, RMS, pitch and then every MFCC row, each
            padded/trimmed to the same frame count and joined with
            ``np.hstack``. ``None`` when the file has fewer than ``frame_size``
            samples or any step raises.
        """
        try:
            signal, sample_rate = librosa.load(audio_file, sr=None)
            if len(signal) < frame_size:
                # Previously these files were dropped without any trace.
                logger.warning(
                    f"Skipping file {audio_file}: {len(signal)} samples is shorter "
                    f"than one frame ({frame_size})"
                )
                return None

            num_of_frames = self.calculate_number_of_frames(
                target_seconds, sample_rate, frame_size, hop_size)

            zcr = self.extract_original_zcr(signal, frame_size, hop_size)
            zcr_normalize = self.pad_or_trim(zcr, num_of_frames)

            rms = self.extract_original_rms(signal, frame_size, hop_size)
            rms_normalize = self.pad_or_trim(rms, num_of_frames)

            pitch = self.extract_pitch(signal, sample_rate, frame_size, hop_size)
            pitch_normalize = self.pad_or_trim(pitch, num_of_frames)

            mfcc_rows = self.extract_original_mfcc(signal, sample_rate, frame_size, hop_size)
            stacked_mfcc = np.hstack(
                [self.pad_or_trim(mfcc_vector, num_of_frames) for mfcc_vector in mfcc_rows])

            return np.hstack((zcr_normalize, rms_normalize, pitch_normalize, stacked_mfcc))
        except Exception as e:
            # Log the traceback too, so the failing extractor can be identified.
            logger.exception(f"Error processing file {audio_file}: {e}")
        return None

    @staticmethod
    def _read_listing(listing_path):
        """Load the file/label table: Excel extensions via read_excel, anything else via read_csv."""
        extension = os.path.splitext(str(listing_path))[1].lower()
        if extension in ('.xls', '.xlsx', '.xlsm'):
            return pd.read_excel(listing_path)
        return pd.read_csv(listing_path)

    def process_folder(self, input_folder, csv_file,
                       output_csv='_test_feature_data.csv', checkpoint_every=100):
        """Extract feature vectors for every listed file and write them to a CSV.

        Args:
            input_folder: Not used; file paths are taken from the listing.
            csv_file: Listing with ``cleaned_file_path`` and ``label`` columns.
                Excel extensions are read as workbooks, anything else as CSV.
            output_csv: Destination CSV, rewritten at every checkpoint and
                once more at the end.
            checkpoint_every: Number of listed files between checkpoints.
        """
        file_and_label_df = self._read_listing(csv_file)

        # Collect plain dicts and build the DataFrame only when writing;
        # concatenating a one-row frame per file is quadratic.
        records = []
        for position, (_, row) in enumerate(file_and_label_df.iterrows()):
            file_path = row['cleaned_file_path']
            features = self.feature_engineering_for_file(file_path)

            if features is not None:
                features_dict = {'file_path': file_path, 'label': row['label']}
                # One column per vector entry, numbered from 1.
                features_dict.update(
                    (f'feature_{i}', feature) for i, feature in enumerate(features, start=1))
                records.append(features_dict)

            # Checkpoint after the current row is stored so the partial file
            # matches the progress that is logged.
            if position % checkpoint_every == 0:
                logger.info(f'Processed {position} file!')
                pd.DataFrame(records).to_csv(output_csv, index=False)

        # Save the aggregated table to the CSV file
        pd.DataFrame(records).to_csv(output_csv, index=False)

In [2]:
# Build the frame-level feature table for the test split.
feature_extractor = FeatureExtractor()
feature_extractor.process_folder(
    input_folder=r'D:\data_analysis\speech_emotion_recognition\data\EnglishDataset\train_test_splited_data\cleaned_data\test',
    csv_file=r"D:\data_analysis\speech_emotion_recognition\data\EnglishDataset\train_test_splited_data\cleaned_data\test_file_paths_with_labels.csv",
)

[32m2024-05-31 08:10:02.204[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 0 file![0m
[32m2024-05-31 08:10:49.978[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 100 file![0m
[32m2024-05-31 08:11:39.360[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 200 file![0m
[32m2024-05-31 08:12:24.938[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 300 file![0m
[32m2024-05-31 08:13:10.079[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 400 file![0m
[32m2024-05-31 08:13:51.495[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 500 file![0m
[32m2024-05-31 08:14:40.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_folder[0m:[36m94[0m - [1mProcessed 600 file![0m
[32m2024-05-31 08:15:47.909[0m | 