In [2]:
import pandas as pd
import os

# Updated path to match your folder
csv_path = r"C:\Users\vaidi\Downloads\emotion_detection\VideoDemographics.csv"

# Load metadata
metadata = pd.read_csv(csv_path)

# Preview the data
metadata.head()

Unnamed: 0,ActorID,Age,Sex,Race,Ethnicity
0,1001,51,Male,Caucasian,Not Hispanic
1,1002,21,Female,Caucasian,Not Hispanic
2,1003,21,Female,Caucasian,Not Hispanic
3,1004,42,Female,Caucasian,Not Hispanic
4,1005,29,Male,African American,Not Hispanic


In [3]:
# Create a gender map from ActorID
gender_map = {
    int(row['ActorID']): 1 if row['Sex'].strip().lower().startswith('f') else 0
    for _, row in metadata.iterrows()
}

# Show a few entries
list(gender_map.items())[:5]


[(1001, 0), (1002, 1), (1003, 1), (1004, 1), (1005, 0)]

In [4]:
import librosa
import numpy as np
import os

# ✅ Corrected path to audio files
audio_path = r"C:\Users\vaidi\Downloads\emotion_detection\AudioWAV"

# Lists to store features and labels
features = []
labels = []

# Function to extract MFCC features
def extract_mfcc(file_path):
    y, sr = librosa.load(file_path, sr=22050)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)

# Loop through audio files
for filename in os.listdir(audio_path):
    if filename.endswith(".wav"):
        actor_id = int(filename.split("_")[0])
        gender = gender_map.get(actor_id)

        if gender is not None:
            file_path = os.path.join(audio_path, filename)
            try:
                mfcc = extract_mfcc(file_path)
                features.append(mfcc)
                labels.append(gender)
            except Exception as e:
                print(f"Error processing {filename}: {e}")


In [5]:
from sklearn.model_selection import train_test_split

# Convert to NumPy arrays
X = np.array(features)
y = np.array(labels)

# Split into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Show the shapes of each set
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((5953, 13), (1489, 13), (5953,), (1489,))

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Create and train the model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Male", "Female"]))


Accuracy: 0.7353928811282741

Classification Report:
               precision    recall  f1-score   support

        Male       0.75      0.76      0.76       802
      Female       0.72      0.71      0.71       687

    accuracy                           0.74      1489
   macro avg       0.73      0.73      0.73      1489
weighted avg       0.74      0.74      0.74      1489



In [7]:
import joblib

# Save the trained model
joblib.dump(model, "gender_classifier.pkl")
print("Model saved as gender_classifier.pkl")


Model saved as gender_classifier.pkl


In [10]:
# Load saved model
gender_model = joblib.load("gender_classifier.pkl")

# Path to a test .wav file
test_file = r"C:\Users\vaidi\Downloads\emotion_detection\AudioWAV\1010_WSI_HAP_XX.wav"  # Replace with any valid file

# Extract MFCC and reshape for prediction
mfcc = extract_mfcc(test_file).reshape(1, -1)
predicted_gender = gender_model.predict(mfcc)[0]

# Interpret prediction
gender_label = "Female" if predicted_gender == 1 else "Male"
print("Predicted Gender:", gender_label)


Predicted Gender: Female
