In [4]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Root folder of the dataset on Google Drive (update this if needed).
# Each immediate sub-folder is read as one class; its lower-cased name is the label.
# NOTE(review): assumes Drive is already mounted under /content/drive - confirm.
data_dir = "/content/drive/MyDrive/team audio/team audio"

# Turn one audio file into a fixed-length MFCC feature vector.
def extract_features(file_path, n_mfcc=13, duration=3, offset=0.5):
    """Summarise an audio file as the time-average of its MFCCs.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    n_mfcc : int, default 13
        Number of MFCC coefficients to compute; this is also the length of
        the returned vector.
    duration : float, default 3
        Maximum number of seconds of audio to read.
    offset : float, default 0.5
        Number of seconds to skip at the start of the file.

    Returns
    -------
    numpy.ndarray or None
        1-D array with one mean value per coefficient, or ``None`` when the
        file could not be processed. Failures are printed rather than raised
        so that a batch run over many files keeps going.
    """
    try:
        audio, sr = librosa.load(file_path, duration=duration, offset=offset)
        # A clip no longer than `offset` can come back with zero samples;
        # report it through the normal error path instead of deriving a
        # meaningless feature vector from nothing.
        if audio.size == 0:
            raise ValueError("no audio samples after applying offset")
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # mfccs is laid out as (coefficient, frame); average over the frames.
        return np.mean(mfccs, axis=1)
    except Exception as e:
        print(f"‚ùå Error loading: {file_path} | {e}")
        return None

# Build the feature matrix: every sub-folder of data_dir is one class and
# every regular file inside it is one sample.
features = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the seeded train/test split) identical across runs.
for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    if not os.path.isdir(folder_path):
        continue

    emotion = folder.lower()  # the label is the lower-cased folder name

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Only regular files are samples; nested folders are skipped.
        if not os.path.isfile(file_path):
            continue

        # Files that fail to load come back as None and are left out.
        mfcc = extract_features(file_path)
        if mfcc is not None:
            features.append(mfcc)
            labels.append(emotion)

# Fail early with a clear message instead of an obscure error further down.
if not features:
    raise RuntimeError(f"No usable audio files found under {data_dir!r}")

# Stack the per-file vectors into arrays for scikit-learn.
X = np.array(features)
y = np.array(labels)

print("‚úÖ Feature shape:", X.shape)
print("‚úÖ Labels:", set(y))
print("‚úÖ Total samples:", len(y))

# Hold out a fixed fraction of the samples for evaluation.
TEST_FRACTION = 0.2
SEED = 42

# Stratify by label whenever the data allow it, so that each class appears in
# both partitions in proportion. Stratification needs at least two samples per
# class and room for every class in each partition; otherwise fall back to a
# plain shuffled split.
class_names, class_counts = np.unique(y, return_counts=True)
n_test = int(np.ceil(TEST_FRACTION * len(y)))
can_stratify = (
    len(class_counts) > 0
    and class_counts.min() >= 2
    and n_test >= len(class_names)
    and len(y) - n_test >= len(class_names)
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SEED,
    stratify=y if can_stratify else None,
)

# Seed the forest as well; otherwise the reported metrics change on every run.
model = RandomForestClassifier(random_state=SEED)
model.fit(X_train, y_train)

# Score the held-out samples.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("üéØ Accuracy:", accuracy)
print("üìä Classification Report:")
print(classification_report(y_test, y_pred))
# Ask the user to upload audio files through Colab; the result is keyed by file name.
from google.colab import files

uploaded = files.upload()
def predict_emotion(file_path):
    """Classify one audio file with the trained model.

    Returns the predicted label for the file at ``file_path``, or the string
    "could not process audio" when feature extraction yields ``None``.
    """
    vector = extract_features(file_path)
    if vector is None:
        return "could not process audio"
    # The model predicts on 2-D input, so present the vector as a single row.
    return model.predict(vector.reshape(1, -1))[0]
# Print one "name:label" line per uploaded file.
for fname in uploaded:
    print(f"{fname}:{predict_emotion(fname)}")

‚úÖ Feature shape: (20, 13)
‚úÖ Labels: {np.str_('vishali'), np.str_('bhuvana')}
‚úÖ Total samples: 20
üéØ Accuracy: 1.0
üìä Classification Report:
              precision    recall  f1-score   support

     bhuvana       1.00      1.00      1.00         2
     vishali       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



Saving WhatsApp Audio 2025-07-18 at 12.03.12_b0bfa832.waptt.opus to WhatsApp Audio 2025-07-18 at 12.03.12_b0bfa832.waptt.opus
WhatsApp Audio 2025-07-18 at 12.03.12_b0bfa832.waptt.opus:vishali
