In [1]:
import sys
print(sys.executable)


c:\Users\abdul\AppData\Local\Programs\Python\Python310\python.exe


In [2]:
import sys
!{sys.executable} -m pip install resampy




In [3]:
import os
import librosa
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# ------------------- Feature Extraction ---------------------
def extract_features(file_path, max_pad_len=174):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    if mfccs.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_pad_len]
    return mfccs

# ------------------- Data Augmentation ---------------------
def add_noise(data):
    noise_amp = 0.005 * np.random.uniform() * np.amax(data)
    return data + noise_amp * np.random.normal(size=data.shape[0])

def pitch_shift(data, sr):
    return librosa.effects.pitch_shift(data, sr, n_steps=random.choice([-2, -1, 1, 2]))

def time_stretch(data):
    rate = random.uniform(0.8, 1.2)
    return librosa.effects.time_stretch(data, rate)

def extract_augmented(file_path, max_pad_len=174):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    features = []
    for func in [lambda x: x, add_noise, pitch_shift, time_stretch]:
        try:
            new_audio = func(audio) if func != time_stretch else func(audio)
            mfcc = librosa.feature.mfcc(y=new_audio, sr=sample_rate, n_mfcc=40)
            if mfcc.shape[1] < max_pad_len:
                mfcc = np.pad(mfcc, ((0,0),(0,max_pad_len - mfcc.shape[1])), mode='constant')
            else:
                mfcc = mfcc[:, :max_pad_len]
            features.append(mfcc)
        except:
            continue
    return features

# ------------------- Load Dataset ---------------------
def load_dataset(path):
    X, Y = [], []
    for root, _, files in os.walk(path):
        for file in files:
            if file.endswith('.wav'):
                full_path = os.path.join(root, file)
                label = file.split('-')[2]
                mfccs_list = extract_augmented(full_path)
                for mfcc in mfccs_list:
                    X.append(mfcc)
                    Y.append(label)
    return np.array(X), np.array(Y)

# ------------------- Load & Preprocess ---------------------
path = r'C:\Users\abdul\Downloads\audio-testing\RAVDESS-AUDIO-DATASET'
X, Y = load_dataset(path)
print("Total samples (augmented):", len(X))

# Normalize features
X = X.reshape(X.shape[0], -1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = X_scaled.reshape(X.shape[0], 40, 174, 1)

encoder = LabelEncoder()
y_encoded = encoder.fit_transform(Y)
y_categorical = to_categorical(y_encoded)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.2, stratify=y_categorical, random_state=42)

# ------------------- CNN Model ---------------------
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(40, 174, 1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(y_categorical.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

# ------------------- Train ---------------------
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop])

# ------------------- Evaluate ---------------------
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\n✅ Test Accuracy: {test_acc:.4f}")

# ------------------- Plot ---------------------
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title("Model Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# ------------------- Predict External File ---------------------
def predict_emotion(file_path):
    mfcc = extract_features(file_path)
    if mfcc is None:
        return "Could not extract"
    mfcc = scaler.transform(mfcc.reshape(1, -1))
    mfcc = mfcc.reshape(1, 40, 174, 1)
    prediction = model.predict(mfcc)
    predicted_label = encoder.inverse_transform([np.argmax(prediction)])
    return predicted_label[0]

# Test your own audio
external_path = r'C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET\AudioWAV\1001_IEO_FEA_MD.wav'
result = predict_emotion(external_path)
print("🎙️ Predicted Emotion:", result)


Total samples (augmented): 0


ValueError: cannot reshape array of size 0 into shape (0,newaxis)

In [8]:
import os
import pandas as pd
import numpy as np
import torch
from datasets import Dataset, Audio
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import (
    AutoFeatureExtractor,
    Wav2Vec2ForSequenceClassification,
    TrainingArguments,
    Trainer
)
import logging
import pickle
from pathlib import Path
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# ----------- 1. Load data -------------------

def get_label_from_filename(filename):
    fname = filename.lower()
    # RAVDESS: 03-01-05-01-01-01-01.wav
    if filename.startswith("03-01-"):
        parts = filename.split("-")
        if len(parts) >= 3:
            emotion_code = parts[2]
            emotion_map = {
                "01": "neutral",
                "02": "calm",
                "03": "happy",
                "04": "sad",
                "05": "angry",
                "06": "fear",
                "07": "disgust",
                "08": "surprise"
            }
            label = emotion_map.get(emotion_code, "unknown")
            if label == "unknown":
                logger.debug(f"Unknown RAVDESS emotion code {emotion_code} in {filename}")
            return label
    # CREMA-D: 1001_DFA_ANG_XX.wav
    if "_ang_" in fname:
        return "angry"
    elif "_sad_" in fname:
        return "sad"
    elif "_hap_" in fname:
        return "happy"
    elif "_fea_" in fname:
        return "fear"
    elif "_dis_" in fname:
        return "disgust"
    elif "_neu_" in fname:
        return "neutral"
    elif "_sur_" in fname:
        return "surprise"
    logger.debug(f"Unknown label for file: {filename}")
    return "unknown"

def load_data(folders):
    audio_paths = []
    labels = []
    skipped_files = 0
    for folder in folders:
        folder_path = Path(folder)
        if not folder_path.exists():
            logger.error(f"Folder not found: {folder}")
            continue
        logger.info(f"Scanning folder: {folder}")
        wav_files = list(folder_path.rglob("*.wav"))
        logger.info(f"Found {len(wav_files)} .wav files in {folder}")
        for file_path in wav_files:
            try:
                label = get_label_from_filename(file_path.name)
                if label != "unknown":
                    audio_paths.append(str(file_path))
                    labels.append(label)
                else:
                    skipped_files += 1
            except Exception as e:
                logger.warning(f"Error processing file {file_path}: {e}")
                skipped_files += 1
    logger.info(f"Loaded {len(audio_paths)} audio files, skipped {skipped_files} files.")
    if len(audio_paths) < 100:
        logger.warning(f"Only {len(audio_paths)} audio files loaded. Expected more. Check dataset paths or file naming.")
    return audio_paths, labels

# Define dataset paths
folders = [
    r"C:\Users\abdul\Downloads\audio-testing\RAVDESS-AUDIO-DATASET",
    r"C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET"
]

try:
    logger.info("Loading audio files...")
    audio_paths, labels = load_data(folders)
    
    # Check label distribution
    label_counts = Counter(labels)
    logger.info("Label distribution:")
    for label, count in label_counts.items():
        logger.info(f"{label}: {count}")
    if len(label_counts) < 2:
        raise ValueError("Insufficient unique labels for training.")
    if not audio_paths:
        raise ValueError("No audio files loaded. Check dataset paths and file naming.")
except Exception as e:
    logger.error(f"Failed to load data: {e}")
    raise

# Create DataFrame
df = pd.DataFrame({"path": audio_paths, "label": labels})

# Encode labels
le = LabelEncoder()
df["label_id"] = le.fit_transform(df["label"])

# Save label encoder
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

# Split data
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label_id"], random_state=42)
logger.info(f"Training set size: {len(train_df)}, Validation set size: {len(val_df)}")

# ----------- 2. Huggingface Dataset and audio cast -----------

try:
    logger.info("Converting to Huggingface Dataset...")
    train_ds = Dataset.from_pandas(train_df.reset_index(drop=True))
    val_ds = Dataset.from_pandas(val_df.reset_index(drop=True))

    train_ds = train_ds.cast_column("path", Audio(sampling_rate=16000))
    val_ds = val_ds.cast_column("path", Audio(sampling_rate=16000))
except Exception as e:
    logger.error(f"Error creating datasets: {e}")
    raise

# ----------- 3. Feature Extractor and Preprocess -----------

model_ckpt = "facebook/wav2vec2-base"
try:
    logger.info("Loading feature extractor...")
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_ckpt)
except Exception as e:
    logger.error(f"Error loading feature extractor: {e}")
    raise

def preprocess(batch):
    try:
        audio = batch["path"]
        if audio["array"] is None or len(audio["array"]) == 0:
            raise ValueError("Empty audio array")
        inputs = feature_extractor(
            audio["array"],
            sampling_rate=audio["sampling_rate"],
            return_tensors="pt",
            padding=True,
            max_length=16000 * 3,
            truncation=True
        )
        batch["input_values"] = inputs["input_values"][0].numpy()
        batch["attention_mask"] = inputs.get("attention_mask", torch.ones_like(inputs["input_values"]))[0].numpy()
        batch["labels"] = batch["label_id"]
        return batch
    except Exception as e:
        logger.warning(f"Error preprocessing audio: {e}")
        return None

try:
    logger.info("Preprocessing training dataset...")
    train_ds = train_ds.map(
        preprocess,
        remove_columns=["path", "label", "label_id"],
        batched=False,
        load_from_cache_file=True
    )
    train_ds = train_ds.filter(lambda x: x["input_values"] is not None)

    logger.info("Preprocessing validation dataset...")
    val_ds = val_ds.map(
        preprocess,
        remove_columns=["path", "label", "label_id"],
        batched=False,
        load_from_cache_file=True
    )
    val_ds = val_ds.filter(lambda x: x["input_values"] is not None)
except Exception as e:
    logger.error(f"Error during preprocessing: {e}")
    raise

# Check if datasets are empty
if len(train_ds) == 0 or len(val_ds) == 0:
    raise ValueError("One or both datasets are empty after preprocessing.")
logger.info(f"Training dataset size: {len(train_ds)}, Validation dataset size: {len(val_ds)}")

# ----------- 4. Model -----------

num_labels = len(le.classes_)
try:
    logger.info("Loading model...")
    model = Wav2Vec2ForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels)
except Exception as e:
    logger.error(f"Error loading model: {e}")
    raise

# ----------- 5. Data collator -----------

def collate_fn(batch):
    try:
        input_values = [torch.tensor(item["input_values"], dtype=torch.float32) for item in batch]
        attention_mask = [torch.tensor(item["attention_mask"], dtype=torch.long) for item in batch]
        labels = torch.tensor([item["labels"] for item in batch], dtype=torch.long)

        input_values_padded = torch.nn.utils.rnn.pad_sequence(input_values, batch_first=True)
        attention_mask_padded = torch.nn.utils.rnn.pad_sequence(attention_mask, batch_first=True)

        return {
            "input_values": input_values_padded,
            "attention_mask": attention_mask_padded,
            "labels": labels
        }
    except Exception as e:
        logger.error(f"Error in data collation: {e}")
        raise

# ----------- 6. Compute metrics -----------

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")
    return {"accuracy": accuracy, "f1": f1}

# ----------- 7. Training arguments -----------

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    logging_dir="./logs",
    logging_steps=10,
    fp16=torch.cuda.is_available(),
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    gradient_checkpointing=True,
    dataloader_pin_memory=torch.cuda.is_available(),
    dataloader_num_workers=0,  # Disable multiprocessing to avoid worker crashes
)

# Check GPU memory
if torch.cuda.is_available():
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3  # GB
    logger.info(f"GPU memory available: {gpu_memory:.2f} GB")
    if gpu_memory < 4:
        logger.warning("Low GPU memory. Using batch size 2 instead of 4.")
        training_args.per_device_train_batch_size = 2
        training_args.per_device_eval_batch_size = 2

# ----------- 8. Trainer -----------

try:
    logger.info("Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        processing_class=feature_extractor,
        data_collator=collate_fn,
        compute_metrics=compute_metrics
    )
except Exception as e:
    logger.error(f"Error initializing trainer: {e}")
    raise

# ----------- 9. Train -----------

try:
    logger.info("Starting training...")
    trainer.train()
    logger.info("Training completed successfully.")
except Exception as e:
    logger.error(f"Error during training: {e}")
    raise

# Save the model
trainer.save_model("./results/final_model")
logger.info("Model saved to ./results/final_model")

2025-05-18 17:36:32,376 - INFO - Loading audio files...
2025-05-18 17:36:32,379 - INFO - Scanning folder: C:\Users\abdul\Downloads\audio-testing\RAVDESS-AUDIO-DATASET
2025-05-18 17:36:32,526 - INFO - Found 923 .wav files in C:\Users\abdul\Downloads\audio-testing\RAVDESS-AUDIO-DATASET
2025-05-18 17:36:32,532 - INFO - Scanning folder: C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET
2025-05-18 17:36:32,564 - INFO - Found 203 .wav files in C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET
2025-05-18 17:36:32,565 - INFO - Loaded 1126 audio files, skipped 0 files.
2025-05-18 17:36:32,566 - INFO - Label distribution:
2025-05-18 17:36:32,567 - INFO - neutral: 92
2025-05-18 17:36:32,568 - INFO - calm: 128
2025-05-18 17:36:32,569 - INFO - happy: 163
2025-05-18 17:36:32,570 - INFO - sad: 158
2025-05-18 17:36:32,571 - INFO - angry: 155
2025-05-18 17:36:32,572 - INFO - fear: 155
2025-05-18 17:36:32,575 - INFO - disgust: 155
2025-05-18 17:36:32,576 - INFO - surprise: 120
20

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Filter:   0%|          | 0/900 [00:00<?, ? examples/s]

2025-05-18 17:37:26,150 - INFO - Preprocessing validation dataset...


Map:   0%|          | 0/226 [00:00<?, ? examples/s]

Filter:   0%|          | 0/226 [00:00<?, ? examples/s]

2025-05-18 17:37:35,227 - INFO - Training dataset size: 900, Validation dataset size: 226
2025-05-18 17:37:35,241 - INFO - Loading model...
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-18 17:37:39,518 - INFO - Initializing trainer...
2025-05-18 17:37:40,070 - INFO - Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8311,1.748951,0.278761,0.17567
2,1.3613,1.407486,0.584071,0.560237
3,1.057,1.166153,0.646018,0.631255
4,0.9893,0.999134,0.725664,0.725746
5,0.7522,0.884539,0.747788,0.743845


2025-05-18 19:11:50,018 - INFO - Training completed successfully.
2025-05-18 19:11:54,626 - INFO - Model saved to ./results/final_model


In [13]:
import logging
import pickle
from pathlib import Path
from collections import Counter
import pandas as pd
import numpy as np
import torch
import torchaudio
from datasets import Dataset, Audio
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from transformers import (
    Wav2Vec2FeatureExtractor,
    HubertForSequenceClassification,
    TrainingArguments,
    Trainer
)

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# ----------- 1. Data Augmentation Function -----------

def add_noise(audio, noise_factor=0.005):
    """Add random white noise to audio to simulate real-world conditions."""
    try:
        noise = torch.randn_like(torch.tensor(audio, dtype=torch.float32)) * noise_factor
        noisy_audio = audio + noise.numpy()
        # Clip to prevent distortion
        noisy_audio = np.clip(noisy_audio, -1.0, 1.0)
        return noisy_audio
    except Exception as e:
        logger.warning(f"Error adding noise: {e}")
        return audio

# ----------- 2. Load data -------------------

def get_label_from_filename(filename, dataset_type):
    fname = filename.lower()
    if dataset_type == "IEMOCAP":
        label_map = {
            "hap": "happy",
            "sad": "sad",
            "ang": "angry",
            "neu": "neutral",
            "exc": "excited",
            "fru": "frustrated"
        }
        for key, value in label_map.items():
            if f"_{key}" in fname:
                return value
        logger.debug(f"Unknown IEMOCAP label for file: {filename}")
        return "unknown"
    elif dataset_type == "CREMA-D":
        if "_ang_" in fname:
            return "angry"
        elif "_sad_" in fname:
            return "sad"
        elif "_hap_" in fname:
            return "happy"
        elif "_fea_" in fname:
            return "fear"
        elif "_dis_" in fname:
            return "disgust"
        elif "_neu_" in fname:
            return "neutral"
        logger.debug(f"Unknown CREMA-D label for file: {filename}")
        return "unknown"
    return "unknown"

def load_data(folders):
    audio_paths = []
    labels = []
    skipped_files = 0
    for folder in folders:
        folder_path = Path(folder["path"])
        dataset_type = folder["type"]
        if not folder_path.exists():
            logger.error(f"Folder not found: {folder_path}")
            continue
        logger.info(f"Scanning folder: {folder_path} ({dataset_type})")
        wav_files = list(folder_path.rglob("*.wav"))
        logger.info(f"Found {len(wav_files)} .wav files in {folder_path}")
        for file_path in wav_files:
            try:
                label = get_label_from_filename(file_path.name, dataset_type)
                if label != "unknown":
                    audio_paths.append(str(file_path))
                    labels.append(label)
                else:
                    skipped_files += 1
            except Exception as e:
                logger.warning(f"Error processing file {file_path}: {e}")
                skipped_files += 1
    logger.info(f"Loaded {len(audio_paths)} audio files, skipped {skipped_files} files.")
    if len(audio_paths) < 100:
        logger.warning(f"Only {len(audio_paths)} audio files loaded. Expected more. Check dataset paths or file naming.")
    return audio_paths, labels

# Define dataset paths
folders = [
    {"path": r"C:\Users\abdul\Downloads\audio-testing\IEMOCAP Dataset", "type": "IEMOCAP"},
    {"path": r"C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET", "type": "CREMA-D"}
]

try:
    logger.info("Loading audio files...")
    audio_paths, labels = load_data(folders)
    
    # Check label distribution
    label_counts = Counter(labels)
    logger.info("Label distribution:")
    for label, count in label_counts.items():
        logger.info(f"{label}: {count}")
    if len(label_counts) < 2:
        raise ValueError("Insufficient unique labels for training.")
    if not audio_paths:
        raise ValueError("No audio files loaded. Check dataset paths and file naming.")
except Exception as e:
    logger.error(f"Failed to load data: {e}")
    raise

# Create DataFrame
df = pd.DataFrame({"path": audio_paths, "label": labels})

# Standardize labels
df["label"] = df["label"].replace({"excited": "happy", "frustrated": "angry"})

# Encode labels
le = LabelEncoder()
df["label_id"] = le.fit_transform(df["label"])

# Save label encoder
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

# Split data
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label_id"], random_state=42)
logger.info(f"Training set size: {len(train_df)}, Validation set size: {len(val_df)}")

# ----------- 3. Huggingface Dataset and audio cast -----------

try:
    logger.info("Converting to Huggingface Dataset...")
    train_ds = Dataset.from_pandas(train_df.reset_index(drop=True))
    val_ds = Dataset.from_pandas(val_df.reset_index(drop=True))

    train_ds = train_ds.cast_column("path", Audio(sampling_rate=16000))
    val_ds = val_ds.cast_column("path", Audio(sampling_rate=16000))
except Exception as e:
    logger.error(f"Error creating datasets: {e}")
    raise

# ----------- 4. Feature Extractor and Preprocess -----------

model_ckpt = "facebook/hubert-base-ls960"
try:
    logger.info("Loading feature extractor...")
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_ckpt)
except Exception as e:
    logger.error(f"Error loading feature extractor: {e}")
    raise

def preprocess(batch):
    try:
        audio = batch["path"]
        if audio["array"] is None or len(audio["array"]) == 0:
            raise ValueError("Empty audio array")
        # Apply augmentation to training data with 50% probability
        if np.random.rand() < 0.5 and "label_id" in batch:  # Ensure it's training data
            audio_array = add_noise(audio["array"])
        else:
            audio_array = audio["array"]
        inputs = feature_extractor(
            audio_array,
            sampling_rate=audio["sampling_rate"],
            return_tensors="pt",
            padding=True,
            max_length=16000 * 5,
            truncation=True
        )
        batch["input_values"] = inputs["input_values"][0].numpy()
        batch["attention_mask"] = inputs.get("attention_mask", torch.ones_like(inputs["input_values"]))[0].numpy()
        batch["labels"] = batch["label_id"]
        return batch
    except Exception as e:
        logger.warning(f"Error preprocessing audio: {e}")
        return None

try:
    logger.info("Preprocessing training dataset...")
    train_ds = train_ds.map(
        preprocess,
        remove_columns=["path", "label", "label_id"],
        batched=False,
        load_from_cache_file=True
    )
    train_ds = train_ds.filter(lambda x: x["input_values"] is not None)

    logger.info("Preprocessing validation dataset...")
    val_ds = val_ds.map(
        preprocess,
        remove_columns=["path", "label", "label_id"],
        batched=False,
        load_from_cache_file=True
    )
    val_ds = val_ds.filter(lambda x: x["input_values"] is not None)
except Exception as e:
    logger.error(f"Error during preprocessing: {e}")
    raise

# Check if datasets are empty
if len(train_ds) == 0 or len(val_ds) == 0:
    raise ValueError("One or both datasets are empty after preprocessing.")
logger.info(f"Training dataset size: {len(train_ds)}, Validation dataset size: {len(val_ds)}")

# ----------- 5. Model -----------

num_labels = len(le.classes_)
try:
    logger.info("Loading model...")
    model = HubertForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels)
except Exception as e:
    logger.error(f"Error loading model: {e}")
    raise

# ----------- 6. Data collator -----------

def collate_fn(batch):
    try:
        input_values = [torch.tensor(item["input_values"], dtype=torch.float32) for item in batch]
        attention_mask = [torch.tensor(item["attention_mask"], dtype=torch.long) for item in batch]
        labels = torch.tensor([item["labels"] for item in batch], dtype=torch.long)

        input_values_padded = torch.nn.utils.rnn.pad_sequence(input_values, batch_first=True)
        attention_mask_padded = torch.nn.utils.rnn.pad_sequence(attention_mask, batch_first=True)

        return {
            "input_values": input_values_padded,
            "attention_mask": attention_mask_padded,
            "labels": labels
        }
    except Exception as e:
        logger.error(f"Error in data collation: {e}")
        raise

# ----------- 7. Compute metrics -----------

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")
    return {"accuracy": accuracy, "f1": f1}

# ----------- 8. Training arguments -----------

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    logging_dir="./logs",
    logging_steps=10,
    fp16=torch.cuda.is_available(),
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    gradient_checkpointing=True,
    dataloader_pin_memory=torch.cuda.is_available(),
    dataloader_num_workers=0,
)

# Check GPU memory
if torch.cuda.is_available():
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3  # GB
    logger.info(f"GPU memory available: {gpu_memory:.2f} GB")
    if gpu_memory < 4:
        logger.warning("Low GPU memory. Using batch size 2 instead of 4.")
        training_args.per_device_train_batch_size = 2
        training_args.per_device_eval_batch_size = 2

# ----------- 9. Trainer -----------

try:
    logger.info("Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        processing_class=feature_extractor,
        data_collator=collate_fn,
        compute_metrics=compute_metrics
    )
except Exception as e:
    logger.error(f"Error initializing trainer: {e}")
    raise

# ----------- 10. Train -----------

try:
    logger.info("Starting training...")
    trainer.train()
    logger.info("Training completed successfully.")
except Exception as e:
    logger.error(f"Error during training: {e}")
    raise

# Save the model and feature extractor
trainer.save_model("./results/final_model")
feature_extractor.save_pretrained("./results/final_model")
logger.info("Model and feature extractor saved to ./results/final_model")

# ----------- 11. Prediction Function for External Audio -----------

def predict_emotion(audio_file_path, model_path="./results/final_model"):
    try:
        # Load model and feature extractor
        model = HubertForSequenceClassification.from_pretrained(model_path)
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
        with open("label_encoder.pkl", "rb") as f:
            le = pickle.load(f)

        # Load and preprocess audio
        waveform, sample_rate = torchaudio.load(audio_file_path)
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)
        waveform = waveform.squeeze().numpy()

        inputs = feature_extractor(
            waveform,
            sampling_rate=16000,
            return_tensors="pt",
            padding=True,
            max_length=16000 * 5,
            truncation=True
        )

        # Move to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Predict
        model.eval()
        with torch.no_grad():
            logits = model(**inputs).logits
            probabilities = torch.softmax(logits, dim=-1)
            predicted_id = torch.argmax(logits, dim=-1).item()
            predicted_label = le.inverse_transform([predicted_id])[0]
            confidence = probabilities[0][predicted_id].item()

        logger.info(f"Predicted emotion: {predicted_label} (Confidence: {confidence:.2f})")
        return predicted_label, confidence
    except Exception as e:
        logger.error(f"Error predicting emotion for {audio_file_path}: {e}")
        return None, None

# Example usage
if __name__ == "__main__":
    # Replace with path to an external .wav file
    test_audio = r"C:\Users\abdul\Downloads\audio-testing\hap.wav"
    if Path(test_audio).exists():
        label, confidence = predict_emotion(test_audio)
        if label:
            print(f"Emotion: {label}, Confidence: {confidence:.2f}")
    else:
        logger.warning(f"Test audio file {test_audio} not found.")

2025-05-19 14:47:14,283 - INFO - Loading audio files...
2025-05-19 14:47:14,298 - INFO - Scanning folder: C:\Users\abdul\Downloads\audio-testing\IEMOCAP Dataset (IEMOCAP)
2025-05-19 14:47:14,503 - INFO - Found 1756 .wav files in C:\Users\abdul\Downloads\audio-testing\IEMOCAP Dataset
2025-05-19 14:47:14,509 - INFO - Scanning folder: C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET (CREMA-D)
2025-05-19 14:47:14,554 - INFO - Found 203 .wav files in C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET
2025-05-19 14:47:14,557 - INFO - Loaded 203 audio files, skipped 1756 files.
2025-05-19 14:47:14,563 - INFO - Label distribution:
2025-05-19 14:47:14,564 - INFO - angry: 35
2025-05-19 14:47:14,565 - INFO - disgust: 35
2025-05-19 14:47:14,566 - INFO - fear: 35
2025-05-19 14:47:14,569 - INFO - happy: 35
2025-05-19 14:47:14,571 - INFO - neutral: 28
2025-05-19 14:47:14,572 - INFO - sad: 35
2025-05-19 14:47:14,790 - INFO - Training set size: 162, Validation set size: 41
2025-

Map:   0%|          | 0/162 [00:00<?, ? examples/s]

Filter:   0%|          | 0/162 [00:00<?, ? examples/s]

2025-05-19 14:47:22,514 - INFO - Preprocessing validation dataset...


Map:   0%|          | 0/41 [00:00<?, ? examples/s]

Filter:   0%|          | 0/41 [00:00<?, ? examples/s]

2025-05-19 14:47:23,984 - INFO - Training dataset size: 162, Validation dataset size: 41
2025-05-19 14:47:23,985 - INFO - Loading model...
Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-19 14:47:25,814 - INFO - Initializing trainer...
2025-05-19 14:47:26,089 - INFO - Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7967,1.777135,0.268293,0.134516
2,1.7269,1.724994,0.317073,0.201011
3,1.6931,1.707531,0.317073,0.247083
4,1.655,1.661394,0.317073,0.208051
5,1.6866,1.662611,0.292683,0.211992


2025-05-19 15:07:01,780 - INFO - Training completed successfully.
2025-05-19 15:07:07,640 - INFO - Model and feature extractor saved to ./results/final_model
2025-05-19 15:07:10,033 - INFO - Predicted emotion: happy (Confidence: 0.18)


Emotion: happy, Confidence: 0.18


In [None]:
from transformers import pipeline

# Load a pretrained emotion classification model
# (this is a small/distilbert model fine-tuned for emotion classification)
emotion_classifier = pipeline("text-classification", 
                              model="bhadresh-savani/distilbert-base-uncased-emotion",
                              return_all_scores=True)

def predict_emotion_from_text(text):
    results = emotion_classifier(text)[0]  # list of dicts: label + score
    # Find the emotion with the highest score
    best = max(results, key=lambda x: x['score'])
    return best['label'], best['score']

if __name__ == "__main__":
    while True:
        transcript = input("Enter transcript text (or 'exit' to quit): ")
        if transcript.lower() == 'exit':
            break
        emotion, score = predict_emotion_from_text(transcript)
        print(f"Predicted emotion: {emotion} (confidence: {score:.2f})")


W0519 15:15:06.256000 40036 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.





Device set to use cpu


Predicted emotion: joy (confidence: 0.51)
Predicted emotion: joy (confidence: 0.51)
Predicted emotion: anger (confidence: 0.99)
Predicted emotion: anger (confidence: 0.99)
Predicted emotion: joy (confidence: 0.46)
Predicted emotion: fear (confidence: 0.66)
Predicted emotion: joy (confidence: 0.52)
Predicted emotion: anger (confidence: 0.47)
Predicted emotion: joy (confidence: 1.00)
Predicted emotion: joy (confidence: 1.00)
Predicted emotion: joy (confidence: 0.53)
Predicted emotion: anger (confidence: 0.85)
Predicted emotion: anger (confidence: 0.81)
Predicted emotion: joy (confidence: 0.87)


In [None]:
from transformers import pipeline

# Load a model with wider emotion labels
emotion_classifier = pipeline(
    "text-classification",
    model="nateraw/bert-base-uncased-emotion",
    return_all_scores=True
)

def predict_emotion_from_text(text):
    results = emotion_classifier(text)[0]
    best = max(results, key=lambda x: x['score'])
    return best['label'], best['score']

if __name__ == "__main__":
    while True:
        transcript = input("Enter transcript text (or 'exit' to quit): ")
        if transcript.lower() == 'exit':
            break
        emotion, score = predict_emotion_from_text(transcript)
        print(f"Predicted emotion: {emotion} (confidence: {score:.2f})")


W0520 10:30:25.773000 30580 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.





Device set to use cpu


Predicted emotion: joy (confidence: 0.99)
Predicted emotion: joy (confidence: 0.99)
Predicted emotion: anger (confidence: 0.37)
Predicted emotion: joy (confidence: 0.99)
Predicted emotion: joy (confidence: 0.99)
Predicted emotion: anger (confidence: 0.88)
Predicted emotion: anger (confidence: 0.86)
Predicted emotion: joy (confidence: 0.68)
Predicted emotion: joy (confidence: 0.68)
Predicted emotion: love (confidence: 0.83)
Predicted emotion: anger (confidence: 0.47)
Predicted emotion: anger (confidence: 0.56)


In [None]:
from transformers import pipeline

# Load both emotion classifiers
model_1 = pipeline("text-classification",
                   model="bhadresh-savani/distilbert-base-uncased-emotion",
                   return_all_scores=True)

model_2 = pipeline("text-classification",
                   model="nateraw/bert-base-uncased-emotion",
                   return_all_scores=True)

def merge_scores(results):
    """Helper to normalize and merge model output scores into one dict."""
    merged = {}
    for res in results:
        for item in res:
            label = item["label"].lower()
            score = item["score"]
            merged[label] = merged.get(label, 0) + score
    return merged

def predict_ensemble_emotion(text):
    results_1 = model_1(text)[0]
    results_2 = model_2(text)[0]
    
    # Merge scores from both models
    merged_scores = merge_scores([results_1, results_2])
    
    # Pick the emotion with the highest combined score
    best_label = max(merged_scores, key=merged_scores.get)
    confidence = merged_scores[best_label] / 2  # Average confidence across 2 models

    return best_label.capitalize(), confidence

if __name__ == "__main__":
    while True:
        transcript = input("Enter transcript text (or 'exit' to quit): ")
        if transcript.lower() == 'exit':
            break
        emotion, score = predict_ensemble_emotion(transcript)
        print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})")


W0520 10:34:08.384000 13168 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.





Device set to use cpu
Device set to use cpu


✅ Predicted emotion: Anger (confidence: 0.47)
✅ Predicted emotion: Joy (confidence: 0.56)
✅ Predicted emotion: Joy (confidence: 0.99)
✅ Predicted emotion: Joy (confidence: 0.99)
✅ Predicted emotion: Joy (confidence: 0.87)
✅ Predicted emotion: Anger (confidence: 0.68)
✅ Predicted emotion: Joy (confidence: 0.72)
✅ Predicted emotion: Anger (confidence: 0.72)
✅ Predicted emotion: Anger (confidence: 0.36)


In [None]:
from transformers import pipeline

# Load the RoBERTa-based emotion classifier
emotion_classifier = pipeline(
    "text-classification",
    model="SamLowe/roberta-base-go_emotions",
    top_k=None,  # Replace return_all_scores=True
    truncation=True,  # Enable truncation directly
    max_length=512  # Set max_length directly
)

def predict_emotion_from_text(text):
    """
    Predict emotion from input text using the RoBERTa GoEmotions model.
    Returns the dominant emotion and its confidence score.
    """
    try:
        # Ensure text is not empty
        if not text.strip():
            raise ValueError("Input text is empty")
        
        # Get model predictions
        results = emotion_classifier(text)[0]
        # Find the emotion with the highest score
        best = max(results, key=lambda x: x['score'])
        emotion = best['label'].capitalize()
        score = best['score']
        return emotion, score
    except Exception as e:
        print(f"Error processing text: {e}")
        return "Unknown", 0.0

if __name__ == "__main__":
    # Example sentences to test
    test_sentences = [
        "I don’t care how good you are, just get it done!",
        "What is machine learning?",
        "I’m so excited for the concert tonight!",
        "Why isn’t this working properly?",
        "I’m feeling really down today."
    ]
    
    print("Testing emotion detection with example sentences (type 'exit' to quit).")
    print("Enter your own transcript or press Enter to test predefined sentences.")
    
    while True:
        transcript = input("Transcript: ")
        if transcript.lower() == 'exit':
            break
        if not transcript.strip():
            # If no input, test predefined sentences
            print("\nTesting predefined sentences:")
            for sentence in test_sentences:
                emotion, score = predict_emotion_from_text(sentence)
                print(f"Sentence: '{sentence}'")
                print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
        else:
            # Process user input
            emotion, score = predict_emotion_from_text(transcript)
            print(f"Sentence: '{transcript}'")
            print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")

W0520 10:46:42.728000 41712 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.





Device set to use cpu


Testing emotion detection with example sentences (type 'exit' to quit).
Enter your own transcript or press Enter to test predefined sentences.

Testing predefined sentences:
Sentence: 'I don’t care how good you are, just get it done!'
✅ Predicted emotion: Admiration (confidence: 0.71)

Sentence: 'What is machine learning?'
✅ Predicted emotion: Curiosity (confidence: 0.63)

Sentence: 'I’m so excited for the concert tonight!'
✅ Predicted emotion: Excitement (confidence: 0.82)

Sentence: 'Why isn’t this working properly?'
✅ Predicted emotion: Confusion (confidence: 0.52)

Sentence: 'I’m feeling really down today.'
✅ Predicted emotion: Disappointment (confidence: 0.42)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Curiosity (confidence: 0.53)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Curiosity (confidence: 0.53)



In [None]:
from transformers import pipeline

classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base",
                      return_all_scores=True)

def predict_emotion(text):
    results = classifier(text)[0]
    best = max(results, key=lambda x: x["score"])
    return best["label"], best["score"]

while True:
    sentence = input("Enter sentence: ")
    if sentence.lower() == "exit":
        break
    emotion, score = predict_emotion(sentence)
    print(f"Predicted emotion: {emotion} (confidence: {score:.2f})")


W0520 10:50:22.884000 33364 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.





Device set to use cpu


Predicted emotion: neutral (confidence: 0.55)
Predicted emotion: anger (confidence: 0.87)
Predicted emotion: neutral (confidence: 0.78)
Predicted emotion: joy (confidence: 0.93)
Predicted emotion: surprise (confidence: 0.30)
Predicted emotion: sadness (confidence: 0.99)


In [None]:
from transformers import pipeline
import re

# Load the Twitter RoBERTa-based emotion classifier
emotion_classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion",
    top_k=None,  # Get scores for all labels
    truncation=True,
    max_length=512,
    framework="pt"  # Use PyTorch to avoid TensorFlow warnings
)

def clean_text(text):
    """
    Clean WebRTC transcription text by removing filler words, extra spaces, and normalizing.
    """
    text = re.sub(r'\b(um|uh|like|you know)\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip().lower()
    # Remove excessive punctuation that might confuse the model
    text = re.sub(r'[.!?]{2,}', '.', text)
    return text

def is_neutral_query(text):
    """
    Rule-based check for neutral queries (e.g., informational questions).
    Returns True if the sentence is likely neutral.
    """
    neutral_patterns = [
        r'^(what|how|when|where|why|can you|could you|tell me|is it).*?\?$',
        r'^(please|could you|would you).*?(schedule|set|find|look up|tell me).*'
    ]
    text = text.lower().strip()
    return any(re.match(pattern, text) for pattern in neutral_patterns)

def predict_emotion_from_text(text):
    """
    Predict emotion from input text using the Twitter RoBERTa model.
    Includes rule-based neutral fallback. Returns the dominant emotion and its confidence score.
    """
    try:
        # Clean the input text
        text = clean_text(text)
        if not text:
            raise ValueError("Input text is empty after cleaning")

        # Check for neutral queries first
        if is_neutral_query(text):
            return "Neutral", 0.9  # High confidence for rule-based neutral

        # Get model predictions
        results = emotion_classifier(text)[0]
        # Find the emotion with the highest score
        best = max(results, key=lambda x: x['score'])
        emotion = best['label'].replace('emotion:', '').capitalize()  # Clean label
        score = best['score']

        # Fallback to Neutral if confidence is low
        if score < 0.6:
            return "Neutral", score

        return emotion, score
    except Exception as e:
        print(f"Error processing text: {e}")
        return "Unknown", 0.0

if __name__ == "__main__":
    # Test sentences from your output
    test_sentences = [
        "I don’t care how good you are, just get it done!",
        "What is machine learning?",
        "I’m so excited for the concert tonight!",
        "Why isn’t this working properly?",
        "I’m feeling really down today.",
        "Could you please schedule a meeting for next week?"
    ]
    
    print("Testing emotion detection with example sentences (type 'exit' to quit).")
    print("Enter your own transcript or press Enter to test predefined sentences.")
    
    while True:
        transcript = input("Transcript: ")
        if transcript.lower() == 'exit':
            break
        if not transcript.strip():
            # Test predefined sentences
            print("\nTesting predefined sentences:")
            for sentence in test_sentences:
                emotion, score = predict_emotion_from_text(sentence)
                print(f"Sentence: '{sentence}'")
                print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
        else:
            # Process user input
            emotion, score = predict_emotion_from_text(transcript)
            print(f"Sentence: '{transcript}'")
            print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")

W0520 10:54:33.505000 12024 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


config.json:   0%|          | 0.00/768 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


Testing emotion detection with example sentences (type 'exit' to quit).
Enter your own transcript or press Enter to test predefined sentences.

Testing predefined sentences:
Sentence: 'I don’t care how good you are, just get it done!'
✅ Predicted emotion: Anger (confidence: 0.89)

Sentence: 'What is machine learning?'
✅ Predicted emotion: Neutral (confidence: 0.90)

Sentence: 'I’m so excited for the concert tonight!'
✅ Predicted emotion: Optimism (confidence: 0.95)

Sentence: 'Why isn’t this working properly?'
✅ Predicted emotion: Neutral (confidence: 0.90)

Sentence: 'I’m feeling really down today.'
✅ Predicted emotion: Sadness (confidence: 0.98)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Neutral (confidence: 0.90)

Sentence: 'what is deep learning?'
✅ Predicted emotion: Neutral (confidence: 0.90)

Sentence: 'Are you sure you get it right this time'
✅ Predicted emotion: Neutral (confidence: 0.51)


Testing predefined sentences:
Sentence: 'I do

In [None]:
from transformers import pipeline
import re

# Load the Twitter RoBERTa-based emotion classifier
emotion_classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion",
    top_k=None,
    truncation=True,
    max_length=512,
    framework="pt"
)

def clean_text(text):
    """
    Clean WebRTC transcription text by adding punctuation, removing filler words, and normalizing.
    """
    text = re.sub(r'\b(um|uh|like|you know)\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip().lower()
    # Add punctuation for run-on sentences
    text = re.sub(r'\b(are you serious|come on|i did not|this is ridiculous|what the)\b', r'\1.', text)
    text = re.sub(r'\b(this what|what you)\b', r'\1,', text)
    text = text.replace(' i ', ' I ')
    if not text.endswith('.'):
        text += '.'
    return text

def is_neutral_query(text):
    """
    Rule-based check for neutral queries, excluding emotionally charged questions.
    """
    neutral_patterns = [
        r'^(what|how|when|where|can you|could you|tell me|is it).*?\?$',
        r'^(please|could you|would you).*?(schedule|set|find|look up|tell me).*'
    ]
    negative_words = ['not', 'isn\'t', 'doesn\'t', 'won\'t', 'can\'t', 'serious', 'ridiculous']
    technical_terms = ['machine learning', 'deep learning', 'artificial intelligence', 'data science']
    text = text.lower().strip()
    # Mark as neutral if it contains technical terms
    if any(term in text for term in technical_terms):
        return True
    # Exclude questions with negative/emotional words
    if any(word in text for word in negative_words):
        return False
    return any(re.match(pattern, text) for pattern in neutral_patterns)

def boost_anger_score(text, results):
    """
    Boost anger score for sentences with anger-related keywords/phrases.
    """
    anger_keywords = [
        'are you serious', 'come on', 'i did not expect', 'this is ridiculous', 
        'what the', 'this what', 'not expected', 'you kidding'
    ]
    text = text.lower().strip()
    if any(keyword in text for keyword in anger_keywords):
        for result in results:
            if result['label'] == 'emotion:anger':
                result['score'] = min(result['score'] * 2.0, 1.0)  # Double anger score
    return results

def predict_emotion_from_text(text):
    """
    Predict emotion from input text using the Twitter RoBERTa model with neutral fallback and anger boost.
    """
    try:
        # Clean the input text
        text = clean_text(text)
        if not text:
            raise ValueError("Input text is empty after cleaning")

        # Check for neutral queries
        if is_neutral_query(text):
            return "Neutral", 0.95  # Higher confidence for neutral

        # Get model predictions and boost anger if applicable
        results = emotion_classifier(text)[0]
        results = boost_anger_score(text, results)
        
        # Find the emotion with the highest score
        best = max(results, key=lambda x: x['score'])
        emotion = best['label'].replace('emotion:', '').capitalize()
        score = best['score']

        # Fallback to Neutral if confidence is low
        if score < 0.7:
            return "Neutral", score

        return emotion, score
    except Exception as e:
        print(f"Error processing text: {e}")
        return "Unknown", 0.0

if __name__ == "__main__":
    # Test sentences
    test_sentences = [
        "I don’t care how good you are, just get it done!",
        "What is machine learning?",
        "I’m so excited for the concert tonight!",
        "Why isn’t this working properly?",
        "I’m feeling really down today.",
        "Could you please schedule a meeting for next week?",
        "What is deep learning?",
        "Are you serious this what you are giving me come on i did not expected that from you"
    ]
    
    print("Testing emotion detection with example sentences (type 'exit' to quit).")
    print("Enter your own transcript or press Enter to test predefined sentences.")
    
    while True:
        transcript = input("Transcript: ")
        if transcript.lower() == 'exit':
            break
        if not transcript.strip():
            print("\nTesting predefined sentences:")
            for sentence in test_sentences:
                emotion, score = predict_emotion_from_text(sentence)
                print(f"Sentence: '{sentence}'")
                print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
        else:
            emotion, score = predict_emotion_from_text(transcript)
            print(f"Sentence: '{transcript}'")
            print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
            

W0520 11:19:17.446000 27856 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
Device set to use cpu


Testing emotion detection with example sentences (type 'exit' to quit).
Enter your own transcript or press Enter to test predefined sentences.

Testing predefined sentences:
Sentence: 'I don’t care how good you are, just get it done!'
✅ Predicted emotion: Anger (confidence: 0.87)

Sentence: 'What is machine learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'I’m so excited for the concert tonight!'
✅ Predicted emotion: Optimism (confidence: 0.95)

Sentence: 'Why isn’t this working properly?'
✅ Predicted emotion: Joy (confidence: 0.89)

Sentence: 'I’m feeling really down today.'
✅ Predicted emotion: Sadness (confidence: 0.98)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'What is deep learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'Are you serious this what you are giving me come on i did not expected that from you'
✅ Predicted emotion: Joy (confidence: 0.97)

Sentence:

In [None]:
from transformers import pipeline
import re

# Load both emotion classifiers
anger_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True,
    max_length=512,
    framework="pt"
)

other_classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion",
    top_k=None,
    truncation=True,
    max_length=512,
    framework="pt"
)

def clean_text(text):
    """
    Clean WebRTC transcription text by adding punctuation, removing filler words, and normalizing.
    """
    text = re.sub(r'\b(um|uh|like|you know)\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip().lower()
    # Add punctuation for run-on sentences and negative questions
    text = re.sub(r'\b(why isn\'t|what the|are you serious|come on|i did not|this is ridiculous|you kidding|fix it|do better|unacceptable|driving me nuts|broken again|worst service)\b', r'\1.', text)
    text = re.sub(r'\b(this what|what you)\b', r'\1,', text)
    text = text.replace(' i ', ' I ')
    if not text.endswith('.'):
        text += '.'
    return text

def is_neutral_query(text):
    """
    Rule-based check for neutral queries, excluding emotionally charged questions.
    """
    neutral_patterns = [
        r'^(what|how|when|where|can you|could you|tell me|is it).*?\?$',
        r'^(please|could you|would you).*?(schedule|set|find|look up|tell me|explain).*'
    ]
    technical_terms = ['machine learning', 'deep learning', 'artificial intelligence', 'data science', 'neural networks']
    negative_words = ['not', 'isn\'t', 'doesn\'t', 'won\'t', 'can\'t', 'serious', 'ridiculous', 'failing', 'broken']
    text = text.lower().strip()
    # Mark as neutral if it contains technical terms
    if any(term in text for term in technical_terms):
        return True
    # Exclude questions with negative/emotional words
    if any(word in text for word in negative_words):
        return False
    return any(re.match(pattern, text) for pattern in neutral_patterns)

def is_anger_query(text):
    """
    Rule-based check for anger-related queries or statements.
    """
    anger_patterns = [
        r'\b(why isn\'t|what the|are you serious|come on|i did not expect|this is ridiculous|you kidding|fix it|do better|unacceptable|driving me nuts|broken again|worst service)\b',
        r'\b(not working|failing|broken|messed up)\b'
    ]
    text = text.lower().strip()
    return any(re.search(pattern, text) for pattern in anger_patterns)

def predict_emotion_from_text(text):
    """
    Predict emotion using j-hartmann for anger and cardiffnlp for other emotions.
    """
    try:
        # Clean the input text
        text = clean_text(text)
        if not text:
            raise ValueError("Input text is empty after cleaning")

        # Check for neutral queries
        if is_neutral_query(text):
            return "Neutral", 0.95

        # Check for anger queries (rule-based)
        if is_anger_query(text):
            return "Anger", 0.90

        # Check for anger using j-hartmann model
        anger_results = anger_classifier(text)[0]
        anger_score = next((r['score'] for r in anger_results if r['label'] == 'anger'), 0.0)
        if anger_score > 0.7:
            return "Anger", anger_score

        # Use cardiffnlp for other emotions
        other_results = other_classifier(text)[0]
        
        # Suppress joy for negative sentences
        negative_words = ['not', 'isn\'t', 'doesn\'t', 'won\'t', 'can\'t', 'failing', 'broken']
        if any(word in text.lower() for word in negative_words):
            for result in other_results:
                if result['label'] == 'emotion:joy':
                    result['score'] *= 0.1  # Reduce joy score significantly

        # Find the emotion with the highest score
        best = max(other_results, key=lambda x: x['score'])
        emotion = best['label'].replace('emotion:', '').capitalize()
        score = best['score']

        # Fallback to Neutral if confidence is low
        if score < 0.7:
            return "Neutral", score

        return emotion, score
    except Exception as e:
        print(f"Error processing text: {e}")
        return "Unknown", 0.0

if __name__ == "__main__":
    # Test sentences from previous output plus new ones
    test_sentences = [
        "I don’t care how good you are, just get it done!",
        "What is machine learning?",
        "I’m so excited for the concert tonight!",
        "Why isn’t this working properly?",
        "I’m feeling really down today.",
        "Could you please schedule a meeting for next week?",
        "What is deep learning?",
        "Are you serious this what you are giving me come on i did not expected that from you",
        "This is absolutely unacceptable, do better!",
        "Can you explain how neural networks work?",
        "I’m having the worst day ever, nothing is going right.",
        "Wow, I just won a free trip, this is awesome!",
        "What the heck is wrong with this thing, it’s broken again!",
        "Tell me about the latest AI advancements.",
        "I can’t believe you messed this up, fix it now!",
        "I’m so thrilled, my team just nailed the presentation!",
        "Why does this keep failing, it’s driving me nuts!",
        "I feel so lost, nothing makes sense anymore.",
        "Hey, can you set an alarm for 7 AM tomorrow?",
        "You gotta be kidding me, this is the worst service ever!"
    ]
    
    print("Testing emotion detection with example sentences (type 'exit' to quit).")
    print("Enter your own transcript or press Enter to test predefined sentences.")
    
    while True:
        transcript = input("Transcript: ")
        if transcript.lower() == 'exit':
            break
        if not transcript.strip():
            print("\nTesting predefined sentences:")
            for sentence in test_sentences:
                emotion, score = predict_emotion_from_text(sentence)
                print(f"Sentence: '{sentence}'")
                print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
        else:
            emotion, score = predict_emotion_from_text(transcript)
            print(f"Sentence: '{transcript}'")
            print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")

W0520 11:25:25.066000 31080 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
Device set to use cpu
Device set to use cpu


Testing emotion detection with example sentences (type 'exit' to quit).
Enter your own transcript or press Enter to test predefined sentences.

Testing predefined sentences:
Sentence: 'I don’t care how good you are, just get it done!'
✅ Predicted emotion: Anger (confidence: 0.92)

Sentence: 'What is machine learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'I’m so excited for the concert tonight!'
✅ Predicted emotion: Optimism (confidence: 0.95)

Sentence: 'Why isn’t this working properly?'
✅ Predicted emotion: Joy (confidence: 0.89)

Sentence: 'I’m feeling really down today.'
✅ Predicted emotion: Sadness (confidence: 0.98)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'What is deep learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'Are you serious this what you are giving me come on i did not expected that from you'
✅ Predicted emotion: Anger (confidence: 0.90)

Sentenc

In [None]:
from transformers import pipeline
import re

# Load both emotion classifiers
anger_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True,
    max_length=512,
    framework="pt"
)

other_classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion",
    top_k=None,
    truncation=True,
    max_length=512,
    framework="pt"
)

def clean_text(text):
    """
    Clean WebRTC transcription text by adding punctuation, removing filler words, and normalizing.
    """
    text = re.sub(r'\b(um|uh|like|you know)\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip().lower()
    # Add punctuation for run-on sentences and negative questions
    text = re.sub(r'\b(why isn\'t|what the|are you serious|come on|i did not|this is ridiculous|you kidding|fix it|do better|unacceptable|driving me nuts|broken again|worst service|keep crashing|complete disaster|so annoying|totally unacceptable|fed up|nonsense)\b', r'\1.', text)
    text = re.sub(r'\b(this what|what you)\b', r'\1,', text)
    text = text.replace(' i ', ' I ')
    if not text.endswith('.'):
        text += '.'
    return text

def is_neutral_query(text):
    """
    Rule-based check for neutral queries, ensuring high confidence.
    """
    neutral_patterns = [
        r'^(what|how|when|where|can you|could you|tell me|is it|hey).*?\?$',
        r'^(please|could you|would you|hey).*?(schedule|set|find|look up|tell me|explain|reset|alarm).*'
    ]
    technical_terms = ['machine learning', 'deep learning', 'artificial intelligence', 'data science', 'neural networks', 'ml', 'ai', 'advancements']
    text = text.lower().strip()
    # Mark as neutral if it contains technical terms or neutral patterns
    if any(term in text for term in technical_terms) or any(re.match(pattern, text) for pattern in neutral_patterns):
        return True
    return False

def is_anger_query(text):
    """
    Rule-based check for anger-related queries or statements.
    """
    anger_patterns = [
        r'\b(why isn\'t|what the|are you serious|come on|i did not expect|this is ridiculous|you kidding|fix it|do better|unacceptable|driving me nuts|broken again|worst service|keep crashing|complete disaster|so annoying|totally unacceptable|fed up|nonsense|not working|failing|broken|messed up)\b'
    ]
    text = text.lower().strip()
    return any(re.search(pattern, text) for pattern in anger_patterns)

def predict_emotion_from_text(text):
    """
    Predict emotion using j-hartmann for anger and cardiffnlp for other emotions.
    """
    try:
        # Clean the input text
        text = clean_text(text)
        if not text:
            raise ValueError("Input text is empty after cleaning")

        # Check for neutral queries
        if is_neutral_query(text):
            return "Neutral", 0.95

        # Check for anger queries (rule-based)
        if is_anger_query(text):
            return "Anger", 0.90

        # Check for anger using j-hartmann model
        anger_results = anger_classifier(text)[0]
        anger_score = next((r['score'] for r in anger_results if r['label'] == 'anger'), 0.0)
        if anger_score > 0.6:  # Lowered threshold for better anger detection
            return "Anger", anger_score

        # Use cardiffnlp for other emotions
        other_results = other_classifier(text)[0]
        
        # Suppress joy for negative sentences
        negative_words = ['not', 'isn\'t', 'doesn\'t', 'won\'t', 'can\'t', 'failing', 'broken', 'crashing', 'annoying', 'disaster', 'unacceptable', 'fed up', 'nonsense']
        if any(word in text.lower() for word in negative_words):
            for result in other_results:
                if result['label'] == 'emotion:joy':
                    result['score'] *= 0.05  # Stronger joy suppression

        # Find the emotion with the highest score
        best = max(other_results, key=lambda x: x['score'])
        emotion = best['label'].replace('emotion:', '').capitalize()
        score = best['score']

        # Fallback to Neutral if confidence is low
        if score < 0.7:
            return "Neutral", 0.95

        return emotion, score
    except Exception as e:
        print(f"Error processing text: {e}")
        return "Unknown", 0.0

if __name__ == "__main__":
    # Deduplicated test sentences, including new ones
    test_sentences = [
        "I don’t care how good you are, just get it done!",
        "What is machine learning?",
        "I’m so excited for the concert tonight!",
        "Why isn’t this working properly?",
        "I’m feeling really down today.",
        "Could you please schedule a meeting for next week?",
        "What is deep learning?",
        "Are you serious this what you are giving me come on i did not expected that from you",
        "This is absolutely unacceptable, do better!",
        "Can you explain how neural networks work?",
        "I’m having the worst day ever, nothing is going right.",
        "Wow, I just won a free trip, this is awesome!",
        "What the heck is wrong with this thing, it’s broken again!",
        "Tell me about the latest AI advancements.",
        "I can’t believe you messed this up, fix it now!",
        "I’m so thrilled, my team just nailed the presentation!",
        "Why does this keep failing, it’s driving me nuts!",
        "I feel so lost, nothing makes sense anymore.",
        "Hey, can you set an alarm for 7 AM tomorrow?",
        "You gotta be kidding me, this is the worst service ever!",
        "what is ml?",
        "This system is a complete disaster, get it fixed!",
        "I can’t believe how amazing this party is, best night ever!",
        "Why does this app keep crashing, it’s so annoying!"
    ]
    
    print("Testing emotion detection with example sentences (type 'exit' to quit).")
    print("Enter your own transcript or press Enter to test predefined sentences.")
    
    while True:
        transcript = input("Transcript: ")
        if transcript.lower() == 'exit':
            break
        if not transcript.strip():
            print("\nTesting predefined sentences:")
            for sentence in test_sentences:
                emotion, score = predict_emotion_from_text(sentence)
                print(f"Sentence: '{sentence}'")
                print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")
        else:
            emotion, score = predict_emotion_from_text(transcript)
            print(f"Sentence: '{transcript}'")
            print(f"✅ Predicted emotion: {emotion} (confidence: {score:.2f})\n")

W0520 12:00:21.309000 29156 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
Device set to use cpu
Device set to use cpu


Testing emotion detection with example sentences (type 'exit' to quit).
Enter your own transcript or press Enter to test predefined sentences.

Testing predefined sentences:
Sentence: 'I don’t care how good you are, just get it done!'
✅ Predicted emotion: Anger (confidence: 0.92)

Sentence: 'What is machine learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'I’m so excited for the concert tonight!'
✅ Predicted emotion: Optimism (confidence: 0.95)

Sentence: 'Why isn’t this working properly?'
✅ Predicted emotion: Joy (confidence: 0.89)

Sentence: 'I’m feeling really down today.'
✅ Predicted emotion: Sadness (confidence: 0.98)

Sentence: 'Could you please schedule a meeting for next week?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'What is deep learning?'
✅ Predicted emotion: Neutral (confidence: 0.95)

Sentence: 'Are you serious this what you are giving me come on i did not expected that from you'
✅ Predicted emotion: Anger (confidence: 0.90)

Sentenc