<a href="https://colab.research.google.com/github/vedikahatolkar/Moodsensefinal/blob/main/moodsense_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Audio + DSP packages
!pip install sounddevice scipy

# Core transformers stack (compatible with Py3.12)
!pip install --upgrade transformers datasets accelerate safetensors tokenizers

# ML packages
!pip install scikit-learn librosa soundfile matplotlib tensorflow keras

# SentencePiece (working version for Py3.12)
!pip install sentencepiece==0.1.99

# Upgrade build tools
!python -m pip install --upgrade pip setuptools wheel

In [None]:
import numpy as np
import pandas as pd
import torch
import librosa
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments


In [None]:
print("Loading GoEmotions...")
raw = load_dataset("go_emotions")

# Fine-grained GoEmotions label -> coarse mood. Labels that do not appear here
# are resolved to 'neutral' by the lookup performed in map_emotion.
map_dict = {
    **dict.fromkeys(('admiration', 'amusement', 'joy', 'love', 'optimism', 'approval'), 'happy'),
    **dict.fromkeys(('sadness', 'disappointment', 'grief'), 'sad'),
    **dict.fromkeys(('anger', 'annoyance', 'disgust'), 'angry'),
    **dict.fromkeys(('fear', 'nervousness'), 'fear'),
}

# Index -> label-name table for the integer ids stored in the 'labels' column.
label_names = raw['train'].features['labels'].feature.names

def map_emotion(example):
    """Attach a single coarse ``emotion`` string to a GoEmotions example.

    Every label id in ``example["labels"]`` is translated to its name via the
    module-level ``label_names`` and then to a coarse mood via ``map_dict``
    (unmapped names count as ``"neutral"``).

    The first mood that is *not* neutral wins. Previously the first label
    always won, so a multi-label example whose first label was unmapped was
    marked neutral even when a later label carried a mapped emotion.

    Args:
        example: Mapping with a ``"labels"`` sequence of integer label ids.

    Returns:
        The same ``example`` object with an added ``"emotion"`` key.
    """
    mapped = [map_dict.get(label_names[i], 'neutral') for i in example['labels']]
    # Falls back to neutral for empty label lists and all-unmapped examples.
    example["emotion"] = next((mood for mood in mapped if mood != 'neutral'), 'neutral')
    return example

# Add the coarse 'emotion' column, then drop the columns that are no longer needed.
raw = raw.map(map_emotion).remove_columns(["id", "labels"])

# Peek at one processed training example.
raw["train"][0]


In [None]:
# Checkpoint name used for the tokenizer here and for the classifier loaded later.
MODEL_TEXT = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_TEXT)

def tokenize(batch, max_length=128):
    """Tokenize a batch of examples to a fixed, bounded sequence length.

    Args:
        batch: Mapping with a ``"text"`` entry holding the raw strings.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    # Without an explicit max_length, padding="max_length" pads every example
    # up to the tokenizer's model maximum, which makes training needlessly
    # slow and memory-hungry when the texts are short.
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Apply `tokenize` to the dataset in batches.
proc = raw.map(tokenize, batched=True)


In [None]:
# Sorted class names so the id assignment is the same on every run.
unique_emotions = sorted(set(proc["train"]["emotion"]))

# Forward and reverse lookup tables between class name and integer id.
label2id = {name: idx for idx, name in enumerate(unique_emotions)}
id2label = dict(enumerate(unique_emotions))

def encode_labels(example):
    """Store the integer class id for the example's emotion under ``"labels"``.

    The id is looked up in the module-level ``label2id`` table; the example is
    modified in place and returned.
    """
    emotion_name = example["emotion"]
    example["labels"] = label2id[emotion_name]
    return example

# Swap the string 'emotion' column for the integer 'labels' column.
proc = proc.map(encode_labels).remove_columns(["emotion"])

# Expose only the model inputs and the target, formatted as torch tensors.
proc.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

label2id


In [None]:
# Load the pretrained checkpoint with a classification head sized to the
# label set; both label lookup tables are passed through to from_pretrained.
model_text = AutoModelForSequenceClassification.from_pretrained(
    MODEL_TEXT,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id
)


In [None]:
# Hyper-parameters for fine-tuning the text classifier.
training_args = TrainingArguments(
    output_dir="./bert_emotion_model",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=3e-5,
    num_train_epochs=2,
    weight_decay=0.01,
    do_eval=True,
    # `eval_strategy` is the current argument name; older transformers
    # releases spell it `evaluation_strategy`, so this is NOT version-agnostic.
    eval_strategy="steps",
    save_strategy="steps",
    eval_steps=500,
    save_steps=500,
    # A checkpoint is written every `save_steps`; cap how many are kept so
    # the output directory does not grow without bound during training.
    save_total_limit=2,
    logging_steps=200,
)


In [None]:
from datasets import DatasetDict
from sklearn.model_selection import train_test_split

# Only row indices are needed for the split, so ask the dataset for its length
# instead of materialising whole tensor columns just to call len() on them.
n_examples = len(proc["train"])

# Split 80/20 (fixed seed so the partition is reproducible)
train_idx, test_idx = train_test_split(
    range(n_examples),
    test_size=0.2,
    random_state=42,
    shuffle=True
)

# Build new HF DatasetDict from the two index lists
train_dataset = proc["train"].select(train_idx)
test_dataset = proc["train"].select(test_idx)

new_data = DatasetDict({
    "train": train_dataset,
    "test": test_dataset
})

print(new_data)


In [None]:
trainer = Trainer(
    model=model_text,
    args=training_args,
    train_dataset=new_data["train"],   # 80% training
    eval_dataset=new_data["test"]      # 20% testing
)

trainer.train()

# Persist the final weights together with the tokenizer files so the saved
# directory can be reloaded on its own (the Trainer above is not given the
# tokenizer, so it would not write those files itself).
trainer.save_model("./bert_emotion_model")
tokenizer.save_pretrained("./bert_emotion_model")
print("Model training complete!")


In [None]:
def predict_text_emotion(text):
    """Classify a single text and return its emotion label.

    Uses the module-level ``tokenizer``, ``model_text`` and ``id2label``.

    Args:
        text: The sentence to classify.

    Returns:
        The label string that ``id2label`` associates with the top-scoring class.
    """
    # The model may still be in training mode after fine-tuning, and
    # torch.no_grad() does not disable dropout; switch to eval mode so the
    # prediction is deterministic.
    model_text.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Move inputs to the model's device
    inputs = {key: value.to(model_text.device) for key, value in inputs.items()}

    with torch.no_grad():
        logits = model_text(**inputs).logits

    # Softmax is monotonic, so the argmax of the logits is the predicted class.
    # Index row 0 explicitly so the id always refers to one example's classes.
    pred = int(logits[0].argmax(dim=-1).item())
    return id2label[pred]

# Quick smoke test of the text classifier on one sentence.
print(predict_text_emotion("I am feeling very sad today"))

In [None]:
def extract_mfcc(file, max_len=200):
    """Load an audio file and return a fixed-width MFCC matrix.

    The audio is loaded at 22050 Hz and 40 MFCC coefficients are computed.
    The frame axis (axis 1) is then zero-padded on the right, or cut, so that
    it has exactly ``max_len`` columns.

    Args:
        file: Audio source passed straight to ``librosa.load``.
        max_len: Number of frames in the returned matrix.

    Returns:
        Array with ``max_len`` columns along axis 1.
    """
    signal, rate = librosa.load(file, sr=22050)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

    n_frames = coeffs.shape[1]
    if n_frames >= max_len:
        # Long enough already: keep only the leading frames.
        return coeffs[:, :max_len]

    # Too short: append zero frames on the right.
    missing = max_len - n_frames
    return np.pad(coeffs, ((0, 0), (0, missing)), mode="constant")


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout

def build_cnn(input_shape=(40, 200, 1), num_classes=5):
    """Build and compile a small 2-D CNN classifier.

    Two convolution + max-pooling stages feed a 128-unit dense layer with
    dropout and a softmax output. The model is compiled with the Adam
    optimizer and categorical cross-entropy loss, tracking accuracy.

    Args:
        input_shape: Shape of one input sample; the default matches the
            ``(1, 40, 200, 1)`` batch built by ``predict_speech_emotion``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled model (not trained).
    """
    model = Sequential([
        Conv2D(32, (3,3), activation="relu", input_shape=input_shape),
        MaxPool2D((2,2)),
        Conv2D(64, (3,3), activation="relu"),
        MaxPool2D((2,2)),
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.3),
        Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model

# Instantiate the speech CNN and print its layer summary.
# NOTE(review): no training or weight-loading call for model_speech is visible
# in this notebook; if none exists elsewhere, its predictions come from freshly
# initialised weights — confirm before relying on the audio emotion.
model_speech = build_cnn()
model_speech.summary()


In [None]:
def predict_speech_emotion(file):
    """Predict an emotion label for an audio file with the speech CNN.

    MFCC features from ``extract_mfcc`` are reshaped to a single-sample batch
    of shape ``(1, 40, 200, 1)`` and scored by the module-level
    ``model_speech``; the label at the highest-scoring index is returned.

    NOTE(review): the label order below is hard-coded; confirm it matches the
    class order the speech model was trained with.
    """
    classes = ["angry","happy","sad","fear","neutral"]

    features = extract_mfcc(file)
    batch = features.reshape(1,40,200,1)
    scores = model_speech.predict(batch)[0]

    best = np.argmax(scores)
    return classes[best]


In [None]:
def chatbot_respond(text, emotion):
    """Return the canned chatbot reply for a detected emotion.

    Args:
        text: The user's message. Currently unused; kept so existing callers
            keep working.
        emotion: One of "happy", "sad", "angry", "fear" or "neutral".

    Returns:
        The reply for ``emotion``; any other value gets the neutral reply.
    """
    responses = {
        "happy": "I'm glad to hear that! What made you feel happy today?",
        # \u2014 is an em dash; the literal had been corrupted into mojibake.
        "sad": "I'm sorry you're feeling sad. I'm here for you \u2014 want to talk about it?",
        "angry": "It sounds frustrating. What made you feel this way?",
        "fear": "It's okay to feel anxious. Do you want to share what's worrying you?",
        "neutral": "Thanks for sharing. How are you feeling overall?"
    }
    return responses.get(emotion, responses["neutral"])


In [None]:
def moodsense(text, audio_file=None):
    """Combine text and (optional) audio emotion into one decision and reply.

    The text emotion is always computed. When ``audio_file`` is given, the
    audio emotion is computed as well; any exception raised while doing so is
    printed and the audio result is ignored. A non-neutral audio emotion
    overrides the text emotion; otherwise the text emotion is used.

    Args:
        text: The user's message.
        audio_file: Optional audio source for ``predict_speech_emotion``.

    Returns:
        Tuple ``(final_emotion, reply)``.
    """
    text_em = predict_text_emotion(text)

    # Best effort: a failing audio pipeline must not block the text answer.
    audio_em = None
    if audio_file:
        try:
            audio_em = predict_speech_emotion(audio_file)
        except Exception as e:
            print(f"Error processing audio: {e}")
            audio_em = None

    # Audio wins only when it produced a definite, non-neutral emotion.
    audio_is_decisive = audio_em is not None and audio_em != "neutral"
    final_em = audio_em if audio_is_decisive else text_em

    reply = chatbot_respond(text, final_em)

    for caption, value in (
        ("Text Emotion:", text_em),
        ("Audio Emotion:", audio_em),
        ("Final Decision:", final_em),
        ("\nChatbot Response:\n", reply),
    ):
        print(caption, value)

    return final_em, reply

In [None]:
# Text-only demo: no audio file is passed, so only the text emotion is used.
moodsense("I am feeling really excited about this project!")

In [None]:
from google.colab import output
import base64
from IPython.display import Audio

def record_audio(duration_ms=5000):
    """Record from the browser microphone and hand the clip to the kernel.

    Runs JavaScript in the Colab front end that records for ``duration_ms``
    milliseconds, releases the microphone, and encodes the clip as a base64
    data URL. The data URL is delivered two ways:

    * as the result of the evaluated script, which is passed straight to
      ``save_audio`` so the clip is available as soon as this call returns;
    * through the registered ``notebook.saveAudio`` kernel callback, as
      before, in case no result is returned.

    Args:
        duration_ms: Recording length in milliseconds (default 5000).

    Returns:
        None. The recording is stored by ``save_audio``.
    """
    js = """
    async function record(durationMs) {
      const stream = await navigator.mediaDevices.getUserMedia({audio: true});
      const recorder = new MediaRecorder(stream);
      const chunks = [];

      recorder.ondataavailable = e => chunks.push(e.data);
      // Resolves once the recorder has flushed its final chunk.
      const stopped = new Promise(resolve => { recorder.onstop = resolve; });

      recorder.start();
      await new Promise(resolve => setTimeout(resolve, durationMs));
      recorder.stop();
      await stopped;

      // Release the microphone so the browser stops capturing.
      stream.getTracks().forEach(track => track.stop());

      // NOTE: the 'audio/wav' type only labels the blob; the bytes stay in
      // whatever container MediaRecorder produced.
      const blob = new Blob(chunks, {type: 'audio/wav'});
      const dataUrl = await new Promise(resolve => {
        const reader = new FileReader();
        reader.onloadend = () => resolve(reader.result);
        reader.readAsDataURL(blob);
      });

      google.colab.kernel.invokeFunction('notebook.saveAudio', [dataUrl], {});
      return dataUrl;
    }
    record(%d);
    """ % int(duration_ms)

    data_url = output.eval_js(js)
    # Store the clip right away when the script result is available, so a
    # following cell does not race against the asynchronous kernel callback.
    if isinstance(data_url, str) and "," in data_url:
        save_audio(data_url)

# Most recent microphone capture as raw bytes; None until a recording arrives.
audio_bytes = None

def save_audio(data):
    """Decode a base64 data URL and keep the bytes in ``audio_bytes``.

    Args:
        data: String of the form ``"<header>,<base64 payload>"``; the part
            after the first comma is decoded.
    """
    global audio_bytes
    encoded_payload = data.split(',')[1]
    audio_bytes = base64.b64decode(encoded_payload)

# Expose save_audio to the front end under the name the recording script invokes.
output.register_callback('notebook.saveAudio', save_audio)


In [None]:
# \U0001F3A4 is the microphone emoji; the literal had been corrupted into mojibake.
print("\U0001F3A4 Recording for 5 seconds...")
record_audio()


In [None]:
from IPython.display import display

if audio_bytes:
    with open("mic_input.wav", "wb") as f:
        f.write(audio_bytes)
    print("Audio saved as mic_input.wav")
    # Only offer playback for a clip that was written just now; previously the
    # player was created even when nothing had been captured.
    display(Audio("mic_input.wav"))
else:
    print("No audio captured yet - run the recording cell and wait for it to finish.")


In [None]:
# Text + audio demo using the clip written to mic_input.wav by the previous cell.
moodsense("Here is my spoken input", "mic_input.wav")


In [None]:
!zip -r bert_emotion_model.zip bert_emotion_model

In [None]:
from google.colab import files
# Send the zipped model directory to the browser as a download.
files.download("bert_emotion_model.zip")