In [5]:
# ============================================================
# CELL 1: Import Libraries
# ============================================================

import os
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

print("Importing libraries...")
print("✓ Libraries imported")


# ============================================================
# CELL 2: Setup Paths
# ============================================================

BASE_DIR = Path(r"C:\Users\VIJAY BHUSHAN SINGH\depression_detection_project")
DATA_DIR = BASE_DIR / "data" / "processed"
RESULTS_DIR = BASE_DIR / "results"
MODELS_DIR = RESULTS_DIR / "models"

for d in [DATA_DIR, RESULTS_DIR, MODELS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

print("✓ Paths configured")


# ============================================================
# CELL 3: Load Data
# ============================================================

train = pd.read_csv(DATA_DIR / "train_data.csv")
val = pd.read_csv(DATA_DIR / "val_data.csv")
test = pd.read_csv(DATA_DIR / "test_data.csv")

print(f"✓ Data loaded: {len(train)} train, {len(val)} val, {len(test)} test")


# ============================================================
# CELL 4: Prepare Features
# ============================================================

label_col = "phq8"

X_train = train.drop(columns=[label_col])
y_train = train[label_col]

X_val = val.drop(columns=[label_col])
y_val = val[label_col]

X_test = test.drop(columns=[label_col])
y_test = test[label_col]

# Split by modality (assuming columns start with audio_, text_, video_)
audio_cols = [c for c in X_train.columns if c.startswith("audio_")]
text_cols = [c for c in X_train.columns if c.startswith("text_")]
video_cols = [c for c in X_train.columns if c.startswith("video_")]

print("Preparing modality-specific features...")
print(f"  Audio: {len(audio_cols)} features")
print(f"  Text: {len(text_cols)} features")
print(f"  Video: {len(video_cols)} features")

# Handle missing text features gracefully
if not text_cols:
    X_train["text_dummy"] = 0
    X_val["text_dummy"] = 0
    X_test["text_dummy"] = 0
    text_cols = ["text_dummy"]

# Standardize
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

X_train = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_val = pd.DataFrame(X_val_scaled, columns=X_val.columns)
X_test = pd.DataFrame(X_test_scaled, columns=X_test.columns)

print("✓ Data normalized successfully")


# ============================================================
# CELL 5: Create PyTorch Datasets and Loaders
# ============================================================

def make_tensor(df, cols):
    return torch.tensor(df[cols].values, dtype=torch.float32)

train_audio = make_tensor(X_train, audio_cols)
train_text = make_tensor(X_train, text_cols)
train_video = make_tensor(X_train, video_cols)
train_y = torch.tensor(y_train.values, dtype=torch.float32)

val_audio = make_tensor(X_val, audio_cols)
val_text = make_tensor(X_val, text_cols)
val_video = make_tensor(X_val, video_cols)
val_y = torch.tensor(y_val.values, dtype=torch.float32)

test_audio = make_tensor(X_test, audio_cols)
test_text = make_tensor(X_test, text_cols)
test_video = make_tensor(X_test, video_cols)
test_y = torch.tensor(y_test.values, dtype=torch.float32)

train_dataset = TensorDataset(train_audio, train_text, train_video, train_y)
val_dataset = TensorDataset(val_audio, val_text, val_video, val_y)
test_dataset = TensorDataset(test_audio, test_text, test_video, test_y)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

print(f"Shapes -> Audio: {train_audio.shape}, Text: {train_text.shape}, Video: {train_video.shape}")
print("✓ Dataloaders ready")


# ============================================================
# CELL 6: Define Model
# ============================================================

class MultiModalAttentionModel(nn.Module):
    def __init__(self, audio_dim, text_dim, video_dim):
        super().__init__()

        self.audio_fc = nn.Sequential(nn.Linear(audio_dim, 64), nn.ReLU())
        self.text_fc = nn.Sequential(nn.Linear(text_dim, 32), nn.ReLU())
        self.video_fc = nn.Sequential(nn.Linear(video_dim, 64), nn.ReLU())

        # Attention layers
        self.attn_audio = nn.Linear(64, 1)
        self.attn_text = nn.Linear(32, 1)
        self.attn_video = nn.Linear(64, 1)

        self.final_fc = nn.Sequential(
            nn.Linear(64 + 32 + 64, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, audio, text, video):
        a_out = self.audio_fc(audio)
        t_out = self.text_fc(text)
        v_out = self.video_fc(video)

        a_w = torch.sigmoid(self.attn_audio(a_out).mean(dim=0))
        t_w = torch.sigmoid(self.attn_text(t_out).mean(dim=0))
        v_w = torch.sigmoid(self.attn_video(v_out).mean(dim=0))

        # Normalize weights
        weights = torch.softmax(torch.stack([a_w, t_w, v_w]), dim=0)
        a_w, t_w, v_w = weights

        fused = torch.cat([a_w * a_out, t_w * t_out, v_w * v_out], dim=1)
        preds = self.final_fc(fused)

        return preds  # returning only preds for simplicity


# ============================================================
# CELL 7: Training Setup
# ============================================================

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = MultiModalAttentionModel(len(audio_cols), len(text_cols), len(video_cols)).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.L1Loss()

print(f"✓ Model ready ({sum(p.numel() for p in model.parameters()):,} params)")
print("✓ Training setup complete")


# ============================================================
# CELL 8: Training Loop
# ============================================================

best_val = float("inf")

for epoch in range(1, 41):
    model.train()
    train_losses = []

    for batch in train_loader:
        audio, text, video, targets = batch
        audio, text, video, targets = audio.to(device), text.to(device), video.to(device), targets.to(device)

        preds = model(audio, text, video).squeeze()
        loss = criterion(preds, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    model.eval()
    val_losses = []

    with torch.no_grad():
        for batch in val_loader:
            audio, text, video, targets = batch
            audio, text, video, targets = audio.to(device), text.to(device), video.to(device), targets.to(device)
            preds = model(audio, text, video).squeeze()
            val_loss = criterion(preds, targets)
            val_losses.append(val_loss.item())

    train_mae = np.mean(train_losses)
    val_mae = np.mean(val_losses)

    if val_mae < best_val:
        best_val = val_mae
        torch.save(model.state_dict(), MODELS_DIR / "attention_model_best.pth")

    if epoch % 5 == 0:
        print(f"Epoch {epoch}/40 - Train MAE: {train_mae:.3f}, Val MAE: {val_mae:.3f}")

print(f"✓ Training done (best Val MAE = {best_val:.3f})")


# ============================================================
# CELL 9: Evaluation
# ============================================================

def evaluate(model, loader, criterion, device):
    model.eval()
    preds_all, targets_all = [], []
    with torch.no_grad():
        for audio, text, video, targets in loader:
            audio, text, video, targets = audio.to(device), text.to(device), video.to(device), targets.to(device)

            outputs = model(audio, text, video)
            if isinstance(outputs, (tuple, list)):
                preds = outputs[0]
            else:
                preds = outputs

            preds_all.extend(preds.squeeze().cpu().numpy())
            targets_all.extend(targets.cpu().numpy())

    mae = mean_absolute_error(targets_all, preds_all)
    r2 = r2_score(targets_all, preds_all)
    return mae, r2, preds_all, targets_all


print("=" * 60)
print("🔍 Evaluating Best Attention Model on Test Set...")
print("=" * 60)

model.load_state_dict(torch.load(MODELS_DIR / "attention_model_best.pth", map_location=device))

test_mae, test_r2, test_preds, test_targets = evaluate(model, test_loader, criterion, device)

print(f"\n📊 Test Results:")
print(f"  MAE: {test_mae:.4f}")
print(f"  R²: {test_r2:.4f}")
print("✓ Evaluation complete ✅")


Importing libraries...
✓ Libraries imported
✓ Paths configured
✓ Data loaded: 11 train, 2 val, 3 test


KeyError: "['phq8'] not found in axis"