In [16]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from scipy.stats import pearsonr

# Load data
feat = pd.read_csv("features.csv")
scr = pd.read_csv("scores.csv")
demo = pd.read_csv("turker_scores_full_interview_old.csv")

# Merge and rename
m1 = pd.merge(feat, scr, left_on="id", right_on="Participant")
data = pd.merge(m1, demo, left_on="id", right_on="Participant")

if 'Overall_x' in data.columns:
    data = data.rename(columns={'Overall_x': 'PHQ8_Score'})

# Add simulated demographics (replace with actual if available)
np.random.seed(1)
data["Gender"] = np.random.choice(["Male", "Female"], len(data))
data["Race"] = np.random.choice(["White", "African American", "Hispanic"], len(data))

# Target
y = data["PHQ8_Score"]

# Drop non-numeric and demographic fields for X
X = data.drop(columns=["id", "Participant_x", "Participant_y", "Excited_x", "PHQ8_Score", "Gender", "Race"], errors='ignore')
X = X.select_dtypes(include=[np.number])

# Identify top-k gender and race features and remove them
gender_labels = pd.Series(np.where(data["Gender"] == "Female", 1, 0))
race_labels = pd.Series(data["Race"].map({"White": 0, "African American": 1, "Hispanic": 2}))

def select_top_features(X, labels, k):
    selector = SelectKBest(score_func=f_classif, k=k)
    selector.fit(X, labels)
    return X.columns[selector.get_support()].tolist()

top_gender_feats = select_top_features(X, gender_labels, 5)
top_race_feats = select_top_features(X, race_labels, 5)
removed_feats = list(set(top_gender_feats + top_race_feats))

X_clean = X.drop(columns=removed_feats)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_clean)

# Tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)

# Neural net
class SimpleNet(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu1 = nn.ReLU()
        self.drop = nn.Dropout(0.3)
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(32, 1)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.drop(x)
        x = self.relu2(self.fc2(x))
        return self.out(x)

# Cross-validation with group-weighted loss (simulated in-processing)
kf = KFold(n_splits=5, shuffle=True, random_state=1)
r_vals, re_vals = [], []
nn_preds = np.zeros_like(y)

for train_idx, test_idx in kf.split(X_tensor):
    X_train, X_test = X_tensor[train_idx], X_tensor[test_idx]
    y_train, y_test = y_tensor[train_idx], y_tensor[test_idx]

    # Group weights to emphasize fairness (simulated in-processing)
    gender_train = gender_labels.iloc[train_idx].values
    group_weights = torch.tensor(np.where(gender_train == 1, 1.2, 1.0), dtype=torch.float32).view(-1, 1)

    model = SimpleNet(X_tensor.shape[1])
    opt = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss(reduction='none')

    for epoch in range(2500):
        model.train()
        opt.zero_grad()
        out = model(X_train)
        base_loss = loss_fn(out, y_train)
        weighted_loss = (base_loss * group_weights).mean()
        weighted_loss.backward()
        opt.step()

    model.eval()
    with torch.no_grad():
        preds = model(X_test).squeeze().numpy()
        true = y_test.numpy().flatten()
        nn_preds[test_idx] = preds
        r = pearsonr(preds, true)[0]
        re = np.mean(np.abs(preds - true) / 24.0)
        r_vals.append(r)
        re_vals.append(re)

# Overall results
print("Average Pearson r:", round(np.mean(r_vals), 4))
print("Average Relative Error (RE):", round(np.mean(re_vals), 4))

# Group-wise evaluation
def evaluate_by_group(data, preds, true):
    data = data.copy()
    data["preds"] = preds
    data["true"] = true
    for group_cols in [["Gender"], ["Race"], ["Gender", "Race"]]:
        print("\nGroup:", " x ".join(group_cols))
        for group, group_df in data.groupby(group_cols):
            r = pearsonr(group_df["preds"], group_df["true"])[0]
            re = np.mean(np.abs(group_df["preds"] - group_df["true"]) / 24.0)
            print(f"{group}: r = {r:.4f}, RE = {re:.4f}")

def find_worst_group(data, preds, true):
    data = data.copy()
    data["preds"] = preds
    data["true"] = true
    worst, max_re = None, -1
    for group, group_df in data.groupby(["Gender", "Race"]):
        re = np.mean(np.abs(group_df["preds"] - group_df["true"]) / 24.0)
        if re > max_re:
            worst = group
            max_re = re
    print("\nWorst performing group:", worst, "with RE =", round(max_re, 4))

# Report per demographic group
evaluate_by_group(data, nn_preds, y_tensor.numpy().flatten())
find_worst_group(data, nn_preds, y_tensor.numpy().flatten())


Average Pearson r: 0.7211
Average Relative Error (RE): 0.0146

Group: Gender
('Female',): r = 0.7203, RE = 0.0148
('Male',): r = 0.7259, RE = 0.0144

Group: Race
('African American',): r = 0.7367, RE = 0.0143
('Hispanic',): r = 0.6946, RE = 0.0151
('White',): r = 0.7396, RE = 0.0142

Group: Gender x Race
('Female', 'African American'): r = 0.7389, RE = 0.0151
('Female', 'Hispanic'): r = 0.6727, RE = 0.0150
('Female', 'White'): r = 0.7497, RE = 0.0141
('Male', 'African American'): r = 0.7352, RE = 0.0135
('Male', 'Hispanic'): r = 0.7168, RE = 0.0152
('Male', 'White'): r = 0.7302, RE = 0.0144

Worst performing group: ('Male', 'Hispanic') with RE = 0.0152
