Training notebook. Trains a simple gradient-based quadratic classification model to compute $W$, then saves the trained model for use in `founder-rank.ipynb`.

In [14]:
import sys
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
from pathlib import Path
import pickle

# Put the parent directory on the import path so the `src` package imported
# below can be resolved from this notebook's working directory.
sys.path.append("..")

from src.models.trainer import create_model_and_trainer
from src.config.config import cfg
from src.viz.model_viz import (
    plot_model_performance,
    plot_feature_importance,
    plot_ranked_feature_importance,
    plot_sample_predictions,
    display_final_metrics,
    plot_top_feature_interactions,
)
from src.data.preprocessing import load_and_preprocess_data

# Compact NumPy array printing: 2 decimals, no scientific notation, wide lines.
np.set_printoptions(
    precision=2,
    suppress=True,
    linewidth=120,
)


### Hyperparameters

In [None]:
# Optional: print the hyperparameters stored inside a previously saved model
# pickle. Uncomment to run; the pickle file must already exist on disk.
# NOTE(review): this reads "founder_rank_best.pkl", whereas the save cell at the
# end of this notebook writes "founder_rank.pkl" -- confirm which file is intended.
# model_dir = Path("../models")
# with open(model_dir / "founder_rank_best.pkl", 'rb') as f:
#     model_data = pickle.load(f)

# print("Hyperparameters used:")
# for key, value in model_data['hyperparameters'].items():
#     print(f"{key}: {value}")

In [16]:
# Single configuration dict for the run. It is passed whole to
# `load_and_preprocess_data` and `create_model_and_trainer`, and is stored
# alongside the trained model when the artefact is pickled.
hyp = {
    # Input CSVs: one "<name>_encoded_with_outcomes.csv" per source.
    "data_paths": [
        f"../data/encoded/{source}_encoded_with_outcomes.csv"
        for source in ("S21", "W21", "S17", "W17", "top_companies")
    ],
    "synthetic_data_path": "../data/synth/encoded_founders_composites.csv",
    "test_size": 0.125,
    "val_size": 0.125,
    "random_state": 42,
    "batch_size": 32,
    "lr": 0.0005,
    "weight_decay": 1.8e-3,
    "epochs": 1500,
    # Use the GPU when one is visible to torch, otherwise the CPU.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "exclude_columns": ["success", "exit_value", "funding_amount", "batch"],
    "target_column": "success",

    # Regularization parameters
    "diag_penalty": 0.0002,
    "l1_penalty": 0.0004,
    "top_k_penalty": 0.0005,
    "top_k": 15,
    "dropout": 0.15,

    # Training parameters
    "log_every": 10,
    "early_stopping_patience": 250,
}

### Training

In [None]:

# Seed the global RNGs so a fresh "Restart & Run All" is repeatable. The
# DataLoader below shuffles using torch's global generator; model
# initialisation and dropout presumably draw from it as well (they live in
# `src.models.trainer`, not visible here).
np.random.seed(hyp["random_state"])
torch.manual_seed(hyp["random_state"])

# Load the data and unpack the per-split tensors. Only the training split
# carries a third tensor (`is_synthetic`).
tensors, feature_names, scaler = load_and_preprocess_data(hyp)

X_train_tensor, y_train_tensor, is_synthetic_train_tensor = tensors['train']
X_val_tensor, y_val_tensor = tensors['val']
X_test_tensor, y_test_tensor = tensors['test']

train_dataset = TensorDataset(X_train_tensor, y_train_tensor, is_synthetic_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=hyp["batch_size"], shuffle=True)

input_dim = X_train_tensor.shape[1]

# Class-imbalance weight: (#negatives / #positives) in the training labels.
# Fail loudly when there are no positives instead of silently passing `inf`
# to the trainer, and divide as floats explicitly rather than relying on
# integer-tensor division semantics.
n_negative = (y_train_tensor == 0).sum()
n_positive = (y_train_tensor == 1).sum()
if n_positive.item() == 0:
    raise ValueError(
        "Training split contains no positive examples; cannot compute pos_weight."
    )
pos_weight = n_negative.float() / n_positive.float()

model, trainer = create_model_and_trainer(input_dim, hyp, pos_weight=pos_weight)

# `trainer.train` returns the value kept as `W_init`; the post-training W is
# read back from the model and converted to a NumPy array on the CPU.
W_init = trainer.train(train_loader, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor)

W_final = model.get_W().detach().cpu().numpy()

# Per-split (probabilities, predictions) pairs, reused by the metrics cell.
predictions = trainer.predict(X_train_tensor, X_val_tensor, X_test_tensor)
train_probs, train_preds = predictions['train']
val_probs, val_preds = predictions['val']
test_probs, test_preds = predictions['test']

### Model Performance

In [None]:
# Plot training performance from the trainer, together with `W_init`
# (the value returned by `trainer.train` in the training cell).
plot_model_performance(trainer, W_init)

### Features

In [None]:
# Three views of the trained W (`W_final`, a NumPy array taken from
# `model.get_W()` after training), labelled with `feature_names`.
plot_feature_importance(W_final, feature_names)
plot_ranked_feature_importance(W_final, feature_names)
plot_top_feature_interactions(W_final, feature_names)

### Samples

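Note: the samples are deliberately chosen to include some true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) for reference.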

In [None]:
# Copy the test tensors to host memory before converting to NumPy:
# `Tensor.numpy()` raises for tensors that live on a CUDA device, and
# `.cpu()` is a no-op when they are already on the CPU. This matches how the
# metrics cell converts the label tensors.
plot_sample_predictions(
    model,
    X_test_tensor.cpu().numpy(),
    y_test_tensor.cpu().numpy(),
    cfg.MATRIX,
    scaler,
    n_samples=10,
    device=hyp['device'],
)

### Metrics

Note: NDCG (normalized discounted cumulative gain) is the evaluation metric.

In [None]:

display_final_metrics(trainer)

# One (title, true-label tensor, predictions) entry per split, in print order.
report_inputs = (
    ("TRAIN SET CLASSIFICATION REPORT", y_train_tensor, train_preds),
    ("VALIDATION SET CLASSIFICATION REPORT", y_val_tensor, val_preds),
    ("TEST SET CLASSIFICATION REPORT", y_test_tensor, test_preds),
)
banner = "=" * 80

# Print a banner-framed scikit-learn classification report for each split.
# `zero_division=0` reports 0 for undefined precision/recall instead of warning.
for title, y_true, y_pred in report_inputs:
    print("\n" + banner)
    print(title)
    print(banner)
    print(classification_report(y_true.cpu().numpy(), y_pred, zero_division=0))

In [18]:
# Output directory for the trained artefact; create it (and any missing
# parents) if it does not exist yet.
model_dir = Path("../models")
model_dir.mkdir(parents=True, exist_ok=True)

# The artefact is written with plain `pickle`, so there is no `map_location`
# at load time: tensors pickled on a CUDA device cannot be restored on a
# CPU-only machine. Store CPU copies of every tensor in the state dict
# (no-op when the model is already on the CPU), keeping the mapping type and
# the `_metadata` attribute that torch attaches to state dicts.
state_dict = model.state_dict()
cpu_state_dict = type(state_dict)(
    (key, value.detach().cpu() if torch.is_tensor(value) else value)
    for key, value in state_dict.items()
)
if hasattr(state_dict, "_metadata"):
    cpu_state_dict._metadata = state_dict._metadata

# Everything needed to reuse the model later: weights, W before/after
# training, feature names, the fitted scaler's state and the hyperparameters.
model_data = {
    'model_state_dict': cpu_state_dict,
    'W_init': W_init,
    'W_final': model.get_W().detach().cpu().numpy(),
    'feature_names': feature_names,
    'scaler_state': scaler.__getstate__(),
    'hyperparameters': hyp
}

with open(model_dir / "founder_rank.pkl", 'wb') as f:
    pickle.dump(model_data, f)
