# Notebook 7: Ensembling Strategies

We use:
- Simple weighted average of 4 models (normalized scores) for complementary signals, reduced variance, and robust prediction (less sensitive to hyperparameter choices). 
- Weight grid search for optimal combination

## Best Ensemble Configuration
- **XGB AFT (83 unfixed)**: 50%
- **CoxPH elastic net (128 fixed)**: 5%
- **DeepSurv (83 fixed)**: 15%
- **CatBoost CLF + LGB REG (128 fixed)**: 30%
- **CV Score**: 0.6989 weighted C-index (+0.0025 over best single model)

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import torch
import torch.nn as nn
from catboost import CatBoostClassifier
import lightgbm as lgb
from lifelines import KaplanMeierFitter
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.metrics import concordance_index_ipcw
from sksurv.util import Surv

TRAIN_PATH = '/your_path/SurvivalPrediction/data'

## 1. Load Data and Setup

In [21]:
# Load all datasets
X_83_unfixed_full = pd.read_csv(f'{TRAIN_PATH}/X_train_83features_with_id.csv')
X_83_fixed_scaled_full = pd.read_csv(f'{TRAIN_PATH}/X_train_83features_with_id_fixed_scaled.csv')
X_128_fixed_full = pd.read_csv(f'{TRAIN_PATH}/X_train_128features_with_id_clean_fixed.csv')
X_128_fixed_scaled_full = pd.read_csv(f'{TRAIN_PATH}/X_train_128features_with_id_clean_fixed_scaled.csv')
target = pd.read_csv(f'{TRAIN_PATH}/target_train_clean_aligned.csv')

# Align all datasets with target (filter + reorder to match target's patient sequence)
X_83_unfixed = X_83_unfixed_full.set_index('ID').loc[target['ID']].reset_index(drop=True)
X_83_fixed_scaled = X_83_fixed_scaled_full.set_index('ID').loc[target['ID']].reset_index(drop=True)
X_128_fixed = X_128_fixed_full.set_index('ID').loc[target['ID']].reset_index(drop=True)
X_128_fixed_scaled = X_128_fixed_scaled_full.set_index('ID').loc[target['ID']].reset_index(drop=True)

y_time = target['OS_YEARS'].values
y_event = target['OS_STATUS'].values.astype(bool)
y_surv = Surv.from_arrays(event=y_event, time=y_time)
y_lower = y_time.copy()
y_upper = np.where(y_event, y_time, np.inf)
n_samples = len(target)

# Risk groups
def define_risk_groups(X):
    risk_factors = pd.DataFrame(index=X.index)
    risk_factors['high_blast'] = (X['BM_BLAST'] > 10).astype(int)
    risk_factors['has_TP53'] = (X['has_TP53'] > 0).astype(int)
    risk_factors['low_hb'] = (X['HB'] < 10).astype(int)
    risk_factors['low_plt'] = (X['PLT'] < 50).astype(int)
    risk_factors['high_cyto'] = (X['cyto_risk_score'] >= 3).astype(int)
    n_risk_factors = risk_factors.sum(axis=1)
    return {'test_like': n_risk_factors >= 1, 'high_risk': n_risk_factors >= 2}

risk_groups = define_risk_groups(X_83_unfixed)
has_tp53 = (X_83_unfixed['has_TP53'] > 0).astype(int).values
strat_var = pd.Series([f"{int(e)}_{int(t)}" for e, t in zip(y_event, has_tp53)])

print(f"Samples: {n_samples}")
print(f"X_83_unfixed: {X_83_unfixed.shape}, X_83_fixed_scaled: {X_83_fixed_scaled.shape}")
print(f"X_128_fixed: {X_128_fixed.shape}, X_128_fixed_scaled: {X_128_fixed_scaled.shape}")

Samples: 3120
X_83_unfixed: (3120, 83), X_83_fixed_scaled: (3120, 83)
X_128_fixed: (3120, 128), X_128_fixed_scaled: (3120, 128)


## 3. Model Definitions and Hyperparameters

In [22]:
# Evaluation metric
def weighted_cindex_ipcw(risk, y_surv_all, risk_groups, tau=7.0):
    c_overall = concordance_index_ipcw(y_surv_all, y_surv_all, risk, tau=tau)[0]

    mask_test = risk_groups['test_like'].values
    y_surv_test = Surv.from_arrays(event=y_surv_all['event'][mask_test], time=y_surv_all['time'][mask_test])
    c_test = concordance_index_ipcw(y_surv_all, y_surv_test, risk[mask_test], tau=tau)[0]

    mask_high = risk_groups['high_risk'].values
    y_surv_high = Surv.from_arrays(event=y_surv_all['event'][mask_high], time=y_surv_all['time'][mask_high])
    c_high = concordance_index_ipcw(y_surv_all, y_surv_high, risk[mask_high], tau=tau)[0]

    weighted = 0.3 * c_overall + 0.4 * c_test + 0.3 * c_high
    return {'overall': c_overall, 'test_like': c_test, 'high_risk': c_high, 'weighted': weighted}

# Best hyperparameters from individual model tuning
xgb_aft_params = {
    'n_estimators': 147, 'max_depth': 5, 'learning_rate': 0.026342,
    'min_child_weight': 41, 'subsample': 0.920435, 'colsample_bytree': 0.513695,
    'gamma': 2.829713, 'reg_alpha': 0.095703, 'reg_lambda': 0.446806,
}
xgb_aft_dist = 'normal'

coxph_l1_ratio = 0.4380
coxph_alpha = 0.0391

deepsurv_params = {
    'hidden_layers': [64, 64, 64], 'dropout': 0.6594556222699043, 'activation': 'selu',
    'lr': 0.00011237410700529054, 'batch_size': 32, 'epochs': 58,
}

two_model_params = {'cat_depth': 5, 'cat_iterations': 262, 
                    'cat_learning_rate': 0.019227204273246305, 'cat_l2_leaf_reg': 0.12876998314647772, 
                    'lgb_max_depth': 9, 'lgb_n_estimators': 149, 'lgb_learning_rate': 0.08025255147825751, 
                    'lgb_num_leaves': 36, 'lgb_min_child_samples': 10, 'lgb_subsample': 0.9638486108112962, 
                    'lgb_colsample_bytree': 0.8126162431458441, 'lgb_reg_alpha': 4.6796458896222854e-06, 
                    'lgb_reg_lambda': 4.190436671160808e-07
                    }

print("Hyperparameters loaded for all 4 models.")

Hyperparameters loaded for all 4 models.


## 4. Preliminary Functions

DeepSurv neural network with custom-defined loss, KM sample weights, and merge function for the Two-Model approach.

In [23]:
device = torch.device('cpu')

# Define neural network and loss function; same as in 05_deepsurv_cox.ipynb
class DeepSurvNet(nn.Module):
    def __init__(self, in_features, hidden_layers, dropout=0.1, activation='relu'):
        super().__init__()
        layers = []
        prev_size = in_features
        act_fn = nn.ReLU() if activation == 'relu' else nn.SELU()
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(act_fn)
            layers.append(nn.Dropout(dropout))
            prev_size = hidden_size
        layers.append(nn.Linear(prev_size, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

def cox_ph_loss(risk_pred, time, event):
    """Cox partial likelihood loss."""
    # sort by survival time (descending)
    sorted_indices = torch.argsort(time, descending=True) 
    sorted_risk = risk_pred[sorted_indices]
    sorted_event = event[sorted_indices]

    # compute risk set denominators log(sum of exp)
    log_risk = torch.logcumsumexp(sorted_risk, dim=0)

    # partial likelihood contribution 
    uncensored_likelihood = sorted_risk - log_risk

    # mask out censored patients
    censored_likelihood = uncensored_likelihood * sorted_event

    # negative log partial likelihood
    return -torch.sum(censored_likelihood)

# Same functions as in 06_two_model_approach.ipynb
# Sample weight computation
def compute_sample_weights(times, events):
    """
    Events (deaths): weight = 1.0
    Censored: weight = F(t) / F_max (KM cumulative density)
    """

    # Fit Kaplan_Meier curve
    kmf_event = KaplanMeierFitter()
    kmf_event.fit(times, event_observed=events)
    
    # Get maximum cdf
    t_max = times.max() # maximum observed time
    F_max = kmf_event.cumulative_density_at_times([t_max]).values[0] #maximum cdf value
    F_max = max(F_max, 0.01) # clipping to avoid division by zero

    # Assign weights to samples
    weights = np.zeros(len(times))
    for i in range(len(times)):
        if events[i] == 1:
            weights[i] = 1.0
        else:
            F_t = kmf_event.cumulative_density_at_times([times[i]]).values[0]
            weights[i] = F_t / F_max
            
    # normalize so average weight = 1.0
    weights = weights / weights.mean()
    return weights

# Merge function: combine classifier and regressor predictions
def merge_predictions(clf_pred, reg_pred, time_min, time_max):
    # normalized predicted times then clip to [0,1]
    pred_time_norm = (reg_pred - time_min) / (time_max - time_min + 1e-8)
    pred_time_norm = np.clip(pred_time_norm, 0, 1) 

    # compute odds of predicted population deaths
    avg_pred_event = np.mean(clf_pred)
    odds = avg_pred_event / (1 - avg_pred_event + 1e-8)
    odds = np.clip(odds, 0.1, 10)

    # compute score
    risk = clf_pred * (1 + odds * (1 - pred_time_norm))
    return risk

print("Model definitions loaded: DeepSurvNet, cox_ph_loss, compute_sample_weights, merge_predictions")

Model definitions loaded: DeepSurvNet, cox_ph_loss, compute_sample_weights, merge_predictions


## 5. Generate OOF Predictions

Train all 4 models in 5-fold CV, collecting out-of-fold predictions for each.

In [25]:
n_splits = 5
seed = 42
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)

oof_xgb_aft = np.zeros(n_samples)
oof_coxph = np.zeros(n_samples)
oof_deepsurv = np.zeros(n_samples)
oof_twomodel = np.zeros(n_samples)

print("Running 5-fold CV for all 4 models...\n")

for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X_83_unfixed, strat_var)):
    print(f"--- Fold {fold_idx+1}/{n_splits} ---")

    # Model 1: XGB AFT 83 unfixed
    print("  XGB AFT...", end=" ")
    X_tr = X_83_unfixed.iloc[train_idx].values
    X_val = X_83_unfixed.iloc[val_idx].values
    dtrain = xgb.DMatrix(X_tr)
    dtrain.set_float_info('label_lower_bound', y_lower[train_idx])
    dtrain.set_float_info('label_upper_bound', y_upper[train_idx])
    dval = xgb.DMatrix(X_val)
    params = {
        'objective': 'survival:aft', 'eval_metric': 'aft-nloglik',
        'aft_loss_distribution': xgb_aft_dist, 'aft_loss_distribution_scale': 1.0,
        'tree_method': 'hist', 'seed': seed + fold_idx,
        **{k: v for k, v in xgb_aft_params.items() if k != 'n_estimators'},
    }
    model = xgb.train(params, dtrain, num_boost_round=xgb_aft_params['n_estimators'], verbose_eval=False)
    oof_xgb_aft[val_idx] = -model.predict(dval)
    print("done")

    # Model 2: CoxPH 128 fixed (elastic net)
    print("  CoxPH...", end=" ")
    X_tr_128 = X_128_fixed.iloc[train_idx].values
    X_val_128 = X_128_fixed.iloc[val_idx].values
    scaler = StandardScaler()
    X_tr_128_scaled = scaler.fit_transform(X_tr_128)
    X_val_128_scaled = scaler.transform(X_val_128)
    y_surv_tr = Surv.from_arrays(event=y_event[train_idx], time=y_time[train_idx])
    coxph = CoxnetSurvivalAnalysis(l1_ratio=coxph_l1_ratio, alphas=[coxph_alpha])
    coxph.fit(X_tr_128_scaled, y_surv_tr)
    oof_coxph[val_idx] = coxph.predict(X_val_128_scaled)
    print("done")

    # Model 3: DeepSurv 83 fixed
    print("  DeepSurv...", end=" ")
    X_tr_ds = X_83_fixed_scaled.iloc[train_idx].values
    X_val_ds = X_83_fixed_scaled.iloc[val_idx].values
    torch.manual_seed(seed + fold_idx)
    np.random.seed(seed + fold_idx)
    ds_model = DeepSurvNet(X_tr_ds.shape[1], deepsurv_params['hidden_layers'],
                           deepsurv_params['dropout'], deepsurv_params['activation']).to(device)
    optimizer = torch.optim.Adam(ds_model.parameters(), lr=deepsurv_params['lr'], weight_decay=1e-4)
    X_tensor = torch.FloatTensor(X_tr_ds).to(device)
    time_tensor = torch.FloatTensor(y_time[train_idx]).to(device)
    event_tensor = torch.FloatTensor(y_event[train_idx].astype(float)).to(device)
    batch_size = deepsurv_params['batch_size']
    n_train = len(X_tr_ds)
    for epoch in range(deepsurv_params['epochs']):
        ds_model.train()
        indices = np.random.permutation(n_train)
        for start in range(0, n_train, batch_size):
            end = min(start + batch_size, n_train)
            batch_idx = indices[start:end]
            if len(batch_idx) < 10:
                continue
            optimizer.zero_grad()
            risk_pred = ds_model(X_tensor[batch_idx]).squeeze()
            loss = cox_ph_loss(risk_pred, time_tensor[batch_idx], event_tensor[batch_idx])
            loss.backward()
            optimizer.step()
    ds_model.eval()
    with torch.no_grad():
        oof_deepsurv[val_idx] = ds_model(torch.FloatTensor(X_val_ds).to(device)).squeeze().cpu().numpy()
    print("done")

    # Model 4: CatBoost CLF + LightGBM REG (4th place sample weights)
    print("  CatBoost CLF + LGB REG...", end=" ")
    X_tr_tm = X_128_fixed_scaled.iloc[train_idx].values
    X_val_tm = X_128_fixed_scaled.iloc[val_idx].values
    y_time_tr = y_time[train_idx]
    y_event_tr = y_event[train_idx].astype(int)
    clf_weights = compute_sample_weights(y_time_tr, y_event_tr)
    cat_clf = CatBoostClassifier(
        depth=int(two_model_params.get('cat_depth', 5)),
        iterations=int(two_model_params.get('cat_iterations', 262)),
        learning_rate=two_model_params.get('cat_learning_rate', 0.019227204273246305),
        l2_leaf_reg=two_model_params.get('cat_l2_leaf_reg', 0.12876998314647772),
        random_seed=seed + fold_idx, verbose=False, allow_writing_files=False,
    )
    cat_clf.fit(X_tr_tm, y_event_tr, sample_weight=clf_weights)
    clf_pred = cat_clf.predict_proba(X_val_tm)[:, 1]
    event_mask = y_event_tr == 1
    lgb_reg = lgb.LGBMRegressor(
        max_depth=int(two_model_params.get('lgb_max_depth', 9)),
        n_estimators=int(two_model_params.get('lgb_n_estimators', 149)),
        learning_rate=two_model_params.get('lgb_learning_rate', 0.08025255147825751),
        num_leaves=int(two_model_params.get('lgb_num_leaves', 36)),
        min_child_samples=int(two_model_params.get('lgb_min_child_samples', 10)),
        subsample=two_model_params.get('lgb_subsample', 0.9638486108112962),
        colsample_bytree=two_model_params.get('lgb_colsample_bytree', 0.8126162431458441),
        reg_alpha=two_model_params.get('lgb_reg_alpha', 4.6796458896222854e-06),
        reg_lambda=two_model_params.get('lgb_reg_lambda', 4.190436671160808e-07),
        random_state=seed + fold_idx, verbosity=-1,
    )
    lgb_reg.fit(X_tr_tm[event_mask], y_time_tr[event_mask])
    reg_pred = lgb_reg.predict(X_val_tm)
    oof_twomodel[val_idx] = merge_predictions(clf_pred, reg_pred, y_time_tr.min(), y_time_tr.max())
    print("done")

print("\nOOF predictions generated for all 4 models!")

Running 5-fold CV for all 4 models...

--- Fold 1/5 ---
  XGB AFT... done
  CoxPH... done
  DeepSurv... done
  CatBoost CLF + LGB REG... done
--- Fold 2/5 ---
  XGB AFT... done
  CoxPH... done
  DeepSurv... done
  CatBoost CLF + LGB REG... done
--- Fold 3/5 ---
  XGB AFT... done
  CoxPH... done
  DeepSurv... done
  CatBoost CLF + LGB REG... done
--- Fold 4/5 ---
  XGB AFT... done
  CoxPH... done
  DeepSurv... done
  CatBoost CLF + LGB REG... done
--- Fold 5/5 ---
  XGB AFT... done
  CoxPH... done
  DeepSurv... done
  CatBoost CLF + LGB REG... done

OOF predictions generated for all 4 models!


## 6. Normalize and Evaluate Individual Models

Z-score normalization ensures all models contribute on the same scale.

In [26]:
# Z-score normalize OOF predictions
oof_xgb_aft_norm = (oof_xgb_aft - oof_xgb_aft.mean()) / (oof_xgb_aft.std() + 1e-8)
oof_coxph_norm = (oof_coxph - oof_coxph.mean()) / (oof_coxph.std() + 1e-8)
oof_deepsurv_norm = (oof_deepsurv - oof_deepsurv.mean()) / (oof_deepsurv.std() + 1e-8)
oof_twomodel_norm = (oof_twomodel - oof_twomodel.mean()) / (oof_twomodel.std() + 1e-8)

# Evaluate individual models
print("Individual Model Performance (OOF, competition metric):")
print(f"{'Model':<30} {'Overall':>8} {'Test-like':>10} {'High-risk':>10} {'Weighted':>9}")
print("-" * 70)

for name, preds in [('XGB AFT 83 unfixed', oof_xgb_aft_norm),
                     ('CoxPH 128 fixed', oof_coxph_norm),
                     ('DeepSurv 83 fixed', oof_deepsurv_norm),
                     ('CatBoost CLF + LGB REG', oof_twomodel_norm)]:
    r = weighted_cindex_ipcw(preds, y_surv, risk_groups)
    print(f"{name:<30} {r['overall']:>8.4f} {r['test_like']:>10.4f} {r['high_risk']:>10.4f} {r['weighted']:>9.4f}")

Individual Model Performance (OOF, competition metric):
Model                           Overall  Test-like  High-risk  Weighted
----------------------------------------------------------------------
XGB AFT 83 unfixed               0.7214     0.6967     0.6709    0.6964
CoxPH 128 fixed                  0.7175     0.6931     0.6607    0.6907
DeepSurv 83 fixed                0.7169     0.6918     0.6614    0.6902
CatBoost CLF + LGB REG           0.7184     0.6937     0.6608    0.6912


## 7. Weight Grid Search

Search over all valid 4-weight combinations (step=0.05, 1,771 total) to find the optimal ensemble.

In [31]:
grid_step = 0.05
grid_values = np.arange(0, 1 + grid_step/2, grid_step)

# Generate all valid weight combinations (sum = 1)
valid_combinations = []
for w1 in grid_values:
    for w2 in grid_values:
        for w3 in grid_values:
            w4 = 1 - w1 - w2 - w3
            if w4 >= -1e-9 and w4 <= 1 + 1e-9:
                valid_combinations.append((w1, w2, w3, max(0, min(1, w4))))

print(f"Grid step: {grid_step}")
print(f"Total valid weight combinations: {len(valid_combinations)}")


best_weighted = 0
best_weights = None
best_results = None
all_results = []

print("Searching...")
for w1, w2, w3, w4 in valid_combinations:
    ensemble_pred = (w1 * oof_xgb_aft_norm + w2 * oof_coxph_norm +
                     w3 * oof_deepsurv_norm + w4 * oof_twomodel_norm)
    results = weighted_cindex_ipcw(ensemble_pred, y_surv, risk_groups)
    all_results.append({
        'w_xgb_aft': w1, 'w_coxph': w2, 'w_deepsurv': w3, 'w_twomodel': w4,
        'overall': results['overall'], 'test_like': results['test_like'],
        'high_risk': results['high_risk'], 'weighted': results['weighted'],
    })
    if results['weighted'] > best_weighted:
        best_weighted = results['weighted']
        best_weights = (w1, w2, w3, w4)
        best_results = results

print(f"\nBest weights: XGB={best_weights[0]:.2f}, Cox={best_weights[1]:.2f}, "
      f"DS={best_weights[2]:.2f}, TM={best_weights[3]:.2f}")
print(f"Best weighted C-index: {best_results['weighted']:.4f}")

# Top 10 combinations
results_df = pd.DataFrame(all_results).sort_values('weighted', ascending=False)
print(f"\nTop 10 Weight Combinations:")
print(f"{'XGB':>6} {'Cox':>6} {'DS':>6} {'TM':>6} | {'Weighted':>8}")
print("-" * 42)
for _, row in results_df.head(10).iterrows():
    print(f"{row['w_xgb_aft']:6.2f} {row['w_coxph']:6.2f} {row['w_deepsurv']:6.2f} "
          f"{row['w_twomodel']:6.2f} | {row['weighted']:8.4f}")

# Comparison with best single model
single_best = weighted_cindex_ipcw(oof_xgb_aft_norm, y_surv, risk_groups)
print(f"\nImprovement over best single model: {best_results['weighted'] - single_best['weighted']:+.4f}")

Grid step: 0.05
Total valid weight combinations: 1771
Searching...

Best weights: XGB=0.60, Cox=0.00, DS=0.20, TM=0.20
Best weighted C-index: 0.6999

Top 10 Weight Combinations:
   XGB    Cox     DS     TM | Weighted
------------------------------------------
  0.60   0.00   0.20   0.20 |   0.6999
  0.55   0.05   0.15   0.25 |   0.6998
  0.55   0.00   0.20   0.25 |   0.6998
  0.55   0.05   0.20   0.20 |   0.6998
  0.65   0.00   0.20   0.15 |   0.6998
  0.60   0.05   0.15   0.20 |   0.6998
  0.50   0.05   0.15   0.30 |   0.6998
  0.55   0.00   0.25   0.20 |   0.6998
  0.50   0.00   0.25   0.25 |   0.6998
  0.50   0.05   0.20   0.25 |   0.6997

Improvement over best single model: +0.0035


## 8. Train Final Models and Generate Test Predictions

In [36]:
# Load test data
print("Loading test data...")
X_test_83_unfixed = pd.read_csv(f'{TRAIN_PATH}/X_test_83features_with_id.csv')
X_test_83_fixed_scaled = pd.read_csv(f'{TRAIN_PATH}/X_test_83features_with_id_fixed_scaled.csv')
X_test_128_fixed_scaled = pd.read_csv(f'{TRAIN_PATH}/X_test_128features_with_id_fixed_scaled.csv')

# Extract IDs and remove from features
test_ids = X_test_83_unfixed['ID'].values
X_test_83_unfixed = X_test_83_unfixed.drop(columns=['ID'])
X_test_83_fixed_scaled = X_test_83_fixed_scaled.drop(columns=['ID'])
X_test_128_fixed_scaled = X_test_128_fixed_scaled.drop(columns=['ID'])

print(f"Test samples: {len(test_ids)}")

# Train final models on full training data
print("\nTraining final models on full training data...")

# Model 1: XGB AFT 83 unfixed
print("  Training XGB AFT...", end=" ")
X_train_full = X_83_unfixed.values
dtrain_full = xgb.DMatrix(X_train_full)
dtrain_full.set_float_info('label_lower_bound', y_lower)
dtrain_full.set_float_info('label_upper_bound', y_upper)
dtest = xgb.DMatrix(X_test_83_unfixed.values)
params_xgb = {
    'objective': 'survival:aft', 'eval_metric': 'aft-nloglik',
    'aft_loss_distribution': xgb_aft_dist, 'aft_loss_distribution_scale': 1.0,
    'tree_method': 'hist', 'seed': seed,
    **{k: v for k, v in xgb_aft_params.items() if k != 'n_estimators'},
}
model_xgb = xgb.train(params_xgb, dtrain_full, num_boost_round=xgb_aft_params['n_estimators'], verbose_eval=False)
test_pred_xgb_aft = -model_xgb.predict(dtest)
print("done")

# Model 2: CoxPH 128 fixed
print("  Training CoxPH...", end=" ")
X_train_128_full = X_128_fixed.values
scaler_cox = StandardScaler()
X_train_128_scaled = scaler_cox.fit_transform(X_train_128_full)
X_test_128_scaled = scaler_cox.transform(X_test_128_fixed_scaled.values)
y_surv_full = Surv.from_arrays(event=y_event, time=y_time)
coxph_final = CoxnetSurvivalAnalysis(l1_ratio=coxph_l1_ratio, alphas=[coxph_alpha])
coxph_final.fit(X_train_128_scaled, y_surv_full)
test_pred_coxph = coxph_final.predict(X_test_128_scaled)
print("done")

# Model 3: DeepSurv 83 fixed
print("  Training DeepSurv...", end=" ")
X_train_ds_full = X_83_fixed_scaled.values
X_test_ds = X_test_83_fixed_scaled.values
torch.manual_seed(seed)
np.random.seed(seed)
ds_final = DeepSurvNet(X_train_ds_full.shape[1], deepsurv_params['hidden_layers'],
                       deepsurv_params['dropout'], deepsurv_params['activation']).to(device)
optimizer_ds = torch.optim.Adam(ds_final.parameters(), lr=deepsurv_params['lr'], weight_decay=1e-4)
X_tensor_full = torch.FloatTensor(X_train_ds_full).to(device)
time_tensor_full = torch.FloatTensor(y_time).to(device)
event_tensor_full = torch.FloatTensor(y_event.astype(float)).to(device)
batch_size_ds = deepsurv_params['batch_size']
n_train_full = len(X_train_ds_full)
for epoch in range(deepsurv_params['epochs']):
    ds_final.train()
    indices = np.random.permutation(n_train_full)
    for start in range(0, n_train_full, batch_size_ds):
        end = min(start + batch_size_ds, n_train_full)
        batch_idx = indices[start:end]
        if len(batch_idx) < 10:
            continue
        optimizer_ds.zero_grad()
        risk_pred = ds_final(X_tensor_full[batch_idx]).squeeze()
        loss = cox_ph_loss(risk_pred, time_tensor_full[batch_idx], event_tensor_full[batch_idx])
        loss.backward()
        optimizer_ds.step()
ds_final.eval()
with torch.no_grad():
    test_pred_deepsurv = ds_final(torch.FloatTensor(X_test_ds).to(device)).squeeze().cpu().numpy()
print("done")

# Model 4: CatBoost CLF + LightGBM REG
print("  Training CatBoost CLF + LGB REG...", end=" ")
X_train_tm_full = X_128_fixed_scaled.values
X_test_tm = X_test_128_fixed_scaled.values
clf_weights_full = compute_sample_weights(y_time, y_event.astype(int))
cat_clf_final = CatBoostClassifier(
    depth=int(two_model_params.get('cat_depth', 5)),
    iterations=int(two_model_params.get('cat_iterations', 262)),
    learning_rate=two_model_params.get('cat_learning_rate', 0.019227204273246305),
    l2_leaf_reg=two_model_params.get('cat_l2_leaf_reg', 0.12876998314647772),
    random_seed=seed, verbose=False, allow_writing_files=False,
)
cat_clf_final.fit(X_train_tm_full, y_event.astype(int), sample_weight=clf_weights_full)
test_clf_pred = cat_clf_final.predict_proba(X_test_tm)[:, 1]
event_mask_full = y_event == 1
lgb_reg_final = lgb.LGBMRegressor(
    max_depth=int(two_model_params.get('lgb_max_depth', 9)),
    n_estimators=int(two_model_params.get('lgb_n_estimators', 149)),
    learning_rate=two_model_params.get('lgb_learning_rate', 0.08025255147825751),
    num_leaves=int(two_model_params.get('lgb_num_leaves', 36)),
    min_child_samples=int(two_model_params.get('lgb_min_child_samples', 10)),
    subsample=two_model_params.get('lgb_subsample', 0.9638486108112962),
    colsample_bytree=two_model_params.get('lgb_colsample_bytree', 0.8126162431458441),
    reg_alpha=two_model_params.get('lgb_reg_alpha', 4.6796458896222854e-06),
    reg_lambda=two_model_params.get('lgb_reg_lambda', 4.190436671160808e-07),
    random_state=seed, verbosity=-1,
)
lgb_reg_final.fit(X_train_tm_full[event_mask_full], y_time[event_mask_full])
test_reg_pred = lgb_reg_final.predict(X_test_tm)
test_pred_twomodel = merge_predictions(test_clf_pred, test_reg_pred, y_time.min(), y_time.max())
print("done")

# Z-score normalize test predictions
print("\nNormalizing test predictions...")
test_pred_xgb_aft_norm = (test_pred_xgb_aft - test_pred_xgb_aft.mean()) / (test_pred_xgb_aft.std() + 1e-8)
test_pred_coxph_norm = (test_pred_coxph - test_pred_coxph.mean()) / (test_pred_coxph.std() + 1e-8)
test_pred_deepsurv_norm = (test_pred_deepsurv - test_pred_deepsurv.mean()) / (test_pred_deepsurv.std() + 1e-8)
test_pred_twomodel_norm = (test_pred_twomodel - test_pred_twomodel.mean()) / (test_pred_twomodel.std() + 1e-8)

# Combine using best weights
w_xgb, w_cox, w_ds, w_tm = 0.50, 0.05, 0.15, 0.30
ensemble_pred = (w_xgb * test_pred_xgb_aft_norm + 
                 w_cox * test_pred_coxph_norm + 
                 w_ds * test_pred_deepsurv_norm + 
                 w_tm * test_pred_twomodel_norm)

print(f"\nEnsemble weights: XGB={w_xgb:.2f}, Cox={w_cox:.2f}, DS={w_ds:.2f}, TM={w_tm:.2f}")

# Create submission file
submission = pd.DataFrame({
    'ID': test_ids,
    'risk_score': ensemble_pred
})

# Save to outputs/submissions/
import os
os.makedirs('outputs/submissions', exist_ok=True)
output_path = 'outputs/submissions/submission_ensemble_4model.csv'
submission.to_csv(output_path, index=False)

print(f"\nSubmission file created: {output_path}")
print(f"  Shape: {submission.shape}")
print(f"  Risk range: [{ensemble_pred.min():.4f}, {ensemble_pred.max():.4f}]")
print(f"\nFirst few predictions:")
print(submission.head(10))

Loading test data...
Test samples: 1193

Training final models on full training data...
  Training XGB AFT... done
  Training CoxPH... done
  Training DeepSurv... done
  Training CatBoost CLF + LGB REG... done

Normalizing test predictions...

Ensemble weights: XGB=0.50, Cox=0.05, DS=0.15, TM=0.30

Submission file created: outputs/submissions/submission_ensemble_4model.csv
  Shape: (1193, 2)
  Risk range: [-3.6665, 1.4591]

First few predictions:
      ID  risk_score
0   KYW1    0.771414
1   KYW2    0.851775
2   KYW3    0.111809
3   KYW4    0.757784
4   KYW5    0.684432
5   KYW6    0.440218
6   KYW7    0.269404
7   KYW8    0.404195
8   KYW9   -1.593878
9  KYW10   -1.093464


## Summary

### Final Ensemble Configuration

Selected weights (most even distribution at 0.6998/0.6999 level):

| Component | Weight | Individual Score |
|-----------|--------|------------------|
| XGB AFT 83 unfixed | **50%** | 0.6964 |
| CoxPH elastic net 128 fixed | 5% | 0.6907 |
| DeepSurv 83 fixed | 15% | 0.6902 |
| CatBoost CLF + LGB REG 128 fixed | **30%** | 0.6912 |
| **Ensemble** | | **0.6998** |

#### - Public leaderboard score: 0.7632

## Potentially useful next steps:
- Bagging using different seeds
- More extensive hyperparameter tuning
- PCA for feature engineering (preliminary analysis not useful but more extensive analysis may be)
- Autoencoder for compressing categorical/binary features