## Finding Core Experiments via Sequential Feature Selection

We found that Robert's experiments imply very similar rankings of models, meaning that not all of the experiments are necessary to get a good estimate of human-machine EC.

In fact, one should probably invest trials differently: rather than measuring more corruptions, one should run more trials per condition to get stable values.

Here, we compile a dataframe of shape n_samples x n_experiments, where n_experiments is 17, with one column for each of Robert's experiments. The n_samples are bootstraps, with all models thrown into one bucket, i.e., bootstrap i of model j is one sample, for all values of i and j. We scatter all of these points into a 17-dimensional space and want to find the dimensions (experiments) that best reconstruct the full 17-dimensional data.

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the bootstrapped per-model EC values from disk.
standard_df = pd.read_parquet(
    "data/model_wise_bootstrapped_ecs_standard_1000.parquet", engine="pyarrow"
)

# Robert's overall EC is a mean over per-experiment means, so we first collapse
# every (bootstrap, experiment, model) group to the mean of its numeric columns.
# NOTE(review): the order of averaging inside an experiment (conditions first
# vs. humans first vs. all at once) is claimed not to matter; this presumably
# relies on every cell holding the same number of entries -- confirm.
group_keys = ["bootstrap_id", "experiment", "model"]
exp_mean_df = standard_df.groupby(group_keys, observed=True, as_index=False).mean(
    numeric_only=True
)
display(exp_mean_df)

# A few of the group means are NaN: show them and the models they belong to.
ec_is_nan = exp_mean_df["model-human-ec"].isna()
nan_df = exp_mean_df[ec_is_nan]
display(nan_df)
print(nan_df["model"].unique())

# Any (bootstrap_id, model) pair with at least one NaN experiment mean is
# discarded as a whole, not just the offending experiment row.
pair_keys = ["bootstrap_id", "model"]
nan_combinations = nan_df[pair_keys].drop_duplicates()

# Anti-join: a left merge with an indicator column marks the rows whose pair
# does not occur in `nan_combinations` as "left_only"; only those are kept.
df_filtered = exp_mean_df.merge(
    nan_combinations, on=pair_keys, how="left", indicator=True
)
keep_row = df_filtered["_merge"] == "left_only"
exp_mean_df_no_nan = df_filtered[keep_row].drop(columns=["_merge"])

display(exp_mean_df_no_nan)

In [None]:
# Reshape to wide form: one row per (bootstrap_id, model) pair and one column
# per experiment, holding the mean model-human EC of that experiment.
pivoted = exp_mean_df_no_nan.pivot(
    index=["bootstrap_id", "model"],
    columns=["experiment"],
    values="model-human-ec",
)
pivoted = pivoted.reset_index()
display(pivoted)

# Keep only the experiment columns; the identifier columns are not features.
trimmed = pivoted.drop(columns=["bootstrap_id", "model"])
datasets = trimmed.columns

# Column position -> experiment name, to translate selected feature indices back.
idx_to_dataset = dict(enumerate(datasets))

# Plain float matrix of shape (n_samples, n_experiments).
data = trimmed.to_numpy(dtype=float)
print(data.shape)

In [None]:
# learn Sequential Feature Selector
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, explained_variance_score
from sklearn.preprocessing import StandardScaler

# --- Hold out 20% of the samples to evaluate the reconstruction ---
# NOTE(review): each row is one (bootstrap, model) pair, so bootstraps of the
# same model can end up in both splits -- confirm this is intended.
X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)

# --- Standardise every experiment column ---
# The scaler is fitted on the training split only and then applied to both.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Estimator used inside the selector ---
# Multi-output linear regression: a subset of the columns goes in, the full
# standardised vector comes out.
estimator = LinearRegression()

# Per-step results, one entry per number of selected features.
mses = []  # test-set reconstruction MSE
exp_vars = []  # test-set explained variance
ordered_features = []  # index of the feature that was newly added at this step

features_last_it = []  # selection of the previous step, to spot the newcomer
n_experiments = X_train_scaled.shape[1]

# Re-run forward selection for 1 .. n_experiments - 1 features.
for n_features in range(1, n_experiments):

    # Forward selection that minimises the 3-fold cross-validated MSE of
    # regressing the chosen columns onto all columns (literally X -> X).
    sfs = SequentialFeatureSelector(
        estimator,
        n_features_to_select=n_features,
        direction="forward",
        scoring="neg_mean_squared_error",
        cv=3,
        n_jobs=-1,
    )
    sfs.fit(X_train_scaled, X_train_scaled)

    # The feature added in this step is the first index that was not part of
    # the previous step's selection.
    selected_features = sfs.get_support(indices=True)
    newly_added = [f for f in selected_features if f not in features_last_it]
    new_feature = newly_added[0]
    ordered_features.append(new_feature)
    features_last_it = selected_features.copy()
    print(f"Selected feature indices: {selected_features}, new feature: {new_feature}")

    # Refit on the whole training split using only the selected columns, then
    # reconstruct the held-out samples from their selected columns.
    train_subset = X_train_scaled[:, selected_features]
    test_subset = X_test_scaled[:, selected_features]
    final_model = LinearRegression().fit(train_subset, X_train_scaled)
    reconstruction = final_model.predict(test_subset)

    # Reconstruction quality on the held-out samples, as MSE and as
    # variance-weighted explained variance.
    mse = mean_squared_error(X_test_scaled, reconstruction)
    score = explained_variance_score(
        X_test_scaled, reconstruction, multioutput="variance_weighted"
    )

    print(f"Test reconstruction MSE: {mse:.4f}")
    mses.append(mse)

    print(f"Test reconstruction Explained Variance: {score:.4f}")
    exp_vars.append(score)

In [None]:
# Collect the per-step results of the selection loop in one table:
# row k describes the run that selected k + 1 features.
n_steps = len(mses)

# Translate the newly added feature indices back to experiment names.
selected_experiments = [idx_to_dataset[feature] for feature in ordered_features]

df = pd.DataFrame(
    {
        "MSE": mses,
        "Explained Variance": exp_vars,
        "feature_idx": ordered_features,
        "experiment": selected_experiments,
        "n_features": np.arange(1, n_steps + 1),
    }
)
display(df)

In [None]:
# Plot the test reconstruction MSE against the number of selected experiments.
# Each x tick is labelled with the experiment that was added at that step.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("figures", exist_ok=True)

# Derive the x range from the results table instead of hard-coding 16, so the
# plot stays correct when the number of experiments changes.
n_selected = len(df)

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.grid(axis="y")
sns.lineplot(data=df, x="n_features", y="MSE", ax=ax)
sns.despine()
ax.set_xlim(1, n_selected)
ax.set_ylim(0, df["MSE"].max())
ax.set_xlabel("Selected Experiments")
ax.set_ylabel("Reconstruction MSE")
ax.set_xticks(ticks=np.arange(1, n_selected + 1), labels=df["experiment"].tolist())
ax.tick_params(axis="x", labelrotation=90)
plt.tight_layout()
plt.savefig("figures/sfs_result_mse.pdf", bbox_inches="tight")
plt.show()

In [None]:
# Plot the test explained variance against the number of selected experiments.
# Each x tick is labelled with the experiment that was added at that step.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("figures", exist_ok=True)

# Derive the x range from the results table instead of hard-coding 16, so the
# plot stays correct when the number of experiments changes.
n_selected = len(df)

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.grid(axis="y")
sns.lineplot(data=df, x="n_features", y="Explained Variance", ax=ax)
sns.despine()
ax.set_xlim(1, n_selected)
ax.set_ylim(0, 1)
ax.set_xlabel("Selected Experiments")
ax.set_ylabel("Explained Variance")
ax.set_xticks(ticks=np.arange(1, n_selected + 1), labels=df["experiment"].tolist())
ax.tick_params(axis="x", labelrotation=90)
plt.tight_layout()
plt.savefig("figures/sfs_result_expvar.pdf", bbox_inches="tight")
plt.show()