In [3]:
import pandas as pd
import numpy as np

# Submission files to blend, each paired (by position) with the score that is
# used as its weight in the weighted average.
# NOTE(review): unlike the others, the first file name has no decimal point in
# its score part -- confirm it matches the file on disk.
files = [
    "submission (8) - 095333.csv",   # score: 0.95333
    "submission (9) - 0.96245.csv",    # score: 0.96245
    "submission_ensemble - 0.96245.csv", # score: 0.96245
    "submission (8) - 0.96218.csv"     # score: 0.96218
]
weights = [0.95333, 0.96245, 0.96245, 0.96218]


def _aligned_predictions(dfs, column: str = "rainfall", key: str = "id") -> np.ndarray:
    """Stack the predictions of every submission in the row order of the first.

    Args:
        dfs: Non-empty sequence of DataFrames, each with ``key`` and ``column``.
        column: Name of the prediction column.
        key: Name of the row-identifier column.

    Returns:
        Float array of shape ``(len(dfs), len(dfs[0]))``; row ``i`` holds the
        predictions of ``dfs[i]`` ordered like ``dfs[0][key]``.

    Raises:
        ValueError: If ``dfs`` is empty, or a submission does not contain
            exactly the same ids as the first one.
    """
    if not dfs:
        raise ValueError("at least one submission is required")

    reference_ids = dfs[0][key].to_numpy()
    stacked = []
    for position, df in enumerate(dfs):
        if np.array_equal(df[key].to_numpy(), reference_ids):
            # Same ids in the same order: take the values positionally.
            stacked.append(df[column].to_numpy(dtype=float))
            continue

        # Row order differs: realign by id instead of silently averaging
        # predictions that belong to different rows.
        by_id = df.set_index(key)[column]
        same_ids = (
            len(by_id) == len(reference_ids)
            and by_id.index.is_unique
            and by_id.index.isin(reference_ids).all()
        )
        if not same_ids:
            raise ValueError(
                f"submission #{position} does not contain the same "
                f"'{key}' values as the first submission"
            )
        stacked.append(by_id.reindex(reference_ids).to_numpy(dtype=float))

    return np.vstack(stacked)


def blend_submissions(dfs, weights, column: str = "rainfall", key: str = "id") -> pd.DataFrame:
    """Blend several submissions with a simple and a weighted average.

    Args:
        dfs: Non-empty sequence of DataFrames, each with ``key`` and ``column``.
        weights: One weight per submission, in the same order as ``dfs``.
        column: Name of the prediction column.
        key: Name of the row-identifier column.

    Returns:
        DataFrame with ``key`` (taken from the first submission) plus
        ``"<column>_simple"`` (plain mean) and ``"<column>_weighted"``
        (weight-normalised mean).

    Raises:
        ValueError: If the number of weights differs from the number of
            submissions, or the submissions do not share the same ids.
        ZeroDivisionError: If the weights sum to zero (raised by ``np.average``).
    """
    if len(dfs) != len(weights):
        # zip() would silently drop the surplus entries and skew the result.
        raise ValueError(
            f"got {len(dfs)} submissions but {len(weights)} weights"
        )

    predictions = _aligned_predictions(dfs, column=column, key=key)

    blended = dfs[0][[key]].copy()
    blended[f"{column}_simple"] = predictions.mean(axis=0)
    blended[f"{column}_weighted"] = np.average(predictions, axis=0, weights=weights)
    return blended


# The guard is true when run as a script or inside a notebook kernel, so the
# names below stay global there; it only keeps the file I/O from running when
# the helpers are imported.
if __name__ == "__main__":
    # Read each CSV into a list of DataFrames
    dfs = [pd.read_csv(f) for f in files]

    # 'id' plus the 'rainfall_simple' and 'rainfall_weighted' blends
    ensemble_df = blend_submissions(dfs, weights)

    # -------------------------------
    # Save the Ensemble Submissions
    # -------------------------------
    # NOTE(review): the saved files keep the 'rainfall_simple' /
    # 'rainfall_weighted' headers rather than the 'rainfall' column the inputs
    # use -- rename before use if the consumer expects 'rainfall'.
    ensemble_df[["id", "rainfall_simple"]].to_csv("submission_simple_ensemble.csv", index=False)
    ensemble_df[["id", "rainfall_weighted"]].to_csv("submission_weighted_ensemble.csv", index=False)

    print("Ensemble submissions saved as:")
    print(" - submission_simple_ensemble.csv")
    print(" - submission_weighted_ensemble.csv")

Ensemble submissions saved as:
 - submission_simple_ensemble.csv
 - submission_weighted_ensemble.csv
