In [57]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# basic imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline

In [58]:
# Fetch the gzip-compressed challenge CSV from opendata.cern.ch into the working
# directory, then decompress it; `gunzip -f` overwrites an already-extracted file.
!curl https://opendata.cern.ch/record/328/files/atlas-higgs-challenge-2014-v2.csv.gz -o atlas-higgs-challenge-2014-v2.csv.gz
!gunzip -f atlas-higgs-challenge-2014-v2.csv.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 62.5M  100 62.5M    0     0  9939k      0  0:00:06  0:00:06 --:--:-- 15.2M


In [59]:
# Load the extracted CSV and encode the class label numerically: 'b' -> 0, 's' -> 1.
df = pd.read_csv('atlas-higgs-challenge-2014-v2.csv')

df['Label'] = df['Label'].map({'b': 0, 's': 1})

# Split on the KaggleSet column: rows tagged 't' are used for training,
# rows tagged 'b' are held out for testing.
training_mask = df['KaggleSet'] == 't'
test_mask = df['KaggleSet'] == 'b'

# The 30 input features fed to the networks ('Weight' is deliberately left out).
feature_columns = [
    'DER_mass_MMC', 'DER_mass_transverse_met_lep', 'DER_mass_vis', 'DER_pt_h',
    'DER_deltaeta_jet_jet', 'DER_mass_jet_jet', 'DER_prodeta_jet_jet',
    'DER_deltar_tau_lep', 'DER_pt_tot', 'DER_sum_pt', 'DER_pt_ratio_lep_tau',
    'DER_met_phi_centrality', 'DER_lep_eta_centrality', 'PRI_tau_pt',
    'PRI_tau_eta', 'PRI_tau_phi', 'PRI_lep_pt', 'PRI_lep_eta', 'PRI_lep_phi',
    'PRI_met', 'PRI_met_phi', 'PRI_met_sumet', 'PRI_jet_num', 'PRI_jet_leading_pt',
    'PRI_jet_leading_eta', 'PRI_jet_leading_phi', 'PRI_jet_subleading_pt',
    'PRI_jet_subleading_eta', 'PRI_jet_subleading_phi', 'PRI_jet_all_pt',
    # 'Weight'
]

# .loc selects rows and columns in a single step (no chained indexing), and
# torch.tensor(..., dtype=float32) replaces the legacy FloatTensor constructor.
X_train = torch.tensor(df.loc[training_mask, feature_columns].to_numpy(), dtype=torch.float32)
y_train = torch.tensor(df.loc[training_mask, 'Label'].to_numpy(), dtype=torch.float32)
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

X_test = torch.tensor(df.loc[test_mask, feature_columns].to_numpy(), dtype=torch.float32)
y_test = torch.tensor(df.loc[test_mask, 'Label'].to_numpy(), dtype=torch.float32)
test_dataset = TensorDataset(X_test, y_test)
# The test loader must NOT shuffle: predictions are written out in iteration
# order, so shuffling would break their alignment with y_test (and with each
# other when the loader is iterated more than once).
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [60]:
def create_model(architecture, input_dim=30):
    """Build a fully connected sigmoid network as an ``nn.Sequential``.

    Args:
        architecture: Iterable of layer widths. One ``Linear`` + ``Sigmoid``
            pair is created per entry, in order.
        input_dim: Number of input features expected by the first layer.

    Returns:
        An ``nn.Sequential`` whose last two modules are ``Linear(..., 1)`` and
        ``Sigmoid``, so the network emits one value in [0, 1] per sample.
    """
    # Full chain of widths: input -> requested layers -> single output unit.
    widths = [input_dim, *architecture, 1]
    modules = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules += [nn.Linear(fan_in, fan_out), nn.Sigmoid()]
    return nn.Sequential(*modules)

In [61]:
# Define the ensemble architectures as lists of HIDDEN-layer widths only.
# create_model() appends the single-unit sigmoid output layer itself, so a
# trailing 1 here would insert a 1-unit sigmoid bottleneck followed by a
# redundant Linear(1, 1) output layer.
architectures = [
    [50],
    [50, 25],
    [50, 50, 25]
]

# Seed the global RNG so the randomly initialised weights are reproducible
# across Restart & Run All.
torch.manual_seed(42)

# Create 108 models in total (36 models for each architecture)
ensemble_models = [create_model(arch) for arch in architectures for _ in range(36)]

In [62]:
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=50):
    """Train a single model in place with mini-batch gradient descent.

    Args:
        model: Network to train; it is moved to ``device`` and put in train mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the model and every batch are moved to.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. The model's parameters are updated in place.
    """
    model.to(device)
    model.train()
    for _ in range(num_epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # Drop only the trailing feature dimension: [batch, 1] -> [batch].
            # A bare squeeze() would also remove the batch dimension when the
            # last batch holds a single sample, and the resulting 0-d output
            # would no longer match the [1]-shaped targets in the loss.
            outputs = model(inputs).squeeze(-1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

In [63]:
def ensemble_predict(models, data_loader, device):
    """Average the predictions of several models over a data loader.

    The loader is iterated exactly once and every model is evaluated on the
    same batch, so all models are guaranteed to score identical samples even
    if the loader shuffles. Results are written at a running offset, so a
    shorter final batch is placed correctly.

    Args:
        models: Non-empty sequence of models mapping a batch of inputs to one
            value per sample (e.g. output shape ``[batch, 1]``).
        data_loader: Loader yielding ``(inputs, targets)`` batches; the
            targets are ignored.
        device: Device the models and inputs are moved to.

    Returns:
        A 1-D CPU tensor with the mean prediction per sample, in the order
        the loader yielded the samples.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if len(models) == 0:
        raise ValueError("ensemble_predict requires at least one model")

    for model in models:
        model.to(device)
        model.eval()

    mean_predictions = torch.zeros(len(data_loader.dataset))
    offset = 0  # index of the first sample of the current batch
    with torch.no_grad():
        for inputs, _ in data_loader:
            inputs = inputs.to(device)
            batch_size = inputs.size(0)
            # reshape(-1) flattens [batch, 1] -> [batch] and keeps a 1-sample
            # batch one-dimensional; stack gives [n_models, batch].
            per_model = torch.stack([model(inputs).reshape(-1) for model in models])
            mean_predictions[offset:offset + batch_size] = per_model.mean(dim=0).cpu()
            offset += batch_size
    # Only return the entries that were actually filled (relevant if the
    # loader yields fewer samples than len(dataset), e.g. with drop_last).
    return mean_predictions[:offset]

In [64]:
# Binary cross-entropy loss, shared by every ensemble member.
criterion = nn.BCELoss()
# One independent Adam optimizer per model, all using the same learning rate.
optimizers = [
    optim.Adam(params=net.parameters(), lr=0.001)
    for net in ensemble_models
]

In [65]:
# Pick the compute backend by priority: CUDA first, then MPS, otherwise CPU.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [None]:
# Train every ensemble member with its own optimizer on the shared training data.
for model, optimizer in zip(ensemble_models, optimizers):
    train_model(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        num_epochs=50,
    )

In [None]:
# Average the per-sample outputs of all ensemble members over the test loader.
predictions = ensemble_predict(
    models=ensemble_models,
    data_loader=test_loader,
    device=device,
)
print("Ensemble predictions:", predictions)