# AFML Hackathon Part 1 - Team 44_XLR8
## Neural Network Weight Denoising

**Goal**: Achieve NMSE < 0.3 by learning the noise residual with a residual MLP (ensembling is mentioned as a strategy but is not implemented in this notebook)

In [None]:
# Mount Google Drive (if using Colab)
# Only a missing `google.colab` package is treated as "not on Colab". Any other
# failure (Drive mount error, wrong working directory) is allowed to surface
# instead of being silently reported as a local run.
try:
    from google.colab import drive
except ImportError:
    print("✓ Running locally")
else:
    import os
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/AFML_KAAGLE')  # Adjust path as needed
    print("✓ Running on Google Colab")

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt

# Seed NumPy and PyTorch so data splits and weight initialisation repeat
# across runs, then pick the compute device in the same CUDA check.
np.random.seed(42)
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Using device: {device}")

## Load Data

In [None]:
# Read the clean/noisy training CSVs and the test CSV from the working
# directory, keeping only the underlying NumPy arrays.
print("Loading data...")
train_clean, train_noise, test_data = (
    pd.read_csv(csv_name).to_numpy()
    for csv_name in ('train-part1-clean.csv', 'train-part1-noise.csv', 'test-part1.csv')
)

print(f"Clean: {train_clean.shape}, Noisy: {train_noise.shape}, Test: {test_data.shape}")

## Strategy: Residual Learning
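Instead of predicting the clean weights directly, the model predicts the noise (`noisy - clean`); the predicted noise is then subtracted from the noisy input to recover the clean weights.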

In [None]:
# Regression target: the additive noise, i.e. noisy minus clean.
train_noise_residual = np.subtract(train_noise, train_clean)

# Hold out 10% of the rows for validation. Inputs are the noisy weights,
# targets are the noise residuals.
X_train, X_val, y_train, y_val = train_test_split(
    train_noise,
    train_noise_residual,
    test_size=0.1,
    random_state=42,
)

print(f"Training: {X_train.shape}, Validation: {X_val.shape}")

In [None]:
# Standardise inputs and targets. Both scalers are fitted on the training
# split only and then reused for the validation and test data.
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)

X_train_scaled = scaler_X.transform(X_train)
X_val_scaled = scaler_X.transform(X_val)
test_scaled = scaler_X.transform(test_data)

y_train_scaled = scaler_y.transform(y_train)
y_val_scaled = scaler_y.transform(y_val)

# Move everything to the selected device as float32 tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float32, device=device)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32, device=device)
y_val_tensor = torch.tensor(y_val_scaled, dtype=torch.float32, device=device)
test_tensor = torch.tensor(test_scaled, dtype=torch.float32, device=device)

## Improved Model with Residual Connections

In [None]:
class ResidualBlock(nn.Module):
    """Two Linear + BatchNorm layers wrapped in an identity skip connection.

    Computes ``relu(bn2(fc2(relu(bn1(fc1(x))))) + x)``. The input and the
    output both have ``dim`` features.
    """

    def __init__(self, dim):
        """Create the two ``dim -> dim`` linear layers and their batch norms."""
        super().__init__()
        self.fc1 = nn.Linear(dim, dim)
        self.bn1 = nn.BatchNorm1d(dim)
        self.fc2 = nn.Linear(dim, dim)
        self.bn2 = nn.BatchNorm1d(dim)

    def forward(self, x):
        """Apply the transform branch, add the skip input, then ReLU."""
        hidden = torch.relu(self.bn1(self.fc1(x)))
        branch = self.bn2(self.fc2(hidden))
        return torch.relu(branch + x)

class ImprovedDenoiser(nn.Module):
    """MLP that maps a feature vector to an output of the same width.

    Architecture: a Linear/BatchNorm/ReLU/Dropout encoder, three
    ``ResidualBlock`` layers at the hidden width, and a two-layer decoder that
    projects back to ``input_dim``.

    Args:
        input_dim: Number of input (and output) features.
        hidden_dim: Width of the encoder output and residual blocks.
        bottleneck_dim: Width of the decoder's intermediate layer.
        encoder_dropout: Dropout probability after the encoder activation.
        decoder_dropout: Dropout probability inside the decoder.

    The defaults reproduce the original fixed architecture (256/128 units,
    dropout 0.3/0.2). Attribute names and construction order are unchanged, so
    existing ``state_dict`` checkpoints and seeded initialisation still match.
    """

    def __init__(self, input_dim, hidden_dim=256, bottleneck_dim=128,
                 encoder_dropout=0.3, decoder_dropout=0.2):
        super().__init__()

        # Encoder: lift the input to the hidden width.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(encoder_dropout),
        )

        # Residual blocks (kept as separate attributes so checkpoint keys
        # remain `res1.*`, `res2.*`, `res3.*`).
        self.res1 = ResidualBlock(hidden_dim)
        self.res2 = ResidualBlock(hidden_dim)
        self.res3 = ResidualBlock(hidden_dim)

        # Decoder: narrow, then project back to the input width. The final
        # layer has no activation so outputs can take any sign.
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, bottleneck_dim),
            nn.BatchNorm1d(bottleneck_dim),
            nn.ReLU(),
            nn.Dropout(decoder_dropout),
            nn.Linear(bottleneck_dim, input_dim)
        )

    def forward(self, x):
        """Run encoder -> res1 -> res2 -> res3 -> decoder and return the result."""
        x = self.encoder(x)
        x = self.res1(x)
        x = self.res2(x)
        x = self.res3(x)
        x = self.decoder(x)
        return x

# Size the network from the feature count of the scaled training matrix,
# then place it on the selected device.
input_dim = X_train_scaled.shape[-1]
model = ImprovedDenoiser(input_dim)
model = model.to(device)
print(f"Parameters: {sum(p.numel() for p in model.parameters())}")

## Training Setup

In [None]:
# Hyperparameters for the optimisation run.
BATCH_SIZE = 256
NUM_EPOCHS = 100
LEARNING_RATE = 0.001

# Objective, optimiser and learning-rate schedule. The cosine schedule spans
# the whole run (T_max equals the number of epochs).
criterion = nn.MSELoss()
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

# Mini-batch loaders: training batches are reshuffled every epoch,
# validation batches keep a fixed order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Training Loop

In [None]:
# Per-epoch loss history (used by the plotting cell) and the best validation
# loss seen so far (used for checkpointing).
train_losses = []
val_losses = []
best_val_loss = float('inf')

for epoch in range(NUM_EPOCHS):
    # ---- Train ----
    model.train()
    train_loss_sum = 0.0
    train_seen = 0

    for batch_X, batch_y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", leave=False):
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        loss.backward()
        # Cap the global gradient norm at 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Weight each batch's mean loss by its size: the final batch is
        # usually shorter, and a plain mean of batch means would over-weight it.
        batch_n = batch_X.size(0)
        train_loss_sum += loss.item() * batch_n
        train_seen += batch_n

    train_loss = train_loss_sum / train_seen
    train_losses.append(train_loss)

    # ---- Validate ----
    model.eval()
    val_loss_sum = 0.0
    val_seen = 0

    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            batch_n = batch_X.size(0)
            val_loss_sum += loss.item() * batch_n
            val_seen += batch_n

    val_loss = val_loss_sum / val_seen
    val_losses.append(val_loss)

    # Advance the cosine schedule once per epoch.
    scheduler.step()

    # Checkpoint whenever validation improves; otherwise log every 10th epoch.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"✓ Epoch {epoch+1} - Train: {train_loss:.6f}, Val: {val_loss:.6f} [SAVED]")
    elif (epoch + 1) % 10 == 0:
        print(f"  Epoch {epoch+1} - Train: {train_loss:.6f}, Val: {val_loss:.6f}")

print(f"\nBest validation loss: {best_val_loss:.6f}")

## Generate Predictions

In [None]:
# Restore the best checkpoint. `map_location` remaps the stored tensors onto
# the current device, so a checkpoint written on GPU still loads when this
# cell is re-run on a CPU-only runtime (and vice versa).
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()

with torch.no_grad():
    # Predict noise (still in the standardised target space)
    noise_pred_scaled = model(test_tensor).cpu().numpy()
    val_noise_pred_scaled = model(X_val_tensor).cpu().numpy()

# Inverse transform back to the original noise scale
noise_pred = scaler_y.inverse_transform(noise_pred_scaled)
val_noise_pred = scaler_y.inverse_transform(val_noise_pred_scaled)

# Denoise: clean = noisy - noise
predictions = test_data - noise_pred
val_predictions = X_val - val_noise_pred

# Calculate actual clean values for validation
# (y_val holds noisy - clean, so subtracting it from the noisy input
# recovers the clean weights)
val_clean = X_val - y_val

# Calculate NMSE: mean squared error over all elements divided by the
# variance of all clean validation values
mse = np.mean((val_clean - val_predictions) ** 2)
variance = np.var(val_clean)
nmse = mse / variance

print(f"\n{'='*60}")
print(f"Validation NMSE: {nmse:.6f}")
print(f"Target: < 0.3")
print(f"{'='*60}")

## Save Submission

In [None]:
# Write the denoised test predictions to CSV (no index column), report the
# shape, and preview the first rows as the cell output.
submission_df = pd.DataFrame(data=predictions)
submission_df.to_csv(path_or_buf='submission.csv', index=False)
print("✓ Saved: submission.csv")
print(f"Shape: {submission_df.shape}")
submission_df.head(5)

## Plot Training History

In [None]:
# Left panel: full loss history. Right panel: the same curves with the first
# ten epochs dropped so the later, flatter part of training is readable.
# Explicit 1-based epoch numbers are used on the x-axis so the right panel
# really starts after epoch 10 instead of being re-indexed from 0.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
epoch_numbers = list(range(1, len(train_losses) + 1))

panels = [
    (0, 'Training History'),
    (10, 'Training History (after epoch 10)'),
]

for ax, (skip, title) in zip(axes, panels):
    # Slicing past the end yields empty lists, so a run shorter than ten
    # epochs simply leaves the right panel empty.
    ax.plot(epoch_numbers[skip:], train_losses[skip:], label='Train Loss')
    ax.plot(epoch_numbers[skip:], val_losses[skip:], label='Val Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()

## Next Steps

1. ✅ Upload `submission.csv` to Kaggle
2. ✅ Share this notebook with TAs
3. ✅ Use `submission.csv` in Part 2