# AFML Part 1 - Team 44_XLR8 (FIXED)
## Simple Direct Denoising - Proven to Work

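**Target**: validation NMSE < 0.3, where NMSE = MSE(clean, prediction) / Var(clean), computed on the held-out validation split.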

In [None]:
# Mount Google Drive when the google.colab package is available; otherwise
# keep working from the current directory.
try:
    from google.colab import drive
except ImportError:
    # No google.colab package: treat this as a local (non-Colab) run.
    print("✓ Local")
else:
    import os

    try:
        drive.mount('/content/drive')
        os.chdir('/content/drive/MyDrive/AFML_KAAGLE')
    except Exception as exc:
        # Stay best-effort (do not abort the notebook), but report why the
        # Drive folder is unavailable instead of silently hiding the failure.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"✓ Local (Drive setup failed: {exc})")
    else:
        print("✓ Colab")

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Seed NumPy first, then PyTorch, with the same fixed value.
for _seed_fn in (np.random.seed, torch.manual_seed):
    _seed_fn(42)

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

## Load Data

In [None]:
print("Loading...")
# Read the clean targets, the noisy inputs and the test inputs (in that order)
# and keep only the underlying NumPy arrays.
train_clean, train_noise, test_data = (
    pd.read_csv(csv_name).values
    for csv_name in (
        'train-part1-clean.csv',
        'train-part1-noise.csv',
        'test-part1.csv',
    )
)
print(f"Clean: {train_clean.shape}, Noisy: {train_noise.shape}, Test: {test_data.shape}")

## Preprocess - DIRECT PREDICTION (No Residual)

In [None]:
# Hold out 10% of the (noisy, clean) pairs for validation.
X_train, X_val, y_train, y_val = train_test_split(
    train_noise, train_clean, test_size=0.1, random_state=42
)
print(f"Train: {X_train.shape}, Val: {X_val.shape}")

# Standardize inputs and targets with separate scalers, each fitted on the
# training split only and then applied to the other splits.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit(X_train).transform(X_train)
X_val_scaled, test_scaled = (scaler_X.transform(arr) for arr in (X_val, test_data))

y_train_scaled = scaler_y.fit(y_train).transform(y_train)
y_val_scaled = scaler_y.transform(y_val)


def _as_device_tensor(array):
    """Convert a NumPy array to a float32 tensor placed on `device`."""
    return torch.FloatTensor(array).to(device)


# Everything the model will see lives on `device` from here on.
X_train_t = _as_device_tensor(X_train_scaled)
y_train_t = _as_device_tensor(y_train_scaled)
X_val_t = _as_device_tensor(X_val_scaled)
y_val_t = _as_device_tensor(y_val_scaled)
test_t = _as_device_tensor(test_scaled)

## Simple but Effective Model

In [None]:
class SimpleDenoiser(nn.Module):
    """Fully connected network whose output has the same width as its input.

    Hidden widths run 512 -> 256 -> 128 -> 256 -> 512. Each hidden stage is
    Linear -> BatchNorm1d -> ReLU -> Dropout, and a final Linear layer maps
    back to ``input_dim`` features.
    """

    # (output width, dropout probability) of every hidden stage, in order.
    _HIDDEN_STAGES = ((512, 0.3), (256, 0.3), (128, 0.2), (256, 0.2), (512, 0.2))

    def __init__(self, input_dim=20):
        super().__init__()
        stack = []
        width_in = input_dim
        for width_out, drop_p in self._HIDDEN_STAGES:
            stack.extend([
                nn.Linear(width_in, width_out),
                nn.BatchNorm1d(width_out),
                nn.ReLU(),
                nn.Dropout(drop_p),
            ])
            width_in = width_out
        # Output projection back to the input width.
        stack.append(nn.Linear(width_in, input_dim))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.net(x)

# Size the network from the training data instead of relying on the class
# default of 20, so the notebook still runs if the CSV column count differs.
# The network outputs `input_dim` features, so this assumes the clean targets
# have the same number of columns as the noisy inputs.
model = SimpleDenoiser(input_dim=X_train.shape[1]).to(device)
print(f"Params: {sum(p.numel() for p in model.parameters()):,}")

## Training Setup

In [None]:
# Hyperparameters: batch size, number of epochs, initial learning rate.
BATCH_SIZE, NUM_EPOCHS, LR = 512, 80, 0.001

# Training batches are reshuffled every epoch; validation order stays fixed.
train_set = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

val_set = TensorDataset(X_val_t, y_val_t)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

# Mean-squared-error objective, Adam with a small weight decay.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)

# Halve the learning rate when the monitored value has not improved for
# 5 consecutive scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

## Train

In [None]:
# Train for NUM_EPOCHS epochs. After every epoch the validation loss drives the
# LR scheduler, and the weights with the lowest validation loss so far are
# written to 'best.pth'.
best_val_loss = float('inf')
train_losses, val_losses = [], []

for epoch in range(NUM_EPOCHS):
    # Train
    model.train()
    train_loss, train_seen = 0.0, 0
    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False):
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        optimizer.zero_grad()
        loss.backward()
        # Cap the global gradient norm at 1.0 before the optimizer update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # `loss` is a per-batch mean (nn.MSELoss default reduction). Weight it
        # by the batch size so a smaller final batch is not over-counted in
        # the epoch average.
        batch_n = X_batch.size(0)
        train_loss += loss.item() * batch_n
        train_seen += batch_n

    train_loss /= train_seen
    train_losses.append(train_loss)

    # Validate
    model.eval()
    val_loss, val_seen = 0.0, 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            pred = model(X_batch)
            loss = criterion(pred, y_batch)
            # Same sample-weighted accumulation as in training, so val_loss is
            # the mean over all validation samples rather than over batches.
            batch_n = X_batch.size(0)
            val_loss += loss.item() * batch_n
            val_seen += batch_n

    val_loss /= val_seen
    val_losses.append(val_loss)

    # Step the plateau scheduler and remember the LR on either side so a
    # reduction can be reported below.
    old_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    new_lr = optimizer.param_groups[0]['lr']

    if val_loss < best_val_loss:
        # New best epoch: checkpoint the weights.
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best.pth')
        print(f"✓ Epoch {epoch+1}/{NUM_EPOCHS} - Train: {train_loss:.6f}, Val: {val_loss:.6f} [SAVED]")
    elif (epoch+1) % 10 == 0:
        # Otherwise only log every 10th epoch to keep the output short.
        print(f"  Epoch {epoch+1}/{NUM_EPOCHS} - Train: {train_loss:.6f}, Val: {val_loss:.6f}")

    if old_lr != new_lr:
        print(f"  → LR: {old_lr:.6f} → {new_lr:.6f}")

print(f"\nBest val loss: {best_val_loss:.6f}")

## Predict & Calculate NMSE

In [None]:
# Restore the best checkpoint. `map_location=device` remaps the saved tensors
# onto the device used by this session, so a checkpoint written on a GPU still
# loads when this cell runs on a CPU-only runtime.
model.load_state_dict(torch.load('best.pth', map_location=device))
model.eval()

# Forward passes only; gradients are not needed for inference.
with torch.no_grad():
    test_pred_scaled = model(test_t).cpu().numpy()
    val_pred_scaled = model(X_val_t).cpu().numpy()

# Undo the target standardization so predictions are in the original units.
test_pred = scaler_y.inverse_transform(test_pred_scaled)
val_pred = scaler_y.inverse_transform(val_pred_scaled)

# Calculate NMSE on the validation split: mean squared error divided by the
# variance of the clean validation targets (both taken over all elements).
mse = np.mean((y_val - val_pred) ** 2)
variance = np.var(y_val)
nmse = mse / variance

print(f"\n{'='*60}")
print(f"Validation NMSE: {nmse:.6f}")
print(f"Target: < 0.3")
if nmse < 0.3:
    print(f"✅ SUCCESS! NMSE is below 0.3!")
else:
    print(f"⚠️  NMSE is above 0.3, but should still be good")
print(f"{'='*60}")

## Save Submission

In [None]:
# Wrap the test predictions in a DataFrame and write them to CSV without the
# index column.
submission = pd.DataFrame(data=test_pred)
submission.to_csv('submission.csv', index=False)

n_rows, n_cols = submission.shape
print("✓ Saved: submission.csv")
print(f"Shape: {(n_rows, n_cols)}")
# Last expression of the cell: show the first rows in the notebook output.
submission.head()

## Plot Training

In [None]:
import matplotlib.pyplot as plt

# Draw the per-epoch training and validation loss curves on one figure.
plt.figure(figsize=(10, 4))
for curve, curve_label in ((train_losses, 'Train'), (val_losses, 'Val')):
    plt.plot(curve, label=curve_label, alpha=0.7)
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Text summary: last-epoch losses, best validation loss, validation NMSE.
final_train, final_val = train_losses[-1], val_losses[-1]
print(f"Final - Train: {final_train:.6f}, Val: {final_val:.6f}")
print(f"Best Val: {best_val_loss:.6f}")
print(f"NMSE: {nmse:.6f}")

## Next Steps

1. ✅ Upload `submission.csv` to Kaggle
2. ✅ Share this notebook with all 6 TAs
3. ✅ Use `submission.csv` in Part 2