In [None]:
# -*- coding: utf-8 -*-
"""pytorch_multiclass.ipynb"""

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# 1-2) Load the training table and discard the identifier columns, which are
#      not used as model inputs.
df = pd.read_csv('/content/train.csv')
df = df.drop(columns=['id', 'CustomerId', 'Surname'])

# 3) Separate features from the target.  'Exited' is used as the class label
#    and is supposed to already hold integer codes 0..K-1.
X = df.drop(columns='Exited')
y = df['Exited'].values

# 4) Integer-encode the two categorical columns.  The fitted encoders stay
#    bound to le_geo / le_gen so the same mapping can be re-applied to other
#    data later on.
le_geo = LabelEncoder()
le_gen = LabelEncoder()
for column, encoder in (('Geography', le_geo), ('Gender', le_gen)):
    X[column] = encoder.fit(X[column]).transform(X[column])

# 5) Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X.values,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 6) Standardize.  The scaler is fitted on the training split only and then
#    applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 7) Build PyTorch datasets
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Features are stored as float32; labels as int64 class indices, the
# integer target form used with nn.CrossEntropyLoss.
X_train_t = torch.from_numpy(X_train).to(device=device, dtype=torch.float32)
y_train_t = torch.from_numpy(y_train).to(device=device, dtype=torch.int64)
X_test_t = torch.from_numpy(X_test).to(device=device, dtype=torch.float32)
y_test_t = torch.from_numpy(y_test).to(device=device, dtype=torch.int64)

# Only the training split is wrapped in a loader: shuffled mini-batches of 64.
train_ds = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)

# 8) Determine number of classes as the count of distinct training labels.
num_classes = np.unique(y_train).size

# 9) Define MLP
class MLP(nn.Module):
    """Feed-forward classifier mapping feature vectors to class logits.

    The network is a stack of ``Linear -> ReLU -> Dropout`` stages, one per
    entry in ``hidden_dims``, followed by a final ``Linear`` projection to
    ``out_dim``.  No softmax is applied: the output is raw logits.

    Args:
        in_dim: Number of input features per sample.
        out_dim: Number of output logits (one per class).
        hidden_dims: Width of each hidden layer, in order.  The default
            ``(64, 32)`` reproduces the original fixed architecture.  An
            empty sequence yields a single linear layer.
        dropout: Dropout probability applied after every hidden activation.
            Defaults to ``0.3``.
    """

    def __init__(self, in_dim, out_dim, hidden_dims=(64, 32), dropout=0.3):
        super().__init__()
        layers = []
        prev_dim = in_dim
        for width in hidden_dims:
            layers += [nn.Linear(prev_dim, width), nn.ReLU(), nn.Dropout(dropout)]
            prev_dim = width
        layers.append(nn.Linear(prev_dim, out_dim))
        # Kept under the attribute name ``net`` so that, with the default
        # arguments, state_dict keys ("net.0.weight", "net.3.weight", ...)
        # are the same as with the hard-coded layer list.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by running ``x`` through ``self.net``."""
        return self.net(x)

# Model, loss and optimizer.  The network outputs raw logits, which is what
# nn.CrossEntropyLoss consumes together with integer class indices.
n_features = X_train.shape[1]
model = MLP(n_features, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# Alternative kept from the notebook: optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.SGD(model.parameters(), lr=1e-3)


# 10) Training loop
epochs = 200
n_train = len(train_ds)
for epoch in range(1, epochs + 1):
    model.train()  # training mode (dropout active)
    loss_sum = 0.0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        batch_logits = model(batch_x)                # [B, num_classes]
        batch_loss = criterion(batch_logits, batch_y)
        batch_loss.backward()
        optimizer.step()
        # The criterion yields a per-batch mean; weighting it by the batch
        # size makes the epoch figure a per-sample average even when the
        # last batch is shorter than the others.
        loss_sum += batch_loss.item() * batch_x.size(0)
    avg_loss = loss_sum / n_train
    print(f"Epoch {epoch}/{epochs}  Loss: {avg_loss:.4f}")

# 11) Evaluate on the held-out split
model.eval()  # inference mode (dropout disabled)
with torch.no_grad():
    # Single forward pass over the whole held-out tensor; gradients are not
    # tracked, so the argmax below can run outside the context.
    logits = model(X_test_t)
preds = logits.argmax(dim=1).cpu().numpy()   # predicted class index per row
y_true = y_test

report = classification_report(y_true, preds, digits=4)
print("\nClassification Report on Test Set:")
print(report)

# 12) Inference on unseen test.csv
test_df = pd.read_csv('/content/test.csv')
ids = test_df['id']   # kept aside for the submission file
test_df = test_df.drop(columns=['CustomerId', 'Surname'])

# Re-use the encoders fitted on the training data.  transform() raises
# ValueError if test.csv contains a category that was absent when fitting.
test_df['Geography'] = le_geo.transform(test_df['Geography'])
test_df['Gender'] = le_gen.transform(test_df['Gender'])

# Select the features by name, in the training column order.  The scaler and
# the model were fitted on bare arrays and therefore only see positions;
# picking columns by name keeps every feature in its training slot even if
# test.csv orders its columns differently, and leaves 'id' out of the inputs.
feature_cols = list(X.columns)
X_sub = scaler.transform(test_df[feature_cols].values)
X_sub_t = torch.from_numpy(X_sub).float().to(device)

model.eval()  # inference mode (dropout disabled)
with torch.no_grad():
    logits = model(X_sub_t)
    preds = logits.argmax(dim=1).cpu().numpy()   # predicted class per row

# One row per test id with its predicted 'Exited' class.
submission = pd.DataFrame({'id': ids, 'Exited': preds})
submission.to_csv('prediction.csv', index=False)
print("Saved prediction.csv")

Epoch 1/200  Loss: 0.6744
Epoch 2/200  Loss: 0.6251
Epoch 3/200  Loss: 0.5880
Epoch 4/200  Loss: 0.5620
Epoch 5/200  Loss: 0.5417
Epoch 6/200  Loss: 0.5265
Epoch 7/200  Loss: 0.5162
Epoch 8/200  Loss: 0.5069
Epoch 9/200  Loss: 0.5024
Epoch 10/200  Loss: 0.4945
Epoch 11/200  Loss: 0.4909
Epoch 12/200  Loss: 0.4885
Epoch 13/200  Loss: 0.4822
Epoch 14/200  Loss: 0.4797
Epoch 15/200  Loss: 0.4740
Epoch 16/200  Loss: 0.4732
Epoch 17/200  Loss: 0.4690
Epoch 18/200  Loss: 0.4670
Epoch 19/200  Loss: 0.4616
Epoch 20/200  Loss: 0.4590
Epoch 21/200  Loss: 0.4574
Epoch 22/200  Loss: 0.4526
Epoch 23/200  Loss: 0.4495
Epoch 24/200  Loss: 0.4475
Epoch 25/200  Loss: 0.4483
Epoch 26/200  Loss: 0.4436
Epoch 27/200  Loss: 0.4439
Epoch 28/200  Loss: 0.4400
Epoch 29/200  Loss: 0.4352
Epoch 30/200  Loss: 0.4351
Epoch 31/200  Loss: 0.4327
Epoch 32/200  Loss: 0.4297
Epoch 33/200  Loss: 0.4269
Epoch 34/200  Loss: 0.4233
Epoch 35/200  Loss: 0.4214
Epoch 36/200  Loss: 0.4202
Epoch 37/200  Loss: 0.4197
Epoch 38/2