In [6]:
# ==========================================
# 🚀 MLPRegressor (Neural Network) for Regression
# ==========================================
import pandas as pd
import numpy as np   # ✅ needed for sqrt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# 1. Load data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sample_submission = pd.read_csv("sample_submission.csv")

# 2. Features & target
target_col = "target"   # change if your target column name is different
id_col = "id"           # change if ID column is different

X = train.drop(columns=[target_col])
y = train[target_col]

# Drop ID column from test before scaling
if id_col in test.columns:
    test = test.drop(columns=[id_col])

# 3. Scale features (MLP needs normalized input)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
test_scaled = scaler.transform(test)

# 4. Train/Validation split
X_train, X_valid, y_train, y_valid = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# 5. Define MLPRegressor
mlp = MLPRegressor(
    hidden_layer_sizes=(256, 128, 64),   # 3 hidden layers
    activation="relu",
    solver="adam",
    learning_rate_init=0.001,
    max_iter=500,
    random_state=42,
    early_stopping=True,  # ✅ built-in early stopping
    n_iter_no_change=20,
    verbose=True
)

# 6. Train
mlp.fit(X_train, y_train)

# 7. Validation check
valid_pred = mlp.predict(X_valid)
mse = mean_squared_error(y_valid, valid_pred)
rmse = np.sqrt(mse)   # ✅ manual RMSE
print("✅ Validation RMSE:", rmse)

# 8. Predict on test set
test_pred = mlp.predict(test_scaled)

# 9. Save submission
submission = sample_submission.copy()
submission[submission.columns[-1]] = test_pred
submission.to_csv("submission.csv", index=False)
print("🎉 submission.csv saved!")


Iteration 1, loss = 15475.08865341
Validation score: -35.845870
Iteration 2, loss = 14152.41904698
Validation score: -30.868783
Iteration 3, loss = 11289.85791056
Validation score: -20.844520
Iteration 4, loss = 6268.63838040
Validation score: -6.566689
Iteration 5, loss = 1176.92693967
Validation score: 0.671063
Iteration 6, loss = 501.25107877
Validation score: 0.168215
Iteration 7, loss = 184.98719864
Validation score: 0.787060
Iteration 8, loss = 124.25842973
Validation score: 0.720434
Iteration 9, loss = 75.77316692
Validation score: 0.876464
Iteration 10, loss = 63.27929204
Validation score: 0.879782
Iteration 11, loss = 51.77193545
Validation score: 0.882605
Iteration 12, loss = 47.54826269
Validation score: 0.890318
Iteration 13, loss = 42.93791048
Validation score: 0.901163
Iteration 14, loss = 39.96335315
Validation score: 0.903232
Iteration 15, loss = 36.64525521
Validation score: 0.905757
Iteration 16, loss = 33.95322216
Validation score: 0.910440
Iteration 17, loss = 31.46