In [None]:
import joblib
import pandas as pd

from pathlib import Path
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# === Load train dataset only === #
# Paths are resolved against the current working directory, so the notebook
# must be launched from the project root for "0 - data" to be found.
project = Path.cwd()
target_folder = project.joinpath("0 - data")
train_file = target_folder / "train_clean.xlsx"
train_df = pd.read_excel(train_file, sheet_name="Sheet1")

In [None]:
# === Define target + features === #
target_col = "Automation Suitable"

# Identifier-like columns are dropped only when they are actually present.
drop_cols = [c for c in ("Task ID", "Task Name") if c in train_df.columns]

# Feature matrix: everything except the target and the identifier columns.
X_train = train_df.drop(columns=[target_col, *drop_cols])

# Encode the target as integers: "No" -> 0, "Yes" -> 1.
# Any other value (including a missing one) maps to NaN and is rejected below.
label_to_int = {"No": 0, "Yes": 1}
y_train = train_df[target_col].map(label_to_int)
has_unmapped_labels = y_train.isna().any()
if has_unmapped_labels:
    raise ValueError("Target column contains values other than 'Yes'/'No'.")

# Split feature names by dtype: object columns are treated as categorical,
# every other dtype as numeric.
cat_cols = X_train.select_dtypes(include=["object"]).columns.to_list()
num_cols = X_train.select_dtypes(exclude=["object"]).columns.to_list()

In [None]:
# === Preprocessing (OneHot for categoricals + scaling for numerics) === #
# Categorical columns: one-hot encode; handle_unknown="ignore" makes categories
# unseen during fit encode as all zeros instead of raising at predict time.
categorical_encoder = OneHotEncoder(handle_unknown="ignore")

# Numeric columns: standardize, since the downstream MLP is sensitive to
# feature scale.
numeric_scaler = Pipeline(steps=[("scaler", StandardScaler())])

preprocess = ColumnTransformer(
    transformers=[
        ("cat", categorical_encoder, cat_cols),
        ("num", numeric_scaler, num_cols),
    ]
)

In [None]:
# === Build Neural Network model pipeline === #
# Hyperparameters are collected in one mapping so they can be read (and tuned)
# at a glance. The inline remarks are the author's tuning notes.
mlp_params = {
    # Two hidden layers. Author's notes: a single (32) layer underfit,
    # while (128, 64) overfit / learned unstably.
    "hidden_layer_sizes": (64, 32),
    # Author's notes: ReLU coped better with the sparse one-hot inputs;
    # "tanh" saturated quickly on the high-dimensional input.
    "activation": "relu",
    "solver": "adam",
    # L2 regularization strength. Author's notes: changing it made no
    # difference, suggesting the network is already well regularized.
    "alpha": 0.0005,
    # NOTE(review): per the scikit-learn docs this setting is only used when
    # solver="sgd", so it has no effect with "adam".
    "learning_rate": "adaptive",
    "max_iter": 2000,
    # Hold out 10% of the training data and stop once the validation score
    # stops improving.
    "early_stopping": True,
    "validation_fraction": 0.1,
    # Fixed seed for reproducible weight init and validation split.
    "random_state": 42,
}
nn_model = MLPClassifier(**mlp_params)

# Chain preprocessing and the classifier so both are fitted (and later
# serialized) as a single object.
nn_pipeline = Pipeline(
    steps=[
        ("preprocess", preprocess),
        ("model", nn_model),
    ]
)

In [None]:
# === Train and Save model pipeline === #
# Fit the full preprocessing + MLP pipeline on the training data.
nn_pipeline.fit(X_train, y_train)
print("✅ Neural Network (MLP) pipeline trained.")

# The notebook never imports `os`, so the filesystem work is done with
# pathlib.Path (imported in the first cell). This keeps the cell runnable
# after Restart Kernel -> Run All.
# `save_dir` and `model_path` stay plain strings, as they were before.
save_dir = "2 - models"
Path(save_dir).mkdir(parents=True, exist_ok=True)  # create folder if missing

model_path = str(Path(save_dir) / "neural_network_model.pkl")
joblib.dump(nn_pipeline, model_path)  # serialize the fitted pipeline to disk

print(f"✅ Neural Network model saved to: {model_path}")

✅ Neural Network (MLP) pipeline trained.
✅ Neural Network model saved to: 2 - models\neural_network_model.pkl


In [None]:
# Re-serialize the fitted pipeline, replacing any existing file at this path
# (the same folder and file name the training cell saves to).
overwrite_target = "2 - models/neural_network_model.pkl"
joblib.dump(nn_pipeline, overwrite_target)
print("✅ Model overwritten and saved.")

✅ Model overwritten and saved.
