In [None]:
import joblib
import pandas as pd

from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# === Load train dataset only === #
# Paths are resolved against the current working directory, so the notebook
# must be launched from the folder that contains "0 - data".
project = Path.cwd()
target_folder = project.joinpath("0 - data")

# Read only the "Sheet1" worksheet of the cleaned training workbook.
train_df = pd.read_excel(
    target_folder.joinpath("train_clean.xlsx"),
    sheet_name="Sheet1",
)

In [None]:
# === Define target + features === #
target_col = "Automation Suitable"

# "Task ID" / "Task Name" are removed from the feature matrix, but only when
# they actually exist in the sheet (presumably identifier-like columns —
# confirm against the dataset).
drop_cols = [
    col for col in ("Task ID", "Task Name") if col in train_df.columns
]

# Labels are the target column; features are everything except the target
# and the dropped columns.
y_train = train_df[target_col]
X_train = train_df.drop(columns=[target_col, *drop_cols])



In [None]:
# === Preprocessing (OneHot + passthrough) === #
# Columns are split purely by pandas dtype: `object` columns are treated as
# categorical, every other dtype is forwarded to the model unchanged.
# NOTE(review): `category` / `bool` / string-extension dtypes would land in the
# passthrough group — confirm the cleaned sheet only uses object + numeric.
cat_cols = X_train.select_dtypes(include="object").columns.to_list()
num_cols = X_train.select_dtypes(exclude="object").columns.to_list()

preprocess = ColumnTransformer(
    [
        # handle_unknown="ignore": categories not seen during fit are encoded
        # as an all-zero row instead of raising at transform time.
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        # No scaling or imputation is applied to the remaining columns.
        ("num", "passthrough", num_cols),
    ]
)

In [None]:
# === Build Logistic Regression model pipeline === #
# L2-regularised logistic regression with the lbfgs solver.
logreg_model = LogisticRegression(
    solver="lbfgs",
    penalty="l2",
    C=1.0,                    # regularisation strength left at the library default
    class_weight="balanced",  # reweight classes inversely to their frequency
    max_iter=2000,            # iteration cap for the solver
)

# Chain preprocessing and classifier so both are fitted (and later saved)
# as a single estimator.
logreg_pipeline = Pipeline(
    [
        ("preprocess", preprocess),
        ("model", logreg_model),
    ]
)

In [None]:
# === Train and Save model pipeline === #
# Fit the encoder and the classifier together on the training split.
logreg_pipeline.fit(X_train, y_train)
print("✅ Logistic Regression pipeline trained.")

# Build the output location with pathlib (imported at the top of the notebook).
# The previous os.makedirs / os.path.join calls relied on an `os` module that
# this notebook never imports, so the cell raised NameError on a fresh kernel.
save_dir = Path("2 - models")
save_dir.mkdir(parents=True, exist_ok=True)  # no error if the folder already exists

# Persist the whole fitted pipeline (preprocessing + model) as one artifact.
model_path = save_dir / "logistic_regression_model.pkl"
joblib.dump(logreg_pipeline, model_path)
print(f"✅ Logistic Regression model saved to: {model_path}")

✅ Logistic Regression pipeline trained.
✅ Logistic Regression model saved to: 2 - models\logistic_regression_model.pkl


In [None]:
# Write the fitted pipeline to the models folder again, replacing any existing
# file at this path. The folder must already exist; this cell does not create it.
joblib.dump(
    value=logreg_pipeline,
    filename="2 - models/logistic_regression_model.pkl",
)
print("✅ Model overwritten and saved.")

✅ Model overwritten and saved.
