In [10]:
import os
import joblib
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd

# Make sure the data folder exists; the trained model is written into it below.
os.makedirs("data", exist_ok=True)

# Load the dataset and separate the features from the target column.
df = pd.read_csv("data/ai4i2020.csv")
# "UDI" and "Product ID" are removed together with the target so that none of
# them is used as a model input.
# NOTE(review): every remaining column becomes a feature. If the CSV still
# carries per-failure-mode flag columns (the public AI4I 2020 dataset ships
# TWF/HDF/PWF/OSF/RNF), they would leak the label -- confirm against the file.
X = df.drop(columns=["UDI", "Product ID", "Machine failure"])
y = df["Machine failure"]

# Numeric features are picked by dtype; the only categorical feature is "Type".
num_cols = X.select_dtypes(include=["float64", "int64"]).columns
cat_cols = ["Type"]

# Scale the numeric columns and one-hot encode the categorical one. Columns
# listed in neither group are not forwarded to the classifier (the
# ColumnTransformer default is remainder="drop").
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), num_cols),
        # handle_unknown="ignore": a category unseen during fit is encoded as
        # all zeros at predict time instead of raising.
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ]
)

# Bundle preprocessing and the classifier so they are fitted and persisted as
# a single object.
pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("clf", RandomForestClassifier(n_estimators=200, random_state=42)),
    ]
)

# Hold out 20% of the rows. stratify=y keeps the proportion of failure and
# non-failure rows the same in both partitions, so neither split ends up with
# a skewed share of the failure class purely by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit on the training partition only, so the scaler statistics and encoder
# categories are learned without looking at the held-out rows.
pipeline.fit(X_train, y_train)

# Persist the whole fitted pipeline (preprocessing + classifier).
joblib.dump(pipeline, "data/rf_model_new.joblib")
print("✅ Model retrained and saved for sklearn 1.7.2")


✅ Model retrained and saved for sklearn 1.7.2
