# Import Lib

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

## 1. Read csv

In [None]:
df = pd.read_csv("/dataset/ETL_data.csv")
X_columns = [f'x{i}' for i in range(1, 66)]
Y_columns = ['Y 10 min', 'Y 15 min', 'Y 20 min',
       'Y 30 min', 'Y 45 min', 'Y 60 min']

X_data = df[X_columns]
Y_data = df[Y_columns]

# 2. Data preprocessing

## 2.1 Identify categorical columns in X

In [3]:
categorical_cols = X_data.select_dtypes(include=["object", "category"]).columns.tolist()
numerical_cols = [col for col in X_columns if col not in categorical_cols]

## 2.3 Encode categorical columns (Label Encoding)

In [5]:
encoders = {}
X_encoded = X_data.copy()

for col in categorical_cols:
    le = LabelEncoder()
    X_encoded[col] = le.fit_transform(X_data[col])
    encoders[col] = le  # เก็บไว้ใช้ inverse later

In [6]:
Y_data.columns

Index(['Y 10 min', 'Y 15 min', 'Y 20 min', 'Y 30 min', 'Y 45 min', 'Y 60 min'], dtype='object')

## 2.4 Train/Test Split

In [7]:
Y_train, Y_test, X_train, X_test = train_test_split(Y_data, X_encoded, test_size=0.2, random_state=42)

# 3. Train MultiOutputRegressor

In [8]:
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(Y_train, X_train)

# 4. Predict from new Y
`Test result of model`

In [9]:
new_Y = pd.DataFrame([[65,83,88,91,92,93]], columns=Y_columns)
X_pred = model.predict(new_Y)
X_pred_df = pd.DataFrame(X_pred, columns=X_columns)
# Inverse transform categorical columns
for col in categorical_cols:
    # ต้องปัดค่าเพราะเป็น regression output
    X_pred_df[col] = X_pred_df[col].round().astype(int)
    # ให้ค่าอยู่ในช่วง index encoder
    max_index = len(encoders[col].classes_) - 1
    X_pred_df[col] = X_pred_df[col].clip(0, max_index)
    # กลับเป็นค่าหมวดหมู่เดิม
    X_pred_df[col] = encoders[col].inverse_transform(X_pred_df[col])

In [10]:
new_Y

Unnamed: 0,Y 10 min,Y 15 min,Y 20 min,Y 30 min,Y 45 min,Y 60 min
0,65,83,88,91,92,93


In [11]:
from tabulate import tabulate
print(tabulate(X_pred_df, headers='keys', tablefmt='pretty'))

+---+---------+------+-----+------+------+--------------------+--------------------+--------------------+--------------------+-------------------+-------------------+------+--------------------+--------------------+-------------------+-----+-----+-------------------+-----+-----+---------+-----+-----+-----+-----+------+------+-------+-----+-----+---------------------+------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+-----+-------+-----+-----+-----+------+--------------------+-------------------+------+-------+------+--------------------+--------------------+-----+-----+---------------------+--------------------+-------+-------+-------------------+--------+-----+
|   |   x1    |  x2  | x3  |  x4  |  x5  |         x6         |         x7         |         x8         |         x9         |        x10        |        x11        | x12  |        x13         |        x14         |        x15        | x16 | x17 |        x18        | x19 | x20 |   x21   | x22 | x23 | x24 

# 5. load model as a joblib

In [12]:
import joblib

# สมมุติว่า model และ encoders สร้างไว้แล้วจากขั้นตอนก่อนหน้า
# model = MultiOutputRegressor(...)
# encoders = {col_name: LabelEncoder(), ...}

# รวมทุกอย่างไว้ใน dictionary
save_bundle = {
    "model": model,
    "encoders": encoders,
    "categorical_cols": categorical_cols,
    "X_columns": X_columns,
    "Y_columns": Y_columns
}

# บันทึกเป็นไฟล์ .joblib
joblib.dump(save_bundle, "inverse_model.joblib")
print("✅ Model saved to inverse_model.joblib")

✅ Model saved to inverse_model.joblib


# 6. Test load model

In [13]:
# โหลดโมเดล + encoders
bundle = joblib.load("inverse_model.joblib")

model = bundle["model"]
encoders = bundle["encoders"]
categorical_cols = bundle["categorical_cols"]
X_columns = bundle["X_columns"]
Y_columns = bundle["Y_columns"]

In [14]:
import pandas as pd

# สมมุติว่า new Y มา
new_Y = pd.DataFrame([[0.5, 1.2, 3.1, 2.2,23,4]], columns=Y_columns)

# ทำนาย
X_pred = model.predict(new_Y)
X_pred_df = pd.DataFrame(X_pred, columns=X_columns)

# Inverse transform categorical columns
for col in categorical_cols:
    X_pred_df[col] = X_pred_df[col].round().astype(int)
    max_index = len(encoders[col].classes_) - 1
    X_pred_df[col] = X_pred_df[col].clip(0, max_index)
    X_pred_df[col] = encoders[col].inverse_transform(X_pred_df[col])

X_pred_df

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x56,x57,x58,x59,x60,x61,x62,x63,x64,x65
0,Class 3,Salt,Low,High,1.94,1.94885,0.0086,15.98681,2.772,8.729,...,0.91,HPMC,2.55,0.5612,0.74799,306.0,Round,9.728,60 rpm,SLS


In [18]:
X_pred

array([[1.1200000e+00, 5.5000000e-01, 1.0000000e+00, 4.9000000e-01,
        1.9400000e+00, 1.9488500e+00, 8.6000000e-03, 1.5986810e+01,
        2.7720000e+00, 8.7290000e+00, 4.1089000e+00, 1.5000000e+00,
        3.4286000e-01, 3.2628000e-01, 4.4293400e+01, 1.1795200e+01,
        0.0000000e+00, 0.0000000e+00, 1.3332000e+01, 0.0000000e+00,
        8.6427000e+00, 0.0000000e+00, 0.0000000e+00, 8.5651000e+00,
        0.0000000e+00, 1.9200000e+00, 2.6158000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 2.0700000e+00, 2.6158000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 3.4680000e+00, 0.0000000e+00, 7.3235000e+00,
        0.0000000e+00, 7.6000000e+00, 4.1000000e-01, 2.0950000e+00,
        0.0000000e+00, 9.9910000e-01, 3.0661000e+00, 0.0000000e+00,
        0.0000000e+00, 2.2514720e+01, 4.3051451e+02, 8.8400000e+01,
        1.3900000e+03, 6.3000000e+00, 1.1552000e+00, 9.1000000e-01,
        0.0000000e+00, 2.5500000e+00, 5.6120000e