In [6]:
# 1. Import Library Dulu

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# RandomForestRegressor and GradientBoostingRegressor are the base models; Ridge is the meta model that StackingRegressor fits on top of them.


In [7]:
# 2. Prepare the dataset

# Load the California housing data: feature matrix X and regression target y
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scaling (optional, depends on the model).
# The scaler learns its statistics from the training rows only and then applies
# them to both splits, so the test rows never influence the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# 3. Define the base models (level 0)

# Two tree ensembles are used as base models:
# - Random Forest Regressor
# - Gradient Boosting Regressor
# SVR is not part of this list, so only these two models feed the stack.
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
gbr_regressor = GradientBoostingRegressor(n_estimators=100, random_state=42)

# (name, estimator) pairs in the form StackingRegressor takes for `estimators`
base_models = [
    ("rf", rf_regressor),
    ("gbr", gbr_regressor),
]

In [9]:
# 4. Define the meta model (level 1)
# Ridge regression combines the base models' predictions into the final prediction.
meta_model = Ridge(alpha=1.0)

# 5. Stacking regressor
# passthrough=False is spelled out here (it is also the library default):
# the meta model learns only from the base models' outputs, not the original features.
stacking_reg = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=False,
)


In [10]:
# 6. Train and evaluate the model

# Fit the stacking model on the training split, then predict the held-out split
y_pred = stacking_reg.fit(X_train, y_train).predict(X_test)

# Error metrics and goodness of fit on the test split
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# One print call, one metric per line
print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

MAE: 0.33
MSE: 0.25
R² Score: 0.8066


In [11]:
# 1️⃣ Cek apakah R² score stacking lebih tinggi dari base models.
def evaluate_model(model, X_test, y_test, name):
    """Print the R² score of an already fitted model on the given test data.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features passed to ``model.predict``.
        y_test: True target values the predictions are scored against.
        name: Label printed in front of the score.
    """
    r2 = r2_score(y_test, model.predict(X_test))
    print(f"{name} R² Score: {r2:.4f}")

# Evaluate each base model.
# Fitting the stack already trained a copy of every base model on the full
# training split, so score those fitted copies instead of training each model
# a second time. This also leaves the estimator objects in base_models untouched.
for name, fitted_model in stacking_reg.named_estimators_.items():
    evaluate_model(fitted_model, X_test, y_test, name)

# Evaluate the stacking model
evaluate_model(stacking_reg, X_test, y_test, "Stacking Model")


rf R² Score: 0.8053
gbr R² Score: 0.7756
Stacking Model R² Score: 0.8066


In [None]:
# 🔥 Conclusions
# 1️⃣ If the stacking R² is below 0.84 → try a different meta model such as Random Forest, XGBoost, or LightGBM.
# 2️⃣ If the result is still not good enough → add more base models that take a different approach.
# 3️⃣ If the stacking R² is already above 0.84 → great! 🚀 The model combination is working well.