In [1]:
import sklearn
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.ensemble import RandomTreesEmbedding, HistGradientBoostingRegressor, StackingRegressor, VotingRegressor
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.model_selection import train_test_split
import time
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, SGDRegressor, RidgeCV, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid, ParameterSampler, RandomizedSearchCV
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler, KernelCenterer, RobustScaler
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
import joblib
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

## REDUCE 4

In [19]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_4_sec.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [1058, 1059, 5140, 5246]


In [20]:
df_all = pd.read_csv('data_clean_reduce_4_sec.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [21]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999992424851 
 Beta : 0.9830145499835 
 V : 0.9999997324550666 
MAE : 
 Alpha : 0.0006444930320737723 
 Beta : 0.5304223075408471 
 V : 0.0018995723076137777 
MSE : 
 Alpha : 9.108658017103407e-07 
 Beta : 1.84683543618431 
 V : 5.374949956678052e-05 
Max AERR : 
 V     0.081587
α     0.010602
β    11.529605
dtype: float64
Waktu yang dibutuhkan :  1.3699819008509317


### MODEL 2

In [22]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.99999995732403 
 Beta : 0.9841811677261304 
 V : 0.9999997991471611 
MAE : 
 Alpha : 0.003638403931507106 
 Beta : 0.4512846714085587 
 V : 0.0016721616652833773 
MSE : 
 Alpha : 5.131526988940055e-05 
 Beta : 1.719988576932532 
 V : 4.0351126959437945e-05 
Max AERR : 
 V     0.057577
α     0.068975
β    12.561576
dtype: float64
Waktu yang dibutuhkan :  1.8764140685399373


## REDUCE 5

In [23]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_5.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [1058, 1059, 4075, 5140, 5246]


In [24]:
df_all = pd.read_csv('data_clean_reduce_5.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [25]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999990985894 
 Beta : 0.9829301574896132 
 V : 0.9999997271513653 
MAE : 
 Alpha : 0.0008529667078136998 
 Beta : 0.5263667931815713 
 V : 0.001928033875127796 
MSE : 
 Alpha : 1.080529104191424e-06 
 Beta : 1.8140243363640258 
 V : 5.5869608882901275e-05 
Max AERR : 
 V     0.081670
α     0.006061
β    11.387483
dtype: float64
Waktu yang dibutuhkan :  1.3963629166285196


### MODEL 2

In [26]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999451555259 
 Beta : 0.9842701515474581 
 V : 0.999999796678151 
MAE : 
 Alpha : 0.0037722867644173572 
 Beta : 0.4487072514939109 
 V : 0.0017215322079450284 
MSE : 
 Alpha : 6.574257415733471e-05 
 Beta : 1.6716222122650684 
 V : 4.163301823338225e-05 
Max AERR : 
 V     0.056881
α     0.076400
β    11.636704
dtype: float64
Waktu yang dibutuhkan :  1.9020386815071106


## REDUCE 8

In [27]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_8.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [859, 860, 1058, 1059, 4075, 5042, 5140, 5246]


In [28]:
df_all = pd.read_csv('data_clean_reduce_8.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [29]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999977560645 
 Beta : 0.9816938348983458 
 V : 0.9999997507218131 
MAE : 
 Alpha : 0.0011856221515241419 
 Beta : 0.5149061510188144 
 V : 0.0018436500366758757 
MSE : 
 Alpha : 2.739043012460526e-06 
 Beta : 1.9083829640531487 
 V : 5.1352859712633046e-05 
Max AERR : 
 V     0.079201
α     0.006674
β    13.233628
dtype: float64
Waktu yang dibutuhkan :  1.3958321730295817


### MODEL 2

In [30]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999956392563608 
 Beta : 0.9836289600364033 
 V : 0.9999998375396193 
MAE : 
 Alpha : 0.00458804596394519 
 Beta : 0.4553502300203264 
 V : 0.0014144839451088608 
MSE : 
 Alpha : 0.005322908916564594 
 Beta : 1.706649841562836 
 V : 3.346785068771051e-05 
Max AERR : 
 V     0.053854
α     2.668491
β    12.422116
dtype: float64
Waktu yang dibutuhkan :  1.8982275207837422


## REDUCE 16

In [31]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_16.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [859, 860, 1058, 1059, 2034, 2041, 4075, 5038, 5039, 5040, 5041, 5042, 5140, 5246, 5248, 5257]


In [32]:
df_all = pd.read_csv('data_clean_reduce_16.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [33]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999722640506 
 Beta : 0.9809908660892865 
 V : 0.9999997261396991 
MAE : 
 Alpha : 0.0033016681508294243 
 Beta : 0.5571939394287377 
 V : 0.0018872762421283439 
MSE : 
 Alpha : 3.276216270262372e-05 
 Beta : 2.0407315616503667 
 V : 5.586147130575189e-05 
Max AERR : 
 V     0.093131
α     0.067307
β    12.690991
dtype: float64
Waktu yang dibutuhkan :  1.3646158774693806


### MODEL 2

In [34]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999955107748518 
 Beta : 0.9822726063412797 
 V : 0.9999997610301807 
MAE : 
 Alpha : 0.003486248175924444 
 Beta : 0.48138013501474947 
 V : 0.0016804793141357054 
MSE : 
 Alpha : 0.005302747068206564 
 Beta : 1.903129932961447 
 V : 4.874458130629577e-05 
Max AERR : 
 V     0.091628
α     2.666621
β    14.041708
dtype: float64
Waktu yang dibutuhkan :  1.8639519810676575


## REDUCE 22

In [35]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_22.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [755, 859, 860, 862, 1058, 1059, 2034, 2041, 3958, 4075, 4275, 5033, 5038, 5039, 5040, 5041, 5042, 5140, 5148, 5246, 5248, 5257]


In [36]:
df_all = pd.read_csv('data_clean_reduce_22.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [37]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999894312268 
 Beta : 0.9854671309424441 
 V : 0.9999998650516262 
MAE : 
 Alpha : 0.0016037096540434685 
 Beta : 0.47542394189361026 
 V : 0.0014316577039070592 
MSE : 
 Alpha : 1.2071084600412103e-05 
 Beta : 1.5633593614869405 
 V : 2.712960303543747e-05 
Max AERR : 
 V     0.048881
α     0.052283
β    11.654270
dtype: float64
Waktu yang dibutuhkan :  1.4044374624888103


### MODEL 2

In [38]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999943426894022 
 Beta : 0.9858977549361515 
 V : 0.9999998877734583 
MAE : 
 Alpha : 0.005391641460849807 
 Beta : 0.4252383661565283 
 V : 0.0012234098768596218 
MSE : 
 Alpha : 0.006461476051649968 
 Beta : 1.517035401009682 
 V : 2.2561676311078124e-05 
Max AERR : 
 V     0.046039
α     2.154938
β    11.564064
dtype: float64
Waktu yang dibutuhkan :  1.8587570587793987


## REDUCE 23

In [39]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_23.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print("Reduce index:", missing_rows.index.tolist())

Reduce index: [755, 859, 860, 862, 1058, 1059, 2034, 2041, 3958, 4075, 4275, 5033, 5038, 5039, 5040, 5041, 5042, 5140, 5148, 5246, 5248, 5257, 5348]


In [40]:
df_all = pd.read_csv('data_clean_reduce_23.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

### MODEL 1

In [41]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999937807436 
 Beta : 0.9856542614448092 
 V : 0.9999998496967869 
MAE : 
 Alpha : 0.0012397692513765974 
 Beta : 0.46223609553781386 
 V : 0.0014879345507213932 
MSE : 
 Alpha : 7.077813795261297e-06 
 Beta : 1.503431723484817 
 V : 3.082600059842903e-05 
Max AERR : 
 V     0.061472
α     0.038274
β    11.039069
dtype: float64
Waktu yang dibutuhkan :  1.3765215754508973


### MODEL 2

In [42]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999944647827697 
 Beta : 0.9853359749624195 
 V : 0.9999998759490556 
MAE : 
 Alpha : 0.004747588827707138 
 Beta : 0.4103006195118796 
 V : 0.0012764389527800846 
MSE : 
 Alpha : 0.00629934421122571 
 Beta : 1.536788109629735 
 V : 2.5441867857637428e-05 
Max AERR : 
 V     0.050625
α     2.113305
β    10.387315
dtype: float64
Waktu yang dibutuhkan :  1.885900624593099


## REDUCE 24

In [2]:
df1 = pd.read_csv("data_all.csv")
df2 = pd.read_csv("data_clean_reduce_24.csv")
missing_indices = df1.index.difference(df2.index)
missing_rows = df1.merge(df2, how='outer', indicator=True).query('_merge == "left_only"')
print(missing_rows)
print("Reduce index:", missing_rows.index.tolist())

          Pβ2      Pα1      Pβ1      Pα2       P3          V   α   β  \
755  -0.37203 -0.07912 -0.38382 -0.28786 -0.36698  20.028001  85   0   
859  -0.83328 -0.18517 -0.83497 -0.61107 -0.82992  30.024409  85   0   
860  -0.83328 -0.18517 -0.84170 -0.60939 -0.82150  30.024409  85   0   
862  -0.84170 -0.18686 -0.84507 -0.58414 -0.82655  30.024409  85   0   
1058 -2.36013 -0.51007 -2.29616 -1.60765 -2.34161  50.031069  85   0   
1059 -2.31131 -0.51512 -2.33824 -1.60260 -2.33319  50.031069  85   0   
2034 -1.49991 -0.33836 -1.46624 -1.03866 -1.48139  40.037530  85  -5   
2041 -1.48981 -0.34341 -1.48308 -1.03024 -1.48139  40.037530  85  -5   
3958 -0.36530 -0.02862 -0.38213 -0.28955 -0.34678  20.037151  80 -15   
4075 -0.87200 -0.18349 -0.84338 -0.60771 -0.83160  29.990107  85 -15   
4275 -2.34161 -0.51344 -2.35340 -1.60091 -2.34161  50.099339  85 -15   
5033 -0.38213 -0.08417 -0.38382 -0.27944 -0.37203  20.037314  85 -20   
5038 -0.37203 -0.13467 -0.36698 -0.27608 -0.37708  20.027975  90

In [3]:
df_all = pd.read_csv('data_clean_reduce_24.csv')
x_all = df_all.drop(columns = ["V","α","β"])
y_all = df_all[["V","α","β"]]

x0_train, x0_test, y0_train, y0_test = train_test_split(x_all, y_all,
                                                        random_state = 42, test_size = 0.2)

In [6]:
df_all.iloc[5033:5047]

Unnamed: 0,Pβ2,Pα1,Pβ1,Pα2,P3,V,α,β
5033,0.03703,-0.02188,-0.0303,0.00673,0.03872,10.058192,-5,-20
5034,0.03872,-0.0202,-0.03198,0.00673,0.03872,10.058192,-5,-20
5035,0.03872,-0.0202,-0.03198,0.00673,0.03872,10.058192,-5,-20
5036,0.0404,-0.0101,-0.02862,-0.00168,0.04209,10.072441,0,-20
5037,0.0404,-0.0101,-0.02862,-0.00337,0.04209,10.072441,0,-20
5038,0.0404,-0.01178,-0.0303,-0.00337,0.03872,10.072441,0,-20
5039,0.0404,-0.0101,-0.02862,-0.00168,0.04209,10.072441,0,-20
5040,0.03872,-0.0101,-0.0303,-0.00337,0.0404,10.072441,0,-20
5041,0.0404,-0.0101,-0.02862,-0.00168,0.04377,10.072441,0,-20
5042,0.0404,-0.0101,-0.02862,-0.00168,0.04377,10.072441,0,-20


### MODEL 1

In [45]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=250, max_depth=None, random_state=42, min_samples_leaf= 1, min_samples_split= 2, max_features = 'sqrt')),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.0579, random_state=42, max_depth = 5)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', algorithm='auto')))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999999888364483 
 Beta : 0.9855572270835813 
 V : 0.9999998576576354 
MAE : 
 Alpha : 0.0018578545045363193 
 Beta : 0.4649694558260702 
 V : 0.0014418516960277908 
MSE : 
 Alpha : 1.2746920367403166e-05 
 Beta : 1.5371880960149589 
 V : 2.9044107812892754e-05 
Max AERR : 
 V     0.050330
α     0.048791
β    10.644980
dtype: float64
Waktu yang dibutuhkan :  1.342847458521525


### MODEL 2

In [47]:
def max_absolute_error(y_true, y_pred):
    return np.max(np.abs(y_true - y_pred), axis=0)
scaler = StandardScaler()

# Base models (Level Pertama)
base_models = [
    ('random_forest', RandomForestRegressor(n_estimators=100, max_depth=None, random_state=21, max_leaf_nodes= 800)),
    ('gradient_boosting', GradientBoostingRegressor(n_estimators=200, learning_rate=0.05789, random_state=42, max_depth = 10)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=2, weights='distance', p =1)))
]

# Meta-model (Level Kedua)
# Menggunakan Ridge Regression sebagai meta-model untuk menghindari overfitting
meta_model = Ridge(alpha=0.1)

# Membentuk Stacking Regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True  # Memasukkan data asli ke meta-model bersama prediksi base models
)

# Untuk multi-output, kita bungkus dengan MultiOutputRegressor
model_all_best = MultiOutputRegressor(stacking_regressor)
start = time.time()
# Training
model_all_best.fit(x0_train, y0_train)
end = time.time()
waktu = (end-start)/60
# Prediction
y0_pred = model_all_best.predict(x0_test)
r2 = r2_score(y0_test, y0_pred, multioutput = 'raw_values')
mae = mean_absolute_error(y0_test, y0_pred, multioutput = 'raw_values')
mse = mean_squared_error(y0_test, y0_pred, multioutput = 'raw_values')
max_ae = max_absolute_error(y0_test, y0_pred)
print(f'R2 : \n Alpha : {r2[1]} \n Beta : {r2[2]} \n V : {r2[0]} ')
print(f'MAE : \n Alpha : {mae[1]} \n Beta : {mae[2]} \n V : {mae[0]} ')
print(f'MSE : \n Alpha : {mse[1]} \n Beta : {mse[2]} \n V : {mse[0]} ')
print('Max AERR : \n', max_ae)
print("Waktu yang dibutuhkan : ", waktu)

R2 : 
 Alpha : 0.9999945002960798 
 Beta : 0.9858208718425366 
 V : 0.999999878865795 
MAE : 
 Alpha : 0.004992038705802635 
 Beta : 0.41231362960226553 
 V : 0.0012671845015793518 
MSE : 
 Alpha : 0.006279747629302335 
 Beta : 1.5091275852399044 
 V : 2.4716709748121086e-05 
Max AERR : 
 V     0.050640
α     2.114263
β    13.346970
dtype: float64
Waktu yang dibutuhkan :  1.8437037905057272
