In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV 
from sklearn.metrics import mean_squared_error, mean_absolute_error
import xgboost as xgb

# Location of the generated dataset (raw string: Windows path with backslashes).
DATA_PATH = r'C:\Users\kian3\Consulting\ORTEC\data\Basic_random= 1000,100.csv'  # Adjust the path once data has been generated with all policies

# Column schema, declared once so the de-duplication key and the model inputs
# cannot drift apart.
FEATURE_COLS = [f'C{i}' for i in range(1, 15)]  # 'C1' ... 'C14'
TARGET_COL = 'cost'

df = pd.read_csv(DATA_PATH)
# Rows are considered duplicates when all features AND the cost match; any other
# columns in the file are ignored for this comparison.
df = df.drop_duplicates(subset=FEATURE_COLS + [TARGET_COL])
print(df)

# Splitting the data into 80% training and 20% testing
X = df[FEATURE_COLS]
y = df[TARGET_COL]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Native XGBoost containers for both splits; the label is passed by keyword in
# both calls for consistency.
train_data = xgb.DMatrix(X_train, label=y_train)
test_data = xgb.DMatrix(X_test, label=y_test)

# Candidate hyper-parameter values from which RandomizedSearchCV samples
param_dist = dict(
    max_depth=[5, 7, 9],
    learning_rate=[0.01, 0.05, 0.1],
    n_estimators=[500, 1000, 1500],
    subsample=[0.7, 0.8, 0.9],
    colsample_bytree=[0.7, 0.8, 0.9],
)

# Base XGBoost regressor handed to the search (GridSearchCV would also work here)
xgb_model = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)

# 10 sampled configurations x 3-fold CV, scored by (negated) mean absolute error
random_search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_dist,
    n_iter=10,
    scoring='neg_mean_absolute_error',
    cv=3,
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Keep the winning estimator for the evaluation step and report its settings
best_xgb = random_search.best_estimator_
print("Best Parameters:", random_search.best_params_)



       policy_id  demand_pattern     C1     C2     C3     C4     C5    C6  \
0            0.0             0.0  210.0  270.0  229.0  248.0  267.0  53.0   
1            1.0             0.0  212.0  275.0  235.0  258.0  271.0  54.0   
2            2.0             0.0  230.0  294.0  247.0  267.0  267.0  56.0   
3            3.0             0.0  220.0  284.0  232.0  267.0  286.0  55.0   
4            4.0             0.0  221.0  272.0  250.0  256.0  280.0  55.0   
...          ...             ...    ...    ...    ...    ...    ...   ...   
99995      995.0            99.0  219.0  286.0  236.0  255.0  283.0  56.0   
99996      996.0            99.0  226.0  270.0  243.0  248.0  291.0  57.0   
99997      997.0            99.0  211.0  275.0  232.0  257.0  287.0  56.0   
99998      998.0            99.0  230.0  275.0  247.0  256.0  289.0  53.0   
99999      999.0            99.0  214.0  287.0  231.0  269.0  285.0  57.0   

         C7    C8    C9   C10    C11   C12   C13   C14      cost  
0      3

In [None]:
# Disabled experiment: fit a single XGBRegressor with one fixed hyper-parameter
# combination (every value below also appears in the `param_dist` search space).
# The whole statement is commented out; previously only its first and last lines
# were, which left the indented keyword arguments as live code and made the cell
# fail with "IndentationError: unexpected indent".
# NOTE(review): the saved output under this cell looks like an eval-set RMSE log,
# but this fit() call passes no eval_set - presumably it came from an earlier
# version of the cell; confirm before re-enabling.
#
# xgb_model_ = xgb.XGBRegressor(
#     objective="reg:squarederror",
#     subsample=0.7,
#     n_estimators=1500,
#     max_depth=5,
#     learning_rate=0.01,
#     colsample_bytree=0.7,
#     random_state=42
# )
# xgb_model_.fit(X_train, y_train, verbose=True)

[0]	validation_0-rmse:142.01160
[1]	validation_0-rmse:142.01061
[2]	validation_0-rmse:142.00951
[3]	validation_0-rmse:142.00914
[4]	validation_0-rmse:142.00854
[5]	validation_0-rmse:142.00849
[6]	validation_0-rmse:142.00781
[7]	validation_0-rmse:142.00709
[8]	validation_0-rmse:142.00718
[9]	validation_0-rmse:142.00671
[10]	validation_0-rmse:142.00633
[11]	validation_0-rmse:142.00626
[12]	validation_0-rmse:142.00607
[13]	validation_0-rmse:142.00628
[14]	validation_0-rmse:142.00681
[15]	validation_0-rmse:142.00602
[16]	validation_0-rmse:142.00589
[17]	validation_0-rmse:142.00521
[18]	validation_0-rmse:142.00503
[19]	validation_0-rmse:142.00472
[20]	validation_0-rmse:142.00426
[21]	validation_0-rmse:142.00441
[22]	validation_0-rmse:142.00462
[23]	validation_0-rmse:142.00448
[24]	validation_0-rmse:142.00523
[25]	validation_0-rmse:142.00472
[26]	validation_0-rmse:142.00545
[27]	validation_0-rmse:142.00572
[28]	validation_0-rmse:142.00570
[29]	validation_0-rmse:142.00596
[30]	validation_0-rm

In [26]:

# Score the tuned model on the held-out test split

y_test_preds = best_xgb.predict(X_test)

# Compute both error metrics first, then report them (MAE before MSE)
mae = mean_absolute_error(y_test, y_test_preds)
mse = mean_squared_error(y_test, y_test_preds)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")

Mean Absolute Error (MAE): 116.55222245012207
Mean Squared Error (MSE): 20427.918871575974
