In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV


In [2]:
data=pd.read_csv(r'preprocessed_natural_gas_prices.csv')

In [3]:
# Features and target
x=data.drop(columns='Price', axis=1)
y=data['Price']

In [4]:
# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)

In [5]:
# Initialize models with default hyperparameters
dt_model = DecisionTreeRegressor()
svr_model = SVR()
rf_model = RandomForestRegressor()


In [6]:
# Train and evaluate Decision Tree Regressor
dt_model.fit(x_train, y_train)
y_pred_dt = dt_model.predict(x_test)
baseline_dt = mean_squared_error(y_test, y_pred_dt, squared=False)


In [7]:
# Train and evaluate SVR
svr_model.fit(x_train, y_train)
y_pred_svr = svr_model.predict(x_test)
baseline_svr = mean_squared_error(y_test, y_pred_svr, squared=False)


In [8]:
# Train and evaluate Random Forest Regressor
rf_model.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)
baseline_rf = mean_squared_error(y_test, y_pred_rf, squared=False)

In [9]:
print(f'Baseline RMSE for Decision Tree: {baseline_dt}')
print(f'Baseline RMSE for SVR: {baseline_svr}')
print(f'Baseline RMSE for Random Forest: {baseline_rf}')

Baseline RMSE for Decision Tree: 0.020540231956418322
Baseline RMSE for SVR: 0.12873558875775146
Baseline RMSE for Random Forest: 0.01611641473655759


In [10]:
# Perform hyperparameter tuning and evaluation
# Decision Tree Regressor
param_grid_dt = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5]
}
grid_search_dt = GridSearchCV(DecisionTreeRegressor(), param_grid_dt, cv=5, scoring='neg_mean_squared_error')
grid_search_dt.fit(x_train, y_train)
y_pred_dt_opt = grid_search_dt.predict(x_test)
optimized_dt = mean_squared_error(y_test, y_pred_dt_opt, squared=False)


In [11]:
# SVR
param_grid_svr = {
    'C': [0.1, 1, 10, 100],
    'epsilon': [0.01, 0.1, 0.2],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}
grid_search_svr = GridSearchCV(SVR(), param_grid_svr, cv=5, scoring='neg_mean_squared_error')
grid_search_svr.fit(x_train, y_train)
y_pred_svr_opt = grid_search_svr.predict(x_test)
optimized_svr = mean_squared_error(y_test, y_pred_svr_opt, squared=False)

In [12]:
# Random Forest Regressor
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search_rf = GridSearchCV(RandomForestRegressor(), param_grid_rf, cv=5, scoring='neg_mean_squared_error')
grid_search_rf.fit(x_train, y_train)
y_pred_rf_opt = grid_search_rf.predict(x_test)
optimized_rf = mean_squared_error(y_test, y_pred_rf_opt, squared=False)

In [13]:
print(f'Optimized RMSE for Decision Tree: {optimized_dt}')
print(f'Optimized RMSE for SVR: {optimized_svr}')
print(f'Optimized RMSE for Random Forest: {optimized_rf}')

Optimized RMSE for Decision Tree: 0.017249806930300513
Optimized RMSE for SVR: 0.10890416589873322
Optimized RMSE for Random Forest: 0.01622791715705109
