In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
import numpy as np

In [4]:
data=pd.read_csv(r'preprocessed_natural_gas_prices.csv')

In [5]:
data

Unnamed: 0,Price,Day,Month,Year,Price_lag1,Price_lag7,Price_rolling_mean7
0,0.209983,16,1,1997,0.188755,0.158921,0.172445
1,0.164085,17,1,1997,0.209983,0.157774,0.173346
2,0.126793,20,1,1997,0.164085,0.146873,0.170478
3,0.111302,21,1,1997,0.126793,0.164659,0.162856
4,0.114745,22,1,1997,0.111302,0.169248,0.155069
...,...,...,...,...,...,...,...
5941,0.084337,26,8,2020,0.085485,0.072863,0.081141
5942,0.084337,27,8,2020,0.084337,0.080321,0.081715
5943,0.080895,28,8,2020,0.084337,0.079174,0.081960
5944,0.071715,31,8,2020,0.080895,0.074584,0.081551


In [6]:
# Features and target
x=data.drop(columns='Price', axis=1)
y=data['Price']

In [7]:
x

Unnamed: 0,Day,Month,Year,Price_lag1,Price_lag7,Price_rolling_mean7
0,16,1,1997,0.188755,0.158921,0.172445
1,17,1,1997,0.209983,0.157774,0.173346
2,20,1,1997,0.164085,0.146873,0.170478
3,21,1,1997,0.126793,0.164659,0.162856
4,22,1,1997,0.111302,0.169248,0.155069
...,...,...,...,...,...,...
5941,26,8,2020,0.085485,0.072863,0.081141
5942,27,8,2020,0.084337,0.080321,0.081715
5943,28,8,2020,0.084337,0.079174,0.081960
5944,31,8,2020,0.080895,0.074584,0.081551


In [8]:
y

0       0.209983
1       0.164085
2       0.126793
3       0.111302
4       0.114745
          ...   
5941    0.084337
5942    0.084337
5943    0.080895
5944    0.071715
5945    0.067126
Name: Price, Length: 5946, dtype: float64

In [9]:
# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)

In [10]:
print(x.shape,x_train.shape,x_test.shape)

(5946, 6) (4756, 6) (1190, 6)


In [11]:
# Train a Decision Tree Regressor
model_dt = DecisionTreeRegressor(max_depth=5, min_samples_split=10, random_state=2)
model_dt.fit(x_train, y_train)

In [12]:
#predictions
y_pred_dt = model_dt.predict(x_test)

In [13]:
# Evaluating the model
mse_dt = mean_squared_error(y_test, y_pred_dt)
rmse_dt = np.sqrt(mse_dt)

In [14]:
rmse_dt

0.017540079972366295

In [15]:
from sklearn import metrics
print(metrics.r2_score(y_test, y_pred_dt))

0.9815322881744076


In [16]:
# Train a Random Forest Regressor
model_rf = RandomForestRegressor(n_estimators=100, random_state=2)
model_rf.fit(x_train, y_train)

In [17]:
y_pred_rf = model_rf.predict(x_test)

In [18]:
# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)

In [19]:
rmse_rf

0.016233192522670137

In [20]:
print(metrics.r2_score(y_test, y_pred_rf))

0.9841817720586651


In [21]:
# Train a Support Vector Regressor
model_svm = SVR(kernel='rbf', C=100, gamma=0.1)
model_svm.fit(x_train, y_train)

In [22]:
# Make predictions
y_pred_svm = model_svm.predict(x_test)

In [23]:
# Evaluate the model
mse_svm = mean_squared_error(y_test, y_pred_svm)
rmse_svm = np.sqrt(mse_svm)

In [24]:
rmse_svm

0.06461344492463574

In [25]:
print(metrics.r2_score(y_test, y_pred_svm))

0.7493915307908854


In [26]:
from sklearn.linear_model import LinearRegression


In [27]:
# Initialize and train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(x_train, y_train)


In [28]:
# Make predictions on the test set
y_pred = lr_model.predict(x_test)


In [29]:
# Calculate evaluation metrics
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [30]:
print(metrics.r2_score(y_test, y_pred))

0.9871686856443018


In [31]:
rmse

0.014620445192057996

In [39]:
import pickle

model_path = r'C:\Users\vyshn\OneDrive\Desktop\machinelearning approach for predicting the price of natural gas.pkl'

# Save the model to the specified path
with open(model_path, 'wb') as f:
    pickle.dump(model_dt, f)

print(f"Model saved to {model_path}")


Model saved to C:\Users\vyshn\OneDrive\Desktop\machinelearning approach for predicting the price of natural gas.pkl
