In [None]:
#module imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#SKLEARN
from sklearn.model_selection import train_test_split

#Model and evaluation metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Location of the price history CSV, relative to the notebook's working directory.
filepath = "./Nat_Gas.csv"

# Read the whole file into a DataFrame.
data = pd.read_csv(filepath_or_buffer=filepath)

# Report the (rows, columns) tuple, then preview the first records as the cell output.
print('shape of data: ', data.shape)
data.head()

In [None]:
# Plot the raw price series: a thin line for the trend plus a marker per observation.
# headers[0] is used as the x column and headers[1] as the y column.
headers = data.columns.tolist()

# Explicit figure/axes interface so `ax` really is a matplotlib Axes
# (previously `ax` was rebound to the return values of plt.plot / plt.scatter).
fig, ax = plt.subplots()
ax.plot(data[headers[0]], data[headers[1]], color='blue', linestyle='-', linewidth=1)
ax.scatter(data[headers[0]], data[headers[1]], color='red', marker='o', s=50)

ax.set_xlabel('Dates')
ax.set_ylabel('Prices')
ax.set_title('Monthly price values')
# Small, vertical tick labels so densely packed x labels stay legible.
ax.tick_params(axis='x', labelsize=5, labelrotation=90)
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Step 1: Parse the date column (the first column of the CSV) into datetime values
# so that the .dt accessor can be used below.
# The column is addressed through headers[0] everywhere in this cell, so the cell
# keeps working if the CSV names its date column something other than 'Dates'.
date_col = headers[0]
data[date_col] = pd.to_datetime(data[date_col])

# Step 2: Feature Engineering
# Calendar features derived from the parsed date, appended in this order:
# year, month, day, day_of_year.
data['year'] = data[date_col].dt.year
data['month'] = data[date_col].dt.month
data['day'] = data[date_col].dt.day
data['day_of_year'] = data[date_col].dt.dayofyear

data.head()

In [None]:
# Data : Features and Target
# Select the feature columns by name rather than by position (previously iloc[:, 2:4]),
# so the selection does not silently change if columns are added or reordered.
X = data[['year', 'month']]
# Target: the second column of the CSV (the price series).
Y = data[headers[1]]

In [None]:
# Hold out part of the rows for evaluation.
# NOTE(review): train_test_split shuffles rows by default, so test rows are interleaved
# in time with training rows — confirm that this is intended for a price time series.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,    # 20% of the rows go to the test set
    random_state=40,  # fixed seed so the split is reproducible
)

In [None]:
# Build a random forest of 100 trees with a fixed seed for reproducible fits.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Train on the training split; the fitted estimator is the cell output.
model.fit(X_train, y_train)

In [None]:
# Predict prices for the held-out test rows; scored in the evaluation cell.
y_pred = model.predict(
    X_test
)

In [None]:
# Visual check of the fit: overlay the model's output on the observed prices.
# The predictions cover every row of X, i.e. training rows as well as test rows.
data['predicted'] = model.predict(X)

plt.figure(figsize=(12, 6))
# Draw both series against the same date axis with identical transparency.
for series_name, series_label in ((headers[1], 'Actual'), ('predicted', 'Predicted')):
    plt.plot(data[headers[0]], data[series_name], label=series_label, alpha=0.7)

plt.title('Seasonal Pattern: Actual vs Predicted')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.legend()
plt.show()

In [None]:
# Score the held-out predictions: mean squared error and coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Both metrics are reported to three decimal places.
print(f"Mean Squared Error: {mse:.3f}")
print(f"R^2 Score: {r2:.3f}")

In [None]:
# Predict prices for one or more user-supplied dates.
dates_input = input("Enter dates in YYYY-MM-DD format separated by commas: ")

# Trim whitespace around each entry and drop empty ones (e.g. from a trailing comma)
# so they are not parsed into NaT and passed on to the model.
date_tokens = [token.strip() for token in dates_input.split(",") if token.strip()]
if not date_tokens:
    raise ValueError("No dates were entered; expected at least one YYYY-MM-DD value.")

date_in = pd.to_datetime(date_tokens)
X_unseen = pd.DataFrame({'Dates': date_in})

#Feature Engineering
X_unseen['year'] = X_unseen['Dates'].dt.year
X_unseen['month'] = X_unseen['Dates'].dt.month
X_unseen['day'] = X_unseen['Dates'].dt.day
X_unseen['day_of_year'] = X_unseen['Dates'].dt.dayofyear


#Make predictions
# Select the model inputs by name (year, month) instead of by column position.
X_new = X_unseen[['year', 'month']]
X_unseen['predicted'] = model.predict(X_new)  # Predictions on new dataset

# Report every requested date with its own prediction
# (previously only the first prediction was printed, labelled with all dates).
for date_value, price in zip(X_unseen['Dates'], X_unseen['predicted']):
    print('Price at ', date_value.date(), ' is', price)

In [None]:
# Price a storage contract: predict the price on the injection and withdrawal dates,
# then subtract movement and storage costs from the spread.
d_injection = input("Enter injection date in YYYY-MM-DD format separated by -: ")
d_withdrawl = input("Enter withdrawl date in YYYY-MM-DD format separated by -: ")

d_both = [d_injection, d_withdrawl]

# Row 0 is the injection date, row 1 the withdrawal date.
X_unseen = pd.DataFrame({'Dates': pd.to_datetime([d.strip() for d in d_both])})

print (X_unseen.head())

#Feature Engineering
X_unseen['year'] = X_unseen['Dates'].dt.year
X_unseen['month'] = X_unseen['Dates'].dt.month
X_unseen['day'] = X_unseen['Dates'].dt.day
X_unseen['day_of_year'] = X_unseen['Dates'].dt.dayofyear


#Make predictions
# The model takes exactly two inputs, year and month. Passing every engineered
# column (iloc[:, 1:]) hands it four features and makes predict() raise.
X_new = X_unseen[['year', 'month']]
X_unseen['predicted'] = model.predict(X_new)  # Predictions on new dataset

price_injection = X_unseen['predicted'].iloc[0]
price_withdrawl = X_unseen['predicted'].iloc[1]

print ('Price at ', d_both,' is', X_unseen['predicted'].tolist())

# input() returns strings; convert to numbers before doing arithmetic with them.
max_volume = float(input("Enter maximum volume of natural gas that can be stored in million MMBtu: "))
rate_movement = float(input("Enter injection/withdrawl rate in terms of dollars per 1 million MMBtu: "))
storage_cost = float(input("Enter storage cost of the natural gas per month: "))

# Whole months between injection (row 0) and withdrawal (row 1), counting year
# boundaries. The previous expression subtracted row 0's month from itself and
# therefore always charged zero months of storage.
months_stored = abs(
    (int(X_unseen['year'].iloc[1]) - int(X_unseen['year'].iloc[0])) * 12
    + (int(X_unseen['month'].iloc[1]) - int(X_unseen['month'].iloc[0]))
)

# NOTE(review): the price spread below is not scaled by max_volume while the movement
# cost is — confirm whether the spread should also be multiplied by the stored volume.
contract_price = price_withdrawl - (
    price_injection
    + rate_movement * max_volume
    + storage_cost * months_stored
)
print ('The contract price is: ', contract_price)
