In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [2]:
# Load Historical Dataset
historical_path = r"E:\OIL_InfySpringboard\Code\oil and gas.csv"
df_historical = pd.read_csv(historical_path)

# Load Competitor Dataset
competitor_path = r"E:\OIL_InfySpringboard\Code\competitor-dataset.csv"
df_competitor = pd.read_csv(competitor_path)

# Load Scraped Real-Time Prices
scraped_prices_path = r"E:\OIL_InfySpringboard\Code\scraped_oil_prices.csv"
df_scraped = pd.read_csv(scraped_prices_path)

# Merge datasets on Date & Symbol
df = pd.merge(df_historical, df_competitor, on=["Date", "Symbol"], how="left")
df = pd.merge(df, df_scraped, on=["Date", "Symbol"], how="left")


In [3]:
for col in ["Closing Price", "Selling Price", "Volume Sold"]:
    if col not in df.columns:
        df[col] = np.nan  # Assign NaN if missing

# Fill missing values
df.fillna(method="ffill", inplace=True)  # Forward-fill missing data
df.fillna(0, inplace=True)  # Replace remaining NaNs with 0

  df.fillna(method="ffill", inplace=True)  # Forward-fill missing data


In [4]:
features = ["Closing Price", "Selling Price", "Volume Sold"]
target = "Selling Price"

X = df[features]
y = df[target]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train_scaled, y_train)

In [8]:
xgb_train_preds = xgb_model.predict(X_train_scaled).reshape(-1, 1)
xgb_test_preds = xgb_model.predict(X_test_scaled).reshape(-1, 1)

In [9]:
scaler_preds = StandardScaler()
xgb_train_preds_scaled = scaler_preds.fit_transform(xgb_train_preds)
xgb_test_preds_scaled = scaler_preds.transform(xgb_test_preds)

In [10]:
X_train_lstm = np.reshape(xgb_train_preds_scaled, (xgb_train_preds_scaled.shape[0], 1, 1))
X_test_lstm = np.reshape(xgb_test_preds_scaled, (xgb_test_preds_scaled.shape[0], 1, 1))

In [11]:
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(1, 1)),  # LSTM Layer 1
    LSTM(50),  # LSTM Layer 2
    Dense(1)  # Output Layer
])

  super().__init__(**kwargs)


In [12]:
lstm_model.compile(optimizer='adam', loss='mse')

In [13]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

lstm_model.fit(X_train_lstm, y_train, epochs=100, batch_size=16, 
               validation_data=(X_test_lstm, y_test), callbacks=[early_stopping])


Epoch 1/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 8ms/step - loss: 1191.1157 - val_loss: 36.3906
Epoch 2/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 10ms/step - loss: 16.4818 - val_loss: 1.3704
Epoch 3/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 10ms/step - loss: 0.8170 - val_loss: 0.2656
Epoch 4/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 9ms/step - loss: 0.2583 - val_loss: 0.2585
Epoch 5/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 9ms/step - loss: 0.2577 - val_loss: 0.2254
Epoch 6/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 8ms/step - loss: 0.2661 - val_loss: 0.2563
Epoch 7/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 9ms/step - loss: 0.2710 - val_loss: 0.2305
Epoch 8/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 8ms/step - loss: 0.2297 - val_loss: 0.2666
E

<keras.src.callbacks.history.History at 0x26fb1116960>

In [14]:
lstm_preds = lstm_model.predict(X_test_lstm)

[1m576/576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [15]:
mae = mean_absolute_error(y_test, lstm_preds)
r2 = r2_score(y_test, lstm_preds)

print(f"✅ LSTM Model - MAE: {mae:.2f}, R² Score: {r2:.2f}")

✅ LSTM Model - MAE: 0.18, R² Score: 1.00


In [17]:
# Save the entire model
lstm_model.save("lstm_oil_price_model.h5")

# (Optional) Save only the model weights
lstm_model.save_weights("lstm_oil_price.weights.h5")




In [18]:
import joblib

# Save the scaler (example if you used MinMaxScaler)
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [19]:
import joblib

# Assuming xgb_model is your trained XGBoost model
joblib.dump(xgb_model, "xgb_oil_price_model.pkl")
print("✅ XGBoost model saved successfully!")


✅ XGBoost model saved successfully!
