In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [5]:
# Load Historical Dataset
historical_path = r"E:\OIL_InfySpringboard\Code\oil and gas.csv"
df_historical = pd.read_csv(historical_path)

# Load Competitor Dataset
competitor_path = r"E:\OIL_InfySpringboard\Code\competitor-dataset.csv"
df_competitor = pd.read_csv(competitor_path)

# Load Scraped Real-Time Prices
scraped_prices_path = r"E:\OIL_InfySpringboard\Code\scraped_oil_prices.csv"
df_scraped = pd.read_csv(scraped_prices_path)

# Merge datasets on Date & Symbol
df = pd.merge(df_historical, df_competitor, on=["Date", "Symbol"], how="left")
df = pd.merge(df, df_scraped, on=["Date", "Symbol"], how="left")


In [6]:
for col in ["Closing Price", "Selling Price", "Volume Sold"]:
    if col not in df.columns:
        df[col] = np.nan  # Assign NaN if missing

# Fill missing values
df.fillna(method="ffill", inplace=True)  # Forward-fill missing data
df.fillna(0, inplace=True)  # Replace remaining NaNs with 0

  df.fillna(method="ffill", inplace=True)  # Forward-fill missing data


In [7]:
features = ["Closing Price", "Selling Price", "Volume Sold"]
target = "Selling Price"

X = df[features]
y = df[target]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train_scaled, y_train)

In [11]:
xgb_train_preds = xgb_model.predict(X_train_scaled).reshape(-1, 1)
xgb_test_preds = xgb_model.predict(X_test_scaled).reshape(-1, 1)

In [12]:
scaler_preds = StandardScaler()
xgb_train_preds_scaled = scaler_preds.fit_transform(xgb_train_preds)
xgb_test_preds_scaled = scaler_preds.transform(xgb_test_preds)

In [13]:
X_train_lstm = np.reshape(xgb_train_preds_scaled, (xgb_train_preds_scaled.shape[0], 1, 1))
X_test_lstm = np.reshape(xgb_test_preds_scaled, (xgb_test_preds_scaled.shape[0], 1, 1))

In [14]:
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(1, 1)),  # LSTM Layer 1
    LSTM(50),  # LSTM Layer 2
    Dense(1)  # Output Layer
])

  super().__init__(**kwargs)


In [15]:
lstm_model.compile(optimizer='adam', loss='mse')

In [17]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

lstm_model.fit(X_train_lstm, y_train, epochs=100, batch_size=16, 
               validation_data=(X_test_lstm, y_test), callbacks=[early_stopping])


Epoch 1/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 3.3869 - val_loss: 0.5267
Epoch 2/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 0.3827 - val_loss: 0.2457
Epoch 3/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 0.2429 - val_loss: 0.2350
Epoch 4/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 0.2432 - val_loss: 0.2306
Epoch 5/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 0.2500 - val_loss: 0.2319
Epoch 6/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - loss: 0.2514 - val_loss: 0.2370
Epoch 7/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - loss: 0.2643 - val_loss: 0.3966
Epoch 8/100
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 0.2581 - val_loss: 0.2381
Epoch 9/

<keras.src.callbacks.history.History at 0x1ca4151c680>

In [18]:
lstm_model.fit(X_train_lstm, y_train, epochs=80, batch_size=16, validation_data=(X_test_lstm, y_test))

Epoch 1/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 5ms/step - loss: 0.2329 - val_loss: 0.2678
Epoch 2/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 5ms/step - loss: 0.2574 - val_loss: 0.2398
Epoch 3/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 6ms/step - loss: 0.2416 - val_loss: 0.2644
Epoch 4/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 6ms/step - loss: 0.2515 - val_loss: 0.2339
Epoch 5/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 6ms/step - loss: 0.2511 - val_loss: 0.2555
Epoch 6/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 0.2462 - val_loss: 0.2936
Epoch 7/80
[1m4605/4605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - loss: 0.2518 - val_loss: 0.2576
Epoch 8/80
[1m3739/4605[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m2s[0m 3ms/step - loss: 0.2456

KeyboardInterrupt: 