In [2]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [3]:
import requests
import pandas as pd

columns = ['_id', 'daysToExpiry', 'expiry', 'index', 'close', 'date', 'high',
       'index_close', 'index_high', 'index_low', 'index_open',
       'intradayMovement', 'intradayTotal', 'low', 'open', 'overnightExpected',
       'overnightGap']


dte = [0,1,2,3,4]

def get_for_index(ind):
  url = f"https://live.markethound.in/api/history/expiries?index={ind}"
  response = requests.get(url)
  expiries = response.json()["result"]
  return expiries

def get_for_index_dte_list(ind,expiry_list):
  combined_df_for_index = pd.DataFrame(columns=columns)
  for expiry in expiry_list:
    for i in range(6):
      url = f"https://live.markethound.in/api/history/decay?name={ind}&expiry={expiry}&dte={str(i)}"
      response = requests.get(url)
      if response.status_code != 200:
        print("error")
      data = response.json()
      result = data.get("result", [])
      df = pd.DataFrame(result)
      combined_df_for_index = pd.concat([combined_df_for_index, df], axis=0, ignore_index=True)
  return combined_df_for_index

def get_data():
  combined_df = pd.DataFrame(columns=columns)
  indices = ["NIFTY", "SENSEX", "FINNIFTY","BANKEX"]
  for ind in indices:
    expiry_list = get_for_index(ind)
    df = get_for_index_dte_list(ind,expiry_list)
    combined_df = pd.concat([combined_df, df], axis=0, ignore_index=True)

  return combined_df


df_full = get_data()
df_full.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3528 entries, 0 to 3527
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   _id                3528 non-null   object
 1   daysToExpiry       3528 non-null   object
 2   expiry             3528 non-null   object
 3   index              3528 non-null   object
 4   close              3528 non-null   object
 5   date               3528 non-null   object
 6   high               3528 non-null   object
 7   index_close        3528 non-null   object
 8   index_high         3528 non-null   object
 9   index_low          3528 non-null   object
 10  index_open         3528 non-null   object
 11  intradayMovement   3528 non-null   object
 12  intradayTotal      3528 non-null   object
 13  low                3528 non-null   object
 14  open               3528 non-null   object
 15  overnightExpected  3528 non-null   object
 16  overnightGap       3528 non-null   object


In [4]:
columns_to_convert = [ 'daysToExpiry', 'close', 'date', 'high',
       'index_close', 'index_high', 'index_low', 'index_open',
       'intradayMovement', 'intradayTotal', 'low', 'open', 'overnightExpected',
       'overnightGap']  # List of columns to convert

# Converting selected columns to numeric
df_full[columns_to_convert] = df_full[columns_to_convert].apply(pd.to_numeric, errors='coerce')


In [5]:
df_full.info()
df_full_sorted = df_full.sort_values(by=['expiry', 'daysToExpiry'], ascending=[True, False])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3528 entries, 0 to 3527
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   _id                3528 non-null   object 
 1   daysToExpiry       3528 non-null   int64  
 2   expiry             3528 non-null   object 
 3   index              3528 non-null   object 
 4   close              3528 non-null   float64
 5   date               0 non-null      float64
 6   high               3528 non-null   float64
 7   index_close        3528 non-null   float64
 8   index_high         3528 non-null   float64
 9   index_low          3528 non-null   float64
 10  index_open         3528 non-null   float64
 11  intradayMovement   3528 non-null   float64
 12  intradayTotal      3528 non-null   float64
 13  low                3528 non-null   float64
 14  open               3528 non-null   float64
 15  overnightExpected  3528 non-null   float64
 16  overnightGap       3528 

In [6]:
def create_features_and_targets(df):
    features = df[['expiry','daysToExpiry', 'intradayMovement', 'intradayTotal', 'overnightExpected', 'overnightGap',
                   'index_close', 'index_high', 'index_low', 'index_open']]

    features = features[df['daysToExpiry'].isin([1, 2, 3, 4, 5])]
    next_day_expiry_values = features['daysToExpiry'] - 1

    filtered_rows = df[(df['expiry'].isin(features['expiry'])) & (df['daysToExpiry'].isin(next_day_expiry_values))]


    targets = pd.DataFrame(columns=['high', 'low', 'open', 'close'])


    if not filtered_rows.empty:
        targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
        # Filling the missing rows with zeros
        targets = targets.reindex(index=features.index, fill_value=0)
    else:
        # If no rows satisfy the condition, fill targets with zeros
        targets = pd.DataFrame(np.zeros((features.shape[0], 4)), columns=['high', 'low', 'open', 'close'])


    features = features.drop('expiry', axis=1)

    return features, targets


def prepare_data(df):
    # Scale the features
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df)

    # Reshape data into LSTM input shape [samples, time steps, features]
    X = np.reshape(scaled_features, (scaled_features.shape[0], 1, scaled_features.shape[1]))
    return X

# Defining an LSTM model
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=4)  # Output layer for 4 target variables (open, high, low, close)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model


# Iterate over indices and train models
indices = ["NIFTY", "SENSEX", "FINNIFTY", "BANKEX"]

lstm_models = {}

for index in indices:
    index_data = df_full[df_full["index"] == index]
    train_data, test_data = train_test_split(index_data, test_size=0.2, random_state=42)
    train_features, train_targets = create_features_and_targets(train_data)
    test_features, test_targets = create_features_and_targets(test_data)

    # Scale features
    scaler = MinMaxScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    test_features_scaled = scaler.transform(test_features)

    # Converting data to float32 and reshape for LSTM
    X_train = np.asarray(train_features_scaled).astype(np.float32)
    y_train = np.asarray(train_targets).astype(np.float32)
    X_test = np.asarray(test_features_scaled).astype(np.float32)
    y_test = np.asarray(test_targets).astype(np.float32)


    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])


    lstm_model = build_lstm_model(input_shape=(X_train.shape[1], X_train.shape[2]))
    lstm_model.fit(X_train, y_train, epochs=250, batch_size=32, verbose=2, validation_split=0.1)

    # Evaluating the model
    mse = lstm_model.evaluate(X_test, y_test)
    print(f"Index: {index}, Test MSE: {mse}")

    lstm_models[index] = lstm_model




  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
19/19 - 6s - loss: 30488.3125 - val_loss: 30758.5020 - 6s/epoch - 334ms/step
Epoch 2/250
19/19 - 0s - loss: 30448.2168 - val_loss: 30690.8164 - 122ms/epoch - 6ms/step
Epoch 3/250
19/19 - 0s - loss: 30326.2266 - val_loss: 30485.6465 - 126ms/epoch - 7ms/step
Epoch 4/250
19/19 - 0s - loss: 30011.7383 - val_loss: 30028.9980 - 129ms/epoch - 7ms/step
Epoch 5/250
19/19 - 0s - loss: 29455.9902 - val_loss: 29405.5059 - 101ms/epoch - 5ms/step
Epoch 6/250
19/19 - 0s - loss: 28827.2422 - val_loss: 28808.1680 - 133ms/epoch - 7ms/step
Epoch 7/250
19/19 - 0s - loss: 28265.9766 - val_loss: 28319.1855 - 117ms/epoch - 6ms/step
Epoch 8/250
19/19 - 0s - loss: 27800.1035 - val_loss: 27914.2480 - 109ms/epoch - 6ms/step
Epoch 9/250
19/19 - 0s - loss: 27428.4062 - val_loss: 27565.6426 - 131ms/epoch - 7ms/step
Epoch 10/250
19/19 - 0s - loss: 27104.1621 - val_loss: 27258.0859 - 115ms/epoch - 6ms/step
Epoch 11/250
19/19 - 0s - loss: 26818.1230 - val_loss: 26969.0664 - 116ms/epoch - 6ms/step
Epoch 12/

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
19/19 - 5s - loss: 384662.5312 - val_loss: 381584.5000 - 5s/epoch - 257ms/step
Epoch 2/250
19/19 - 0s - loss: 384480.8750 - val_loss: 381267.5938 - 116ms/epoch - 6ms/step
Epoch 3/250
19/19 - 0s - loss: 383922.9375 - val_loss: 380312.9688 - 125ms/epoch - 7ms/step
Epoch 4/250
19/19 - 0s - loss: 382450.0000 - val_loss: 378303.6562 - 119ms/epoch - 6ms/step
Epoch 5/250
19/19 - 0s - loss: 380085.4375 - val_loss: 375771.5312 - 115ms/epoch - 6ms/step
Epoch 6/250
19/19 - 0s - loss: 377655.6250 - val_loss: 373592.8438 - 110ms/epoch - 6ms/step
Epoch 7/250
19/19 - 0s - loss: 375592.1250 - val_loss: 371875.9375 - 105ms/epoch - 6ms/step
Epoch 8/250
19/19 - 0s - loss: 374032.0312 - val_loss: 370477.0312 - 123ms/epoch - 6ms/step
Epoch 9/250
19/19 - 0s - loss: 372716.7812 - val_loss: 369258.5312 - 121ms/epoch - 6ms/step
Epoch 10/250
19/19 - 0s - loss: 371515.1875 - val_loss: 368154.7188 - 109ms/epoch - 6ms/step
Epoch 11/250
19/19 - 0s - loss: 370250.3750 - val_loss: 367129.3438 - 123ms/epoc

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
18/18 - 6s - loss: 42941.5625 - val_loss: 40444.6172 - 6s/epoch - 308ms/step
Epoch 2/250
18/18 - 0s - loss: 42894.9453 - val_loss: 40374.5703 - 89ms/epoch - 5ms/step
Epoch 3/250
18/18 - 0s - loss: 42756.1211 - val_loss: 40166.7812 - 85ms/epoch - 5ms/step
Epoch 4/250
18/18 - 0s - loss: 42382.8320 - val_loss: 39667.3438 - 85ms/epoch - 5ms/step
Epoch 5/250
18/18 - 0s - loss: 41711.7148 - val_loss: 38914.8672 - 117ms/epoch - 7ms/step
Epoch 6/250
18/18 - 0s - loss: 40886.9297 - val_loss: 38175.1836 - 87ms/epoch - 5ms/step
Epoch 7/250
18/18 - 0s - loss: 40171.6602 - val_loss: 37581.5000 - 95ms/epoch - 5ms/step
Epoch 8/250
18/18 - 0s - loss: 39606.5156 - val_loss: 37112.9023 - 110ms/epoch - 6ms/step
Epoch 9/250
18/18 - 0s - loss: 39129.8633 - val_loss: 36715.1250 - 108ms/epoch - 6ms/step
Epoch 10/250
18/18 - 0s - loss: 38777.9727 - val_loss: 36371.0898 - 92ms/epoch - 5ms/step
Epoch 11/250
18/18 - 0s - loss: 38440.0430 - val_loss: 36054.2734 - 103ms/epoch - 6ms/step
Epoch 12/250
18

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
12/12 - 5s - loss: 415475.1562 - val_loss: 419831.3125 - 5s/epoch - 399ms/step
Epoch 2/250
12/12 - 0s - loss: 415406.4062 - val_loss: 419739.5000 - 88ms/epoch - 7ms/step
Epoch 3/250
12/12 - 0s - loss: 415285.3438 - val_loss: 419548.7500 - 84ms/epoch - 7ms/step
Epoch 4/250
12/12 - 0s - loss: 415014.1875 - val_loss: 419141.6562 - 88ms/epoch - 7ms/step
Epoch 5/250
12/12 - 0s - loss: 414434.9375 - val_loss: 418342.7188 - 81ms/epoch - 7ms/step
Epoch 6/250
12/12 - 0s - loss: 413457.3438 - val_loss: 417066.5000 - 80ms/epoch - 7ms/step
Epoch 7/250
12/12 - 0s - loss: 412046.2188 - val_loss: 415466.3750 - 122ms/epoch - 10ms/step
Epoch 8/250
12/12 - 0s - loss: 410458.7500 - val_loss: 413846.0000 - 134ms/epoch - 11ms/step
Epoch 9/250
12/12 - 0s - loss: 408983.1875 - val_loss: 412352.8125 - 141ms/epoch - 12ms/step
Epoch 10/250
12/12 - 0s - loss: 407537.2188 - val_loss: 411051.6875 - 128ms/epoch - 11ms/step
Epoch 11/250
12/12 - 0s - loss: 406262.1875 - val_loss: 409926.1875 - 132ms/epoch

In [7]:
def create_features(df):
    features = df[['daysToExpiry', 'intradayMovement', 'intradayTotal', 'overnightExpected', 'overnightGap',
                   'index_close', 'index_high', 'index_low', 'index_open']]
    return features

def predict_next_day_data(df, model):
    features = create_features(df)
    features = prepare_data(features)
    predictions = model.predict(features)

    return predictions

data = {
    'date': ['2024-03-14T00:00:00.000Z'],
    'daysToExpiry': [2],
    'intradayMovement': [220.70],
    'intradayTotal': [233.15],
    'overnightExpected': [195.25],
    'overnightGap': [198.55],
    'index_close': [22.15],
    'index_high': [10.90],
    'index_low': [22341.65],
    'index_open': [22450.70]
}

df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])

features = create_features(df)

X = prepare_data(features)

predicted_prices = lstm_model.predict(X)

print(predicted_prices)

[[109.845406 108.32008  109.56341  108.775246]]
