In [4]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [3]:
import requests
import pandas as pd

columns = ['_id', 'daysToExpiry', 'expiry', 'index', 'close', 'date', 'high',
       'index_close', 'index_high', 'index_low', 'index_open',
       'intradayMovement', 'intradayTotal', 'low', 'open', 'overnightExpected',
       'overnightGap']


dte = [0,1,2,3,4]

def get_for_index(ind):
  url = f"https://live.markethound.in/api/history/expiries?index={ind}"
  response = requests.get(url)
  expiries = response.json()["result"]
  return expiries

def get_for_index_dte_list(ind,expiry_list):
  combined_df_for_index = pd.DataFrame(columns=columns)
  for expiry in expiry_list:
    for i in range(6):
      url = f"https://live.markethound.in/api/history/decay?name={ind}&expiry={expiry}&dte={str(i)}"
      response = requests.get(url)
      if response.status_code != 200:
        print("error")
      data = response.json()
      result = data.get("result", [])
      df = pd.DataFrame(result)
      combined_df_for_index = pd.concat([combined_df_for_index, df], axis=0, ignore_index=True)
  return combined_df_for_index

def get_data():
  combined_df = pd.DataFrame(columns=columns)
  indices = ["NIFTY", "SENSEX", "FINNIFTY","BANKEX"]
  for ind in indices:
    expiry_list = get_for_index(ind)
    df = get_for_index_dte_list(ind,expiry_list)
    combined_df = pd.concat([combined_df, df], axis=0, ignore_index=True)

  return combined_df


df_full = get_data()
df_full.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3528 entries, 0 to 3527
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   _id                3528 non-null   object
 1   daysToExpiry       3528 non-null   object
 2   expiry             3528 non-null   object
 3   index              3528 non-null   object
 4   close              3528 non-null   object
 5   date               3528 non-null   object
 6   high               3528 non-null   object
 7   index_close        3528 non-null   object
 8   index_high         3528 non-null   object
 9   index_low          3528 non-null   object
 10  index_open         3528 non-null   object
 11  intradayMovement   3528 non-null   object
 12  intradayTotal      3528 non-null   object
 13  low                3528 non-null   object
 14  open               3528 non-null   object
 15  overnightExpected  3528 non-null   object
 16  overnightGap       3528 non-null   object


In [5]:
columns_to_convert = [ 'daysToExpiry', 'close', 'date', 'high',
       'index_close', 'index_high', 'index_low', 'index_open',
       'intradayMovement', 'intradayTotal', 'low', 'open', 'overnightExpected',
       'overnightGap']  # List of columns to convert

# Convert selected columns to numeric
df_full[columns_to_convert] = df_full[columns_to_convert].apply(pd.to_numeric, errors='coerce')


In [26]:
df_full.info()
df_full_sorted = df_full.sort_values(by=['expiry', 'daysToExpiry'], ascending=[True, False])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3528 entries, 0 to 3527
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   _id                3528 non-null   object 
 1   daysToExpiry       3528 non-null   int64  
 2   expiry             3528 non-null   object 
 3   index              3528 non-null   object 
 4   close              3528 non-null   float64
 5   date               0 non-null      float64
 6   high               3528 non-null   float64
 7   index_close        3528 non-null   float64
 8   index_high         3528 non-null   float64
 9   index_low          3528 non-null   float64
 10  index_open         3528 non-null   float64
 11  intradayMovement   3528 non-null   float64
 12  intradayTotal      3528 non-null   float64
 13  low                3528 non-null   float64
 14  open               3528 non-null   float64
 15  overnightExpected  3528 non-null   float64
 16  overnightGap       3528 

In [27]:
def create_features_and_targets(df):
    # Create features
    features = df[['expiry','daysToExpiry', 'intradayMovement', 'intradayTotal', 'overnightExpected', 'overnightGap',
                   'index_close', 'index_high', 'index_low', 'index_open']]

    features = features[df['daysToExpiry'].isin([1, 2, 3, 4, 5])]
    next_day_expiry_values = features['daysToExpiry'] - 1

    # Filter rows that satisfy the condition
    filtered_rows = df[(df['expiry'].isin(features['expiry'])) & (df['daysToExpiry'].isin(next_day_expiry_values))]

    # Create an empty DataFrame with the same structure as features
    targets = pd.DataFrame(columns=['high', 'low', 'open', 'close'])

    # Fill in the rows that match the condition
    if not filtered_rows.empty:
        targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
        # Fill the missing rows with zeros
        targets = targets.reindex(index=features.index, fill_value=0)
    else:
        # If no rows satisfy the condition, fill targets with zeros
        targets = pd.DataFrame(np.zeros((features.shape[0], 4)), columns=['high', 'low', 'open', 'close'])

    # Drop 'expiry' column from features if needed
    features = features.drop('expiry', axis=1)

    return features, targets

# Function to prepare data for LSTM model
def prepare_data(df):
    # Scale the features
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df)

    # Reshape data into LSTM input shape [samples, time steps, features]
    X = np.reshape(scaled_features, (scaled_features.shape[0], 1, scaled_features.shape[1]))
    return X

# Defining an LSTM model
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=4)  # Output layer for 4 target variables (open, high, low, close)
    ])
    model.compile(optimizer='adam', loss='mse')  # Use Mean Squared Error loss
    return model


# Iterate over indices and train models
indices = ["NIFTY", "SENSEX", "FINNIFTY", "BANKEX"]
# Iterate over indices and train models
lstm_models = {}

for index in indices:
    index_data = df_full[df_full["index"] == index]
    train_data, test_data = train_test_split(index_data, test_size=0.2, random_state=42)
    train_features, train_targets = create_features_and_targets(train_data)
    test_features, test_targets = create_features_and_targets(test_data)

    # Scale features
    scaler = MinMaxScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    test_features_scaled = scaler.transform(test_features)

    # Convert data to float32 and reshape for LSTM
    X_train = np.asarray(train_features_scaled).astype(np.float32)
    y_train = np.asarray(train_targets).astype(np.float32)
    X_test = np.asarray(test_features_scaled).astype(np.float32)
    y_test = np.asarray(test_targets).astype(np.float32)

    # Reshape data for LSTM (assuming input_shape is (timesteps, features))
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    # Build and train the LSTM model
    lstm_model = build_lstm_model(input_shape=(X_train.shape[1], X_train.shape[2]))
    lstm_model.fit(X_train, y_train, epochs=250, batch_size=32, verbose=2, validation_split=0.1)

    # Evaluate the model
    mse = lstm_model.evaluate(X_test, y_test)
    print(f"Index: {index}, Test MSE: {mse}")

    lstm_models[index] = lstm_model


# Make predictions for the next day's stock prices for each index


  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
19/19 - 5s - loss: 30488.2812 - val_loss: 30756.0859 - 5s/epoch - 244ms/step
Epoch 2/250
19/19 - 0s - loss: 30441.8906 - val_loss: 30681.8691 - 94ms/epoch - 5ms/step
Epoch 3/250
19/19 - 0s - loss: 30313.5625 - val_loss: 30460.5195 - 89ms/epoch - 5ms/step
Epoch 4/250
19/19 - 0s - loss: 29968.0957 - val_loss: 29968.2012 - 93ms/epoch - 5ms/step
Epoch 5/250
19/19 - 0s - loss: 29367.3574 - val_loss: 29291.2051 - 95ms/epoch - 5ms/step
Epoch 6/250
19/19 - 0s - loss: 28671.0449 - val_loss: 28651.8438 - 108ms/epoch - 6ms/step
Epoch 7/250
19/19 - 0s - loss: 28119.6348 - val_loss: 28133.1328 - 99ms/epoch - 5ms/step
Epoch 8/250
19/19 - 0s - loss: 27620.4883 - val_loss: 27719.1895 - 111ms/epoch - 6ms/step
Epoch 9/250
19/19 - 0s - loss: 27254.4648 - val_loss: 27361.8398 - 107ms/epoch - 6ms/step
Epoch 10/250
19/19 - 0s - loss: 26864.8535 - val_loss: 27050.5488 - 114ms/epoch - 6ms/step
Epoch 11/250
19/19 - 0s - loss: 26577.5625 - val_loss: 26762.1543 - 98ms/epoch - 5ms/step
Epoch 12/250
19

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
19/19 - 5s - loss: 384670.3750 - val_loss: 381595.6250 - 5s/epoch - 276ms/step
Epoch 2/250
19/19 - 0s - loss: 384500.3438 - val_loss: 381309.1250 - 107ms/epoch - 6ms/step
Epoch 3/250
19/19 - 0s - loss: 383994.0625 - val_loss: 380431.2188 - 103ms/epoch - 5ms/step
Epoch 4/250
19/19 - 0s - loss: 382617.6562 - val_loss: 378527.1875 - 97ms/epoch - 5ms/step
Epoch 5/250
19/19 - 0s - loss: 380363.6250 - val_loss: 376128.9375 - 99ms/epoch - 5ms/step
Epoch 6/250
19/19 - 0s - loss: 377950.4688 - val_loss: 373993.6562 - 111ms/epoch - 6ms/step
Epoch 7/250
19/19 - 0s - loss: 376004.4062 - val_loss: 372299.4062 - 98ms/epoch - 5ms/step
Epoch 8/250
19/19 - 0s - loss: 374504.0312 - val_loss: 370925.2812 - 102ms/epoch - 5ms/step
Epoch 9/250
19/19 - 0s - loss: 373104.7500 - val_loss: 369716.2500 - 100ms/epoch - 5ms/step
Epoch 10/250
19/19 - 0s - loss: 371918.4688 - val_loss: 368611.6250 - 115ms/epoch - 6ms/step
Epoch 11/250
19/19 - 0s - loss: 370817.0625 - val_loss: 367589.8125 - 92ms/epoch - 

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
18/18 - 4s - loss: 42939.2656 - val_loss: 40441.6914 - 4s/epoch - 228ms/step
Epoch 2/250
18/18 - 0s - loss: 42888.4648 - val_loss: 40364.9961 - 125ms/epoch - 7ms/step
Epoch 3/250
18/18 - 0s - loss: 42741.0898 - val_loss: 40144.1406 - 122ms/epoch - 7ms/step
Epoch 4/250
18/18 - 0s - loss: 42372.7852 - val_loss: 39643.4805 - 139ms/epoch - 8ms/step
Epoch 5/250
18/18 - 0s - loss: 41685.8672 - val_loss: 38934.1523 - 123ms/epoch - 7ms/step
Epoch 6/250
18/18 - 0s - loss: 40925.0781 - val_loss: 38228.2852 - 145ms/epoch - 8ms/step
Epoch 7/250
18/18 - 0s - loss: 40259.6055 - val_loss: 37658.1836 - 120ms/epoch - 7ms/step
Epoch 8/250
18/18 - 0s - loss: 39709.5469 - val_loss: 37204.4922 - 117ms/epoch - 7ms/step
Epoch 9/250
18/18 - 0s - loss: 39285.2461 - val_loss: 36821.3633 - 127ms/epoch - 7ms/step
Epoch 10/250
18/18 - 0s - loss: 38894.9297 - val_loss: 36474.1680 - 142ms/epoch - 8ms/step
Epoch 11/250
18/18 - 0s - loss: 38555.9688 - val_loss: 36164.0547 - 150ms/epoch - 8ms/step
Epoch 12/

  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])
  targets = targets.append(filtered_rows[['high', 'low', 'open', 'close']])


Epoch 1/250
12/12 - 6s - loss: 415477.9375 - val_loss: 419836.1562 - 6s/epoch - 480ms/step
Epoch 2/250
12/12 - 0s - loss: 415412.2812 - val_loss: 419750.3438 - 74ms/epoch - 6ms/step
Epoch 3/250
12/12 - 0s - loss: 415300.3125 - val_loss: 419575.3125 - 92ms/epoch - 8ms/step
Epoch 4/250
12/12 - 0s - loss: 415044.0938 - val_loss: 419193.7812 - 94ms/epoch - 8ms/step
Epoch 5/250
12/12 - 0s - loss: 414520.0312 - val_loss: 418426.9688 - 89ms/epoch - 7ms/step
Epoch 6/250
12/12 - 0s - loss: 413554.3125 - val_loss: 417165.8750 - 79ms/epoch - 7ms/step
Epoch 7/250
12/12 - 0s - loss: 412110.6562 - val_loss: 415559.1875 - 88ms/epoch - 7ms/step
Epoch 8/250
12/12 - 0s - loss: 410534.6875 - val_loss: 413914.0000 - 71ms/epoch - 6ms/step
Epoch 9/250
12/12 - 0s - loss: 408948.4688 - val_loss: 412381.0625 - 75ms/epoch - 6ms/step
Epoch 10/250
12/12 - 0s - loss: 407592.6875 - val_loss: 411073.6250 - 94ms/epoch - 8ms/step
Epoch 11/250
12/12 - 0s - loss: 406375.8750 - val_loss: 409934.8125 - 93ms/epoch - 8ms/st

In [25]:
def create_features(df):
    features = df[['daysToExpiry', 'intradayMovement', 'intradayTotal', 'overnightExpected', 'overnightGap',
                   'index_close', 'index_high', 'index_low', 'index_open']]
    return features

def predict_next_day_data(df, model):
    # Prepare features for prediction
    features = create_features(df)
    features = prepare_data(features)

    # Make predictions using the model
    predictions = model.predict(features)

    # Denormalize predictions if needed (using scaler.inverse_transform)
    # Store or display the predicted prices as required

    return predictions

data = {
    'date': ['2024-03-14T00:00:00.000Z'],
    'daysToExpiry': [2],
    'intradayMovement': [220.70],
    'intradayTotal': [233.15],
    'overnightExpected': [195.25],
    'overnightGap': [198.55],
    'index_close': [22.15],
    'index_high': [10.90],
    'index_low': [22341.65],
    'index_open': [22450.70]
}

df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])

# Preprocess the data
features = create_features(df)

# Prepare data for LSTM model
X = prepare_data(features)

# Make predictions
predicted_prices = lstm_model.predict(X)

# Print or use the predicted prices as needed
print(predicted_prices)

[[110.716095 105.5973   110.51655  107.86923 ]]
