In [None]:
%pip install gym-trading-env pandas numpy matplotlib stable_baselines3 'shimmy>=0.2.1'

In [None]:
import gym_trading_env

import gymnasium as gym
import pandas as pd
from matplotlib import pyplot as plt

from stable_baselines3 import DQN,A2C,PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Load the NIFTY50 training prices, index them by timestamp and derive the
# `feature_*` columns. Built as a single chain so re-running the cell always
# starts from the CSV instead of mutating an existing frame.
df = (
    pd.read_csv('Data/NIFTY50 .csv')
    .assign(DatetimeIndex=lambda frame: pd.to_datetime(frame['DatetimeIndex']))
    .set_index('DatetimeIndex')
    .assign(
        # Close-to-close return; the first row is NaN and is dropped below.
        feature_pct_change=lambda frame: frame["close"].pct_change(),
        # High and low expressed relative to the close of the same bar.
        feature_high=lambda frame: frame["high"] / frame["close"] - 1,
        feature_low=lambda frame: frame["low"] / frame["close"] - 1,
    )
    .dropna()
)

df.head()

In [None]:
# Sanity check: run one full episode with uniformly random actions.
env = gym.make(
    'TradingEnv',
    df=df,
    verbose=1,
    name='NIFTY50',
    trading_fees=0.01/100,
    borrow_interest_rate=0.0003/100,
    windows=5,
)
observation, info = env.reset()
done = truncated = False
while not (done or truncated):
    # A real agent would compute this from `observation`
    # (position_index = your_policy(observation)); here we simply sample a
    # random index into the environment's position list.
    position_index = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(position_index)

In [None]:
def env_maker():
    """Build a fresh 'TradingEnv' over the training frame `df`.

    Takes no arguments so it can be handed to a vectorised-environment
    wrapper as a factory; `df` is read from the notebook's global scope at
    call time.
    """
    env_kwargs = dict(
        df=df,
        verbose=1,
        name='NIFTY50',
        trading_fees=0.01/100,
        borrow_interest_rate=0.0003/100,
    )
    return gym.make('TradingEnv', **env_kwargs)

# Wrap the environment factory in a single-environment DummyVecEnv; the same
# vectorised env instance is shared by all three agents below.
env = DummyVecEnv([env_maker])


# PPO agent
model1 = PPO(policy='MlpPolicy', env=env, verbose=1)
model1.learn(total_timesteps=1000)
# A2C agent
model2 = A2C(policy='MlpPolicy', env=env, verbose=1)
model2.learn(total_timesteps=1000)
# DQN agent
model3 = DQN(policy='MlpPolicy', env=env, verbose=1)
model3.learn(total_timesteps=1000)

In [None]:
# Load the held-out test prices and apply exactly the same feature
# engineering as for the training frame.
df_test = (
    pd.read_csv('Data/test_data.csv')
    .assign(DatetimeIndex=lambda frame: pd.to_datetime(frame['DatetimeIndex']))
    .set_index('DatetimeIndex')
    .assign(
        # Close-to-close return; the first row is NaN and is dropped below.
        feature_pct_change=lambda frame: frame["close"].pct_change(),
        # High and low expressed relative to the close of the same bar.
        feature_high=lambda frame: frame["high"] / frame["close"] - 1,
        feature_low=lambda frame: frame["low"] / frame["close"] - 1,
    )
    .dropna()
)

In [None]:
# Evaluate the trained PPO agent (model1) for one episode on the test data.
test_env = gym.make(
    'TradingEnv',
    df=df_test,
    verbose=1,
    name='NIFTY50',
    trading_fees=0.01/100,
    borrow_interest_rate=0.0003/100,
)
state, _ = test_env.reset()
terminated, truncated = False, False
while not (terminated or truncated):
    action, _states = model1.predict(state)
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
    # The new observation must be written back to `state`; otherwise the
    # agent keeps predicting from the initial observation for the whole
    # episode and the evaluation says nothing about the learned policy.
    state, reward, terminated, truncated, info = test_env.step(action)

In [None]:
# Evaluate the trained A2C agent (model2) for one episode on the test data.
test_env = gym.make(
    'TradingEnv',
    df=df_test,
    verbose=1,
    name='NIFTY50',
    trading_fees=0.01/100,
    borrow_interest_rate=0.0003/100,
)
state, _ = test_env.reset()
terminated, truncated = False, False
while not (terminated or truncated):
    action, _states = model2.predict(state)
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
    # The new observation must be written back to `state`; otherwise the
    # agent keeps predicting from the initial observation for the whole
    # episode and the evaluation says nothing about the learned policy.
    state, reward, terminated, truncated, info = test_env.step(action)

In [None]:
# Evaluate the trained DQN agent (model3) for one episode on the test data.
test_env = gym.make(
    'TradingEnv',
    df=df_test,
    verbose=1,
    name='NIFTY50',
    trading_fees=0.01/100,
    borrow_interest_rate=0.0003/100,
)
state, _ = test_env.reset()
terminated, truncated = False, False
while not (terminated or truncated):
    action, _states = model3.predict(state)
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
    # The new observation must be written back to `state`; otherwise the
    # agent keeps predicting from the initial observation for the whole
    # episode and the evaluation says nothing about the learned policy.
    state, reward, terminated, truncated, info = test_env.step(action)

Based on the results above, we conclude that the PPO and DQN models perform best, and hence we will use only these two for our predictions in all later models.