In [150]:
import gym
from gym import Env
from gym import spaces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA

class TradingEnv(Env):
    def __init__(self, data, max_shares=100, initial_balance=100000, max_episode_steps=None):
        super(TradingEnv, self).__init__()

        # Load the CSV data
        self.data = data

        self.current_step = 0
        self.max_episode_steps = max_episode_steps if max_episode_steps else len(self.data) - 1

        # Define action and observation spaces
        self.max_shares = max_shares
        self.action_space = spaces.Box(low=-max_shares, high=max_shares, shape=(1,))
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(self.data.columns) + 2,))  # Add 2 for balance and ARIMA forecast

        # Initialize the starting balance, shares held, and ARIMA forecast
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.shares_held = 0
        self.current_price = 0
        self.arima_forecast = 0  # Initialize ARIMA forecast

        # Train ARIMA model (you can do this in the constructor if needed)
        self.train_arima_model()

    def step(self, action):
        # Implement the logic for taking an action in the environment
        self.current_price = self.data['Close'][self.current_step]

        action = np.clip(action, -self.max_shares, self.max_shares)  # Clip action to the allowed range
        shares_bought = max(action, 0)
        shares_sold = abs(min(action, 0))

        cost = shares_bought * self.current_price
        revenue = shares_sold * self.current_price

        self.balance -= cost
        self.balance += revenue
        self.shares_held += shares_bought - shares_sold

        # Calculate rewards (e.g., based on profit or loss)
        reward = self.balance - self.initial_balance  # Simple reward based on profit/loss

        # Move to the next time step
        self.current_step += 1

        # Check if the episode is done
        done = self.current_step >= self.max_episode_steps or self.balance <= 0

        # Update ARIMA forecast (you can do this differently based on your ARIMA model)
        self.arima_forecast = self.get_arima_forecast()

        # Define info (optional)
        info = {}

        # Include the current balance and ARIMA forecast in the observation
        obs = np.concatenate([self._get_obs(), np.array([self.balance / self.initial_balance, self.arima_forecast])])

        return obs, reward, done, info

    def reset(self):
        # Reset the environment to the initial state
        self.current_step = 0
        self.balance = self.initial_balance
        self.shares_held = 0
        self.current_price = 0
        self.arima_forecast = 0  # Reset ARIMA forecast
        return self._get_obs()

    def render(self):
        # Implement a visualization of the current state (optional)
        pass

    def _get_obs(self):
        # Get the current observation (e.g., select relevant columns from the data frame)
        observation_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'RSI', 'SMA50', 'SMA200']
        obs = self.data.iloc[self.current_step][observation_columns].values
        return obs

    def train_arima_model(self):
        # Train your ARIMA model here using historical data
        # Example:
        train_data = self.data['Close'][:self.current_step]
        order = (5, 1, 0)  # Example ARIMA order (p, d, q)
        self.arima_model = ARIMA(train_data, order=order)
        self.arima_model = self.arima_model.fit()

    def get_arima_forecast(self):
        # Generate ARIMA forecast for the current time step
        forecast_steps = 1  # You can adjust the number of steps to forecast
        forecast_values, _, _ = self.arima_model.forecast(steps=forecast_steps)
        return forecast_values[0]


In [145]:
env.action_space.sample()

array([58.34501], dtype=float32)

In [146]:
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv


In [156]:
data = pd.read_csv("C:\\Users\\Yash\\Downloads\\tata.csv")
# Convert the date column to a numerical format (e.g., Unix timestamp)

# data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')
# data['Date'] = (data['Date'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1D')

# data['Date'] = (data['Date'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1D')
# Create your custom trading environment
env = TradingEnv(data)
print(data)

# Wrap the environment in a DummyVecEnv to make it compatible with Stable Baselines3
env = DummyVecEnv([lambda: env])


            Open        High         Low       Close   Adj Close     Volume  \
0       9.881520   10.361560    9.881520   10.361560    2.303619  138828915   
1      10.864058   11.004420   10.389632   10.529995    2.341065  257373746   
2      10.555260   10.886516   10.434548   10.799491    2.400981  181469397   
3      10.838792   10.976348   10.647899   10.704044    2.379761  122125737   
4      10.667550   10.751768   10.445777   10.538417    2.342938  131632371   
...          ...         ...         ...         ...         ...        ...   
6728  119.599998  119.599998  118.000000  118.849998  118.849998   20711952   
6729  119.000000  119.500000  117.949997  118.150002  118.150002   21275054   
6730  118.349998  120.500000  116.900002  120.199997  120.199997   34154214   
6731  120.000000  120.650002  119.250000  120.000000  120.000000   24439101   
6732  120.300003  121.400002  119.150002  120.500000  120.500000   19175105   

            RSI       SMA50      SMA200  
0     64.

In [155]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.envs import DummyVecEnv

# Define a function to train a PPO agent
def train_ppo(env, num_timesteps):
    # Wrap the environment in a DummyVecEnv to make it compatible with PPO
    env = DummyVecEnv([lambda: env])

    # Create and configure the PPO agent
    model = PPO("MlpPolicy", env, verbose=1)

    # Train the model
    model.learn(total_timesteps=num_timesteps)

    return model

ImportError: cannot import name 'DummyVecEnv' from 'stable_baselines3.common.envs' (C:\Users\Yash\AppData\Roaming\Python\Python310\site-packages\stable_baselines3\common\envs\__init__.py)

In [154]:
# Train the PPO agent
trained_model = train_ppo(env, num_timesteps=1000)


Using cpu device


ValueError: too many values to unpack (expected 2)

In [None]:

    # Evaluate the trained model (optional)
    mean_reward, _ = evaluate_policy(trained_model, env, n_eval_episodes=10)
    print(f"Mean reward: {mean_reward}")

In [96]:

a= pd.to_datetime('05-9-2000', format='%d-%m-%Y')

2000-09-05 00:00:00


In [89]:
( 9792 - pd.Timestamp("1970-01-01")) // pd.Timedelta('1D')


TypeError: unsupported operand type(s) for -: 'int' and 'Timestamp'

In [158]:
!pip install stable-baselines3


Defaulting to user installation because normal site-packages is not writeable


