In [6]:
import gym
import numpy as np
import pandas as pd
from gym import spaces

In [7]:
class StockTradingEnv(gym.Env):
    """Single-asset trading environment backed by a feature DataFrame.

    Each row of ``data`` is one time step. The agent trades one share at a
    time at the row's ``'PRC'`` value and observes every column except the
    first one.

    Actions (``spaces.Discrete(3)``):
        0 = Hold, 1 = Buy one share, 2 = Sell one share.

    NOTE(review): this class follows the classic gym API (``reset`` returns
    only the observation, ``step`` returns a 4-tuple). Confirm that the RL
    library used downstream accepts that API.
    """

    def __init__(self, data, initial_cash=10000):
        """Create the environment.

        Args:
            data: DataFrame with a ``'PRC'`` column. The first column is
                dropped from observations (presumably the date column --
                verify against the CSV); all remaining columns must be
                convertible to float32.
            initial_cash: Cash balance at the start of every episode and the
                baseline the reward is measured against.
        """
        super(StockTradingEnv, self).__init__()
        self.data = data.reset_index(drop=True)
        # Observation width: every column except the first one.
        self.n_features = self.data.shape[1] - 1
        self.initial_cash = initial_cash
        self.current_step = 0
        self.cash = initial_cash  # Starting cash
        self.stocks_held = 0  # Shares currently owned
        self.total_shares = 0  # Not updated by step(); kept for compatibility

        # Define action space: 0 = Hold, 1 = Buy, 2 = Sell
        self.action_space = spaces.Discrete(3)

        # Observation space: one float32 value per feature column
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.n_features,), dtype=np.float32
        )

    def _observation(self):
        """Return the feature vector for the current step as float32.

        The row index is clamped to the last row so that the observation
        returned together with ``done=True`` (or for a single-row frame)
        never indexes past the end of the data. The cast makes the returned
        array match the dtype declared in ``observation_space``; it raises
        ``ValueError`` if a remaining column holds non-numeric values.
        """
        row = min(self.current_step, len(self.data) - 1)
        return self.data.iloc[row, 1:].values.astype(np.float32)

    def reset(self):
        """Reset the environment to start a new episode.

        Returns:
            The float32 observation for the first row of the data.
        """
        self.current_step = 0
        self.cash = self.initial_cash
        self.stocks_held = 0
        self.total_shares = 0
        return self._observation()

    def step(self, action):
        """Apply ``action`` at the current price and advance one row.

        A buy is executed only if cash covers one share; a sell only if at
        least one share is held. Otherwise the action is a no-op.

        Returns:
            Tuple ``(next_state, reward, done, info)``. ``reward`` is the
            portfolio value (cash + shares valued at the pre-advance price)
            minus the initial cash, i.e. cumulative profit/loss rather than a
            per-step change. ``done`` becomes True once the step counter
            reaches the last row. ``info`` is always an empty dict.
        """
        current_price = self.data.iloc[self.current_step]['PRC']

        # Execute action
        if action == 1:  # Buy
            if self.cash >= current_price:
                self.cash -= current_price
                self.stocks_held += 1
        elif action == 2:  # Sell
            if self.stocks_held > 0:
                self.cash += current_price
                self.stocks_held -= 1

        # Reward: total portfolio value relative to the starting cash
        portfolio_value = self.cash + self.stocks_held * current_price
        reward = portfolio_value - self.initial_cash

        # Increment step
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        # Get the next state (clamped to the final row)
        next_state = self._observation()

        return next_state, reward, done, {}

    def render(self, mode="human"):
        """Print the step counter, cash balance and shares held.

        ``mode`` is accepted for compatibility with the classic gym
        ``render`` signature and is otherwise ignored.
        """
        print(
            f"Step: {self.current_step}, Cash: {self.cash}, Stocks Held: {self.stocks_held}"
        )

# Read the feature-engineered AKAM dataset from disk.
# NOTE(review): this is an absolute, machine-specific path -- consider a
# configurable data directory so the notebook runs on other machines.
csv_path = '/Users/amulya/Desktop/Capstone/DSCI-601-Amy/Data/FeatureEngineered/AKAM_feature_engineeredv2.csv'
data = pd.read_csv(csv_path)

# Build the trading environment on top of the loaded frame
env = StockTradingEnv(data)

In [8]:
pip install stable-baselines3


Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting gymnasium<0.30,>=0.28.1
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting matplotlib
  Downloading matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torch>=1.13
  Downloading torch-2.5.0-cp310-none-macosx_11_0_arm64.whl (64.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.3/64.3 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting farama-notifications>=0.0.1
  Downloading Farama_Notific