In [56]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3 import PPO

In [57]:
import yfinance as yf

nvidia_df = yf.download(tickers='NVDA', start='2009-01-01', end='2024-06-01')
apple_df = yf.download(tickers='AAPL', start='2009-01-01', end='2024-06-01')
microsoft_df = yf.download(tickers='MSFT', start='2009-01-01', end='2024-06-01')
google_df = yf.download(tickers='GOOGL', start='2009-01-01', end='2024-06-01')
amazon_df = yf.download(tickers='AMZN', start='2009-01-01', end='2024-06-01')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [58]:

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

class StockTradingEnv(gym.Env):
    def __init__(self, df):
        super(StockTradingEnv, self).__init__()
        self.df = df
        self.action_space = gym.spaces.Discrete(3)  # buy, hold, sell
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(df.shape[1]-1,), dtype=np.float32)
        self.reset()

    def reset(self):
        self.current_step = 0
        self.done = False
        self.total_profit = 0
        self.total_reward = 0
        self.stock_owned = 0
        self.starting_cash = 10000
        self.cash_in_hand = self.starting_cash
        return self._next_observation()

    def _next_observation(self):
        return self.df.iloc[self.current_step, 1:].values

    def step(self, action):
        self.current_step += 1
        reward = 0

        if action == 0:  # buy
            if self.cash_in_hand >= self.df.iloc[self.current_step, 1]:
                self.stock_owned += 1
                self.cash_in_hand -= self.df.iloc[self.current_step, 1]
        elif action == 2:  # sell
            if self.stock_owned > 0:
                self.stock_owned -= 1
                self.cash_in_hand += self.df.iloc[self.current_step, 1]

        self.total_profit = self.cash_in_hand + self.stock_owned * self.df.iloc[self.current_step, 1] - self.starting_cash
        reward = self.total_profit
        self.total_reward += reward

        self.done = self.current_step >= len(self.df) - 1

        # Returning 'terminated' and 'truncated' as part of the return values
        return self._next_observation(), reward, self.done, {}

    def render(self, mode='human'):
        print(f'Step: {self.current_step}, Total Profit: {self.total_profit}')

# Custom VecEnv Wrapper to handle the four return values
class CustomDummyVecEnv(DummyVecEnv):
    def step_wait(self):
        results = [env.step(self.actions[env_idx]) for env_idx, env in enumerate(self.envs)]
        obs, rews, dones, infos = zip(*results)
        self.buf_rews[:] = rews
        self.buf_dones[:] = dones
        self.buf_infos[:] = infos
        for env_idx in range(self.num_envs):
            if self.buf_dones[env_idx]:
                obs[env_idx] = self.envs[env_idx].reset()
        self._save_obs(obs)
        return self._obs_from_buf(), self.buf_rews, self.buf_dones, [{} for _ in range(self.num_envs)]

In [59]:
df = nvidia_df
df = df.sort_values('Date')
df = df.reset_index(drop=True)


In [60]:
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    return model

In [61]:
# Initialize the environment
env = CustomDummyVecEnv([lambda: StockTradingEnv(df)])
model = PPO('MlpPolicy', env, verbose=1)

# Define input shape for the LSTM model
input_shape = (df.shape[1]-1, 1)
lstm_model = create_lstm_model(input_shape)
lstm_model.compile(optimizer='adam', loss='mse')

# Train the PPO model
model.learn(total_timesteps=10000)

# Save the trained model
model.save('ppo_stock_trading')

# Load the trained model for evaluation
model = PPO.load('ppo_stock_trading')

# Evaluate the model
obs = env.reset()
for i in range(len(df)):
    action, _states = model.predict(obs)
    obs, rewards, dones, infos = env.step(action)
    env.render()

Using cpu device


TypeError: StockTradingEnv.reset() got an unexpected keyword argument 'seed'

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(df['Date'], df['Close'], label='Stock Price')
# Assuming total_profit was recorded during the run
plt.plot(df['Date'], [env.total_profit]*len(df), label='Total Profit')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

In [None]:
# Placeholder values for performance metrics
baseline_buy_and_hold = 0
baseline_ppo_mlp = 0
baseline_lstm = 0
clstm_ppo_returns = 0

performance_metrics = {
    'Model': ['Buy and Hold', 'PPO-MLP', 'LSTM', 'CLSTM-PPO'],
    'Cumulative Returns': [baseline_buy_and_hold, baseline_ppo_mlp, baseline_lstm, clstm_ppo_returns],
    'Max Earning Rate': [baseline_buy_and_hold, baseline_ppo_mlp, baseline_lstm, clstm_ppo_returns],
    'Average Profitability per Trade': [baseline_buy_and_hold, baseline_ppo_mlp, baseline_lstm, clstm_ppo_returns]
}

performance_df = pd.DataFrame(performance_metrics)
print(performance_df)