In [9]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import yfinance as yf
from ta.trend import MACD
from ta.volatility import BollingerBands
from ta.momentum import RSIIndicator
from ta.momentum import StochasticOscillator

In [10]:
class TradingEnv(gym.Env):
    def __init__(self, symbol, start, end):
        super(TradingEnv, self).__init__()
        
        # Fetching the historical prices
        df = yf.download(symbol, start, end)
        
        # Defining the indicators
        df['RSI'] = RSIIndicator(df['Close']).rsi()
        df['MACD'] = MACD(df['Close']).macd_diff()
        df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
        df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
        df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()
        
        # Dropping the NaN rows
        df.dropna(inplace=True)
        
        # Prices includes all the available indicators
        self.prices = df.values
        
        # Current index in the prices data
        self.current_step = 0
        
        # Action space is to either Buy, Sell or Hold
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,))
        
        # Observation space includes the prices and current balance and shares
        # which are initialized to be zero
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(9,))
        
        # Starting balance, you can change this to whatever you want
        self.balance = 10000
        
        # Number of shares bought
        self.shares_bought = 0
        
    def step(self, action):
        # Fetch the current price and indicators
        price = self.prices[self.current_step, :4].mean()
        volume = self.prices[self.current_step, 5]
        rsi = self.prices[self.current_step, 6]
        macd = self.prices[self.current_step, 7]
        ema_5 = self.prices[self.current_step, 8]
        ema_10 = self.prices[self.current_step, 9]
        ema_20 = self.prices[self.current_step, 10]
        action = action[0]
        if action < 0:  # sell
            if self.shares_bought > 0:
                self.balance = self.balance + self.shares_bought * (-action) * price
                self.shares_bought = self.shares_bought*(1+action)

        elif action > 0:  # buy
            if self.balance > 0:
                self.shares_bought = self.shares_bought + self.balance*action/price
                self.balance = self.balance - self.balance*action

        # Next day
        self.current_step += 1
        if self.current_step >= len(self.prices) - 1:
            done = True
        else:
            done = False

        # Calculate net worth and normalize it by the initial balance
        net_worth = self.balance + self.shares_bought * price

        reward = net_worth - 10000  # The reward is the increase in net worth

        # The state includes the indicators and current balance and shares
        obs = np.array([price, volume, rsi, macd, ema_5, ema_10, ema_20, self.balance, self.shares_bought])
        return obs, reward, done, {}

    def reset(self):
        # Reset to day 1
        self.current_step = 0

        # Reset balance
        self.balance = 10000
        self.shares_bought = 0

        # Fetch first day prices and indicators
        price = self.prices[self.current_step, :4].mean()
        volume = self.prices[self.current_step, 5]
        rsi = self.prices[self.current_step, 6]
        macd = self.prices[self.current_step, 7]
        ema_5 = self.prices[self.current_step, 8]
        ema_10 = self.prices[self.current_step, 9]
        ema_20 = self.prices[self.current_step, 10]

        # The state includes the indicators and current balance and shares
        obs = np.array([price, volume, rsi, macd, ema_5,ema_10, ema_20, self.balance, self.shares_bought])

        return obs

In [16]:
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.callbacks import EvalCallback

# Create environment
env = TradingEnv('AAPL', '2015-01-01', '2020-01-01')

# Create action noise (required for DDPG)
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Initialize agent
model = DDPG("MlpPolicy", env, action_noise=action_noise, verbose=1, device='cuda')

# Create evaluation environment
eval_env = TradingEnv('AAPL', '2020-01-01', '2022-01-01')

# Create evaluation callback
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=500,
                             deterministic=True, render=False)

# Train agent
model.learn(total_timesteps=10000, callback=eval_callback)

[*********************100%***********************]  1 of 1 completed
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
[*********************100%***********************]  1 of 1 completed




Eval num_timesteps=500, episode_reward=2363909.16 +/- 0.00
Episode length: 471.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 471      |
|    mean_reward     | 2.36e+06 |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=2363909.16 +/- 0.00
Episode length: 471.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 471      |
|    mean_reward     | 2.36e+06 |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
Eval num_timesteps=1500, episode_reward=2363909.16 +/- 0.00
Episode length: 471.00 +/- 0.00
----------------------------------
| eval/              |           |
|    mean_ep_length  | 471       |
|    mean_reward     | 2.36e+06  |
| time/              |           |
|    total_timesteps | 1500      |
| train/         

KeyboardInterrupt: 

In [67]:
# Create a new environment with the test data
test_env = TradingEnv('AAPL', '2022-01-01', '2023-01-01')

# Initialize the state using the first observation
obs = test_env.reset()

# To keep track of rewards
total_rewards = 0

# Run until the environment is done
while True:
    # Get the action from the agent
    action, _ = model.predict(obs, deterministic=True)

    # Take a step in the environment
    obs, reward, done, info = test_env.step(action)

    # Update the total reward
    total_rewards += reward

    # If the environment is done, break the loop
    if done:
        break

print(f'Total rewards: {total_rewards}')

[*********************100%***********************]  1 of 1 completed
Total rewards: -206059.67392272642


In [54]:
df = yf.download('AAPL', '2020-01-01', '2021-01-01')
df['RSI'] = RSIIndicator(df['Close']).rsi()
df['MACD'] = MACD(df['Close']).macd_diff()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()
prices = df.dropna().values
price = prices[0, :4].mean()
volume = prices[0, 5]
rsi = prices[0, 6]
macd = prices[0, 7]
ema_5 = prices[0, 8]
ema_10 = prices[0, 9]
ema_20 = prices[0, 10]
round(np.array([price, volume, rsi, macd, ema_5,ema_10, ema_20, 10000, 0]))

[*********************100%***********************]  1 of 1 completed


TypeError: type numpy.ndarray doesn't define __round__ method

In [52]:
observation_shape = (600, 800, 3)
observation_space = spaces.Box(low = np.zeros(observation_shape), 
                                    high = np.ones(observation_shape),
                                    dtype = np.float16)
(np.ones(observation_shape) * 1).shape

  sample[upp_bounded] = (


(600, 800, 3)

In [48]:
observation_space.shape

(600, 800, 3)

In [49]:
env.observation_space.shape

(9,)

In [1]:
import torch 
use_cuda = torch.cuda.is_available()