<a href="https://colab.research.google.com/github/wutpppppppppppppppppp/Stocktradegym/blob/main/trademodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow stable-baselines3 gym_trading_env gymnasium
# Improvement from gym -> gymnasium (newer version) stable-baselines -> stable-baselines3 gym-anytrading -> gym-trading-env

In [None]:
import gymnasium as gym
import gym_trading_env
from stable_baselines3 import A2C
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the KBANK price history (open, close, high, low, volume) from the project's
# GitHub repository; per the original note the CSV was exported from marketwatch.com.
url = 'https://raw.githubusercontent.com/wutpppppppppppppppppp/Stocktradegym/main/Kbank_Stock.csv'
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Preview the first five rows of the raw table.
df.head(n=5)

In [None]:
# Normalise every column label to lower case so later cells can use 'date', 'close', 'volume', ...
df.columns = df.columns.str.lower()
df.head(n=5)

In [None]:
# Clean the table: drop rows with missing values, drop duplicate rows, and turn the
# 'volume' column (strings with thousands separators such as "1,234,567") into floats.
# NOTE: at this point the index is still the positional one produced by read_csv, so
# sort_index() does not order the rows by date.
df.sort_index(inplace=True)
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
# astype(str) first so the conversion also works when 'volume' is already numeric
# (e.g. when this cell is re-run); regex=False makes the comma a literal match.
df['volume'] = (
    df['volume']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df.head()

In [None]:
# Convert 'date' to datetime64, make it the index, and order the rows chronologically.
# The guard keeps the cell re-runnable: once 'date' has become the index it is no longer
# a column, and df['date'] would raise KeyError.
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'])
    print(df.dtypes)
    df.set_index('date', inplace=True)
# Sort on the datetime index so the oldest row comes first; the trading environment
# walks through the rows in order, so they must be in chronological order.
df.sort_index(inplace=True)
df.head()

In [None]:
# Feature engineering is currently switched off: the lines below would add the columns
# feature_close, feature_open, feature_high, feature_low and feature_volume, but they
# are commented out, so this cell adds no columns to df.
# NOTE(review): gym_trading_env appears to build observations only from columns whose
# name contains "feature" — confirm whether these should be re-enabled before training.
# df["feature_close"] = df["close"].pct_change()
# df["feature_open"] = df["open"]/df["close"]
# df["feature_high"] = df["high"]/df["close"]
# df["feature_low"] = df["low"]/df["close"]
# df["feature_volume"] = df["volume"]/df["volume"].rolling(7*24).max()

# Drop any row that still contains a missing value, then preview the result.
df.dropna(axis=0, how="any", inplace=True)
df.head(n=5)

After setting up the data, it's time to create the environment for our RL agent to interact with.

In [None]:
def reward_function(history):
    """Return the log-return of the portfolio valuation between two history entries.

    Args:
        history: object indexable as ``history[column, position]``; this function
            reads ``"portfolio_valuation"`` at positions -1 and -2 (presumably the
            latest and the previous timestep).

    Returns:
        ``np.log(value_at_-1 / value_at_-2)``.
    """
    current_value = history["portfolio_valuation", -1]
    previous_value = history["portfolio_valuation", -2]
    return np.log(current_value / previous_value)
# Build the 'TradingEnv' environment on the prepared KBank DataFrame.
# NOTE(review): the fee and borrow-interest values look carried over from the library's
# crypto tutorial — confirm they are appropriate for this stock data and its timestep.
env = gym.make(
    'TradingEnv',
    name="KBank",
    df=df,
    positions=[-1, 0, 1],              # -1 = short, 0 = out of the market, +1 = long
    trading_fees=0.01/100,             # 0.01% charged on each buy / sell
    borrow_interest_rate=0.0003/100,   # 0.0003% charged per timestep
    portfolio_initial_value=100,
    reward_function=reward_function,   # log-return reward defined above in this cell
)

In [None]:
# Show the feature columns the environment works with.
# `signal_features` is a gym-anytrading attribute; NOTE(review): gym_trading_env's
# TradingEnv does not appear to define it, so read the environment's own DataFrame via
# `env.unwrapped` (gym.make returns wrappers around the real environment) — confirm.
env.unwrapped.df.filter(like="feature").head()

In [None]:
# Show the price series used by the environment.
# `prices` is a gym-anytrading attribute; NOTE(review): gym_trading_env's TradingEnv
# does not appear to define it and seems to use the 'close' column as price — confirm.
env.unwrapped.df["close"].head()

Build Environment

In [None]:
# Run one full episode with a random policy as a smoke test of the environment.
SEED = 42  # fixed seed so the random episode is repeatable after Restart & Run All
# NOTE(review): gym_trading_env appears to draw its random initial position from NumPy's
# global RNG, hence the global seed in addition to the Gymnasium ones — confirm.
np.random.seed(SEED)
env.action_space.seed(SEED)  # makes env.action_space.sample() deterministic

done, truncated = False, False
observation, info = env.reset(seed=SEED)
while not (done or truncated):
    # Pick a position by its index in the positions list (=[-1, 0, 1]); a trained agent
    # would use something like: position_index = your_policy(observation)
    position_index = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(position_index)

In [None]:
# Persist the finished episode's history to ./render_logs for the renderer.
# save_for_render is a method of the underlying TradingEnv, not of the wrappers that
# gym.make() puts around it; going through `env.unwrapped` works regardless of whether
# the installed Gymnasium version forwards unknown attributes through wrappers.
env.unwrapped.save_for_render(dir="render_logs")

In [None]:
# Load the episode logs written to "render_logs" and start the library's renderer.
from gym_trading_env.renderer import Renderer
renderer = Renderer(render_logs_dir="render_logs")
# NOTE(review): run() presumably starts a local web app and blocks this cell until it is stopped — confirm.
renderer.run()