# Exercise 5

## Reinforcement Learning


---

## Overview

Welcome to this exercise. We are now going to use our new skills to build our first deep reinforcement learning model.




In [6]:
# Choose whether to download the price data from the API (True) or to load the previously saved CSV copy (False)
DOWNLOAD_DATA_FROM_API = False 
# Minimum number of non-missing price observations a ticker needs in order to be kept
MIN_REQUIRED_NUM_OBS_PER_TICKER=100

In [7]:
# Import Libraries
import yfinance as yf
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# NOTE(review): this binds a plain Python variable named OMP_NUM_THREADS; it does not set the
# OMP_NUM_THREADS environment variable (that would go through os.environ) - confirm what was intended.
OMP_NUM_THREADS=2
import matplotlib.pyplot as plt
import numpy as np

In [8]:

if DOWNLOAD_DATA_FROM_API:
    # Get the list of S&P 500 constituents
    sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

    # Filter out Class B shares that have a '.B' in the ticker name
    sp500_tickers = [ticker for ticker in sp500_tickers if '.B' not in ticker]

    # Define the start and end dates for historical data
    start_date = '2000-01-01'
    end_date   = '2024-05-01'

    # Download historical prices for the list of tickers.
    # auto_adjust=False keeps the separate 'Adj Close' field in the result.
    historical_prices = yf.download(sp500_tickers, start=start_date, end=end_date, auto_adjust=False)

    # The downloaded columns form a (price field, ticker) MultiIndex. Select the
    # 'Adj Close' field FIRST: dropping the field level before filtering on it
    # leaves only ticker names to compare against, so no column would match.
    # Selecting the field also leaves a flat, ticker-only column index, which is
    # named 'Ticker' to match the CSV branch below.
    historical_prices = historical_prices['Adj Close'].rename_axis(columns='Ticker')

    # Count non-missing values for each ticker
    ticker_counts = historical_prices.count()

    # Filter out tickers with fewer than MIN_REQUIRED_NUM_OBS_PER_TICKER non-missing values
    valid_tickers = ticker_counts[ticker_counts >= MIN_REQUIRED_NUM_OBS_PER_TICKER].index

    # Filter the DataFrame based on valid tickers
    historical_prices = historical_prices[valid_tickers]


else:
    # Read the previously downloaded data
    historical_prices = pd.read_csv('historical_prices.csv', index_col='Date', parse_dates=True)
    historical_prices.columns.name = 'Ticker'

In [9]:
# Number of non-missing price observations available for every ticker
ticker_counts = historical_prices.count()

# Tickers whose observation count reaches MIN_REQUIRED_NUM_OBS_PER_TICKER
valid_tickers = ticker_counts.loc[ticker_counts.ge(MIN_REQUIRED_NUM_OBS_PER_TICKER)].index

# Keep only the price columns of those tickers
historical_prices = historical_prices.loc[:, valid_tickers]
    

In [10]:
# Print the first 5 rows
historical_prices.head()


Ticker,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,43.613007,,0.844981,,,8.992848,1.277778,,16.274675,28.438286,...,,11.505342,,6.977997,18.328693,,4.680301,,25.027779,
2000-01-04,40.281452,,0.773741,,,8.735912,1.270833,,14.9094,26.999619,...,,11.073115,,7.138673,17.977634,,4.586222,,24.666668,
2000-01-05,37.782795,,0.785063,,,8.719849,1.388889,,15.204174,27.393782,...,,11.659699,,7.41412,18.957697,,4.60974,,25.138889,
2000-01-06,36.344185,,0.717125,,,9.024967,1.375,,15.32829,26.644875,...,,12.205125,,7.34526,19.937763,,4.570544,,23.777779,
2000-01-07,39.372852,,0.751094,,,9.121321,1.451389,,16.072987,27.393782,...,,11.803776,,7.34526,19.879248,,4.468626,,23.513889,


In [11]:
historical_prices.count()

Ticker
A       6120
AAL     4679
AAPL    6120
ABBV    2851
ABNB     851
        ... 
XYL     3156
YUM     6120
ZBH     5727
ZBRA    6120
ZTS     2830
Length: 499, dtype: int64

In [12]:
# Use the pandas info function to verify the data types of the dataframe column
historical_prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6120 entries, 2000-01-03 to 2024-04-30
Columns: 499 entries, A to ZTS
dtypes: float64(499)
memory usage: 23.3 MB


In [15]:
def computingReturns(close_prices, list_of_momentums):
    """Build a long-format table of forward and trailing returns.

    Parameters
    ----------
    close_prices : pandas.DataFrame
        Wide price table with one row per date and one column per ticker.
        The index and the columns axis may be named or unnamed.
    list_of_momentums : iterable of int
        Look-back windows, in rows. Each window ``i`` adds a column
        ``"<i>_d_returns"`` holding ``close_prices.pct_change(i)``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(Date, Ticker)``. The first column, ``F_1_d_returns``,
        is the one-row-ahead return aligned on the current date; it is
        followed by one column per requested window. Rows with a missing
        value in any column are dropped.
    """
    forecast = 1

    def _to_long(wide_returns, column_name):
        # DataFrame.unstack() yields a Series keyed by (column label, row label).
        # Both levels are named explicitly so the result does not depend on how
        # the caller named (or did not name) the index and the columns axis.
        long_returns = wide_returns.unstack()
        long_returns.index = long_returns.index.set_names(['Ticker', 'Date'])
        return long_returns.rename(column_name)

    # Target column: the return realised over the NEXT `forecast` rows,
    # shifted back so it sits on the date the prediction is made.
    forward_returns = close_prices.pct_change(forecast).shift(-forecast)
    pieces = [_to_long(forward_returns, "F_" + str(forecast) + "_d_returns")]

    # Feature columns: one trailing return per requested window.
    for i in list_of_momentums:
        pieces.append(_to_long(close_prices.pct_change(i), str(i) + "_d_returns"))

    # Every piece carries the same (Ticker, Date) index, so a single column-wise
    # concat replaces re-merging the whole long table once per window.
    total_returns = pd.concat(pieces, axis=1)
    total_returns = total_returns.dropna(axis=0, how='any')

    # Present the index as (Date, Ticker); row order stays ticker-major.
    return total_returns.swaplevel('Ticker', 'Date')

In [16]:
#We can choose how many momentums and which ones we want to create
list_of_momentums = [1,2] # [1,2,3,4,5,10].
#list_of_momentums = []
total_data = computingReturns(historical_prices, list_of_momentums)
total_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_d_returns,1_d_returns,2_d_returns
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-05,A,-0.038076,-0.06203,-0.133681
2000-01-06,A,0.083333,-0.038076,-0.097744
2000-01-07,A,0.060577,0.083333,0.042084
2000-01-10,A,-0.013599,0.060577,0.148958
2000-01-11,A,-0.020221,-0.013599,0.046154


In [18]:
df = total_data

In [None]:
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import random
import gymnasium as gym

In [None]:


class CustomEnv(gym.Env):
    """Environment that walks through the rows of a returns table, one row per step.

    Each observation is the pair of values found in columns 1 and 2 of the
    current row of ``df`` (``1_d_returns`` and ``2_d_returns`` for the table
    built in this notebook). The action space contains a single action and
    every step yields a reward of 0.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(observation, reward, done, info)``.
    NOTE(review): gymnasium's own ``Env`` API specifies ``reset -> (obs, info)``
    and a 5-tuple ``step``; the convention used here is kept unchanged - confirm
    which one the consuming agent expects.
    """

    def __init__(self, df):
        """Store the data table and declare the action and observation spaces."""
        super().__init__()
        self.df = df
        self.action_space = gym.spaces.Discrete(1)  # Action space (predict F_1_d_returns)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)  # State space (1_d_returns, 2_d_returns)
        self.current_step = 0
        self.state = None  # set by reset(); updated by every non-terminal step()

    def _observation(self):
        """Return columns 1:3 of the current row as float32, the dtype declared by observation_space."""
        return self.df.iloc[self.current_step, 1:3].to_numpy(dtype=np.float32)

    def reset(self):
        """Move back to the first row and return its observation."""
        self.current_step = 0
        self.state = self._observation()
        return self.state

    def step(self, action):
        """Advance one row; ``action`` is accepted but not used.

        Returns ``(next_state, reward, done, info)``. ``done`` becomes True once
        ``current_step`` reaches ``len(df) - 1``; in that case the most recently
        observed state is returned again.
        """
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1
        if not done:
            # Track the latest observation so the terminal step repeats the last
            # row actually visited rather than the row seen at reset().
            self.state = self._observation()
        next_state = self.state
        reward = 0  # No reward for predicting
        info = {}   # Additional information (if needed)
        return next_state, reward, done, info


In [None]:
env = CustomEnv(df)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
#from tensorflow.keras.optimizers import Adam

In [None]:
states = env.observation_space.shape
actions = env.action_space.n

In [None]:
states

In [None]:
# Define your custom model
def build_model(input_shape, nb_actions):
    """Build a small fully connected network with one linear output per action.

    Parameters
    ----------
    input_shape : tuple of int, or int
        Shape of one model input, without the batch axis. A bare integer
        ``n`` (e.g. ``env.observation_space.shape[0]``) is treated as ``(n,)``.
        NOTE(review): the agent built in this notebook uses a memory with
        ``window_length=1``; confirm whether the shape passed here must also
        include that leading window axis.
    nb_actions : int
        Number of output units.

    Returns
    -------
    An uncompiled ``Sequential`` model: Flatten -> Dense(32, relu) ->
    Dense(32, relu) -> Dense(nb_actions, linear).
    """
    # The Flatten layer needs a shape tuple, so wrap a bare integer.
    if isinstance(input_shape, (int, np.integer)):
        input_shape = (int(input_shape),)
    else:
        input_shape = tuple(input_shape)

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model


In [None]:
# Discard a model left over from an earlier run. The guard keeps this cell from
# raising NameError on a fresh kernel, where `model` has not been defined yet.
if 'model' in globals():
    del model

In [None]:
model = build_model(states, actions)

In [None]:
model.summary()

In [None]:
import tensorflow as tf
from keras import __version__
tf.keras.__version__ = __version__
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow.keras.optimizers.legacy import Adam


In [None]:
def build_agent(model, actions, memory_limit=50000, window_length=1,
                nb_steps_warmup=10, target_model_update=1e-2):
    """Create a DQN agent around ``model`` using a Boltzmann Q policy.

    Parameters
    ----------
    model :
        Network passed to ``DQNAgent`` as ``model``.
    actions : int
        Number of actions, passed to ``DQNAgent`` as ``nb_actions``.
    memory_limit : int, default 50000
        ``limit`` of the ``SequentialMemory`` replay memory.
    window_length : int, default 1
        ``window_length`` of the ``SequentialMemory`` replay memory.
    nb_steps_warmup : int, default 10
        Passed through to ``DQNAgent``.
    target_model_update : float, default 1e-2
        Passed through to ``DQNAgent``.

    Returns
    -------
    The constructed, not yet compiled, ``DQNAgent``. With the defaults the
    agent is configured exactly as before these parameters existed.
    """
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=nb_steps_warmup,
                   target_model_update=target_model_update)
    return dqn

In [None]:

dqn = build_agent(model, actions)
# `learning_rate` is the optimizer's documented keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

In [None]:
dqn.summary

In [20]:
import numpy as np
import pandas as pd
import gym
import dopamine
import logging  # Add this line
from dopamine.agents.dqn import dqn_agent
from dopamine.replay_memory import circular_replay_buffer
from dopamine.colab import utils as colab_utils
import tensorflow as tf
import os

# Create your environment
class CustomEnv(gym.Env):
    """Environment that walks through the rows of a returns table, one row per step.

    Each observation is the pair of values found in columns 1 and 2 of the
    current row of ``df`` (``1_d_returns`` and ``2_d_returns`` for the table
    used in this notebook). The action space contains a single action and
    every step yields a reward of 0.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(observation, reward, done, info)``.
    """

    def __init__(self, df):
        """Store the data table and declare the action and observation spaces."""
        super().__init__()
        self.df = df
        self.action_space = gym.spaces.Discrete(1)  # Action space (predict F_1_d_returns)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)  # State space (1_d_returns, 2_d_returns)
        self.current_step = 0
        self.state = None  # set by reset(); updated by every non-terminal step()

    def _observation(self):
        """Return columns 1:3 of the current row as float32, the dtype declared by observation_space."""
        return self.df.iloc[self.current_step, 1:3].to_numpy(dtype=np.float32)

    def reset(self):
        """Move back to the first row and return its observation."""
        self.current_step = 0
        self.state = self._observation()
        return self.state

    def step(self, action):
        """Advance one row; ``action`` is accepted but not used.

        Returns ``(next_state, reward, done, info)``. ``done`` becomes True once
        ``current_step`` reaches ``len(df) - 1``; in that case the most recently
        observed state is returned again.
        """
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1
        if not done:
            # Track the latest observation so the terminal step repeats the last
            # row actually visited rather than the row seen at reset().
            self.state = self._observation()
        next_state = self.state
        reward = 0  # No reward for predicting
        info = {}   # Additional information (if needed)
        return next_state, reward, done, info

# Build a five-row toy table and wrap it in the environment.
# NOTE(review): this rebinds the notebook-level names `df` and `env`, which earlier
# cells bind to the full returns table and its environment - later cells that read
# `df` / `env` will see this toy data instead. Confirm that is intended.

df = pd.DataFrame({
    'F_1_d_returns': [-0.038076, 0.083333, 0.060577, -0.013599, -0.020221],
    '1_d_returns': [-0.062030, -0.038076, 0.083333, 0.060577, -0.013599],
    '2_d_returns': [-0.133681, -0.097744, 0.042084, 0.148958, 0.046154]
})

env = CustomEnv(df)

# Set up logging
LOG_PATH = '/tmp/dopamine/logs'
logging.basicConfig(level=logging.INFO)

# Create a TensorFlow session on a fresh default graph
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()

# Set up a replay buffer with capacity 100000.
# NOTE(review): the recorded output of this cell ends in a RuntimeError raised in the
# call to WrappedReplayBuffer ("Cannot sample a batch with fewer than stack size (1) +
# update_horizon (1) transitions"), so nothing below this call ran in the recorded run.
# NOTE(review): the recorded log reports observation_dtype numpy.uint8 for this buffer,
# while the observations are fractional returns - confirm whether a dtype must be passed.
replay_buffer = circular_replay_buffer.WrappedReplayBuffer(
    observation_shape=(1,) + env.observation_space.shape,
    stack_size=1,
    replay_capacity=100000)  # Increased capacity


# Create the agent (min_replay_history is set to 1000)
agent = dqn_agent.DQNAgent(
    sess,
    num_actions=env.action_space.n,
    observation_shape=(1,) + env.observation_space.shape,
    observation_dtype=tf.float32,
    stack_size=1,
    network='dqn',
    gamma=0.99,
    update_horizon=1,
    min_replay_history=1000,  # Increase min replay history
    update_period=4,
    target_update_period=100,
    epsilon_fn=lambda x: 0.1)

# Create a checkpoint directory
checkpoint_dir = os.path.join(LOG_PATH, 'checkpoints')
checkpoint_file_prefix = os.path.join(checkpoint_dir, 'ckpt')
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Create a logger
logger = colab_utils.Logger(LOG_PATH)

# Initialize variables
sess.run(tf.compat.v1.global_variables_initializer())

# Train the agent for 100 episodes.
# NOTE(review): begin_episode / end_episode are called on every step of the inner loop
# rather than once per episode, and agent.step is given a sampled batch - confirm these
# calls against the Dopamine agent API.
for episode in range(100):
    obs = env.reset()
    done = False
    while not done:
        action = agent.begin_episode(obs)
        next_obs, reward, done, _ = env.step(action)
        agent.end_episode(reward)
        replay_buffer.add(obs, action, reward, next_obs, done)
        obs = next_obs

        # Only start sampling once the buffer holds min_replay_history transitions
        if len(replay_buffer) >= agent.min_replay_history:
            experience = replay_buffer.sample(1)
            agent.step(experience)

    # Every 10th episode, log the reward of the episode's last step under the tag 'Return'
    if episode % 10 == 0:
        logger.scalar_summary('Return', reward, step=episode)

# Save the final checkpoint
checkpoint_path = agent._saver.save(sess, checkpoint_file_prefix)
print('Final checkpoint saved at: %s' % checkpoint_path)


INFO:absl:Creating a OutOfGraphReplayBuffer replay memory with the following parameters:
INFO:absl:	 observation_shape: (1, 2)
INFO:absl:	 observation_dtype: <class 'numpy.uint8'>
INFO:absl:	 terminal_dtype: <class 'numpy.uint8'>
INFO:absl:	 stack_size: 1
INFO:absl:	 replay_capacity: 100000
INFO:absl:	 batch_size: 32
INFO:absl:	 update_horizon: 1
INFO:absl:	 gamma: 0.990000
INFO:absl:	 checkpoint_duration: 4
INFO:absl:	 keep_every: None


RuntimeError: Cannot sample a batch with fewer than stack size (1) + update_horizon (1) transitions.
  In call to configurable 'WrappedReplayBuffer' (<class 'dopamine.replay_memory.circular_replay_buffer.WrappedReplayBuffer'>)

In [None]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

In [None]:

# Define the model
input_shape = env.observation_space.shape[0]
nb_actions = env.action_space.n

In [None]:

model = build_model(input_shape, nb_actions)






In [None]:
# Define the memory
memory = SequentialMemory(limit=10000, window_length=1)

# Define the policy
policy = BoltzmannQPolicy()

In [None]:
# Create the DQN agent
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=nb_actions,
               nb_steps_warmup=100, target_model_update=1e-2)

In [None]:
from tensorflow.keras.optimizers import Adam
# Instantiate the optimizer
optimizer = Adam(learning_rate=0.001)  # Adjust learning rate as needed

In [None]:
optimizer._name

In [None]:
# Compile the model
dqn.compile(optimizer=optimizer, metrics=['mae'])

In [None]:
# Compile the model
dqn.compile(optimizer='adam', metrics=['mae'])

In [None]:



# Train the agent
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

# Step through the environment once, asking the trained agent for an action at every observation
obs = env.reset()  # Reset the environment
for _ in range(len(df) - 1):
    action = dqn.forward(obs)
    obs, rewards, dones, info = env.step(action)
    # `obs` is the next state returned by env.step (columns 1:3 of the next row of the
    # environment's table), not a predicted F_1_d_returns value; the agent's output is `action`.


In [29]:
!python –V

python: can't open file 'C:\\Users\\cramk\\Documents\\Metin\\building-a-workflow-for-aI\\l5-reinforcement-learning\\–V': [Errno 2] No such file or directory


In [31]:
import keras
print(keras.__version__)

2.13.1
