## Data

In [1]:
from data_simulation import process_price_traces

# Parameters forwarded unchanged to the price-trace simulator.
start_price, alpha = 100, 1.0
time_steps, num_traces = 100, 500

# Each keyword argument carries the variable of the same name.
process_traces = process_price_traces(
    start_price=start_price, alpha=alpha, time_steps=time_steps, num_traces=num_traces
)

## Modeling

In [2]:
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
import yfinance as yf
from collections import deque

2023-05-08 16:56:31.327252: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [118]:
class Agent:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    The agent chooses among three discrete actions (hold, buy, sell) and
    learns from the most recent transitions stored in `memory`.
    """

    def __init__(self, state_size, is_eval=False):
        """Set up hyper-parameters, the replay memory and the Q-network.

        Args:
            state_size: Width of one state row; used as the network input size.
            is_eval: When True, `act` never explores and always returns the
                action with the highest predicted value.
        """
        self.state_size = state_size
        self.action_size = 3  # hold, buy, sell
        # Bounded buffer of (state, action, reward, next_state, done) tuples.
        self.memory = deque(maxlen=1000)
        self.inventory = []  # not used inside this class; managed by the caller
        self.is_eval = is_eval
        self.gamma = 0.95  # discount applied to the best next-state value
        self.epsilon = 1.0  # current probability of taking a random action
        self.epsilon_min = 0.01  # decay stops once epsilon is at or below this
        self.epsilon_decay = 0.995  # multiplied into epsilon once per optimize()
        self.model = self._model()
        self.loss_l = []  # one history['loss'] entry per fit performed in optimize()
        self.val_loss_l = []  # never filled by this class

    def _model(self):
        """Build and compile the network: state_size -> 64 -> 32 -> 8 -> action_size."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        # Linear output: one unbounded value estimate per action.
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model

    def act(self, state):
        """Pick an action for `state`.

        Outside evaluation mode a random action is returned with probability
        `epsilon`; otherwise the action with the highest predicted value wins.

        Returns:
            The chosen action index as a plain int.
        """
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        # Cast so both branches return the same type.
        return int(np.argmax(q_values[0]))

    def optimize(self, batch_size):
        """Fit the network on the most recent `batch_size` transitions.

        Each transition is fitted individually, oldest first. If fewer than
        `batch_size` transitions are stored, all of them are used once; a
        non-positive `batch_size` trains on nothing. Epsilon is decayed once
        per call while it is still above `epsilon_min`.
        """
        # Slicing a list copy takes exactly the last batch_size items and can
        # never wrap around to older entries through negative indices.
        mini_batch = list(self.memory)[-batch_size:] if batch_size > 0 else []
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                # Bootstrap from the best value predicted for the next state.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the taken action's value is moved; the others keep their
            # current predictions so they contribute no error.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            train_history = self.model.fit(state, target_f, epochs=1, verbose=0)
            self.loss_l.append(train_history.history['loss'])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [88]:
def formatPrice(n):
    """Render `n` as a rupee amount with two decimals, e.g. "Rs.12.50" or "-Rs.3.50"."""
    magnitude = format(abs(n), ".2f")
    # The sign goes in front of the currency prefix, never next to the digits.
    if n < 0:
        return "-Rs." + magnitude
    return "Rs." + magnitude

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) for a scalar `x`.

    The two branches are algebraically identical; each one only ever
    exponentiates a non-positive number, so large-magnitude inputs saturate
    to 0.0 or 1.0 instead of raising OverflowError.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def getState(data, t, n):
    """Return the `n` observations ending at index `t` as a (1, n) array.

    When fewer than `n` observations exist up to `t`, the window is
    left-padded with copies of `data[0]` so the result always has `n` columns.

    Args:
        data: Indexable, sliceable sequence of observations.
        t: Index of the most recent observation to include.
        n: Window length.

    Raises:
        ValueError: From the final reshape if the slice runs past the end of
            `data` and therefore holds fewer than `n` values.
    """
    start = t - n + 1
    if start < 0:
        # Not enough history yet: repeat the first observation -start times.
        window = -start * [data[0]] + list(data[0:t + 1])
    else:
        window = data[start:t + 1]

    return np.array(window).reshape((1, n))


In [None]:
# Hourly ^GSPC bars between the start and end dates given below.
sp500_bars = yf.download("^GSPC", start="2021-05-8", end="2023-05-7", interval="1h")
# Value used downstream: the 'Adj Close' column minus the 'Open' column.
sp500 = sp500_bars['Adj Close'] - sp500_bars['Open']

In [None]:
data = sp500.values  # np.ndarray
window_size = 5  # number of consecutive observations that make up one state
agent = Agent(window_size)
num_episodes = 100
l = len(data) - 1  # steps per episode; the final index is only read for next_state
batch_size = 32
t_p = []  # total profit of each finished episode, read by the plotting cell
for i_episode in range(num_episodes + 1):
    print("Episode " + str(i_episode) + "/" + str(num_episodes))
    state = getState(data, 0, window_size)
    total_profit = 0
    agent.inventory = []
    for t in range(l):
        action = agent.act(state)
        next_state = getState(data, t + 1, window_size)
        # Holding, buying, or selling with an empty inventory earns no reward.
        reward = 0
        if action == 1:  # buy
            agent.inventory.append(data[t])
        elif action == 2 and len(agent.inventory) > 0:  # sell
            # Positions are closed oldest-first.
            bought_price = agent.inventory.pop(0)
            # The reward is the gain on the trade, floored at 0 for losses.
            reward = max(data[t] - bought_price, 0)
            # The episode profit keeps the signed result, losses included.
            total_profit += data[t] - bought_price
        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            t_p.append(total_profit)
    # Train once per episode, and only once the memory holds more than a batch.
    if len(agent.memory) > batch_size:
        agent.optimize(batch_size)

In [None]:
# DEFINE YOUR VISUALIZATION
import matplotlib.pyplot as plt
# Profit curve: one point per entry of t_p, indexed from 0.
fig, axs = plt.subplots(figsize=(6, 6))
fig.tight_layout(pad=2)

axs.plot(range(len(t_p)), t_p)
axs.set(title="Total profits over episodes", xlabel="Episode")
axs.set_ylabel("Total profit")

In [None]:
data = process_traces  # output of process_price_traces; row i_episode feeds one episode
window_size = 100  # number of consecutive observations that make up one state
agent = Agent(window_size)
num_episodes = 1000
# NOTE(review): the step count is derived from len(data) (the number of rows),
# yet every episode walks along a single row data[i_episode]. Confirm that each
# row holds at least l + 1 values and that data has at least num_episodes rows.
l = len(data) - 101
batch_size = 32
t_p = []  # total profit of each finished episode, read by the plotting cell
for i_episode in range(num_episodes):
    print("Episode " + str(i_episode) + "/" + str(num_episodes))
    trace = data[i_episode]  # the series this episode trades on
    state = getState(trace, 0, window_size)
    total_profit = 0
    agent.inventory = []
    for t in range(l):
        action = agent.act(state)
        next_state = getState(trace, t + 1, window_size)
        # Holding, buying, or selling with an empty inventory earns no reward.
        reward = 0
        if action == 1:  # buy
            agent.inventory.append(trace[t])
        elif action == 2 and len(agent.inventory) > 0:  # sell
            # Positions are closed oldest-first.
            bought_price = agent.inventory.pop(0)
            # The reward is the gain on the trade, floored at 0 for losses.
            reward = max(trace[t] - bought_price, 0)
            # The episode profit keeps the signed result, losses included.
            total_profit += trace[t] - bought_price
        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            t_p.append(total_profit)
    # Train on every 50th episode, once the memory holds more than a batch.
    if i_episode % 50 == 0 and len(agent.memory) > batch_size:
        agent.optimize(batch_size)

In [None]:
# Profit curve: one point per entry of t_p, indexed from 0.
fig, axs = plt.subplots(figsize=(6, 6))
fig.tight_layout(pad=2)

axs.plot(range(len(t_p)), t_p)
axs.set(title="Total profits over episodes", xlabel="Episode")
axs.set_ylabel("Total profit")

In [148]:
# Divide every recorded fit loss by the norm of the whole loss history.
vector_norm = np.asarray(agent.loss_l) / np.linalg.norm(agent.loss_l)
# Plot at most the first 4500 fits. Clamping to the recorded count keeps the
# x and y lengths equal when fewer fits exist, which plot() requires.
num_fits = min(len(vector_norm), 4500)
fig, axs = plt.subplots(1, 1, figsize=(6, 6))

fig.tight_layout(pad=2)

axs.plot(np.arange(num_fits), vector_norm[:num_fits])
axs.set_title("Normalized Training losses over episodes")
axs.set_xlabel("Fits")
axs.set_ylabel("Training Loss")