## Etapa 1: Instalação da bibliotecas


In [3]:
!pip uninstall -y tensorflow #Comando necessário, pois o TensorFlow-gpu não desinstala a versão mais recente do Tensorflow, pode gerar conflitos.



In [4]:
!pip install tensorflow==2.0.0-alpha0 #Algumas bibliotecas ainda não tem compatibilidade com versões acima, favor utilizar somente essa versão.

Collecting tensorflow==2.0.0-alpha0
[?25l  Downloading https://files.pythonhosted.org/packages/29/39/f99185d39131b8333afcfe1dcdb0629c2ffc4ecfb0e4c14ca210d620e56c/tensorflow-2.0.0a0-cp36-cp36m-manylinux1_x86_64.whl (79.9MB)
[K     |████████████████████████████████| 79.9MB 65kB/s 
[?25hCollecting tf-estimator-nightly<1.14.0.dev2019030116,>=1.14.0.dev2019030115
[?25l  Downloading https://files.pythonhosted.org/packages/13/82/f16063b4eed210dc2ab057930ac1da4fbe1e91b7b051a6c8370b401e6ae7/tf_estimator_nightly-1.14.0.dev2019030115-py2.py3-none-any.whl (411kB)
[K     |████████████████████████████████| 419kB 38.8MB/s 
Collecting tb-nightly<1.14.0a20190302,>=1.14.0a20190301
[?25l  Downloading https://files.pythonhosted.org/packages/a9/51/aa1d756644bf4624c03844115e4ac4058eff77acd786b26315f051a4b195/tb_nightly-1.14.0a20190301-py3-none-any.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 40.1MB/s 
Installing collected packages: tf-estimator-nightly, tb-nightly, tensorflow
Successfu

In [5]:
!pip install tensorflow-gpu==2.0.0-alpha0 #Algumas bibliotecas ainda não tem compatibilidade com versões acima, favor utilizar somente essa versão.

Collecting tensorflow-gpu==2.0.0-alpha0
[?25l  Downloading https://files.pythonhosted.org/packages/1a/66/32cffad095253219d53f6b6c2a436637bbe45ac4e7be0244557210dc3918/tensorflow_gpu-2.0.0a0-cp36-cp36m-manylinux1_x86_64.whl (332.1MB)
[K     |████████████████████████████████| 332.1MB 48kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-2.0.0a0


In [6]:
!pip install pandas-datareader



## Etapa 2: Importação das bibliotecas

In [7]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
from pandas.util.testing import assert_frame_equal #import alterado

from tqdm import tqdm_notebook, tqdm
from collections import deque

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from pandas.util.testing import assert_frame_equal


In [8]:
tf.__version__

'2.0.0-alpha0'

## Etapa 3: Construção da IA para negociação de ações

In [9]:
class AI_Trader():
  
  def __init__(self, state_size, action_space = 3, model_name = "AITrader"):
    #tamanho do estado 
    self.state_size = state_size
    #
    self.action_space = action_space
    self.memory = deque(maxlen = 2000)
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    self.model = self.model_builder()
    
  def model_builder(self):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units = 32, activation = "relu", input_dim = self.state_size))
    model.add(tf.keras.layers.Dense(units = 64, activation = "relu"))
    model.add(tf.keras.layers.Dense(units = 128, activation = "relu"))
    model.add(tf.keras.layers.Dense(units = self.action_space, activation = "linear"))
    model.compile(loss = "mse", optimizer = tf.keras.optimizers.Adam(lr = 0.001))
    return model
  
  def trade(self, state):
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  def batch_train(self, batch_size):
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay
      

## Etapa 4: Pré-processamento da base de dados

### Definição de funções auxiliares

#### Sigmoid

In [10]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [11]:
sigmoid(0.5)

0.6224593312018546

#### Formatação de preços

In [12]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

In [13]:
stocks_price_format(100)

'$ 100.000000'

#### Carregador da base de dados

In [14]:
dataset = data_reader.DataReader("AAPL", data_source = "yahoo")

In [15]:
dataset.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-06-24,129.800003,127.120003,127.209999,128.110001,55280900.0,118.067299
2015-06-25,129.199997,127.5,128.860001,127.5,31938100.0,117.505127
2015-06-26,127.989998,126.510002,127.669998,126.75,44066800.0,116.813896
2015-06-29,126.470001,124.480003,125.459999,124.529999,49161400.0,114.767921
2015-06-30,126.120003,124.860001,125.57,125.43,44370700.0,115.597382


In [16]:
str(dataset.index[0]).split()[0]

'2015-06-24'

In [17]:
dataset.index[-1]

Timestamp('2020-06-22 00:00:00')

In [18]:
dataset['Close']

Date
2015-06-24    128.110001
2015-06-25    127.500000
2015-06-26    126.750000
2015-06-29    124.529999
2015-06-30    125.430000
                 ...    
2020-06-16    352.079987
2020-06-17    351.589996
2020-06-18    351.730011
2020-06-19    349.720001
2020-06-22    358.880005
Name: Close, Length: 1258, dtype: float64

In [19]:
def dataset_loader(stock_name):
  dataset = data_reader.DataReader(stock_name, data_source = "yahoo")
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  close = dataset['Close']
  return close

### Criador de estados

In [20]:
0 - 5 + 1

-4

In [21]:
20 - 5 + 1

16

In [22]:
dataset[16:21]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-07-17,129.619995,128.309998,129.080002,129.619995,46164700.0,119.458923
2015-07-20,132.970001,130.699997,130.970001,132.070007,58900200.0,121.716888
2015-07-21,132.919998,130.320007,132.850006,130.75,76756400.0,120.500343
2015-07-22,125.5,121.989998,121.989998,125.220001,115450600.0,115.403839
2015-07-23,127.089996,125.059998,126.199997,125.160004,50999500.0,115.348526


In [23]:
def state_creator(data, timestep, window_size):
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep + 1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep + 1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i + 1] - windowed_data[i]))
    
  return np.array([state]), windowed_data

### Carregando a base de dados

In [24]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [25]:
s, w = state_creator(data, 0, 5)

In [26]:
s

array([[0.5, 0.5, 0.5, 0.5]])

In [27]:
w

[128.11000061035156,
 128.11000061035156,
 128.11000061035156,
 128.11000061035156,
 128.11000061035156]

## Etapa 5: Treinando a IA

### Configuração dos hyper parâmetros

In [28]:
window_size = 10
episodes = 1000
batch_size = 32
data_samples = len(data) - 1

In [29]:
data_samples

1257

### Definição do modelo

In [30]:
trader = AI_Trader(window_size)

In [31]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


### Loop de treinamento

In [32]:
for episode in range(1, episodes + 1):
  print("Episode: {}/{}".format(episode, episodes))
  state = state_creator(data, 0, window_size + 1)
  total_profit = 0
  trader.inventory = []
  for t in tqdm(range(data_samples)):
    action = trader.trade(state)
    next_state = state_creator(data, t + 1, window_size + 1)
    reward = 0
    
    if action == 1: # Comprando uma ação
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
    elif action == 2 and len(trader.inventory) > 0: # Vendendo uma ação  
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price))
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("Total profit: {}".format(total_profit))
      print("########################")
      
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
     
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

 96%|█████████▌| 1207/1257 [05:12<00:13,  3.72it/s]

AI Trader sold:  $ 267.989990  Profit: - $ 20.089996


 96%|█████████▌| 1208/1257 [05:12<00:13,  3.66it/s]

AI Trader sold:  $ 273.250000  Profit: - $ 19.399994


 96%|█████████▌| 1209/1257 [05:13<00:13,  3.68it/s]

AI Trader sold:  $ 287.049988  Profit: $ 13.529999


 96%|█████████▋| 1212/1257 [05:13<00:12,  3.67it/s]

AI Trader sold:  $ 282.799988  Profit: $ 9.440002


 96%|█████████▋| 1213/1257 [05:14<00:11,  3.70it/s]

AI Trader bought:  $ 276.929993


 97%|█████████▋| 1214/1257 [05:14<00:11,  3.68it/s]

AI Trader bought:  $ 268.369995


 97%|█████████▋| 1215/1257 [05:14<00:11,  3.72it/s]

AI Trader sold:  $ 276.100006  Profit: - $ 26.639984


 97%|█████████▋| 1216/1257 [05:15<00:11,  3.61it/s]

AI Trader sold:  $ 275.029999  Profit: - $ 17.890015


KeyboardInterrupt: ignored