In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/BTC-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/BCH-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/LTC-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/ETH-USD.csv .

In [41]:
import pandas as pd
import numpy as np
import random
from sklearn import preprocessing
from collections import deque
import time 
import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint


# we will use last 60 minits of our data to make predictions
SEQ_LEN = 60
# period in minits
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = 'ETH-USD'
EPOCHS = 10
BATCH_SIZE = 64
NAME = f'{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}'


def classify(current, future):
    # if the price in the future greater then now return 1
    # 1 - is buy
    if float(future) > float(current):
        return 1
    # 0 - is sell
    else:
        return 0

# preparing our data for recurrent neural network
def preprocess_df(df):
    df = df.drop('future', 1)

    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    sequential_data = []
    # make queue from list with max len when it reaches the max len it pops the 
    # old items 
    prev_days = deque(maxlen=SEQ_LEN) 

    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])

    random.shuffle(sequential_data)

    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
        
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X  = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), y


df = pd.read_csv('LTC-USD.csv', 
                 names=['time', 'low', 'high', 'open', 'close', 'volume'])

main_df = pd.DataFrame()

ratios = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']
for ratio in ratios:
    dataset = f'{ratio}.csv'

    df = pd.read_csv(dataset, 
                     names=['time', 'low', 'high', 'open', 'close', 'volume'])

    df.rename(columns={'close' : f'{ratio}_close', 
                       'volume': f'{ratio}_volume'}, 
              inplace=True)  

    df.set_index('time', inplace=True)
    df = df[[f'{ratio}_close', f'{ratio}_volume']]   

    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)

main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)


main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'],
                             main_df['future']))

times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f'train data: {len(train_x)} validation: {len(validation_x)}')
print(f'Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}')
print(f'VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}')

model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), 
                    return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), 
                    return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

filepath = 'RNN_Final-{epoch:02d}-{val_acc:.3f}'
checkpoint = ModelCheckpoint('{}.model'.format(filepath), 
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             model='max')

history = model.fit(
          train_x, train_y,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          validation_data=(validation_x, validation_y),
          callbacks=[tensorboard, checkpoint])



train data: 74196 validation: 3260
Dont buys: 37098, buys: 37098
VALIDATION Dont buys: 1630, buys: 1630
Train on 74196 samples, validate on 3260 samples
Epoch 1/10
Epoch 00001: val_acc improved from -inf to 0.52791, saving model to RNN_Final-01-0.528.model
Epoch 2/10
Epoch 00002: val_acc improved from 0.52791 to 0.54080, saving model to RNN_Final-02-0.541.model
Epoch 3/10
Epoch 00003: val_acc improved from 0.54080 to 0.55675, saving model to RNN_Final-03-0.557.model
Epoch 4/10
Epoch 00004: val_acc improved from 0.55675 to 0.56411, saving model to RNN_Final-04-0.564.model
Epoch 5/10
Epoch 00005: val_acc did not improve from 0.56411
Epoch 6/10
Epoch 00006: val_acc did not improve from 0.56411
Epoch 7/10
Epoch 00007: val_acc did not improve from 0.56411
Epoch 8/10
Epoch 00008: val_acc did not improve from 0.56411
Epoch 9/10
Epoch 00009: val_acc did not improve from 0.56411
Epoch 10/10
Epoch 00010: val_acc did not improve from 0.56411
