In [None]:
import datetime
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from IPython.core.debugger import set_trace
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from talib.abstract import *


In [None]:
# Root folder holding the datasets; replace the placeholder with a real path.
PATH_DATA = 'YOUR_DATA_PATH'
# Glob pattern matching every entry of the 'BINANCE_PATH' sub-folder.
# os.path.join inserts the separator itself, so PATH_DATA works with or
# without a trailing slash (plain concatenation silently dropped it).
BINANCE_DATA = os.path.join(PATH_DATA, 'BINANCE_PATH', '*')
# glob() returns matches in arbitrary order; sort them so the files are always
# processed in the same order from one run to the next.
list_all = sorted(glob(BINANCE_DATA))

In [None]:
def get_peak(func):
    """Decorator that turns a per-window "position of extremum" function into a peak finder.

    ``func(data, window)`` must return one value per sample. The wrapped
    function returns the positions at which that value equals
    ``int(window / 2)``. Entries that do not compare equal (NaN included) are
    ignored.
    """
    def algo(data, window):
        positions = np.asarray(func(data, window))
        # Keep the samples whose reported position is the middle of the window.
        is_centered = positions == int(window / 2)
        return np.nonzero(is_centered)[0]
    return algo
    
@get_peak
def get_max_peak(data, window):
    """Locate local maxima of ``data`` over a centered rolling window.

    The undecorated body returns, for each centered window of ``window``
    samples, the position inside the window of its first maximum. The
    ``get_peak`` decorator then keeps the samples where that position is the
    window's middle, so the decorated call returns the integer positions of
    the local maxima.

    Parameters
    ----------
    data : pandas.Series
        Series to scan (anything exposing ``.rolling``).
    window : int
        Rolling window length.
    """
    # np.argmax gives the first occurrence of the maximum in a single pass --
    # the same index the former np.where(x == np.max(x))[0][0] produced --
    # and cannot raise IndexError when the maximum is NaN.
    return data.rolling(window, center=True).apply(lambda values: np.argmax(values), raw=True)

@get_peak
def get_min_peak(data, window):
    """Locate local minima of ``data`` over a centered rolling window.

    The undecorated body returns, for each centered window of ``window``
    samples, the position inside the window of its first minimum. The
    ``get_peak`` decorator then keeps the samples where that position is the
    window's middle, so the decorated call returns the integer positions of
    the local minima.

    Parameters
    ----------
    data : pandas.Series
        Series to scan (anything exposing ``.rolling``).
    window : int
        Rolling window length.
    """
    # np.argmin gives the first occurrence of the minimum in a single pass --
    # the same index the former np.where(x == np.min(x))[0][0] produced --
    # and cannot raise IndexError when the minimum is NaN.
    return data.rolling(window, center=True).apply(lambda values: np.argmin(values), raw=True)

def return_min_max_peak(data, window=11):
    """Return the positions of local maxima and minima of the ``'close'`` column.

    Parameters
    ----------
    data : mapping / DataFrame
        Must provide a ``'close'`` entry accepted by ``get_max_peak`` and
        ``get_min_peak``.
    window : int, default 11
        Rolling window length forwarded to both peak finders.

    Returns
    -------
    tuple
        ``(maxima_positions, minima_positions)``.
    """
    close_prices = data['close']
    maxima_positions = get_max_peak(close_prices, window)
    minima_positions = get_min_peak(close_prices, window)
    return maxima_positions, minima_positions

In [None]:
def normalize(mm_scaler, vector):
    """Scale ``vector`` with ``mm_scaler`` and return the result as a flat array.

    The input is converted to a single-column 2-D array, passed to
    ``mm_scaler.fit_transform`` (so the scaler is re-fitted on every call),
    and the result is flattened back to one dimension.
    """
    as_column = np.asarray(vector).reshape(-1, 1)
    scaled = mm_scaler.fit_transform(as_column)
    return np.reshape(scaled, len(scaled))
    
def get_TA_4_model(daily, SMA_value, label):
    """Placeholder for the feature-building step, whose real code is withheld.

    As published, this stub only instantiates a ``MinMaxScaler`` configured
    for the (0, 1) range and returns a fixed notice string; none of its
    arguments are used.

    NOTE(review): the dataset-building loop in this notebook unpacks the
    result as ``mat, label``, so the real implementation presumably returns a
    pair (feature matrices, labels) -- the stub cannot be used as-is there.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    notice = "FUNCTION NON AVAILABLE AND KEPT PRIVATE FOR COMPETITIVE REASON"
    return notice

def remove_nan(data_tot, label_tot):
    """Drop every sample that contains at least one NaN, together with its label.

    Parameters
    ----------
    data_tot : numpy.ndarray
        Samples stacked along axis 0.
    label_tot : numpy.ndarray
        One label per sample.

    Returns
    -------
    tuple
        ``(data_without_nan, labels_without_nan)`` as new arrays.
    """
    # First-axis coordinate of every NaN cell, de-duplicated and sorted.
    nan_rows = np.unique(np.nonzero(np.isnan(data_tot))[0])
    kept_data = np.delete(data_tot, nan_rows, axis=0)
    kept_labels = np.delete(label_tot, nan_rows)
    return kept_data, kept_labels

def get_tot_size_time(list_file):
    """Return the total number of data rows across the CSV files in ``list_file``.

    Parameters
    ----------
    list_file : iterable of str
        Paths of the CSV files to count.

    Returns
    -------
    int
        Sum of ``len(pd.read_csv(path))`` over all paths; 0 for an empty list.
    """
    # Iterate over the argument: the previous version looped over the
    # notebook-level ``list_all`` and silently ignored ``list_file``.
    return sum(len(pd.read_csv(filename)) for filename in list_file)

## Get total number of pictures we will produce

In [None]:
# Total number of rows over all input CSV files; used further down to size the
# preallocated sample and label buffers.
nb_pictures = get_tot_size_time(list_all)

## Peak detection

In [None]:
# Integers 6..20 (15 values); passed to get_TA_4_model as its SMA_value argument.
period = np.arange(6,21)

In [None]:
begin_time = datetime.datetime.now()
# Buffers preallocated for every CSV row: one 15x15 matrix and one label each.
data_tot = np.zeros((nb_pictures, 15, 15))
label_tot = np.zeros(nb_pictures)
ind_init = 0  # write cursor into data_tot / label_tot

for i, filename in enumerate(list_all):
    print('{}/{}'.format(i+1, len(list_all)))
    data = pd.read_csv(filename)
    # Class labels: 0 by default, 1 at local-maximum positions, 2 at
    # local-minimum positions of the 'close' column.
    label = np.zeros(len(data))
    i_x, i_n = return_min_max_peak(data, window=11)
    label[i_x] = 1
    label[i_n] = 2
    mat, label = get_TA_4_model(data, period, label)
    lb = len(label)
    data_tot[ind_init:ind_init+lb] = mat
    label_tot[ind_init:ind_init+lb] = label
    ind_init += lb

# get_TA_4_model is not shown and may return fewer rows than the file holds.
# Drop the part of the buffers that was never written, so that all-zero
# placeholder rows are not kept as class-0 samples (a no-op when every row
# was filled).
data_tot = data_tot[:ind_init]
label_tot = label_tot[:ind_init]

In [None]:
# Drop every sample whose feature matrix contains a NaN, along with its label.
data_input, label = remove_nan(data_tot, label_tot)

In [None]:
# Append a trailing axis of length 1 so each sample matches the
# (15, 15, 1) input_shape declared by the first Conv2D layer.
shape_input = data_input.shape
data_input = data_input.reshape(shape_input[0], shape_input[1], shape_input[2], 1)

## Data splitting and encoding

In [None]:
# One-hot encode the class labels so they can feed the 3-unit softmax output.
integer_encoded = np.reshape(label, (len(label), 1))
# The encoder was never instantiated anywhere in the notebook, so this cell
# raised NameError on a fresh kernel.
onehot_encoder = OneHotEncoder()
# fit_transform returns a SciPy sparse matrix by default; convert it to a dense
# array, which train_test_split / Keras can consume directly.
label_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()

In [None]:
# Fixed seed so the shuffled 80/20 train/validation split is identical on
# every run (without it, results change at each execution).
SPLIT_SEED = 42
x_train, x_cv, y_train, y_cv = train_test_split(
    data_input, label_encoded,
    train_size=0.8, test_size=0.2,
    shuffle=True, random_state=SPLIT_SEED,
)

## CNN model

In [None]:
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv2D, Reshape, Flatten
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape, MaxPool2D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import multilabel_confusion_matrix
from tensorflow.keras import models, layers
import tensorflow as tf
from keras.utils.vis_utils import plot_model


In [None]:
def build_model():
    """Build the (uncompiled) CNN classifier.

    Architecture, in order: two 3x3 'same'-padded ReLU convolutions (32 then
    64 filters) on a (15, 15, 1) input, 2x2 max-pooling with stride 2,
    dropout 0.25, flatten, dense 128 ReLU, dense 10 ReLU, dropout 0.5 and a
    3-unit softmax output.
    """
    return models.Sequential([
        layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                      input_shape=(15, 15, 1), padding='same'),
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.Dropout(rate=0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='relu'),
        layers.Dropout(rate=0.5),
        layers.Dense(3, activation='softmax'),
    ])

def compile_model(model, METRICS, opt='adam', loss='categorical_crossentropy'):
    """Compile ``model`` in place and return it.

    Parameters
    ----------
    model : object exposing ``compile(optimizer=, loss=, metrics=)``
    METRICS : passed through unchanged as ``metrics``
    opt : optimizer passed as ``optimizer`` (default ``'adam'``)
    loss : loss passed as ``loss`` (default ``'categorical_crossentropy'``)
    """
    compile_settings = {'optimizer': opt, 'loss': loss, 'metrics': METRICS}
    model.compile(**compile_settings)
    return model
    
def train_model(model, x_train, y_train, x_cv, y_cv, class_weight, EPOCHS):
    """Fit ``model`` and return whatever ``model.fit`` returns.

    ``x_train`` / ``y_train`` are the training inputs and targets,
    ``(x_cv, y_cv)`` is passed as ``validation_data``, ``class_weight`` is
    forwarded as-is and ``EPOCHS`` is passed as ``epochs``.
    """
    validation_pair = (x_cv, y_cv)
    return model.fit(
        x_train,
        y_train,
        epochs=EPOCHS,
        validation_data=validation_pair,
        class_weight=class_weight,
    )

In [None]:
# Build and compile a model only to render its architecture diagram; the
# training cell builds its own fresh model.
model = build_model()
model = compile_model(model, None, opt='adam', loss='categorical_crossentropy')

# Write the diagram next to the notebook. The previous hard-coded absolute path
# ('/Users/xavier/...') only existed on the author's machine and made this cell
# fail everywhere else.
MODEL_PLOT_PATH = 'model_plot.png'
plot_model(model, to_file=MODEL_PLOT_PATH, show_shapes=True, show_layer_names=True)

In [None]:
# Per-class loss weights: classes 1 and 2 count more than class 0
# (presumably to offset how rare peaks are -- confirm against the label counts).
class_weight = {0: 1, 1: 3.2, 2: 3.5}

EPOCHS = 5

# Start from a freshly built, freshly compiled network, then train it.
model = compile_model(build_model(), None, opt='adam', loss='categorical_crossentropy')
history = train_model(model, x_train, y_train, x_cv, y_cv, class_weight, EPOCHS=EPOCHS)

In [None]:
# Training and validation loss per epoch, drawn on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')