In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

from sklearn.preprocessing import MinMaxScaler

plt.style.use('ggplot')

In [None]:
# Load the aggregated daily data set and prepare a gap-free, chronologically
# ordered frame indexed by date.

# Columns whose missing values are replaced by 0 instead of being filled from
# neighbouring rows.
zero_fill_cols = ['unique_active_address', 'sp500', 'gold_price',
                  'nlp_compound', 'nlp_subjectivity', 'nlp_polarity']

df = pd.read_csv('aggregated_data.csv', parse_dates=['date'])
df[zero_fill_cols] = df[zero_fill_cols].fillna(0)

df = (
    df[df['date'] >= '2018-05-01']
    .set_index('date')
    # Sort BEFORE filling: bfill/interpolate depend on row order, so they must
    # see the rows chronologically rather than in CSV order.
    .sort_index()
    # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
    .bfill()
    # After the backfill only trailing gaps can remain; linear interpolation
    # fills those.
    .interpolate(method='linear')
)

df.head()

In [None]:

# Candidate feature sets, one per model variant. In every list the label
# ('close') is the LAST column, which is where makeXy reads the target from.

#TRADING DATA
col_trading_only = ['open','high','low','volume','close']

#TRADING DATA + HASHRATE
col_trading_hash_rate = ['open','high','low','volume','hash_rate','close']

#TRADING DATA + BLOCKCHAIN DATA
col_trading_blockchain = ['open','high','low','volume','avg_block_size','difficulty','hash_rate','miner_revenue','unique_active_address','close']

#TRADING DATA + SEARCHVOLUME
col_trading_search_volume = ['open','high','low','volume','search_volume','close']

#TRADING DATA + FNG DATA
col_trading_social= ['open','high','low','volume','fng','close']

#TRADING DATA + ECONOMIC DATA
# The gold column is called 'gold_price' in the data set (the loading cell
# zero-fills it under that name); 'gold' would not match it.
col_trading_economic = ['open','high','low','volume','sp500','gold_price','close']

#TRADING DATA + SENTIMENT DATA
col_trading_sentiment = ['open','high','low','volume','fng','nlp_compound','nlp_subjectivity','nlp_polarity','close']

#TRADING DATA + TWITTER DATA
col_trading_twitter = ['open','high','low','volume','nlp_compound','nlp_subjectivity','nlp_polarity','close']

#ALL_DATA
all_data = ['open','high','low','volume','nlp_compound','nlp_subjectivity','nlp_polarity', 'sp500','hash_rate','unique_active_address','close']


# Select the active feature set. Label-based selection raises KeyError on a
# misspelled column, whereas pd.DataFrame(df, columns=...) would silently add
# an all-NaN column.
df_NLP = df[col_trading_only].copy()
df_NLP.info()

In [None]:
# Chronological train/validation split followed by min-max scaling.

# Number of trailing rows held out as the validation window
val_periods = 337

df_train = df_NLP.iloc[:len(df_NLP) - val_periods]
df_val = df_NLP.iloc[len(df_NLP) - val_periods:]

print('Train shape', df_train.shape)
print('Validation shape', df_val.shape)

# Feature scaler: fitted on the training rows only, then reused for validation
scaler = MinMaxScaler()
scaler.fit(df_train)
df_train_scaled = scaler.transform(df_train)

# Second scaler fitted on the close column alone; it is used later to
# inverse_transform close price predictions back to price units
scaler_close_price = MinMaxScaler()
train_close_price = scaler_close_price.fit(df_train['close'].to_numpy().reshape(-1, 1))


In [None]:
# Transforms the original time series into the input format required by the LSTM model

# Number of consecutive past rows that makeXy puts into each input window
nb_timesteps = 3

def makeXy(ts, nb_timesteps, features='itself'):
    """
    Slice a scaled multivariate series into sliding-window samples.

    Input:
           ts: 2-D array-like, rows = time steps, columns = variables;
               the LAST column is the label
           nb_timesteps: number of consecutive past rows in each window
           features: 'itself' == use the previous values of the label only
                     anything else (e.g. 'all') == use previous values of
                     all available columns, label included
    Output:
           X: 3-D array of regressors, shape
              (n_samples, nb_timesteps, n_features)
           y: 1-D array of targets; y[k] is the label of the row that
              immediately follows window X[k]

    When ts has no more than nb_timesteps rows, empty arrays with the shapes
    above (n_samples == 0) are returned.
    """
    ts = np.asarray(ts)

    # 'itself' keeps ONLY the label column (kept 2-D via the -1: slice) so the
    # behaviour matches the documented contract; any other value keeps all
    # columns.
    regressors = ts[:, -1:] if features == 'itself' else ts

    if ts.shape[0] - nb_timesteps <= 0:
        # Not enough rows for a single window: return correctly-shaped empties
        # so callers can still read X.shape[1] / X.shape[2].
        empty_X = np.empty((0, nb_timesteps, regressors.shape[1]), dtype=ts.dtype)
        empty_y = np.empty((0,), dtype=ts.dtype)
        return empty_X, empty_y

    window_ends = range(nb_timesteps, ts.shape[0])
    x_train = np.array([regressors[end - nb_timesteps:end] for end in window_ends])
    y_train = np.array([ts[end, -1] for end in window_ends])
    return x_train, y_train


# Training windows built from every scaled column ('all' mode)
X_train, y_train = makeXy(ts=df_train_scaled, nb_timesteps=nb_timesteps, features='all')
X_train.shape

In [None]:
# Build the validation windows
data = pd.concat([df_train, df_val], axis=0).reset_index(drop=True)

# Start nb_timesteps rows before the validation period so that the first
# validation target already has a full look-back window
val_inputs = data.iloc[df_train_scaled.shape[0] - nb_timesteps:].to_numpy()

# Scale with the scaler fitted on the training rows
val_inputs = scaler.transform(val_inputs)

X_val, y_val = makeXy(ts=val_inputs, nb_timesteps=nb_timesteps, features='all')
X_val.shape

# LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, InputLayer
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint,ReduceLROnPlateau,TensorBoard
from keras.optimizers import Adam

In [None]:
# Baseline network: four stacked LSTM layers, each followed by dropout, and a
# single-unit dense output. Input shape is (time steps, features) taken from
# X_train.
regressor = Sequential([
    InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(units=500, return_sequences=True),
    Dropout(rate=0.3),
    LSTM(units=100, return_sequences=True),
    Dropout(rate=0.2),
    LSTM(units=75, return_sequences=True),
    Dropout(rate=0.2),
    # Last recurrent layer returns only its final state
    LSTM(units=50),
    Dropout(rate=0.2),
    Dense(units=1),
])

regressor.summary()

In [None]:
# Fit the baseline LSTM and checkpoint the model with the lowest val_loss
epoch = 50

# Compiler and loss function
regressor.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.0003))

# Both callbacks watch val_loss
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)

# Model Checkpoint
model_folder = 'lstm_model'
model_file = 'model_close_price.hdf5'
# Create the target directory here, before it is first needed, so the notebook
# runs top to bottom on a fresh checkout; exist_ok makes the cell re-runnable.
os.makedirs(model_folder, exist_ok=True)
save_weights_at = os.path.join(model_folder, model_file)
mcp = ModelCheckpoint(save_weights_at, monitor='val_loss', verbose=0,
                      save_best_only=True, save_weights_only=False, mode='min',
                      save_freq='epoch')

tb = TensorBoard('logs')

# Fit model
regressor.fit(X_train, y_train, shuffle=True, epochs=epoch,
              callbacks=[es, rlr, mcp, tb], validation_data=(X_val, y_val),
              verbose=1, batch_size=32)

In [None]:
#regressor.load_weights(model_folder + '\\' + 'BTC_close_price_lstm_weights_MV.75-0.0253.hdf5')
# Baseline LSTM predictions, mapped from the scaled space back to price units
# with the close-only scaler and flattened to 1-D.

# Validation predict
predicted_price1 = scaler_close_price.inverse_transform(
    regressor.predict(X_val)
).reshape(X_val.shape[0])

# Train predict
predicted_price_train1 = scaler_close_price.inverse_transform(
    regressor.predict(X_train)
).reshape(X_train.shape[0])

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, mean_absolute_percentage_error
 
# Error metrics of the baseline LSTM in price units.
# RMSE is computed as sqrt(MSE): the `squared=False` shortcut of
# mean_squared_error is deprecated/removed in newer scikit-learn releases.
# The first nb_timesteps training rows have no prediction (they only serve as
# look-back context), hence the df_train[nb_timesteps:] slice.

#training evaluate metrics
train_mape1 = mean_absolute_percentage_error(df_train[nb_timesteps:].close, predicted_price_train1)*100
print('Train RMSE: ',  np.sqrt(mean_squared_error(df_train[nb_timesteps:].close, predicted_price_train1)))
print('Train MAE: ', mean_absolute_error(df_train[nb_timesteps:].close, predicted_price_train1))
print('Train MAPE: ',  train_mape1)

#validate evaluate metrics
test_mape1 = mean_absolute_percentage_error(df_val['close'],predicted_price1)*100
print('Validation RMSE: ',  np.sqrt(mean_squared_error(df_val['close'],predicted_price1)))
print('Validation MAE:',  mean_absolute_error(df_val['close'],predicted_price1))
print('Validation MAPE: ',  test_mape1)

# adaboost-LSTM

In [None]:
# Define LSTM Neural Network
def create_model(input_shape=None, learning_rate=0.0003):
    """
    Build and compile a fresh stacked-LSTM regressor.

    Input:
           input_shape: (time steps, features) of one sample. When None
                        (the default, and what a zero-argument build_fn call
                        gets) the shape is read from the notebook-level
                        X_train array.
           learning_rate: Adam learning rate
    Output:
           A compiled Sequential model with a single-unit output and
           mean-squared-error loss.
    """
    if input_shape is None:
        # Backward-compatible default: same global lookup as before
        input_shape = (X_train.shape[1], X_train.shape[2])

    model = Sequential([
        InputLayer(input_shape=input_shape),
        LSTM(units=500, return_sequences=True),
        Dropout(rate=0.3),
        LSTM(units=100, return_sequences=True),
        Dropout(rate=0.2),
        LSTM(units=75, return_sequences=True),
        Dropout(rate=0.2),
        # Last recurrent layer returns only its final state
        LSTM(units=50),
        Dropout(rate=0.2),
        Dense(units=1),
    ])
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=learning_rate))
    return model

In [None]:
from keras.wrappers.scikit_learn import KerasRegressor
# Wrap the LSTM builder as a scikit-learn regressor so it can be used as the
# base learner of AdaBoost. The fit settings mirror the baseline LSTM.
epoch = 50


es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)

# Model Checkpoint
model_folder = 'lstm_model'
# Separate file from the baseline LSTM checkpoint ('model_close_price.hdf5')
# so the boosted members do not overwrite the baseline's best model.
model_file = 'model_close_price_adaboost.hdf5'
# Make sure the target directory exists; exist_ok keeps the cell re-runnable.
os.makedirs(model_folder, exist_ok=True)
save_weights_at = os.path.join(model_folder, model_file)
mcp = ModelCheckpoint(save_weights_at, monitor='val_loss', verbose=0,
                      save_best_only=True, save_weights_only=False, mode='min',
                      save_freq='epoch')

tb = TensorBoard('logs')

ann_estimator = KerasRegressor(build_fn=create_model, shuffle=True, epochs=epoch,
                               callbacks=[es, rlr, mcp, tb],
                               validation_data=(X_val, y_val),
                               verbose=1, batch_size=32)

In [None]:
# Create directory to save model.
# os.makedirs with exist_ok=True is idempotent (no error when the cell is
# re-run) and does not depend on the shell, unlike `!mkdir`.
os.makedirs("lstm_model", exist_ok=True)

In [None]:
from sklearn.ensemble import AdaBoostRegressor
# AdaBoost ensemble of five LSTM regressors.
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot is the same in both.
boosted_ann = AdaBoostRegressor(ann_estimator, n_estimators=5)
boosted_ann.fit(X_train, y_train)  # inputs come from the min-max scaled training frame

In [None]:
# AdaBoost-LSTM predictions. The ensemble output is reshaped to a column for
# the close-only scaler, mapped back to price units, then flattened to 1-D.

# Validation predict
predicted_price = scaler_close_price.inverse_transform(
    boosted_ann.predict(X_val).reshape(-1, 1)
).reshape(X_val.shape[0])

# Train predict
predicted_price_train = scaler_close_price.inverse_transform(
    boosted_ann.predict(X_train).reshape(-1, 1)
).reshape(X_train.shape[0])


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, mean_absolute_percentage_error
 
# Error metrics of the AdaBoost-LSTM in price units.
# RMSE is computed as sqrt(MSE): the `squared=False` shortcut of
# mean_squared_error is deprecated/removed in newer scikit-learn releases.
# The first nb_timesteps training rows have no prediction (they only serve as
# look-back context), hence the df_train[nb_timesteps:] slice.

#training evaluate metrics
train_mape = mean_absolute_percentage_error(df_train[nb_timesteps:].close, predicted_price_train)*100
print('Train RMSE: ',  np.sqrt(mean_squared_error(df_train[nb_timesteps:].close, predicted_price_train)))
print('Train MAE: ', mean_absolute_error(df_train[nb_timesteps:].close, predicted_price_train))
print('Train MAPE: ',  train_mape)

#validate evaluate metrics
test_mape = mean_absolute_percentage_error(df_val['close'],predicted_price)*100
print('Validation RMSE: ',  np.sqrt(mean_squared_error(df_val['close'],predicted_price)))
print('Validation MAE:',  mean_absolute_error(df_val['close'],predicted_price))
print('Validation MAPE: ',  test_mape)

In [None]:
import plotly.express as px
# Actual close price next to both models' predictions, one frame per split.
# The training frame skips the first nb_timesteps rows, which have no
# prediction.
train_results = df_train['close'][nb_timesteps:].to_frame().assign(**{
    'LSTM training': predicted_price_train1,
    'adaboost-LSTM training': predicted_price_train,
})
test_results = df_val['close'].to_frame().assign(**{
    'LSTM validating': predicted_price1,
    'adaboost-LSTM validating': predicted_price,
})


In [None]:
# Give the actual-price column its display name in both frames, then stack
# training and validation rows into one frame.
train_results, test_results = (
    frame.rename(columns={'close': 'giá đóng cửa thực tế'})
    for frame in (train_results, test_results)
)
total_results = pd.concat([train_results, test_results])

In [None]:
# Training split: actual close price vs. both models; the subtitle shows each
# model's training MAPE.
px.line(
    train_results,
    title=(
        '<span style="color:#012888;font-weight:bold">Kết quả trên tập data training'
        '        <br><span style="font-size: 13px;color:#444444;">LSTM Train MAPE %: {:.2f}'
        '<br>Adaboost-lSTM Train MAPE %: {:.2f}'
    ).format(train_mape1, train_mape),
    labels={"date": "Ngày", "value": "Giá"},
    width=1800,
    height=650,
)

In [None]:
# Validation split: actual close price vs. both models; the subtitle shows
# each model's validation MAPE.
px.line(
    test_results,
    title=(
        '<span style="color:#012888;font-weight:bold">Kết quả trên tập data validating'
        '        <br><span style="font-size: 13px;color:#444444;">LSTM Validating MAPE %: {:.2f}'
        '<br>Adaboost-lSTM Validating MAPE %: {:.2f}'
    ).format(test_mape1, test_mape),
    labels={"date": "Ngày", "value": "Giá"},
    width=1800,
    height=650,
)

In [None]:
# Whole period (training followed by validation): actual close price vs. both
# models; the subtitle shows all four MAPE values.
px.line(
    total_results,
    title=(
        '<span style="color:#012888;font-weight:bold">giá đóng của đồng BTC - đơn vị USD'
        '        <br><span style="font-size: 13px;color:#444444;">'
        'LSTM Train MAPE %: {:.1f}                       lSTM Validation MAPE %: {:.1f}'
        '<br>Ada-lSTM Training MAPE %: {:.1f}           Ada-lSTM Validation MAPE %: {:.1f}</span>'
    ).format(train_mape1, test_mape1, train_mape, test_mape),
    labels={"date": "Ngày", "value": "Giá"},
    width=1800,
    height=650,
)