# Bitcoin Time Series Prediction with LSTM

#### Import the libraries needed for model training

In [1]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np
import seaborn as sns
py.init_notebook_mode(connected=True)
%matplotlib inline

In [2]:
# Read the combined price table; the "Date" column becomes the row index.
data = pd.read_csv(
    "../input/mcmproblemc/data_mixed.csv",
    index_col="Date",
)

In [3]:
# Convert the index values to datetimes.
data.index = pd.to_datetime(data.index)

In [4]:
# Show the loaded frame as the cell output.
data

In [5]:
# data_gold is the table without the 'Value' column;
# data_bit is the table without the 'USD (PM)' column.
data_gold = data.drop(columns='Value')
data_bit = data.drop(columns='USD (PM)')

In [6]:
# Keep only the rows of data_gold that contain no missing values.
data_gold_conti = data_gold.dropna()

In [7]:
# Show the NaN-free frame as the cell output.
data_gold_conti

In [8]:
# Summarise the columns of the full table (dtypes and non-null counts).
data.info()

In [9]:
# Forward-fill gaps: each missing entry takes the most recent earlier value.
# DataFrame.ffill() replaces fillna(method='pad'), which is deprecated in
# pandas 2.x. data_gold / data_bit were derived earlier and are not affected.
data = data.ffill()

In [10]:
# Interactive line chart of the 'Value' column against the date index.
btc_trace = go.Scatter(
    x=data.index,
    y=data['Value'],
    name='Price',
)
py.iplot([btc_trace])

In [11]:
def create_dataset(dataset, look_back):
    """Turn column 0 of a 2-D array into (window, next value) pairs.

    For every start position ``s`` with ``s + look_back < len(dataset)``, the
    window is ``dataset[s:s + look_back, 0]`` and its target is
    ``dataset[s + look_back, 0]``.

    Returns a tuple ``(windows, targets)`` of NumPy arrays.
    """
    n_windows = len(dataset) - look_back
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [12]:
def model_LSTM(data, train_ratio=0.05, look_back=10, epochs=50):
    """Train a single-layer LSTM that predicts the next scaled 'Value'.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns 'Value' and 'USD (PM)'.
    train_ratio : float
        Fraction of rows, counted from the start, used for training; the
        remaining rows form the validation/test split.
    look_back : int
        Number of consecutive past values in each input window.
    epochs : int
        Number of training epochs.

    Returns
    -------
    tuple
        ``(model, testX, testY, history, scaler)`` where ``scaler`` is the
        MinMaxScaler fitted on 'Value' (use it to invert predictions).

    Raises
    ------
    ValueError
        If either split has too few rows to form one window of ``look_back``.
    """
    values = data['Value'].values.reshape(-1, 1).astype('float32')
    # NOTE(review): the scaler is fitted on every row, including the rows that
    # later form the test split, so test-period min/max leak into training.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_1 = scaler.fit_transform(values)

    values_2 = data['USD (PM)'].values.reshape(-1, 1).astype('float32')
    scaler_2 = MinMaxScaler(feature_range=(0, 1))
    scaled_2 = scaler_2.fit_transform(values_2)

    # NOTE(review): create_dataset reads column 0 only, so the 'USD (PM)'
    # column stacked here never reaches the network.
    scaled = np.hstack((scaled_1, scaled_2))

    train_size = int(len(scaled) * train_ratio)
    train, test = scaled[:train_size, :], scaled[train_size:, :]

    # Without at least look_back + 1 rows a split yields no window, and the
    # reshape below would fail with an opaque IndexError.
    if len(train) <= look_back or len(test) <= look_back:
        raise ValueError(
            "train_ratio=%r leaves %d train / %d test rows; each split needs "
            "more than look_back=%d rows" % (train_ratio, len(train), len(test), look_back)
        )

    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # Keras LSTM input is (samples, timesteps, features); each window is fed
    # as one timestep with look_back features.
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    print(testX.shape)

    model = Sequential()
    model.add(LSTM(200, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # Roughly ten batches per epoch; never let the batch size fall to zero.
    batch_size = max(1, train_size // 10)
    history = model.fit(
        trainX,
        trainY,
        epochs=int(epochs),
        batch_size=batch_size,
        validation_data=(testX, testY),
        verbose=0,
        shuffle=False,  # keep chronological order
    )
    return model, testX, testY, history, scaler

In [13]:
# Train with the first 20% of the rows; the rest is the validation/test split.
model, testX, testY, history, scaler = model_LSTM(data, 0.2)

In [14]:
# Training loss recorded for the last epoch.
history.history['loss'][-1]

In [15]:
# Loss per epoch on the training data and on the validation data.
for history_key, legend_name in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=legend_name)
pyplot.legend()
pyplot.show()

In [16]:
# Predict on the test windows and compare with the targets (both still scaled).
yhat = model.predict(testX)
for series, legend_name in ((yhat, 'predict'), (testY, 'true')):
    pyplot.plot(series, label=legend_name)
pyplot.legend()
pyplot.show()

In [17]:
# Map predictions and targets from the scaler's [0, 1] range back to the
# original units of the column the scaler was fitted on.
yhat_inverse = scaler.inverse_transform(yhat.reshape(-1, 1))
testY_inverse = scaler.inverse_transform(testY.reshape(-1, 1))

In [18]:
# Zoom in on a 30-step stretch of the test period, in original units.
# The prediction is not recomputed here: the plot only uses yhat_inverse and
# testY_inverse, which already exist.
zoom = slice(230, 260)
pyplot.figure(figsize=(15, 10))
pyplot.plot(yhat_inverse[zoom], label='predict')
pyplot.plot(testY_inverse[zoom], label='true')
pyplot.legend()
pyplot.show()

In [19]:
# Error of the inverse-scaled predictions; RMSE is the square root of the MSE.
mse = mean_squared_error(testY_inverse, yhat_inverse)
rmse = sqrt(mse)
print('Test RMSE: %.3f' % rmse)
print('Test MSE: %.3f' % mse)

- LSTM with the single feature `Weighted Price` has an RMSE of 159.194
- LSTM with the features `Volume(BTC)`, `Volume(Currency)` and `Weighted Price` has an RMSE of 96.184
- The multi-feature LSTM gives more accurate results, as shown in the line chart above

In [20]:
# 100-unit variant of the network. The input shape is read from testX because
# trainX exists only inside model_LSTM and is not defined at notebook level at
# this point; testX shares the same (timesteps, features) trailing dimensions.
model = Sequential()
model.add(LSTM(100, input_shape=(testX.shape[1], testX.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

In [None]:
def create_dataset(dataset, look_back):
    """Build (window, next value) training pairs from column 0 of ``dataset``.

    Parameters
    ----------
    dataset : 2-D array-like indexable as ``dataset[i:j, 0]``.
    look_back : int
        Number of consecutive values in each input window.

    Returns
    -------
    tuple of np.ndarray
        ``(dataX, dataY)``: ``dataX[k]`` is ``dataset[k:k + look_back, 0]`` and
        ``dataY[k]`` is ``dataset[k + look_back, 0]``. Both are empty when
        ``dataset`` has no more than ``look_back`` rows.

    This redefinition shadows the earlier ``create_dataset``. The debug
    ``print`` of the sample count was removed because this function is called
    once per step of the walk-forward loop and flooded the cell output.
    """
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:(i + look_back), 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [None]:
# Scratch check: slicing with [0:] returns every element of the array.
np.array(list(range(11)))[0:]

In [None]:
from tqdm import tqdm

In [None]:
# NOTE(review): the next cell starts by rebinding `data` to data_bit, so on a
# top-to-bottom run this assignment has no effect.
data = data_gold_conti

In [None]:
# Walk-forward forecast of data_bit['Value']: at each step the model is
# fine-tuned on the newest (window -> next value) pair of the known rows and
# then predicts the value that follows them.
data = data_bit
look_back = 10
predict_list = []
loss_list = []

model = Sequential()
model.add(LSTM(200, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

for i in tqdm(range(look_back + 1, len(data))):
    # Rows at positions 0..i-1 are known; position i is the one being forecast.
    known_data = data.iloc[:i]

    values = known_data['Value'].values.reshape(-1, 1).astype('float32')
    # The scaler is refitted every step, on the known rows only.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # Newest look_back + 1 points -> exactly one (window, target) sample.
    train = scaled[-(look_back + 1):]
    # Newest look_back points -> input window for the forecast.
    test = scaled[-look_back:]

    trainX, trainY = create_dataset(train, look_back)
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(test, (1, 1, test.shape[0]))

    history = model.fit(trainX, trainY, epochs=10, verbose=0, shuffle=False)
    loss_list.append(history.history['loss'][-1])

    # verbose=0 keeps Keras from printing a progress bar on every step.
    y_predict = model.predict(testX, verbose=0)
    y_predict = scaler.inverse_transform(y_predict.reshape(-1, 1)).reshape(-1)
    # Take the single element explicitly: float() on a 1-element array is
    # deprecated in recent NumPy.
    predict_list.append(float(y_predict[0]))

In [None]:
# Convert the per-step losses to a NumPy array (needed for .reshape below).
loss_list = np.array(loss_list)

In [None]:
# Drop the warm-up rows that have no loss entry (the walk-forward loop starts
# at position look_back + 1 = 11). .copy() makes `data` an independent frame,
# so adding a column does not write into a slice of the source frame.
data = data.iloc[look_back + 1:].copy()
data['loss_list'] = loss_list

In [None]:
# Per-step training loss of the walk-forward model (first entry skipped),
# saved to bit_loss_curve.jpg. The line is labelled so that legend() has an
# entry to show instead of warning that there are no labelled artists.
pyplot.figure(figsize=(10, 7))
pyplot.plot(data['loss_list'].iloc[1:], label='bit_loss')
pyplot.ylabel('bit_loss')
pyplot.xlabel('time')
pyplot.legend()
pyplot.savefig('bit_loss_curve.jpg', dpi=100)
pyplot.show()

In [None]:
# Rescale the losses (first entry dropped) to the [0, 1] range.
# Note: this rebinds the notebook-level names `scaler` and `scaled`.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(loss_list.reshape(-1, 1)[1:])

In [None]:
# Plot the rescaled losses. The line is labelled so that legend() has an entry
# to show.
pyplot.plot(scaled.reshape(-1), label='scaled loss')
pyplot.legend()
pyplot.show()

In [None]:
# Summary statistics of the rescaled losses.
pd.Series(scaled.reshape(-1)).describe()

In [None]:
# Summary statistics of the raw per-step losses.
pd.Series(loss_list.reshape(-1)).describe()

In [None]:
# `data` comes from data_bit, which has no 'USD (PM)' column; the series being
# forecast is 'Value'. `data` was also already trimmed to the rows that
# received a forecast, so its 'Value' column lines up one-to-one with
# predict_list and needs no further slicing.
true_list = data['Value'].values
zoom = slice(490, 520)
pyplot.figure(figsize=(15, 10))
pyplot.plot(predict_list[zoom], label='predict')
pyplot.plot(true_list[zoom], label='true')
pyplot.legend()
pyplot.show()

In [None]:
# Error of the walk-forward forecasts; RMSE is the square root of the MSE.
mse = mean_squared_error(predict_list, true_list)
rmse = sqrt(mse)
print('Test RMSE: %.3f' % rmse)
print('Test MSE: %.3f' % mse)

In [None]:
# Flatten the rescaled losses into a plain Python list.
loss_bit = list(scaled.reshape(-1))

In [None]:
# Pad the front with 12 ones: 11 warm-up rows that the loop never visits plus
# the first loss entry that was dropped before rescaling.
# Not idempotent: re-running this cell prepends another 12 entries.
loss_bit= [1]*12 + loss_bit

In [None]:
# Pad the front with 11 zeros for the warm-up rows that have no forecast.
bit_predict  = [0]*11 + predict_list

In [None]:
# Attach the zero-padded forecasts to data_bit as a new column.
data_bit['bit_predict'] = bit_predict

In [None]:
# Attach the padded, rescaled losses to data_bit as a new column.
data_bit['loss_bit'] = loss_bit

In [None]:
# Summary statistics of data_bit, including the two columns just added.
data_bit.describe()

In [None]:
# Remove the 'Value' column from data_bit in place.
# Not idempotent: a second run raises KeyError because 'Value' is already gone.
data_bit.drop('Value', axis = 1, inplace = True)

In [None]:
# `data` holds only the rows that received a forecast (the 11 warm-up rows were
# dropped), so it takes the unpadded predict_list. The zero-padded bit_predict
# is sized for the untrimmed frame and would raise a length-mismatch
# ValueError here.
data['bit_predict'] = predict_list

In [None]:
# Re-read the original CSV into a fresh frame, indexed by the "Date" column.
result = pd.read_csv(filepath_or_buffer="../input/mcmproblemc/data_mixed.csv", index_col="Date")

In [None]:
# Convert the index values of `result` to datetimes.
result.index = pd.to_datetime(result.index)

In [None]:
# NOTE(review): data_gold_predict is not assigned in any visible cell; on a
# fresh top-to-bottom run this raises NameError. Presumably left over from a
# gold-price run of the loop above — confirm.
data_gold_predict

In [None]:
# Outer-join data_bit onto data_merge by 'Date'.
# NOTE(review): data_merge is read here before any visible cell assigns it, and
# the cell is not idempotent (each run merges data_bit in again).
data_merge = pd.merge(data_merge, data_bit, on = 'Date', how = 'outer')

In [None]:
# Show the merged frame as the cell output.
data_merge

In [None]:
# Summary statistics of the merged frame.
data_merge.describe()

In [None]:
# Write the merged frame to result.csv in the working directory.
data_merge.to_csv("./result.csv")

In [None]:
# Display a copy of data_merge with every 0 replaced by NaN.
# NOTE(review): replace() returns a new frame and the result is not assigned,
# so data_merge itself (and the CSV written earlier) still contains the zeros.
data_merge.replace(0,np.nan)

In [None]:
# Show data_merge again (unchanged by the replace() call in the previous cell).
data_merge

In [None]:
# Re-read the original CSV once more; this discards the earlier `result` frame.
result = pd.read_csv(filepath_or_buffer="../input/mcmproblemc/data_mixed.csv", index_col="Date")

In [None]:
# Attach the zero-padded forecasts; bit_predict is a plain list, so it is
# assigned by position and must have one entry per row of `result`.
result['bit_predict'] = bit_predict

In [None]:
# Copy the 'gold_predict' column over from data_merge (aligned on the index).
# NOTE(review): at this point result's index has not yet been converted with
# pd.to_datetime (that happens in the next cell) — confirm data_merge uses the
# same index type, otherwise the alignment yields NaN.
result['gold_predict'] = data_merge['gold_predict']

In [None]:
# Convert the index values of `result` to datetimes.
result.index = pd.to_datetime(result.index)

In [None]:
# Remove the 'gold_predict' column again.
# Not idempotent: a second run raises KeyError because the column is gone.
result = result.drop('gold_predict', axis = 1)

In [None]:


import matplotlib
import matplotlib.pyplot as plt

# Breakout signals: compare each close with the highest / lowest close of the
# preceding `signal_lookback` rows.
data['Close'] = data['Value']

signal_lookback = 4  # number of preceding rows in the rolling window

# Signal columns start at zero and are filled in below.
data['Buy'] = np.zeros(len(data))
data['Sell'] = np.zeros(len(data))

# shift(1) keeps the current row out of its own window; min_periods leaves the
# first rows as NaN, and comparisons against NaN are False (no signal).
# The intermediate columns are kept in the frame to make inspection easier.
prior_window = data['Close'].shift(1).rolling(signal_lookback, min_periods=signal_lookback)
data['RollingMax'] = prior_window.max()
data['RollingMin'] = prior_window.min()
data.loc[data['RollingMax'] < data['Close'], 'Buy'] = 1
data.loc[data['RollingMin'] > data['Close'], 'Sell'] = -1

# Visual sanity check: price on the main axes, signals in a strip at the bottom.
fig, ax1 = plt.subplots(1, 1, figsize=(30, 24))
ax1.plot(data['Close'])
y = ax1.get_ylim()
ax1.set_ylim(*y)

ax2 = ax1.twinx()
ax2.set_position(matplotlib.transforms.Bbox([[0.125, 0.1], [0.9, 0.32]]))
for column, colour in (('Buy', '#77dd77'), ('Sell', '#dd4444')):
    ax2.plot(data[column], color=colour)

plt.show()