In [109]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from sklearn.metrics import mean_squared_error
from math import sqrt
from random import randint

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM
from keras.layers import GRU
from keras.callbacks import EarlyStopping
from keras import initializers

from datetime import datetime
from matplotlib import pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
%matplotlib inline

In [110]:
data = pd.read_csv('data/bitcoin2.csv')
data = data.dropna()
data.reset_index()
data.head(10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325292180,4.247,4.247,4.247,4.247,0.4,1.6988,4.247
138,1325300460,4.1,4.1,4.1,4.1,0.623628,2.556875,4.1
212,1325304900,4.1,4.1,4.1,4.1,6.503072,26.662595,4.1
284,1325309220,4.045,4.045,4.044,4.044,2.3793,9.624254,4.044994
311,1325310840,4.044,4.044,4.011,4.011,0.8964,3.607223,4.024122
339,1325312520,4.218,4.218,4.218,4.218,0.200462,0.845549,4.218
358,1325313660,4.03,4.03,4.003,4.003,129.999948,520.692617,4.005329
367,1325314200,4.218,4.218,4.218,4.218,0.149,0.628482,4.218
381,1325315040,4.18,4.19,4.18,4.19,11.283664,47.273749,4.189574
386,1325315340,4.19,4.2,4.19,4.2,1.775189,7.450411,4.196968


In [111]:
data['date'] = pd.to_datetime(data['Timestamp'],unit='s').dt.date
group = data.groupby('date')
Daily_Price = group['Weighted_Price'].mean()

Daily_Price.head()

date
2011-12-31    4.444100
2012-01-01    5.049446
2012-01-02    5.188622
2012-01-03    4.940613
2012-01-04    5.220669
Name: Weighted_Price, dtype: float64

In [112]:
Daily_Price.tail()

date
2017-05-27    1966.300193
2017-05-28    2071.396826
2017-05-29    2145.914241
2017-05-30    2217.294593
2017-05-31    2139.402594
Name: Weighted_Price, dtype: float64

In [113]:
ntrain = int(len(Daily_Price) * 0.75)

df_train= Daily_Price[:ntrain - 1]
df_test= Daily_Price[ntrain:]

print(len(df_train), len(df_test))

1438 480


In [114]:
working_data = [df_train, df_test]
working_data = pd.concat(working_data)

working_data = working_data.reset_index()
working_data['date'] = pd.to_datetime(working_data['date'])
working_data = working_data.set_index('date')

In [115]:
s = sm.tsa.seasonal_decompose(working_data.Weighted_Price.values, freq=60)

In [116]:
trace1 = go.Scatter(x = np.arange(0, len(s.trend), 1),y = s.trend,mode = 'lines',name = 'Trend',
    line = dict(color = ('rgb(244, 146, 65)'), width = 4))
trace2 = go.Scatter(x = np.arange(0, len(s.seasonal), 1),y = s.seasonal,mode = 'lines',name = 'Seasonal',
    line = dict(color = ('rgb(66, 244, 155)'), width = 2))

trace3 = go.Scatter(x = np.arange(0, len(s.resid), 1),y = s.resid,mode = 'lines',name = 'Residual',
    line = dict(color = ('rgb(209, 244, 66)'), width = 2))

trace4 = go.Scatter(x = np.arange(0, len(s.observed), 1),y = s.observed,mode = 'lines',name = 'Observed',
    line = dict(color = ('rgb(66, 134, 244)'), width = 2))

data = [trace1, trace2, trace3, trace4]
layout = dict(title = 'Seasonal decomposition', xaxis = dict(title = 'Time'), yaxis = dict(title = 'Price, USD'))
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='seasonal_decomposition')

In [117]:
def create_lookback(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

In [118]:
from sklearn.preprocessing import MinMaxScaler

training_set = df_train.values
training_set = np.reshape(training_set, (len(training_set), 1))
test_set = df_test.values
test_set = np.reshape(test_set, (len(test_set), 1))

#scale datasets
scaler = MinMaxScaler()
training_set = scaler.fit_transform(training_set)
test_set = scaler.transform(test_set)

# create datasets which are suitable for time series forecasting
look_back = 1
X_train, Y_train = create_lookback(training_set, look_back)
X_test, Y_test = create_lookback(test_set, look_back)

 # reshape datasets so that they will be ok for the requirements of the LSTM model in Keras
X_train = np.reshape(X_train, (len(X_train), 1, X_train.shape[1]))
X_test = np.reshape(X_test, (len(X_test), 1, X_test.shape[1]))

In [119]:
# initialize sequential model, add 2 stacked LSTM layers and densely connected output neuron
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.3))
model.add(LSTM(256))
model.add(Dropout(0.3))
model.add(Dense(1))

# compile and fit the model
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(X_train, Y_train, epochs=100, batch_size=16, shuffle=False,
                    validation_data=(X_test, Y_test),
                    callbacks = [EarlyStopping(monitor='val_loss', min_delta=5e-5, patience=20, verbose=1)])

Train on 1437 samples, validate on 479 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 00030: early stopping


In [120]:
model.save('data/line_model.h5')

In [121]:
trace1 = go.Scatter(
    x = np.arange(0, len(history.history['loss']), 1),
    y = history.history['loss'],
    mode = 'lines',
    name = 'Train loss',
    line = dict(color=('rgb(66, 244, 155)'), width=2, dash='dash')
)
trace2 = go.Scatter(
    x = np.arange(0, len(history.history['val_loss']), 1),
    y = history.history['val_loss'],
    mode = 'lines',
    name = 'Test loss',
    line = dict(color=('rgb(244, 146, 65)'), width=2)
)

data = [trace1, trace2]
layout = dict(title = 'Train and Test Loss during training',
              xaxis = dict(title = 'Epoch number'), yaxis = dict(title = 'Loss'))
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='training_process')

In [122]:
# add one additional data point to align shapes of the predictions and true labels
X_test = np.append(X_test, scaler.transform(working_data.iloc[-1][0]))
X_test = np.reshape(X_test, (len(X_test), -1, 1))

# get predictions and then make some transformations to be able to calculate RMSE properly in USD
prediction = model.predict(X_test)
prediction_inverse = scaler.inverse_transform(prediction.reshape(-1, 1))
Y_test_inverse = scaler.inverse_transform(Y_test.reshape(-1, 1))
prediction2_inverse = np.array(prediction_inverse[:,0][1:])
Y_test2_inverse = np.array(Y_test_inverse[:,0])

In [123]:
Test_Dates = Daily_Price[ntrain:].index

trace1 = go.Scatter(x=Test_Dates, y=Y_test2_inverse, name= 'Precio Actual',
                   line = dict(color = ('rgb(0, 96, 252)'),width = 3))
trace2 = go.Scatter(x=Test_Dates, y=prediction2_inverse, name= 'Precio Predicho',
                   line = dict(color = ('rgb(244, 146, 65)'),width = 4))
data = [trace1, trace2]
layout = dict(title = 'Comparación entre los valores predichos y los valores actuales.',
             xaxis = dict(title = 'Date'), yaxis = dict(title = 'Price, USD'))
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='results_demonstrating1')

In [124]:
RMSE = sqrt(mean_squared_error(Y_test2_inverse, prediction2_inverse))
print('Test RMSE: %.3f' % RMSE)

Test RMSE: 81.787


In [231]:
date = datetime.strptime('2016-10-01', "%Y-%m-%d").date()
index_array = Daily_Price.index
almost_index = np.where(index_array.get_values() == date)
index = almost_index[0][0]
test_dates = Daily_Price[index:index+31]
test_dates = test_dates.reset_index()


date = datetime.strptime('2016-10-01', "%Y-%m-%d").date()
index_array = Daily_Price.index
almost_index = np.where(index_array.get_values() == date)
index = almost_index[0][0] - 1439
predicted_values = prediction2_inverse[index:index+31]

test_dates['predicted'] = predicted_values

frames = []
frames2 = []
for i, value in enumerate(predicted_values):
    if i > 1:   
        frames.append(
            {'data': [
                {
                    'x': frames[i-1]['data'][0]['x'] + [test_dates['date'][i]], 
                    'y': frames[i-1]['data'][0]['y'] + [predicted_values[i]],
                    'mode':'line',
                    'line': {
                        'simplify': False,
                        'color': ('rgb(244, 146, 65)'),
                        'width': 4
                    }
                }
            ]}
        )
        frames2.append(
            {'data': [
                {
                    'x': frames[i-1]['data'][0]['x'] + [test_dates['date'][i]], 
                    'y': frames[i-1]['data'][0]['y'] + [test_dates['Weighted_Price'][i]],
                    'mode':'line',
                    'line': {
                        'simplify': False,
                        'width': 4
                    }

                }
            ]}
        )
    else:
        frames.append(
            {'data': [
                {
                    'x': [test_dates['date'][i]], 
                    'y': [predicted_values[i]],
                    'mode':'line',
                    'line': {
                        'simplify': False,
                        'color': ('rgb(244, 146, 65)'),
                        'width': 4
                    }
                }
            ]}
        )
        frames2.append(
            {'data': [
                {
                    'x': [test_dates['date'][i]], 
                    'y': [test_dates['Weighted_Price'][i]],
                    'mode':'line',
                    'line': {
                        'simplify': False,
                        'width': 4
                    }
                }
            ]}
        )
        

figure1 = {'data': [{'x': [test_dates['date'][0]], 'y': [predicted_values[0]]}],
          'layout': {'xaxis': {'type': 'date', 'range': [test_dates['date'][0], test_dates['date'][-1:][30]] , 'autorange': False},
                     'yaxis': {'range': [predicted_values[0]-5, predicted_values[-1]], 'autorange': False},
                     'title': 'Valores Predichos',
                     'updatemenus': [{'type': 'buttons',
                                      'buttons': [{'label': 'Play',
                                                   'method': 'animate',
                                                   'args': [None]
                                                  }]
                                     }]
                    },
          'frames': frames
         }

figure2 = {'data': [{'x': [test_dates['date'][0]], 'y': [predicted_values[0]]}],
          'layout': {'xaxis': {'type': 'date', 'range': [test_dates['date'][0], test_dates['date'][-1:][30]] , 'autorange': False},
                     'yaxis': {'range': [predicted_values[0]-5, predicted_values[-1]], 'autorange': False},
                     'title': 'Valores Reales',
                     'updatemenus': [{'type': 'buttons',
                                      'buttons': [{'label': 'Play',
                                                   'method': 'animate',
                                                   'args': [None]
                                                  }]
                                     }]
                    },
          'frames': frames2
         }
import time
py.iplot(figure1)
time.sleep(2)
py.iplot(figure2)