In [None]:
import datetime
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# RNNs, GRUs, and LSTMs

## RNNs

![title](Recurrent_neural_network_unfold.png)

RNN equations:

$h_t = tanh(Vh_{t-1} + Ux_t + b_h)$

$o_t = sigmoid(Wh_t + b_o)$

![title](Recurrent_Unit.png)

## GRUs
https://en.wikipedia.org/wiki/Gated_recurrent_unit


$\begin{aligned}z_{t}&=sigmoid(W_{z}x_{t}+U_{z}h_{t-1}+b_{z})\\r_{t}&=sigmoid(W_{r}x_{t}+U_{r}h_{t-1}+b_{r})\\h_{t}&=(1-z_{t})\circ h_{t-1}+z_{t}\circ tanh(W_{h}x_{t}+U_{h}(r_{t}\circ h_{t-1})+b_{h})\end{aligned}$

![title](Gated_Recurrent_Unit,_base_type.png)

## LSTMs

https://en.wikipedia.org/wiki/Long_short-term_memory

${\begin{aligned}f_{t}&=sigmoid(W_{f}x_{t}+U_{f}h_{t-1}+b_{f})\\i_{t}&=sigmoid(W_{i}x_{t}+U_{i}h_{t-1}+b_{i})\\o_{t}&=sigmoid(W_{o}x_{t}+U_{o}h_{t-1}+b_{o})\\c_{t}&=f_{t}\circ c_{t-1}+i_{t}\circ tanh(W_{c}x_{t}+U_{c}h_{t-1}+b_{c})\\h_{t}&=o_{t}\circ tanh(c_{t})\end{aligned}}$

![title](The_LSTM_cell.png)

# Predicting bitcoin prices from data of the [bitstamp](https://www.bitstamp.net/) exchange
The dataset has very good, 1 minute resolution. We're going to predict the daily price.

In [None]:
# Load the minute-resolution Bitstamp dump. The 'Timestamp' column is kept as raw
# epoch seconds here; it is turned into calendar dates in a later cell.
# No `parse_dates` / `date_parser` arguments are passed: `parse_dates=True` only
# targets the index and no `index_col` is given, so a custom parser would never
# run, and `date_parser` is deprecated since pandas 2.0.
df = pd.read_csv('bitstampUSD_1-min_data_2012-01-01_to_2019-03-13_tail.csv')

In [None]:
# (rows, columns) of the raw minute-level frame
df.shape

In [None]:
# peek at the first rows of the raw data
df.head()

In [None]:
# peek at the last rows of the raw data
df.tail()

### Fill NAs

In [None]:
# we won't fill NA-s here as we take the mean for each day
# df = df[['Timestamp', 'Open', 'High', 'Low', 'Close']].fillna(method='ffill')

### Add dates

In [None]:
# 'Timestamp' is read as epoch seconds; keep only the calendar day of each record
timestamps = pd.to_datetime(df['Timestamp'], unit='s')
df['date'] = timestamps.dt.date

In [None]:
# check that the new 'date' column is present
df.tail()

## Compute daily price

In [None]:
# bucket the minute-level rows by calendar day
grouped_by_date = df.groupby(by='date')

In [None]:
# take the mean price on each day
# NOTE: `df` is rebound here from the minute-level frame to the per-day
# aggregate (one row per date, indexed by 'date').
df = grouped_by_date.mean()

In [None]:
# display the daily aggregate
df

In [None]:
# are there any missing values?
# evaluates to True if at least one cell of the daily frame is NaN
df.isna().any().any()

## Look at closing prices

In [None]:
# Plot the daily mean closing price. A title and y label make the figure readable
# on its own; the trailing semicolon suppresses the Axes repr in the cell output.
ax = df['Close'].plot(figsize=(20, 10), title='Daily mean closing price')
ax.set_ylabel('Close');

## Process data for LSTM
We're going to predict the closing prices one timestep into the future.
For this, first we extract the closing prices, then we concatenate it with itself shifted one timestep into the future. These will be the training and test examples.
Then we're going to scale the elements of the time series to lie in the interval [-1, 1].

In [None]:
# pull out the daily closing-price series
close = df['Close']
close.head()

In [None]:
# Pair each day's close (model input) with the following day's close (target).
# `keys` gives the two columns distinct labels; without it both columns would be
# called 'Close', which makes any label-based selection ambiguous.
examples = pd.concat([close, close.shift(-1)], axis=1, keys=['Close', 'Close_next'])
examples.head()

In [None]:
# the last row has no following day, so its shifted column is NaN
examples.tail()

In [None]:
# Drop rows without a complete (input, target) pair - in particular the last row,
# whose next-day target is NaN. Unlike slicing off the final row, dropna() is
# idempotent: re-running this cell does not discard further valid examples.
examples = examples.dropna()

In [None]:
# verify that the trailing NaN row is gone
examples.tail()

In [None]:
# first five values of the first (input) column as a NumPy array
examples.iloc[:5, 0].values

In [None]:
# (number of examples, 2)
examples.shape

In [None]:
# chronological split: the first 90% of the rows are for training, the rest for
# testing (no shuffling, so the time order is preserved)
num_train = int(0.9 * len(examples))
print("Number of training examples: {}".format(num_train))
train = examples.iloc[:num_train].values
test = examples.iloc[num_train:].values

In [None]:
# Fit the scaler on the training rows only, so that no statistics of the test
# period leak into the preprocessing; then apply it to both splits.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train)
train = scaler.transform(train)
test = scaler.transform(test)

In [None]:
# first five scaled test rows (input, target)
test[:5]

In [None]:
# column 0 is the model input, column 1 the value one step ahead (the target)
X_train = train[:, 0]
y_train = train[:, 1]
X_test = test[:, 0]
y_test = test[:, 1]

In [None]:
# first five scaled training inputs and their targets
X_train[:5], y_train[:5]

## Build and train LSTM model

In [None]:
batch_size = 64

# stateful LSTM over a fixed batch of single-step, single-feature inputs,
# followed by a linear read-out
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(16, batch_input_shape=[batch_size, 1, 1], stateful=True))
model.add(tf.keras.layers.Dense(1))  # no activation: this is a regression output

In [None]:
# Adam (learning rate 0.001) minimising the mean squared error;
# MSE and MAE are additionally reported as metrics
adam = tf.keras.optimizers.Adam(0.001)
tracked_metrics = ['mean_squared_error', 'mean_absolute_error']
model.compile(optimizer=adam, loss='mean_squared_error', metrics=tracked_metrics)

In [None]:
# keep only whole batches: the fixed batch size must divide the number of examples
num_batches = len(X_train) // batch_size
n_kept = num_batches * batch_size
X_train, y_train = X_train[:n_kept], y_train[:n_kept]

In [None]:
# add two singleton axes so the inputs match the model's [batch_size, 1, 1] input shape
X_train = X_train.reshape(-1, 1, 1)

In [None]:
# expected: (number of kept training examples, 1, 1)
X_train.shape

In [None]:
# important: shuffle=False as the data points are in time order
# important: reset the states between epochs, as the time series restarts from the beginning
# batch_size is passed explicitly because the stateful LSTM was built for a fixed
# batch of `batch_size` samples; relying on fit()'s default batch size would feed
# it batches of a different size.
# NOTE(review): a stateful layer hands the final state of sample i in one batch to
# sample i of the next batch, i.e. to the observation `batch_size` steps later
# rather than to the following day - confirm this is the intended continuity.
num_epochs = 200
for epoch in range(num_epochs):
    model.fit(X_train, y_train, epochs=1, batch_size=batch_size, shuffle=False)
    model.reset_states()

In [None]:
# give the test inputs the same 3-D layout as the training inputs, then drop the
# remainder that does not fill a whole batch
X_test = X_test.reshape(-1, 1, 1)
num_batches = len(X_test) // batch_size
n_kept = num_batches * batch_size
X_test, y_test = X_test[:n_kept], y_test[:n_kept]

In [None]:
# Evaluate on the held-out period. The recurrent state is cleared first so that
# re-running this cell always starts from the same state, and batch_size is
# passed explicitly because the stateful LSTM only accepts batches of that size.
model.reset_states()
model.evaluate(X_test, y_test, batch_size=batch_size)

## Evaluate the results

In [None]:
# Start from a clean recurrent state: a preceding evaluate() call leaves the
# stateful LSTM holding the state from the end of the test series.
model.reset_states()
predictions = model.predict(X_test, batch_size=batch_size).squeeze()

# Targets and predictions both live in the scaler's *second* feature (the
# shifted close), so both are un-scaled with that column's parameters.
# scaler.inverse_transform would apply the first column's (input) parameters to
# whatever is placed in column 0. MinMaxScaler maps x -> x * scale_ + min_,
# hence the inverse below.
target_min, target_scale = scaler.min_[1], scaler.scale_[1]
scaled = np.column_stack([y_test.squeeze(), predictions])
results = pd.DataFrame((scaled - target_min) / target_scale,
                       columns=['ground truth', 'prediction'])

In [None]:
# ground truth and prediction over the test period, in original price units
results.plot(figsize=(20, 10))

In [None]:
# step-to-step changes of the ground truth and of the prediction
results.diff(1).plot(figsize=(20, 10))

In [None]:
# Left: prediction against the value it is supposed to predict.
# Right: prediction against the ground truth of the *previous* step. The ground
# truth is the series being shifted (not the predictions), so the labels say so.
# A tight diagonal on the right means the model mostly echoes the last known value.
fig, ax = plt.subplots(1, 2, figsize=(20, 10))
truth, predicted = results.iloc[:, 0], results.iloc[:, 1]

ax[0].scatter(truth, predicted)
ax[0].set_title('ground truth vs predictions')
ax[0].set_xlabel('ground truth')
ax[0].set_ylabel('predictions')

ax[1].scatter(truth.shift(1), predicted)
ax[1].set_title('previous-step ground truth vs predictions')
ax[1].set_xlabel('ground truth shifted by one step')
ax[1].set_ylabel('predictions')