In [187]:
!pip install tensorflow
!pip install keras
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install scikit-learn



In [278]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import io
%matplotlib inline

In [279]:
from google.colab import files
uploaded = files.upload()

Saving btc.csv to btc (3).csv


In [280]:
btc = pd.read_csv(io.StringIO(uploaded['btc.csv'].decode('utf-8')))
btc.head()

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume From,Volume To
0,5/26/2018,BTCUSD,7459.11,7640.46,7380.0,7520.0,2722.8,20422650.0
1,5/25/2018,BTCUSD,7584.15,7661.85,7326.94,7459.11,8491.93,63420690.0
2,5/24/2018,BTCUSD,7505.0,7734.99,7269.0,7584.15,11033.72,82931370.0
3,5/23/2018,BTCUSD,7987.7,8030.0,7433.19,7505.0,14905.99,114810400.0
4,5/22/2018,BTCUSD,8393.44,8400.0,7950.0,7987.7,6589.43,53897530.0


In [281]:
# these columns will not be useful for our model
# we will work only with the opening and closing price of bitcoin
useful_btc_data = btc.drop(labels = ['Date', 'Symbol', 'Volume From', 'Volume To', 'High', 'Low'], axis = 1)

In [282]:
useful_btc_data.head()

Unnamed: 0,Open,Close
0,7459.11,7520.0
1,7584.15,7459.11
2,7505.0,7584.15
3,7987.7,7505.0
4,8393.44,7987.7


In [283]:
#drop rows with NaN data
clean_data = useful_btc_data.dropna()
clean_data.size

2546

In [284]:
data_array = clean_data.to_numpy()
data_array

array([[7459.11, 7520.  ],
       [7584.15, 7459.11],
       [7505.  , 7584.15],
       ...,
       [ 378.  ,  378.  ],
       [ 370.  ,  378.  ],
       [ 300.  ,  370.  ]])

In [285]:
# we are going to convert this 2-d array to a 1-d array
flatten_data = data_array.flatten().reshape(-1, 1)
flatten_data

array([[7459.11],
       [7520.  ],
       [7584.15],
       ...,
       [ 378.  ],
       [ 300.  ],
       [ 370.  ]])

In [286]:
scaler = StandardScaler()
scaled_data = scaler.fit_transform(flatten_data)
scaled_data

array([[ 1.30947341],
       [ 1.32555627],
       [ 1.34250021],
       ...,
       [-0.56085933],
       [-0.58146147],
       [-0.56297237]])

In [306]:
# to predict the closing price of btc in a given day, we are going to use historical
# data (openning and closing price of btc from the previous days) as well as the openning price 
# of btc of the given day

class WindowData():
  def __init__(self, raw_data, labels, window_size, train_frac = 0.8, test_frac = 0.2):
    self.raw_data = raw_data
    self.window_size = window_size
    self.labels = labels
    length = raw_data.size
    self.test_size = int(length * test_frac)
    self.train_size = length - self.test_size
    assert self.train_size + self.test_size == length

  def initialize(self):
    input = self.raw_data
    window_size = self.window_size
    data, labels = [], []
    for i in range(0, input.size - window_size, 2):
      cur_window = input[i:i+window_size-1]
      # append firs values to X_data
      data.append(cur_window.reshape(-1, 1))
      # last value of the range is what we want to predict (closing value for the last day)
      labels.append(cur_window[:-1].reshape(-1, 1))
    self.data = np.array(data)
    self.labels = np.array(labels)

  @property
  def X_train(self):
    return self.data[0:self.train_size]

  @property
  def y_train(self):
    return self.labels[0:self.train_size]

  @property
  def X_test(self):
    return self.data[:-self.test_size]
  
  @property
  def y_test(self):
    return self.labels[:-self.test_size]   

In [307]:
windowed_data = WindowData(scaled_data, labels, window_size)
windowed_data.initialize()
windowed_data.data.shape

(1268, 9, 1)

In [308]:
def build_model():
  #now we have our data windowed, we have to create our model
  lstm_model = tf.keras.models.Sequential([
      tf.keras.layers.LSTM(1, return_sequences=False),
      tf.keras.layers.Dense(units=1)
  ])

  lstm_model.compile(loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(),
                metrics=[tf.metrics.MeanAbsoluteError()])

  return lstm_model

In [309]:
lstm_model = build_model()

In [316]:
def fit_model(model, X_train, y_train, validation_split, max_epochs, patience=10):
    # this will stop the training if we do not decrease the loss within 'patience' epochs
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=patience,
                                                    mode='min')
    
    history = model.fit(X_train, y_train, epochs=max_epochs,
                    validation_split=validation_split,
                    callbacks=[early_stopping])

In [317]:
X_train = windowed_data.X_train
y_train = windowed_data.y_train

In [319]:
fit_model(lstm_model, X_train, y_train, 0.2, 10000)

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

In [320]:
lstm_model.summary()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_30 (LSTM)               (None, 1)                 12        
_________________________________________________________________
dense_23 (Dense)             (None, 1)                 2         
Total params: 14
Trainable params: 14
Non-trainable params: 0
_________________________________________________________________


In [321]:
X_test = windowed_data.X_test
y_test = windowed_data.y_test

In [322]:
lstm_model.evaluate(X_test, y_test)



[0.016498399898409843, 0.06390006840229034]