In [1]:
# Import required libraries

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from time import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import horovod.tensorflow.keras as hvd

2021-11-18 13:43:24.426152: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-18 13:43:24.426191: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## CS5488 Project - Price Prediction on Cryptocurrencies
### Project Group 5
<!--
Poon Bing-chun
Jia Shuyue
Li Ka-faat
Tso Yiu-chuen
-->

#### Objective
In this project, we attempt to predict the exchange rates of cryptocurrencies using historical prices of other cryptocurrencies, by training a deep neural network distributedly across several machines.

Distributed training is a collection of techniques for using multiple processors located on different machines for training machine learning models. It is an increasingly important deep learning technique, since it enables the training of wider neural networks which is too cumbersome to manage on one machine only.

#### Plan
The [**Horovod** library](https://github.com/horovod/horovod) ([paper](https://towardsdatascience.com/paper-summary-horovod-fast-and-easy-distributed-deep-learning-in-tensorflow-5be535c748d1)) will be used and the training machines will be grouped by an [**Apache Spark** cluster](https://horovod.readthedocs.io/en/stable/spark_include.html) (which will be covered in the last few weeks of the lecture). Horovod distributes training batches to machines for training, averages the gradients of gradient descents, and aggregates the validation metrics returned by each machine. It supports common deep learning frameworks like **Keras**, **TensorFlow** and **PyTorch**.

We will compare the convergence rates with and without distributed training using TensorBoard.

#### Data Collection

We have written a Python script [**downloadData_5m.py**](https://github.com/verybighub/CS5488_Project/blob/main/downloadData_5m.py) to collect historical cryptocurrency finance data from [https://coinmarketcap.com/](https://coinmarketcap.com/). The date range is 1st January, 2019 to 22nd September, 2021 (i.e. the day we collected the data). We have parsed the data into the machine-readable `pandas` `DataFrame` format and placed it in Google Drive.

In [2]:
import findspark
findspark.init()
from pyspark import SparkConf

from pyspark.sql import SparkSession
import pyspark.sql.functions as f
spark = SparkSession.builder \
    .master("local") \
    .appName("group5_project.com") \
    .getOrCreate()


21/11/18 13:43:40 WARN Utils: Your hostname, LAPTOP-0TFTFNMR resolves to a loopback address: 127.0.1.1; using 172.22.106.188 instead (on interface eth0)
21/11/18 13:43:40 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
21/11/18 13:43:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/11/18 13:43:43 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [3]:
sparkdf = spark.read.options(header='True', delimiter=',').csv("historical_coin_data_5m.csv")


In [4]:
import pyspark.sql.functions as F
from pyspark.sql import Window
sparkdf.count()

currency = 'Stellar'
#select hourly data
w = Window().orderBy(F.lit("_c0"))
cdf = sparkdf.filter(sparkdf.Currency == currency).withColumn("id", F.row_number().over(w)).filter("id % 12 == 1")
cdf = cdf.select(sparkdf["DateTime"], 
                 sparkdf["Price USD"], 
                 sparkdf["Trading Volume Last 24h"],
                 sparkdf["Market Cap"])
cdf.show()

# price = cdf.select("Price USD")
# vol = cdf.select("Trading Volume Last 24h")
# marketcap = cdf.select('Market Cap')


21/11/18 13:44:16 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.

+-------------------+-------------------+-----------------------+------------------+
|           DateTime|          Price USD|Trading Volume Last 24h|        Market Cap|
+-------------------+-------------------+-----------------------+------------------+
|2019-01-01 08:04:04|       0.1146302991|       90503046.2253028|2196405505.5719624|
|2019-01-01 09:04:00|       0.1145872532|       91258541.6394973| 2195580713.329094|
|2019-01-01 10:04:03|       0.1144470426|       93396258.1621297|2192894168.9233046|
|2019-01-01 11:04:02|0.11516714160000001|       96180774.6256204|2206691825.2562256|
|2019-01-01 12:04:00|       0.1131706549|       98258636.7127816|2168437590.3445463|
|2019-01-01 13:04:02|0.11282784850000001|       100275117.458896|2161869164.0564756|
|2019-01-01 14:04:00|0.11273712420000001|        100027778.86779| 2160130828.079797|
|2019-01-01 15:04:03|       0.1127905684|       100478771.924145|2161154860.0623918|
|2019-01-01 16:04:00|       0.1129315288|       100760017.714081|

                                                                                

In [None]:
# # Choose a currency
# currency = 'Bitcoin'

# # Originally the data has a 5-minute interval
# # We can use Python to get hourly interval using this syntax: [::12] because 60 / 5 = 12
# # Skip every 12 values
# price = df[df['Currency'] == currency]['Price USD'][::12]
# vol =  df[df['Currency'] == currency]['Trading Volume Last 24h'][::12]
# marketcap =  df[df['Currency'] == currency]['Market Cap'][::12]

In [None]:
# plt.plot(price, label='Price USD')
# plt.plot(vol, label='Trading Volume')
# plt.plot(marketcap, label='Market Cap')

In [5]:
oldtime = time()
from pyspark.sql.functions import col, log
cdf = cdf.withColumn('Log Price', log(10.0, col("Price USD")))
cdf = cdf.withColumn('Log Trading Volume Last 24h', log(10.0, col("Trading Volume Last 24h")))
cdf = cdf.withColumn('Log Market Cap', log(10.0, col("Market Cap")))
cdf.show()
print(f'Time needed: {time()-oldtime} s')


21/11/18 13:44:41 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.

+-------------------+-------------------+-----------------------+------------------+-------------------+---------------------------+-----------------+
|           DateTime|          Price USD|Trading Volume Last 24h|        Market Cap|          Log Price|Log Trading Volume Last 24h|   Log Market Cap|
+-------------------+-------------------+-----------------------+------------------+-------------------+---------------------------+-----------------+
|2019-01-01 08:04:04|       0.1146302991|       90503046.2253028|2196405505.5719624|-0.9407005744041936|         7.9566631972889015|9.341712523653495|
|2019-01-01 09:04:00|       0.1145872532|       91258541.6394973| 2195580713.329094|-0.9408636910317881|          7.960273524233052|9.341549407147134|
|2019-01-01 10:04:03|       0.1144470426|       93396258.1621297|2192894168.9233046|-0.9413954253493801|          7.970329476957159|9.341017672734665|
|2019-01-01 11:04:02|0.11516714160000001|       96180774.6256204|2206691825.2562256|-0.9386714

                                                                                

In [6]:
# # Data scaling - Normalization
oldtime = time()
from pyspark.sql.functions import mean as _mean
from pyspark.sql.functions import stddev as _std
# Data scaling
def column_statistics(df, name=""):
    df_stats = df.select(
        _mean(col(name)).alias('mean'),
        _std(col(name)).alias('std')
    ).collect()
    
    return df_stats[0]['mean'], df_stats[0]['std']

data_p_mean, data_p_std = column_statistics(cdf, "Log Price")
data_v_mean, data_v_std = column_statistics(cdf, "Log Trading Volume Last 24h")
data_m_mean, data_m_std = column_statistics(cdf, "Log Market Cap")

cdf = cdf.withColumn("Price_Mean", f.lit(data_p_mean))
cdf = cdf.withColumn("Price_Std", f.lit(data_p_std))
cdf = cdf.withColumn("Price_Normalized", (f.col("Log Price") - f.col("Price_Mean")) / f.col("Price_Std"))

cdf = cdf.withColumn("Volume_Mean", f.lit(data_v_mean))
cdf = cdf.withColumn("Volume_Std", f.lit(data_v_std))
cdf = cdf.withColumn("Volume_Normalize", (f.col("Log Trading Volume Last 24h") - f.col("Volume_Mean")) / f.col("Volume_Std"))

cdf = cdf.withColumn("Market_Mean", f.lit(data_m_mean))
cdf = cdf.withColumn("Market_Std", f.lit(data_m_std))
cdf = cdf.withColumn("Market_Normalize", (f.col("Log Market Cap") - f.col("Market_Mean")) / f.col("Market_Std"))
cdf.show()
print(f'Time needed: {time()-oldtime} s')

21/11/18 13:46:42 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/11/18 13:46:56 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/11/18 13:47:09 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/11/18 13:47:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.

+-------------------+-------------------+-----------------------+------------------+-------------------+---------------------------+-----------------+-------------------+------------------+--------------------+-----------------+-------------------+-------------------+-----------------+------------------+--------------------+
|           DateTime|          Price USD|Trading Volume Last 24h|        Market Cap|          Log Price|Log Trading Volume Last 24h|   Log Market Cap|         Price_Mean|         Price_Std|    Price_Normalized|      Volume_Mean|         Volume_Std|   Volume_Normalize|      Market_Mean|        Market_Std|    Market_Normalize|
+-------------------+-------------------+-----------------------+------------------+-------------------+---------------------------+-----------------+-------------------+------------------+--------------------+-----------------+-------------------+-------------------+-----------------+------------------+--------------------+
|2019-01-01 08:04:0

                                                                                

In [7]:
feats_scaled = cdf.select(cdf["Price_Normalized"], cdf["Volume_Normalize"], cdf["Market_Normalize"])
feats_scaled = feats_scaled.toPandas().values


21/11/16 22:40:29 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
                                                                                

In [8]:
feats_scaled.shape

(23880, 3)

In [24]:
# Determine best sliding window size
# size: 3 * 7 => last three days
sliding_window = np.lib.stride_tricks.sliding_window_view(feats_scaled, (5 * 7 + 3, 3), axis=(0, 1))
sliding_window = sliding_window.reshape((sliding_window.shape[0], 
                                         sliding_window.shape[2], 
                                         sliding_window.shape[3]))

# Xs: Price, volume, market cap and price of the past week
Xs = np.array([i[:-3] for i in sliding_window])

# Xs: Price of the next three hours
Ys = np.array([i[-3:,0] for i in sliding_window])

print('-----------------------\nXs:\n')
print(Xs)
print('-----------------------\nYs:\n')
print(Ys)
print('-----------------------\nXs:\n')
print(Xs.shape)
print('-----------------------\nYs:\n')
print(Ys.shape)

Xs = np.reshape(Xs, [-1, 5 * 7 * 3])
Xs = pd.DataFrame(Xs)
Xs = spark.createDataFrame(Xs)

Ys = pd.DataFrame(Ys)
Ys = spark.createDataFrame(Ys)

# Xs: Price, volume, market cap and price of the past week
Xs = np.array([i[:-3] for i in sliding_window])
Xs = np.reshape(Xs, [-1, 5 * 7 * 3])
Xs = pd.DataFrame(Xs)
Xs = spark.createDataFrame(Xs, ['features' * 5 * 7 * 3])

# Xs: Price of the next three hours
Ys = np.array([i[-3:,0] for i in sliding_window])
Ys = pd.DataFrame(Ys)
Ys = spark.createDataFrame(Ys, ['label_vec' * 3])

overall_df = Xs.withColumn(['label_vec' * 3], Ys['label_vec' * 3])

# Split the data into training set and testing set
train_test_ratio = 0.8
num_data = overall_df.count()
train_df = overall_df.limit(int(train_test_ratio * num_data))
test_df = overall_df.tail(int((1 - train_test_ratio) * num_data))

-----------------------
Xs:

[[[-1.51537513 -2.69691701 -1.53550281]
  [-1.51672965 -2.69585777 -1.53682049]
  [-1.5190318  -2.69271461 -1.53906312]
  ...
  [-1.50103445 -2.79822043 -1.52130626]
  [-1.50194935 -2.78721045 -1.52219748]
  [-1.50591149 -2.78512901 -1.52606424]]

 [[-1.51672965 -2.69585777 -1.53682049]
  [-1.5190318  -2.69271461 -1.53906312]
  [-1.51737651 -2.69123981 -1.53743677]
  ...
  [-1.50194935 -2.78721045 -1.52219748]
  [-1.50591149 -2.78512901 -1.52606424]
  [-1.50383709 -2.78256111 -1.52403061]]

 [[-1.5190318  -2.69271461 -1.53906312]
  [-1.51737651 -2.69123981 -1.53743677]
  [-1.53485904 -2.68932467 -1.55452633]
  ...
  [-1.50591149 -2.78512901 -1.52606424]
  [-1.50383709 -2.78256111 -1.52403061]
  [-1.50398199 -2.80991918 -1.52416626]]

 ...

 [[ 1.5823902   0.05574923  1.58392278]
  [ 1.58082987  0.054354    1.58239699]
  [ 1.57607854  0.04324676  1.57775756]
  ...
  [ 1.42659067  0.72916515  1.43165383]
  [ 1.42124992  0.8473787   1.42643129]
  [ 1.44902523 

Py4JError: An error occurred while calling o341.withColumn. Trace:
py4j.Py4JException: Method withColumn([class java.util.ArrayList, class org.apache.spark.sql.Column]) does not exist
	at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
	at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
	at py4j.Gateway.invoke(Gateway.java:274)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.base/java.lang.Thread.run(Thread.java:829)



# Data splitting
We will use the holdout method. The split percentages are 90% for training, 5% for verification, and 5% for validation.

In [18]:
'''TrainLen = int(len(Xs) * 0.90)
ValLen = int(len(Xs) * 0.95)
TestLen = len(Xs) - TrainLen

x_train = Xs[0:TrainLen,:]
y_train = Ys[0:TrainLen]

x_val = Xs[TrainLen:ValLen,:]
y_val = Ys[TrainLen:ValLen]

x_test = Xs[ValLen:,:]
y_test = Ys[ValLen:]

y_train = y_train.reshape((y_train.shape[0],y_train.shape[1],1))
y_val = y_val.reshape((y_val.shape[0],y_val.shape[1],1))
y_test = y_test.reshape((y_test.shape[0],y_test.shape[1],1))

print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)'''

(21458, 35, 3)
(1192, 35, 3)
(1193, 35, 3)
(21458, 3, 1)
(1192, 3, 1)
(1193, 3, 1)


In [22]:
# Split the data into training set and testing set
train_test_ratio = 0.8
num_data = Xs.count()
train_data = Xs.limit(int(train_test_ratio * num_data))
train_label = Ys.limit(int(train_test_ratio * num_data))

test_data = Xs.tail(int((1 - train_test_ratio) * num_data))
test_label = Ys.tail(int((1 - train_test_ratio) * num_data))

21/11/16 22:55:45 WARN TaskSetManager: Stage 19 contains a task of very large size (22084 KiB). The maximum recommended task size is 1000 KiB.
21/11/16 22:55:45 WARN TaskSetManager: Stage 21 contains a task of very large size (22084 KiB). The maximum recommended task size is 1000 KiB.
                                                                                

In [None]:
#Horovod on spark
import horovod.spark.keras as hvd


In [19]:
# Horovod: initialize Horovod
hvd.init()

In [20]:
model = Sequential()
model.add(LSTM(128, dropout=0.05, return_sequences=True))
model.add(LSTM(64, dropout=0.05))
model.add(Dense(3, activation='sigmoid'))

# Horovod: adjust learning rate based on the number of GPUs
scaled_lr = 0.001 * hvd.size()
opt = tf.optimizers.Adam(scaled_lr)
# Horovod: adjust Horovod DistributedOptimizer
opt = hvd.DistributedOptimizer(opt, backward_passes_per_step=1, average_aggregated_gradients=True)


# Horovod: Specify `experimental_run_tf_function=False` to ensure TensorFlow 
# uses hvd.DistributedOptimizer() to compute gradients.
# Adam optimiser allows high learning rate at first and speeds up training
model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_squared_error'],experimental_run_tf_function=False)

callbackHs = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    
    # Horovod: average metrics among workers at the end of every epoch.
    # Note: This callback must be in the list before the ReduceLROnPlateau,
    # TensorBoard or other metrics-based callbacks.
    hvd.callbacks.MetricAverageCallback(),
    
    # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
    # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
    # the first three epochs. See https://arxiv.org/abs/1706.02677 for details.
    hvd.callbacks.LearningRateWarmupCallback(initial_lr=scaled_lr, warmup_epochs=3, verbose=1),
]

# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
if hvd.rank() == 0:
    callbackHs.append(tf.keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))

# Horovod: write logs on worker 0.
verbose = 1 if hvd.rank() == 0 else 0

# Early stop to prevent overfitting
callbackEs = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=3, verbose=1, restore_best_weights=True)

# Log performance using TensorBoard
callbackTb = tf.keras.callbacks.TensorBoard()

model.fit(x_train, y_train, shuffle=True, epochs=500, verbose=verbose, validation_data=(x_val, y_val), callbacks = [callbackTb, callbackEs,callbackHs])

2021-11-16 22:19:46.809565: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-11-16 22:19:46.809615: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-16 22:19:46.809634: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (LAPTOP-0TFTFNMR): /proc/driver/nvidia/version does not exist
2021-11-16 22:19:46.809858: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-16 22:19:46.882997: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initia

Epoch 1/500
  2/671 [..............................] - ETA: 3:37 - loss: 1.3149 - mean_squared_error: 1.3149 

2021-11-16 22:19:50.557599: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-11-16 22:19:50.557644: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-11-16 22:19:50.663056: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-11-16 22:19:50.680478: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-11-16 22:19:50.737309: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/train/plugins/profile/2021_11_16_22_19_50



  3/671 [..............................] - ETA: 3:18 - loss: 1.2185 - mean_squared_error: 1.2185

2021-11-16 22:19:50.773645: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to logs/train/plugins/profile/2021_11_16_22_19_50/LAPTOP-0TFTFNMR.trace.json.gz
2021-11-16 22:19:50.800051: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/train/plugins/profile/2021_11_16_22_19_50

2021-11-16 22:19:50.804963: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to logs/train/plugins/profile/2021_11_16_22_19_50/LAPTOP-0TFTFNMR.memory_profile.json.gz
2021-11-16 22:19:50.832808: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: logs/train/plugins/profile/2021_11_16_22_19_50
Dumped tool data for xplane.pb to logs/train/plugins/profile/2021_11_16_22_19_50/LAPTOP-0TFTFNMR.xplane.pb
Dumped tool data for overview_page.pb to logs/train/plugins/profile/2021_11_16_22_19_50/LAPTOP-0TFTFNMR.overview_page.pb
Dumped tool data for i

Epoch 2/500

KeyboardInterrupt: 

In [None]:
i = model.predict(x_test)
predicted = np.ravel(i)

print(i)
print(y_test)

print(i.shape)
print(y_val.shape)
'''
for x_test:
    model.predict()
'''

In [None]:
predicted2 = np.ravel(i)
y_test2 = np.ravel(y_test)

plt.title('Error')
# Apply inverse transform to cancel the effects of MinMax scaler and Robust scaler to get back the original
error = scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted2]))[:,0] - scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test2]))[:, 0]
plt.plot(np.arange(len(error)), [0] * len(error))
plt.plot(error)
plt.fill_between(np.arange(len(error)), [0] * len(error), error)

In [None]:
predicted3 = scaler2.inverse_transform([[x,0,0] for x in predicted2])[:,0].ravel()
y_test3 = scaler2.inverse_transform([[x,0,0] for x in y_test2])[:,0].ravel()

plt.title('Prediction (log scale)')
plt.xlabel('Price USD (log)')
plt.plot(predicted3,label="predict")
plt.plot(y_test3,label="real")

In [None]:
# Undo the log to see the actual price in USD
predicted0 = np.exp(predicted3)-1
y_test0 = np.exp(y_test3)-1

plt.title('Prediction (actual)')
plt.xlabel('Price USD')
plt.plot(predicted0,label="predict")
plt.plot(y_test0,label="real")

In [None]:
for _ in range(100):
    print('--------------------------------------------------------------------')

In [None]:
i3 = scaler2.inverse_transform([[x,0,0] for x in predicted2])[:,0].reshape(i.shape)
y_test3 = scaler2.inverse_transform([[x,0,0] for x in y_test2])[:,0].reshape(y_test.shape)

from matplotlib.pyplot import figure

figure(figsize=(8, 6), dpi=80)
for x in range(len(i3)):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
for x1 in range(len(y_test)):
    plt.plot(np.arange(x1,x1+3), y_test3[x1],'b-',label="real")
plt.legend()

In [None]:
for x in range(80,120):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
    plt.plot(np.arange(x,x+3), y_test3[x],'b-',label="real")

In [None]:
predicted2 = np.ravel(i)
y_test2 = np.ravel(y_test)

plt.title('Error')
error = scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted2]))[:,0] - scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test2]))[:, 0]
plt.plot(np.arange(len(error)), [0] * len(error))
plt.plot(error)
plt.fill_between(np.arange(len(error)), [0] * len(error), error)

In [None]:
predicted2 = np.ravel(i)
y_test2 = np.ravel(y_test)

plt.title('Error')
error = scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted2]))[:,0] - scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test2]))[:, 0]
plt.plot(np.arange(len(error)), [0] * len(error))
plt.plot(error)
plt.fill_between(np.arange(len(error)), [0] * len(error), error)

In [None]:
i3 = scaler2.inverse_transform([[x,0,0] for x in predicted2])[:,0].reshape(i.shape)
y_test3 = scaler2.inverse_transform([[x,0,0] for x in y_test2])[:,0].reshape(y_test.shape)

from matplotlib.pyplot import figure

figure(figsize=(8, 6), dpi=80)
for x in range(len(i3)):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
for x1 in range(len(y_test)):
    plt.plot(np.arange(x1,x1+3), y_test3[x1],'b-',label="real")
plt.legend()

In [None]:
for x in range(120,170):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
    plt.plot(np.arange(x,x+3), y_test3[x],'b-',label="real")

In [None]:
predicted2 = np.ravel(i)
y_test2 = np.ravel(y_test)

plt.title('Error')
error = scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted2]))[:,0] - scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test2]))[:, 0]
plt.plot(np.arange(len(error)), [0] * len(error))
plt.plot(error)
plt.fill_between(np.arange(len(error)), [0] * len(error), error)

In [None]:
for x in range(200,210):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
    plt.plot(np.arange(x,x+3), y_test3[x],'b-',label="real")

In [None]:
i3 = scaler2.inverse_transform([[x,0,0] for x in predicted2])[:,0].reshape(i.shape)
y_test3 = scaler2.inverse_transform([[x,0,0] for x in y_test2])[:,0].reshape(y_test.shape)

from matplotlib.pyplot import figure

figure(figsize=(8, 6), dpi=80)
for x in range(len(i3)):
    plt.plot(np.arange(x,x+3), i3[x],'r--',label="predict")
for x1 in range(len(y_test)):
    plt.plot(np.arange(x1,x1+3), y_test3[x1],'b-',label="real")
plt.legend()

In [None]:
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted]))[:,0], label='predict')
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test]))[:, 0], label='actual')
plt.xlabel('Hours')
plt.ylabel('Price BTC')
plt.legend()

In [None]:
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted]))[:,0], label='predict')
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test]))[:, 0], label='actual')
plt.xlabel('Hours')
plt.ylabel('Price USD')
plt.legend()

In [None]:
plt.title('Error')
error = scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted]))[:,0] - scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test]))[:, 0]
plt.plot(np.arange(len(error)), [0] * len(error))
plt.plot(error)
plt.fill_between(np.arange(len(error)), [0] * len(error), error)

In [None]:
#plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in np.array(a[-946:])]))[:,0], label='predict')
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in predicted2]))[:50,0], label='predict')
plt.plot(scaler.inverse_transform(scaler2.inverse_transform([[x,0,0] for x in y_test]))[:50, 0], label='actual')
plt.xlabel('Hours')
plt.ylabel('Price USD')
plt.legend()