In [None]:
import pandas as pd
import numpy
import numpy as np
import statistics as st
from scipy.stats import mode, skew, kurtosis
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, root_mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from datetime import datetime, timedelta
from pmdarima import auto_arima
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, RNN, GRU, Dropout, Activation

In [None]:
# Load the BNB price history from a CSV file located relative to the
# notebook's working directory.
bnb_df = pd.read_csv("./BNB Historical Data.csv")
# Last expression of the cell: shows the loaded frame as the cell output.
bnb_df

### Pre-processing Data

In [None]:
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() appends a stray "None".
bnb_df.info()

* Định dạng lại cột `Date` để nó trở thành dạng `datetime`
* Xử lý và chuyển đổi kiểu dữ liệu của cột `Vol.` và `Change %` thành kiểu float

In [None]:
# Parse the `Date` column into datetime values so it can serve as a time index.
bnb_df['Date'] = pd.to_datetime(bnb_df['Date'])

# `Vol.` is text with a magnitude suffix; rewrite the suffix as a power-of-ten
# exponent so float() can parse it ("1.5M" -> "1.5e6"). Missing volumes
# become 0. A "B" suffix is treated as billions (assumed to follow the same
# convention as K/M - confirm against the data source); previously such a
# value would have made the float conversion raise.
bnb_df['Vol.'] = (
    bnb_df['Vol.']
    .str.replace('B', 'e9', regex=False)
    .str.replace('M', 'e6', regex=False)
    .str.replace('K', 'e3', regex=False)
    .fillna('0')
    .astype(float)
)

# Strip the trailing percent sign and convert the daily change to float.
bnb_df['Change %'] = bnb_df['Change %'].str.replace('%', '', regex=False).astype(float)

# Index by date and order chronologically (oldest first). The former
# `bnb_df.reset_index(drop=True)` call was removed: its result was discarded,
# so it never had any effect.
# NOTE: this cell mutates `bnb_df` in place and consumes the `Date` column,
# so re-running it without reloading the CSV raises a KeyError.
bnb_df.set_index('Date', inplace=True)
bnb_df.sort_index(ascending=True, inplace=True)
bnb_df

In [None]:
# Per-column count of missing (NaN/NaT) entries in the cleaned frame.
missing_values = bnb_df.isna().sum()
print("Missing values:\n", missing_values)

In [None]:
# Re-check dtypes after the conversions. DataFrame.info() prints its report
# itself and returns None, so print() around it only added a "None" line.
bnb_df.info()

In [None]:
# Summary statistics of bnb_df as produced by DataFrame.describe(); kept in a
# variable and shown as the cell output.
bnb_description = bnb_df.describe()
bnb_description

In [None]:
# Extra descriptive statistics for the Price column that describe() does not
# report: mode (first value if several), spread and shape of the distribution.
price_series = bnb_df['Price']

print('More Price Statistics:')
for stat_label, stat_value in (
    ('Mode:', price_series.mode().values[0]),
    ('Standard Deviation:', price_series.std()),
    ('Variance:', price_series.var()),
    ('Kurtosis:', price_series.kurt()),
    ('Skewness:', price_series.skew()),
):
    print(stat_label, stat_value)

##### Price by Date Plot

In [None]:
# Line chart of the Price column against the datetime index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(bnb_df['Price'], label='Price', color='b')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Historical BNB Price from 1/3/2019 to 1/3/2024')
ax.legend()
ax.grid(True)
plt.show()


##### Boxplot for Price

In [None]:
# Box plot of the Price column to show its spread and outliers.
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(bnb_df['Price'])
ax.grid()
plt.show()

### Parameters

In [None]:
# Data-shaping settings.
# NOTE(review): none of these are referenced by the cells visible in this
# part of the notebook - presumably used by later models; confirm.
window_size, feature_dim = 10, 5
test_size = 0.2

# model params
lstm_neurons = rnn_neurons = 20
epochs, batch_size = 100, 4
loss, optimizer = 'mae', 'adam'
dropout = 0.25

### Split data

In [None]:
# Chronological split fractions: train / test / validation.
train_ratio, test_ratio, val_ratio = 0.6, 0.3, 0.1

In [None]:
# Positional boundaries of the chronological split: rows [0, train_data) are
# the training set and rows [train_data, test_data) are the test set.
n_rows = len(bnb_df)
train_data = int(n_rows * train_ratio)
test_data = train_data + int(n_rows * test_ratio)

print('train index: ', train_data)
print('test index: ', test_data)

In [None]:
# Slice the date-sorted frame by position into train / test / validation.
# Validation takes every row after the test block.
train_df = bnb_df.iloc[:train_data]
test_df = bnb_df.iloc[train_data:test_data]
val_df = bnb_df.iloc[test_data:]

for shape_label, split_frame in (
    ('train_df.shape: ', train_df),
    ('test_df.shape: ', test_df),
    ('val_df.shape: ', val_df),
):
    print(shape_label, split_frame.shape)

In [None]:
# Show which part of the price history falls into each split.
fig, ax = plt.subplots(figsize=(12, 6))
for split_frame, line_color, line_label in (
    (train_df, 'r', 'Train'),
    (test_df, 'b', 'Test'),
    (val_df, 'y', 'Validation'),
):
    ax.plot(split_frame.index, split_frame['Price'], color=line_color, label=line_label)
ax.legend()
plt.show()

In [None]:
# Fit the min-max scaler on the training split only, then apply the same
# transform to all three splits so no test/validation statistics are used.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_df)

# Wrap the scaled arrays back into DataFrames that keep the original date
# index and column names.
train_df_scaled = pd.DataFrame(
    scaler.transform(train_df), index=train_df.index, columns=train_df.columns
)
test_df_scaled = pd.DataFrame(
    scaler.transform(test_df), index=test_df.index, columns=test_df.columns
)
val_df_scaled = pd.DataFrame(
    scaler.transform(val_df), index=val_df.index, columns=val_df.columns
)

In [None]:
# Sanity check: scaling must not change the shape of any split.
for shape_label, scaled_frame in (
    ('train_df_scaled shape: ', train_df_scaled),
    ('test_df_scaled shape: ', test_df_scaled),
    ('val_df_scaled shape: ', val_df_scaled),
):
    print(shape_label, scaled_frame.shape)

In [None]:
# Name of the column the models predict; the remaining columns of the scaled
# frames are used as input features.
target = 'Price'

In [None]:
# For each scaled split: y_* is the target column (a Series that keeps the
# date index) and X_* is a NumPy array of all remaining columns.
y_train = train_df_scaled[target]
X_train = train_df_scaled.drop(columns=[target]).values

y_test = test_df_scaled[target]
X_test = test_df_scaled.drop(columns=[target]).values

y_val = val_df_scaled[target]
X_val = val_df_scaled.drop(columns=[target]).values

### Model Training

#### Linear Regression

In [None]:
# Fit ordinary least squares on the scaled training features (fit() returns
# the estimator itself, so construction and fitting are chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the scaled target for the test and validation splits.
y_pred_test_lr, y_pred_val_lr = model.predict(X_test), model.predict(X_val)

##### Evaluate Linear Regression

In [None]:
# Error metrics, in the order they are unpacked below: MAPE, MAE, RMSE.
# All values are measured on the min-max-scaled target.
lr_metrics = (mean_absolute_percentage_error, mean_absolute_error, root_mean_squared_error)

# Test split.
mape_test_lr, mae_test_lr, rmse_test_lr = (
    metric(y_test, y_pred_test_lr) for metric in lr_metrics
)

print("Test Set MAPE: ", mape_test_lr)
print("Test Set MAE: ", mae_test_lr)
print("Test Set RMSE: ", rmse_test_lr)

# Validation split.
mape_val_lr, mae_val_lr, rmse_val_lr = (
    metric(y_val, y_pred_val_lr) for metric in lr_metrics
)

print("Validation Set MAPE: ", mape_val_lr)
print("Validation Set MAE: ", mae_val_lr)
print("Validation Set RMSE: ", rmse_val_lr)

In [None]:
# Actual (scaled) prices per split, overlaid with the linear-regression
# predictions for the test and validation periods.
fig, ax = plt.subplots(figsize=(12, 6))
for x_dates, y_values, line_color, line_label in (
    (y_train.index, y_train, 'red', 'Train value'),
    (y_test.index, y_test, 'blue', 'Test value'),
    (y_test.index, y_pred_test_lr.reshape(-1), 'g', 'Predicted test value'),
    (y_val.index, y_val, 'y', 'Validation value'),
    (y_val.index, y_pred_val_lr.reshape(-1), 'purple', 'Predicted validation value'),
):
    ax.plot(x_dates, y_values, color=line_color, label=line_label)
ax.legend()
plt.show()

#### ARIMA

In [None]:
# Inspect the scaled training target; this is the series auto_arima is
# fitted on in the next cell.
y_train

In [None]:
# Stepwise search for a non-seasonal ARIMA order on the scaled training
# target. p and q are searched from 1 up to 5; d=None leaves the differencing
# order to auto_arima. trace=True logs each candidate model.
# NOTE(review): m, start_P and D are seasonal settings and presumably have no
# effect while seasonal=False - confirm against the pmdarima docs.
arima_search_kwargs = dict(
    start_p=1, max_p=5,
    start_q=1, max_q=5,
    d=None,
    seasonal=False, m=1, start_P=0, D=0,
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
stepwise_model = auto_arima(y_train, **arima_search_kwargs)

print(stepwise_model.summary())

In [None]:
# The model was fitted on the training series only, so predict() always
# forecasts the steps immediately after the end of training. The test split
# comes first and the validation split follows it, so the whole horizon is
# forecast once and then cut into the two consecutive windows. Previously the
# validation forecast was the first len(X_val) steps, i.e. the start of the
# test period, and was scored against the wrong dates.
n_test_steps, n_val_steps = len(X_test), len(X_val)
arima_forecast = np.asarray(
    stepwise_model.predict(n_periods=n_test_steps + n_val_steps)
)

# Steps 1..n_test cover the test period.
y_pred_test_arima = arima_forecast[:n_test_steps]
mape_test_arima = mean_absolute_percentage_error(y_test, y_pred_test_arima)
mae_test_arima = mean_absolute_error(y_test, y_pred_test_arima)
rmse_test_arima = root_mean_squared_error(y_test, y_pred_test_arima)

print("Test Set MAPE: ", mape_test_arima)
print("Test Set MAE: ", mae_test_arima)
print("Test Set RMSE: ", rmse_test_arima)

# The remaining steps cover the validation period that follows the test period.
y_pred_val_arima = arima_forecast[n_test_steps:]
mape_val_arima = mean_absolute_percentage_error(y_val, y_pred_val_arima)
mae_val_arima = mean_absolute_error(y_val, y_pred_val_arima)
rmse_val_arima = root_mean_squared_error(y_val, y_pred_val_arima)

print("Val Set MAPE: ", mape_val_arima)
print("Val Set MAE: ", mae_val_arima)
print("Val Set RMSE: ", rmse_val_arima)

In [None]:
# Actual (scaled) prices per split, overlaid with the ARIMA forecasts. The
# forecasts are plotted by position against the dates of the matching split.
fig, ax = plt.subplots(figsize=(12, 6))
for x_dates, y_values, line_color, line_label in (
    (y_train.index, y_train, 'red', 'Train value'),
    (y_test.index, y_test, 'blue', 'Test value'),
    (y_test.index, np.asarray(y_pred_test_arima), 'g', 'Predicted test value'),
    (y_val.index, y_val, 'y', 'Validation value'),
    (y_val.index, np.asarray(y_pred_val_arima), 'purple', 'Predicted validation value'),
):
    ax.plot(x_dates, y_values, color=line_color, label=line_label)
ax.legend()
plt.show()

#### GRU

In [None]:
# Recurrent layers take 3-D input, so every row of the 2-D feature matrix
# becomes a sequence of length 1: (rows, features) -> (rows, 1, features).
# All three targets are converted to plain NumPy arrays so the splits are
# handled uniformly (previously only the training target was converted and
# the test/validation targets stayed pandas Series).
X_train_GRU = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
y_train_GRU = y_train.values

X_test_GRU = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_test_GRU = y_test.values

X_val_GRU = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))
y_val_GRU = y_val.values

In [None]:
# Build the GRU network: two stacked 50-unit GRU layers (the first returns
# the full sequence so the second can consume it), then a 25-unit dense layer
# and a single-output regression head.
model_GRU = Sequential([
    GRU(50, return_sequences=True, input_shape=(1, X_train_GRU.shape[2])),
    GRU(50, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Compile the model with Adam and mean-squared-error loss.
model_GRU.compile(optimizer='adam', loss='mean_squared_error')

# Train the model; the validation split is used only to report the
# per-epoch validation loss.
history_GRU = model_GRU.fit(
    X_train_GRU,
    y_train_GRU,
    validation_data=(X_val_GRU, y_val_GRU),
    epochs=100,
    batch_size=64,
    verbose=1,
)

In [None]:
# Compile the model.
# NOTE(review): model_GRU was already compiled with the same optimizer and
# loss before fit() in the previous cell, so this second compile looks
# redundant - confirm and consider removing the cell.
model_GRU.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Predict prices (in the scaled target space) for each split with the
# trained GRU model.
train_predict_GRU = model_GRU.predict(X_train_GRU)
val_predict_GRU = model_GRU.predict(X_val_GRU)
test_predict_GRU = model_GRU.predict(X_test_GRU)

In [None]:
# Test-split metrics for the GRU model (measured on the scaled target).
# The RMSE is now computed from the test targets and test predictions; it
# previously used the validation arrays, so the printed "Test Set RMSE" was
# really the validation RMSE.
mape_test_GRU = mean_absolute_percentage_error(y_test_GRU, test_predict_GRU)
mae_test_GRU = mean_absolute_error(y_test_GRU, test_predict_GRU)
rmse_test_GRU = root_mean_squared_error(y_test_GRU, test_predict_GRU)

print("Test Set MAPE: ", mape_test_GRU)
print("Test Set MAE: ", mae_test_GRU)
print("Test Set RMSE: ", rmse_test_GRU)

# Validation-split metrics.
mape_val_GRU = mean_absolute_percentage_error(y_val_GRU, val_predict_GRU)
mae_val_GRU = mean_absolute_error(y_val_GRU, val_predict_GRU)
rmse_val_GRU = root_mean_squared_error(y_val_GRU, val_predict_GRU)

print("Val Set MAPE: ", mape_val_GRU)
print("Val Set MAE: ", mae_val_GRU)
print("Val Set RMSE: ", rmse_val_GRU)

In [None]:
# Actual (scaled) prices per split, overlaid with the GRU predictions for the
# test and validation periods.
fig, ax = plt.subplots(figsize=(12, 6))
for x_dates, y_values, line_color, line_label in (
    (y_train.index, y_train, 'red', 'Train value'),
    (y_test.index, y_test, 'blue', 'Test value'),
    (y_test.index, np.asarray(test_predict_GRU), 'g', 'Predicted test value'),
    (y_val.index, y_val, 'y', 'Validation value'),
    (y_val.index, np.asarray(val_predict_GRU), 'purple', 'Predicted validation value'),
):
    ax.plot(x_dates, y_values, color=line_color, label=line_label)
ax.legend()
plt.show()

#### AutoFormer