In [144]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt

# Đọc dữ liệu từ file CSV và in ra để kiểm tra
df = pd.read_csv('btc_post_covid.csv')
# Chuyển đổi cột Date thành datetime và sắp xếp theo ngày tăng dần
try:
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df = df.sort_values('Date').reset_index(drop=True)
    print("Date parsed and sorted.")
except Exception as e:
    print("Lỗi parse date:", str(e))

Date parsed and sorted.


In [145]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

# Sau khi df đã được đọc và sắp xếp theo Date tăng dần
features = ['Open', 'High', 'Low', 'Close']
data = df[features].copy()

# ===== CHUẨN HÓA DỮ LIỆU =====
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = pd.DataFrame(
    scaler.fit_transform(data),
    columns=features,
    index=data.index
)


lag = 5  # Số ngày quá khứ dùng để dự đoán

X = []
y = []

for i in range(len(data_scaled) - lag):
    # Lấy lag ngày liên tiếp làm input (từ i đến i+lag-1)
    X.append(data_scaled.iloc[i:i + lag].values)      
    # Dự đoán giá Close của ngày tiếp theo (i + lag)
    y.append(data_scaled.iloc[i + lag]['Close'])

X = np.array(X)   
y = np.array(y)   

print("X shape:", X.shape)  
print("y shape:", y.shape)  

X shape: (1505, 5, 4)
y shape: (1505,)


In [None]:
# Sau khi đã tạo X (shape: samples, lag, 4) và y

# ===== CHIA TRAIN/TEST =====
train_size = int(len(X) * 0.8)
X_train = X[:train_size]
X_test = X[train_size:]
y_train = y[:train_size]
y_test = y[train_size:]

# ===== FLATTEN CHO MLP =====
# MLP chỉ nhận vector phẳng → reshape thành (samples, lag * 4)
X_train_mlp = X_train.reshape(X_train.shape[0], -1)  # ví dụ lag=5 → (1791, 20)
X_test_mlp = X_test.reshape(X_test.shape[0], -1)      # ví dụ lag=5 → (448, 20)

# Chuyển thành tensor
X_train_tensor = torch.FloatTensor(X_train_mlp)
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)

X_test_tensor = torch.FloatTensor(X_test_mlp)  # ← Giờ đúng shape (448, 20)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)

------------------- MLP Model ------------------------

In [129]:
import torch.nn as nn

class MLP_Level1(nn.Module):
    def __init__(self, input_size=20, hidden_size=32):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc_out = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        # x: (batch, 4)
        x = self.relu(self.fc1(x))
        x = self.fc_out(x)
        return x.squeeze(-1)  # return shape (batch,)

In [130]:
class MLP_Level2(nn.Module):
    def __init__(self, input_size=20, hidden1=64, hidden2=32, dropout=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.dropout2 = nn.Dropout(dropout)
        self.fc_out = nn.Linear(hidden2, 1)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc_out(x)
        return x.squeeze(-1)

In [131]:
class MLP_Level3(nn.Module):
    def __init__(self, input_size=20, h1=128, h2=64, h3=32, dropout=0.3):
        super().__init__()
        self.fc1 = nn.Linear(input_size, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc_out = nn.Linear(h3, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc_out(x)
        return x.squeeze(-1)

In [None]:
# Train function 
def train_model(model, loader, epochs=100, lr=0.001, patience=10):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    best_loss = float('inf')
    patience_counter = 0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_loss = running_loss / len(loader)
        print(f"Epoch {epoch+1}, Loss: {avg_loss}")
        if avg_loss < best_loss:
            best_loss = avg_loss
            
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping at epoch", epoch+1)
            break
    return model

In [133]:
mlp1 = MLP_Level1()
mlp2 = MLP_Level2()
mlp3 = MLP_Level3()

In [138]:
# Train MLP
#mlp_model = MLP()
mlp_model = train_model(mlp1, train_loader)

# Predict MLP
mlp_model.eval()
with torch.no_grad():
    y_pred_mlp = mlp_model(X_test_tensor).numpy().flatten()

# Metrics MLP
rmse_mlp = sqrt(mean_squared_error(y_test, y_pred_mlp))
mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
mape_mlp = np.mean(np.abs((y_test - y_pred_mlp) / y_test)) * 100
r2_mlp = r2_score(y_test, y_pred_mlp)

print("MLP Metrics:")
print(f"RMSE: {rmse_mlp}")
print(f"MAE: {mae_mlp}")
print(f"MAPE: {mape_mlp}")
print(f"R2: {r2_mlp}")

Epoch 1, Loss: 0.008387672990801022
Epoch 2, Loss: 0.008055321688420679
Epoch 3, Loss: 0.007589633166054754
Epoch 4, Loss: 0.007404570121587695
Epoch 5, Loss: 0.007308501587862051
Epoch 6, Loss: 0.007321324774621692
Epoch 7, Loss: 0.007188088137282696
Epoch 8, Loss: 0.007274831151957379
Epoch 9, Loss: 0.0072086877176173545
Epoch 10, Loss: 0.0072581007765333385
Epoch 11, Loss: 0.007259626587926361
Epoch 12, Loss: 0.007265839733008761
Epoch 13, Loss: 0.007262245017811533
Epoch 14, Loss: 0.007266754340794011
Epoch 15, Loss: 0.007277562673583529
Epoch 16, Loss: 0.007270432147427192
Epoch 17, Loss: 0.007268997713828484
Early stopping at epoch 17
MLP Metrics:
RMSE: 0.028301042143129104
MAE: 0.02014702541171013
MAPE: 5.290209323253708
R2: 0.8882768656392404


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


----------------------- SimpleRNN model --------------------------

In [146]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

# Giả sử X (shape: samples, lag, 4) và y (shape: samples,) đã có sẵn từ trước

total_samples = len(X)
train_ratio = 0.8
train_size = int(total_samples * train_ratio)  # Làm tròn xuống

# Chia dữ liệu theo thời gian (train trước, test sau – đúng cho time series)
X_train = X[:train_size]
X_test  = X[train_size:]
y_train = y[:train_size]
y_test  = y[train_size:]

# Kiểm tra độ dài để đảm bảo không lệch
print(f"Total samples: {total_samples}")
print(f"Train samples: {len(X_train)} (X), {len(y_train)} (y)")
print(f"Test samples : {len(X_test)} (X), {len(y_test)} (y)")


X_train_tensor = torch.FloatTensor(X_train)   
X_test_tensor  = torch.FloatTensor(X_test)    

y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)  
y_test_tensor  = torch.FloatTensor(y_test).unsqueeze(1)    

# Nếu vô tình có chiều thừa (4D)
if X_train_tensor.dim() == 4:
    X_train_tensor = X_train_tensor.squeeze(-1)
    X_test_tensor  = X_test_tensor.squeeze(-1)

# DataLoader dành riêng cho RNN
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=False  
)

print("\n=== Shape kiểm tra ===")
print(f"X_train_tensor: {X_train_tensor.shape}")   
print(f"X_test_tensor : {X_test_tensor.shape}")    
print(f"y_train_tensor: {y_train_tensor.shape}")   
print(f"y_test_tensor : {y_test_tensor.shape}")    

Total samples: 1505
Train samples: 1204 (X), 1204 (y)
Test samples : 301 (X), 301 (y)

=== Shape kiểm tra ===
X_train_tensor: torch.Size([1204, 5, 4])
X_test_tensor : torch.Size([301, 5, 4])
y_train_tensor: torch.Size([1204, 1])
y_test_tensor : torch.Size([301, 1])


In [147]:
class RNN_Level1(nn.Module):
    def __init__(self, input_size=4, hidden_size=64, dropout=0.2):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=1,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 32)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc_out = nn.Linear(32, 1)
    
    def forward(self, x):
        # x: (batch, seq_len=1, 4)
        _, hn = self.rnn(x)          # hn: (1, batch, hidden)
        x = hn.squeeze(0)            # (batch, hidden)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc_out(x)
        return x.squeeze(-1)

In [121]:
class RNN_Level2(nn.Module):
    def __init__(self, input_size=4, hidden_size=96, dropout=0.3):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=2,
                          batch_first=True, dropout=dropout)  # Dropout giữa 2 lớp
        self.fc1 = nn.Linear(hidden_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc_out = nn.Linear(32, 1)
    
    def forward(self, x):
        _, hn = self.rnn(x)          # hn: (2, batch, hidden)
        x = hn[-1]                   # Lấy hidden của lớp thứ 2
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc_out(x)
        return x.squeeze(-1)

In [76]:
class RNN_Level3(nn.Module):
    def __init__(self, input_size=4, hidden_size=128, dropout=0.4):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=3,
                          batch_first=True, dropout=dropout)  # Dropout giữa các lớp
        self.fc1 = nn.Linear(hidden_size, 96)
        self.fc2 = nn.Linear(96, 64)
        self.fc3 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc_out = nn.Linear(32, 1)
    
    def forward(self, x):
        _, hn = self.rnn(x)          # hn: (3, batch, hidden)
        x = hn[-1]                   # Lấy hidden của lớp thứ 3
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc_out(x)
        return x.squeeze(-1)

In [122]:
rnn1 = RNN_Level1()
rnn2 = RNN_Level2()
rnn3 = RNN_Level3()

In [149]:
# Train RNN
#rnn_model = SimpleRNN()
rnn_model = train_model(rnn1, train_loader)

# Predict RNN

rnn_model.eval()
with torch.no_grad():
    y_pred_rnn = rnn_model(X_test_tensor).cpu().numpy().flatten()  # ← Đảm bảo tên biến đúng

# Kiểm tra độ dài - DÙNG ĐÚNG BIẾN y_pred_rnn
print("len(y_test):", len(y_test))
print("len(y_pred_rnn):", len(y_pred_rnn))  # ← Sửa từ y_pred_mlp thành y_pred_rnn

assert len(y_test) == len(y_pred_rnn), "Số mẫu không khớp!"

# Metrics RNN - giờ sẽ chạy ngon
rmse_rnn = sqrt(mean_squared_error(y_test, y_pred_rnn))
mae_rnn = mean_absolute_error(y_test, y_pred_rnn)
mape_rnn = np.mean(np.abs((y_test - y_pred_rnn) / y_test)) 
r2_rnn = r2_score(y_test, y_pred_rnn)

print("\nSimpleRNN Metrics:")
print(f"RMSE: {rmse_rnn:.4f}")
print(f"MAE: {mae_rnn:.4f}")
print(f"MAPE: {mape_rnn:.2f}")
print(f"R²: {r2_rnn:.4f}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.010337532626612014
Epoch 2, Loss: 0.009190440908576803
Epoch 3, Loss: 0.009315590611597719
Epoch 4, Loss: 0.007874603741679732
Epoch 5, Loss: 0.007352853042998743
Epoch 6, Loss: 0.006947750941770592
Epoch 7, Loss: 0.006852857080766147
Epoch 8, Loss: 0.006969073768950214
Epoch 9, Loss: 0.00731297251944565
Epoch 10, Loss: 0.00768147788231114
Epoch 11, Loss: 0.0072398756456095725
Epoch 12, Loss: 0.008953454332904107
Epoch 13, Loss: 0.007764653299665569
Epoch 14, Loss: 0.00817696527289962
Epoch 15, Loss: 0.008478199977329686
Epoch 16, Loss: 0.007309432438648257
Epoch 17, Loss: 0.006921063205625519
Early stopping at epoch 17
len(y_test): 301
len(y_pred_rnn): 301

SimpleRNN Metrics:
RMSE: 0.0331
MAE: 0.0226
MAPE: 0.04
R²: 0.9317


In [150]:
import torch
import joblib

# 1. Lưu trọng số mô hình PyTorch
# Nên lưu file có đuôi .pth hoặc .pt
torch.save(rnn_model.state_dict(), 'rnn_model_weights.pth')

# 2. Lưu bộ chuẩn hóa Scaler
# Quan trọng: Streamlit cần cái này để transform input mới
joblib.dump(scaler, 'scaler.pkl')

print("Đã lưu mô hình và scaler thành công!")

Đã lưu mô hình và scaler thành công!
