In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [29]:
import requests
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Function to fetch historical data from Binance API
def fetch_historical_data(symbol, interval='1d', start_date=None, end_date=None, limit=1000):
    url = f'https://api.binance.com/api/v3/klines?symbol={symbol}&interval={interval}&limit={limit}'
    
    if start_date:
        url += f'&startTime={int(pd.Timestamp(start_date).timestamp() * 1000)}'
    if end_date:
        url += f'&endTime={int(pd.Timestamp(end_date).timestamp() * 1000)}'
    
    response = requests.get(url)
    
    if response.status_code != 200:
        raise Exception(f'Error fetching data: {response.status_code} - {response.text}')
    
    data = response.json()
    
    df = pd.DataFrame(data, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])
    
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('timestamp', inplace=True)
    df.drop(columns=['close_time', 'ignore'], inplace=True)
    
    # Convert columns to numeric
    df = df.apply(pd.to_numeric, errors='coerce')
    
    df['price_range'] = df['high'] - df['low']
    df['close_to_open'] = df['close'] - df['open']
    
    return df

# Fetch Ethereum data
# symbol = 'ETHUSDT'
symbol = 'BTCUSDT'
df = fetch_historical_data(symbol, start_date='2022-06-01', end_date='2025-02-06')


In [30]:
# Preprocess: Shift the target (closing price) by -1 to predict the next day's closing price
df['target'] = df['close'].shift(-1)

# Drop the last row because it will have a NaN target (due to the shift)
df = df.dropna()

In [31]:
# Define sequence length
sequence_length = 30

# Function to create sequences
def create_sequences(data, sequence_length):
    xs, ys = [], []
    for i in range(len(data) - sequence_length):
        x = data.iloc[i:(i + sequence_length)].values
        y = data.iloc[i + sequence_length]['target']
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# Create sequences
X_seq, y_seq = create_sequences(df, sequence_length)



In [32]:

train_size = int(0.8 * len(X_seq))
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [33]:
len(train_loader)

24

In [34]:
len(test_loader)

6

In [35]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]  # Number of features
hidden_size = 50
output_size = 1
num_layers = 1

# Initialize the model
model = RNNModel(input_size, hidden_size, output_size, num_layers)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [36]:
# Training loop
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/100], Loss: 4055348480.0000
Epoch [2/100], Loss: 4055197952.0000
Epoch [3/100], Loss: 4055051776.0000
Epoch [4/100], Loss: 4054907136.0000
Epoch [5/100], Loss: 4054763776.0000
Epoch [6/100], Loss: 4054620928.0000
Epoch [7/100], Loss: 4054478336.0000
Epoch [8/100], Loss: 4054336768.0000
Epoch [9/100], Loss: 4054194944.0000
Epoch [10/100], Loss: 4054053376.0000
Epoch [11/100], Loss: 4053912320.0000
Epoch [12/100], Loss: 4053770240.0000
Epoch [13/100], Loss: 4053629184.0000
Epoch [14/100], Loss: 4053488128.0000
Epoch [15/100], Loss: 4053347328.0000
Epoch [16/100], Loss: 4053206272.0000
Epoch [17/100], Loss: 4053065728.0000
Epoch [18/100], Loss: 4052924672.0000
Epoch [19/100], Loss: 4052784896.0000
Epoch [20/100], Loss: 4052643840.0000
Epoch [21/100], Loss: 4052502784.0000
Epoch [22/100], Loss: 4052362496.0000
Epoch [23/100], Loss: 4052221952.0000
Epoch [24/100], Loss: 4052081408.0000
Epoch [25/100], Loss: 4051941120.0000
Epoch [26/100], Loss: 4051800576.0000
Epoch [27/100], Loss:

In [43]:
model.eval()
with torch.no_grad():
    y_pred = []
    for batch_X, batch_y in test_loader:
        batch_X = batch_X.to(device)
        outputs = model(batch_X)
        y_pred.extend(outputs.cpu().numpy())
    
    y_pred = np.array(y_pred).flatten()
    y_test_np = y_test.numpy()
    
    mse = mean_squared_error(y_test_np, y_pred)
    mae = mean_absolute_error(y_test_np, y_pred)
    rmse = np.sqrt(mse)
    
    print(f'Root Mean Squared Error: {rmse:.4f}')
    print(f'Mean Absolute Error: {mae:.4f}')
    print(f'Mean Squared Error: {mse:.4f}')

Root Mean Squared Error: 80967.0547
Mean Absolute Error: 78947.1172
Mean Squared Error: 6555663360.0000
