In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from IPython.display import Image

In [None]:
from models.scripts.lstm import AssetDataset, AssetRelationshipModel, plot_correlation_matrix_with_labels
from models.scripts.pearson import calculate_returns, analyze_correlation, rolling_correlation_multi
from models.scripts.var import DataProcessor, VARModel, Plotter

In [21]:
# Notebook-wide configuration: the CSV the analyses read from and the
# directory that the cells below build their output paths under.
input_file = "./processed_data/processed_data.csv"
output_dir = "./results"

# Create the output directory up front so that cells which hand paths under it
# to the plotting/export helpers can write there on a fresh checkout.
# This is a no-op when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

### 1. Load and normalize the data

In [None]:
# Load the price table; the first CSV column is used as the index and parsed
# as dates.
prices = pd.read_csv(input_file, index_col=0, parse_dates=True)

# Min-max scale every column to [0, 1].
# A column whose values are all equal has a range of zero, which would turn the
# whole column into NaN (0/0) and propagate into everything computed from
# `data_values`. Dividing such a column by 1 instead maps it to all zeros;
# columns with a non-zero range are scaled exactly as before.
normalized_data = (prices - prices.min()) / (prices.max() - prices.min()).replace(0, 1)
data_values = normalized_data.values

### 2. Analyze the correlation using different methods
#### 2.1 VAR model (linear)

In [None]:
# Prepare the inputs for the VAR analysis using the project's DataProcessor.
# The processor methods are project helpers whose implementation is not visible
# in this notebook; the notes below follow their names and how their results
# are used in later cells.
processor = DataProcessor(input_file)
# The object returned here is what the VAR model in the next cell is built from.
data = processor.load_data()
processor.check_missing_values()
# NOTE(review): `data` was captured before this call. Whether it reflects the
# filled values depends on whether fill_missing_values() works in place on the
# same object — confirm against DataProcessor.
processor.fill_missing_values()
processor.check_date_range()
# Passed to Plotter further below; it is not the input of the VAR model itself.
normalized_data_for_var = processor.normalize_data()
# File path handed to the VAR plotting call in a later cell.
var_plot_path = os.path.join(output_dir, "normalized_asset_prices.png")

In [None]:
# Build the VAR model, pick a lag order, fit, and run the causality tests.
# NOTE(review): the model is constructed from `data` (the return value of
# processor.load_data()), not from `normalized_data_for_var` — confirm that this
# is the intended input.
var_model = VARModel(data)
# The lag returned by the model's own selection routine is passed straight to the fit.
best_lag = var_model.select_best_lag()
var_model.fit_model(best_lag)
# Kept in a variable because the plotting cell passes it to the Plotter.
causality_results = var_model.test_causality()

In [None]:
# Plot the normalized series together with the causality results, then show
# the resulting image inline, the same way the LSTM and Pearson sections end
# with an Image(...) of their saved figure.
# Assumes plot_normalized_data_with_causality() writes the figure to the path
# it is given (`var_plot_path`) — TODO confirm against Plotter.
plotter = Plotter(normalized_data_for_var)
plotter.plot_normalized_data_with_causality(var_plot_path, causality_results)
Image(filename=var_plot_path)

#### 2.2 LSTM model (non-linear)

In [None]:
# LSTM-based relationship analysis: train the model on the normalized prices,
# collect its output for every sample in the dataset, and correlate the output
# dimensions (one per price column) with each other.

# Hyperparameters.
seq_length = 10
input_size = data_values.shape[1]  # number of columns in the normalized table
hidden_size = 64
num_layers = 2
output_size = input_size  # the model emits one value per input column
batch_size = 32
num_epochs = 10
learning_rate = 0.001
seed = 0

# Seed torch so weight initialisation and batch shuffling repeat across runs.
torch.manual_seed(seed)

dataset = AssetDataset(data_values, seq_length)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model = AssetRelationshipModel(input_size, hidden_size, num_layers, output_size)

# Train the model. Previously `train_loader`, `num_epochs` and `learning_rate`
# were defined but never used, so the features below came from randomly
# initialised weights.
# Assumes AssetRelationshipModel is a torch.nn.Module and that the second item
# yielded by AssetDataset is a regression target with the same shape as the
# model output — TODO confirm against models/scripts/lstm.py.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_inputs, batch_targets in train_loader:
        optimizer.zero_grad()
        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets.to(predictions.dtype))
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a per-sample mean.
        running_loss += loss.item() * batch_inputs.size(0)
    print(f"epoch {epoch + 1}/{num_epochs}  loss={running_loss / len(dataset):.6f}")

# Collect the model output for every sample, in dataset order, without
# tracking gradients. eval() switches off training-only layers such as dropout.
model.eval()
features = []
with torch.no_grad():
    for i in range(len(dataset)):
        inputs, _ = dataset[i]
        inputs = inputs.unsqueeze(0)  # add a leading batch dimension of size 1
        feature = model(inputs).squeeze().numpy()
        features.append(feature)
features = np.array(features)

# np.corrcoef treats rows as variables, so transpose to correlate the output
# dimensions rather than the samples.
corr_matrix = np.corrcoef(features.T)
asset_labels = list(prices.columns)

corr_heatmap_path = os.path.join(output_dir, "lstm_correlation_heatmap_with_labels.png")
plot_correlation_matrix_with_labels(corr_matrix, asset_labels, corr_heatmap_path)
Image(filename=corr_heatmap_path)

#### 2.3 Pearson correlation (linear)

In [None]:
# Output locations for the Pearson section.
corr_csv = os.path.join(output_dir, "correlation_matrix.csv")
rolling_corr_path = os.path.join(output_dir, "rolling_correlation_multi.png")

# The returns computed from the price table feed both correlation helpers.
returns = calculate_returns(prices)
corr_matrix_returns = analyze_correlation(returns, output_path=corr_csv)

# Rolling correlation of the base asset against each comparison asset.
rolling_correlation_multi(
    returns,
    base_asset="BTC",
    compare_assets=["SPY", "GLD", "AGG"],
    window=30,
    output_path=rolling_corr_path,
)

# Show the image at the path passed to the rolling-correlation helper.
Image(filename=rolling_corr_path)