In [None]:
from pathlib import Path
from datetime import datetime

from tqdm import tqdm
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import stock

In [None]:
# Each run writes into its own directory, named after the wall-clock time at
# which this cell was executed.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = Path(f"./tmp/{timestamp}")

# Build the trainer from its parameter object; only `output_dir` is set here,
# every other TrainerParams field keeps whatever default the project defines.
params = stock.dl.train.TrainerParams(output_dir=output_dir)
trainer = stock.dl.train.Trainer(params=params)

# Print the architecture of the model the trainer exposes.
trainer.model.summary()

In [None]:
# Run the trainer's training routine. What it does (epochs, checkpoints,
# logging) is defined in stock.dl.train and is not visible from this notebook.
trainer.train()

In [None]:
# Evaluate on the third dataset returned by the split helper; the first two
# (presumably the train and validation splits - confirm in stock.dl) are unused.
model = trainer.model
_, _, test_ds = trainer.dataset.get_train_val_test_dataset()

# One entry per element yielded by test_ds: [inputs, targets, model output].
result = []
for batch_x, batch_y in test_ds:
    result.append([batch_x, batch_y, model(batch_x)])

In [None]:
# Join the per-batch pieces collected in `result` along axis 0 and convert
# them to NumPy arrays. The model output is an object exposing .mean() and
# .stddev() (presumably a TensorFlow Probability distribution - confirm).
x = tf.concat([inputs for inputs, _, _ in result], axis=0).numpy()
y = tf.concat([targets for _, targets, _ in result], axis=0).numpy()
y_pred_mean = tf.concat([dist.mean() for _, _, dist in result], axis=0).numpy()
y_pred_std = tf.concat([dist.stddev() for _, _, dist in result], axis=0).numpy()

In [None]:
# Plot the predicted standard deviations of the first test sample (index 0
# along the concatenated axis), i.e. one value per remaining position.
# NOTE(review): the neighbouring cells plot a single column over all samples
# (`[:, k]`); confirm that a row slice (`[0]`) is what is intended here.
plt.plot(y_pred_std[0])

In [None]:
# Overlay the first two columns of the test targets. Each series is labelled
# so the two lines can be told apart in the rendered figure.
plt.plot(y[:, 0], label="y[:, 0]")
plt.plot(y[:, 1], label="y[:, 1]")
plt.xlabel("test sample index")
plt.title("Test targets, columns 0 and 1")
plt.legend()
plt.show()

In [None]:
# Overlay the first two columns of the predicted means. Each series is
# labelled so the two lines can be told apart in the rendered figure.
plt.plot(y_pred_mean[:, 0], label="y_pred_mean[:, 0]")
plt.plot(y_pred_mean[:, 1], label="y_pred_mean[:, 1]")
plt.xlabel("test sample index")
plt.title("Predicted means, columns 0 and 1")
plt.legend()
plt.show()

In [None]:
# Column 0: print the 2x2 Pearson correlation matrix between the true values
# and the predicted means (the off-diagonal entry is the correlation itself),
# then overlay both series with a legend so they are distinguishable.
print(np.corrcoef(y[:, 0], y_pred_mean[:, 0]))
plt.plot(y[:, 0], label="true")
plt.plot(y_pred_mean[:, 0], label="predicted mean")
plt.xlabel("test sample index")
plt.title("Column 0: true vs. predicted mean")
plt.legend()
plt.show()

In [None]:
# Column 1: print the 2x2 Pearson correlation matrix between the true values
# and the predicted means (the off-diagonal entry is the correlation itself),
# then overlay both series with a legend so they are distinguishable.
print(np.corrcoef(y[:, 1], y_pred_mean[:, 1]))
plt.plot(y[:, 1], label="true")
plt.plot(y_pred_mean[:, 1], label="predicted mean")
plt.xlabel("test sample index")
plt.title("Column 1: true vs. predicted mean")
plt.legend()
plt.show()

In [None]:
# Column 1: true value (x-axis) against predicted mean (y-axis). Points that
# fall on the red identity line are perfect predictions.
plt.scatter(y[:, 1], y_pred_mean[:, 1])

# Derive the identity line's endpoints from the plotted data so the line
# always spans the points, whatever scale the data happens to have.
diag_lo = min(np.nanmin(y[:, 1]), np.nanmin(y_pred_mean[:, 1]))
diag_hi = max(np.nanmax(y[:, 1]), np.nanmax(y_pred_mean[:, 1]))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color="red")

plt.xlabel("true (column 1)")
plt.ylabel("predicted mean (column 1)")
plt.show()

In [None]:
# Columns are consumed in adjacent pairs (2k, 2k + 1); a trailing unpaired
# column is ignored. For each pair, store the Pearson correlation between the
# true values and the predicted means: even column -> low_corrs,
# odd column -> high_corrs.
low_corrs = [
    np.corrcoef(y[:, 2 * k], y_pred_mean[:, 2 * k])[0, 1]
    for k in range(y.shape[1] // 2)
]
high_corrs = [
    np.corrcoef(y[:, 2 * k + 1], y_pred_mean[:, 2 * k + 1])[0, 1]
    for k in range(y.shape[1] // 2)
]

In [None]:
# For every test sample, pick the output column whose predicted standard
# deviation is smallest, then read the true value and the predicted mean at
# that column.
indices = [np.argmin(y_pred_std[row]) for row in range(y.shape[0])]
best_data_true = [y[row, col] for row, col in enumerate(indices)]
best_data_pred = [y_pred_mean[row, col] for row, col in enumerate(indices)]

In [None]:
# Restricting each sample to its lowest-predicted-std column: print the 2x2
# Pearson correlation matrix between true values and predicted means, then
# scatter them. Points on the red identity line are perfect predictions.
print(np.corrcoef(best_data_true, best_data_pred))
plt.scatter(best_data_true, best_data_pred)

# Derive the identity line's endpoints from the plotted data so the line
# always spans the points, whatever scale the data happens to have.
diag_lo = min(np.nanmin(best_data_true), np.nanmin(best_data_pred))
diag_hi = max(np.nanmax(best_data_true), np.nanmax(best_data_pred))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color="red")

plt.xlabel("true (lowest-std column per sample)")
plt.ylabel("predicted mean (lowest-std column per sample)")
plt.show()

In [None]:
# Split the dataset's raw matrix into its US and JP column groups.
# NOTE(review): this reads the dataset's underscore-prefixed index attributes
# directly; a public accessor would be preferable if the project offers one.
dataset = trainer.dataset
us_data = dataset.data[:, dataset._us_data_indices]
jp_data = dataset.data[:, dataset._jp_data_indices]

In [None]:
# Pearson correlation between every US column (rows of corr_mat) and every JP
# column (columns of corr_mat), computed with array operations instead of one
# np.corrcoef call per (i, j) pair. The result equals the per-pair values up
# to floating-point round-off.
#
# corr(a, b) = sum(a_c * b_c) / (||a_c|| * ||b_c||), where a_c and b_c are the
# mean-centered columns. Work in float64, as np.corrcoef does.
us_centered = np.asarray(us_data, dtype=np.float64)
us_centered = us_centered - us_centered.mean(axis=0)
jp_centered = np.asarray(jp_data, dtype=np.float64)
jp_centered = jp_centered - jp_centered.mean(axis=0)

us_norm = np.linalg.norm(us_centered, axis=0)
jp_norm = np.linalg.norm(jp_centered, axis=0)

# A constant column has zero norm, so its entries become NaN (with a
# RuntimeWarning) - the same outcome np.corrcoef gives for such a column.
corr_mat = (us_centered.T @ jp_centered) / np.outer(us_norm, jp_norm)

# np.corrcoef clips round-off overshoot into [-1, 1]; mirror that here.
corr_mat = np.clip(corr_mat, -1.0, 1.0)

In [None]:
# Largest US-JP correlation in the whole matrix (NaN if any entry is NaN).
corr_mat.max()

In [None]:
# Render the correlation matrix as an image: one row per US column and one
# column per JP column, with colour encoding the correlation value.
plt.imshow(corr_mat)