# data

In [10]:
import FinanceDataReader as fdr
import numpy as np
import pandas as pd
np.random.seed(14)

import torch
import torch.nn as nn
from scipy.spatial import distance
from sklearn.metrics import pairwise_distances

In [11]:
# Load one price table per ticker through FinanceDataReader.
# '2021' is passed as the second positional argument for every ticker
# (presumably the start of the date range -- confirm against the fdr API).
# Each table is bound to a module-level name identical to its ticker symbol.
AAPL, GOOGL, MSFT, NFLX, TSLA = (
    fdr.DataReader(ticker, '2021')
    for ticker in ('AAPL', 'GOOGL', 'MSFT', 'NFLX', 'TSLA')
)

In [12]:
# Close prices: one column per ticker, taken from each table's 'Close' column.
df_close = pd.DataFrame({
    ticker: prices['Close']
    for ticker, prices in (
        ('AAPL', AAPL),
        ('GOOGL', GOOGL),
        ('MSFT', MSFT),
        ('NFLX', NFLX),
        ('TSLA', TSLA),
    )
})
# Log returns: log of each close divided by the previous row's close.
# Rows containing NaN (at least the first row, which has no predecessor)
# are dropped.
df_logret = np.log(df_close.div(df_close.shift(1))).dropna()
# mu: mean log return per ticker, scaled by the number of remaining rows.
mu_df = df_logret.mean() * df_logret.shape[0]
mu = mu_df.to_numpy()


In [13]:
# Sigma: covariance matrix of the log returns, scaled by the number of
# rows in df_logret; kept both as a labelled DataFrame and as a plain array.
Sigma_df = df_logret.cov().mul(df_logret.shape[0])
Sigma = Sigma_df.to_numpy()


In [20]:
# Placeholder word embeddings: a 5 x 128 array of uniform random values
# in [0, 1), drawn from NumPy's global random state.
word_embeds = np.random.random_sample((5, 128))

# Sigma_embeddings
- off-diagonal: 두 주식의 text embedding 사이의 Minkowski distance (기본값 p=2, 즉 Euclidean distance)
- diagonal: 주식의 과거 시계열의 분산
    - look-back window of 4 months

In [None]:
class MyModel(nn.Module):
    """Blend a covariance matrix with a matrix derived from stock embeddings.

    The embedding-based matrix has pairwise distances between the stocks'
    embedding vectors off the diagonal and the original variances on the
    diagonal. The final matrix is a convex-style combination of the two,
    weighted by ``lambda_``.
    """

    def __init__(self, device, word_embed, lambda_:float, Sigma_original) -> None:
        """
        Args:
            device: stored on the instance; not used by the methods of this class
            word_embed: vector representations of stocks, one row per stock (n x d)
            lambda_: weights between original cov and new cov (hyperparameter);
                the original gets ``lambda_`` and the embedding-based matrix
                gets ``1 - lambda_``
            Sigma_original: (n x n) covariance matrix of the stocks
        """
        super().__init__()
        self.device = device
        self.word_embed = word_embed
        self.lambda_ = lambda_
        self.Sigma_original = Sigma_original
        self.n = Sigma_original.shape[0]  # number of stocks
        self.d = word_embed.shape[1]  # embedding dimension
        # Populated by get_Sigma_embeddings(); None until it has been computed.
        self.Sigma_embeddings = None

    def get_Sigma_embeddings(self):
        """Build the embedding-based matrix and store it in ``self.Sigma_embeddings``.

        Off-diagonal entries are the Minkowski distances between every pair of
        rows of ``word_embed`` (SciPy's default order p=2, i.e. Euclidean).
        Diagonal entries are copied from the diagonal of ``Sigma_original``.

        Returns:
            The (n x n) array that was stored in ``self.Sigma_embeddings``.

        Raises:
            ValueError: if ``word_embed`` does not have one row per stock.
        """
        embeds = np.asarray(self.word_embed, dtype=float)
        if embeds.shape[0] != self.n:
            raise ValueError(
                f"word_embed has {embeds.shape[0]} rows but Sigma_original "
                f"describes {self.n} stocks"
            )

        # Condensed pairwise distances expanded to a fresh square (n x n) array.
        # This is the SciPy computation that sklearn's
        # pairwise_distances(X, metric='minkowski') defers to.
        sigma_embed = distance.squareform(
            distance.pdist(embeds, metric='minkowski')
        )

        # np.fill_diagonal modifies its argument in place and returns None, so
        # the array itself must be kept -- not the call's return value.
        np.fill_diagonal(sigma_embed, np.diagonal(self.Sigma_original))

        self.Sigma_embeddings = sigma_embed
        return sigma_embed

    def get_Sigma_new(self):
        """Return ``lambda_ * Sigma_original + (1 - lambda_) * Sigma_embeddings``.

        The embedding-based matrix is computed on first use if
        ``get_Sigma_embeddings`` has not been called yet.
        """
        if self.Sigma_embeddings is None:
            self.get_Sigma_embeddings()
        return (self.lambda_ * self.Sigma_original
                + (1 - self.lambda_) * self.Sigma_embeddings)
