# data

In [46]:
import FinanceDataReader as fdr
import numpy as np
import pandas as pd
np.random.seed(14)

import torch
import torch.nn as nn, optim
import torch.nn.functional as F
from scipy.spatial import distance
from sklearn.metrics import pairwise_distances

  from .autonotebook import tqdm as notebook_tqdm


In [47]:
# Fetch one price table per ticker. '2021' is forwarded as the reader's
# second argument (presumably the start of the date range -- confirm
# against the FinanceDataReader documentation).
AAPL, GOOGL, MSFT, NFLX, TSLA = (
    fdr.DataReader(symbol, '2021')
    for symbol in ('AAPL', 'GOOGL', 'MSFT', 'NFLX', 'TSLA')
)

In [48]:
# Collect each ticker's 'Close' column into one table, one column per ticker.
df_close = pd.DataFrame({
    ticker: prices['Close']
    for ticker, prices in (
        ('AAPL', AAPL),
        ('GOOGL', GOOGL),
        ('MSFT', MSFT),
        ('NFLX', NFLX),
        ('TSLA', TSLA),
    )
})
# Log return = log(price_t / price_{t-1}); rows containing NaN (including
# the first row, which has no previous price) are dropped.
df_logret = np.log(df_close / df_close.shift(1)).dropna()
# mu: mean log return per ticker scaled by the number of observations,
# i.e. the summed log return over the whole sample.
mu_df = len(df_logret) * df_logret.mean()
mu = mu_df.to_numpy()


In [49]:
# Sigma: covariance matrix of the log returns, scaled by the number of
# observations (the same scaling that is applied to mu).
Sigma_df = len(df_logret) * df_logret.cov()
Sigma = Sigma_df.to_numpy()


In [50]:
# Placeholder word embeddings: a 5 x 128 matrix of uniform random values
# in [0, 1), drawn from NumPy's global random state.
word_embeds = np.random.random_sample(size=(5, 128))

# Model
- Sigma_embeddings
    - non-diagonal: 두 주식의 text embedding 사이의 Minkowski distance 
    - diagonal: 주식의 과거 시계열의 분산
        - look-back window of 4 months
- mu
    - input: merge text embeddings and historical returns 

In [65]:
class MyModel(nn.Module):
    """Blend a historical covariance matrix with an embedding-distance matrix.

    The embedding-based matrix holds pairwise Minkowski distances between the
    stocks' embedding vectors off the diagonal and the diagonal of the
    original covariance matrix on the diagonal. The final matrix is a convex
    combination of the original and the embedding-based matrices.
    """

    def __init__(self, device, word_embeds, lambda_:float, Sigma_original) -> None:
        """
        Args:
            device: stored on the instance; not otherwise used by this class.
            word_embeds: vector representations of stocks, one row per stock.
            lambda_: weight between original cov and new cov (hyperparameter);
                the original matrix gets ``lambda_`` and the embedding-based
                matrix gets ``1 - lambda_``.
            Sigma_original: square matrix whose diagonal is copied onto the
                embedding-based matrix.
        """
        super().__init__()
        self.device = device
        self.word_embeds = word_embeds
        self.lambda_ = lambda_
        self.Sigma_original = Sigma_original
        self.n = Sigma_original.shape[0]  # number of stocks
        self.d = word_embeds.shape[1]  # embedding dimension
        # Filled in by get_Sigma_embeddings(); None means "not computed yet".
        self.Sigma_embeddings = None

    def get_Sigma_embeddings(self):
        """Build, store and return the embedding-based matrix.

        Returns:
            The (n x n) matrix that is also stored as ``self.Sigma_embeddings``.
        """
        Sigma_embeddings = pairwise_distances(self.word_embeds, metric='minkowski')  # (n x n)
        # The distance matrix has zeros on its diagonal; overwrite them with
        # the diagonal of the original matrix.
        np.fill_diagonal(Sigma_embeddings, self.Sigma_original.diagonal())
        self.Sigma_embeddings = Sigma_embeddings
        return Sigma_embeddings

    def get_Sigma_new(self):
        """Return ``lambda_ * Sigma_original + (1 - lambda_) * Sigma_embeddings``.

        The embedding-based matrix is computed on demand if it has not been
        built yet, so this method no longer depends on call order.
        """
        if self.Sigma_embeddings is None:
            self.get_Sigma_embeddings()
        return self.lambda_*self.Sigma_original + (1-self.lambda_)*self.Sigma_embeddings

    def get_mu(self, DNN_trained, input):
        """Run a trained network on ``input`` and return its raw output.

        Args:
            DNN_trained: a module exposing ``eval()`` and callable on
                ``input``. It is switched to eval mode and left in that mode.
            input: value passed unchanged to ``DNN_trained``.

        Returns:
            The network output, computed without gradient tracking.
            NOTE(review): presumably this is the expected-return estimate
            (mu); no post-processing is defined here -- confirm.
        """
        DNN_trained.eval()
        with torch.no_grad():
            # Call the module itself rather than .forward() so registered
            # hooks run as well.
            return DNN_trained(input)

In [None]:
class Classifier(nn.Module):
    """Fully connected network with ReLU activations and dropout.

    Four linear layers are created (784 -> 106 -> 53 -> 26 -> 21), but the
    forward pass only applies the first three, so the output has 26 features.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=106)
        self.fc2 = nn.Linear(in_features=106, out_features=53)
        self.fc3 = nn.Linear(in_features=53, out_features=26)
        self.fc4 = nn.Linear(in_features=26, out_features=21)

        # Dropout applied after every activation, with drop probability 0.2.
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Flatten ``x`` per sample and apply fc1..fc3 with ReLU and dropout."""
        # Collapse everything after the batch dimension into one axis.
        hidden = x.view(x.shape[0], -1)

        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(F.relu(layer(hidden)))

        # NOTE(review): fc4 is defined but not applied here (a log_softmax
        # output head was previously commented out), so the returned tensor
        # is the dropout-regularised fc3 activation -- confirm intended.
        return hidden

def train(train_loader=None, test_loader=None, epochs=1, lr=0.1):
    """Train a fresh ``Classifier`` with L1 loss and Adam, then return it.

    After every epoch the model is evaluated on the test loader and the
    average training loss, test loss and test accuracy are printed.

    Args:
        train_loader: iterable of ``(images, labels)`` batches supporting
            ``len()``. Defaults to the module-level ``trainloader``.
        test_loader: iterable of ``(images, labels)`` batches supporting
            ``len()``. Defaults to the module-level ``testloader``.
        epochs: number of passes over ``train_loader``.
        lr: learning rate passed to Adam.

    Returns:
        The trained model, left in training mode.
    """
    # Imported here so the optimizer always comes from torch.optim, whatever
    # the module-level name `optim` happens to be bound to.
    from torch import optim

    # Calling train() with no arguments keeps using the notebook-level loaders.
    if train_loader is None:
        train_loader = trainloader
    if test_loader is None:
        test_loader = testloader

    model = Classifier()

    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for e in range(epochs):
        running_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()

            log_ps = model(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        test_loss = 0
        accuracy = 0

        # Evaluation pass: dropout disabled and no gradient tracking.
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                log_ps = model(images)
                # Accumulate plain floats, matching running_loss above.
                test_loss += criterion(log_ps, labels).item()

                # NOTE(review): the accuracy computation treats the model
                # output as log-probabilities and the labels as class
                # indices, while the loss above is L1 -- confirm which
                # target format is intended.
                ps = torch.exp(log_ps)
                _, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
        model.train()

        print("Epoch: {}/{}.. ".format(e+1, epochs),
            "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
            "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
            "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

    return model
        

In [67]:
# NOTE(review): this rebinds the name `MyModel` from the class to an
# instance, so the class can no longer be instantiated by that name after
# this cell has run -- consider a separate variable name.
MyModel = MyModel(
    device=torch.device("cuda"),
    word_embeds=word_embeds,
    lambda_=0.8,
    Sigma_original=Sigma,
)

# Build the embedding-based matrix first; get_Sigma_new() reads it.
MyModel.get_Sigma_embeddings()

# Blend of the original and embedding-based matrices (lambda_ = 0.8).
Sigma_new = MyModel.get_Sigma_new()
Sigma_new

array([[0.10496193, 0.97825637, 0.99331543, 0.94968438, 0.98139989],
       [0.97825637, 0.10917826, 0.95832465, 1.02096128, 0.95982202],
       [0.99331543, 0.95832465, 0.09099032, 1.00257978, 1.01041754],
       [0.94968438, 1.02096128, 1.00257978, 0.45598878, 1.02048379],
       [0.98139989, 0.95982202, 1.01041754, 1.02048379, 0.47934328]])