In [1]:
#origin pytorch

In [2]:
import torch 

In [3]:
import torch.nn as nn

In [4]:
import base

In [5]:
import os

import pandas as pd
import numpy as np
from pytorch_lightning import (
    LightningDataModule,
)
import torch
import torch.utils.data as torch_data
from esig import tosig
from sklearn.preprocessing import MinMaxScaler


In [6]:
def leadlag(X):
    """Return the lead-lag transform of a sequence as a two-column array.

    Every pair of consecutive samples ``(a, b)`` contributes two rows,
    ``(a, a)`` followed by ``(a, b)``, and the path is closed with
    ``(X[-1], X[-1])``. For a 1-D input of length ``n`` the result has shape
    ``(2 * n - 1, 2)``: column 0 is the lag component, column 1 the lead
    component.

    An empty ``X`` raises ``IndexError`` (from ``X[-1]``).
    """
    lag_path, lead_path = [], []

    for current, following in zip(X[:-1], X[1:]):
        # The lag component stays on the current sample for both rows,
        # while the lead component steps ahead on the second row.
        lag_path.extend((current, current))
        lead_path.extend((current, following))

    # Close the path with both components sitting on the final sample.
    final = X[-1]
    lag_path.append(final)
    lead_path.append(final)

    return np.c_[lag_path, lead_path]

In [7]:
# Global configuration shared by the cells below.
batch_size = 32  # NOTE(review): the DataLoader cell in this notebook hard-codes batch_size=247 instead of using this value
num_workers = 16  # NOTE(review): not referenced by any cell visible in this notebook
# Scaler later fitted on the log-signature features; the target range stays strictly inside (0, 1).
minmax = MinMaxScaler(feature_range=(0.00001, 0.99999))
level = 4  # truncation level passed to tosig.stream2logsig
lr = 0.005  # learning rate handed to the Adam optimizer

In [8]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each sample with its conditioning vector.

    Both inputs are converted to float32 tensors and indexed along their
    first dimension, so item ``i`` is ``(data[i], index_data[i])``.

    NOTE: a later cell of this notebook runs
    ``from torch.utils.data import Dataset, DataLoader``, which rebinds the
    name ``Dataset`` to the PyTorch base class. Instantiate this class before
    that cell is executed (or re-run this cell first).

    Args:
        data: array-like of samples, one row per item.
        index_data: array-like of conditioning vectors, one row per item.

    Raises:
        ValueError: if ``data`` and ``index_data`` do not contain the same
            number of rows.
    """

    def __init__(self, data, index_data):
        self.data = torch.FloatTensor(data)
        self.index_data = torch.FloatTensor(index_data)
        # __len__ only looks at index_data, so a mismatch would otherwise go
        # unnoticed until an out-of-range lookup (or silently drop samples).
        if len(self.data) != len(self.index_data):
            raise ValueError(
                "data and index_data must have the same length, got "
                f"{len(self.data)} and {len(self.index_data)}"
            )

    def __len__(self):
        """Return the number of (sample, condition) pairs."""
        return len(self.index_data)

    def __getitem__(self, index):
        """Return the ``(sample, condition)`` pair stored at ``index``."""
        return self.data[index], self.index_data[index]

In [9]:
# Daily S&P 500 quotes; point the file name at 'EURUSD.csv' to load the
# EUR/USD series from the same directory instead.
path = os.path.join(base.data_path, 'SP 500 GSPC.csv')

# Keep only the "Close" column, indexed by the first column of the CSV.
data = pd.read_csv(path, index_col=0)["Close"]

In [10]:
data

Date
1927-12-30      17.660000
1928-01-03      17.760000
1928-01-04      17.719999
1928-01-05      17.549999
1928-01-06      17.660000
                 ...     
2020-04-28    2863.389893
2020-04-29    2939.510010
2020-04-30    2912.429932
2020-05-01    2830.709961
2020-05-04    2842.739990
Name: Close, Length: 23194, dtype: float64

In [11]:
def our_resample(data, window=5):
    """Split ``data`` into consecutive, non-overlapping chunks of ``window`` rows.

    Args:
        data: pandas object supporting positional slicing and ``.values``
            (e.g. a ``Series``).
        window: number of rows per chunk; must be a positive integer.

    Returns:
        A 2-D array of shape ``(len(data) // window, window)`` when the length
        is a multiple of ``window``. Otherwise a 1-D object array of chunks,
        whose last chunk is shorter than ``window``.

    Raises:
        ValueError: if ``window`` is not positive.
    """
    if window <= 0:
        raise ValueError(f"window must be a positive integer, got {window}")

    # Step straight to the chunk starts instead of testing every index.
    chunks = [data[start:start + window].values for start in range(0, len(data), window)]

    if len(data) % window == 0:
        return np.array(chunks)

    # The trailing chunk is shorter: recent NumPy refuses to build an array
    # from ragged rows unless dtype=object is requested explicitly.
    return np.array(chunks, dtype=object)

In [12]:
# for values in our_resample(data):


In [13]:
# Build one lead-lag path per calendar month, turn each path into a
# log-signature feature vector, and pair every month with the previous one.

# resample() needs a DatetimeIndex; the CSV index was read as plain labels.
data.index = data.index.astype("datetime64[ns]")

windows = []         # lead-lag path of each monthly window
windows_values = []  # raw closing prices of each monthly window
for _, window in data.resample("M"):
    values = window.values  # / window.values[0]
    windows_values.append(values)
    # Appended directly so the module-level `path` (the CSV file path) is
    # not overwritten by a NumPy array.
    windows.append(leadlag(values))

# One log-signature (truncated at `level`) per monthly path.
orig_logsig = np.array(
    [tosig.stream2logsig(window_path, level) for window_path in windows]
)

# Rescale every feature into the range configured on `minmax`.
logsig = minmax.fit_transform(orig_logsig)

# Sample t is conditioned on the features of window t - 1.
logsigs = logsig[1:]
conditions = logsig[:-1]

train_data = Dataset(logsigs, conditions)

In [14]:
class Encoder(nn.Module):
    """Encoder producing the mean and log-variance of the latent Gaussian.

    Args:
        common_network: module applied first to the (optionally conditioned)
            input.
        mean_network: module mapping the shared features to the latent mean.
        logvar_network: module mapping the shared features to the latent
            log-variance.
    """

    def __init__(self, common_network, mean_network, logvar_network):
        super().__init__()
        self.common_network = common_network
        self.mean_network = mean_network
        self.logvar_network = logvar_network

    def forward(self, x, cond=None):
        """Encode ``x``, optionally conditioned on ``cond``.

        Args:
            x: input tensor; concatenation happens along dimension 1.
            cond: conditioning tensor concatenated to ``x`` along dimension 1.
                When ``None`` (the default) ``x`` is fed to the network
                unchanged, so ``common_network`` must then accept inputs of
                ``x``'s width.

        Returns:
            Tuple ``(mean, logvar)``.
        """
        # The declared default used to crash inside torch.cat; treat a missing
        # condition as "no conditioning" instead.
        x_cond = x if cond is None else torch.cat((x, cond), dim=1)
        shared = self.common_network(x_cond)
        return self.mean_network(shared), self.logvar_network(shared)

class Decoder(nn.Module):
    """Decoder mapping a latent sample (plus condition) back to data space.

    Args:
        decode_network: module applied to the (optionally conditioned) latent
            tensor.
    """

    def __init__(self, decode_network):
        super().__init__()
        self.decode_network = decode_network

    def forward(self, x, cond=None):
        """Decode ``x``, optionally conditioned on ``cond``.

        Args:
            x: latent tensor; concatenation happens along dimension 1.
            cond: conditioning tensor concatenated to ``x`` along dimension 1.
                When ``None`` (the default) ``x`` is decoded unchanged, so
                ``decode_network`` must then accept inputs of ``x``'s width.

        Returns:
            The output of ``decode_network``.
        """
        # The declared default used to crash inside torch.cat; treat a missing
        # condition as "no conditioning" instead.
        x_cond = x if cond is None else torch.cat((x, cond), dim=1)
        return self.decode_network(x_cond)

In [15]:
# Layer widths used by the network definitions in the next cell.
# NOTE(review): input_dim / condition_dim presumably equal the log-signature
# width produced by tosig.stream2logsig for the 2-column lead-lag paths at
# level 4 — confirm against logsig.shape[1].
input_dim = 8  # width of the vector fed to the encoder and produced by the decoder
condition_dim = 8  # width of the conditioning vector concatenated to inputs and latents


middle_dim = 50  # output width of the shared encoder layer
hidden_dim = 8  # latent width (mean / logvar outputs); also the decoder's inner layer width

In [16]:
# Sub-networks handed to the CVAE. Each nn.Linear draws its initial weights
# when it is constructed, so the order of these definitions determines the
# initialisation obtained for a given random seed.

# Shared encoder trunk: (input ++ condition) -> middle_dim features.
common_network = nn.Sequential(
    nn.Linear(input_dim+ condition_dim,middle_dim),
    nn.LeakyReLU(0.3),
)
# Encoder head producing the latent mean.
mean_network = nn.Sequential(
    nn.Linear(middle_dim,hidden_dim),
    nn.LeakyReLU(0.3),
)
# Encoder head producing the latent log-variance.
logvar_network = nn.Sequential(
    nn.Linear(middle_dim,hidden_dim),
    nn.LeakyReLU(0.3),
)
# Decoder: (latent ++ condition) -> input_dim values; the final Sigmoid keeps
# every output inside (0, 1).
decode_network = nn.Sequential(
    nn.Linear(hidden_dim+condition_dim,hidden_dim),
    nn.LeakyReLU(0.3),
    nn.Linear(hidden_dim,input_dim),
    nn.Sigmoid(),
)

In [17]:
class CVAE(nn.Module):
    """Conditional variational auto-encoder built from user-supplied networks.

    Args:
        common_network: shared encoder trunk.
        mean_network: encoder head producing the latent mean.
        logvar_network: encoder head producing the latent log-variance.
        decode_network: network mapping (latent ++ condition) to the output.
    """

    def __init__(self, common_network, mean_network, logvar_network, decode_network):
        super().__init__()
        self.encoder = Encoder(common_network, mean_network, logvar_network)
        self.decoder = Decoder(decode_network)

    def _sample(self, mean, logvar):
        """Draw ``z = mean + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, I)``."""
        # randn_like creates the noise with mean's device and dtype;
        # torch.randn(mean.shape) always used the defaults, which fails with
        # a device mismatch once the model is moved off the CPU.
        eps = torch.randn_like(mean)
        sigma = torch.exp(0.5 * logvar)
        return mean + eps * sigma

    def forward(self, x, y):
        """Encode ``x`` given condition ``y``, sample a latent and decode it.

        Returns:
            Tuple ``(x_hat, mean, logvar)``: the reconstruction and the
            parameters of the approximate posterior.
        """
        mean, logvar = self.encoder(x, y)
        z = self._sample(mean, logvar)
        x_hat = self.decoder(z, y)
        return x_hat, mean, logvar

In [18]:
def loss(X, X_hat, mean, logvar, alpha=0.003):
    """Weighted CVAE objective: reconstruction error plus KL regulariser.

    Both terms are reduced per sample (over dimension 1) and the weighted sum
    is then averaged over the batch, so the balance between them does not
    depend on the batch size.

    Args:
        X: target batch, shape ``(batch, features)``.
        X_hat: reconstruction with the same shape as ``X``.
        mean: latent means, shape ``(batch, latent)``.
        logvar: latent log-variances, same shape as ``mean``.
        alpha: weight of the KL term; the reconstruction term is weighted by
            ``1 - alpha``.

    Returns:
        Scalar tensor holding the batch-averaged loss.
    """
    # KL(N(mean, exp(logvar)) || N(0, I)) for every sample.
    kl_divergence = 0.5 * torch.sum(
        torch.exp(logvar) + torch.square(mean) - 1 - logvar, dim=1
    )
    # Squared error per sample. Summing over the whole batch here would
    # broadcast the batch total onto every sample's KL term.
    reconstruction = torch.sum(torch.square(X - X_hat), dim=1)

    return torch.mean((1 - alpha) * reconstruction + alpha * kl_divergence)
    

In [19]:
# Assemble the conditional VAE from the four sub-networks defined earlier.
net = CVAE(
    common_network,
    mean_network,
    logvar_network,
    decode_network,
)

In [20]:
# Adam over every CVAE parameter, using the learning rate from the config cell.
optim = torch.optim.Adam(net.parameters(), lr=lr)

In [21]:
from torch.utils.data import Dataset,DataLoader

In [22]:
# Shuffled mini-batches of 247 (sample, condition) pairs; the final, smaller
# batch is kept.
dataloader = DataLoader(
    train_data,
    batch_size=247,
    shuffle=True,
    drop_last=False,
)


In [23]:
from tqdm.auto import tqdm

In [24]:
# Number of full passes over the training data.
n_epochs = 10_000

In [25]:
# Train the CVAE: one optimiser step per mini-batch, n_epochs passes in total.
progress = tqdm(range(n_epochs), desc="Training")
for epoch in progress:
    net.train()
    epoch_loss = 0.0
    for X, y in dataloader:
        optim.zero_grad()
        X_hat, mean, logvar = net(X, y)
        loss_end = loss(X, X_hat, mean, logvar)
        loss_end.backward()
        optim.step()
        epoch_loss += loss_end.item()
    # Show the mean batch loss on the progress bar so convergence can be
    # monitored; refresh=False leaves redrawing to tqdm's normal schedule.
    progress.set_postfix(loss=epoch_loss / max(len(dataloader), 1), refresh=False)


Training:   0%|          | 0/10000 [00:00<?, ?it/s]

KeyboardInterrupt: 