In [1]:
import os, datetime
import pandas as pd
import numpy as np
import seaborn as sn
import copy
import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision

# from sklearn import tree, svm
# from sklearn.pipeline import make_pipeline
# from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

from scipy import stats

plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = .5
plt.rcParams['font.size'] = 10
plt.rcParams['figure.figsize'] = (16, 10)

In [2]:
# from lutils.fin.autoencoder import Autoencoder, Normalizer

In [3]:
# np.set_printoptions(precision=4, suppress=True)

In [4]:
# %matplotlib inline
%matplotlib qt

In [5]:
# from lutils.fin.data_loader import load

In [6]:
# rbs = (
#     ('2021-04-06', 'SHFE.rb2110'),
#     ('2021-08-10', 'SHFE.rb2201'),
#     ('2021-11-25', 'SHFE.rb2205'),
#     ('2022-03-29', 'SHFE.rb2210'),
#     ('2022-08-30', 'SHFE.rb2301'),
#     ('2022-12-02', 'SHFE.rb2305'),
#     ('2023-04-04', 'SHFE.rb2310'),
# )

In [7]:
# dfs = []
# for i, (d, exchange_symbol) in enumerate(rbs):
#     start_date = d
    
#     exchange, symbol = exchange_symbol.split('.')
#     df = load(exchange, symbol)
#     df.index = df.datetime
#     if i < len(rbs) - 1:
#         end_date = rbs[i+1][0]
#         dfs.append(df[(df.index >= start_date) & (df.index < end_date)])
#     else:
#         dfs.append(df[start_date:])

In [8]:
# df = pd.concat(dfs)

In [9]:
# store = pd.HDFStore('D:/option/rb.h5', 'w', complevel=7)
# store.append('rb', df)
# store.flush()
# store.close()

In [10]:
# store = pd.HDFStore('D:/option/rb.h5', 'r')
# df = store['rb']
# store.close()

In [11]:
# df.drop(df.loc['2021-05-10'].index, inplace=True)

In [12]:
# df.dropna(inplace=True)

In [13]:
def resample_df(df, exchange, symbol, rule):
    
    between_times = [['09:00', '10:15'], ['10:30', '11:30'], ['13:30', '15:00'], ['21:00', '23:00']]
    
    dfs = []
    with tqdm.tqdm(np.unique(df.index.date)) as bar:
        bar.set_description('%s %s' % (exchange, symbol))
        for d in bar:
            bar.set_postfix({'date': d})
            df_day = df[df.index.date == d]
            
            if df_day.shape[0] > 0:
                df_resample_sec = df_day.resample(rule).last()
            
                for (start, end) in between_times:
                    df_hour = df_resample_sec.between_time(start, end)

                    if df_hour.shape[0] > 0:
                        dfs.append(df_hour)
                    
            
    if len(dfs) > 0:
        df_sec = pd.concat(dfs)
    
        store = pd.HDFStore(os.path.join('D:/option', '%s.%s_%s.h5' % (exchange, symbol, rule)), 'w', complevel=7)
        store.append(symbol, df_sec)
        store.flush()
        store.close()

In [14]:
# resample_df(df, 'SHFE', 'rb', '1S')

In [15]:
store = pd.HDFStore('D:/option/SHFE.rb_1S.h5', 'r')
df = store['rb']
store.close()

In [16]:
class RbDataset(Dataset):
    def __init__(self, x):
#         x = np.log(x)
#         x = np.diff(x, n=1)
        x = x.reshape([-1, 1])
        self.x = x.astype(np.float32)
        
    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx]

In [17]:
device = 0
n_epochs = 40
batch_size = 32

In [18]:
split_index = int(df.shape[0] * .8)
        
data_train = df[:split_index]
data_val = df[split_index:]

dataset_train = RbDataset(data_train['last_price'].dropna().values)
dataset_val = RbDataset(data_val['last_price'].dropna().values)

In [19]:
train_dataloader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(dataset_val, batch_size=batch_size, shuffle=True)

In [20]:
# class Autoencoder(nn.Module):
#     def __init__(self): # 1 1
#         super(Autoencoder, self).__init__()
        
#         self.encoder = nn.Sequential(
#             nn.Linear(1, 256),
#             nn.BatchNorm1d(256),
#             nn.ReLU(),
#             nn.Linear(256, 128),
#             nn.ReLU(),
#             nn.Linear(128, 64),
#             nn.ReLU(),
#             nn.Linear(64, 32),
#             nn.ReLU(),
#             nn.Linear(32, 1),
#         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(1, 32),
#             nn.ReLU(),
#             nn.Linear(32, 64),
#             nn.ReLU(),
#             nn.Linear(64, 128),
#             nn.ReLU(),
#             nn.Linear(128, 256),
#             nn.ReLU(),
#             nn.BatchNorm1d(256),
#             nn.Linear(256, 1),
#         )
        
#     def forward(self, x):
#         encoded = self.encoder(x)
#         return self.decoder(encoded), encoded

In [21]:
# class Autoencoder(nn.Module):
#     def __init__(self): # 1 1
#         super(Autoencoder, self).__init__()
        
#         self.encoder = nn.Sequential(
#             nn.Linear(1, 128),
#             nn.BatchNorm1d(128),
#             nn.Sigmoid(),
#             nn.Linear(128, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 1),
#         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(1, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 128),
#             nn.Sigmoid(),
#             nn.BatchNorm1d(128),
#             nn.Linear(128, 1),
#         )
        
#     def forward(self, x):
#         encoded = self.encoder(x)
#         return self.decoder(encoded), encoded 

In [22]:
# class Autoencoder(nn.Module):
#     def __init__(self): # 1 1
#         super(Autoencoder, self).__init__()
        
#         self.encoder = nn.Sequential(
#             nn.Linear(1, 128),
#             nn.BatchNorm1d(128),
#             nn.Sigmoid(),
#             nn.Linear(128, 64),
#             nn.BatchNorm1d(64),
#             nn.Sigmoid(),
#             nn.Linear(64, 32),
#             nn.BatchNorm1d(32),
#             nn.Sigmoid(),
#             nn.Linear(32, 1),
#         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(1, 32),
#             nn.BatchNorm1d(32),
#             nn.Sigmoid(),
#             nn.Linear(32, 64),
#             nn.BatchNorm1d(64),
#             nn.Sigmoid(),
#             nn.Linear(64, 128),
#             nn.BatchNorm1d(128),
#             nn.Sigmoid(),
#             nn.Linear(128, 1),
#         )
        
#     def forward(self, x):
#         encoded = self.encoder(x)
#         return self.decoder(encoded), encoded 

In [23]:
# class Autoencoder(nn.Module):
#     def __init__(self): # 1 1
#         super(Autoencoder, self).__init__()
        
#         self.encoder = nn.Sequential(
#             nn.Linear(1, 256),
#             nn.BatchNorm1d(256),
#             nn.Sigmoid(),
#             nn.Linear(256, 128),
#             nn.Sigmoid(),
#             nn.Linear(128, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 1),
            
#             nn.BatchNorm1d(1),
#         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(1, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 128),
#             nn.Sigmoid(),
#             nn.Linear(128, 256),
#             nn.Sigmoid(),
#             nn.BatchNorm1d(256),
#             nn.Linear(256, 1),
#         )
        
#     def forward(self, x):
#         encoded = self.encoder(x)
#         return self.decoder(encoded), encoded

In [24]:
# class Autoencoder(nn.Module):
#     def __init__(self): # 1 1
#         super(Autoencoder, self).__init__()
        
#         self.encoder = nn.Sequential(
#             nn.Linear(1, 256, bias=False), # 32 256
#             nn.LayerNorm(256, elementwise_affine=False),
#             nn.Sigmoid(),
#             nn.Linear(256, 128),
#             nn.Sigmoid(),
#             nn.Linear(128, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 1),
            
#             nn.LayerNorm(1, elementwise_affine=False),
#         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(1, 32),
#             nn.Sigmoid(),
#             nn.Linear(32, 64),
#             nn.Sigmoid(),
#             nn.Linear(64, 128),
#             nn.Sigmoid(),
#             nn.Linear(128, 256),
#             nn.Sigmoid(),
#             nn.LayerNorm(256, elementwise_affine=False),
#             nn.Linear(256, 1),
#         )
        
#     def forward(self, x):
#         encoded = self.encoder(x)
#         return self.decoder(encoded), encoded

In [27]:
class Autoencoder(nn.Module):
    def __init__(self): # 1 1
        super(Autoencoder, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(1, 256), # 32 256
            nn.BatchNorm1d(256),
            nn.Sigmoid(),
            nn.Linear(256, 128),
            nn.Sigmoid(),
            nn.Linear(128, 64),
            nn.Sigmoid(),
            nn.Linear(64, 32),
            nn.Sigmoid(),
            nn.Linear(32, 16),
            
            nn.Softmax(1)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(16, 32),
            nn.Sigmoid(),
            nn.Linear(32, 64),
            nn.Sigmoid(),
            nn.Linear(64, 128),
            nn.Sigmoid(),
            nn.Linear(128, 256),
            nn.Sigmoid(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 1),
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        return self.decoder(encoded), encoded

In [28]:
model = Autoencoder().to(device)

In [29]:
criterion = nn.MSELoss().to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)
lambda1 = lambda epoch: np.power(0.1, epoch) if epoch <= 8 else 1e-9
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

In [30]:
writer = SummaryWriter('runs/log')

In [31]:
model.train()

step = 0
for epoch in range(n_epochs):
    running_loss = 0
    for idx, (X_batch) in enumerate(train_dataloader, 1):
        X_batch = X_batch.to(device)
        optimizer.zero_grad()

        y_pred, encoded = model(X_batch)
        loss = criterion(y_pred, X_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.shape[0]

        step += X_batch.shape[0]
#         print(encoder_out)

        if idx % 5000 == 0:
            writer.add_scalar('loss/loss', loss.item(), step)
            writer.add_scalar('loss/total_loss', running_loss / step, step)
#             print(X_batch)
#             print(encoder_out, y_pred)
            writer.add_histogram('train/01_x', X_batch.data.cpu(), step)
            writer.add_histogram('train/02_encoder', encoded.data.cpu(), step)
            
            writer.add_histogram('train/03_pred', y_pred.data.cpu(), step)
#             writer.add_histogram('train/04_linear', out, step)
    
    lr = scheduler.get_last_lr()[0]
    print('Epoch: [{}/{}], Loss: {:.8f}, lr:{:.16f}'.format(epoch + 1, n_epochs, running_loss / (step), lr))
    scheduler.step()

KeyboardInterrupt: 

In [49]:
# torch.save(model.state_dict(), 'D:/option/models/SHFE.rb_encoder_256.pt')
# torch.save(model.state_dict(), 'D:/option/models/SHFE.rb_encoder_128.pt')
# torch.save(model.state_dict(), 'D:/option/models/SHFE.rb_encoder_128_n.pt')
# torch.save(model.state_dict(), 'D:/option/models/SHFE.rb_encoder_tanh.pt')
torch.save(model.state_dict(), 'D:/option/models/SHFE.rb_encoder_bn.pt')

In [26]:
# model.load_state_dict(torch.load('D:/option/models/SHFE.rb_encoder.pt'))
# model.load_state_dict(torch.load('D:/option/models/SHFE.rb_encoder_256.pt'))
# model.load_state_dict(torch.load('D:/option/models/SHFE.rb_encoder_128.pt'))
# model.load_state_dict(torch.load('D:/option/models/SHFE.rb_encoder_128_n.pt'))
# model.load_state_dict(torch.load('D:/option/models/SHFE.rb_encoder_bn.pt'))

<All keys matched successfully>

In [41]:
%matplotlib qt

In [42]:
model.eval()
for idx, (X_batch) in enumerate(val_dataloader, 1):
    print(idx)
    X_batch = X_batch.to(device)
    y_pred, encoder_out = model(X_batch)
#     print(X_batch)
#     print(y_pred)

#     plt.plot(np.exp(y_pred.cpu().detach().numpy()), label='pred')
#     plt.plot(np.exp(X_batch.cpu().detach().numpy()), label='real')
#     plt.legend()
#     plt.show()
#     plt.savefig(os.path.join('D:/option/xxx', '%s.jpg' % idx), dpi=160)
#     plt.close()
    break

1


In [46]:
encoder_out

tensor([[-1.1099],
        [-1.0078],
        [-1.2392],
        [-2.1602],
        [-0.4577],
        [-0.6896],
        [-0.7609],
        [-1.8314],
        [-1.8560],
        [-0.8191],
        [-0.5836],
        [-0.7513],
        [-0.4728],
        [-0.3604],
        [-1.1499],
        [-1.8406],
        [-0.7633],
        [-2.6426],
        [-0.5232],
        [-0.8142],
        [-1.1365],
        [-0.5566],
        [-0.3728],
        [-0.8265],
        [-0.7729],
        [-2.0270],
        [-0.8684],
        [-2.4859],
        [-0.9009],
        [-0.1378],
        [-0.5454],
        [-0.8314]], device='cuda:0', grad_fn=<NativeBatchNormBackward0>)

In [47]:
plt.plot(y_pred.cpu().detach().numpy() - 50, label='pred')
plt.plot(X_batch.cpu().detach().numpy(), label='real')
plt.legend()
plt.show()