In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gresearch_crypto
# Create the competition environment; `env.predict` is called once per test batch
# in the inference loop further down.
env = gresearch_crypto.make_env()
# Iterator consumed by the inference loop; it is unpacked there as
# (df_test, df_pred) pairs.
iter_test = env.iter_test()

In [2]:
# Load the supplemental training data. It is trimmed to its most recent rows a few
# cells below and serves as the initial history window for inference.
df = pd.read_csv("../input/g-research-crypto-forecasting/supplemental_train.csv")

In [3]:
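# Alternative history source (disabled): read the full train.csv and keep only the
# rows whose timestamp is before 2021-06-13, instead of using supplemental_train.csv.
# df =  pd.read_csv("../input/g-research-crypto-forecasting/train.csv")
# df = df[pd.to_datetime(df['timestamp'],unit='s')<pd.to_datetime('2021-06-13')]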

In [4]:
# Column order of the per-asset feature vector written into the model input array.
# 'time_sin' / 'time_cos' are added by preprocessing(). The order matters: the
# inference loop addresses columns by negative index (-4 -> 'Volume', -3 -> 'VWAP').
features = ['Asset_ID','Count', 'Open', 'High', 'Low', 'Close',
           'Volume', 'VWAP','time_sin','time_cos']

def preprocessing(df):
    """Add cyclical time-of-day features to `df` in place and return it.

    The 'timestamp' column is interpreted as seconds since the Unix epoch. The
    minute of the day (0..1439) is mapped onto the unit circle and stored in two
    new columns, 'time_sin' and 'time_cos'.

    Args:
        df: DataFrame with a 'timestamp' column (epoch seconds).

    Returns:
        The same DataFrame object, now carrying 'time_sin' and 'time_cos'.
    """
    stamps = pd.to_datetime(df['timestamp'], unit='s')
    minute_of_day = stamps.dt.hour * 60 + stamps.dt.minute
    # 1440 minutes per day -> one full turn of the circle per day.
    angle = 2*np.pi*minute_of_day/1440
    df['time_sin'] = np.sin(angle)
    df['time_cos'] = np.cos(angle)
    return df

def remove_old_data(df,max_len=120):
    """Keep only the rows from the most recent `max_len` minutes.

    The reference time is the 'timestamp' of the LAST row (not the maximum of the
    column), so the newest rows are expected to sit at the end of the frame. Rows
    are kept when their timestamp is strictly greater than
    `reference - max_len * 60` seconds.

    Args:
        df: DataFrame with a 'timestamp' column in epoch seconds.
        max_len: Length of the retained window, in minutes.

    Returns:
        A new DataFrame (an independent copy, original index preserved) holding
        the retained rows. An empty input yields an empty copy.
    """
    # An empty frame has no last row to take the reference timestamp from.
    if len(df) == 0:
        return df.copy()
    latest_ts = df['timestamp'].iloc[-1]
    cutoff = latest_ts - max_len*60
    # Copy so that callers can add columns to the result without pandas emitting
    # SettingWithCopyWarning (the result would otherwise be a slice of `df`).
    return df.loc[df['timestamp'] > cutoff].copy()

In [5]:
# Trim the loaded history to the default window of remove_old_data (the last
# 120 minutes relative to the final row) ...
df = remove_old_data(df)
# ... and add the 'time_sin' / 'time_cos' columns listed in `features`.
df = preprocessing(df)

In [6]:
!pip install ../input/einops-030/einops-0.3.0-py2.py3-none-any.whl

Processing /kaggle/input/einops-030/einops-0.3.0-py2.py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.3.0


In [7]:
import torch
from torch import nn, einsum
from torch.nn import functional as F
from einops import rearrange
from einops.layers.torch import Rearrange

In [8]:
def exists(val):
    """Return True when `val` is anything other than None."""
    return not (val is None)

def default(val, d):
    """Return `val` unless it is None, in which case return the fallback `d`.

    The fallback is returned as-is; if `d` is a callable it is NOT invoked.
    """
    if exists(val):
        return val
    return d

class Attention(nn.Module):
    """Multi-head attention with a learned additive bias and learned scale per position pair.

    The raw query/key dot products are multiplied element-wise by `scale` and
    shifted by `rel_pos_emb`. Both are parameters of shape
    (1, heads, max_pos_emb, max_pos_emb), so the query and key sequence lengths
    must be broadcast-compatible with `max_pos_emb`.

    Args:
        dim: Feature size of the input and of the output.
        heads: Number of attention heads.
        dim_head: Feature size per head.
        dropout: Dropout probability applied to the projected output.
        max_pos_emb: Side length of the per-position `scale` / `rel_pos_emb` tables.
    """

    def __init__(
        self,
        dim,
        heads = 8,
        dim_head = 64,
        dropout = 0.,
        max_pos_emb = 512
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads= heads
        self.to_q = nn.Linear(dim, inner_dim, bias = False)
        self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False)
        self.to_out = nn.Linear(inner_dim, dim)

        # Additive bias (initialised to 0) and multiplicative scale (initialised to
        # dim_head ** -0.5) for every (head, query position, key position) triple.
        self.rel_pos_emb = nn.Parameter(torch.zeros([1,heads,max_pos_emb,max_pos_emb]))
        self.scale = nn.Parameter(torch.full([1,heads,max_pos_emb,max_pos_emb] ,dim_head ** -0.5))

        self.dropout = nn.Dropout(dropout)

    def _split_heads(self, t):
        """Reshape (b, n, heads * d) into (b, heads, n, d)."""
        b, n, _ = t.shape
        return t.reshape(b, n, self.heads, -1).permute(0, 2, 1, 3)

    def forward(self, x, context = None, mask = None, context_mask = None):
        """Attend from `x` to `context` (or to `x` itself when no context is given).

        Args:
            x: Query input of shape (b, n, dim).
            context: Optional key/value input of shape (b, m, dim); defaults to `x`.
            mask: Optional (b, n) mask over query positions; truthy entries are kept.
            context_mask: Optional (b, m) mask over key positions; truthy entries
                are kept. Without a context it defaults to `mask`.

        Returns:
            Tensor of shape (b, n, dim).
        """
        device = x.device
        has_context = context is not None
        if not has_context:
            context = x

        q = self.to_q(x)
        k, v = self.to_kv(context).chunk(2, dim = -1)
        q, k, v = (self._split_heads(t) for t in (q, k, v))

        dots = einsum('b h i d, b h j d -> b h i j', q, k)
        dots = dots * self.scale + self.rel_pos_emb

        if mask is not None or context_mask is not None:
            # Build missing masks eagerly as all-True bool tensors. (Previously the
            # fallbacks were lambdas that were never called, so supplying only one
            # of the two masks crashed.)
            if mask is None:
                mask = torch.ones(x.shape[:2], dtype = torch.bool, device = device)
            if context_mask is None:
                if has_context:
                    context_mask = torch.ones(context.shape[:2], dtype = torch.bool, device = device)
                else:
                    context_mask = mask
            # Cast so that float / integer 0-1 masks are accepted as well as bool.
            pair_mask = mask.bool()[:, None, :, None] & context_mask.bool()[:, None, None, :]
            mask_value = -torch.finfo(dots.dtype).max
            dots = dots.masked_fill(~pair_mask, mask_value)

        attn = dots.softmax(dim = -1)

        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        # Merge heads: (b, h, n, d) -> (b, n, h * d).
        b, h, n, d = out.shape
        out = out.permute(0, 2, 1, 3).reshape(b, n, h * d)
        out = self.to_out(out)
        return self.dropout(out)
    
class TransformerLayer2d(nn.Module):
    """Pre-norm transformer block for inputs laid out as (batch, time, asset, feature).

    Three residual sub-layers are applied in order:
      1. attention along the time axis (each asset treated as its own sequence),
      2. attention along the asset axis (each time step treated as its own sequence),
      3. a position-wise feed-forward network.

    Args:
        d_model: Feature size of the input and output.
        nheads: Number of attention heads; each head gets d_model // nheads features.
        d_hidden: Hidden width of the feed-forward network.
        dropout: Dropout probability used in attention outputs and the feed-forward net.
        nts: `max_pos_emb` of the time-axis attention.
        nasset: `max_pos_emb` of the asset-axis attention.
    """

    def __init__(self, d_model, nheads, d_hidden, dropout=0.0, nts=90, nasset=14):
        super().__init__()
        head_dim = d_model // nheads

        # Sub-layer 1: attention over time steps.
        self.norm1 = nn.LayerNorm(d_model)
        self.attn1 = Attention(d_model, nheads, head_dim, dropout, max_pos_emb=nts)

        # Sub-layer 2: attention over assets.
        self.norm2 = nn.LayerNorm(d_model)
        self.attn2 = Attention(d_model, nheads, head_dim, dropout, max_pos_emb=nasset)

        # Sub-layer 3: feed-forward network.
        self.norm3 = nn.LayerNorm(d_model)
        self.mlp3 = nn.Sequential(
            nn.Linear(d_model, d_hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_hidden, d_model),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Apply the block to `x` of shape (B, T, A, D) and return the same shape."""
        batch, _, _, _ = x.shape

        # Fold assets into the batch so attention runs along time.
        per_asset = rearrange(self.norm1(x), 'b t a d -> (b a) t d')
        per_asset = self.attn1(per_asset)
        x = x + rearrange(per_asset, '(b a) t d -> b t a d', b=batch)

        # Fold time steps into the batch so attention runs along assets.
        per_step = rearrange(self.norm2(x), 'b t a d -> (b t) a d')
        per_step = self.attn2(per_step)
        x = x + rearrange(per_step, '(b t) a d -> b t a d', b=batch)

        # Position-wise feed-forward with residual connection.
        return x + self.mlp3(self.norm3(x))

class TransformerLayer1d(nn.Module):
    """Pre-norm transformer block for inputs laid out as (batch, asset, feature).

    Applies attention along the asset axis followed by a position-wise
    feed-forward network, each wrapped in a residual connection.

    Args:
        d_model: Feature size of the input and output.
        nheads: Number of attention heads; each head gets d_model // nheads features.
        d_hidden: Hidden width of the feed-forward network.
        dropout: Dropout probability used in the attention output and the feed-forward net.
        nasset: `max_pos_emb` of the attention layer.
    """

    def __init__(self, d_model, nheads, d_hidden, dropout=0.0, nasset=14):
        super().__init__()
        head_dim = d_model // nheads
        self.norm1 = nn.LayerNorm(d_model)
        self.attn1 = Attention(d_model, nheads, head_dim, dropout, max_pos_emb=nasset)
        self.norm2 = nn.LayerNorm(d_model)
        self.mlp2 = nn.Sequential(
            nn.Linear(d_model, d_hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_hidden, d_model),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Apply the block to `x` of shape (B, A, D) and return the same shape."""
        # Attention sub-layer with residual connection.
        x = x + self.attn1(self.norm1(x))
        # Feed-forward sub-layer with residual connection.
        return x + self.mlp2(self.norm2(x))
    
class CryptoModel(nn.Module):
    """Transformer that maps a window of per-asset features to one value per asset.

    Pipeline, as implemented in `forward`:
      1. instance-normalise feature channels 1..7 along time,
      2. embed feature 0 as an asset id and project the remaining 9 features,
      3. substitute a learned embedding where a (time, asset) row is all zeros,
      4. add time and asset positional embeddings,
      5. run the 2-D encoder layers (attention over time and over assets),
      6. collapse the time axis with a small MLP,
      7. run the 1-D encoder layers (attention over assets),
      8. project to a scalar per asset and normalise it with BatchNorm.

    Args:
        SEQ_LENGTH: Number of time steps in the input window. It fixes the shapes
            of the positional / missing-value embeddings and of the time-collapsing
            MLP, so inputs must have exactly this many time steps.
    """

    def __init__(self, SEQ_LENGTH=60):
        super().__init__()
        d_model = 64
        n_layers = 2
        nheads = 4

        # Input normalisation and embeddings.
        self.norm = nn.InstanceNorm1d(7)
        self.asset_emb = nn.Embedding(14, d_model)
        self.dense_emb = nn.Linear(9, d_model)

        # Encoder over the (time, asset) grid.
        self.encoder1 = nn.ModuleList(
            [TransformerLayer2d(d_model, nheads, 4*d_model, 0.2, SEQ_LENGTH, 14)
             for _ in range(n_layers)]
        )
        self.post_norm1 = nn.LayerNorm(d_model)

        # Maps the SEQ_LENGTH values along the time axis to a single value.
        self.decoder1 = nn.Sequential(
            nn.Linear(SEQ_LENGTH, d_model),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(d_model, 1),
        )

        # Encoder over assets only.
        self.encoder2 = nn.ModuleList(
            [TransformerLayer1d(d_model, nheads, 4*d_model, 0.2, 14)
             for _ in range(n_layers)]
        )

        # Output head.
        self.fc = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.GELU(),
            nn.Dropout(0.0),
            nn.Linear(d_model, 1),
        )
        self.out_norm = nn.BatchNorm1d(1, eps=0, affine=False)

        # Learned positional embeddings: pe1 varies along time, pe2 along assets.
        self.pe1 = nn.Parameter(torch.empty([1, SEQ_LENGTH, 1, d_model]))
        self.pe2 = nn.Parameter(torch.empty([1, 1, 14, d_model]))
        nn.init.normal_(self.pe1, 0.0, 0.2)
        nn.init.normal_(self.pe2, 0.0, 0.2)

        # Learned replacement for (time, asset) cells whose input row is all zeros.
        self.missing_emb = nn.Parameter(torch.empty([1, SEQ_LENGTH, 14, d_model]))
        nn.init.normal_(self.missing_emb)

    def forward(self, inp):
        """Compute one output value per asset.

        Args:
            inp: Tensor of shape (B, T, A, D). Feature 0 is used as the asset id,
                features 1..7 are instance-normalised, and features 1..D-1 feed a
                Linear(9, ...) layer, so D is expected to be 10.

        Returns:
            Tensor of shape (B, A).
        """
        # Work on a copy: the normalisation below writes into the tensor.
        inp = inp.clone()
        batch, _, _, _ = inp.shape

        # A cell counts as missing when every feature (asset id included) is zero.
        missing = inp.abs().sum(-1) == 0

        # InstanceNorm1d expects (N, C, L): one "instance" per (batch, asset) pair.
        channels_first = rearrange(inp, 'b t a d -> (b a) d t')
        channels_first[:, 1:8] = self.norm(channels_first[:, 1:8])
        inp = rearrange(channels_first, '(b a) d t -> b t a d', b=batch)

        asset_ids = inp[:, :, :, 0].long()
        X = self.asset_emb(asset_ids) + self.dense_emb(inp[:, :, :, 1:])
        X[missing] = self.missing_emb.repeat([len(X), 1, 1, 1])[missing]

        X = X + self.pe1 + self.pe2
        for block in self.encoder1:
            X = block(X)
        X = self.post_norm1(X)

        # Put time last so decoder1 reduces it: (B, A, D, T) -> (B, A, D).
        X = rearrange(X, 'b t a d -> b a d t')
        X = self.decoder1(X).squeeze(-1)

        for block in self.encoder2:
            X = block(X)

        y = self.fc(X).squeeze(-1)
        # BatchNorm1d(1) wants a channel axis: (B, A) -> (B, 1, A) -> (B, A).
        y = self.out_norm(y.unsqueeze(1)).squeeze(1)
        return y

In [9]:
from glob import glob
# Prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def load_fold_model(seq_length, fold):
    """Build a CryptoModel for `seq_length` and load the checkpoint of one fold.

    Args:
        seq_length: Window length the checkpoint was trained with (45, 60, 90 or 120).
        fold: Fold index of the checkpoint file (0, 1 or 2).

    Returns:
        The model on `device`, in eval mode, with the checkpoint weights loaded.
    """
    checkpoint = (
        f"../input/cvonly-transformer-freq5-seq{seq_length}-holdout-v4/"
        f"model_fold{fold}.pt"
    )
    model = CryptoModel(seq_length).to(device)
    model.eval()
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    return model


# The individual names are kept because the inference cell refers to them directly.
model_45_0 = load_fold_model(45, 0)
model_45_1 = load_fold_model(45, 1)
model_45_2 = load_fold_model(45, 2)

model_60_0 = load_fold_model(60, 0)
model_60_1 = load_fold_model(60, 1)
model_60_2 = load_fold_model(60, 2)

model_90_0 = load_fold_model(90, 0)
model_90_1 = load_fold_model(90, 1)
model_90_2 = load_fold_model(90, 2)

model_120_0 = load_fold_model(120, 0)
model_120_1 = load_fold_model(120, 1)
model_120_2 = load_fold_model(120, 2)

<All keys matched successfully>

In [10]:
with torch.no_grad():
    # (model, window length) pairs, listed in the order their outputs are summed.
    ensemble = [
        (model_45_0, 45), (model_45_1, 45), (model_45_2, 45),
        (model_60_0, 60), (model_60_1, 60), (model_60_2, 60),
        (model_90_0, 90), (model_90_1, 90), (model_90_2, 90),
        (model_120_0, 120), (model_120_1, 120), (model_120_2, 120),
    ]

    for i, (df_test, df_pred) in enumerate(iter_test):
        current_t = df_test['timestamp'].iloc[0]
        row_order = df_test['Asset_ID'].values

        # Append the new batch to the rolling history and drop rows that fell out
        # of the window.
        df_test = preprocessing(df_test)
        df = pd.concat([df,df_test],axis=0)
        df = remove_old_data(df)

        # Dense (time slot, asset, feature) array; cells without data stay all-zero.
        window = np.zeros([120,14,10])
        window_start = current_t - 120*60
        for ts, rows in df.groupby('timestamp'):
            # Slot 119 is the current minute, slot 0 is 119 minutes earlier.
            slot = (ts - window_start)//60 - 1
            if not (0 <= slot < 120):
                continue
            window[slot, rows['Asset_ID'].values] = rows[features].values

        window = window.astype(np.float32)
        # Columns -4 and -3 of `features` are 'Volume' and 'VWAP'.
        for col in (-4, -3):
            window[:,:,col] = np.log1p(window[:,:,col])
        # Replace NaN and +/-inf with zero.
        window[~np.isfinite(window)] = 0

        X = torch.Tensor(window).to(device).unsqueeze(0)

        # Average the ensemble; every model sees the most recent `seq_len` slots.
        pred = np.zeros(14)
        for model, seq_len in ensemble:
            pred += model(X[:,-seq_len:])[0].cpu().numpy()
        pred /= len(ensemble)

        # `pred` is indexed by Asset_ID; reorder it to the row order of the batch.
        df_pred['Target'] = pred[row_order]
        # Send submissions
        env.predict(df_pred)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
