以下、Bをバッチサイズ、Cをチャネル数、Wをウィンドウサイズ、Lをコンテキスト長、Tを予測長とする。

* Context: (B, C, W, L, 1)
* Positive Sampleを1個（B, C, W, T, 1)
* Negative Samples (B, C, W, T)
  * same seq, different loc
  * same user, different seq
  * different user, different seq

* 方針1：Samplerで頑張る
  * Pro：データセットは基本的に今のままでいい
  * Con:NegativeのSamplerもSamplerを複数作れば色んな種類を試しやすい
  * Con:Samplerの仕様をよく把握してない、最終的なコードがわかりにくくなる
* 方針2:データセット自体で頑張る
  * Pro:多分簡単
  * 

In [25]:
import torch
from torch import nn


class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return x


class Encoder(nn.Module):
    def __init__(self, input_shape, hidden_size=400, activation='relu'):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_shape = input_shape
        linear_size = 20 * input_shape[1] * 2

        if activation == 'relu':
            activation = nn.ReLU
        elif activation == 'lrelu':
            activation = nn.LeakyReLU
        
        self.feature = nn.Sequential(
            nn.Conv2d(input_shape[0], 50, kernel_size=(1, 5)), 
            activation(),
            nn.MaxPool2d(kernel_size=(1, 2)), 
            nn.Conv2d(50, 40, kernel_size=(1, 5)), 
            activation(),
            nn.MaxPool2d(kernel_size=(1, 2)), 
            nn.Conv2d(40, 20, kernel_size=(1, 3)), 
            activation(),
            nn.Dropout(0.5),
            Flatten(),
            nn.Linear(linear_size, self.hidden_size), 
            activation(),
            nn.Dropout(0.5),
        )

    def forward(self, input_data):
        feature = self.feature(input_data)
        return feature

    def output_shape(self):
        return (None, self.hidden_size)
    
    
class ContextEncoder(nn.Module):
    def __init__(self, input_shape, hidden_size=200, num_layers=2):
        super(ContextEncoder, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = 200
        self.gru = nn.GRU(input_shape[1], hidden_size=self.hidden_size, num_layers=self.num_layers)
        
    def forward(self, X):
        h0 = Variable(torch.zeros(self.num_layers, X.shape[1], self.hidden_size)).cuda()
        return self.gru(X, h0)
    

        
class Predictor(nn.Module):
    def __init__(self, input_shape, hidden_size, max_steps):
        super(Predictor, self).__init__()
        self.max_steps = max_steps
        self.linears = nn.ModuleList([nn.Linear(input_shape[1], hidden_size) for i in range(max_steps)])
        
    def forward(self, c, k):
        """
        predict the k step forward future from the context vector c
        
        Parameter
        ---------
        c : torch.Variable
            context vector
        k : int
            the number of forward steps, which is used to determine the weight matrix 
        """
        
        return self.linears[k](c)
    
    
"""
class ContextEncoderCell(nn.Module):
    def __init__(self, input_shape, hidden_size=200):
        super(ContextEncoderCell, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRUCell(input_shape[1], hidden_size=self.hidden_size)
        
    def forward(self, X, h):
        return self.gru(X, h) 
"""

'\nclass ContextEncoderCell(nn.Module):\n    def __init__(self, input_shape, hidden_size=200):\n        super(ContextEncoderCell, self).__init__()\n        self.hidden_size = hidden_size\n        self.gru = nn.GRUCell(input_shape[1], hidden_size=self.hidden_size)\n        \n    def forward(self, X, h):\n        return self.gru(X, h) \n'

In [27]:
from datasets import _SingleUserSingleADL, OppG
import torch.utils.data as data
T = 3 # maximum prediction steps (sequence length of future sequences)
L = 12  # context size

dataset = OppG('S1', 'Gestures', l_sample=30, interval=15, T=T+L)
loader = data.DataLoader(dataset, batch_size=128, shuffle=True)
neg_dataset = OppG('S2', 'Gestures', l_sample=30, interval=15, T=T)  # marginal sample come from a different user for simplicity
neg_loader = data.DataLoader(neg_dataset, batch_size=128, shuffle=True)

In [28]:
g_enc = Encoder(input_shape=dataset.get('input_shape'), hidden_size=100).cuda()
c_enc = ContextEncoder(input_shape=g_enc.output_shape(), num_layers=2, hidden_size=50).cuda()
predictor = Predictor((None, c_enc.hidden_size), g_enc.output_shape()[1], max_steps=T).cuda()

In [29]:
def get_context(X, g_enc, c_enc):
    z_context = []
    nb_context = X.shape[-1]

    h = Variable(torch.zeros(X.shape[0], c_enc.hidden_size).cuda(), requires_grad=False)
    for i in range(nb_context):
        z_context.append(g_enc(X[..., i]))

    o, h = c_enc(torch.stack(z_context))
    c = h[-1]
    return c

In [30]:
from torch.autograd import Variable
from torch import optim
optimizer = optim.Adam(list(g_enc.parameters()) + list(c_enc.parameters()) + list(predictor.parameters()), lr=0.0001)

criterion = nn.BCELoss()


for num_iter in range(10000):
    X, _ = loader.__iter__().__next__()
    X_m, _ = neg_loader.__iter__().__next__()

    optimizer.zero_grad()
    X = Variable(X.float()).cuda()
    X_m = Variable(X_m.float()).cuda()
    c = get_context(X[..., :L], g_enc, c_enc)
    loss = 0
    for i in range(T):
        z_j = g_enc(X[..., L+i]) 
        z_m = g_enc(X_m[..., i])  # Replace here
        z_p = predictor(c, i)
        score_j = torch.sigmoid(torch.bmm(z_j.unsqueeze(1), z_p.unsqueeze(2)).squeeze(1))
        score_m = torch.sigmoid(torch.bmm(z_m.unsqueeze(1), z_m.unsqueeze(2)).squeeze(1))
        loss += criterion(score_j, Variable(torch.ones((len(score_j), 1))).cuda()) + criterion(score_m, Variable(torch.zeros((len(score_j), 1))).cuda())
    loss = loss / (2*T) 
    loss.backward()
    optimizer.step()
    
    if (num_iter+1) % 10 == 0:
        print(num_iter+1, loss.data[0])

RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCStorage.cu:58

In [20]:
criterion(score_j, Variable(torch.ones((len(score_j), 1))).cuda()), criterion(score_m, Variable(torch.zeros((len(score_j), 1))).cuda())

(Variable containing:
 1.00000e-03 *
   4.5608
 [torch.cuda.FloatTensor of size 1 (GPU 0)], Variable containing:
  0.7047
 [torch.cuda.FloatTensor of size 1 (GPU 0)])

In [24]:
z_j.mean(dim=0)

Variable containing:
1.00000e-02 *
 -0.4368
 -0.7576
 -1.2021
 -0.9422
 -1.4026
 -1.3457
 -1.0399
 -1.3105
 -1.2388
 -1.2168
 -1.3796
 -1.4887
 -1.1039
 -1.4232
 -1.1180
 -1.1781
 -1.1295
 -1.3833
 -1.1118
 -1.3323
 -0.0861
 -1.5210
 -1.6649
 -0.8586
 -0.7627
  0.0330
 -0.3615
 -0.7373
 -0.9396
 -1.0086
 -1.0875
 -0.4759
 -1.2189
 -0.9275
 -1.1164
 -1.2138
 -1.4260
 -0.9656
 -0.5279
 -1.3039
 -1.3837
 -1.2215
 -1.2313
 -0.7943
 -1.2797
 -1.1289
 -0.8287
 -0.9755
 -1.2844
 -1.2717
 -0.4271
 -1.3253
 -1.2310
 -1.3833
 -0.9311
 -1.4309
 -1.2430
 -1.2566
 -1.1460
 -1.1509
 -1.1814
 -0.8964
 -1.3915
 -0.7207
 -1.2342
 -0.6426
 -1.3436
 -0.5251
 -1.1668
 -1.1839
 -1.2082
 -0.6802
 -1.2136
 -1.2043
 -1.3012
 -0.3438
 -0.9048
 -0.8551
 -1.3036
 -0.5764
 -1.1605
 -1.0928
 -1.0758
 -1.0668
 -1.1033
 -1.6213
 -1.3621
 -1.4203
 -0.5075
 -1.7488
 -1.0139
 -1.2182
 -0.9673
 -0.3971
 -1.0751
 -0.4653
 -0.6074
 -0.7619
 -0.7002
 -1.2391
[torch.cuda.FloatTensor of size 100 (GPU 0)]

In [78]:
criterion = nn.BCEWithLogitsLoss()

for i in range(T):
    z_j = g_enc(X[..., L+i]) 
    z_m = g_enc(X[..., -i])  # Replace here
    z_p = predictor(c, i)
    score_j = torch.sigmoid(torch.bmm(z_j.unsqueeze(1), z_p.unsqueeze(2)).squeeze(1))
    score_m = torch.sigmoid(torch.bmm(z_m.unsqueeze(1), z_m.unsqueeze(2)).squeeze(1))
    loss = criterion(score_j, Variable(torch.ones((len(score_j), 1))).cuda()) + criterion(score_m, Variable(torch.zeros((len(score_j), 1))).cuda())
loss.backward()

In [77]:
loss

Variable containing:
 1.6947
[torch.cuda.FloatTensor of size 1 (GPU 0)]

(torch.Size([128, 1, 400]), torch.Size([128, 400, 1]))

In [26]:
predictor(c).shape

torch.Size([128, 400])