気になっていること
* PositiveとNegativeは実際どれくらい離れているのか
* 特徴量は縮退していないか
* 一部の次元だけで判断されていないか => これはCMD的にもおそらく発生していなさそう


手法アイディア
* ランダム行列を挟んだ上で、複数の重みでスコアを出す
* Maximum Classifier Discrepancyみたいなことをする
* Hard Negative以外は無視する（Positiveの最小スコアより低いNegativeについては信用しない） => 学習が遅くなりそう（初期は全く学習が進まない？）

In [1]:
from run_sacred import data_ingredient, method_ingredient, optim_ingredient
from sacred import Experiment
# Interactive Sacred experiment built from the three ingredients imported from run_sacred.
ex = Experiment(
    'jupyter_ex',
    ingredients=[data_ingredient, method_ingredient, optim_ingredient],
    interactive=True
)

In [2]:
@ex.main
def main(_config):
    """Print the configuration passed in as ``_config`` and return it unchanged."""
    resolved = _config
    print(resolved)
    return resolved

In [3]:
# Execute the experiment's main command with the seed overridden to 123456.
ex.run(config_updates=dict(seed=123456))

INFO - jupyter_ex - Running command 'main'
INFO - jupyter_ex - Started
INFO - jupyter_ex - Result: {'seed': 123456, 'dataset': {'name': 'oppG', 'validation': 'ADL4-ADL5', 'test_domain': 'S1', 'L': 12, 'K': 5}, 'method': {'name': 'CPC', 'hidden': 1600, 'context': 800, 'num_gru': 1, 'sampler_mode': 'random', 'num_negative': 1, 'cont_type': 'sigmoid'}, 'optim': {'lr': 0.0001, 'num_batch': 10000, 'batch_size': 128}}
INFO - jupyter_ex - Completed after 0:00:00


{'seed': 123456, 'dataset': {'name': 'oppG', 'validation': 'ADL4-ADL5', 'test_domain': 'S1', 'L': 12, 'K': 5}, 'method': {'name': 'CPC', 'hidden': 1600, 'context': 800, 'num_gru': 1, 'sampler_mode': 'random', 'num_negative': 1, 'cont_type': 'sigmoid'}, 'optim': {'lr': 0.0001, 'num_batch': 10000, 'batch_size': 128}}


<sacred.run.Run at 0x7fba120c31d0>

In [4]:
# Hand-written copy of the configuration used for the rest of the notebook.
# NOTE(review): unlike the config printed by the run above, 'method' here has no
# 'num_negative' / 'cont_type' entries - presumably so that it matches the stored
# run it is later used to look up; confirm.
_config = {
    'seed': 123456,
    'dataset': {
        'name': 'oppG',
        'validation': 'ADL4-ADL5',
        'test_domain': 'S1',
        'L': 12,
        'K': 5,
    },
    'method': {
        'name': 'CPC',
        'hidden': 1600,
        'context': 800,
        'num_gru': 1,
        'sampler_mode': 'random',
    },
    'optim': {
        'lr': 0.0001,
        'num_batch': 10000,
        'batch_size': 128,
    },
}

In [5]:
from run_sacred import *
from sacred_wrap import MongoExtractor
from utils import flatten_dict
from copy import deepcopy
import os
import torch

In [6]:
# Build all dataset splits from the 'dataset' section of the config and name each one.
datasets = get_dataset(**_config['dataset'])
(train_dataset_joint,
 valid_dataset_joint,
 train_dataset_marginal,
 valid_dataset_marginal,
 test_dataset) = datasets


In [7]:
# Build a fresh model, then restore the weights saved by the matching finished run.
model = get_model(
    input_shape=train_dataset_joint.get('input_shape'),
    K=_config['dataset']['K'],
    **_config['method']
)

# Query the 'CPC_test' store with the flattened config; exactly one hit is required.
# NOTE(review): presumably `find` matches COMPLETED runs on their config - confirm
# against MongoExtractor.
query = flatten_dict(deepcopy(_config))
extractor = MongoExtractor(None, 'CPC_test')
result = list(extractor.find(query, ['config', 'info'], False, 'COMPLETED'))
assert len(result) == 1, "There are too many or no results. Please check the query {}".format(query)
result = result[0]

# The checkpoint file is named after the configured number of training batches.
checkpoint_name = 'model_{}.pth'.format(_config['optim']['num_batch'])
path = os.path.join(result['info']['log_dir'], checkpoint_name)

# Load the weights on the CPU first, then move the restored model to the GPU.
model = model.cpu()
model.load_state_dict(torch.load(path, map_location='cpu'))
model = model.cuda()

1600 800


In [8]:
# Display the module tree of the restored model.
model

CPCModel(
  (g_enc): Encoder(
    (feature): Sequential(
      (0): Conv2d(1, 50, kernel_size=(1, 5), stride=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(50, 40, kernel_size=(1, 5), stride=(1, 1))
      (4): ReLU()
      (5): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(40, 20, kernel_size=(1, 3), stride=(1, 1))
      (7): ReLU()
      (8): Dropout(p=0.5)
      (9): Flatten()
      (10): Linear(in_features=4520, out_features=1600, bias=True)
      (11): ReLU()
      (12): Dropout(p=0.5)
    )
  )
  (c_enc): ContextEncoder(
    (gru): GRU(1600, 800)
  )
  (predictor): Predictor(
    (linears): ModuleList(
      (0): Linear(in_features=800, out_features=1600, bias=True)
      (1): Linear(in_features=800, out_features=1600, bias=True)
      (2): Linear(in_features=800, out_features=1600, bias=True)
      (3): Linear(in_features=800, out_f

In [9]:
# Small shuffled loaders (12 samples per batch) over the joint and marginal training sets.
train_loader_joint, train_loader_marginal = (
    data.DataLoader(dataset, batch_size=12, shuffle=True)
    for dataset in (train_dataset_joint, train_dataset_marginal)
)


In [10]:
# Disabled fine-tuning loop: it is wrapped in a bare string literal, so the cell
# trains nothing and only evaluates to (and displays) the string itself.
"""
optimizer = optim.Adam(model.parameters(), lr=_config['optim']['lr'])
for num_iter in range(100):
    loss = train_CPC(train_loader_joint, train_loader_marginal, model, optimizer)
"""

"\noptimizer = optim.Adam(model.parameters(), lr=_config['optim']['lr'])\nfor num_iter in range(100):\n    loss = train_CPC(train_loader_joint, train_loader_marginal, model, optimizer)\n"

In [11]:
def validate(dataset_joint, dataset_marginal, model, num_eval=10, batch_size=128):
    """Evaluate the model on paired batches of joint and marginal samples.

    The model is switched to eval mode, run without gradient tracking, and put
    back into train mode before returning (also when evaluation raises).

    Args:
        dataset_joint: Dataset yielding ``(X, label)`` pairs and exposing ``T``;
            the context length is taken as ``dataset_joint.T - dataset_marginal.T``.
        dataset_marginal: Dataset yielding ``(X, label)`` pairs and exposing
            ``T``, which is used as the number of prediction steps ``K``.
        model: Callable as ``model(X_j, X_m, L, K)`` returning two sequences of
            ``K`` score tensors (joint scores, marginal scores).
        num_eval: Maximum number of batch pairs to evaluate. ``None`` means one
            pass over the joint loader.
        batch_size: Batch size of both loaders; incomplete batches are dropped.

    Returns:
        OrderedDict with ``'loss-k'`` and ``'accuracy-k'`` for each step ``k``.
        The loss is the summed positive and negative log-loss averaged over
        ``2 * number_of_evaluated_batches``; accuracy counts scores above 0.5 as
        positive predictions and is NaN if no score was on either side of 0.5.

    Raises:
        ValueError: If no batch could be evaluated (for example because both
            datasets are smaller than ``batch_size``).

    NOTE(review): the log-loss and the 0.5 threshold assume the scores are
    probabilities in (0, 1) - confirm that the model applies a sigmoid.
    """
    from collections import OrderedDict

    model.eval()

    loader_joint = data.DataLoader(dataset_joint, batch_size=batch_size, shuffle=True, drop_last=True)
    loader_marginal = data.DataLoader(dataset_marginal, batch_size=batch_size, shuffle=True, drop_last=True)

    if num_eval is None:
        num_eval = len(loader_joint)

    K = dataset_marginal.T
    L = dataset_joint.T - K

    # Feed the inputs to wherever the model lives; fall back to the default CUDA
    # device when the model exposes no parameters.
    try:
        device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        device = torch.device('cuda')

    losses = [0.0] * K
    TP = [0] * K
    TN = [0] * K
    FP = [0] * K
    FN = [0] * K
    num_batches = 0
    score_j_list = score_m_list = None

    try:
        # Evaluation only: do not build an autograd graph for the scores.
        with torch.no_grad():
            for (X_j, _), (X_m, _) in zip(loader_joint, loader_marginal):
                X_j = X_j.float().to(device)
                X_m = X_m.float().to(device)

                score_j_list, score_m_list = model(X_j, X_m, L, K)
                for k in range(K):
                    positive_loss = -1.0 * torch.log(torch.clamp(score_j_list[k], min=1e-8)).mean()
                    negative_loss = torch.log(torch.clamp(1 - score_m_list[k], min=1e-8)).mean()
                    losses[k] += (positive_loss - negative_loss).item()
                    TP[k] += (score_j_list[k] > 0.5).sum().item()
                    TN[k] += (score_m_list[k] < 0.5).sum().item()
                    FP[k] += (score_m_list[k] > 0.5).sum().item()
                    FN[k] += (score_j_list[k] < 0.5).sum().item()

                num_batches += 1
                if num_batches == num_eval:
                    break
    finally:
        model.train()

    if num_batches == 0:
        raise ValueError(
            "No batch was evaluated; check that both datasets hold at least "
            "batch_size={} samples.".format(batch_size))

    # Diagnostic: lowest joint score vs. highest marginal score of the last batch.
    # A dedicated loop variable keeps the batch count used below intact.
    for k in range(K):
        print(score_j_list[k].min(), score_m_list[k].max())

    results = OrderedDict()
    for k in range(K):
        results['loss-{}'.format(k)] = losses[k] / (2 * num_batches)
        correct = TP[k] + TN[k]
        total = correct + FP[k] + FN[k]
        # Scores exactly equal to 0.5 are counted in none of the four cells.
        results['accuracy-{}'.format(k)] = float(correct) / float(total) if total else float('nan')

    return results

In [12]:
# Evaluate a single batch pair from the training split.
train_result = validate(
    dataset_joint=train_dataset_joint,
    dataset_marginal=train_dataset_marginal,
    model=model,
    num_eval=1
)

tensor(4.9851, device='cuda:0', grad_fn=<MinBackward1>) tensor(-1.3982, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(3.6562, device='cuda:0', grad_fn=<MinBackward1>) tensor(-0.4716, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(3.8864, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.3559, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(3.8961, device='cuda:0', grad_fn=<MinBackward1>) tensor(4.0562, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(2.6404, device='cuda:0', grad_fn=<MinBackward1>) tensor(5.0485, device='cuda:0', grad_fn=<MaxBackward1>)


In [13]:
from cpc import get_context

# Recompute the per-step scores by hand for one batch drawn from each training loader.
X_j, _ = next(iter(train_loader_joint))
X_m, _ = next(iter(train_loader_marginal))
X_j, X_m = X_j.float().cuda(), X_m.float().cuda()
K = train_dataset_marginal.T
L = train_dataset_joint.T - K

# Context computed from the first L positions along the last axis of the joint batch.
c = get_context(X_j[..., :L], model.g_enc, model.c_enc)
score_j, score_m = [], []
for i in range(K):
    z_j = model.g_enc(X_j[..., L+i])
    z_m = model.g_enc(X_m[..., i])
    z_p = model.predictor(c, i)
    # Row-wise dot product between each encoding and the step-i prediction,
    # squashed by a sigmoid.
    z_p_col = z_p.unsqueeze(2)
    score_j.append(torch.sigmoid(torch.bmm(z_j.unsqueeze(1), z_p_col).squeeze(2)))
    score_m.append(torch.sigmoid(torch.bmm(z_m.unsqueeze(1), z_p_col).squeeze(2)))

In [14]:
from torch.nn import functional as F

In [39]:
# Count the entries of z_j that are zero after functional dropout with p=1.0.
F.dropout(z_j, p=1.0).eq(0).sum()

tensor(19200, device='cuda:0')

In [42]:
# Random 0/1 mask shaped like z_j; each entry is 1 with probability 0.5.
mask = torch.bernoulli(z_j.data.new_full(z_j.data.size(), 0.5))


In [22]:
# Average the raw scores (no sigmoid here) over `num_mask` random masks applied
# to the prediction z_p; the encodings z_j / z_m are left unmasked.
# NOTE(review): `p` is passed to torch.bernoulli as the probability of drawing a 1,
# i.e. of KEEPING an element. With p = 0.0 every mask is all zeros, so every score
# is exactly 0. If `p` was meant as a dropout-style drop probability, the mask
# should be sampled with `1 - p` instead - confirm the intent.
p = 0.0
num_mask = 10
weight = 1.0 / num_mask

model.eval()
score_j = [0] * K
score_m = [0] * K
for i in range(K):
    z_j = model.g_enc(X_j[..., L+i])
    z_m = model.g_enc(X_m[..., i])
    z_p = model.predictor(c, i)
    for j in range(num_mask):
        mask = torch.bernoulli(z_j.data.new_full(z_j.data.size(), p))
        _z_p = z_p * mask
        pred_col = _z_p.unsqueeze(2)
        for scores, z in ((score_j, z_j), (score_m, z_m)):
            scores[i] += weight * torch.bmm(z.unsqueeze(1), pred_col).squeeze(2)
model.train()
score_j[0]


tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0', grad_fn=<AddBackward0>)

In [19]:
# Bernoulli mask with success probability 0.0, shaped like z_j.
mask = torch.bernoulli(z_j.data.new_zeros(z_j.data.size()))

In [20]:
# Display the mask sampled with probability 0.0.
mask

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')