## Refactoring learner

In [1]:
import regularizer; reload(regularizer)

import torch
from torch import nn, optim
from torch.autograd import Variable
import torch.utils.data as data
import numpy as np

In [2]:
from pytransfer.learners.utils import calc_acc, Flatten

class MNISTR_Encoder(nn.Module):
    def __init__(self, input_shape):
        super(MNISTR_Encoder, self).__init__()

        row = input_shape[2]
        self.latent_row = ((row - 4) - 4) / 2
        self.latent_dim = 48 * self.latent_row**2
        self.feature = nn.Sequential()
        self.feature.add_module('f_conv1', nn.Conv2d(1, 32, kernel_size=5))
        self.feature.add_module('f_relu1', nn.ReLU(True))
        self.feature.add_module('f_conv2', nn.Conv2d(32, 48, kernel_size=5))
        self.feature.add_module('f_relu2', nn.ReLU(True))
        self.feature.add_module('f_pool2', nn.MaxPool2d(2))
        self.feature.add_module('f_flat', Flatten())
        self.feature.add_module('f_fc1', nn.Linear(self.latent_dim, 100))
        self.feature.add_module('f_relu1', nn.ReLU(True))  # FIXME
        self.feature.add_module('f_fc2', nn.Linear(100, self.latent_dim))
        self.feature.add_module('f_relu2', nn.ReLU(True))

    def forward(self, input_data):
        feature = self.feature(input_data)
        return feature

    def output_shape(self):
        return (None, self.latent_dim)


class MNISTR_Classifier(nn.Module):
    def __init__(self, num_classes, input_shape, last_layer='log_softmax'):
        super(MNISTR_Classifier, self).__init__()
        self.class_classifier = nn.Sequential()
        self.class_classifier.add_module('c_fc1', nn.Linear(input_shape[1], 100))
        self.class_classifier.add_module('c_relu1', nn.ReLU(True))
        self.class_classifier.add_module('c_fc2', nn.Linear(100, 100))
        self.class_classifier.add_module('c_relu2', nn.ReLU(True))
        self.class_classifier.add_module('c_fc3', nn.Linear(100, num_classes))
        if last_layer == 'log_softmax':
            self.class_classifier.add_module('c_log_softmax', nn.LogSoftmax(dim=1))
        elif last_layer == 'softmax':
            self.class_classifier.add_module('c_softmax', nn.Softmax(dim=1))
        elif last_layer == 'linear':
            pass
        else:
            raise NameError()

    def forward(self, input_data):
        return self.class_classifier(input_data)


In [3]:
from pytransfer.datasets.utils import prepare_datasets
from pytransfer.datasets import MNISTR, OppG

train_dataset, valid_dataset, test_dataset = prepare_datasets('M0','train', MNISTR, True)
E = MNISTR_Encoder(train_dataset.get('input_shape'))
M = MNISTR_Classifier(train_dataset.get('num_classes'), E.output_shape())

In [4]:
# parameters
discriminator_config = {
    'num_domains': 5, 
    'input_shape': E.output_shape(), 
    'hiddens': [800, 100]
}
K = 1
alpha = 0.0

batch_size = 128
lr = 0.001

In [5]:
loader = data.DataLoader(train_dataset, batch_size=12800, shuffle=False)

In [6]:
loader.__iter__().__next__()[2].max()

4

In [7]:
# create learner and add regularizer
reload(regularizer)
learner = regularizer.Learner(E, M).cuda()
reg1 = regularizer.DANReguralizer(learner=learner, discriminator_config=discriminator_config, K=1).cuda()
reg1.set_optimizer(optim.RMSprop(filter(lambda p: p.requires_grad, reg1.D.parameters()), lr=lr, alpha=0.9))
learner.add_regularizer('d', reg1, alpha)

learner.set_loader(train_dataset, batch_size)

<torch.utils.data.dataloader.DataLoader at 0x7f8024334dd0>

In [8]:
# train
optimizer = optim.RMSprop(learner.parameters(), lr=lr, alpha=0.9)

for i in range(100):
    # update regularizers
    learner.update_regularizers()
    
    # update learner's parameter
    optimizer.zero_grad()
    X, y, d = learner.get_batch()
    loss = learner.loss(X, y, d)
    loss.backward()
    optimizer.step()
    print(learner.losses(X, y, d))

{'y': 2.289016008377075, 0: -1.601906657218933}
{'y': 2.2774336338043213, 0: -1.596584439277649}
{'y': 2.296994209289551, 0: -1.609724998474121}
{'y': 2.2771193981170654, 0: -1.6337519884109497}
{'y': 2.2131567001342773, 0: -1.6508123874664307}
{'y': 2.2394330501556396, 0: -1.590888500213623}
{'y': 2.1626343727111816, 0: -1.6202645301818848}
{'y': 2.1113083362579346, 0: -1.561773657798767}
{'y': 2.146761417388916, 0: -1.5715571641921997}
{'y': 1.93916654586792, 0: -1.6155956983566284}
{'y': 1.9040277004241943, 0: -1.5821532011032104}
{'y': 1.6744723320007324, 0: -1.639083981513977}
{'y': 1.6856836080551147, 0: -1.56588613986969}
{'y': 2.7197279930114746, 0: -1.6215894222259521}
{'y': 1.741658329963684, 0: -1.5374647378921509}
{'y': 1.7017606496810913, 0: -1.5392876863479614}
{'y': 1.5270133018493652, 0: -1.6306371688842773}
{'y': 1.4606997966766357, 0: -1.5209839344024658}
{'y': 1.3277032375335693, 0: -1.5703285932540894}
{'y': 1.2959564924240112, 0: -1.5330469608306885}
{'y': 1.174832

In [None]:
train_dataset, valid_dataset, test_dataset = prepare_datasets('M0','train', MNISTR, True)
E = MNISTR_Encoder(train_dataset.get('input_shape'))
M = MNISTR_Classifier(train_dataset.get('num_classes'), E.output_shape())

# create learner and add regularizer
reload(regularizer)
learner = regularizer.Learner(E, M).cuda()
reg = regularizer.MultilabelDAN(learner=learner, discriminator_config=discriminator_config, K=1).cuda()
reg.set_optimizer(optim.RMSprop(filter(lambda p: p.requires_grad, reg.D.parameters()), lr=lr, alpha=0.9))
reg.set_loader(train_dataset, batch_size=128)
learner.add_regularizer(reg, 0.0001)

learner.set_loader(train_dataset, batch_size)


In [117]:
# train
optimizer = optim.RMSprop(learner.parameters(), lr=lr, alpha=0.9)

for i in range(100):
    # update regularizers
    learner.update_regularizers()
    
    # update learner's parameter
    optimizer.zero_grad()
    X, y, d = learner.get_batch()
    loss = learner.loss(X, y, d)
    loss.backward()
    optimizer.step()
    print(learner.losses(X, y, d))

{'y': 2.2946529388427734, 0: 0.02334335446357727}
{'y': 2.2772059440612793, 0: 1.053621530532837}
{'y': 2.2896292209625244, 0: 0.31895527243614197}
{'y': 2.2663674354553223, 0: 1.4587292671203613}
{'y': 2.19545841217041, 0: 3.093266248703003}
{'y': 2.2725424766540527, 0: 1.4379481077194214}
{'y': 2.1823904514312744, 0: 2.775700807571411}
{'y': 2.0532288551330566, 0: 2.4621403217315674}
{'y': 2.044004440307617, 0: 3.972172737121582}
{'y': 1.9444072246551514, 0: 8.164834976196289}
{'y': 2.2076573371887207, 0: 0.7531702518463135}
{'y': 2.011641263961792, 0: 3.9575748443603516}
{'y': 1.8873004913330078, 0: 2.9986257553100586}
{'y': 1.616126537322998, 0: 7.115194320678711}
{'y': 1.5193337202072144, 0: 16.172590255737305}
{'y': 1.8144150972366333, 0: 8.334641456604004}
{'y': 1.6263090372085571, 0: 11.57399845123291}
{'y': 1.3295502662658691, 0: 15.37466812133789}
{'y': 1.4336886405944824, 0: 20.17281150817871}
{'y': 1.3697203397750854, 0: 13.087739944458008}
{'y': 1.3566240072250366, 0: 14.6

### Multiple Adversares

In [99]:
reload(regularizer)

train_dataset, valid_dataset, test_dataset = prepare_datasets('M0','train', MNISTR, True)
E = MNISTR_Encoder(train_dataset.get('input_shape'))
M = MNISTR_Classifier(train_dataset.get('num_classes'), E.output_shape())
learner = regularizer.Learner(E, M).cuda()

# create learner and add regularizer
discriminator_config = {
    'num_domains': 5, 
    'input_shape': E.output_shape(), 
    'hiddens': [100], 
    'use_softmax': False, 
}
reg = regularizer.MultipleDAN(learner=learner, num_discriminator=10, discriminator_config=discriminator_config, K=10, KL_weight=100).cuda()
reg.set_optimizer(optim.RMSprop(filter(lambda p: p.requires_grad, reg.parameters()), lr=lr, alpha=0.9))
learner.add_regularizer("d", reg, 1.0)

learner.set_loader(train_dataset, batch_size)


<torch.utils.data.dataloader.DataLoader at 0x7f18600fe490>

In [100]:
print(learner.evaluate(data.DataLoader(train_dataset, batch_size=128), 100))

OrderedDict([('y-accuracy', 0.0965), ('y-f1macro', 0.017601459188326493), ('y-loss', 0.7378050613403321), ('d-accuracy', 0.2), ('d-f1macro', 0.06666666666666668), ('d-loss', 0.515129667520523)])


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [102]:
# train
optimizer = optim.RMSprop(learner.parameters(), lr=lr, alpha=0.9)

for i in range(10):
    # update regularizers
    learner.update_regularizers()
    
    # update learner's parameter
    optimizer.zero_grad()
    X, y, d = learner.get_batch()
    loss = learner.loss(X, y, d)
    loss.backward()
    optimizer.step()
print(learner.evaluate(data.DataLoader(train_dataset, batch_size=128), None))

OrderedDict([('y-accuracy', 0.86975), ('y-f1macro', 0.8691818387545611), ('y-loss', 0.44405343011021614), ('d-accuracy', 0.2975), ('d-f1macro', 0.2657326657286933), ('d-loss', 1.5444549545645714)])


### Use pretrained model

In [8]:
class Encoder(nn.Module):
    def __init__(self, input_shape):
        super(Encoder, self).__init__()

        row = input_shape[2]
        self.input_shape = input_shape
        self.latent_row = ((row - 4) - 4) / 2

        self.feature = nn.Sequential()
        self.feature.add_module('f_conv1', nn.Conv2d(input_shape[0], 32, kernel_size=5))
        self.feature.add_module('f_relu1', nn.ReLU(True))
        self.feature.add_module('f_conv2', nn.Conv2d(32, 48, kernel_size=5))
        self.feature.add_module('f_relu2', nn.ReLU(True))
        self.feature.add_module('f_pool2', nn.MaxPool2d(2))
        self.feature.add_module('flatten', Flatten())
        self.feature.add_module('c_fc1', nn.Linear(48*self.latent_row**2, 100))
        self.feature.add_module('c_relu1', nn.ReLU(True))

    def forward(self, input_data):
        feature = self.feature(input_data)
        return feature
    
    def output_shape(self):
        return (None, 100)


class Classifier(nn.Module):
    def __init__(self, num_classes, input_shape):
        super(Classifier, self).__init__()
        self.class_classifier = nn.Sequential()
        self.class_classifier.add_module('c_fc2', nn.Linear(input_shape[1], 100))
        self.class_classifier.add_module('c_relu2', nn.ReLU(True))
        self.class_classifier.add_module('c_fc3', nn.Linear(100, num_classes))
        self.class_classifier.add_module('c_softmax', nn.LogSoftmax(dim=-1))

    def forward(self, input_data):
        return self.class_classifier(input_data)

train_dataset, valid_dataset, test_dataset = prepare_datasets('M0','train', MNISTR, True)
E = Encoder(train_dataset.get('input_shape'))
M = Classifier(train_dataset.get('num_classes'), E.output_shape())
learner = regularizer.Learner(E, M).cuda()

In [9]:
E_path = './../../similarity_confusion_training/pretrain_model/{}-{}-{}-E.pth'.format('mnistr', 'train', 'M0')
M_path = './../../similarity_confusion_training/pretrain_model/{}-{}-{}-M.pth'.format('mnistr', 'train', 'M0')

In [10]:
learner.E.load_state_dict(torch.load(E_path))
learner.M.load_state_dict(torch.load(M_path))

In [11]:
loader = data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [12]:
loader = data.DataLoader(test_dataset, batch_size=128, shuffle=False)
learner.evaluate(loader)

OrderedDict([('y-accuracy', 0.801),
             ('y-f1macro', 0.8036341464847403),
             ('y-loss', 1.1135996356606483)])

### H Divergence

In [54]:
from regularizer import Discriminator
from pytransfer.datasets.base import Subset
from pytransfer.datasets.utils import get_joint_valid_dataloader

In [128]:
class HDivergence(nn.Module):
    def __init__(self, learner, H_hiddens):
        super(HDivergence, self).__init__()
        self.learner = learner
        H_config = {
            'num_domains': 1, 
            'input_shape': learner.E.output_shape(),
            'use_softmax': False, 
            'hiddens': H_hiddens
        }
        
        self.H = Discriminator(**H_config)
        self.criterion = torch.nn.BCELoss()
        
    def parameters(self):
        return self.H.parameters()
    
    def forward(self, z):
        return nn.functional.sigmoid(self.H(z))
    
    def set_datasets(self, source, target, valid_split=0.0):
        # reduce source dataset
        indices = np.arange(len(source))
        r = np.random.RandomState(1234)
        r.shuffle(indices)
        source = Subset(source, indices[:len(target)])
        
        # prepar validation data
        if valid_split == 0.0:
            source_train, source_test = source, source
            target_train, target_test = target, target
        else:
            num_train = int(len(target) * (1-valid_split))
            indices = np.arange(len(target))
            r = np.random.RandomState(1234)
            r.shuffle(indices)
            self.source_train, self.source_test = Subset(source, indices[:num_train]), Subset(source, indices[num_train:])
            self.target_train, self.target_test = Subset(target, indices[:num_train]), Subset(target, indices[num_train:])
        return self.source_train, self.source_test, self.target_train, self.target_test
        
    def find_sup(self, source, target, batch_size=128, valid_split=0.5, num_iterations=1000, verbose=0):
        criterion = torch.nn.BCELoss()
        optimizer = optim.RMSprop(self.H.parameters(), lr=0.001, alpha=0.9)
        source_train, source_test, target_train, target_test = reg.set_datasets(train_dataset, test_dataset, valid_split=valid_split)
        
        batch_size = batch_size/2
        source_loader = data.DataLoader(source_train, batch_size=batch_size, shuffle=True)
        target_loader = data.DataLoader(target_train, batch_size=batch_size, shuffle=True)

        counter = 0

        while True:
            for (X_s, _, _), (X_t, _, _) in zip(source_loader, target_loader):
                optimizer.zero_grad()
                X_s = Variable(X_s.float()).cuda()
                X_t = Variable(X_t.float()).cuda()
                y_s = Variable(torch.FloatTensor(X_s.size(0), 1).fill_(1), requires_grad=False).cuda()
                y_t = Variable(torch.FloatTensor(X_t.size(0), 1).fill_(0), requires_grad=False).cuda()
                z_s = learner.E(X_s)
                z_t = learner.E(X_t)

                y_s_pred = reg(z_s)
                y_t_pred = reg(z_t)
                s_loss = criterion(y_s_pred, y_s)
                t_loss = criterion(y_t_pred, y_t)
                loss = s_loss + t_loss
                loss.backward()
                optimizer.step()

                counter += 1
                if (verbose > 0) and (counter % 100) == 0:
                    print(reg.evaluate(source_train, target_train), reg.evaluate(source_test, target_test))
            if counter >= num_iterations:
                break;
                
    def evaluate(self, source, target, batch_size=None):
        if batch_size is None:
            batch_size = len(source)
        source_loader = data.DataLoader(source, batch_size=batch_size, shuffle=True)
        target_loader = data.DataLoader(target, batch_size=batch_size, shuffle=True)
        
        h_divergence = 0
        nb_batch = len(source_loader)
        for (X_s, _, _), (X_t, _, _) in zip(source_loader, target_loader):
            X_s = Variable(X_s.float()).cuda()
            X_t = Variable(X_t.float()).cuda()
            h_divergence += self._evaluate(X_s, X_t).data[0]
        result = {}
        result['divergence'] = h_divergence/nb_batch
        return result

    def _evaluate(self, X_s, X_t):
        self.learner.eval()
        self.H.eval()
        Pr_s = self(self.learner.E(X_s))
        Pr_t = self(self.learner.E(X_t))
        self.learner.train()
        self.H.train()
        return 2 * torch.abs(Pr_s.mean() - Pr_t.mean())

In [131]:
reg = HDivergence(learner, [800]).cuda()
reg.find_sup(train_dataset, test_dataset, batch_size=128, valid_split=0.1, num_iterations=5000, verbose=1)

({'divergence': 0.6415772438049316}, {'divergence': 0.5632404088973999})
({'divergence': 1.1329022645950317}, {'divergence': 0.9242223501205444})
({'divergence': 1.177211046218872}, {'divergence': 0.9128605127334595})
({'divergence': 1.34457266330719}, {'divergence': 1.0325337648391724})
({'divergence': 1.452427864074707}, {'divergence': 1.0759499073028564})
({'divergence': 1.5082886219024658}, {'divergence': 1.092562198638916})
({'divergence': 1.570929765701294}, {'divergence': 1.052901268005371})
({'divergence': 1.4367001056671143}, {'divergence': 1.0058636665344238})
({'divergence': 1.615248203277588}, {'divergence': 1.1351488828659058})
({'divergence': 1.424480676651001}, {'divergence': 0.953916072845459})
({'divergence': 1.7244157791137695}, {'divergence': 1.0923317670822144})
({'divergence': 1.6008427143096924}, {'divergence': 1.0103230476379395})
({'divergence': 1.7372565269470215}, {'divergence': 1.1225272417068481})
({'divergence': 1.6920890808105469}, {'divergence': 1.0500255

In [127]:
reg.evaluate(source_test, target_test)

{'divergence': 1.2436727285385132}