# Maximum Likelihood Density Ratio Estimation for MI

paper: http://proceedings.mlr.press/v4/suzuki08a/suzuki08a.pdf

In [1]:
import os 
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import sys
import numpy as np
import pycuda.driver as cuda
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from models import DNN, CNN

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
use_cuda = torch.cuda.is_available()
print("Setting Info")
print("=========")
print("- use_cuda: ", use_cuda)
print("- Path: ", os.getcwd())
print("- PyTorch", torch.__version__)
print("- Python: ", sys.version)

Setting Info
- use_cuda:  True
- Path:  /home/uchiumi/JNNS2019/mnist_pytorch
- PyTorch 1.0.1.post2
- Python:  3.5.2 (default, Nov 12 2018, 13:43:14) 
[GCC 5.4.0 20160609]


## Load Model

In [3]:
# model reload
model = DNN()
PRETRAINED_MODEL_PATH = "/home/uchiumi/JNNS2019/mnist_pytorch/train_log/dnn_mnist__2019-0425-1923.pth"
model.load_state_dict(torch.load(PRETRAINED_MODEL_PATH))

## Data Loader

cf.) https://www.aiworkbox.com/lessons/examine-mnist-dataset-from-pytorch-torchvision

In [4]:
mnist_trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=None)
mnist_testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=None)

In [5]:
X_train_0 = np.asarray(mnist_trainset[0][0]) # image
y_train_0 = mnist_trainset[0][1] # label

In [6]:
# train
train_loader_for_MINE = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./data', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=1, shuffle=False)

In [7]:
# test
test_loader_for_MINE = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./data', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=1, shuffle=False)

## Get layer values (the state of each nodes)

In [8]:
def get_nodes_with_train_data(model):
    model.eval()
    list = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(train_loader_for_MINE):
            result = model(data)
            list.append(result)
    return list

In [9]:
def get_nodes_with_test_data(model):
    model.eval()
    list = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader_for_MINE):            
            result = model(data)
            list.append(result)
    return list

In [10]:
list = get_nodes_with_train_data(model)

  x = F.log_softmax(self.fc5(x))


In [11]:
len(list)

60000

In [12]:
list[0].keys()

dict_keys(['fc3_output', 'model_input', 'fc1_output', 'model_output', 'fc2_output', 'fc4_output'])

In [13]:
print("model_input", list[0]["model_input"].shape)
print("fc1_output",list[0]["fc1_output"].shape)
print("fc2_output",list[0]["fc2_output"].shape)
print("fc3_output",list[0]["fc3_output"].shape)
print("fc4_output",list[0]["fc4_output"].shape)
print("model_output",list[0]["model_output"].shape)

model_input torch.Size([1, 784])
fc1_output torch.Size([1, 1024])
fc2_output torch.Size([1, 512])
fc3_output torch.Size([1, 256])
fc4_output torch.Size([1, 128])
model_output torch.Size([1, 10])


In [14]:
model_input = []
fc1_output = []
fc2_output = []
fc3_output = []
fc4_output = []
model_output = []

for i in range(len(train_loader_for_MINE)):
    model_input.append(list[i]["model_input"].data.numpy().flatten())
    fc1_output.append(list[i]["fc1_output"].data.numpy().flatten())
    fc2_output.append(list[i]["fc2_output"].data.numpy().flatten())
    fc3_output.append(list[i]["fc3_output"].data.numpy().flatten())
    fc4_output.append(list[i]["fc4_output"].data.numpy().flatten())
    model_output.append(list[i]["model_output"].data.numpy().flatten())

In [15]:
model_input = np.array(model_input)
fc1_output = np.array(fc1_output)
fc2_output = np.array(fc1_output)
fc3_output = np.array(fc1_output)
fc4_output = np.array(fc1_output)
model_output = np.array(model_output)

## Maximum Likelihood Density Ratio Estimation for MI

In [16]:
x = model_input
y = fc3_output
z = np.concatenate([x, y], axis=1)

In [17]:
print(x.shape)
print(y.shape)
print(z.shape)

(60000, 784)
(60000, 1024)
(60000, 1808)


$$
    \underset{\alpha \in \mathbb{R}^{b}}{\rm maximize} \, \sum_{i=1}^{n} \log \left( {\alpha}^{\mathrm{T}} \phi(z_i) \right) \\
    s.t. \, \frac{1}{n(n-1)} \sum_{(i, j)} {\alpha}^{\mathrm{T}} \phi(z_i) = 1, \, \alpha \geq 0
$$

$$
    \alpha \in \mathbb{R}^{b}, \, \phi() \in \mathbb{R}^{b}
$$

$$
    \phi(z_i) = \left( \begin{array}{c} \phi_1(z_i) \\ \phi_2(z_i) \\ \vdots \\ \phi_b(z_i) \end{array} \right)
    = \left( \begin{array}{c} k(z_i, c_1) \\ k(z_i, c_2) \\ \vdots \\ k(z_i, c_b) \end{array} \right)
    = \left( \begin{array}{c} \exp\left( - \frac{{|| z_i - c_1||}^{2}}{2 \sigma^2} \right) \\ \exp\left( - \frac{{|| z_i - c_2||}^{2}}{2 \sigma^2} \right) \\ \vdots \\ \exp\left( - \frac{{|| z_i - c_b||}^{2}}{2 \sigma^2} \right) \end{array} \right)
$$


$$
    \frac{\partial L(\alpha)}{\partial \alpha} = \left( \begin{array}{c} \sum_{i=1}^{n} \frac{\phi_{1}(z_i)}{\alpha} \\ \end{array} \left)
$$

#### Kernel function

In [18]:
class RBFkernel():
    def __init__(self, sigma=0.5):
        self.sigma = sigma
        
    def __call__(self, x, y):
        numerator = -1 * np.sum((x - y)**2)
        denominator = 2 * (self.sigma**2)
        return np.exp(numerator / denominator)
    
    def get_params(self):
        return self.sigma
    
    def set_params(self, sigma):
        self.sigma = sigma

#### Density Ratio Estimation

In [None]:
class DensityRatioEstimation():
    def __init__(self, kernel):
        self.kernel = kernel
    
    def fit(self, x, y):
        self.x = x
        self.y = y 
        self.z = np.concatenate([x, y], axis=1)
        self.n = x.shape[0]
        
    def loss(self, alpha, n):
        for i in range(n):
            np.dot(alpha, x)

In [None]:
class KernelRegression():
    def __init__(self, kernel):
        self.kernel = kernel
        
    def fit_kernel(self, X, y, lr=0.01, nb_epoch=1000, log_freq=50):
        self.X = X
        self.y = y
        self.n = X.shape[0] # sample size
        self.alpha = np.full(self.n, 1) # param alpha: initialize
        self.gram_matrix = np.zeros((self.n, self.n))
        
        # Gradient Descent Algorithm to optimize alpha
        for epoch in range(nb_epoch):
            
            # Gram Matrix
            for i in range(self.n):
                for j in range(self.n):
                    self.gram_matrix[i][j] = self.kernel(self.X[i], self.X[j])
                    self.loss, self.loss_grad = self.mse(self.X, self.y, self.alpha, self.gram_matrix)
                    self.alpha = self.alpha - lr * self.loss_grad
                    
            if epoch % log_freq == 0:
                print("epoch: {} \t MSE of sample data: {:.4f}".format(epoch, self.loss))
                        
                        
    def mse(self, X, y, alpha, gram_matrix):
        loss = np.dot((y - np.dot(gram_matrix, alpha)), (y - np.dot(gram_matrix, alpha)))
        loss_grad = -2 * np.dot(gram_matrix.T, (y - np.dot(gram_matrix, alpha)))
        return loss, loss_grad
    
    def predict(self, X_new):
        n_new = X_new.shape[0]
        y_new = np.zeros(n_new)
        for i in range(n_new):
            for j in range(self.n):
                y_new[i] += self.alpha[j] * self.kernel(X_new[i], self.X[j])
        return y_new