# KL-DIVERGENCE
- $KL-DIV(P||Q) = E[\frac{logP(x)}{logQ(x)}] = \sum_{x}P(x)log\frac{P(x)}{Q(x)}$

In [1]:
from scipy.special import kl_div
import numpy as np

# 두 이산 확률 분포 (길이 같고 합이 1)
P = np.array([0.1, 0.4, 0.3, 0.2])
Q = np.array([0.3, 0.3, 0.1, 0.3])
kl_divergence = kl_div(P,Q) # np.sum(np.log(P/Q)*P)
print(np.sum(kl_divergence))
print(np.sum(np.log(P/Q)*P))

0.25370226509270144
0.2537022650927015


# Jensen Shannon Divergence

- $ JSD(P||Q) = \frac{1}{2} KL(P||M) + \frac{1}{2}KL(Q||M) $


- $M = \frac{1}{2}(P+Q)$

In [2]:
from scipy.spatial.distance import jensenshannon
jsd = jensenshannon(P, Q, base=2) ** 2  # scipy는 sqrt(JSD)를 반환하므로 제곱
print("Jensen-Shannon Divergence:", jsd)
M = (P+Q)/2
np.sum(np.log2(P/M)*P)*0.5 + np.sum(np.log2(Q/M)*Q)*0.5

Jensen-Shannon Divergence: 0.0879212539906917


0.08792125399069166

# Total Variation 전변동
- $sup|P_{r}(A) - P_{g}(A)|$
=> Supremum (두 확률 분포의 측정값이 벌어질 수 있는 가장 큰 값 = 사이의 최대거리)

In [3]:
import os
import numpy as np
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch

In [4]:
epochs, batch_size = 100,64
lr,b1,b2 = 2e-4,0.5,0.999
latent_dim = 100
img_size = 28
channels = 1
img_shape = (channels,img_size,img_size)

if torch.cuda.is_available():
    print("Train on GPU Cuda is Available")
    cuda = True
else:
    print("Train on the CPU, CUDA IS NOT Available")
    cuda = False

Train on GPU Cuda is Available


In [5]:
os.makedirs("FashionMNIST_DATASET",exist_ok=True)

dataloader = DataLoader(
    datasets.FashionMNIST(
        'FashionMNIST_DATASET',
        train=True,
        download=True,
        transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            transforms.Normalize([0.5],[0.5])
        ])
    ),
    batch_size=batch_size,
    shuffle=True
)

3584x28 and 100x128

In [6]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator,self).__init__()
    
        def block(in_feature, out_feature, normalize=True):
            layers = [nn.Linear(in_feature,out_feature)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feature,0.8))
            layers.append(nn.LeakyReLU(0.2,inplace=True))
            return layers
        
        self.model = nn.Sequential(
            *block(latent_dim,128,normalize=False),
            *block(128,256),
            *block(256,512),
            *block(512,1024),
            nn.Linear(1024,int(np.prod(img_shape))),
            nn.Tanh() # -1 ~ 1 (CLIPPING)
        )
    def forward(self,z):
        img = self.model(z)
        img = img.view(img.shape[0],*img_shape)
        return img
    
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator,self).__init__()

        self.model = nn.Sequential(
            nn.Linear(int(np.prod(img_shape)),512),
            nn.LeakyReLU(0.2,inplace=True),
            nn.Linear(512,256),
            nn.LeakyReLU(0.2,inplace=True),
            nn.Linear(256,1),
            nn.Sigmoid(),
        )
    
    def forward(self,img):
        flat_img = img.view(img.size(0),-1)
        pred = self.model(flat_img)
        return pred

In [None]:
G = Generator()
D = Discriminator()
adversarial_loss = torch.nn.BCELoss()

if cuda:
    G.cuda()
    D.cuda()
    adversarial_loss.cuda()

optimizer_G = torch.optim.Adam(G.parameters(),lr=lr,betas=(b1,b2))
optimizer_D = torch.optim.Adam(D.parameters(),lr=lr,betas=(b1,b2))
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
os.makedirs("GAN_RESULTS",exist_ok=True)

for epoch in range(epochs):
    for i, (imgs,_) in enumerate(dataloader):
        real = Variable(Tensor(imgs.size(0),1).fill_(1.0),requires_grad=False)
        fake = Variable(Tensor(imgs.size(0),1).fill_(0.0),requires_grad=False)
 
        real_imgs = Variable(imgs.type(Tensor))
        
        ## Train G
        optimizer_G.zero_grad()
        z = Variable(Tensor(np.random.normal(0,1, (imgs.shape[0],latent_dim))))
        gen_imgs = G(z)
        g_loss = adversarial_loss(D(gen_imgs),real)
        g_loss.backward()
        optimizer_G.step()
        
        ## Train D
        optimizer_D.zero_grad()
        real_loss = adversarial_loss(D(real_imgs),real)
        fake_loss = adversarial_loss(D(gen_imgs.detach()),fake)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()
        
    print(f"EPOCH [{epoch+1}/{epochs}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]")
    save_image(gen_imgs.data[:25], f"GAN_RESULTS/epoch_{epoch}.png",nrow=5,normalize=True)
torch.save(G.state_dict(),'./Generator.pth')
torch.save(D.state_dict(),'./Discriminator.pth')

  real = Variable(Tensor(imgs.size(0),1).fill_(1.0),requires_grad=False)


tensor([[[[-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          ...,
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]]],


        [[[-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          ...,
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]]],


        [[[-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.000

KeyboardInterrupt: 