In [None]:
import torch.nn as nn
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
from PIL import Image

from tqdm import tqdm

# Seed the default generator plus the current and all CUDA generators with 0
# so that weight initialisation and noise sampling are repeatable across runs.
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(0)
del _seed_fn

In [None]:
!pip install -q wandb
import wandb
wandb.login()

config = {
    "dataset": "celeba",
    "gpu": "colab",
    "model": "GAN",
}

wandb.init(project="assignment_2", config=config)
# api key 3a629afb6d101b0cc3a0123089694d3b03f196e3

[34m[1mwandb[0m: Currently logged in as: [33mvinnyshin[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; force_remount refreshes an
# already-mounted drive so the cell can be re-run safely.
drive.mount(
    mountpoint='/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [None]:
from zipfile import ZipFile

# Archive with the training faces, stored on the mounted Drive.
zip_path = '/content/drive/MyDrive/Univ/4-1/Deep Learning/Assignment/assignment_2/assignment_2_student-1/training_data/faces.zip'

# The archive handle is kept open on purpose: the loading cell reads the
# individual members from it.
faces_zip = ZipFile(zip_path, mode='r')
faces_name_list = faces_zip.namelist()

# Filled by the loading cell with one PIL image per archive member.
faces_PIL_img = list()

In [None]:
# Takes roughly 50 seconds.
# Start from an empty list so that re-running this cell does not append every
# image a second time.
faces_PIL_img.clear()
for member_name in faces_name_list:
  # Directory entries (such as the leading "celeba/" folder) end with "/" and
  # contain no image data; skip them by name instead of assuming index 0.
  if member_name.endswith('/'): continue
  _file = faces_zip.open(member_name)
  img = Image.open(_file)
  faces_PIL_img.append(img)

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset over an in-memory sequence of images.

    Args:
        imgs: indexable collection of samples (``len`` and ``[]`` are used).
        transforms: optional callable applied to a sample before it is
            returned; when falsy the sample is returned unchanged.
    """

    def __init__(self, imgs, transforms=None):
        super().__init__()
        self.imgs = imgs
        self.transforms = transforms

    def __len__(self):
        """Number of samples held by the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return sample ``idx``, passed through ``self.transforms`` if set."""
        sample = self.imgs[idx]
        return self.transforms(sample) if self.transforms else sample

In [None]:
# transform = transforms.Compose([
#   transforms.ToTensor()
# ])

In [None]:
# np.array(transform(faces_PIL_img[0]))

In [None]:
# train_meanRGB = [np.mean(np.array(transform(x)), axis=(1,2)) for x in faces_PIL_img]
# train_stdRGB = [np.std(np.array(transform(x)), axis=(1,2)) for x in faces_PIL_img]

# train_meanR = np.mean([m[0] for m in train_meanRGB])
# train_meanG = np.mean([m[1] for m in train_meanRGB])
# train_meanB = np.mean([m[2] for m in train_meanRGB])

# train_stdR = np.mean([s[0] for s in train_stdRGB])
# train_stdG = np.mean([s[1] for s in train_stdRGB])
# train_stdB = np.mean([s[2] for s in train_stdRGB])

In [None]:
# train_meanRGB

In [None]:
# print(train_meanR)
# print(train_meanG)
# print(train_meanB)
# print(train_stdR)
# print(train_stdG)
# print(train_stdB)

In [None]:
# Scale pixels to [-1, 1]: ToTensor() yields [0, 1] and (x - 0.5) / 0.5 maps
# that onto [-1, 1]. This must match the generator, whose last layer is Tanh
# (output range [-1, 1]) and whose samples are decoded for the FID score with
# `x * 127.5 + 127.5`. Normalising with the dataset mean/std instead puts real
# images in a range the generator cannot produce, so the discriminator can
# separate real from fake by value range alone.
transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

dataset = CustomDataset(faces_PIL_img, transforms=transform)

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hyper-parameters shared by both optimisers / the data loader.
lr = 0.0002
batch_size = 128

train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
class Generator(nn.Module):
    """DCGAN-style generator: latent vector -> 64x64 image in [-1, 1].

    Five transposed convolutions upsample 1x1 -> 4 -> 8 -> 16 -> 32 -> 64.
    The defaults reproduce the original fixed architecture.

    Args:
        latent_dim: number of channels of the input noise (default 128).
        feature_maps: base width; hidden layers use 8x, 4x, 2x and 1x of it.
        out_channels: channels of the generated image (default 3).
    """

    def __init__(self, latent_dim=128, feature_maps=64, out_channels=3):
        super(Generator, self).__init__()
        # Exposed so callers can size the noise tensor without hard-coding it.
        self.latent_dim = latent_dim
        self.main = nn.Sequential(
            # (latent_dim, 1, 1) -> (feature_maps * 8, 4, 4)
            nn.ConvTranspose2d(latent_dim, feature_maps * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.ReLU(),
            # -> (feature_maps * 4, 8, 8)
            nn.ConvTranspose2d(feature_maps * 8, feature_maps * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.ReLU(),
            # -> (feature_maps * 2, 16, 16)
            nn.ConvTranspose2d(feature_maps * 4, feature_maps * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.ReLU(),
            # -> (feature_maps, 32, 32)
            nn.ConvTranspose2d(feature_maps * 2, feature_maps, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps),
            nn.ReLU(),
            # -> (out_channels, 64, 64), squashed to [-1, 1] by Tanh
            nn.ConvTranspose2d(feature_maps, out_channels, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        """Generate images from noise.

        Args:
            input: noise tensor of shape [batch size, latent_dim, 1, 1].

        Returns:
            Tensor of shape [batch size, out_channels, 64, 64] in [-1, 1].
        """
        return self.main(input)

In [None]:
class Discriminator(nn.Module):
    """Binary real/fake classifier for square RGB images.

    Two conv blocks (5x5 conv, no padding -> BatchNorm -> LeakyReLU -> 2x2
    max-pool -> Dropout) are followed by four fully connected blocks; the last
    one ends in a Sigmoid, so the output is a probability suitable for BCELoss.

    Args:
        image_size: side length of the input images (default 64).
        dropout: dropout probability used in every block (default 0.3).

    Attributes:
        dropout: the dropout probability.
        image_size: after construction, the side length of the feature map
            that is flattened into the first linear layer (13 for 64x64 input).
    """

    def __init__(self, image_size=64, dropout=0.3):
      super(Discriminator, self).__init__()

      self.dropout = dropout
      self.image_size = image_size

      self.layer1 = nn.Sequential(
          nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
          nn.BatchNorm2d(6),
          nn.LeakyReLU(0.2, inplace=True),
          nn.MaxPool2d(2),
          nn.Dropout(self.dropout)
      )

      # 5x5 conv without padding shrinks each side by 4, pooling halves it.
      self.image_size = (self.image_size - 5 + 1) // 2

      self.layer2 = nn.Sequential(
          nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
          nn.BatchNorm2d(16),
          nn.LeakyReLU(0.2, inplace=True),
          nn.MaxPool2d(2),
          nn.Dropout(self.dropout)
      )

      self.image_size = (self.image_size - 5 + 1) // 2

      self.layer3 = nn.Sequential(
          nn.Linear(in_features=16 * self.image_size * self.image_size, out_features=1024, bias=True),
          nn.BatchNorm1d(1024),
          nn.LeakyReLU(0.2, inplace=True),
          nn.Dropout(self.dropout)
      )

      self.layer4 = nn.Sequential(
          nn.Linear(in_features=1024, out_features=512, bias=True),
          nn.BatchNorm1d(512),
          nn.LeakyReLU(0.2, inplace=True),
          nn.Dropout(self.dropout)
      )

      self.layer5 = nn.Sequential(
          nn.Linear(in_features=512, out_features=256, bias=True),
          nn.BatchNorm1d(256),
          nn.LeakyReLU(0.2, inplace=True),
          nn.Dropout(self.dropout)
      )

      # The Sigmoid lives inside layer6; no separate sigmoid module is needed.
      self.layer6 = nn.Sequential(
          nn.Linear(in_features=256, out_features=1, bias=True),
          nn.Sigmoid(),
      )

    def forward(self, input):
      """Return the probability that each image is real.

      Args:
          input: tensor of shape [batch size, 3, H, W] where H and W equal the
              ``image_size`` given to the constructor.

      Returns:
          Tensor of shape [batch size, 1] with values in [0, 1].
      """
      input = self.layer1(input)
      input = self.layer2(input)
      # Flatten per sample. Keeping the batch dimension explicit means an input
      # of the wrong spatial size fails in the linear layer instead of being
      # silently reshaped into a different batch size.
      input = input.view(input.size(0), -1)
      input = self.layer3(input)
      input = self.layer4(input)
      input = self.layer5(input)
      input = self.layer6(input)
      return input

In [None]:
# Build both networks (generator first, then discriminator) and move their
# parameters to the selected device.
generator = Generator()
generator = generator.to(device)

discriminator = Discriminator()
discriminator = discriminator.to(device)

In [None]:
# Binary cross-entropy on probabilities (the discriminator already ends in a
# Sigmoid), and one Adam optimiser per network sharing the same learning rate.
criterion = nn.BCELoss()
g_optimizer = torch.optim.Adam(params=generator.parameters(), lr=lr)
d_optimizer = torch.optim.Adam(params=discriminator.parameters(), lr=lr)

In [None]:
import time
import PIL
import matplotlib.pyplot as plt

epochs = 200
total_batch_num = len(train_dataloader)

# Converter used only for logging samples. It has its own name so the
# dataset's `transform` pipeline is not overwritten by this cell.
to_pil = transforms.ToPILImage()

for epoch in range(epochs):
  start = time.time()

  generator.train()
  discriminator.train()

  # Accumulated as plain Python floats (see .item() below) so the running sums
  # do not hold on to autograd graphs or GPU memory.
  avg_g_cost = 0.0
  avg_d_cost = 0.0

  for step, batch in enumerate(train_dataloader):
    b_x = batch.to(device)

    # The last batch may be smaller than batch_size.
    num_img = len(b_x)

    real_label = torch.ones((num_img, 1), device=device)
    fake_label = torch.zeros((num_img, 1), device=device)

    # ---- Discriminator step ----
    # Train the discriminator to output values close to 1 for real images.
    real_logit = discriminator(b_x)
    d_real_loss = criterion(real_logit, real_label)

    # randn samples from a normal distribution (rand would be uniform).
    z = torch.randn((num_img, 128, 1, 1), requires_grad=False).to(device)
    # detach(): the discriminator update must not backpropagate into the
    # generator; without it the generator's backward pass is computed and then
    # thrown away.
    fake_data = generator(z).detach()

    # Train the discriminator to output values close to 0 for fake images.
    fake_logit = discriminator(fake_data)
    d_fake_loss = criterion(fake_logit, fake_label)

    d_loss = d_real_loss + d_fake_loss
    d_optimizer.zero_grad()
    d_loss.backward()
    d_optimizer.step()

    # ---- Generator step ----
    z = torch.randn((num_img, 128, 1, 1), requires_grad=False).to(device)
    fake_data = generator(z)
    fake_logit = discriminator(fake_data)
    # Train the generator so that its fakes are scored close to 1.
    g_loss = criterion(fake_logit, real_label)

    g_optimizer.zero_grad()
    g_loss.backward()
    g_optimizer.step()

    avg_d_cost += d_loss.item()
    avg_g_cost += g_loss.item()

  avg_d_cost /= total_batch_num
  avg_g_cost /= total_batch_num

  # observe fake images
  generator.eval()
  with torch.no_grad():
    z = torch.randn((64, 128, 1, 1), requires_grad=False).to(device)
    fake_data = generator(z)

    fake_img = fake_data.detach().cpu()
    # The generator ends in Tanh, so samples lie in [-1, 1]; ToPILImage expects
    # float tensors in [0, 1]. Rescale before converting, otherwise negative
    # values are corrupted when cast to bytes.
    fake_img = (fake_img * 0.5 + 0.5).clamp(0.0, 1.0)

    wandb.log({
        "discriminator loss": avg_d_cost,
        "generator loss": avg_g_cost,
        "fake image": [wandb.Image(to_pil(i)) for i in fake_img]
    })

  print("time :", time.time() - start)
  print(f'Epoch: {epoch} \t discriminator loss: {avg_d_cost} \t generator loss: {avg_g_cost}')
    

time : 182.22099804878235
Epoch: 0 	 discriminator loss: 1.3772577047348022 	 generator loss: 0.7190449237823486
time : 180.89605855941772
Epoch: 1 	 discriminator loss: 1.313231110572815 	 generator loss: 0.7798025012016296
time : 180.9370458126068
Epoch: 2 	 discriminator loss: 0.7756184339523315 	 generator loss: 1.7067111730575562
time : 180.3532636165619
Epoch: 3 	 discriminator loss: 1.0931658744812012 	 generator loss: 1.3530759811401367
time : 180.7745921611786
Epoch: 4 	 discriminator loss: 1.2958927154541016 	 generator loss: 0.9882283210754395
time : 180.66463351249695
Epoch: 5 	 discriminator loss: 1.2792223691940308 	 generator loss: 0.9575323462486267
time : 180.68183183670044
Epoch: 6 	 discriminator loss: 1.315573811531067 	 generator loss: 0.8702454566955566
time : 180.79141998291016
Epoch: 7 	 discriminator loss: 1.3069047927856445 	 generator loss: 0.8807793855667114
time : 180.47492599487305
Epoch: 8 	 discriminator loss: 1.3038314580917358 	 generator loss: 0.85943

In [None]:
# Generates the fake images used to measure the FID score.
# Run this last, after generator training has finished, to save the images.
import os

# Image.save does not create missing directories.
os.makedirs("./fake_img", exist_ok=True)

# Use the BatchNorm running statistics rather than per-batch statistics.
generator.eval()

# The noise must have 128 channels: the generator's first layer is
# ConvTranspose2d(128, ...), so 100-channel noise raises a shape error.
test_noise = torch.randn(3000, 128, 1, 1, device=device)
with torch.no_grad():
    test_fake = generator(test_noise).detach().cpu()

    for index, img in enumerate(test_fake):
        # CHW -> HWC, then map the Tanh range [-1, 1] onto [0, 255].
        fake = np.transpose(img.detach().cpu().numpy(), [1, 2, 0])
        fake = (fake * 127.5 + 127.5).astype(np.uint8)
        im = Image.fromarray(fake)
        im.save("./fake_img/fake_sample{}.jpeg".format(index))

In [None]:
import os
import torch

# Import only the function that is used; a star import would also pull the
# module's other public names into the notebook namespace and could silently
# shadow names defined in earlier cells.
from pytorch_fid.fid_score import calculate_fid_given_paths

# Allow duplicate OpenMP runtimes to be loaded.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# NOTE(review): both paths are relative to the current working directory;
# confirm the extracted real images actually live under training_data/celeba/.
real_img_path = 'training_data/celeba/'
fake_img_path = 'fake_img/'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)

if __name__ == "__main__":
    # FID between the real and generated image folders, using the 2048-d
    # feature layer selected by dims.
    fid = calculate_fid_given_paths(
        paths=[real_img_path, fake_img_path],
        batch_size=128,
        device=device,
        dims=2048
    )

    print("fid score : {}".format(fid))
