In [1]:
from py.Training import training_and_testing, loss_function
from py.vae import NeuralNet as nnet
import py.config as CFG

In [2]:
import os
import glob
from tqdm import tqdm

import torch
import torchvision
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchinfo import summary

from matplotlib import pyplot as plt
plt.rc('font', family='NanumGothic')
from PIL import Image

In [3]:
def rotate_images(image):
    """Return copies of ``image`` rotated by 0, 90, 180 and 270 degrees.

    Every copy is produced by ``TF.rotate``; the result is a list holding the
    four rotations in that angle order.
    """
    return [TF.rotate(image, degrees) for degrees in (0, 90, 180, 270)]

def gauss_noise(image_tensor, sigma=0.05):
    """Add zero-mean Gaussian noise to a tensor and clamp the result to [0, 1].

    Args:
        image_tensor: Tensor to perturb. It is not modified in place.
        sigma: Scale applied to the standard-normal samples, i.e. the
            standard deviation of the added noise.

    Returns:
        A new tensor with the same shape as ``image_tensor`` whose values are
        limited to the range [0, 1].
    """
    # Draw the noise on the same device as the input; a CPU-only noise tensor
    # cannot be added to an input that lives on a GPU.
    noise = torch.randn(image_tensor.size(), device=image_tensor.device) * sigma
    noisy_image = image_tensor + noise
    return torch.clamp(noisy_image, 0, 1)


class VAECustomDataset(Dataset):
    """Dataset of image files that yields clean, noisy and rotated views.

    With a transform, each item is the tuple
    ``(original, noisy, rotated_list, noisy_rotated_list)``. Without one, the
    item is the image object returned by ``Image.open``.
    """

    def __init__(self, file_paths, transform=None, gauss_sigma=0.05):
        """Store the image paths, the optional transform and the noise scale."""
        self.file_paths = file_paths
        self.transform = transform
        self.gauss_sigma = gauss_sigma

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and build its augmented views."""
        image = Image.open(self.file_paths[idx])

        # No transform configured: hand back the opened image unchanged.
        if not self.transform:
            return image

        # Clean view first, then its noisy counterpart.
        original_image = self.transform(image)
        noisy_image = gauss_noise(original_image, self.gauss_sigma)

        # Rotate the untransformed image, then transform every rotation.
        rotated_views = rotate_images(image)
        transformed_images = []
        for rotated in rotated_views:
            transformed_images.append(self.transform(rotated))

        # One noisy counterpart per transformed rotation, in the same order.
        noisy_images = []
        for clean_view in transformed_images:
            noisy_images.append(gauss_noise(clean_view, self.gauss_sigma))

        return original_image, noisy_image, transformed_images, noisy_images


In [4]:
# Pre-training data: the MNIST training split stored under CFG.pre_data_dir
# (download=True), passed through CFG.transform_pre.
trainset = torchvision.datasets.MNIST(
    root=CFG.pre_data_dir,
    train=True,
    download=True,
    transform=CFG.transform_pre,
)

# Shuffled mini-batches of CFG.batch_size, loaded with two worker processes.
train_loader = DataLoader(
    dataset=trainset,
    batch_size=CFG.batch_size,
    shuffle=True,
    num_workers=2,
)

In [5]:
# Collect every *.jpg file directly inside the configured data folder.
folder = CFG.normal_root_dir
jpg_pattern = os.path.join(folder, "*.jpg")
file_path = glob.glob(jpg_pattern)

In [6]:
# Split the image paths 80/20 into training and test sets.
# glob returns paths in a filesystem-dependent order and random_split draws
# from the global RNG by default, so the paths are sorted and a seeded
# generator is used: the same files land in the same split on every run.
SPLIT_SEED = 42
ordered_paths = sorted(file_path)

train_size = int(0.8 * len(ordered_paths))
test_size = len(ordered_paths) - train_size

train_paths, test_paths = random_split(
    ordered_paths,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataset = VAECustomDataset(file_paths=train_paths, transform=CFG.transform)
test_dataset = VAECustomDataset(file_paths=test_paths, transform=CFG.transform)

In [7]:
# Parameter counts noted for earlier variants of the network:
#   using linear layer : The number of parameters: 968903145.0000
#   only conv layer    : The number of parameters: 195025.0000
neuralnet = nnet(
    height=CFG.height,
    width=CFG.width,
    channel=CFG.channel,
    device=CFG.device,
    ngpu=CFG.ngpu,
)

The number of parameters: 25337.0000


In [8]:
# Summarize the encoder on a batch shaped like the configured input
# (batch, channel, height, width). The shape is taken from CFG, the same
# values the network was built with, instead of repeating them as literals.
summary(
    neuralnet.encoder,
    input_size=(CFG.batch_size, CFG.channel, CFG.height, CFG.width),
    device='cpu',
)

Layer (type:depth-idx)                   Output Shape              Param #
Encoder                                  [32, 16, 5, 5]            --
├─Sequential: 1-1                        [32, 16, 5, 5]            --
│    └─Conv2d: 2-1                       [32, 4, 480, 480]         40
│    └─ELU: 2-2                          [32, 4, 480, 480]         --
│    └─Conv2d: 2-3                       [32, 4, 480, 480]         148
│    └─ELU: 2-4                          [32, 4, 480, 480]         --
│    └─MaxPool2d: 2-5                    [32, 4, 240, 240]         --
│    └─Conv2d: 2-6                       [32, 8, 240, 240]         296
│    └─ELU: 2-7                          [32, 8, 240, 240]         --
│    └─Conv2d: 2-8                       [32, 8, 240, 240]         584
│    └─ELU: 2-9                          [32, 8, 240, 240]         --
│    └─MaxPool2d: 2-10                   [32, 8, 60, 60]           --
│    └─Conv2d: 2-11                      [32, 16, 60, 60]          1,168
│    └─EL

In [10]:
# Summarize the decoder on a latent batch of shape (32, 16, 5, 5), the
# output shape reported for the encoder in its summary.
summary(
    neuralnet.decoder,
    input_size=(32, 16, 5, 5),
    device='cpu',
)

Layer (type:depth-idx)                   Output Shape              Param #
Decoder                                  [32, 1, 480, 480]         --
├─Sequential: 1-1                        [32, 16, 5, 5]            --
│    └─Conv2d: 2-1                       [32, 16, 5, 5]            2,320
│    └─ELU: 2-2                          [32, 16, 5, 5]            --
├─Sequential: 1-2                        --                        --
│    └─Conv2d: 2-3                       [32, 16, 5, 5]            2,320
│    └─ELU: 2-4                          [32, 16, 5, 5]            --
│    └─Conv2d: 2-5                       [32, 16, 5, 5]            2,320
│    └─ELU: 2-6                          [32, 16, 5, 5]            --
│    └─Upsample: 2-7                     [32, 16, 15, 15]          --
│    └─Conv2d: 2-8                       [32, 16, 15, 15]          2,320
│    └─ELU: 2-9                          [32, 16, 15, 15]          --
│    └─Conv2d: 2-10                      [32, 16, 15, 15]          2,320


In [10]:
def pretrain(neuralnet, train_loader, epochs):
    """Pre-train the network on ``train_loader`` and save its weights.

    Args:
        neuralnet: Model exposing ``encoder``, ``decoder``, ``optimizer``,
            ``train`` and ``state_dict``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches; the
            labels are ignored.
        epochs: Number of passes over ``train_loader``.

    The loss is printed every 10th batch and the final state dict is written
    to ``vae_cifar10_pretrained.pth``.
    """
    neuralnet.train(mode=True)

    for epoch in tqdm(range(epochs)):
        for step, (images, _labels) in enumerate(train_loader):
            batch = images.to(device=CFG.device)

            # Forward pass: encode, decode, then score the reconstruction.
            latent, mu, sigma = neuralnet.encoder(batch)
            reconstruction = neuralnet.decoder(latent)
            loss = loss_function(
                x=batch, x_hat=reconstruction, mu=mu, sigma=sigma
            )

            # Backward pass and parameter update.
            neuralnet.optimizer.zero_grad()
            loss.backward()
            neuralnet.optimizer.step()

            if step % 10 == 0:
                print(f'Epoch : [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # NOTE(review): the file name says "cifar10" although this notebook
    # pre-trains on MNIST; kept unchanged because other code may load it.
    torch.save(neuralnet.state_dict(), 'vae_cifar10_pretrained.pth')

In [11]:
# Run the pre-training stage for CFG.epochs epochs on the MNIST loader.
pretrain(
    neuralnet=neuralnet,
    train_loader=train_loader,
    epochs=CFG.epochs,
)

  0%|          | 0/100 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Train and evaluate on the custom image datasets built above.
training_and_testing(
    neuralnet=neuralnet,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    epochs=CFG.epochs,
    batch_size=CFG.batch_size,
)