#### TODO list:
  - test and pick the best loss function
  - data loader and transforms
  - test and run the training code
    - text output
    - img output
    - save models
  - convert to .py file for easy future running (tuning)

In [60]:
from model import Generator, Discriminator
from loss import SRGAN_Loss
from utils import TrainDatasetFromFolder, ValDatasetFromFolder

ImportError: cannot import name 'ValDatasetFromFolder' from 'utils' (/Users/wang/Documents/Duke/Learning/ECE.590D.DeepLearning/ece590D/project/gan/srgan/utils.py)

In [9]:
import os
import pandas as pd
import torch.optim as optim
import torch.utils.data
import torchvision.utils as utils
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [3]:
### global parameters
SCALE_FACTOR = 4
HR_DIR = "../data/train_images_hr/"
LR_DIR = "../data/train_images_lr/"
RESULTS_DIR = "results/" + "SR" + str(SCALE_FACTOR) + "/"

# network parameters
CONTENT_LOSS = "vgg"  # try "both" in future test
ADVERSARIAL_LOSS = "bce"
TV_LOSS_ON = False

# training parameters
BATCH_SIZE = 32
NUM_EPOCHS = 10
NUM_WORKERS = 0  # workers for loading data

In [4]:
train_dataset = DatasetFromFolder(hr_dir=HR_DIR, lr_dir=LR_DIR)
train_loader = DataLoader(dataset=train_dataset, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)

netG = Generator(scale_factor=SCALE_FACTOR)
print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
netD = Discriminator()
print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

generator_criterion = SRGAN_Loss(content_loss=CONTENT_LOSS, 
                                 adversarial_loss=ADVERSARIAL_LOSS, 
                                 tv_loss_on=TV_LOSS_ON)

if torch.cuda.is_available():
    netG.cuda()
    netD.cuda()
    generator_criterion.cuda()
    
optimizerG = optim.Adam(netG.parameters())
optimizerD = optim.Adam(netD.parameters())

results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [], 'psnr': [], 'ssim': []}
if not os.path.isdir(RESULTS_DIR):
    os.mkdir(RESULTS_DIR)
out_path_val = RESULTS_DIR + "val_predict/"
if not os.path.isdir(out_path_val):
    os.mkdir(out_path_val)
out_path_net = RESULTS_DIR + "net_weights/"
if not os.path.isdir(out_path_net):
    os.mkdir(out_path_net)

for epoch in range(1, NUM_EPOCHS + 1):
    train_bar = tqdm(train_loader)
    running_results = {'batch_sizes': 0, 'd_loss': 0, 'g_loss': 0, 'd_score': 0, 'g_score': 0}

    netG.train()
    netD.train()
    
    ### training
    for lr_img, hr_img in train_bar:
        g_update_first = True
        batch_size = lr_img.size(0)
        running_results['batch_sizes'] += batch_size
        
        ############################
        # (1) Update D network: maximize D(x)-1-D(G(z))
        ###########################
        real_img = Variable(hr_img)
        if torch.cuda.is_available():
            real_img = real_img.cuda()
        z = Variable(lr_img)
        if torch.cuda.is_available():
            z = z.cuda()
        fake_img = netG(z)

        netD.zero_grad()
        real_out = netD(real_img)
        fake_out = netD(fake_img)
        d_loss = 1 - real_out.mean() + fake_out.mean()  # L1 loss
        #  could also try a BCELoss: log(1-real_out)+log(fake_out)
        d_loss.backward(retain_graph=True)
        optimizerD.step()
        
        ############################
        # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
        ###########################
        netG.zero_grad()
        g_loss = generator_criterion(fake_out, fake_img, real_img)
        g_loss.backward()

        fake_img = netG(z)
        fake_out = netD(fake_img).mean()

        optimizerG.step()

        # loss for current batch before optimization 
        running_results['g_loss'] += g_loss.item() * batch_size
        running_results['d_loss'] += d_loss.item() * batch_size
        running_results['d_score'] += real_out.sum().item()
        running_results['g_score'] += fake_out.sum().item()

        train_bar.set_description(desc='[%d/%d] Loss_D: %.4f Loss_G: %.4f D(hr): %.4f D(G(lr)): %.4f' % (
            epoch, NUM_EPOCHS, running_results['d_loss'] / running_results['batch_sizes'],
            running_results['g_loss'] / running_results['batch_sizes'],
            running_results['d_score'] / running_results['batch_sizes'],
            running_results['g_score'] / running_results['batch_sizes']))
        pass
    
    ### evaluating
    netG.eval()
    out_path_val_epoch = out_path_val + "epoch_%d/" % epoch
    if not os.path.isdir(out_path_val_epoch):
        os.mkdir(out_path_val_epoch)
    with torch.no_grad():
        valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
    
    ### save model parameters
    torch.save(netG.state_dict(), out_path_net + "netG_epoch_%d.pth" % epoch)
    torch.save(netD.state_dict(), out_path_net + "netD_epoch_%d.pth" % epoch)
    
    ### save loss\scores\psnr\ssim
    results['d_loss'].append(running_results['d_loss'] / running_results['batch_sizes'])
    results['g_loss'].append(running_results['g_loss'] / running_results['batch_sizes'])
    results['d_score'].append(running_results['d_score'] / running_results['batch_sizes'])
    results['g_score'].append(running_results['g_score'] / running_results['batch_sizes'])
    valing_results['psnr'] = valing_results['ssim'] = 0
    results['psnr'].append(valing_results['psnr'])
    results['ssim'].append(valing_results['ssim'])

    if epoch % 1 == 0 and epoch != 0:
        data_frame = pd.DataFrame(
            data={'Loss_D': results['d_loss'], 'Loss_G': results['g_loss'], 'Score_D': results['d_score'],
                  'Score_G': results['g_score'], 'PSNR': results['psnr'], 'SSIM': results['ssim']},
            index=range(1, epoch + 1))
        data_frame.to_csv(RESULTS_DIR + 'train_stats.csv', index_label='Epoch')

# generator parameters: 734219
# discriminator parameters: 138908865


[1/20] Loss_D: 1.0335 Loss_G: 0.0249 D(hr): 0.0344 D(G(lr)): 0.0101: 100%|██████████| 19/19 [2:17:36<00:00, 434.58s/it]


FileNotFoundError: [Errno 2] No such file or directory: 'results/SR_4/net_weights/netG_epoch_1.pth'

In [None]:
    ### Evaluating
    with torch.no_grad():
        val_bar = tqdm(val_loader)
        valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
        val_images = []
        for val_lr, val_hr, val_name in val_bar:
            batch_size = val_lr.size(0)
            valing_results['batch_sizes'] += batch_size
            lr = val_lr
            hr = val_hr
            if torch.cuda.is_available():
                lr = lr.cuda()
                hr = hr.cuda()
            sr = netG(lr)

            batch_mse = ((sr - hr) ** 2).data.mean()
            valing_results['mse'] += batch_mse * batch_size
            batch_ssim = pytorch_ssim.ssim(sr, hr).item()
            valing_results['ssims'] += batch_ssim * batch_size
            valing_results['psnr'] = 10 * log10(1 / (valing_results['mse'] / valing_results['batch_sizes']))
            valing_results['ssim'] = valing_results['ssims'] / valing_results['batch_sizes']
            val_bar.set_description(
                desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (
                    valing_results['psnr'], valing_results['ssim']))

            val_images.extend(
                [display_transform()(val_hr_restore.squeeze(0)), display_transform()(hr.data.cpu().squeeze(0)),
                 display_transform()(sr.data.cpu().squeeze(0))])
        val_images = torch.stack(val_images)
        val_images = torch.chunk(val_images, val_images.size(0) // 15)
        val_save_bar = tqdm(val_images, desc='[saving training results]')
        index = 1
        for image in val_save_bar:
            image = utils.make_grid(image, nrow=3, padding=5)
            utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
            index += 1