In [1]:
!pip3 install torchvision==0.14.1
!pip3 install tensorboard==2.11.1
!pip3 install torch-summary

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting torchvision==0.14.1
  Downloading torchvision-0.14.1-cp37-cp37m-manylinux1_x86_64.whl (24.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.2/24.2 MB 30.0 MB/s eta 0:00:00
Collecting torch==1.13.1
  Downloading torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl (887.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 887.5/887.5 MB 983.2 kB/s eta 0:00:00
Collecting nvidia-cudnn-cu11==8.5.0.96
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 557.1/557.1 MB 1.2 MB/s eta 0:00:00
Collecting nvidia-cublas-cu11==11.10.3.66
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
     ━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import imageio 
import glob
import datetime
import time
import torchvision
from torch.utils.tensorboard import SummaryWriter
from data.dataloader import create_dataloader
from models.pose_transfer_model import PoseTransferModel
from torch.utils.tensorboard import SummaryWriter

In [18]:
# configurations
# -----------------------------------------------------------------------------
# Workspace root. Defaults to the SageMaker home directory; set the
# POSE_TRANSFER_ROOT environment variable to run the notebook on a machine
# with a different layout without editing this cell.
root_path = os.environ.get('POSE_TRANSFER_ROOT', '/home/ec2-user/SageMaker')
dataset_name = 'deepfashion'

# Everything belonging to the dataset lives under <root_path>/datasets/<dataset_name>.
dataset_root = f'{root_path}/datasets/{dataset_name}'
# CSV files with the image pairs of each split (passed to create_dataloader).
img_pairs_train = f'{dataset_root}/train_img_pairs1.csv'
img_pairs_test = f'{dataset_root}/test_img_pairs1.csv'

# Pose-map directories of each split (passed to create_dataloader).
pose_maps_dir_train = f'{dataset_root}/train_pose_maps'
pose_maps_dir_test = f'{dataset_root}/test_pose_maps'

In [19]:
# --- hardware ----------------------------------------------------------------
# GPU ids handed to PoseTransferModel.
gpu_ids = [0]

# --- schedule ----------------------------------------------------------------
# Batch sizes of the train and test dataloaders.
batch_size_train = batch_size_test = 8
# Number of epochs, and the iteration interval between checkpoint/TensorBoard dumps.
n_epoch, out_freq = 1, 500

# --- resume ------------------------------------------------------------------
# Checkpoint to load before training; weights are loaded only when both are set.
ckpt_id = ckpt_dir = None

# --- outputs -----------------------------------------------------------------
# Optional label appended to the run's timestamp in output directory names.
run_info = ''
out_path = f'{root_path}/output/{dataset_name}'

In [26]:
# create timestamp and infostamp
# (together they name this run's TensorBoard and checkpoint directories)
timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
infostamp = f'_{run_info.strip()}' if run_info.strip() else ''

# create tensorboard logger
logger = SummaryWriter(f'{out_path}/runs/{timestamp}{infostamp}')

# create transforms
# images: converted to tensors, then normalized with mean 0.5 / std 0.5 per channel
img_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# maps: converted to tensors only
map_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

# create dataloaders
train_dataloader = create_dataloader(dataset_root, img_pairs_train, pose_maps_dir_train,
                                     img_transform, map_transform,
                                     batch_size=batch_size_train, shuffle=True)
test_dataloader = create_dataloader(dataset_root, img_pairs_test, pose_maps_dir_test,
                                    img_transform, map_transform,
                                    batch_size=batch_size_test, shuffle=False)

# create fixed batch for testing
# (first batch of the unshuffled test loader; the training loop reuses it for visuals)
fixed_test_batch = next(iter(test_dataloader))

# create model
model = PoseTransferModel(gpuids=gpu_ids)
model.print_networks(verbose=False)

# load pretrained weights into model
# Checkpoints are saved under the iteration counter, which starts at 0, so a plain
# truthiness test on ckpt_id would silently refuse to load checkpoint 0. Only
# "unset" values (None / empty string) skip loading.
if ckpt_dir and ckpt_id not in (None, ''):
    model.load_networks(ckpt_dir, ckpt_id, verbose=True)

# train model
# n_batch: batches per epoch; w_batch / w_epoch: digit widths used to align the
# progress printout; n_iters: global iteration counter advanced by the training loop
n_batch = len(train_dataloader)
w_batch = len(str(n_batch))
w_epoch = len(str(n_epoch))
n_iters = 0

[INFO] Using device: GPU0 -> Tesla T4
[INFO] Network netG initialized
[INFO] Network netD initialized
--------------------------------------------------------------------------------
[INFO] Total parameters of network netG: 126.40M
[INFO] Total parameters of network netD: 2.77M
--------------------------------------------------------------------------------


In [27]:
# Main training loop: one optimisation step per batch, with a progress line for
# every step and a checkpoint + TensorBoard dump every `out_freq` iterations and
# on the very last batch of the last epoch.
try:
    for epoch in range(n_epoch):
        for batch, data in enumerate(train_dataloader):
            # perf_counter is monotonic, so the step time cannot be skewed by
            # wall-clock adjustments.
            time_0 = time.perf_counter()
            model.set_inputs(data)
            model.optimize_parameters()
            losses = model.get_losses()
            loss_G = losses['lossG']
            loss_D = losses['lossD']
            time_1 = time.perf_counter()
            print(f'[TRAIN] Epoch: {epoch+1:{w_epoch}d}/{n_epoch} | Batch: {batch+1:{w_batch}d}/{n_batch} |',
                  f'LossG: {loss_G:7.4f} | LossD: {loss_D:7.4f} | Time: {time_1-time_0:.2f} sec |')

            is_final_step = (batch+1 == n_batch and epoch+1 == n_epoch)
            if (n_iters % out_freq == 0) or is_final_step:
                model.save_networks(f'{out_path}/ckpt/{timestamp}{infostamp}', n_iters, verbose=True)
                # Group scalars by network: names starting with 'lossG' go under
                # LossG, everything else under LossD.
                for loss_name, loss in losses.items():
                    loss_group = 'LossG' if loss_name.startswith('lossG') else 'LossD'
                    logger.add_scalar(f'{loss_group}/{loss_name}', loss, n_iters)
                # Render the fixed test batch so successive dumps are comparable.
                model.set_inputs(fixed_test_batch)
                visuals = model.compute_visuals()
                logger.add_image(f'Iteration_{n_iters}', visuals, n_iters)

            n_iters += 1
finally:
    # Write buffered TensorBoard events to disk even when the loop is interrupted
    # (e.g. KeyboardInterrupt); the exception itself still propagates.
    logger.flush()


torch.Size([8, 3, 256, 256])
torch.Size([8, 36, 256, 256])
torch.Size([8, 6, 256, 256])
y shape:  torch.Size([8, 3, 256, 256])
[TRAIN] Epoch: 1/1 | Batch:    1/4670 | LossG: 154.4315 | LossD:  1.9043 | Time: 1.82 sec |
[INFO] Network netG weights saved to /home/ec2-user/SageMaker/output/deepfashion/ckpt/2023-05-02-01-08-16/netG_0.pth
[INFO] Network netD weights saved to /home/ec2-user/SageMaker/output/deepfashion/ckpt/2023-05-02-01-08-16/netD_0.pth
torch.Size([8, 3, 256, 256])
torch.Size([8, 36, 256, 256])
torch.Size([8, 6, 256, 256])
y shape:  torch.Size([8, 3, 256, 256])
torch.Size([8, 3, 256, 256])
torch.Size([8, 36, 256, 256])
torch.Size([8, 6, 256, 256])
y shape:  torch.Size([8, 3, 256, 256])
[TRAIN] Epoch: 1/1 | Batch:    2/4670 | LossG: 148.3748 | LossD: 112.0056 | Time: 1.82 sec |
torch.Size([8, 3, 256, 256])
torch.Size([8, 36, 256, 256])
torch.Size([8, 6, 256, 256])
y shape:  torch.Size([8, 3, 256, 256])
[TRAIN] Epoch: 1/1 | Batch:    3/4670 | LossG: 12.9942 | LossD: 156.3380 

KeyboardInterrupt: 

In [None]:
# Load the training image-pair table. The membership check in the next cell reads
# `ndf`, so this load must run for that cell to work on a fresh kernel.
ndf = pd.read_csv(img_pairs_train)
ndf.shape

In [25]:
# Is this segmentation path listed in the `imgB_seg` column?
# `value in series` tests the Series *index labels*, not its values, so the check
# has to go through `.values` to look at the column contents.
'img_seg/WOMEN/Sweaters/id_00005106/07_1_front.png' in ndf['imgB_seg'].values

False

In [23]:
# Debug probe (disabled): open one segmentation image directly from disk.
# The recorded output of this cell is a FileNotFoundError for this exact path.
# Image.open('/home/ec2-user/SageMaker/datasets/deepfashion/img_seg/WOMEN/Sweaters/id_00005106/07_1_front.png')

FileNotFoundError: [Errno 2] No such file or directory: '/home/ec2-user/SageMaker/datasets/deepfashion/img_seg/WOMEN/Sweaters/id_00005106/07_1_front.png'

In [22]:
# Debug sweep (disabled): print every `imgB_seg` entry of `ndf` and open it
# relative to `dataset_root`.
# NOTE(review): presumably used to find which listed images are missing on disk — confirm.
# for im in ndf['imgB_seg']:
#     print(im)
#     Image.open(dataset_root +'/'+ im)