In [43]:
# imports
import datetime
import time
import torchvision
import torch
from torch.utils.tensorboard import SummaryWriter
from data.dataloader import create_dataloader
from models.pose_transfer_model import PoseTransferModel


# configurations
# -----------------------------------------------------------------------------
root_path = 'D:/LjmuMSc/Projects/Github/PoseTransfer_MS_RnD'
dataset_name = 'deepfashion'

dataset_root = f'{root_path}/datasets/{dataset_name}/'
img_pairs_train = f'{dataset_root}/train_img_pairs1.csv'
img_pairs_test = f'{dataset_root}/test_img_pairs1.csv'
pose_maps_dir_train = f'{dataset_root}/train_pose_maps'
pose_maps_dir_test = f'{dataset_root}/test_pose_maps'


gpu_ids = [0]

batch_size_train = 1
batch_size_test = 1
n_epoch = 1
out_freq = 500

ckpt_id = None
ckpt_dir = None

run_info = ''
out_path = f'{root_path}/output/{dataset_name}'


# -----------------------------------------------------------------------------
# create timestamp and infostamp
timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
infostamp = f'_{run_info.strip()}' if run_info.strip() else ''

# create tensorboard logger
logger = SummaryWriter(f'{out_path}/runs/{timestamp}{infostamp}')

# create transforms
img_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
map_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

# create dataloaders
train_dataloader = create_dataloader(dataset_root, img_pairs_train, pose_maps_dir_train,
                                     img_transform, map_transform,
                                     batch_size=batch_size_train, shuffle=True)

In [44]:
 d1 = next(iter(train_dataloader))
print('imagA: ',d1['imgA'].shape)
print('imagB: ',d1['imgB'].shape)
print('imgA_seg: ',d1['imgA_seg'].shape)
print('imgB_seg: ',d1['imgB_seg'].shape)
segmab =torch.cat((d1['imgA_seg'], d1['imgB_seg']), dim=0)
print('segmab shape: ',segmab.shape)
print('mapA: ',d1['mapA'].shape)
print('mapB: ',d1['mapB'].shape)
mapab =torch.cat((d1['mapA'], d1['mapB']), dim=1)
print('mapab shape: ',mapab.shape)
print('fidA: ',d1['fidA'])
print('fidB: ',d1['fidB'])  

imagA:  torch.Size([1, 3, 256, 256])
imagB:  torch.Size([1, 3, 256, 256])
imgA_seg:  torch.Size([1, 3, 256, 256])
imgB_seg:  torch.Size([1, 3, 256, 256])
segmab shape:  torch.Size([2, 3, 256, 256])
mapA:  torch.Size([1, 18, 256, 256])
mapB:  torch.Size([1, 18, 256, 256])
mapab shape:  torch.Size([1, 36, 256, 256])
fidA:  ['imgMENShirts_Polosid_0000238301_4_full']
fidB:  ['imgMENShirts_Polosid_0000238301_1_front']


In [45]:
# create model
model = PoseTransferModel(gpuids=gpu_ids)
model.print_networks(verbose=False)

[INFO] Using device: GPU0 -> NVIDIA GeForce GTX 1060 6GB
[INFO] Network netG initialized
[INFO] Network netD initialized
--------------------------------------------------------------------------------
[INFO] Total parameters of network netG: 126.40M
[INFO] Total parameters of network netD: 2.77M
--------------------------------------------------------------------------------


In [46]:
for epoch in range(n_epoch):
    for batch, data in enumerate(train_dataloader):
        time_0 = time.time()
        model.set_inputs(data)
        model.optimize_parameters()
        losses = model.get_losses()
        loss_G = losses['lossG']
        loss_D = losses['lossD']
        time_1 = time.time()

real_map_AB_shape:  torch.Size([1, 36, 256, 256])
seg_shape:  torch.Size([1, 6, 256, 256])
torch.Size([1, 3, 256, 256])
torch.Size([1, 36, 256, 256])
torch.Size([1, 6, 256, 256])


RuntimeError: Given groups=1, weight of size [64, 36, 3, 3], expected input[1, 6, 256, 256] to have 36 channels, but got 6 channels instead

In [58]:
import torch
import torch.nn as nn
from torchsummary import summary
from torch import permute

In [59]:
def conv1x1(in_channels, out_channels):
    return nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False)


def conv3x3(in_channels, out_channels):
    return nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=False)


def downconv2x(in_channels, out_channels):
    return nn.Conv2d(in_channels, out_channels, 4, 2, 1, bias=False)


def upconv2x(in_channels, out_channels):
    return nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False)

In [60]:
class ResidualBlock(nn.Module):
    
    def __init__(self, num_channels):
        super(ResidualBlock, self).__init__()
        layers = [
            conv3x3(num_channels, num_channels),
            nn.BatchNorm2d(num_channels),
            nn.ReLU(inplace=True),
            conv3x3(num_channels, num_channels),
            nn.BatchNorm2d(num_channels)
        ]
        self.layers = nn.Sequential(*layers)
    
    def forward(self, x):
        y = self.layers(x) + x
        return y

In [61]:
def print_s(x):
    print(x.shape)

In [90]:


class NetG(nn.Module):
    
    def __init__(self, in1_channels, in2_channels,in3_channels, out_channels, ngf=64):
        super(NetG, self).__init__()
        
        self.in1_conv1 = self.inconv(in1_channels, ngf)
        self.in1_down1 = self.down2x(ngf, ngf*2)
        self.in1_down2 = self.down2x(ngf*2, ngf*4)
        self.in1_down3 = self.down2x(ngf*4, ngf*8)
        self.in1_down4 = self.down2x(ngf*8, ngf*16)
        
        self.in2_conv1 = self.inconv(in2_channels, ngf)
        self.in2_down1 = self.down2x(ngf, ngf*2)
        self.in2_down2 = self.down2x(ngf*2, ngf*4)
        self.in2_down3 = self.down2x(ngf*4, ngf*8)
        self.in2_down4 = self.down2x(ngf*8, ngf*16)

        self.in3_conv1 = self.inconv(in3_channels, ngf)
        self.in3_down1 = self.down2x(ngf, ngf*2)
        self.in3_down2 = self.down2x(ngf*2, ngf*4)
        self.in3_down3 = self.down2x(ngf*4, ngf*8)
        self.in3_down4 = self.down2x(ngf*8, ngf*16)
        
        self.out_up1 = self.up2x(ngf*16, ngf*8)
        self.out_up2 = self.up2x(ngf*8, ngf*4)
        self.out_up3 = self.up2x(ngf*4, ngf*2)
        self.out_up4 = self.up2x(ngf*2, ngf)
        self.out_conv1 = self.outconv(ngf, out_channels)
    
    def inconv(self, in_channels, out_channels):
        return nn.Sequential(
            conv3x3(in_channels, out_channels),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
    def outconv(self, in_channels, out_channels):
        return nn.Sequential(
            ResidualBlock(in_channels),
            ResidualBlock(in_channels),
            ResidualBlock(in_channels),
            ResidualBlock(in_channels),
            conv1x1(in_channels, out_channels),
            nn.Tanh()
        )
    
    def down2x(self, in_channels, out_channels):
        return nn.Sequential(
            downconv2x(in_channels, out_channels),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            ResidualBlock(out_channels)
        )
    
    def up2x(self, in_channels, out_channels):
        return nn.Sequential(
            upconv2x(in_channels, out_channels),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            ResidualBlock(out_channels)
        )
    
    def forward(self, x1, x2, x3):
        print(x1.shape)
        print(x2.shape)
        print(x3.shape)
        x1_c1 = self.in1_conv1(x1)
        x1_d1 = self.in1_down1(x1_c1)
        x1_d2 = self.in1_down2(x1_d1)
        x1_d3 = self.in1_down3(x1_d2)
        x1_d4 = self.in1_down4(x1_d3)
        
        # Heat maps(joints)
        x2_c1 = self.in2_conv1(x2)
        x2_d1 = self.in2_down1(x2_c1)
        x2_d2 = self.in2_down2(x2_d1)
        x2_d3 = self.in2_down3(x2_d2)
        x2_d4 = self.in2_down4(x2_d3)

        # Parsing maps

        # x3 = permute(x3,(0, 3, 1, 2))
        x3_c1 = self.in3_conv1(x3)
        x3_d1 = self.in3_down1(x3_c1)
        x3_d2 = self.in3_down2(x3_d1)
        x3_d3 = self.in3_down3(x3_d2)
        x3_d4 = self.in3_down4(x3_d3)
        
        y = (x1_d4 * torch.sigmoid(x2_d4)) * torch.sigmoid(x3_d4)
        y = self.out_up1(y)
        y = (y * torch.sigmoid(x2_d3)) * torch.sigmoid(x3_d3)
        y = self.out_up2(y)
        y = (y * torch.sigmoid(x2_d2)) * torch.sigmoid(x3_d2)
        y = self.out_up3(y)
        y = (y * torch.sigmoid(x2_d1)) * torch.sigmoid(x3_d1)
        y = self.out_up4(y)
        y = self.out_conv1(y)

        print('y shape: ', y.shape)
        return y



In [92]:
netG = NetG(3,36,6,3)
netG

NetG(
  (in1_conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (in1_down1): Sequential(
    (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ResidualBlock(
      (layers): Sequential(
        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (in1_down2): Sequential(
    (0): Conv2d(128, 256

In [93]:
d1 = next(iter(train_dataloader))
realAim = d1['imgA']
realBim =d1['imgB']
realA_Seg = d1['imgA_seg']
realB_Seg = d1['imgB_seg']
realAB_seg =torch.cat((d1['imgA_seg'], d1['imgB_seg']), dim=1)

mapA= d1['mapA']
mapB= d1['mapB']
mapAB =torch.cat((d1['mapA'], d1['mapB']), dim=1)

fidA=d1['fidA']
fidB=d1['fidB'] 

In [94]:
realAB_seg.shape

torch.Size([1, 6, 256, 256])

In [96]:
netG(realAim,mapAB,realAB_seg)

torch.Size([1, 3, 256, 256])
torch.Size([1, 36, 256, 256])
torch.Size([1, 6, 256, 256])
y shape:  torch.Size([1, 3, 256, 256])


tensor([[[[ 0.5397,  0.7425,  0.3778,  ...,  0.6346,  0.1451,  0.5389],
          [ 0.6915,  0.3944,  0.4177,  ...,  0.0256,  0.5642,  0.4263],
          [ 0.5132,  0.6834, -0.1883,  ...,  0.4184,  0.3359,  0.4097],
          ...,
          [ 0.7244,  0.5369,  0.0932,  ...,  0.4598,  0.4348,  0.3783],
          [ 0.4901,  0.6845,  0.3693,  ...,  0.2875,  0.0191, -0.1129],
          [ 0.6545,  0.6072,  0.5303,  ...,  0.6318,  0.6688,  0.6457]],

         [[ 0.0085,  0.0129,  0.4209,  ...,  0.1230,  0.3354, -0.1279],
          [ 0.4551,  0.3502,  0.3932,  ...,  0.4352,  0.0190, -0.1280],
          [ 0.3628, -0.0677,  0.3508,  ...,  0.4364,  0.3561,  0.1282],
          ...,
          [ 0.0754,  0.3705,  0.5618,  ...,  0.1591,  0.2797, -0.0727],
          [ 0.3460,  0.1963,  0.2124,  ...,  0.3658,  0.0265,  0.3597],
          [-0.0319, -0.0787,  0.3359,  ..., -0.3577, -0.0252, -0.2606]],

         [[-0.6560, -0.3133, -0.6880,  ..., -0.5547, -0.4571, -0.0217],
          [-0.6756, -0.5363, -

nn.Conv3d : expects the input to have size [batch_size, channels, depth, height, width]. 
The first convolution expects 3 channels, but with your input having size [100, 16, 16, 16, 3], that would be 16 channels.

Assuming that your data is given as [batch_size, depth, height, width, channels], you need to swap the dimensions around, which can be done with torch.Tensor.permute:

From: [batch_size, depth, height, width, channels]
To: [batch_size, channels, depth, height, width]

input = input.permute(0, 4, 1, 2, 3)