<a href="https://colab.research.google.com/github/za4n/A-budgetApp/blob/master/vtryModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install kaggle package
!pip install kaggle

# Create directory for Kaggle API token
!mkdir -p ~/.kaggle



In [2]:
# Move the Kaggle API token to the correct location
!cp kaggle.json ~/.kaggle/
# Set correct permissions
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
from google.colab import drive
drive.mount('/content/drive')

# Create a project directory in your Google Drive
!mkdir -p "/content/drive/My Drive/VITON_HD_Project"

Mounted at /content/drive


In [4]:
# Create necessary subdirectories
!mkdir -p "/content/drive/My Drive/VITON_HD_Project/dataset"
!mkdir -p "/content/drive/My Drive/VITON_HD_Project/models"
!mkdir -p "/content/drive/My Drive/VITON_HD_Project/results"

In [5]:
# Change to the dataset directory
%cd "/content/drive/My Drive/VITON_HD_Project/dataset"

/content/drive/My Drive/VITON_HD_Project/dataset


In [6]:
# Download the dataset
!kaggle datasets download marquis03/high-resolution-viton-zalando-dataset


Dataset URL: https://www.kaggle.com/datasets/marquis03/high-resolution-viton-zalando-dataset
License(s): CC-BY-NC-SA-4.0
Downloading high-resolution-viton-zalando-dataset.zip to /content/drive/My Drive/VITON_HD_Project/dataset
100% 4.38G/4.39G [01:03<00:00, 98.3MB/s]
100% 4.39G/4.39G [01:03<00:00, 73.6MB/s]


In [7]:
!unzip -q high-resolution-viton-zalando-dataset.zip

In [8]:
# Check if GPU is being recognized
import torch
print("GPU Available:", torch.cuda.is_available())
print("GPU Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

GPU Available: True
GPU Device Name: Tesla T4


In [9]:
!pip install torch torchvision
!pip install opencv-python-headless
!pip install numpy pandas matplotlib



In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# Verify GPU
print("GPU Available:", torch.cuda.is_available())
print("GPU Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

# Check dataset structure
base_path = '/content/drive/MyDrive/VITON_HD_Project/dataset'
print("\nDataset contents:")
print(os.listdir(base_path))

GPU Available: True
GPU Device Name: Tesla T4

Dataset contents:
['high-resolution-viton-zalando-dataset.zip', 'test', 'test_pairs.txt', 'train', 'train_pairs.txt']


In [2]:
# Read and display first few pairs from test_pairs.txt
pairs_file = os.path.join(base_path, 'test_pairs.txt')
print("First 5 lines of test_pairs.txt:")
with open(pairs_file, 'r') as f:
    for i, line in enumerate(f):
        if i < 5:
            print(line.strip())

First 5 lines of test_pairs.txt:
05006_00.jpg 11001_00.jpg
02532_00.jpg 14096_00.jpg
03921_00.jpg 08015_00.jpg
12419_00.jpg 01944_00.jpg
12562_00.jpg 14025_00.jpg


DATA SET CLASS

In [3]:
class VITONDataset(Dataset):
    def __init__(self, root_dir, is_train=True, transform=None):
        self.root_dir = root_dir
        self.is_train = is_train
        self.transform = transform
        self.split = 'train' if is_train else 'test'

        # Get all image files from the directory
        self.image_dir = os.path.join(root_dir, self.split, 'image')
        self.cloth_dir = os.path.join(root_dir, self.split, 'cloth')

        # Get list of files
        self.image_files = sorted(os.listdir(self.image_dir))
        self.cloth_files = sorted(os.listdir(self.cloth_dir))

        # Take only first 100 files for testing
        self.image_files = self.image_files[:100]
        self.cloth_files = self.cloth_files[:100]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Get file names
        image_name = self.image_files[idx]
        cloth_name = self.cloth_files[idx]  # For simplicity, using same index

        # Construct full paths
        image_path = os.path.join(self.image_dir, image_name)
        cloth_path = os.path.join(self.cloth_dir, cloth_name)

        # Load images
        try:
            person_img = Image.open(image_path).convert('RGB')
            cloth_img = Image.open(cloth_path).convert('RGB')
        except Exception as e:
            print(f"Error loading images: {e}")
            print(f"Image path: {image_path}")
            print(f"Cloth path: {cloth_path}")
            raise e

        if self.transform:
            person_img = self.transform(person_img)
            cloth_img = self.transform(cloth_img)

        return {
            'person': person_img,
            'cloth': cloth_img,
            'person_name': image_name,
            'cloth_name': cloth_name
        }

# Define transforms
transform = transforms.Compose([
    transforms.Resize((512, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [4]:
# Initialize dataset with a try-except block
try:
    # Create dataset instance
    dataset_path = '/content/drive/MyDrive/VITON_HD_Project/dataset'
    train_dataset = VITONDataset(dataset_path, is_train=True, transform=transform)

    print(f"Dataset size: {len(train_dataset)}")

    # Create dataloader
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

    # Try loading one batch
    for batch in train_loader:
        print("\nSuccessfully loaded batch:")
        print("Person image shape:", batch['person'].shape)
        print("Cloth image shape:", batch['cloth'].shape)
        print("Person filename:", batch['person_name'])
        print("Cloth filename:", batch['cloth_name'])
        break

except Exception as e:
    print(f"Error occurred: {str(e)}")

Dataset size: 100

Successfully loaded batch:
Person image shape: torch.Size([1, 3, 512, 384])
Cloth image shape: torch.Size([1, 3, 512, 384])
Person filename: ['00054_00.jpg']
Cloth filename: ['00054_00.jpg']


In [5]:
try:
    train_image_dir = os.path.join(base_path, 'train', 'image')
    train_cloth_dir = os.path.join(base_path, 'train', 'cloth')

    print("Files in train/image directory:")
    print(os.listdir(train_image_dir)[:5])  # First 5 files

    print("\nFiles in train/cloth directory:")
    print(os.listdir(train_cloth_dir)[:5])  # First 5 files

except Exception as e:
    print(f"Error checking directories: {str(e)}")

Files in train/image directory:
['00000_00.jpg', '00001_00.jpg', '00002_00.jpg', '00003_00.jpg', '00005_00.jpg']

Files in train/cloth directory:
['00000_00.jpg', '00001_00.jpg', '00002_00.jpg', '00003_00.jpg', '00005_00.jpg']


In [1]:
import torch
import gc
import os
import psutil
import numpy as np
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import matplotlib.pyplot as plt
from tqdm import tqdm

def clear_memory():
    """Clear GPU and RAM memory"""
    # Clear GPU
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

    # Clear RAM
    gc.collect()

    # Delete existing models
    try:
        del model
        del optimizer
        del train_loader
        del test_loader
    except:
        pass

    # Print memory status
    if torch.cuda.is_available():
        print(f"GPU Memory allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB")
        print(f"GPU Memory cached: {torch.cuda.memory_reserved()/1e9:.2f} GB")

    process = psutil.Process(os.getpid())
    print(f"RAM Used: {process.memory_info().rss/1e9:.2f} GB")

# Set environment variable for memory management
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Clear everything
clear_memory()

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seeds
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

GPU Memory allocated: 0.00 GB
GPU Memory cached: 0.00 GB
RAM Used: 0.59 GB
Using device: cuda


ENHANCED DATASET

In [2]:
class ImprovedVITONDataset(Dataset):
    def __init__(self, root_dir, is_train=True, max_samples=2000):
        self.root_dir = root_dir
        self.is_train = is_train
        self.split = 'train' if is_train else 'test'

        # Get all directories
        self.image_dir = os.path.join(root_dir, self.split, 'image')
        self.cloth_dir = os.path.join(root_dir, self.split, 'cloth')
        self.segment_dir = os.path.join(root_dir, self.split, 'image-parse')

        # Get files with limit
        self.image_files = sorted(os.listdir(self.image_dir))[:max_samples]
        self.cloth_files = sorted(os.listdir(self.cloth_dir))[:max_samples]

        # Advanced augmentation
        self.transform = transforms.Compose([
            transforms.Resize((512, 384)),
            transforms.RandomHorizontalFlip(p=0.3),
            transforms.ColorJitter(
                brightness=0.2,
                contrast=0.2,
                saturation=0.2,
                hue=0.1
            ),
            transforms.RandomAffine(
                degrees=5,
                translate=(0.05, 0.05),
                scale=(0.95, 1.05)
            ),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        try:
            # Load images
            image_name = self.image_files[idx]
            cloth_name = self.cloth_files[idx]

            image_path = os.path.join(self.image_dir, image_name)
            cloth_path = os.path.join(self.cloth_dir, cloth_name)

            person_img = Image.open(image_path).convert('RGB')
            cloth_img = Image.open(cloth_path).convert('RGB')

            # Transform images
            if self.transform:
                person_img = self.transform(person_img)
                cloth_img = self.transform(cloth_img)

            return {
                'person': person_img,
                'cloth': cloth_img,
                'person_name': image_name,
                'cloth_name': cloth_name
            }

        except Exception as e:
            print(f"Error loading image {image_name}: {str(e)}")
            return self.__getitem__((idx + 1) % self.__len__())

MODEL ARCHITECTURE

In [3]:
class EnhancedVITONModel(nn.Module):
    def __init__(self):
        super().__init__()

        # Feature Extraction
        vgg = models.vgg19(pretrained=True)
        self.feature_extraction = nn.Sequential(*list(vgg.features.children())[:18])
        for param in self.feature_extraction.parameters():
            param.requires_grad = False

        # Attention Module
        self.attention = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.InstanceNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 1, kernel_size=1),
            nn.Sigmoid()
        )

        # Enhanced Warping Module
        self.warping = nn.Sequential(
            nn.Conv2d(512, 256, 3, padding=1),
            nn.InstanceNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=1),
            nn.InstanceNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=1),
            nn.InstanceNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 2, 3, padding=1),
            nn.Tanh()
        )

        # Generator Encoder
        self.enc1 = nn.Sequential(
                  nn.Conv2d(6, 64, 3, padding=1),
                  nn.InstanceNorm2d(64),
                  nn.ReLU(inplace=True)
              )
        self.enc2 = nn.Sequential(
                  nn.Conv2d(64, 128, 3, padding=1),
                  nn.InstanceNorm2d(128),
                  nn.ReLU(inplace=True),
                  nn.MaxPool2d(2)
              )
        self.enc3 = nn.Sequential(
                  nn.Conv2d(128, 256, 3, padding=1),
                  nn.InstanceNorm2d(256),
                  nn.ReLU(inplace=True),
                  nn.MaxPool2d(2)
              )
        self.enc4 = nn.Sequential(
                  nn.Conv2d(256, 512, 3, padding=1),
                  nn.InstanceNorm2d(512),
                  nn.ReLU(inplace=True)
              )


        # Generator Decoder with correct dimensions
        self.dec1 = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
            nn.InstanceNorm2d(256),
            nn.ReLU(inplace=True)
        )

        # Additional upsampling layer for d1 to match e3
        self.upsample_d1 = nn.Upsample(
                size=(128, 96),  # Match e3 size
                mode='bilinear',
                align_corners=True
            )
        self.upsample_d2 = nn.Upsample(
                size=(256, 192),  # Match e2 size
                mode='bilinear',
                align_corners=True
            )
        self.upsample_d3 = nn.Upsample(
                size=(512, 384),  # Match e1 size
                mode='bilinear',
                align_corners=True
            )

        self.dec2 = nn.Sequential(
                nn.Conv2d(512, 256, 3, padding=1),
                nn.InstanceNorm2d(256),
                nn.ReLU(inplace=True),
                nn.Conv2d(256, 128, 3, padding=1)
            )

        self.dec3 = nn.Sequential(
                nn.Conv2d(256, 128, 3, padding=1),
                nn.InstanceNorm2d(128),
                nn.ReLU(inplace=True)
            )

        self.dec4 = nn.Sequential(
                nn.Conv2d(192, 64, 3, padding=1),
                nn.InstanceNorm2d(64),
                nn.ReLU(inplace=True),
                nn.Conv2d(64, 3, 3, padding=1),
                nn.Tanh()
            )



        # Upsampling for flow field
        self.upsample = nn.Upsample(
            size=(512, 384),
            mode='bilinear',
            align_corners=True
        )

    def forward(self, person_img, cloth_img):
        # Extract features
        with torch.no_grad():
            person_features = self.feature_extraction(person_img)
            cloth_features = self.feature_extraction(cloth_img)

        # Apply attention
        attention_mask = self.attention(cloth_features)
        cloth_features = cloth_features * attention_mask

        # Generate flow field
        combined_features = torch.cat([person_features, cloth_features], dim=1)
        flow_field = self.warping(combined_features)
        flow_field = self.upsample(flow_field)

        # Generate sampling grid
        batch_size = person_img.size(0)
        grid = F.affine_grid(
            torch.eye(2, 3).unsqueeze(0).repeat(batch_size, 1, 1).to(person_img.device),
            size=person_img.size(),
            align_corners=True
        )

        # Warp cloth
        warped_cloth = F.grid_sample(
            cloth_img,
            grid + flow_field.permute(0, 2, 3, 1),
            mode='bilinear',
            padding_mode='border',
            align_corners=True
        )
        x = torch.cat([person_img, warped_cloth], dim=1)
        print("\nEncoder shapes:")
        e1 = self.enc1(x)
        print(f"e1: {e1.shape}")

        e2 = self.enc2(e1)
        print(f"e2: {e2.shape}")

        e3 = self.enc3(e2)
        print(f"e3: {e3.shape}")

        e4 = self.enc4(e3)
        print(f"e4: {e4.shape}")

        # Decoder with debugging
        print("\nDecoder shapes:")
        # First decoder block
        d1 = self.dec1(e4)
        print(f"d1 after ConvTranspose: {d1.shape}")

        d1 = self.upsample_d1(d1)
        print(f"d1 after upsample: {d1.shape}")
        print(f"e3 for concatenation: {e3.shape}")

        d1_cat = torch.cat([d1, e3], dim=1)
        print(f"d1_cat: {d1_cat.shape}")

        # Second decoder block
        d2 = self.dec2(d1_cat)
        print(f"d2 before upsample: {d2.shape}")

        d2 = self.upsample_d2(d2)
        print(f"d2 after upsample: {d2.shape}")
        print(f"e2 for concatenation: {e2.shape}")

        d2_cat = torch.cat([d2, e2], dim=1)
        print(f"d2_cat: {d2_cat.shape}")

        # Third decoder block
        d3 = self.dec3(d2_cat)
        print(f"d3 before upsample: {d3.shape}")

        d3 = self.upsample_d3(d3)
        print(f"d3 after upsample: {d3.shape}")
        print(f"e1 for concatenation: {e1.shape}")

        d3_cat = torch.cat([d3, e1], dim=1)
        print(f"d3_cat: {d3_cat.shape}")

        # Final output
        output = self.dec4(d3_cat)
        print(f"\nFinal output: {output.shape}")

        return output


    def get_warped_cloth(self, person_img, cloth_img):
        """Helper function to visualize intermediate results"""
        with torch.no_grad():
            # Extract features
            person_features = self.feature_extraction(person_img)
            cloth_features = self.feature_extraction(cloth_img)

            # Apply attention
            attention_mask = self.attention(cloth_features)
            cloth_features = cloth_features * attention_mask

            # Generate flow field
            combined_features = torch.cat([person_features, cloth_features], dim=1)
            flow_field = self.warping(combined_features)
            flow_field = self.upsample(flow_field)

            # Generate grid and warp
            batch_size = person_img.size(0)
            grid = F.affine_grid(
                torch.eye(2, 3).unsqueeze(0).repeat(batch_size, 1, 1).to(person_img.device),
                size=person_img.size(),
                align_corners=True
            )

            warped_cloth = F.grid_sample(
                cloth_img,
                grid + flow_field.permute(0, 2, 3, 1),
                mode='bilinear',
                padding_mode='border',
                align_corners=True
            )

            return warped_cloth, attention_mask

def test_enhanced_model():
    model = EnhancedVITONModel().to(device)
    person = torch.randn(4, 3, 512, 384).to(device)
    cloth = torch.randn(4, 3, 512, 384).to(device)

    try:
        # Add shape printing for debugging
        output = model(person, cloth)
        warped_cloth, attention = model.get_warped_cloth(person, cloth)

        print("Shapes:")
        print(f"Output: {output.shape}")
        print(f"Warped cloth: {warped_cloth.shape}")
        print(f"Attention mask: {attention.shape}")
        print("\nModel test successful!")

    except Exception as e:
        print(f"Error: {str(e)}")

# Test the model
test_enhanced_model()




Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])
Shapes:
Output: torch.Size([4, 3, 512, 384])
Warped cloth: torch.Size([4, 3, 512, 384])
Attention mask: torch.Size([4, 1, 128, 96])

Model test successful!


LOSS FUNCTION

In [4]:
class CompositeLoss(nn.Module):
    def __init__(self):
        super().__init__()
        # VGG for perceptual loss
        vgg = models.vgg19(pretrained=True).features[:29].eval()
        for param in vgg.parameters():
            param.requires_grad = False
        self.vgg = vgg.to(device)

        # Loss components
        self.l1_loss = nn.L1Loss()
        self.mse_loss = nn.MSELoss()

    def perceptual_loss(self, x, target):
        """VGG based perceptual loss"""
        vgg_x = self.vgg(x)
        vgg_target = self.vgg(target)
        return F.mse_loss(vgg_x, vgg_target)

    def style_loss(self, x, target):
        """Gram matrix based style loss"""
        def gram_matrix(feat):
            b, ch, h, w = feat.size()
            feat = feat.view(b, ch, h * w)
            gram = torch.bmm(feat, feat.transpose(1, 2))
            return gram.div(ch * h * w)

        x_gram = gram_matrix(self.vgg(x))
        target_gram = gram_matrix(self.vgg(target))
        return F.mse_loss(x_gram, target_gram)

    def forward(self, output, target, warped_cloth=None):
        # Calculate different loss components
        l1 = self.l1_loss(output, target)
        perceptual = self.perceptual_loss(output, target)
        style = self.style_loss(output, target)

        # Warping loss if available
        warp_loss = torch.tensor(0.0).to(device)
        if warped_cloth is not None:
            warp_loss = self.l1_loss(warped_cloth, target)

        # Combine losses with weights
        total_loss = (0.5 * l1 +
                     0.2 * perceptual +
                     0.2 * style +
                     0.1 * warp_loss)

        return total_loss

Training Functions

In [5]:
def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps):
    model.train()
    total_loss = 0
    progress_bar = tqdm(dataloader, desc='Training')

    for batch_idx, batch in enumerate(progress_bar):
        # Load data
        person_images = batch['person'].to(device)
        cloth_images = batch['cloth'].to(device)

        # Clear gradients
        optimizer.zero_grad()

        try:
            # Forward pass
            outputs = model(person_images, cloth_images)

            # Calculate loss
            loss = criterion(outputs, person_images)

            # Normalize loss for gradient accumulation
            loss = loss / gradient_accumulation_steps

            # Backward pass
            loss.backward()

            # Update weights if needed
            if (batch_idx + 1) % gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Update metrics
            total_loss += loss.item() * gradient_accumulation_steps
            progress_bar.set_postfix({'loss': loss.item() * gradient_accumulation_steps})

            # Clear memory periodically
            if batch_idx % 10 == 0:
                clear_memory()

        except RuntimeError as e:
            print(f"Error in batch {batch_idx}: {str(e)}")
            continue

    return total_loss / len(dataloader)

def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    total_psnr = 0

    with torch.no_grad():
        for batch in test_loader:
            person_images = batch['person'].to(device)
            cloth_images = batch['cloth'].to(device)

            try:
                outputs = model(person_images, cloth_images)
                loss = criterion(outputs, person_images)

                # Calculate PSNR
                mse = F.mse_loss(outputs, person_images)
                psnr = 10 * torch.log10(1 / mse)

                total_loss += loss.item()
                total_psnr += psnr.item()

            except RuntimeError as e:
                print(f"Error during evaluation: {str(e)}")
                continue

    return total_loss / len(test_loader), total_psnr / len(test_loader)

TRAINING AND EVALUATION

In [None]:
# Clear memory before starting
clear_memory()

# Initialize model
model = EnhancedVITONModel().to(device)

# Training parameters
num_epochs = 30
batch_size = 4
gradient_accumulation_steps = 2
learning_rate = 0.0001

# Initialize optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5, verbose=True
)

# Initialize loss function
criterion = CompositeLoss().to(device)

# Create dataloaders
dataset_path = "/content/drive/MyDrive/VITON_HD_Project/dataset"
train_dataset = ImprovedVITONDataset(dataset_path, is_train=True, max_samples=2000)
test_dataset = ImprovedVITONDataset(dataset_path, is_train=False, max_samples=200)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    pin_memory=True
)

# Training metrics tracking
class MetricsTracker:
    def __init__(self):
        self.train_losses = []
        self.test_losses = []
        self.psnr_scores = []

    def update(self, train_loss, test_loss, psnr):
        self.train_losses.append(train_loss)
        self.test_losses.append(test_loss)
        self.psnr_scores.append(psnr)

    def plot_metrics(self):
        plt.figure(figsize=(15, 5))

        plt.subplot(1, 3, 1)
        plt.plot(self.train_losses)
        plt.title('Training Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')

        plt.subplot(1, 3, 2)
        plt.plot(self.test_losses)
        plt.title('Test Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')

        plt.subplot(1, 3, 3)
        plt.plot(self.psnr_scores)
        plt.title('PSNR Score')
        plt.xlabel('Epoch')
        plt.ylabel('PSNR (dB)')

        plt.tight_layout()
        plt.show()

# Initialize metrics tracker
metrics = MetricsTracker()

# Training loop
best_psnr = 0
for epoch in range(num_epochs):
    print(f'\nEpoch [{epoch + 1}/{num_epochs}]')

    # Train
    train_loss = train_epoch(
        model, train_loader, criterion, optimizer, device, gradient_accumulation_steps
    )

    # Evaluate
    test_loss, psnr = evaluate_model(model, test_loader, criterion, device)
    print(f'Train Loss: {train_loss:.4f}')
    print(f'Test Loss: {test_loss:.4f}')
    print(f'PSNR: {psnr:.2f} dB')

    # Update metrics
    metrics.update(train_loss, test_loss, psnr)

    # Update learning rate
    scheduler.step(test_loss)

    # Save best model
    if psnr > best_psnr:
        best_psnr = psnr
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'psnr': psnr,
        }, f'/content/drive/MyDrive/VITON_HD_Project/models/best_model.pth')
        print(f'New best model saved with PSNR: {psnr:.2f} dB')

    # Regular checkpoint
    if (epoch + 1) % 5 == 0:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'psnr': psnr,
        }, f'/content/drive/MyDrive/VITON_HD_Project/models/checkpoint_epoch_{epoch+1}.pth')

    # Clear memory
    clear_memory()

# Plot final metrics
metrics.plot_metrics()
print("Training completed!")

GPU Memory allocated: 0.01 GB
GPU Memory cached: 0.04 GB
RAM Used: 0.90 GB





Epoch [1/30]


Training:   0%|          | 0/500 [00:00<?, ?it/s]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   0%|          | 1/500 [00:02<23:48,  2.86s/it, loss=2.98]

GPU Memory allocated: 0.17 GB
GPU Memory cached: 0.26 GB
RAM Used: 1.26 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   0%|          | 2/500 [00:04<18:04,  2.18s/it, loss=2.95]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   1%|          | 3/500 [00:06<15:53,  1.92s/it, loss=2.78]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   1%|          | 4/500 [00:07<14:56,  1.81s/it, loss=2.56]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   1%|          | 5/500 [00:09<14:23,  1.74s/it, loss=2.38]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   1%|          | 6/500 [00:11<14:02,  1.71s/it, loss=2.59]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   1%|▏         | 7/500 [00:12<13:46,  1.68s/it, loss=2.74]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   2%|▏         | 8/500 [00:14<13:39,  1.67s/it, loss=2.67]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   2%|▏         | 9/500 [00:15<13:33,  1.66s/it, loss=2.46]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   2%|▏         | 10/500 [00:17<13:29,  1.65s/it, loss=2.7]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   2%|▏         | 11/500 [00:19<14:19,  1.76s/it, loss=2.24]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.32 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   2%|▏         | 12/500 [00:21<14:01,  1.72s/it, loss=2.12]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   3%|▎         | 13/500 [00:22<13:51,  1.71s/it, loss=2.07]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   3%|▎         | 14/500 [00:24<13:42,  1.69s/it, loss=2.15]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   3%|▎         | 15/500 [00:26<13:35,  1.68s/it, loss=2.4]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   3%|▎         | 16/500 [00:27<13:32,  1.68s/it, loss=2.55]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   3%|▎         | 17/500 [00:29<13:29,  1.68s/it, loss=2.24]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   4%|▎         | 18/500 [00:31<13:26,  1.67s/it, loss=2.07]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   4%|▍         | 19/500 [00:32<13:24,  1.67s/it, loss=2.15]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   4%|▍         | 20/500 [00:34<13:24,  1.68s/it, loss=1.91]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   4%|▍         | 21/500 [00:36<14:24,  1.80s/it, loss=1.97]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.28 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   4%|▍         | 22/500 [00:38<14:04,  1.77s/it, loss=2.09]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   5%|▍         | 23/500 [00:40<13:52,  1.75s/it, loss=2.14]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   5%|▍         | 24/500 [00:41<13:44,  1.73s/it, loss=1.92]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   5%|▌         | 25/500 [00:43<13:36,  1.72s/it, loss=1.63]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   5%|▌         | 26/500 [00:45<13:31,  1.71s/it, loss=1.62]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   5%|▌         | 27/500 [00:46<13:27,  1.71s/it, loss=1.64]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   6%|▌         | 28/500 [00:48<13:25,  1.71s/it, loss=1.71]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   6%|▌         | 29/500 [00:50<13:22,  1.70s/it, loss=1.75]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   6%|▌         | 30/500 [03:07<5:31:15, 42.29s/it, loss=1.84]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   6%|▌         | 31/500 [03:09<3:56:03, 30.20s/it, loss=1.36]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   6%|▋         | 32/500 [03:10<2:48:43, 21.63s/it, loss=1.33]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   7%|▋         | 33/500 [03:12<2:01:41, 15.64s/it, loss=1.75]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   7%|▋         | 34/500 [03:14<1:28:49, 11.44s/it, loss=2.08]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   7%|▋         | 35/500 [03:15<1:05:51,  8.50s/it, loss=1.61]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   7%|▋         | 36/500 [03:17<49:50,  6.45s/it, loss=1.43]  


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   7%|▋         | 37/500 [03:19<38:39,  5.01s/it, loss=1.24]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   8%|▊         | 38/500 [03:20<30:53,  4.01s/it, loss=1.6]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   8%|▊         | 39/500 [03:22<25:23,  3.31s/it, loss=1.55]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   8%|▊         | 40/500 [03:24<21:35,  2.82s/it, loss=1.37]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   8%|▊         | 41/500 [03:26<19:43,  2.58s/it, loss=1.54]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.32 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   8%|▊         | 42/500 [03:27<17:34,  2.30s/it, loss=1.44]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   9%|▊         | 43/500 [03:29<16:08,  2.12s/it, loss=1.43]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   9%|▉         | 44/500 [03:31<15:07,  1.99s/it, loss=1.44]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   9%|▉         | 45/500 [03:32<14:24,  1.90s/it, loss=1.42]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   9%|▉         | 46/500 [03:34<13:55,  1.84s/it, loss=1.58]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:   9%|▉         | 47/500 [03:36<13:34,  1.80s/it, loss=1.52]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  10%|▉         | 48/500 [03:37<13:19,  1.77s/it, loss=1.38]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  10%|▉         | 49/500 [03:39<13:08,  1.75s/it, loss=1.38]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  10%|█         | 50/500 [03:41<12:59,  1.73s/it, loss=1.26]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  10%|█         | 51/500 [03:43<13:42,  1.83s/it, loss=1.3]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.32 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  10%|█         | 52/500 [03:45<13:21,  1.79s/it, loss=1.38]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  11%|█         | 53/500 [03:46<13:09,  1.77s/it, loss=1.34]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  11%|█         | 54/500 [03:48<12:59,  1.75s/it, loss=1.25]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  11%|█         | 55/500 [03:50<12:51,  1.73s/it, loss=1.26]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  11%|█         | 56/500 [03:51<12:43,  1.72s/it, loss=1.16]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  11%|█▏        | 57/500 [03:53<12:35,  1.71s/it, loss=1.15]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  12%|█▏        | 58/500 [03:55<12:30,  1.70s/it, loss=1.24]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  12%|█▏        | 59/500 [03:56<12:25,  1.69s/it, loss=1.18]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  12%|█▏        | 60/500 [03:58<12:21,  1.69s/it, loss=0.885]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  12%|█▏        | 61/500 [04:00<13:12,  1.81s/it, loss=1.05]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  12%|█▏        | 62/500 [04:02<12:54,  1.77s/it, loss=1.19]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  13%|█▎        | 63/500 [04:04<12:39,  1.74s/it, loss=1.1]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  13%|█▎        | 64/500 [04:05<12:26,  1.71s/it, loss=1.2]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  13%|█▎        | 65/500 [04:07<12:17,  1.70s/it, loss=1.4]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  13%|█▎        | 66/500 [04:09<12:11,  1.68s/it, loss=0.907]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  13%|█▎        | 67/500 [04:10<12:06,  1.68s/it, loss=1.15]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  14%|█▎        | 68/500 [04:12<12:01,  1.67s/it, loss=0.951]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  14%|█▍        | 69/500 [04:14<11:58,  1.67s/it, loss=0.875]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  14%|█▍        | 70/500 [04:15<11:56,  1.67s/it, loss=0.955]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  14%|█▍        | 71/500 [04:17<12:39,  1.77s/it, loss=1.11]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.34 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  14%|█▍        | 72/500 [04:19<12:23,  1.74s/it, loss=1.12]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  15%|█▍        | 73/500 [04:21<12:12,  1.71s/it, loss=0.821]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  15%|█▍        | 74/500 [04:22<12:03,  1.70s/it, loss=0.837]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  15%|█▌        | 75/500 [04:24<11:56,  1.69s/it, loss=0.83]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  15%|█▌        | 76/500 [04:25<11:51,  1.68s/it, loss=1.06]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  15%|█▌        | 77/500 [04:27<11:47,  1.67s/it, loss=0.871]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  16%|█▌        | 78/500 [04:29<11:44,  1.67s/it, loss=0.96]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  16%|█▌        | 79/500 [04:30<11:42,  1.67s/it, loss=0.913]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  16%|█▌        | 80/500 [04:32<11:40,  1.67s/it, loss=0.972]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  16%|█▌        | 81/500 [04:34<12:22,  1.77s/it, loss=0.821]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  16%|█▋        | 82/500 [04:36<12:05,  1.74s/it, loss=1]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  17%|█▋        | 83/500 [04:37<11:55,  1.72s/it, loss=0.902]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  17%|█▋        | 84/500 [04:39<11:45,  1.70s/it, loss=0.952]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  17%|█▋        | 85/500 [04:41<11:40,  1.69s/it, loss=1.07]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  17%|█▋        | 86/500 [04:42<11:36,  1.68s/it, loss=1.07]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  17%|█▋        | 87/500 [04:44<11:33,  1.68s/it, loss=1.05]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  18%|█▊        | 88/500 [04:46<11:30,  1.68s/it, loss=0.944]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  18%|█▊        | 89/500 [04:47<11:28,  1.68s/it, loss=0.86]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  18%|█▊        | 90/500 [04:49<11:26,  1.67s/it, loss=0.816]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  18%|█▊        | 91/500 [04:51<12:07,  1.78s/it, loss=0.93]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.34 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  18%|█▊        | 92/500 [04:53<11:51,  1.74s/it, loss=0.826]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  19%|█▊        | 93/500 [04:55<11:41,  1.72s/it, loss=0.971]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  19%|█▉        | 94/500 [04:56<11:34,  1.71s/it, loss=0.908]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  19%|█▉        | 95/500 [04:58<11:28,  1.70s/it, loss=0.787]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  19%|█▉        | 96/500 [05:00<11:24,  1.69s/it, loss=0.808]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  19%|█▉        | 97/500 [05:01<11:20,  1.69s/it, loss=0.728]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  20%|█▉        | 98/500 [05:03<11:17,  1.68s/it, loss=0.897]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  20%|█▉        | 99/500 [05:05<11:13,  1.68s/it, loss=1.02]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  20%|██        | 100/500 [05:06<11:11,  1.68s/it, loss=0.713]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  20%|██        | 101/500 [05:08<11:50,  1.78s/it, loss=0.717]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  20%|██        | 102/500 [05:10<11:35,  1.75s/it, loss=0.838]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  21%|██        | 103/500 [05:12<11:25,  1.73s/it, loss=0.93]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  21%|██        | 104/500 [05:13<11:18,  1.71s/it, loss=0.78]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  21%|██        | 105/500 [05:15<11:11,  1.70s/it, loss=0.696]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  21%|██        | 106/500 [05:17<11:05,  1.69s/it, loss=0.823]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  21%|██▏       | 107/500 [05:18<11:01,  1.68s/it, loss=0.894]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  22%|██▏       | 108/500 [05:20<10:59,  1.68s/it, loss=0.87]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  22%|██▏       | 109/500 [05:22<10:55,  1.68s/it, loss=0.804]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  22%|██▏       | 110/500 [05:23<10:52,  1.67s/it, loss=0.685]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  22%|██▏       | 111/500 [05:25<11:37,  1.79s/it, loss=0.732]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.34 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  22%|██▏       | 112/500 [05:27<11:20,  1.75s/it, loss=0.671]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  23%|██▎       | 113/500 [05:29<11:09,  1.73s/it, loss=0.898]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  23%|██▎       | 114/500 [05:30<11:00,  1.71s/it, loss=0.914]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  23%|██▎       | 115/500 [05:32<10:53,  1.70s/it, loss=0.961]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  23%|██▎       | 116/500 [05:34<10:47,  1.69s/it, loss=0.752]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  23%|██▎       | 117/500 [05:35<10:42,  1.68s/it, loss=0.692]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  24%|██▎       | 118/500 [05:37<10:39,  1.67s/it, loss=0.703]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  24%|██▍       | 119/500 [05:39<10:37,  1.67s/it, loss=0.774]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  24%|██▍       | 120/500 [05:40<10:35,  1.67s/it, loss=0.728]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  24%|██▍       | 121/500 [05:42<11:14,  1.78s/it, loss=0.756]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  24%|██▍       | 122/500 [05:44<10:58,  1.74s/it, loss=0.73]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  25%|██▍       | 123/500 [05:46<10:48,  1.72s/it, loss=0.753]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  25%|██▍       | 124/500 [05:47<10:39,  1.70s/it, loss=0.722]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  25%|██▌       | 125/500 [05:49<10:33,  1.69s/it, loss=0.783]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  25%|██▌       | 126/500 [05:51<10:28,  1.68s/it, loss=0.787]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  25%|██▌       | 127/500 [05:52<10:26,  1.68s/it, loss=0.861]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  26%|██▌       | 128/500 [05:54<10:23,  1.68s/it, loss=0.753]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  26%|██▌       | 129/500 [05:56<10:21,  1.68s/it, loss=0.687]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  26%|██▌       | 130/500 [05:57<10:19,  1.67s/it, loss=0.596]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  26%|██▌       | 131/500 [05:59<10:55,  1.78s/it, loss=0.936]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.34 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  26%|██▋       | 132/500 [06:01<10:40,  1.74s/it, loss=0.737]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  27%|██▋       | 133/500 [06:03<10:31,  1.72s/it, loss=0.627]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  27%|██▋       | 134/500 [06:04<10:24,  1.71s/it, loss=0.691]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  27%|██▋       | 135/500 [06:06<10:17,  1.69s/it, loss=0.641]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  27%|██▋       | 136/500 [06:08<10:14,  1.69s/it, loss=0.687]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  27%|██▋       | 137/500 [06:09<10:11,  1.68s/it, loss=0.593]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  28%|██▊       | 138/500 [06:11<10:08,  1.68s/it, loss=0.661]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  28%|██▊       | 139/500 [06:13<10:05,  1.68s/it, loss=0.552]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  28%|██▊       | 140/500 [06:14<10:02,  1.67s/it, loss=0.648]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  28%|██▊       | 141/500 [06:16<10:38,  1.78s/it, loss=0.814]

GPU Memory allocated: 0.23 GB
GPU Memory cached: 0.30 GB
RAM Used: 1.28 GB

Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  28%|██▊       | 142/500 [06:18<10:24,  1.74s/it, loss=0.554]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  29%|██▊       | 143/500 [06:20<10:14,  1.72s/it, loss=0.656]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  29%|██▉       | 144/500 [06:21<10:08,  1.71s/it, loss=0.814]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  29%|██▉       | 145/500 [06:23<10:03,  1.70s/it, loss=0.67]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  29%|██▉       | 146/500 [06:25<09:58,  1.69s/it, loss=0.693]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  29%|██▉       | 147/500 [06:26<09:53,  1.68s/it, loss=0.741]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])


Training:  30%|██▉       | 148/500 [06:28<09:50,  1.68s/it, loss=0.581]


Encoder shapes:
e1: torch.Size([4, 64, 512, 384])
e2: torch.Size([4, 128, 256, 192])
e3: torch.Size([4, 256, 128, 96])
e4: torch.Size([4, 512, 128, 96])

Decoder shapes:
d1 after ConvTranspose: torch.Size([4, 256, 256, 192])
d1 after upsample: torch.Size([4, 256, 128, 96])
e3 for concatenation: torch.Size([4, 256, 128, 96])
d1_cat: torch.Size([4, 512, 128, 96])
d2 before upsample: torch.Size([4, 128, 128, 96])
d2 after upsample: torch.Size([4, 128, 256, 192])
e2 for concatenation: torch.Size([4, 128, 256, 192])
d2_cat: torch.Size([4, 256, 256, 192])
d3 before upsample: torch.Size([4, 128, 256, 192])
d3 after upsample: torch.Size([4, 128, 512, 384])
e1 for concatenation: torch.Size([4, 64, 512, 384])
d3_cat: torch.Size([4, 192, 512, 384])

Final output: torch.Size([4, 3, 512, 384])
