In [8]:
from dataset.ganhands import GanHands
from dpt.models import HandModel
from dpt.transforms import Resize
import torch.utils.data as data
from torchvision.transforms import Compose
import torch
from datetime import datetime
from dpt.transforms import PrepareForNet
import cv2

# Load Data

In [9]:
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so
# the notebook still runs end to end on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Target width/height handed to the Resize transform below.
net_w = 384
net_h = 384
transforms = Compose([
    Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    PrepareForNet()
])

# Single place to edit when the dataset lives somewhere else on disk.
DATA_ROOT = '/home/zain/University/Datasets/GANeratedHands_Release/data/noObject'

# NOTE(review): the second argument (1024) is presumably a sample count/limit —
# confirm against the GanHands constructor.
training_dataset = GanHands(DATA_ROOT + '/0001', 1024, transforms)
validation_dataset = GanHands(DATA_ROOT + '/0002', 1024, transforms)

# Pinned host memory only speeds up host-to-GPU copies, so request it only
# when actually running on CUDA.
# NOTE(review): training batches are served in a fixed order (shuffle=False);
# confirm this is intended rather than a debugging leftover.
training_loader = data.DataLoader(
    training_dataset,
    batch_size=1,
    num_workers=0,
    shuffle=False,
    pin_memory=(device.type == "cuda"),
)
validation_loader = data.DataLoader(
    validation_dataset,
    batch_size=1,
    num_workers=0,
    shuffle=False,
    pin_memory=(device.type == "cuda"),
)

# Prepare model

In [10]:
# Instantiate the hand model and place its weights on the selected device.
hand_model = HandModel().to(device)
# Bare expression as the last line so the notebook renders the module summary.
hand_model

HandModel(
  (encoder): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768,)

# Loss function

In [11]:
# Mean-squared error between predictions and targets, averaged over all
# elements ("mean" is also the library default; spelled out for clarity).
loss_fn = torch.nn.MSELoss(reduction="mean")

# Optimizer

In [12]:
# Adam with two parameter groups: the encoder inherits the optimizer-wide
# default learning rate (1e-6), while the head overrides it with 1e-4.
optimizer = torch.optim.Adam(
    params=[
        dict(params=hand_model.encoder.parameters()),
        dict(params=hand_model.head.parameters(), lr=1e-4),
    ],
    lr=1e-6,
)

# Training Loop

## One Epoch

In [13]:
def train_one_epoch(model, training_loader, optimizer, device,
                    report_every=8, criterion=None):
    """Run one optimisation pass over ``training_loader``.

    Args:
        model: Callable network; invoked as ``model(batch["image"])``.
        training_loader: Iterable yielding dict batches that contain at least
            the keys ``"image"`` and ``"coords"``. Every value in the dict is
            moved to ``device`` with ``.to(device)``.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        device: Device the batch values are moved to before the forward pass.
        report_every: Number of batches per reporting window. The running
            average is printed and reset each time a window completes.
        criterion: Loss callable ``criterion(outputs, targets)``. When omitted,
            the notebook-level ``loss_fn`` is used.

    Returns:
        float: Mean per-batch loss of the last completed reporting window. If
        the loader yielded fewer than ``report_every`` batches, the mean over
        the batches that were seen (0.0 for an empty loader).
    """
    # Resolve the loss lazily so callers may inject their own criterion while
    # existing call sites keep using the notebook-level ``loss_fn``.
    criterion = loss_fn if criterion is None else criterion

    running_loss = 0.
    last_loss = 0.
    batches_in_window = 0
    reported = False

    # enumerate() gives the batch index used for intra-epoch reporting.
    for i, data in enumerate(training_loader):
        # Each batch is a dict; move every entry to the target device.
        batch = {k: v.to(device) for k, v in data.items()}
        inputs = batch["image"]
        y_true = batch["coords"]

        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = model(inputs)
        loss = criterion(outputs, y_true)
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is retained.
        running_loss += loss.item()
        batches_in_window += 1
        if batches_in_window == report_every:
            # Average over exactly the batches in this window, then reset so
            # the next window is not inflated by earlier batches.
            last_loss = running_loss / report_every
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.
            batches_in_window = 0
            reported = True

    # Short epochs never complete a window; report what was seen instead of a
    # misleading 0.0.
    if not reported and batches_in_window:
        last_loss = running_loss / batches_in_window

    return last_loss

## Training over epochs

In [14]:
# Timestamp used to give the checkpoints of this run unique file names.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
epoch_number = 0
EPOCHS = 5

# Large sentinel so the first epoch's validation loss always counts as "best".
best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Put the model in training mode and do one pass over the training data.
    hand_model.train(True)
    avg_loss = train_one_epoch(hand_model, training_loader, optimizer, device)

    # Switch to evaluation mode for the validation pass.
    hand_model.train(False)

    running_vloss = 0.0
    num_vbatches = 0
    # Validation needs no gradients: no_grad() avoids building autograd graphs,
    # and .item() keeps the accumulator a plain float instead of a tensor that
    # would keep every batch's graph alive on the device.
    with torch.no_grad():
        for vdata in validation_loader:
            # Batches are dicts keyed like the training batches; move the
            # tensors to the same device as the model.
            vinputs = vdata["image"].to(device)
            vlabels = vdata["coords"].to(device)
            voutputs = hand_model(vinputs)
            running_vloss += loss_fn(voutputs, vlabels).item()
            num_vbatches += 1

    # num_vbatches is already the batch count; max() guards an empty loader.
    avg_vloss = running_vloss / max(num_vbatches, 1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Track the best validation loss and checkpoint the model when it improves.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(hand_model.state_dict(), model_path)

    epoch_number += 1

EPOCH 1:


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 3.95 GiB total capacity; 2.95 GiB already allocated; 29.81 MiB free; 3.04 GiB reserved in total by PyTorch)