In [1]:
import os
import glob
import cv2

from time import sleep

import torch
import torchvision

import matplotlib.pyplot as plt
%matplotlib inline

from dataset import WeizmannHumanActionVideo
from image_autoencoder import ImageAutoEncoder

In [2]:
#  use gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

## Dataset 

In [3]:
"""
trans_data = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
"""

trans_data = torchvision.transforms.ToTensor()
trans_label = None

dataset = WeizmannHumanActionVideo(trans_data=None, trans_label=trans_label, train=True)

## Train-test split

In [4]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [5]:
print("train: ", len(train_dataset))
print("test: ", len(test_dataset))

train:  74
test:  19


## Training

**Dataloader**

In [6]:
batch_size=1

In [7]:
train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True, 
                                           num_workers=4)

test_loader = torch.utils.data.DataLoader(test_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True, 
                                           num_workers=1)

In [8]:
for i, (data, label) in enumerate(train_loader):
    print(i,  data.shape, label)

0 torch.Size([1, 48, 3, 96, 96]) tensor([1])
1 torch.Size([1, 64, 3, 96, 96]) tensor([1])
2 torch.Size([1, 61, 3, 96, 96]) tensor([4])
3 torch.Size([1, 65, 3, 96, 96]) tensor([4])
4 torch.Size([1, 36, 3, 96, 96]) tensor([6])
5 torch.Size([1, 56, 3, 96, 96]) tensor([7])
6 torch.Size([1, 54, 3, 96, 96]) tensor([7])
7 torch.Size([1, 54, 3, 96, 96]) tensor([9])
8 torch.Size([1, 28, 3, 96, 96]) tensor([6])
9 torch.Size([1, 88, 3, 96, 96]) tensor([3])
10 torch.Size([1, 54, 3, 96, 96]) tensor([5])
11 torch.Size([1, 39, 3, 96, 96]) tensor([1])
12 torch.Size([1, 67, 3, 96, 96]) tensor([8])
13 torch.Size([1, 62, 3, 96, 96]) tensor([5])
14 torch.Size([1, 55, 3, 96, 96]) tensor([0])
15 torch.Size([1, 146, 3, 96, 96]) tensor([0])
16 torch.Size([1, 47, 3, 96, 96]) tensor([8])
17 torch.Size([1, 114, 3, 96, 96]) tensor([2])
18 torch.Size([1, 60, 3, 96, 96]) tensor([7])
19 torch.Size([1, 48, 3, 96, 96]) tensor([7])
20 torch.Size([1, 70, 3, 96, 96]) tensor([0])
21 torch.Size([1, 82, 3, 96, 96]) tensor([

In [9]:
# data

In [10]:
# type(data)

**Iterative algorithm (SGD)**

In [11]:
n_epochs=10

In [12]:
# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu

model = ImageAutoEncoder(n_channel=3, dim_zm=2, dim_zc=2).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

criterion = torch.nn.MSELoss()

In [13]:
for epoch in range(n_epochs):
    model.train()
    train_loss = 0
    
    for batch_id, (batch_data, _) in enumerate(train_loader):
        print(torch.cuda.memory_allocated(device))
        
        # x: 4D (video_len, channel, height, width)
        x = torch.squeeze(batch_data, dim=0).to(device) 
        
        optimizer.zero_grad()
    
        print(torch.cuda.memory_allocated(device))
        x_hat, zc = model(x) # x_hat: (video_len, channel, height, width),  zc: (video_len, dim_zc)        
        print(torch.cuda.memory_allocated(device))

        # loss = criterion(x_hat, x) + torch.norm(zc.std(dim=0), 2)
        loss = criterion(x_hat, x) + torch.norm(zc.std(dim=0), 2)
        loss.backward() # compute accumulated gradients
        
        # train_loss += loss.item()

        optimizer.step()
                
        print("epoch : {}/{}, batch : {}/{}, loss = {:.6f}".format(
            epoch + 1, n_epochs, batch_id, int(len(train_dataset)/batch_size), loss.item()))
        
        del loss
        del x_hat
        del x        
    # display the epoch training loss
    print("epoch : {}/{}, loss = {:.4f}".format(epoch + 1, n_epochs, train_loss / len(train_loader)))
    # print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))

8673792
14645760
3518821888
epoch : 1/10, batch : 0/74, loss = 0.757574
34936832
40908800
3088010752
epoch : 1/10, batch : 1/74, loss = 0.439633
34936832
40908800
3411451392
epoch : 1/10, batch : 2/74, loss = 0.377576
34936832
42346496
4386195456
epoch : 1/10, batch : 3/74, loss = 0.283066
34937344
40909312
3024748032
epoch : 1/10, batch : 4/74, loss = 0.244832
34936832
40908800
3543955456
epoch : 1/10, batch : 5/74, loss = 0.233834
34936832
38365184
2048748544
epoch : 1/10, batch : 6/74, loss = 0.227067
34936832
39249920
2568700416
epoch : 1/10, batch : 7/74, loss = 0.214250
34936832
42899456
4710548480
epoch : 1/10, batch : 8/74, loss = nan
34937344
40909312
3477203968
epoch : 1/10, batch : 9/74, loss = nan
34936832
40908800
3152310272
epoch : 1/10, batch : 10/74, loss = nan
34936832
44226560
5491292160
epoch : 1/10, batch : 11/74, loss = nan
34937344
39139840
2503007744
epoch : 1/10, batch : 12/74, loss = nan
34936832
41682944
3995145728
epoch : 1/10, batch : 13/74, loss = nan
34936

RuntimeError: CUDA out of memory. Tried to allocate 236.00 MiB (GPU 0; 7.93 GiB total capacity; 6.88 GiB already allocated; 161.06 MiB free; 195.52 MiB cached)