In [1]:
import os
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
import json

from non_sequential.dataset import ActionDataset, read_action_annotations, parse_coco
from non_sequential.model import ANN
from feature_extractor import resnet50_extractor, pad_and_stack_vectors

In [3]:
# Locations of the annotation files and of the action image folder.
annotations_dir = os.path.join(".", "activity_data")
root_dir = os.path.join(annotations_dir, "actions")

# Parse the COCO-style annotation file that lives in the annotations folder.
data = parse_coco(
    annotations_dir,
    file_name="annotations.json",
    name_only=True,
)

In [4]:
# Read the action annotations that accompany the original data.
activity_annotations_path = os.path.join(annotations_dir, "activity_annotations.json")
actions = read_action_annotations(activity_annotations_path)

### Data augmentation

In [5]:
# Gather the annotations of the three augmented copies (aug1 .. aug3)
# into a single flat list.
aug_ann = []
augmented_dir = os.path.join(root_dir, "..", "augmented")
for aug_idx in (1, 2, 3):
    ann_path = os.path.join(augmented_dir, f"aug{aug_idx}_annotations.json")
    with open(ann_path) as ann_file:
        aug_ann += json.load(ann_file)

In [6]:
# Merge the action annotations of the three augmented copies (aug1 .. aug3)
# into one dictionary; later files overwrite earlier ones on key clashes.
aug_action_ann = {}
augmented_dir = os.path.join(root_dir, "..", "augmented")
for aug_idx in (1, 2, 3):
    action_path = os.path.join(augmented_dir, f"aug{aug_idx}_action_annotations.json")
    with open(action_path) as action_file:
        aug_action_ann.update(json.load(action_file))

In [7]:
# Spot-check: display the action annotation stored for one augmented frame.
aug_action_ann["aug2_s2_t2_00070.jpg"]

'use screw driver'

In [8]:
# Original annotations followed by the augmented ones.
aug_data = data + aug_ann

# Original action annotations extended with the augmented ones; on a key
# clash the augmented entry wins.
aug_actions = dict(actions)
aug_actions.update(aug_action_ann)

In [9]:
# Dataset over the combined (original + augmented) annotations.
# NOTE(review): resnet50_extractor and pad_and_stack_vectors are passed through
# to ActionDataset as-is; their exact role is defined in non_sequential.dataset.
aug_dataset = ActionDataset(
    root_dir,
    aug_data,
    aug_actions,
    resnet50_extractor,
    pad_and_stack_vectors,
    max_length_distance_vector=16,
)

In [10]:
# Shuffled mini-batches of four samples drawn from the augmented dataset.
dataloader = DataLoader(
    dataset=aug_dataset,
    batch_size=4,
    shuffle=True,
)

In [12]:
# Network dimensions.
# NOTE(review): 858 is presumably the length of the feature vector produced by
# ActionDataset — confirm against the dataset code.
input_size = 858
num_classes = 8

# Everything runs on the CPU here. To use a GPU when one is available, set
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') instead.
device = "cpu"
model = ANN(input_size, num_classes).to(device)

# Loss, optimiser and training length.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 8

In [13]:
# Train the classifier for `num_epochs` passes over `dataloader`, printing the
# mean batch loss after every epoch.
for epoch in range(num_epochs):
    # Switch (back) to training mode at the start of every epoch so that this
    # cell still behaves correctly when re-run after the evaluation cell has
    # called model.eval().
    model.train()
    running_loss = 0.0
    # x = feature vectors
    # y = activity (integer class index per sample)
    for x, y in tqdm(dataloader, total=len(dataloader)):
        x = x.to(device).float()
        y = y.to(device)

        optimizer.zero_grad()
        out = model(x)

        # nn.CrossEntropyLoss accepts integer class indices directly, so no
        # one-hot encoding is needed. The previous
        # F.one_hot(...).squeeze(0) also dropped the batch dimension whenever
        # a batch held a single sample (e.g. a short final batch), leaving a
        # target whose shape no longer matched `out`.
        loss = criterion(out, y)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Average of the per-batch losses over the epoch.
    epoch_loss = running_loss / len(dataloader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

100%|██████████| 279/279 [39:00<00:00,  8.39s/it]


Epoch [1/8], Loss: 1.6403


100%|██████████| 279/279 [35:50<00:00,  7.71s/it]


Epoch [2/8], Loss: 1.5118


100%|██████████| 279/279 [34:55<00:00,  7.51s/it]


Epoch [3/8], Loss: 1.3622


100%|██████████| 279/279 [35:01<00:00,  7.53s/it]


Epoch [4/8], Loss: 1.2730


100%|██████████| 279/279 [34:17<00:00,  7.37s/it]


Epoch [5/8], Loss: 1.2192


100%|██████████| 279/279 [35:25<00:00,  7.62s/it]


Epoch [6/8], Loss: 1.1333


100%|██████████| 279/279 [34:52<00:00,  7.50s/it]


Epoch [7/8], Loss: 1.0702


100%|██████████| 279/279 [36:50<00:00,  7.92s/it]

Epoch [8/8], Loss: 0.9733





In [14]:
# Quick sanity check on a single batch: print the predicted class of every
# sample, then the labels. The batch comes from the same shuffled `dataloader`
# that was used for training, so this is not a held-out evaluation.
model.eval()
with torch.no_grad():
    x, y = next(iter(dataloader))
    x = x.to(device).float()
    y = y.to(device)
    for logits in model(x):
        print(torch.argmax(logits))
    print(y)

tensor(3)
tensor(1)
tensor(1)
tensor(2)
tensor([3, 1, 1, 2])


In [15]:
# Persist the model's state_dict (parameters and buffers), not the model object itself.
weights_path = "non_sequential_model_params.pth"
torch.save(model.state_dict(), weights_path)