In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from clearml import Task, Logger, Dataset
import os
import os.path as op
import argparse
import random
import logging
import torch
import time
import multiprocessing

import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from torchvision import transforms
from torch.utils.data import DataLoader
from pathlib import Path

from utils import __balance_val_split, __split_of_train_sequence, __log_class_statistics
from datasets import CzechSLRDataset, SLREmbeddingDataset, collate_fn_triplet_padd, collate_fn_padd
from spoter import SPOTER, SPOTER_EMBEDDINGS, train_epoch, evaluate, evaluate_top_k, train_epoch_embedding, \
                    evaluate_embedding, embeddings_scatter_plot, GaussianNoise, BatchAllTripletLoss, \
                    train_epoch_embedding_online

# Training Loop

In [3]:
experiment_name = "embedding_scheduler_test"
epochs = 5
lr = 0.001
dataset_project = "Sign Language Recognition"
dataset_name = "wlasl"
training_set_path = "WLASL100_train_25fps.csv"
validation_set_path = "WLASL100_val_25fps.csv"
embedding_model = 1
vector_length = 32
epoch_iters = 100
scheduler_factor = 0.5
num_classes = 100
hidden_dim = 108
batch_size = 25
hard_triplet_mining = "in_batch"

gaussian_mean = 0
gaussian_std = 0.001

In [4]:
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print('Device: ', device)

Device:  cuda


In [5]:
slrt_model = SPOTER_EMBEDDINGS(features=vector_length, hidden_dim=hidden_dim)
model_type = 'embed'
slrt_model.to(device)

SPOTER_EMBEDDINGS(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=108, out_features=108, bias=True)
          )
          (linear1): Linear(in_features=108, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=108, bias=True)
          (norm1): LayerNorm((108,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((108,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=108, out_features=108, bias=True)
          )
          (linear1): Linear(in_featur

In [6]:
# cel_criterion = nn.TripletMarginLoss(margin=1.0, p=2)
cel_criterion = BatchAllTripletLoss(device, margin=1, filter_easy_triplets=False)
sgd_optimizer = optim.SGD(slrt_model.parameters(), lr=lr)
scheduler = None

In [7]:
# Training set
transform = transforms.Compose([GaussianNoise(gaussian_mean, gaussian_std)])
dataset_folder = Dataset.get(dataset_project=dataset_project, dataset_name=dataset_name).get_local_copy()
training_set_path = op.join(dataset_folder, training_set_path)

train_val_set = SLREmbeddingDataset(training_set_path, triplet=False)
# Train dataloader for validation
train_val_loader = DataLoader(
    train_val_set,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn_padd,
    pin_memory=torch.cuda.is_available(),
    num_workers=multiprocessing.cpu_count()
)

train_set = SLREmbeddingDataset(
    training_set_path,
    triplet=False,
    transform=transform,
    augmentations=True
)

train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn_padd,
    pin_memory=torch.cuda.is_available(),
    num_workers=multiprocessing.cpu_count()
)

validation_set_path = op.join(dataset_folder, validation_set_path)
val_set = SLREmbeddingDataset(validation_set_path, triplet=False)

val_loader = DataLoader(
    val_set,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn_padd,
    pin_memory=torch.cuda.is_available(),
    num_workers=multiprocessing.cpu_count()
)

In [8]:
train_acc, val_acc = 0, 0
losses, train_accs, val_accs = [], [], []
lr_progress = []
top_val_acc = -999
top_model_name = None

In [9]:
for epoch in range(1, 3):
    slrt_model.train(True)
    train_loss, val_silhouette_coef, valid_triplets, used_triplets = train_epoch_embedding_online(
        slrt_model,
        10,
        hard_triplet_mining,
        train_loader,
        val_loader,
        cel_criterion,
        sgd_optimizer,
        device,
        scheduler,
    )
    print(f"train loss {train_loss}")
    print(f"valid triplets {valid_triplets}")
    print(f"num positive losses {num_positive_losses}")
    
    print(f"val acc {val_silhouette_coef}")
    
    
    slrt_model.train(False)
    # calculate acc on train dataset
    silhouette_coefficient_train = evaluate_embedding(slrt_model, train_val_loader, device)
    print(f"train acc {silhouette_coefficient_train}")



train loss 0.8892873308875344
valid triplets 92
num positive losses 92
val acc -0.42441365122795105
train acc -0.3172035217285156
train loss 0.9172803922133013
valid triplets 138
num positive losses 138
val acc -0.432142972946167
train acc -0.3174569606781006


In [10]:
print(rompe)

NameError: name 'rompe' is not defined

In [None]:
tsne_results, labels = embeddings_scatter_plot(slrt_model, val_loader, device, perplexity=40, n_iter=1000)

In [None]:
plt.scatter(
    x=tsne_results[:, 0],
    y=tsne_results[:, 1]
)
plt.show()

# Testing

In [None]:
experiment_name = "no_rotation"  # Name of the experiment after which the logs and plots will be named
num_classes = 100  # Number of classes to be recognized by the model")
hidden_dim = 108  # Hidden dimension of the underlying Transformer model
seed = 379  # Seed with which to initialize all the random components of the training
dataset_name = "wlasl"
dataset_project = "Sign Language Recognition"
testing_set_path = "WLASL100_val_25fps.csv"  # Path to the testing dataset CSV file

In [None]:
# Initialize all the random seeds
random.seed(seed)
np.random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
g = torch.Generator()
g.manual_seed(seed)

# Set device to CUDA only if applicable
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print('using device:', device)

In [None]:
# Construct the model
slrt_model = SPOTER(num_classes=num_classes, hidden_dim=hidden_dim)
slrt_model.train(False)
slrt_model.to(device)

# Construct the other modules
cel_criterion = nn.CrossEntropyLoss()

In [None]:
# Testing set
dataset_folder = Dataset.get(dataset_project=dataset_project, dataset_name=dataset_name).get_local_copy()
testing_set_path = op.join(dataset_folder, testing_set_path)

eval_set = CzechSLRDataset(testing_set_path)
eval_loader = DataLoader(eval_set, generator=g)

# Find Best Model

In [None]:
top_result, top_result_name = 0, ""
checkpoints_filenames = os.listdir("out-checkpoints/" + experiment_name)

for checkpoint_filename in sorted(checkpoints_filenames):
    tested_model = torch.load("out-checkpoints/" + experiment_name + '/' + checkpoint_filename)
    _, _, eval_acc = evaluate(tested_model, eval_loader, device, print_stats=False)

    if eval_acc > top_result:
        top_result = eval_acc
        top_result_name = experiment_name + '/' + checkpoint_filename

    print(checkpoint_filename + "  ->  " + str(eval_acc))

print("\n" "Best checkpoint: " + top_result_name + ", acc: " + str(top_result))


# Accuracy in top k predictions

In [None]:
k = 5  # Accepted ranking with specified model
best_model_path = "out-checkpoints/no_rotation/checkpoint_v_14.pth"  # Best model to test

In [None]:
tested_model = torch.load(best_model_path)
_, _, eval_acc = evaluate(tested_model, eval_loader, device, print_stats=False)
_, _, eval_acc_k = evaluate_top_k(tested_model, eval_loader, device, k=k)

print(f'Acc k=1: {best_model_path} ->  {str(eval_acc)}')
print(f'Acc k={k}: {best_model_path} ->  {str(eval_acc_k)}')

# Testing Model

In [None]:
tested_model = torch.load(best_model_path)

inputs, labels = next(iter(eval_loader))
inputs = inputs.squeeze(0).to(device)
print(inputs.shape)

labels = labels.to(device, dtype=torch.long)

outputs = tested_model(inputs).expand(1, -1, -1)
print(outputs)
loss = cel_criterion(outputs[0], labels[0]).item()