In [None]:
'''File to test the results of the model on the FreiHAND dataset - equivalent to test_all_models.py, but run in Jupyter'''

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torchvision import transforms
import sys
import torch
from torch.utils.data import DataLoader

sys.path.append("../")
from datasets.FreiHAND import FreiHAND, FreiHAND_evaluation,FreiHAND_albu
from models.models import CustomHeatmapsModel,EfficientWaterfall
from utils.testing import batch_epe_calculation,batch_auc_calculation,batch_pck_calculation, show_batch_predictions
from utils.utils import heatmaps_to_coordinates
from config import *



### Config data

In [2]:
# Run settings: the whole mapping is handed to the dataset classes, while
# "model_path"/"device" are read when the weights are loaded and
# "test_batch_size" when the data loaders are built.
config = dict(
    data_dir="/data/wmucha/datasets/FreiHAND",
    model_path="/caa/Homes01/wmucha/repos/applied_deep_learning/applied_dl/waterfall_fulldata_scratch9_121",
    test_batch_size=1,
    device=TESTING_DEVICE,
)

## Test subset of data

In [3]:
# torchvision preprocessing: resize to MODEL_IMG_SIZE, convert to a tensor, then
# normalise with TRAIN_DATASET_MEANS / TRAIN_DATASET_STDS.
val_image_transform = transforms.Compose(
    [
        transforms.Resize(MODEL_IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=TRAIN_DATASET_MEANS, std=TRAIN_DATASET_STDS),
    ]
)
val_img_transform = val_image_transform  # both names point at the same pipeline

# The test split goes through the albumentations-based dataset; the plain variant
# would be: FreiHAND(config=config, set_type="test", img_transform=val_img_transform)
test_dataset = FreiHAND_albu(config=config, set_type="test", albumetations=ALBUMENTATION_VAL)
final_evaluation = FreiHAND_evaluation(config, img_transform=val_img_transform)

# Both loaders iterate in dataset order and keep the final partial batch.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=config["test_batch_size"],
    num_workers=2,
    shuffle=False,
    drop_last=False,
)

final_evaluation_dataloader = DataLoader(
    dataset=final_evaluation,
    batch_size=config["test_batch_size"],
    num_workers=2,
    shuffle=False,
    drop_last=False,
)



Number of test samples: 13024


## Model

In [4]:
# Instantiate the network and restore the saved state dict from
# config["model_path"], mapping stored tensors onto the configured device.
# (CustomHeatmapsModel(3, 21) is the alternative architecture.)
weights_device = torch.device(config["device"])
model = EfficientWaterfall(N_KEYPOINTS)
model.load_state_dict(torch.load(config["model_path"], map_location=weights_device))
model.eval()  # evaluation mode for the inference cells below
print("Model loaded")




Model loaded


## Evaluate on Test Data

In [None]:
def evaluate(model, dataloader, using_heatmaps = True, batch_size = 0, results_path = 'results.txt'):
    """Run ``model`` over ``dataloader`` and report keypoint metrics.

    For every batch the function computes the per-image mean Euclidean distance
    between predicted and ground-truth keypoints, plus PCK (threshold 0.2), EPE
    and AUC via the project helpers. The three helper metrics are averaged as an
    unweighted mean over batches, written to ``results_path`` (one value per
    line, in the order PCK, EPE, AUC) and printed.

    Args:
        model: Callable applied to ``data["image"]``; must return a tensor.
        dataloader: Iterable of dicts providing the keys ``"image"`` and
            ``"keypoints"`` (keypoints are converted with ``.numpy()``).
        using_heatmaps: When truthy, the model output is converted with
            ``heatmaps_to_coordinates``; otherwise it is reshaped to
            ``(batch, N_KEYPOINTS, 2)``.
        batch_size: Leading dimension for the non-heatmap reshape. Values that
            are not positive (including the default ``0``) let NumPy infer it.
        results_path: File that receives the averaged metrics; it is overwritten.

    Returns:
        Tuple ``(accuracy_all, pck)``: the list of per-image mean keypoint
        distances and the averaged PCK.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    accuracy_all = []
    pck_acc = []
    epe_lst = []
    auc_lst = []

    for data in tqdm(dataloader):
        inputs = data["image"]
        # Inference only: avoid building an autograd graph for every batch.
        with torch.no_grad():
            pred_heatmaps = model(inputs)
        # .cpu() is a no-op for CPU tensors and makes .numpy() safe otherwise.
        pred_heatmaps = pred_heatmaps.detach().cpu().numpy()
        true_keypoints = (data["keypoints"]).numpy()

        if using_heatmaps:
            pred_keypoints = heatmaps_to_coordinates(pred_heatmaps)
        else:
            # A leading dimension of 0 can never match real predictions, so only
            # honour an explicit positive batch_size and infer it otherwise.
            leading_dim = batch_size if batch_size > 0 else -1
            pred_keypoints = pred_heatmaps.reshape(leading_dim, N_KEYPOINTS, 2)

        # Euclidean distance per keypoint, then the mean over keypoints per image.
        accuracy_keypoint = ((true_keypoints - pred_keypoints) ** 2).sum(axis=2) ** (1 / 2)
        accuracy_image = accuracy_keypoint.mean(axis=1)
        accuracy_all.extend(list(accuracy_image))

        # Calculate PCK@02
        avg_acc = batch_pck_calculation(pred_keypoints, true_keypoints, treshold = 0.2, mask = None, normalize = None)
        pck_acc.append(avg_acc)

        # Calculate EPE; its value depends on the scale of the input keypoints.
        epe = batch_epe_calculation(pred_keypoints, true_keypoints)
        epe_lst.append(epe)

        # AUC calculation
        auc = batch_auc_calculation(pred_keypoints, true_keypoints, num_step=20, mask = None)
        auc_lst.append(auc)

        # Visualise batches the model handles poorly.
        if avg_acc < 0.5:
            show_batch_predictions(data, model, epe)

    if not pck_acc:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    pck = sum(pck_acc) / len(pck_acc)
    epe_final = sum(epe_lst) / len(epe_lst)
    auc_final = sum(auc_lst) / len(auc_lst)

    # writelines() accepts only strings, so format each metric on its own line.
    with open(results_path, 'w') as f:
        f.writelines(f"{value}\n" for value in (pck, epe_final, auc_final))
    print (f'PCK@2: {pck}, EPE: {epe_final}, AUC: {auc_final}')
    return accuracy_all, pck

In [None]:
# Score the model on the test split; evaluate() also writes results.txt and prints a summary.
accuracy_all, pck = evaluate(model, dataloader=test_dataloader)

In [None]:
# Mean of the per-image keypoint distances returned by evaluate().
error = np.mean(accuracy_all)
print("Average error per keypoint: {:.1f}% from image size".format(100 * error))

# Re-express the same error in pixels for each image resolution.
# NOTE(review): this scaling presumes keypoints are normalised to [0, 1] — confirm.
for img_size in (RAW_IMG_SIZE, MODEL_IMG_SIZE):
    image_size = f"{img_size}x{img_size}"
    error_pixels = img_size * error
    print(
        "Average error per keypoint: {:.0f} pixels for image {}".format(
            error_pixels, image_size
        )
    )

## Inference on Test Data

In [None]:
# Visualise predictions for the first test batch only.
batch_source = iter(test_dataloader)
try:
    data = next(batch_source)
except StopIteration:
    pass  # empty loader: nothing to draw
else:
    show_batch_predictions(data, model)
finally:
    del batch_source  # release the iterator, as leaving the loop would

## Evaluation on final evaluation dataset

In [None]:
# Score the model on the final evaluation set; this rebinds accuracy_all and pck,
# and evaluate() rewrites results.txt.
accuracy_all, pck = evaluate(model, dataloader=final_evaluation_dataloader)

In [None]:
# Mean of the per-image keypoint distances from the final-evaluation run.
error = np.mean(accuracy_all)
print("Average error per keypoint: {:.1f}% from image size".format(100 * error))

# Re-express the same error in pixels for each image resolution.
# NOTE(review): this scaling presumes keypoints are normalised to [0, 1] — confirm.
for img_size in (RAW_IMG_SIZE, MODEL_IMG_SIZE):
# for img_size in [1, RAW_IMG_SIZE/MODEL_IMG_SIZE]:
    image_size = f"{img_size}x{img_size}"
    error_pixels = img_size * error
    print(
        "Average error per keypoint: {:.0f} pixels for image {}".format(
            error_pixels, image_size
        )
    )

In [None]:
# Visualise predictions for the first final-evaluation batch only.
batch_source = iter(final_evaluation_dataloader)
try:
    data = next(batch_source)
except StopIteration:
    pass  # empty loader: nothing to draw
else:
    show_batch_predictions(data, model)
finally:
    del batch_source  # release the iterator, as leaving the loop would