In [1]:
import torch

In [2]:
# Print the installed PyTorch version string so the recorded outputs below can be tied to a build.
print(torch.__version__)

2.2.0+cu121


In [3]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import random
from skimage import io, transform, util

In [4]:
import torch.nn as nn
import torchvision.transforms.functional as TF
from torchvision import models
import math
import time
import warnings; warnings.simplefilter('ignore')
import matplotlib.pyplot as plt
import os
from torchvision.models import resnet18

In [5]:
class camera_image_network(torch.nn.Module):
    """
    Classification network built on top of a torchvision-style ResNet.

    The wrapped backbone must expose ``conv1``, ``bn1``, ``relu``, ``maxpool``
    and ``layer1`` .. ``layer4``. Its ``avgpool`` and ``fc`` are replaced and an
    extra ``out`` layer is attached to the backbone, so the parameter names are
    ``orig_resnet.fc.*`` and ``orig_resnet.out.*``.

    Input:  N x C x 512 x 512, where C must match ``orig_resnet.conv1``.
            The head expects a 3 x 3 map after pooling, which is what a
            512 x 512 input produces (layer4 output of 16 x 16).
    Output: N x n_classes raw scores (no softmax is applied).

    Args:
        orig_resnet: backbone module; it is modified in place.
        n_classes: number of output classes (default 9).
    """
    def __init__(self, orig_resnet, n_classes=9):
        super().__init__()
        self.orig_resnet = orig_resnet
        # Channel width of layer4 equals the in_features of the backbone's stock
        # classifier (512 for resnet18/34). Read it before `fc` is replaced so
        # wider backbones get a matching head; fall back to 512 when the
        # backbone has no usable `fc`.
        stock_fc = getattr(orig_resnet, "fc", None)
        feat_channels = getattr(stock_fc, "in_features", 512)
        # 8x8 window with stride 4 reduces a 16 x 16 map to 3 x 3.
        self.orig_resnet.avgpool = nn.AvgPool2d(kernel_size=(8,8), stride=(4,4))
        self.orig_resnet.fc = nn.Linear(in_features=3*3*feat_channels, out_features=1000)
        self.orig_resnet.out = nn.Linear(in_features=1000, out_features=n_classes)

    def forward(self, x):
        """Return N x n_classes scores for a batch of N x C x 512 x 512 images."""
        backbone = self.orig_resnet
        # encoder (shape comments are for resnet18 with a 512 x 512 input)
        x = backbone.conv1(x)    # 64 x 256 x 256
        x = backbone.bn1(x)
        x = backbone.relu(x)
        x = backbone.maxpool(x)  # 64 x 128 x 128
        x = backbone.layer1(x)   # 64 x 128 x 128
        x = backbone.layer2(x)   # 128 x 64 x 64
        x = backbone.layer3(x)   # 256 x 32 x 32
        x = backbone.layer4(x)   # 512 x 16 x 16
        x = backbone.avgpool(x)  # 512 x 3 x 3
        # classifier head
        x = x.reshape(x.shape[0], -1)       # N x (512 x 3 x 3)
        x = backbone.relu(backbone.fc(x))   # N x 1000
        return backbone.out(x)              # N x n_classes

In [56]:
# Camera network
# Build the backbone with randomly initialised weights; the trained parameters
# are loaded from the checkpoint below. `weights=None` is the current spelling
# of the deprecated `pretrained=False`.
resnet_camera = models.resnet18(weights=None)
# Another ResNet variant can be substituted here, provided the head defined in
# camera_image_network matches its layer4 width and the checkpoint was trained
# with that variant.
n_input_channel_camera = 4  # the evaluation cells feed the image concatenated with a one-channel target mask
resnet_camera.conv1 = torch.nn.Conv2d(n_input_channel_camera, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device =", device)
resnet_enc_dec_concat_more = camera_image_network(resnet_camera)
model_test_camera = resnet_enc_dec_concat_more.to(device)

# load previous model
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine,
# which the CPU fallback for `device` above otherwise could not do.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint_camera = torch.load(
    "../datset_for_all/navigation_network/best_model/epoch_178val_checkpoint.pth.tar",
    map_location=device,
)
model_test_camera.load_state_dict(checkpoint_camera['state_dict'])
# The model is already on `device`; only switch it to inference mode.
model_test_camera = model_test_camera.eval()

device = cuda:0


In [43]:
# validation accuracy calculation for 5400 images
sz = 20      # half-width in pixels of the square drawn around the target in the mask channel
error = 0    # accumulated class-distance error
correct = 0  # number of exact class matches
count = 0    # number of evaluated samples
csv_path = "./validation_tool_tip_and_targets_label.csv"
# Columns 0-1 are read as the current (x, y) position, columns 2-3 as the target (x, y).
labels = pd.read_csv(csv_path, header = None)
for ii in range(5400):
    label = labels.iloc[ii, 0:2].values
    target = labels.iloc[ii, 2:4].values
    input_image = io.imread("../datset_for_all/navigation_network/validation_data/camera_image_{:06d}".format(ii) + ".jpg")

    # Build the target mask: a (2*sz+1)-pixel square centred on the target.
    # The slice bounds are clamped at 0 because a negative start would be
    # interpreted by NumPy as "count from the end", which yields an empty mask
    # for targets closer than `sz` pixels to the top/left border. Bounds past
    # the bottom/right border are clipped by NumPy itself.
    # NOTE(review): if the training pipeline used the unclamped slice, edge
    # targets were drawn as an empty mask there - confirm.
    target_image = np.zeros((512, 512))
    tx, ty = int(target[0]), int(target[1])
    rows = slice(max(ty - sz, 0), max(ty + sz + 1, 0))
    cols = slice(max(tx - sz, 0), max(tx + sz + 1, 0))
    target_image[rows, cols] = 1
    target_image = target_image[:,:,np.newaxis]*255
    # Stack the mask as an extra channel and scale everything to [0, 1].
    image = np.concatenate((input_image, target_image), axis = 2) / 255.0
    print(image.shape)

    image = TF.to_tensor(image) # C x H x W
    image = image.unsqueeze(0).float().to(device) # N x C x H x W

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model_test_camera(image)
    outputs = outputs.cpu().numpy()
    outputs = outputs[0]
    argmax_output = int(np.argmax(outputs))
    print("Predicted result = ", argmax_output)

    # Ground-truth class from the direction position -> target. The y component
    # is negated before taking the angle. Classes 0..7 are 45-degree sectors
    # (class 0 centred on 180 degrees, class 4 on 0 degrees); class 8 is used
    # when the target is closer than 2.5 pixels.
    v = target - label
    angle = np.arctan2(-v[1], v[0]) * 180 / np.pi
    if angle >= 157.5 and angle <= 180 or angle < -157.5:
        cur_class_label = 0
    else:
        cur_class_label = int((angle + 202.5) / 45)
    if (np.linalg.norm(v) < 2.5):
        cur_class_label = 8
    print("cur_class_label = ", cur_class_label)

    abs_error = abs(cur_class_label - argmax_output)
    if (abs_error == 0):
        correct += 1
    count += 1
    # Class 8 is not a direction, so it has no circular distance to classes
    # 0..7. Charge a fixed 4 whenever exactly one of label/prediction is 8;
    # previously only "label is 8" was handled, so e.g. label 0 with
    # prediction 8 was charged 0.
    if (cur_class_label == 8) != (argmax_output == 8):
        error += 4
    elif (abs_error > 4):
        # circular distance between the 8 direction sectors
        error += 8 - abs_error
    else:
        error += abs_error
    print("No. ", ii)
    print("currenet mean error = ", error / count)
    print("accuracy = ", correct / count)

(512, 512, 4)
Predicted result =  0
cur_class_label =  0
No.  0
currenet mean error =  0.0
accuracy =  1.0


In [62]:
# test accuracy calculation for 300 x 10 = 3000 targets
# Rounds 0-8 place the target at a random offset from the annotated point;
# round 9 places the target on the annotated point itself.
sz = 20      # half-width in pixels of the square drawn around the target in the mask channel
error = 0    # accumulated class-distance error
correct = 0  # number of exact class matches
count = 0    # number of evaluated samples
base_path = "test_label"
# Dedicated, seeded generator: the evaluation is repeatable and the global
# `random` state is left untouched.
jitter_rng = random.Random(0)
for i in range(10):
    use_jitter = i < 9
    for ii in range(300):
        json_path = os.path.join(base_path, "camera_image_{:06d}".format(ii) + ".jpg.json")
        with open(json_path) as json_file:
            data = json.load(json_file)
        # First exterior point of the first annotated object, as (x, y).
        label = np.array(data['objects'][0]['points']['exterior'][0])
        if use_jitter:
            # random.randint includes both endpoints, so (-50, 50) gives a
            # symmetric offset; the former (-50, 51) reached +51 but only -50.
            x = jitter_rng.randint(-50, 50) + label[0]
            y = jitter_rng.randint(-50, 50) + label[1]
        else:
            x = label[0]
            y = label[1]
        # Keep the target inside the 512 x 512 image.
        target = np.array([min(max(x, 0), 511), min(max(y, 0), 511)])

        input_image = io.imread("all/camera_image_{:06d}".format(ii) + ".jpg")

        # Build the target mask: a (2*sz+1)-pixel square centred on the target.
        # The slice bounds are clamped at 0 because a negative start would be
        # interpreted by NumPy as "count from the end", which yields an empty
        # mask for targets closer than `sz` pixels to the top/left border.
        # NOTE(review): if the training pipeline used the unclamped slice, edge
        # targets were drawn as an empty mask there - confirm.
        target_image = np.zeros((512, 512))
        tx, ty = int(target[0]), int(target[1])
        rows = slice(max(ty - sz, 0), max(ty + sz + 1, 0))
        cols = slice(max(tx - sz, 0), max(tx + sz + 1, 0))
        target_image[rows, cols] = 1
        target_image = target_image[:,:,np.newaxis]*255
        # Stack the mask as an extra channel and scale everything to [0, 1].
        image = np.concatenate((input_image, target_image), axis = 2) / 255.0
        image = TF.to_tensor(image) # C x H x W
        image = image.unsqueeze(0).float().to(device) # N x C x H x W

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model_test_camera(image)
        outputs = outputs.cpu().numpy()
        outputs = outputs[0]
        argmax_output = int(np.argmax(outputs))
        print("Predicted result = ", argmax_output)

        # Ground-truth class from the direction annotated point -> target. The
        # y component is negated before taking the angle. Classes 0..7 are
        # 45-degree sectors (class 0 centred on 180 degrees, class 4 on 0
        # degrees); class 8 is used when the target is closer than 2.5 pixels.
        v = target - label
        angle = np.arctan2(-v[1], v[0]) * 180 / np.pi
        if angle >= 157.5 and angle <= 180 or angle < -157.5:
            cur_class_label = 0
        else:
            cur_class_label = int((angle + 202.5) / 45)
        if (np.linalg.norm(v) < 2.5):
            cur_class_label = 8
        print("cur_class_label = ", cur_class_label)

        abs_error = abs(cur_class_label - argmax_output)
        if (abs_error == 0):
            correct += 1
        count += 1
        # Class 8 is not a direction, so it has no circular distance to classes
        # 0..7. Charge a fixed 4 whenever exactly one of label/prediction is 8;
        # previously only "label is 8" was handled, so e.g. label 0 with
        # prediction 8 was charged 0.
        if (cur_class_label == 8) != (argmax_output == 8):
            error += 4
        elif (abs_error > 4):
            # circular distance between the 8 direction sectors
            error += 8 - abs_error
        else:
            error += abs_error
        print("No. ", i * 300 + ii)
        print("currenet mean error = ", error / count)
        print("accuracy = ", correct / count)

Predicted result =  4
cur_class_label =  4
No.  0
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  7
cur_class_label =  7
No.  1
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  5
cur_class_label =  5
No.  2
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  7
cur_class_label =  7
No.  3
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  4
cur_class_label =  4
No.  4
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  4
cur_class_label =  4
No.  5
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  0
cur_class_label =  0
No.  6
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  5
cur_class_label =  5
No.  7
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  7
cur_class_label =  7
No.  8
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  3
cur_class_label =  3
No.  9
currenet mean error =  0.0
accuracy =  1.0
Predicted result =  5
cur_class_label =  5
No.  10
currenet mean error