### 这部分是模型训练与验证的主要代码

In [1]:
ARTIFACTS_FOLDER = "./artifacts"

In [2]:
%load_ext autoreload

from datetime import datetime
from functools import partial
import glob
from multiprocessing import Pool

# Disable multiprocesing for numpy/opencv. We already multiprocess ourselves, this would mean every subprocess produces
# even more threads which would lead to a lot of context switching, slowing things down a lot.
import os
os.environ["OMP_NUM_THREADS"] = "1"

import matplotlib.pyplot as plt
%matplotlib inline

import torch
import pandas as pd
import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm, tqdm_notebook
import scipy
import scipy.ndimage
import scipy.special
from scipy.spatial.transform import Rotation as R
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data

from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

In [3]:
def create_transformation_matrix_to_voxel_space(shape, voxel_size, offset):
    """
    Constructs a transformation matrix given an output voxel shape such that (0,0,0) ends up in the center.
    Voxel_size defines how large every voxel is in world coordinate, 
    (1,1,1) would be the same as Minecraft voxels.
    An offset per axis in world coordinates (metric) can be provided, 
    this is useful for Z (up-down) in lidar points.
    No rotation, only a scale and translation.
    """
    
    shape, voxel_size, offset = np.array(shape), np.array(voxel_size), np.array(offset)
    tm = np.eye(4, dtype=np.float32) #(4,4)
    translation = shape/2 + offset/voxel_size #(3,)
    tm = tm * np.array(np.hstack((1/voxel_size, [1]))) # (4,4)
    tm[:3, 3] = np.transpose(translation)
    return tm

def transform_points(points, transf_matrix):
    """
    Transform (3,N) or (4,N) points using transformation matrix.
    """
    if points.shape[0] not in [3,4]:
        raise Exception("Points input should be (3,N) or (4,N) shape, received {}".format(points.shape))
    return transf_matrix.dot(np.vstack((points[:3, :], np.ones(points.shape[1]))))[:3, :]

def car_to_voxel_coords(points, shape, voxel_size, z_offset=0):
    if len(shape) != 3:
        raise Exception("Voxel volume shape should be 3 dimensions (x,y,z)")
        
    if len(points.shape) != 2 or points.shape[0] not in [3, 4]:
        raise Exception("Input points should be (3,N) or (4,N) in shape, found {}".format(points.shape))

    tm = create_transformation_matrix_to_voxel_space(shape, voxel_size, (0, 0, z_offset))
    p = transform_points(points, tm) #(3, N)
    return p

def create_voxel_pointcloud(points, shape, voxel_size=(0.5,0.5,1), z_offset=0):

    points_voxel_coords = car_to_voxel_coords(points.copy(), shape, voxel_size, z_offset) #(3,N)
    points_voxel_coords = points_voxel_coords[:3].transpose(1,0) #(N, 3)
    points_voxel_coords = np.int0(points_voxel_coords)
    bev = np.zeros(shape, dtype=np.float32)
    bev_shape = np.array(shape)
    within_bounds = (np.all(points_voxel_coords >= 0, axis=1) * np.all(
        points_voxel_coords < bev_shape, axis=1))
    points_voxel_coords = points_voxel_coords[within_bounds]
    coord, count = np.unique(points_voxel_coords, axis=0, return_counts=True)
    # Note X and Y are flipped:
    bev[coord[:,1], coord[:,0], coord[:,2]] = count
    
    return bev

def normalize_voxel_intensities(bev, max_intensity=16):
    return (bev/max_intensity).clip(0,1)

def move_boxes_to_car_space(boxes, ego_pose):
    """
    Move boxes from world space to car space.
    Note: mutates input boxes.
    """
    translation = -np.array(ego_pose['translation'])
    rotation = Quaternion(ego_pose['rotation']).inverse
    for box in boxes:
        # Bring box to car space
        box.translate(translation)
        box.rotate(rotation)
        
def scale_boxes(boxes, factor):
    """
    Note: mutates input boxes
    """
    for box in boxes:
        box.wlh = box.wlh * factor

def draw_boxes(im, voxel_size, boxes, classes, z_offset=0.0):
    for box in boxes:
        # We only care about the bottom corners
        corners = box.bottom_corners()
        corners_voxel = car_to_voxel_coords(corners, im.shape, voxel_size, z_offset).transpose(1,0)
        corners_voxel = corners_voxel[:,:2] # Drop z coord
        class_color = classes.index(box.name) + 1
        if class_color == 0:
            raise Exception("Unknown class: {}".format(box.name))
        cv2.drawContours(im, np.int0([corners_voxel]), 0, (class_color, class_color, class_color), -1)

In [4]:
class BEVImageDataset(torch.utils.data.Dataset):
    def __init__(self, input_filepaths, target_filepaths, map_filepaths=None):
        self.input_filepaths = input_filepaths
        self.target_filepaths = target_filepaths
        self.map_filepaths = map_filepaths
        
        if map_filepaths is not None:
            assert len(input_filepaths) == len(map_filepaths)        
        assert len(input_filepaths) == len(target_filepaths)

    def __len__(self):
        return len(self.input_filepaths)

    def __getitem__(self, idx):
        input_filepath = self.input_filepaths[idx]
        target_filepath = self.target_filepaths[idx]        
        sample_token = input_filepath.split("/")[-1].replace("_input.png","")        
        im = cv2.imread(input_filepath, cv2.IMREAD_UNCHANGED)        
        if self.map_filepaths:
            map_filepath = self.map_filepaths[idx]
            map_im = cv2.imread(map_filepath, cv2.IMREAD_UNCHANGED)
            im = np.concatenate((im, map_im), axis=2)        
        target = cv2.imread(target_filepath, cv2.IMREAD_UNCHANGED)        
        im = im.astype(np.float32)/255
        target = target.astype(np.int64)        
        im = torch.from_numpy(im.transpose(2,0,1))
        target = torch.from_numpy(target)      
        return im, target, sample_token
    
class UNet(nn.Module):
    def __init__(
        self,
        in_channels=1,
        n_classes=2,
        depth=5,
        wf=6,
        padding=False,
        batch_norm=False,
        up_mode='upconv',
    ):
        super(UNet, self).__init__()
        assert up_mode in ('upconv', 'upsample')
        self.padding = padding
        self.depth = depth
        prev_channels = in_channels
        self.down_path = nn.ModuleList()
        for i in range(depth):
            self.down_path.append(
                UNetConvBlock(prev_channels, 2 ** (wf + i), padding, batch_norm)
            )
            prev_channels = 2 ** (wf + i)

        self.up_path = nn.ModuleList()
        self.dropout = torch.nn.Dropout(0.5)
        
        for i in reversed(range(depth - 1)):
            self.up_path.append(
                UNetUpBlock(prev_channels, 2 ** (wf + i), up_mode, padding, batch_norm)
            )
            prev_channels = 2 ** (wf + i)

        self.last = nn.Conv2d(prev_channels, n_classes, kernel_size=1) # dropout is absent

    def forward(self, x):
        blocks = []
        for i, down in enumerate(self.down_path):
            x = down(x)
            if i == len(self.down_path) - 1:
                x = self.dropout(x)
                continue
            elif i == len(self.down_path) - 2:
                x = self.dropout(x)
                blocks.append(x)
                x = F.max_pool2d(x, 2)
            else:
                blocks.append(x)
                x = F.max_pool2d(x, 2)

        for i, up in enumerate(self.up_path):
            x = up(x, blocks[-i - 1])

        return self.last(x)

class UNetConvBlock(nn.Module):
    def __init__(self, in_size, out_size, padding, batch_norm):
        super(UNetConvBlock, self).__init__()
        block = []

        block.append(nn.Conv2d(in_size, out_size, kernel_size=3, padding=int(padding)))
        block.append(nn.ReLU())
        if batch_norm:
            block.append(nn.BatchNorm2d(out_size))

        block.append(nn.Conv2d(out_size, out_size, kernel_size=3, padding=int(padding)))
        block.append(nn.ReLU())
        if batch_norm:
            block.append(nn.BatchNorm2d(out_size))

        self.block = nn.Sequential(*block)

    def forward(self, x):
        out = self.block(x)
        return out

class UNetUpBlock(nn.Module):
    def __init__(self, in_size, out_size, up_mode, padding, batch_norm):
        super(UNetUpBlock, self).__init__()
        if up_mode == 'upconv':
            self.up = nn.ConvTranspose2d(in_size, out_size, kernel_size=2, stride=2)
        elif up_mode == 'upsample':
            self.up = nn.Sequential(
                nn.Upsample(mode='bilinear', scale_factor=2),
                nn.Conv2d(in_size, out_size, kernel_size=1),
            )

        self.conv_block = UNetConvBlock(in_size, out_size, padding, batch_norm)

    def center_crop(self, layer, target_size):
        _, _, layer_height, layer_width = layer.size()
        diff_y = (layer_height - target_size[0]) // 2
        diff_x = (layer_width - target_size[1]) // 2
        return layer[
            :, :, diff_y : (diff_y + target_size[0]), diff_x : (diff_x + target_size[1])
        ]

    def forward(self, x, bridge):
        up = self.up(x)
        crop1 = self.center_crop(bridge, up.shape[2:])
        out = torch.cat([up, crop1], 1)
        out = self.conv_block(out)
        return out
    


In [5]:
## ResUnet Model
def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        m.weight.data.normal_(0, 1e-3)
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.ConvTranspose2d):
        m.weight.data.normal_(0, 1e-3)
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()

def conv_bn_relu(in_channels, out_channels, kernel_size, \
        stride=1, padding=0, bn=True, relu=True):
    bias = not bn
    layers = []
    layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride,
        padding, bias=bias))
    if bn:
        layers.append(nn.BatchNorm2d(out_channels))
    if relu:
        layers.append(nn.LeakyReLU(0.2, inplace=True))
    layers = nn.Sequential(*layers)

    # initialize the weights
    for m in layers.modules():
        init_weights(m)

    return layers

def convt_bn_relu(in_channels, out_channels, kernel_size, \
        stride=1, padding=0, output_padding=0, bn=True, relu=True):
    bias = not bn
    layers = []
    layers.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
        stride, padding, output_padding, bias=bias))
    if bn:
        layers.append(nn.BatchNorm2d(out_channels))
    if relu:
        layers.append(nn.LeakyReLU(0.2, inplace=True))
    layers = nn.Sequential(*layers)

    # initialize the weights
    for m in layers.modules():
        init_weights(m)

    return layers

class DepthCompletionNet(nn.Module):
    def __init__(self, num_out_channels, res_layers=34, pretrained=False):
        assert (res_layers in [18, 34, 50, 101, 152]
               ),'Only layers 18, 34, 50, 101, and 152 are defined, but got {}'.format(res_layers)
        super(DepthCompletionNet, self).__init__()
        
        channels = 64
        self.conv1_img = conv_bn_relu(3, channels, kernel_size=3, stride=1, padding=1)
        pretrained_model = resnet.__dict__['resnet{}'.format(res_layers)](pretrained=pretrained)
        if not pretrained:
            pretrained_model.apply(init_weights)
        #self.maxpool = pretrained_model._modules['maxpool']
        self.conv2 = pretrained_model._modules['layer1']
        self.conv3 = pretrained_model._modules['layer2']
        self.conv4 = pretrained_model._modules['layer3']
        self.conv5 = pretrained_model._modules['layer4']
        del pretrained_model # clear memory

        # define number of intermediate channels
        if res_layers <= 34:
            num_channels = 512
        elif res_layers >= 50:
            num_channels = 2048
        self.conv6 = conv_bn_relu(num_channels, 512, kernel_size=3, stride=2, padding=1)

        # decoding layers
        kernel_size = 3
        stride = 2
        self.convt5 = convt_bn_relu(in_channels=512, out_channels=256,
            kernel_size=kernel_size, stride=stride, padding=1, output_padding=1)
        self.convt4 = convt_bn_relu(in_channels=768, out_channels=128,
            kernel_size=kernel_size, stride=stride, padding=1, output_padding=1)
        self.convt3 = convt_bn_relu(in_channels=(256+128), out_channels=64,
            kernel_size=kernel_size, stride=stride, padding=1, output_padding=1)
        self.convt2 = convt_bn_relu(in_channels=(128+64), out_channels=64,
            kernel_size=kernel_size, stride=stride, padding=1, output_padding=1)
        self.convt1 = convt_bn_relu(in_channels=128, out_channels=64,
            kernel_size=kernel_size, stride=1, padding=1)
        self.convtf = conv_bn_relu(in_channels=128, out_channels=num_out_channels, kernel_size=1, 
                                   stride=1, bn=False, relu=False)

    def forward(self, x):
        # first layer
        conv1 = self.conv1_img(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2) # batchsize * ? * 176 * 608
        conv4 = self.conv4(conv3) # batchsize * ? * 88 * 304
        conv5 = self.conv5(conv4) # batchsize * ? * 44 * 152
        conv6 = self.conv6(conv5) # batchsize * ? * 22 * 76

        # decoder
        convt5 = self.convt5(conv6)
        y = torch.cat((convt5, conv5), 1)

        convt4 = self.convt4(y)
        y = torch.cat((convt4, conv4), 1)

        convt3 = self.convt3(y)
        y = torch.cat((convt3, conv3), 1)

        convt2 = self.convt2(y)
        y = torch.cat((convt2, conv2), 1)

        convt1 = self.convt1(y)
        y = torch.cat((convt1,conv1), 1)

        y = self.convtf(y)
        
        return y

## Load Data

In [6]:
classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", 
           "other_vehicle", "animal", "emergency_vehicle"]
#train_data_folder = os.path.join(ARTIFACTS_FOLDER, "bev_train_data")
#input_filepaths = sorted(glob.glob(os.path.join(train_data_folder, "*_input.png")))
#target_filepaths = sorted(glob.glob(os.path.join(train_data_folder, "*_target.png")))
#map_filepaths = sorted(glob.glob(os.path.join(train_data_folder, "*_map.png")))
#train_dataset = BEVImageDataset(input_filepaths, target_filepaths)
#trainloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=os.cpu_count())
batch_size = 8
validation_data_folder = os.path.join(ARTIFACTS_FOLDER, "./bev_validation_data")
val_input_filepaths = sorted(glob.glob(os.path.join(validation_data_folder, "*_input.png")))
val_target_filepaths = sorted(glob.glob(os.path.join(validation_data_folder, "*_target.png")))
#map_filepaths = sorted(glob.glob(os.path.join(train_data_folder, "*_map.png")))
val_dataset = BEVImageDataset(val_input_filepaths, val_target_filepaths)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, num_workers=os.cpu_count())

## HyperParameters

In [7]:
# Some hyperparameters we'll need to define for the system
voxel_size = (0.3, 0.3, 1.0)
z_offset = -2.0
bev_shape = (496, 496, 3)
# We scale down each box so they are more separated when projected into our coarse voxel space.
box_scale = 0.9
epochs = 15
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class_weights = torch.from_numpy(np.array([0.2] + [1.0]*len(classes), dtype=np.float32))
class_weights = class_weights.to(device)

True


In [8]:
def get_unet_model(in_channels=6, num_output_classes=2):
    model = UNet(in_channels=in_channels, n_classes=num_output_classes, wf=6, depth=5, padding=True, 
                 up_mode='upsample')
    return model

## mAP on Val
### 计算模型在验证集上的mAP
1. 确定预测目标框
2. 利用mAP_evaluation.py计算mAP

In [9]:
## 读入初始点云数据,后面对语义分割预测结果进行后处理时,需要用到车身的角度等信息
level5data = LyftDataset(data_path='.', json_path='/home/bob/data/lyft_data/train_data', verbose=True)

9 category,
18 attribute,
4 visibility,
15991 instance,
8 sensor,
128 calibrated_sensor,
149072 ego_pose,
148 log,
148 scene,
18634 sample,
149072 sample_data,
539765 sample_annotation,
1 map,
Done loading in 6.4 seconds.
Reverse indexing ...
Done reverse indexing in 2.5 seconds.


In [10]:
import json
import gc
from lyft_dataset_sdk.eval.detection.mAP_evaluation import Box3D, recall_precision
from torchvision.models import resnet

def val_map(reslayers, resolution, epoch):
    ## load model
    model = DepthCompletionNet(num_out_channels=len(classes)+1, res_layers=reslayers)
    model = model.to(device)
    checkpoint_filename = "resnet{}_{}/resnet_epoch_{}.pth".format(reslayers, resolution, epoch)
    checkpoint_filepath = os.path.join(ARTIFACTS_FOLDER, checkpoint_filename)
    model.load_state_dict(torch.load(checkpoint_filepath))
    
    gc.collect()
    progress_bar = tqdm_notebook(valloader)
    targets = np.zeros((len(val_target_filepaths), bev_shape[0], bev_shape[1]), dtype=np.uint8)
    # We quantize to uint8 here to conserve memory. We're allocating >20GB of memory otherwise.
    predictions = np.zeros((len(val_target_filepaths), 1+len(classes), bev_shape[0], 
                            bev_shape[1]), dtype=np.uint8)
    sample_tokens = []
    
    ## predict
    with torch.no_grad():
        model.eval()
        for ii, (X, target, batch_sample_tokens) in enumerate(progress_bar):
            offset = ii*batch_size
            targets[offset:offset+batch_size] = target.numpy()
            sample_tokens.extend(batch_sample_tokens)
            X = X.to(device)  # [N, 1, H, W]
            target = target.to(device)  # [N, H, W] with class indices (0, 1)
            prediction = model(X)  # [N, 2, H, W]
            prediction = F.softmax(prediction, dim=1)
            prediction_cpu = prediction.cpu().numpy()
            predictions[offset:offset+batch_size] = np.round(prediction_cpu*255).astype(np.uint8)
    predictions_non_class0 = 255 - predictions[:,0]   
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
    predictions_opened = np.zeros((predictions_non_class0.shape), dtype=np.uint8)
    background_threshold = 255//2
    for i, p in enumerate(tqdm(predictions_non_class0)):
        thresholded_p = (p > background_threshold).astype(np.uint8)
        predictions_opened[i] = cv2.morphologyEx(thresholded_p, cv2.MORPH_OPEN, kernel)
    
    ## 2d detection
    detection_boxes = []
    detection_scores = []
    detection_classes = []

    for i in tqdm_notebook(range(len(predictions))):
        prediction_opened = predictions_opened[i]
        probability_non_class0 = predictions_non_class0[i]
        class_probability = predictions[i]
        sample_boxes = []
        sample_detection_scores = []
        sample_detection_classes = []
        contours, hierarchy = cv2.findContours(prediction_opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)     
        for cnt in contours:
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)        
            # Let's take the center pixel value as the confidence value
            box_center_index = np.int0(np.mean(box, axis=0))        
            for class_index in range(len(classes)):
                box_center_value = class_probability[class_index+1, box_center_index[1], box_center_index[0]]            
                # Let's remove candidates with very low probability
                if box_center_value < 0.01:
                    continue            
                box_center_class = classes[class_index]
                box_detection_score = box_center_value
                sample_detection_classes.append(box_center_class)
                sample_detection_scores.append(box_detection_score)
                sample_boxes.append(box)            
        detection_boxes.append(np.array(sample_boxes))
        detection_scores.append(sample_detection_scores)
        detection_classes.append(sample_detection_classes)
    
    ## 3d detection
    pred_box3ds = []
    for (sample_token, sample_boxes, sample_detection_scores, sample_detection_class) in tqdm_notebook(
            zip(sample_tokens, detection_boxes, detection_scores, 
            detection_classes), total=len(sample_tokens)):
        sample_boxes = sample_boxes.reshape(-1, 2) # (N, 4, 2) -> (N*4, 2)
        sample_boxes = sample_boxes.transpose(1,0) # (N*4, 2) -> (2, N*4)
        # Add Z dimension
        sample_boxes = np.vstack((sample_boxes, np.zeros(sample_boxes.shape[1]),)) # (2, N*4) -> (3, N*4)
        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        ego_translation = np.array(ego_pose['translation'])
        global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']), inverse=False)
        car_from_voxel = np.linalg.inv(create_transformation_matrix_to_voxel_space(
                                        bev_shape, voxel_size, (0, 0, z_offset)))
        global_from_voxel = np.dot(global_from_car, car_from_voxel)
        sample_boxes = transform_points(sample_boxes, global_from_voxel)
        sample_boxes[2,:] = ego_pose["translation"][2]
        # (3, N*4) -> (N, 4, 3)
        sample_boxes = sample_boxes.transpose(1,0).reshape(-1, 4, 3)
        # We don't know the height of our boxes, let's assume every object is the same height.
        box_height = 1.75
        sample_boxes_centers = sample_boxes.mean(axis=1)
        sample_boxes_centers[:,2] += box_height/2    
        sample_lengths = np.linalg.norm(sample_boxes[:,0,:] - sample_boxes[:,1,:], axis=1) * 1/box_scale
        sample_widths = np.linalg.norm(sample_boxes[:,1,:] - sample_boxes[:,2,:], axis=1) * 1/box_scale    
        sample_boxes_dimensions = np.zeros_like(sample_boxes_centers) 
        sample_boxes_dimensions[:,0] = sample_widths
        sample_boxes_dimensions[:,1] = sample_lengths
        sample_boxes_dimensions[:,2] = box_height
        for i in range(len(sample_boxes)):
            translation = sample_boxes_centers[i]
            size = sample_boxes_dimensions[i]
            class_name = sample_detection_class[i]
            ego_distance = float(np.linalg.norm(ego_translation - translation))          
            # Determine the rotation of the box
            v = (sample_boxes[i,0] - sample_boxes[i,1])
            v /= np.linalg.norm(v)
            r = R.from_dcm([ # initialization from direction cosine matrices
                [v[0], -v[1], 0],
                [v[1],  v[0], 0],
                [   0,     0, 1],
            ])
            quat = r.as_quat() # as quaternions
            # XYZW -> WXYZ order of elements
            quat = quat[[3,0,1,2]]        
            detection_score = float(sample_detection_scores[i])        
            box3d = Box3D(
                sample_token=sample_token,
                translation=list(translation),
                size=list(size),
                rotation=list(quat),
                name=class_name,
                score=detection_score
            )
            pred_box3ds.append(box3d)
    
    ## mAP calc
    pred = [b.serialize() for b in pred_box3ds]
    os.makedirs(os.path.join(ARTIFACTS_FOLDER, "json/resnet{}_{}".format(reslayers, 
            resolution)), exist_ok=True)
    with open(os.path.join(ARTIFACTS_FOLDER, "json/resnet{}_{}/pred_{}.json".format(reslayers, 
            resolution, epoch)), "w") as f:
        json.dump(pred, f)
    

In [12]:
val_map(34, 496, 17)
val_map(34, 496, 14)
val_map(34, 496, 15)

HBox(children=(IntProgress(value=0, max=630), HTML(value='')))




100%|██████████| 5040/5040 [00:01<00:00, 4307.75it/s]


HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




HBox(children=(IntProgress(value=0, max=630), HTML(value='')))




100%|██████████| 5040/5040 [00:01<00:00, 4282.85it/s]


HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




HBox(children=(IntProgress(value=0, max=630), HTML(value='')))




100%|██████████| 5040/5040 [00:01<00:00, 4396.35it/s]


HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5040), HTML(value='')))




In [13]:
gc.collect()

52

In [26]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_5.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11975817030258928
('animal', 0.0)
('bicycle', 0.039912937576430364)
('bus', 0.10288388972858527)
('car', 0.5008256375777733)
('motorcycle', 0.029779762104956333)
('other_vehicle', 0.2468314778460151)
('pedestrian', 0.017650038300566957)
('truck', 0.020181619286386863)


In [27]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_6.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.12389716664967979
('animal', 0.0)
('bicycle', 0.03157909324444978)
('bus', 0.09410808625968548)
('car', 0.5227501821457663)
('motorcycle', 0.02335367550589498)
('other_vehicle', 0.28597740843624136)
('pedestrian', 0.012112069587737893)
('truck', 0.021296818017662526)


In [29]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_7.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11731166124413105
('animal', 0.0)
('bicycle', 0.028445990272446327)
('bus', 0.09677346727455668)
('car', 0.5033989492688048)
('motorcycle', 0.016592293827658637)
('other_vehicle', 0.2644164046445354)
('pedestrian', 0.008997785890750697)
('truck', 0.019868398774295776)


In [30]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_8.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.12073994251500897
('animal', 0.0)
('bicycle', 0.027183279435158022)
('bus', 0.09695469931046397)
('car', 0.5094287971235998)
('motorcycle', 0.013829826009779321)
('other_vehicle', 0.28883483653176995)
('pedestrian', 0.009378773083396295)
('truck', 0.0203093286259044)


In [33]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_10.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11977386558904493
('animal', 0.0)
('bicycle', 0.032493030542954035)
('bus', 0.09590759105511631)
('car', 0.5040280271581521)
('motorcycle', 0.017063569849596217)
('other_vehicle', 0.2762636129029292)
('pedestrian', 0.010173404213630645)
('truck', 0.022261688989980856)


In [34]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_11.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11959670156728441
('animal', 0.0)
('bicycle', 0.02884378216006104)
('bus', 0.09204634568923813)
('car', 0.5020172672771814)
('motorcycle', 0.018747390621877302)
('other_vehicle', 0.28336968843247246)
('pedestrian', 0.008571127518510194)
('truck', 0.0231780108389349)


In [14]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_17.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11939690157026486
('animal', 0.0)
('bicycle', 0.030799605272083856)
('bus', 0.08972601582707765)
('car', 0.49994655364088725)
('motorcycle', 0.017197650275445852)
('other_vehicle', 0.2875608951717036)
('pedestrian', 0.0076383398205137065)
('truck', 0.022306152554407092)


In [15]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_14.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.12239122997635343
('animal', 0.0)
('bicycle', 0.03502736081936683)
('bus', 0.09732330180033058)
('car', 0.5002790922148279)
('motorcycle', 0.028800744038881387)
('other_vehicle', 0.28735357619282836)
('pedestrian', 0.008578418287515543)
('truck', 0.02176734645707686)


In [16]:
!python ./eval/mAP_evaluation.py --pred_file './artifacts/json/resnet34_496/pred_15.json' --gt "./artifacts/gt.json" --iou_threshold 0.4

Class_names =  ['animal', 'bicycle', 'bus', 'car', 'motorcycle', 'other_vehicle', 'pedestrian', 'truck']
Average per class mean average precision =  0.11983471488778014
('animal', 0.0)
('bicycle', 0.03284659046339944)
('bus', 0.09001153702322082)
('car', 0.5021283554507802)
('motorcycle', 0.017105834196256494)
('other_vehicle', 0.28638917597916713)
('pedestrian', 0.008422013272659592)
('truck', 0.02177421271675741)
