In [1]:
# Fetch the project repository; later cells run code from this checkout.
!git clone https://github.com/uonat/SS2023_DI-Lab_Precitaste.git

Cloning into 'SS2023_DI-Lab_Precitaste'...
remote: Enumerating objects: 261, done.[K
remote: Counting objects: 100% (261/261), done.[K
remote: Compressing objects: 100% (195/195), done.[K
remote: Total 261 (delta 88), reused 212 (delta 53), pack-reused 0[K
Receiving objects: 100% (261/261), 23.53 MiB | 34.52 MiB/s, done.
Resolving deltas: 100% (88/88), done.


In [2]:
# Work from the repository root: later cells use repo-relative paths
# (e.g. evaluation/object-detection-metrics/evaluate.py and ../eval_1_gt).
%cd '/kaggle/working/SS2023_DI-Lab_Precitaste'

/kaggle/working/SS2023_DI-Lab_Precitaste


In [3]:
# Refresh the remote-tracking refs before inspecting the available branches.
!git fetch

In [4]:
# List local and remote branches with their latest commit to locate the branch to use.
!git branch -v -a

* [32mmain                              [m 2df23e7 Bug fix
  [31mremotes/origin/CLIP-implementation[m 134fe54 Updated CLIP Notebook
  [31mremotes/origin/HEAD               [m -> origin/main
  [31mremotes/origin/dinov2             [m 4859060 Added second return parameter for eliminate_boxes post process
  [31mremotes/origin/main               [m 2df23e7 Bug fix
  [31mremotes/origin/yushan             [m 94fc5af Update: different TEXT_PROMPT for RPC datasets


In [5]:
# Check out the dinov2 branch (created locally, tracking origin/dinov2).
!git switch dinov2

Branch 'dinov2' set up to track remote branch 'dinov2' from 'origin'.
Switched to a new branch 'dinov2'


In [6]:
# Dataset paths
# Root directory handed to RPCDataset when the validation split is loaded.
rpc_main_path = '/kaggle/input/retail-product-checkout-dataset'
# Directory handed to KNNClassifier.load_and_fit_RPC to fit the KNN model.
knn_train_features_path = '/kaggle/input/idp-repo/rpc-train-dinov2-feat/rpc-train-dinov2-feat'

# Dimension of the feature vector obtained from DINO
# NOTE(review): not referenced by any cell visible in this section.
FEATURE_DIM = 384

# Knn params
# Passed as the first positional argument of KNNClassifier.
num_neighbor = 5
# Passed to KNNClassifier as `similarity_metric`.
sim_metric = 'cosine'
# Passed to load_and_fit_RPC as `n_sample` — presumably samples kept per class; TODO confirm.
num_sample = 60

In [7]:
import json
import numpy as np
from models.knnClassifier import KNNClassifier
from tqdm.notebook import tqdm
import os

In [8]:
from dataset.RPCDataset import RPCDataset
# Build the 'val' split of the dataset rooted at rpc_main_path.
# NOTE(review): val_dataset is not used by any cell visible in this section.
val_dataset = RPCDataset(rpc_main_path, 'val')

In [9]:
class Prediction:
    """Record describing one predicted bounding box and what is known about it.

    A prediction starts with the detector output (image name/path, box and box
    score) and is enriched step by step with a matched ground-truth box, a
    feature vector and a KNN classification result. All attributes are plain
    JSON-serialisable values so the object can be round-tripped with
    ``to_dict`` / ``read_dict``.
    """

    # Serialised attributes, in the key order emitted by ``to_dict``.
    # Keeping one list guarantees ``to_dict`` and ``read_dict`` never drift apart.
    _FIELDS = (
        'img_name',
        'img_path',
        'pred_bbox',
        'pred_score_bbox',
        'gt_bbox',
        'gt_label',
        'pred_label',
        'class_score',
        'pred_features',
        'is_train',
    )

    @staticmethod
    def _as_list(values):
        """Return ``values`` as a plain list (``None`` stays ``None``).

        Array-like objects exposing ``tolist()`` (e.g. numpy arrays) are
        converted with it; any other iterable is copied into a new list.
        """
        if values is None:
            return None
        if hasattr(values, 'tolist'):
            return values.tolist()
        return list(values)

    def __init__(self, img_name, img_path, pred_bbox, pred_score_bbox):
        """Create a prediction from the detector output.

        Args:
            img_name: Name of the image the box belongs to.
            img_path: Path of that image.
            pred_bbox: Predicted box as an array-like or sequence, or ``None``
                when there is no predicted box. Stored as a plain list.
            pred_score_bbox: Score attached to the predicted box.
        """
        self.img_name = img_name
        self.img_path = img_path
        self.pred_bbox = self._as_list(pred_bbox)
        self.pred_score_bbox = pred_score_bbox

        # Obtained when prediction matches with a gt bounding box
        self.gt_bbox = None
        self.gt_label = None

        # Obtained from knn
        self.pred_label = None
        # Obtained from knn by measuring mean distance to its predicted label
        self.class_score = None
        self.pred_features = None
        self.is_train = None

    def add_gt_bbox(self, gt_bbox, gt_label, train_class_flag):
        """Attach the matched ground-truth box, its label and the train-class flag."""
        self.gt_bbox = gt_bbox
        self.gt_label = gt_label
        self.is_train = train_class_flag

    def add_feature_vector(self, feature_vector):
        """Store the feature vector of the predicted box as a plain list."""
        self.pred_features = self._as_list(feature_vector)

    def add_classification_res(self, pred_label, mean_dist):
        """Store the predicted label and its score (``mean_dist``)."""
        self.pred_label = pred_label
        self.class_score = mean_dist

    def to_dict(self):
        """Return all serialised attributes as a dictionary."""
        return {field: getattr(self, field) for field in self._FIELDS}

    def read_dict(self, content):
        """Overwrite every serialised attribute from ``content``.

        Raises:
            KeyError: If ``content`` lacks one of the serialised attributes.
        """
        for field in self._FIELDS:
            setattr(self, field, content[field])

In [10]:
def read_pred_objects_json(json_path):
    """Load a JSON list of serialised predictions and rebuild Prediction objects.

    Args:
        json_path: Path of a JSON file holding a list of dictionaries in the
            layout produced by ``Prediction.to_dict``.

    Returns:
        A list of ``Prediction`` objects, one per entry, in file order.
    """
    with open(json_path, "r") as handle:
        records = json.load(handle)

    def _restore(record):
        # Start from a placeholder instance; read_dict overwrites every field.
        restored = Prediction("", "", np.array([]), 0.0)
        restored.read_dict(record)
        return restored

    return [_restore(record) for record in records]

def get_pred_objects_per_image(pred_objects):
    """Group prediction objects by the image they belong to.

    The grouping is done in a single pass, so the cost grows linearly with the
    number of predictions and single-pass iterables (e.g. generators) work.

    Args:
        pred_objects: Iterable of objects exposing an ``img_name`` attribute.

    Returns:
        Dictionary mapping each image name to the list of its objects, kept in
        their original relative order. Keys appear in order of first occurrence.
    """
    objects_per_img = {}
    for pobject in pred_objects:
        objects_per_img.setdefault(pobject.img_name, []).append(pobject)
    return objects_per_img

In [11]:
# Load the stored training configuration as a Python object.
# NOTE(review): train_config is not referenced by any cell visible in this section.
with open("/kaggle/input/idp-repo/vith_dino_knn_files/vith_dino_knn_files/train_config.json", "r") as jfile:
    train_config = json.load(jfile)

In [12]:
def export_per_img_pred_objects(per_image_pred_objects, gt_path, pred_path):
    """Write one detection file and one ground-truth file per image.

    For every image name, ``<stem>.txt`` is written into both ``pred_path`` and
    ``gt_path``, where ``<stem>`` is the image name without its final
    extension (so ``a.b.jpg`` becomes ``a.b.txt`` and cannot collide with
    ``a.c.jpg``). Both directories are created if they do not exist.

    Detection lines: ``label conf x y w h`` — the stored predicted box is
    unpacked as ``x1, y1, x2, y2`` and converted to width/height.
    Ground-truth lines: ``label x y w h`` — the stored GT box is already xywh.

    Args:
        per_image_pred_objects: Mapping of image name to a list of prediction
            objects exposing ``pred_bbox``, ``pred_label``, ``pred_score_bbox``,
            ``gt_bbox`` and ``gt_label``.
        gt_path: Directory receiving the ground-truth text files.
        pred_path: Directory receiving the detection text files.
    """
    os.makedirs(pred_path, exist_ok=True)
    os.makedirs(gt_path, exist_ok=True)

    for img_name, img_objects in per_image_pred_objects.items():
        # Strip only the last extension; splitting on the first '.' would
        # truncate names that contain additional dots.
        txt_name = os.path.splitext(img_name)[0] + '.txt'

        pred_txt_dir = os.path.join(pred_path, txt_name)
        with open(pred_txt_dir, "w") as txtfile:
            for predict_object in img_objects:
                # Objects without a predicted box contribute no detection line.
                if predict_object.pred_bbox is not None:
                    label = predict_object.pred_label
                    x1, y1, x2, y2 = predict_object.pred_bbox
                    conf = predict_object.pred_score_bbox
                    txtfile.write("{} {} {} {} {} {}\n".format(label, conf, x1, y1, x2-x1, y2-y1))

        gt_txt_dir = os.path.join(gt_path, txt_name)
        with open(gt_txt_dir, "w") as txtfile:
            for predict_object in img_objects:
                # Objects that never matched a GT box contribute no GT line.
                if predict_object.gt_bbox is not None:
                    label = predict_object.gt_label
                    x1, y1, w, h = predict_object.gt_bbox
                    # GT bbox is in the form of xywh
                    txtfile.write("{} {} {} {} {}\n".format(label, x1, y1, w, h))

In [13]:
# Create the output folders for the first evaluation. exist_ok=True keeps the
# cell re-runnable: a plain `mkdir` fails once the folders already exist.
os.makedirs('../eval_1_gt', exist_ok=True)
os.makedirs('../eval_1_pred', exist_ok=True)

In [14]:
# First Evaluation: ViT-H + DinoV2 + KNN, no postprocessing and no threshold.
# Train and val predictions are evaluated together.
train_pred_objects = read_pred_objects_json("/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/Dino/vith_res_train_pred_objects_2.json")
val_pred_objects = read_pred_objects_json("/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/Dino/vith_res_val_pred_objects_2.json")
all_pred_objects = train_pred_objects + val_pred_objects

# Fit the KNN model on the extracted train image features (all classes).
knn = KNNClassifier(num_neighbor, similarity_metric=sim_metric)
knn.load_and_fit_RPC(knn_train_features_path, n_sample=num_sample)

classified_pred_objects = []
for pred_object in tqdm(all_pred_objects):
    # Predictions without a feature vector cannot be classified; leave them out.
    if pred_object.pred_features is None:
        continue

    predicted_label = knn.predict_sample(pred_object.pred_features)
    predicted_sub_class = knn.label_to_class[predicted_label[0]]
    knn_mean_dist = knn.get_mean_dist_neighbors(pred_object.pred_features, predicted_label)

    pred_object.add_classification_res(predicted_sub_class, knn_mean_dist[0])
    classified_pred_objects.append(pred_object)

# Group by image and write the gt / detection text files for the evaluator.
classified_per_image_objects = get_pred_objects_per_image(classified_pred_objects)
export_per_img_pred_objects(classified_per_image_objects, "../eval_1_gt/", "../eval_1_pred/")

  0%|          | 0/200 [00:00<?, ?it/s]

Loadded feature vector with shape: (12000, 384), label array with shape: (12000,)


  0%|          | 0/83123 [00:00<?, ?it/s]

In [15]:
# Score the exported detections (-det) against the exported ground truth (-gt).
# NOTE(review): -sp looks like the output location for the results — confirm in evaluate.py.
!python "evaluation/object-detection-metrics/evaluate.py" -gt "../eval_1_gt/" -det "../eval_1_pred/" -sp "../eval_1_res"

Average precision values per class for the whole images:

Class: 100_milk: AP: 0.483235
Class: 100_milk: Recall: 0.500000 Precision: 0.857143
Class: 101_milk: AP: 0.417082
Class: 101_milk: Recall: 0.597561 Precision: 0.593939
Class: 102_milk: AP: 0.104443
Class: 102_milk: Recall: 0.190000 Precision: 0.463415
Class: 103_milk: AP: 0.154539
Class: 103_milk: Recall: 0.351275 Precision: 0.402597
Class: 104_milk: AP: 0.339674
Class: 104_milk: Recall: 0.390663 Precision: 0.836842
Class: 105_milk: AP: 0.883485
Class: 105_milk: Recall: 1.000000 Precision: 0.845890
Class: 106_milk: AP: 0.249188
Class: 106_milk: Recall: 0.524725 Precision: 0.452607
Class: 107_milk: AP: 0.168266
Class: 107_milk: Recall: 0.395683 Precision: 0.352564
Class: 108_canned_food: AP: 0.250537
Class: 108_canned_food: Recall: 0.416357 Precision: 0.446215
Class: 109_canned_food: AP: 0.425738
Class: 109_canned_food: Recall: 0.568452 Precision: 0.729008
Class: 10_puffed_food: AP: 0.203177
Class: 10_puffe

In [16]:
# Create the output folders for the second evaluation. exist_ok=True keeps the
# cell re-runnable: a plain `mkdir` fails once the folders already exist.
os.makedirs('../eval_2_gt', exist_ok=True)
os.makedirs('../eval_2_pred', exist_ok=True)

In [17]:
# Second Evaluation: ViT-H + DinoV2 + KNN with postprocessing, still no threshold.
# Train and val post-processed predictions are evaluated together.
train_pred_objects = read_pred_objects_json("/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/Dino/vith_res_train_processed_pred_objects_2.json")
val_pred_objects = read_pred_objects_json("/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/Dino/vith_res_val_processed_pred_objects_2.json")
all_pred_objects = train_pred_objects + val_pred_objects

# The KNN model fitted for the first evaluation (`knn`) is reused as-is.
classified_pred_objects = []
for pred_object in tqdm(all_pred_objects):
    # Predictions without a feature vector cannot be classified; leave them out.
    if pred_object.pred_features is None:
        continue

    predicted_label = knn.predict_sample(pred_object.pred_features)
    predicted_sub_class = knn.label_to_class[predicted_label[0]]
    knn_mean_dist = knn.get_mean_dist_neighbors(pred_object.pred_features, predicted_label)

    pred_object.add_classification_res(predicted_sub_class, knn_mean_dist[0])
    classified_pred_objects.append(pred_object)

# Group by image and write the gt / detection text files for the evaluator.
classified_per_image_objects = get_pred_objects_per_image(classified_pred_objects)
export_per_img_pred_objects(classified_per_image_objects, "../eval_2_gt/", "../eval_2_pred/")

  0%|          | 0/69905 [00:00<?, ?it/s]

In [18]:
# Score the post-processed detections (-det) against their exported ground truth (-gt).
# NOTE(review): -sp looks like the output location for the results — confirm in evaluate.py.
!python "evaluation/object-detection-metrics/evaluate.py" -gt "../eval_2_gt/" -det "../eval_2_pred/" -sp "../eval_2_res"

Average precision values per class for the whole images:

Class: 100_milk: AP: 0.487350
Class: 100_milk: Recall: 0.502890 Precision: 0.915789
Class: 101_milk: AP: 0.423885
Class: 101_milk: Recall: 0.593168 Precision: 0.692029
Class: 102_milk: AP: 0.104454
Class: 102_milk: Recall: 0.184300 Precision: 0.529412
Class: 103_milk: AP: 0.162188
Class: 103_milk: Recall: 0.357349 Precision: 0.446043
Class: 104_milk: AP: 0.345123
Class: 104_milk: Recall: 0.393484 Precision: 0.872222
Class: 105_milk: AP: 0.902308
Class: 105_milk: Recall: 1.000000 Precision: 0.891697
Class: 106_milk: AP: 0.261085
Class: 106_milk: Recall: 0.523810 Precision: 0.485714
Class: 107_milk: AP: 0.167261
Class: 107_milk: Recall: 0.396364 Precision: 0.374570
Class: 108_canned_food: AP: 0.253291
Class: 108_canned_food: Recall: 0.415730 Precision: 0.460581
Class: 109_canned_food: AP: 0.432614
Class: 109_canned_food: Recall: 0.571429 Precision: 0.743083
Class: 10_puffed_food: AP: 0.213580
Class: 10_puffe