In [1]:
# Fetch the project repository into the current working directory.
!git clone https://github.com/uonat/SS2023_DI-Lab_Precitaste.git

Cloning into 'SS2023_DI-Lab_Precitaste'...
remote: Enumerating objects: 261, done.
remote: Counting objects: 100% (261/261), done.
remote: Compressing objects: 100% (195/195), done.
remote: Total 261 (delta 88), reused 212 (delta 53), pack-reused 0
Receiving objects: 100% (261/261), 23.53 MiB | 30.81 MiB/s, done.
Resolving deltas: 100% (88/88), done.


In [2]:
# Work from the repository root so the following git and pip commands act on the clone.
%cd '/kaggle/working/SS2023_DI-Lab_Precitaste'

/kaggle/working/SS2023_DI-Lab_Precitaste


In [3]:
# Refresh the remote refs, then list local and remote branches with their latest commit.
!git fetch
!git branch -v -a

* main                               2df23e7 Bug fix
  remotes/origin/CLIP-implementation 134fe54 Updated CLIP Notebook
  remotes/origin/HEAD                -> origin/main
  remotes/origin/dinov2              4859060 Added second return parameter for eliminate_boxes post process
  remotes/origin/main                2df23e7 Bug fix
  remotes/origin/yushan              94fc5af Update: different TEXT_PROMPT for RPC datasets


In [4]:
# Check out the `dinov2` branch; the rest of the notebook runs against this branch.
!git switch dinov2

Branch 'dinov2' set up to track remote branch 'dinov2' from 'origin'.
Switched to a new branch 'dinov2'


In [5]:
!pip install . &> /dev/null

In [6]:
import numpy as np
import cv2
import torch
import json
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import math
import torchvision.transforms as T
from PIL import Image

In [7]:
def crop_object_with_bbox(np_img, bbox):
    """Crop the region described by ``bbox`` out of an image array.

    Args:
        np_img: Image array indexed as ``[row, column, ...]`` (at least 2-D).
        bbox: Four numbers ``(x1, y1, x2, y2)``. Each value is truncated to an
            integer with ``int()`` before slicing.

    Returns:
        A ``float32`` copy of ``np_img[y1:y2, x1:x2]``. Coordinates are clamped
        to the image extent first, so a box that lies partly outside the image
        yields its visible part and a box entirely outside yields an empty
        array.
    """
    height, width = np_img.shape[:2]
    x1, y1, x2, y2 = (int(coord) for coord in bbox)

    # numpy treats negative slice bounds as "count from the end", which would
    # silently return an empty or unrelated region for boxes that start
    # outside the image. Clamp every coordinate into the valid range instead.
    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))

    return np_img[y1:y2, x1:x2].astype('float32')

In [8]:
# Dataset paths
# Root directory of the Retail Product Checkout (RPC) dataset on the Kaggle input mount.
rpc_main_path = '/kaggle/input/retail-product-checkout-dataset'
# Dimension of the feature vector obtained from DINO
# NOTE(review): 384 is presumably the embedding size of the extractor's checkpoint — confirm.
FEATURE_DIM = 384

In [9]:
# Project-local import; presumably provided by the repository installed above.
from dataset.RPCDataset import RPCDataset
# 'val' split of the RPC dataset rooted at rpc_main_path.
val_dataset = RPCDataset(rpc_main_path, 'val')
# Class names as reported by the dataset object.
sub_classes = val_dataset.get_class_names()
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [10]:
# Load the stored training configuration (a JSON document) into `train_config`.
with open('/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/VitH/train_config.json', 'r') as jfile:
    train_config = json.load(jfile)

In [11]:
class Prediction:
    """Record describing one predicted bounding box and its annotations.

    All array-like data is stored as plain Python lists so that the record
    can be written to JSON through ``to_dict`` and restored with
    ``read_dict``.

    Attributes:
        img_name: Name of the image the prediction belongs to.
        img_path: Path of that image.
        pred_bbox: Predicted box as a list, or ``None`` when no box was given.
        pred_score_bbox: Score attached to the predicted box.
        gt_bbox / gt_label: Ground-truth box and label, set by ``add_gt_bbox``.
        pred_label / class_score: Classification result, set by
            ``add_classification_res``.
        pred_features: Feature vector as a list, set by ``add_feature_vector``.
        is_train: Flag passed to ``add_gt_bbox``.
    """

    def __init__(self, img_name, img_path, pred_bbox, pred_score_bbox):
        """Create a prediction with only the detector output filled in.

        Args:
            img_name: Name of the source image.
            img_path: Path of the source image.
            pred_bbox: Predicted box; an array/tensor exposing ``tolist()``,
                any plain sequence, or ``None``.
            pred_score_bbox: Score of the predicted box.
        """
        self.img_name = img_name
        self.img_path = img_path

        if pred_bbox is not None:
            self.pred_bbox = self._to_list(pred_bbox)
        else:
            self.pred_bbox = None

        self.pred_score_bbox = pred_score_bbox

        # Obtained when prediction matches with a gt bounding box
        self.gt_bbox = None
        self.gt_label = None

        # Obtained from knn
        self.pred_label = None
        # Obtained from knn by measuring mean distance to its predicted label
        self.class_score = None
        self.pred_features = None
        self.is_train = None

    @staticmethod
    def _to_list(values):
        """Return ``values`` as a plain list.

        Objects exposing ``tolist()`` (e.g. numpy arrays) are converted with
        it; any other iterable is copied with ``list()``. This lets data that
        already went through ``to_dict``/``read_dict`` (plain lists) be fed
        back into the class.
        """
        if hasattr(values, 'tolist'):
            return values.tolist()
        return list(values)

    def add_gt_bbox(self, gt_bbox, gt_label, train_class_flag):
        """Attach the matched ground-truth box, its label and the train flag."""
        self.gt_bbox = gt_bbox
        self.gt_label = gt_label
        self.is_train = train_class_flag

    def add_feature_vector(self, feature_vector):
        """Store ``feature_vector`` (array-like or plain sequence) as a list."""
        self.pred_features = self._to_list(feature_vector)

    def add_classification_res(self, pred_label, mean_dist):
        """Store the predicted label and its score (``mean_dist``)."""
        self.pred_label = pred_label
        self.class_score = mean_dist

    def to_dict(self):
        """Return all fields as a dict keyed by attribute name."""
        return {
            'img_name': self.img_name,
            'img_path': self.img_path,
            'pred_bbox': self.pred_bbox,
            'pred_score_bbox': self.pred_score_bbox,
            'gt_bbox': self.gt_bbox,
            'gt_label': self.gt_label,
            'pred_label': self.pred_label,
            'class_score': self.class_score,
            'pred_features': self.pred_features,
            'is_train': self.is_train
        }

    def read_dict(self, content):
        """Overwrite every field from ``content``, a dict shaped like ``to_dict()``.

        Raises:
            KeyError: If ``content`` lacks any of the expected keys.
        """
        self.img_name = content['img_name']
        self.img_path = content['img_path']
        self.pred_bbox = content['pred_bbox']

        self.pred_score_bbox = content['pred_score_bbox']
        self.gt_bbox = content['gt_bbox']
        self.gt_label = content['gt_label']
        self.pred_label = content['pred_label']
        self.class_score = content['class_score']
        self.pred_features = content['pred_features']
        self.is_train = content['is_train']

In [12]:
# Project-local import; presumably provided by the repository installed above.
from models.DINO import DINOFeatureExtractor
# Shared feature extractor instance used by find_feature_vector_for_bbox below.
feat_extractor = DINOFeatureExtractor()

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 163MB/s]


In [13]:
def read_pred_objects_json(json_path):
    """Load a JSON file of prediction dicts and rebuild ``Prediction`` objects.

    Args:
        json_path: Path of a JSON file holding a list of dicts, each with the
            keys expected by ``Prediction.read_dict``.

    Returns:
        A list with one ``Prediction`` per entry, in file order.
    """
    def _restore(record):
        # The constructor arguments are placeholders; read_dict overwrites them.
        restored = Prediction("", "", np.array([]), 0.0)
        restored.read_dict(record)
        return restored

    with open(json_path, "r") as handle:
        records = json.load(handle)

    return [_restore(record) for record in records]

def get_pred_objects_per_image(pred_objects):
    """Group prediction objects by the image they belong to.

    Args:
        pred_objects: Iterable of objects exposing an ``img_name`` attribute.

    Returns:
        A dict mapping each ``img_name`` to the list of objects carrying that
        name. Objects keep their input order within each list, and keys
        appear in order of first occurrence.
    """
    objects_per_img = {}
    # Single pass: avoids rescanning the whole prediction list once per image
    # and gives a key order that does not depend on set iteration order.
    for pobject in pred_objects:
        objects_per_img.setdefault(pobject.img_name, []).append(pobject)
    return objects_per_img

In [14]:
def find_feature_vector_for_bbox(pred_objects):
    """Attach a feature vector to every prediction that has a usable box.

    Predictions are grouped per image so each image file is opened once. For
    every prediction the boxed region is cropped, passed to the module-level
    ``feat_extractor`` and the result is stored on the prediction through
    ``add_feature_vector`` (the input objects are modified in place).

    A prediction is skipped, and left out of the result, when:
      * its ``pred_bbox`` is ``None``,
      * the box is less than one pixel wide or tall, or
      * the crop turns out empty (e.g. the box lies outside the image).

    Args:
        pred_objects: Iterable of prediction objects exposing ``img_name``,
            ``img_path``, ``pred_bbox`` and ``add_feature_vector``.

    Returns:
        List of the prediction objects that received a feature vector.
    """
    pred_objects_per_image = get_pred_objects_per_image(pred_objects)
    processed_pred_objects = []
    with torch.no_grad():
        for img_name in tqdm(pred_objects_per_image):

            img_pred_objects = pred_objects_per_image[img_name]
            img_path = img_pred_objects[0].img_path

            # Close the file handle deterministically and normalise to three
            # channels so grayscale / palette / RGBA inputs are cropped the
            # same way as RGB ones.
            with Image.open(img_path) as pil_img:
                np_img = np.asarray(pil_img.convert("RGB"))

            for pred_object in img_pred_objects:
                bbox = pred_object.pred_bbox
                if bbox is None:
                    continue
                if (bbox[2] - bbox[0]) < 1 or (bbox[3] - bbox[1]) < 1:
                    continue

                cropped_object_np_img = crop_object_with_bbox(np_img, bbox)
                if cropped_object_np_img.size == 0:
                    # Nothing of the box is visible; there is nothing to embed.
                    continue

                # predict() presumably returns a torch tensor (it is moved to
                # the CPU and converted to numpy here) — TODO confirm.
                sample_feature = feat_extractor.predict(cropped_object_np_img)
                sample_feature = sample_feature.cpu().numpy()

                pred_object.add_feature_vector(sample_feature)
                processed_pred_objects.append(pred_object)
    return processed_pred_objects

In [15]:
def dump_pred_objects(prediction_objects, jpath):
    """Serialise prediction objects to a JSON file.

    Args:
        prediction_objects: Iterable of objects exposing ``to_dict()``.
        jpath: Destination path; the file is created or overwritten.
    """
    # Build the full payload first so the file is only opened once every
    # object has been converted.
    serialised = []
    for prediction in prediction_objects:
        serialised.append(prediction.to_dict())

    with open(jpath, "w") as out_file:
        json.dump(serialised, out_file)

In [16]:
# Run DINO feature extraction over each stored prediction file and write the
# enriched predictions next to the repository as "<stem>_2.json".
ARTIFACT_DIR = "/kaggle/input/di-lab-idea2-artifacts/Idea2-Files/VitH"
PRED_FILE_STEMS = [
    "vith_res_train_pred_objects",
    "vith_res_train_processed_pred_objects",
    "vith_res_val_pred_objects",
    "vith_res_val_processed_pred_objects",
]

for file_stem in PRED_FILE_STEMS:
    pred_objects = read_pred_objects_json(f"{ARTIFACT_DIR}/{file_stem}.json")
    feature_extracted_pred_objects = find_feature_vector_for_bbox(pred_objects)
    dump_pred_objects(feature_extracted_pred_objects, f"../{file_stem}_2.json")


  0%|          | 0/4800 [00:00<?, ?it/s]

  0%|          | 0/4800 [00:00<?, ?it/s]

  0%|          | 0/1200 [00:00<?, ?it/s]

  0%|          | 0/1200 [00:00<?, ?it/s]