# Setup

In [None]:
# Mount Google Drive at /content/drive. Later cells read the dataset archive from
# /content/drive/MyDrive/... and write the result pickle back to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone the project repository, change into it and install it as a package.
# Every shell/pip command in this cell is suffixed with `&> /dev/null` to silence its output.
!git clone https://github.com/uonat/SS2023_DI-Lab_Precitaste.git &> /dev/null
%cd SS2023_DI-Lab_Precitaste
%pip install . &> /dev/null

# detectron2 is not pip-installed as a package here: its setup.py is evaluated with
# distutils only to read `install_requires`, those requirements are pip-installed,
# and the cloned source directory is prepended to sys.path so it can be imported.
# NOTE(review): distutils is deprecated (PEP 632) and no longer ships with
# Python 3.12+ — confirm the runtime's Python version before relying on this.
import distutils.core
import sys,os
!git clone 'https://github.com/facebookresearch/detectron2'  &> /dev/null
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])} &> /dev/null
sys.path.insert(0, os.path.abspath('./detectron2'))

# Additional packages, followed by OpenAI CLIP installed directly from GitHub.
%pip install ftfy regex tqdm &> /dev/null
%pip install git+https://github.com/openai/CLIP.git &> /dev/null

In [None]:
import torch
from models.CLIP import load_model as load_clip,tokenize_text,Calculate_Scores,get_total_num_obj

# Prefer the GPU whenever torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Load the ViT-B/32 CLIP checkpoint through the project's loader, which also
# returns the matching image preprocessing object.
clip_model, preprocess = load_clip("ViT-B/32", device)

# Switch to evaluation mode (assumes clip_model is a torch nn.Module).
# Kept as the final expression so the cell still echoes the model.
clip_model.eval()

In [None]:
!mkdir '/content/retail_product_checkout'
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "val2019/*" -d '/content/retail_product_checkout/val2019' 
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "instances_val2019.json" -d '/content/retail_product_checkout'

In [None]:
# Wrap the extracted files in the project's RPCDataset helper.
# The root path is the directory the archive was unpacked into; "val" presumably
# selects the val2019 split (images + instances_val2019.json) — confirm in RPCDataset.
from dataset.RPCDataset import RPCDataset
val_dataset_path = "/content/retail_product_checkout"
val_dataset = RPCDataset(val_dataset_path, "val")

In [None]:
# Build the class vocabulary from the validation annotations.
# For every annotation, annot[1] is treated as the 'sku_name' string: its first
# '_'-separated token is dropped and the remaining tokens are joined with spaces.
unique_labels = set()
for img_id in range(val_dataset.get_num_imgs()):
  for annot in val_dataset.get_annots_by_img_id(img_id, key_for_category='sku_name'):
    unique_labels.add(' '.join(annot[1].split('_')[1:]))

# Sort instead of list(set): set iteration order for strings changes between
# kernel restarts (hash randomisation), which would make the class indices used
# downstream (text feature rows, ground-truth indices) differ from run to run.
all_labels = sorted(unique_labels)

# Prefix each label with the matching indefinite article for the text prompt.
# str.startswith with a tuple is also safe for an empty label (it gets "a "),
# whereas indexing label[0] would raise IndexError.
VOWELS = ('a', 'e', 'i', 'o', 'u')
all_labels_a_an = [
  ("an " if label.startswith(VOWELS) else "a ") + label
  for label in all_labels
]

# Calculate Scores

In [None]:
# Turn every class label into an "a photo of <article> <label>" prompt, tokenize
# the prompts and encode them with CLIP's text encoder (no gradients needed).
prompts = [f"a photo of {label}" for label in all_labels_a_an]
text_p = tokenize_text(prompts, device)
with torch.no_grad():
  text_features = clip_model.encode_text(text_p)
# Scale each text embedding to unit L2 norm along the last dimension (in place).
text_features /= text_features.norm(dim=-1, keepdim=True)

# Decide where the results are stored. If the default file already exists, a
# "_YYYY-MM-DD_HH-MM-SS" timestamp is appended to the name instead of reusing it.
Results_path = "/content/drive/MyDrive/ApplicationProject/Results"
output_name = "clip_gt_txt_Result_v1.pkl"
default_target = os.path.join(Results_path, output_name)
if os.path.isfile(default_target):
  from datetime import datetime
  stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  stem = output_name.split('.')[0]
  output_name = f"{stem}_{stamp}.pkl"
# Despite its name, output_dir is the full path of the output file.
output_dir = os.path.join(Results_path, output_name)

# Run the project's scoring routine over the validation set.
# NOTE(review): the meaning of the trailing positional False is defined in
# models.CLIP.Calculate_Scores — confirm there.
Results = Calculate_Scores(
  clip_model,
  preprocess,
  val_dataset,
  text_features,
  all_labels,
  output_dir,
  device,
  False,
)

# Evaluate

In [None]:
# Flatten the per-object results into two parallel lists with one entry per
# (object, class) pair: `scores` takes the values of res[0], and `gt_label`
# takes a one-hot row that marks the class index stored in res[1].
num_classes = len(all_labels)
scores, gt_label = [], []
for res in Results:
  one_hot = [0] * num_classes
  one_hot[res[1]] = 1
  scores.extend(res[0])
  gt_label.extend(one_hot)

In [None]:
# Report dataset statistics and check that the flattened score / label lists
# have one entry per (object, class) pair.
# get_num_imgs() returns an integer (it is passed to range() when the labels are
# collected), so it is printed directly; wrapping it in len() raises TypeError.
num_objects = get_total_num_obj(val_dataset)  # computed once, reused in the assert below
num_classes = len(all_labels)

print("Number of images: ", val_dataset.get_num_imgs())
print("Number of objects: ", num_objects)
print("Number of classes: ", num_classes)
assert len(scores) == len(gt_label), "scores and gt_label must have the same length"
assert len(scores) == num_objects * num_classes, "expected one score per (object, class) pair"
print('-'*20)

In [None]:
from sklearn.metrics import f1_score,precision_score,recall_score,average_precision_score,accuracy_score
import numpy as np

print("Prediction Results:")

def to_labels(pos_probs, threshold):
  """Binarize scores: 1 where a score is strictly greater than threshold, else 0.

  Returns a new list with one 0/1 integer per element of pos_probs.
  """
  return [int(prob > threshold) for prob in pos_probs]

# Sweep candidate thresholds 0.10, 0.15, ..., 0.90 and keep the one that gives
# the highest F1 on the flattened (object, class) decisions.
cand_thresholds = [x / 100.0 for x in range(10, 95, 5)]
f1_scores_for_thrs = [f1_score(gt_label, to_labels(scores, t)) for t in cand_thresholds]
ix = np.argmax(f1_scores_for_thrs)
Th = cand_thresholds[ix]

# Binarize once at the selected threshold and reuse the result for every metric.
best_preds = to_labels(scores, Th)

# The two values must be passed to % as a tuple; `fmt % Th, other` formats with
# Th alone and fails with "not enough arguments for format string".
print('Threshold=%.2f, F-Score=%.5f' % (Th, f1_scores_for_thrs[ix]))
print("precision: %.5f" % precision_score(gt_label, best_preds))
print("recall: %.5f" % recall_score(gt_label, best_preds))
print("f1_score: %.5f" % f1_score(gt_label, best_preds))

print("Classification Results:")

def _one_hot(index, size):
  """Return a list of `size` zeros with a single 1 at position `index`."""
  row = [0] * size
  row[index] = 1
  return row

# Per object: y_true marks the class index stored in res[1], y_score marks the
# arg-max position of the values in res[0] (the top-1 prediction).
num_classes = len(all_labels)
y_true = []
y_score = []
for res in Results:
  y_true.append(_one_hot(res[1], num_classes))
  y_score.append(_one_hot(np.argmax(res[0]), num_classes))

# NOTE(review): average_precision_score receives one-hot top-1 predictions here,
# not the underlying scores — confirm that this is the intended metric.
avg_precision = average_precision_score(y_true, y_score)
accuracy = accuracy_score(y_true, y_score)
print('average_precision=%.5f, ACC=%.5f' % (avg_precision, accuracy))