# Setup

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write their result files to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone the project repository, switch into it and install it as a package.
# The `&> /dev/null` suffix hides the command output, so failures are hidden too.
!git clone https://github.com/uonat/SS2023_DI-Lab_Precitaste.git &> /dev/null
%cd SS2023_DI-Lab_Precitaste
%pip install . &> /dev/null

# NOTE(review): distutils was removed from the standard library in Python 3.12;
# this import fails on newer runtimes unless setuptools provides a shim.
import distutils.core
import sys,os
# Clone detectron2, install only the requirements declared in its setup.py and
# make the checkout importable through sys.path (it is not pip-installed itself).
!git clone 'https://github.com/facebookresearch/detectron2'  &> /dev/null
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])} &> /dev/null
sys.path.insert(0, os.path.abspath('./detectron2'))

# Install ftfy/regex/tqdm, then OpenAI CLIP directly from its GitHub repository.
%pip install ftfy regex tqdm &> /dev/null
%pip install git+https://github.com/openai/CLIP.git &> /dev/null

In [None]:
import torch
from models.CLIP import (
    load_model as load_clip,
    Calculate_Embeddings,
    tokenize_text,
    Calculate_Scores,
    get_total_num_obj,
    available_clip_models,
)
#available_clip_models()

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Load the ViT-B/32 checkpoint together with its image preprocessing callable.
clip_model, preprocess = load_clip("ViT-B/32", device)
# Last expression of the cell, so its return value is displayed as cell output.
clip_model.eval()

In [None]:
!mkdir '/content/retail_product_checkout'
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "val2019/*" -d '/content/retail_product_checkout/val2019' 
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "instances_val2019.json" -d '/content/retail_product_checkout'
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "train2019/*" -d '/content/retail_product_checkout/train2019' 
!unzip -q -j "/content/drive/MyDrive/ApplicationProject/Data/retail-product-checkout-dataset.zip" "instances_train2019.json" -d '/content/retail_product_checkout'

In [None]:
from dataset.RPCDataset import RPCDataset

# Folder the dataset archive was extracted into.
dataset_path = "/content/retail_product_checkout"

# Construct the validation split first and the training split second.
val_dataset, train_dataset = (
    RPCDataset(dataset_path, split) for split in ("val", "train")
)

# Get the embeddings of all the training data

In [None]:
Results_path = "/content/drive/MyDrive/ApplicationProject/Results"
output_name = "clip_gt_train_image_embs.pkl"

# Keep the file of an earlier run: when the default name is already taken,
# append a "_YYYY-MM-DD_HH-MM-SS" timestamp to the file name.
if os.path.isfile(os.path.join(Results_path, output_name)):
  from datetime import datetime
  stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  output_name = output_name.split('.')[0] + '_' + stamp + ".pkl"

# Despite its name this is the full path of the output *file*.
output_dir = os.path.join(Results_path, output_name)

Results = Calculate_Embeddings(clip_model, preprocess, train_dataset, output_dir, device)

# Group the results by their second field: dict_all[res[1]] lists every res[0].
# Presumably res[0] is an embedding and res[1] its fine-grained label --
# TODO confirm against Calculate_Embeddings.
dict_all = {}
for res in Results:
  dict_all.setdefault(res[1], []).append(res[0])

# Fine-grained labels

TODO: the target feature embeddings are currently the mean over *all* training embeddings of a label; try a few-shot variant that uses only a handful of training images per label.

In [None]:
import numpy as np

# One target vector per fine-grained label: the mean over all entries that
# dict_all collected for that label.
dict_finegrained_mean_feature_vecs = {
    key: np.stack(embs, axis=0).mean(axis=0) for key, embs in dict_all.items()
}

# Order of the fine-grained labels; positions in this list are the class
# indices used by the later cells.
all_labels_finegrained = list(dict_all.keys())
print("len(all_labels_finegrained):",len(all_labels_finegrained))
print("all_labels_finegrained:",all_labels_finegrained)

# A broad label is the sku_name without its first "_"-separated token, with the
# remaining tokens joined by spaces. Collect every one that occurs in the
# validation annotations.
broad_label_set = set()
for i in range(val_dataset.get_num_imgs()):
  for annot in val_dataset.get_annots_by_img_id(i, key_for_category='sku_name'):
    broad_label_set.add(' '.join(annot[1].split('_')[1:]))

# Sort instead of list(set): the iteration order of a set of strings changes
# between interpreter sessions (string hashing is randomised), which would make
# the broad class indices -- and any results saved with them -- irreproducible.
all_labels_broad = sorted(broad_label_set) #Broad labels
print("len(all_labels_broad):",len(all_labels_broad))
print("all_labels_broad:",all_labels_broad)

In [None]:
# Stack the per-label mean vectors in the order of all_labels_finegrained, so
# row i of target_features belongs to all_labels_finegrained[i].
target_features = np.stack(
    [dict_finegrained_mean_feature_vecs[label] for label in all_labels_finegrained],
    axis=0,
)
target_features = torch.from_numpy(target_features).type(torch.float16).to(device)

Results_path = "/content/drive/MyDrive/ApplicationProject/Results"
output_name = "clip_gt_fewShot_finegrained_allTrain_Result.pkl"

# Keep the file of an earlier run: when the default name is already taken,
# append a "_YYYY-MM-DD_HH-MM-SS" timestamp to the file name.
if os.path.isfile(os.path.join(Results_path, output_name)):
  from datetime import datetime
  stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  output_name = output_name.split('.')[0] + '_' + stamp + ".pkl"

# Despite its name this is the full path of the output *file*.
output_dir = os.path.join(Results_path, output_name)

Results = Calculate_Scores(
    clip_model,
    preprocess,
    val_dataset,
    target_features,
    all_labels_finegrained,
    output_dir,
    device,
    True,
)

## Evaluation - Fine-grained Labels

In [None]:
# Flatten the per-result data into two parallel lists with one entry per
# (result, fine-grained class) pair: the score and a 0/1 ground-truth flag.
num_fine_classes = len(all_labels_finegrained)
scores = []
gt_label = []
for res in Results:
  one_hot = [0] * num_fine_classes
  one_hot[res[1]] = 1  # res[1] is used as the index of the true class
  scores.extend(res[0])
  gt_label.extend(one_hot)

In [None]:
# Print the dataset sizes and check that the flattened lists hold exactly one
# entry per (object, fine-grained class) pair.
num_objects = get_total_num_obj(val_dataset)
num_classes = len(all_labels_finegrained)

# get_num_imgs() is already a count (the notebook passes it to range()), so it
# must not be wrapped in len() -- that raises a TypeError.
print("Number of images: ", val_dataset.get_num_imgs())
print("Number of objects: ", num_objects)
print("Number of classes: ", num_classes)
assert len(scores) == len(gt_label)
assert len(scores) == num_objects * num_classes
print('-'*20)

In [None]:
from sklearn.metrics import f1_score,precision_score,recall_score,average_precision_score,accuracy_score
import numpy as np

def to_labels(pos_probs, threshold):
  """Binarise scores: 1 where a score is strictly above `threshold`, else 0."""
  return [1 if nm > threshold else 0 for nm in pos_probs]

print("Prediction Results:")

# Try the thresholds 0.10, 0.15, ..., 0.90 and keep the one with the best F1.
# NOTE(review): the threshold is tuned on the same data it is reported on.
cand_thresholds = [x / 100.0 for x in range(10, 95, 5)]
f1_scores_for_thrs = [f1_score(gt_label, to_labels(scores, t)) for t in cand_thresholds]
ix = np.argmax(f1_scores_for_thrs)
Th = cand_thresholds[ix]

# Both values have to be passed to % as ONE tuple; "fmt % Th, x" formats with Th
# alone and fails with "not enough arguments for format string".
print('Threshold=%.2f, F-Score=%.5f' % (Th, f1_scores_for_thrs[ix]))

# Binarise once at the chosen threshold and reuse it for all three metrics.
best_preds = to_labels(scores, Th)
print("precision: %.5f" % precision_score(gt_label, best_preds))
print("recall: %.5f" % recall_score(gt_label, best_preds))
print("f1_score: %.5f" % f1_score(gt_label, best_preds))

print("Classification Results:")
# One-hot rows per result: the true class and the arg-max (top-1) prediction.
num_classes = len(all_labels_finegrained)
y_true = []
y_score = []

for res in Results:
  true_row = [0] * num_classes
  true_row[res[1]] = 1
  y_true.append(true_row)

  pred_row = [0] * num_classes
  pred_row[np.argmax(res[0])] = 1
  y_score.append(pred_row)

print('average_precision=%.5f, ACC=%.5f' % (average_precision_score(y_true, y_score), accuracy_score(y_true, y_score)))

### Map fine-grained label scores to broad label scores

In [None]:
# Fine-grained class index -> index of its broad label in all_labels_broad.
# The broad label is the fine label without its first "_"-separated token.
# (Keys are 0..n-1, so this could also be a list.)
mapping_fine2broad = {
    ind: all_labels_broad.index(' '.join(label.split('_')[1:]))
    for ind, label in enumerate(all_labels_finegrained)
}

# Inverse direction: broad class index -> all fine-grained indices mapped to it.
mapping_broad2fine = {}
for fine_ind, broad_ind in mapping_fine2broad.items():
  mapping_broad2fine.setdefault(broad_ind, []).append(fine_ind)

In [None]:
class_num_fine = len(all_labels_finegrained)
class_num_broad = len(all_labels_broad)

# Rebuild the flat score / ground-truth lists at broad granularity: the score of
# a broad class is the maximum over the scores of its fine-grained classes.
scores = []
gt_label = []
for res in Results:
  gt_one_hot = [0] * class_num_broad
  gt_one_hot[mapping_fine2broad[res[1]]] = 1
  broad_scores = [
      np.amax([res[0][fine_ind] for fine_ind in mapping_broad2fine[cnbi]])
      for cnbi in range(class_num_broad)
  ]
  scores.extend(broad_scores)
  gt_label.extend(gt_one_hot)

assert len(scores) == len(gt_label)
assert len(scores) == get_total_num_obj(val_dataset)*len(all_labels_broad)

In [None]:
from sklearn.metrics import f1_score,precision_score,recall_score,average_precision_score,accuracy_score
import numpy as np

def to_labels(pos_probs, threshold):
  """Binarise scores: 1 where a score is strictly above `threshold`, else 0."""
  return [1 if nm > threshold else 0 for nm in pos_probs]

print("Prediction Results:")

# Try the thresholds 0.10, 0.15, ..., 0.90 and keep the one with the best F1.
# NOTE(review): the threshold is tuned on the same data it is reported on.
cand_thresholds = [x / 100.0 for x in range(10, 95, 5)]
f1_scores_for_thrs = [f1_score(gt_label, to_labels(scores, t)) for t in cand_thresholds]
ix = np.argmax(f1_scores_for_thrs)
Th = cand_thresholds[ix]

# Both values have to be passed to % as ONE tuple; "fmt % Th, x" formats with Th
# alone and fails with "not enough arguments for format string".
print('Threshold=%.2f, F-Score=%.5f' % (Th, f1_scores_for_thrs[ix]))

# Binarise once at the chosen threshold and reuse it for all three metrics.
best_preds = to_labels(scores, Th)
print("precision: %.5f" % precision_score(gt_label, best_preds))
print("recall: %.5f" % recall_score(gt_label, best_preds))
print("f1_score: %.5f" % f1_score(gt_label, best_preds))

print("Classification Results:")
# Results still holds the fine-grained run here: res[1] and argmax(res[0]) are
# fine-grained indices. The rows below have one slot per BROAD class, so both
# indices are translated with mapping_fine2broad first; using them directly
# marks the wrong class or runs past the end of the row.
num_broad_classes = len(all_labels_broad)
y_true = []
y_score = []

for res in Results:
  true_row = [0] * num_broad_classes
  true_row[mapping_fine2broad[res[1]]] = 1
  y_true.append(true_row)

  # The broad class of the best fine-grained score is the top-1 prediction.
  pred_row = [0] * num_broad_classes
  pred_row[mapping_fine2broad[int(np.argmax(res[0]))]] = 1
  y_score.append(pred_row)

print('average_precision=%.5f, ACC=%.5f' % (average_precision_score(y_true, y_score), accuracy_score(y_true, y_score)))

# Broad Labels

TODO: the target feature embeddings are currently the mean over *all* training embeddings of a broad label; try a few-shot variant that uses only a handful of training images per label.

In [None]:
#Merge for broad
dict_broad = {}
for key,value in dict_all.items():
  broad_label = ' '.join(key.split('_')[1:])
  try:
    dict_broad[broad_label].append(value)
  except KeyError:
    dict_broad[broad_label] = [value]

dict_broad_mean_feature_vecs = {}
for key in dict_broad.keys():
  dict_broad_mean_feature_vecs[key] = np.concatenate(dict_broad[key],axis=0).mean(axis=0)

In [None]:
# Stack the per-label mean vectors in the order of all_labels_broad, so row i of
# target_features belongs to all_labels_broad[i].
target_features = np.stack(
    [dict_broad_mean_feature_vecs[label] for label in all_labels_broad],
    axis=0,
)
target_features = torch.from_numpy(target_features).type(torch.float16).to(device)

Results_path = "/content/drive/MyDrive/ApplicationProject/Results"
output_name = "clip_gt_fewShot_broad_allTrain_Result.pkl"

# Keep the file of an earlier run: when the default name is already taken,
# append a "_YYYY-MM-DD_HH-MM-SS" timestamp to the file name.
if os.path.isfile(os.path.join(Results_path, output_name)):
  from datetime import datetime
  stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  output_name = output_name.split('.')[0] + '_' + stamp + ".pkl"

# Despite its name this is the full path of the output *file*.
output_dir = os.path.join(Results_path, output_name)

# NOTE(review): target_features has one row per entry of all_labels_broad, yet
# all_labels_finegrained is passed as the label list. This looks like a
# copy-paste slip -- confirm whether Calculate_Scores expects all_labels_broad
# when its last argument is False.
Results = Calculate_Scores(
    clip_model,
    preprocess,
    val_dataset,
    target_features,
    all_labels_finegrained,
    output_dir,
    device,
    False,
)

## Evaluation - Broad Labels

In [None]:
# Flatten the per-result data into two parallel lists with one entry per
# (result, broad class) pair: the score and a 0/1 ground-truth flag.
num_broad_classes = len(all_labels_broad)
scores = []
gt_label = []
for res in Results:
  one_hot = [0] * num_broad_classes
  one_hot[res[1]] = 1  # res[1] is used as the index of the true class
  scores.extend(res[0])
  gt_label.extend(one_hot)

In [None]:
# Print the dataset sizes and check that the flattened lists hold exactly one
# entry per (object, broad class) pair.
num_objects = get_total_num_obj(val_dataset)
# This section evaluates broad labels, so report the broad class count -- the
# same count the assertion below relies on.
num_classes = len(all_labels_broad)

# get_num_imgs() is already a count (the notebook passes it to range()), so it
# must not be wrapped in len() -- that raises a TypeError.
print("Number of images: ", val_dataset.get_num_imgs())
print("Number of objects: ", num_objects)
print("Number of classes: ", num_classes)
assert len(scores) == len(gt_label)
assert len(scores) == num_objects * num_classes
print('-'*20)

In [None]:
from sklearn.metrics import f1_score,precision_score,recall_score,average_precision_score,accuracy_score
import numpy as np

def to_labels(pos_probs, threshold):
  """Binarise scores: 1 where a score is strictly above `threshold`, else 0."""
  return [1 if nm > threshold else 0 for nm in pos_probs]

print("Prediction Results:")

# Try the thresholds 0.10, 0.15, ..., 0.90 and keep the one with the best F1.
# NOTE(review): the threshold is tuned on the same data it is reported on.
cand_thresholds = [x / 100.0 for x in range(10, 95, 5)]
f1_scores_for_thrs = [f1_score(gt_label, to_labels(scores, t)) for t in cand_thresholds]
ix = np.argmax(f1_scores_for_thrs)
Th = cand_thresholds[ix]

# Both values have to be passed to % as ONE tuple; "fmt % Th, x" formats with Th
# alone and fails with "not enough arguments for format string".
print('Threshold=%.2f, F-Score=%.5f' % (Th, f1_scores_for_thrs[ix]))

# Binarise once at the chosen threshold and reuse it for all three metrics.
best_preds = to_labels(scores, Th)
print("precision: %.5f" % precision_score(gt_label, best_preds))
print("recall: %.5f" % recall_score(gt_label, best_preds))
print("f1_score: %.5f" % f1_score(gt_label, best_preds))

print("Classification Results:")
# One-hot rows per result: the true class and the arg-max (top-1) prediction.
# Assumes Results comes from the broad-label run, i.e. res[1] and res[0] are
# already indexed by broad class -- TODO confirm against Calculate_Scores.
num_broad_classes = len(all_labels_broad)
y_true = []
y_score = []

for res in Results:
  true_row = [0] * num_broad_classes
  true_row[res[1]] = 1
  y_true.append(true_row)

  pred_row = [0] * num_broad_classes
  pred_row[np.argmax(res[0])] = 1
  y_score.append(pred_row)

print('average_precision=%.5f, ACC=%.5f' % (average_precision_score(y_true, y_score), accuracy_score(y_true, y_score)))