In [3]:
import os
import pandas as pd
import faiss
import numpy as np
from sklearn.preprocessing import normalize
from collections import Counter
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt
from PIL import Image
import glob
import os
import cv2

In [10]:
# Build one record per icon image: its path ("name"), its class ("label_name",
# the name of the directory that contains the file) and its split ("type").
csv = []
for image in glob.glob(os.path.join("/SSD/DGFIP_2/Icons-50/", "*", "*.png")):
    # The first "_"-separated token of the file name is treated as the company.
    company = os.path.basename(image).split("_")[0]

    # Every "microsoft" icon is held out for testing; all others are training data.
    train_test = "test" if company == "microsoft" else "train"

    csv.append({
        "name": image,
        # Parent directory name, resolved through os.path so it does not
        # depend on "/" being the path separator.
        "label_name": os.path.basename(os.path.dirname(image)),
        "type": train_test,
    })

In [15]:
df = pd.DataFrame(csv)

# Take explicit copies: the "label" column is added below, and assigning a
# column on a boolean-mask slice of `df` triggers SettingWithCopyWarning.
train = df[df["type"] == "train"].copy()
test = df[df["type"] == "test"].copy()

# Class name -> integer id, numbered in the order pandas.Categorical reports
# the categories of the *training* labels.
class_indices = {
    name: idx
    for idx, name in enumerate(pd.Categorical(train["label_name"]).categories)
}

# Plain dict lookups: a class that appears only in the test split raises
# KeyError here instead of silently receiving a bogus id.
train["label"] = [class_indices[x] for x in train["label_name"]]
test["label"] = [class_indices[x] for x in test["label_name"]]

In [18]:
# Persist both splits to CSV in the working directory, without the index column.
train.to_csv(path_or_buf="train_microsoft.csv", index=False)
test.to_csv(path_or_buf="test_microsoft.csv", index=False)

In [25]:
# Ground-truth class ids as plain Python lists, in the row order of each split.
# Uses dict item lookup, so an unknown class name raises KeyError.
train_gnd = list(map(class_indices.__getitem__, train["label_name"]))
test_gnd = list(map(class_indices.__getitem__, test["label_name"]))

In [71]:
data_augmix = pd.read_csv("data_augmix.csv")

# Keep only the augmented rows whose "source" is an image already in `train`.
augmix_for_train = data_augmix[data_augmix["source"].isin(train["name"])]

# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
# (original index values are kept, exactly as append did).
# NOTE(review): `train` is rebound to itself plus the augmented rows, so
# re-running this cell appends the same augmented rows a second time.
train = pd.concat([train, augmix_for_train])

# Recompute the integer labels so the newly added rows get one too.
# NOTE(review): assumes data_augmix.csv has a "label_name" column — confirm.
train["label"] = [class_indices[x] for x in train["label_name"]]

## I) Retrieval Evaluation

In [163]:
# Pool (train) features: two saved feature files joined along axis 1.
pool_parts = [
    np.load("./features/MICROSOFT_v4_32_train.npy"),
    # NOTE(review): this file is read from "./SSD/features/" while the other
    # three come from "./features/" — confirm the path is intentional.
    np.load("./SSD/features/MICROSOFT_v4_64_train.npy"),
]
feature_pool = np.concatenate(pool_parts, axis=1)

# Query (test) features, assembled the same way.
query_parts = [
    np.load("./features/MICROSOFT_v4_32_test.npy"),
    np.load("./features/MICROSOFT_v4_64_test.npy"),
]
feature_query = np.concatenate(query_parts, axis=1)

In [164]:
# Retrieval-based classification: each query is labelled by a majority vote
# among its most similar pool entries.
TOP_K_SEARCH = 200     # neighbours retrieved per query
MIN_SIMILARITY = 0.3   # a neighbour must exceed this similarity to vote
N_VOTERS = 10          # at most this many neighbours take part in the vote

# sklearn's normalize() defaults to row-wise L2 normalisation, so the inner
# product computed by IndexFlatIP acts as cosine similarity. faiss operates on
# C-contiguous float32 arrays, hence the explicit conversion.
pool_vecs = np.ascontiguousarray(normalize(feature_pool), dtype="float32")
query_vecs = np.ascontiguousarray(normalize(feature_query), dtype="float32")

index = faiss.IndexFlatIP(pool_vecs.shape[1])
index.add(pool_vecs)
D, I = index.search(query_vecs, TOP_K_SEARCH)

preds = []
for neighbours, sims in zip(I, D):
    # faiss pads missing results with index -1 when the pool holds fewer than
    # TOP_K_SEARCH vectors; those must never be used to index train_gnd.
    valid = neighbours >= 0
    voters = neighbours[valid & (sims > MIN_SIMILARITY)][:N_VOTERS]
    if len(voters) == 0:
        # No neighbour passed the threshold: fall back to the single nearest
        # neighbour instead of failing on an empty vote.
        voters = neighbours[valid][:1]
    labels = [train_gnd[it] for it in voters]
    # Ties are resolved in favour of the label encountered first, i.e. the one
    # belonging to the more similar neighbour.
    preds.append(Counter(labels).most_common(1)[0][0])
preds = np.array(preds)

print("F1_score macro:", f1_score(test_gnd, preds, average="macro"))
# This line was previously mislabelled "macro" although it reports the weighted average.
print("F1_score weighted:", f1_score(test_gnd, preds, average="weighted"))
print("Accuracy: ", accuracy_score(test_gnd, preds))

F1_score macro: 0.9208836911498736
F1_score macro: 0.9321203707245164
Accuracy:  0.9335968379446641
