In [None]:
!pip install git+https://github.com/openai/CLIP.git
!pip install open_clip_torch
!pip install sentence_transformers

In [None]:
from PIL import Image
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader
from fastai.vision.all import *
import torchvision
import torch
import torchvision.models as models
import torch
import open_clip
import cv2
from sentence_transformers import util
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm, trange

In [None]:
# Root directory of the image dataset. `Path` takes only path segments: the
# former `image_size=(250, 250)` keyword had no effect (pathlib ignored extra
# keywords, deprecated them in Python 3.12 and rejects them from 3.14).
path = Path(r"/kaggle/input/lfw-yyy3-lbq")
# All image files found under `path`.
files = get_image_files(path)


class SiameseTransform(Dataset):
    """Dataset of randomly drawn image-file pairs with a same-label flag.

    Indexing returns ``(f1, f2, same)`` where ``f1`` is ``files[idx]``, ``f2``
    is a randomly chosen partner file and ``same`` is ``True`` when the partner
    was drawn from the label of ``f1``. The partner is re-drawn on every
    access, so indexing the same ``idx`` twice generally yields different
    pairs.

    Args:
        files: sized, indexable collection of image paths.
        label_func: callable mapping a file to its label.
        splits: accepted for call compatibility; not used by this class.
        transform: stored as ``self.transform``; ``__getitem__`` returns file
            paths and does not apply it.
    """

    def __init__(self, files, label_func, splits, transform=None):
        # Group the files that were passed in by label in a single pass. This
        # keeps the dataset self-contained (no reliance on a notebook-level
        # `path` variable) and avoids one directory scan per label.
        lbl2files = {}
        for f in files:
            lbl2files.setdefault(label_func(f), []).append(f)
        self.lbl2files = lbl2files
        # Distinct labels in first-seen order (dicts preserve insertion order).
        self.labels = list(lbl2files)
        self.label_func = label_func
        self.files = files
        self.transform = transform

    def __len__(self):
        """Number of anchor files."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(anchor_file, partner_file, same_label)`` for ``idx``."""
        f1 = self.files[idx]
        f2, same = self._draw(f1)
        return f1, f2, same

    def _draw(self, f):
        """Pick a partner for ``f``; roughly half of the draws share its label.

        Returns:
            ``(partner_file, same)`` with ``same`` a bool.

        Raises:
            ValueError: a different-label partner is requested but the dataset
                contains only one label.
        """
        same = random.random() < 0.5
        cls = self.label_func(f)
        if not same:
            others = [l for l in self.labels if l != cls]
            if not others:
                raise ValueError(
                    "cannot draw a different-label pair: the dataset contains a single label"
                )
            cls = random.choice(others)
        # NOTE: for same-label draws the candidate list still contains `f`
        # itself, so the partner can be the anchor file (always, when the label
        # has only one file).
        return random.choice(self.lbl2files[cls]), same

In [None]:
# Image embedding model: OpenCLIP ViT-B-16-plus-240 with LAION-400M weights.
device = "cuda" if torch.cuda.is_available() else "cpu"
# create_model_and_transforms returns (model, train transform, eval transform);
# only the eval transform is kept as `preprocess`.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16-plus-240', pretrained="laion400m_e32")
model.to(device)
# The model is only used for inference here; open_clip creates it in train
# mode, so switch to eval mode explicitly.
model.eval()
def imageEncoder(img):
    """Embed one image with the module-level CLIP ``model``.

    Args:
        img: image array accepted by ``PIL.Image.fromarray``. PIL takes the
            channel order as given (RGB / RGBA for 3 / 4 channels), so arrays
            decoded by OpenCV (BGR) should be converted before being passed in.

    Returns:
        The tensor produced by ``model.encode_image`` for a batch containing
        this single image, on ``device`` and detached from autograd.
    """
    pil_img = Image.fromarray(img).convert('RGB')
    batch = preprocess(pil_img).unsqueeze(0).to(device)  # add batch dimension
    # Inference only: without no_grad every call would record an autograd
    # graph that is never used, costing memory and time.
    with torch.no_grad():
        return model.encode_image(batch)
def _load_rgb(image_path):
    """Read an image file with OpenCV and return it in PIL channel order."""
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise OSError(f"cv2.imread could not read image: {image_path}")
    # OpenCV decodes colour images as BGR(A); imageEncoder hands the array to
    # PIL, which interprets it as RGB(A), so reorder the channels first.
    # Single-channel (2-D) images need no reordering.
    if img.ndim == 3:
        channels = img.shape[2]
        if channels == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        elif channels == 4:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)
    return img


def generateScore(image1, image2):
    """Cosine similarity of two image files' CLIP embeddings, as a percentage.

    Args:
        image1: path (str) of the first image file.
        image2: path (str) of the second image file.

    Returns:
        float: cosine similarity multiplied by 100 and rounded to 2 decimals.

    Raises:
        OSError: if either file cannot be read by OpenCV.
    """
    emb1 = imageEncoder(_load_rgb(image1))
    emb2 = imageEncoder(_load_rgb(image2))
    cos_scores = util.pytorch_cos_sim(emb1, emb2)
    return round(float(cos_scores[0][0]) * 100, 2)

In [None]:
# Step 1: Score randomly drawn image pairs.
# Seed the stdlib RNG used for pair sampling so a re-run draws the same pairs.
SEED = 42
random.seed(SEED)

splits = RandomSplitter()(files)
dataset = SiameseTransform(files, parent_label, splits)
if len(dataset) == 0:
    # Without this guard the collection loop below would never terminate.
    raise ValueError(f"no image files found under {path}")

scores = []   # similarity score (0-100 scale) per pair
ifsame = []   # ground truth per pair: True when both files share a label
# Collection stops once MORE than `long_num` pairs are scored, i.e. at
# long_num + 1 scores.
# NOTE(review): presumably chosen so that three full passes over a
# 13,233-image dataset are scored (3 * 13233 = 39699) — confirm.
long_num = 39698

# One pass yields len(dataset) pairs; repeat passes (each re-draws random
# partners) until enough scores are collected.
while len(scores) <= long_num:
    for images1, images2, labels in tqdm(dataset, desc="Processing"):
        if len(scores) > long_num:
            break
        # Score each pair exactly once; the CLIP forward passes dominate runtime.
        score = generateScore(str(images1), str(images2))
        scores.append(score)
        ifsame.append(labels)
print(f"数据量为：{len(scores)}")

# Step 2: Sweep integer thresholds over the observed score range and keep the
# one with the highest accuracy (a pair is predicted "same" when score > thresh).
if not scores:
    raise ValueError("no scores were collected; cannot search for a threshold")

score_arr = np.asarray(scores)
min_score, max_score = int(min(scores)), int(max(scores))

best_accuracy = 0
best_thresh = 0
# The upper bound is inclusive so the grid is never empty, even when all scores
# truncate to the same integer.
for thresh in tqdm(np.arange(min_score, max_score + 1), desc="Processing"):
    predictions = score_arr > thresh
    accuracy = accuracy_score(ifsame, predictions)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_thresh = thresh

# Precision / recall / F1 are only reported for the winning threshold, so they
# are computed once here instead of at every grid point.
best_predictions = score_arr > best_thresh
best_precision = precision_score(ifsame, best_predictions)
best_recall = recall_score(ifsame, best_predictions)
best_f1 = f1_score(ifsame, best_predictions)

# Step 3: Print the best threshold and corresponding metrics
print(f"min scores={min_score}")
print(f"max scores={max_score}")
print(f"Best threshold = {best_thresh:.3f}")  # three decimal places
print(f"Accuracy = {best_accuracy:.3f}")
print(f"Precision = {best_precision:.3f}")
print(f"Recall = {best_recall:.3f}")
print(f"F1_score = {best_f1:.3f}")

# print(f"similarity Score: ", round(generateScore(r"/kaggle/input/lfw-yuantu-2/AJ_Cook/AJ_Cook_0001.jpg",
#                                                  r"/kaggle/input/lfw-yuantu-2/AJ_Lamas/AJ_Lamas_0001.jpg"), 2))
# similarity Score: 42.78