In [2]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/ABC_Top3"
main_folder_path = "datasets/D"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/ABC_Top3

Total Accuracy: 81.16%


In [3]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/ABC_Top3"
main_folder_path = "datasets/E"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/ABC_Top3

Total Accuracy: 26.94%


ABC_Top4 on D and E

In [4]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/ABC_Top4"
main_folder_path = "datasets/D"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/ABC_Top4

Total Accuracy: 80.33%


In [5]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/ABC_Top4"
main_folder_path = "datasets/E"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/ABC_Top4

Total Accuracy: 78.56%


BCD_Top3 on A and E

In [6]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/BCD_Top3"
main_folder_path = "datasets/A"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BCD_Top3

Total Accuracy: 68.84%


In [7]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
# (only FutureWarning is filtered, and only for modules matching these patterns).
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
# model_dir: directory the finetuned weights are loaded from.
# main_folder_path: dataset root; its sub-directory names double as class labels/prompts.
# BATCH_SIZE: maximum number of images sent through the image encoder at once.
model_dir = "models/BCD_Top3"
main_folder_path = "datasets/E"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory of the dataset root is one class. The directory name is used verbatim
# as the text prompt, and the sorted order fixes the index -> label mapping used to decode
# predictions later in this cell.
candidate_labels = sorted([d for d in os.listdir(main_folder_path) if os.path.isdir(os.path.join(main_folder_path, d))])
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# NOTE(review): from_pretrained is called on the instance and its return value is discarded,
# so it presumably loads the checkpoint into `model` in place -- confirm against medclip.
model.from_pretrained(model_dir)
# Everything below moves its tensors to the GPU, so a CUDA device is required.
model.cuda()
# Switch to evaluation mode (presumably disables dropout as for any torch module -- confirm).
model.eval()

# ------------------- Generate text embeddings -------------------
# Tokenize all label prompts in one call with padding enabled.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
# Move every tensor entry to the GPU; non-tensor entries are left untouched.
for k, v in text_inputs.items():
    if isinstance(v, torch.Tensor):
        text_inputs[k] = v.cuda()
# The prompts are encoded once, without gradient tracking, and reused for every image batch.
with torch.no_grad():
    text_embeds = model.encode_text(input_ids=text_inputs["input_ids"], attention_mask=text_inputs["attention_mask"])

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path):
    """Collect image files and their true labels from a class-per-folder dataset.

    The class folders are discovered from ``main_folder_path`` itself (sorted by
    name) rather than from a module-level label list, so the result always
    matches the directory that was passed in. Files inside each class folder are
    visited in sorted order because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make batch composition vary between runs.

    Args:
        main_folder_path: Dataset root containing one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the name of the class folder containing
        ``image_paths[i]``. Only files ending in .jpg, .jpeg or .png
        (case-insensitive) are included.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    class_names = sorted(
        entry for entry in os.listdir(main_folder_path)
        if os.path.isdir(os.path.join(main_folder_path, entry))
    )
    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(main_folder_path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_name)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The last slice is shorter when ``len(items)`` is not a multiple of ``batch_size``.
    """
    starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in starts)

# ------------------- Evaluation -------------------
# Predicted and true labels, appended batch by batch so index i refers to the same image in both.
all_predictions = []
all_true_labels = []
# Images that fail to open are excluded from the accuracy; count them so that is not silent.
skipped_images = 0

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new, loaded image, so no extra copy is needed
                # to keep it usable after the file handle is closed.
                batch_images.append(img.convert('RGB'))
            # Only reached when the image loaded, keeping images and labels aligned.
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    # Move every tensor entry to the GPU, where the model lives.
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # The whole scoring step stays under no_grad: computing the logits outside the context
    # would let autograd track them through model.logit_scale for no benefit during evaluation.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
    # Highest-scoring prompt per image, mapped back to its label by index.
    predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are not counted in the accuracy.")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BCD_Top3

Total Accuracy: 12.60%


BCD_Top4 on A and E

In [8]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/BCD_Top4"      # directory the finetuned weights are loaded from
main_folder_path = "datasets/A"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BCD_Top4

Total Accuracy: 42.49%


In [9]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/BCD_Top4"      # directory the finetuned weights are loaded from
main_folder_path = "datasets/E"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BCD_Top4

Total Accuracy: 24.58%


AD_Top3 on B and C

In [10]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/AD_Top3"       # directory the finetuned weights are loaded from
main_folder_path = "datasets/B"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AD_Top3

Total Accuracy: 63.35%


In [11]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/AD_Top3"       # directory the finetuned weights are loaded from
main_folder_path = "datasets/C"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AD_Top3

Total Accuracy: 65.12%


AD_Top4 on B and C

In [12]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/AD_Top4"       # directory the finetuned weights are loaded from
main_folder_path = "datasets/B"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AD_Top4

Total Accuracy: 54.35%


In [13]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.*")
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.*")

# ------------------- Parameters -------------------
model_dir = "models/AD_Top4"       # directory the finetuned weights are loaded from
main_folder_path = "datasets/C"    # test set root; its sub-directories are the classes
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the test set is used both as a class label and,
# further down, as the text prompt that images are compared against.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is not used: the call is relied on to load the checkpoint
# into this instance.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The prompts are encoded once here and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for key, value in list(text_inputs.items()):
    # Only tensor entries are moved to the GPU; anything else is left as is.
    if isinstance(value, torch.Tensor):
        text_inputs[key] = value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image file paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in a sub-directory of it.
        labels: Sub-directory names to scan, in the order they are visited.
            Defaults to the module-level ``candidate_labels`` so existing
            one-argument calls keep working.

    Returns:
        A ``(image_paths, labels)`` pair of equal-length lists: the path of every
        ``.jpg``/``.jpeg``/``.png`` file (case-insensitive) found directly inside
        each class folder, and the name of the folder it came from.
    """
    if labels is None:
        labels = candidate_labels

    image_paths = []
    image_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so the file order
        # (and therefore the batch composition) is the same on every run.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(subfolder_path, filename))
                image_labels.append(subfolder)
    return image_paths, image_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements each.

    The final slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in batch_starts)

# ------------------- Evaluation -------------------
all_predictions = []
all_true_labels = []
skipped_images = 0  # unreadable files; they are left out of the accuracy denominator

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # Copy the converted image so it no longer depends on the open file.
                rgb_image = img.convert('RGB').copy()
        except Exception as e:
            # Best effort: report the file and carry on with the rest of the batch.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together, only after the load fully succeeded,
        # so the two lists can never get out of step.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the whole inference step under no_grad. model.logit_scale may be a
    # trainable parameter, in which case computing the logits outside this
    # context would record an autograd graph for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        # One row per image, one column per candidate label.
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
if total == 0:
    # Make it obvious that the 0.00% below means "nothing evaluated", not "all wrong".
    print(f"Warning: no images were evaluated under {main_folder_path}.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AD_Top4

Total Accuracy: 52.75%


AB_Top3 on D and E

In [15]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/AB_Top3"
main_folder_path = "datasets/D"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AB_Top3

Total Accuracy: 67.32%


In [16]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/AB_Top3"
main_folder_path = "datasets/E"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AB_Top3

Total Accuracy: 21.53%


AB_Top4 on D and E

In [19]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/AB_Top4"
main_folder_path = "datasets/D"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AB_Top4

Total Accuracy: 51.65%


In [20]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/AB_Top4"
main_folder_path = "datasets/E"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/AB_Top4

Total Accuracy: 21.99%


BD_Top3 on A, C and E (note: only the A and C runs appear below; the E run is missing)

In [21]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/BD_Top3"
main_folder_path = "datasets/A"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BD_Top3

Total Accuracy: 26.91%


In [22]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Suppress specific warnings from huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
# Checkpoint directory, dataset root (one sub-folder per class) and images per forward pass.
model_dir = "models/BD_Top3"
main_folder_path = "datasets/C"
BATCH_SIZE = 32

# ------------------- Get candidate labels -------------------
# Every sub-directory name of the dataset root is used verbatim as a class label / text prompt.
candidate_labels = sorted(
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
)
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
# The return value is unused: the weights are loaded into `model` itself.
model.from_pretrained(model_dir)
model.cuda()
model.eval()

# ------------------- Generate text embeddings -------------------
# The label prompts are encoded once and reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, labels=None):
    """Collect image paths and their true labels from a class-per-folder dataset.

    Args:
        main_folder_path: Dataset root; each class lives in its own sub-folder.
        labels: Sub-folder names to scan, in the order they should be visited.
            When omitted, the notebook-level ``candidate_labels`` list is used.

    Returns:
        A ``(image_paths, labels)`` tuple of equally long lists, where
        ``labels[i]`` is the sub-folder name that ``image_paths[i]`` came from.
        Only ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) are kept.
    """
    if labels is None:
        labels = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    true_labels = []
    for subfolder in labels:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so that batches
        # (and any per-image log output) are reproducible across runs.
        for filename in sorted(os.listdir(subfolder_path)):
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subfolder_path, filename))
                true_labels.append(subfolder)
    return image_paths, true_labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The last slice is shorter when ``len(items)`` is not a multiple of
    ``batch_size``; an empty ``items`` yields nothing.
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[start:start + batch_size] for start in offsets)

# ------------------- Evaluation -------------------
# Predictions and ground-truth labels are accumulated in lockstep so they can
# be compared pairwise once every batch has been scored.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that could not be opened/decoded and were left out

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a separate copy, so the result stays
                # usable after the source file is closed.
                rgb_image = img.convert('RGB')
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
        # Append image and label together so the two lists can never drift apart.
        batch_images.append(rgb_image)
        batch_labels.append(label)
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the similarity computation under no_grad as well: logit_scale is
    # read off the model (presumably a learnable parameter) and would otherwise
    # pull the logits into an autograd graph that evaluation never uses.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of scored images whose predicted label equals the folder label.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
if skipped_images:
    print(f"Skipped {skipped_images} unreadable image(s); they are excluded from the accuracy.")
print(f"\nTotal Accuracy: {accuracy:.2f}%")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BD_Top3

Total Accuracy: 66.77%


BD_Top4 on A and C and E

In [23]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Silence FutureWarning noise raised from inside huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
model_dir = "models/BD_Top4"        # directory holding the finetuned checkpoint
main_folder_path = "datasets/A"     # test-set root: one subfolder per class
BATCH_SIZE = 32                     # images scored per forward pass

# ------------------- Get candidate labels -------------------
# Every subdirectory name under the dataset root doubles as a text prompt / class label.
candidate_labels = [
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
]
candidate_labels.sort()
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
model.from_pretrained(model_dir)  # load the finetuned weights stored under model_dir
model.cuda()                      # move the model to the GPU
model.eval()                      # switch to inference mode

# ------------------- Generate text embeddings -------------------
# Tokenize all labels once; the resulting embeddings are reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, class_names=None):
    """Collect the image files of a class-per-subfolder dataset with their labels.

    Args:
        main_folder_path: Root folder of the dataset; each class lives in a
            subfolder named after its label.
        class_names: Optional iterable of subfolder names to scan. When omitted,
            the module-level ``candidate_labels`` is used, which matches the
            previous behavior of this helper.

    Returns:
        A tuple ``(image_paths, labels)`` of equal-length lists, where
        ``labels[i]`` is the name of the subfolder containing ``image_paths[i]``.
        Only regular files ending in .jpg, .jpeg or .png (case-insensitive) are
        returned, in sorted filename order within each class.

    Raises:
        OSError: If a class subfolder cannot be listed (e.g. it does not exist).
    """
    if class_names is None:
        class_names = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    labels = []
    for subfolder in class_names:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
        for filename in sorted(os.listdir(subfolder_path)):
            file_path = os.path.join(subfolder_path, filename)
            # Skip directories that merely look like images by name.
            if filename.lower().endswith(image_extensions) and os.path.isfile(file_path):
                image_paths.append(file_path)
                labels.append(subfolder)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The final slice may be shorter than ``batch_size``; an empty ``items``
    yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    for start in batch_starts:
        stop = start + batch_size
        yield items[start:stop]

# ------------------- Evaluation -------------------
# Classify every test image by picking the label whose text embedding scores
# highest against the image embedding, then report overall accuracy.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that failed to open/decode; they are excluded from accuracy

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new image, so it remains usable after the
                # file handle is closed; no extra copy() is required.
                batch_images.append(img.convert('RGB'))
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and keep evaluating the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the scoring under no_grad as well: if model.logit_scale requires grad,
    # computing the logits outside would make autograd track state for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label
# equals the name of the folder they were read from.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    # Make it visible that the denominator is smaller than the dataset size.
    print(f"Skipped {skipped_images} unreadable image(s); accuracy covers {total} evaluated image(s).")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BD_Top4

Total Accuracy: 12.18%


In [24]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Silence FutureWarning noise raised from inside huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
model_dir = "models/BD_Top4"        # directory holding the finetuned checkpoint
main_folder_path = "datasets/C"     # test-set root: one subfolder per class
BATCH_SIZE = 32                     # images scored per forward pass

# ------------------- Get candidate labels -------------------
# Every subdirectory name under the dataset root doubles as a text prompt / class label.
candidate_labels = [
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
]
candidate_labels.sort()
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
model.from_pretrained(model_dir)  # load the finetuned weights stored under model_dir
model.cuda()                      # move the model to the GPU
model.eval()                      # switch to inference mode

# ------------------- Generate text embeddings -------------------
# Tokenize all labels once; the resulting embeddings are reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, class_names=None):
    """Collect the image files of a class-per-subfolder dataset with their labels.

    Args:
        main_folder_path: Root folder of the dataset; each class lives in a
            subfolder named after its label.
        class_names: Optional iterable of subfolder names to scan. When omitted,
            the module-level ``candidate_labels`` is used, which matches the
            previous behavior of this helper.

    Returns:
        A tuple ``(image_paths, labels)`` of equal-length lists, where
        ``labels[i]`` is the name of the subfolder containing ``image_paths[i]``.
        Only regular files ending in .jpg, .jpeg or .png (case-insensitive) are
        returned, in sorted filename order within each class.

    Raises:
        OSError: If a class subfolder cannot be listed (e.g. it does not exist).
    """
    if class_names is None:
        class_names = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    labels = []
    for subfolder in class_names:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
        for filename in sorted(os.listdir(subfolder_path)):
            file_path = os.path.join(subfolder_path, filename)
            # Skip directories that merely look like images by name.
            if filename.lower().endswith(image_extensions) and os.path.isfile(file_path):
                image_paths.append(file_path)
                labels.append(subfolder)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The final slice may be shorter than ``batch_size``; an empty ``items``
    yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    for start in batch_starts:
        stop = start + batch_size
        yield items[start:stop]

# ------------------- Evaluation -------------------
# Classify every test image by picking the label whose text embedding scores
# highest against the image embedding, then report overall accuracy.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that failed to open/decode; they are excluded from accuracy

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new image, so it remains usable after the
                # file handle is closed; no extra copy() is required.
                batch_images.append(img.convert('RGB'))
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and keep evaluating the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the scoring under no_grad as well: if model.logit_scale requires grad,
    # computing the logits outside would make autograd track state for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label
# equals the name of the folder they were read from.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    # Make it visible that the denominator is smaller than the dataset size.
    print(f"Skipped {skipped_images} unreadable image(s); accuracy covers {total} evaluated image(s).")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BD_Top4

Total Accuracy: 27.18%


In [25]:
#!/usr/bin/env python
# coding=utf-8

"""
Evaluation script for zero-shot classification using a finetuned MedCLIP model.

Loads a finetuned MedCLIP model, processes breast cancer X-ray images from a test dataset,
classifies them by comparing image and text embeddings, and computes overall accuracy.
"""

import os
from medclip import MedCLIPProcessor, MedCLIPModel, MedCLIPVisionModelViT
from PIL import Image
import torch
import warnings

# Silence FutureWarning noise raised from inside huggingface_hub and transformers
for noisy_module in ("huggingface_hub.*", "transformers.*"):
    warnings.filterwarnings("ignore", category=FutureWarning, module=noisy_module)

# ------------------- Parameters -------------------
model_dir = "models/BD_Top4"        # directory holding the finetuned checkpoint
main_folder_path = "datasets/E"     # test-set root: one subfolder per class
BATCH_SIZE = 32                     # images scored per forward pass

# ------------------- Get candidate labels -------------------
# Every subdirectory name under the dataset root doubles as a text prompt / class label.
candidate_labels = [
    entry
    for entry in os.listdir(main_folder_path)
    if os.path.isdir(os.path.join(main_folder_path, entry))
]
candidate_labels.sort()
print("Candidate labels:", candidate_labels)

# ------------------- Load model and processor -------------------
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT)
model.from_pretrained(model_dir)  # load the finetuned weights stored under model_dir
model.cuda()                      # move the model to the GPU
model.eval()                      # switch to inference mode

# ------------------- Generate text embeddings -------------------
# Tokenize all labels once; the resulting embeddings are reused for every image batch.
text_inputs = processor(text=candidate_labels, return_tensors="pt", padding=True)
for field_name, field_value in text_inputs.items():
    if isinstance(field_value, torch.Tensor):
        text_inputs[field_name] = field_value.cuda()
with torch.no_grad():
    text_embeds = model.encode_text(
        input_ids=text_inputs["input_ids"],
        attention_mask=text_inputs["attention_mask"],
    )

# ------------------- Helper functions -------------------
def get_image_paths(main_folder_path, class_names=None):
    """Collect the image files of a class-per-subfolder dataset with their labels.

    Args:
        main_folder_path: Root folder of the dataset; each class lives in a
            subfolder named after its label.
        class_names: Optional iterable of subfolder names to scan. When omitted,
            the module-level ``candidate_labels`` is used, which matches the
            previous behavior of this helper.

    Returns:
        A tuple ``(image_paths, labels)`` of equal-length lists, where
        ``labels[i]`` is the name of the subfolder containing ``image_paths[i]``.
        Only regular files ending in .jpg, .jpeg or .png (case-insensitive) are
        returned, in sorted filename order within each class.

    Raises:
        OSError: If a class subfolder cannot be listed (e.g. it does not exist).
    """
    if class_names is None:
        class_names = candidate_labels
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    labels = []
    for subfolder in class_names:
        subfolder_path = os.path.join(main_folder_path, subfolder)
        # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
        for filename in sorted(os.listdir(subfolder_path)):
            file_path = os.path.join(subfolder_path, filename)
            # Skip directories that merely look like images by name.
            if filename.lower().endswith(image_extensions) and os.path.isfile(file_path):
                image_paths.append(file_path)
                labels.append(subfolder)
    return image_paths, labels

def batch_generator(items, batch_size):
    """Yield consecutive slices of ``items`` holding at most ``batch_size`` elements.

    The final slice may be shorter than ``batch_size``; an empty ``items``
    yields nothing.
    """
    batch_starts = range(0, len(items), batch_size)
    for start in batch_starts:
        stop = start + batch_size
        yield items[start:stop]

# ------------------- Evaluation -------------------
# Classify every test image by picking the label whose text embedding scores
# highest against the image embedding, then report overall accuracy.
all_predictions = []
all_true_labels = []
skipped_images = 0  # images that failed to open/decode; they are excluded from accuracy

image_paths, true_labels = get_image_paths(main_folder_path)
image_paths_and_labels = list(zip(image_paths, true_labels))

for batch in batch_generator(image_paths_and_labels, BATCH_SIZE):
    batch_images = []
    batch_labels = []
    for image_path, label in batch:
        try:
            with Image.open(image_path) as img:
                # convert() returns a new image, so it remains usable after the
                # file handle is closed; no extra copy() is required.
                batch_images.append(img.convert('RGB'))
            batch_labels.append(label)
        except Exception as e:
            # Best effort: report the unreadable file and keep evaluating the rest.
            print(f"Error opening image {image_path}: {e}")
            skipped_images += 1
            continue
    if not batch_images:
        continue

    image_inputs = processor(images=batch_images, return_tensors="pt")
    for k, v in image_inputs.items():
        if isinstance(v, torch.Tensor):
            image_inputs[k] = v.cuda()

    # Keep the scoring under no_grad as well: if model.logit_scale requires grad,
    # computing the logits outside would make autograd track state for every batch.
    with torch.no_grad():
        image_embeds = model.encode_image(pixel_values=image_inputs["pixel_values"])
        logit_scale = model.logit_scale.exp()
        logits = logit_scale * image_embeds @ text_embeds.t()
        probs = torch.softmax(logits, dim=-1)
        predicted_indices = torch.argmax(probs, dim=-1).cpu().numpy()
    predicted_labels = [candidate_labels[i] for i in predicted_indices]

    all_predictions.extend(predicted_labels)
    all_true_labels.extend(batch_labels)

# ------------------- Compute accuracy -------------------
# Accuracy is the share of successfully loaded images whose predicted label
# equals the name of the folder they were read from.
correct = sum(1 for t, p in zip(all_true_labels, all_predictions) if t == p)
total = len(all_true_labels)
accuracy = (correct / total) * 100 if total > 0 else 0
print(f"\nTotal Accuracy: {accuracy:.2f}%")
if skipped_images:
    # Make it visible that the denominator is smaller than the dataset size.
    print(f"Skipped {skipped_images} unreadable image(s); accuracy covers {total} evaluated image(s).")

Candidate labels: ['The mammogram reveals the presence of benign findings characterized by one or more areas suggestive of the non-cancerous growth in the breast tissue', 'The mammogram reveals the presence of malignant findings characterized by one or more areas suggestive of the cancerous growth in the breast tissue', 'The mammogram shows healthy and normal breast tissue']


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transfo

load model weight from: models/BD_Top4

Total Accuracy: 42.81%
