In [4]:
import torch
from PIL import Image
import open_clip
import os

# Zero-shot evaluation of a fine-tuned open_clip ViT-B-32 checkpoint on a
# folder-per-class dataset: each subfolder name is turned into a text prompt,
# every image is matched against all prompts, and top-1 accuracy is reported.

# Load the model and tokenizer
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32',
    pretrained='/home/umair.nawaz/Research_Work/Main-DATA/My_Surgical/CLIP/open_clip/logs/2024_08_05-00_53_17-model_ViT-B-32-lr_0.001-b_128-j_8-p_amp/checkpoints/epoch_30.pt'
)
model.eval()
tokenizer = open_clip.get_tokenizer('ViT-B-32')

# Run on the GPU when one is available; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
model.to(device)
# CUDA autocast is only meaningful (and warning-free) when CUDA is present.
use_amp = torch.cuda.is_available()

# Base folder containing subfolders for each deficiency type
base_folder = "/share/sdb/umairnawaz/Thesis_Work/downstream/crops/dataset_22"

# Dynamically extract subfolder names (sorted so class indices are stable)
deficiency_types = sorted(
    entry for entry in os.listdir(base_folder)
    if os.path.isdir(os.path.join(base_folder, entry))
)
print("Deficiency types found:", deficiency_types)

# Generate class prompts based on subfolder names; class_prompts[i] belongs to
# deficiency_types[i].
class_prompts = [f"a photo of {def_type} deficient leaf" for def_type in deficiency_types]
print("Class prompts generated:", class_prompts)

text = tokenizer(class_prompts).to(device)

# The prompts never change, so encode and normalise them once instead of once
# per image.
with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
    text_features = model.encode_text(text)
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Initialize counters for accuracy calculation
correct_predictions = 0
total_images = 0

# Process each subfolder corresponding to a deficiency
for expected_index, deficiency in enumerate(deficiency_types):
    image_folder = os.path.join(base_folder, deficiency)
    print(f"Processing folder: {image_folder}")

    # Iterate through each image in the current subfolder (sorted for a
    # reproducible log order)
    for image_filename in sorted(os.listdir(image_folder)):
        if not image_filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        total_images += 1
        image_path = os.path.join(image_folder, image_filename)
        # The context manager closes the underlying file as soon as the image
        # has been converted to a tensor.
        with Image.open(image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)

        with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

        # Determine the predicted class
        predicted_class_index = text_probs.squeeze(0).argmax().item()
        predicted_class = class_prompts[predicted_class_index]

        # Verify and compare predicted class with expected
        print(f"Prediction for {image_filename}: {predicted_class} (Expected: {class_prompts[expected_index]})")

        # Compare class indices rather than re-building the prompt string
        if predicted_class_index == expected_index:
            correct_predictions += 1

# Calculate and print the final accuracy
accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0
print(f"Total images processed: {total_images}, Correct predictions: {correct_predictions}")
print(f"Final Accuracy: {accuracy:.2f}%")


  checkpoint = torch.load(checkpoint_path, map_location=map_location)


FileNotFoundError: [Errno 2] No such file or directory: '/share/sdb/umairnawaz/Thesis_Work/downstream/crops/dataset_22'

In [2]:
import torch
from PIL import Image
import open_clip
import os

# Zero-shot evaluation of a fine-tuned open_clip ViT-B-32 checkpoint on a
# folder-per-class weed dataset: each subfolder name becomes a text prompt,
# every image is matched against all prompts, and top-1 accuracy is reported.
#
# To evaluate the stock LAION-2B weights instead of the fine-tuned checkpoint:
# model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')

# Load the model and tokenizer
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32',
    pretrained='/share/sdb/umairnawaz/Thesis_Work/CLIP/open_clip/logs/2024_08_05-00_53_17-model_ViT-B-32-lr_0.001-b_128-j_8-p_amp/checkpoints/epoch_30.pt'
)
model.eval()
tokenizer = open_clip.get_tokenizer('ViT-B-32')

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
model.to(device)  # Move model to GPU if available
# CUDA autocast is only enabled when CUDA is actually present.
use_amp = torch.cuda.is_available()

# Base folder containing one subfolder per class
base_folder = "/share/sdb/umairnawaz/Thesis_Work/downstream/crops/dataset_24/dataset"

# Dynamically extract and sort subfolder names (sorted so class indices are stable)
deficiency_types = sorted(
    entry for entry in os.listdir(base_folder)
    if os.path.isdir(os.path.join(base_folder, entry))
)
print("Deficiency types found:", deficiency_types)

# Generate class prompts based on subfolder names; class_prompts[i] belongs to
# deficiency_types[i].
class_prompts = [f"a photo of {def_type} weed" for def_type in deficiency_types]
print("Class prompts generated:", class_prompts)

text = tokenizer(class_prompts).to(device)  # Move text tokens to GPU

# The prompts never change, so encode and normalise them once instead of once
# per image.
with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
    text_features = model.encode_text(text)
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Initialize counters for accuracy calculation
correct_predictions = 0
total_images = 0

# Process each class subfolder
for expected_index, deficiency in enumerate(deficiency_types):
    image_folder = os.path.join(base_folder, deficiency)
    print(f"Processing folder: {image_folder}")

    # Iterate through each image in the current subfolder (sorted for a
    # reproducible processing order)
    for image_filename in sorted(os.listdir(image_folder)):
        if not image_filename.lower().endswith(('.png', '.jpg', '.jpeg' , '.tif')):
            continue

        total_images += 1
        image_path = os.path.join(image_folder, image_filename)
        # The context manager closes the underlying file as soon as the image
        # has been converted to a tensor.
        with Image.open(image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)  # Move image to GPU

        with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

        # Determine the predicted class
        predicted_class_index = text_probs.squeeze(0).argmax().item()
        predicted_class = class_prompts[predicted_class_index]

        # Uncomment for a per-image log:
        # print(f"Prediction for {image_filename}: {predicted_class} (Expected: {class_prompts[expected_index]})")

        # Compare class indices rather than re-building the prompt string
        if predicted_class_index == expected_index:
            correct_predictions += 1

# Calculate and print the final accuracy
accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0
print(f"Total images processed: {total_images}, Correct predictions: {correct_predictions}")
print(f"Final Accuracy: {accuracy:.2f}%")


ModuleNotFoundError: No module named 'open_clip'

['a photo of zinc deficient leaf',
 'a photo of sulphur deficient leaf',
 'a photo of iron deficient leaf',
 'a photo of manganese deficient leaf',
 'a photo of healthy deficient leaf',
 'a photo of potassium deficient leaf',
 'a photo of calcium deficient leaf',
 'a photo of boron deficient leaf',
 'a photo of magnesium deficient leaf']

In [5]:
import torch
import clip
from PIL import Image
import os

# Zero-shot evaluation of the stock OpenAI CLIP ViT-B/32 model on a
# folder-per-class dataset. Each class folder name is turned into a prompt and
# every image is assigned the class whose prompt scores highest.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Base path containing all class folders
base_path = "/share/sdb/umairnawaz/My_Surgical/downstream/crops/dataset_48/YELLOW-RUST-19/YELLOW-RUST-19"
class_folders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]

# Create descriptive prompts for each class and tokenize; prompts[i] belongs
# to class_folders[i].
prompts = [f"a photo of yellow rust leaf with {class_name.lower()} disease" for class_name in class_folders]

print(prompts)
text_inputs = clip.tokenize(prompts).to(device)

# The prompts are identical for every image, so encode them once up front.
# The scoring below mirrors what `model(image, text)` computes internally:
# cosine similarity of the normalised features scaled by exp(logit_scale).
with torch.no_grad():
    text_features = model.encode_text(text_inputs)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    logit_scale = model.logit_scale.exp()

# Store results and count correct predictions
results = []
correct_predictions = 0
total_images = 0

for folder in class_folders:
    folder_path = os.path.join(base_path, folder)

    for file in os.listdir(folder_path):
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):  # Checking file extension
            continue

        total_images += 1
        image_path = os.path.join(folder_path, file)
        # Close the file handle as soon as the image has been preprocessed.
        with Image.open(image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)

        with torch.no_grad():
            # Encode the image exactly once (the original encoded it twice and
            # discarded the first result).
            image_features = model.encode_image(image)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            logits_per_image = logit_scale * image_features @ text_features.t()
            # Batch size is 1, so drop the batch axis before taking the argmax.
            probs = logits_per_image.softmax(dim=-1).squeeze(0).cpu()

        predicted_class_index = probs.argmax().item()
        predicted_class_name = class_folders[predicted_class_index]

        results.append((file, folder, predicted_class_name, probs[predicted_class_index].item()))

        if folder == predicted_class_name:
            correct_predictions += 1

# Print or process results
for file_name, actual_class, predicted_class, prob in results:
    print(f"{file_name} - Actual: {actual_class}, Predicted: {predicted_class}, Prob: {prob:.4f}")

# Calculate accuracy (reported as a fraction in [0, 1])
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"Accuracy: {accuracy:.4f}")


['a photo of yellow rust leaf with moderately resistant disease', 'a photo of yellow rust leaf with moderately susceptible disease', 'a photo of yellow rust leaf with susceptible disease', 'a photo of yellow rust leaf with moderately resistant-moderately susceptible disease', 'a photo of yellow rust leaf with no disease', 'a photo of yellow rust leaf with resistant disease']
2_MR_1377.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.3306
2_MR_2296.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.2118
2_MR_1785.jpg - Actual: Moderately Resistant, Predicted: No, Prob: 0.3013
2_MR_931.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.3445
2_MR_1749.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.2690
2_MR_982.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.2991
2_MR_1969.jpg - Actual: Moderately Resistant, Predicted: Resistant, Prob: 0.2466
2_MR_2363.jpg - Actual: Moderately Resistant, Predicted: Resista

In [1]:
import os
import torch
import clip
from PIL import Image
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Choose the compute device (GPU when visible, CPU otherwise) and load the
# CLIP ViT-B/32 weights together with the matching image preprocessing.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load('ViT-B/32', device=device)

# Function to load images from a folder
def load_images_from_folder(folder):
    """Load and preprocess every image found in the class subfolders of ``folder``.

    ``folder`` is expected to contain one subdirectory per class; entries of
    ``folder`` that are not directories are ignored. Subdirectories and the
    files inside them are visited in sorted order so that the returned lists
    are in the same order on every run and on every filesystem.

    Relies on the module-level ``preprocess`` callable and ``device`` for the
    image transform and tensor placement.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        A tuple ``(images, labels)`` of equal length, where ``images[i]`` is
        the preprocessed tensor of one image with a leading batch axis of
        size 1, already moved to ``device``, and ``labels[i]`` is the name of
        the class subfolder it came from.

    Files that cannot be opened or preprocessed are reported on stdout and
    skipped; they appear in neither list.
    """
    images = []
    labels = []
    for class_folder in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        if not os.path.isdir(class_path):
            continue
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            try:
                # Manage the *opened file* with the context manager (not the
                # converted copy) so the handle is always released.
                with Image.open(img_path) as raw_img:
                    tensor = preprocess(raw_img.convert("RGB"))
            except Exception as e:
                print(f"Skipping file {img_path}, due to error: {e}")
                continue
            # Device transfer happens outside the try block: a failure here
            # is not a per-file problem and should not be silently skipped.
            images.append(tensor.unsqueeze(0).to(device))
            labels.append(class_folder)
    return images, labels

# Embed every image of the dataset with CLIP, project the embeddings to 2-D
# with t-SNE and plot them coloured by class.

# Path to your dataset folder
dataset_folder = "/share/sdb/umairnawaz/My_Surgical/downstream/fish/dataset_50/AlevinosRV"

# Load and preprocess images
images, labels = load_images_from_folder(dataset_folder)
if not images:
    # torch.cat([]) would otherwise fail with an unhelpful error.
    raise RuntimeError(f"No readable images found under {dataset_folder}")
images = torch.cat(images)

# Generate embeddings in mini-batches so the forward pass does not have to
# hold activations for the whole dataset at once.
EMBED_BATCH_SIZE = 256
feature_chunks = []
with torch.no_grad():
    for image_batch in images.split(EMBED_BATCH_SIZE):
        batch_features = model.encode_image(image_batch)
        batch_features = batch_features / batch_features.norm(dim=-1, keepdim=True)
        # Store as float32 on the CPU; the model may run in half precision.
        feature_chunks.append(batch_features.float().cpu())
image_features = torch.cat(feature_chunks).numpy()

# Compute t-SNE. The perplexity must stay below the number of samples, so the
# scikit-learn default of 30 is capped for small datasets.
n_samples = image_features.shape[0]
tsne = TSNE(n_components=2, random_state=42, perplexity=min(30.0, max(n_samples - 1, 1)))
tsne_results = tsne.fit_transform(image_features)

# Create a color map for the labels. Sorting makes the label -> colour
# assignment identical on every run (iteration order of a set of strings
# is not).
unique_labels = sorted(set(labels))
color_map = {label: i for i, label in enumerate(unique_labels)}
colors = [color_map[label] for label in labels]

# Plotting
plt.figure(figsize=(12, 8))
scatter = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=colors, cmap='tab20', alpha=0.6)
colorbar = plt.colorbar(scatter, ticks=range(len(unique_labels)), label='Class Labels')
# Show class names on the colorbar instead of bare integer ids.
colorbar.set_ticklabels(unique_labels)
plt.title('t-SNE Visualization of CLIP Image Embeddings')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.grid(True)
plt.show()


TypeError: 

Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.2 threadpoolctl-3.5.0


In [2]:
# import os
# import shutil
# import pandas as pd

# # Load the CSV file
# df = pd.read_csv('/share/sdb/umairnawaz/My_Surgical/downstream/livestock/dataset_53/dataset.csv')

# # Base path where the current sku folders are located
# base_path = '/share/sdb/umairnawaz/My_Surgical/downstream/livestock/dataset_53/images/'

# # Base path for the breed folders
# breed_base_path = '/share/sdb/umairnawaz/My_Surgical/downstream/livestock/dataset_53/breeds'

# # Ensure breed directories exist
# for breed in df['breed'].unique():
#     breed_dir = os.path.join(breed_base_path, breed)
#     if not os.path.exists(breed_dir):
#         os.makedirs(breed_dir)

# # Move images from sku to breed directories
# for index, row in df.iterrows():
#     sku_folder = os.path.join(base_path, row['sku'])
#     breed_folder = os.path.join(breed_base_path, row['breed'])
    
#     if os.path.exists(sku_folder):
#         for file_name in os.listdir(sku_folder):
#             if file_name[0] == '.':
#                 print("Hi")
#                 continue
#             src_file = os.path.join(sku_folder, file_name)
#             dst_file = os.path.join(breed_folder, file_name)
#             shutil.move(src_file, dst_file)

# print("Images have been organized into breed folders.")


In [5]:
# import os

# # Translation dictionary
# translate = {
#     "cane": "dog", "cavallo": "horse", "elefante": "elephant", 
#     "farfalla": "butterfly", "gallina": "chicken", "gatto": "cat", 
#     "mucca": "cow", "pecora": "sheep", "scoiattolo": "squirrel", 
#     "dog": "cane", "horse": "cavallo", "elephant": "elefante", 
#     "butterfly": "farfalla", "chicken": "gallina", "cat": "gatto", 
#     "cow": "mucca", "spider": "ragno", "squirrel": "scoiattolo"
# }

# # Base directory containing the folders to be renamed
# base_dir = '/share/sdb/umairnawaz/My_Surgical/downstream/livestock/dataset_55/raw-img'

# # Rename folders according to the translation dictionary
# for folder_name in os.listdir(base_dir):
#     full_path = os.path.join(base_dir, folder_name)
#     if folder_name in translate and os.path.isdir(full_path):
#         new_name = translate[folder_name]
#         new_path = os.path.join(base_dir, new_name)
#         # Renaming the directory
#         os.rename(full_path, new_path)
#         print(f"Renamed {folder_name} to {new_name}")

# print("Folder renaming completed.")
