In [44]:
import numpy as np
import pandas as pd
import json, os, time, pickle
from modules.llm import GeminiModel
from modules.prompts import CoTPrompt
from modules.gen_features import Caltech101
from dotenv import load_dotenv; load_dotenv()

# --- Dataset locations (relative to the notebook's working directory) ---
caltech_images_path = "../database/Caltech/"
caltech_class_meta_path = "../data/caltech-101/meta/caltech_220_images.json"

# --- Prompt system, later passed to ImageFeatures as `PromptSys` ---
CoT = CoTPrompt("CoT")

# --- LLM client ---
# The API key is looked up in the environment (load_dotenv() runs with the
# imports above), so it never appears in the notebook. The lookup yields None
# when the variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
Gemini = GeminiModel(
    GEMINI_API_KEY,
    model_name="gemini-2.0-flash-exp",
)

In [58]:
from modules.gen_features import ImageFeatures
# Parquet working file handed to ImageFeatures for the per-image features.
caltech_image_working_file = "../data/caltech-101/cotPrompt/gemini/image_features.parquet"

Cal101Features = ImageFeatures(
    caltech_images_path,
    caltech_class_meta_path,
    caltech_image_working_file,
    llm_model=Gemini,
    PromptSys=CoT,
)
# The generation call is left disabled in this cell:
# Cal101Features.gen_info()

Loaded parquet!


In [None]:
from modules.gen_features import LabelFeatures
# Parquet working file handed to LabelFeatures for the per-label features.
caltech_label_working_file = "../data/caltech-101/cotPrompt/gemini/label_features.parquet"

Cal101_LabelFeatures = LabelFeatures(
    caltech_images_path,
    caltech_class_meta_path,
    caltech_label_working_file,
    llm_model=Gemini,
)
# The generation call is left disabled in this cell:
# Cal101_LabelFeatures.gen_info()

In [59]:
from modules.prompts import DefaultPrompt
from modules.encoder import FeaturesEncoder

# Path passed to FeaturesEncoder as `model`.
model = "../models/clip-vit-large-patch14"

# NOTE(review): this rebinds caltech_images_path, which the first cell set to
# '../database/Caltech/'. Re-running the feature-generation cells after this
# one would pick up this deeper path — TODO confirm that is intended.
caltech_images_path = "../database/Caltech/caltech-101/101_ObjectCategories"

# Parquet working files holding the generated image / label features.
caltech_image_working_file = "../data/caltech-101/cotPrompt/gemini/image_features.parquet"
caltech_label_working_file = "../data/caltech-101/cotPrompt/gemini/label_features.parquet"

# Pickle paths given to the encoder for the image / label encodings.
encoding_images_path = "../data/caltech-101/cotPrompt/gemini/image_features.pkl"
encoding_labels_path = "../data/caltech-101/cotPrompt/gemini/label_features.pkl"

FE = FeaturesEncoder(
    caltech_images_path,
    encoding_images_path,
    encoding_labels_path,
    img_file_type="jpg",
    model=model,
)

DefPrompt = DefaultPrompt("Default")

# Hand-written label template; it is the second argument of encode_labels.
human_design_prompt = "A photo of {}"

# Only the image encoding runs here; the label encoding call stays disabled.
FE.encode_images(caltech_image_working_file)
# FE.encode_labels(caltech_label_working_file, human_design_prompt)

Loading model: clip-vit-large-patch14
Creating embedding dict...
1/202
Time taken per label: 7.79 seconds
--------------------------------------------------
2/202
Time taken per label: 0.68 seconds
--------------------------------------------------
3/202
Time taken per label: 0.57 seconds
--------------------------------------------------
4/202
Time taken per label: 1.94 seconds
--------------------------------------------------
5/202
Time taken per label: 1.64 seconds
--------------------------------------------------
6/202
Time taken per label: 1.49 seconds
--------------------------------------------------
7/202
Time taken per label: 0.58 seconds
--------------------------------------------------
8/202
Time taken per label: 0.85 seconds
--------------------------------------------------
9/202
Time taken per label: 0.48 seconds
--------------------------------------------------
10/202
Time taken per label: 0.63 seconds
--------------------------------------------------
11/202
Time ta

In [None]:
# Run the label-encoding step with the label working file and the
# human-designed prompt template ("A photo of {}").
# NOTE(review): presumably writes its result to encoding_labels_path — TODO confirm.
FE.encode_labels(caltech_label_working_file, human_design_prompt)

In [None]:
import pandas as pd

# Working files, re-declared here so this inspection cell does not depend on
# the cells above having been run.
caltech_label_working_file = "../data/caltech-101/cotPrompt/gemini/label_features.parquet"
caltech_image_working_file = "../data/caltech-101/cotPrompt/gemini/image_features.parquet"

df = pd.read_parquet(caltech_image_working_file)

# Disabled one-off normalisation of the `init_pred` column (kept for reference):
# df['init_pred'] = df['init_pred'].apply(lambda x: x.strip())
# df['init_pred'] = df['init_pred'].str.lower()
# df.to_parquet(caltech_image_working_file, index=False)

# Distinct values of the `file_name` column in the working file.
df.loc[:, "file_name"].unique()

array(['gerenuk_0011', 'gerenuk_0010', 'hawksbill_0085', 'hawksbill_0017',
       'headphone_0024', 'headphone_0004', 'ant_0011', 'ant_0037',
       'butterfly_0089', 'butterfly_0038', 'lamp_0033', 'lamp_0030',
       'strawberry_0026', 'strawberry_0004', 'water_lilly_0033',
       'water_lilly_0012', 'chandelier_0016', 'chandelier_0077',
       'dragonfly_0051', 'dragonfly_0039', 'crab_0020', 'crab_0038',
       'pagoda_0035', 'pagoda_0039', 'dollar_bill_0034',
       'dollar_bill_0032', 'emu_0010', 'emu_0011', 'inline_skate_0016',
       'inline_skate_0026', 'platypus_0020', 'platypus_0005',
       'dalmatian_0058', 'dalmatian_0018', 'cup_0047', 'cup_0029',
       'airplanes_0011', 'airplanes_0463', 'joshua_tree_0037',
       'joshua_tree_0061', 'cougar_body_0033', 'cougar_body_0009',
       'grand_piano_0080', 'grand_piano_0039', 'trilobite_0008',
       'trilobite_0030', 'brontosaurus_0041', 'brontosaurus_0034',
       'wild_cat_0025', 'wild_cat_0003', 'pigeon_0028', 'pigeon_0010',

### Prediction

In [1]:
import pickle
from modules.classifier import ImageClassifier
# Classification inputs: the pickled image / label encodings.
encoded_text_file = "../data/caltech-101/cotPrompt/gemini/label_features.pkl"
encoded_image_file = "../data/caltech-101/cotPrompt/gemini/image_features.pkl"

# NOTE: unpickling can execute arbitrary code, so only load files that were
# generated locally by this project.
with open(encoded_image_file, mode="rb") as image_fh:
    img_features = pickle.load(image_fh)

with open(encoded_text_file, mode="rb") as label_fh:
    label_features = pickle.load(label_fh)

In [None]:
import pickle
from modules.classifier import ImageClassifier
# NOTE(review): everything below is commented out, so running this cell only
# repeats the two imports. The disabled lines would load the defaultPrompt
# encodings in place of the cotPrompt ones. The output shown under this cell
# appears to come from an earlier version of it — TODO confirm, then clear the
# output or remove the cell.
# Classification
# encoded_image_file = "../data/caltech-101/defaultPrompt/gemini/image_features.pkl"
# encoded_text_file  = "../data/caltech-101/defaultPrompt/gemini/label_features.pkl"

# with open(encoded_image_file, "rb") as f: 
#     img_features = pickle.load(f)

# with open(encoded_text_file, "rb") as f: 
#     label_features = pickle.load(f)

Using model M4: Fused Features Embedding
Using Image Feature: Encoded Image Description X_df
Accuracy: 0.8416
Precision: 0.8416
Recall: 0.8208
F1-score: 0.8106


In [2]:
import pandas as pd
import os

# Column names for the four values unpacked from ImageClassifier.evaluation.
metric_names = ['accuracy', 'precision', 'recall', 'f1']

# Evaluate the M4 classifier once per image-feature variant and collect the
# metrics keyed by the variant name.
metrics_by_feature = {}
for X in ['X_if', 'X_df', 'X_pf', 'X_q']:
    I4P = ImageClassifier(label_features, mode='M4', img_features=img_features, ifeature=X)
    print("="*50)
    df = I4P.classify()
    accuracy, precision, recall, f1 = I4P.evaluation(df)
    metrics_by_feature[X] = [accuracy, precision, recall, f1]

# Build the table in a single step (one row per feature variant) instead of
# growing an empty frame row by row.
acc_df = pd.DataFrame.from_dict(metrics_by_feature, orient='index', columns=metric_names)

save_path = "../data/accuracies/caltech_gemini_cot.csv"
# Make sure the target directory exists so the export cannot fail after the
# evaluation has already run.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# Keep the index when saving: it is the only place the feature-variant names
# (X_if, X_df, X_pf, X_q) are stored. Writing with index=False produced a CSV
# whose rows could not be told apart.
acc_df.to_csv(save_path, index_label='feature')
acc_df.head()

Using model M4: Fused Features Embedding
Using Image Feature: Encoded Image X_if
Accuracy: 0.9010
Precision: 0.9010
Recall: 0.8960
F1-score: 0.8848
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Image Description X_df
Accuracy: 0.8416
Precision: 0.8416
Recall: 0.8306
F1-score: 0.8138
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Init Prediction X_pf
Accuracy: 0.9455
Precision: 0.9455
Recall: 0.9224
F1-score: 0.9287
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Fused Image Feature X_q
Accuracy: 0.9257
Precision: 0.9257
Recall: 0.9010
F1-score: 0.9050


Unnamed: 0,accuracy,precision,recall,f1
X_if,0.90099,0.90099,0.89604,0.884818
X_df,0.841584,0.841584,0.830583,0.813831
X_pf,0.945545,0.945545,0.922442,0.928713
X_q,0.925743,0.925743,0.90099,0.90495
