In [8]:
import numpy as np
import pandas as pd
import json, os, time, pickle
from modules.llm import OpenAIModel
from modules.prompts import CoTPrompt
from dotenv import load_dotenv; load_dotenv()

# Dataset locations, relative to the notebook's working directory.
caltech_images_path = '../database/Caltech/'
caltech_class_meta_path = '../data/caltech-101/meta/caltech_220_images.json'

# Prompt object handed to the image-feature generator as `PromptSys`.
# NOTE(review): "Defaut" looks like a misspelling of "Default"; confirm what
# CoTPrompt expects before changing it, since the value is used at runtime.
CoT = CoTPrompt("Defaut")

# The key is read from the environment (populated by load_dotenv() above);
# it is None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OpenAI = OpenAIModel(
    OPENAI_API_KEY,
    model_name='gpt-4.1-nano-2025-04-14',
)

In [15]:
from modules.gen_features import ImageFeatures
# Parquet file the image-feature object works against.
# NOTE(review): presumably an on-disk cache of the generated features - confirm
# against modules.gen_features.
caltech_image_working_file = "../data/caltech-101/cotPrompt/openai/image_features.parquet"

Cal101Features = ImageFeatures(
    caltech_images_path,
    caltech_class_meta_path,
    caltech_image_working_file,
    llm_model=OpenAI,
    PromptSys=CoT,
)

# Left disabled on purpose; un-comment to invoke gen_info() on the object.
# Cal101Features.gen_info()

# Last expression of the cell: display the image-feature table.
Cal101Features.img_features

Loaded parquet!


Unnamed: 0,file_name,label_id,init_pred,img_desc
0,gerenuk_0011,gerenuk,ant,A model or sculpture of a deer. It has a slen...
1,gerenuk_0010,gerenuk,gerenuk,A horse with a body positioned horizontally an...
2,hawksbill_0085,hawksbill,turtle,"Sea turtle Oval-shaped body, greenish-brown s..."
3,hawksbill_0017,hawksbill,turtle,Sea turtle The turtle has an oval-shaped shel...
4,headphone_0024,headphone,headphone,"Headphones Over-ear design, black color, oval..."
...,...,...,...,...
197,elephant_0039,elephant,elephant,"Elephant Large, gray body with a thick, wrink..."
198,tick_0011,tick,tick,"Tick Small, oval-shaped body with a dark brow..."
199,tick_0029,tick,scorpion,"Tick Small, round body with a dark, shiny, an..."
200,metronome_0031,metronome,chandelier,A metronome. It has a triangular shape with a...


In [None]:
from modules.gen_features import LabelFeatures
# Parquet file the label-feature object works against.
caltech_label_working_file = "../data/caltech-101/cotPrompt/openai/label_features.parquet"

# Same image root, class metadata and LLM as the image features; no prompt
# object is passed here.
Cal101_LabelFeatures = LabelFeatures(
    caltech_images_path,
    caltech_class_meta_path,
    caltech_label_working_file,
    llm_model=OpenAI,
)

# Left disabled on purpose; un-comment to invoke gen_info() on the object.
# Cal101_LabelFeatures.gen_info()

In [16]:
from modules.prompts import DefaultPrompt
from modules.encoder import FeaturesEncoder

# Path of the CLIP checkpoint handed to the encoder.
model = "../models/clip-vit-large-patch14"

# Image root used by the encoder. It gets its own name because it differs from
# the `caltech_images_path` used to build the feature objects; rebinding that
# name here would make a later re-run of those cells silently use this
# directory instead.
caltech_encoder_images_path = '../database/Caltech/caltech-101/101_ObjectCategories'

# Inputs: the parquet working files holding image / label features.
caltech_image_working_file = "../data/caltech-101/cotPrompt/openai/image_features.parquet"
caltech_label_working_file = "../data/caltech-101/cotPrompt/openai/label_features.parquet"

# Outputs: pickle paths given to the encoder for image / label encodings.
encoding_images_path = "../data/caltech-101/cotPrompt/openai/image_features.pkl"
encoding_labels_path = "../data/caltech-101/cotPrompt/openai/label_features.pkl"

FE = FeaturesEncoder(
    caltech_encoder_images_path,
    encoding_images_path,
    encoding_labels_path,
    img_file_type='jpg',
    model=model,
)

# Not used further in this cell; kept so the name stays defined for later use.
DefPrompt = DefaultPrompt('Default')

# Template passed to FE.encode_labels in the following cell.
# NOTE(review): presumably "{}" is filled with the class name - confirm in
# modules.encoder.
human_design_prompt = "A photo of {}"

# Encode the images listed in the image working file. Label encoding is run
# separately in the next cell.
FE.encode_images(caltech_image_working_file)


  from .autonotebook import tqdm as notebook_tqdm


Loading model: clip-vit-large-patch14
Creating embedding dict...
1/202
Time taken per label: 3.81 seconds
--------------------------------------------------
2/202
Time taken per label: 1.52 seconds
--------------------------------------------------
3/202
Time taken per label: 0.92 seconds
--------------------------------------------------
4/202
Time taken per label: 0.46 seconds
--------------------------------------------------
5/202
Time taken per label: 0.52 seconds
--------------------------------------------------
6/202
Time taken per label: 0.51 seconds
--------------------------------------------------
7/202
Time taken per label: 0.5 seconds
--------------------------------------------------
8/202
Time taken per label: 0.48 seconds
--------------------------------------------------
9/202
Time taken per label: 0.48 seconds
--------------------------------------------------
10/202
Time taken per label: 0.5 seconds
--------------------------------------------------
11/202
Time take

In [None]:
# Encode the labels from the label working file using the prompt template
# defined in the encoder-setup cell (FE, both arguments come from that cell).
FE.encode_labels(caltech_label_working_file, human_design_prompt)

### Prediction

In [1]:
import pickle
from modules.classifier import ImageClassifier
# Pickled encodings consumed by the classifier below.
encoded_image_file = "../data/caltech-101/cotPrompt/openai/image_features.pkl"
encoded_text_file  = "../data/caltech-101/cotPrompt/openai/label_features.pkl"


def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project's own encoding step.
img_features = _load_pickle(encoded_image_file)
label_features = _load_pickle(encoded_text_file)

In [2]:
import pandas as pd
from pathlib import Path

# Run the M4 classifier once per image-feature variant and collect the four
# scores returned by `evaluation` for each of them.
metric_names = ['accuracy', 'precision', 'recall', 'f1']
metrics_by_feature = {}
for feature_key in ['X_if', 'X_df', 'X_pf', 'X_q']:
    classifier = ImageClassifier(label_features, mode='M4',
                                 img_features=img_features, ifeature=feature_key)
    print("="*50)
    predictions = classifier.classify()
    metrics_by_feature[feature_key] = list(classifier.evaluation(predictions))

# Build the table in one go: one row per feature variant, indexed by its name.
acc_df = pd.DataFrame.from_dict(metrics_by_feature, orient='index', columns=metric_names)

save_path = "../data/accuracies/caltech_openai_cot.csv"
# Make sure the target directory exists so the export does not fail on a
# fresh checkout.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
# The index holds the feature-variant names; writing it (under the header
# "feature") keeps every CSV row identifiable. With index=False the rows were
# saved without any label.
acc_df.to_csv(save_path, index_label='feature')
acc_df

Using model M4: Fused Features Embedding
Using Image Feature: Encoded Image X_if
Accuracy: 0.9109
Precision: 0.9109
Recall: 0.9059
F1-score: 0.8967
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Image Description X_df
Accuracy: 0.7723
Precision: 0.7723
Recall: 0.7374
F1-score: 0.7286
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Init Prediction X_pf
Accuracy: 0.6337
Precision: 0.6337
Recall: 0.5933
F1-score: 0.5765
Using model M4: Fused Features Embedding
Using Image Feature: Encoded Fused Image Feature X_q
Accuracy: 0.7673
Precision: 0.7673
Recall: 0.7122
F1-score: 0.7117


Unnamed: 0,accuracy,precision,recall,f1
X_if,0.910891,0.910891,0.905941,0.8967
X_df,0.772277,0.772277,0.737444,0.728553
X_pf,0.633663,0.633663,0.593329,0.576497
X_q,0.767327,0.767327,0.712211,0.71174
