In [1]:
import os
import json
from openai import OpenAI
import pandas as pd
import pickle as pkl
from collections import defaultdict
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import pairwise_cos_sim

In [2]:
def _load_pickle(path):
    """Read and return the single pickled object stored at ``path``."""
    # NOTE: unpickling executes arbitrary code; only load files produced by this project.
    with open(path, 'rb') as pickle_file:
        return pkl.load(pickle_file)


# Raw GPT outputs: the cleaning (remove/merge) instructions, the generated
# concepts, and the patch responses that override individual classes.
gpt_res_cleaning = _load_pickle('CUB/gpt_res_cleaning.pkl')
gpt_res_generation = _load_pickle('CUB/gpt_res_generation.pkl')
gpt_res_gen_patch = _load_pickle('CUB/gpt_res_gen_patch.pkl')

In [3]:
# Build, for every part, the pair (concepts to remove, duplicate -> canonical concept).
mappings = {}
for part_res in gpt_res_cleaning:
    part_name = part_res['part_name']
    part_res_parsed = json.loads(part_res['response'])
    to_remove = part_res_parsed['remove']
    # Invert "canonical -> [duplicates]" into a direct "duplicate -> canonical" lookup.
    merge = {
        duplicate: canonical
        for canonical, duplicates in part_res_parsed['merge'].items()
        for duplicate in duplicates
    }
    mappings[part_name] = (to_remove, merge)

In [4]:
import itertools

def clean_concepts(concept_dict: dict):
    """Normalise a part -> concepts mapping so that every part maps to a flat list.

    Each part's value may be:
      * a list of concepts (kept as is),
      * a dict whose values are single concepts or lists of concepts
        (flattened into one list, in dict order),
      * a single string (wrapped into a one-element list).

    Args:
        concept_dict: mapping from part name to its concepts in any of the
            formats above. Formats may differ between parts.

    Returns:
        ``concept_dict`` itself when every value is already a list; otherwise a
        new dict mapping each part name to a flat list of concepts.

    Raises:
        TypeError: if a part's value is neither a list, a dict nor a string.
    """
    if all(isinstance(concepts, list) for concepts in concept_dict.values()):
        return concept_dict

    cleaned_concept_dict = dict()
    for part_name, concepts in concept_dict.items():
        if isinstance(concepts, list):
            # Already in the target format; parts may be mixed within one response.
            cleaned_concepts = list(concepts)
        elif isinstance(concepts, dict):
            cleaned_concepts = []
            for value in concepts.values():
                if isinstance(value, list):
                    cleaned_concepts.extend(value)
                else:
                    cleaned_concepts.append(value)
        elif isinstance(concepts, str):
            cleaned_concepts = [concepts]
        else:
            raise TypeError(
                f"Unsupported concept format for part {part_name!r}: {type(concepts).__name__}"
            )
        cleaned_concept_dict[part_name] = cleaned_concepts

    return cleaned_concept_dict

# Patched responses, keyed by class name; they replace the original generation
# response for the same class.
patch_dict = {entry['class_name']: entry['response'] for entry in gpt_res_gen_patch}

concepts_cleaned = dict()
for class_res in gpt_res_generation:
    class_name = class_res['class_name']
    class_concepts = json.loads(class_res['response'])
    if class_name in patch_dict:
        class_concepts = json.loads(patch_dict[class_name])
    class_concepts = clean_concepts(class_concepts)

    # Per part: drop the concepts marked for removal, then rewrite duplicates
    # to the concept they are merged into.
    class_cpts_processed = dict()
    for part_name, (removes, merges) in mappings.items():
        class_cpts_processed[part_name] = [
            merges.get(cpt, cpt)
            for cpt in class_concepts[part_name]
            if cpt not in removes
        ]

    # Report classes that end up with no concepts for some part, e.g. because
    # the response did not follow the expected format.
    counts = {part: len(cpts) for part, cpts in class_cpts_processed.items()}
    if 0 in counts.values():
        print(class_name)
        print(counts)
        print(class_concepts)
    concepts_cleaned[class_name] = class_cpts_processed

In [6]:
# Number of classes for which processed concepts were collected.
len(concepts_cleaned)

200

In [8]:
# Persist the processed class -> part -> concepts mapping as JSON.
with open('CUB/concepts_processed.json', 'w') as json_file:
    json.dump(concepts_cleaned, json_file)

In [None]:

# Relabel the first stored patch entry as 'Prairie Warbler' and write the
# patch list back to the same pickle file.
with open('CUB/gpt_res_gen_patch.pkl', 'rb') as patch_in:
    gpt_res_gen_patch = pkl.load(patch_in)

gpt_res_gen_patch[0]['class_name'] = 'Prairie Warbler'

with open('CUB/gpt_res_gen_patch.pkl', 'wb') as patch_out:
    pkl.dump(gpt_res_gen_patch, patch_out)

In [None]:
def chat_completion_cub(client, example_class_name: str, example_answer: str,
                        target_class_name: str = "Prairie Warbler") -> str:
    """Ask the chat model for part-level visual concepts of one bird class.

    The conversation contains one in-context example (a question about
    ``example_class_name`` answered with ``example_answer``) followed by the
    same question about ``target_class_name``.

    Args:
        client: object exposing ``chat.completions.create(...)``, such as the
            ``OpenAI`` client created in this notebook.
        example_class_name: class name used in the in-context example question.
        example_answer: assistant answer shown for the example question.
        target_class_name: class to query. Defaults to "Prairie Warbler", the
            class this function previously hard-coded.

    Returns:
        The message content of the first choice in the response.
    """
    def build_question(class_name: str) -> str:
        # Single template shared by the example and the real question so the
        # two prompts cannot drift apart.
        return (f"To visually identify {class_name}, please provide 3 most common characteristics of each of {class_name}'s parts "
                "(head, beak, tail, wing, eye, leg, torso) in shape, color, or size:")

    messages = [
        {"role": "system",
         "content": "You are a helpful assistant designed to provide visual features of different species of birds and output them in JSON."},
        {"role": "user", "content": build_question(example_class_name)},
        {"role": 'assistant', "content": example_answer},
        {"role": "user", "content": build_question(target_class_name)},
    ]
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        response_format={"type": "json_object"},
        messages=messages
    )
    return response.choices[0].message.content

client = OpenAI()

# The stored example supplies the in-context question/answer pair.
with open('CUB/example_generation.json', 'r') as fp:
    example = json.load(fp=fp)

example_class_name, example_answer = example['class'], json.dumps(example['concepts'])
response = chat_completion_cub(client, example_class_name, example_answer)

print(response)

# The response describes the class queried by chat_completion_cub, not the
# in-context example class, so the patch is keyed by the queried class.
patched_class_name = 'Prairie Warbler'
patches = [dict(class_name=patched_class_name, response=response)]
with open('CUB/gpt_res_gen_patch.pkl', 'wb') as fp:
    # pickle.dump requires the target file; without it the call raised
    # TypeError after the file had already been truncated by open(..., 'wb').
    pkl.dump(patches, file=fp)

In [None]:

# Read the stored patch list back for inspection.
with open('CUB/gpt_res_gen_patch.pkl', 'rb') as patch_file:
    patch = pkl.load(patch_file)

In [None]:
# Display the patch entries reloaded from 'CUB/gpt_res_gen_patch.pkl'.
patch

In [None]:
# Display the generation responses loaded from 'CUB/gpt_res_generation.pkl'.
gpt_res_generation