A simple example workflow for training a model that evaluates images.

The example provided here uses binary absolute quality scores, but it is straightforward to adapt it to pairwise scoring (which I highly recommend, both for the consistency/quality of the data and for the speed of labeling).

In [None]:
import os, pickle
import numpy as np
import pandas as pd
from importlib import reload
%load_ext autoreload
%autoreload 1

import gspread
# Authenticate with Google through gspread's OAuth helper, using the console flow.
# The resulting client `gc` is used further down to open the labels spreadsheet.
# NOTE(review): the console-based OAuth flow may not be available in newer gspread
# releases — confirm against the installed version.
gc = gspread.oauth(flow=gspread.auth.console_flow)

import sdtools.sdexp as sdexp
import sdtools.cfg as cfg

In [None]:
import clip
import torch

# Load the CLIP ViT-B/32 model together with its matching image preprocessing
# transform. Prefer the GPU, but fall back to the CPU so this cell still runs on
# machines without CUDA instead of failing at load time.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
# Base directory that the class-image, model, and finetune-set paths below are joined onto.
ROOT_DIR = cfg.data_dir
# Number of images sampled from each model (passed as n_samples to sdexp.sample_exp).
N_EXAMPLES_PER_MODEL = 50
# Placeholder: replace with the URL of the Google Sheet holding the labels before running.
SHEET_URL = <INSERT SHEET URL>
# Name of the worksheet inside that sheet from which the labels are read.
WS_LABELS_NAME = 'labels'

# Generate, display, label, and store images + labels

In [None]:
# generate images for labeling
import copy

# Template experiment spec. The per-model fields ("exp", "dir_model" and the
# entity's "finetune_path") are left as None here and filled in for each model
# inside the loop below.
spec_base = {
    "exp":None,
    "entities":[
        {
            "finetune_path":None,
            "class_prompt":"a cell phone photo of a kid",
            "finetune_prompt":"a cell phone photo of alskj kid",
            "n_class_img":200
        }
    ],
    "lr":1e-6,
    "n_iters":[1500,2800],
    "dir_model":None,
    "dir_parent_classimg":os.path.join(ROOT_DIR,"class_sets"),
    "test_prompts":
        [
            "alskj kid sits in a cornfield, smiling. Watercolor."
        ]
}

# Run through sample models to generate images for labeling:
# - A6 2800
# - A5 2800
# - A2 2800
# - A3 1500
lst_img = []
for exp, iters, finetune_path in [
    ("A6", 2800, 'finetune_sets/entityboy_ablationC'),
    ("A5", 2800, 'finetune_sets/entityboy_ablationB'),
    ("A2", 2800, 'finetune_sets/entityboy_small'),
    ("A3", 1500, 'finetune_sets/entityboy_large')
]:
    # Deep copy: dict.copy() is shallow, so writing to spec['entities'][0] would
    # otherwise mutate the nested dict that lives inside spec_base itself.
    spec = copy.deepcopy(spec_base)
    spec['exp'] = exp
    spec['dir_model'] = os.path.join(ROOT_DIR, f"modelexp/{exp}")
    spec['entities'][0]['finetune_path'] = os.path.join(ROOT_DIR, finetune_path)
    lst_img.extend(sdexp.sample_exp(spec, iters=iters, n_samples=N_EXAMPLES_PER_MODEL))

# Randomize the order in place before saving; the saved order is the order in
# which the images are later enumerated for labeling.
np.random.shuffle(lst_img)

# pickle image data to disk (inside ROOT_DIR)
with open(cfg.path_labeled_img, 'wb') as f:
    pickle.dump(lst_img, f)

In [None]:
# Read the pickled image list back from disk.
with open(cfg.path_labeled_img, 'rb') as fh:
    lst_img = pickle.load(fh)

# Show every image in turn, preceded by its position in the list, so that the
# position can be used as the row reference while labeling.
for idx, image in enumerate(lst_img):
    print(idx)
    display(image)

In [None]:
# Open the SHEET_URL spreadsheet / WS_LABELS_NAME worksheet with gc and read one row
# per image in lst_img: column B holds the instance 0/1 labels and column C the
# prompt 0/1 labels. The result is two lists of integers.
def _parse_label_cells(cells):
    """Convert worksheet cells to a list of ints.

    Raises:
        ValueError: if a cell is empty or does not hold an integer; the message
            names the offending row and column so the sheet can be fixed.
    """
    labels = []
    for cell in cells:
        try:
            labels.append(int(cell.value))
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"label cell at row {cell.row}, column {cell.col} is empty "
                f"or not an integer: {cell.value!r}"
            ) from err
    return labels


ws = gc.open_by_url(SHEET_URL).worksheet(WS_LABELS_NAME)
lst_instance_labels = _parse_label_cells(ws.range(f'B1:B{len(lst_img)}'))
lst_prompt_labels = _parse_label_cells(ws.range(f'C1:C{len(lst_img)}'))

# pickle labels to disk (inside ROOT_DIR)
with open(cfg.path_labeled_labels, 'wb') as f:
    pickle.dump((lst_instance_labels, lst_prompt_labels), f)


# Train + store score classifier

In [None]:
# Reload the labeled images and both label lists from disk. Context managers are
# used so each file handle is closed as soon as it has been read.
with open(cfg.path_labeled_img, 'rb') as f:
    lst_img = pickle.load(f)
with open(cfg.path_labeled_labels, 'rb') as f:
    lst_instance_labels, lst_prompt_labels = pickle.load(f)

# Build the quality classifier from the images and the two label lists.
# NOTE(review): force_retrain=True presumably bypasses any classifier already
# cached at path_cache — confirm against sdexp.CLFQuality.
clf_quality = sdexp.CLFQuality(
    lst_img,
    lst_instance_labels,
    lst_prompt_labels,
    path_cache=cfg.path_clf_quality,
    force_retrain=True,
)