Illustration of how to automatically evaluate fine-tuning experiments. This is the recommended workflow.

In [None]:
from diffusers import StableDiffusionPipeline
import numpy as np
import os
import pickle
import copy
import pandas as pd

import sdtools.sdexp as sdexp
import sdtools.cfg as cfg

# Load the evaluation model

In [None]:
# Base directory taken from the project configuration; the class-image,
# model, and fine-tune set paths in this notebook are joined onto it.
ROOT_DIR = cfg.data_dir
# Passed as `n_samples` to `sdexp.sample_exp` for each evaluated checkpoint.
N_SAMPLES_EVAL = 100

In [None]:
# Load the labeled images and their (instance, prompt) labels, then build the
# quality classifier used to score generated samples.
# NOTE: pickle can execute arbitrary code while loading; only point these
# paths at trusted files.
# Context managers make sure both file handles are closed after loading.
with open(cfg.path_labeled_img, 'rb') as f_img:
    lst_img = pickle.load(f_img)
with open(cfg.path_labeled_labels, 'rb') as f_labels:
    lst_instance_labels, lst_prompt_labels = pickle.load(f_labels)

# NOTE(review): with force_retrain=False this presumably reuses a classifier
# cached at `path_cache` when one exists -- confirm against sdexp.CLFQuality.
clf_quality = sdexp.CLFQuality(
    lst_img=lst_img,
    lst_instance_labels=lst_instance_labels,
    lst_prompt_labels=lst_prompt_labels,
    path_cache=cfg.path_clf_quality,
    force_retrain=False,
)

# Example experiment comparing the performance of multiple training sets.

In [None]:
# Template shared by every experiment. The fields left as None ("exp",
# "dir_model", and the entity's "finetune_path") are filled in per experiment
# in the loop below.
spec_base = dict(
    exp=None,
    entities=[
        dict(
            finetune_path=None,
            class_prompt="a cell phone photo of a kid",
            finetune_prompt="a cell phone photo of alskj kid",
            n_class_img=200,
        ),
    ],
    lr=1e-6,
    n_iters=[1500, 2800, 3500],
    dir_model=None,
    dir_parent_classimg=os.path.join(ROOT_DIR, "class_sets"),
    test_prompts=[
        "alskj kid sits in a cornfield, smiling. Watercolor.",
    ],
)

# One spec per experiment: experiment name -> fine-tune set path relative to
# ROOT_DIR. Each spec is an independent deep copy of the template.
lst_spec = []
for exp, path in {
    "A2": "finetune_sets/entityboy_small",
    "A3": "finetune_sets/entityboy_large",
    "A4": "finetune_sets/entityboy_ablationA",
    "A5": "finetune_sets/entityboy_ablationB",
    "A6": "finetune_sets/entityboy_ablationC",
}.items():
    spec = copy.deepcopy(spec_base)
    spec.update(
        exp=exp,
        dir_model=os.path.join(ROOT_DIR, f"modelexp/{exp}"),
    )
    spec['entities'][0]['finetune_path'] = os.path.join(ROOT_DIR, path)
    lst_spec.append(spec)

In [None]:
# Sample images for every experiment at each of its iteration counts and score
# them with the quality classifier.
# Layout: results[experiment name][iteration count] ->
#     {'instance': score_instance, 'prompt': score_prompt}
results = {}
for spec in lst_spec:
    # Key on this spec's own name. The module-level `exp` left over from the
    # spec-building loop always holds the last experiment name.
    exp_name = spec['exp']
    results[exp_name] = {}
    for iters in spec['n_iters']:
        lst_test_img = sdexp.sample_exp(spec, iters=iters, n_samples=N_SAMPLES_EVAL)
        score_instance, score_prompt = clf_quality.predict_proba(lst_test_img)

        # Column of each probability matrix that corresponds to class label 1.
        col_instance = clf_quality.clf_instance.classes_.tolist().index(1)
        col_prompt = clf_quality.clf_prompt.classes_.tolist().index(1)
        print(
            exp_name,
            '%.2f' % (np.mean(score_instance[:, col_instance])),
            '%.2f' % (np.mean(score_prompt[:, col_prompt])),
        )

        # Store under the experiment so different experiments (which share
        # the same iteration counts) do not overwrite each other.
        results[exp_name][iters] = {
            'instance': score_instance,
            'prompt': score_prompt,
        }


# Report

In [None]:
# Flatten the two-level results dictionary into a single mapping keyed by
# (outer key, inner key) tuples, then build a dataframe with one row per key.
flat_results = {
    (outer_key, inner_key): entry
    for outer_key, inner in results.items()
    for inner_key, entry in inner.items()
}
df_results = pd.DataFrame.from_dict(flat_results, orient='index')

# Combined score: product of the 'instance' and 'prompt' columns.
df_results['combined'] = df_results['instance'] * df_results['prompt']

In [None]:
# Show the results table (a notebook renders the last expression in a cell).
df_results