- Load two eval_results

EvalResult
- example, label, result, item_summary

In [None]:
import time
import typing
import weave
import random
import string
from weave import weave_internal
weave.use_frontend_devmode()
from weave.legacy.panels import panel_board
from weave.legacy import ops_domain

In [None]:
def rand_string_n(n: int) -> str:
    """Return a random string of ``n`` characters drawn from A-Z and 0-9.

    One ``random.choice`` call is made per character, so the output is
    reproducible after ``random.seed``. A non-positive ``n`` yields ``""``.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(n)]
    return "".join(picked)

def _make_dataset_row(index):
    """Build one synthetic row: string id, random example text, random label."""
    return {
        'id': str(index),
        'example': rand_string_n(10),
        'label': random.choice(string.ascii_uppercase),
    }


# 50 synthetic rows; the labels are random single uppercase letters.
dataset_raw = [_make_dataset_row(i) for i in range(50)]

# Save the rows to Weave under the name 'dataset'.
dataset = weave.save(dataset_raw, 'dataset')
#dataset

In [None]:
def predict(dataset_row, config):
    """Simulate a model prediction for a single dataset row.

    A uniform draw from ``random.random()`` is compared against
    ``config['correct_chance']``. When the draw is below it, the row's own
    ``'label'`` is returned; otherwise a random uppercase letter is returned
    (which may still happen to match the label).
    """
    answers_correctly = random.random() < config['correct_chance']
    return (
        dataset_row['label']
        if answers_correctly
        else random.choice(string.ascii_uppercase)
    )

In [None]:
def evaluate(dataset, predict_config):
    """Run ``predict`` over every dataset row and score the predictions.

    Args:
        dataset: Iterable of row dicts. Each row is passed to ``predict`` and
            must provide ``'id'`` and ``'label'``. Any iterable works; it does
            not need to support ``len()``.
        predict_config: Dict forwarded unchanged to ``predict``. It must also
            contain ``'latency_mu'`` and ``'latency_sigma'``, the mean and
            standard deviation of the Gaussian used to simulate latency.

    Returns:
        A dict with three keys:
          * ``'config'``: ``predict_config`` itself.
          * ``'eval_table'``: one dict per row with ``'dataset_id'``,
            ``'result'`` and a ``'summary'`` of ``'latency'`` / ``'correct'``.
          * ``'summary'``: ``{'accuracy': fraction of correct rows}``; the
            accuracy is ``0.0`` when the dataset is empty.
    """
    eval_table = []
    correct_count = 0
    for dataset_row in dataset:
        result = predict(dataset_row, predict_config)
        # Latency is simulated rather than measured: it is drawn from a
        # Gaussian so the two demo runs get visibly different distributions.
        latency = random.gauss(
            predict_config['latency_mu'], predict_config['latency_sigma']
        )
        correct = dataset_row['label'] == result
        if correct:
            correct_count += 1
        eval_table.append({
            'dataset_id': dataset_row['id'],
            'result': result,
            'summary': {
                'latency': latency,
                'correct': correct,
            },
        })

    # Divide by the rows actually evaluated so unsized iterables work, and
    # guard the empty case instead of raising ZeroDivisionError.
    row_count = len(eval_table)
    accuracy = correct_count / row_count if row_count else 0.0
    return {
        'config': predict_config,
        'eval_table': eval_table,
        'summary': {'accuracy': accuracy},
    }

In [None]:
# Two simulated runs over the same rows. Both use the same correct_chance;
# only the parameters of the simulated latency distribution differ.
_run0_config = {'correct_chance': 0.5, 'latency_mu': 0.3, 'latency_sigma': 0.1}
_run1_config = {'correct_chance': 0.5, 'latency_mu': 0.4, 'latency_sigma': 0.2}

eval_result_raw0 = evaluate(dataset_raw, _run0_config)
eval_result_raw1 = evaluate(dataset_raw, _run1_config)

# Save both results to Weave so the board below can reference them by name.
eval_result0 = weave.save(eval_result_raw0, 'eval_result0')
eval_result1 = weave.save(eval_result_raw1, 'eval_result1')

In [None]:


# Variable bar: the named expressions that the board's panels are built from.
varbar = panel_board.varbar()

dataset_var = varbar.add('dataset', dataset)
eval_result0_var = varbar.add('eval_result0', eval_result0)
eval_result1_var = varbar.add('eval_result1', eval_result1)

# One entry per eval run: the run's summary dict merged with a 'name' tag
# ('res0' / 'res1') so plots can tell the runs apart.
summary = varbar.add('summary', weave.legacy.ops.make_list(
    a=weave.legacy.ops.TypedDict.merge(weave.legacy.ops.dict_(name='res0'), eval_result0_var['summary']),
    b=weave.legacy.ops.TypedDict.merge(weave.legacy.ops.dict_(name='res1'), eval_result1_var['summary']),
))

# Per-example rows of both runs concatenated into a single list, each row
# tagged with the 'name' of the run it came from.
concatted_evals = varbar.add('concatted_evals', weave.legacy.ops.List.concat(
    weave.legacy.ops.make_list(
        a=eval_result0_var['eval_table'].map(
            lambda row: weave.legacy.ops.TypedDict.merge(
                weave.legacy.ops.dict_(name='res0'), row)),
        b=eval_result1_var['eval_table'].map(
            lambda row: weave.legacy.ops.TypedDict.merge(
                weave.legacy.ops.dict_(name='res1'), row)))))

# join evals together first
# (keyed on 'dataset_id'; the trailing False is presumably the "outer" flag,
# i.e. only ids present in both runs are kept -- TODO confirm)
joined_evals = varbar.add('joined_evals', weave.legacy.ops.join_all(
    weave.legacy.ops.make_list(a=eval_result0_var['eval_table'], b=eval_result1_var['eval_table']),
    lambda row: row['dataset_id'],
    False))

# then join dataset to evals
# Each joined eval row's 'dataset_id' is indexed with [0] to get one key per
# row. The sides are aliased 'dataset' and 'evals', which is why later
# expressions use keys such as 'dataset.id' and 'evals.summary'. The two
# trailing False values are presumably the left/right outer flags -- TODO
# confirm.
dataset_evals = varbar.add('dataset_evals', weave.legacy.ops.join_2(
    dataset_var,
    joined_evals,
    lambda row: row['id'],
    lambda row: row['dataset_id'][0],
    'dataset',
    'evals',
    False,
    False
))


# Top-level panel container for the board, laid out on a grid.
main = weave.legacy.panels.Group(
    layoutMode="grid",
    showExpressions=True,
    enableAddPanel=True,
)

#### Run/config info TODO

#### Summary info

# Accuracy of each run, taken from the per-run 'summary' variable.
accuracy_plot = weave.legacy.panels.Plot(
    summary,
    x=lambda row: row['accuracy'],
    y=lambda row: row['name'],
    color=lambda row: row['name'],
)
main.add(
    "accuracy",
    accuracy_plot,
    layout=weave.legacy.panels.GroupPanelLayout(x=0, y=0, w=12, h=4),
)

# Per-example latency of each run, drawn as a boxplot per run name.
latency_plot = weave.legacy.panels.Plot(
    concatted_evals,
    x=lambda row: row['summary']['latency'],
    y=lambda row: row['name'],
    color=lambda row: row['name'],
    mark='boxplot',
)
main.add(
    "latency",
    latency_plot,
    layout=weave.legacy.panels.GroupPanelLayout(x=12, y=0, w=12, h=4),
)

# Alternative raw-table panels, currently disabled:
#ct = main.add('concat_t', concatted_evals, layout=weave.legacy.panels.GroupPanelLayout(x=0, y=4, w=24, h=12))
# main.add('dataset_table', dataset)
# main.add('joined_evals', joined_evals)
# main.add('dataset_evals', dataset_evals, layout=weave.legacy.panels.GroupPanelLayout(x=0, y=4, w=24, h=6))

##### Example details

# more ideas: show examples that all got wrong, or that are confusing

# Facet the joined rows by whether each of the two evals got the example
# right: x uses the 'correct' flag of evals entry 0, y that of entry 1, and
# each cell shows the number of rows that fall into it.
faceted_view = weave.legacy.panels.Facet(dataset_evals,
                             x=lambda row: row['evals.summary'][0]['correct'],
                             y=lambda row: row['evals.summary'][1]['correct'],
                             select=lambda row: row.count())

# Keep the value returned by main.add: its .selected() feeds the table below.
faceted = main.add('faceted', faceted_view, layout=weave.legacy.panels.GroupPanelLayout(x=0, y=4, w=12, h=6))

# Scatter of per-example latency, evals entry 0 on x against entry 1 on y.
# NOTE(review): this picks 'latency' first and then indexes ([...]['latency'][0]),
# whereas every other expression in this cell indexes first and then picks
# ([...][0]['latency']). Presumably both forms resolve to the same value in
# Weave -- confirm, and consider using one form throughout.
main.add("example_latencies",
         weave.legacy.panels.Plot(dataset_evals,
                           x=lambda row: row['evals.summary']['latency'][0],
                           y=lambda row: row['evals.summary']['latency'][1]),
         layout=weave.legacy.panels.GroupPanelLayout(x=12, y=4, w=12, h=6))

# Detail table for the current selection in the facet panel. Columns are
# added in display order: the dataset fields first, then for each of
# result/correct/latency a dict holding the res0 and res1 values.
faceted_sel = weave.legacy.panels.Table(faceted.selected())
faceted_sel.config.rowSize = 2
faceted_sel.add_column(lambda row: row['dataset.id'], 'id')
faceted_sel.add_column(lambda row: row['dataset.example'], 'example')
faceted_sel.add_column(lambda row: row['dataset.label'], 'label')
faceted_sel.add_column(lambda row: weave.legacy.ops.dict_(res0=row['evals.result'][0], res1=row['evals.result'][1]), 'result')
faceted_sel.add_column(lambda row: weave.legacy.ops.dict_(res0=row['evals.summary'][0]['correct'], res1=row['evals.summary'][1]['correct']), 'correct')
faceted_sel.add_column(lambda row: weave.legacy.ops.dict_(res0=row['evals.summary'][0]['latency'], res1=row['evals.summary'][1]['latency']), 'latency')

main.add('faceted_sel', faceted_sel, layout=weave.legacy.panels.GroupPanelLayout(x=0, y=10, w=24, h=12))

# Final expression of the cell: the board combining the variable bar and the
# panel group (as the last expression, presumably what the notebook renders).
weave.legacy.panels.Board(vars=varbar, panels=main)