In [None]:
import glob
import os
import random
import time
import typing

import weave

# NOTE(review): presumably points weave at a development build of the frontend;
# confirm this is intended when running outside a local dev setup.
weave.use_frontend_devmode()

In [None]:
# Ground-truth labels keyed by example id: the source file's basename with every
# "." replaced by "_" (the same transformation read_dataset applies).
# Each entry carries the expected "name" and "shares" ("shares" may be None);
# only one entry also lists "directors".
raw_labels = {
    "Articles_of_Incorporation_Real_Example_3_txt": {
        "name": "HealthFirst Solutions LLC",
        "shares": 500000,
    },
    "Articles_of_Incorporation_Real_Example_2_txt": {
        "name": "GreenLeaf LLC",
        "shares": None,
        "directors": ["Sarah Miller", "Daniel Lee"],
    },
    "Articles_of_Incorporation_Real_Example_1_txt": {
        "name": "TechBoost Corp",
        "shares": 1000000,
    },
    "Highly_Varied_Article_of_Incorporation_10_txt": {
        "name": "Brown, Fernandez and Smith",
        "shares": 41141,
    },
    "Highly_Varied_Article_of_Incorporation_9_txt": {
        "name": "Ruiz-Goodman",
        "shares": 31783,
    },
    "Highly_Varied_Article_of_Incorporation_8_txt": {
        "name": "Gibson, Hunt and Davidson",
        "shares": 96403,
    },
    "Highly_Varied_Article_of_Incorporation_7_txt": {
        "name": "Boyd-Browning",
        "shares": 41300,
    },
    "Highly_Varied_Article_of_Incorporation_6_txt": {
        "name": "Newton, Moreno and Yang",
        "shares": 73981,
    },
    "Highly_Varied_Article_of_Incorporation_5_txt": {
        "name": "Matthews and Sons",
        "shares": 98608,
    },
    "Highly_Varied_Article_of_Incorporation_4_txt": {
        "name": "Moore LLC",
        "shares": 5732,
    },
    "Highly_Varied_Article_of_Incorporation_3_txt": {
        "name": "Mullen Inc",
        "shares": 76197,
    },
    "Highly_Varied_Article_of_Incorporation_2_txt": {
        "name": "Ellis and Sons",
        "shares": 54183,
    },
    "Highly_Varied_Article_of_Incorporation_1_txt": {
        "name": "French, Wyatt and Coleman",
        "shares": 78821,
    },
}


def read_dataset(root):
    """Load every labelled ``*.txt`` file directly under ``root`` as a dataset row.

    Args:
        root: Directory that is globbed (non-recursively) for ``*.txt`` files.

    Returns:
        A list of dicts with keys ``"id"`` (file basename with "." replaced by
        "_"), ``"example"`` (the file's text) and ``"label"`` (the matching
        entry of ``raw_labels``). Files whose id has no (or an empty) entry in
        ``raw_labels`` are skipped.
    """
    dataset_rows = []
    for path in glob.glob(os.path.join(root, "*.txt")):
        # Have to do replace here because of weave '.' access issues
        example_id = os.path.basename(path).replace(".", "_")
        label = raw_labels.get(example_id)
        if not label:
            continue
        # Use a context manager so the file handle is closed right away instead
        # of lingering until garbage collection.
        with open(path) as example_file:
            example_text = example_file.read()
        dataset_rows.append(
            {"id": example_id, "example": example_text, "label": label}
        )
    return dataset_rows

In [None]:
# Can't just make our own types, server won't deserialize.
# A fairly easy fix.
# Thin wrapper around the list of dataset rows so it can be handed to weave.save.
@weave.type()
class Dataset:
    # In this notebook each row is a dict with "id", "example" and "label" keys,
    # as produced by read_dataset.
    rows: list[typing.Any]

In [None]:
# Directory holding the articles-of-incorporation *.txt files. The default is
# the original author's local path; set AOI_DATASET_DIR to run the notebook on
# another machine without editing this cell.
raw_dataset = read_dataset(
    os.environ.get("AOI_DATASET_DIR", "/Users/shawn/datasets/aoi")
)

In [None]:
# raw_dataset

In [None]:
# Wrap the rows in a Dataset and save it with weave under the name "my_dataset5".
dataset = weave.save(Dataset(raw_dataset), "my_dataset5")
# Now, here I really want to make my own labels in the UI immediately.
# Where should the added column go? A new version of this dataset?
# Yeah, sure, why not.
# What's missing for editing to be good?
#   - batch editing, i.e. make a bunch of changes and choose where/how to save

In [None]:
# published = weave.publish(Dataset(raw_dataset), 'weave-flow1/my_dataset1')

In [None]:
# Bare expression: its value is displayed as this cell's output.
dataset.rows
# Here I went to render labels next to the dataset.
# I need access to labels in the notebook memory... would be easy enough to pass in.

In [None]:
import re


def split_paragraphs(doc):
    """Split ``doc`` into paragraphs separated by blank lines.

    Every line is stripped first, so a line holding only whitespace counts as
    blank. Each returned paragraph is itself stripped; consecutive separators
    can yield empty-string paragraphs.
    """
    normalized = "\n".join(map(str.strip, doc.split("\n")))
    paragraphs = []
    for chunk in normalized.split("\n\n"):
        paragraphs.append(chunk.strip())
    return paragraphs


def find_first_numeric(s):
    """Return the first integer that appears in ``s``, or ``None`` if there is none.

    A number written with comma thousands separators (e.g. ``"1,000,000"``) is
    read as a single value. The previous pattern (``\\d+``) could never match a
    comma, so such a number was truncated at its first group.

    Args:
        s: Text to search.

    Returns:
        The first number as an ``int``, or ``None`` when ``s`` has no digits.
    """
    # First alternative: 1-3 digits followed by one or more ",ddd" groups that
    # are not immediately followed by another digit. Otherwise fall back to a
    # plain run of digits.
    match = re.search(r"\d{1,3}(?:,\d{3})+(?!\d)|\d+", s)
    if match is None:
        return None
    return int(match.group().replace(",", ""))


def predict(dataset_row, config):
    """Heuristically extract the company name and share count from one row.

    The row's ``"example"`` text is split into paragraphs. The last paragraph
    containing "name" (case-insensitive) feeds the name, and the last one
    containing "share" feeds the share count.

    Args:
        dataset_row: Mapping with an ``"example"`` text entry.
        config: Mapping of options read with ``.get``:
            ``"shares_skip_chars"`` (default 0) - characters dropped from the
            start of the shares paragraph before looking for a number;
            ``"name_up_to_period"`` - if truthy, cut the name at its first ".".

    Returns:
        ``{"name": ..., "shares": ...}``; either value is ``None`` when it could
        not be extracted.
    """
    name_text = None
    shares_text = None
    for paragraph in split_paragraphs(dataset_row["example"]):
        lowered = paragraph.lower()
        if "name" in lowered:
            name_text = paragraph
        if "share" in lowered:
            shares_text = paragraph

    shares = None
    if shares_text:
        skip = config.get("shares_skip_chars", 0)
        shares = find_first_numeric(shares_text[skip:])

    name = None
    if name_text:
        # The name is taken to be everything after the first "is ".
        is_match = re.search(r"is ", name_text)
        if is_match is not None:
            name = name_text[is_match.end() :]
        if name and config.get("name_up_to_period"):
            period = re.search(r"\.", name)
            if period is not None:
                name = name[: period.start()]

    return {"name": name, "shares": shares}

In [None]:
# Smoke test: print the prediction for every saved row, skipping the first 4
# characters of the shares paragraph before searching for a number.
# (weave.use presumably resolves the saved rows into local values; confirm.)
for dataset_row in weave.use(dataset.rows):
    print(predict(dataset_row, {"shares_skip_chars": 4}))

In [None]:
# Label/prediction keys scored by summarize_item and summarize.
# Note: predict only ever fills "name" and "shares", so a prediction's
# "directors" is always missing (treated as None).
fields = ["name", "shares", "directors"]


def p_r_f1(tp, fp, fn):
    """Compute precision, recall and F1 from true/false positive and false negative counts.

    Returns:
        ``(precision, recall, f1)``. A metric whose denominator would be zero
        is reported as 0.
    """
    # if any denom is zero, then zero. could use NaN instead...
    precision = tp / (tp + fp) if (tp or fp) else 0
    recall = tp / (tp + fn) if (tp or fn) else 0
    if not (precision or recall):
        return precision, recall, 0
    f1 = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1


def summarize_item(item_result, item_label):
    """Score one prediction against its label, field by field.

    A field is "negative" when the prediction for it is ``None`` and "correct"
    when prediction and label are equal (both ``None`` counts as correct).
    Outcomes are classified by the prediction, the same way ``field_pr`` does:

    * tp - predicted a value and it is correct
    * fp - predicted a value and it is wrong (or the label has no value)
    * tn - predicted nothing and the label has no value
    * fn - predicted nothing but the label has a value

    Earlier, fp/fn were keyed on whether the *label* was ``None``. That counted
    a missed field as a false positive and a spurious prediction as a false
    negative, swapping precision and recall in those cases.

    Args:
        item_result: Prediction mapping (read with ``.get`` for each field).
        item_label: Label mapping (read with ``.get`` for each field).

    Returns:
        A dict with ``"<field>_negative"`` / ``"<field>_correct"`` flags for
        every entry of ``fields``, plus ``"correct"``, ``"tp"``, ``"fp"``,
        ``"tn"``, ``"fn"``, ``"precision"``, ``"recall"`` and ``"f1"``.
    """
    item_summary = {}
    for f in fields:
        item_summary[f"{f}_negative"] = item_result.get(f) is None
        item_summary[f"{f}_correct"] = item_result.get(f) == item_label.get(f)

    item_correct = item_tp = item_fp = item_tn = item_fn = 0
    for f in fields:
        negative = item_summary[f"{f}_negative"]
        correct = item_summary[f"{f}_correct"]
        item_correct += int(correct)
        if negative:
            # Nothing predicted: right only if the label is empty too.
            if correct:
                item_tn += 1
            else:
                item_fn += 1
        else:
            # Something predicted: right only if it equals the label.
            if correct:
                item_tp += 1
            else:
                item_fp += 1

    item_precision, item_recall, item_f1 = p_r_f1(item_tp, item_fp, item_fn)

    return {
        **item_summary,
        "correct": item_correct,
        "tp": item_tp,
        "fp": item_fp,
        "tn": item_tn,
        "fn": item_fn,
        "precision": item_precision,
        "recall": item_recall,
        "f1": item_f1,
    }


def field_pr(eval_result, field_name):
    """Aggregate confusion counts and precision/recall/F1 for one field.

    Args:
        eval_result: Sequence of items whose ``"summary"`` mapping holds the
            ``"<field_name>_negative"`` and ``"<field_name>_correct"`` flags.
        field_name: Field whose flags are tallied.

    Returns:
        A dict with ``"tp"``, ``"fp"``, ``"tn"``, ``"fn"``, ``"precision"``,
        ``"recall"`` and ``"f1"``.
    """
    negative_key = f"{field_name}_negative"
    correct_key = f"{field_name}_correct"

    def tally(predicate):
        # One full pass over eval_result per counter, adding up the flag values.
        total = 0
        for item in eval_result:
            total += predicate(item["summary"])
        return total

    tp = tally(lambda s: not s[negative_key] and s[correct_key])
    fp = tally(lambda s: not s[negative_key] and not s[correct_key])
    tn = tally(lambda s: s[negative_key] and s[correct_key])
    fn = tally(lambda s: s[negative_key] and not s[correct_key])

    precision, recall, f1 = p_r_f1(tp, fp, fn)
    return dict(
        tp=tp,
        fp=fp,
        tn=tn,
        fn=fn,
        precision=precision,
        recall=recall,
        f1=f1,
    )


def summarize(eval_result):
    """Build the run-level summary from per-item evaluation results.

    Returns:
        A dict with one ``"field_<name>"`` entry per scored field (the output
        of ``field_pr``) and ``"avg_precision"``, ``"avg_recall"``, ``"avg_f1"``
        holding the unweighted mean of that metric across the fields.
    """
    summary = {f"field_{name}": field_pr(eval_result, name) for name in fields}
    for metric in ("precision", "recall", "f1"):
        metric_total = 0
        for name in fields:
            metric_total += summary[f"field_{name}"][metric]
        summary[f"avg_{metric}"] = metric_total / len(fields)
    return summary


def evaluate(dataset, predict_config):
    """Run ``predict`` over every row and collect per-item and overall results.

    Args:
        dataset: Iterable of rows, each with ``"id"``, ``"example"`` and
            ``"label"`` entries.
        predict_config: Options passed through to ``predict``. If it contains
            both ``"latency_mu"`` and ``"latency_sigma"``, the reported latency
            is a simulated value drawn from ``random.gauss(mu, sigma)``.
            Otherwise the measured wall time of the ``predict`` call is
            reported (previously a missing key raised ``KeyError`` and the
            measured time was always discarded).

    Returns:
        A dict with ``"config"`` (the given config), ``"eval_table"`` (one
        entry per row with ``"dataset_id"``, ``"result"`` and ``"summary"``)
        and ``"summary"`` (the output of ``summarize``).
    """
    latency_mu = predict_config.get("latency_mu")
    latency_sigma = predict_config.get("latency_sigma")
    simulate_latency = latency_mu is not None and latency_sigma is not None

    eval_result = []
    for dataset_row in dataset:
        start_time = time.perf_counter()
        result = predict(dataset_row, predict_config)
        latency = time.perf_counter() - start_time
        if simulate_latency:
            # Demo mode: replace the measurement with a synthetic latency.
            # No random seed is set here, so the values differ between runs.
            latency = random.gauss(latency_mu, latency_sigma)
        item_summary = summarize_item(result, dataset_row["label"])
        eval_result.append(
            {
                "dataset_id": dataset_row["id"],
                "result": result,
                "summary": {"latency": latency, **item_summary},
            }
        )
    return {
        "config": predict_config,
        "eval_table": eval_result,
        "summary": summarize(eval_result),
    }

In [None]:
# Three evaluation runs over the same saved dataset, each saved with weave under
# its own name. All of them use the same simulated-latency parameters
# (latency_mu / latency_sigma) and differ only in the predict options.

# Run 0: baseline, no predict options.
eval_results0 = weave.save(
    evaluate(weave.use(dataset).rows, {"latency_mu": 0.3, "latency_sigma": 0.1}),
    "eval_results0",
)
# Run 1: additionally cut the extracted name at its first period.
eval_results1 = weave.save(
    evaluate(
        weave.use(dataset).rows,
        {"latency_mu": 0.3, "latency_sigma": 0.1, "name_up_to_period": True},
    ),
    "eval_results1",
)
# Run 2: name cut at the first period, and the first 4 characters of the shares
# paragraph skipped before searching for a number.
eval_results2 = weave.save(
    evaluate(
        weave.use(dataset).rows,
        {
            "latency_mu": 0.3,
            "latency_sigma": 0.1,
            "name_up_to_period": True,
            "shares_skip_chars": 4,
        },
    ),
    "eval_results2",
)

In [None]:
from weave.legacy.weave.panels_py import panel_eval

# Build the eval board from the dataset rows and two of the saved runs: the
# baseline (eval_results0) and the run with both predict options enabled
# (eval_results2). eval_results1 is saved but not passed here.
# As the last expression in the cell, the result is displayed as the output.
panel_eval.eval_board(dataset.rows, eval_results0, eval_results2)

In [None]:
# To add:
#   - backed by W&B runs
#   - show run code / config comparison
#   - show traces of pipelines and compare them
#   - add view of N runs instead of 2