In [101]:
import os
import glob
import weave
import typing
weave.use_frontend_devmode()

In [30]:
def read_dataset(root):
    """Load every matching article text file directly under ``root``.

    Files are selected with the glob pattern ``Articles*Real*.txt`` inside
    ``root`` (no recursion into sub-directories).

    Args:
        root: Directory to search.

    Returns:
        A list with one dict per matched file, holding ``'name'`` (the file's
        base name with every ``.`` replaced by ``_``) and ``'contents'`` (the
        full text of the file). The list is empty when nothing matches.
    """
    rows = []
    for path in glob.glob(os.path.join(root, 'Articles*Real*.txt')):
        # Read through a context manager so each handle is closed right away
        # instead of being left for the garbage collector.
        with open(path) as f:
            contents = f.read()
        # Have to do replace here because of weave '.' access issues
        name = os.path.basename(path).replace('.', '_')
        rows.append({'name': name, 'contents': contents})
    return rows

In [102]:
# Can't just make our own plain types: the server won't deserialize them.
# (Should be a fairly easy fix.)
# Dataset is therefore declared through the @weave.type() decorator.
@weave.type()
class Dataset:
    # The dataset rows. Elements are untyped (typing.Any); in this notebook they
    # are the dicts returned by read_dataset ('name' and 'contents' keys).
    rows: list[typing.Any]

In [31]:
# Load the raw article rows from disk.
# NOTE(review): hard-coded absolute path to one user's Downloads folder. On any
# other machine the glob in read_dataset matches nothing and this is an empty
# list — consider making the root configurable.
raw_dataset = read_dataset('/Users/shawn/Downloads')

In [104]:
# Wrap the raw rows in the Dataset type and save them under the name 'my_dataset3'.
dataset = weave.save(Dataset(raw_dataset), 'my_dataset3')
# Design notes:
# Here I really want to create my own labels in the UI immediately.
# Where should the added column go? A new version of this dataset?
# Yeah, sure, why not.
# What's missing for editing to be good?
#   - batch editing, i.e. make a bunch of changes and then choose where/how to save them

In [108]:
# Publish a freshly constructed Dataset (same raw rows) as 'weave-flow1/my_dataset1'.
# NOTE(review): presumably 'weave-flow1' is the target project and 'my_dataset1'
# the object name — confirm against the weave.publish documentation.
published = weave.publish(Dataset(raw_dataset), 'weave-flow1/my_dataset1')

In [105]:
dataset

In [86]:
# Hand-written ground-truth labels, keyed by the sanitized file names that
# read_dataset produces (every '.' replaced with '_', hence the '_txt' suffix).
# NOTE(review): a 'shares' value of None presumably means the document states
# no share count — confirm.
raw_labels = {
    'Articles_of_Incorporation_Real_Example_3_txt': {
        'shares': 500000
    },
    'Articles_of_Incorporation_Real_Example_2_txt': {
        'shares': None
    },
    'Articles_of_Incorporation_Real_Example_1_txt': {
        'shares': 1000000
    }
}
# Save the labels with weave under the name 'my_labels'.
labels = weave.save(raw_labels, 'my_labels')

In [36]:
dataset
# Here I went to render the labels next to the dataset.
# I need access to the labels held in notebook memory... it would be easy enough to pass them in.

In [97]:
import re

def split_paragraphs(doc):
    """Split ``doc`` on blank lines and trim the whitespace around each piece.

    Pieces that are empty after trimming are kept, so the result always has
    at least one element.
    """
    pieces = doc.split('\n\n')
    return list(map(str.strip, pieces))

def find_first_numeric(s):
    """Return the first number found in ``s`` as a float, or ``None``.

    A number is either a comma-grouped integer (``1,000,000``) or a plain run
    of digits (``1000000``), optionally followed by a decimal fraction
    (``.5``). It must be delimited by word boundaries, so digits glued to
    letters (``v2``) are not matched.

    Args:
        s: Text to search.

    Returns:
        The matched number with thousands separators removed, as a float, or
        ``None`` when ``s`` contains no such number.
    """
    # The plain ``\d+`` alternative is required: with only the comma-grouped
    # form, an unseparated number longer than three digits can never satisfy
    # the trailing ``\b`` and would be skipped entirely.
    match = re.search(r'\b(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?\b', s)
    if match is None:
        return None
    return float(match.group().replace(',', ''))

def extraction_pipeline(doc):
    """Extract a share count from the text of a document.

    The document is split into paragraphs, the last paragraph mentioning
    "share" (case-insensitive) is selected, and the first number in that
    paragraph is returned.

    Args:
        doc: Full document text.

    Returns:
        ``{'shares': <float or None>}``. The value is ``None`` when no
        paragraph mentions "share" or the selected paragraph has no number.
    """
    capital_paragraph = None
    for paragraph in split_paragraphs(doc):
        # No break: when several paragraphs mention shares, the last one wins.
        if 'share' in paragraph.lower():
            capital_paragraph = paragraph
    # Test the selected paragraph, not the loop variable. Checking the loop
    # variable returned None whenever the document merely ended with an empty
    # paragraph, and let None reach find_first_numeric when nothing matched.
    if capital_paragraph is None:
        return {'shares': None}

    return {'shares': find_first_numeric(capital_paragraph)}

In [62]:
# Attach each row's label under a 'labels' key (None when the row's name has
# no entry in the labels mapping).
labels_val = weave.use(labels)
raw_labeled_ds = [
    {**row, 'labels': labels_val.get(row['name'])}
    for row in weave.use(dataset)
]

In [69]:
# OK, so at this point I have a working model. It just extracts one field,
# and it sucks. But it's a start.
# Run the pipeline on every labeled row and print each prediction.
for row in raw_labeled_ds:
    print(extraction_pipeline(row['contents']))

S Section 5: Capital Structure
The total number of shares authorized is 500,000.
{'shares': 5.0}
S 4. Authorized Capital
The authorized share capital is $500,000.
{'shares': 4.0}
S Article IV: Share Structure
The corporation is authorized to issue 1,000,000 shares of Common Stock.
{'shares': 1000000.0}


In [81]:
weave.use(labels)

{'Articles_of_Incorporation_Real_Example_3_txt': {'shares': 500000},
 'Articles_of_Incorporation_Real_Example_2_txt': {'shares': 1000000}}

In [98]:
# Next I want to automate the process of evaluating this model

def summarize_item(result, label):
    """Score one prediction against its label.

    Args:
        result: Pipeline output; must have a ``'shares'`` key.
        label: Ground-truth entry; must have a ``'shares'`` key.

    Returns:
        The result of comparing the predicted share count with the labeled
        one for equality.
    """
    predicted = result['shares']
    expected = label['shares']
    return predicted == expected

def evaluate(dataset, labels, pipeline):
    """Run ``pipeline`` over ``dataset`` and score it against ``labels``.

    Args:
        dataset: Sized, re-iterable collection of rows; each row has
            ``'name'`` and ``'contents'`` keys.
        labels: Mapping from row name to its label. A row name missing from
            it raises ``KeyError``.
        pipeline: Callable applied to each row's ``'contents'``.

    Returns:
        A dict with ``'eval_results'`` (one entry per row holding the
        ``'example'``, its ``'label'`` and the ``'item_eval'`` score) and
        ``'summary'`` (``'n_examples'`` and ``'n_correct'`` counts).
    """
    # TODO: pipeline should also produce a trace
    # Phase 1: run the pipeline on every row, keyed by row name.
    predictions = {}
    for example in dataset:
        predictions[example['name']] = pipeline(example['contents'])

    # Phase 2: score each prediction against its label.
    verdicts = {
        name: summarize_item(prediction, labels[name])
        for name, prediction in predictions.items()
    }

    # Phase 3: assemble the per-example records in dataset order.
    per_example = [
        {
            'example': example,
            'label': labels[example['name']],
            'item_eval': verdicts[example['name']],
        }
        for example in dataset
    ]

    summary = {
        'n_examples': len(dataset),
        'n_correct': sum(1 for verdict in verdicts.values() if verdict),
    }
    return {'eval_results': per_example, 'summary': summary}

In [99]:
# Evaluate the extraction pipeline on the saved dataset. Labels come from the
# in-memory raw_labels dict rather than the saved `labels` object.
eval_results = evaluate(weave.use(dataset), raw_labels, extraction_pipeline)

In [None]:
# I really want W&B runs for this
# It's just that I need an eval dashboard

In [None]:
# What do I want to see on an evaluation dashboard?
# Pick two evaluations and compare them (or pick N)
# See side by side pipeline code, and parameters
# See scores
# See examples we did better or worse on
# (easy examples, hard examples)
# or really, see confusing examples?

# This is the Zoox board. Just make that and all will be good.
# Need to specify the appropriate Weave data structures for it.
# VarBar
# 1. pick a project
# 2. pick a dataset from that project
# 3. pick N eval runs for that dataset
# 
# Main
# Code/Config comparison, what runs am I looking at?
# summary metrics comparison
# example comparison / exploration

# OK if inside a wandb run, I need weave.use() to do use_artifact, and weave.save() to do save_artifact
#   (and sticking this in summary to make an output edge I think? yes.)