In [None]:
# Reload imported project modules automatically so edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display as disp, clear_output
from functools import partial
from autoplan.token import OCamlTokenizer
from scripts.rainfall_ingest import load_new_labels, ingest_dataset
from grammars.rainfall.labels import CountWhere
from pickle_cache import PickleCache
import pickle
# Module-level cache handle; none of the cells visible in this section use it.
# NOTE(review): where PickleCache stores its data is defined in pickle_cache -- confirm before relying on it.
pcache = PickleCache()

In [None]:
# Locations of the rainfall study data, rooted at the checkout in the user's
# home directory.
REPO_DIR = os.path.expanduser('~/autoplan')
DATA_DIR = f'{REPO_DIR}/data/rainfall/raw'
CODE_DIR = f'{DATA_DIR}/Fall2013-RawData'

def read_coding_csv(name):
    """Load one Fall 2013 coding sheet and return it transposed.

    The file ``{DATA_DIR}/Fall2013Coding{name}.csv`` is read without a header
    row and with its first column as the index; transposing then turns those
    first-column values into the column names of the result.

    Args:
        name: Suffix that selects the sheet, e.g. ``'T1'``.

    Returns:
        The transposed ``pandas.DataFrame``.
    """
    csv_path = f'{DATA_DIR}/Fall2013Coding{name}.csv'
    raw_sheet = pd.read_csv(csv_path, index_col=0, header=None)
    return raw_sheet.transpose()

In [None]:
# Table of plan codes; its 'Code' column is the join key used by
# read_and_join_coding below.
plan_codes = pd.read_csv(f'{DATA_DIR}/PlanCodes-codes.csv')

def read_and_join_coding(name):
    """Load a coding sheet and attach plan-code details to its coded rows.

    Rows whose ``PlanStructure`` is missing are dropped. The remaining rows
    are re-indexed by ``PlanStructure`` and joined (pandas' default left join)
    against ``plan_codes`` indexed by its ``Code`` column.

    Args:
        name: Sheet suffix passed to ``read_coding_csv``, e.g. ``'T1'``.

    Returns:
        A DataFrame indexed by plan-structure code that holds the coding-sheet
        columns followed by the matching ``plan_codes`` columns.
    """
    coding_csv = read_coding_csv(name)
    # Keep only rows that actually carry a code. Rows with a null
    # PlanStructure have no key to join on, so selecting them (as the previous
    # `isnull()` filter did) could never match anything in plan_codes.
    valid_entries = coding_csv[coding_csv.PlanStructure.notnull()]
    combined_entries = valid_entries.set_index('PlanStructure').join(plan_codes.set_index('Code'))
    return combined_entries

In [None]:
# Load the T1 coding sheet and pick out the rows that have no PlanStructure
# value; these are the submissions labeled by hand further down.
coding_csv = read_coding_csv('T1')
missing_entries = coding_csv.loc[coding_csv['PlanStructure'].isnull()]

In [None]:
# Tokenize every unlabeled T1 submission whose source file exists on disk.
# `sources` maps submission ID -> element [1] of the tokenizer's result
# (presumably the program text shown to the labeler -- TODO confirm against
# OCamlTokenizer.tokenize).
sources = {}
tokenizer = OCamlTokenizer()
for _, row in missing_entries.iterrows():
    # Named `submission_id` rather than `id` so the builtin `id()` is not
    # shadowed for the rest of the notebook.
    submission_id = row.ID
    path = f'{CODE_DIR}/T1/{submission_id}.ml'
    if not os.path.isfile(path):
        # No source file for this submission; nothing to label.
        continue
    try:
        # The context manager closes the handle even when decoding fails.
        with open(path, 'r') as source_file:
            program_text = source_file.read()
        sources[submission_id] = tokenizer.tokenize(program_text)[1]
    except UnicodeDecodeError:
        # Best-effort: undecodable files are still skipped, but the skip is
        # reported so the gap in the labeled set is visible.
        print(f'Skipping {path}: could not decode file contents')

In [None]:
# Hand-assigned labels: source ID -> plan name (one of `plans`), written by the
# button handler inside render(). Re-running this cell discards every label
# collected so far.
labels = {}

In [None]:
# Button captions offered for each source; they must match the keys of
# planstr_to_plan, which translates them into CountWhere members.
plans = ['rainfall', 'helper', 'own']

# Single shared iterator over (id, source) pairs that render() consumes one
# item at a time. Re-running this cell restarts from the first source.
gen = iter(sources.items())

def render():
    """Show the next unlabeled source with one button per candidate plan.

    Pulls the next ``(id, source)`` pair from the module-level iterator
    ``gen``, replaces the cell output with a row of plan buttons followed by
    the source text, and wires each button to record its plan in ``labels``
    and then advance to the next source.

    When ``gen`` is exhausted (or was empty to begin with) the output is
    cleared and a completion message is printed, instead of letting
    ``StopIteration`` escape from the button callback.
    """
    entry = next(gen, None)
    if entry is None:
        clear_output()
        print('All sources have been labeled.')
        return
    # Named `source_id` rather than `id` to avoid shadowing the builtin.
    source_id, source = entry

    def on_click(plan, _):
        # `_` receives the clicked button, which the handler does not need.
        labels[source_id] = plan
        render()

    btns = []
    for p in plans:
        btn = widgets.Button(description=p)
        # Bind the plan now; a bare closure over `p` would see only the last
        # value of the loop variable.
        btn.on_click(partial(on_click, p))
        btns.append(btn)

    clear_output()
    disp(widgets.HBox(btns))
    print(source)

# Start (or resume) the labeling session by displaying the next source from `gen`.
render()

In [None]:
# Translation from the button captions stored in `labels` to the CountWhere
# members they stand for.
planstr_to_plan = {
    'rainfall': CountWhere.Rainfall,
    'helper': CountWhere.Helper,
    'own': CountWhere.Own,
}

# Same keys as `labels`, with each plan name replaced by its CountWhere member.
plan_labels = {
    source_id: planstr_to_plan[plan_name]
    for source_id, plan_name in labels.items()
}

In [None]:
# Persist the hand-assigned CountWhere labels for T1, overwriting any existing
# file at this path. The context manager guarantees the file is flushed and
# closed as soon as the dump finishes, rather than whenever the handle happens
# to be garbage-collected.
with open(f'{DATA_DIR}/T1-newlabels-countwhere.pkl', 'wb') as label_file:
    pickle.dump(plan_labels, label_file)