In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display as disp, clear_output
from functools import partial
from autoplan.token import OCamlTokenizer
from scripts.rainfall_ingest import load_new_labels, ingest_dataset
from grammars.rainfall.labels import CountWhere
from pickle_cache import PickleCache
import pickle
pcache = PickleCache()

In [3]:
REPO_DIR = os.path.expanduser('~/autoplan')
DATA_DIR = f'{REPO_DIR}/data/rainfall/raw'
CODE_DIR = f'{DATA_DIR}/Fall2013-RawData'

def read_coding_csv(name):
    return pd.read_csv(f'{DATA_DIR}/Fall2013Coding{name}.csv', index_col=0, header=None).T

In [4]:
plan_codes = pd.read_csv(f'{DATA_DIR}/PlanCodes-codes.csv')

def read_and_join_coding(name):
    coding_csv = read_coding_csv(name)
    valid_entries = coding_csv[coding_csv.PlanStructure.isnull()]
    combined_entries = valid_entries.set_index('PlanStructure').join(plan_codes.set_index('Code'))
    return combined_entries

In [5]:
coding_csv = read_coding_csv('T1')
missing_entries = coding_csv[coding_csv.PlanStructure.isnull()]

In [6]:
sources = {}
tokenizer = OCamlTokenizer()
for _, row in missing_entries.iterrows():
    id = row.ID
    path = f'{CODE_DIR}/T1/{id}.ml'
    if os.path.isfile(path):
        try:
            sources[id] = tokenizer.tokenize(open(path, 'r').read())[1]
        except UnicodeDecodeError:
            pass

In [9]:
labels = {}

In [10]:
plans = ['rainfall', 'helper', 'own']

gen = iter(sources.items())

def render():
    id, source = next(gen)
    
    def on_click(plan, _):
        labels[id] = plan
        render()
        
    btns = []
    for p in plans:
        btn = widgets.Button(description=p)        
        btn.on_click(partial(on_click, p))
        btns.append(btn)
        
    clear_output()
    disp(widgets.HBox(btns))
    print(source)

render()

HBox(children=(Button(description='rainfall', style=ButtonStyle()), Button(description='helper', style=ButtonS…

let rainfall (my_list : 'a list) =
  (let rec helper (alon : 'a list) (sum : int) (count : int) =
     (match (alon, sum) with
      | ([], v) -> if count > 0 then (v + 1) / count else 0
      | (hd::tl, v) ->
          if hd = (-999)
          then (if count > 0 then v / count else 0)
          else
            if hd >= 0
            then helper tl (v + hd) (count + 1)
            else helper tl v count : int) in
   helper my_list 0 0 : int)



StopIteration: 

In [12]:
old_labels = pickle.load(open(f'{DATA_DIR}/T1-newlabels-countwhere.pkl', 'rb'))

In [22]:
sum([
    1 if v == CountWhere.from_string(labels[k]) else 0 for k,v in old_labels.items() if k in labels
]) / len(labels)

0.9010989010989011

In [26]:
sources.keys()

dict_keys(['1', '2', '5', '6', '7', '9', '12', '13', '16', '17', '19', '20', '21', '24', '25', '26', '27', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '51', '52', '54', '55', '56', '57', '58', '59', '62', '63', '64', '67', '69', '70', '71', '72', '74', '75', '76', '80', '81', '82', '83', '84', '87', '88', '89', '90', '91', '92', '93', '94', '95', '97', '101', '102', '104', '105', '106', '107', '108', '110', '112', '116', '118', '121', '123', '125', '126', '129', '132', '133', '134', '135', '137', '138'])

In [31]:
print(sources['24'])

let rec rain_helper (rain_list : int list) =
  (match rain_list with
   | [] -> []
   | (-999)::tl -> []
   | num::tl -> if num >= 0 then num :: (rain_helper tl) else rain_helper tl : 
  int list)
let rainfall (rain_list : int list) =
  (let usable_rain = rain_helper rain_list in
   if rain_list = []
   then failwith "You should seriously consider entering actual data."
   else
     if usable_rain = []
     then
       failwith
         "Dividing by zero? Not on my watch! Enter at least one non-negative number, plz."
     else (List.fold_left (+) 0 usable_rain) / (List.length usable_rain) : 
  int)



In [33]:
planstr_to_plan = {
    'rainfall': CountWhere.Rainfall,
    'helper': CountWhere.Helper,
    'own': CountWhere.Own
}

plan_labels = {k: planstr_to_plan[v] for k, v in labels.items()}

In [34]:
pickle.dump(plan_labels, open(f'{DATA_DIR}/T1-newlabels-countwhere-will.pkl', 'wb'))