In [1]:
from zss import simple_distance, Node

In [2]:
from autoplan.token import OCamlTokenizer
from scripts.rainfall_ingest import ingest_dataset
from tqdm.auto import tqdm
from iterextras import par_for, unzip
import numpy as np
from scipy.stats import mode
from autoplan.trainer import ClassEvaluation

In [3]:
dataset_name = 'T1'
dataset = ingest_dataset(dataset_name)

Skipped 16 programs


In [4]:
import json
tokenizer = OCamlTokenizer()

asts = []
for item in dataset.dataset:
    source = item['source']
    stdout, stderr = tokenizer._call_process('ocaml', 'ast.native', source)
    try:
        asts.append(json.loads(stdout))
    except json.JSONDecodeError:
        print('='*30)
        print(stdout)
        print(stderr)

In [5]:
asts[0]

[['fun',
  ['fun',
   ['let',
    ['fun',
     ['fun',
      ['fun',
       ['match',
        ['tuple', 'ident', 'ident'],
        ['apply', 'ident', 'const'],
        ['apply', 'ident', 'const'],
        ['tuple', 'ident', 'ident'],
        ['tuple', 'ident', 'ident'],
        ['if',
         ['apply', 'ident', 'ident', 'const'],
         ['apply',
          'ident',
          'ident',
          ['apply', 'ident', 'ident', 'ident'],
          ['apply', 'ident', 'ident', 'const']],
         ['apply', 'ident', 'ident', 'ident', 'ident']]]]]],
    ['match',
     ['apply', 'ident', 'ident', 'const', 'const'],
     ['apply', 'ident', 'ident', 'ident']]]]]]

In [6]:
def json_to_tree(toplevel):
    prog = Node("toplevel")
    
    def helper(obj):
        if isinstance(obj, list):
            node = Node(obj[0])
            for kid in obj[1:]:
                node.addkid(helper(kid))
            return node
        else:
            return Node(obj)
    
    
    for fun in toplevel:
        prog.addkid(helper(fun))
        
    return prog

In [7]:
trees = [json_to_tree(ast) for ast in asts]

In [8]:
def compute_dists(tup):
    (i, t) = tup
    dists = [(j, simple_distance(t, t2)) for j, t2 in enumerate(trees) if i != j]
    sorted_idx = [tup[0] for tup in sorted(dists, key=lambda tup: tup[1])]
    return sorted_idx

sorted_idxs = par_for(compute_dists, list(enumerate(trees)), process=True)

100%|██████████| 45/45 [01:05<00:00,  1.46s/it]


In [9]:
true = [dataset.dataset[i]['labels'].item() for i in range(len(trees))]
pred = [[dataset.dataset[j]['labels'].item() for j in sorted_idx] for sorted_idx in sorted_idxs]

In [10]:
def topk_pred(k):    
    return [mode(p[:k])[0][0] for t, p in zip(true, pred)]

class_names = [str(cls).split('.')[1] for cls in dataset.label_set]

In [11]:
print(dataset.dataset[25]['source'])

let rainfall alon =
  let rec _rainfall alon sum quantity =
    match alon with
    | [] -> sum / quantity
    | hd::tl ->
        (match hd < 0 with
         | true ->
             if hd = (-999)
             then sum / quantity
             else _rainfall tl sum quantity
         | false -> _rainfall tl (sum + hd) (succ quantity)) in
  _rainfall alon 0 0



In [12]:
print(dataset.dataset[pred[25][0]]['source'])

let rec rainfall_helper (aloi : int list) (q_rain : int) (counter : int) =
  (match aloi with
   | [] ->
       if counter = 0
       then failwith "no rainfall value could be calculated"
       else q_rain / counter
   | head::tail ->
       (match head with
        | (-999) ->
            if counter = 0
            then failwith "no rainfall value could be calculated"
            else q_rain / counter
        | other ->
            if head < 0
            then rainfall_helper tail q_rain counter
            else rainfall_helper tail (q_rain + head) (counter + 1)) : 
  int)
let rainfall (aloi : int list) = (rainfall_helper aloi 0 0 : int)



In [13]:
evl = ClassEvaluation.from_preds(true, topk_pred(1), class_names)
print(evl.accuracy)
evl.plot_cm('Confusion matrix')

0.9111111111111111


In [15]:
idxs = evl.incorrect()
for i in idxs:
    print(dataset.dataset[i]['source'])
    print('='*30)
    print(dataset.dataset[sorted_idxs[i][0]]['source'])
    print('\n\n')

let rainfall (alon : int list) =
  (let rec rainfallHelp (alon : int list) (nlon : int list) =
     (match alon with
      | [] ->
          (List.fold_right (fun x -> fun y -> x + y) nlon 0) /
            (List.length nlon)
      | hd::tl ->
          (match hd with
           | (-999) ->
               if (List.length nlon) = 0
               then 0
               else
                 (List.fold_right (fun x -> fun y -> x + y) nlon 0) /
                   (List.length nlon)
           | _ ->
               if hd >= 0
               then rainfallHelp tl (hd :: nlon)
               else rainfallHelp tl nlon) : int) in
   rainfallHelp alon [] : int)

let rec rainfall (alon : int list) =
  (let rec avg_rain (alon : int list) (sum : int) (count : int) =
     (match alon with
      | [] -> if count = 0 then 0 else sum / count
      | hd::tl ->
          if hd > 0
          then avg_rain tl (sum + hd) (count + 1)
          else
            if hd > (-999)
            then avg_rain tl sum co