# Node evaluation

For each file, a list of annotated nodes is available.
- show the annotated nodes
- show the text in the file
- show extracted nodes one by one

In [None]:
import os
import json
import csv
import ipywidgets as widgets
from IPython.display import display, clear_output

import numpy as np
from pprint import pprint

**Variable name**: Is this variable [variable_name] present in the annotation or in the text?
- choice: y or n

In [None]:
def annotate_name_lists(list1, list2):
    """Ask, for each item of ``list1``, whether it belongs to ``list2``.

    A yes/no toggle and a "Confirm" button are shown for one item at a time
    inside a single output widget. Widget callbacks cannot return a value to
    the calling cell, so once every item has been confirmed the answers are
    printed and stored on ``annotate_name_lists.result`` (1 for "yes", 0 for
    "no", in the order of ``list1``).

    Args:
        list1: Items to judge one by one. The caller's list is left untouched.
        list2: Reference list that is printed next to every question.

    Returns:
        None. Read ``annotate_name_lists.result`` after the widget reports
        "Annotation complete."; it is ``None`` while a session is unfinished.
    """
    results = []
    # Work on a private copy so the caller's list is not emptied as a side effect.
    pending = list(list1)
    # Forget the answers of any previous session: an unfinished run must not be
    # mistaken for a finished one by the cell that reads ``.result``.
    annotate_name_lists.result = None

    toggle_button = widgets.ToggleButtons(
        options=[('yes', 1), ('no', 0)],
        description='',
        disabled=False
    )
    confirmation_button = widgets.Button(description="Confirm")
    out = widgets.Output()

    def show_next_item():
        """Display the next pending item, or publish the results when done."""
        with out:
            clear_output()  # Clear previous output
            if pending:
                item = pending.pop(0)
                pprint(f"List 2: {list2}")
                toggle_button.description = f"{item}: Belongs to list2?"
                display(toggle_button)
                display(confirmation_button)
            else:
                print("Annotation complete.")
                print("Results:", results)
                annotate_name_lists.result = results  # Store results in a function attribute

    def on_confirm_button_clicked(b):
        """Record the current toggle value and move on to the next item."""
        with out:
            clear_output()  # Clear previous output
            results.append(toggle_button.value)
            show_next_item()

    confirmation_button.on_click(on_confirm_button_clicked)

    display(out)
    show_next_item()

In [None]:
def annotate_type_lists(list1, list2):
    """Ask, for each ``(name, type)`` pair of ``list1``, whether the type is right.

    A yes/no toggle and a "Confirm" button are shown for one pair at a time
    inside a single output widget, with ``list2`` printed as reference. Once
    every pair has been confirmed the answers (1 for "yes", 0 for "no", in the
    order of ``list1``) are printed and stored on
    ``annotate_type_lists.result``.

    Args:
        list1: Pairs to judge; ``item[0]`` and ``item[1]`` are shown as the
            variable name and the node type. The caller's list is left
            untouched.
        list2: Reference list that is printed next to every question.

    Returns:
        None. Read ``annotate_type_lists.result`` after the widget reports
        "Annotation complete."; it is ``None`` while a session is unfinished.
    """
    results = []
    # Work on a private copy so the caller's list is not emptied as a side effect.
    pending = list(list1)
    # Forget the answers of any previous session so they cannot be read as current.
    annotate_type_lists.result = None

    toggle_button = widgets.ToggleButtons(
        options=[('yes', 1), ('no', 0)],
        description='',
        disabled=False
    )
    confirmation_button = widgets.Button(description="Confirm")
    out = widgets.Output()

    def show_next_item():
        """Display the next pending pair, or publish the results when done."""
        with out:
            clear_output()  # Clear previous output
            if pending:
                item = pending.pop(0)

                pprint(f"List: {list2}")
                toggle_button.description = f"Is [{item[0]}] a [{item[1]}] node?"
                display(toggle_button)
                display(confirmation_button)
            else:
                print("Annotation complete.")
                print("Results:", results)
                # Results belong on this function, not on annotate_name_lists.
                annotate_type_lists.result = results
                # Legacy mirror: earlier versions wrote the type answers to
                # annotate_name_lists.result and existing reader cells may still
                # look there. Prefer annotate_type_lists.result in new code.
                annotate_name_lists.result = results

    def on_confirm_button_clicked(b):
        """Record the current toggle value and move on to the next pair."""
        with out:
            clear_output()  # Clear previous output
            results.append(toggle_button.value)
            show_next_item()

    confirmation_button.on_click(on_confirm_button_clicked)

    display(out)
    show_next_item()

In [None]:
def annotate_values_lists(list1, list2): #TODO
    """Collect per-node value precision/recall judgements with two sliders.

    For every node of ``list1`` the annotator sees the node's values and all of
    ``list2``, then enters (1) how many of the node's values appear in its
    counterpart in ``list2`` and (2) how many values that counterpart has.
    On "Confirm" a ``(precision, recall)`` tuple is recorded:

    * precision = matches / number of values of the ``list1`` node
    * recall    = matches / number of values entered for the counterpart

    Both are capped at 1.0, and an empty denominator yields 1.0. When all nodes
    are done the list of tuples is printed and stored on
    ``annotate_values_lists.result`` (``None`` while a session is unfinished).

    TODO: carried over from the original definition; what remains to be done
    is not stated there.

    Args:
        list1: Node dicts; each must provide ``"variable_name"`` and ``"values"``.
        list2: Reference nodes, pretty-printed one per line for the annotator.
    """
    results = []
    index = 0
    # Forget the answers of any previous session so they cannot be read as current.
    annotate_values_lists.result = None

    out = widgets.Output()

    # The counterpart slider used to be capped at 10, which made it impossible
    # to enter the size of a larger value list and silently overstated recall.
    # Keep 10 as the floor and widen it to the longest "values" list in list2.
    # Assumes list2 nodes mirror list1's schema; nodes without a list-like
    # "values" entry are ignored.
    counterpart_max = max(
        [10] + [
            len(node["values"])
            for node in list2
            if isinstance(node, dict) and isinstance(node.get("values"), (list, tuple))
        ]
    )

    def annotate_next(index):
        """Show the prompt for ``list1[index]`` or publish the results when done."""
        if index >= len(list1):
            with out:
                print("Annotations complete.")
                print("Results:", results)
            annotate_values_lists.result = results
            return

        item1 = list1[index]
        item1_name = item1["variable_name"]
        item1_values = item1["values"]

        slider = widgets.IntSlider(
            min=0,
            max=len(item1_values),
            description=item1_name)
        slider2 = widgets.IntSlider(
            min=0,
            max=counterpart_max,
            description="counterpart num values:")
        confirm_button = widgets.Button(description="Confirm")

        def on_confirm_button_clicked(b):
            """Turn the two slider values into (precision, recall) and advance."""
            with out:
                result = slider.value
                len_item2_values = slider2.value
                precision = result / len(item1_values) if len(item1_values) > 0 else 1.0
                recall = result / len_item2_values if len_item2_values > 0 else 1.0
                # The two sliders are independent, so the match count can exceed
                # the entered counterpart size; cap both ratios at 1.
                results.append((min(precision, 1.0), min(recall, 1.0)))

                clear_output()
                annotate_next(index + 1)

        confirm_button.on_click(on_confirm_button_clicked)

        with out:
            print(f"How many values of [{item1_name}] appear in its counterpart's values in List 2?\n Values: {item1_values}\n\nList 2:")
            for node in list2:
                pprint(node)
            display(slider)

            print(f"How many values does [{item1_name}]'s counterpart in List 2 has?")
            display(slider2)

            display(confirm_button)

    display(out)
    annotate_next(index)

In [None]:
# config dir
node_label_dir = "./data/node_adjusted"              # human-annotated (label) nodes, one file per document
node_extract_dir = "./experiments/node/few_shot/nodes_1shot_0"  # extracted nodes, same file names
# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. ".ipynb_checkpoints"). Sort and keep regular files only so the work
# queue is identical after every kernel restart.
node_label_files = sorted(
    name
    for name in os.listdir(node_label_dir)
    if os.path.isfile(os.path.join(node_label_dir, name))
)


# restart HERE ▶

In [None]:
# Take the next label file off the front of the queue.
# (Assign `file` by hand instead if a specific file should be evaluated.)
file = node_label_files[0]
del node_label_files[0]
print(f"{file}; {len(node_label_files)} remaining.")


In [None]:
# evaluate: variable name
print(f"{file}; {len(node_label_files)} remaining.")

# Read the annotated (label) nodes and the extracted nodes; both directories
# hold a file with the same name.
with open(f"{node_label_dir}/{file}", "r", encoding='utf-8') as f:
    nodes_label = json.loads(f.read())
with open(f"{node_extract_dir}/{file}", "r", encoding='utf-8') as f:
    nodes_extract = json.loads(f.read())

# Only the variable names are compared in this step.
names_label = list(map(lambda entry: entry["variable_name"], nodes_label))
names_extract = list(map(lambda entry: entry["variable_name"], nodes_extract))

# Each extracted name is judged against the label names (shown as "List 2").
print("List2: labels")
annotate_name_lists(names_extract, names_label)

In [None]:
# Read back the yes/no answers recorded by `annotate_name_lists`.
results = getattr(annotate_name_lists, 'result', None)
print("Final Results:", results)

# Fail early with a readable message instead of a TypeError/IndexError below.
if results is None:
    raise RuntimeError(
        "No name-annotation results available: finish the annotation widget "
        "above before running this cell."
    )
if len(results) != len(nodes_extract):
    raise RuntimeError(
        f"Got {len(results)} answers for {len(nodes_extract)} extracted nodes; "
        "the stored results do not belong to this file. Re-run the name annotation."
    )

# Precision: share of extracted nodes judged present in the labels.
# An empty denominator counts as 1.0, as in `annotate_values_lists`.
precision = min(np.sum(results) / len(nodes_extract), 1.0) if nodes_extract else 1.0
print("Precision:", precision)

# Recall: matched nodes over label nodes. Several extracted nodes can match the
# same label, hence the cap at 1.0.
recall = min(np.sum(results) / len(nodes_label), 1.0) if nodes_label else 1.0
print("Recall:", recall)

# Extracted nodes confirmed by the annotator; the type and value steps use only these.
successful_nodes_extract = [node for node, keep in zip(nodes_extract, results) if keep]

**Variable type** (`annotate_type_lists`): Is the variable [variable_name] a [variable_type] node?
- choice: y or n

In [None]:
# evaluate: variable type (1/1)

print(f"{file}; {len(node_label_files)} remaining.")

# Reload both node files so this cell also works right after a manual `file` change.
with open(f"{node_label_dir}/{file}", "r", encoding='utf-8') as f:
    nodes_label = json.loads(f.read())
with open(f"{node_extract_dir}/{file}", "r", encoding='utf-8') as f:
    nodes_extract = json.loads(f.read())

# (name, type) pairs: labels serve as reference, and only the extracted nodes
# that passed the name check are judged.
types_label = [
    (entry["variable_name"], entry["variable_type"])
    for entry in nodes_label
]
types_extract = [
    (entry["variable_name"], entry["variable_type"])
    for entry in successful_nodes_extract
]

annotate_type_lists(types_extract, types_label)

In [None]:
# Read the type answers from the function that collected them. Older versions
# of `annotate_type_lists` stored them on `annotate_name_lists.result`, so fall
# back to that attribute only when the dedicated one does not exist.
if hasattr(annotate_type_lists, 'result'):
    results = annotate_type_lists.result
else:
    results = getattr(annotate_name_lists, 'result', None)
print("Final Results:", results)

if results is None:
    raise RuntimeError(
        "No type-annotation results available: finish the annotation widget "
        "above before running this cell."
    )
if len(results) != len(successful_nodes_extract):
    raise RuntimeError(
        f"Got {len(results)} answers for {len(successful_nodes_extract)} matched nodes; "
        "the stored results do not belong to the type annotation of this file."
    )

# Accuracy is undefined when no extracted node passed the name check; record NaN
# explicitly rather than dividing by zero.
type_acc = np.sum(results) / len(results) if results else float("nan")
print("Type Accuracy:", type_acc)

**Values**: (For extracted variables that appear in the annotation) 
- Is this extracted value [value] present in the annotation?
- Is this annotated value [value] present in the extraction?

Use slider: how many are present?

In [None]:
# evaluate: values -- one two-slider prompt per extracted node that passed the name check
print(f"{file}; {len(node_label_files)} remaining.")

annotate_values_lists(
    successful_nodes_extract,  # nodes whose values are judged
    nodes_label,               # reference nodes shown to the annotator
)

In [None]:
# Read back the (precision, recall) tuples recorded by `annotate_values_lists`.
results = getattr(annotate_values_lists, 'result', None)
print("Final Results:", results)

if results is None:
    raise RuntimeError(
        "No value-annotation results available: finish the annotation widget "
        "above before running this cell."
    )

if results:
    # Average over the annotated nodes. The loop names differ from the
    # notebook-level `precision`/`recall` to keep the two apart.
    value_precision = np.mean([node_precision for node_precision, _ in results])
    value_recall = np.mean([node_recall for _, node_recall in results])
else:
    # No node reached the value step: the means are undefined, so record NaN
    # explicitly instead of triggering numpy's mean-of-empty warning.
    value_precision = value_recall = float("nan")

# Summary of every metric collected for this file (displayed as the cell output).
file, precision, recall, type_acc, value_precision, value_recall

In [None]:
# Append this file's metrics as one row of the human-evaluation CSV.
# NOTE: running this cell twice for the same file appends a duplicate row.
csv_path = "evaluation_human/node/nodes_1shot_0.csv"
# Create the output directory on first use; open(..., "a") creates the file but
# not its parent directories.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

with open(csv_path, "a", encoding="utf-8", newline="") as f:
    csv_writer = csv.writer(f)

    # Column order: file, name precision, name recall, type accuracy,
    # value precision, value recall.
    row = [file, precision, recall, type_acc, value_precision, value_recall]
    csv_writer.writerow(row)