# Python Environment Setup

In [1]:
!pip3 install virtualenv



In [2]:
!python3 -m venv myenv

In [3]:
!source myenv/bin/activate

# Support Set Generator Utils Code

### Run this code block as it is required to run the other modules of prediction and evaluation.

In [13]:

import json

class SSG:

    def read_NDB(self,data_file):
        with open(data_file) as file:
            dataset = []

            for line in file:
                db = json.loads(line)

                facts = db["facts"]
                queries = db["queries"]
                dataset.append([facts, queries])
            return dataset


    def create_dataset(self,db):
        dataset = []
        eos = "<eos>"
        for d in db:

            questions = d[1]
            ctx = d[0]

            for q in questions:

                t = q["height"]
                gold_facts = q["facts"]
                context = ctx[: t + 1]
                flat_facts = [item for sublist in gold_facts for item in sublist]

                # all facts in flat facts can be positive
                state = [q["query"]]
                pos_act = [context[g] for g in flat_facts]
                # everything else is negative
                neg_act = [x for i, x in enumerate(context) if i not in flat_facts]

                dataset.append([state, eos, 0])
                dataset.extend([[state, n, 0] for n in neg_act])
                pos_set = [[state, p, 1] for p in pos_act]

                dataset.extend(pos_set)

                for g in gold_facts:
                    if len(g) <= 1:
                        state = [q["query"], context[g[0]]]

                        pos_act = eos
                        neg_act = context
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])
                    else:
                        g_0 = g[0]
                        g_1 = g[1]

                        state = [q["query"], context[g_0]]
                        pos_act = context[g_1]
                        neg_act = [x for i, x in enumerate(context) if i != g_1]
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

                        state = [q["query"], context[g_1]]
                        pos_act = context[g_0]
                        neg_act = [x for i, x in enumerate(context) if i != g_0]
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

                        state = [q["query"], context[g_0], context[g_1]]
                        pos_act = eos
                        neg_act = context
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

        return dataset


    def prepare_tokenizer(self,tokenizer):
        special_tokens = []
        special_tokens.extend(["<sep>", "<SEP>", "<eos>", "[SEP]"])
        tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})

# Prediction Code

### This module will take the path where you have the trained model along with path of Dev and Test file.

In [14]:
import argparse
import json
import os

import numpy as np
import torch.nn as nn
from sentence_transformers import SentenceTransformer, util

class Prediction:
    def __init__(self,input_path,model_path,threshold):
        self.input_path = input_path
        self.model_path = model_path
        self.threshold = threshold
    
    
    def find_pred(self,test_file):
        ssg = SSG()

        folder = self.input_path
        batch_size = 32

        model_path = self.model_path
        device = "cuda:0"

        model = SentenceTransformer(model_path, device=device)

        thresholds = self.threshold

        names = test_file

        softmax = nn.Softmax()
        for threshold in thresholds:
            for name in names:
                data_file = folder + "/" + name + ".jsonl"

                outfile = folder + "/" + name + "_" + str(threshold) + "_ssg_sup.json"
                dataset = ssg.read_NDB(data_file)
                ssg_data = []

                db_count = 0
                for d in dataset:

                    questions = d[1]
                    ctx = d[0]

                    ctx.insert(0, "<eos>")
                    ctx_reps = model.encode(ctx)
                    q_count = 0
                    for q in questions:

                        states = [[[-1, q["query"]]]]
                        new_states = []
                        final_sets = []
                        a_reps = ctx_reps[0: q["height"] + 2]

                        for t in range(2):

                            while states:
                                state = states.pop(0)

                                state_text = [s[1] for s in state]
                                s_text = ["[SEP]".join(state_text)]
                                s_reps = model.encode(s_text)

                                cos_scores = util.pytorch_cos_sim(s_reps, a_reps)[0]
                                cos_scores = cos_scores.cpu()

                                next_actions = np.nonzero(cos_scores > threshold).squeeze(1)

                                next_actions = next_actions.tolist()

                                if not next_actions:
                                    st = state.copy()
                                    final_sets.append(st[1:])

                                for a in next_actions:
                                    if a == 0:
                                        st = state.copy()
                                        final_sets.append(st[1:])
                                    else:
                                        pre_acts = [pre_act[0] for pre_act in state[1:]]
                                        if (a - 1) not in pre_acts:
                                            new_state = state.copy()
                                            new_state.append([a - 1, ctx[a]])
                                            new_states.append(new_state)
                            states = new_states
                            new_states = []

                        for s in states:
                            st = s.copy()
                            facts = st[1:]
                            if (
                                facts not in final_sets
                                and [facts[1], facts[0]] not in final_sets
                            ):
                                final_sets.append(st[1:])
                        data = {}
                        data["db_id"] = db_count
                        data["question_id"] = q_count
                        data["query"] = q["query"]
                        data["context_height"] = q["height"]
                        data["gold_facts"] = q["facts"]
                        data["answer"] = q["answer"]
                        data["metadata"] = {
                            "relation_type": q["relation"],
                            "query_type": q["type"],
                        }
                        data["ssg_output"] = final_sets

                        ssg_data.append(data)
                        q_count = q_count + 1

                    db_count = db_count + 1

                with open(outfile, "w") as out_file:
                    json.dump(ssg_data, out_file)

# Evaluating Precision, Recall, Accuracy

### This module will take the file generated from the previous cell for each of Dev and Test
### After that it will compute accuracy measures by treating the original Dev and Test file as base.

In [35]:

import argparse
import json
import os

class evaluate:
    def __init__(self):
        pass
        
    def find_matches(self,a_set, a_set_of_sets):
        exact = 0
        soft = 0
        found = False
        for s in a_set_of_sets:
            s_set = set(s)
            if a_set == s_set:
                exact = 1
                soft = 1
                found = True
                break
        if not found:
            for s in a_set_of_sets:
                s_set = set(s)
                if a_set <= s_set:
                    soft = 1
                    break

        return exact, soft


    def evaluate_ndb_with_ssg(self,data_file):
        with open(data_file) as json_file:
            data = json.load(json_file)

        counter = 0

        Ps_soft = {}
        Rs_soft = {}

        Ps_exact = {}
        Rs_exact = {}
        
        Acc_soft = {}
        Acc_exact = {}
        
        Relation_Ps_soft = {}
        Relation_Rs_soft = {}

        Relation_Ps_exact = {}
        Relation_Rs_exact = {}
        
        Relation_Acc_soft = {}
        Relation_Acc_exact = {}

        C = {}
        Relation_C = {}

        for d in data:
            counter = counter + 1

            gold_facts = d["gold_facts"]
            ssg_output = [[f[0] for f in ss] for ss in d["ssg_output"]]
            
            remove_lst = []
            for s in ssg_output:
                if (
                    len(s) > 1
                    and [s[1], s[0]] in ssg_output
                    and [s[1], s[0]] not in remove_lst
                ):
                    remove_lst.append(s)
            for r in remove_lst:
                ssg_output.remove(r)
            answer = d["answer"]
            q_type = d["metadata"]["query_type"]

            if "complex" in q_type:
                q_type = "join"
            if "arg" in q_type or "min" in q_type or "max" in q_type:
                q_type = "min/max"
            if q_type not in Ps_soft:
                P_soft = 0
                P_exact = 0
                R_soft = 0
                R_exact = 0
                A_soft = 0
                A_exact = 0
                c = 1
            else:
                P_soft = Ps_soft[q_type]
                R_soft = Rs_soft[q_type]
                P_exact = Ps_exact[q_type]
                R_exact = Rs_exact[q_type]
                A_soft = Acc_soft[q_type]
                A_exact = Acc_exact[q_type]
                c = C[q_type] + 1

            relation_type = d["metadata"]["relation_type"]
            if relation_type not in Relation_Ps_soft:
                Relation_P_soft = 0
                Relation_P_exact = 0
                Relation_R_soft = 0
                Relation_R_exact = 0
                Relation_A_soft = 0
                Relation_A_exact = 0
                Relation_c = 1
            else:
                Relation_P_soft = Relation_Ps_soft[relation_type]
                Relation_R_soft = Relation_Rs_soft[relation_type]
                Relation_P_exact = Relation_Ps_exact[relation_type]
                Relation_R_exact = Relation_Rs_exact[relation_type]
                Relation_A_soft = Relation_Acc_soft[relation_type]
                Relation_A_exact = Relation_Acc_exact[relation_type]
                Relation_c = Relation_C[relation_type] + 1

            
            ssg_count = 0
            gold_count = 0
            total_soft = 0
            total_exact = 0
            acc_soft = 0
            acc_exact = 0

            
            if len(ssg_output) == 0:
                total_soft = 1
                total_exact = 1
                ssg_count = 1
                
            
            for s in ssg_output:
                ssg_count = ssg_count + 1
                
                if s in gold_facts:
                    acc_soft+=1
                    acc_exact+=1

                if s in gold_facts or len(s) == 0:
                    total_soft = total_soft + 1
                    total_exact = total_exact + 1
                else:
                    if len(s) > 1 and [s[1], s[0]] in gold_facts:
                        total_soft = total_soft + 1
                        total_exact = total_exact + 1
                        acc_soft+=1
                        acc_exact+=1
                    else:
                        for gold_s in gold_facts:
                            if set(gold_s) <= set(s):
                                total_soft = total_soft + 1
                                acc_soft+=1
                                break
            P_soft = P_soft + total_soft / ssg_count
            P_exact = P_exact + total_exact / ssg_count
            if acc_soft>1:
                acc_soft=1
            if acc_exact>1:
                acc_exact=1
            A_soft = A_soft + acc_soft
            A_exact = A_exact + acc_exact


            Relation_P_soft = Relation_P_soft + total_soft / ssg_count
            Relation_P_exact = Relation_P_exact + total_exact / ssg_count
            
            Relation_A_soft+=acc_soft
            Relation_A_exact+=acc_exact
            
            total_exact = 0
            total_soft = 0

            # Recall
            if len(gold_facts) == 0 or answer == "None":
                total_soft = 1
                total_exact = 1
                gold_count = 1
            else:
                for g in gold_facts:
                    gold_count = gold_count + 1
                    exact, soft = self.find_matches(set(g), ssg_output)
                    total_soft = total_soft + soft
                    total_exact = total_exact + exact


            R_soft = R_soft + total_soft / gold_count
            R_exact = R_exact + total_exact / gold_count

            Relation_R_soft = Relation_R_soft + total_soft / gold_count
            Relation_R_exact = Relation_R_exact + total_exact / gold_count

            Ps_exact[q_type] = P_exact
            Rs_exact[q_type] = R_exact
            Ps_soft[q_type] = P_soft
            Rs_soft[q_type] = R_soft
            Acc_soft[q_type] = A_soft
            Acc_exact[q_type] = A_exact
            C[q_type] = c

            Relation_Ps_exact[relation_type] = Relation_P_exact
            Relation_Rs_exact[relation_type] = Relation_R_exact
            Relation_Ps_soft[relation_type] = Relation_P_soft
            Relation_Rs_soft[relation_type] = Relation_R_soft
            Relation_Acc_soft[relation_type] = Relation_A_soft
            Relation_Acc_exact[relation_type] = Relation_A_exact
            Relation_C[relation_type] = Relation_c



        total_p_exact = 0
        total_r_exact = 0
        total_p_soft = 0
        total_r_soft = 0
        total_a_soft = 0
        total_a_exact = 0
        total_c = 0
        
        print("##########----Query Type Accuracy measures----##########")
        print('\n')
        
        for t in Ps_exact:
            print(t + ":")
            print('Exact Precision',round((Ps_exact[t] / C[t])*100, 2),'Exact Recall', round((Rs_exact[t] / C[t])*100,2))
            print('Soft Precision',round((Ps_soft[t] / C[t])*100,2), 'Soft Recall',round((Rs_soft[t] / C[t])*100,2))
            print("Soft Accuracy",round((Acc_soft[t]/C[t])*100,2), "Exact Accuracy",round((Acc_exact[t] / C[t])*100,2))
            total_c = total_c + C[t]
            total_r_exact = total_r_exact + Rs_exact[t]
            total_p_exact = total_p_exact + Ps_exact[t]
            total_r_soft = total_r_soft + Rs_soft[t]
            total_p_soft = total_p_soft + Ps_soft[t]
            total_a_soft =total_a_soft + Acc_soft[t]
            total_a_exact =total_a_exact + Acc_exact[t]

        print("Total: ")
        print('Exact Precision',round((total_p_exact / total_c)*100,2), 'Exact Recall',round((total_r_exact / total_c)*100,2))
        print('Soft Precision',round((total_p_soft / total_c)*100,2), 'Soft Recall',round((total_r_soft / total_c)*100,2))
        print('Soft Accuracy',round((total_a_soft / total_c)*100,2), 'Exact Accuracy',round((total_a_exact / total_c)*100,2))
        
        Relation_total_p_exact = 0
        Relation_total_r_exact = 0
        Relation_total_p_soft = 0
        Relation_total_r_soft = 0
        Relation_total_a_soft = 0
        Relation_total_a_exact = 0
        Relation_total_c = 0
        
        print('\n')
        print('#########----Relation Accuracy measures----##########')
        for t in Relation_Ps_exact:
            print(t + ":")
            print('Exact Precision',round((Relation_Ps_exact[t] / Relation_C[t])*100, 2),'Exact Recall', round((Relation_Rs_exact[t] / Relation_C[t])*100,2))
            print('Soft Precision',round((Relation_Ps_soft[t] / Relation_C[t])*100,2), 'Soft Recall',round((Relation_Rs_soft[t] / Relation_C[t])*100,2))
            print("Soft Accuracy",round((Relation_Acc_soft[t]/Relation_C[t])*100,2), "Exact Accuracy",round((Relation_Acc_exact[t] / Relation_C[t])*100,2))
            Relation_total_c = Relation_total_c + 1
            Relation_total_r_exact = Relation_total_r_exact + round((Relation_Rs_exact[t] / Relation_C[t])*100,2)
            Relation_total_p_exact = Relation_total_p_exact + round((Relation_Ps_exact[t] / Relation_C[t])*100, 2)
            Relation_total_r_soft = Relation_total_r_soft + round((Relation_Rs_soft[t] / Relation_C[t])*100,2)
            Relation_total_p_soft = Relation_total_p_soft + round((Relation_Ps_soft[t] / Relation_C[t])*100,2)
            Relation_total_a_soft =Relation_total_a_soft + round((Relation_Acc_soft[t]/Relation_C[t])*100,2)
            Relation_total_a_exact =Relation_total_a_exact + round((Relation_Acc_exact[t] / Relation_C[t])*100,2)

        print("Total: ")
        print('Exact Precision',round((Relation_total_p_exact / Relation_total_c),2), 'Exact Recall',round((Relation_total_r_exact / Relation_total_c),2))
        print('Soft Precision',round((Relation_total_p_soft / Relation_total_c),2), 'Soft Recall',round((Relation_total_r_soft / Relation_total_c),2))
        print('Soft Accuracy',round((Relation_total_a_soft / Relation_total_c),2), 'Exact Accuracy',round((Relation_total_a_exact / Relation_total_c),2))

    

# Prediction for original task (KELM Data) trained on itself

### The Prediction class takes 3 parameters
### 1. Input File path
### 2. Model File path
### 3. Threshold value for cosine similarity

In [45]:
pred = Prediction("5k_data","5k_data/Model_Data",[0.7])

### The find_pred method takes 1 parameter
### 1. A list of file name whose prediction we want from the model
### The file name path is already provided in the previous step

In [46]:
pred.find_pred(["balanced_dev","balanced_test"])

### Evaluation class has method evaluate_ndb_with_ssg
### Which takes the exact location of the Prediction file generated from the previous step

In [48]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("5k_data/balanced_test_0.7_ssg_sup.json")

##########----Query Type Accuracy measures----##########


set:
Exact Precision 74.33 Exact Recall 91.67
Soft Precision 74.33 Soft Recall 91.67
Soft Accuracy 54.17 Exact Accuracy 54.17
count:
Exact Precision 52.23 Exact Recall 92.31
Soft Precision 52.23 Soft Recall 92.31
Soft Accuracy 61.54 Exact Accuracy 61.54
min/max:
Exact Precision 58.66 Exact Recall 88.51
Soft Precision 58.66 Soft Recall 88.51
Soft Accuracy 75.86 Exact Accuracy 75.86
bool:
Exact Precision 47.66 Exact Recall 73.08
Soft Precision 47.66 Soft Recall 73.08
Soft Accuracy 42.31 Exact Accuracy 42.31
Total: 
Exact Precision 57.93 Exact Recall 86.35
Soft Precision 57.93 Soft Recall 86.35
Soft Accuracy 59.05 Exact Accuracy 59.05


#########----Relation Accuracy measures----##########
P47:
Exact Precision 35.0 Exact Recall 100.0
Soft Precision 35.0 Soft Recall 100.0
Soft Accuracy 75.0 Exact Accuracy 75.0
P21:
Exact Precision 74.32 Exact Recall 96.55
Soft Precision 74.32 Soft Recall 96.55
Soft Accuracy 86.21 Exact Accuracy 86.

# Inference with Original(KELM Data) + Table Data(Wikipedia)

### Here we are running the original+table data through the model trained on Original KELM Data

In [49]:
pred = Prediction("Inference_5k_datatable","Inference_5k_datatable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [50]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_5k_datatable/combined_data_0.6_ssg_sup.json")

##########----Query Type Accuracy measures----##########


min/max:
Exact Precision 52.53 Exact Recall 90.95
Soft Precision 54.53 Soft Recall 91.24
Soft Accuracy 72.65 Exact Accuracy 72.65
set:
Exact Precision 42.7 Exact Recall 94.32
Soft Precision 46.41 Soft Recall 94.32
Soft Accuracy 69.32 Exact Accuracy 69.32
bool:
Exact Precision 35.73 Exact Recall 90.59
Soft Precision 40.38 Soft Recall 92.94
Soft Accuracy 72.94 Exact Accuracy 70.59
count:
Exact Precision 36.56 Exact Recall 88.63
Soft Precision 44.3 Soft Recall 94.12
Soft Accuracy 67.06 Exact Accuracy 62.35
Total: 
Exact Precision 42.8 Exact Recall 91.13
Soft Precision 47.1 Soft Recall 93.0
Soft Accuracy 70.67 Exact Accuracy 69.07


#########----Relation Accuracy measures----##########
P69:
Exact Precision 38.43 Exact Recall 90.74
Soft Precision 40.65 Soft Recall 90.74
Soft Accuracy 59.26 Exact Accuracy 59.26
P21:
Exact Precision 59.66 Exact Recall 96.69
Soft Precision 61.72 Soft Recall 96.69
Soft Accuracy 88.24 Exact Accuracy 88.2

# Inference with only Table Data(Wikipedia)

### Here we are running the table data through the model trained on Original KELM Data

In [51]:
pred = Prediction("Inference_original_onlytable","Inference_original_onlytable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [52]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_original_onlytable/combined_data_0.7_ssg_sup.json")

##########----Query Type Accuracy measures----##########


min/max:
Exact Precision 60.97 Exact Recall 20.78
Soft Precision 84.27 Soft Recall 36.03
Soft Accuracy 38.33 Exact Accuracy 21.67
bool:
Exact Precision 19.52 Exact Recall 55.71
Soft Precision 58.8 Soft Recall 94.29
Soft Accuracy 78.57 Exact Accuracy 42.86
count:
Exact Precision 58.03 Exact Recall 47.62
Soft Precision 84.09 Soft Recall 67.86
Soft Accuracy 41.07 Exact Accuracy 21.43
set:
Exact Precision 44.62 Exact Recall 61.83
Soft Precision 72.77 Soft Recall 75.81
Soft Accuracy 54.84 Exact Accuracy 41.94
Total: 
Exact Precision 44.52 Exact Recall 46.96
Soft Precision 74.17 Soft Recall 69.6
Soft Accuracy 54.44 Exact Accuracy 32.66


#########----Relation Accuracy measures----##########
P54:
Exact Precision 40.35 Exact Recall 48.51
Soft Precision 72.07 Soft Recall 71.49
Soft Accuracy 60.1 Exact Accuracy 38.42
P118:
Exact Precision 63.33 Exact Recall 40.0
Soft Precision 83.61 Soft Recall 61.11
Soft Accuracy 28.89 Exact Accuracy 6.

# Prediction for Original(KELM Data) + Table Data(Wikipedia) trained on itself

In [56]:
pred = Prediction("5k_datatable","5k_datatable/Model_Data",[0.7])
pred.find_pred(["balanced_dev","balanced_test"])

In [61]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("5k_datatable/balanced_test_0.7_ssg_sup.json")

##########----Query Type Accuracy measures----##########


set:
Exact Precision 58.55 Exact Recall 83.87
Soft Precision 58.55 Soft Recall 83.87
Soft Accuracy 64.52 Exact Accuracy 64.52
bool:
Exact Precision 33.08 Exact Recall 100.0
Soft Precision 33.08 Soft Recall 100.0
Soft Accuracy 84.21 Exact Accuracy 84.21
count:
Exact Precision 63.52 Exact Recall 86.36
Soft Precision 63.52 Soft Recall 86.36
Soft Accuracy 63.64 Exact Accuracy 63.64
min/max:
Exact Precision 65.46 Exact Recall 87.85
Soft Precision 65.46 Soft Recall 87.85
Soft Accuracy 68.42 Exact Accuracy 68.42
Total: 
Exact Precision 57.53 Exact Recall 88.53
Soft Precision 57.53 Soft Recall 88.53
Soft Accuracy 69.09 Exact Accuracy 69.09


#########----Relation Accuracy measures----##########
P106:
Exact Precision 42.43 Exact Recall 100.0
Soft Precision 42.43 Soft Recall 100.0
Soft Accuracy 82.61 Exact Accuracy 82.61
P27:
Exact Precision 29.68 Exact Recall 100.0
Soft Precision 29.68 Soft Recall 100.0
Soft Accuracy 90.0 Exact Accuracy

# Inference with only Table Data(Wikipedia)

### Here we are running the table data through the model trained on original+table Data

In [60]:
pred = Prediction("Inference_orig+table_onlytable","Inference_orig+table_onlytable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [62]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_orig+table_onlytable/combined_data_0.7_ssg_sup.json")

##########----Query Type Accuracy measures----##########


min/max:
Exact Precision 46.44 Exact Recall 45.69
Soft Precision 64.58 Soft Recall 49.44
Soft Accuracy 60.0 Exact Accuracy 55.0
bool:
Exact Precision 44.94 Exact Recall 85.0
Soft Precision 48.51 Soft Recall 85.0
Soft Accuracy 70.0 Exact Accuracy 70.0
count:
Exact Precision 75.16 Exact Recall 73.21
Soft Precision 82.59 Soft Recall 73.21
Soft Accuracy 46.43 Exact Accuracy 46.43
set:
Exact Precision 56.34 Exact Recall 65.32
Soft Precision 73.58 Soft Recall 77.42
Soft Accuracy 56.45 Exact Accuracy 46.77
Total: 
Exact Precision 54.98 Exact Recall 67.91
Soft Precision 66.36 Soft Recall 71.84
Soft Accuracy 58.87 Exact Accuracy 55.24


#########----Relation Accuracy measures----##########
P54:
Exact Precision 50.29 Exact Recall 69.17
Soft Precision 64.2 Soft Recall 73.97
Soft Accuracy 65.52 Exact Accuracy 61.08
P118:
Exact Precision 76.11 Exact Recall 62.22
Soft Precision 76.11 Soft Recall 62.22
Soft Accuracy 28.89 Exact Accuracy 28.89

# Prediction for only Tabe Data(Wikipedia) trained on itself

In [63]:
pred = Prediction("onlytable","onlytable/Model_Data",[0.7])
pred.find_pred(["balanced_dev","balanced_test"])

In [65]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("onlytable/balanced_test_0.7_ssg_sup.json")

##########----Query Type Accuracy measures----##########


set:
Exact Precision 49.62 Exact Recall 96.79
Soft Precision 53.84 Soft Recall 96.79
Soft Accuracy 85.71 Exact Accuracy 85.71
min/max:
Exact Precision 45.0 Exact Recall 76.6
Soft Precision 45.0 Soft Recall 76.6
Soft Accuracy 77.78 Exact Accuracy 77.78
bool:
Exact Precision 35.29 Exact Recall 88.8
Soft Precision 35.29 Soft Recall 88.8
Soft Accuracy 75.0 Exact Accuracy 75.0
count:
Exact Precision 51.65 Exact Recall 93.33
Soft Precision 51.65 Soft Recall 93.33
Soft Accuracy 53.33 Exact Accuracy 53.33
Total: 
Exact Precision 45.13 Exact Recall 86.72
Soft Precision 45.95 Soft Recall 86.72
Soft Accuracy 73.61 Exact Accuracy 73.61


#########----Relation Accuracy measures----##########
P54:
Exact Precision 46.95 Exact Recall 88.88
Soft Precision 47.95 Soft Recall 88.88
Soft Accuracy 84.75 Exact Accuracy 84.75
P118:
Exact Precision 36.86 Exact Recall 76.92
Soft Precision 36.86 Soft Recall 76.92
Soft Accuracy 23.08 Exact Accuracy 23.08
