# Python Environment Setup

In [1]:
!pip3 install virtualenv



In [2]:
!python3 -m venv myenv

In [9]:
!source myenv/bin/activate

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


# Support Set Generator Utils Code

### Run this code block as it is required to run the other modules of prediction and evaluation.

In [253]:

import json

class SSG:

    def read_NDB(self,data_file):
        with open(data_file) as file:
            dataset = []

            for line in file:
                db = json.loads(line)

                facts = db["facts"]
                queries = db["queries"]
                dataset.append([facts, queries])
            return dataset


    def create_dataset(self,db):
        dataset = []
        eos = "<eos>"
        for d in db:

            questions = d[1]
            ctx = d[0]

            for q in questions:

                t = q["height"]
                gold_facts = q["facts"]
                context = ctx[: t + 1]
                flat_facts = [item for sublist in gold_facts for item in sublist]

                # all facts in flat facts can be positive
                state = [q["query"]]
                pos_act = [context[g] for g in flat_facts]
                # everything else is negative
                neg_act = [x for i, x in enumerate(context) if i not in flat_facts]

                dataset.append([state, eos, 0])
                dataset.extend([[state, n, 0] for n in neg_act])
                pos_set = [[state, p, 1] for p in pos_act]

                dataset.extend(pos_set)

                for g in gold_facts:
                    if len(g) <= 1:
                        state = [q["query"], context[g[0]]]

                        pos_act = eos
                        neg_act = context
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])
                    else:
                        g_0 = g[0]
                        g_1 = g[1]

                        state = [q["query"], context[g_0]]
                        pos_act = context[g_1]
                        neg_act = [x for i, x in enumerate(context) if i != g_1]
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

                        state = [q["query"], context[g_1]]
                        pos_act = context[g_0]
                        neg_act = [x for i, x in enumerate(context) if i != g_0]
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

                        state = [q["query"], context[g_0], context[g_1]]
                        pos_act = eos
                        neg_act = context
                        item = [state, pos_act, 1]
                        dataset.append(item)
                        dataset.extend([[state, n, 0] for n in neg_act])

        return dataset


    def prepare_tokenizer(self,tokenizer):
        special_tokens = []
        special_tokens.extend(["<sep>", "<SEP>", "<eos>", "[SEP]"])
        tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})

# Prediction Code

### This module will take the path where you have the trained model along with path of Dev and Test file.

In [254]:
import argparse
import json
import os

import numpy as np
import torch.nn as nn
from sentence_transformers import SentenceTransformer, util

class Prediction:
    def __init__(self,input_path,model_path,threshold):
        self.input_path = input_path
        self.model_path = model_path
        self.threshold = threshold
    
    
    def find_pred(self,test_file):
        ssg = SSG()

        folder = self.input_path
        batch_size = 32

        model_path = self.model_path
        device = "cuda:0"

        model = SentenceTransformer(model_path, device=device)

        thresholds = self.threshold

        names = test_file

        softmax = nn.Softmax()
        for threshold in thresholds:
            for name in names:
                data_file = folder + "/" + name + ".jsonl"

                outfile = folder + "/" + name + "_" + str(threshold) + "_ssg_sup.json"
                dataset = ssg.read_NDB(data_file)
                ssg_data = []

                db_count = 0
                for d in dataset:

                    questions = d[1]
                    ctx = d[0]

                    ctx.insert(0, "<eos>")
                    ctx_reps = model.encode(ctx)
                    q_count = 0
                    for q in questions:

                        states = [[[-1, q["query"]]]]
                        new_states = []
                        final_sets = []
                        a_reps = ctx_reps[0: q["height"] + 2]

                        for t in range(2):

                            while states:
                                state = states.pop(0)

                                state_text = [s[1] for s in state]
                                s_text = ["[SEP]".join(state_text)]
                                s_reps = model.encode(s_text)

                                cos_scores = util.pytorch_cos_sim(s_reps, a_reps)[0]
                                cos_scores = cos_scores.cpu()

                                next_actions = np.nonzero(cos_scores > threshold).squeeze(1)

                                next_actions = next_actions.tolist()

                                if not next_actions:
                                    st = state.copy()
                                    final_sets.append(st[1:])

                                for a in next_actions:
                                    if a == 0:
                                        st = state.copy()
                                        final_sets.append(st[1:])
                                    else:
                                        pre_acts = [pre_act[0] for pre_act in state[1:]]
                                        if (a - 1) not in pre_acts:
                                            new_state = state.copy()
                                            new_state.append([a - 1, ctx[a]])
                                            new_states.append(new_state)
                            states = new_states
                            new_states = []

                        for s in states:
                            st = s.copy()
                            facts = st[1:]
                            if (
                                facts not in final_sets
                                and [facts[1], facts[0]] not in final_sets
                            ):
                                final_sets.append(st[1:])
                        data = {}
                        data["db_id"] = db_count
                        data["question_id"] = q_count
                        data["query"] = q["query"]
                        data["context_height"] = q["height"]
                        data["gold_facts"] = q["facts"]
                        data["answer"] = q["answer"]
                        data["metadata"] = {
                            "relation_type": q["relation"],
                            "query_type": q["type"],
                        }
                        data["ssg_output"] = final_sets

                        ssg_data.append(data)
                        q_count = q_count + 1

                    db_count = db_count + 1

                with open(outfile, "w") as out_file:
                    json.dump(ssg_data, out_file)

# Evaluating Precision, Recall, Accuracy

### This module will take the file generated from the previous cell for each of Dev and Test
### After that it will compute accuracy measures by treating the original Dev and Test file as base.

In [255]:

import argparse
import json
import os
import pandas as pd

class evaluate:
    def __init__(self):
        pass
        
    def find_matches(self,a_set, a_set_of_sets):
        exact = 0
        soft = 0
        found = False
        for s in a_set_of_sets:
            s_set = set(s)
            if a_set == s_set:
                exact = 1
                soft = 1
                found = True
                break
        if not found:
            for s in a_set_of_sets:
                s_set = set(s)
                if a_set <= s_set:
                    soft = 1
                    break

        return exact, soft


    def evaluate_ndb_with_ssg(self,data_file):
        with open(data_file) as json_file:
            data = json.load(json_file)

        counter = 0

        Ps_soft = {}
        Rs_soft = {}

        Ps_exact = {}
        Rs_exact = {}
        
        Acc_soft = {}
        Acc_exact = {}
        
        Relation_Ps_soft = {}
        Relation_Rs_soft = {}

        Relation_Ps_exact = {}
        Relation_Rs_exact = {}
        
        Relation_Acc_soft = {}
        Relation_Acc_exact = {}

        C = {}
        Relation_C = {}

        for d in data:
            counter = counter + 1

            gold_facts = d["gold_facts"]
            ssg_output = [[f[0] for f in ss] for ss in d["ssg_output"]]
            
            remove_lst = []
            for s in ssg_output:
                if (
                    len(s) > 1
                    and [s[1], s[0]] in ssg_output
                    and [s[1], s[0]] not in remove_lst
                ):
                    remove_lst.append(s)
            for r in remove_lst:
                ssg_output.remove(r)
            answer = d["answer"]
            q_type = d["metadata"]["query_type"]

            if "complex" in q_type:
                q_type = "join"
            if "arg" in q_type or "min" in q_type or "max" in q_type:
                q_type = "min/max"
            if q_type not in Ps_soft:
                P_soft = 0
                P_exact = 0
                R_soft = 0
                R_exact = 0
                A_soft = 0
                A_exact = 0
                c = 1
            else:
                P_soft = Ps_soft[q_type]
                R_soft = Rs_soft[q_type]
                P_exact = Ps_exact[q_type]
                R_exact = Rs_exact[q_type]
                A_soft = Acc_soft[q_type]
                A_exact = Acc_exact[q_type]
                c = C[q_type] + 1

            relation_type = d["metadata"]["relation_type"]
            if relation_type not in Relation_Ps_soft:
                Relation_P_soft = 0
                Relation_P_exact = 0
                Relation_R_soft = 0
                Relation_R_exact = 0
                Relation_A_soft = 0
                Relation_A_exact = 0
                Relation_c = 1
            else:
                Relation_P_soft = Relation_Ps_soft[relation_type]
                Relation_R_soft = Relation_Rs_soft[relation_type]
                Relation_P_exact = Relation_Ps_exact[relation_type]
                Relation_R_exact = Relation_Rs_exact[relation_type]
                Relation_A_soft = Relation_Acc_soft[relation_type]
                Relation_A_exact = Relation_Acc_exact[relation_type]
                Relation_c = Relation_C[relation_type] + 1

            
            ssg_count = 0
            gold_count = 0
            total_soft = 0
            total_exact = 0
            acc_soft = 0
            acc_exact = 0

            
            if len(ssg_output) == 0:
                total_soft = 1
                total_exact = 1
                ssg_count = 1
                
            
            for s in ssg_output:
                ssg_count = ssg_count + 1
                
                if s in gold_facts:
                    acc_soft+=1
                    acc_exact+=1

                if s in gold_facts or len(s) == 0:
                    total_soft = total_soft + 1
                    total_exact = total_exact + 1
                else:
                    if len(s) > 1 and [s[1], s[0]] in gold_facts:
                        total_soft = total_soft + 1
                        total_exact = total_exact + 1
                        acc_soft+=1
                        acc_exact+=1
                    else:
                        for gold_s in gold_facts:
                            if set(gold_s) <= set(s):
                                total_soft = total_soft + 1
                                acc_soft+=1
                                break
            P_soft = P_soft + total_soft / ssg_count
            P_exact = P_exact + total_exact / ssg_count
            if acc_soft>1:
                acc_soft=1
            if acc_exact>1:
                acc_exact=1
            A_soft = A_soft + acc_soft
            A_exact = A_exact + acc_exact


            Relation_P_soft = Relation_P_soft + total_soft / ssg_count
            Relation_P_exact = Relation_P_exact + total_exact / ssg_count
            
            Relation_A_soft+=acc_soft
            Relation_A_exact+=acc_exact
            
            total_exact = 0
            total_soft = 0

            # Recall
            if len(gold_facts) == 0 or answer == "None":
                total_soft = 1
                total_exact = 1
                gold_count = 1
            else:
                for g in gold_facts:
                    gold_count = gold_count + 1
                    exact, soft = self.find_matches(set(g), ssg_output)
                    total_soft = total_soft + soft
                    total_exact = total_exact + exact


            R_soft = R_soft + total_soft / gold_count
            R_exact = R_exact + total_exact / gold_count

            Relation_R_soft = Relation_R_soft + total_soft / gold_count
            Relation_R_exact = Relation_R_exact + total_exact / gold_count

            Ps_exact[q_type] = P_exact
            Rs_exact[q_type] = R_exact
            Ps_soft[q_type] = P_soft
            Rs_soft[q_type] = R_soft
            Acc_soft[q_type] = A_soft
            Acc_exact[q_type] = A_exact
            C[q_type] = c

            Relation_Ps_exact[relation_type] = Relation_P_exact
            Relation_Rs_exact[relation_type] = Relation_R_exact
            Relation_Ps_soft[relation_type] = Relation_P_soft
            Relation_Rs_soft[relation_type] = Relation_R_soft
            Relation_Acc_soft[relation_type] = Relation_A_soft
            Relation_Acc_exact[relation_type] = Relation_A_exact
            Relation_C[relation_type] = Relation_c



        total_p_exact = 0
        total_r_exact = 0
        total_p_soft = 0
        total_r_soft = 0
        total_a_soft = 0
        total_a_exact = 0
        total_c = 0
        
        # creating the necessary columns as required
        qt_column_names = ["Query Type","EM Precision", "EM Recall", "EM Accuracy", "EM F1 score","SM Precision", "SM Recall", "SM Accuracy", "SM F1 score"]
        global qt_table
        qt_table = pd.DataFrame(columns = qt_column_names)

#         print("##########----Query Type Accuracy measures----##########")
#         print('\n')
        
        for t in Ps_exact:
#             print(t + ":")
#             print('Exacte Precision',round((Ps_exact[t] / C[t])*100, 2),'Exact Recall', round((Rs_exact[t] / C[t])*100,2))
#             print('Soft Precision',round((Ps_soft[t] / C[t])*100,2), 'Soft Recall',round((Rs_soft[t] / C[t])*100,2))
#             print("Soft Accuracy",round((Acc_soft[t]/C[t])*100,2), "Exact Accuracy",round((Acc_exact[t] / C[t])*100,2))
            try:
                sm_f1_score = 2*((round((Ps_soft[t] / C[t])*100, 2))*(round((Rs_soft[t] / C[t])*100,2)))/((round((Ps_soft[t] / C[t])*100, 2))+(round((Rs_soft[t] / C[t])*100,2)))
            except:
                sm_f1_score = 0
            try:
                em_f1_score = 2*((round((Ps_exact[t] / C[t])*100, 2))*(round((Rs_exact[t] / C[t])*100,2)))/((round((Ps_exact[t] / C[t])*100, 2))+(round((Rs_exact[t] / C[t])*100,2)))
            except:
                em_f1_score = 0
            qt_table = qt_table.append({"Query Type":t, "EM Precision":round((Ps_exact[t] / C[t])*100, 2), "EM Recall":round((Rs_exact[t] / C[t])*100,2),"EM F1 score":round(em_f1_score,2),"SM F1 score":round(sm_f1_score,2),"SM Precision":round((Ps_soft[t] / C[t])*100,2),"SM Recall":round((Rs_soft[t] / C[t])*100,2),"SM Accuracy":round((Acc_soft[t]/C[t])*100,2),"EM Accuracy":round((Acc_exact[t] / C[t])*100,2)},ignore_index=True)
            
            total_c = total_c + C[t]
            total_r_exact = total_r_exact + Rs_exact[t]
            total_p_exact = total_p_exact + Ps_exact[t]
            total_r_soft = total_r_soft + Rs_soft[t]
            total_p_soft = total_p_soft + Ps_soft[t]
            total_a_soft =total_a_soft + Acc_soft[t]
            total_a_exact =total_a_exact + Acc_exact[t]
#         print(qt_table.head())
        try:
            avg_em_f1_score = 2*(round((total_p_exact / total_c)*100,2)*round((total_r_exact / total_c)*100,2))/(round((total_p_exact / total_c)*100,2)+round((total_r_exact / total_c)*100,2))
        except:
            avg_em_f1_score = 0
        try:
            avg_sm_f1_score = 2*(round((total_p_soft / total_c)*100,2)*round((total_r_soft / total_c)*100,2))/(round((total_p_soft / total_c)*100,2)+round((total_r_soft / total_c)*100,2))
        except:
            avg_sm_f1_score = 0
#         print("Total: ")
#         print('Exact Precision',round((total_p_exact / total_c)*100,2), 'Exact Recall',round((total_r_exact / total_c)*100,2))
#         print('Soft Precision',round((total_p_soft / total_c)*100,2), 'Soft Recall',round((total_r_soft / total_c)*100,2))
#         print('Soft Accuracy',round((total_a_soft / total_c)*100,2), 'Exact Accuracy',round((total_a_exact / total_c)*100,2))
        qt_table = qt_table.append({"Query Type":"Average", "EM Precision":round((total_p_soft / total_c)*100,2), "EM Recall":round((total_r_exact / total_c)*100,2),"EM F1 score":round(avg_em_f1_score,2),"SM F1 score":round(avg_sm_f1_score,2),"SM Precision":round((total_p_soft / total_c)*100,2),"SM Recall":round((total_r_soft / total_c)*100,2),"SM Accuracy":round((total_a_soft / total_c)*100,2),"EM Accuracy":round((total_a_exact / total_c)*100,2)},ignore_index=True)
        
        rt_column_names = ["Relation Type","EM Precision", "EM Recall", "EM Accuracy", "EM F1 score","SM Precision", "SM Recall", "SM Accuracy", "SM F1 score"]
        global rt_table
        rt_table = pd.DataFrame(columns = rt_column_names)
        Relation_total_p_exact = 0
        Relation_total_r_exact = 0
        Relation_total_p_soft = 0
        Relation_total_r_soft = 0
        Relation_total_a_soft = 0
        Relation_total_a_exact = 0
        Relation_total_c = 0
        
#         print('\n')
#         print('#########----Relation Accuracy measures----##########')
        for t in Relation_Ps_exact:
#             print(t + ":")
#             print('Exact Precision',round((Relation_Ps_exact[t] / Relation_C[t])*100, 2),'Exact Recall', round((Relation_Rs_exact[t] / Relation_C[t])*100,2))
#             print('Soft Precision',round((Relation_Ps_soft[t] / Relation_C[t])*100,2), 'Soft Recall',round((Relation_Rs_soft[t] / Relation_C[t])*100,2))
#             print("Soft Accuracy",round((Relation_Acc_soft[t]/Relation_C[t])*100,2), "Exact Accuracy",round((Relation_Acc_exact[t] / Relation_C[t])*100,2))
            try:
                rel_sm_f1_score = 2*((round((Relation_Ps_soft[t] / Relation_C[t])*100, 2))*(round((Relation_Rs_soft[t] / Relation_C[t])*100,2)))/((round((Relation_Ps_soft[t] / Relation_C[t])*100, 2))+(round((Relation_Rs_soft[t] / Relation_C[t])*100,2)))
            except:
                rel_sm_f1_score = 0
            try:
                rel_em_f1_score = 2*((round((Relation_Ps_exact[t] / Relation_C[t])*100, 2))*(round((Relation_Rs_exact[t] / Relation_C[t])*100,2)))/((round((Relation_Ps_exact[t] / Relation_C[t])*100, 2))+(round((Relation_Rs_exact[t] / Relation_C[t])*100,2)))
            except:
                rel_em_f1_score = 0
            rt_table = rt_table.append({"Relation Type":t, "EM Precision":round((Relation_Ps_exact[t] / Relation_C[t])*100, 2), "EM Recall":round((Relation_Rs_exact[t] / Relation_C[t])*100,2),"EM F1 score":round(rel_em_f1_score,2),"SM F1 score":round(rel_sm_f1_score,2),"SM Precision":round((Relation_Ps_soft[t] / Relation_C[t])*100,2),"SM Recall":round((Relation_Rs_soft[t] / Relation_C[t])*100,2),"SM Accuracy":round((Relation_Acc_soft[t]/Relation_C[t])*100,2),"EM Accuracy":round((Relation_Acc_exact[t] / Relation_C[t])*100,2)},ignore_index=True)
            
            Relation_total_c = Relation_total_c + 1
            Relation_total_r_exact = Relation_total_r_exact + round((Relation_Rs_exact[t] / Relation_C[t])*100,2)
            Relation_total_p_exact = Relation_total_p_exact + round((Relation_Ps_exact[t] / Relation_C[t])*100, 2)
            Relation_total_r_soft = Relation_total_r_soft + round((Relation_Rs_soft[t] / Relation_C[t])*100,2)
            Relation_total_p_soft = Relation_total_p_soft + round((Relation_Ps_soft[t] / Relation_C[t])*100,2)
            Relation_total_a_soft =Relation_total_a_soft + round((Relation_Acc_soft[t]/Relation_C[t])*100,2)
            Relation_total_a_exact =Relation_total_a_exact + round((Relation_Acc_exact[t] / Relation_C[t])*100,2)
        
        try:
            rel_avg_em_f1_score = 2*((round((Relation_total_p_exact / Relation_total_c)*100,2))*(round((Relation_total_r_exact / Relation_total_c)*100,2)))/((round((Relation_total_p_exact / Relation_total_c)*100,2))+(round((Relation_total_r_exact / Relation_total_c)*100,2)))
        except:
            rel_avg_em_f1_score = 0
        try:
            rel_avg_sm_f1_score = 2*((round((Relation_total_p_soft / Relation_total_c)*100,2))*(round((Relation_total_r_soft / Relation_total_c)*100,2)))/((round((Relation_total_p_soft / Relation_total_c)*100,2))+(round((Relation_total_r_soft / Relation_total_c)*100,2)))
        except:
            rel_avg_sm_f1_score = 0
        rt_table = rt_table.append({"Relation Type":"Average", "EM Precision":round((Relation_total_p_exact / Relation_total_c),2), "EM Recall":round((Relation_total_r_exact / Relation_total_c),2),"EM F1 score":round(rel_avg_em_f1_score//100,2),"SM F1 score":round(rel_avg_sm_f1_score//100,2),"SM Precision":round((Relation_total_p_soft / Relation_total_c),2),"SM Recall":round((Relation_total_r_soft / Relation_total_c),2),"SM Accuracy":round((Relation_total_a_soft / Relation_total_c),2),"EM Accuracy":round((Relation_total_a_exact / Relation_total_c),2)},ignore_index=True)

#         print("Total: ")
#         print('Exact Precision',round((Relation_total_p_exact / Relation_total_c),2), 'Exact Recall',round((Relation_total_r_exact / Relation_total_c),2))
#         print('Soft Precision',round((Relation_total_p_soft / Relation_total_c),2), 'Soft Recall',round((Relation_total_r_soft / Relation_total_c),2))
#         print('Soft Accuracy',round((Relation_total_a_soft / Relation_total_c),2), 'Exact Accuracy',round((Relation_total_a_exact / Relation_total_c),2))

    

# Prediction for original task (KELM Data) trained on itself

### The Prediction class takes 3 parameters
### 1. Input File path
### 2. Model File path
### 3. Threshold value for cosine similarity

In [256]:
pred = Prediction("5k_data","5k_data/Model_Data",[0.7])

### The find_pred method takes 1 parameter
### 1. A list of file name whose prediction we want from the model
### The file name path is already provided in the previous step

In [257]:
pred.find_pred(["balanced_dev","balanced_test"])

### Evaluation class has method evaluate_ndb_with_ssg
### Which takes the exact location of the Prediction file generated from the previous step

In [258]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("5k_data/balanced_test_0.7_ssg_sup.json")

In [259]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,set,74.33,91.67,54.17,82.09,74.33,91.67,54.17,82.09
1,count,52.23,92.31,61.54,66.71,52.23,92.31,61.54,66.71
2,min/max,58.66,88.51,75.86,70.56,58.66,88.51,75.86,70.56
3,bool,47.66,73.08,42.31,57.69,47.66,73.08,42.31,57.69
4,Average,57.93,86.35,59.05,69.34,57.93,86.35,59.05,69.34


In [260]:
rt_table.head(11)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P47,35.0,100.0,75.0,51.85,35.0,100.0,75.0,51.85
1,P21,74.32,96.55,86.21,83.99,74.32,96.55,86.21,83.99
2,P106,51.94,74.6,57.14,61.24,51.94,74.6,57.14,61.24
3,P19,71.12,85.71,57.14,77.74,71.12,85.71,57.14,77.74
4,P108,8.33,66.67,33.33,14.81,8.33,66.67,33.33,14.81
5,P27,56.56,86.67,46.67,68.45,56.56,86.67,46.67,68.45
6,P54,75.0,90.0,30.0,81.82,75.0,90.0,30.0,81.82
7,P50,100.0,100.0,0.0,100.0,100.0,100.0,0.0,100.0
8,P69,43.06,100.0,66.67,60.2,43.06,100.0,66.67,60.2
9,P1082,24.07,66.67,33.33,35.37,24.07,66.67,33.33,35.37


In [261]:
final_rt_column_names = ["Trained Model","Relation Type","EM Precision", "EM Recall", "EM Accuracy", "EM F1 score","SM Precision", "SM Recall", "SM Accuracy", "SM F1 score"]
final_rt_table = pd.DataFrame(columns = final_rt_column_names)
copy_rt_table = rt_table.copy()
temp_trained_model = ["Prediction of Original Trained Model on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with Original(KELM Data) + Table Data(Wikipedia)

### Here we are running the original+table data through the model trained on Original KELM Data

In [262]:
pred = Prediction("Inference_5k_datatable","Inference_5k_datatable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [263]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_5k_datatable/combined_data_0.7_ssg_sup.json")

In [264]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,66.0,82.34,66.67,73.27,66.89,82.34,66.67,73.82
1,set,61.52,84.09,59.09,71.06,64.81,85.23,60.23,73.63
2,bool,45.23,77.65,57.65,57.16,47.74,78.82,58.82,59.46
3,count,53.47,86.27,60.0,66.02,57.63,90.59,63.53,70.45
4,Average,59.96,82.58,61.33,67.73,59.96,84.09,62.67,70.0


In [265]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P69,54.01,79.63,48.15,64.36,54.01,79.63,48.15,64.36
1,P21,70.05,93.26,85.29,80.01,71.18,93.26,85.29,80.74
2,P27,60.82,75.0,66.67,67.17,61.65,75.0,66.67,67.67
3,P1110,75.0,87.5,12.5,80.77,75.0,87.5,12.5,80.77
4,P106,44.16,89.44,70.0,59.13,45.41,89.44,70.0,60.24
5,P19,47.49,92.89,60.98,62.85,47.49,92.89,60.98,62.85
6,P1082,71.75,85.09,63.16,77.85,71.75,85.09,63.16,77.85
7,P47,56.92,96.15,61.54,71.51,58.85,96.15,61.54,73.01
8,P118,32.29,33.33,12.5,32.8,45.83,56.25,31.25,50.51
9,P54,55.13,59.0,46.0,57.0,65.4,63.0,50.0,64.18


In [266]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["# Inference with Original(KELM Data) + Table Data(Wikipedia) on Trained Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with only Table Data(Wikipedia)

### Here we are running the table data through the model trained on Original KELM Data

In [267]:
pred = Prediction("Inference_original_onlytable","Inference_original_onlytable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [268]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_original_onlytable/combined_data_0.7_ssg_sup.json")

In [269]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,60.97,20.78,21.67,31.0,84.27,36.03,38.33,50.48
1,bool,19.52,55.71,42.86,28.91,58.8,94.29,78.57,72.43
2,count,58.03,47.62,21.43,52.31,84.09,67.86,41.07,75.11
3,set,44.62,61.83,41.94,51.83,72.77,75.81,54.84,74.26
4,Average,74.17,46.96,32.66,45.71,74.17,69.6,54.44,71.81


In [270]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P54,40.35,48.51,38.42,44.06,72.07,71.49,60.1,71.78
1,P118,63.33,40.0,6.67,49.03,83.61,61.11,28.89,70.61
2,Average,51.84,44.25,22.55,47.0,77.84,66.3,44.5,71.0


In [271]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference with only Table Data(Wikipedia) on Trained Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Prediction for Original(KELM Data) + Table Data(Wikipedia) trained on itself

In [272]:
pred = Prediction("5k_datatable","5k_datatable/Model_Data",[0.7])
pred.find_pred(["balanced_dev","balanced_test"])

In [273]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("5k_datatable/balanced_test_0.7_ssg_sup.json")

In [274]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,set,58.55,83.87,64.52,68.96,58.55,83.87,64.52,68.96
1,bool,33.08,100.0,84.21,49.71,33.08,100.0,84.21,49.71
2,count,63.52,86.36,63.64,73.2,63.52,86.36,63.64,73.2
3,min/max,65.46,87.85,68.42,75.02,65.46,87.85,68.42,75.02
4,Average,57.53,88.53,69.09,69.74,57.53,88.53,69.09,69.74


In [275]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P106,42.43,100.0,82.61,59.58,42.43,100.0,82.61,59.58
1,P27,29.68,100.0,90.0,45.77,29.68,100.0,90.0,45.77
2,P118,53.12,41.67,37.5,46.7,53.12,41.67,37.5,46.7
3,P26,75.0,100.0,100.0,85.71,75.0,100.0,100.0,85.71
4,P21,63.11,100.0,94.12,77.38,63.11,100.0,94.12,77.38
5,P19,54.06,87.06,58.82,66.7,54.06,87.06,58.82,66.7
6,P54,72.27,84.09,90.91,77.73,72.27,84.09,90.91,77.73
7,P1082,58.93,100.0,50.0,74.16,58.93,100.0,50.0,74.16
8,P69,66.67,60.0,20.0,63.16,66.67,60.0,20.0,63.16
9,P50,91.67,83.33,33.33,87.3,91.67,83.33,33.33,87.3


In [276]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Prediction for Original(KELM Data) + Table Data(Wikipedia) trained on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with only Table Data(Wikipedia)

### Here we are running the table data through the model trained on original+table Data

In [277]:
pred = Prediction("Inference_orig+table_onlytable","Inference_orig+table_onlytable/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [278]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_orig+table_onlytable/combined_data_0.7_ssg_sup.json")

In [279]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,46.44,45.69,55.0,46.06,64.58,49.44,60.0,56.0
1,bool,44.94,85.0,70.0,58.79,48.51,85.0,70.0,61.77
2,count,75.16,73.21,46.43,74.17,82.59,73.21,46.43,77.62
3,set,56.34,65.32,46.77,60.5,73.58,77.42,56.45,75.45
4,Average,66.36,67.91,55.24,60.76,66.36,71.84,58.87,68.99


In [280]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P54,50.29,69.17,61.08,58.24,64.2,73.97,65.52,68.74
1,P118,76.11,62.22,28.89,68.47,76.11,62.22,28.89,68.47
2,Average,63.2,65.69,44.98,64.0,70.16,68.09,47.2,69.0


In [281]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference with only Table Data(Wikipedia) on Original+Table Trained Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Prediction for only Tabe Data(Wikipedia) trained on itself

In [282]:
pred = Prediction("onlytable","onlytable/Model_Data",[0.7])
pred.find_pred(["balanced_dev","balanced_test"])

In [283]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("onlytable/balanced_test_0.7_ssg_sup.json")

In [284]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,set,49.62,96.79,85.71,65.61,53.84,96.79,85.71,69.19
1,min/max,45.0,76.6,77.78,56.69,45.0,76.6,77.78,56.69
2,bool,35.29,88.8,75.0,50.51,35.29,88.8,75.0,50.51
3,count,51.65,93.33,53.33,66.5,51.65,93.33,53.33,66.5
4,Average,45.95,86.72,73.61,59.37,45.95,86.72,73.61,60.07


In [285]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P54,46.95,88.88,84.75,61.44,47.95,88.88,84.75,62.29
1,P118,36.86,76.92,23.08,49.84,36.86,76.92,23.08,49.84
2,Average,41.91,82.9,53.91,55.0,42.41,82.9,53.91,56.0


In [286]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Prediction for only Tabe Data(Wikipedia) trained on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

In [287]:
(final_rt_table.tail())

Unnamed: 0,Trained Model,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
45,Inference with only Table Data(Wikipedia) on O...,P118,76.11,62.22,28.89,68.47,76.11,62.22,28.89,68.47
46,Inference with only Table Data(Wikipedia) on O...,Average,63.2,65.69,44.98,64.0,70.16,68.09,47.2,69.0
47,Prediction for only Tabe Data(Wikipedia) train...,P54,46.95,88.88,84.75,61.44,47.95,88.88,84.75,62.29
48,Prediction for only Tabe Data(Wikipedia) train...,P118,36.86,76.92,23.08,49.84,36.86,76.92,23.08,49.84
49,Prediction for only Tabe Data(Wikipedia) train...,Average,41.91,82.9,53.91,55.0,42.41,82.9,53.91,56.0


In [288]:
rslt_df = final_rt_table.sort_values(by = 'Relation Type')

In [289]:
rslt_df.to_csv('last_week_result.tsv',sep="\t", index =False)

# Week 2

# Prediction for original task (KELM Data) with new templates trained on itself

In [290]:
pred = Prediction("mtab_alldata","mtab_alldata/Model_Data",[0.7])
pred.find_pred(["balanced_dev","balanced_test"])

In [291]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("mtab_alldata/balanced_test_0.7_ssg_sup.json")

In [292]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,bool,61.36,83.67,61.22,70.8,61.36,83.67,61.22,70.8
1,count,66.63,86.21,62.07,75.17,66.63,86.21,62.07,75.17
2,min/max,61.07,83.95,62.79,70.71,61.07,83.95,62.79,70.71
3,set,54.95,96.67,63.33,70.07,55.32,96.67,63.33,70.37
4,Average,61.09,86.82,62.25,71.67,61.09,86.82,62.25,71.72


In [293]:
rt_table.head(50)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P106,46.72,82.76,51.72,59.72,46.72,82.76,51.72,59.72
1,P19,71.37,84.62,61.54,77.43,72.22,84.62,61.54,77.93
2,P69,58.54,93.75,56.25,72.07,58.54,93.75,56.25,72.07
3,P21,74.45,98.62,96.55,84.85,74.45,98.62,96.55,84.85
4,P1092,0.0,100.0,0.0,0.0,0.0,100.0,0.0,0.0
5,P1113,56.67,83.33,66.67,67.46,56.67,83.33,66.67,67.46
6,P1082,33.33,66.67,55.56,44.44,33.33,66.67,55.56,44.44
7,P54,66.67,84.62,61.54,74.58,66.67,84.62,61.54,74.58
8,P710,100.0,100.0,33.33,100.0,100.0,100.0,33.33,100.0
9,P118,100.0,100.0,33.33,100.0,100.0,100.0,33.33,100.0


In [294]:
final_rt_column_names = ["Trained Model","Relation Type","EM Precision", "EM Recall", "EM Accuracy", "EM F1 score","SM Precision", "SM Recall", "SM Accuracy", "SM F1 score"]
final_rt_table = pd.DataFrame(columns = final_rt_column_names)
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference of Original Task with new templates trained on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with Original(KELM Data) + Mtab Table Data(Wikipedia) on Original Trained Model

### Here we are running the original+mtab table data through the model trained on Original KELM Data

In [295]:
pred = Prediction("Inference_of_mtab_orig+table_on_original","Inference_of_mtab_orig+table_on_original/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [296]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_of_mtab_orig+table_on_original/combined_data_0.7_ssg_sup.json")

In [297]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,count,52.26,84.78,60.87,64.66,59.06,90.58,66.3,71.5
1,bool,56.49,84.46,62.41,67.7,60.05,87.97,65.41,71.38
2,set,68.44,85.27,53.49,75.93,71.04,86.43,54.65,77.98
3,min/max,69.33,73.9,60.71,71.54,72.75,76.4,62.5,74.53
4,Average,65.43,81.9,59.81,70.18,65.43,85.16,62.65,74.0


In [298]:
rt_table.head(25)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P54,55.07,62.32,43.48,58.47,55.07,62.32,43.48,58.47
1,P19,69.52,71.05,44.74,70.28,69.52,71.05,44.74,70.28
2,P27,67.23,82.52,62.22,74.09,68.23,82.52,62.22,74.7
3,P118,86.11,100.0,77.78,92.54,94.44,100.0,77.78,97.14
4,P17,77.22,78.33,53.33,77.77,77.78,78.33,53.33,78.05
5,P571,62.5,100.0,100.0,76.92,75.0,100.0,100.0,85.71
6,P69,64.63,97.56,68.29,77.75,66.67,97.56,68.29,79.21
7,P1082,49.02,64.8,47.06,55.82,67.35,84.8,64.71,75.07
8,P106,57.57,91.48,70.49,70.67,57.57,91.48,70.49,70.67
9,P2046,37.5,100.0,100.0,54.55,37.5,100.0,100.0,54.55


In [299]:
final_rt_column_names = ["Trained Model","Relation Type","EM Precision", "EM Recall", "EM Accuracy", "EM F1 score","SM Precision", "SM Recall", "SM Accuracy", "SM F1 score"]
final_rt_table = pd.DataFrame(columns = final_rt_column_names)
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference of Original+Mtab Table on Original Trained Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with only Mtab Table Data(Wikipedia) on Original Task

In [300]:
pred = Prediction("Inference_of_mtab_onlytable_on_original","Inference_of_mtab_onlytable_on_original/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [301]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_of_mtab_onlytable_on_original/combined_data_0.7_ssg_sup.json")

In [302]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,66.6,53.29,43.42,59.21,82.18,61.4,48.68,70.29
1,set,47.2,33.76,24.14,39.36,75.47,64.37,51.72,69.48
2,bool,53.4,70.97,60.67,60.94,76.76,89.33,77.53,82.57
3,count,54.95,67.03,51.61,60.39,84.98,89.25,73.12,87.06
4,Average,80.25,58.73,47.15,57.27,80.25,78.01,64.56,79.11


In [303]:
rt_table.head(25)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P1082,35.77,79.17,70.0,49.28,58.17,92.5,80.0,71.42
1,P393,55.15,74.44,61.9,63.36,82.41,86.03,72.38,84.18
2,P17,66.09,43.02,32.56,52.12,87.97,66.28,55.81,75.6
3,P1113,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
4,P2046,36.63,92.86,64.29,52.54,48.83,100.0,71.43,65.62
5,P710,64.69,33.64,29.63,44.26,88.22,61.32,53.09,72.35
6,P571,55.28,55.56,33.33,55.42,89.45,88.89,66.67,89.17
7,P2308,50.0,38.46,7.69,43.48,73.5,69.23,38.46,71.3
8,Average,57.95,64.64,49.93,61.0,78.57,83.03,67.23,80.0


In [304]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference of Only Mtab Table on Original Trained Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Prediction of Original(KELM Data) + Mtab Table Data(Wikipedia) trained on itself

In [305]:
pred = Prediction("mtab_original+table","mtab_original+table/Model_Data",[0.7])
pred.find_pred(["balanced_test"])

In [306]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("mtab_original+table/balanced_test_0.7_ssg_sup.json")

In [307]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,62.44,89.17,86.67,73.45,70.21,89.17,86.67,78.56
1,bool,55.89,92.86,73.81,69.78,60.2,92.86,73.81,73.05
2,count,43.73,100.0,71.43,60.85,45.31,100.0,71.43,62.36
3,set,54.86,95.65,52.17,69.73,65.29,100.0,56.52,79.0
4,Average,60.21,94.11,72.36,69.05,60.21,94.92,73.17,73.68


In [308]:
rt_table.head(25)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P21,55.74,100.0,81.82,71.58,58.01,100.0,81.82,73.43
1,P106,58.71,93.75,87.5,72.2,59.75,93.75,87.5,72.98
2,P27,53.95,96.97,90.91,69.33,61.15,96.97,90.91,75.0
3,P69,55.56,93.33,53.33,69.65,63.89,100.0,60.0,77.97
4,P393,49.23,100.0,69.23,65.98,55.77,100.0,69.23,71.61
5,P50,100.0,100.0,50.0,100.0,100.0,100.0,50.0,100.0
6,P47,23.61,100.0,66.67,38.2,23.61,100.0,66.67,38.2
7,P19,43.19,91.67,66.67,58.72,49.17,91.67,66.67,64.01
8,P17,58.16,91.07,71.43,70.99,75.26,91.07,71.43,82.41
9,P1082,48.0,100.0,80.0,64.86,48.0,100.0,80.0,64.86


In [309]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Prediction of Original+Mtab Table Trained on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Inference with only Mtab Table Data(Wikipedia) on Original + MTAB Table

In [310]:
pred = Prediction("Inference_of_mtab_onlytable_on_mtab_orig+table","Inference_of_mtab_onlytable_on_mtab_orig+table/Model_Data",[0.7])
pred.find_pred(["combined_data"])

In [311]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("Inference_of_mtab_onlytable_on_mtab_orig+table/combined_data_0.7_ssg_sup.json")

In [312]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,62.49,80.04,68.42,70.18,63.28,80.04,68.42,70.68
1,set,60.3,80.46,67.24,68.94,61.17,80.46,67.24,69.5
2,bool,62.63,98.13,85.39,76.46,63.29,98.13,85.39,76.95
3,count,75.07,90.32,75.27,81.99,75.07,90.32,75.27,81.99
4,Average,66.36,88.24,75.0,75.41,66.36,88.24,75.0,75.75


In [313]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P1082,56.78,92.5,80.0,70.37,57.4,92.5,80.0,70.84
1,P393,68.21,92.38,77.14,78.48,68.68,92.38,77.14,78.79
2,P17,74.02,93.99,86.05,82.82,74.8,93.99,86.05,83.3
3,P1113,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
4,P2046,35.71,100.0,71.43,52.63,40.0,100.0,71.43,57.14
5,P710,66.09,77.06,70.37,71.15,66.09,77.06,70.37,71.15
6,P571,57.41,83.33,61.11,67.98,57.41,83.33,61.11,67.98
7,P2308,84.62,84.62,53.85,84.62,84.62,84.62,53.85,84.62
8,Average,67.85,90.49,74.99,77.0,68.62,90.49,74.99,78.0


In [314]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Inference of Only Mtab Table Trained on Original+Mtab Table Model"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

# Prediction of Mtab Table Data(Wikipedia) trained on itself

In [315]:
pred = Prediction("mtab_onlytable","mtab_onlytable/Model_Data",[0.7])
pred.find_pred(["balanced_test"])

In [316]:
evalu = evaluate()
evalu.evaluate_ndb_with_ssg("mtab_onlytable/balanced_test_0.7_ssg_sup.json")

In [317]:
qt_table.head()

Unnamed: 0,Query Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,min/max,85.0,100.0,85.0,91.89,85.0,100.0,85.0,91.89
1,bool,52.63,96.0,84.0,67.99,52.63,96.0,84.0,67.99
2,count,85.56,100.0,83.87,92.22,85.56,100.0,83.87,92.22
3,set,62.5,80.0,70.0,70.18,62.5,80.0,70.0,70.18
4,Average,73.18,96.51,82.56,83.24,73.18,96.51,82.56,83.24


In [318]:
rt_table.head(20)

Unnamed: 0,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
0,P17,75.33,92.0,76.0,82.83,75.33,92.0,76.0,82.83
1,P571,64.1,100.0,69.23,78.12,64.1,100.0,69.23,78.12
2,P1082,79.17,100.0,92.86,88.37,79.17,100.0,92.86,88.37
3,P710,59.52,100.0,85.71,74.62,59.52,100.0,85.71,74.62
4,P393,75.98,96.3,88.89,84.94,75.98,96.3,88.89,84.94
5,Average,70.82,97.66,82.54,82.0,70.82,97.66,82.54,82.0


In [319]:
copy_rt_table = rt_table.copy()
temp_trained_model = ["Prediction of Only Mtab Table Trained on itself"]*len(rt_table)
copy_rt_table.insert(0,"Trained Model",temp_trained_model)
final_rt_table = final_rt_table.append(copy_rt_table, ignore_index=True)

In [320]:
rslt_df = final_rt_table.sort_values(by = 'Relation Type')

In [321]:
rslt_df.head(30)

Unnamed: 0,Trained Model,Relation Type,EM Precision,EM Recall,EM Accuracy,EM F1 score,SM Precision,SM Recall,SM Accuracy,SM F1 score
64,Prediction of Only Mtab Table Trained on itself,Average,70.82,97.66,82.54,82.0,70.82,97.66,82.54,82.0
22,Inference of Original+Mtab Table on Original T...,Average,61.17,83.26,59.91,70.0,66.78,86.21,62.58,75.0
58,Inference of Only Mtab Table Trained on Origin...,Average,67.85,90.49,74.99,77.0,68.62,90.49,74.99,78.0
49,Prediction of Original+Mtab Table Trained on i...,Average,60.01,95.11,71.43,73.0,63.81,95.5,71.82,76.0
31,Inference of Only Mtab Table on Original Train...,Average,57.95,64.64,49.93,61.0,78.57,83.03,67.23,80.0
8,Inference of Original+Mtab Table on Original T...,P106,57.57,91.48,70.49,70.67,57.57,91.48,70.49,70.67
33,Prediction of Original+Mtab Table Trained on i...,P106,58.71,93.75,87.5,72.2,59.75,93.75,87.5,72.98
10,Inference of Original+Mtab Table on Original T...,P108,60.0,93.33,66.67,73.04,60.0,93.33,66.67,73.04
46,Prediction of Original+Mtab Table Trained on i...,P108,33.33,100.0,100.0,50.0,33.33,100.0,100.0,50.0
61,Prediction of Only Mtab Table Trained on itself,P1082,79.17,100.0,92.86,88.37,79.17,100.0,92.86,88.37


In [322]:
rslt_df.to_csv('final_result.tsv',sep='\t', index=False)

