In [1]:
import os
from dotenv import dotenv_values
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from utils import map_cpa_to_labels, calculate_f1_scores, save_pickle_file, load_pickle_file, load_cpa_dataset, decimal
import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import json

In [2]:
# Read the OpenAI credentials from a dotenv file (referenced by its full path)
# and expose the key both as a notebook-level name and as a process
# environment variable.
config = dotenv_values("/full/path/to/file/key.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [158]:
# Model used by the generated-definitions cells; the name is also part of the
# knowledge / embeddings / predictions file paths built further down.
model_name = 'gpt-3.5-turbo-0301'
# Alternative model: model_name = 'gpt-4-0613'

# Names of the datasets every experiment loop iterates over.
datasets = ["sotabv2", "t2dv2-webtables"]

# Chat client shared by the experiment cells (temperature fixed at 0).
chat = ChatOpenAI(
    model=model_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

In [159]:
# Identifiers of the prompt variants. They are interpolated into the
# knowledge, embedding and prediction file names used by the cells below.
a_prompts = [f"A{n}" for n in range(1, 7)]        # A1 .. A6
tb_prompts = [f"TB{n}" for n in range(1, 8)]      # TB1 .. TB7
inst_prompts = [f"I{n}" for n in (1, 2)]          # I1, I2
syst_prompts = [f"S{n}" for n in (1, 2, 3, 5)]    # "S4" is currently excluded

## Run generated definitions experiments

In [None]:
def _select_top_definitions(test_embeddings, knowledge_embeddings, num_examples, top_k=10):
    """Pick, for each test example, the indices of its most similar definitions.

    Example ``i`` is represented by ``test_embeddings[i]`` and is compared
    against every row of ``knowledge_embeddings`` with cosine similarity.

    Args:
        test_embeddings: indexable collection of example embeddings.
        knowledge_embeddings: embeddings of the definitions, one row each.
        num_examples: how many examples (starting at index 0) to process.
        top_k: number of definitions to keep per example.

    Returns:
        A list with one entry per example: the indices of the ``top_k`` most
        similar rows of ``knowledge_embeddings``, ordered from least to most
        similar (fewer than ``top_k`` if there are not enough rows).
    """
    selected = []
    for i in tqdm.tqdm(range(num_examples), total=num_examples):
        similarities = cosine_similarity([test_embeddings[i]], knowledge_embeddings)[0]
        # sorted() is stable, so ties keep their original index order.
        ranked = sorted(range(len(similarities)), key=lambda idx: similarities[idx])
        selected.append(ranked[-top_k:])
    return selected


def _build_definition_messages(example, definition_indices, all_labels, definitions, labels_joined):
    """Build the chat messages for one example.

    The first system message lists ``label: definition`` lines for the selected
    definitions, followed by the task description, the instructions and the
    human message containing the serialized table.

    NOTE(review): assumes ``all_labels`` and ``definitions`` are aligned by
    index with the rows of the knowledge embeddings -- confirm against the
    code that generated those files.
    """
    knowledge_string = "\n".join(
        f"{all_labels[idx]}: {definitions[idx]}" for idx in definition_indices
    ).strip()

    return [
        SystemMessage(content=knowledge_string),
        # Task and instructions
        SystemMessage(content=f"Your task is to classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]


def _run_generated_definitions(prompt_id, dataset, model_name, chat, examples, test_embeddings, all_labels, labels_joined):
    """Run one prompt configuration and pickle its raw model answers.

    Nothing is done when the prediction file for ``prompt_id`` already exists.
    Errors raised while querying the model or saving the answers are reported
    (including the exception itself) and the configuration is skipped, so that
    the remaining configurations still run.
    """
    predictions_path = f"predictions/{dataset}/{model_name}/cpa-{prompt_id}-table-0-shot.pkl"
    if os.path.exists(predictions_path):
        return

    print(f"Loading knowledge {prompt_id}")
    definitions = load_pickle_file(f"knowledge/{model_name}/{dataset}/cpa-{prompt_id}_prompt_knowledge.pkl")
    knowledge_embeddings = load_pickle_file(f"embeddings/{model_name}/cpa-{prompt_id}_knowledge_embeddings_{dataset}.pkl")

    examples_definitions = _select_top_definitions(test_embeddings, knowledge_embeddings, len(examples))

    try:
        preds = []
        for example, definition_indices in tqdm.tqdm(zip(examples, examples_definitions), total=len(examples)):
            messages = _build_definition_messages(example, definition_indices, all_labels, definitions, labels_joined)
            preds.append(chat(messages).content)
        save_pickle_file(predictions_path, preds)
    except Exception as error:
        # Best effort: keep going with the next configuration, but say why
        # this one failed instead of hiding the exception.
        print(f"Error in cpa-{prompt_id}_{dataset}: {error!r}")


for dataset in datasets:
    print(dataset)
    # Load dataset
    (examples, labels, test_table_type_labels, train_examples, train_example_labels,
     train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test) = load_cpa_dataset(dataset, "", False)
    all_labels = [labels_to_text[l] for l in labels_to_text]
    # The label list shown to the model uses the textual label names.
    labels_joined = ", ".join(all_labels)

    # Load embeddings
    test_embeddings = load_pickle_file(f"embeddings/cpa-test_embeddings_{dataset}.pkl")

    # Make sure the output directory exists before any (paid) API call is made.
    os.makedirs(f"predictions/{dataset}/{model_name}", exist_ok=True)

    # A prompts: every system prompt combined with every A prompt.
    prompt_ids = [f"{syst}_{g}" for syst in syst_prompts for g in a_prompts]
    # B prompts: every TB prompt combined with all but the last system prompt.
    prompt_ids += [f"{syst}_I_{tb}" for tb in tb_prompts for syst in syst_prompts[:-1]]

    for prompt_id in prompt_ids:
        _run_generated_definitions(prompt_id, dataset, model_name, chat, examples, test_embeddings, all_labels, labels_joined)

## Run manual definitions experiments

In [None]:
for dataset in datasets:
    print(dataset)
    # Load dataset
    (examples, labels, test_table_type_labels, train_examples, train_example_labels,
     train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test) = load_cpa_dataset(dataset, "", False)

    # Manually written definitions: a JSON object keyed by label. The context
    # manager closes the file as soon as it has been parsed.
    with open(f'../data/cpa-{dataset}-definitions.txt') as definitions_file:
        definitions_by_label = json.load(definitions_file)
    # Both lists follow the key order of the JSON object, so position k in
    # all_labels and definitions refers to the same label.
    all_labels = [labels_to_text[defn] for defn in definitions_by_label]
    definitions = [definitions_by_label[defn] for defn in definitions_by_label]
    test_embeddings = load_pickle_file(f'embeddings/cpa-test_embeddings_{dataset}.pkl')

    # Pick the 10 most similar definitions for each example
    # NOTE(review): assumes the rows of the definitions embeddings are in the
    # same order as the entries of the definitions file -- confirm.
    knowledge_embeddings = load_pickle_file(f"embeddings/cpa-{dataset}-definitions-embeddings.pkl")
    examples_definitions = []
    for i in tqdm.tqdm(range(len(examples)), total=len(examples)):
        similarities = cosine_similarity([test_embeddings[i]], knowledge_embeddings)[0]
        # Stable ascending sort; the last 10 indices are the most similar ones.
        ranked = sorted(range(len(similarities)), key=lambda idx: similarities[idx])
        examples_definitions.append(ranked[-10:])

    # Dedicated loop names: reusing `model_name` / `chat` here would overwrite
    # the notebook-level configuration that the other cells read afterwards.
    for manual_model_name in ["gpt-3.5-turbo-0301", "gpt-4-0613"]:
        print(manual_model_name)
        manual_chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=manual_model_name)

        # Make sure the output directory exists before any (paid) API call is made.
        os.makedirs(f"predictions/{dataset}/{manual_model_name}", exist_ok=True)

        preds = []

        for example, definition_indices in tqdm.tqdm(zip(examples, examples_definitions), total=len(examples)):
            # One "label: definition" line per selected definition.
            knowledge_string = "\n".join(
                f"{all_labels[index]}: {definitions[index]}" for index in definition_indices
            ).strip()

            messages = [
                SystemMessage(content=knowledge_string),
                # Task and instructions
                SystemMessage(content=f"Your task is to classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}."),
                SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"),
                HumanMessage(content=f"Classify these table columns: {example}"),
            ]

            res = manual_chat(messages)
            preds.append(res.content)

        save_pickle_file(f"predictions/{dataset}/{manual_model_name}/cpa-manual-definitions-table-0-shot.pkl", preds)

## Evaluate KG experiments

In [None]:
def _print_evaluation_row(prompt_id, dataset, model_name, labels, test, text_to_label):
    """Evaluate the stored predictions of one prompt configuration and print a row.

    The row contains precision, recall, macro-F1, micro-F1 and the second
    value returned by ``map_cpa_to_labels`` (printed in the "OOV" column;
    presumably the number of out-of-vocabulary answers -- confirm in utils).
    If the prediction file does not exist, a note is printed instead and the
    remaining configurations are still evaluated.
    """
    predictions_path = f"predictions/{dataset}/{model_name}/cpa-{prompt_id}-table-0-shot.pkl"
    if not os.path.isfile(predictions_path):
        # A run may be missing if it was skipped after an error.
        print(f"{prompt_id}\tmissing predictions: {predictions_path}")
        return

    preds = load_pickle_file(predictions_path)
    predictions, num = map_cpa_to_labels(preds, test, text_to_label)

    # "-" is only added to the evaluated classes when it occurs in the predictions.
    types = list(set(labels))
    if '-' in predictions:
        types = types + ["-"]
    evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

    print(f"{prompt_id}\t{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")


for dataset in datasets:
    print(dataset)
    # Column header of the result table (the stray ")" after "Precision" is removed).
    print("\tPrecision\tRecall\tMacro-F1\tMicro-F1\tOOV")
    # Load dataset
    (examples, labels, test_table_type_labels, train_examples, train_example_labels,
     train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test) = load_cpa_dataset(dataset, "", False)
    all_labels = [labels_to_text[l] for l in labels_to_text]

    # Evaluate A prompts
    for syst in syst_prompts:
        for g in a_prompts:
            _print_evaluation_row(f"{syst}_{g}", dataset, model_name, labels, test, text_to_label)

    # Evaluate TB prompts
    for tb in tb_prompts:
        for syst in syst_prompts[:-1]:  # skip last system for prompt b
            _print_evaluation_row(f"{syst}_I_{tb}", dataset, model_name, labels, test, text_to_label)