In [1]:
import os
from dotenv import dotenv_values
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from utils import map_cpa_to_labels, calculate_f1_scores, save_pickle_file, load_cpa_dataset_column, load_pickle_file, load_cpa_dataset, decimal, map_answers_column
import tqdm
import random

In [3]:
# Read the OpenAI API key from a .env file (the path must be absolute) and
# expose it both as a notebook variable and as an environment variable.
config = dotenv_values("/full/path/to/file/key.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [4]:
# Dataset names every experiment cell iterates over.
datasets = [
    "sotabv2",
    "t2dv2-webtables",
]

# Model identifiers handed to ChatOpenAI, one run per entry.
models = [
    "gpt-3.5-turbo-0301",
    "gpt-4-0613",
]

## Column-prompts experiments

In [None]:
# Column-prompt experiments: every test column pair is sent to the chat model on
# its own, optionally preceded by demonstrations given as earlier chat turns.
# Raw model answers are pickled per dataset / model / number of demonstrations.

# Dedicated seeded generator: the randomly drawn demonstrations are identical on
# every run of this cell and the global `random` state is left untouched.
demo_rng = random.Random(42)


def _column_pair_prompt(pair):
    """Render a (column1, column2) pair as the user message shown to the model."""
    return f"Column1: {pair[0]}\nColumn2: {pair[1]}"


def _classify_column_pair(chat, task, instructions, demo_indices, train_examples, train_labels, example):
    """Ask `chat` to label one column pair and return the raw answer text.

    Args:
        chat: chat model, called with a list of messages.
        task: content of the first system message (task description + label set).
        instructions: content of the second system message (step-by-step rules).
        demo_indices: indices into `train_examples` / `train_labels`; each one
            becomes a user/assistant demonstration turn, in the given order.
        train_examples: training column pairs the demonstrations are taken from.
        train_labels: labels aligned with `train_examples`.
        example: the column pair to classify.

    Returns:
        The `content` of the model's reply.
    """
    messages = [SystemMessage(content=task), SystemMessage(content=instructions)]
    for index in demo_indices:
        messages.append(HumanMessage(content=_column_pair_prompt(train_examples[index])))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    messages.append(HumanMessage(content=_column_pair_prompt(example)))
    return chat(messages).content


for dataset in datasets:
    print(dataset)
    # Load dataset
    examples, labels, train_examples, train_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset_column(dataset,"")
    # Per test example, a list of training indices; the similar-shot runs use its last `nr` entries.
    examples_demonstrations = load_pickle_file(f"embeddings/cpa-examples_demonstrations_{dataset}-column.pkl")

    # The task message only depends on the dataset's label set, so build it once.
    task = f"You are a world-class data engineer and your task is to classify the relationship between two columns with one of the following labels that are separated with comma: {labels_joined}."

    for model_name in models:
        print(model_name)
        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        # Zero-shot and few-shot with randomly drawn demonstrations
        instructions = "Your instructions are: 1. Look at the two columns and the classes given to you 2. Look at their values in detail. 3. Select a class that best represents the relationship between the two columns. 4. Answer with only one label!"
        for nr in [0, 1, 5]:
            preds = []
            for example in tqdm.tqdm(examples, total=len(examples)):
                demo_indices = [demo_rng.randint(0, len(train_examples)-1) for _ in range(nr)]
                preds.append(_classify_column_pair(chat, task, instructions, demo_indices, train_examples, train_labels, example))
            save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-column-{nr}-shot.pkl", preds)

        # Few-shot: similar (demonstrations come from the precomputed index lists)
        # The wording of step 4 differs from the random-shot prompt ("class" vs "label"); kept as is.
        instructions = "Your instructions are: 1. Look at the two columns and the classes given to you 2. Look at their values in detail. 3. Select a class that best represents the relationship between the two columns. 4. Answer with only one class!"
        for nr in [1, 5]:
            preds = []
            for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
                demo_indices = examples_demonstrations[i][-nr:]
                preds.append(_classify_column_pair(chat, task, instructions, demo_indices, train_examples, train_labels, example))
            # NOTE(review): unlike every other output file this name has no "cpa-" prefix;
            # left unchanged so existing files/readers keep working - confirm whether intended.
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-column-{nr}-similar-shot.pkl", preds)

## Table-prompts experiments

In [None]:
# Table-prompt experiments: a whole table is sent in one request and the model
# answers with one relationship per non-first column. Three demonstration
# strategies are run: random, most similar, and corner-case demonstrations.

# Dedicated seeded generator: the randomly drawn demonstrations are identical on
# every run of this cell and the global `random` state is left untouched.
demo_rng = random.Random(42)


def _classify_table_columns(chat, task, instructions, demo_indices, train_examples, train_example_labels, example):
    """Ask `chat` to label the columns of one table and return the raw answer text.

    Args:
        chat: chat model, called with a list of messages.
        task: content of the first system message (task description + label set).
        instructions: content of the second system message (step-by-step rules).
        demo_indices: indices into `train_examples` / `train_example_labels`;
            each one becomes a user/assistant demonstration turn, in order.
        train_examples: training tables the demonstrations are taken from.
        train_example_labels: answers aligned with `train_examples`.
        example: the table to classify.

    Returns:
        The `content` of the model's reply.
    """
    messages = [SystemMessage(content=task), SystemMessage(content=instructions)]
    for index in demo_indices:
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    return chat(messages).content


for dataset in datasets:
    print(dataset)
    # Load dataset
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset(dataset,"",False)
    # Per test table, lists of training indices used as demonstrations.
    examples_demonstrations = load_pickle_file(f"embeddings/cpa-examples_demonstrations_{dataset}.pkl")
    cc_examples_demonstratons = load_pickle_file(f"embeddings/cpa-cc_examples_demonstrations_{dataset}.pkl")

    # Both system messages are identical for every request of this dataset.
    task = f"Your task is to classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}."
    instructions = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"

    for model_name in models:
        print(model_name)
        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        # Zero-shot and few-shot with randomly drawn demonstrations
        for nr in [0, 1, 5]:
            print(nr)
            try:
                preds = []
                for example in tqdm.tqdm(examples, total=len(examples)):
                    demo_indices = [demo_rng.randint(0, len(train_examples)-1) for _ in range(nr)]
                    preds.append(_classify_table_columns(chat, task, instructions, demo_indices, train_examples, train_example_labels, example))
                save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-table-{nr}-shot.pkl", preds)
            except Exception as exc:
                # Best effort: this configuration is abandoned (nothing is saved) and the
                # next one runs, but the cause is reported instead of being hidden.
                print(f"Error in {nr}-shot: {exc!r}")

        # Few-shot: similar (last `nr` entries of the precomputed index lists)
        for nr in [1, 5]:
            print(nr)
            try:
                preds = []
                for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
                    demo_indices = examples_demonstrations[i][-nr:]
                    preds.append(_classify_table_columns(chat, task, instructions, demo_indices, train_examples, train_example_labels, example))
                save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-table-{nr}-similar-shot.pkl", preds)
            except Exception as exc:
                print(f"Error in {nr}-similar-shot: {exc!r}")

        # Few-shot corner-case demonstrations (all indices stored for the test table are used)
        preds = []
        for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
            try:
                preds.append(_classify_table_columns(chat, task, instructions, cc_examples_demonstratons[i], train_examples, train_example_labels, example))
            except Exception as exc:
                print(f"Error in cc-method (example {i}): {exc!r}")
                # Keep one entry per test table: silently skipping a failed example would
                # shift every later prediction against `examples`/`test` during evaluation.
                # "-" is the notebook's "no prediction" marker - assumes map_cpa_to_labels
                # treats an unparseable answer as no prediction; TODO confirm.
                preds.append("-")

        save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-table-4-cc-shot.pkl", preds)

## Two-step approach

In [179]:
def get_clean_table_prediction(table_pred, domains):
    """Map a free-text domain answer onto one of the known domain names.

    A domain matches when its name occurs as a substring of `table_pred`.
    Matches that are themselves contained in another matching domain name are
    ignored, so an answer such as "SportsEvent" resolves to "SportsEvent" and
    not to "Event", whatever the order of `domains`. Among the remaining
    matches the first one in `domains` order is returned.

    Args:
        table_pred: raw answer text produced by the model.
        domains: iterable of candidate domain names (strings).

    Returns:
        The matched domain name, or "-" when no domain occurs in `table_pred`.
    """
    matches = [dom for dom in domains if dom in table_pred]
    for dom in matches:
        # Skip a hit that is only a fragment of a longer domain that also matched.
        if not any(dom != other and dom in other for other in matches):
            return dom
    return "-"

In [175]:
# Prompt fragments for classifying a table into a domain.
# Each task message ends with ": " because the caller appends the domain list to it.
task_messages = dict(
    t1="Your task is to classify a table into one of these domains: ",
    t2="You are a world-class data engineer and your task is to classify a table into one of these domains: ",
)

# Steps 1-4 are common to every instruction variant; i2 and i3 each add a fifth step.
_domain_instruction_steps = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. Decide the domain that best represents the table. 4. Answer with one domain."

instruction_messages = {
    "i1": _domain_instruction_steps,
    "i2": _domain_instruction_steps + " 5. If you are not sure, pick the most likely domain.",
    "i3": _domain_instruction_steps + " 5. Answer only with the domains given to you!",
}

In [None]:
# Two-step experiments: step 1 asks the model for the table's domain, step 2 asks
# for the column relationships using only the label subset (and, where possible,
# demonstrations) belonging to the predicted domain.

# Dedicated seeded generator: the randomly drawn demonstrations are identical on
# every run of this cell and the global `random` state is left untouched.
demo_rng = random.Random(42)

for dataset in datasets:
    # Load dataset
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset(dataset,"",False)
    # Load domain labels
    # Sorted so the domain order in the prompt does not depend on set iteration order.
    domains = sorted(set(train_table_type_labels))
    domains_list = ", ".join(domains)
    labels_dict = {}
    for dom in domains:
        # One label per line; `with` makes sure the file handle is closed again.
        with open(f"../data/{dataset}-labels/{dataset}_cpa_{dom}_labels.txt", 'r') as f:
            labels_dict[dom] = [line.split('\n')[0] for line in f.readlines()]

    # Training-table indices grouped by domain, built once instead of being
    # re-scanned for every demonstration of every test table.
    train_indices_by_domain = {}
    for j, e in enumerate(train_table_type_labels):
        train_indices_by_domain.setdefault(e, []).append(j)

    # Step-1 system prompt is the same for every request of this dataset.
    domain_task = task_messages["t2"]+f" {domains_list}."

    for model_name in models:
        print(model_name)
        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        #Few-shot and zero-shot random
        for nr in [0, 1, 5]:
            print(nr)
            try:
                table_preds = []
                preds = []

                for example in tqdm.tqdm(examples, total=len(examples)):
                    #Step 1: predict the table domain
                    messages = []
                    #Task and instructions
                    messages.append(SystemMessage(content=domain_task))
                    messages.append(SystemMessage(content=instruction_messages["i2"]))

                    for _ in range(0, nr):
                        index = demo_rng.randint(0, len(train_examples)-1)
                        messages.append(HumanMessage(content=f"Classify this table: {train_examples[index]}"))
                        messages.append(AIMessage(content=f"{train_table_type_labels[index]}"))

                    messages.append(HumanMessage(content=f"Classify this table:\n{example}"))

                    res = chat(messages)
                    table_preds.append(res.content)

                    # "-" means no known domain name was found in the answer.
                    clean_prediction = get_clean_table_prediction(res.content.strip(), domains)

                    # Step 2: predict the column relationships
                    messages = []

                    #Show only a subset of labels related to the table type predicted
                    if clean_prediction != "-":
                        labels_dom = ", ".join([labels_to_text[l] for l in labels_dict[clean_prediction]])
                    else:
                        # No usable domain: fall back to the full label set.
                        labels_dom = labels_joined

                    messages.append(SystemMessage(content=f"Your task is to classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_dom}."))
                    messages.append(SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"))

                    #Pick nr random demonstrations from the table type predicted in step one
                    for _ in range(0,nr):
                        if clean_prediction != "-" and clean_prediction in train_indices_by_domain:
                            index = demo_rng.choice(train_indices_by_domain[clean_prediction])
                        else:
                            index = demo_rng.randint(0, len(train_examples)-1)
                        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
                        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

                    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
                    res = chat(messages)
                    preds.append(res.content)

                save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-two-step-{nr}-shot-step1.pkl", table_preds)
                save_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-two-step-{nr}-shot-step2.pkl", preds)
            except Exception as exc:
                # Best effort: this configuration is abandoned (nothing is saved) and the
                # next one runs, but the cause is reported instead of being hidden.
                print(f"Error in {nr}-shot: {exc!r}")

## Evaluation

In [None]:
# Column prompt
# Scores the saved column-prompt predictions for every dataset/model pair.
# The column-format dataset is reloaded here so that `labels`, `test` and
# `text_to_label` match these predictions instead of being whatever the most
# recently executed experiment cell left behind (the table-format data after a
# full run).
for dataset in datasets:
    examples, labels, train_examples, train_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset_column(dataset,"")
    for model_name in models:
        # Header line so each block of result rows can be attributed.
        print(f"{dataset}\t{model_name}")
        for nr in [0, 1, 5]:
            preds = load_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-column-{nr}-shot.pkl")
            predictions, num = map_answers_column(preds, test, text_to_label)
            types = list(set(labels))
            # "-" is only added as a class when at least one prediction is "-".
            types = types + ["-"] if '-' in predictions else types
            evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)
            # Row format: Precision, Recall, Macro-F1, Micro-F1, num (second value returned by map_answers_column)
            print(f"{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")

In [None]:
# Table prompt
# Scores the saved table-prompt predictions for every dataset/model pair.
# The table-format dataset is reloaded here so that `labels`, `test` and
# `text_to_label` belong to the dataset being scored rather than to whichever
# dataset an earlier cell happened to load last.
for dataset in datasets:
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset(dataset,"",False)
    for model_name in models:
        # Header line so each block of result rows can be attributed.
        print(f"{dataset}\t{model_name}")
        # Each entry is the "<nr>" part of the file name "cpa-chat-table-<nr>-shot.pkl".
        for nr in [0, 1, 5,"5-similar", "4-cc"]:
            preds = load_pickle_file(f"predictions/{dataset}/{model_name}/cpa-chat-table-{nr}-shot.pkl")
            predictions, num = map_cpa_to_labels(preds, test, text_to_label)
            types = list(set(labels))
            # "-" is only added as a class when at least one prediction is "-".
            types = types + ["-"] if '-' in predictions else types
            evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)
            # Row format: Precision, Recall, Macro-F1, Micro-F1, num (second value returned by map_cpa_to_labels)
            print(f"{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")

## Error Analysis

In [None]:
# Walk through the predictions of the evaluation run above, print every
# mismatch and tally the mistakes per true label.
errors_per_class = {}
errors = 0
for idx, predicted in enumerate(predictions):
    expected = labels[idx]
    if predicted != expected:
        print(f"Predicted as {predicted} when it was {expected}")
        errors_per_class[expected] = errors_per_class.get(expected, 0) + 1
        errors += 1
# Total number of mismatches, shown as the cell output.
errors