In [1]:
import os
from dotenv import dotenv_values
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from utils import map_cta_labels, map_answers_column, map_sportstables, calculate_f1_scores, save_pickle_file, load_cta_dataset, load_pickle_file, load_cta_dataset_column, decimal
import tqdm
import random

In [2]:
# Read the OpenAI API key from the env file, addressed by its full path
config = dotenv_values("/full/path/to/file/key.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
# Expose the same key through the process environment as well
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [3]:
# Datasets and chat models the experiment cells iterate over
datasets = ["sotabv2", "t2dv2-webtables", "sportstables"]
models = ["gpt-3.5-turbo-0301", "gpt-4-0613"]

# The few-shot cells draw their demonstrations with the `random` module.
# Seeding it once here makes a Restart & Run All pick the same demonstrations every time.
SEED = 42
random.seed(SEED)

## Column-prompts experiments

In [None]:
def _classify_column(chat, labels_joined, train_examples, train_labels, demonstration_indices, example):
    """Ask the chat model to label one column and return the raw text of its answer.

    The prompt is: the task and instruction system messages, then one
    Human/AI message pair per entry of `demonstration_indices` (the training
    column at that index and its label), then the column to classify.
    """
    messages = [
        SystemMessage(content=f"You are a world-class data engineer and your task is to annotate a given column with only one of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1. Look at the column and the labels given to you. 2. Examine the values of the column. 3. Select a label that best represents the meaning of the column. 4. Answer with the selected label. 5. Answer only with labels from the provided label set!"),
    ]
    for index in demonstration_indices:
        messages.append(HumanMessage(content=f"Classify this column: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    messages.append(HumanMessage(content=f"Classify this column: {example}"))
    return chat(messages).content


for model_name in models:
    print(model_name)

    for dataset in datasets[:2]:
        print(dataset)
        # Load dataset
        examples, labels, train_examples, train_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cta_dataset_column(dataset,"")

        # Pre-computed demonstration indices, one entry per test column.
        # (The train/test embedding pickles were loaded here before but never used, so they are no longer read.)
        examples_demonstrations = load_pickle_file(f"embeddings/examples_demonstrations_{dataset}-column.pkl")

        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        # Zero-shot and few-shot with random demonstrations
        for nr in [0, 1, 5]:
            preds = []

            for example in tqdm.tqdm(examples, total=len(examples)):
                # Independent draws: the same training column can be picked more than once
                random_indices = [random.randint(0, len(train_examples)-1) for _ in range(nr)]
                preds.append(_classify_column(chat, labels_joined, train_examples, train_labels, random_indices, example))
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-column-{nr}-shot.pkl", preds)

        # Few-shot with similar demonstrations: use the last `nr` pre-computed indices of each test column
        # (presumably ordered so the most similar come last; verify against the code that builds the pickle)
        for nr in [1, 5]:
            preds = []

            for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
                similar_indices = examples_demonstrations[i][-nr:]
                preds.append(_classify_column(chat, labels_joined, train_examples, train_labels, similar_indices, example))
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-column-{nr}-similar-shot.pkl", preds)

## Table-prompts experiments

In [None]:
def _classify_table_columns(chat, labels_joined, train_examples, train_example_labels, demonstration_indices, example):
    """Ask the chat model to label every column of one table and return the raw answer text.

    The prompt is: the task and instruction system messages, then one
    Human/AI message pair per entry of `demonstration_indices` (the training
    table at that index and its labels), then the table to classify.
    """
    messages = [
        SystemMessage(content=f"You are a world-class data engineer and your task is to annotate the columns of a given table with only one of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a label that best represents the meaning of all cells in the column. 4. Answer with the selected label for each column using the format Column1: label. 5. Answer only with labels from the provided label set!"),
    ]
    for index in demonstration_indices:
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    return chat(messages).content


for model_name in models:
    print(model_name)
    for dataset in datasets[:2]:
        print(dataset)
        # Load dataset
        examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cta_dataset(dataset,"")
        # Pre-computed demonstration indices, one entry per test table
        examples_demonstrations = load_pickle_file(f"embeddings/examples_demonstrations_{dataset}.pkl")
        cc_examples_demonstratons = load_pickle_file(f"embeddings/cc_examples_demonstrations_{dataset}.pkl")

        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        # Zero-shot and few-shot with random demonstrations
        for nr in [0, 1, 5]:
            preds = []

            for example in tqdm.tqdm(examples, total=len(examples)):
                # Independent draws: the same training table can be picked more than once
                random_indices = [random.randint(0, len(train_examples)-1) for _ in range(nr)]
                preds.append(_classify_table_columns(chat, labels_joined, train_examples, train_example_labels, random_indices, example))
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-{nr}-shot.pkl", preds)

        # Few-shot with similar demonstrations: the last `nr` pre-computed indices of each test table
        for nr in [1, 5]:
            preds = []

            for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
                similar_indices = examples_demonstrations[i][-nr:]
                preds.append(_classify_table_columns(chat, labels_joined, train_examples, train_example_labels, similar_indices, example))
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-{nr}-similar-shot.pkl", preds)

        # Few-shot with corner-case demonstrations: every pre-selected index of the test table is used
        preds = []
        for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):
            corner_case_indices = cc_examples_demonstratons[i]
            preds.append(_classify_table_columns(chat, labels_joined, train_examples, train_example_labels, corner_case_indices, example))
        save_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-4-cc-shot.pkl", preds)

## Two-step Approach

In [None]:
def get_clean_table_prediction(table_pred, domains):
    """Extract the predicted domain from a free-text model answer.

    Args:
        table_pred: Raw answer text returned by the model.
        domains: Iterable of known domain names.

    Returns:
        The first entry of `domains` (in the given order) that occurs in
        `table_pred` and is not just a fragment of a longer matched domain,
        or "-" when no domain occurs in the answer.
    """
    matches = [dom for dom in domains if dom in table_pred]
    for dom in matches:
        # A name such as "Event" also matches inside "SportsEvent"; when the longer
        # name matched too, the shorter one is only a fragment of it and is skipped.
        is_fragment = any(dom != other and dom in other for other in matches)
        if not is_fragment:
            return dom
    return "-"

In [None]:
# Task descriptions for the table-domain step; the comma-separated domain list is appended by the caller.
task_messages = {
    "t1": "Your task is to classify a table into one of these domains: ",
    "t2": "You are a world-class data engineer and your task is to classify a table into one of these domains: ",
}

# Steps 1-4 are shared by every instruction variant; "i2" and "i3" each add a fifth step.
_base_instructions = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. Decide the domain that best represents the table. 4. Answer with one domain."

instruction_messages = {
    "i1": _base_instructions,
    "i2": _base_instructions + " 5. If you are not sure, pick the most likely domain.",
    "i3": _base_instructions + " 5. Answer only with the domains given to you!",
}

In [None]:
for dataset in datasets[:2]:
    # Load dataset
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cta_dataset(dataset,"")
    # Load domain labels.
    # Sorted so the domain order in the prompt, and the order in which domains are matched
    # against the answer, is the same on every run; plain set iteration order of strings is not.
    domains = sorted(set(train_table_type_labels))
    domains_list = ", ".join(domains)
    labels_dict = {}
    for dom in domains:
        # One label per line; the context manager closes the file handle once it is read
        with open(f"../data/{dataset}-labels/{dataset}_{dom}_labels.txt", 'r') as label_file:
            labels_dict[dom] = [line.split('\n')[0] for line in label_file]

    # Training-table indices grouped by table type, built once instead of rescanning
    # train_table_type_labels for every demonstration that is drawn
    train_indices_by_domain = {}
    for j, table_type in enumerate(train_table_type_labels):
        train_indices_by_domain.setdefault(table_type, []).append(j)

    for model_name in models:
        print(model_name)
        chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

        # Zero-shot and few-shot with random demonstrations
        for nr in [0, 1, 5]:
            print(nr)
            table_preds = []
            preds = []

            for example in tqdm.tqdm(examples, total=len(examples)):

                # Step 1: ask for the domain of the table
                messages = []
                # Task and instructions
                messages.append(SystemMessage(content=task_messages["t2"]+f" {domains_list}."))
                messages.append(SystemMessage(content=instruction_messages["i2"]))

                for i in range(0, nr):
                    index = random.randint(0, len(train_examples)-1)
                    messages.append(HumanMessage(content=f"Classify this table: {train_examples[index]}"))
                    messages.append(AIMessage(content=f"{train_table_type_labels[index]}"))

                messages.append(HumanMessage(content=f"Classify this table:\n{example}"))

                res = chat(messages)
                table_preds.append(res.content)

                # "-" means that no known domain was found in the answer
                clean_prediction = get_clean_table_prediction(res.content.strip(), domains)

                # Step 2: annotate the columns
                messages = []

                # Show only the labels of the predicted table type; fall back to the full label set
                if clean_prediction != "-":
                    labels_dom = ", ".join([labels_to_text[l] for l in labels_dict[clean_prediction]])
                    domain_indices = train_indices_by_domain.get(clean_prediction, [])
                else:
                    labels_dom = labels_joined
                    domain_indices = []

                messages.append(SystemMessage(content=f"You are a world-class data engineer and your task is to annotate the columns of a given table with only one of the following labels that are separated with comma: {labels_dom}."))
                messages.append(SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a label that best represents the meaning of all cells in the column. 4. Answer with the selected label for each column using the format Column1: label. 5. Answer only with labels from the provided label set!"))

                # Draw demonstrations from the table type predicted in step 1 when there are
                # training tables of that type, otherwise from the whole training set
                for m in range(0,nr):
                    if domain_indices:
                        index = random.choice(domain_indices)
                    else:
                        index = random.randint(0, len(train_examples)-1)
                    messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
                    messages.append(AIMessage(content=f"{train_example_labels[index]}"))

                messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
                res = chat(messages)
                preds.append(res.content)

            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-two-step-{nr}-shot-step1.pkl", table_preds)
            save_pickle_file(f"predictions/{dataset}/{model_name}/chat-two-step-{nr}-shot-step2.pkl", preds)

## Evaluation

In [None]:
# Column-prompt evaluation
# NOTE(review): `dataset`, `model_name` and `labels` are not set in this cell; they keep
# whatever values the most recently executed cell left behind — confirm before reading the scores.
for nr in [0, 1, 5]:
    preds = load_pickle_file(f"predictions/{dataset}/{model_name}/chat-column-{nr}-shot.pkl")
    # Drop empty ground-truth entries
    labels = [l for l in labels if l!=""]
    predictions, num = map_answers_column(preds)
    # Classes to score: every ground-truth label, plus "-" when it occurs among the predictions
    types = list(set(labels))
    if '-' in predictions:
        types = types + ["-"]
    evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)
    # One tab-separated row: Precision, Recall, Macro-F1, Micro-F1, then `num`
    score_cells = [f"{decimal(evaluation[metric])}" for metric in ('Precision', 'Recall', 'Macro-F1', 'Micro-F1')]
    print("\t".join(score_cells + [f"{num}"]))

In [None]:
# Table-prompt evaluation
# NOTE(review): `dataset`, `model_name`, `labels`, `test` and `text_to_label` are not set in this
# cell; they keep whatever values the most recently executed cell left behind — confirm before use.
for nr in [0, 1, 5,"5-similar","4-cc"]:
    preds = load_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-{nr}-shot.pkl")
    # Map the raw answers onto the label space. A second call, map_cta_labels(preds), used to
    # follow and overwrite this result without `test` / `text_to_label`; it has been removed.
    predictions, num = map_cta_labels(preds, test, text_to_label)
    # Drop empty ground-truth entries
    labels = [l for l in labels if l!=""]

    # Classes to score: every ground-truth label, plus "-" when it occurs among the predictions
    types = list(set(labels))
    types = types + ["-"] if '-' in predictions else types
    evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)
    print(f"{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")

In [None]:
# Sportstables table-prompt
# NOTE(review): `dataset`, `model_name` and `labels` are not set in this cell; they keep whatever
# values the most recently executed cell left behind. The experiment loops in this notebook only
# iterate over datasets[:2], so confirm that the sportstables data is actually loaded here.
for nr in [0, 1, 5,"5-similar","4-cc"]:
    preds = load_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-{nr}-shot.pkl")
    # Drop empty ground-truth entries
    labels = [l for l in labels if l!=""]

    # Only the sportstables mapping is applied. A map_cta_labels(...) call used to run first,
    # but both of its results were overwritten by this call before being read.
    predictions, num = map_sportstables(preds)
    # Classes to score: every ground-truth label, plus "-" when it occurs among the predictions
    types = list(set(labels))
    types = types + ["-"] if '-' in predictions else types
    evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)
    print(f"{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")

In [None]:
# Print the F1 score of every class from the most recent evaluation run
for class_name in per_class_eval:
    class_f1 = per_class_eval[class_name]['F1']
    print(f"{class_name}: {class_f1}")

## Error Analysis

In [None]:
# Count the misclassifications of the most recent evaluation run, overall and per true label
errors = 0
errors_per_class = {}
for i, predicted in enumerate(predictions):
    expected = labels[i]
    if predicted != expected:
        errors += 1
        print(f"Predicted as {predicted} when it was {expected}")
        errors_per_class[expected] = errors_per_class.get(expected, 0) + 1
errors

### Re-load previous prediction files

In [7]:
# Reload the zero-shot table-prompt predictions that an earlier run saved to disk.
# NOTE(review): `dataset` and `model_name` are not defined in this cell; they keep whatever
# values the most recently executed loop left behind — confirm before trusting the result.
preds = load_pickle_file(f"predictions/{dataset}/{model_name}/chat-table-0-shot.pkl")

## Calculate tokens

In [66]:
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
import tiktoken
def num_tokens_from_messages(messages, model):
    """Return the number of tokens used by a list of messages.

    Args:
        messages: Iterable of mappings; every value of every mapping is passed
            to the tokenizer, and a "name" key adds the per-name adjustment.
        model: Model name. It selects the tiktoken encoding and the per-message
            overhead; unpinned "gpt-3.5-turbo" / "gpt-4" names are counted as
            their "-0613" versions after printing a warning.

    Returns:
        The token count of all messages plus 3 tokens for the reply priming.

    Raises:
        NotImplementedError: If no overhead constants are known for `model`.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    models_with_three_token_overhead = {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }
    if model in models_with_three_token_overhead:
        tokens_per_message, tokens_per_name = 3, 1
    elif model == "gpt-3.5-turbo-0301":
        # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_message = 4
        # if there's a name, the role is omitted
        tokens_per_name = -1
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )

    # every reply is primed with <|start|>assistant<|message|>
    num_tokens = 3
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    return num_tokens

In [None]:
# Add up the prompt tokens of the zero-shot table prompt over every test table
total_token_number = 0
for model_name in ["gpt-4-0613"]:  # "gpt-3.5-turbo-0301" is currently left out
    print(model_name)
    for dataset in datasets[:1]:
        print(dataset)
        # Load dataset
        examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cta_dataset(dataset,"")

        for nr in [0]:
            for example in tqdm.tqdm(examples, total=len(examples)):
                # Same three messages as the zero-shot table prompt, as plain role/content dicts
                messages = [
                    {"role":"system", "content":f"You are a world-class data engineer and your task is to annotate the columns of a given table with only one of the following labels that are separated with comma: {labels_joined}."},
                    {"role":"system", "content":"Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a label that best represents the meaning of all cells in the column. 4. Answer with the selected label for each column using the format Column1: label. 5. Answer only with labels from the provided label set!"},
                    {"role":"user", "content":f"Classify these table columns: {example}"},
                ]
                total_token_number += num_tokens_from_messages(messages, model_name)

print(total_token_number)