In [1]:
import tqdm
import random

from langchain import PromptTemplate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from utils import load_cpa_dataset, save_pickle_file, load_pickle_file, load_cpa_dataset_column, calculate_f1_scores, decimal, map_cpa_to_labels, map_answers_column

import os
# Expose only GPU 0 to this process.
# NOTE(review): this runs after `import torch`; it presumably still takes effect because
# no CUDA call has been made yet at this point - confirm on the target machine.
os.environ["CUDA_VISIBLE_DEVICES"]="0"# NOTE(review): "3,4,5,6" was left here, presumably an alternative device list

In [None]:
# Datasets every experiment cell below iterates over.
datasets = ["sotabv2", "t2dv2-webtables"]

# The experiments pick few-shot demonstrations with `random` and generate with
# do_sample=True, so both RNGs are seeded once here to make a
# Restart Kernel -> Run All produce the same prompts and sampling stream.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# StableBeluga7B
model_name = "stabilityai/StableBeluga-7B"
# Short model tag used as the directory name under predictions/<dataset>/.
mod = "stablebeluga7b"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="hf_cache/")
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map="auto", cache_dir="hf_cache/")

# SOLAR (alternative model; uncomment this group and comment out the one above to switch)
# model_name = "upstage/SOLAR-0-70b-16bit"
# mod = "solar"
# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="hf_cache/")
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, load_in_8bit=True, device_map="auto", cache_dir="hf_cache/", temperature=0, do_sample=True)

## Column prompt experiments

In [None]:
# Zero-shot prompt for the column-pair experiments.
# Placeholders: {labels_joined} (comma-separated candidate classes), {column_1} and
# {column_2} (the two columns to relate). The prompt ends on "Class:" so the model's
# continuation is the predicted class.
# NOTE: the table-prompt section further down rebinds the name `zero_template`;
# re-run this cell before re-running the column experiment loop.
zero_template = """Answer the question based on the task and instructions below.
Task: Classify the relationship between two columns with one of the following classes that are separated with comma: {labels_joined}.
Instructions: 1. Look at the two columns and the classes given to you 2. Look at their values in detail. 3. Select a class that best represents the relationship between the two columns. 4. Answer with only one class.
Column1: {column_1}
Column2: {column_2}
Class:"""

In [None]:
# Few-shot prompt for the column-pair experiments.
# Same as the zero-shot column prompt plus an {examples} placeholder that receives the
# pre-rendered demonstrations, inserted between the instructions and the query columns.
# NOTE: the table-prompt section further down rebinds the name `few_template`;
# re-run this cell before re-running the column experiment loop.
few_template = """Answer the question based on the task and instructions below.
Task: Classify the relationship between two columns with one of the following classes that are separated with comma: {labels_joined}.
Instructions: 1. Look at the two columns and the classes given to you 2. Look at their values in detail. 3. Select a class that best represents the relationship between the two columns. 4. Answer with only one class.
{examples}
Column1: {column_1}
Column2: {column_2}
Class:"""

In [None]:
# Column-prompt experiments: for every dataset, run the zero-shot prompt and the
# 1-/3-shot prompts over all test column pairs, then pickle the raw model outputs and
# the prompts that produced them under predictions/<dataset>/<mod>/.
for dataset in datasets:
    # `examples`/`labels` are the pairs that get classified; `train_examples`/`train_labels`
    # are the pool the few-shot demonstrations are drawn from.
    examples, labels, train_examples, train_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset_column(dataset,"")
    all_labels = [labels_to_text[l] for l in labels_to_text]

    # Zero-shot
    prompt = PromptTemplate(template=zero_template, input_variables=['labels_joined', 'column_1', 'column_2'])
    prompts = []
    model_answers = []

    for example in tqdm.tqdm(examples, total=len(examples)):
        text_prompt = prompt.format(labels_joined=labels_joined, column_1=example[0], column_2=example[1])
        prompts.append(text_prompt)

        inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
        output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
        # The full decoded sequence is stored; the evaluation cells remove the prompt
        # from it using the saved prompts list.
        model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-column-0-shot.pkl", model_answers)
    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-column-0-shot-prompts.pkl", prompts)

    # Few-shot: j randomly drawn training pairs are shown before the query pair.
    for j in [1, 3]:

        prompts = []
        model_answers = []
        prompt = PromptTemplate(template=few_template, input_variables=['column_1', 'column_2', 'examples', 'labels_joined'])

        for example in tqdm.tqdm(examples, total=len(examples)):

            demonstrations = []
            for _ in range(j):
                # Drawn with replacement, so the same training pair can appear twice.
                index = random.randint(0, len(train_examples)-1)
                # "Column2: " carries the same space as the query section of the
                # template so demonstrations and query are formatted identically.
                demonstrations.append(
                    f"""Column1: {train_examples[index][0]}\nColumn2: {train_examples[index][1]}\nClass: {train_labels[index]}\n"""
                )
            random_examples = "".join(demonstrations).strip()

            text_prompt = prompt.format(column_1=example[0], column_2=example[1], examples=random_examples, labels_joined=labels_joined)
            prompts.append(text_prompt)

            inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
            output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
            model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-column-{j}-shot.pkl", model_answers)
        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-column-{j}-shot-prompts.pkl", prompts)

## Table prompt experiments

In [9]:
# Zero-shot prompt for the table experiments.
# Placeholders: {labels_joined} (comma-separated candidate relationships) and
# {input_string} (the serialized table). "Class:" follows {input_string} directly, so
# whether it starts on its own line depends on the table text ending in a newline.
# NOTE: this rebinds `zero_template`, which the column experiments also use.
# NOTE(review): this template ends with a newline after "Class:" while the few-shot
# table template ends right at "Class:" - confirm the difference is intended.
zero_template = """Answer the question based on the task and instructions below.
Task: Classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}.
Instructions: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column!
Table:
{input_string}Class:
"""

In [7]:
# Few-shot prompt for the table experiments.
# Placeholders: {labels_joined}, {examples} (pre-rendered "Table: ... Class: ..."
# demonstrations) and {input_string} (the serialized query table). "Class:" follows
# {input_string} directly, so the table text is expected to bring its own line break.
# NOTE: this rebinds `few_template`, which the column experiments also use.
few_template = """Answer the question based on the task and instructions below.
Task: Classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}.
Instructions: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column!
{examples}
Table:
{input_string}Class:"""

In [None]:
# Table-prompt experiments: for every dataset, run the zero-shot prompt and four
# few-shot variants (1 and 3 random demonstrations, 3 "similar" demonstrations,
# 2 "corner case" demonstrations), then pickle the raw model outputs and the prompts
# under predictions/<dataset>/<mod>/.
for dataset in datasets:
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset(dataset,"",False)
    # Replace the loader's labels_joined with one built from the text form of every label.
    labels_joined = ", ".join([labels_to_text[l] for l in labels_to_text])

    # Zero-shot
    prompt = PromptTemplate(template=zero_template, input_variables=['input_string', 'labels_joined'])

    prompts = []
    model_answers = []

    for example in tqdm.tqdm(examples, total=len(examples)):
        # NOTE(review): only the zero-shot variant strips the table before formatting;
        # the few-shot variants below pass `example` unchanged - confirm this is intended.
        text_prompt = prompt.format(input_string=example.strip(), labels_joined=labels_joined)
        prompts.append(text_prompt)

        inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
        output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
        # The full decoded sequence is stored; the evaluation cells remove the prompt
        # from it using the saved prompts list.
        model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-0-shot.pkl", model_answers)
    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-0-shot-prompts.pkl", prompts)

    # All few-shot variants share the same template; only the demonstrations differ.
    prompt = PromptTemplate(template=few_template, input_variables=['input_string', 'examples', 'labels_joined'])

    # Few-shot: random
    for j in [1, 3]:

        prompts = []
        model_answers = []

        for example in tqdm.tqdm(examples, total=len(examples)):

            demonstrations = []
            for _ in range(j):
                # Drawn with replacement, so the same training table can appear twice.
                index = random.randint(0, len(train_examples)-1)
                demonstrations.append(f"""Table:\n{train_examples[index]}Class:\n{train_example_labels[index]}\n""")
            random_examples = "".join(demonstrations).strip()

            text_prompt = prompt.format(input_string=example, examples=random_examples, labels_joined=labels_joined)
            prompts.append(text_prompt)

            inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
            output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
            model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-{j}-shot.pkl", model_answers)
        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-{j}-shot-prompts.pkl", prompts)

    # Few-shot: similar
    prompts = []
    model_answers = []
    # Per test example, a sequence of indices into train_examples; the last three are used.
    # NOTE(review): presumably ordered by increasing similarity - confirm against the
    # notebook/script that produced this pickle.
    examples_demonstrations = load_pickle_file(f"embeddings/cpa-examples_demonstrations_{dataset}.pkl")

    for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):

        demonstrations = [
            f"""Table:\n{train_examples[index]}Class:\n{train_example_labels[index]}\n"""
            for index in examples_demonstrations[i][-3:]
        ]
        random_examples = "".join(demonstrations).strip()

        text_prompt = prompt.format(input_string=example, examples=random_examples, labels_joined=labels_joined)
        prompts.append(text_prompt)

        # Tokenize the formatted prompt text. The PromptTemplate object itself was passed
        # here before, so the model never received the prompt that is saved alongside
        # its answer.
        inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
        output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
        model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-3-similar-shot.pkl", model_answers)
    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-3-similar-shot-prompts.pkl", prompts)

    # Few-shot: corner cases
    prompts = []
    model_answers = []
    # Per test example, a sequence of indices into train_examples; the first two are used.
    cc_demonstrations = load_pickle_file(f"embeddings/cpa-cc_examples_demonstrations_{dataset}.pkl")

    for i, example in tqdm.tqdm(enumerate(examples), total=len(examples)):

        demonstrations = [
            f"""Table:\n{train_examples[index]}Class:\n{train_example_labels[index]}\n"""
            for index in cc_demonstrations[i][:2]
        ]
        random_examples = "".join(demonstrations).strip()

        text_prompt = prompt.format(input_string=example, examples=random_examples, labels_joined=labels_joined)
        prompts.append(text_prompt)

        inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
        output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
        model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-2-cc-shot.pkl", model_answers)
    save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-table-2-cc-shot-prompts.pkl", prompts)

### Two-step approach

In [None]:
# Step 1 prompt of the two-step approach: classify the whole table into a domain.
# Placeholders: {domains_list} (comma-separated candidate domains), {examples}
# (pre-rendered demonstrations, empty for zero-shot) and {input_string} (the serialized table).
# NOTE(review): the query ends on "Answer:" while the demonstrations built in the
# two-step loop end on "Class: <domain>" - confirm the mismatch is intended.
table_template = """Your task is to classify a table into one of these domains: {domains_list}.
Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. Decide the domain that best represents the table. 4. Answer with one domain.
{examples}
Classify this table: {input_string}
Answer:"""

In [None]:
# Step 2 prompt of the two-step approach: classify column relationships, with
# {labels_joined} filled by the two-step loop (the labels of the predicted domain, or
# all labels when no domain was recognized).
# Unlike the table few-shot template, a newline separates {input_string} from "Class:".
template = """Answer the question based on the task and instructions below.
Task: Classify the relationship between two columns of a given table with one of the following relationships that are separated with comma: {labels_joined}.
Instructions: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column!
{examples}
Table:
{input_string}
Class:"""

In [None]:
def get_clean_table_prediction(table_pred, domains):
    """Map a free-text model answer to one of the known domains.

    Args:
        table_pred: Text produced by the model for the table-domain question.
        domains: Iterable of candidate domain names.

    Returns:
        The first domain (in iteration order of ``domains``) that occurs as a
        substring of ``table_pred``, or "-" when none of them does.
    """
    matches = (candidate for candidate in domains if candidate in table_pred)
    return next(matches, "-")

In [None]:
# Two-step experiments: step 1 asks the model for the table's domain, step 2 asks for
# the column relationships using only the labels (and demonstrations) of that domain.
# Runs with 0, 1 and 3 demonstrations and pickles answers and prompts of both steps
# under predictions/<dataset>/<mod>/.
for dataset in datasets:
    examples, labels, test_table_type_labels, train_examples, train_example_labels, train_table_type_labels, labels_to_text, text_to_label, labels_joined, train, test = load_cpa_dataset(dataset,"",False)
    # Replace the loader's labels_joined with one built from the text form of every label.
    labels_joined = ", ".join([labels_to_text[l] for l in labels_to_text])

    # Sorted so that the domain order in the prompt and the first-match order in
    # get_clean_table_prediction are the same on every run; iterating a set of strings
    # is not stable across kernel restarts.
    domains = sorted(set(train_table_type_labels))

    # Labels allowed for each domain, one label per line in the domain's label file.
    labels_dict = {}
    for dom in domains:
        # `with` closes the file; the handle was previously left open.
        with open(f"data/labels/{dataset}_cpa_{dom}_labels.txt", 'r') as f:
            labels_dict[dom] = [line.split('\n')[0] for line in f.readlines()]

    domains_list = ", ".join(domains)

    # Positions of the training tables of each domain, computed once instead of
    # rescanning the training labels for every demonstration that is drawn.
    train_indices_by_domain = {
        dom: [k for k, e in enumerate(train_table_type_labels) if e == dom]
        for dom in domains
    }

    # Both templates are loop-invariant, so they are built once per dataset.
    table_prompt = PromptTemplate(template=table_template, input_variables=['input_string', 'domains_list', 'examples'])
    prompt = PromptTemplate(template=template, input_variables=['input_string','labels_joined', 'examples'])

    for j in [0, 1, 3]:

        # Step 1
        table_prompts = []
        table_model_answers = []
        # Step 2
        prompts = []
        model_answers = []

        for example in tqdm.tqdm(examples, total=len(examples)):
            # ---- Step 1: predict the table domain ----
            demonstrations = []
            for _ in range(j):
                index = random.randint(0, len(train_examples)-1)
                # NOTE(review): demonstrations end on "Class:" while table_template ends
                # on "Answer:" - confirm the mismatch is intended.
                demonstrations.append(f"""Classify this table:{train_examples[index]}Class: {train_table_type_labels[index]}\n""")
            random_examples = "".join(demonstrations).strip()

            text_prompt = table_prompt.format(input_string=example, examples=random_examples, domains_list=domains_list)
            table_prompts.append(text_prompt)

            inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
            output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
            answer = tokenizer.decode(output[0], skip_special_tokens=True)
            table_model_answers.append(answer)

            # Drop the echoed prompt so domain names listed in the prompt itself are
            # not mistaken for the prediction.
            clean_prediction = get_clean_table_prediction(answer.replace(text_prompt, "").strip(), domains)

            # ---- Step 2: predict the relationships, restricted to that domain ----
            random_examples = """"""

            if clean_prediction != "-":
                labels_dom = ", ".join([labels_to_text[l] for l in labels_dict[clean_prediction]])

                demonstrations = []
                for _ in range(j):
                    # Demonstrations come only from training tables of the predicted domain.
                    index = random.choice(train_indices_by_domain[clean_prediction])
                    demonstrations.append(f"""Table:\n{train_examples[index]}Class:\n{train_example_labels[index]}\n""")
                random_examples = "".join(demonstrations).strip()

            else:
                # No domain recognized: use the full label set and no demonstrations.
                labels_dom = labels_joined

            text_prompt = prompt.format(input_string=example, examples=random_examples, labels_joined=labels_dom)
            prompts.append(text_prompt)

            inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
            output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
            model_answers.append(tokenizer.decode(output[0], skip_special_tokens=True))

        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-two-step-{j}-shot-step1.pkl", table_model_answers)
        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-two-step-{j}-shot-step1-prompts.pkl", table_prompts)
        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-two-step-{j}-shot-step2.pkl", model_answers)
        save_pickle_file(f"predictions/{dataset}/{mod}/cpa-prompt-two-step-{j}-shot-step2-prompts.pkl", prompts)

## Evaluation

In [None]:
# Column prompt evaluation
# Prints one row of scores per dataset and shot count. The first header column is
# empty because each row starts with its "<nr>-shot" label.
print(f"\tPrecision\tRecall\tMacro-F1\tMicro-F1\tOOV")
for dataset in datasets:
    # Reload the ground truth with the same loader the column experiments used, so
    # this cell does not depend on whichever experiment loop last assigned
    # `dataset` and `labels`.
    _, labels, *_ = load_cpa_dataset_column(dataset, "")
    print(dataset)

    for nr in [0, 1, 3]:
        # The path uses the short tag `mod`, matching where the experiments saved their
        # output; `model` is the loaded model object, not a directory name.
        preds = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-column-{nr}-shot.pkl')
        prompts = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-column-{nr}-shot-prompts.pkl')

        # Stored answers contain the prompt followed by the generation; keep the generation only.
        preds = [pred.replace(prompts[i], "") for i,pred in enumerate(preds)]
        predictions, num = map_answers_column(preds,prompts)

        types = list(set(labels))
        # "-" is added as an extra class only when it occurs among the predictions.
        types = types + ["-"] if "-" in predictions else types
        evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

        print(f"{nr}-shot\t{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")

In [None]:
# Table prompt evaluation
# Prints one row of scores per dataset and prompt variant. The first header column is
# empty because each row starts with its "<nr>-shot" label.
print(f"\tPrecision\tRecall\tMacro-F1\tMicro-F1\tOOV")
for dataset in datasets:
    # Reload the ground truth with the same loader the table experiments used, so
    # this cell does not depend on whichever experiment loop last assigned
    # `dataset` and `labels`.
    _, labels, *_ = load_cpa_dataset(dataset, "", False)
    print(dataset)

    for nr in [0, 1, 3, "3-similar", "2-cc"]:
        # The path uses the short tag `mod`, matching where the experiments saved their
        # output; `model` is the loaded model object, not a directory name.
        preds = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-table-{nr}-shot.pkl')
        prompts = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-table-{nr}-shot-prompts.pkl')

        # Stored answers contain the prompt followed by the generation; keep the generation only.
        preds = [pred.replace(prompts[i], "") for i,pred in enumerate(preds)]
        predictions, num = map_cpa_to_labels(preds,prompts)

        types = list(set(labels))
        # "-" is added as an extra class only when it occurs among the predictions.
        types = types + ["-"] if "-" in predictions else types
        evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

        print(f"{nr}-shot\t{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")