In [None]:
import tqdm
import random

from langchain import PromptTemplate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from utils import load_cpa_dataset, save_pickle_file, load_pickle_file, load_cpa_dataset_column, calculate_f1_scores, decimal, map_cpa_to_labels, map_answers_column

import os
# Expose only the GPU with index 1 to this process via CUDA_VISIBLE_DEVICES.
# NOTE(review): torch is already imported above this line; presumably this still
# takes effect because CUDA has not been initialized yet at this point -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [None]:
# Benchmarks to run the experiments on; each name is passed to load_cpa_dataset
# and used as a sub-directory of predictions/.
datasets = ["sotabv2", "t2dv2-webtables"]

# Hugging Face checkpoint id and the short tag used in prediction paths.
model_name = "upstage/SOLAR-0-70b-16bit"
mod = "solar"

# Both the tokenizer and the weights share one local download cache.
hf_cache_dir = "hf_cache/"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=hf_cache_dir)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map="auto",
    cache_dir=hf_cache_dir,
)

## Table prompt experiments

In [None]:
# Zero-shot prompt shared by all prompt-formulation experiments.
# Placeholders: {task}, {labels_joined}, {instruction}, {mess}, {input_string}.
zero_template = "\n".join([
    "### System:",
    "Answer the question based on the task and instructions below.",
    "Task: {task} {labels_joined}.",
    "Instructions: {instruction}",
    "",
    "### User:",
    "{mess}",
    "{input_string}",
    "",
    "### Assistant:",
])

In [None]:
# Few-shot prompt shared by all prompt-formulation experiments.
# Same layout as the zero-shot prompt, with the {examples} block (demonstration
# turns) inserted between the system section and the final user turn.
few_template = "\n".join([
    "### System:",
    "Answer the question based on the task and instructions below.",
    "Task: {task} {labels_joined}.",
    "Instructions: {instruction}",
    "",
    "{examples}",
    "",
    "### User:",
    "{mess}",
    "{input_string}",
    "",
    "### Assistant:",
])

In [None]:
# Task-description variants. The key is the suffix that ends up in the
# prediction file name ("" is the default wording); the value fills {task}
# and is followed by the comma-separated label list in the prompt.
tasks = {
    "": (
        "Classify the relationship between two columns of a given table "
        "with one of the following relationships that are separated with comma:"
    ),
    "-cpa": (
        "Your task is to perform column property annotation (CPA), "
        "meaning that your task is to annotate the relationship between "
        "the leftmost column (name column) and a second column of a given table "
        "with only one of the following relationships that are separated with comma:"
    ),
}

In [None]:
# Instruction variants. The key is the suffix that ends up in the prediction
# file name ("" is the default, five-step wording); the value fills {instruction}.
instructions = {
    "": (
        "1. Look at the input given to you and make a table out of it. "
        "2. Look at the cell values in detail. "
        "3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. "
        "4. Answer with only one selected relationship for each column with the format Column 2: relationship. Don't return any relationship for the first column! "
        "5. Answer only with labels from the provided label set!"
    ),
    "-less-instructions": (
        "1. For each column, select a relationship from the list that best represents the relationship between that column and the first column of the table. "
        "2. Answer with only one selected relationship for each column with the format Column 2: relationship. Don't return any relationship for the first column! "
        "3. Answer only with labels from the provided label set!"
    ),
}

In [None]:
# Variants of the sentence that introduces the table in each user turn.
# The key is the suffix that ends up in the prediction file name
# ("" is the default wording); the value fills {mess}.
last_message = dict([
    ("", "Classify these table columns:"),
    ("-annotate", "Please annotate the columns of the following table:"),
    ("-determine", "Please determine the relationships for columns of this table:"),
    ("-relationships", "Please classify the relationships between the first column and the other columns of this table:"),
])

In [None]:
def _generate_answer(text_prompt):
    """Sample one completion for ``text_prompt`` and return the decoded output sequence.

    The decoded text is stored as-is; the evaluation cell removes the prompt
    from it afterwards with ``str.replace``.
    """
    inputs = tokenizer(text_prompt, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=1000)
    return tokenizer.decode(output[0], skip_special_tokens=True)


# The templates never change, so build the PromptTemplate objects once
# instead of once per prompt formulation.
zero_prompt = PromptTemplate(template=zero_template, input_variables=['input_string', 'labels_joined', 'task', 'instruction', 'mess'])
few_prompt = PromptTemplate(template=few_template, input_variables=['input_string', 'examples', 'labels_joined', 'task', 'instruction', 'mess'])

# Run every (task, instruction, last message) wording on every dataset in
# zero-shot, 1-shot and 3-shot mode. Results are cached as pickle files and a
# configuration is skipped when its prediction file already exists.
for dataset in datasets:
    (examples, labels, test_table_type_labels,
     train_examples, train_example_labels, train_table_type_labels,
     labels_to_text, text_to_label, labels_joined, train, test) = load_cpa_dataset(dataset, "-markdown-20", False)
    # Rebuild the label list shown in the prompt from the textual label names.
    labels_joined = ", ".join([labels_to_text[l] for l in labels_to_text])

    # Make sure the output directory exists so a fresh checkout does not fail
    # on the cache lookups / writes below.
    out_dir = f"predictions/{dataset}/{mod}"
    os.makedirs(out_dir, exist_ok=True)

    for task in tasks:
        for instruction in instructions:
            for mess in last_message:
                # The concatenated dictionary keys identify the wording in file names.
                prompt_formulation = f"{task}{instruction}{mess}"
                print(prompt_formulation)

                # Template fields that are identical for every example of this formulation.
                shared_fields = dict(
                    labels_joined=labels_joined,
                    task=tasks[task],
                    instruction=instructions[instruction],
                    mess=last_message[mess],
                )

                # Zero-shot
                zero_path = f"{out_dir}/cpa-prompt-table{prompt_formulation}-0-shot.pkl"
                if not os.path.exists(zero_path):
                    prompts = []
                    model_answers = []

                    for example in tqdm.tqdm(examples, total=len(examples)):
                        text_prompt = zero_prompt.format(input_string=example.strip(), **shared_fields)
                        prompts.append(text_prompt)
                        model_answers.append(_generate_answer(text_prompt))

                    save_pickle_file(zero_path, model_answers)
                    save_pickle_file(f"{out_dir}/cpa-prompt-table{prompt_formulation}-0-shot-prompts.pkl", prompts)

                # Few-shot: random demonstrations drawn from the training split.
                for j in [1, 3]:
                    # Check each shot count on its own, so an interrupted run
                    # does not regenerate (and overwrite) a finished 1-shot file.
                    few_path = f"{out_dir}/cpa-prompt-table{prompt_formulation}-{j}-shot.pkl"
                    if os.path.exists(few_path):
                        continue

                    prompts = []
                    model_answers = []

                    for example in tqdm.tqdm(examples, total=len(examples)):
                        # Draw j demonstrations (with replacement) and render each
                        # as a user turn followed by its labels as the assistant turn.
                        demonstrations = []
                        for _ in range(j):
                            index = random.randint(0, len(train_examples) - 1)
                            demonstrations.append(
                                f"""### User:\n{last_message[mess]}\n{train_examples[index].strip()}\n\n### Assistant:\n{train_example_labels[index]}"""
                            )
                        random_examples = "\n\n".join(demonstrations).strip()

                        # Strip the test table like the zero-shot branch and the
                        # demonstrations do, so all prompts are formatted alike.
                        text_prompt = few_prompt.format(input_string=example.strip(), examples=random_examples, **shared_fields)
                        prompts.append(text_prompt)
                        model_answers.append(_generate_answer(text_prompt))

                    save_pickle_file(few_path, model_answers)
                    save_pickle_file(f"{out_dir}/cpa-prompt-table{prompt_formulation}-{j}-shot-prompts.pkl", prompts)

## Evaluation

In [None]:
# Table prompt evaluation
# Scores the default prompt formulation (empty suffix) for 0/1/3 shots on every
# dataset. The gold labels are reloaded per dataset instead of relying on
# whatever `dataset` / `labels` were left over from the experiment loop, which
# would only ever score the last dataset and breaks on a fresh kernel.
for dataset in datasets:
    # load_cpa_dataset returns the test labels as its second element
    # (same call and unpacking position as in the experiment loop).
    labels = load_cpa_dataset(dataset, "-markdown-20", False)[1]

    print(dataset)
    # The leading tab leaves room for the "<n>-shot" row label.
    print("\tPrecision\tRecall\tMacro-F1\tMicro-F1\tOOV")
    for nr in [0, 1, 3]:
        preds = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-table-{nr}-shot.pkl')
        prompts = load_pickle_file(f'predictions/{dataset}/{mod}/cpa-prompt-table-{nr}-shot-prompts.pkl')

        # Remove the prompt from each stored answer so only the model's
        # continuation is parsed (a no-op when the prompt is not contained verbatim).
        preds = [pred.replace(prompts[i], "") for i, pred in enumerate(preds)]
        # `num` is reported in the OOV column below.
        predictions, num = map_cpa_to_labels(preds, prompts)

        # Score over the gold label set, plus "-" when it occurs in the predictions
        # (presumably the placeholder for unmapped answers -- confirm in utils).
        types = list(set(labels))
        types = types + ["-"] if "-" in predictions else types
        evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

        print(f"{nr}-shot\t{decimal(evaluation['Precision'])}\t{decimal(evaluation['Recall'])}\t{decimal(evaluation['Macro-F1'])}\t{decimal(evaluation['Micro-F1'])}\t{num}")