In [1]:
# --- Input / output locations -------------------------------------------------
few_shot_path = "/scratch/dzhang5/LLM/TWEET-FID/1.0.expert.devfortrain.short.csv"
data_path = "/scratch/dzhang5/LLM/TWEET-FID/expert.traintestfortest.csv"
output_dir = "/scratch/dzhang5/LLM/TWEET-FID/traintestfortest-results-autolabel-ner-qa-expert/0.1/1.0/lv_v1_tv_v1"
tweet_output_dir = "/scratch/dzhang5/LLM/TWEET-FID/traintestfortest-results-autolabel-expert/0.1/1.0/lv_v1_tv_v1"
token_path = "/home/dzhang5/.cache/huggingface/token"

# --- Model and agent settings -------------------------------------------------
model_name = "gpt-3.5-turbo"
temperature = 0.1
cache = False
console_output = True

# --- Dataset columns ----------------------------------------------------------
label_column = "Food_answer"
text_column = "context"
explanation_column = "Two_step_Food_explanation"
example_selection_label_column = "has_Food"
label_symbol = "^^^^"

# --- Few-shot example selection -----------------------------------------------
few_shot_num = 8
few_shot_selection = "semantic_similarity"
verify_few_shot_selection = "label_diversity_similarity"
tweet_few_shot_selection = "semantic_similarity"
second_verify_few_shot_selection = "label_diversity_similarity"
random_shuffle_examples = True
random_shuffle_examples_seed = 1

# --- Pipeline switches and prompt versions ------------------------------------
use_current_explanation = False
use_ground_explanation = False
verify = True
label_version = "v1"
# NOTE(review): both output directories end in 'lv_v1_tv_v1' while task_version
# is 'v2' here -- confirm the directory names still match the prompt version used.
task_version = "v2"

# Generation Stage

In [2]:
from autolabel.schema import ModelProvider, TaskType
from autolabel.models import register_model, MODEL_REGISTRY
from hf_pipeline_new import HFPipelineLLMNew
from few_shot_new import NewAutoLabelConfig, NewExampleSelectorFactory
from autolabel.few_shot import ExampleSelectorFactory
from template_inst import update_inst_mode
from named_entity_recognition_new import NewNamedEntityRecognitionTask
from classification_new import NewClassificationTask
from question_answering_new import NewQuestionAnsweringTask
from autolabel.tasks import TASK_TYPE_TO_IMPLEMENTATION
from prompt_template import load_ner_second_verify_task_prompt
import sys

In [3]:
# Project helper called with the selected model name.
# NOTE(review): presumably switches the prompt/instruction template mode per model -- confirm in template_inst.
update_inst_mode(model_name)

# Point autolabel's task registry at the project's replacement task classes.
for _task_type, _task_impl in (
    (TaskType.NAMED_ENTITY_RECOGNITION, NewNamedEntityRecognitionTask),
    (TaskType.CLASSIFICATION, NewClassificationTask),
    (TaskType.QUESTION_ANSWERING, NewQuestionAnsweringTask),
):
    TASK_TYPE_TO_IMPLEMENTATION[_task_type] = _task_impl

# Replace the ExampleSelectorFactory name inside the already-imported
# autolabel.labeler module with the project's factory.
setattr(sys.modules['autolabel.labeler'], 'ExampleSelectorFactory', NewExampleSelectorFactory)

# Register the project's pipeline class for the HUGGINGFACE_PIPELINE provider.
register_model(ModelProvider.HUGGINGFACE_PIPELINE, HFPipelineLLMNew)

In [4]:
from autolabel import LabelingAgent, AutolabelDataset
import json
import os
import pandas as pd

In [5]:
# Read the Hugging Face access token from token_path and authenticate this session.
from huggingface_hub import login

with open(token_path) as tfile:
    # Strip surrounding whitespace: a trailing newline in the token file would
    # otherwise be passed to login() and, for the huggingface_pipeline provider,
    # forwarded again through model_params["token"].
    token_str = tfile.read().strip()

login(token=token_str)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/dzhang5/.cache/huggingface/token
Login successful


In [6]:
# Model-name fragments for which device_map is set to None instead of "auto".
no_auto = ["microsoft/prophetnet", "microsoft/phi-2", "google/pegasus-x"]

# A substring match against model_name decides which value is used.
device_map = None if any(fragment in model_name for fragment in no_auto) else "auto"

In [7]:
# Make sure the results directory exists. exist_ok=True removes the window
# between a separate existence check and the creation, during which another
# process could create the directory and make makedirs raise.
os.makedirs(output_dir, exist_ok=True)

# Entity type is the part of the label column name before the first underscore
# (e.g. 'Food_answer' -> 'Food').
label_type = label_column.split('_')[0]

In [8]:
# Short tag for the explanation style, derived from the explanation column name.
explanation_column_str = 'two_step' if explanation_column.startswith('Two_step') else 'one_step'

# File-name suffix; the verification selector is appended only when it differs
# from the generation selector.
final_name = '_aggregated_final_COT_'
if few_shot_selection != verify_few_shot_selection:
    final_name += verify_few_shot_selection + '_'

# Reconstruct the file name of the aggregated predictions and load them from output_dir.
agg_output_name = (
    f"{os.path.basename(model_name)}_strict_{few_shot_selection}"
    f"_COT_{explanation_column_str!s}"
    f"_cur_{use_current_explanation!s}_ground_{use_ground_explanation!s}"
    f"{final_name}{os.path.basename(data_path)}"
)
agg_output_path = os.path.join(output_dir, agg_output_name)
agg_output_data = pd.read_csv(agg_output_path)

In [9]:
# The tweet-level result file name is the aggregated file name with a
# model / selector / '_COT_check_' prefix; it is read from tweet_output_dir.
tweet_output_name = (
    f"{os.path.basename(model_name)}_{tweet_few_shot_selection}_COT_check_{agg_output_name}"
)
tweet_output_path = os.path.join(tweet_output_dir, tweet_output_name)
tweet_output_data = pd.read_csv(tweet_output_path)

In [10]:
# Choose the LLM provider, embedding provider and generation parameters from
# the model name, then derive the task name.
refuel_models = ["refuel-llm", "llama-13b-chat"]
if model_name in refuel_models:
    provider = "refuel"
    em_provider = "huggingface_pipeline"
    model_params = {"max_length": 4096, "temperature": temperature}
    task_model_name = model_name
elif model_name.startswith('gpt'):
    provider = "openai"
    em_provider = "openai"
    model_params = {"max_tokens": 4096, "temperature": temperature}
    task_model_name = model_name
else:
    provider = "huggingface_pipeline"
    em_provider = "huggingface_pipeline"
    model_params = {"max_length": 4096, "temperature": temperature,
                    "quantize": 16, "device_map": device_map,
                    "token": token_str}
    # Use the last path component: identical to index 1 for "org/model" ids,
    # but does not raise IndexError when the model id contains no "/".
    task_model_name = model_name.split('/')[-1]

# Same pattern for every provider; only the model part differs.
task_name = f"FoodborneIllnessIncidentTweetNERQA_{few_shot_selection}_{label_type}_{task_model_name}"

In [11]:
# Length of the marker string stored in label_symbol.
sym_len = len(label_symbol)

# Fetch the five prompt pieces for the second-verification task.
# NOTE(review): the meaning of the trailing positional True is defined in
# prompt_template.load_ner_second_verify_task_prompt -- confirm there.
(
    label_prefix,
    label_suffix,
    label_description,
    verify_task_guideline,
    verify_output_guideline,
) = load_ner_second_verify_task_prompt(
    label_type, label_symbol, label_version, task_version, True
)

In [12]:
# Load the dataset at data_path into a DataFrame.
# NOTE(review): test_data is not referenced by any later cell visible in this
# notebook -- confirm it is still needed.
test_data = pd.read_csv(data_path)

# Verification Stage

In [13]:
from utils import generate_word_second_verification_refer_ans

In [14]:
# Load the few-shot example pool from few_shot_path; later cells add the
# verification reference/explanation columns to this frame.
verify_few_shot_data = pd.read_csv(few_shot_path)

In [15]:
# Names of the columns that hold the sentence-level reference, the word-level
# reference and the explanation; the latter two are prefixed with the entity type.
verify_sen_reference_column = "second_sentence_verify_reference"
verify_word_reference_column = "{}_second_verify_reference".format(label_type)
verify_explanation_column = "{}_second_verify_explanation".format(label_type)

In [16]:
# For every few-shot row, call the project helper on the gold labels and the
# sentence class; its result is expanded into the three verification columns.
verify_few_shot_data[
    [verify_sen_reference_column, verify_word_reference_column, verify_explanation_column]
] = verify_few_shot_data[['CategorizedLabels', 'sentence_class']].apply(
    lambda row: generate_word_second_verification_refer_ans(
        row['CategorizedLabels'], label_type, row['sentence_class']
    ),
    axis=1,
    result_type='expand',
)

In [17]:
# Write the few-shot frame back to few_shot_path, overwriting the file it was
# loaded from; the labeling config later passes few_shot_path as "few_shot_examples".
verify_few_shot_data.to_csv(few_shot_path, index=False)

In [18]:
# Guard: both prediction files must contain the same rows in the same order,
# because the next cell copies 'sentence_class_label' across by index.
# Explicit length/equals checks are used instead of element-wise `==`, which
# raises an opaque ValueError on a length mismatch and treats NaN != NaN.
assert len(agg_output_data) == len(tweet_output_data), (
    f"row count mismatch: {len(agg_output_data)} aggregated rows vs "
    f"{len(tweet_output_data)} tweet-level rows"
)
assert agg_output_data.index.equals(tweet_output_data.index), \
    "aggregated and tweet-level outputs have different indexes"
assert agg_output_data[text_column].equals(tweet_output_data[text_column]), \
    f"column {text_column!r} differs between aggregated and tweet-level outputs"

In [19]:
# Start from the text, predicted labels and answer column of the aggregated
# output, and attach the tweet-level sentence class (aligned by index).
new_test_data = agg_output_data[[text_column, 'CategorizedLabels_prediction', label_column]].assign(
    sentence_class_label=tweet_output_data['sentence_class_label']
)

# Build the sentence/word reference columns from the predictions. The helper's
# third value goes into a throwaway '_' column that is removed just below.
new_test_data[[verify_sen_reference_column, verify_word_reference_column, '_']] = new_test_data[
    ['CategorizedLabels_prediction', 'sentence_class_label']
].apply(
    lambda row: generate_word_second_verification_refer_ans(
        row['CategorizedLabels_prediction'], label_type, row['sentence_class_label']
    ),
    axis=1,
    result_type='expand',
)

# Keep only the columns the verification prompt needs.
new_test_data = new_test_data.drop(
    columns=['CategorizedLabels_prediction', 'sentence_class_label', '_']
)

In [20]:
# File name for the second-verification results, written into output_dir.
# NOTE(review): unlike agg_output_name this uses the full explanation_column
# (not explanation_column_str) and has no '_strict_' tag -- confirm this is intentional.
output_final_name = (
    f"{os.path.basename(model_name)}_{few_shot_selection}"
    f"_COT_{explanation_column!s}"
    f"_cur_{use_current_explanation!s}_ground_{use_ground_explanation!s}"
    f"_{label_type}_second_final_COT_{second_verify_few_shot_selection}"
    f"_{os.path.basename(data_path)}"
)
output_final_path = os.path.join(output_dir, output_final_name)

In [21]:
# Few-shot example layout: context, the two reference findings, then the
# explanation followed by the answer. Braces are placeholders named after the
# corresponding dataset columns.
verify_example_template = "\n".join([
    f"Context: {{{text_column}}}",
    f"Finding: 1. {{{verify_sen_reference_column}}}",
    f"2. {{{verify_word_reference_column}}}",
    "Answer: Let's think step by step.",
    f"{{{verify_explanation_column}}}",
    f"{{{label_column}}}",
])

# Raw settings for the second-verification question-answering task.
config_dict = {
    "task_name": task_name + '_second_verification',
    "task_type": "question_answering",
    "dataset": {
        "label_column": label_column,
        "text_column": text_column,
        "explanation_column": verify_explanation_column,
        "example_selection_label_column": example_selection_label_column,
        "delimiter": ",",
        "label_description": label_description,
    },
    "model": {"provider": provider, "name": model_name, "params": model_params},
    "embedding": {"provider": em_provider},
    "prompt": {
        "task_guidelines": verify_task_guideline,
        "output_guidelines": verify_output_guideline,
        "example_selection_labels": ["yes", "no"],
        "few_shot_examples": few_shot_path,
        "few_shot_selection": second_verify_few_shot_selection,
        "few_shot_num": few_shot_num,
        "random_shuffle_examples": random_shuffle_examples,
        "random_shuffle_examples_seed": random_shuffle_examples_seed,
        "example_template": verify_example_template,
        "chain_of_thought": True,
    },
}

# Wrap the raw dictionary in the project's config class.
config = NewAutoLabelConfig(config_dict)

In [22]:
# Create the labeling agent for the second-verification config, using the
# console_output and cache flags set in the parameter cell.
agent = LabelingAgent(config=config, console_output=console_output, cache=cache)

  warn_deprecated(


In [23]:
# Wrap the prepared verification inputs (new_test_data) in an AutolabelDataset
# bound to the same config as the agent.
verify_ds = AutolabelDataset(new_test_data, config=config)

In [24]:
# Planning step before the actual labeling run; the recorded output shows it
# issuing embedding requests.
# NOTE(review): presumably a dry run that previews prompts/cost -- confirm in the autolabel docs.
agent.plan(verify_ds)

  warn_deprecated(
2024-04-09 01:20:14 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Output()

2024-04-09 01:20:16 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:16 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:16 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:16 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:16 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:17 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:17 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:17 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:17 httpx INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-09 01:20:17 httpx INFO: HTTP Request: 

In [None]:
# Now do the actual labeling: run the agent over verify_ds and rebind
# verify_ds to the dataset it returns.
verify_ds = agent.run(verify_ds)

In [None]:
# Evaluate the labeled dataset and keep the result in `metrics`.
# NOTE(review): presumably scores predictions against label_column -- confirm;
# `metrics` is neither displayed nor saved in the cells visible here.
metrics = verify_ds.eval()

In [None]:
# Persist the labeled frame as CSV and as a pickle next to it.
verify_ds.df.to_csv(output_final_path, index=False)

# Swap only the final extension. str.replace('.csv', '.pkl') would rewrite every
# '.csv' occurrence in the path and, if the path had no '.csv' at all, would
# leave it unchanged so the pickle would overwrite the CSV written just above.
output_final_pickle_path = os.path.splitext(output_final_path)[0] + '.pkl'
verify_ds.df.to_pickle(output_final_pickle_path)