In [1]:
import argparse
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,TrainingArguments,AutoConfig
from datasets import Dataset
import torch
import logging
import os
from peft import LoraConfig, TaskType,get_peft_model,prepare_model_for_kbit_training, PeftModel
import pandas as pd
import math
import bitsandbytes as bnb
import transformers
from typing import Dict
from typing import List, Optional
from accelerate import Accelerator
import numpy as np
import random
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_model_and_tokenizer(
    model_name: str = "meta-llama/Llama-2-13b-chat-hf",
    cache_dir: str = "/data2/yikyungkim/cache",
):
    """Load a 4-bit (NF4) quantized causal LM together with its tokenizer.

    Args:
        model_name: Hub id or local path handed to both ``from_pretrained``
            calls. Defaults to the checkpoint this notebook always used.
        cache_dir: Directory handed to both ``from_pretrained`` calls as
            ``cache_dir``. Defaults to the path this notebook always used.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # `cache_dir` is a `from_pretrained` argument, not a quantization option,
    # so it is no longer passed to BitsAndBytesConfig.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        use_safetensors=True,
        quantization_config=bnb_config,
        trust_remote_code=True,
        device_map="auto",
        cache_dir=cache_dir,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

    return model, tokenizer

In [3]:
def generate_description(model, text: str, tokenizer=None, device="cuda:0"):
    """Generate a continuation of ``text`` and return only the new part.

    Args:
        model: Object exposing ``generate(**inputs)``.
        text: Prompt string to encode and feed to the model.
        tokenizer: Callable used to encode ``text``; must also provide
            ``decode``. When omitted, the notebook-level ``tokenizer``
            variable is used, which is what this function relied on before
            the parameter existed.
        device: Device the encoded prompt is moved to. Defaults to the
            previously hard-coded ``"cuda:0"``.

    Returns:
        The decoded text of the tokens produced after the prompt, with
        special tokens skipped.

    Raises:
        NameError: If no tokenizer is passed and no notebook-level
            ``tokenizer`` exists.
    """
    if tokenizer is None:
        # Backward compatibility with callers that rely on the global variable.
        try:
            tokenizer = globals()["tokenizer"]
        except KeyError:
            raise NameError(
                "name 'tokenizer' is not defined; pass tokenizer=... explicitly"
            ) from None

    inputs = tokenizer(text, return_tensors="pt").to(device)
    # Length of the prompt in tokens, used to slice the prompt off the output.
    prompt_length = len(inputs["input_ids"][0])
    with torch.inference_mode():
        outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

In [5]:
# Build the quantized base model and tokenizer, then wrap the base model with
# the PEFT adapter stored at the checkpoint path below.
model, tokenizer = create_model_and_tokenizer()
# NOTE: despite its name, `output_dir` is the checkpoint path that the adapter
# is loaded FROM, not a location that anything is written to.
output_dir = "/home/yikyungkim/CBR-FinQA/generator_llama2/Finetune_LLMs/finetuning_repo/checkpoints2/checkpoint-368"
model = PeftModel.from_pretrained(model, output_dir)
model.eval()

# The CSV paths are relative, so they resolve against the kernel's working
# directory. NOTE(review): only `test` is read by the cells visible here;
# confirm whether `train` and `dev` are still needed.
train = pd.read_csv("cbr_train.csv")
dev = pd.read_csv("cbr_dev.csv")
test = pd.read_csv("cbr_test.csv")


Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.98s/it]


In [15]:
# Smoke test on the first test row: everything before the first 'Output' in the
# row's `text` column is used as the prompt.
data = test.iloc[0]['text'].split('Output')
result = generate_description(model, data[0])
# Keep the segment after the first 'Output' in the generated text and strip
# newlines, colons and the '</s>' marker. This raises IndexError when the
# generated text does not contain 'Output'.
print(result.split('Output')[1].replace('\n','').replace(':','').replace('</s>','').strip())

subtract(5829, 5735)


In [2]:
# Load previously generated predictions from disk instead of running the model.
# NOTE(review): the file name mentions "llama3" while the model cells above load
# Llama-2-13b; confirm this file comes from the intended run.
result_path = '/home/yikyungkim/CBR-FinQA/generator_llama2/Finetune_LLMs/finetuning_repo/result_llama3_noCase.json'
with open(result_path) as input_file:
    generated_output = json.load(input_file)

In [3]:
# Unwrap the predictions stored under the 'pred' key. The isinstance guard keeps
# this cell idempotent: once `generated_output` has been unwrapped, re-running
# the cell no longer fails on `generated_output['pred']`.
if isinstance(generated_output, dict):
    generated_output = generated_output['pred']
generated_output

['subtract(5829, 5735)',
 'divide(8.1, 56.0)',
 'subtract(153.7, 139.9), divide(#0, 139.9)',
 'divide(121.4, 4187.8)',
 'subtract(318.46, 100), divide(#0, 100), subtract(206.49, 100), divide(#2, 100), subtract(#1, #3)',
 'subtract(100690000, 92710000), divide(#0, 92710000)',
 'divide(463, 4612)',
 'divide(301, 2575)',
 'greater(237.92, 176.94)',
 'subtract(1136, 1171)',
 'subtract(4711, 4926), divide(#0, 4926)',
 'divide(817388, 3644331)',
 'divide(137.4, 559.3)',
 'greater(281.09, 286.22)',
 'divide(195237, 177947), divide(#0, 177947)',
 'table_average(net change for the year, none)',
 'divide(455, 7)',
 'add(705.4, 703.1), divide(#0, const_2)',
 'divide(15.3, 139549)',
 'add(25.0, 9.7), multiply(#0, const_1000)',
 'subtract(772, 843), subtract(#0, const_1)',
 'subtract(311.81, const_100), divide(#0, const_100), subtract(198.18, const_100), divide(#2, const_100), subtract(#1, #3)',
 'divide(101.88, 93.21), subtract(#0, const_1), divide(#1, 93.21)',
 'divide(230, 13208)',
 'divide(2690

In [4]:
import collections

# Immutable record for one QA item: identifier, question text, reference
# program string and reference answer.
QAExample = collections.namedtuple(
    "QAExample",
    ["id", "question", "program", "answer"],
)

def read_examples(input_path):
    """Parse the JSON file at ``input_path`` into a list of ``QAExample``.

    Every element of the decoded JSON is converted with ``read_QA_example``;
    the result keeps the order of the file.
    """
    with open(input_path) as handle:
        entries = json.load(handle)

    return [read_QA_example(item) for item in entries]

def read_QA_example(entry):
    """Build a ``QAExample`` from one decoded dataset entry.

    Reads ``entry['id']`` and, from ``entry['qa']``, the ``'question'``,
    ``'program'`` and ``'exe_ans'`` values.
    """
    entry_id = entry['id']
    qa = entry['qa']
    return QAExample(
        id=entry_id,
        question=qa['question'],
        program=qa['program'],
        answer=qa['exe_ans'],
    )


In [5]:
# Inputs for evaluation: `test_file` is parsed into examples right below, and
# both paths are later passed to `evaluate_result`.
test_file = "/data2/yikyungkim/dataset/finqa_retriever_output/test_retrieve.json"
test_case = '/data2/yikyungkim/case_retriever/inference/bi_bert-base_q+p_mixed_100300/training_100/results/predictions.json'

# Parse the test set into QAExample records.
examples = read_examples(test_file)

In [6]:
# Display the first parsed example as a sanity check of the loader.
examples[0]

QAExample(id='ETR/2016/page_23.pdf-2', question='what is the net change in net revenue during 2015 for entergy corporation?', program='subtract(5829, 5735)', answer=94.0)

In [7]:
def write_predictions(output, output_file):
    """Serialize ``output`` as 4-space-indented JSON into ``output_file``.

    The file is opened in write mode (existing content is replaced) and the
    JSON text is followed by a single trailing newline.
    """
    with open(output_file, "w") as handle:
        # print() supplies the trailing newline.
        print(json.dumps(output, indent=4), file=handle)


def program_tokenization(original_program):
    """Split a program string into a flat token list terminated by ``'EOF'``.

    The string is first cut on ``', '``. Within each piece, an opening
    parenthesis closes the current token (the parenthesis stays attached, e.g.
    ``'subtract('``), and a closing parenthesis is emitted as its own ``')'``
    token after flushing any pending text.
    """
    tokens = []
    for piece in original_program.split(', '):
        pending = ''
        for ch in piece:
            if ch == '(':
                # Operator name plus its opening parenthesis form one token.
                tokens.append(pending + ch)
                pending = ''
            elif ch == ')':
                # Flush the argument collected so far, then emit ')' alone.
                if pending:
                    tokens.append(pending)
                tokens.append(ch)
                pending = ''
            else:
                pending += ch
        if pending:
            tokens.append(pending)
    tokens.append('EOF')
    return tokens

def generate_output_file(examples, generated_output):
    """Pair each example with its generated program and build result mappings.

    Args:
        examples: Sequence of records exposing ``id``, ``program`` and
            ``answer`` attributes.
        generated_output: Indexable collection of predicted program strings;
            position ``i`` is matched with ``examples[i]``.

    Returns:
        ``(all_predictions, all_nbest)``. ``all_predictions`` has the keys
        ``"pred_programs"`` and ``"ref_programs"``, each mapping the stringified
        example index to a token list. ``all_nbest`` maps the same index to a
        one-element list holding the full record (``id``, ``ref_prog``,
        ``ref_answer``, ``pred_prog``).
    """
    pred_programs = collections.OrderedDict()
    ref_programs = collections.OrderedDict()
    all_nbest = collections.OrderedDict()

    for index, example in enumerate(examples):
        key = str(index)
        record = collections.OrderedDict([
            ('id', example.id),
            ('ref_prog', program_tokenization(example.program)),
            ('ref_answer', example.answer),
            ('pred_prog', program_tokenization(generated_output[index])),
        ])

        pred_programs[key] = record['pred_prog']
        ref_programs[key] = record['ref_prog']
        # Only a single candidate is kept per example.
        all_nbest[key] = [record]

    all_predictions = collections.OrderedDict()
    all_predictions["pred_programs"] = pred_programs
    all_predictions["ref_programs"] = ref_programs

    return all_predictions, all_nbest

In [9]:
# Directory that receives the prediction and evaluation files built below.
results_path = '/home/yikyungkim/CBR-FinQA/generator_llama2/inference_llama3_noCase'

In [10]:
# Output locations inside `results_path`. The first two are written by
# `write_predictions`; `nbest_file`, `eval_file` and `error_file` are passed to
# `evaluate_result`.
prediction_file = os.path.join(results_path, "predictions.json")
nbest_file = os.path.join(results_path, "nbest_predictions.json")
eval_file = os.path.join(results_path, "full_results.json")
error_file = os.path.join(results_path, "full_results_error.json")

In [11]:
# Create the results directory if needed; without this, opening the output
# files for writing fails when the directory does not exist yet.
os.makedirs(results_path, exist_ok=True)

# Tokenize reference and predicted programs, then persist both views.
all_predictions, all_nbest = generate_output_file(examples, generated_output)
write_predictions(all_predictions, prediction_file)
write_predictions(all_nbest, nbest_file)

In [12]:
# NOTE(review): wildcard import; `evaluate_result` is not defined in the visible
# cells, so it presumably comes from `cbr_utils` - confirm, and prefer an
# explicit import once the full set of names used from that module is known.
from cbr_utils import *

# Score the n-best file against the test set; the call returns execution,
# program and operator accuracy.
exe_acc, prog_acc, op_acc = evaluate_result(nbest_file, test_file, test_case, eval_file, error_file, "seq")
# One-line summary string; it is built here but not displayed in this cell.
prog_res = "exe acc: " + str(exe_acc) + " prog acc: " + str(prog_acc) + " operator acc: " + str(op_acc)

FIS/2016/page_31.pdf-2
['subtract(', '198.18', 'const_100', ')', 'divide(', '#0', 'const_100', ')', 'subtract(', '311.81', 'const_100', ')', 'divide(', '#2', 'const_100', ')', 'subtract(', '#3', '#1', ')', 'EOF']
['subtract(', '311.81', 'const_100', ')', 'divide(', '#0', 'const_100', ')', 'subtract(', '198.18', 'const_100', ')', 'divide(', '#2', 'const_100', ')', 'subtract(', '#1', '#3', ')', 'EOF']
1.1363
1.1363
FIS/2016/page_31.pdf-2
MRO/2004/page_46.pdf-3
['multiply(', '45686', '37.73', ')', 'EOF']
['multiply(', '37.73', '45686', ')', 'EOF']
1723732.78
1723732.78
MRO/2004/page_46.pdf-3
PPG/2008/page_52.pdf-1
['add(', '21', '99', ')', 'EOF']
['add(', '99', '21', ')', 'EOF']
120.0
120.0
PPG/2008/page_52.pdf-1
AMAT/2013/page_37.pdf-1
['divide(', '100000', 'const_100', ')', 'subtract(', '118.21', 'const_100', ')', 'multiply(', '#0', '#1', ')', 'EOF']
['subtract(', '118.21', 'const_100', ')', 'divide(', '#0', 'const_100', ')', 'multiply(', '100000', '#1', ')', 'EOF']
18210.0
18210.0
AMAT