In [1]:
import os, json
from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Configuration ----------------------------------------------------------
# Either a Hugging Face cache folder name ("models--<org>--<name>") or a plain
# directory name located under /data/llm_models.
model_name = 'models--google--gemma-7b-it'
QnA_dir_path = 'ProteinLMBench.json'
# Number of records (taken from the start of the benchmark file) to evaluate.
benchmark_size = 1000

# Load the benchmark records; later cells read their 'question', 'options'
# and 'answer' keys.
with open(QnA_dir_path, 'r', encoding='utf-8') as f:
    file_data = json.load(f)

# NOTE(review): transformers has already been imported when this runs, so the
# variable may be read too late to influence the hub cache location — confirm.
# The model below is loaded from an explicit local path either way.
os.environ['HUGGINGFACE_HUB_CACHE'] = '/data/llm_models/huggingface/hub'

# --- Resolve the checkpoint directory ----------------------------------------
model_path = f'/data/llm_models/{model_name}'
if 'models--' in model_name:
    # Hub cache layout: the weights live in a sub-folder of "snapshots/".
    snapshots_dir = f'/data/llm_models/huggingface/hub/{model_name}/snapshots/'
    # Sort so the chosen snapshot is deterministic when several exist, and
    # fail with a clear message instead of a bare IndexError when none do.
    snapshot_names = sorted(os.listdir(snapshots_dir))
    if not snapshot_names:
        raise FileNotFoundError(f'No snapshot found under {snapshots_dir}')
    model_path = snapshots_dir + snapshot_names[0]

# --- Load tokenizer and model -------------------------------------------------
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run locally; only use it with checkpoints you trust.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, device_map = "auto").eval()

In [4]:

import re
# Gold answers: the first run of digits found in each record's 'answer' field.
# Records whose answer contains no digit get None (and can therefore never be
# counted as correct) instead of crashing the whole cell.
answer_list = []
for record in file_data:
    answer_match = re.search(r'\d+', str(record['answer']))
    answer_list.append(answer_match.group() if answer_match else None)

missing_answers = answer_list.count(None)
if missing_answers:
    print(f'warning: {missing_answers} answers contain no option number')

prompt = """Please answer the multiple-choice question to the best of your ability by selecting only one correct option. Indicate your choice by stating the option number only. Begin your response with 'The correct option is,' followed by the number of the option. For example, if you believe the correct answer is option 3, respond with 'The correct option is 3.' If you are unsure, make your best guess and follow the same format. Remember to think through each step before deciding. \n\n"""

# One full prompt string per evaluated record: instructions, question,
# newline-separated options, and the answer lead-in.
question = []
for record in file_data[:benchmark_size]:
    options = ''.join(option + '\n' for option in record['options'])
    question.append(
        prompt + '\n Question: \n' + record['question'] + '\n Options: \n' + options + '\n The correct option is'
    )

# Models driven through `model.chat(...)` in the generation cell.
# Each substring needs its own `in` test: `'Yi' or 'Qwen' in model_name`
# evaluates the non-empty literal 'Yi' first and is therefore always true.
chat_model = ('chat' in model_name) or ('Chat' in model_name)
if 'Yi' in model_name or 'Qwen' in model_name:
    chat_model = False

# Instruction-tuned checkpoints ("...Instruct...", or an "it" name component
# such as "gemma-7b-it") are prompted through the tokenizer's chat template.
# "it" is matched as a whole dash/underscore-delimited component so that names
# merely containing the letters "it" do not qualify.
is_instruct = (
    'struct' in model_name
    or re.search(r'(?:^|[-_])it(?:$|[-_])', model_name) is not None
)

inputs = []
tokenizer.pad_token = tokenizer.eos_token
if is_instruct:
    for q in question:
        # NOTE(review): add_generation_prompt is left at its default here;
        # confirm whether the assistant-turn header should be appended.
        chat_ids = tokenizer.apply_chat_template([{"role": "user", "content": q}], return_tensors="pt")
        # Follow the model's device instead of assuming a CUDA device exists.
        inputs.append(chat_ids.to(model.device))
elif not chat_model:
    for q in question:
        encoded = tokenizer(q, return_tensors="pt", padding=True)
        inputs.append(encoded.input_ids.to(model.device))
else:
    # Chat-API models receive the raw prompt strings.
    inputs = list(question)

print(len(inputs))


In [8]:
# Sanity check: display the first prepared model input.
inputs[0]

In [5]:
from tqdm import tqdm
# Raw generation results, one entry per element of `inputs`.
output_list = []

temp = 0.8  # sampling temperature
mnt = 15    # max_new_tokens: only a short "The correct option is N" is needed

# Sampling settings shared by every generation call below.
sampling_kwargs = dict(max_new_tokens=mnt, do_sample=True, temperature=temp)

# Model-family specific end-of-sequence / padding token ids.
# Each substring needs its own `in` test: `'llama3-8B' or 'Llama-3' in
# model_name` is always true, which previously forced id 128001 onto every
# non-Mistral model and made the falcon/default branches unreachable.
if 'Mistral' in model_name:
    special_ids = dict(eos_token_id=2, pad_token_id=2)
elif 'llama3-8B' in model_name or 'Llama-3' in model_name:
    special_ids = dict(eos_token_id=128001, pad_token_id=128001)
elif 'falcon' in model_name:
    special_ids = dict(eos_token_id=11, pad_token_id=11)
else:
    special_ids = {}

# On the chat path only Mistral passes explicit token ids.
chat_ids = special_ids if 'Mistral' in model_name else {}

for q in tqdm(inputs):
    if chat_model:
        try:
            output_list.append(model.chat(tokenizer, q, history=[], **sampling_kwargs, **chat_ids))
        except Exception:
            # Fall back to plain generation when the chat call fails.
            # `except Exception` (not a bare except) lets KeyboardInterrupt
            # stop this long-running loop.
            output_list.append(model.generate(q, **sampling_kwargs))
    else:
        output_list.append(model.generate(q, **sampling_kwargs, **special_ids))

In [6]:

# Model answers with the prompt removed, aligned index-by-index with `question`.
after = []
if not chat_model:
    # `inputs` holds the prompt token tensors and each `output_list` entry is
    # expected to hold the prompt tokens followed by the newly generated ones
    # (causal-LM generate output). Dropping the prompt by token count is
    # robust, whereas replacing the prompt text in the decoded string fails
    # whenever the chat template or tokenizer round-trip alters the text —
    # leaving the prompt's own digits (e.g. "option 3") to be parsed as the
    # model's answer.
    for prompt_ids, generated in zip(inputs, output_list):
        new_tokens = generated[0][prompt_ids.shape[-1]:]
        after.append(tokenizer.decode(new_tokens, skip_special_tokens=True))
else:
    # Chat path: the first element of each result is treated as the response
    # text; strip the prompt in case it is echoed back.
    for response, prompt_text in zip(output_list, question):
        after.append(response[0].replace(prompt_text, ''))


In [7]:
# Preview the first few cleaned model answers rather than dumping the whole list.
after[:20]

In [9]:

from datetime import datetime

# Predicted option numbers: the first run of digits in each model answer,
# or the string "None" when the answer contains no digit.
v_ans = []
non_number = 0
for o in after:
    predicted = re.search(r'\d+', o)
    if predicted is None:
        non_number += 1
        v_ans.append("None")
    else:
        v_ans.append(predicted.group())

print(non_number)

formatted_time = datetime.now().strftime("%Y%m%d_%H%M%S")
if "/" in model_name:
    # Keep only the last path component so the name is usable in a file name;
    # a fixed index of 2 raised IndexError for two-part ids such as "org/name".
    model_name = model_name.rstrip("/").split("/")[-1]

# The output directory is not created anywhere else in this notebook.
os.makedirs('result', exist_ok=True)

# Raw (prompt-stripped) model answers.
with open(f'result/final_result_{benchmark_size}_{formatted_time}_{model_name}.json', 'w') as jj:
    json.dump(after, jj)

# Per-question comparison: "Right" for a match, otherwise
# "<predicted>   <expected>".
psd = 0  # number of correct predictions
with open(f'result/final_compare_{benchmark_size}_{formatted_time}_{model_name}.txt', 'w') as results:
    for predicted_option, expected_option in zip(v_ans, answer_list):
        if predicted_option == expected_option:
            results.write("Right")
            psd += 1
        else:
            results.write(str(predicted_option) + "   " + str(expected_option))
        results.write("\n")

# Guard against an empty prediction list (no division by zero).
accuracy = psd / len(v_ans) if v_ans else 0.0
print('correct rate: ' + str(accuracy))

In [21]:
import json
# Reload the untouched benchmark records from disk before reshaping them.
with open('ProteinLMBench.json', mode='r') as benchmark_file:
    file_data = json.load(benchmark_file)

In [22]:
# Flatten each record's 'options' list into separate 'option 1' ... 'option 6'
# keys (at most six options are kept) and drop the original list.
# Records without an 'options' key are collected in `bad` and left unchanged.
# NOTE: this edits the records of `file_data` in place.
bad = []
for record in file_data:
    if 'options' in record:
        for position, option_text in enumerate(record['options'][:6], start=1):
            # [10:] drops the first 10 characters of the option text —
            # presumably an "option N: " label; confirm against the data.
            record[f'option {position}'] = option_text[10:]
        record.pop('options', None)
    else:
        bad.append(record)


In [24]:
# Preview a few flattened records rather than dumping the whole dataset.
file_data[:5]

In [26]:
import pandas as pd
# Tabulate the flattened records (one row per record, one column per key).
df = pd.DataFrame(data=file_data)

# Persist the table as CSV without the row index.
df.to_csv(path_or_buf='ProteinLMBench.csv', index=False)


In [29]:
# Read the exported benchmark table back from disk.
c = pd.read_csv(filepath_or_buffer='ProteinLMBench.csv')

In [31]:
# Desired column layout: question, the six option columns, then answer and
# explanation. Selecting by this list also drops any column not named in it.
new_order = (
    ['question']
    + ['option 1', 'option 2', 'option 3', 'option 4', 'option 5', 'option 6']
    + ['answer', 'explanation']
)
c = c.loc[:, new_order]

In [33]:
# Overwrite the CSV with the reordered columns (row index omitted).
c.to_csv(path_or_buf='ProteinLMBench.csv', index=False)