In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm


from question_answering import load_pretrained_qa_model
from question_answering import answer_question

In [3]:
# Load the QA tokenizer and model once for the whole notebook; both names are
# read as module-level globals inside ask_all_possible_questions.
tokenizer, model = load_pretrained_qa_model()

In [12]:
# Tasks to process; every task keeps its inputs and outputs under ../results/Q<n>/.
task_ids = [2, 3, 7]

# Build one (questions, ranked sentences, answers) path triple per task, then
# transpose the triples into the three parallel lists used further down.
question_files, sentence_files, output_files = (
    list(column)
    for column in zip(*[
        (
            f'../results/Q{n}/question_list.csv',
            f'../results/Q{n}/ALLRanking_TASK4_1000.csv',
            f'../results/Q{n}/answers_top1000.csv',
        )
        for n in task_ids
    ])
)

In [13]:
def ask_all_possible_questions(question_list, keyword_list, sentence):
    """Ask every question about one sentence and keep the best-scoring answer.

    Questions and keywords are paired positionally (via ``zip``), and each
    question is passed to ``answer_question`` together with the module-level
    ``model`` and ``tokenizer``. Empty-string answers are discarded.

    Args:
        question_list: Questions to ask about ``sentence``.
        keyword_list: Keywords, aligned index-by-index with ``question_list``.
        sentence: The text the questions are asked about.

    Returns:
        An ``(answer, keyword)`` tuple for the non-empty answer with the
        highest score, or ``('', '')`` when no question produced a non-empty
        answer. On ties the earliest question wins (``np.argmax`` semantics).
    """
    found_answers = []
    found_scores = []
    found_keywords = []

    for keyword, question in zip(keyword_list, question_list):
        ans, score = answer_question(question, sentence, model, tokenizer)
        if ans == '':
            # The model found nothing for this question; skip it.
            continue
        found_answers.append(ans)
        found_scores.append(score)
        found_keywords.append(keyword)

    if not found_answers:
        return '', ''

    best = np.argmax(found_scores)
    return found_answers[best], found_keywords[best]

In [16]:
def answer_all_possible_questions(question_file, sentence_file, output_file):
    """Answer every question for every ranked sentence of one task.

    Reads the sentences and the (keyword, question) pairs, runs
    ``ask_all_possible_questions`` on each sentence, attaches the best answer
    and its keyword as new columns, and writes the result to ``output_file``.

    Args:
        question_file: Path to a header-less CSV whose two columns are read
            as ``keyword`` and ``question``.
        sentence_file: Path to a CSV of sentences. Only columns 1-5 are
            loaded (column 0 is skipped), and a ``sent`` column must be among
            them.
        output_file: Path the augmented table is written to as CSV.

    Returns:
        The sentences DataFrame with two added columns, ``answer`` and
        ``keyword``.
    """
    sentences_df = pd.read_csv(sentence_file, usecols=range(1, 6))

    # Use this task's own question list. The path used to be hard-coded to
    # '../results/Q2/question_list.csv', so the `question_file` argument was
    # ignored and every task was answered with the Q2 questions.
    keyword_question_df = pd.read_csv(question_file,
                                      header=None, names=['keyword', 'question'])
    keyword_list = keyword_question_df['keyword'].tolist()
    question_list = keyword_question_df['question'].tolist()

    answers = [
        ask_all_possible_questions(question_list, keyword_list, sent)
        for sent in tqdm(sentences_df['sent'].tolist())
    ]

    if answers:
        ans_lst, val_lst = zip(*answers)
    else:
        # zip(*[]) yields nothing to unpack; an empty sentence file simply
        # produces empty answer/keyword columns.
        ans_lst, val_lst = (), ()

    sentences_df = sentences_df.assign(answer=list(ans_lst),
                                       keyword=list(val_lst))

    sentences_df.to_csv(output_file)

    return sentences_df

In [17]:
# Run the QA pipeline once per task; the resulting frames are collected in
# the same order as task_ids.
answers_to_all = [
    answer_all_possible_questions(question_file, sentence_file, output_file)
    for question_file, sentence_file, output_file
    in zip(question_files, sentence_files, output_files)
]

100%|██████████| 1000/1000 [04:25<00:00,  3.77it/s]
100%|██████████| 1000/1000 [04:25<00:00,  3.77it/s]
100%|██████████| 1000/1000 [04:25<00:00,  3.76it/s]


In [19]:
# Inspect the result for the first entry of task_ids (task 2).
answers_to_all[0]

Unnamed: 0,pid,newpid,category,sent,newscore,answer,keyword
0,3qdjmb2j,8652,ADE,"In addition, both vaccines also induced insert...",0.180748,t cell response,antibody dependent enhancement (ADE)
1,ynef1d1t,8058,ADE,Type I interferon is important in anti-viral r...,0.180612,innate immune response,antibody dependent enhancement (ADE)
2,mbvwh2ky,17361,ADE,"Importantly, attenuation of the dORF3-5 mutant...",0.180191,dysregulated host responses,antibody dependent enhancement (ADE)
3,zq58ot3c,18412,ADE,"Furthermore, T-cell proliferative responses an...",0.179922,gamma,interleukin
4,cwfujgya,29285,ADE,By contrast there may be a lack of type 1 inte...,0.179907,type 1 interferon response,antibody dependent enhancement (ADE)
...,...,...,...,...,...,...,...
995,gq9age1b,15673,ADE,"Immunization of adult mice with UV-V, with or ...",0.086258,adult mice,immunopathology
996,y4dc91nw,34104,ADE,"Here, using a high-throughput yeast two-hybrid...",0.086255,inhibited its activity,immunopathology
997,nl8nzz16,17580,ADE,These include host and pathogen factors as wel...,0.086254,host and pathogen factors,antibody dependent enhancement (ADE)
998,skaom5z7,25435,ADE,The mucosa is the largest immune organ of the ...,0.086252,the mucosa,antibody dependent enhancement (ADE)
