In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm


from question_answering import load_pretrained_qa_model
from question_answering import answer_question

In [2]:
# Load the tokenizer and model once via load_pretrained_qa_model();
# ask_all_possible_questions reads both as notebook-level globals.
tokenizer, model = load_pretrained_qa_model()

In [3]:
# Question numbers to process; each maps to a ../results/Q<n>/ directory.
task_ids = [2, 3, 7]

# One path per task, index-aligned with task_ids.
question_files = [
    f'../results/Q{task_id}/question_list.csv' for task_id in task_ids
]
sentence_files = [
    f'../results/Q{task_id}/ALLRanking_TASK4_1000.csv' for task_id in task_ids
]
output_files = [
    f'../results/Q{task_id}/answers_top1000.csv' for task_id in task_ids
]

In [4]:
def ask_all_possible_questions(question_list, keyword_list, sentence):
    """Pose every question to one sentence and keep the best-scoring answer.

    Questions and keywords are paired positionally. Each question is sent to
    ``answer_question`` together with the notebook-level ``model`` and
    ``tokenizer``; results whose answer is the empty string are discarded.

    Args:
        question_list: Questions to ask about the sentence.
        keyword_list: Keywords paired index-by-index with ``question_list``.
        sentence: The text the questions are asked against.

    Returns:
        ``(answer, keyword)`` for the retained result with the highest score,
        or ``('', '')`` when no question produced a non-empty answer.
    """
    kept_answers, kept_scores, kept_keywords = [], [], []
    for question, keyword in zip(question_list, keyword_list):
        text, score = answer_question(question, sentence, model, tokenizer)
        if text != '':
            kept_answers.append(text)
            kept_scores.append(score)
            kept_keywords.append(keyword)

    if not kept_answers:
        return '', ''

    # np.argmax returns the first index of the maximum, so ties resolve to
    # the earliest question in the list.
    best = np.argmax(kept_scores)
    return kept_answers[best], kept_keywords[best]

In [5]:
def answer_all_possible_questions(question_file, sentence_file, output_file):
    """Answer every ranked sentence of one task and save the result as CSV.

    Args:
        question_file: Path to a header-less CSV whose two columns are read
            as ``keyword`` and ``question``.
        sentence_file: Path to a CSV of ranked sentences. Only the columns at
            positions 1-5 are loaded (position 0 is skipped); one of them
            must be named ``sent``.
        output_file: Path the augmented table is written to.

    Returns:
        The sentence DataFrame with two added columns: ``answer`` (best
        answer found for the sentence) and ``keyword`` (keyword of the
        question that produced it). Both are ``''`` for sentences where no
        question yielded an answer.

    Side effects:
        Writes the returned frame to ``output_file`` with pandas' default
        ``to_csv`` settings, so the index is written as the first column.
    """
    sentences_df = pd.read_csv(sentence_file, usecols=range(1, 6))

    keyword_question_df = pd.read_csv(question_file,
                                      header=None, names=['keyword', 'question'])
    keyword_list = keyword_question_df['keyword'].tolist()
    question_list = keyword_question_df['question'].tolist()

    # One (answer, keyword) pair per sentence, in row order.
    answers = [
        ask_all_possible_questions(question_list, keyword_list, sent)
        for sent in tqdm(sentences_df['sent'].tolist())
    ]

    # Split the pairs with comprehensions rather than `zip(*answers)`:
    # unpacking zip(*[]) raises ValueError when the sentence file has no rows.
    sentences_df = sentences_df.assign(
        answer=[ans for ans, _ in answers],
        keyword=[keyword for _, keyword in answers],
    )

    sentences_df.to_csv(output_file)

    return sentences_df

In [6]:
# Run the QA pass for each task in turn; results stay index-aligned with
# task_ids. Appending incrementally keeps finished tasks if a later one fails.
answers_to_all = []
for question_file, sentence_file, output_file in zip(
        question_files, sentence_files, output_files):
    answers_to_all.append(
        answer_all_possible_questions(question_file, sentence_file, output_file)
    )

100%|██████████| 1000/1000 [04:22<00:00,  3.80it/s]
100%|██████████| 1000/1000 [02:21<00:00,  7.08it/s]
100%|██████████| 1000/1000 [02:22<00:00,  6.99it/s]


In [7]:
# Answers for the first task in task_ids (Q2).
answers_to_all[0]

Unnamed: 0,pid,newpid,category,sent,newscore,answer,keyword
0,3qdjmb2j,8652,ADE,"In addition, both vaccines also induced insert...",0.180748,t cell response,antibody dependent enhancement (ADE)
1,ynef1d1t,8058,ADE,Type I interferon is important in anti-viral r...,0.180612,innate immune response,antibody dependent enhancement (ADE)
2,mbvwh2ky,17361,ADE,"Importantly, attenuation of the dORF3-5 mutant...",0.180191,dysregulated host responses,antibody dependent enhancement (ADE)
3,zq58ot3c,18412,ADE,"Furthermore, T-cell proliferative responses an...",0.179922,gamma,interleukin
4,cwfujgya,29285,ADE,By contrast there may be a lack of type 1 inte...,0.179907,type 1 interferon response,antibody dependent enhancement (ADE)
...,...,...,...,...,...,...,...
995,gq9age1b,15673,ADE,"Immunization of adult mice with UV-V, with or ...",0.086258,adult mice,immunopathology
996,y4dc91nw,34104,ADE,"Here, using a high-throughput yeast two-hybrid...",0.086255,inhibited its activity,immunopathology
997,nl8nzz16,17580,ADE,These include host and pathogen factors as wel...,0.086254,host and pathogen factors,antibody dependent enhancement (ADE)
998,skaom5z7,25435,ADE,The mucosa is the largest immune organ of the ...,0.086252,the mucosa,antibody dependent enhancement (ADE)


In [8]:
# Answers for the second task in task_ids (Q3).
answers_to_all[1]

Unnamed: 0,pid,newpid,category,sent,newscore,answer,keyword
0,7y7ke10e,38011,animal,Vaccination is an effective method to prevent ...,0.488901,vaccination is an effective method to prevent ...,Mice
1,rme6b022,6604,animal,Vaccination is an effective approach to preven...,0.488714,vaccination is an effective approach to preven...,Mice
2,j7bqvdvw,33446,animal,Our data indicate that antibody-mediated immun...,0.488675,our data indicate that antibody-mediated immun...,Mice
3,ql30rv83,21976,animal,Our results showed that immunized mice generat...,0.488313,immunized mice,animal model
4,iiv08rcq,12687,animal,Our results show that chimeric VLP can be an e...,0.488145,can be an effective vaccine strategy against s...,Mice
...,...,...,...,...,...,...,...
995,4ua0oqbz,39887,animal,It is well established that increasing age is ...,0.484167,increasing age,Mice
996,fn8k8q3e,36218,animal,"Together, these results suggest that the relat...",0.484164,mice,animal model
997,q3be4e15,15986,animal,In order to study the immunogenicity of UV-ina...,0.484154,mice,animal model
998,gug45wqz,3580,animal,Ebola virus infection is a highly lethal disea...,0.484153,no effective therapeutic or preventive treatments,Mice


In [9]:
# Answers for the third task in task_ids (Q7).
answers_to_all[2]

Unnamed: 0,pid,newpid,category,sent,newscore,answer,keyword
0,ogb83fgc,5041,animaldesign,The immunogenicity of this double-inactivated ...,0.761497,immunogenicity of this double-inactivated vacc...,Challenge
1,st0wcabn,6427,animaldesign,"Accordingly, surface expression of DC-targeted...",0.759277,could be used as a vaccine .,Challenge
2,uxnbie09,6707,animaldesign,We evaluated the safety and immunogenicity of...,0.759201,we evaluated the safety and immunogenicity of ...,Challenge
3,3rr0j5tk,5312,animaldesign,This engineered vaccine demonstrates significa...,0.758517,lethal mers-cov challenge,Challenge
4,hkyiy1gm,12936,animaldesign,We examined the immune responses to SARS-CoV i...,0.758509,we examined the immune responses to sars-cov,Challenge
...,...,...,...,...,...,...,...
995,6s1dni4p,12294,animaldesign,While the exact route of transmission to human...,0.751175,while the exact route of transmission to human...,Challenge
996,86l110no,15212,animaldesign,To determine the potential anti-ZIKV inhibitor...,0.751172,to determine the potential anti-zikv inhibitor...,Challenge
997,zhf8jdcl,15136,animaldesign,When given via intraperitoneal and oral routes...,0.751172,mouse model of human ovarian cancer,Challenge
998,04pp3lv0,15138,animaldesign,"Since its discovery, tremendous efforts have b...",0.751170,"since its discovery , tremendous efforts have ...",Challenge
