In [1]:
import pandas as pd
import numpy as np
import time
import re

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
from haystack.utils import clean_wiki_text, convert_files_to_dicts, fetch_archive_from_http, print_answers
from haystack.nodes import FARMReader, TransformersReader
from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
from haystack.pipelines import ExtractiveQAPipeline

#haystack contains a search system for retrieval and QA across documents.
#designed for large documents, but pipeline also works for single document QA 

INFO - haystack.document_stores.base -  Numba not found, replacing njit() with no-op implementation. Enable it with 'pip install numba'.
INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/


In [4]:
# In-Memory Document Store
from haystack.document_stores import InMemoryDocumentStore

In [5]:
# Retrieve from store
from haystack.nodes import TfidfRetriever

# import file containing all abstracts

In [6]:
fulldf = pd.read_csv('data/included_abstracts.csv', index_col=0)

In [7]:
abstract_list = fulldf['abstract']
title_list = list(range(0,len(abstract_list)))
pmid_list = fulldf['pmid']

In [8]:
abstract_list.head()

0    Background In this study, we aimed to compare two outbreaks of coronavirus d...
1    Social and behavioral determinants of health (SBDoH) have important roles in...
2    Chronic diabetes can lead to microvascular complications, including diabetic...
3    Opioid Use Disorder (OUD) is a public health crisis costing the US billions ...
4    Automatic epilepsy detection is of great significance for the diagnosis and ...
Name: abstract, dtype: object

In [9]:
len(abstract_list)

42587

In [10]:
n = 55
test = abstract_list[n]
title = title_list[n]

In [11]:
test_dict = {'content': test, 'meta': {'name': title}}

In [12]:
test_dict

{'content': "Heart failure (HF) is a major cause of mortality. Accurately monitoring HF progress and adjusting therapies are critical for improving patient outcomes. An experienced cardiologist can make accurate HF stage diagnoses based on combination of symptoms, signs, and lab results from the electronic health records (EHR) of a patient, without directly measuring heart function. We examined whether machine learning models, more specifically the XGBoost model, can accurately predict patient stage based on EHR, and we further applied the SHapley Additive exPlanations (SHAP) framework to identify informative features and their interpretations. Our results indicate that based on structured data from EHR, our models could predict patients' ejection fraction (EF) scores with moderate accuracy. SHAP analyses identified informative features and revealed potential clinical subtypes of HF. Our findings provide insights on how to design computing systems to accurately monitor disease progress

# set up question bank

In [13]:
q1="what disease is being studied?"
q2="What is the objective of the study?"
###
q3="how many patient data samples were included in this study?"
q4="what modality of data is used in this study?"
###
q5="what country was the study conducted in?"
q6="what hospital did the data come from?"
q7="What existing database did the data come from?"
q8="What organisation did the data come from?"
###
#q10="how does the model perform relative to a human?"
#q11="how does the model perform in prospective testing"
#q12="what were the results of the study?"
#q12="what was the area under the curve (AUC) value?"

qlist = [q1, q2, q3, q4, q5, q6, q7, q8]

# initialising pre-processor module

In [14]:
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=False,
    split_by="word",
    split_length=2000,
    split_respect_sentence_boundary=True,
)

document_store = InMemoryDocumentStore()

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1


# question answer pipeline

In [15]:
##create dataframe to hold results

resultsdf = fulldf[['pmid', 'title', 'abstract']].copy()

for q in qlist:
    resultsdf[q] = pd.Series(dtype='object')
    
#resultsdf['q1_disease'] = pd.Series(dtype='object')
#resultsdf['q2_objective'] = pd.Series(dtype='object')
#resultsdf['q3_size'] = pd.Series(dtype='object')
#resultsdf['q4_modality'] = pd.Series(dtype='object')
#resultsdf['q5_country'] = pd.Series(dtype='object')
#resultsdf['q6_hospital'] = pd.Series(dtype='object')
#resultsdf['q7_database'] = pd.Series(dtype='object')
#resultsdf['q8_organisation'] = pd.Series(dtype='object')

print(len(resultsdf))
resultsdf.head(2)

42587


Unnamed: 0,pmid,title,abstract,what disease is being studied?,What is the objective of the study?,how many patient data samples were included in this study?,what modality of data is used in this study?,what country was the study conducted in?,what hospital did the data come from?,What existing database did the data come from?,What organisation did the data come from?
0,35308674,Comparison of Chest Computed Tomography Between the Two Waves of Coronavirus...,"Background In this study, we aimed to compare two outbreaks of coronavirus d...",,,,,,,,
1,35309014,A Study of Social and Behavioral Determinants of Health in Lung Cancer Patie...,Social and behavioral determinants of health (SBDoH) have important roles in...,,,,,,,,


In [16]:
testdf = resultsdf[0:500]
#testdf = resultsdf.copy()
testdf.head()

Unnamed: 0,pmid,title,abstract,what disease is being studied?,What is the objective of the study?,how many patient data samples were included in this study?,what modality of data is used in this study?,what country was the study conducted in?,what hospital did the data come from?,What existing database did the data come from?,What organisation did the data come from?
0,35308674,Comparison of Chest Computed Tomography Between the Two Waves of Coronavirus...,"Background In this study, we aimed to compare two outbreaks of coronavirus d...",,,,,,,,
1,35309014,A Study of Social and Behavioral Determinants of Health in Lung Cancer Patie...,Social and behavioral determinants of health (SBDoH) have important roles in...,,,,,,,,
2,35308968,A Federated Mining Approach on Predicting Diabetes-Related Complications: De...,"Chronic diabetes can lead to microvascular complications, including diabetic...",,,,,,,,
3,35308960,Identifying Opioid Use Disorder from Longitudinal Healthcare Data using a Mu...,Opioid Use Disorder (OUD) is a public health crisis costing the US billions ...,,,,,,,,
4,35306966,Both Cross-Patient and Patient-Specific Seizure Detection Based on Self-Orga...,Automatic epilepsy detection is of great significance for the diagnosis and ...,,,,,,,,


In [34]:
retriever = TfidfRetriever(document_store=document_store)
tuned_reader = FARMReader(model_name_or_path="pubmed_tuned", use_gpu=True, use_confidence_scores=True)
tuned_pipe = ExtractiveQAPipeline(tuned_reader, retriever)

INFO - haystack.nodes.retriever.sparse -  Found 1 candidate paragraphs from 1 docs in DB
INFO - haystack.modeling.utils -  Using devices: CUDA
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at pubmed_tuned
INFO - haystack.modeling.model.language_model -  Loaded pubmed_tuned
INFO - haystack.modeling.model.adaptive_model -  Found files for loading 1 prediction heads
INFO - haystack.modeling.model.prediction_head -  Loading prediction head from pubmed_tuned\prediction_head_0.bin
INFO - haystack.modeling.data_handler.processor -  Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()
INFO - haystack.modeling.logger -  ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.
INFO - haystack.modeling.utils -  Using 

In [35]:
start = time.time()

for i, row in testdf.iterrows():
    #set up dict for new abstract
    abstract_dict = {'content': abstract_list[i], 'meta': {'name': title_list[i]}}
    docs_proc = preprocessor.process(abstract_dict)
        #####test#####print(f"n_docs_input: 1\nn_docs_output: {len(docs_proc)}")
    
    #dump old doc store, and import next document into doc store
    document_store.delete_documents()
    document_store.write_documents(docs_proc)

    #set up pipeline
    retriever = TfidfRetriever(document_store=document_store)
    
    #run pipeline for current 'i'
    plist = qlist.copy() #same length list to set-up iteration through question bank
    l = len(qlist)
    
    temp_list = []
    temp_list = [row['pmid'], row['title'], row['abstract']]
    
    for answer in range(0,l):
        plist[answer] = tuned_pipe.run(
            query=qlist[answer], params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
        )
        
        #append top answer for each row/question
        print(plist[answer]['answers'][0])
        temp_list.append(plist[answer]['answers'][0])
        
    testdf.loc[i] = temp_list
    
#measure time
end = time.time()
print(end - start)

INFO - haystack.nodes.retriever.sparse -  Found 1 candidate paragraphs from 1 docs in DB
  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.29 Batches/s]


<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, context='Background In this study, we aimed to compare two ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.50 Batches/s]


<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium in tomographic and biological-clinical aspects', score=0.2802339196205139, context='tudy, we aimed to compare two outbreaks of coronav...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.53 Batches/s]


<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest CT severity score was calculate...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.02 Batches/s]


<Answer: answer='chest computed tomography', score=0.5243260562419891, context=' in the first seven days with COVID-19 infection, ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.09 Batches/s]


<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.74 Batches/s]


<Answer: answer='Belgium', score=0.010742113459855318, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.01 Batches/s]


<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during the course of the disease. The quanti...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.61 Batches/s]
INFO - haystack.nodes.retriever.sparse -  Found 1 candidate paragraphs from 1 docs in DB


<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.89 Batches/s]


<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, context='Background In this study, we aimed to compare two ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 10.23 Batches/s]


<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium in tomographic and biological-clinical aspects', score=0.2802339196205139, context='tudy, we aimed to compare two outbreaks of coronav...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.83 Batches/s]


<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest CT severity score was calculate...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.40 Batches/s]


<Answer: answer='chest computed tomography', score=0.5243260562419891, context=' in the first seven days with COVID-19 infection, ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.49 Batches/s]


<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.40 Batches/s]


<Answer: answer='Belgium', score=0.010742113459855318, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.63 Batches/s]


<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during the course of the disease. The quanti...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.93 Batches/s]
INFO - haystack.nodes.retriever.sparse -  Found 1 candidate paragraphs from 1 docs in DB


<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.79 Batches/s]


<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, context='Background In this study, we aimed to compare two ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.90 Batches/s]


<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium in tomographic and biological-clinical aspects', score=0.2802339196205139, context='tudy, we aimed to compare two outbreaks of coronav...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.92 Batches/s]


<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest CT severity score was calculate...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 22.28 Batches/s]


<Answer: answer='chest computed tomography', score=0.5243260562419891, context=' in the first seven days with COVID-19 infection, ...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 21.32 Batches/s]


<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.32 Batches/s]


<Answer: answer='Belgium', score=0.010742113459855318, context='imed to compare two outbreaks of coronavirus disea...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.30 Batches/s]


<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during the course of the disease. The quanti...'>


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.33 Batches/s]

<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compare two outbreaks of coronavirus disea...'>
61.947543144226074





In [36]:
#testdf.to_csv('output/100_test.csv')

# clean data

In [37]:
cleandf = testdf.copy().applymap(str)
cleandf.head()

Unnamed: 0,pmid,title,abstract,what disease is being studied?,What is the objective of the study?,how many patient data samples were included in this study?,what modality of data is used in this study?,what country was the study conducted in?,what hospital did the data come from?,What existing database did the data come from?,What organisation did the data come from?
0,35308674,Comparison of Chest Computed Tomography Between the Two Waves of Coronavirus...,"Background In this study, we aimed to compare two outbreaks of coronavirus d...","<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, contex...",<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID...,"<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest...","<Answer: answer='chest computed tomography', score=0.5243260562419891, conte...","<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare...","<Answer: answer='Belgium', score=0.010742113459855318, context='imed to comp...","<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during...","<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compa..."
1,35309014,A Study of Social and Behavioral Determinants of Health in Lung Cancer Patie...,Social and behavioral determinants of health (SBDoH) have important roles in...,"<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, contex...",<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID...,"<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest...","<Answer: answer='chest computed tomography', score=0.5243260562419891, conte...","<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare...","<Answer: answer='Belgium', score=0.010742113459855318, context='imed to comp...","<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during...","<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compa..."
2,35308968,A Federated Mining Approach on Predicting Diabetes-Related Complications: De...,"Chronic diabetes can lead to microvascular complications, including diabetic...","<Answer: answer='coronavirus disease 2019', score=0.6681817770004272, contex...",<Answer: answer='to compare two outbreaks of coronavirus disease 2019 (COVID...,"<Answer: answer='202', score=0.8213668465614319, context='sidents. The chest...","<Answer: answer='chest computed tomography', score=0.5243260562419891, conte...","<Answer: answer='Belgium', score=0.875027596950531, context='imed to compare...","<Answer: answer='Belgium', score=0.010742113459855318, context='imed to comp...","<Answer: answer='COVID-19', score=0.23281709849834442, context='urred during...","<Answer: answer='Belgium', score=0.09624548628926277, context='imed to compa..."


In [38]:
## create lists for answer/score pairs
disease_score = []
disease_answer = []
question_score = []
question_answer = []
sample_score = []
sample_answer = []
modality_score = []
modality_answer = []
country_score = []
country_answer = []
hospital_score = []
hospital_answer = []
database_score = []
database_answer = []
organisation_score = []
organisation_answer = []

#categories = [disease_score, disease_answer, sample_score, sample_answer, modality_score, modality_answer,
#              country_score, country_answer, hospital_score, hospital_answer, database_score, database_answer,
#              organisation_score, organisation_answer]


In [39]:
## start/end strings for scores and answers
score_start = 'score='
score_end = ', context'
#print((s.split(score_start))[1].split(score_end)[0])

answer_start = 'answer='
answer_end = ', score'
#print((s.split(answer_start))[1].split(answer_end)[0])

In [40]:
for i, row in cleandf.iterrows():
    disease_score.append((row['what disease is being studied?'].split(score_start))[1].split(score_end)[0])
    disease_answer.append((row['what disease is being studied?'].split(answer_start))[1].split(answer_end)[0])
    sample_score.append((row['how many patient data samples were included in this study?'].split(score_start))[1].split(score_end)[0])
    sample_answer.append((row['how many patient data samples were included in this study?'].split(answer_start))[1].split(answer_end)[0])
    question_score.append((row['What is the objective of the study?'].split(score_start))[1].split(score_end)[0])
    question_answer.append((row['What is the objective of the study?'].split(answer_start))[1].split(answer_end)[0])
    modality_score.append((row['what modality of data is used in this study?'].split(score_start))[1].split(score_end)[0])
    modality_answer.append((row['what modality of data is used in this study?'].split(answer_start))[1].split(answer_end)[0])
    country_score.append((row['what country was the study conducted in?'].split(score_start))[1].split(score_end)[0])
    country_answer.append((row['what country was the study conducted in?'].split(answer_start))[1].split(answer_end)[0])    
    hospital_score.append((row['what hospital did the data come from?'].split(score_start))[1].split(score_end)[0])
    hospital_answer.append((row['what hospital did the data come from?'].split(answer_start))[1].split(answer_end)[0])
    database_score.append((row['What existing database did the data come from?'].split(score_start))[1].split(score_end)[0])
    database_answer.append((row['What existing database did the data come from?'].split(answer_start))[1].split(answer_end)[0])
    organisation_score.append((row['What organisation did the data come from?'].split(score_start))[1].split(score_end)[0])
    organisation_answer.append((row['What organisation did the data come from?'].split(answer_start))[1].split(answer_end)[0])        

In [41]:
scoredf = cleandf[['pmid', 'title', 'abstract']].copy()

In [48]:
scoredf['disease_answer'] = disease_answer
scoredf['disease_answer'] = scoredf['disease_answer'].str[1:-1]
scoredf['disease_score'] = disease_score

scoredf['sample_answer'] = sample_answer
scoredf['sample_answer'] = scoredf['sample_answer'].str[1:-1]
scoredf['sample_score'] = sample_score

scoredf['question_answer'] = question_answer
scoredf['question_answer'] = scoredf['question_answer'].str[1:-1]
scoredf['question_score'] = question_score

scoredf['modality_answer'] = modality_answer
scoredf['modality_answer'] = scoredf['modality_answer'].str[1:-1]
scoredf['modality_score'] = modality_score

scoredf['country_answer'] = country_answer
scoredf['country_answer'] = scoredf['country_answer'].str[1:-1]
scoredf['country_score'] = country_score

scoredf['hospital_answer'] = hospital_answer
scoredf['hospital_answer'] = scoredf['hospital_answer'].str[1:-1]
scoredf['hospital_score'] = hospital_score

scoredf['database_answer'] = database_answer
scoredf['database_answer'] = scoredf['database_answer'].str[1:-1]
scoredf['database_score'] = database_score

scoredf['organisation_answer'] = database_answer
scoredf['organisation_answer'] = scoredf['organisation_answer'].str[1:-1]
scoredf['organisation_score'] = database_score

In [49]:
scoredf.head(5)

Unnamed: 0,pmid,title,abstract,disease_answer,disease_score,sample_answer,sample_score,question_answer,question_score,modality_answer,modality_score,country_answer,country_score,hospital_answer,hospital_score,database_answer,database_score,organisation_answer,organisation_score
0,35308674,Comparison of Chest Computed Tomography Between the Two Waves of Coronavirus...,"Background In this study, we aimed to compare two outbreaks of coronavirus d...",coronavirus disease 2019,0.6681817770004272,202,0.8213668465614319,to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium i...,0.2802339196205139,chest computed tomography,0.5243260562419891,Belgium,0.875027596950531,Belgium,0.0107421134598553,COVID-19,0.2328170984983444,COVID-19,0.2328170984983444
1,35309014,A Study of Social and Behavioral Determinants of Health in Lung Cancer Patie...,Social and behavioral determinants of health (SBDoH) have important roles in...,coronavirus disease 2019,0.6681817770004272,202,0.8213668465614319,to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium i...,0.2802339196205139,chest computed tomography,0.5243260562419891,Belgium,0.875027596950531,Belgium,0.0107421134598553,COVID-19,0.2328170984983444,COVID-19,0.2328170984983444
2,35308968,A Federated Mining Approach on Predicting Diabetes-Related Complications: De...,"Chronic diabetes can lead to microvascular complications, including diabetic...",coronavirus disease 2019,0.6681817770004272,202,0.8213668465614319,to compare two outbreaks of coronavirus disease 2019 (COVID-19) in Belgium i...,0.2802339196205139,chest computed tomography,0.5243260562419891,Belgium,0.875027596950531,Belgium,0.0107421134598553,COVID-19,0.2328170984983444,COVID-19,0.2328170984983444


In [266]:
#scoredf.to_csv('output/100_scored.csv')