In [23]:
import numpy as np
import pandas as pd
import timeit
import time
from sklearn import preprocessing
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl
import umap
import umap.plot
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.decomposition import TruncatedSVD

from matplotlib import rcParams
plt.style.use("ggplot")
rcParams['figure.figsize'] = (12, 6)
from numpy.random import seed
seed(1)
from tensorflow.random import set_seed
set_seed(2)

to look at the kmers available for each organism

In [24]:
#loading training dataset
with open('datasets/train.dataset.6mer.npy', 'rb') as open_file:
    df = np.load(open_file)
df = pd.DataFrame(df)
df.shape

(505536, 2080)

In [25]:
# loading the training labels
df_y = pd.read_csv('datasets/train_labels.csv')
le = preprocessing.LabelEncoder()
le.fit(df_y['genome_name'].unique())
labels = le.transform(df_y['genome_name'].values)

df['genome_label'] = labels
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2071,2072,2073,2074,2075,2076,2077,2078,2079,genome_label
0,0.00247,0.004528,0.003292,0.000823,0.003704,0.00288,0.001646,0.001646,0.001646,0.001646,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20
1,0.001818,0.002857,0.002077,0.001558,0.003635,0.002338,0.002338,0.001039,0.001818,0.001039,...,0.000519,0.0,0.0,0.000519,0.00026,0.0,0.0,0.0,0.0,20
2,0.003702,0.003084,0.001234,0.001851,0.002468,0.003084,0.003084,0.001851,0.001234,0.000617,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20
3,0.001102,0.002756,0.003584,0.001378,0.003307,0.002481,0.002481,0.001102,0.001654,0.001378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20
4,0.004318,0.003534,0.002748,0.001701,0.003534,0.002356,0.002224,0.002617,0.003271,0.00144,...,0.0,0.0,0.0,0.000262,0.000131,0.0,0.0,0.0,0.0,20


In [26]:
df_y['genome_name'].value_counts()

genome_name
decoy                              446209
burkholderia_pseudomallei            3787
pseudomonas_aeruginosa               3342
klebsiella_michiganensis             3167
mycobacterium_ulcerans               2999
klebsiella_pneumoniae                2840
serratia_liquefaciens                2832
citrobacter_freundii                 2718
salmonella_enterica_typhimurium      2595
salmonella_enterica_paratyphi        2579
yersinia_enterocolitica              2416
stenotrophomonas_maltophilia         2388
mycobacterium_tuberculosis           2354
clostridioides_difficile             2249
acinetobacter_baumannii              2133
legionella_pneumophila               1814
vibrio_parahaemolyticus              1743
listeria_monocytogenes               1588
vibrio_cholerae                      1564
staphylococcus_aureus                1493
staphylococcus_pseudintermedius      1381
corynebacterium_ulcerans             1306
corynebacterium_diphtheriae          1274
neisseria_meningitidis

# data cleaning

In [27]:
sum_row = df.iloc[:,:-1].sum(axis=1)
print(sum_row.describe())
print((sum_row < 0.9).sum())

# to remove samples with low kmer count
print(df.shape)
df = df.loc[sum_row >= 0.9,:]
print(df.shape)
y_index = labels[sum_row >= 0.9]

count    505536.000000
mean               NaN
std           0.000000
min           0.000000
25%           0.975098
50%           1.010742
75%           1.039062
max           1.311523
dtype: float64
22011
(505536, 2081)
(483525, 2081)


# sampling

In [28]:
def extract_nc(filename):
    nc = []
    with open(filename, 'r') as file:
        for line in file:
            if line.startswith('>'):
                header = line.strip().lstrip('>')
                parts = header.split()
                #print(parts)
                if len(parts) >= 1:
                    x = parts[1]
                    x = x.split(',')[0]
                    nc.append(x.split('.')[0])
    return nc

In [29]:
filename = 'datasets/train.dataset.raw.reads.fna'
species = extract_nc(filename)

In [30]:
labels

array([20, 20, 20, ...,  7,  7,  7])

In [31]:
# labelling for the rfseq numbers

sample_label = df_y.copy()
sample_label['rfseq'] = species
print(sample_label.shape)
sample_label

(505536, 2)


Unnamed: 0,genome_name,rfseq
0,staphylococcus_aureus,NC_007795
1,staphylococcus_aureus,NC_007795
2,staphylococcus_aureus,NC_007795
3,staphylococcus_aureus,NC_007795
4,staphylococcus_aureus,NC_007795
...,...,...
505531,decoy,NC_000004
505532,decoy,NC_000017
505533,decoy,NC_000011
505534,decoy,NT_167249


In [32]:
def sampling(x, frac=None, random=4):
    """" subsampling certain fraction, unless when the species is only 1 """
    current = x.shape[0]
    if current==1:
        return x
    x = x.sample(frac=frac, random_state=random)
    return x

In [33]:
# decoy train set

filt = sample_label.loc[sum_row >= 0.9,:]
x = filt[filt['genome_name']=='decoy'].groupby('rfseq').apply(lambda x: sampling(x,0.05)).index.get_level_values(1) # index for selected decoys
# x2 =sample_label[sample_label['genome_name']!='decoy'].index # index for all organisms
# x = x.append(x2) # index for all selected samples
x

Index([421679, 285281, 481246, 263189, 105081, 355473, 317703, 278399, 321451,
       470007,
       ...
        14438,  14766,  14983,  14759,  14572,  14768,  14133,  14123,  15213,
        14199],
      dtype='int64', length=21248)

In [34]:
df['genome_label'] = y_index
df['rfseq'] = sample_label['rfseq']
decoy = df.loc[x, :]
decoy.shape

(21248, 2082)

# dim red

In [40]:
X_red = pd.concat([df[df['genome_label']!=7].iloc[:,:-2], decoy.iloc[:,:-2]])
y_red = pd.concat([df[df['genome_label']!=7]['genome_label'], decoy['genome_label']])
print(X_red.shape, y_red.shape)

(80385, 2080) (80385,)


In [None]:
# percentage 0 - w/o removing
print(sum(decoy[decoy == 0].iloc[:,:-2].count(axis=1))/(len(decoy.columns)*len(decoy.index)))

X_red = pd.concat([df[df['genome_label']!=7].iloc[:,:-2], decoy.iloc[:,:-2]])
print(sum(X_red[X_red == 0].count(axis=1))/(len(X_red.columns)*len(X_red.index)))


0.4067185980955522
0.3880290524925718


In [37]:
def create_pca(data, seed=4220, save=None):
    """ Returns PCA & transformed data """
    from sklearn.decomposition import PCA
    import pickle as pkl
    print("shape of data: ", data.shape)

    new_pca = PCA(n_components=None, random_state=seed).fit(data)

    lim = 0.9
    ACC_VAR = 0
    for i, var in enumerate(new_pca.explained_variance_ratio_):
        ACC_VAR+=var
        # print(var)
        if ACC_VAR > lim: 
            print(f"{i+1} components explained {lim}S of total var")
            break
    
    new_pca = PCA(n_components=i+1, random_state=seed) 
    data = new_pca.fit_transform(data)
    print(new_pca)
    print("shape of new data: ", data.shape)

    if save!=None:
        with open(save, 'wb') as pickle_file:
            pkl.dump(new_pca, pickle_file)
        print("model saved")

    return [new_pca, data]

In [None]:
new_pca, pca_data = create_pca(X_red, save='models/pca_ver1.2.pkl')

shape of data:  (80385, 2080)
887 components explained 0.9S of total var
PCA(n_components=887, random_state=4220)
shape of new data:  (80385, 887)
model saved


In [41]:
svd = TruncatedSVD(n_components=500, random_state=4220)
svd_data = svd.fit_transform(X_red) # 0.8942969124131721 explained variance

with open('svd_ver1.2.pkl', 'wb') as pickle_file:
    pkl.dump(svd, pickle_file)


lim = 0.9
ACC_VAR = 0
for i, var in enumerate(svd.explained_variance_ratio_):
    ACC_VAR+=var
    # print(var)
    if ACC_VAR > lim: 
        print(f"{i+1} components explained {lim}S of total var")
        break
print(ACC_VAR)

0.8008002764091865


# training set

In [43]:
# decoy test set
x = filt[filt['genome_name']=='decoy'].groupby('rfseq').apply(lambda x: sampling(x,0.01,random=1)).index.get_level_values(1) # index for selected decoys

decoy_test = df.loc[x, :]
decoy_test.shape

(4198, 2082)

In [44]:
from sklearn.model_selection import train_test_split

df_train = df[df['genome_label']!=7].iloc[:,:-1].groupby('genome_label').apply(lambda x: x.sample(910))
# X_train,X_val,y_train,y_val = train_test_split(df[df['genome_label']!=7].iloc[:,:-2],df[df['genome_label']!=7]['genome_label'],random_state=4,test_size=0.2, stratify=df[df['genome_label']!=7]['genome_label'])
X_train,X_val,y_train,y_val = train_test_split(df_train.iloc[:,:-1], df_train['genome_label'],random_state=4,test_size=0.2, stratify=df_train['genome_label'])


In [45]:
# full set  + decoy
X_train = pd.concat([X_train, decoy.iloc[:,:-2]])
y_train = pd.concat([y_train, decoy['genome_label']])

X_val = pd.concat([X_val, decoy_test.iloc[:,:-2]])
y_val = pd.concat([y_val, decoy_test['genome_label']])

print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)

(43088, 2080) (43088,) (9658, 2080) (9658,)


In [16]:
# df_train.index.get_level_values(0)

# training model

In [None]:
# tri_df= pd.read_csv('corr_matrix.csv', index_col=0) # created from running the full data
# # to_drop = [c for c in tri_df.columns if any(tri_df[c] > 0.7)] # if column is labeled
# to_drop = [c for c in range(len(tri_df.columns)) if any(tri_df[tri_df.columns[c]] > 0.7)] # if column is indexed
# X_train.drop(columns=to_drop, inplace=True)
# X_train.shape

In [19]:
def create_svm(x_data, y_label, save=None):
    """ training support vector machine """
    from sklearn.svm import SVC
    from joblib import dump
    import timeit

    print("training")
    starting_time = timeit.default_timer()

    clf = SVC(kernel='rbf', probability=True)
    clf.fit(x_data , y_label)
    print(clf)

    print("Time taken :", timeit.default_timer() - starting_time)

    if save != None:   
        dump(clf, save) 
        print("model saved")
    
    return clf

In [18]:
## prelim model running SVD
# with open('svd_n500.pkl', 'rb') as pickle_file: # PCA embeddings trained on full data
#     preprocess=pkl.load(pickle_file) 


# X_train = preprocess.transform(X_train)
# print(X_train.shape, y_train.shape)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


(43088, 500) (43088,)


In [None]:
## prelim svd and corr matrix with downsampled training set
# clf = create_svm(X_train, y_train, save='models/svm_prelim_ver1.2.joblib')

training
SVC(probability=True)
Time taken : 2316.0964414000046
model saved


In [46]:
print(X_train.shape, y_train.shape)
X_train = svd.transform(X_train)  # svd with downsampled 5% decoy
clf = create_svm(X_train, y_train, save='models/svm_ver1.2.joblib')

(43088, 2080) (43088,)
training
SVC(probability=True)
Time taken : 1461.7157511999976
model saved


# testing new

In [51]:
def precision_per_patient(patient_id, preds):
    df_true = pd.read_csv('datasets/validation/patient{}_labels.txt'.format(patient_id))
    tp,fp, tp_labels=0,0, df_true['true_label'].shape[0]
    print('my prediction(s) for patient {}:'.format(patient_id))
    print(preds)
    print('true pathogen')
    print(df_true['true_label'].values)
    #if don't predict any pathogen, it means there is only decoy in the test dataset (your prediction)
    if len(preds) == 0:
        preds = ['decoy']
    for item in np.unique(preds):
        if item in df_true['true_label'].values:
            tp+=1
        else:
            fp+=1
    #you have to predict all labels correctly, but you are penalized for any false positive
    return round(tp/(tp_labels+fp),5)



#prediction for all patients
def run_test(threshold=0.99, model=clf, preprocess=None):
    all_precision = []
    for patient_id in range(1,11):
        print('predicting for patient {}'.format(patient_id))
        
        starting_time = timeit.default_timer()
        with open('datasets/validation/patient{}.6mer.npy'.format(patient_id), 'rb') as read_file:
            df_test = np.load(read_file)
            df_test = pd.DataFrame(df_test)
            
        # df_test.drop(columns=to_drop, inplace=True) # for prelim model
        df_test = preprocess.transform(df_test)
        
        y_predprob = model.predict_proba(df_test)
        
        #we get only predictions larger than the threshold and if there is more than one, we take the argmax again
        final_predictions = le.inverse_transform(np.unique([np.argmax(item) for item in y_predprob  if len(np.where(item>= threshold)[0]) >=1]
                                                    ))
        #my pathogens dectected, decoy will be ignored
        final_predictions = [item for item in final_predictions if item !='decoy']
        
        precision = precision_per_patient(patient_id, final_predictions)
        print('precision: {}'.format(precision))
        all_precision.append(precision)
        print("Time taken :", timeit.default_timer() - starting_time)
    # performance per patient and its final average
    print([f'patient {c}: {item}' for c, item in enumerate(all_precision, start=1)])
    print(f'avg: {np.mean(all_precision)}')
    return round(np.mean(all_precision), 5)

In [52]:
# new model 
run_test(model=clf, preprocess=svd, threshold=0.99)

predicting for patient 1
my prediction(s) for patient 1:
['staphylococcus_aureus']
true pathogen
['staphylococcus_aureus']
precision: 1.0
Time taken : 138.59140799999295
predicting for patient 2
my prediction(s) for patient 2:
['burkholderia_pseudomallei', 'staphylococcus_aureus', 'staphylococcus_pyogenes']
true pathogen
['staphylococcus_pyogenes']
precision: 0.33333
Time taken : 144.5654641000001
predicting for patient 3
my prediction(s) for patient 3:
['burkholderia_pseudomallei', 'corynebacterium_ulcerans']
true pathogen
['burkholderia_pseudomallei' 'corynebacterium_ulcerans']
precision: 1.0
Time taken : 146.0772649999999
predicting for patient 4
my prediction(s) for patient 4:
['pseudomonas_aeruginosa']
true pathogen
['pseudomonas_aeruginosa']
precision: 1.0
Time taken : 114.68902330000128
predicting for patient 5
my prediction(s) for patient 5:
['corynebacterium_diphtheriae', 'corynebacterium_ulcerans']
true pathogen
['corynebacterium_diphtheriae']
precision: 0.5
Time taken : 140.

0.78333

In [54]:
# new model 
run_test(model=clf, preprocess=svd, threshold=0.995)

predicting for patient 1
my prediction(s) for patient 1:
[]
true pathogen
['staphylococcus_aureus']
precision: 0.0
Time taken : 149.9431970000005
predicting for patient 2
my prediction(s) for patient 2:
['staphylococcus_pyogenes']
true pathogen
['staphylococcus_pyogenes']
precision: 1.0
Time taken : 147.58025810000254
predicting for patient 3


## prelim model with new training set (thres: 0.99) - might be .95 dont rmb
``` predicting for patient 1
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 1:
['staphylococcus_aureus']
true pathogen
['staphylococcus_aureus']
precision: 1.0
Time taken : 211.64389069999743
predicting for patient 2
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 2:
['staphylococcus_pyogenes']
true pathogen
['staphylococcus_pyogenes']
precision: 1.0
Time taken : 217.85566260000633
predicting for patient 3
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 3:
['burkholderia_pseudomallei', 'corynebacterium_ulcerans']
true pathogen
['burkholderia_pseudomallei' 'corynebacterium_ulcerans']
precision: 1.0
Time taken : 209.61945990000095
predicting for patient 4
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 4:
['pseudomonas_aeruginosa']
true pathogen
['pseudomonas_aeruginosa']
precision: 1.0
Time taken : 214.08469499999774
predicting for patient 5
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 5:
['corynebacterium_diphtheriae', 'corynebacterium_ulcerans']
true pathogen
['corynebacterium_diphtheriae']
precision: 0.5
Time taken : 206.45827980000468
predicting for patient 6
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 6:
['streptococcus_pneumoniae']
true pathogen
['streptococcus_pneumoniae']
precision: 1.0
Time taken : 214.49726079999527
predicting for patient 7
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 7:
['mycobacterium_tuberculosis', 'mycobacterium_ulcerans']
true pathogen
['mycobacterium_ulcerans']
precision: 0.5
Time taken : 219.3507050999906
predicting for patient 8
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 8:
['streptococcus_pneumoniae']
true pathogen
['mycobacterium_tuberculosis' 'streptococcus_pneumoniae']
precision: 0.5
Time taken : 206.25283109999145
predicting for patient 9
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 9:
['streptococcus_pneumoniae']
true pathogen
['streptococcus_pneumoniae']
precision: 1.0
Time taken : 215.84426240000175
predicting for patient 10
C:\Users\xiaox\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but TruncatedSVD was fitted with feature names
  warnings.warn(
my prediction(s) for patient 10:
['burkholderia_pseudomallei']
true pathogen
['burkholderia_pseudomallei']
precision: 1.0
Time taken : 220.01919639999687
['patient 1: 1.0', 'patient 2: 1.0', 'patient 3: 1.0', 'patient 4: 1.0', 'patient 5: 0.5', 'patient 6: 1.0', 'patient 7: 0.5', 'patient 8: 0.5', 'patient 9: 1.0', 'patient 10: 1.0']
avg: 0.85 ```

# testing (prelim model)

In [2]:
def precision_per_patient(patient_id, preds):
    df_true = pd.read_csv('datasets/validation/patient{}_labels.txt'.format(patient_id))
    tp,fp, tp_labels=0,0, df_true['true_label'].shape[0]
    print('my prediction(s) for patient {}:'.format(patient_id))
    print(preds)
    print('true pathogen')
    print(df_true['true_label'].values)
    #if don't predict any pathogen, it means there is only decoy in the test dataset (your prediction)
    if len(preds) == 0:
        preds = ['decoy']
    for item in np.unique(preds):
        if item in df_true['true_label'].values:
            tp+=1
        else:
            fp+=1
    #you have to predict all labels correctly, but you are penalized for any false positive
    return round(tp/(tp_labels+fp),5)



In [3]:
#snippet to load the grouth truth training labels and normalize the label predictions. 
#your trained model will predict in this space (0 to 10)
df_y = pd.read_csv('datasets/train_labels.csv')
le = preprocessing.LabelEncoder()
le.fit(df_y['genome_name'].unique())

## dropping highly corr column
tri_df= pd.read_csv('corr_matrix.csv', index_col=0)
to_drop = [c for c in range(len(tri_df.columns)) if any(tri_df[tri_df.columns[c]] > 0.7)]

In [6]:
from joblib import dump, load
#load trained model
clf = load('svm_svdfull_910.joblib')

with open('svd_n500.pkl', 'rb') as pickle_file: # PCA embeddings trained on full data
    preprocess=pkl.load(pickle_file) 

In [10]:
#prediction for all patients
def run_test(threshold=0.99, model=clf, preprocess=None):
    all_precision = []
    for patient_id in range(1,11):
        print('predicting for patient {}'.format(patient_id))
        
        starting_time = timeit.default_timer()
        with open('datasets/validation/patient{}.6mer.npy'.format(patient_id), 'rb') as read_file:
            df_test = np.load(read_file)
            df_test = pd.DataFrame(df_test)
            
        df_test.drop(columns=to_drop, inplace=True)
        df_test = preprocess.transform(df_test)
        
        y_predprob = model.predict_proba(df_test)
        
        #we get only predictions larger than the threshold and if there is more than one, we take the argmax again
        final_predictions = le.inverse_transform(np.unique([np.argmax(item) for item in y_predprob  if len(np.where(item>= threshold)[0]) >=1]
                                                    ))
        #my pathogens dectected, decoy will be ignored
        final_predictions = [item for item in final_predictions if item !='decoy']
        
        precision = precision_per_patient(patient_id, final_predictions)
        print('precision: {}'.format(precision))
        all_precision.append(precision)
        print("Time taken :", timeit.default_timer() - starting_time)
    # performance per patient and its final average
    print([f'patient {c}: {item}' for c, item in enumerate(all_precision, start=1)])
    print(f'avg: {np.mean(all_precision)}')
    return round(np.mean(all_precision), 5)

run_test(model=clf, preprocess=preprocess, threshold=0.99)

predicting for patient 1




my prediction(s) for patient 1:
['listeria_monocytogenes', 'staphylococcus_aureus']
true pathogen
['staphylococcus_aureus']
precision: 0.5
Time taken : 197.43884419999085
predicting for patient 2




my prediction(s) for patient 2:
['listeria_monocytogenes', 'staphylococcus_aureus', 'staphylococcus_pyogenes']
true pathogen
['staphylococcus_pyogenes']
precision: 0.33333
Time taken : 174.24373459999333
predicting for patient 3




my prediction(s) for patient 3:
['burkholderia_pseudomallei', 'corynebacterium_ulcerans', 'listeria_monocytogenes', 'staphylococcus_aureus']
true pathogen
['burkholderia_pseudomallei' 'corynebacterium_ulcerans']
precision: 0.5
Time taken : 160.4835132000153
predicting for patient 4




my prediction(s) for patient 4:
['listeria_monocytogenes', 'pseudomonas_aeruginosa', 'staphylococcus_aureus']
true pathogen
['pseudomonas_aeruginosa']
precision: 0.33333
Time taken : 156.60702299998957
predicting for patient 5




my prediction(s) for patient 5:
['corynebacterium_diphtheriae', 'corynebacterium_ulcerans', 'listeria_monocytogenes']
true pathogen
['corynebacterium_diphtheriae']
precision: 0.33333
Time taken : 156.52495269998326
predicting for patient 6




my prediction(s) for patient 6:
['streptococcus_pneumoniae']
true pathogen
['streptococcus_pneumoniae']
precision: 1.0
Time taken : 191.80242230001022
predicting for patient 7




my prediction(s) for patient 7:
['mycobacterium_tuberculosis', 'mycobacterium_ulcerans']
true pathogen
['mycobacterium_ulcerans']
precision: 0.5
Time taken : 155.87797979998868
predicting for patient 8




my prediction(s) for patient 8:
['mycobacterium_tuberculosis', 'streptococcus_pneumoniae']
true pathogen
['mycobacterium_tuberculosis' 'streptococcus_pneumoniae']
precision: 1.0
Time taken : 158.9368245000078
predicting for patient 9




my prediction(s) for patient 9:
['listeria_monocytogenes', 'staphylococcus_aureus', 'streptococcus_pneumoniae']
true pathogen
['streptococcus_pneumoniae']
precision: 0.33333
Time taken : 176.23887879998074
predicting for patient 10




my prediction(s) for patient 10:
['burkholderia_pseudomallei', 'listeria_monocytogenes', 'staphylococcus_aureus']
true pathogen
['burkholderia_pseudomallei']
precision: 0.33333
Time taken : 168.83315320001566
['patient 1: 0.5', 'patient 2: 0.33333', 'patient 3: 0.5', 'patient 4: 0.33333', 'patient 5: 0.33333', 'patient 6: 1.0', 'patient 7: 0.5', 'patient 8: 1.0', 'patient 9: 0.33333', 'patient 10: 0.33333']
avg: 0.516665


0.51667