In [None]:
# Restrict the s3fs credentials file to owner read/write only (mode 600)
# before the bucket is mounted further down.
!chmod 600 ~/.passwd-s3fs

In [None]:
!apt install s3fs

In [None]:
!mkdir /content/s3
!s3fs um2ii-datasets /content/s3

In [None]:
import pandas as pd
from fastai.vision.all import *
from sklearn.model_selection import train_test_split
import os
from glob import glob
from sklearn import metrics

MIMIC Age Validation on BRAX Dataset

In [None]:
# Score the BRAX images with the MIMIC-trained age classifier.
df = pd.read_csv('/content/brax_split.csv')
# Image path relative to the s3fs mount (presumably resolved from /content — TODO confirm).
df['Path'] = 's3/BRAX/brax/1.1.0/' + df['PngPath']

# NOTE(review): `test_df` was used below without ever being defined in this
# notebook, so Restart & Run All failed with NameError. The full frame is used
# here; if brax_split.csv has a split column, filter to the held-out rows instead.
test_df = df.copy()

# load_learner unpickles the export, so only load files from a trusted source.
mimic_age_model = load_learner('/content/mimic_age.pkl')
mimic_age_model.dls.to(device='cuda')
mimic_age_model.model.to(device='cuda')
age_test_dl = mimic_age_model.dls.test_dl(test_df)
# Elements 0 and 2 of the result are the ones consumed below.
age_preds = mimic_age_model.get_preds(dl=age_test_dl, with_decoded=True)

# Per-image table: ground-truth age category, decoded prediction and the
# per-class probability vector (stored as a Python list per row).
predict_df = test_df[['Path']].assign(
    Age=test_df['Patient Age Category'],
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
# Show the label -> index mapping so the probability columns can be interpreted.
mimic_age_model.dls.vocab.o2i

In [None]:
# One-vs-rest ROC curve for each age bucket of the MIMIC model.
# Assumes probability index i corresponds to label str(i) — check the
# vocab.o2i output of the previous cell.
age_true = predict_df['Age'].astype(str)
roc_specs = [
    (0, 'orange', '0-20 years '),
    (1, 'blue', '21-40 years '),
    (2, 'green', '41-60 years '),
    (3, 'red', '61-80 years '),
    (4, 'black', '80+ years '),
]
for class_idx, line_color, legend_prefix in roc_specs:
    class_scores = [prob[class_idx] for prob in predict_df['Age_Probability']]
    fpr, tpr, _ = metrics.roc_curve(
        y_true=age_true, y_score=class_scores, pos_label=str(class_idx)
    )
    # The legend entry carries the AUC for this bucket.
    plt.plot(fpr, tpr, color=line_color, label=legend_prefix + str(metrics.auc(fpr, tpr)))
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.show()

In [None]:
# Export the MIMIC model's predictions alongside the original test rows.
# A new frame is built instead of inserting into `test_df`: `insert` raises
# ValueError when the column already exists, which broke re-running this cell
# and the equivalent cells of the later sections that reuse `test_df`.
mimic_age_results = test_df.assign(
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
print(mimic_age_results)
mimic_age_results.to_csv('/content/sample_data/mimic_age_brax_test.csv')

CheXpert Age Validation on BRAX Dataset

In [None]:
# Score the BRAX images with the CheXpert-trained age classifier.
df = pd.read_csv('/content/brax_split.csv')
# Image path relative to the s3fs mount (presumably resolved from /content — TODO confirm).
df['Path'] = 's3/BRAX/brax/1.1.0/' + df['PngPath']

# NOTE(review): `test_df` was used below without being defined in this
# notebook, so a fresh kernel failed with NameError. The full frame is used
# here; if brax_split.csv has a split column, filter to the held-out rows instead.
test_df = df.copy()

# load_learner unpickles the export, so only load files from a trusted source.
chexpert_age_model = load_learner('/content/chexpert_age.pkl')
chexpert_age_model.dls.to(device='cuda')
chexpert_age_model.model.to(device='cuda')
age_test_dl = chexpert_age_model.dls.test_dl(test_df)
# Elements 0 and 2 of the result are consumed by the following cells.
age_preds = chexpert_age_model.get_preds(dl=age_test_dl, with_decoded=True)

In [None]:
# Per-image table for the CheXpert model: ground-truth age category, decoded
# prediction and the per-class probability vector (one Python list per row).
predict_df = test_df[['Path']].assign(
    Age=test_df['Patient Age Category'],
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
# Show the label -> index mapping so the probability columns can be interpreted.
chexpert_age_model.dls.vocab.o2i

In [None]:
# One-vs-rest ROC curve for each age bucket of the CheXpert model.
# Assumes probability index i corresponds to label str(i) — check the
# vocab.o2i output of the previous cell.
# NOTE(review): the MIMIC section labels the first bucket '0-20 years';
# confirm which wording is intended.
age_true = predict_df['Age'].astype(str)
roc_specs = [
    (0, 'orange', '1-20 years '),
    (1, 'blue', '21-40 years '),
    (2, 'green', '41-60 years '),
    (3, 'red', '61-80 years '),
    (4, 'black', '80+ years '),
]
for class_idx, line_color, legend_prefix in roc_specs:
    class_scores = [prob[class_idx] for prob in predict_df['Age_Probability']]
    fpr, tpr, _ = metrics.roc_curve(
        y_true=age_true, y_score=class_scores, pos_label=str(class_idx)
    )
    # The legend entry carries the AUC for this bucket.
    plt.plot(fpr, tpr, color=line_color, label=legend_prefix + str(metrics.auc(fpr, tpr)))
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.show()

In [None]:
# Export the CheXpert model's predictions alongside the original test rows.
# A new frame is built instead of inserting into `test_df`: `insert` raises
# ValueError when 'Age_Tensor_Id' / 'Age_Probability' already exist, which is
# the case once any other section's export cell (or this one) has run.
chexpert_age_results = test_df.assign(
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
print(chexpert_age_results)
chexpert_age_results.to_csv('/content/sample_data/chexpert_age_brax_test.csv')

NIH Age Validation on BRAX Dataset

In [None]:
# Score the BRAX images with the NIH-trained age classifier.
df = pd.read_csv('/content/brax_split.csv')
# The prefix here was 'content/s3/...', which disagrees with the MIMIC and
# CheXpert sections and with the /content/s3 mount; aligned to the same
# relative prefix (presumably resolved from /content — TODO confirm).
df['Path'] = 's3/BRAX/brax/1.1.0/' + df['PngPath']

# NOTE(review): `test_df` was used below without being defined in this
# notebook, so a fresh kernel failed with NameError. The full frame is used
# here; if brax_split.csv has a split column, filter to the held-out rows instead.
test_df = df.copy()

# load_learner unpickles the export, so only load files from a trusted source.
nih_age_model = load_learner('/content/nih_age.pkl')
nih_age_model.dls.to(device='cuda')
nih_age_model.model.to(device='cuda')
age_test_dl = nih_age_model.dls.test_dl(test_df, num_workers = 10)
# Elements 0 and 2 of the result are consumed by the following cells.
age_preds = nih_age_model.get_preds(dl=age_test_dl, with_decoded=True)

In [None]:
# Per-image table for the NIH model: ground-truth age category, decoded
# prediction and the per-class probability vector (one Python list per row).
predict_df = test_df[['Path']].assign(
    Age=test_df['Patient Age Category'],
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
# Show the label -> index mapping so the probability columns can be interpreted.
nih_age_model.dls.vocab.o2i

In [None]:
# One-vs-rest ROC curve for each age bucket of the NIH model.
# Assumes probability index i corresponds to label str(i) — check the
# vocab.o2i output of the previous cell.
# NOTE(review): the MIMIC section labels the first bucket '0-20 years';
# confirm which wording is intended.
age_true = predict_df['Age'].astype(str)
roc_specs = [
    (0, 'orange', '1-20 years '),
    (1, 'blue', '21-40 years '),
    (2, 'green', '41-60 years '),
    (3, 'red', '61-80 years '),
    (4, 'black', '80+ years '),
]
for class_idx, line_color, legend_prefix in roc_specs:
    class_scores = [prob[class_idx] for prob in predict_df['Age_Probability']]
    fpr, tpr, _ = metrics.roc_curve(
        y_true=age_true, y_score=class_scores, pos_label=str(class_idx)
    )
    # The legend entry carries the AUC for this bucket.
    plt.plot(fpr, tpr, color=line_color, label=legend_prefix + str(metrics.auc(fpr, tpr)))
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.show()

In [None]:
# Export the NIH model's predictions alongside the original test rows.
# A new frame is built instead of inserting into `test_df`: `insert` raises
# ValueError when 'Age_Tensor_Id' / 'Age_Probability' already exist, which is
# the case once any other section's export cell (or this one) has run.
nih_age_results = test_df.assign(
    Age_Tensor_Id=age_preds[2].numpy(),
    Age_Probability=age_preds[0].numpy().tolist(),
)
print(nih_age_results)
nih_age_results.to_csv('/content/sample_data/nih_age_brax_test.csv')