In [1]:
import json
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score
from tensorflow.keras import layers

In [2]:
# Load the cached per-image feature arrays, keyed by image file name.
# pickle.load can execute arbitrary code, so only open files you trust.
with open('images_features_2d.pickle','rb') as feature_file:
    data = pickle.load(feature_file)

In [3]:
# Inspect the shape of the feature array stored for a single image.
data['CXR1_1_IM-0001-3001.png'].shape

(1, 7, 7, 1024)

In [4]:
def fetch_tags(image_id, tags):
    """Map each image name to a binary 'normal' / 'abnormal' label.

    The lookup key is the part of the image name before the first
    underscore. An image is labelled 'normal' only when the first entry of
    its tag sequence equals 'normal' (case-insensitively); every other
    image is labelled 'abnormal'.

    Args:
        image_id: object exposing ``to_list()`` that yields image name
            strings (a pandas Series at the call sites in this notebook).
        tags: mapping from lookup key to an indexable whose first element
            is a string.

    Returns:
        list of str: one label per image, in input order.

    Raises:
        KeyError: if an image's lookup key is missing from ``tags``.
    """
    def _label_for(image_name):
        first_tag = tags[image_name.split('_')[0]][0]
        return 'normal' if first_tag.lower() == 'normal' else 'abnormal'

    return [_label_for(image_name) for image_name in image_id.to_list()]

# Base directory of the annotation files. File names are appended by plain
# string concatenation elsewhere in this notebook, so the trailing slash matters.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path='/media/zaheer/Data/Image_Text_Datasets/IU_Xray/latest/One_Image_norm_abnorm_split/r2gen_annotations/'

def read_r2gen_annotations(tags=None, annotation_file=None, output_file=None):
    """Keep only the R2Gen annotation records whose study tag equals 0.

    A record's study key is the part of its ``'id'`` before the first
    underscore; the record is retained when ``tags[key] == 0``.

    Args:
        tags: mapping from study key to tag value. When omitted, a
            notebook-level ``tags`` variable is looked up instead.
        annotation_file: JSON file holding ``'train'``, ``'val'`` and
            ``'test'`` record lists. Defaults to ``path + 'annotation.json'``.
        output_file: optional destination; when given, the filtered records
            are also written there as JSON, e.g.
            ``output_file="./data/iu_xray/annotation_10.json"``.

    Returns:
        dict: split name -> list of retained records.

    Raises:
        NameError: if ``tags`` is omitted and no notebook-level ``tags`` exists.
        KeyError: if a split is missing from the file or a study key is
            missing from ``tags``.
    """
    if tags is None:
        # Fall back to a notebook-level `tags` mapping when none is passed in.
        if 'tags' not in globals():
            raise NameError("name 'tags' is not defined")
        tags = globals()['tags']
    if annotation_file is None:
        annotation_file = path+'annotation.json'

    with open(annotation_file, 'rb') as f:
        full_records = json.load(f)

    new_records = {
        s: [r for r in full_records[s] if tags[r['id'].split('_')[0]] == 0]
        for s in ('train', 'val', 'test')
    }

    # The export lives inside the function because `new_records` only exists
    # in this scope; a cell-level dump of it would raise NameError.
    if output_file is not None:
        with open(output_file, "w") as write_file:
            json.dump(new_records, write_file)

    return new_records

def load_preprocess_data():
    """Load train/test image ids and their one-hot normal/abnormal labels.

    Reads the tag dictionary from 'Data_with_tags.npy' and the split
    listings from the train and test JSON files under ``path``, labels each
    image with ``fetch_tags`` and one-hot encodes the labels with
    ``pd.get_dummies``.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` where the ``x_*`` values
        are the 'image_id' columns and the ``y_*`` values are the one-hot
        label frames.
    """
    # allow_pickle=True unpickles arbitrary objects; only load trusted files.
    tag_lookup = np.load('Data_with_tags.npy', allow_pickle=True).item()

    def _ids_and_labels(json_suffix):
        # Read one split listing and encode its labels.
        # NOTE(review): get_dummies runs per split, so a split that lacks one
        # of the two classes would yield a frame with fewer columns.
        frame = pd.read_json(path+json_suffix)
        ids = frame['image_id']
        return ids, pd.get_dummies(fetch_tags(ids, tag_lookup))

    x_train, y_train = _ids_and_labels('/train/train.json')
    x_test, y_test = _ids_and_labels('/test/test.json')

    return x_train,y_train,x_test,y_test



In [5]:
def load_features(split):
    """Collect the cached features of every image in ``split`` into one array.

    Args:
        split: object exposing ``tolist()`` that yields image names used as
            keys into the notebook-level ``data`` mapping.

    Returns:
        numpy.ndarray: the looked-up arrays stacked and reshaped to
        ``(-1, 49, 1024)``.
    """
    stacked = np.array([data[image_name] for image_name in split.tolist()])
    return np.reshape(stacked, (-1,49,1024))

In [6]:
def class_model(n):
    """Build a small classification head over (49, 1024) feature maps.

    The 49 positions are averaged into one 1024-vector, passed through a
    LeakyReLU, a 64-unit ReLU layer and dropout, and finally projected to
    ``n`` sigmoid outputs. The model summary is printed before returning.

    Args:
        n: number of output units.

    Returns:
        tf.keras.Sequential: the uncompiled model.
    """
    stack = [
        layers.Input(shape=(49,1024)),
        layers.GlobalAveragePooling1D(),
        layers.LeakyReLU(alpha=0.05),
        layers.Dense(64,activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(n, activation="sigmoid"),
    ]

    model = tf.keras.Sequential()
    for layer in stack:
        model.add(layer)

    model.summary()

    return model



In [7]:
# Image-id columns and one-hot label frames for the train and test splits.
images_train,y_train,images_test,y_test=load_preprocess_data()

In [8]:
# Stack the cached features of each split into arrays reshaped to (-1, 49, 1024).
train_features=load_features(images_train)
test_features=load_features(images_test)

In [9]:
# Two output units, matching the two one-hot label columns (abnormal, normal).
model=class_model(2)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling1d (Gl (None, 1024)              0         
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                65600     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 130       
Total params: 65,730
Trainable params: 65,730
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Stop when val_loss has not improved for 50 epochs. With epochs=50 below,
# that patience can never run out, so this callback cannot trigger.
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, verbose=0, mode='min')
# `min_delta` is the current name of the deprecated `epsilon` keyword.
# NOTE(review): this callback is created but not passed to `model.fit`;
# add it to `callbacks` if learning-rate decay is intended.
reduce_lr_loss = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()
])

# NOTE(review): the test split doubles as validation data here, so the
# validation metrics are not an independent estimate.
model.fit(train_features, y_train.to_numpy(), epochs=50, callbacks=[earlyStopping],validation_data=(test_features, y_test.to_numpy()), batch_size=16)

# Raw per-class sigmoid scores for the test split (one row per image).
test_tags=model.predict(test_features)

Train on 5068 samples, validate on 500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50


Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# Collapse one-hot labels and predicted scores to class indices
# (column 0 is 'abnormal', column 1 is 'normal', per the printed frame).
true=np.array(y_test).argmax(axis=1)
pred=test_tags.argmax(axis=1)
# #print(sum(test_tags.argmax(axis=1)))
# y_test=y_test.idxmax(axis=1)
# print(y_test)
# f1_score is called with its defaults (binary averaging, positive label 1),
# so the reported score is for class index 1, i.e. 'normal'.
print(f1_score(true,pred))
print(y_test)

0.6542553191489362
     abnormal  normal
0           1       0
1           1       0
2           1       0
3           1       0
4           0       1
..        ...     ...
495         1       0
496         0       1
497         0       1
498         1       0
499         1       0

[500 rows x 2 columns]


In [12]:
# Recall computed by Keras from the one-hot labels and the raw model scores.
# The metric is fed `test_tags`, not `pred`, so any thresholding is the
# metric's own (presumably its default threshold — confirm).
m=tf.keras.metrics.Recall()
pred=test_tags.argmax(axis=1)
m.update_state(np.array(y_test),test_tags)
m.result().numpy()

0.74

In [13]:
import pickle

# Test studies receive the model's predicted class index. The key is the part
# of the image name before the first underscore.
all_tags = {
    image.split('_')[0]: pred[idx]
    for idx, image in enumerate(images_test.to_list())
}

train_true = np.array(y_train).argmax(axis=1)
print(sum(pred))
# Training studies receive their ground-truth class index; a key already
# present from the test split is overwritten.
all_tags.update(
    (image.split('_')[0], train_true[idx])
    for idx, image in enumerate(images_train.to_list())
)

print(all_tags['CXR63'])
with open(path+'binary_tags_chex.pkl','wb') as file:
    pickle.dump(all_tags, file, protocol=2)
    

190
0
