In [11]:
import pandas as pd
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.metrics import f1_score
import json

In [12]:
with open('images_features_2d_r2gen.pickle','rb') as features:

    data=pickle.load(features)

In [13]:
data['CXR1'][0].shape

(1, 7, 7, 1024)

In [18]:
def fetch_tags(image_id, tags):

    new_tags=[]

    for image in image_id:

        tag=tags[image.split('_')[0]]

        if tag[0].lower()=='normal':
            new_tags.append(0)
        else:
            new_tags.append(1)

    return new_tags

path='/media/zaheer/Data/Image_Text_Datasets/IU_Xray/latest/One_Image_norm_abnorm_split/r2gen_annotations/'

def convert_r2gen_annotations():
    with open(path+'annotation.json', 'rb') as f:
        full_records = json.load(f)

    splits=['train','val','test']
    
    new_images_records={}
    
    for s in splits:
        images=[]
        records=full_records[s]
        for r in records:
            for image in r['image_path']:
                images.append(image)
                
        new_images_records[s]=images
    return new_images_records

convert_r2gen_annotations()

def load_preprocess_data():
    tags = np.load('Data_with_tags.npy', allow_pickle=True).item()
    converted_records = convert_r2gen_annotations()
    x_train = converted_records['train']
    x_test = converted_records['test']
    x_val = converted_records['val']
    #print(train.shape, train.columns)

    train_tags=fetch_tags(x_train,tags)
    #print(train_tags)
    #y_train = mlb.fit_transform([train_tags])
    y_train=pd.Series(train_tags)
    #print(y_train)

    #print(y_train.head())
    
    
    test_tags=fetch_tags(x_test,tags)
    y_test = pd.Series(test_tags)
    
    
    val_tags=fetch_tags(x_val,tags)
    y_val = pd.Series(val_tags)
    
    return x_train,y_train,x_test,y_test, x_val,y_val



In [23]:
def load_features(split):
    features = []#np.empty((0,7,7,1024))
    
    for image in split:
        report_id = image.split('_')[0]
        image_id = image.split('.')[0].split('/')[1]
        features.append(data[report_id][int(image_id)])
    features=np.array(features)
    features=np.reshape(features,(-1,49,1024))
    
    return features

In [24]:
def class_model(n):
    model = tf.keras.Sequential()
    model.add(layers.Input(shape=(49,1024)))
    
    model.add(layers.GlobalAveragePooling1D())
    #model.add(layers.Dense(128,activation='relu'))
    #model.add(layers.LeakyReLU(alpha=0.05))
    model.add(layers.Dense(64,activation='relu'))
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(n, activation="sigmoid"))

    model.summary()
    
    return model



In [25]:
images_train,y_train,images_test,y_test, images_val,y_val=load_preprocess_data()

In [26]:
train_features=load_features(images_train)
test_features=load_features(images_test)
val_features=load_features(images_val)

In [27]:
model=class_model(1)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling1d (Gl (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                65600     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 65,665
Trainable params: 65,665
Non-trainable params: 0
_________________________________________________________________


In [28]:
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, verbose=0, mode='min')
reduce_lr_loss = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()
])

model.fit(train_features, y_train.to_numpy(), epochs=50, callbacks=[earlyStopping],validation_data=(val_features, y_val.to_numpy()), batch_size=16)

test_tags=model.predict(test_features)
val_tags=model.predict(val_features)

Train on 4138 samples, validate on 592 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50


Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [29]:
test_tags=np.where(test_tags<0.60,0,1)
test_tags

array([[0],
       [1],
       [0],
       ...,
       [0],
       [1],
       [1]])

In [30]:
val_tags=np.where(val_tags<0.60,0,1)
val_tags

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
    

In [31]:
test_tags=[i[0] for i in test_tags.tolist()]
test_tags=pd.Series(test_tags)

In [32]:
y_test

0       0
1       0
2       0
3       0
4       0
       ..
1175    1
1176    0
1177    0
1178    1
1179    1
Length: 1180, dtype: int64

In [33]:
val_tags=[i[0] for i in val_tags.tolist()]
val_tags=pd.Series(val_tags)

In [34]:
print(f1_score(y_test,test_tags))
#print(y_test)

0.6926483613817538


In [35]:
m=tf.keras.metrics.Recall()
m.update_state(y_test,test_tags)
m.result().numpy()

0.6908127

In [37]:
import pickle

all_tags={}

for idx,image in enumerate(images_test):
    all_tags[image.split('_')[0]]=test_tags[idx]

for idx,image in enumerate(images_val):
    all_tags[image.split('_')[0]]=val_tags[idx]
    
train_true=np.array(y_train)
for idx,image in enumerate(images_train):
    all_tags[image.split('_')[0]]=train_true[idx]

print(all_tags['CXR63'])
with open(path+'binary_tags_chex_r2gen.pkl','wb') as file:
    pickle.dump(all_tags, file, protocol=2)
    

1


{'CXR3030': 1,
 'CXR38': 0,
 'CXR3957': 0,
 'CXR621': 1,
 'CXR1347': 0,
 'CXR2915': 0,
 'CXR34': 0,
 'CXR2590': 0,
 'CXR1176': 1,
 'CXR738': 1,
 'CXR2480': 1,
 'CXR3222': 0,
 'CXR1005': 0,
 'CXR3542': 0,
 'CXR325': 0,
 'CXR2785': 0,
 'CXR3991': 0,
 'CXR3527': 1,
 'CXR3460': 0,
 'CXR2784': 0,
 'CXR1425': 1,
 'CXR779': 0,
 'CXR1966': 0,
 'CXR3765': 0,
 'CXR2686': 1,
 'CXR2354': 0,
 'CXR3445': 0,
 'CXR3751': 0,
 'CXR3734': 1,
 'CXR377': 0,
 'CXR344': 0,
 'CXR3646': 1,
 'CXR3335': 1,
 'CXR2780': 0,
 'CXR1997': 0,
 'CXR1440': 0,
 'CXR1259': 0,
 'CXR658': 1,
 'CXR1812': 1,
 'CXR2357': 0,
 'CXR2232': 0,
 'CXR993': 0,
 'CXR2734': 1,
 'CXR461': 0,
 'CXR2098': 0,
 'CXR2277': 0,
 'CXR2099': 1,
 'CXR1633': 0,
 'CXR3883': 0,
 'CXR3041': 1,
 'CXR269': 1,
 'CXR3552': 0,
 'CXR1854': 0,
 'CXR3745': 0,
 'CXR1467': 0,
 'CXR1270': 1,
 'CXR3098': 1,
 'CXR1603': 1,
 'CXR1008': 1,
 'CXR2916': 0,
 'CXR1632': 0,
 'CXR3265': 0,
 'CXR2851': 0,
 'CXR242': 1,
 'CXR3427': 0,
 'CXR3220': 0,
 'CXR2005': 0,
 'CXR1410'