# Chest X-ray Image Report Generation (CXIRG) Original

## Install Required Modules

In [13]:
!pip install openpyxl
!pip install pandas
!pip install pillow
!pip install pytorch-ignite
!pip install scikit-learn
!pip install torch
!pip install transformers



## Import Required Modules

In [14]:
import os
import random
import torch

import numpy as np
import pandas as pd
import torch.nn as nn
import nltk

from ignite.metrics import Rouge
from pandas.core.common import random_state
from PIL import Image
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, VisionEncoderDecoderModel, ViTImageProcessor
from typing import Any, Dict, List
from torch.utils.data import DataLoader
from datasets import load_metric

## Set The Random Seed

In [15]:
seed = 48763

np.random.seed(seed=seed, )

random_state(state=seed, )

random.seed(a=seed, )

torch.manual_seed(seed=seed, )
torch.cuda.manual_seed(seed=seed, )
torch.cuda.manual_seed_all(seed=seed, )
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Set The Device & Initialize Models

In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained("nickmuchi/vit-finetuned-chest-xray-pneumonia", "emilyalsentzer/Bio_ClinicalBERT").to(device)

tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

print(tokenizer.all_special_tokens)

tokenizer.add_special_tokens({
    "bos_token": tokenizer.cls_token,
    "eos_token": tokenizer.sep_token,
    "pad_token": tokenizer.pad_token,
    #"bos_token": "<|beginoftext|>",
    #"pad_token": "<|pad|>",
})

model.config.decoder_start_token_id = tokenizer.bos_token_id
model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id
#model.generation_config.decoder_start_token_id = tokenizer.cls_token_id

model.decoder.resize_token_embeddings(len(tokenizer))

processor = ViTImageProcessor.from_pretrained("nickmuchi/vit-finetuned-chest-xray-pneumonia")

Some weights of ViTModel were not initialized from the model checkpoint at nickmuchi/vit-finetuned-chest-xray-pneumonia and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertLMHeadModel were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.0.crossatten

['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']


## The CXIRG Dataset

In [17]:
class CXIRGDataset(Dataset):
    def __init__(self, data: List[Dict[str, Any]]) -> None:
        super(CXIRGDataset, self).__init__()
        self.data = data

    def __getitem__(self, index: int) -> Dict[str, Any]:
        return self.data[index]

    def __len__(self) -> int:
        return len(self.data)

## The Collate Function for The DataLoader

In [18]:
max_length = 256

def train_dl_collate_fn(one_batch_data: List[Dict[str, Any]]) -> Dict[str, Any]:
    names, pixel_values, reports = [], [], []

    for one_data in one_batch_data:
        names.append(one_data["name"])
        pixel_values.append(one_data["image"])
        reports.append(one_data["text"])

    # Process images
    pixel_values = processor(images=pixel_values, return_tensors="pt").pixel_values
    
    # Tokenize reports
    label_ids = tokenizer(
        reports,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    ).input_ids

    return {
        "names": names,
        "pixel_values": pixel_values.to(device, dtype=torch.float32),
        "label_ids": label_ids.to(device, dtype=torch.int64)
    }

def valid_dl_collate_fn(one_batch_data: List[Dict[str, Any]]) -> Dict[str, Any]:
    names, pixel_values, reports = [], [], []

    for one_data in one_batch_data:
        names.append(one_data["name"])
        pixel_values.append(one_data["image"])
        reports.append(one_data["text"])

    # Process images
    pixel_values = processor(images=pixel_values, return_tensors="pt").pixel_values

    # Tokenize reports
    label_ids = tokenizer(
        reports,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    ).input_ids

    return {
        "names": names,
        "pixel_values": pixel_values.to(device, dtype=torch.float32),
        "label_ids": label_ids.to(device, dtype=torch.int64)
    }


## Load The Train & Validation Data

In [19]:
report_path = "CXIRG_Data/train_data/reports.xlsx"
report_df = pd.read_excel(report_path)

image_dir_path = "CXIRG_Data/train_data/images"

train_data = []

for image_name in os.listdir(image_dir_path):
    image_path = os.path.join(image_dir_path, image_name)
    
    with Image.open(image_path) as img:
        if img.mode != "RGB":
            img = img.convert("RGB")
        text = report_df[report_df["name"] == image_name[:13]]["text"].values[0].replace("_x000D_", "\r")

        train_data.append({
            "name": image_name[:13],
            "image": img,
            "text": text
        })


train_dataset = CXIRGDataset(train_data)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=train_dl_collate_fn)

for batch in train_dataloader:
    print(batch)

{'names': ['NLP_CHEST_001', 'NLP_CHEST_048', 'NLP_CHEST_060', 'NLP_CHEST_094'], 'pixel_values': tensor([[[[-0.1843, -0.4118, -0.4118,  ..., -0.9137, -0.9137, -0.9137],
          [-0.5686, -0.4667, -0.3804,  ..., -0.7098, -0.6235, -0.5765],
          [-0.6549, -0.5216, -0.3490,  ..., -0.4902, -0.5059, -0.4196],
          ...,
          [ 0.2941,  0.3412,  0.3882,  ..., -0.9529, -0.9451, -0.9373],
          [ 0.2706,  0.3020,  0.3804,  ..., -0.9451, -0.9451, -0.9373],
          [ 0.2471,  0.2863,  0.3490,  ..., -0.9451, -0.9373, -0.9294]],

         [[-0.1843, -0.4118, -0.4118,  ..., -0.9137, -0.9137, -0.9137],
          [-0.5686, -0.4667, -0.3804,  ..., -0.7098, -0.6235, -0.5765],
          [-0.6549, -0.5216, -0.3490,  ..., -0.4902, -0.5059, -0.4196],
          ...,
          [ 0.2941,  0.3412,  0.3882,  ..., -0.9529, -0.9451, -0.9373],
          [ 0.2706,  0.3020,  0.3804,  ..., -0.9451, -0.9451, -0.9373],
          [ 0.2471,  0.2863,  0.3490,  ..., -0.9451, -0.9373, -0.9294]],

      

In [20]:
report_path = "CXIRG_Data/valid_data/reports.xlsx"
report_df = pd.read_excel(report_path)

image_dir_path = "CXIRG_Data/valid_data/images"

valid_data = []


for image_name in os.listdir(image_dir_path):
    image_path = os.path.join(image_dir_path, image_name)
    
    with Image.open(image_path) as img:
        if img.mode != "RGB":
            img = img.convert("RGB")

        text = report_df[report_df["name"] == image_name[:13]]["text"].values[0].replace("_x000D_", "\r")

        valid_data.append({
            "name": image_name[:13],
            "image": img,
            "text": text
        })

valid_dataset = CXIRGDataset(valid_data)

valid_dataloader = DataLoader(valid_dataset, batch_size=4, shuffle=False, collate_fn=valid_dl_collate_fn)

for batch in valid_dataloader:
    print(batch)

{'names': ['NLP_CHEST_002', 'NLP_CHEST_004', 'NLP_CHEST_011', 'NLP_CHEST_015'], 'pixel_values': tensor([[[[-0.4980, -0.4824, -0.4745,  ..., -0.7255, -0.6549, -0.5059],
          [-0.3961, -0.3961, -0.3882,  ..., -0.7333, -0.7255, -0.6863],
          [-0.3882, -0.3882, -0.3882,  ..., -0.7412, -0.7333, -0.7255],
          ...,
          [-0.6863, -0.6863, -0.7020,  ..., -1.0000, -0.9922, -0.9922],
          [-0.6784, -0.6941, -0.7020,  ..., -1.0000, -0.9922, -0.9922],
          [-0.6706, -0.6784, -0.6863,  ..., -1.0000, -0.9922, -0.9922]],

         [[-0.4980, -0.4824, -0.4745,  ..., -0.7255, -0.6549, -0.5059],
          [-0.3961, -0.3961, -0.3882,  ..., -0.7333, -0.7255, -0.6863],
          [-0.3882, -0.3882, -0.3882,  ..., -0.7412, -0.7333, -0.7255],
          ...,
          [-0.6863, -0.6863, -0.7020,  ..., -1.0000, -0.9922, -0.9922],
          [-0.6784, -0.6941, -0.7020,  ..., -1.0000, -0.9922, -0.9922],
          [-0.6706, -0.6784, -0.6863,  ..., -1.0000, -0.9922, -0.9922]],

      

## Set The Hyperparameters & Initialize The Optimizer, Dataloaders and Evaluation Metric

In [21]:
lr = 1e-5
epochs = 50
optimizer = AdamW(params=model.parameters(), lr=lr)

train_batch_size = 1
valid_batch_size = 1
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=train_dl_collate_fn
)
valid_dataloader = DataLoader(
    dataset=valid_dataset,
    batch_size=valid_batch_size,
    shuffle=False,
    collate_fn=valid_dl_collate_fn
)

rouge = Rouge(variants=["L", 2], multiref="best")

## The Evaluation Function

In [31]:
def evaluate(model: VisionEncoderDecoderModel, epoch: int) -> Dict[str, float]:
    model.eval()
    pbar = tqdm(valid_dataloader)
    pbar.set_description(f"Evaluating Epoch: {epoch + 1}")

    with torch.no_grad():
        for batch in pbar:
            names = batch["names"]
            pixel_values = batch["pixel_values"]
            label_ids = batch["label_ids"]

            predictions = model.generate(
                pixel_values, 
                decoder_start_token_id=model.config.decoder_start_token_id,
                max_length=256,
                #num_beams=4,     
                #early_stopping=True
            )

            _predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
            _labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

            print(f"Names      : {names}")
            print(f"Predictions: {_predictions}")
            print(f"Labels     : {_labels}")
            print()

            for _prediction, _label in zip(_predictions, _labels):
                split_prediction = _prediction.split()
                split_label = _label.split()

                rouge.update(([split_prediction], [[split_label]]))

    return rouge.compute()

def eval(model: VisionEncoderDecoderModel) -> Dict[str, float]:
    model.eval()
    pbar = tqdm(valid_dataloader)
    pbar.set_description(f"Evaluating: ")

    with torch.no_grad():
        for batch in pbar:
            names = batch["names"]
            pixel_values = batch["pixel_values"]
            label_ids = batch["label_ids"]

            predictions = model.generate(
                pixel_values, 
                decoder_start_token_id=model.config.decoder_start_token_id,
                max_length=256,
                #num_beams=4,     
                #early_stopping=True
            )

            _predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
            _labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

            print(f"Names      : {names}")
            print(f"Predictions: {_predictions}")
            print(f"Labels     : {_labels}")
            print()

            for _prediction, _label in zip(_predictions, _labels):
                split_prediction = _prediction.split()
                split_label = _label.split()

                rouge.update(([split_prediction], [[split_label]]))

    return rouge.compute()

In [23]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)


cuda


In [33]:
min_loss = float("inf")

for epoch in range(epochs):
    model.train()

    pbar = tqdm(train_dataloader)
    pbar.set_description(f"Training Epoch [{epoch + 1} / {epochs}]")

    loss_list = []
    average_loss = 0

    for batch in pbar:
        optimizer.zero_grad()

        pixel_values = batch["pixel_values"]
        label_ids = batch["label_ids"]

        outputs = model(
            pixel_values=pixel_values,
            labels=label_ids
        )
        loss = outputs.loss

        loss_list.append(loss.item())
        average_loss = np.mean(np.array(loss_list))
        loss.backward()
        optimizer.step()

        pbar.set_postfix(loss=average_loss)

    checkpoint_path = os.path.join("./outputs_original", f"checkpoint_epoch_{epoch + 1}.pt")
    torch.save(model.state_dict(), checkpoint_path)    

    if average_loss < min_loss:
        min_loss = average_loss
        torch.save(model.state_dict(), f"outputs_original/best_checkpoint.pt")

    print(f"Rouge-2 score :", eval(model=model))

Training Epoch [1 / 50]:  17%|█▋        | 15/89 [00:04<00:20,  3.69it/s, loss=0.0121]


KeyboardInterrupt: 

In [34]:
checkpoint_path = "outputs_original/checkpoint_epoch_50.pt"
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

print(f"Rouge-2 score on epoch {epoch+1}:", eval(model=model))


Evaluating: :  10%|█         | 1/10 [00:03<00:29,  3.30s/it]

Names      : ['NLP_CHEST_002']
Predictions: ['chest pa view shows : impression : - increased both lung markings. blunting bilateral cp angles. - the mediastinum is not widened. - normal heart size. - atherosclerotic aorta. - djd of spine. mild scoliosis of spine. - s / p left pleural pigtail tube. subcutaneous emphysema in left chest wall.']
Labels     : ['chest pa view shows : partial atelectasis at left lower lung field. left pleural effusion. borderline heart size. atherosclerotic change of aortic knob. s / p port - a implantation via left subclavian vein. intraperitoneal port catheter. surgical clips at ruq of abdomen.']



Evaluating: :  20%|██        | 2/10 [00:06<00:25,  3.13s/it]

Names      : ['NLP_CHEST_004']
Predictions: ['chest plain film view showed : impression : - suspicious pulmonary edema. superimposed pneumonia cannot be excluded. - elevated right hemidiaphragm. - normal heart size. atherosclerosis of aorta. - spondylosis of spine. - s / p lt subclavian port - a - cath.']
Labels     : ['chest pa view show : impression : - s / p rll wedge resection. - bilateral lungs metastasis. - left lower lung subsegmental atelectasis. - increased infiltrations in both lungs. - blunting right cp angle. - tortuous atherosclerotic aorta. - scoliosis, djd and osteoporosis of spine. - compression fracture of l1.']



Evaluating: :  30%|███       | 3/10 [00:09<00:21,  3.11s/it]

Names      : ['NLP_CHEST_011']
Predictions: ["chest ap view showed : 1. enlarged heart size with tortuous aorta. 2. r't middle lung and l't lower lung reticulonodular infiltrations. mild l't pleural effusion. 3. no mediastinum widening. 4. s / p endotracheal tube and ng intubation. s / p r't jugular cvp."]
Labels     : ["chest ap view showed : 1. s / p sternotomy and cabg. enlarged heart size with tortuous aorta. 2. r't middle and lower lung faint patches. l't lower lung consolidation. l't pleural effusion. 3. no mediastinum widening. 4. s / p endotracheal tube and ng intubation."]



Evaluating: :  40%|████      | 4/10 [00:12<00:18,  3.12s/it]

Names      : ['NLP_CHEST_015']
Predictions: ['']
Labels     : ['chest x ray : - no obvious lung mass nor consolidation patch. - normal heart size. - no pleural effusion.']



Evaluating: :  50%|█████     | 5/10 [00:15<00:15,  3.15s/it]

Names      : ['NLP_CHEST_027']
Predictions: ['chest plain film shows : impression : - cardiomegaly with left atrium enlargement. - blunting bilateral cp angles. - atherosclerotic aorta. - scoliosis and djd of spine. - bilateral apical pleural thickening.']
Labels     : ['chest pa view : impression : > cardiomegaly with bilateral pulmonary congestion. > postinflammatory fibrosis in both upper lungs. > atherosclerosis of aorta. > old fractures of left 5th and 6th ribs. > r / o osteoporosis. > spondylosis of thoracolumbar spine. > s / p abdominal operation in ruq.']



Evaluating: :  60%|██████    | 6/10 [00:18<00:12,  3.18s/it]

Names      : ['NLP_CHEST_031']
Predictions: ['chest pa view : impression : - increased both lung markings. - compatible with right lung nodules. - fibrotic lesion in left upper lung. - normal heart size. atherosclerotic aorta. - suspect right pleural effusion. - spondylosis. r / o osteoporosis. - compatible with multiple bony metastases. - s / p ng and endotracheal tube. - s / p lt subclavian port - a - cath.']
Labels     : ['chest pa view : impression : - increased both lung markings. - compatible with right lung nodules. - fibrotic lesion in left upper lung. - normal heart size. atherosclerotic aorta. - suspect right pleural effusion. - spondylosis. r / o osteoporosis. - compatible with multiple bony metastases. - s / p ng and endotracheal tube. - s / p lt subclavian port - a - cath.']



Evaluating: :  70%|███████   | 7/10 [00:22<00:09,  3.15s/it]

Names      : ['NLP_CHEST_057']
Predictions: ['chest plain film shows : impression : - suspicious pulmonary edema. superimposed pneumonia cannot be excluded. - blunting bilateral cp angles. - normal heart size. atherosclerotic aorta. - scoliosis and djd of spine. - s / p rt subclavian cvc insertion.']
Labels     : ['chest plain film shows : impression : - increased infiltrations in both lungs. - tortuous atherosclerotic dilated aorta. - normal heart size. - djd of spine. old fracture of right ribs. - increased both lung markings. s / p lt jugular cvc insertion. s / p ng and endotracheal tube. - susp. lt pneumothorax. suspect pneumomediastinum. subcutaneous emphysema in bilateral neck. - s / p bilateral chest tube insertion.']



Evaluating: :  80%|████████  | 8/10 [00:25<00:06,  3.11s/it]

Names      : ['NLP_CHEST_059']
Predictions: ['']
Labels     : ['supine chest ap view shows : pigtail catheter at right lower chest. decreased amount of right pleural effusion. air in the right lower pleural space. poor lung expansion. mild subcutaneous emphysema at right chest wall. right pleural thickening. ill - defined masses at right perihilar region. normal heart size.']



Evaluating: :  90%|█████████ | 9/10 [00:28<00:03,  3.09s/it]

Names      : ['NLP_CHEST_071']
Predictions: ['chest pa view show : impression : > increased bilateral lung markings. > normal heart size. > atherosclerotic aorta. > spondylosis. > s / p ng and endotracheal tube.']
Labels     : ['chest film shows : impression : - bilateral lung infiltrations. - suspect right lower lung patch. blunting right cp angle. - tortuous atherosclerotic aorta. - scoliosis, djd and osteoporosis of spine. compression fracture of t12. old fracture of left ribs. - s / p fixation in l - spine. - s / p tracheostomy and ng tube.']



Evaluating: : 100%|██████████| 10/10 [00:31<00:00,  3.14s/it]

Names      : ['NLP_CHEST_085']
Predictions: ['chest pa view shows : impression : > increased bilateral lung markings. > normal heart size. > atherosclerotic aorta. > spondylosis. > stent in left axillary region.']
Labels     : ['chest x ray : - right pneumothorax with pleural effusion, status post drainge tube placement. - extensive subcutaneous emphysema from neck to right side thoracic cage. - increased right hilar opacity. - atherosclerosis and tortuous aorta. - obscured bilateral costophrenic angles. - spondylosis and mild scoliosis of thoracolumbar spine. - oa of right glenohumeral joint. - generalized osteopenia.']

Rouge-2 score on epoch 1: {'Rouge-L-P': 0.23986318233760573, 'Rouge-L-R': 0.19769402915089648, 'Rouge-L-F': 0.19769402915089648, 'Rouge-2-P': 0.1614026540181316, 'Rouge-2-R': 0.13965889175673846, 'Rouge-2-F': 0.13965889175673846}



