# Chest X-ray Image Report Generation (CXIRG)

## Install Required Modules

In [1]:
!pip install openpyxl
!pip install pandas
!pip install pillow
!pip install pytorch-ignite
!pip install scikit-learn
!pip install torch
!pip install transformers

Collecting openpyxl
  Downloading openpyxl-3.1.4-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.4-py2.py3-none-any.whl (251 kB)
   ---------------------------------------- 0.0/251.4 kB ? eta -:--:--
   - -------------------------------------- 10.2/251.4 kB ? eta -:--:--
   ------ -------------------------------- 41.0/251.4 kB 393.8 kB/s eta 0:00:01
   ----------- --------------------------- 71.7/251.4 kB 491.5 kB/s eta 0:00:01
   --------------------- ---------------- 143.4/251.4 kB 774.0 kB/s eta 0:00:01
   ------------------------------ ------- 204.8/251.4 kB 888.4 kB/s eta 0:00:01
   -------------------------------------- 251.4/251.4 kB 964.5 kB/s eta 0:00:00
Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.4
Collecting pytorch-ignite
  Using cached pytor

## Import Required Modules

In [1]:
import os
import random
import torch

import numpy as np
import pandas as pd
import torch.nn as nn

from ignite.metrics import Rouge
from pandas.core.common import random_state
from PIL import Image
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import VisionEncoderDecoderModel, AutoTokenizer, AutoModel
from typing import Any, Dict, List

  from .autonotebook import tqdm as notebook_tqdm


## Set The Random Seed

In [2]:
seed = 48763

np.random.seed(seed=seed, )

random_state(state=seed, )

random.seed(a=seed, )

torch.manual_seed(seed=seed, )
torch.cuda.manual_seed(seed=seed, )
torch.cuda.manual_seed_all(seed=seed, )
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Set The Device & Initialize Models

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained("keremberke/yolov8m-chest-xray-classification", "medicalai/ClinicalBERT").to(device)

tokenizer = AutoTokenizer.from_pretrained("medicalai/ClinicalBERT")

print(tokenizer.all_special_tokens)

tokenizer.add_special_tokens({
    "bos_token": "<|beginoftext|>",
    "pad_token": "<|pad|>",
})

model.config.decoder_start_token_id = tokenizer.bos_token_id
model.config.pad_token_id = tokenizer.pad_token_id

model.decoder.resize_token_embeddings(len(tokenizer))

processor = .from_pretrained("keremberke/yolov8m-chest-xray-classification")

Some weights of ViTModel were not initialized from the model checkpoint at nickmuchi/vit-finetuned-chest-xray-pneumonia and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['h.0.crossattention.c_attn.bias', 'h.0.crossattention.c_attn.weight', 'h.0.crossattention.c_proj.bias', 'h.0.crossattention.c_proj.weight', 'h.0.crossattention.q_attn.bias', 'h.0.crossattention.q_attn.weight', 'h.0.ln_cross_attn.bias', 'h.0.ln_cross_attn.weight', 'h.1.crossattention.c_attn.bias', 'h.1.crossattention.c_attn.weight', 'h.1.crossattention.c_proj.bias', 'h.1.crossattention.c_proj.weight', 'h.1.crossattention.q_attn.bias', 'h.1.crossattention.q_attn.weight', 'h.1.ln_cross_attn.bias', 'h.1.ln_cross_attn.weight', 'h.10.crossattention.c_attn.b

['<|endoftext|>']


## The CXIRG Dataset

In [5]:
class CXIRGDataset(Dataset):
    def __init__(self, data: List[Dict[str, Any]]) -> None:
        super(CXIRGDataset, self).__init__()
        self.data = data

    def __getitem__(self, index: int) -> Dict[str, Any]:
        return self.data[index]

    def __len__(self) -> int:
        return len(self.data)

## The Collate Function for The DataLoader

In [7]:
max_length = 256

def train_dl_collate_fn(one_batch_data: List[Dict[str, Any]]):
    names = [one_data["name"] for one_data in one_batch_data]

    pixel_values = processor(
        images=[one_data["image"] for one_data in one_batch_data], 
        return_tensors="pt"
    ).pixel_values

    label_ids = tokenizer.batch_encode_plus(
        batch_text_or_text_pairs=[
            (tokenizer.bos_token + one_data["text"] + tokenizer.eos_token) for one_data in one_batch_data
        ],
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    ).input_ids

    return names, pixel_values.to(device), label_ids.to(device)

def valid_dl_collate_fn(one_batch_data: List[Dict[str, Any]]):
    names = [one_data["name"] for one_data in one_batch_data]

    pixel_values = processor(
        images=[one_data["image"] for one_data in one_batch_data], 
        return_tensors="pt"
    ).pixel_values

    label_ids = tokenizer.batch_encode_plus(
        batch_text_or_text_pairs=[
            (tokenizer.bos_token + one_data["text"] + tokenizer.eos_token) for one_data in one_batch_data
        ],
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    ).input_ids

    return names, pixel_values.to(device), label_ids.to(device)

## Load The Train & Validation Data

In [8]:
train_data = []

report_path = "CXIRG_Data/train_data/reports.xlsx"
report_df = pd.read_excel(report_path)

image_dir_path = "CXIRG_Data/train_data/images"
for image_name in os.listdir(image_dir_path):
    image = Image.open(os.path.join(image_dir_path, image_name))
    if image.mode != "RGB":
        image = image.convert("RGB")

    text = report_df[report_df["name"] == image_name[:13]]["text"].values[0].replace("_x000D_", "\r")

    train_data.append({
        "name": image_name[:13],
        "image": image,
        "text": text
    })

train_dataset = CXIRGDataset(train_data)

In [10]:
valid_data = []

report_path = "CXIRG_Data/valid_data/reports.xlsx"
report_df = pd.read_excel(report_path)

image_dir_path = "CXIRG_Data/valid_data/images"
for image_name in os.listdir(image_dir_path):
    image = Image.open(os.path.join(image_dir_path, image_name))
    if image.mode != "RGB":
        image = image.convert("RGB")

    text = report_df[report_df["name"] == image_name[:13]]["text"].values[0].replace("_x000D_", "\r")

    valid_data.append({
        "name": image_name[:13],
        "image": image,
        "text": text
    })

valid_dataset = CXIRGDataset(valid_data)

## Set The Hyperparameters & Initialize The Optimizer, Dataloaders and Evaluation Metric

In [11]:
lr = 1e-5
epochs = 30
optimizer = AdamW(params=model.parameters(), lr=lr)

train_batch_size = 1
valid_batch_size = 1
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=train_dl_collate_fn
)
valid_dataloader = DataLoader(
    dataset=valid_dataset,
    batch_size=valid_batch_size,
    shuffle=False,
    collate_fn=valid_dl_collate_fn
)

rouge = Rouge(variants=["L", 2], multiref="best")

## The Evaluation Function

In [12]:
def evaluate(model: VisionEncoderDecoderModel, epoch: int) -> Dict[str, float]:
    model.eval()

    pbar = tqdm(valid_dataloader)
    pbar.set_description(f"Evaluting Epoch: {epoch + 1}")

    with torch.no_grad():
        for names, pixel_values, label_ids in pbar:
            predictions = model.generate(pixel_values)

            _predictions = tokenizer.batch_decode(
                predictions,
                skip_special_tokens=True
            )

            _labels = tokenizer.batch_decode(
                label_ids,
                skip_special_tokens=True
            )

            print(f"Names      : {names}")
            print(f"Predictions: {_predictions}")
            print(f"Labels     : {_labels}")
            print()

            for _prediction, _label in zip(_predictions, _labels):
                split_prediction = _prediction.split()
                split_label = _label.split()

                rouge.update(([split_prediction], [[split_label]]))

    return rouge.compute()

In [13]:
min_loss = float("inf")

for epoch in range(epochs):
    model.train()

    pbar = tqdm(train_dataloader)
    pbar.set_description(f"Training Epoch [{epoch + 1} / {epochs}]")

    lost_list = []
    average_loss = 0

    for _, pixel_values, label_ids in pbar:
        optimizer.zero_grad()

        loss = model(
            pixel_values=pixel_values,
            labels=label_ids
        ).loss

        lost_list.append(loss.item())
        average_loss = np.mean(np.array(lost_list))
        loss.backward()
        optimizer.step()

        pbar.set_postfix(loss=average_loss)

    if average_loss < min_loss:
        min_loss = average_loss
        torch.save(model, f"outputs/best_checkpoint.pt")

    print(f"Rouge-2 score on epoch {epoch}:", evaluate(model=model, epoch=epoch))

Training Epoch [1 / 30]:  87%|████████▋ | 77/89 [06:53<01:04,  5.37s/it, loss=6.41]


KeyboardInterrupt: 