In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries
# (the download loop splits on ',' and then on ':').
# NOTE(review): the URL is a pre-signed link; its query string carries
# X-Goog-Date=20240513T200523Z and X-Goog-Expires=259200 (seconds), so it has
# presumably expired and must be regenerated before this cell can download again.
DATA_SOURCE_MAPPING = 'mura-v11:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F427555%2F813639%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240513%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240513T200523Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D8e5668a8f425067a1312dbc5a0449de71cc8d62ad0df2e73901ff04c21eea29377fc82c17245a2dce9154b714acdaccf157ceb10eb34642881834cc2f00020e6160f80a0038d2dcfc38ef794be9fb91cdcc3da25241ec47844cd35723d3569577b8c6a84110044dbdad241de545a54db22e161a280b6ec1014ab4997a309d6806af52e6b4607686f9df0704580593bd32c0b11b04b515d2839e94ac14d5b5ada4d6249ce59385e10fb3c16de115ee6abc788cf98720b7e6a99f2e45c547d8180a5352c101cab4ef86ed2669e6626cc556ede12ad8f61bb644cb15a72f68196064715ec85570c13e36e4007e2f1a702a431bd71b9eb11755e623d9f8e62d03d29'

# Directories that mirror the Kaggle layout: downloaded data goes under the
# input path; the working path is created alongside it.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible notebook - confirm before removing.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into KAGGLE_INPUT_PATH/<directory>
# and unpack it there. A failing source is reported and skipped so that the
# remaining sources are still attempted.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<percent-encoded URL>". Split only on the
    # first ':' so a stray colon in the URL part cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the byte counter is
            # still shown but the progress bar stays empty.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # tarfile reopens the temp file by name, so buffered bytes must
            # be on disk before extraction starts.
            tfile.flush()
            # NOTE(review): extractall() trusts the member paths stored in the
            # downloaded archive; only use with sources you trust.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here
                # would replace the module and break the next tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading mura-v11, 3374985543 bytes compressed

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms, models

import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd

from PIL import Image
from pathlib import Path

import os

In [None]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# Work from the dataset root so that relative paths used by later cells
# (e.g. 'MURA-v1.1/...') resolve against it.
directory_path = Path('/kaggle/input/mura-v11/')
os.chdir(str(directory_path))

cuda


In [None]:
# Dataset folder, relative to the current working directory.
data_path = Path('MURA-v1.1')

# The CSV files are read without a header row, so column names are supplied here.
image_columns = ['image_path']
study_columns = ['image_path', 'label']

# One row per image path.
train_df = pd.read_csv(data_path / 'train_image_paths.csv', names=image_columns)
valid_df = pd.read_csv(data_path / 'valid_image_paths.csv', names=image_columns)

# One row per labeled study (path plus label).
trainlabel_df = pd.read_csv(data_path / 'train_labeled_studies.csv', names=study_columns)
validlabel_df = pd.read_csv(data_path / 'valid_labeled_studies.csv', names=study_columns)


In [None]:
class Dataset(torch.utils.data.Dataset):
    """Image dataset driven by a DataFrame whose first column holds image paths.

    Each item is ``(image, label)``. The image is opened with PIL and
    converted to mode ``'LA'``; the label is ``1`` when the substring
    ``'positive'`` occurs in the path and ``0`` otherwise.

    Args:
        df: DataFrame with the image path in column 0.
        transform: Optional callable applied to the converted image.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        self.df = df

    def __len__(self):
        """Number of rows (images) in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image in row ``idx`` and derive its label from the path."""
        path = self.df.iloc[idx, 0]
        picture = Image.open(path).convert('LA')
        is_positive = int('positive' in path)

        if not self.transform:
            return picture, is_positive
        return self.transform(picture), is_positive

In [None]:
# Body-part prefixes used to split the image lists; each one is matched as a
# substring of the image path.
study_types = ['XR_ELBOW', 'XR_FINGER', 'XR_FOREARM', 'XR_HAND', 'XR_HUMERUS', 'XR_SHOULDER', 'XR_WRIST']

# Preprocessing for training images: resize to 224x224, convert to a tensor,
# then normalize with a single mean/std value.
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.456], std=[0.224]),
    ]
)

# Validation uses the same steps, kept as a separate pipeline object.
valid_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.456], std=[0.224]),
    ]
)

# Per-study-type lookup tables, keyed by the entries of study_types.
study_train_df, study_valid_df = {}, {}
train_dataset, valid_dataset = {}, {}
train_dataloader, valid_dataloader = {}, {}

for study_type in study_types:
    # Rows whose path mentions this study type.
    train_mask = train_df['image_path'].str.contains(study_type)
    valid_mask = valid_df['image_path'].str.contains(study_type)
    study_train_df[study_type] = train_df[train_mask]
    study_valid_df[study_type] = valid_df[valid_mask]

    train_dataset[study_type] = Dataset(study_train_df[study_type], transform=train_transform)
    valid_dataset[study_type] = Dataset(study_valid_df[study_type], transform=valid_transform)

    # Only the training loader shuffles; validation order stays fixed.
    train_dataloader[study_type] = DataLoader(train_dataset[study_type], batch_size=12, shuffle=True)
    valid_dataloader[study_type] = DataLoader(valid_dataset[study_type], batch_size=12, shuffle=False)

In [None]:
# Sanity check: print the train/valid sizes for every study type and the
# tensor shapes of the first training batch (if the loader yields one).
for study_type in study_types:
    print(study_type, len(train_dataset[study_type]), len(valid_dataset[study_type]))
    batch = next(iter(train_dataloader[study_type]), None)
    if batch is not None:
        print(batch[0].shape, batch[1].shape)

XR_ELBOW 4931 465
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_FINGER 5106 461
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_FOREARM 1825 301
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_HAND 5543 460
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_HUMERUS 1272 288
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_SHOULDER 8379 563
torch.Size([8, 2, 224, 224]) torch.Size([8])
XR_WRIST 9752 659
torch.Size([8, 2, 224, 224]) torch.Size([8])


In [None]:
# Show a grid of randomly chosen training images: one row per study type,
# `samples_per_type` images per row.
samples_per_type = 3
# squeeze=False keeps `axs` two-dimensional even when there is a single row,
# so axs[row, col] indexing always works.
fig, axs = plt.subplots(len(study_types), samples_per_type, figsize=(15, 15), squeeze=False)
for row, study_type in enumerate(study_types):
    study_rows = study_train_df[study_type]
    indices = random.sample(range(len(study_rows)), samples_per_type)
    for col, idx in enumerate(indices):
        image_path = study_rows.iloc[idx]['image_path']
        image = Image.open(image_path).convert('L')
        ax = axs[row, col]
        # Single-channel data is colour-mapped by imshow; request the gray
        # map explicitly so the radiographs are drawn in grayscale.
        ax.imshow(image, cmap='gray')
        ax.axis('off')
        ax.set_title(study_type)

plt.show()

In [None]:
# U-NET Architecture
class conv_block(nn.Module):
    """Two consecutive 3x3 convolutions, each followed by BatchNorm and ReLU.

    The convolutions use padding=1, so height and width are unchanged; only
    the channel count goes from ``in_c`` to ``out_c``.

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels of both convolutions.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        stages = []
        # The first stage maps in_c -> out_c, the second keeps out_c -> out_c.
        for stage_in in (in_c, out_c):
            stages.append(nn.Conv2d(stage_in, out_c, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(out_c))
            stages.append(nn.ReLU(inplace=True))
        self.sequential = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both conv/BN/ReLU stages."""
        return self.sequential(x)

class encoder_block(nn.Module):
    """Encoder stage: a ``conv_block`` followed by 2x2 max pooling.

    Args:
        in_c: Number of input channels.
        out_c: Number of channels produced by the conv block.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = conv_block(in_c, out_c)
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, inputs):
        """Return ``(features, pooled)``: the conv output and its pooled version."""
        features = self.conv(inputs)
        return features, self.pool(features)

class decoder_block(nn.Module):
    """Decoder stage: upsample, concatenate the skip tensor, then ``conv_block``.

    Args:
        in_c: Channels of the incoming (lower-resolution) tensor.
        out_c: Channels after upsampling and after the conv block. The conv
            block takes ``2 * out_c`` channels, so the skip tensor must
            carry ``out_c`` channels.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        # kernel_size=2 with stride=2 doubles height and width.
        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
        self.conv = conv_block(out_c + out_c, out_c)

    def forward(self, inputs, skip):
        """Upsample ``inputs``, join it with ``skip`` on the channel axis, convolve."""
        upsampled = self.up(inputs)
        merged = torch.cat([upsampled, skip], dim=1)
        return self.conv(merged)

class build_unet(nn.Module):
    """U-Net: four encoder stages, a bottleneck, four decoder stages and a 1x1 head.

    The network takes 2-channel inputs and emits a 1-channel map. Every
    decoder stage concatenates the matching encoder output, so the input
    height/width presumably need to be divisible by 16 (four 2x2 poolings)
    for the skip tensors to line up.
    """

    def __init__(self):
        super().__init__()
        # Contracting path: channels 2 -> 64 -> 128 -> 256 -> 512.
        self.enc1 = encoder_block(2, 64)
        self.enc2 = encoder_block(64, 128)
        self.enc3 = encoder_block(128, 256)
        self.enc4 = encoder_block(256, 512)

        # Bottleneck at the lowest resolution.
        self.mid = conv_block(512, 1024)

        # Expanding path: channels 1024 -> 512 -> 256 -> 128 -> 64.
        self.dec1 = decoder_block(1024, 512)
        self.dec2 = decoder_block(512, 256)
        self.dec3 = decoder_block(256, 128)
        self.dec4 = decoder_block(128, 64)

        # 1x1 convolution that reduces the 64 feature channels to one output channel.
        self.last = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    def forward(self, inputs):
        """Return the single-channel output map for ``inputs`` (no activation applied)."""
        # Encoder: keep each stage's pre-pool features for the skip connections.
        skip1, down1 = self.enc1(inputs)
        skip2, down2 = self.enc2(down1)
        skip3, down3 = self.enc3(down2)
        skip4, down4 = self.enc4(down3)

        bottleneck = self.mid(down4)

        # Decoder: consume the skips from deepest to shallowest.
        up = self.dec1(bottleneck, skip4)
        up = self.dec2(up, skip3)
        up = self.dec3(up, skip2)
        up = self.dec4(up, skip1)

        return self.last(up)

In [None]:
# Build the U-Net and place its parameters on the selected device
# (nn.Module.to returns the module itself).
model2 = build_unet().to(device)

from torchinfo import summary

build_unet(
  (enc1): encoder_block(
    (conv): conv_block(
      (sequential): Sequential(
        (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
      )
    )
    (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (enc2): encoder_block(
    (conv): conv_block(
      (sequential): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): Batc

In [None]:
def train(model, train_dataloader, criterion, optimizer):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Network returning a 4-D tensor; the value at position
            ``[:, 0, 0, 0]`` of its output is used as the per-sample score.
        train_dataloader: Iterable of ``(data, target)`` batches.
        criterion: Loss called as ``criterion(score, target.float())``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Average of the batch losses over the epoch.

    Raises:
        ValueError: If ``train_dataloader`` yields no batches.
    """
    model.train()
    # Move batches to wherever the model's parameters live; fall back to the
    # notebook-level `device` only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    running_loss = 0.0
    num_batches = 0
    for data, target in train_dataloader:
        data, target = data.to(target_device), target.to(target_device)
        optimizer.zero_grad()
        output = model(data)
        # NOTE(review): only the first channel's top-left element of the
        # output map is used as the prediction - confirm this is intended.
        output = output[:, 0, 0, 0]
        target = target.float()
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError('train_dataloader yielded no batches')
    # Report the epoch mean rather than whatever the last batch produced.
    return running_loss / num_batches

In [None]:
def validate(model, val_dataloader, criterion):
    """Evaluate ``model`` and return ``(mean batch loss, accuracy in percent)``.

    The model output is treated as a raw logit (it is fed to ``criterion``
    unchanged), so the class decision is taken on ``sigmoid(output)`` at a
    probability threshold of 0.5.

    Args:
        model: Network returning a 4-D tensor; the value at position
            ``[:, 0, 0, 0]`` of its output is used as the per-sample logit.
        val_dataloader: Sized iterable of ``(data, target)`` batches.
        criterion: Loss called as ``criterion(logit, target.float())``.

    Returns:
        tuple: ``(val_loss, accuracy)`` where ``val_loss`` is the batch-loss
        sum divided by ``len(val_dataloader)`` and ``accuracy`` is the
        percentage of samples predicted correctly.

    Raises:
        ValueError: If ``val_dataloader`` yields no samples.
    """
    model.eval()
    # Move batches to wherever the model's parameters live; fall back to the
    # notebook-level `device` only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    val_loss = 0
    total = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_dataloader:
            data, target = data.to(target_device), target.to(target_device)
            output = model(data)
            # NOTE(review): only the first channel's top-left element of the
            # output map is used as the prediction - confirm this is intended.
            output = output[:, 0, 0, 0]
            target = target.float()
            loss = criterion(output, target)
            val_loss += loss.item()
            # Threshold the probability, not the logit: a logit of 0.5 is
            # a probability of about 0.62, which would bias towards class 0.
            predicted = (torch.sigmoid(output) >= 0.5).float()
            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        raise ValueError('val_dataloader yielded no samples')
    val_loss /= len(val_dataloader)
    accuracy = (correct / total) * 100
    return val_loss, accuracy


In [None]:
# Loss on raw logits, Adam optimizer and epoch budget for this run.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.001)
num_epochs = 50

# Study type to train on. It is chosen by name so the shared `study_types`
# list (and the dictionaries keyed by it) is not overwritten by this cell.
study_type = 'XR_WRIST'

for epoch in range(num_epochs):
    train_loss = train(model2, train_dataloader[study_type], criterion, optimizer)
    validation_loss, accuracy = validate(model2, valid_dataloader[study_type], criterion)
    print(f'Epoch {epoch + 1}/{num_epochs}, '
          f'Training Loss: {train_loss:.4f}, '
          f'Validation Loss: {validation_loss:.4f}, '
          f'Validation Accuracy: {accuracy:.4f}')

# Announce completion once, after the final epoch.
print('Training Complete')

In [None]:
# Ask PyTorch's CUDA caching allocator to release cached memory it is not currently using.
torch.cuda.empty_cache()