In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("bmadushanirodrigo/fracture-multi-region-x-ray-data")

print("Path to dataset files:", path)


Path to dataset files: /root/.cache/kagglehub/datasets/bmadushanirodrigo/fracture-multi-region-x-ray-data/versions/2


In [2]:
import os

In [3]:
os.listdir(path)

['README.dataset.txt', 'Bone_Fracture_Binary_Classification']

In [4]:
os.listdir(os.path.join(path, "Bone_Fracture_Binary_Classification"))

['Bone_Fracture_Binary_Classification']

In [5]:
os.listdir(os.path.join(path, "Bone_Fracture_Binary_Classification","Bone_Fracture_Binary_Classification"))

['test', 'val', 'train']

In [6]:
train_dir=os.path.join(path, "Bone_Fracture_Binary_Classification","Bone_Fracture_Binary_Classification","train")

In [7]:
valid_dir=os.path.join(path, "Bone_Fracture_Binary_Classification","Bone_Fracture_Binary_Classification","val")

In [8]:
test_dir=os.path.join(path, "Bone_Fracture_Binary_Classification","Bone_Fracture_Binary_Classification","test")

In [9]:
!pip install kagglehub

import os
from PIL import Image

def find_corrupted_images(directory):
    corrupted_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                img = Image.open(file_path)
                img.verify()  # Check if the image is corrupted
            except (IOError, SyntaxError) as e:
                corrupted_files.append(file_path)
                print(f"Corrupted file: {file_path}, Error: {e}") # Print the error for debugging
    return corrupted_files

# Assuming 'train_dir', 'valid_dir', and 'test_dir' are already defined
train_corrupted = find_corrupted_images(train_dir)
valid_corrupted = find_corrupted_images(valid_dir)
test_corrupted = find_corrupted_images(test_dir)

# Remove corrupted files (be careful with this!)
# Consider backing up the files before deleting them.
for file_path in train_corrupted + valid_corrupted + test_corrupted:
    # os.remove(file_path)  # Uncomment to remove corrupted files
    print(f"Removed or replaced corrupted file: {file_path}")



In [10]:
import os
from PIL import Image

def remove_corrupted_images(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                img = Image.open(file_path)
                img.verify()  # Verify the file integrity
            except (IOError, SyntaxError):
                print(f"Removing corrupted image: {file_path}")
                os.remove(file_path)  # Remove the corrupted file

# Clean train, validation, and test directories
remove_corrupted_images(train_dir)
remove_corrupted_images(valid_dir)
remove_corrupted_images(test_dir)


In [11]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np


# Define image size and batch size
IMG_SIZE = (299, 299)  # ResNet requires 224x224 images
BATCH_SIZE = 32

# Data augmentation and preprocessing
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

valid_datagen = ImageDataGenerator(rescale=1.0/255)
test_datagen = ImageDataGenerator(rescale=1.0/255)

# Load data
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"  # Binary classification
)

valid_data = valid_datagen.flow_from_directory(
    valid_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"  # Binary classification
)

test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",  # Binary classification
    shuffle=False
)

Found 9246 images belonging to 2 classes.
Found 829 images belonging to 2 classes.
Found 506 images belonging to 2 classes.


In [12]:
!pip install pillow --upgrade # Upgrade to latest version or install if you haven't installed yet
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np
from PIL import Image, ImageFile # Import ImageFile instead of UnImagePlugin

# Define image size and batch size
IMG_SIZE = (299, 299)  # ResNet requires 224x224 images
BATCH_SIZE = 32

# Data augmentation and preprocessing
# ... (rest of your code)

# Load data, incorporating error handling for corrupted images
def load_data_with_error_handling(directory, datagen, target_size, batch_size, class_mode, shuffle=True):
    """
    Loads data from a directory, handling corrupted image files gracefully.
    """
    ImageFile.LOAD_TRUNCATED_IMAGES = True # Set ImageFile.LOAD_TRUNCATED_IMAGES to True
    i = 0
    while True:
        try:
            data_generator = datagen.flow_from_directory(
                directory,
                target_size=target_size,
                batch_size=batch_size,
                class_mode=class_mode,
                shuffle=shuffle
            )
            # If this point is reached, data loading was successful
            break  # Exit the loop
        except Exception as e:  # Handle generic exceptions during data loading
            i += 1
            print(f"Error encountered: {e}. Attempt {i}. Retrying...")
            if i >= 5:  # Maximum retry attempts
                print("Maximum retry attempts reached. Aborting data loading.")
                raise  # Re-raise the exception if max attempts are reached
            # Handle the corrupted file or error appropriately
            # E.g., log the error, skip the file, or replace it with a placeholder
    return data_generator
train_data = load_data_with_error_handling(train_dir, train_datagen, IMG_SIZE, BATCH_SIZE, "binary")
valid_data = load_data_with_error_handling(valid_dir, valid_datagen, IMG_SIZE, BATCH_SIZE, "binary")
test_data = load_data_with_error_handling(test_dir, test_datagen, IMG_SIZE, BATCH_SIZE, "binary", shuffle=False)
# ... (rest of your code)


Found 9246 images belonging to 2 classes.
Found 829 images belonging to 2 classes.
Found 506 images belonging to 2 classes.


In [13]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras import layers, models

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
checkpoint = ModelCheckpoint('inception_best_model.keras', save_best_only=True)

# Define the model
# Load pre-trained InceptionV3 model
base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=(299, 299, 3))
base_model.trainable = False  # Freeze base layers

# Build the model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")  # Sigmoid for binary classification
])

# Compile the model
model.compile(
    optimizer='adam',  # You can choose a different optimizer if needed
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=10,
    steps_per_epoch=len(train_data),  # Or a fixed value
    validation_steps=len(valid_data),  # Or a fixed value
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 728ms/step - accuracy: 0.6817 - loss: 0.6281



[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 810ms/step - accuracy: 0.6820 - loss: 0.6277 - val_accuracy: 0.8504 - val_loss: 0.3653
Epoch 2/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 702ms/step - accuracy: 0.8186 - loss: 0.4000 - val_accuracy: 0.8637 - val_loss: 0.3874
Epoch 4/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 699ms/step - accuracy: 0.8621 - loss: 0.3179 - val_accuracy: 0.8890 - val_loss: 0.2655
Epoch 6/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 696ms/step - accuracy: 0.8857 - loss: 0.2846 - val_accuracy: 0.8951 - val_loss: 0.2389
Epoch 8/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 699ms/step - accuracy: 0.8872 - loss: 0.2652 - 