In [14]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate, Dropout
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the Excel file
excel_path = 'C:/Users/eunjun/hemorrhage_diagnosis1.xlsx'
df = pd.read_excel(excel_path)

# Function to preprocess the image
def preprocess_image(image_path, target_size):
    try:
        img = load_img(image_path, target_size=target_size)
        img_array = img_to_array(img)
        return img_array
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None

# Parameters
IMG_SIZE = (128, 128)
brain_images = []
bone_images = []
labels = []
image_paths = []

# Load images and labels
image_dir = 'C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT'
for index, row in df.iterrows():
    patient_number = row['PatientNumber']
    if patient_number > 100:
        continue
    
    slice_number = row['SliceNumber']
    
    brain_image_path = os.path.join(image_dir, str(patient_number), 'brain', f'{slice_number}.jpg')
    bone_image_path = os.path.join(image_dir, str(patient_number), 'bone', f'{slice_number}.jpg')
    
    if os.path.exists(brain_image_path) and os.path.exists(bone_image_path):
        if 'HGE' not in brain_image_path:  # Exclude images with 'HGE' in the filename
            brain_image = preprocess_image(brain_image_path, IMG_SIZE)
            bone_image = preprocess_image(bone_image_path, IMG_SIZE)
            if brain_image is not None and bone_image is not None:
                label = row[['Intraventricular', 'Intraparenchymal', 'Subarachnoid', 'Epidural', 'Subdural', 'No_Hemorrhage', 'Fracture_Yes_No']].values
                
                brain_images.append(brain_image)
                bone_images.append(bone_image)
                labels.append(label)
                image_paths.append((brain_image_path, bone_image_path))

# Convert lists to numpy arrays and ensure they have the same length
min_length = min(len(brain_images), len(bone_images), len(labels))
brain_images = np.array(brain_images[:min_length])
bone_images = np.array(bone_images[:min_length])
labels = np.array(labels[:min_length])
image_paths = image_paths[:min_length]

# Normalize images
brain_images = brain_images / 255.0
bone_images = bone_images / 255.0

# Check class distribution
print("Number of brain images:", len(brain_images))
print("Number of bone images:", len(bone_images))
print("Number of labels:", len(labels))

# Convert labels to float32 numpy arrays
labels = np.array(labels, dtype=np.float32)

if len(brain_images) > 0 and len(bone_images) > 0 and len(labels) > 0:
    # Split the data into training (70%), validation (20%), and testing (10%) sets
    X_train_brain, X_temp_brain, X_train_bone, X_temp_bone, y_train, y_temp, train_paths, temp_paths = train_test_split(
        brain_images, bone_images, labels, image_paths, test_size=0.3, random_state=42)
    
    X_val_brain, X_test_brain, X_val_bone, X_test_bone, y_val, y_test, val_paths, test_paths = train_test_split(
        X_temp_brain, X_temp_bone, y_temp, temp_paths, test_size=(1/3), random_state=42)

    # Print test paths
    print("Test Paths:")
    for brain_path, bone_path in test_paths:
        print(f"Brain: {brain_path}, Bone: {bone_path}")

    # Define custom loss function with class weights
    def weighted_binary_crossentropy(y_true, y_pred):
        weights = np.array([5, 3, 5, 2, 3, 0.5, 0.5])
        weights = tf.constant(weights, dtype='float32')
        bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
        weighted_bce = bce * weights
        return tf.reduce_mean(weighted_bce, axis=-1)
        
    # Build the CNN model for brain images
    input_brain = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    x1 = Conv2D(16, (3, 3), activation='relu')(input_brain)
    x1 = MaxPooling2D((2, 2))(x1)
    x1 = Conv2D(32, (3, 3), activation='relu')(x1)
    x1 = MaxPooling2D((2, 2))(x1)
    x1 = Flatten()(x1)

    # Build the CNN model for bone images
    input_bone = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    x2 = Conv2D(16, (3, 3), activation='relu')(input_bone)
    x2 = MaxPooling2D((2, 2))(x2)
    x2 = Conv2D(32, (3, 3), activation='relu')(x2)
    x2 = MaxPooling2D((2, 2))(x2)
    x2 = Flatten()(x2)

    # Combine the outputs
    combined = concatenate([x1, x2])
    combined = Dense(64, activation='relu')(combined)
    combined = Dropout(0.5)(combined)
    output = Dense(7, activation='sigmoid')(combined)  # Change to sigmoid for multi-label classification

    model = Model(inputs=[input_brain, input_bone], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.summary()

    # Train the model
    model.fit([X_train_brain, X_train_bone], y_train, epochs=20, batch_size=32, validation_data=([X_val_brain, X_val_bone], y_val))

    # Save the model
    model.save('intracranial_hemorrhage_model.h5')

Number of brain images: 1583
Number of bone images: 1583
Number of labels: 1583
Test Paths:
Brain: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\62\brain\3.jpg, Bone: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\62\bone\3.jpg
Brain: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\89\brain\3.jpg, Bone: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\89\bone\3.jpg
Brain: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\51\brain\7.jpg, Bone: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\51\bone\7.jpg
Brain: C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT\56\brain\7.jpg, Bone: C:/Users/eunjun

Epoch 1/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 201ms/step - accuracy: 0.7205 - loss: 0.3577 - val_accuracy: 0.7753 - val_loss: 0.2046
Epoch 2/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 192ms/step - accuracy: 0.8168 - loss: 0.2051 - val_accuracy: 0.8006 - val_loss: 0.1780
Epoch 3/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 193ms/step - accuracy: 0.8093 - loss: 0.1851 - val_accuracy: 0.7943 - val_loss: 0.1508
Epoch 4/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 191ms/step - accuracy: 0.8299 - loss: 0.1423 - val_accuracy: 0.8133 - val_loss: 0.1424
Epoch 5/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 201ms/step - accuracy: 0.8623 - loss: 0.1210 - val_accuracy: 0.8196 - val_loss: 0.1215
Epoch 6/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 182ms/step - accuracy: 0.8413 - loss: 0.1122 - val_accuracy: 0.8544 - val_loss: 0.1146
Epoch 7/20
[1m35/35[0m [3



In [29]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the Excel file
excel_path = 'C:/Users/eunjun/hemorrhage_diagnosis1.xlsx'
df = pd.read_excel(excel_path)

# Function to preprocess the image
def preprocess_image(image_path, target_size):
    try:
        img = load_img(image_path, target_size=target_size)
        img_array = img_to_array(img)
        return img_array
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None

# Parameters
IMG_SIZE = (128, 128)
brain_images = []
bone_images = []
labels = []
image_paths = []

# Load images and labels
image_dir = 'C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT'
for index, row in df.iterrows():
    patient_number = row['PatientNumber']
    slice_number = row['SliceNumber']
    
    brain_image_path = os.path.join(image_dir, str(patient_number), 'brain', f'{slice_number}.jpg')
    bone_image_path = os.path.join(image_dir, str(patient_number), 'bone', f'{slice_number}.jpg')
    
    if os.path.exists(brain_image_path) and os.path.exists(bone_image_path):
        if 'HGE' not in brain_image_path:  # Exclude images with 'HGE' in the filename
            brain_image = preprocess_image(brain_image_path, IMG_SIZE)
            bone_image = preprocess_image(bone_image_path, IMG_SIZE)
            if brain_image is not None and bone_image is not None:
                label = row[['Intraventricular', 'Intraparenchymal', 'Subarachnoid', 'Epidural', 'Subdural', 'No_Hemorrhage', 'Fracture_Yes_No']].values
                
                brain_images.append(brain_image)
                bone_images.append(bone_image)
                labels.append(label)
                image_paths.append((brain_image_path, bone_image_path))

# Convert lists to numpy arrays and ensure they have the same length
min_length = min(len(brain_images), len(bone_images), len(labels))
brain_images = np.array(brain_images[:min_length])
bone_images = np.array(bone_images[:min_length])
labels = np.array(labels[:min_length])
image_paths = image_paths[:min_length]

# Normalize images
brain_images = brain_images / 255.0
bone_images = bone_images / 255.0

# Convert labels to float32 numpy arrays
labels = np.array(labels, dtype=np.float32)

# Split the data into training (70%), validation (20%), and testing (10%) sets
X_train_brain, X_temp_brain, X_train_bone, X_temp_bone, y_train, y_temp, train_paths, temp_paths = train_test_split(
    brain_images, bone_images, labels, image_paths, test_size=0.3, random_state=42)

X_val_brain, X_test_brain, X_val_bone, X_test_bone, y_val, y_test, val_paths, test_paths = train_test_split(
    X_temp_brain, X_temp_bone, y_temp, temp_paths, test_size=(1/3), random_state=42)

# Load the pre-trained model
model = load_model('intracranial_hemorrhage_model.h5')

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate([X_test_brain, X_test_bone], y_test)
print(f"Test Accuracy: {test_accuracy}, Test Loss: {test_loss}")

# Predict on the test set
predicted_labels = model.predict([X_test_brain, X_test_bone])
predicted_labels = (predicted_labels > 0.5).astype(int)

# Helper function to convert numpy array to a string without dots
def array_to_string(array):
    return str(array).replace('.', '')

# Create a DataFrame to compare actual and predicted labels
comparison = pd.DataFrame({
    'PatientNumber': [path[0].split('/')[-3] for path in test_paths],
    'SliceNumber': [path[0].split('/')[-1].split('.')[0] for path in test_paths],
    'Actual': [array_to_string(label) for label in y_test],
    'Predicted': [array_to_string(label) for label in predicted_labels]
})

# Save the comparison to a CSV file
comparison.to_csv('prediction_comparison.csv', index=False)

# Save the comparison to an Excel file
comparison.to_excel('prediction_comparison.xlsx', index=False)

print(comparison.head())



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.8760 - loss: 0.1151
Test Accuracy: 0.8840000033378601, Test Loss: 0.09368744492530823




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step
  PatientNumber               SliceNumber           Actual        Predicted
0        eunjun  Patients_CT\113\brain\21  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
1        eunjun   Patients_CT\63\brain\10  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
2        eunjun  Patients_CT\112\brain\11  [0 0 0 0 0 1 0]  [0 0 0 1 0 0 0]
3        eunjun    Patients_CT\85\brain\5  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
4        eunjun  Patients_CT\104\brain\27  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]


In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the provided CSV file
file_path = r'C:\Users\eunjun\[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/prediction_comparison.xlsx'
df = pd.read_csv(file_path)

# Fix the invalid syntax issue by adding commas in the arrays
def fix_array_syntax(array_string):
    return array_string.replace(' ', ', ')

# Apply the fix to both Actual and Predicted columns
df['Actual'] = df['Actual'].apply(fix_array_syntax).apply(eval)
df['Predicted'] = df['Predicted'].apply(fix_array_syntax).apply(eval)

# Convert the lists of lists into numpy arrays
actual_array = np.array(df['Actual'].tolist())
predicted_array = np.array(df['Predicted'].tolist())

# Define the labels
labels = ['Intraventricular', 'Intraparenchymal', 'Subarachnoid', 'Epidural', 'Subdural', 'No_Hemorrhage', 'Fracture_Yes_No']

# Define a function to count occurrences for individual conditions
def count_individual_occurrences(actual, predicted):
    counts = np.zeros((8, 8), dtype=int)  # 7 conditions + 1 for false positives/negatives
    for a, p in zip(actual, predicted):
        for i in range(len(a)):
            counts[i, i] += (a[i] == 1 and p[i] == 1)
            counts[i, -1] += (a[i] == 1 and p[i] == 0)
            counts[-1, i] += (a[i] == 0 and p[i] == 1)
    return counts

# Count occurrences for individual conditions
occurrence_matrix = count_individual_occurrences(actual_array, predicted_array)

# Create a heatmap with actual values on the y-axis and predicted values on the x-axis
plt.figure(figsize=(10, 8))
sns.heatmap(occurrence_matrix, annot=True, fmt="d", cmap="YlGnBu", xticklabels=labels + ['False Negative'], yticklabels=labels + ['False Positive'])
plt.title('Actual vs Predicted Values Heatmap')
plt.xlabel('Predicted Condition')
plt.ylabel('Actual Condition')
plt.show()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbe in position 12: invalid start byte

In [28]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the Excel file
excel_path = 'C:/Users/eunjun/hemorrhage_diagnosis1.xlsx'
df = pd.read_excel(excel_path)

# Function to preprocess the image
def preprocess_image(image_path, target_size):
    try:
        img = load_img(image_path, target_size=target_size)
        img_array = img_to_array(img)
        return img_array
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None

# Parameters
IMG_SIZE = (128, 128)
brain_images = []
bone_images = []
labels = []
image_paths = []

# Load images and labels
image_dir = 'C:/Users/eunjun/[2] tomography-images-for-intracranial-hemorrhage-detection-and-segmentation/Patients_CT'
for index, row in df.iterrows():
    patient_number = row['PatientNumber']
    slice_number = row['SliceNumber']
    
    brain_image_path = os.path.join(image_dir, str(patient_number), 'brain', f'{slice_number}.jpg')
    bone_image_path = os.path.join(image_dir, str(patient_number), 'bone', f'{slice_number}.jpg')
    
    if os.path.exists(brain_image_path) and os.path.exists(bone_image_path):
        if 'HGE' not in brain_image_path:  # Exclude images with 'HGE' in the filename
            brain_image = preprocess_image(brain_image_path, IMG_SIZE)
            bone_image = preprocess_image(bone_image_path, IMG_SIZE)
            if brain_image is not None and bone_image is not None:
                label = row[['Intraventricular', 'Intraparenchymal', 'Subarachnoid', 'Epidural', 'Subdural', 'No_Hemorrhage', 'Fracture_Yes_No']].values
                
                brain_images.append(brain_image)
                bone_images.append(bone_image)
                labels.append(label)
                image_paths.append((brain_image_path, bone_image_path))

# Convert lists to numpy arrays and ensure they have the same length
min_length = min(len(brain_images), len(bone_images), len(labels))
brain_images = np.array(brain_images[:min_length])
bone_images = np.array(bone_images[:min_length])
labels = np.array(labels[:min_length])
image_paths = image_paths[:min_length]

# Normalize images
brain_images = brain_images / 255.0
bone_images = bone_images / 255.0

# Convert labels to float32 numpy arrays
labels = np.array(labels, dtype=np.float32)

# Split the data into training (70%), validation (20%), and testing (10%) sets
X_train_brain, X_temp_brain, X_train_bone, X_temp_bone, y_train, y_temp, train_paths, temp_paths = train_test_split(
    brain_images, bone_images, labels, image_paths, test_size=0.3, random_state=42)

X_val_brain, X_test_brain, X_val_bone, X_test_bone, y_val, y_test, val_paths, test_paths = train_test_split(
    X_temp_brain, X_temp_bone, y_temp, temp_paths, test_size=(1/3), random_state=42)

# Load the pre-trained model
model = load_model('intracranial_hemorrhage_model.h5')

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate([X_test_brain, X_test_bone], y_test)
print(f"Test Accuracy: {test_accuracy}, Test Loss: {test_loss}")

# Predict on the test set
predicted_labels = model.predict([X_test_brain, X_test_bone])
predicted_labels = (predicted_labels > 0.5).astype(int)

# Helper function to convert numpy array to a string without dots
def array_to_string(array):
    return str(array).replace('.', '')

# Create a DataFrame to compare actual and predicted labels
comparison = pd.DataFrame({
    'PatientNumber': [path[0].split('/')[-3] for path in test_paths],
    'SliceNumber': [path[0].split('/')[-1].split('.')[0] for path in test_paths],
    'Actual': [array_to_string(label) for label in y_test],
    'Predicted': [array_to_string(label) for label in predicted_labels]
})

# Save the comparison to a CSV file
comparison.to_csv('prediction_comparison.csv', index=False)

# Save the comparison to an Excel file
comparison.to_excel('prediction_comparison.xlsx', index=False)

print(comparison.head())



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.8760 - loss: 0.1151
Test Accuracy: 0.8840000033378601, Test Loss: 0.09368744492530823




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step
  PatientNumber               SliceNumber           Actual        Predicted
0        eunjun  Patients_CT\113\brain\21  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
1        eunjun   Patients_CT\63\brain\10  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
2        eunjun  Patients_CT\112\brain\11  [0 0 0 0 0 1 0]  [0 0 0 1 0 0 0]
3        eunjun    Patients_CT\85\brain\5  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
4        eunjun  Patients_CT\104\brain\27  [0 0 0 0 0 1 0]  [0 0 0 0 0 1 0]
