In [None]:

import zipfile
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications.resnet_v2 import ResNet152V2, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier


In [None]:
# Colab-only step: mount Google Drive at /content/drive; the paths used by the
# following cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Locations of the raw inputs on the mounted Drive.
zip_file_path = '/content/drive/MyDrive/kidney_refined_img.zip'
csv_file_path = '/content/drive/MyDrive/Coldsheet.csv'
extraction_dir = '/content/drive/MyDrive/kidney_refined_img'

# Unpack the image archive into its own folder on Drive.
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extraction_dir)

# Top-level entries of the extraction folder (not used by the visible cells below).
extracted_files = os.listdir(extraction_dir)

# Read the score sheet and preview its first rows as the cell output.
scores_df = pd.read_csv(csv_file_path)
scores_df.head()

Unnamed: 0,Name of image,Score of 1 - 5
0,Cold Kidney 2,5
1,Cold Kidney 3,5
2,Cold Kidney 4,5
3,Cold Kidney 5,5
4,Cold Kidney 6,4


In [None]:
# Random augmentation settings; `datagen.random_transform` draws from these
# ranges each time an image is loaded.
augmentation_settings = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_settings)

def load_and_augment_image(image_path, augment=True, target_size=(224, 224)):
    """Load one image and return it as a preprocessed batch of size one.

    Args:
        image_path: Path of the image file to read.
        augment: When True (the default, which matches the previous
            behaviour) a random transform drawn from the module-level
            ``datagen`` is applied. Pass False to get the image without any
            random transform, e.g. for held-out evaluation data.
        target_size: ``(height, width)`` the image is resized to on load.
            Defaults to the 224x224 size used for the ResNet input.

    Returns:
        The image array with a leading batch axis of size one added by
        ``np.expand_dims(..., axis=0)``, or ``None`` when ``image_path`` is
        not an existing file.
    """
    if not os.path.isfile(image_path):
        return None

    img_array = img_to_array(load_img(image_path, target_size=target_size))
    # Model-specific preprocessing runs before the random transform, exactly
    # as in the original pipeline.
    img_array = preprocess_input(img_array)
    if augment:
        img_array = datagen.random_transform(img_array)
    return np.expand_dims(img_array, axis=0)


In [None]:
# NOTE(review): the saved output of this cell already contains the
# `formatted_name` column, which is only added by a later cell -- the notebook
# was executed out of order. On a fresh Restart & Run All this preview shows the
# sheet without that column.
scores_df.head()

Unnamed: 0,Name of image,Score of 1 - 5,formatted_name
0,Cold Kidney 2,5,cold kidney 2
1,Cold Kidney 3,5,cold kidney 3
2,Cold Kidney 4,5,cold kidney 4
3,Cold Kidney 5,5,cold kidney 5
4,Cold Kidney 6,4,cold kidney 6


# **Data Preparation**

In [None]:
# Image loading and preprocessing
image_dir = '/content/drive/MyDrive/kidney_refined_img/images new'

# os.listdir returns entries in arbitrary order; sort so the sample order (and
# therefore the later train/test split) is the same on every run.
image_paths = sorted(
    os.path.join(image_dir, filename)
    for filename in os.listdir(image_dir)
    if os.path.isfile(os.path.join(image_dir, filename))
)

# Load every image exactly once. The previous version called
# load_and_augment_image twice per file (once for the `is not None` filter and
# once for the value), doubling the disk reads and augmentation work.
loaded_images = [(path, load_and_augment_image(path)) for path in image_paths]
loaded_images = [(path, array) for path, array in loaded_images if array is not None]
if not loaded_images:
    raise FileNotFoundError(f"No loadable images found in {image_dir!r}")

# Keep the path list aligned with the stacked array so that scores looked up
# by path later match the image rows one for one.
image_paths = [path for path, _ in loaded_images]
image_arrays = np.vstack([array for _, array in loaded_images])


In [None]:
# Helper used to match image file names against the names in the score sheet
def format_filename(name):
    """Return ``name`` without its extension, lower-cased and stripped of surrounding whitespace."""
    stem, _extension = os.path.splitext(name)
    return stem.lower().strip()

In [None]:
# Add a normalised copy of the image name, then build a lookup from that
# normalised name to the score (a repeated name keeps its last score).
scores_df['formatted_name'] = scores_df['Name of image'].map(format_filename)
name_to_score = {
    formatted: score
    for formatted, score in zip(scores_df['formatted_name'], scores_df['Score of 1 - 5'])
}


In [None]:
# Look up a score for every image path; files without a match are logged and
# given the sentinel score -1 so they can be filtered out afterwards.
scores = []
not_found = []

for path in image_paths:
    if not os.path.isfile(path):
        continue
    file_name = os.path.basename(path)
    matched_score = name_to_score.get(format_filename(file_name))
    if matched_score is None:
        not_found.append(file_name)
        scores.append(-1)
    else:
        scores.append(matched_score)

if len(not_found) == 0:
    print("All files matched successfully.")
else:
    print("No scores found for the following files:")
    print("\n".join(not_found))

scores = np.array(scores)

All files matched successfully.


In [None]:
# Drop every sample whose score lookup failed (marked with the sentinel -1),
# keeping the image rows and the scores aligned.
valid_indices = [idx for idx in range(len(scores)) if scores[idx] != -1]
image_arrays = image_arrays[valid_indices]
scores = np.array([scores[idx] for idx in valid_indices])

In [None]:
# ImageNet-pretrained ResNet152V2 backbone, built with include_top=False.
base_model = ResNet152V2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Names of the layers whose activations are used as features.
layer_names = [
    'conv1_conv',
    'conv2_block3_out',
    'conv3_block8_out',
    'conv4_block36_out',
    'conv5_block3_out',
]
layer_outputs = []
for tapped_name in layer_names:
    layer_outputs.append(base_model.get_layer(tapped_name).output)

# A single model that returns the activations of all tapped layers at once.
multi_output_model = Model(inputs=base_model.input, outputs=layer_outputs)

# Pooling layer shared by all tapped activations.
gap_layer = GlobalAveragePooling2D()

# Define function to extract and pool features from the specified layers
def extract_and_pool_features(image_tensor):
    """Run one image through ``multi_output_model`` and pool each activation.

    Args:
        image_tensor: A single image array without a batch axis; a leading
            axis of size one is added before prediction.

    Returns:
        A list with one flattened numpy array per model output, each obtained
        by passing that output through ``gap_layer``.
    """
    batch = np.expand_dims(image_tensor, axis=0)
    # verbose=0: this function is called once per image, and the default
    # verbosity prints a progress bar for every call, flooding the cell output.
    features_per_layer = multi_output_model.predict(batch, verbose=0)
    return [
        gap_layer(tf.convert_to_tensor(feature)).numpy().flatten()
        for feature in features_per_layer
    ]

# One empty bucket per tapped layer; the extraction loop appends one pooled
# feature vector per image to each bucket.
features_by_layer = [list() for _ in layer_names]


In [None]:
# Start from empty buckets so that re-running this cell does not append a
# second copy of every feature vector, which would break the row-for-row
# alignment between the features and `scores`.
features_by_layer = [[] for _ in layer_names]

for image_array in image_arrays:
    pooled_features = extract_and_pool_features(image_array)
    for layer_index, layer_features in enumerate(pooled_features):
        features_by_layer[layer_index].append(layer_features)




In [None]:
# Turn each layer's list of per-image vectors into a single numpy array
# (one row per image).
feature_arrays = list(map(np.array, features_by_layer))

In [None]:
# Define a function to train and evaluate classifiers
# NOTE: a later cell defines another `train_and_evaluate` with a different
# signature, which replaces this one once it has run. Re-run this cell before
# calling the (features, scores, layer_name) form again.
def train_and_evaluate(features, scores, layer_name):
    """Fit a logistic regression on one layer's features and report test accuracy.

    The data is split 80/20 with a fixed seed, standardised using statistics
    from the training part only, and scored on the held-out part.

    Args:
        features: 2-D array-like with one row of features per sample.
        scores: Labels aligned with the rows of ``features``.
        layer_name: Name used in the printed messages.

    Returns:
        The test-set accuracy, or ``None`` when training is skipped because
        ``features`` is not 2-D or has no columns. (Earlier versions always
        returned ``None``; the printed output is unchanged.)
    """
    features = np.asarray(features)
    # Guard against empty or wrongly shaped input before indexing shape[1].
    if features.ndim != 2 or features.shape[1] == 0:
        print(f"No features available for training on layer: {layer_name}")
        return None

    X_train, X_test, y_train, y_test = train_test_split(
        features, scores, test_size=0.2, random_state=42
    )

    # Normalize features; the scaler is fitted on the training split only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Logistic Regression Classifier
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, classifier.predict(X_test))
    print(f"Accuracy for {layer_name}: {accuracy:.2f}")
    return accuracy



# **Logistic Regression**

In [None]:
# Fit and score one logistic-regression model per tapped layer
for layer_name, feature_array in zip(layer_names, feature_arrays):
    print(f"\nTraining classifier for features extracted from layer: {layer_name}")
    train_and_evaluate(feature_array, scores, layer_name)


Training classifier for features extracted from layer: conv1_conv
Accuracy for conv1_conv: 0.45

Training classifier for features extracted from layer: conv2_block3_out
Accuracy for conv2_block3_out: 0.45

Training classifier for features extracted from layer: conv3_block8_out
Accuracy for conv3_block8_out: 0.45

Training classifier for features extracted from layer: conv4_block36_out
Accuracy for conv4_block36_out: 0.45

Training classifier for features extracted from layer: conv5_block3_out
Accuracy for conv5_block3_out: 0.25


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Standardise a train/test pair
def scale_features(X_train, X_test):
    """Standardise both splits with a scaler fitted on the training split only.

    Returns:
        ``(X_train_scaled, X_test_scaled)``.
    """
    fitted_scaler = StandardScaler().fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

# Classifier Training and Evaluation Function
# NOTE: this replaces the earlier three-argument `train_and_evaluate` defined
# in a previous cell; after this cell runs, only this signature is callable.
def train_and_evaluate(classifier, X_train, X_test, y_train, y_test, layer_name, classifier_name):
    """Fit ``classifier`` on the training split and report its test accuracy.

    Args:
        classifier: Estimator exposing ``fit`` and ``predict``.
        X_train, X_test: Feature matrices for the two splits.
        y_train, y_test: Labels for the two splits.
        layer_name: Layer name used in the printed message.
        classifier_name: Classifier label used in the printed message.

    Returns:
        The test-set accuracy. (Earlier versions returned ``None``; the
        printed output is unchanged, and the value can now be collected.)
    """
    classifier.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, classifier.predict(X_test))
    print(f"{classifier_name} Accuracy for {layer_name}: {accuracy:.2f}")
    return accuracy


# **SVM, Random Forest, GBM**

In [None]:

# Linear SVM on each tapped layer's features
for layer_name, features in zip(layer_names, feature_arrays):
    print(f"\nProcessing {layer_name}")

    # 80/20 split with a fixed seed, then standardise using the training part
    split_parts = train_test_split(features, scores, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split_parts
    X_train_scaled, X_test_scaled = scale_features(X_train, X_test)

    # Fit and score the SVM for this layer
    svm_classifier = SVC(kernel='linear')
    train_and_evaluate(
        svm_classifier,
        X_train_scaled,
        X_test_scaled,
        y_train,
        y_test,
        layer_name,
        "SVM",
    )



Processing conv1_conv
SVM Accuracy for conv1_conv: 0.45

Processing conv2_block3_out
SVM Accuracy for conv2_block3_out: 0.45

Processing conv3_block8_out
SVM Accuracy for conv3_block8_out: 0.45

Processing conv4_block36_out
SVM Accuracy for conv4_block36_out: 0.50

Processing conv5_block3_out
SVM Accuracy for conv5_block3_out: 0.25


In [None]:

# Gradient boosting on each tapped layer's features
for i, features in enumerate(feature_arrays):
    layer_name = layer_names[i]
    print(f"\nProcessing {layer_name}")

    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(features, scores, test_size=0.2, random_state=42)

    # Scale features
    X_train_scaled, X_test_scaled = scale_features(X_train, X_test)

    # Gradient Boosting Classifier.
    # random_state is fixed (same seed as the split) so the reported accuracy
    # is reproducible; without it the result changes from run to run.
    gbm_classifier = GradientBoostingClassifier(n_estimators=100, random_state=42)
    train_and_evaluate(gbm_classifier, X_train_scaled, X_test_scaled, y_train, y_test, layer_name, "GBM")


Processing conv1_conv
GBM Accuracy for conv1_conv: 0.50

Processing conv2_block3_out
GBM Accuracy for conv2_block3_out: 0.40

Processing conv3_block8_out
GBM Accuracy for conv3_block8_out: 0.35

Processing conv4_block36_out
GBM Accuracy for conv4_block36_out: 0.50

Processing conv5_block3_out
GBM Accuracy for conv5_block3_out: 0.45


# **Random Forest without scaling features gives better results**

In [None]:
# Random forest on each tapped layer's unscaled features
for i, features in enumerate(feature_arrays):
    layer_name = layer_names[i]
    print(f"\nProcessing {layer_name}")

    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(features, scores, test_size=0.2, random_state=42)

    # No scaling here: the unscaled split is passed straight to the classifier.
    # The previous version still passed X_train_scaled / X_test_scaled, which
    # this loop never assigns -- they were stale globals left over from the last
    # iteration of an earlier cell, so every layer was trained and scored on the
    # same (wrong) features.

    # Random Forest Classifier; random_state is fixed so results are reproducible.
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    train_and_evaluate(rf_classifier, X_train, X_test, y_train, y_test, layer_name, "Random Forest")



Processing conv1_conv
Random Forest Accuracy for conv1_conv: 0.55

Processing conv2_block3_out
Random Forest Accuracy for conv2_block3_out: 0.60

Processing conv3_block8_out
Random Forest Accuracy for conv3_block8_out: 0.55

Processing conv4_block36_out
Random Forest Accuracy for conv4_block36_out: 0.45

Processing conv5_block3_out
Random Forest Accuracy for conv5_block3_out: 0.50
