<a href="https://colab.research.google.com/github/urvashi2004/ML_MiniProjects/blob/main/SkinCancer/FirstNonCancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import os
import zipfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from google.colab import drive, files

In [7]:
# Mount Google Drive
drive.mount('/content/drive')

# Define paths
drive_base_path = '/content/drive/My Drive/Colab Notebooks/SkinCancer/Mendeley'
zip_file_path = os.path.join(drive_base_path, 'mendeleydataset.zip')
csv_file_path = os.path.join(drive_base_path, 'metadata.csv')
dataset_dir = '/content/dataset/My Drive/Colab Notebooks/'

# Extract dataset if not already extracted
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir, exist_ok=True)
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(dataset_dir)
    print(f"Dataset extracted to {dataset_dir}.")
else:
    print(f"Dataset already exists at {dataset_dir}.")

Mounted at /content/drive
Extracting dataset...
Dataset extracted to /content/dataset/My Drive/Colab Notebooks/.


In [8]:
# Adjust dataset subdirectory if necessary
image_dir = os.path.join(dataset_dir, "mendeleydataset")  # Modify this if images are in another subdirectory
if not os.path.exists(image_dir):
    image_dir = dataset_dir  # Fallback to top-level directory if no subdirectory found

# Reload metadata
labels_df = pd.read_csv(csv_file_path)
labels_df['label'] = labels_df['biopsed'].apply(lambda x: 'cancer' if x else 'non_cancer')

# Map images to labels
image_paths, labels = [], []
for _, row in labels_df.iterrows():
    img_path = os.path.join(image_dir, row['img_id'])
    if os.path.exists(img_path):
        image_paths.append(img_path)
        labels.append(row['label'])

print(f"Found {len(image_paths)} images with labels.")

Found 2298 images with labels.


In [9]:
# Split dataset
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=42
)

# Compute class weights
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels
)
class_weights = dict(enumerate(class_weights))

# Image preprocessing and augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': train_paths, 'class': train_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': val_paths, 'class': val_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

Found 1838 validated image filenames belonging to 2 classes.
Found 460 validated image filenames belonging to 2 classes.


In [10]:
# Define model function
def build_model(base_model):
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train VGG16
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
vgg_model = build_model(vgg_base)
vgg_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    class_weight=class_weights
)

# Train ResNet50
resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
resnet_model = build_model(resnet_base)
resnet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    class_weight=class_weights
)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np

# Function to evaluate the model
def evaluate_model(model, val_generator):
    # Step 1: Predict the validation set
    val_predictions = (model.predict(val_generator) > 0.5).astype("int32")  # Binary predictions
    true_labels = val_generator.classes  # True labels from the generator

    # Step 2: Calculate metrics
    accuracy = accuracy_score(true_labels, val_predictions)
    precision = precision_score(true_labels, val_predictions)
    recall = recall_score(true_labels, val_predictions)
    f1 = f1_score(true_labels, val_predictions)

    # Step 3: Print detailed metrics
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # Step 4: Classification report
    target_names = ['Non-Cancerous', 'Cancerous']
    report = classification_report(true_labels, val_predictions, target_names=target_names)
    print("\nClassification Report:")
    print(report)

# Evaluate VGG16 model
print("Evaluating VGG16 Model...")
evaluate_model(vgg_model, val_generator)

# Evaluate ResNet50 model
print("\nEvaluating ResNet50 Model...")
evaluate_model(resnet_model, val_generator)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Epoch 1/10


  self._warn_if_super_not_called()


[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 3s/step - accuracy: 0.4690 - loss: 0.7332 - val_accuracy: 0.5848 - val_loss: 0.6901
Epoch 2/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 2s/step - accuracy: 0.5462 - loss: 0.6934 - val_accuracy: 0.4152 - val_loss: 0.6949
Epoch 3/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 2s/step - accuracy: 0.4831 - loss: 0.6947 - val_accuracy: 0.5848 - val_loss: 0.6931
Epoch 4/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2s/step - accuracy: 0.5979 - loss: 0.6898 - val_accuracy: 0.5848 - val_loss: 0.6931
Epoch 5/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 2s/step - accuracy: 0.5719 - loss: 0.6960 - val_accuracy: 0.5848 - val_loss: 0.6931
Epoch 6/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 2s/step - accuracy: 0.5949 - loss: 0.6905 - val_accuracy: 0.5848 - val_loss: 0.6931
Epoch 7/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 1s/step
Accuracy: 0.4174
Precision: 0.4161
Recall: 1.0000
F1 Score: 0.5877

Classification Report:
               precision    recall  f1-score   support

Non-Cancerous       1.00      0.00      0.01       269
    Cancerous       0.42      1.00      0.59       191

     accuracy                           0.42       460
    macro avg       0.71      0.50      0.30       460
 weighted avg       0.76      0.42      0.25       460



In [11]:
# Save models
vgg_model.save('/content/vgg_model.h5')
resnet_model.save('/content/resnet_model.h5')

# Define preprocessing function for predictions
def preprocess_image(image_path, target_size):
    img = load_img(image_path, target_size=target_size)
    img_array = img_to_array(img) / 255.0
    return img_array

# Predict function
def predict_image(image_path, model):
    img = preprocess_image(image_path, target_size=(224, 224))
    img_array = np.expand_dims(img, axis=0)
    prediction = model.predict(img_array)
    return 'Cancerous' if prediction[0][0] > 0.5 else 'Non-Cancerous'

# Upload and test an image
print("Upload an image to test the model.")
uploaded = files.upload()
uploaded_image_path = list(uploaded.keys())[0]

print(f"Prediction using VGG16: {predict_image(uploaded_image_path, vgg_model)}")
print(f"Prediction using ResNet50: {predict_image(uploaded_image_path, resnet_model)}")



Upload an image to test the model.


Saving PAT_313_669_908.png to PAT_313_669_908.png
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Prediction using VGG16: Non-Cancerous
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Prediction using ResNet50: Cancerous
