In [1]:
!gdown 1SCilGtXlIvTMFV3PJMhn2-d-T02ubRhW

Downloading...
From (original): https://drive.google.com/uc?id=1SCilGtXlIvTMFV3PJMhn2-d-T02ubRhW
From (redirected): https://drive.google.com/uc?id=1SCilGtXlIvTMFV3PJMhn2-d-T02ubRhW&confirm=t&uuid=c2da2769-02f7-4cd2-9ed9-d02d5b10c60a
To: /content/skin-cancer9-classesisic.zip
100% 824M/824M [00:16<00:00, 48.9MB/s]


In [None]:
!unzip /content/skin-cancer9-classesisic.zip

In [None]:
pip install tensorflow_federated

In [52]:
import pandas as pd
import os
import glob
from sklearn.preprocessing import LabelEncoder
from skimage import io
import cv2
import numpy as np

# Define the root directory where your data is stored
root_dir = 'Skin cancer ISIC The International Skin Imaging Collaboration'

# Initialize empty lists to store file paths and corresponding labels
image_paths = []
skin_diseases = []

# Iterate through each folder within train and test directories
for subdir in ['Train', 'Test']:
    # Iterate through each skin disease folder
    for disease_folder in os.listdir(os.path.join(root_dir, subdir)):
        # Construct path to the current disease folder
        disease_folder_path = os.path.join(root_dir, subdir, disease_folder)

        # Iterate through each image file within the disease folder
        for img_path in glob.glob(os.path.join(disease_folder_path, '*.jpg')):
            # Append the image path
            image_paths.append(img_path)
            # Append the corresponding skin disease label
            skin_diseases.append(disease_folder)

# Create a DataFrame from the lists
df = pd.DataFrame({'image_path': image_paths, 'skin_disease': skin_diseases})
def read_image(image_path):
    target_size=((784, 3))
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # Resize image to target size
    img_resize = cv2.resize(img_array, target_size)

    # Normalize pixel values to [0, 1]
    img_normalize = img_resize.astype(np.float32) / 255.0

    return img_normalize

# Apply the read_image function to create the 'image_data' column
df['image_data'] = df['image_path'].apply(read_image)

# Label encode the 'skin_disease' column
label_encoder = LabelEncoder()
df['skin_disease_encoded'] = label_encoder.fit_transform(df['skin_disease'])

# Display the first few rows of the DataFrame
print(df.head())

                                          image_path  \
0  Skin cancer ISIC The International Skin Imagin...   
1  Skin cancer ISIC The International Skin Imagin...   
2  Skin cancer ISIC The International Skin Imagin...   
3  Skin cancer ISIC The International Skin Imagin...   
4  Skin cancer ISIC The International Skin Imagin...   

                 skin_disease  \
0  pigmented benign keratosis   
1  pigmented benign keratosis   
2  pigmented benign keratosis   
3  pigmented benign keratosis   
4  pigmented benign keratosis   

                                          image_data  skin_disease_encoded  
0  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5  
1  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5  
2  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5  
3  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5  
4  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5  


In [53]:
columns_to_remove = ['image_path', 'skin_disease']

# Drop the specified columns
df.drop(columns=columns_to_remove, inplace=True)

# Display the DataFrame after removing columns
print(df)

                                             image_data  skin_disease_encoded
0     [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5
1     [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5
2     [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5
3     [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5
4     [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     5
...                                                 ...                   ...
2352  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     1
2353  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     1
2354  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     1
2355  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     1
2356  [[0.61960787, 0.61960787, 0.61960787, 0.615686...                     1

[2357 rows x 2 columns]


In [54]:
import random

num_client = 5

df["client"] = ["client_{}".format(random.randint(1, num_client)) for _ in range(df.shape[0])]

In [55]:
client_id_colname = 'client'

client_ids = df[client_id_colname].unique()

train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

In [56]:
import nest_asyncio
nest_asyncio.apply()

In [57]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [58]:
train_client_ids

['client_3', 'client_4', 'client_5', 'client_1']

In [59]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

In [60]:
features ="image_data"

In [61]:
from collections import OrderedDict
import tensorflow as tf
import numpy as np

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100

def create_tf_dataset_for_client_fn(client_id):
    client_data = dataframe[dataframe[client_id_colname] == client_id]
    client_data_dict = OrderedDict()
    client_data_dict["image_data"] = np.array(client_data['image_data'].values.tolist(), dtype="float32")
    client_data_dict["skin_disease_encoded"] = np.array(client_data['skin_disease_encoded'].values.tolist(), dtype="int32")

    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
    return dataset

In [62]:
import tensorflow_federated as tff

In [63]:
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

In [64]:
train_data.element_type_structure

OrderedDict([('image_data',
              TensorSpec(shape=(None, 3, 784), dtype=tf.float32, name=None)),
             ('skin_disease_encoded',
              TensorSpec(shape=(None,), dtype=tf.int32, name=None))])

In [65]:
import collections

NUM_EPOCHS = 1
BATCH_SIZE = 8
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(x=tf.reshape(element['image_data'], [-1,28,28,3]),
                                       y=tf.reshape(element['skin_disease_encoded'], [-1, 1]))

    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

In [66]:
from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]

In [67]:
NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))

sample_clients = train_data.client_ids[0:NUM_CLIENTS]

federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

100%|██████████| 4/4 [00:00<00:00, 72.61it/s]

Number of client datasets: 4
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 28, 28, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>





In [68]:
import tensorflow as tf
from tensorflow.keras import layers, models
input_shape = (28, 28, 3)
num_classes = 9
def create_keras_model():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Input shape and number of classes


# Create the CNN model
simple_cnn_model = create_keras_model()

# Display model summary
simple_cnn_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 26, 26, 32)        896       
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_12 (Conv2D)          (None, 3, 3, 64)          36928     
                                                                 
 flatten_3 (Flatten)         (None, 576)              

In [69]:
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

example_element = next(iter(example_dataset))

example_element['skin_disease_encoded'].numpy()

array([8], dtype=int32)

In [70]:
preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

In [71]:
preprocessed_example_dataset.element_spec

OrderedDict([('x',
              TensorSpec(shape=(None, 28, 28, 3), dtype=tf.float32, name=None)),
             ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])

In [72]:
from keras.metrics import Recall, Precision

def model_fn():
    keras_model = create_keras_model()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [73]:
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

In [74]:
str(iterative_process.initialize.type_signature)

'( -> <global_model_weights=<trainable=<float32[3,3,3,32],float32[32],float32[3,3,32,64],float32[64],float32[3,3,64,64],float32[64],float32[576,64],float32[64],float32[64,9],float32[9]>,non_trainable=<>>,distributor=<>,client_work=<>,aggregator=<value_sum_process=<>,weight_sum_process=<>>,finalizer=<learning_rate=float32>>@SERVER)'

In [75]:
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.21616872), ('loss', 2.1192877), ('num_examples', 569), ('num_batches', 72)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])


In [76]:
# After training, save the model
simple_cnn_model.save('skin_cancer_model.h5')


  saving_api.save_model(


In [77]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model('skin_cancer_model.h5')

# Define a function to preprocess new images
def preprocess_image(image_path):
    target_size = (28, 28)  # Ensure this matches the input shape of your model
    img_array = cv2.imread(image_path, cv2.IMREAD_COLOR)  # Read image in color mode

    # Resize image to target size
    img_resize = cv2.resize(img_array, target_size)

    # Normalize pixel values to [0, 1]
    img_normalize = img_resize.astype(np.float32) / 255.0

    # Add a batch dimension
    img_normalized_batch = np.expand_dims(img_normalize, axis=0)

    return img_normalized_batch

# Example usage for making predictions on new image(s)
new_image_path = '/content/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0010512.jpg'  # Replace with the path to your new image

# Preprocess the new image
new_image = preprocess_image(new_image_path)

# Make predictions using the loaded model
predictions = loaded_model.predict(new_image)

# Display the predictions
print(predictions)




[[0.10784444 0.11324544 0.11196733 0.13457085 0.11139758 0.09977899
  0.10814907 0.10377938 0.10926685]]


In [78]:
# Define class names
class_names = [
    "actinic keratosis",
    "basal cell carcinoma",
    "dermatofibroma",
    "melanoma",
    "nevus",
    "pigmented benign keratosis",
    "seborrheic keratosis",
    "squamous cell carcinoma",
    "vascular lesion"
]

# Load the saved model
loaded_model = load_model('skin_cancer_model.h5')

# Define a function to preprocess new images
def preprocess_image(image_path):
    target_size = (28, 28)  # Ensure this matches the input shape of your model
    img_array = cv2.imread(image_path, cv2.IMREAD_COLOR)  # Read image in color mode

    # Resize image to target size
    img_resize = cv2.resize(img_array, target_size)

    # Normalize pixel values to [0, 1]
    img_normalize = img_resize.astype(np.float32) / 255.0

    # Add a batch dimension
    img_normalized_batch = np.expand_dims(img_normalize, axis=0)

    return img_normalized_batch

# Example usage for making predictions on new image(s)
new_image_path = '/content/Skin cancer ISIC The International Skin Imaging Collaboration/Test/nevus/ISIC_0000000.jpg'  # Replace with the path to your new image

# Preprocess the new image
new_image = preprocess_image(new_image_path)

# Make predictions using the loaded model
predictions = loaded_model.predict(new_image)

# Convert predictions to class names
predicted_class_index = np.argmax(predictions)
predicted_class_name = class_names[predicted_class_index]

# Display the predicted class name
print("Predicted class:", predicted_class_name)




Predicted class: melanoma
