In [None]:
pip install tensorflow_federated



In [None]:
import pandas as pd
import os
import glob
from sklearn.preprocessing import LabelEncoder
from skimage import io
import cv2
import numpy as np

# Location of the dataset on the mounted Drive
root_dir = '/content/drive/MyDrive/archive (1)/Skin cancer ISIC The International Skin Imaging Collaboration'

# Parallel lists: one file path and one folder-name label per image
image_paths = []
skin_diseases = []

# Walk both splits; every sub-folder name is used as the disease label
for subdir in ['Train', 'Test']:
    split_path = os.path.join(root_dir, subdir)
    for disease_folder in os.listdir(split_path):
        disease_folder_path = os.path.join(split_path, disease_folder)
        jpg_pattern = os.path.join(disease_folder_path, '*.jpg')

        # Record each .jpg in this folder together with the folder's label
        for img_path in glob.glob(jpg_pattern):
            image_paths.append(img_path)
            skin_diseases.append(disease_folder)

# One row per image: its path and its disease label
df = pd.DataFrame({'image_path': image_paths, 'skin_disease': skin_diseases})
def read_image(image_path, target_size=(784, 3)):
    """Load one image file as a normalized grayscale array.

    Args:
        image_path: Path of the image file to load.
        target_size: Size passed to ``cv2.resize`` as ``(width, height)``.
            The default ``(784, 3)`` yields an array of shape ``(3, 784)``,
            i.e. 2352 values, which the notebook later reshapes to
            ``(28, 28, 3)``.

    Returns:
        A ``float32`` array with pixel values scaled to ``[0, 1]``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``image_path``.
    """
    # Read the path that was passed in. The previous version read the global
    # `img_path` left over from the directory-walking loop, so every row of the
    # DataFrame received the pixels of the same (last globbed) image.
    img_array = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img_array is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("cv2.imread could not load image: {}".format(image_path))

    # Resize image to target size
    img_resize = cv2.resize(img_array, target_size)

    # Normalize pixel values to [0, 1]
    # NOTE(review): a 784x3 grayscale resize reshaped to (28, 28, 3) is not a
    # spatial RGB image — confirm whether a 28x28 colour resize was intended.
    img_normalize = img_resize.astype(np.float32) / 255.0

    return img_normalize

# Apply the read_image function to every path to create the 'image_data' column
# (one normalized float32 array per row).
df['image_data'] = df['image_path'].apply(read_image)

# Label encode the 'skin_disease' column: each distinct folder-name label is
# mapped to an integer id stored in 'skin_disease_encoded'.
label_encoder = LabelEncoder()
df['skin_disease_encoded'] = label_encoder.fit_transform(df['skin_disease'])

# Display the first few rows of the DataFrame
print(df.head())

                                          image_path          skin_disease  \
0  /content/drive/MyDrive/archive (1)/Skin cancer...  seborrheic keratosis   
1  /content/drive/MyDrive/archive (1)/Skin cancer...  seborrheic keratosis   
2  /content/drive/MyDrive/archive (1)/Skin cancer...  seborrheic keratosis   
3  /content/drive/MyDrive/archive (1)/Skin cancer...  seborrheic keratosis   
4  /content/drive/MyDrive/archive (1)/Skin cancer...  seborrheic keratosis   

                                          image_data  skin_disease_encoded  
0  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6  
1  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6  
2  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6  
3  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6  
4  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6  


In [None]:
# The raw path and text label are redundant once 'image_data' and
# 'skin_disease_encoded' have been added to the frame.
columns_to_remove = ['image_path', 'skin_disease']

# Remove them from df in place
df.drop(columns_to_remove, axis='columns', inplace=True)

# Show the trimmed DataFrame
print(df)

                                             image_data  skin_disease_encoded
0     [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6
1     [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6
2     [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6
3     [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6
4     [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     6
...                                                 ...                   ...
2352  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     0
2353  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     0
2354  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     0
2355  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     0
2356  [[0.6509804, 0.654902, 0.6627451, 0.6666667, 0...                     0

[2357 rows x 2 columns]


In [None]:
import random

num_client = 5

# Assign every row to one of `num_client` simulated clients.
# A dedicated seeded generator makes the partition reproducible across kernel
# restarts without touching the global `random` state (42 matches the
# random_state used for train_test_split in this notebook).
client_rng = random.Random(42)
df["client"] = ["client_{}".format(client_rng.randint(1, num_client)) for _ in range(df.shape[0])]

In [None]:
# Column of df that holds the simulated client id of each row
client_id_colname = 'client'

# Distinct client ids, in order of first appearance
client_ids = df[client_id_colname].unique()

# Pick 80% of the clients for training; the rest are held out for testing.
# random_state pins the selection so the split is the same on every run.
train_client_ids = pd.Series(client_ids).sample(frac=0.8, random_state=42).tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): root_dir in the data-loading cell points below
# /content/drive/MyDrive, so on a fresh kernel this cell has to run before
# that cell — consider moving it to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import nest_asyncio
nest_asyncio.apply()

In [None]:
%load_ext tensorboard

In [None]:
# Display the client ids selected for training
train_client_ids

['client_5', 'client_1', 'client_3', 'client_2']

In [None]:
from sklearn.model_selection import train_test_split

# Row-level split of df: 30% of the rows go to test_df, the rest to train_df.
# random_state=42 keeps the split identical between runs.
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

In [None]:
# Keep the 'client' column on train_df: create_tf_dataset_for_client_fn filters
# rows by client_id_colname and NUM_CLIENTS is computed from
# train_df[client_id_colname], so dropping it here raises KeyError on a fresh
# Restart & Run All. Work on an explicit copy so train_df is independent of df.
train_df = train_df.copy()

In [None]:
# Name of the column holding the image arrays.
# NOTE(review): `features` is not referenced elsewhere in the visible notebook —
# confirm it is still needed.
features ="image_data"

In [None]:
from collections import OrderedDict
import tensorflow as tf
import numpy as np

# How often each client dataset is repeated, and the shuffle buffer size
NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100


def create_tf_dataset_for_client_fn(client_id):
    """Build a tf.data.Dataset from one client's rows.

    Rows are taken from the module-level ``dataframe`` where the
    ``client_id_colname`` column equals ``client_id``.

    Args:
        client_id: Value to match against the client column.

    Returns:
        A dataset of ordered dicts with keys ``"image_data"`` (float32) and
        ``"skin_disease_encoded"`` (int32), shuffled with ``SHUFFLE_BUFFER``,
        batched with batch size 1 and repeated ``NUM_EPOCHS`` times.
    """
    rows = dataframe[dataframe[client_id_colname] == client_id]

    # Stack the per-row values into dense arrays with fixed dtypes
    images = np.array(rows['image_data'].values.tolist(), dtype="float32")
    labels = np.array(rows['skin_disease_encoded'].values.tolist(), dtype="int32")
    tensors = OrderedDict([("image_data", images), ("skin_disease_encoded", labels)])

    return (
        tf.data.Dataset.from_tensor_slices(tensors)
        .shuffle(SHUFFLE_BUFFER)
        .batch(1)
        .repeat(NUM_EPOCHS)
    )

In [None]:
import tensorflow_federated as tff

In [None]:
# create_tf_dataset_for_client_fn reads the module-level `dataframe`, so it is
# pointed at the training rows before the training ClientData is built ...
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

# ... and re-pointed at the test rows before the test ClientData is built.
# NOTE(review): if the dataset function is evaluated lazily, train_data may read
# test_df after this rebinding — confirm when TFF evaluates
# serializable_dataset_fn.
dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

In [None]:
# Inspect the structure (shapes and dtypes) of the elements in the client datasets
train_data.element_type_structure

OrderedDict([('image_data',
              TensorSpec(shape=(None, 3, 784), dtype=tf.float32, name=None)),
             ('skin_disease_encoded',
              TensorSpec(shape=(None,), dtype=tf.int32, name=None))])

In [None]:
import collections

# Pipeline settings used by preprocess()
NUM_EPOCHS = 1
BATCH_SIZE = 10
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10


def preprocess(dataset):
    """Turn a client dataset into batches of model inputs.

    The dataset is repeated ``NUM_EPOCHS`` times, shuffled (buffer
    ``SHUFFLE_BUFFER``, seed 1), batched by ``BATCH_SIZE``, mapped to an
    ordered dict with ``x`` reshaped to ``[-1, 28, 28, 3]`` and ``y`` reshaped
    to ``[-1, 1]``, and finally prefetched with ``PREFETCH_BUFFER``.

    Args:
        dataset: Dataset whose elements have ``'image_data'`` and
            ``'skin_disease_encoded'`` entries.

    Returns:
        The transformed dataset.
    """
    def batch_format_fn(element):
        # Rename the fields to the x/y pair and fix their shapes
        images = tf.reshape(element['image_data'], [-1,28,28,3])
        labels = tf.reshape(element['skin_disease_encoded'], [-1, 1])
        return collections.OrderedDict(x=images, y=labels)

    repeated = dataset.repeat(NUM_EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER, seed=1)
    batched = shuffled.batch(BATCH_SIZE)
    return batched.map(batch_format_fn).prefetch(PREFETCH_BUFFER)

In [None]:
from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    """Create one preprocessed dataset per client.

    Args:
        client_data: Object providing ``create_tf_dataset_for_client``.
        client_ids: Iterable of client ids to build datasets for.

    Returns:
        A list with the ``preprocess``-ed dataset of each client, in the
        order of ``client_ids``.
    """
    federated = []
    for cid in tqdm(client_ids):
        client_ds = client_data.create_tf_dataset_for_client(cid)
        federated.append(preprocess(client_ds))
    return federated

In [None]:
# Number of distinct client ids present in the training rows
NUM_CLIENTS = np.unique(train_df[client_id_colname]).size

# Take at most that many clients from the federated training set
sample_clients = train_data.client_ids[:NUM_CLIENTS]

# Build the list of per-client preprocessed datasets
federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

100%|██████████| 4/4 [00:00<00:00, 29.22it/s]

Number of client datasets: 4
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 28, 28, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>





In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
# Input shape and number of classes
input_shape = (28, 28, 3)
num_classes = 9


def create_keras_model():
    """Build the small CNN classifier used by this notebook.

    Three 3x3 ReLU convolutions (32, 64, 64 filters) with 2x2 max pooling after
    the first two, followed by a 64-unit ReLU dense layer and a softmax layer
    with ``num_classes`` outputs. The input shape is ``input_shape``.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model


# Create the CNN model
simple_cnn_model = create_keras_model()

# Display model summary
simple_cnn_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 26, 26, 32)        896       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 3, 64)          36928     
                                                                 
 flatten_3 (Flatten)         (None, 576)              

In [None]:
# Build the raw (not yet preprocessed) dataset of the first training client
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

# Pull a single element from it
example_element = next(iter(example_dataset))

# Show the encoded label(s) of that element as a NumPy array
example_element['skin_disease_encoded'].numpy()

array([4], dtype=int32)

In [None]:
# Run the example client's dataset through preprocess(); its element_spec is
# used by model_fn as the model's input_spec.
preprocessed_example_dataset = preprocess(example_dataset)

# Take the first preprocessed batch and convert every tensor in it to NumPy
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

In [None]:
# Inspect the x/y shapes and dtypes produced by preprocess()
preprocessed_example_dataset.element_spec

OrderedDict([('x',
              TensorSpec(shape=(None, 28, 28, 3), dtype=tf.float32, name=None)),
             ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])

In [None]:
from keras.metrics import Recall, Precision

def model_fn():
    """Create a new Keras CNN and wrap it for TFF.

    A fresh model is built by ``create_keras_model`` on every call and passed
    to ``tff.learning.models.from_keras_model`` together with the element spec
    of ``preprocessed_example_dataset``, a sparse categorical cross-entropy
    loss and a sparse categorical accuracy metric.

    Returns:
        The object returned by ``tff.learning.models.from_keras_model``.
    """
    cnn = create_keras_model()
    wrap_kwargs = dict(
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    return tff.learning.models.from_keras_model(cnn, **wrap_kwargs)

In [None]:
# Build the weighted federated-averaging process from model_fn; each client
# optimizer is a new Adam instance with learning rate 0.001.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

In [None]:
# Show the type signature of the process's initialize computation as text
str(iterative_process.initialize.type_signature)

'( -> <global_model_weights=<trainable=<float32[3,3,3,32],float32[32],float32[3,3,32,64],float32[64],float32[3,3,64,64],float32[64],float32[576,64],float32[64],float32[64,9],float32[9]>,non_trainable=<>>,distributor=<>,client_work=<>,aggregator=<value_sum_process=<>,weight_sum_process=<>>,finalizer=<learning_rate=float32>>@SERVER)'

In [None]:
# Create the initial state of the process; the training round below needs `state`.
state = iterative_process.initialize()

In [None]:
# Run one round of the iterative process on the federated training data.
# Requires `state` from iterative_process.initialize(); the recorded NameError
# means that cell had not been executed before this one.
state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

NameError: name 'state' is not defined