In [1]:
pip install tensorflow_federated

Collecting tensorflow_federated
  Downloading tensorflow_federated-0.75.0-py3-none-manylinux_2_31_x86_64.whl.metadata (4.0 kB)
Collecting cachetools~=5.3 (from tensorflow_federated)
  Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting dp-accounting==0.4.3 (from tensorflow_federated)
  Downloading dp_accounting-0.4.3-py3-none-any.whl.metadata (1.8 kB)
Collecting farmhashpy==0.4.0 (from tensorflow_federated)
  Downloading farmhashpy-0.4.0.tar.gz (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting google-vizier==0.1.11 (from tensorflow_federated)
  Downloading google_vizier-0.1.11-py3-none-any.whl.metadata (10 kB)
Collecting jaxlib==0.4.14 (from tensorflow_federated)
  Downloading jaxlib-0.4.14-cp310-cp310-manylinux2014_x86_64.whl.metadata (2.0 kB)
Collecting jax==0.4.14 (from tensorflow_federated)
  Downloading jax-0.4.1

In [2]:
import os
import cv2
import random
import itertools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.utils import to_categorical, plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras import layers, regularizers, optimizers, callbacks
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

2024-04-06 17:04:43.500368: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-06 17:04:43.500437: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-06 17:04:43.500492: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Check if GPU is available
device = '/GPU:0' if tf.config.experimental.list_physical_devices('GPU') else '/CPU:0'

# Define paths
HAM_PATH_PART1 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1"
HAM_PATH_PART2 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2"
METADATA_PATH = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"

def load_data_ham(path, metadata_path):
    print("Loading data from:", path)
    data = []
    metadata = pd.read_csv(metadata_path)
    for img_file in os.listdir(path):
        img_id = img_file.split('.')[0]
        metadata_row = metadata[metadata['image_id'] == img_id]
        if not metadata_row.empty:
            lesion_type = metadata_row.iloc[0]['dx']
            img_path = os.path.join(path, img_file)
            img_resize = process_image(img_path)
            data.append([img_resize, lesion_type])
    return data

def process_image(img_path, target_size=(64, 48)):
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # Resize image to target size
    img_resize = cv2.resize(img_array, target_size)

    # Normalize pixel values to [0, 1]
    img_normalize = img_resize.astype(np.float32) / 255.0

    return img_normalize

# Load data for HAM dataset
with tf.device(device):
    ham_data_part1 = load_data_ham(HAM_PATH_PART1, METADATA_PATH)
    ham_data_part2 = load_data_ham(HAM_PATH_PART2, METADATA_PATH)

# Concatenate HAM datasets into a single list
all_ham_data = ham_data_part1 + ham_data_part2

# Convert the list of lists to a DataFrame
column_names = ['image_data', 'skin_disease']
ham_df = pd.DataFrame(all_ham_data, columns=column_names)

# Print the structure of the merged HAM dataset
print("Structure of the merged HAM dataset:")
print(ham_df.head())

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform the 'skin_disease' column
ham_df['skin_disease'] = label_encoder.fit_transform(ham_df['skin_disease'])

# Print the encoded DataFrame
print(ham_df.head())
df=pd.DataFrame(ham_df)

Loading data from: /kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1
Loading data from: /kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2
Structure of the merged HAM dataset:
                                          image_data skin_disease
0  [[0.68235296, 0.6666667, 0.6784314, 0.6509804,...           nv
1  [[0.5568628, 0.5803922, 0.5764706, 0.58431375,...           nv
2  [[0.6666667, 0.6431373, 0.67058825, 0.7058824,...           nv
3  [[0.54509807, 0.53333336, 0.5254902, 0.4862745...          bkl
4  [[0.5764706, 0.5686275, 0.5647059, 0.6039216, ...          bkl
                                          image_data  skin_disease
0  [[0.68235296, 0.6666667, 0.6784314, 0.6509804,...             5
1  [[0.5568628, 0.5803922, 0.5764706, 0.58431375,...             5
2  [[0.6666667, 0.6431373, 0.67058825, 0.7058824,...             5
3  [[0.54509807, 0.53333336, 0.5254902, 0.4862745...             2
4  [[0.5764706, 0.5686275, 0.5647059, 0.6039216, ...             2

In [4]:
import random

num_client = 10

df["client"] = ["client_{}".format(random.randint(1, num_client)) for _ in range(df.shape[0])]

In [5]:
client_id_colname = 'client'

client_ids = df[client_id_colname].unique()

train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

In [6]:
import nest_asyncio
nest_asyncio.apply()

In [7]:
%load_ext tensorboard

In [8]:
train_client_ids

['client_5',
 'client_10',
 'client_6',
 'client_9',
 'client_3',
 'client_4',
 'client_7',
 'client_1']

In [9]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [10]:
features ="image_data"

In [11]:
from collections import OrderedDict
import tensorflow as tf
import numpy as np

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100

def create_tf_dataset_for_client_fn(client_id):
    client_data = dataframe[dataframe[client_id_colname] == client_id]
    client_data_dict = OrderedDict()
    client_data_dict["image_data"] = np.array(client_data['image_data'].values.tolist(), dtype="float32")
    client_data_dict["skin_disease"] = np.array(client_data['skin_disease'].values.tolist(), dtype="int32")

    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
    return dataset

In [12]:
import tensorflow_federated as tff

In [13]:
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

In [14]:
train_data.element_type_structure

OrderedDict([('image_data',
              TensorSpec(shape=(None, 48, 64), dtype=tf.float32, name=None)),
             ('skin_disease',
              TensorSpec(shape=(None,), dtype=tf.int32, name=None))])

In [15]:
import collections

NUM_EPOCHS = 1
BATCH_SIZE = 10
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(x=tf.reshape(element['image_data'], [-1,32,32,3]),
                                       y=tf.reshape(element['skin_disease'], [-1, 1]))

    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

In [16]:
from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]

In [17]:
NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))

sample_clients = train_data.client_ids[0:NUM_CLIENTS]

federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

100%|██████████| 8/8 [00:00<00:00, 37.63it/s]

Number of client datasets: 8
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>





In [18]:
def create_keras_model(input_shape=(32, 32, 3), num_classes=7):
    base_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False

    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.applications.resnet.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)
    return model

In [19]:
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

example_element = next(iter(example_dataset))

example_element['skin_disease'].numpy()

array([1], dtype=int32)

In [20]:
preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

In [21]:
preprocessed_example_dataset.element_spec

OrderedDict([('x',
              TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None)),
             ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])

In [22]:
from keras.metrics import Recall, Precision

def model_fn():
    keras_model = create_keras_model()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [23]:
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [24]:
str(iterative_process.initialize.type_signature)

'( -> <global_model_weights=<trainable=<float32[2048,7],float32[7]>,non_trainable=<float32[7,7,3,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[1,1,64,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[3,3,64,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[1,1,64,256],float32[256],float32[1,1,64,256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[1,1,256,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[3,3,64,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[1,1,64,256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[1,1,256,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[3,3,64,64],float32[64],float32[64],float32[64],float32[64],float32[64],float32[1,1,64,256],float32[256],float32[256],float32[256],float32[256],float32[256],float32[1,1,256,128],fl

In [25]:
state = iterative_process.initialize()

I0000 00:00:1712423606.895581     234 session_provider.cc:108] Found devices: [/physical_device:CPU:0]
I0000 00:00:1712423606.895635     234 session_provider.cc:124] Skipping device: [/physical_device:CPU:0]
I0000 00:00:1712423606.895638     234 session_provider.cc:127] Found:
	0 GPUs
	0 TPUS
in total


In [26]:
NUM_ROUNDS = 10
for round_num in range(0, NUM_ROUNDS):
  result = iterative_process.next(state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.55289054), ('loss', 3.4457488), ('num_examples', 1626), ('num_batches', 166)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.5436654), ('loss', 3.6372576), ('num_examples', 1626), ('num_batches', 166)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.50246), ('loss', 3.1170409), ('num_examples', 1626), ('num_batches', 166)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finali

In [27]:
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

In [28]:
evaluation_state = evaluation_process.initialize()
model_weights = iterative_process.get_model_weights(train_state)
evaluation_state = evaluation_process.set_model_weights(evaluation_state, model_weights)

In [29]:
federated_test_data = make_federated_data(test_data, test_client_ids)

len(federated_test_data), federated_test_data[0]

100%|██████████| 2/2 [00:00<00:00, 50.89it/s]


(2,
 <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>)

In [30]:
evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)

In [31]:
str(evaluation_output.metrics)

"OrderedDict([('distributor', ()), ('client_work', OrderedDict([('eval', OrderedDict([('current_round_metrics', OrderedDict([('sparse_categorical_accuracy', 0.69496024), ('loss', 4.6836557), ('num_examples', 377), ('num_batches', 39)])), ('total_rounds_metrics', OrderedDict([('sparse_categorical_accuracy', 0.69496024), ('loss', 4.6836557), ('num_examples', 377), ('num_batches', 39)]))]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])"