In [21]:
# Imports
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
import flwr as fl
from typing import Dict, List, Tuple
import numpy as np
from collections import OrderedDict
import os
from pathlib import Path
import tempfile
import logging
import json
from datetime import datetime
import time

In [22]:
# Set client ID (change for each client: 1,2,3,4)
CLIENT_ID = 1  # Change this for each client

In [23]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(f"Client_{CLIENT_ID}")

def log_status(status: str, details: str = ""):
    status_line = f"\n{'='*20} {status} {'='*20}"
    logger.info(status_line)
    if details:
        logger.info(details)
    logger.info("="*len(status_line))
    print(status_line)
    if details:
        print(details)
    print("="*len(status_line))

# Set random seed
torch.manual_seed(42 + CLIENT_ID)

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log_status("DEVICE INFO", f"Client {CLIENT_ID} using device: {device}")

2025-03-09 15:13:32,724 - Client_1 - INFO - 
2025-03-09 15:13:32,725 - Client_1 - INFO - Client 1 using device: cuda



Client 1 using device: cuda


In [24]:
# Create temporary directory for outputs
temp_dir = tempfile.mkdtemp()
log_status("DIRECTORY INFO", f"Using temporary directory: {temp_dir}")

# Define message size
GRPC_MAX_MESSAGE_LENGTH = 1024 * 1024 * 1024

2025-03-09 15:13:32,754 - Client_1 - INFO - 
2025-03-09 15:13:32,755 - Client_1 - INFO - Using temporary directory: C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4



Using temporary directory: C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4


In [25]:
# Load dataset
log_status("LOADING DATASET", f"Client {CLIENT_ID} loading dataset...")
try:
    dataset = load_dataset("medalpaca/medical_meadow_medical_flashcards")
    
    # Select different ranges for each client
    start_idx = (CLIENT_ID - 1) * 1000
    end_idx = CLIENT_ID * 1000
    small_dataset = dataset['train'].select(range(start_idx, end_idx))
    
    log_status("DATASET LOADED", 
              f"Dataset size: {len(small_dataset)} examples\n"
              f"Range: {start_idx} to {end_idx}")
    
    # Extract test questions from the dataset
    test_indices = np.linspace(0, len(small_dataset)-1, 5, dtype=int)

    test_questions = []
    test_answers = []
    for idx in test_indices:
        example = small_dataset[int(idx)]  # Convert idx to Python int
        test_questions.append(example['input'])   # Ensure correct column names
        test_answers.append(example['output'])    # Ensure correct column names
    
    log_status("TEST QUESTIONS SELECTED", 
              f"Number of test questions: {len(test_questions)}")
    
except Exception as e:
    log_status("DATASET ERROR", str(e))
    raise

2025-03-09 15:13:32,780 - Client_1 - INFO - 
2025-03-09 15:13:32,782 - Client_1 - INFO - Client 1 loading dataset...



Client 1 loading dataset...


2025-03-09 15:13:34,540 - Client_1 - INFO - 
2025-03-09 15:13:34,542 - Client_1 - INFO - Dataset size: 1000 examples
Range: 0 to 1000
2025-03-09 15:13:34,544 - Client_1 - INFO - 
2025-03-09 15:13:34,544 - Client_1 - INFO - Number of test questions: 5



Dataset size: 1000 examples
Range: 0 to 1000

Number of test questions: 5


In [26]:
test_questions

['What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels?',
 'Can you describe Gardner syndrome and the conditions that it is associated with?',
 'What is Plummer-Vinson syndrome and what are the main symptoms associated with this condition?',
 'What is the genetic cause of cystic fibrosis, and which gene and chromosome is affected by this defect?',
 'What is the relationship between upper motoneuron lesions and paralysis?']

In [27]:
# Format flashcards
def format_flashcard(example):
    return {
        'text': f"Question: {example['input']}\nAnswer: {example['output']}\n\n"
    }

formatted_dataset = small_dataset.map(format_flashcard)

  obj.co_lnotab,  # for < python 3.10 [not counted in args]


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [28]:
# Initialize model and tokenizer
log_status("MODEL INITIALIZATION", "Loading model and tokenizer...")
try:
    model_name = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    
    # Configure tokenizer
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id
    
    log_status("MODEL LOADED", 
              f"Model: {model_name}\n"
              f"Parameters: {sum(p.numel() for p in model.parameters())}")
except Exception as e:
    log_status("MODEL ERROR", str(e))
    raise

2025-03-09 15:13:34,774 - Client_1 - INFO - 
2025-03-09 15:13:34,776 - Client_1 - INFO - Loading model and tokenizer...



Loading model and tokenizer...


2025-03-09 15:13:36,528 - Client_1 - INFO - 
2025-03-09 15:13:36,529 - Client_1 - INFO - Model: gpt2
Parameters: 124439808



Model: gpt2
Parameters: 124439808


In [29]:
# Function to generate answers
def generate_answer(question: str, max_length: int = 100) -> str:
    try:
        prompt = f"Question: {question}\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        
        outputs = model.generate(
            inputs["input_ids"],
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            temperature=0.7
        )
        
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        logger.error(f"Error generating answer: {e}")
        return f"Error generating answer: {str(e)}"

In [30]:
# Function to evaluate model responses
def evaluate_model_responses(phase="Before"):
    log_status(f"{phase.upper()} TRAINING EVALUATION", "Starting model evaluation...")
    responses = {}
    for q, a in zip(test_questions, test_answers):
        response = generate_answer(q)
        responses[q] = {
            'model_response': response,
            'ground_truth': a
        }
        print(f"\nQuestion: {q}")
        print(f"Model Response: {response}")
        print(f"Ground Truth: {a}")
        print("-" * 50)
    return responses

In [31]:
# Tokenize dataset
def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="pt"
    )

log_status("TOKENIZATION", "Tokenizing dataset...")
tokenized_dataset = formatted_dataset.map(
    tokenize_function,
    remove_columns=formatted_dataset.column_names,
    batched=True
)

2025-03-09 15:13:36,588 - Client_1 - INFO - 
2025-03-09 15:13:36,589 - Client_1 - INFO - Tokenizing dataset...



Tokenizing dataset...


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [32]:
# Training arguments
training_args = TrainingArguments(
    output_dir=os.path.join(temp_dir, f"client_{CLIENT_ID}"),
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    learning_rate=5e-5,
    fp16=torch.cuda.is_available(),
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="no",
    save_total_limit=2,
    overwrite_output_dir=True,
)



In [33]:
# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [34]:
# Define Flower client
class MedicalFlashcardsClient(fl.client.NumPyClient):
    def __init__(self):
        log_status("CLIENT INITIALIZATION", f"Client {CLIENT_ID} initializing...")
        self.trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=data_collator,
        )
        log_status("CLIENT READY", f"Client {CLIENT_ID} initialized and ready")
        
    def get_parameters(self, config: Dict[str, str]) -> List[np.ndarray]:
        log_status("PARAMETER RETRIEVAL", f"Client {CLIENT_ID}: Getting parameters")
        return [val.cpu().numpy() for _, val in model.state_dict().items()]
    
    def set_parameters(self, parameters: List[np.ndarray]) -> None:
        log_status("PARAMETER UPDATE", f"Client {CLIENT_ID}: Setting parameters")
        params_dict = zip(model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        model.load_state_dict(state_dict, strict=True)
    
    def fit(
        self, parameters: List[np.ndarray], config: Dict[str, str]
    ) -> Tuple[List[np.ndarray], int, Dict[str, float]]:
        log_status("TRAINING START", f"Client {CLIENT_ID}: Starting training round")
        self.set_parameters(parameters)
        self.trainer.train()
        log_status("TRAINING COMPLETE", f"Client {CLIENT_ID}: Completed training round")
        return self.get_parameters(config), len(tokenized_dataset), {}
    
    def evaluate(
        self, parameters: List[np.ndarray], config: Dict[str, str]
    ) -> Tuple[float, int, Dict[str, float]]:
        log_status("EVALUATION", f"Client {CLIENT_ID}: Evaluating model")
        self.set_parameters(parameters)
        metrics = self.trainer.evaluate()
        return float(metrics["eval_loss"]), len(tokenized_dataset), {"loss": float(metrics["eval_loss"])}


In [35]:
# Save model function
def save_model(path: str = None):
    try:
        if path is None:
            path = os.path.join(temp_dir, f"medical-model-client-{CLIENT_ID}")
        
        Path(path).mkdir(parents=True, exist_ok=True)
        model.save_pretrained(path)
        tokenizer.save_pretrained(path)
        log_status("MODEL SAVED", f"Model saved to {path}")
    except Exception as e:
        log_status("SAVE ERROR", str(e))
        # Try fallback location
        home_dir = os.path.expanduser("~")
        fallback_path = os.path.join(home_dir, f"medical_model_backup_client_{CLIENT_ID}")
        Path(fallback_path).mkdir(parents=True, exist_ok=True)
        model.save_pretrained(fallback_path)
        tokenizer.save_pretrained(fallback_path)
        log_status("FALLBACK SAVE", f"Model saved to fallback location: {fallback_path}")

In [36]:
# Calculate similarity between responses
def calculate_similarity(str1, str2):
    words1 = set(str1.lower().split())
    words2 = set(str2.lower().split())
    overlap = len(words1.intersection(words2))
    union = len(words1.union(words2))
    return overlap / union if union > 0 else 0

# Test before training
log_status("PRE-TRAINING EVALUATION", "Testing model before training...")
before_responses = evaluate_model_responses("Before")

2025-03-09 15:13:36,948 - Client_1 - INFO - 
2025-03-09 15:13:36,948 - Client_1 - INFO - Testing model before training...
2025-03-09 15:13:36,951 - Client_1 - INFO - 
2025-03-09 15:13:36,951 - Client_1 - INFO - Starting model evaluation...
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Testing model before training...

Starting model evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels?
Model Response: Question: What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels?
Answer: The relationship is not clear. The most common explanation is that the higher the M g2 level, the more Ca 2+ is present. However, this is a very difficult question to answer.
The most commonly accepted explanation for the correlation between M G2 and M Th is:
Mg 2 = M + M (M + Th)
This
Ground Truth: Very low Mg2+ levels correspond to low PTH levels which in turn results in low Ca2+ levels.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: Can you describe Gardner syndrome and the conditions that it is associated with?
Model Response: Question: Can you describe Gardner syndrome and the conditions that it is associated with?
Answer: Gardner Syndrome is a condition that occurs when a person is unable to communicate with others. It is characterized by a lack of communication and inability to understand the meaning of words.
Gardner Syndrome can be diagnosed by looking at the eyes, nose, or throat. The symptoms of Gardner are:

A lack or inability of coordination
 (a lack in coordination)
, (A inability or unwillingness
Ground Truth: Gardner syndrome is a medical condition that is associated with the combination of familial adenomatous polyposis, osteomas, and fibromatosis.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is Plummer-Vinson syndrome and what are the main symptoms associated with this condition?
Model Response: Question: What is Plummer-Vinson syndrome and what are the main symptoms associated with this condition?
Answer: Plum-vinson is a condition that occurs when the body's immune system is weakened. It is caused by a deficiency of the immune response to the placenta. The plum is the part of your body that is responsible for producing the antibodies that protect your baby from the infection.
Plum vinson can be caused either by the mother's own immune deficiency or
Ground Truth: Plummer-Vinson syndrome is a medical condition characterized by iron deficiency anemia, esophageal webs, and atrophic glossitis. Some common symptoms of this syndrome include difficulty swallowing, mouth sores, and fatigue.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is the genetic cause of cystic fibrosis, and which gene and chromosome is affected by this defect?
Model Response: Question: What is the genetic cause of cystic fibrosis, and which gene and chromosome is affected by this defect?
Answer: The genetic causes of the cyst are not known. However, the most common cause is a genetic mutation in the gene for the enzyme that causes the fibroblast cell to grow. The gene is responsible for producing the protein that makes cysts.
The cysteine is an enzyme in cytoplasm that converts cytochrome P450 (CYP
Ground Truth: Cystic fibrosis is caused by a genetic defect in the CFTR gene, which is located on chromosome 7. This gene is responsible for producing a protein that regulates the movement of salt and water in and out of cells. When the CFTR gene is defective, it leads to the production of thick, sticky mucus in the lungs, pancreas, and other organs, which can cause a range of symptoms and complications associated with cystic fibrosis

In [37]:
# Start Flower client
log_status("CONNECTION SETUP", 
         f"Starting Flower client {CLIENT_ID}\n"
         f"Server address: 127.0.0.1:8081")

connection_attempts = 0
max_attempts = 3
retry_delay = 5

while connection_attempts < max_attempts:
    connection_attempts += 1
    try:
        log_status("CONNECTION ATTEMPT", f"Attempt {connection_attempts} of {max_attempts}")
        
        fl.client.start_numpy_client(
            server_address="127.0.0.1:8081",
            client=MedicalFlashcardsClient(),
            transport="grpc-bidi",
            grpc_max_message_length=GRPC_MAX_MESSAGE_LENGTH
        )
        
        log_status("TRAINING SUCCESS", "Client completed all training rounds")
        break
        
    except ConnectionRefusedError:
        log_status("CONNECTION REFUSED", 
                  f"Server not available (attempt {connection_attempts})")
        if connection_attempts < max_attempts:
            logger.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
    
    except Exception as e:
        log_status("CONNECTION ERROR", 
                  f"Error on attempt {connection_attempts}: {str(e)}")
        if connection_attempts < max_attempts:
            logger.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

2025-03-09 15:13:43,360 - Client_1 - INFO - 
2025-03-09 15:13:43,361 - Client_1 - INFO - Starting Flower client 1
Server address: 127.0.0.1:8081
2025-03-09 15:13:43,363 - Client_1 - INFO - 
2025-03-09 15:13:43,364 - Client_1 - INFO - Attempt 1 of 3
2025-03-09 15:13:43,366 - Client_1 - INFO - 
2025-03-09 15:13:43,366 - Client_1 - INFO - Client 1 initializing...
2025-03-09 15:13:43,376 - Client_1 - INFO - 
2025-03-09 15:13:43,377 - Client_1 - INFO - Client 1 initialized and ready
	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- where FlowerClient is of type flwr.client.NumPyClient object
	)
	Using `start_numpy_client()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use `flwr.client.start_client()` by ensuring you first call t


Starting Flower client 1
Server address: 127.0.0.1:8081

Attempt 1 of 3

Client 1 initializing...

Client 1 initialized and ready

Client 1: Getting parameters


[92mINFO [0m:      Sent reply
2025-03-09 15:13:44,361 - flwr - INFO - Sent reply
[92mINFO [0m:      
2025-03-09 15:13:58,690 - flwr - INFO - 
[92mINFO [0m:      Received: train message 08acf692-e783-4d4c-8a3d-3c054b443e5b
2025-03-09 15:13:58,699 - flwr - INFO - Received: train message 08acf692-e783-4d4c-8a3d-3c054b443e5b
2025-03-09 15:13:59,005 - Client_1 - INFO - 
2025-03-09 15:13:59,006 - Client_1 - INFO - Client 1: Starting training round
2025-03-09 15:13:59,008 - Client_1 - INFO - 
2025-03-09 15:13:59,008 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Starting training round

Client 1: Setting parameters


  0%|          | 0/186 [00:00<?, ?it/s]

{'loss': 2.8175, 'grad_norm': 10.451602935791016, 'learning_rate': 4.5e-06, 'epoch': 0.16}
{'loss': 2.6761, 'grad_norm': 8.25395679473877, 'learning_rate': 9.5e-06, 'epoch': 0.32}
{'loss': 2.4606, 'grad_norm': 7.498008728027344, 'learning_rate': 1.45e-05, 'epoch': 0.48}
{'loss': 2.2761, 'grad_norm': 6.108491897583008, 'learning_rate': 1.9500000000000003e-05, 'epoch': 0.64}
{'loss': 2.1735, 'grad_norm': 5.146684646606445, 'learning_rate': 2.45e-05, 'epoch': 0.8}
{'loss': 2.143, 'grad_norm': 4.9028167724609375, 'learning_rate': 2.95e-05, 'epoch': 0.96}
{'loss': 2.0158, 'grad_norm': 4.702527046203613, 'learning_rate': 3.45e-05, 'epoch': 1.12}
{'loss': 1.929, 'grad_norm': 4.356714725494385, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.28}
{'loss': 1.9331, 'grad_norm': 6.24608039855957, 'learning_rate': 4.4500000000000004e-05, 'epoch': 1.44}
{'loss': 1.8212, 'grad_norm': 5.034273624420166, 'learning_rate': 4.9500000000000004e-05, 'epoch': 1.6}
{'loss': 1.8024, 'grad_norm': 5.90068721

2025-03-09 15:21:12,765 - Client_1 - INFO - 
2025-03-09 15:21:12,765 - Client_1 - INFO - Client 1: Completed training round
2025-03-09 15:21:12,767 - Client_1 - INFO - 
2025-03-09 15:21:12,768 - Client_1 - INFO - Client 1: Getting parameters


{'train_runtime': 432.6436, 'train_samples_per_second': 6.934, 'train_steps_per_second': 0.43, 'train_loss': 1.957088065403764, 'epoch': 2.98}

Client 1: Completed training round

Client 1: Getting parameters


[92mINFO [0m:      Sent reply
2025-03-09 15:21:14,610 - flwr - INFO - Sent reply
[92mINFO [0m:      
2025-03-09 15:21:25,013 - flwr - INFO - 
[92mINFO [0m:      Received: evaluate message b40c8516-cdd4-47ae-be18-81a4f7ae5694
2025-03-09 15:21:25,015 - flwr - INFO - Received: evaluate message b40c8516-cdd4-47ae-be18-81a4f7ae5694
2025-03-09 15:21:25,218 - Client_1 - INFO - 
2025-03-09 15:21:25,219 - Client_1 - INFO - Client 1: Evaluating model
2025-03-09 15:21:25,221 - Client_1 - INFO - 
2025-03-09 15:21:25,223 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Evaluating model

Client 1: Setting parameters


[91mERROR [0m:     Client raised an exception.
Traceback (most recent call last):
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\app.py", line 570, in start_client_internal
    reply_message = client_app(message=message, context=context)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 143, in __call__
    return self._call(message, context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 126, in ffn
    out_message = handle_legacy_message_from_msgtype(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\message_handler\message_handler.py", line 135, in handle_legacy_message_from_msgtype
    evaluate_res = maybe_call_evaluate(
                   ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site


Error on attempt 1: Trainer: evaluation requires an eval_dataset.


2025-03-09 15:21:30,906 - Client_1 - INFO - 
2025-03-09 15:21:30,909 - Client_1 - INFO - Attempt 2 of 3
2025-03-09 15:21:30,913 - Client_1 - INFO - 
2025-03-09 15:21:30,914 - Client_1 - INFO - Client 1 initializing...
2025-03-09 15:21:30,931 - Client_1 - INFO - 
2025-03-09 15:21:30,932 - Client_1 - INFO - Client 1 initialized and ready
	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- where FlowerClient is of type flwr.client.NumPyClient object
	)
	Using `start_numpy_client()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- w


Attempt 2 of 3

Client 1 initializing...

Client 1 initialized and ready


[92mINFO [0m:      
2025-03-09 15:21:35,007 - flwr - INFO - 
[92mINFO [0m:      Received: train message 6c51b9d5-10a7-41c0-8b29-7d27c3a0b418
2025-03-09 15:21:35,009 - flwr - INFO - Received: train message 6c51b9d5-10a7-41c0-8b29-7d27c3a0b418
2025-03-09 15:21:35,235 - Client_1 - INFO - 
2025-03-09 15:21:35,236 - Client_1 - INFO - Client 1: Starting training round
2025-03-09 15:21:35,238 - Client_1 - INFO - 
2025-03-09 15:21:35,238 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Starting training round

Client 1: Setting parameters


  0%|          | 0/186 [00:00<?, ?it/s]

{'loss': 1.5404, 'grad_norm': 5.486627578735352, 'learning_rate': 5e-06, 'epoch': 0.16}
{'loss': 1.5069, 'grad_norm': 5.991148948669434, 'learning_rate': 9.5e-06, 'epoch': 0.32}
{'loss': 1.5088, 'grad_norm': 5.791640281677246, 'learning_rate': 1.45e-05, 'epoch': 0.48}
{'loss': 1.4613, 'grad_norm': 6.073049545288086, 'learning_rate': 1.9500000000000003e-05, 'epoch': 0.64}
{'loss': 1.4368, 'grad_norm': 5.626175403594971, 'learning_rate': 2.45e-05, 'epoch': 0.8}
{'loss': 1.445, 'grad_norm': 5.616399765014648, 'learning_rate': 2.95e-05, 'epoch': 0.96}
{'loss': 1.3616, 'grad_norm': 5.085625648498535, 'learning_rate': 3.45e-05, 'epoch': 1.12}
{'loss': 1.342, 'grad_norm': 4.81468391418457, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.28}
{'loss': 1.3702, 'grad_norm': 6.492756366729736, 'learning_rate': 4.4500000000000004e-05, 'epoch': 1.44}
{'loss': 1.2872, 'grad_norm': 5.163137912750244, 'learning_rate': 4.9500000000000004e-05, 'epoch': 1.6}
{'loss': 1.3047, 'grad_norm': 5.64082956314

2025-03-09 15:25:45,122 - Client_1 - INFO - 
2025-03-09 15:25:45,123 - Client_1 - INFO - Client 1: Completed training round
2025-03-09 15:25:45,125 - Client_1 - INFO - 
2025-03-09 15:25:45,125 - Client_1 - INFO - Client 1: Getting parameters


{'train_runtime': 249.0954, 'train_samples_per_second': 12.044, 'train_steps_per_second': 0.747, 'train_loss': 1.342599727774179, 'epoch': 2.98}

Client 1: Completed training round

Client 1: Getting parameters


[92mINFO [0m:      Sent reply
2025-03-09 15:25:46,821 - flwr - INFO - Sent reply
[92mINFO [0m:      
2025-03-09 15:25:55,233 - flwr - INFO - 
[92mINFO [0m:      Received: evaluate message 51b40eb6-7bc3-408e-8eb2-7d2237004769
2025-03-09 15:25:55,241 - flwr - INFO - Received: evaluate message 51b40eb6-7bc3-408e-8eb2-7d2237004769
2025-03-09 15:25:55,439 - Client_1 - INFO - 
2025-03-09 15:25:55,440 - Client_1 - INFO - Client 1: Evaluating model
2025-03-09 15:25:55,441 - Client_1 - INFO - 
2025-03-09 15:25:55,441 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Evaluating model

Client 1: Setting parameters


[91mERROR [0m:     Client raised an exception.
Traceback (most recent call last):
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\app.py", line 570, in start_client_internal
    reply_message = client_app(message=message, context=context)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 143, in __call__
    return self._call(message, context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 126, in ffn
    out_message = handle_legacy_message_from_msgtype(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\message_handler\message_handler.py", line 135, in handle_legacy_message_from_msgtype
    evaluate_res = maybe_call_evaluate(
                   ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site


Error on attempt 2: Trainer: evaluation requires an eval_dataset.


2025-03-09 15:26:00,966 - Client_1 - INFO - 
2025-03-09 15:26:00,967 - Client_1 - INFO - Attempt 3 of 3
2025-03-09 15:26:00,968 - Client_1 - INFO - 
2025-03-09 15:26:00,970 - Client_1 - INFO - Client 1 initializing...
2025-03-09 15:26:01,000 - Client_1 - INFO - 
2025-03-09 15:26:01,001 - Client_1 - INFO - Client 1 initialized and ready
	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- where FlowerClient is of type flwr.client.NumPyClient object
	)
	Using `start_numpy_client()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- w


Attempt 3 of 3

Client 1 initializing...

Client 1 initialized and ready


[92mINFO [0m:      
2025-03-09 15:26:14,574 - flwr - INFO - 
[92mINFO [0m:      Received: train message 6981a5b2-f69a-441d-8f50-c9b11c318974
2025-03-09 15:26:14,574 - flwr - INFO - Received: train message 6981a5b2-f69a-441d-8f50-c9b11c318974
2025-03-09 15:26:14,785 - Client_1 - INFO - 
2025-03-09 15:26:14,785 - Client_1 - INFO - Client 1: Starting training round
2025-03-09 15:26:14,787 - Client_1 - INFO - 
2025-03-09 15:26:14,788 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Starting training round

Client 1: Setting parameters


  0%|          | 0/186 [00:00<?, ?it/s]

{'loss': 1.1948, 'grad_norm': 5.094603538513184, 'learning_rate': 5e-06, 'epoch': 0.16}
{'loss': 1.1588, 'grad_norm': 5.524725437164307, 'learning_rate': 1e-05, 'epoch': 0.32}
{'loss': 1.1458, 'grad_norm': 5.313349723815918, 'learning_rate': 1.45e-05, 'epoch': 0.48}
{'loss': 1.0994, 'grad_norm': 5.417939186096191, 'learning_rate': 1.9500000000000003e-05, 'epoch': 0.64}
{'loss': 1.0599, 'grad_norm': 4.723526954650879, 'learning_rate': 2.45e-05, 'epoch': 0.8}
{'loss': 1.0571, 'grad_norm': 4.8763017654418945, 'learning_rate': 2.95e-05, 'epoch': 0.96}
{'loss': 0.9626, 'grad_norm': 4.279062271118164, 'learning_rate': 3.45e-05, 'epoch': 1.12}
{'loss': 0.952, 'grad_norm': 4.070964813232422, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.28}
{'loss': 0.9731, 'grad_norm': 5.400766849517822, 'learning_rate': 4.4500000000000004e-05, 'epoch': 1.44}
{'loss': 0.8878, 'grad_norm': 4.302569389343262, 'learning_rate': 4.9500000000000004e-05, 'epoch': 1.6}
{'loss': 0.9198, 'grad_norm': 5.0142145156

2025-03-09 15:30:20,608 - Client_1 - INFO - 
2025-03-09 15:30:20,608 - Client_1 - INFO - Client 1: Completed training round
2025-03-09 15:30:20,610 - Client_1 - INFO - 
2025-03-09 15:30:20,611 - Client_1 - INFO - Client 1: Getting parameters


{'train_runtime': 244.7678, 'train_samples_per_second': 12.257, 'train_steps_per_second': 0.76, 'train_loss': 0.9999844335740612, 'epoch': 2.98}

Client 1: Completed training round

Client 1: Getting parameters


[92mINFO [0m:      Sent reply
2025-03-09 15:30:22,351 - flwr - INFO - Sent reply
[92mINFO [0m:      
2025-03-09 15:30:37,919 - flwr - INFO - 
[92mINFO [0m:      Received: evaluate message 6bab8ccc-e495-4b15-9ecf-efb6af67f242
2025-03-09 15:30:37,920 - flwr - INFO - Received: evaluate message 6bab8ccc-e495-4b15-9ecf-efb6af67f242
2025-03-09 15:30:38,132 - Client_1 - INFO - 
2025-03-09 15:30:38,132 - Client_1 - INFO - Client 1: Evaluating model
2025-03-09 15:30:38,134 - Client_1 - INFO - 
2025-03-09 15:30:38,134 - Client_1 - INFO - Client 1: Setting parameters



Client 1: Evaluating model

Client 1: Setting parameters


[91mERROR [0m:     Client raised an exception.
Traceback (most recent call last):
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\app.py", line 570, in start_client_internal
    reply_message = client_app(message=message, context=context)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 143, in __call__
    return self._call(message, context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\client_app.py", line 126, in ffn
    out_message = handle_legacy_message_from_msgtype(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site-packages\flwr\client\message_handler\message_handler.py", line 135, in handle_legacy_message_from_msgtype
    evaluate_res = maybe_call_evaluate(
                   ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nil Atabey\anaconda3\Lib\site


Error on attempt 3: Trainer: evaluation requires an eval_dataset.


In [41]:
# Test after training
log_status("POST-TRAINING EVALUATION", "Testing model after training...")
after_responses = evaluate_model_responses("After")

# Compare responses
comparison_data = {
    "client_id": CLIENT_ID,
    "timestamp": datetime.now().isoformat(),
    "connection_info": {
        "attempts": connection_attempts,
        "max_attempts": max_attempts,
        "status": "success" if connection_attempts < max_attempts else "failed"
    },
    "comparisons": []
}

log_status("RESULTS COMPARISON", "Analyzing before/after performance")
before_similarities = []
after_similarities = []

for q, a in zip(test_questions, test_answers):
    before_sim = calculate_similarity(a, before_responses[q]['model_response'])
    after_sim = calculate_similarity(a, after_responses[q]['model_response'])
    before_similarities.append(before_sim)
    after_similarities.append(after_sim)
    
    comparison = {
        "question": q,
        "ground_truth": a,
        "before": before_responses[q]['model_response'],
        "after": after_responses[q]['model_response'],
        "similarity_before": before_sim,
        "similarity_after": after_sim,
        "improvement": after_sim - before_sim
    }
    comparison_data["comparisons"].append(comparison)
    
    print(f"Improvement: {(after_sim - before_sim) * 100:.1f}%")

2025-03-09 15:32:23,944 - Client_1 - INFO - 
2025-03-09 15:32:23,946 - Client_1 - INFO - Testing model after training...
2025-03-09 15:32:23,947 - Client_1 - INFO - 
2025-03-09 15:32:23,948 - Client_1 - INFO - Starting model evaluation...
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Testing model after training...

Starting model evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels?
Model Response: Question: What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels?
Answer: Very low Pthal levels are associated with very high Ca+ level levels.


Question, what is CaII+ and what are the two types of Ca-II levels that are commonly associated in relation to very Low MG2+.
A: CaI+ is a measurement of how high the levels of calcium in the body. This is often associated
Ground Truth: Very low Mg2+ levels correspond to low PTH levels which in turn results in low Ca2+ levels.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: Can you describe Gardner syndrome and the conditions that it is associated with?
Model Response: Question: Can you describe Gardner syndrome and the conditions that it is associated with?
Answer: Gardner Syndrome is a condition characterized by the presence of a corpus callosum, which is located in the caudate nucleus of the neck. This condition is characterized as having a "mottled-out" appearance, with the corpus luteum located at the base of this neck region.


Question 5: What is the name of Gardner disease, and what is its association with Huntington's
Ground Truth: Gardner syndrome is a medical condition that is associated with the combination of familial adenomatous polyposis, osteomas, and fibromatosis.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is Plummer-Vinson syndrome and what are the main symptoms associated with this condition?
Model Response: Question: What is Plummer-Vinson syndrome and what are the main symptoms associated with this condition?
Answer: Plumpy-Walker syndrome is a condition characterized by the presence of a triad of symptoms: acute (acute), chronic (coagulary), and/or delayed (intermediate). These symptoms can occur in various stages of the disease, and can range from mild to severe. The severity of these symptoms is not always known, but it is possible to have multiple episodes
Ground Truth: Plummer-Vinson syndrome is a medical condition characterized by iron deficiency anemia, esophageal webs, and atrophic glossitis. Some common symptoms of this syndrome include difficulty swallowing, mouth sores, and fatigue.
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Question: What is the genetic cause of cystic fibrosis, and which gene and chromosome is affected by this defect?
Model Response: Question: What is the genetic cause of cystic fibrosis, and which gene and chromosome is affected by this defect?
Answer: Cystic fibrillation (fibrils that form a chain of DNA defects) is caused by a defect in the CFTR gene on chromosome 5q. This gene is involved in regulating cell growth and division, which results in a range of symptoms including cystitis, malabsorption, pancreatic cysts, lung damage, liver damage and rupture, among
Ground Truth: Cystic fibrosis is caused by a genetic defect in the CFTR gene, which is located on chromosome 7. This gene is responsible for producing a protein that regulates the movement of salt and water in and out of cells. When the CFTR gene is defective, it leads to the production of thick, sticky mucus in the lungs, pancreas, and other organs, which can cause a range of symptoms and complications associated with cystic 

2025-03-09 15:32:34,154 - Client_1 - INFO - 
2025-03-09 15:32:34,155 - Client_1 - INFO - Analyzing before/after performance



Question: What is the relationship between upper motoneuron lesions and paralysis?
Model Response: Question: What is the relationship between upper motoneuron lesions and paralysis?
Answer: Upper motonesurs are associated with paralysis due to the presence of a thickening of the corpus callosum.


Question is: How is upper motor neuron paralysis caused?

Answer, Upper motor neurons are caused by the degeneration of dopaminergic neurons in the substantia nigra of each muscle of an elephant's body. This degenerated neuron leads to a loss of sensation and movement in both
Ground Truth: Upper motoneuron lesions may cause spastic paralysis.
--------------------------------------------------

Analyzing before/after performance
Improvement: 7.5%
Improvement: 0.7%
Improvement: -0.2%
Improvement: 0.6%
Improvement: -1.9%


In [42]:
# Calculate overall metrics
avg_improvement = np.mean(np.array(after_similarities) - np.array(before_similarities))
comparison_data["metrics"] = {
    "average_similarity_before": float(np.mean(before_similarities)),
    "average_similarity_after": float(np.mean(after_similarities)),
    "average_improvement": float(avg_improvement),
    "max_improvement": float(np.max(np.array(after_similarities) - np.array(before_similarities))),
    "min_improvement": float(np.min(np.array(after_similarities) - np.array(before_similarities)))
}

In [44]:
# Save results
results_path = os.path.join(temp_dir, f"client_{CLIENT_ID}_results.json")
with open(results_path, "w") as f:
    json.dump(comparison_data, f, indent=2)

log_status("FINAL STATISTICS", 
         f"Client {CLIENT_ID} Training Summary\n"
         f"Total examples: {len(small_dataset)}\n"
         f"Device: {device}\n"
         f"Model parameters: {sum(p.numel() for p in model.parameters())}\n"
         f"Average improvement: {avg_improvement * 100:.1f}%\n"
         f"Results saved: {results_path}")

# Save final model
save_model()

2025-03-09 15:33:58,811 - Client_1 - INFO - 
2025-03-09 15:33:58,812 - Client_1 - INFO - Client 1 Training Summary
Total examples: 1000
Device: cuda
Model parameters: 124439808
Average improvement: 1.3%
Results saved: C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4\client_1_results.json



Client 1 Training Summary
Total examples: 1000
Device: cuda
Model parameters: 124439808
Average improvement: 1.3%
Results saved: C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4\client_1_results.json


2025-03-09 15:33:59,816 - Client_1 - INFO - 
2025-03-09 15:33:59,816 - Client_1 - INFO - Model saved to C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4\medical-model-client-1



Model saved to C:\Users\NILATA~1\AppData\Local\Temp\tmp9bgt6qn4\medical-model-client-1
