In [1]:
from typing import Callable, Dict, Tuple
from dotenv import load_dotenv
import boto3
import os 
import time
import json
import web3
import torch
import numpy as np
import hashlib
import io

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
abi = json.load(open('contract/build/contracts/FugaController.json', 'r'))['abi']
S3_ACCESS_KEY = os.environ.get('S3_ACCESS_KEY')
S3_SCRETE_KEY = os.environ.get('S3_SCRETE_KEY')
BUCKET_NAME = 'fugaeth'

In [3]:
def s3_connection():
    try:
        s3 = boto3.client(
            service_name="s3",
            region_name="ap-northeast-2",
            aws_access_key_id=S3_ACCESS_KEY,
            aws_secret_access_key=S3_SCRETE_KEY
        )
    except Exception as e:
        print(e)
    else:
        print("s3 bucket connected!")
        return s3

def listen_for_event(contract, event_name):
    # create a filter to listen for the specified event
    event_filter = contract.events[event_name].createFilter(fromBlock='latest')

    while True:
        # check if any new events have been emitted
        for event in event_filter.get_new_entries():
            # if the specified event has been emitted, return its message
            if event.event == event_name:
                yield event.args
                return
        # wait for new events
        time.sleep(60)

def web3_connection(contract) -> Tuple[Callable[[], Dict]]:
    web3_message_iterator = listen_for_event(contract, "ServerMessage")

    receive: Callable[[], Dict] = lambda: next(web3_message_iterator)

    return receive



In [20]:
s3 = s3_connection()

s3 bucket connected!


In [6]:
s3.upload_file("s3uploadtest.txt",BUCKET_NAME, "test")

In [7]:
s3.download_file(BUCKET_NAME, "test", "s3downloadtest.txt")

In [13]:
contract_address = '0xA4A0cF0c1BF970A74Bb0453f94faA3fbf7f2585A'

w3 = web3.Web3(web3.HTTPProvider("http://localhost:7545"))
contract = w3.eth.contract(address=contract_address, abi=abi)

In [16]:
receive = web3_connection(contract)

In [17]:
receive()

AttributeDict({'field': 'test'})

In [4]:
def make_hash(params):
    # Concatenate all arrays in the list of parameters
    concatenated_array = np.concatenate([param.flatten() for param in params])

    # Convert the concatenated array to bytes and calculate the hash
    hash_bytes = hashlib.sha256(concatenated_array.tobytes()).digest()

    # Convert the hash to a string and return it
    return hash_bytes.hex()

# def upload_model(parameters_prime):
#     model_hash = make_hash(parameters_prime)

#     # specify the bucket name and object key
#     object_key = f'models/{model_hash}.pth'
#     file_path = object_key

#     # save the model to the local storage
#     torch.save(parameters_prime, file_path)

#     # upload the file to S3
#     s3.upload_file(file_path,BUCKET_NAME, object_key)

def upload_model(parameters_prime):
    model_hash = make_hash(parameters_prime)

    buffer = io.BytesIO()
    torch.save(parameters_prime, buffer)
    serialized_model = buffer.getvalue()
    with open(f'./models/{model_hash}.bin', 'wb') as f:
        f.write(serialized_model)
    # specify the bucket name and object key
    object_key = f'models/{model_hash}.bin'

    # upload the file to S3
    with open(f'./models/{model_hash}.bin', 'rb') as f:
        s3.upload_fileobj(f, BUCKET_NAME, object_key)

# def aggregate_fit(client, model_hashes, num_samples, scores, config):

#     params = []
#     for model_hash in model_hashes:
#         with open(f'./models/{model_hash}.bin', 'rb') as f:
#             params.append(f.read())
        
#     # Normalize the evaluation scores
#     sum_scores = sum(scores)
#     norm_scores = [score / sum_scores for score in scores]

#     # Combine normalized evaluation scores with the dataset portion
#     sum_samples = sum(num_samples)
#     combined_weights = [norm_score * (num_sample / sum_samples) for norm_score, num_sample in zip(norm_scores, num_samples)]

#     # Calculate the weighted model updates
#     weighted_updates = [np.multiply(w, update) for w, update in zip(combined_weights, params)]
#     new_params = [sum(updates) for updates in zip(*weighted_updates)]

    
#     return client.fit(new_params,config)

def aggregate_fit(client, model_hashes, num_samples, scores, config):

    params = []
    for model_hash in model_hashes:
        with open(f'./models/{model_hash}.bin', 'rb') as f:
            buffer = io.BytesIO(f.read())
            param = torch.load(buffer)
            params.append(param) 

        
    # Normalize the evaluation scores
    sum_scores = sum(scores)
    norm_scores = [score / sum_scores for score in scores]

    # Combine normalized evaluation scores with the dataset portion
    sum_samples = sum(num_samples)
    combined_weights = [norm_score * (num_sample / sum_samples) for norm_score, num_sample in zip(norm_scores, num_samples)]

    # Calculate the weighted model updates
    weighted_updates = [np.multiply(w, update) for w, update in zip(combined_weights, params)]
    new_params = [sum(updates) for updates in zip(*weighted_updates)]

    
    return client.fit(new_params,config)    


def start_web3_client(client, contract_address,abi):

    while True:
        s3 = s3_connection()
        w3 = web3.Web3(web3.HTTPProvider("http://localhost:7545"))
        contract = w3.eth.contract(address=contract_address, abi=abi)
        receive = web3_connection(contract)
        server_message = receive()
        print(server_message)
        if server_message['field'] == 'FitIns':
            response = contract.functions.FitIns().call()
            print(response)

            model_hashes = response[0]
            num_samples =response[1]
            scores = response[2]
            batch_size = response[3]
            local_epochs = response[4]
            config = {'batch_size': batch_size, 'local_epochs': local_epochs}

            for model_hash in model_hashes:
                object_key = f'models/{model_hash}.bin'
                file_path = object_key

                # download the file from S3
                s3.download_file(BUCKET_NAME, object_key, file_path)
            
        parameters_prime, num_examples_train, results = aggregate_fit2(client, model_hashes, num_samples, scores, config)
        print(parameters_prime, num_examples_train, results)
        upload_model2(parameters_prime)


In [6]:
import random
from collections import OrderedDict

import flwr as fl
import torch
from datasets import load_dataset, load_metric
from torch.utils.data import DataLoader
from transformers import (
    AdamW,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_data():
    """Load IMDB data (training and eval)"""
    raw_datasets = load_dataset("imdb")
    raw_datasets = raw_datasets.shuffle(seed=42)

    # remove unnecessary data split
    del raw_datasets["unsupervised"]

    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

    # random 10 samples
    population = random.sample(range(len(raw_datasets["train"])), 10)

    tokenized_datasets = raw_datasets.map(
        lambda examples: tokenizer(examples["text"], truncation=True), batched=True
    )
    tokenized_datasets["train"] = tokenized_datasets["train"].select(population)
    tokenized_datasets["test"] = tokenized_datasets["test"].select(population)

    tokenized_datasets = tokenized_datasets.remove_columns("text")
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    trainloader = DataLoader(
        tokenized_datasets["train"],
        shuffle=True,
        batch_size=32,
        collate_fn=data_collator,
    )

    testloader = DataLoader(
        tokenized_datasets["test"], batch_size=32, collate_fn=data_collator
    )

    return trainloader, testloader


In [12]:

def train(net, trainloader, epochs):
    optimizer = AdamW(net.parameters(), lr=5e-5)
    net.train()
    for _ in range(epochs):
        for batch in trainloader:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            outputs = net(**batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()


def test(net, testloader):
    metric = load_metric("accuracy")
    loss = 0
    net.eval()
    for batch in testloader:
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        with torch.no_grad():
            outputs = net(**batch)
        logits = outputs.logits
        loss += outputs.loss.item()
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])
    loss /= len(testloader.dataset)
    accuracy = metric.compute()["accuracy"]
    return loss, accuracy




In [7]:
net = AutoModelForSequenceClassification.from_pretrained(
    "albert-base-v2", num_labels=2
).to(DEVICE)

trainloader, testloader = load_data()

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You sho

In [9]:
class IMDBClient(fl.client.NumPyClient):
    def get_parameters(self, config):
        return [val.cpu().numpy() for _, val in net.state_dict().items()]

    def set_parameters(self, parameters):
        params_dict = zip(net.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
        net.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        self.set_parameters(parameters)
        print("Training Started...")
        train(net, trainloader, epochs=1)
        print("Training Finished.")
        return self.get_parameters(config={}), len(trainloader), {}

    def evaluate(self, parameters, config):
        self.set_parameters(parameters)
        loss, accuracy = test(net, testloader)
        return float(loss), len(testloader), {"accuracy": float(accuracy)}

In [10]:
client = IMDBClient()

In [14]:
parameters_prime, num_examples_train, results = client.fit(parameters=client.get_parameters(config={}), config={})

Training Started...
Training Finished.


In [17]:
upload_model(parameters_prime)

In [15]:
model_hash = make_hash(parameters_prime)
batch_size = 32
local_epochs = 1
config = {'batch_size': batch_size, 'local_epochs': local_epochs}

buffer = io.BytesIO()
torch.save(parameters_prime, buffer)
serialized_model = buffer.getvalue()
with open(f'./models/{model_hash}.bin', 'wb') as f:
    f.write(serialized_model)

with open(f'./models/{model_hash}.bin', 'rb') as f:
    buffer = io.BytesIO(f.read())
    param = torch.load(buffer)
    print(make_hash(param)==model_hash)
    print(client.evaluate(param, config))

True


  from ipykernel import kernelapp as app


(0.08246778249740601, 1, {'accuracy': 0.4})


In [18]:
import io
def upload_model2(parameters_prime):
    model_hash = make_hash(parameters_prime)

    buffer = io.BytesIO()
    torch.save(parameters_prime, buffer)
    serialized_model = buffer.getvalue()
    with open(f'./models/{model_hash}.bin', 'wb') as f:
        f.write(serialized_model)
    # specify the bucket name and object key
    object_key = f'models/{model_hash}.bin'

    # upload the file to S3
    with open(f'./models/{model_hash}.bin', 'rb') as f:
        s3.upload_fileobj(f, BUCKET_NAME, object_key)


In [26]:
upload_model2(parameters_prime)

In [42]:
def aggregate_fit2(client, model_hashes, num_samples, scores, config):

    params = []
    for model_hash in model_hashes:
        with open(f'./models/{model_hash}.bin', 'rb') as f:
            buffer = io.BytesIO(f.read())
            param = torch.load(buffer)
            params.append(param) 

        
    # Normalize the evaluation scores
    sum_scores = sum(scores)
    norm_scores = [score / sum_scores for score in scores]

    # Combine normalized evaluation scores with the dataset portion
    sum_samples = sum(num_samples)
    combined_weights = [norm_score * (num_sample / sum_samples) for norm_score, num_sample in zip(norm_scores, num_samples)]

    # Calculate the weighted model updates
    weighted_updates = [np.multiply(w, update) for w, update in zip(combined_weights, params)]
    new_params = [sum(updates) for updates in zip(*weighted_updates)]

    
    return client.fit(new_params,config)    

In [41]:
model_hashes = ['440dd98d6875144eace9694a56c57adc521efa464795f657f865b3288b6a7b09','8288d0fe3fb314d9430fdac3e1c5ccdc365cfca6ae24e2fb32acc8d14a2812d6','ca952603775ea03d204e95410aa14be3b5b638153ff1a413755e0b2693380ce6']
num_samples = [10,10,10]
scores = [0.5,0.5,0.5]
batch_size = 32
local_epochs = 1
config = {'batch_size': batch_size, 'local_epochs': local_epochs}
parameters_prime,_,_ = aggregate_fit2(client, model_hashes, num_samples, scores, config)
upload_model2(parameters_prime)



Training Started...
Training Finished.


In [43]:
for model_hash in model_hashes+['5f0dcabe2e86f97fca8e115c06f24c82485a1a950897ca94bc0c50c2c6828fbc']:
    with open(f'./models/{model_hash}.bin', 'rb') as f:
        buffer = io.BytesIO(f.read())
        param = torch.load(buffer)
        print(client.evaluate(param, config))

  from ipykernel import kernelapp as app
Downloading builder script: 4.21kB [00:00, 841kB/s]                    


(0.07238720655441284, 1, {'accuracy': 0.5})
(0.06527422666549683, 1, {'accuracy': 0.6})
(0.0680972158908844, 1, {'accuracy': 0.6})
(0.06841739416122436, 1, {'accuracy': 0.6})


In [46]:
start_web3_client(client, '0x58F8B273dfd1Cbed08014dF85D71C0031Faf58c5', abi)

s3 bucket connected!
AttributeDict({'field': 'FitIns'})
[['440dd98d6875144eace9694a56c57adc521efa464795f657f865b3288b6a7b09', '8288d0fe3fb314d9430fdac3e1c5ccdc365cfca6ae24e2fb32acc8d14a2812d6', 'ca952603775ea03d204e95410aa14be3b5b638153ff1a413755e0b2693380ce6'], [100, 100, 100], [100, 100, 100], 32, 1]




Training Started...
Training Finished.
[array([[  0,   0,   0,   1,   1,   1,   2,   2,   2,   3,   3,   3,   4,
          4,   4,   5,   5,   5,   6,   6,   6,   7,   7,   7,   8,   8,
          8,   9,   9,   9,  10,  10,  10,  11,  11,  11,  12,  12,  12,
         13,  13,  13,  14,  14,  14,  15,  15,  15,  16,  16,  16,  17,
         17,  17,  18,  18,  18,  19,  19,  19,  20,  20,  20,  21,  21,
         21,  22,  22,  22,  23,  23,  23,  24,  24,  24,  25,  25,  25,
         26,  26,  26,  27,  27,  27,  28,  28,  28,  29,  29,  29,  30,
         30,  30,  31,  31,  31,  32,  32,  32,  33,  33,  33,  34,  34,
         34,  35,  35,  35,  36,  36,  36,  37,  37,  37,  38,  38,  38,
         39,  39,  39,  40,  40,  40,  41,  41,  41,  42,  42,  42,  43,
         43,  43,  44,  44,  44,  45,  45,  45,  46,  46,  46,  47,  47,
         47,  48,  48,  48,  49,  49,  49,  50,  50,  50,  51,  51,  51,
         52,  52,  52,  53,  53,  53,  54,  54,  54,  55,  55,  55,  56,
         56