# Homo NN

## 資料品質檢查

### 設定資料路徑 & 參數

In [1]:
import os
# Party IDs of the guest and host participants used by the cells below.
guest, host = 9999, 10000
# Base directory under which the data files are located.
data_base = "/data/projects/fate/"

# Table name / namespace used when the guest CSV is uploaded.
# (Plain string literal: the value has no placeholders, so no f-string is needed.)
dense_data = {"name": "breast_homo_guest", "namespace": "experiment"}
# NOTE: despite the "_dir" suffix this is the path of the CSV file itself.
dense_data_dir = os.path.join(data_base, "persistence/data/breast_homo_guest.csv")

### 缺失值 & 欄位名

In [2]:
import pandas as pd
# Load the guest CSV, then print the per-column count of missing values
# followed by the first five rows (one report per line group).
dense_df = pd.read_csv(dense_data_dir)
print(dense_df.isna().sum(), dense_df.head(5), sep="\n")

id        0
target    0
x0        0
x1        0
x2        0
x3        0
x4        0
x5        0
x6        0
x7        0
x8        0
x9        0
x10       0
x11       0
x12       0
x13       0
x14       0
x15       0
x16       0
x17       0
x18       0
x19       0
x20       0
x21       0
x22       0
x23       0
x24       0
x25       0
x26       0
x27       0
x28       0
x29       0
dtype: int64
    id  target        x0        x1        x2        x3        x4        x5  \
0  133       1  0.254879 -1.046633  0.209656  0.074214 -0.441366 -0.377645   
1  273       1 -1.142928 -0.781198 -1.166747 -0.923578  0.628230 -1.021418   
2  175       1 -1.451067 -1.406518 -1.456564 -1.092337 -0.708765 -1.168557   
3  551       1 -0.879933  0.420589 -0.877527 -0.780484 -1.037534 -0.483880   
4  199       0  0.426758  0.723479  0.316885  0.287273  1.000835  0.962702   

         x6        x7  ...       x20       x21       x22       x23       x24  \
0 -0.485934  0.347072  ... -0.337360 -0.728193 -0.4425

## 上傳資料

In [3]:
from pipeline.backend.pipeline import PipeLine
# Partition count handed to the upload call below.
partition = 4

# Upload-only pipeline: the guest party is the initiator and the only role.
pipeline_upload = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest)
)

# Register the guest CSV under the table name / namespace from `dense_data`.
# NOTE(review): presumably head=1 means the file has a header row and drop=1
# replaces an existing table of the same name — confirm against the FATE docs.
pipeline_upload.add_upload_data(
    file=dense_data_dir,
    table_name=dense_data["name"],
    namespace=dense_data["namespace"],
    head=1,
    partition=partition,
)
pipeline_upload.upload(drop=1)

  from .autonotebook import tqdm as notebook_tqdm


 UPLOADING:||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||100.00%

[32m2023-06-13 03:20:47.496[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130320473464210
[0m
[32m2023-06-13 03:20:47.503[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m





[32m2023-06-13 03:20:48.512[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-06-13 03:20:49.530[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 03:20:49.531[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:02[0m
[32m2023-06-13 03:20:50.568[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:03[0m
[32m2023-06-13 03:20:51.585[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:04[0m
[32m2023-0

### 建構 Training pipeline 範例

使用 `pipeline` 模塊來構建聯邦學習流程

In [4]:
import torch as t
from torch import nn
from torch import optim

from pipeline import fate_torch_hook
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, HomoNN, Evaluation
from pipeline.component.nn import TrainerParam
from pipeline.interface import Data

# Important: this call modifies the torch modules so that a Sequential model
# defined below can be parsed by the pipeline. Run it before building the model.
fate_torch_hook(t)

<module 'torch' from '/data/projects/python/venv/lib/python3.8/site-packages/torch/__init__.py'>

實例化 `pipeline` 並設定 `initiator` 和 `roles`:

    - initiator: 
        * role: guest
        * party: 9999
    - roles:
        * guest: 9999
        * host: 10000
        * arbiter: 10000

In [5]:
# Training pipeline: the guest initiates the job; the host party ID is also
# used for the arbiter role.
pipeline = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest, host=host, arbiter=host)
)

使用 `Reader` 模塊來讀取資料

In [6]:
reader_0 = Reader(name="reader_0")

# Guest side: table the guest party reads from.
reader_0.get_party_instance(
    role='guest', party_id=guest
).component_param(
    table={"name": "breast_homo_guest", "namespace": "experiment"}
)

# Host side: table the host party reads from.
reader_0.get_party_instance(
    role='host', party_id=host
).component_param(
    table={"name": "breast_homo_host", "namespace": "experiment"}
)

使用 `DataTransform` 模塊來進行資料前處理

`DataTransform` 負責資料前處理( 設定目標欄位名稱, 補缺值, 替換 outliers )

In [7]:
data_transform_0 = DataTransform(name="data_transform_0")

# Guest and host use identical preprocessing settings. Keeping them in a single
# mapping prevents the two configurations from drifting apart.
data_transform_params = dict(
    with_label=True, label_name='target', label_type='int',
    missing_fill=True, missing_fill_method="designated", default_value=0.0,
    outlier_replace=False, outlier_replace_method=None, outlier_replace_value=0.0,
)

# set guest parameter
data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
    **data_transform_params
)

# set host parameter (party_id is passed as a plain int, the same form used
# for the guest here and for both parties in the Reader cells)
data_transform_0.get_party_instance(role='host', party_id=host).component_param(
    **data_transform_params
)

跟 `pytorch` 一樣定義 `nn` & `loss`, 使用 `HomoNN` 模塊包裝. 用以下的參數來構建神經網路模型

In [8]:
# One linear layer over 30 inputs followed by a sigmoid, trained with binary
# cross-entropy and Adam.
model = nn.Sequential(nn.Linear(30, 1), nn.Sigmoid())
loss = nn.BCELoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.01)

# Wrap model, loss and optimizer in the HomoNN component together with the
# trainer settings.
homo_nn_0 = HomoNN(
    name='homo_nn_0',
    model=model,
    loss=loss,
    optimizer=optimizer,
    trainer=TrainerParam(
        trainer_name='fedavg_trainer',
        epochs=20,
        batch_size=128,
        validation_freqs=1,
    ),
)


最後, 使用 `Evaluation` 模塊來評估模型的表現

In [9]:
# Evaluation component for the training pipeline, configured with the
# "binary" evaluation type.
evaluation_0 = Evaluation(
    eval_type="binary",
    name="evaluation_0",
)

上一個 component 的 output 是下一個 component 的 input

    - data_transform_0 吃 reader_0 的 output
    - homo_nn_0 吃 data_transform_0 的 output
    - evaluation_0 吃 homo_nn_0 的 output (預測值)

記得用 `pipeline.compile()` 來打包整串流程
使用 `pipeline.fit()` 來開始進行訓練

In [10]:
# Wire the components in order: reader -> data transform -> HomoNN -> evaluation.
# Each component consumes the output data of the one before it.
(
    pipeline
    .add_component(reader_0)
    .add_component(data_transform_0, data=Data(data=reader_0.output.data))
    .add_component(homo_nn_0, data=Data(train_data=data_transform_0.output.data))
    .add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
)

# Compile the assembled pipeline, then start training.
pipeline.compile()
pipeline.fit()

[32m2023-06-13 03:20:59.378[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130320587887160
[0m
[32m2023-06-13 03:20:59.387[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[0mm2023-06-13 03:21:00.416[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 03:21:00.418[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:01[0m
[32m2023-06-13 03:21:01.438[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:02[0m
[32m2023-06-13 03:21:02.511[0m | [1mI

[32m2023-06-13 03:21:38.643[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:39[0m
[32m2023-06-13 03:21:39.659[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:40[0m
[32m2023-06-13 03:21:40.676[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:41[0m
[32m2023-06-13 03:21:41.692[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:42[0m
[32m2023-06-13 03:21:42.714[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D

[32m2023-06-13 03:22:18.235[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:01:18[0m
[32m2023-06-13 03:22:19.252[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:01:19[0m
[0mm2023-06-13 03:22:20.273[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 03:22:20.275[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:20[0m
[32m2023-06-13 03:22:21.293[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse

[32m2023-06-13 03:22:57.158[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:57[0m
[32m2023-06-13 03:22:58.173[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:58[0m
[32m2023-06-13 03:22:59.188[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:59[0m
[32m2023-06-13 03:23:00.204[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:02:00[0m
[32m2023-06-13 03:23:01.219[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m

當訓練結束後, 模型會用來做預測. 使用者可以自由選擇要不要儲存此次 `pipeline` 以方便未來重複使用
使用 `pipeline.dump(pipeline_saved_path)` 來完成儲存

In [11]:
# Save the fitted pipeline so it can be reloaded later for inference.
# Create the target directory first so the write does not fail on a fresh
# environment where "pipeline_saved/" is missing.
os.makedirs("pipeline_saved", exist_ok=True)
# The trailing ";" suppresses the notebook display of the call's return value.
pipeline.dump("pipeline_saved/homo_two_party_continual_input_classification_homo_nn.pkl");

### 建構 Inference pipeline 範例

首先, 使用 `PipeLine.load_model_from_file` load `pkl` 檔

部署 Inference 需要的模塊, 在這邊是 `data_transform_0`, `homo_nn_0`

In [15]:
# Reload the saved training pipeline from its pickle file.
pipeline = PipeLine.load_model_from_file(
    'pipeline_saved/homo_two_party_continual_input_classification_homo_nn.pkl'
)

# Deploy only the components needed at inference time.
pipeline.deploy_component(
    [
        pipeline.data_transform_0,
        pipeline.homo_nn_0,
    ]
);

接著, 部署 `Reader` 模塊 `reader_1` 來讀取新data

In [16]:
reader_1 = Reader(name="reader_1")

# Guest side: table read for inference.
reader_1.get_party_instance(
    role="guest", party_id=guest
).component_param(
    table={"name": "breast_homo_guest", "namespace": "experiment"}
)

# Host side: table read for inference.
reader_1.get_party_instance(
    role="host", party_id=host
).component_param(
    table={"name": "breast_homo_host", "namespace": "experiment"}
)

最後, 部署新的 `Evaluation` 來衡量 predict ( Inference ) 的表現

In [17]:
# Fresh Evaluation component for the inference pipeline; this rebinds the
# name `evaluation_0` used earlier for training.
evaluation_0 = Evaluation(
    eval_type="binary",
    name="evaluation_0",
)

整合所有模塊

In [19]:
# Inference pipeline: new reader -> deployed training pipeline -> evaluation.
predict_pipeline = PipeLine()
(
    predict_pipeline
    .add_component(reader_1)
    # Feed reader_1's output into the data_transform_0 input of the deployed pipeline.
    .add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}),
    )
    # Evaluate the output data of the deployed homo_nn_0.
    .add_component(evaluation_0, data=Data(data=pipeline.homo_nn_0.output.data))
);


預測!

In [20]:
# Run the inference job assembled in predict_pipeline.
predict_pipeline.predict()

[32m2023-06-13 03:25:00.043[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130324594206340
[0m
[32m2023-06-13 03:25:00.051[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[0mm2023-06-13 03:25:01.068[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 03:25:01.069[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_1, time elapse: 0:00:01[0m
[32m2023-06-13 03:25:02.085[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_1, time elapse: 0:00:02[0m
[32m2023-06-13 03:25:03.100[0m | [1mI

[32m2023-06-13 03:25:37.130[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:37[0m
[32m2023-06-13 03:25:38.145[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:38[0m
[0mm2023-06-13 03:25:40.192[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 03:25:40.193[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:00:40[0m
[32m2023-06-13 03:25:41.210[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse

[32m2023-06-13 03:26:16.572[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:16[0m
[32m2023-06-13 03:26:17.589[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:17[0m
[32m2023-06-13 03:26:18.604[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:18[0m
[32m2023-06-13 03:26:19.622[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:19[0m
[32m2023-06-13 03:26:20.632[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m89

用 `pipeline.get_component('evaluation_0').get_summary()` 

來取得 `evaluation_0` 模塊的資訊, 並儲存成 json 檔

In [21]:
import json
data_base = "/data/projects/fate/"
# NOTE: despite the "_dir" suffix this is the path of the JSON file itself.
metadata_saved_dir = os.path.join(data_base, "persistence/metadata/homo_two_party_continual_input_classification_homo_nn.json")

# Serialize the evaluation summary as pretty-printed JSON.
# NOTE(review): this reads evaluation_0 from `pipeline` (the reloaded training
# pipeline), not from `predict_pipeline` — confirm which job's summary is intended.
metedata = json.dumps(pipeline.get_component('evaluation_0').get_summary(), indent=4)

# Make sure the output directory exists before writing; open(..., "w") does not
# create missing parent directories.
os.makedirs(os.path.dirname(metadata_saved_dir), exist_ok=True)
with open(metadata_saved_dir, "w", encoding="utf-8") as json_file:
    json_file.write(metedata)

print(f"Write in metadata_saved_dir : {metadata_saved_dir} \n {metedata}")

Write in metadata_saved_dir : /data/projects/fate/persistence/metadata/homo_two_party_continual_input_classification_homo_nn.json 
 {
    "homo_nn_0": {
        "train": {
            "auc": 0.9981818181818182,
            "ks": 0.954025974025974
        }
    }
}
