# Homo NN

## 資料品質檢查

### 設定資料路徑 & 參數

In [1]:
import os
# Party IDs used for the guest and host roles throughout the notebook.
guest = 9999
host = 10000

# Base directory that the data file path below is joined onto.
data_base = "/data/projects/fate/"

# Table name / namespace passed to the upload step, and the CSV file that backs it.
dense_data = dict(name="titanic_homo_guest", namespace="experiment")
dense_data_dir = os.path.join(data_base, "persistence/data/titanic_homo_guest.csv")

### 缺失值 & 欄位名

#### 欄位名請全部調整成小寫
#### age 欄位有缺失值, 等等可以使用 DataTransform 來填平均年齡 28.68

In [2]:
import pandas as pd
# Load the guest CSV, then print per-column missing counts, a 5-row preview and the mean age.
dense_df = pd.read_csv(dense_data_dir)

missing_per_column = dense_df.isna().sum()
print(missing_per_column)

print(dense_df.head(5))

mean_age = dense_df["age"].mean()
print(f"The Average Age of Passenger in guest data is {mean_age:.2f}")

passengerid     0
survived        0
pclass          0
sex             0
age            78
sibsp           0
parch           0
fare            0
embarked        1
dtype: int64
   passengerid  survived  pclass     sex   age  sibsp  parch     fare embarked
0            1         0       3    male  22.0      1      0   7.2500        S
1            2         1       1  female  38.0      1      0  71.2833        C
2            3         1       3  female  26.0      0      0   7.9250        S
3            4         1       1  female  35.0      1      0  53.1000        S
4            5         0       3    male  35.0      0      0   8.0500        S
The Average Age of Passenger in guest data is 28.68


## 上傳資料

In [3]:
from pipeline.backend.pipeline import PipeLine
# Guest-only pipeline whose sole job is to upload the guest CSV as a table.
pipeline_upload = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest)
)
partition = 4

pipeline_upload.add_upload_data(
    file=dense_data_dir,
    table_name=dense_data["name"],        # table name
    namespace=dense_data["namespace"],    # namespace
    head=1,                               # data info
    partition=partition,
)
pipeline_upload.upload(drop=1)

 UPLOADING:|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||67.00%

[32m2023-06-13 05:38:04.507[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130538043474700
[0m
[32m2023-06-13 05:38:04.514[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m





[32m2023-06-13 05:38:05.523[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-06-13 05:38:07.555[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 05:38:07.556[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:03[0m
[32m2023-06-13 05:38:08.570[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:04[0m
[32m2023-06-13 05:38:09.585[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:05[0m
[32m2023-0

### 建構 Training pipeline 範例

使用 `pipeline` 模塊來構建聯邦學習流程

In [4]:
import torch as t
from torch import nn
from torch import optim

from pipeline import fate_torch_hook
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, HomoOneHotEncoder, HomoNN, Evaluation
from pipeline.component.nn import TrainerParam
from pipeline.interface import Data

# Important: this patches the torch modules so that a Sequential model can be parsed by the pipeline.
fate_torch_hook(t)

<module 'torch' from '/data/projects/python/venv/lib/python3.8/site-packages/torch/__init__.py'>

實例化 `pipeline` 並設定 `initiator` 和 `roles`:

    - initiator: 
        * role: guest
        * party: 9999
    - roles:
        * guest: 9999
        * host: 10000
        * arbiter: 10000

In [19]:
# Training pipeline: the guest initiates the job, and the host party ID is also used for the arbiter role.
pipeline = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest, host=host, arbiter=host)
)

使用 `Reader` 模塊來讀取資料

In [20]:
reader_0 = Reader(name="reader_0")

# Guest side: read the guest table.
reader_0_guest = reader_0.get_party_instance(role='guest', party_id=guest)
reader_0_guest.component_param(table={"name": "titanic_homo_guest", "namespace": "experiment"})

# Host side: read the host table.
reader_0_host = reader_0.get_party_instance(role='host', party_id=host)
reader_0_host.component_param(table={"name": "titanic_homo_host", "namespace": "experiment"})

使用 `DataTransform` 模塊來進行資料前處理

`DataTransform` 負責資料前處理( 設定目標欄位名稱, 補缺值, 替換 outliers )

In [21]:
data_transform_0 = DataTransform(name="data_transform_0")


def _data_transform_param():
    """Return a fresh copy of the DataTransform settings shared by guest and host."""
    return dict(
        with_label=True,
        label_name="survived",
        label_type="int",
        missing_fill=True,
        missing_fill_method="designated",
        default_value=["0", "male", 28.68, "0", "100", 0.0, "N"],
        outlier_replace=False,
        data_type="float",
        exclusive_data_type={"pclass": "str", "sex": "str", "sibsp": "str", "parch": "str", "embarked": "str"},
    )


# set guest parameter
data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(**_data_transform_param())

# set host parameter (same settings as the guest)
data_transform_0.get_party_instance(role='host', party_id=[host]).component_param(**_data_transform_param())

使用 `HomoOneHotEncoder` 模塊來對類別資料進行 one-hot 編碼


In [22]:
homo_onehot_encoder_0 = HomoOneHotEncoder(name="homo_onehot_encoder_0")


def _onehot_param():
    """Return a fresh copy of the column selection shared by guest and host."""
    return dict(
        transform_col_indexes=[0, 1, 3, 4, 6],
        transform_col_names=["pclass", "sex", "sibsp", "parch", "embarked"],
    )


# Guest and host are configured with the same columns.
homo_onehot_encoder_0.get_party_instance(role='guest', party_id=guest).component_param(**_onehot_param())

homo_onehot_encoder_0.get_party_instance(role='host', party_id=[host]).component_param(**_onehot_param())

跟 `pytorch` 一樣定義 `nn` & `loss`, 使用 `HomoNN` 模塊包裝. 用以下的參數來構建神經網路模型

In [23]:
# One linear layer followed by a sigmoid, trained with binary cross-entropy.
# NOTE(review): the input width 25 presumably equals the feature count after one-hot encoding — confirm.
model = nn.Sequential(nn.Linear(25, 1), nn.Sigmoid())
loss = nn.BCELoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.01)

# Trainer settings handed to the HomoNN component.
trainer_param = TrainerParam(
    trainer_name='fedavg_trainer',
    epochs=20,
    batch_size=128,
    validation_freqs=1,
)

homo_nn_0 = HomoNN(
    name='homo_nn_0',
    model=model,
    loss=loss,
    optimizer=optimizer,
    trainer=trainer_param,
)


最後, 使用 `Evaluation` 模塊來評估模型的好壞

In [24]:
# Evaluation component for the training pipeline, with eval_type set to "binary".
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

上一個 component 的 output 是下一個 component 的 input

    - data_transform_0 吃 reader_0 的 output
    - homo_onehot_encoder_0 吃 data_transform_0 的 output
    - homo_nn_0 吃 homo_onehot_encoder_0 的 output
    - evaluation_0 吃 homo_nn_0 的 output (預測值)

記得用 `pipeline.compile()` 來打包整串流程
使用 `pipeline.fit()` 來開始進行訓練

In [25]:
# Wire the components in order; each one takes the previous component's output data as input.
(
    pipeline
    .add_component(reader_0)
    .add_component(data_transform_0, data=Data(data=reader_0.output.data))
    .add_component(homo_onehot_encoder_0, data=Data(data=data_transform_0.output.data))
    .add_component(homo_nn_0, data=Data(train_data=homo_onehot_encoder_0.output.data))
    .add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
)

# Compile the assembled pipeline, then start training.
pipeline.compile()
pipeline.fit()

[32m2023-06-13 05:43:10.294[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130543098058910
[0m
[32m2023-06-13 05:43:10.302[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[32m2023-06-13 05:43:11.312[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-06-13 05:43:12.330[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 05:43:12.332[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:02[0m
[32m2023-06-13 05:43:13.348[0m | [1mINFO    

[32m2023-06-13 05:43:47.523[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:37[0m
[32m2023-06-13 05:43:48.540[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:38[0m
[32m2023-06-13 05:43:49.575[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:39[0m
[32m2023-06-13 05:43:50.658[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:40[0m
[32m2023-06-13 05:43:51.673[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_sta

[32m2023-06-13 05:44:24.871[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:14[0m
[32m2023-06-13 05:44:25.901[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:15[0m
[32m2023-06-13 05:44:26.922[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:16[0m
[32m2023-06-13 05:44:27.937[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:17[0m
[32m2023-06-13 05:44:28.952[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m

[32m2023-06-13 05:45:03.590[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:53[0m
[32m2023-06-13 05:45:04.605[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:54[0m
[32m2023-06-13 05:45:05.631[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:55[0m
[32m2023-06-13 05:45:06.649[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:56[0m
[32m2023-06-13 05:45:07.664[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127

當訓練結束後, 模型會用來做預測. 使用者可以自由選擇要不要儲存此次 `pipeline` 以方便未來重複使用
使用 `pipeline.dump(pipeline_saved_path)` 來完成儲存

In [26]:
# Save the trained pipeline to a .pkl file for later reuse; the trailing ";" suppresses the cell output.
pipeline.dump("pipeline_saved/homo_two_party_continual_input_classification_homo_nn.pkl");

### 建構 Inference pipeline 範例

首先, 使用 `PipeLine.load_model_from_file` load `pkl` 檔

部署 Inference 需要的模塊, 在這邊是 `data_transform_0`,`homo_onehot_encoder_0`,`homo_nn_0`

In [29]:
# Restore the pipeline that was dumped after training.
pipeline = PipeLine.load_model_from_file('pipeline_saved/homo_two_party_continual_input_classification_homo_nn.pkl')

# Deploy the components needed for inference. Every component is looked up on the
# loaded pipeline, so this cell does not depend on variables left over from the
# training cells and still works on a fresh kernel.
pipeline.deploy_component([pipeline.data_transform_0, pipeline.homo_onehot_encoder_0, pipeline.homo_nn_0]);

接著, 部署 `Reader` 模塊 `reader_1` 來讀取新data

In [34]:
reader_1 = Reader(name="reader_1")

# Guest side: table read for prediction.
reader_1_guest = reader_1.get_party_instance(role="guest", party_id=guest)
reader_1_guest.component_param(table={"name": "titanic_homo_guest", "namespace": "experiment"})

# Host side: table read for prediction.
reader_1_host = reader_1.get_party_instance(role="host", party_id=host)
reader_1_host.component_param(table={"name": "titanic_homo_host", "namespace": "experiment"})

最後, 部署新的 `Evaluation` 來衡量 predict ( Inference ) 的表現

In [35]:
# New Evaluation component (eval_type "binary") for the inference pipeline; rebinds the name evaluation_0.
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

整合所有模塊

In [36]:
# Assemble the inference pipeline: new reader -> deployed training pipeline -> evaluation.
predict_pipeline = PipeLine()
predict_pipeline.add_component(reader_1)

# Feed reader_1's output into the data_transform_0 input of the deployed pipeline.
predict_input = {pipeline.data_transform_0.input.data: reader_1.output.data}
predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_input))

# Evaluate on the output data of homo_nn_0; the trailing ";" suppresses the cell output.
predict_pipeline.add_component(evaluation_0, data=Data(data=pipeline.homo_nn_0.output.data));


預測!

In [37]:
# Run prediction with the assembled inference pipeline.
predict_pipeline.predict()

[32m2023-06-13 05:46:34.098[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130546334163620
[0m
[32m2023-06-13 05:46:34.109[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[0mm2023-06-13 05:46:35.128[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 05:46:35.129[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_1, time elapse: 0:00:01[0m
[32m2023-06-13 05:46:36.144[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_1, time elapse: 0:00:02[0m
[32m2023-06-13 05:46:37.158[0m | [1mI

[32m2023-06-13 05:47:12.128[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:38[0m
[32m2023-06-13 05:47:13.145[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:39[0m
[32m2023-06-13 05:47:14.159[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:40[0m
[0mm2023-06-13 05:47:15.175[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 05:47:15.176[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_onehot_enc

[32m2023-06-13 05:47:50.142[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:16[0m
[32m2023-06-13 05:47:51.167[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:17[0m
[32m2023-06-13 05:47:52.183[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:18[0m
[32m2023-06-13 05:47:53.209[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:19[0m
[32m2023-06-13 05:47:54.234[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m

用 `pipeline.get_component('evaluation_0').get_summary()` 

來取得 `evaluation_0` 模塊的資訊並儲存成 json 檔

In [38]:
import json
# Destination of the JSON file that stores the evaluation summary.
data_base = "/data/projects/fate/"
metadata_saved_dir = os.path.join(data_base, "persistence/metadata/homo_two_party_continual_input_classification_homo_nn.json")

# Serialize the summary of the evaluation_0 component as indented JSON.
# NOTE(review): `pipeline` is the pipeline loaded from the .pkl file, not
# `predict_pipeline`, and the recorded output contains "train" metrics — confirm
# which summary is meant to be saved here.
metedata = json.dumps(pipeline.get_component('evaluation_0').get_summary(), indent=4)

# Create the target directory when it is missing so the write below does not
# raise FileNotFoundError on a fresh environment.
os.makedirs(os.path.dirname(metadata_saved_dir), exist_ok=True)
with open(metadata_saved_dir, "w") as json_file:
    json_file.write(metedata)

print(f"Write in metadata_saved_dir : {metadata_saved_dir} \n {metedata}")

Write in metadata_saved_dir : /data/projects/fate/persistence/metadata/homo_two_party_continual_input_classification_homo_nn.json 
 {
    "homo_nn_0": {
        "train": {
            "auc": 0.8290477313831879,
            "ks": 0.566800345993552
        }
    }
}
