# Homo NN

## 資料品質檢查

### 設定資料路徑 & 參數

In [35]:
import os

# Party IDs of the two participants used throughout this notebook.
guest, host = 9999, 10000
# Base directory that the data paths below are joined onto.
data_base = "/data/projects/fate/"

# Table name / namespace under which the guest CSV is uploaded.
dense_data = {"name": "motor_homo_guest", "namespace": "experiment"}
# Location of the guest CSV on disk.
dense_data_dir = os.path.join(data_base, "persistence/data/motor_homo_guest.csv")

### 缺失值 & 欄位名

In [2]:
import pandas as pd

# Load the guest CSV, then print the per-column missing-value counts
# followed by the first five rows.
dense_df = pd.read_csv(dense_data_dir)
for report in (dense_df.isna().sum(), dense_df.head(5)):
    print(report)

idx               0
motor_speed       0
pm                0
stator_yoke       0
stator_tooth      0
stator_winding    0
ambient           0
coolant           0
u_d               0
u_q               0
torque            0
i_d               0
i_q               0
dtype: int64
   idx  motor_speed        pm  stator_yoke  stator_tooth  stator_winding  \
0    1     0.293536 -0.633105    -0.397535     -0.290147       -0.307720   
1    2    -1.222430 -0.150656    -0.777613     -1.226843       -1.321856   
2    3    -0.951901  1.136038     0.967011      0.454562        0.110899   
3    4     1.503905  1.046629     1.291209      1.302569        1.049111   
4    5     0.186439 -0.470173    -0.053767     -0.123047       -0.032705   

    ambient   coolant       u_d       u_q    torque       i_d       i_q  
0  0.047104 -0.555098  0.444256  1.704733 -0.342206  0.994721 -0.349791  
1  0.671626  0.350670  0.319092 -1.327874 -0.255640  1.029143 -0.245723  
2 -0.552205  1.989043 -0.084323 -0.775999  1.311

## 上傳資料

In [36]:
from pipeline.backend.pipeline import PipeLine

# The upload job is initiated by the guest and lists only the guest as a role.
pipeline_upload = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest)
)
partition = 4

# Queue the guest CSV for upload under the configured table name / namespace.
# NOTE(review): head=1 presumably tells FATE the CSV has a header row — confirm.
pipeline_upload.add_upload_data(
    file=dense_data_dir,
    table_name=dense_data["name"],
    namespace=dense_data["namespace"],
    head=1,
    partition=partition,
)
# NOTE(review): drop=1 presumably replaces an already-existing table of the same name — confirm.
pipeline_upload.upload(drop=1)

 UPLOADING:||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||100.00%

[32m2023-06-13 06:12:20.390[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130612202398960
[0m
[32m2023-06-13 06:12:20.397[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m





[32m2023-06-13 06:12:21.407[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-06-13 06:12:22.423[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 06:12:22.424[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:02[0m
[32m2023-06-13 06:12:23.438[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:03[0m
[32m2023-06-13 06:12:24.453[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component upload_0, time elapse: 0:00:04[0m
[32m2023-0

### 建構 Training pipeline 範例

使用 `pipeline` 模塊來構建聯邦學習流程

In [4]:
import torch as t
from torch import nn
from torch import optim

from pipeline import fate_torch_hook
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, HomoNN, Evaluation
from pipeline.component.nn import TrainerParam
from pipeline.interface import Data

# Important: this modifies the torch modules so that a Sequential model
# can be parsed by the pipeline. Run it before building the model.
fate_torch_hook(t)

<module 'torch' from '/data/projects/python/venv/lib/python3.8/site-packages/torch/__init__.py'>

實例化 `pipeline` 並設定 `initiator` 和 `roles`:

    - initiator: 
        * role: guest
        * party: 9999
    - roles:
        * guest: 9999
        * host: 10000
        * arbiter: 10000

In [5]:
# The guest initiates the job; the host party ID is also used for the arbiter role.
pipeline = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest, host=host, arbiter=host)
)

使用 `Reader` 模塊來讀取資料

In [6]:
reader_0 = Reader(name="reader_0")

# Point each party at its own table inside the "experiment" namespace.
for party_role, party, table_name in (
    ('guest', guest, "motor_homo_guest"),
    ('host', host, "motor_homo_host"),
):
    reader_0.get_party_instance(role=party_role, party_id=party).component_param(
        table={"name": table_name, "namespace": "experiment"})

使用 `DataTransform` 模塊來進行資料前處理

`DataTransform` 負責資料前處理( 設定目標欄位名稱, 補缺值, 替換 outliers )

In [7]:
data_transform_0 = DataTransform(name="data_transform_0")

# Guest and host use identical preprocessing settings: `motor_speed` is the
# float label, missing values are filled with the designated value 0.0, and
# outlier replacement is switched off.
transform_settings = dict(
    with_label=True, label_name='motor_speed', label_type='float',
    missing_fill=True, missing_fill_method="designated", default_value=0.0,
    outlier_replace=False, outlier_replace_method=None, outlier_replace_value=0.0,
)

# set guest parameter
data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
    **transform_settings
)

# set host parameter (party_id is passed as a list here, as in the original cell)
data_transform_0.get_party_instance(role='host', party_id=[host]).component_param(
    **transform_settings
)

跟 `pytorch` 一樣定義 `nn` & `loss`, 使用 `HomoNN` 模塊包裝. 用以下的參數來構建神經網路模型

In [8]:
# Small regression network: 11 inputs -> 4 hidden units (ReLU) -> 1 output.
# NOTE(review): 11 presumably equals the number of CSV columns left once `idx`
# and the `motor_speed` label are set aside (13 columns in total) — confirm.
model = nn.Sequential(*[nn.Linear(11, 4), nn.ReLU(), nn.Linear(4, 1)])
loss = nn.MSELoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.01)

# Trainer settings: 'fedavg_trainer', 20 epochs, batch size 128.
trainer_param = TrainerParam(trainer_name='fedavg_trainer', epochs=20, batch_size=128)

homo_nn_0 = HomoNN(
    name='homo_nn_0',
    model=model,
    loss=loss,
    optimizer=optimizer,
    trainer=trainer_param,
)


最後, 使用 `Evaluation` 模塊來評估模型的表現

In [9]:
# Evaluation component configured for a regression task.
evaluation_0 = Evaluation(
    name="evaluation_0",
    eval_type="regression",
)

上一個 component 的 output 是下一個 component 的 input

    - data_transform_0 吃 reader_0 的 output
    - homo_nn_0 吃 data_transform_0 的 output
    - evaluation_0 吃 homo_nn_0 的 output (預測值)

記得用 `pipeline.compile()` 來打包整串流程
使用 `pipeline.fit()` 來開始進行訓練

In [10]:
# Chain the components: each one consumes the output data of the previous one.
pipeline.add_component(reader_0)

transform_input = Data(data=reader_0.output.data)
pipeline.add_component(data_transform_0, data=transform_input)

training_input = Data(train_data=data_transform_0.output.data)
pipeline.add_component(homo_nn_0, data=training_input)

evaluation_input = Data(data=homo_nn_0.output.data)
pipeline.add_component(evaluation_0, data=evaluation_input)

# Compile the assembled flow, then start training.
pipeline.compile()
pipeline.fit()

[32m2023-06-13 06:01:28.594[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130601280421010
[0m
[32m2023-06-13 06:01:28.606[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[32m2023-06-13 06:01:29.618[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[32m2023-06-13 06:01:30.627[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:02[0m
[32m2023-06-13 06:01:31.636[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:03[

[32m2023-06-13 06:02:07.139[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:38[0m
[32m2023-06-13 06:02:08.156[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:39[0m
[32m2023-06-13 06:02:09.175[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:40[0m
[32m2023-06-13 06:02:10.194[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:41[0m
[32m2023-06-13 06:02:11.211[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_sta

[32m2023-06-13 06:02:46.319[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:17[0m
[32m2023-06-13 06:02:47.338[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:18[0m
[32m2023-06-13 06:02:48.354[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:19[0m
[32m2023-06-13 06:02:49.369[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component homo_nn_0, time elapse: 0:01:20[0m
[32m2023-06-13 06:02:50.384[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m

當訓練結束後, 模型會用來做預測. 使用者可以自由選擇要不要儲存此次 `pipeline` 以方便未來重複使用
使用 `pipeline.dump(pipeline_saved_path)` 來完成儲存

In [11]:
# Save the trained pipeline for later reuse; the trailing ';' suppresses the cell output.
pipeline_saved_path = "pipeline_saved/homo_two_party_continual_input_regression_homo_nn.pkl"
pipeline.dump(pipeline_saved_path);

### 建構 Inference pipeline 範例

首先, 使用 `PipeLine.load_model_from_file` load `pkl` 檔

部署 Inference 需要的模塊, 在這邊是 `data_transform_0`, `homo_nn_0`

In [40]:
# Reload the pipeline that was dumped after training.
pipeline = PipeLine.load_model_from_file(
    'pipeline_saved/homo_two_party_continual_input_regression_homo_nn.pkl'
)

# Deploy only the components required for inference.
inference_components = [pipeline.data_transform_0, pipeline.homo_nn_0]
pipeline.deploy_component(inference_components);

接著, 部署 `Reader` 模塊 `reader_1` 來讀取新data

In [41]:
reader_1 = Reader(name="reader_1")

# Tables read at inference time.
# NOTE(review): these are the same tables that reader_0 used for training.
guest_table = {"name": "motor_homo_guest", "namespace": "experiment"}
host_table = {"name": "motor_homo_host", "namespace": "experiment"}

reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_table)
reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_table)

最後, 部署新的 `Evaluation` 來衡量 predict ( Inference ) 的表現

In [42]:
# Fresh regression evaluation component for the inference pipeline.
evaluation_0 = Evaluation(
    name="evaluation_0",
    eval_type="regression",
)

整合所有模塊

In [43]:
predict_pipeline = PipeLine()
predict_pipeline.add_component(reader_1)

# Route reader_1's output into the data_transform_0 input of the deployed pipeline.
deployed_input = Data(
    predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}
)
predict_pipeline.add_component(pipeline, data=deployed_input)

# Evaluate the output data of homo_nn_0.
evaluation_input = Data(data=pipeline.homo_nn_0.output.data)
predict_pipeline.add_component(evaluation_0, data=evaluation_input);


預測!

In [44]:
# Run the assembled inference pipeline (reader_1 -> deployed pipeline -> evaluation_0).
predict_pipeline.predict()

[32m2023-06-13 06:13:43.016[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202306130613424009210
[0m
[32m2023-06-13 06:13:43.024[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[32m2023-06-13 06:13:44.032[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-06-13 06:13:45.050[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-06-13 06:13:45.052[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_1, time elapse: 0:00:02[0m
[32m2023-06-13 06:13:46.067[0m | [1mINFO    

[32m2023-06-13 06:14:20.262[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:37[0m
[32m2023-06-13 06:14:21.278[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:38[0m
[32m2023-06-13 06:14:22.294[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:39[0m
[32m2023-06-13 06:14:23.309[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component data_transform_0, time elapse: 0:00:40[0m
[0mm2023-06-13 06:14:24.329[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_sta

[32m2023-06-13 06:14:58.863[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:15[0m
[32m2023-06-13 06:15:00.093[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:17[0m
[32m2023-06-13 06:15:01.121[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:18[0m
[32m2023-06-13 06:15:02.141[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:19[0m
[32m2023-06-13 06:15:03.163[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127

用 `pipeline.get_component('evaluation_0').get_summary()` 

來取得 `evaluation_0` 模塊的資訊, 並儲存成 json 檔

In [45]:
import json
import os

data_base = "/data/projects/fate/"
metadata_saved_dir = os.path.join(data_base, "persistence/metadata/homo_two_party_continual_input_regression_homo_nn.json")

# Serialize the summary of the `evaluation_0` component as indented JSON.
# NOTE(review): `pipeline` here is the pipeline reloaded from the saved .pkl, not
# `predict_pipeline`; the captured output only contains a "train" entry — confirm
# this is the summary that is meant to be stored.
metadata = json.dumps(pipeline.get_component('evaluation_0').get_summary(), indent=4)

# Create the target directory first so the write does not fail when it is missing.
os.makedirs(os.path.dirname(metadata_saved_dir), exist_ok=True)
with open(metadata_saved_dir, "w", encoding="utf-8") as json_file:
    json_file.write(metadata)

print(f"Write in metadata_saved_dir : {metadata_saved_dir} \n {metadata}")

Write in metadata_saved_dir : /data/projects/fate/persistence/metadata/homo_two_party_continual_input_regression_homo_nn.json 
 {
    "homo_nn_0": {
        "train": {
            "explained_variance": 0.9390856789311817,
            "mean_absolute_error": 0.2065045483317226,
            "mean_squared_error": 0.06403768233390633,
            "median_absolute_error": 0.17835265398025513,
            "r2_score": 0.9368337908609534,
            "root_mean_squared_error": 0.25305667810572857
        }
    }
}
