## MLOpsへのスコアリングリクエストのシミュレーション

参考 : [sample_request.ipynb](sample_request.ipynb)

In [1]:
import requests
import json
import time
import pandas as pd
import numpy as np

### シミュレーションデータの作成

In [2]:
# Columns required in a request to the scoring API.
# The order matters: get_request_data() emits one value per entry, in this order.
columns = ["LIMIT_BAL", "SEX", "EDUCATION", "MARRIAGE", "AGE", "PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6", "BILL_AMT1", "BILL_AMT2", "BILL_AMT3", "BILL_AMT4", "BILL_AMT5", "PAY_AMT1", "PAY_AMT2", "PAY_AMT3", "PAY_AMT4", "PAY_AMT5", "PAY_AMT6"]
len(columns)

22

In [3]:
## Distribution of the training data
# Per-column summary statistics; the 'stats' column labels each row
# (count, mean, std, min, 25%, 50%, 75%, max).
df_stats = pd.read_csv("UciCreditCard_train_data_summary.csv")
df_stats[['stats', *columns]]

Unnamed: 0,stats,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,count,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,...,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0
1,mean,167484.322667,1.603733,1.853133,1.551867,35.4855,-0.0167,-0.133767,-0.1662,-0.220667,...,49179.075167,47013.15,43262.948967,40311.400967,5663.5805,5921.163,5225.6815,4826.076867,4799.387633,5215.502567
2,std,129747.661567,0.489129,0.790349,0.52197,9.217904,1.123802,1.197186,1.196868,1.169139,...,71173.768783,69349.39,64332.856134,60797.15577,16563.280354,23040.87,17606.96147,15666.159744,15278.305679,17777.465775
3,min,10000.0,1.0,0.0,0.0,21.0,-2.0,-2.0,-2.0,-2.0,...,-69777.0,-157264.0,-170000.0,-81334.0,0.0,0.0,0.0,0.0,0.0,0.0
4,25%,50000.0,1.0,1.0,1.0,28.0,-1.0,-1.0,-1.0,-1.0,...,2984.75,2666.25,2326.75,1763.0,1000.0,833.0,390.0,296.0,252.5,117.75
5,50%,140000.0,2.0,2.0,2.0,34.0,0.0,0.0,0.0,0.0,...,21200.0,20088.5,19052.0,18104.5,2100.0,2009.0,1800.0,1500.0,1500.0,1500.0
6,75%,240000.0,2.0,2.0,2.0,41.0,0.0,0.0,0.0,0.0,...,64006.25,60164.75,54506.0,50190.5,5006.0,5000.0,4505.0,4013.25,4031.5,4000.0
7,max,1000000.0,2.0,6.0,3.0,79.0,8.0,8.0,8.0,8.0,...,983931.0,1664089.0,891586.0,927171.0,873552.0,1684259.0,896040.0,621000.0,426529.0,528666.0


In [4]:
## ランダムにデータを生成する関数

def random_norm_value(col, stats=None):
    """Draw one integer from a normal distribution fitted to a column.

    The mean and standard deviation are read from the 'mean' and 'std' rows
    of the summary table (rows are identified by its 'stats' column).
    Used for "BILL_AMT1", "BILL_AMT2", "BILL_AMT3", "BILL_AMT4", "BILL_AMT5";
    the result is not clipped, so it may be negative.

    Args:
        col: Name of the column whose statistics are used.
        stats: Summary DataFrame to read from. When omitted, the
            notebook-level ``df_stats`` is used.

    Returns:
        The sampled value rounded to the nearest integer, as an ``int``.

    Raises:
        IndexError: If the table has no 'mean' or 'std' row.
    """
    table = df_stats if stats is None else stats
    # Extract plain floats so NumPy receives scalars instead of one-element
    # pandas Series.
    mean = float(table.loc[table['stats'] == 'mean', col].iloc[0])
    std = float(table.loc[table['stats'] == 'std', col].iloc[0])
    return int(round(np.random.normal(loc=mean, scale=std)))

def random_norm_value_clip(col, stats=None):
    """Draw one non-negative integer from a normal distribution fitted to a column.

    The mean and standard deviation are read from the 'mean' and 'std' rows
    of the summary table (rows are identified by its 'stats' column).
    Used for "LIMIT_BAL", "AGE", "PAY_AMT1", "PAY_AMT2", "PAY_AMT3",
    "PAY_AMT4", "PAY_AMT5", "PAY_AMT6". Negative draws are replaced by 0.

    Args:
        col: Name of the column whose statistics are used.
        stats: Summary DataFrame to read from. When omitted, the
            notebook-level ``df_stats`` is used.

    Returns:
        The sampled value rounded to the nearest integer and clipped at 0,
        as an ``int``.

    Raises:
        IndexError: If the table has no 'mean' or 'std' row.
    """
    table = df_stats if stats is None else stats
    # Extract plain floats so NumPy receives scalars instead of one-element
    # pandas Series.
    mean = float(table.loc[table['stats'] == 'mean', col].iloc[0])
    std = float(table.loc[table['stats'] == 'std', col].iloc[0])
    sampled = int(round(np.random.normal(loc=mean, scale=std)))
    return max(0, sampled)

def random_unif_value(col, stats=None):
    """Draw one integer uniformly from the column's observed [min, max] range.

    Used for "SEX", "EDUCATION", "MARRIAGE", "PAY_0", "PAY_2", "PAY_3",
    "PAY_4", "PAY_5", "PAY_6". The bounds are read from the 'min' and 'max'
    rows of the summary table (rows are identified by its 'stats' column).

    Every integer in the inclusive range is equally likely. Rounding a
    continuous uniform draw would give the two endpoint values only half the
    probability of the interior ones, so an integer sampler is used instead.

    Args:
        col: Name of the column whose statistics are used.
        stats: Summary DataFrame to read from. When omitted, the
            notebook-level ``df_stats`` is used.

    Returns:
        An ``int`` between the rounded min and max, both inclusive.

    Raises:
        IndexError: If the table has no 'min' or 'max' row.
    """
    table = df_stats if stats is None else stats
    low = int(round(float(table.loc[table['stats'] == 'min', col].iloc[0])))
    high = int(round(float(table.loc[table['stats'] == 'max', col].iloc[0])))
    # randint excludes its upper bound, hence the + 1.
    return int(np.random.randint(low, high + 1))

def get_request_data():
    """Return one scoring row as a list, with one random value per entry of ``columns``.

    BILL_AMT* columns use the unclipped normal sampler, LIMIT_BAL / AGE /
    PAY_AMT* use the clipped normal sampler, and every other column uses the
    uniform sampler.
    """
    unclipped_normal = {"BILL_AMT1", "BILL_AMT2", "BILL_AMT3", "BILL_AMT4", "BILL_AMT5"}
    clipped_normal = {"LIMIT_BAL", "AGE", "PAY_AMT1", "PAY_AMT2", "PAY_AMT3",
                      "PAY_AMT4", "PAY_AMT5", "PAY_AMT6"}

    def sample(name):
        # Choose the generator that matches the column's group.
        if name in unclipped_normal:
            return random_norm_value(col=name)
        if name in clipped_normal:
            return random_norm_value_clip(col=name)
        # Remaining columns: "SEX", "EDUCATION", "MARRIAGE", "PAY_0", "PAY_2" ... "PAY_6"
        return random_unif_value(col=name)

    return [sample(name) for name in columns]

In [5]:
# Spot-check the samplers by drawing one value from each.
# NOTE(review): 'AGE' is drawn here with the unclipped sampler, whereas
# get_request_data() routes 'AGE' through random_norm_value_clip.
print( random_norm_value('BILL_AMT1') )
print( random_norm_value('AGE') )
print( random_unif_value('SEX') )

223386
46
1


In [6]:
# Scoring data generated from random numbers (one value per entry of `columns`)
get_request_data()

[52614,
 2,
 4,
 3,
 43,
 6,
 -2,
 3,
 3,
 8,
 7,
 31400,
 69683,
 15923,
 44973,
 99354,
 0,
 21027,
 38112,
 45425,
 13228,
 4988]

### シミュレーションデータによるスコアリングの実施

In [7]:
## Utility関数

def elem_to_str(lst):
    """Convert every element of a list to its string form.

    Example: [1, 2, 3]  ->  ['1', '2', '3']
    """
    return list(map(str, lst))

def list_to_str(lst_external):
    """Convert every element of every inner list to its string form.

    Example: [[1, 2, 3], [4, 5, 6]]  ->  [['1', '2', '3'], ['4', '5', '6']]
    """
    return [elem_to_str(inner) for inner in lst_external]

def get_pred(res_lst):
    """Return the second entry of each score row, converted to float.

    In the scoring response shown in this notebook, each row is
    [p(class 0), p(class 1)] as strings, so this yields prob(y=1) per row.

    Example: [['0.54', '0.46']]  ->  [0.46]
    """
    return [float(row[1]) for row in res_lst]

In [8]:
# Scoring API endpoint
api_point = "http://34.222.144.49:1080/7956de57-b45d-40d7-9b38-f6c4f6478b39/model/score"
# Request headers: the payload is sent as JSON
headers = {'Content-Type': 'application/json'}

In [9]:
# Send a single scoring request built from one randomly generated row.
data = get_request_data()
print("Scoring input data: {}".format(data))
# Payload: column names under "fields", and the row values (as strings) under "rows".
json_data = {"fields":columns, "rows":[elem_to_str(data)]}

response = requests.post(url=api_point, headers=headers, data=json.dumps(json_data))
response.json()

Scoring input data: [284111, 2, 5, 1, 40, 6, 6, 2, 0, 1, 6, 56718, 119811, 47836, 48965, 52789, 671, 12569, 1198, 5460, 15546, 12784]


{'fields': ['default payment next month.0', 'default payment next month.1'],
 'id': '1a6280c6-1f71-11eb-a0a1-0242ac11000f',
 'score': [['0.5378406047821045', '0.4621593952178955']]}

In [11]:
## シミュレーション実施関数

def data_pump(n_scoring=10, timeout=30):
    """Send ``n_scoring`` randomly generated rows to the scoring API, one at a time.

    Each iteration builds one row with get_request_data(), posts it to
    ``api_point`` and records the returned probability. A random pause of
    0.1 to 3 seconds is inserted between consecutive requests.

    Args:
        n_scoring: Number of scoring requests to send (integer).
        timeout: Seconds to wait for each HTTP request before giving up, so
            an unresponsive server cannot block the loop indefinitely.

    Returns:
        A list of dicts, one per request, with keys 'count' (0-based request
        index), 'data' (the generated input row) and 'predict' (second score
        value of the first returned row, as float).

    Raises:
        requests.exceptions.RequestException: On a connection error, a
            timeout, or a non-2xx HTTP status.
    """
    res_list = []
    for counter in range(n_scoring):
        print('Count: {}'.format(counter))
        data = get_request_data()   # input row
        json_data = {"fields": columns, "rows": [elem_to_str(data)]}
        response = requests.post(url=api_point, headers=headers,
                                 data=json.dumps(json_data), timeout=timeout)
        # Fail with the HTTP status instead of a confusing JSON/KeyError later.
        response.raise_for_status()
        res_list.append({
            'count': counter,
            'data': data,
            'predict': get_pred(response.json()['score'])[0],
        })
        # Pause between requests only; there is nothing to wait for after the last one.
        if counter < n_scoring - 1:
            time.sleep(np.random.uniform(low=0.1, high=3))
    return res_list

In [12]:
# Send n_scoring scoring requests and keep the per-request results
res = data_pump(n_scoring=15)

Count: 0
Count: 1
Count: 2
Count: 3
Count: 4
Count: 5
Count: 6
Count: 7
Count: 8
Count: 9
Count: 10
Count: 11
Count: 12
Count: 13
Count: 14


In [13]:
# Inspect the first two results (request index, input row, predicted probability)
res[:2]

[{'count': 0,
  'data': [89401,
   2,
   1,
   1,
   35,
   8,
   6,
   0,
   6,
   2,
   6,
   165578,
   -7009,
   -70084,
   45021,
   96086,
   0,
   0,
   194,
   0,
   29641,
   9365],
  'predict': 0.529798835515976},
 {'count': 1,
  'data': [66750,
   2,
   5,
   2,
   32,
   2,
   3,
   8,
   0,
   5,
   0,
   12501,
   109468,
   40397,
   161493,
   31828,
   8559,
   0,
   42849,
   0,
   0,
   18800],
  'predict': 0.5029044821858406}]

#### Grafanaダッシュボード

![grafana_top](image/grafana_top.png)

スコアリング結果（prob(y=1)）の分布

![grafana_res](image/grafana_res.png)