## 配置文件

In [7]:
import sys
sys.path.append('C:\\Users\\Administrator\\Desktop\\风控产品\\risk_project')
from risk_models.config.read_config.read_func import Read_Oracle
from risk_models.config.write_config.write_func import Write_Oracle
import datetime
import pandas as pd
import numpy as np

## 工单耗用模型
### 清洗模块

In [21]:
def clean_pd1():
    # 读取加工工单产出表和加工工单耗用表（默认筛选耗用和产出表相关工单的数据）
    EMS_WORK_OUTPUT = Read_Oracle().read_oracle(sql = """Select * from ods_zmxpq.EMS_WORK_OUTPUT WHERE org_code = '{code}' and CAPXACTION != 'D' 
                                                         and rownum <= 100000 """.format(code = '91310000132612172J'), database = 'dbods')
    EMS_WORK_INPUT = Read_Oracle().read_oracle(sql = """Select * from ods_zmxpq.EMS_WORK_INPUT  WHERE org_code = '{code}' and CAPXACTION != 'D' and wo_no in 
                                     (Select wo_no from ods_zmxpq.EMS_WORK_OUTPUT WHERE CAPXACTION != 'D' and rownum <= 100000) """.format(code = '91310000132612172J'), database = 'dbods')
    
    # 筛选字段并清洗数据
    EMS_WORK_OUTPUT = EMS_WORK_OUTPUT[['ORG_CODE', 'BATCH_NO', 'WO_NO', 'COP_G_NO', 'QTY_CO']]
    EMS_WORK_OUTPUT['QTY_CO'] = EMS_WORK_OUTPUT['QTY_CO'].apply( lambda x : ''.join(x.split(',')))
    EMS_WORK_OUTPUT['QTY_CO'] = EMS_WORK_OUTPUT['QTY_CO'].apply(lambda x: float(x))
    EMS_WORK_INPUT = EMS_WORK_INPUT[['ORG_CODE', 'WO_NO', 'COP_G_NO', 'QTY_CO']]
    EMS_WORK_INPUT['QTY_CO'] = EMS_WORK_INPUT['QTY_CO'].apply( lambda x : ''.join(x.split(',')))
    EMS_WORK_INPUT['QTY_CO'] = EMS_WORK_INPUT['QTY_CO'].apply(lambda x: float(x))
    
    # 计算（料号粒度）耗用量和产出量
    create_sum = EMS_WORK_OUTPUT.groupby(['ORG_CODE', 'BATCH_NO', 'WO_NO', 'COP_G_NO'], as_index=False)['QTY_CO'].sum()
    spend_sum = EMS_WORK_INPUT.groupby(['ORG_CODE', 'WO_NO', 'COP_G_NO'], as_index=False)['QTY_CO'].sum()
    MX_PRO_GDHY = pd.merge(create_sum, spend_sum, left_on=['ORG_CODE', 'WO_NO'], right_on=['ORG_CODE', 'WO_NO'], how='left')
    
    # 过滤掉空行
    MX_PRO_GDHY = MX_PRO_GDHY.dropna(axis=0, how='any')
    
    # 计算产出/投入的耗用比
    MX_PRO_GDHY['OUT_IN_RATIO'] = MX_PRO_GDHY['QTY_CO_x'].map(float) / MX_PRO_GDHY['QTY_CO_y'].map(float)
    
    # 字段重命名
    PRO_GDHY_DETAIL = MX_PRO_GDHY.rename(columns={'COP_G_NO_x':'COP_G_NO_OUT', 'COP_G_NO_y':'COP_G_NO_IN', 'QTY_CO_x':'QTY_CO_OUT', 'QTY_CO_y':'QTY_CO_IN'})
    
    # 更新ID; 模型运行时间
    PRO_GDHY_DETAIL['ID'] = range(len(PRO_GDHY_DETAIL))
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    PRO_GDHY_DETAIL['CHECK_TIME'] = datetime.datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
    
    # 整理结果表
    PRO_GDHY_DETAIL = PRO_GDHY_DETAIL[['ID','ORG_CODE','BATCH_NO','WO_NO','COP_G_NO_OUT','QTY_CO_OUT','COP_G_NO_IN','QTY_CO_IN','OUT_IN_RATIO','CHECK_TIME']]
    
    # 考虑到读写函数稳定性，将数值列转为字符串类型
    PRO_GDHY_DETAIL['QTY_CO_OUT'] = PRO_GDHY_DETAIL['QTY_CO_OUT'].map(str)
    PRO_GDHY_DETAIL['QTY_CO_IN'] = PRO_GDHY_DETAIL['QTY_CO_IN'].map(str)
    PRO_GDHY_DETAIL['OUT_IN_RATIO'] = PRO_GDHY_DETAIL['OUT_IN_RATIO'].map(str)
    
    # 读入数据库
    Write_Oracle().write_oracle('BD_RISK_DETAIL_PRODUCTION_PD1',PRO_GDHY_DETAIL, org_code='91310000132612172J')
    
    return PRO_GDHY_DETAIL

In [22]:
detail = clean_pd1()
detail

2021-05-11 10:41:36.408 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 3.285s
2021-05-11 10:41:54.998 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 18.58s
2021-05-11 10:42:03.214 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.753s
2021-05-11 10:42:06.914 | INFO     | risk_models.config.write_config.write_func:write_oracle:128 - Processing... Writing 445967 rows into database
2021-05-11 10:42:58.098 | INFO     | risk_models.config.write_config.write_func:write_oracle:135 - Insert data into BD_RISK_DETAIL_PRODUCTION_PD1 successfully! Total write time spent 56.13s


Unnamed: 0,ID,ORG_CODE,BATCH_NO,WO_NO,COP_G_NO_OUT,QTY_CO_OUT,COP_G_NO_IN,QTY_CO_IN,OUT_IN_RATIO,CHECK_TIME
496,0,91310000132612172J,0000062889,000100036187,000000410000033036,1.0,000000410000017504,1.0,1.0,2021-05-11 10:42:00
497,1,91310000132612172J,0000062890,000100036188,000000410000033040,1.0,000000410000017508,1.0,1.0,2021-05-11 10:42:00
536,2,91310000132612172J,0000063293,000100019121,000000410000021153,1.0,000000210000008946,10.0,0.1,2021-05-11 10:42:00
537,3,91310000132612172J,0000063293,000100019121,000000410000021153,1.0,000000410000021154,1.0,1.0,2021-05-11 10:42:00
538,4,91310000132612172J,0000063293,000100019121,000000410000021153,1.0,000000410000021155,1.0,1.0,2021-05-11 10:42:00
...,...,...,...,...,...,...,...,...,...,...
473586,445962,91310000132612172J,CA00013748,000100023089,000000410000036041,2.0,000000110000001880,36.0,0.05555555555555555,2021-05-11 10:42:00
473615,445963,91310000132612172J,CA00016685,000100084962,000000410000036042,1.0,000000110000001880,18.0,0.05555555555555555,2021-05-11 10:42:00
473616,445964,91310000132612172J,CA00016934,000100098639,000000410000051562,1.0,000000110000001890,2.0,0.5,2021-05-11 10:42:00
473617,445965,91310000132612172J,CA00017062,000100100279,000000410000051567,1.0,000000110000001890,2.5,0.4,2021-05-11 10:42:00


### 计算模块

In [23]:
def model_pd1():
    
    # 读取明细表
    PRO_GDHY_RESULT = Read_Oracle().read_oracle(sql= """ select * from BD_RISK_DETAIL_PRODUCTION_PD1 where org_code = '{code}' 
                                                        and iscurrent = 1 """.format(code = '91310000132612172J'), database = 'dbods')
    PRO_GDHY_RESULT['OUT_IN_RATIO'] = PRO_GDHY_RESULT['OUT_IN_RATIO'].map(float)
    
    # 阈值计算函数
    def Qfunc(df):
        Q1 = np.percentile(df['OUT_IN_RATIO'], 25)
        Q3 = np.percentile(df['OUT_IN_RATIO'], 75)
        IQR = Q3 - Q1
        outlier_step = 1.5 * IQR
        return (Q3 + outlier_step), (Q1 - outlier_step)
    
    # 打标签函数
    def Risk_scgs(ratio, high, low):
        if ratio > high:
            return '耗用比过高'
        if ratio < low:
            return '耗用比过低'
        else:
            return '耗用比正常'
        
    # 计算上下阈值
    cutoff = PRO_GDHY_RESULT.groupby(['ORG_CODE', 'BATCH_NO']).apply(Qfunc)
    cutoff = pd.DataFrame(cutoff).reset_index()
    cutoff['CUTOFF_HIGH'] = cutoff[0].map(lambda x: x[0])
    cutoff['CUTOFF_LOW'] = cutoff[0].map(lambda x: x[1])
    cutoff.drop(0, axis=1, inplace=True)
    
    #打标签
    PRO_GDHY_RESULT = pd.merge(PRO_GDHY_RESULT, cutoff, on=['ORG_CODE','BATCH_NO'], how='left')
    PRO_GDHY_RESULT['RISK_LABEL'] = PRO_GDHY_RESULT.apply(lambda x: Risk_scgs(x['OUT_IN_RATIO'], x['CUTOFF_HIGH'], x['CUTOFF_LOW']), axis=1)
    
    # 算分
    ratio = {'过高':float(-100 / len(PRO_GDHY_RESULT)), '过低':float(-100 / len(PRO_GDHY_RESULT))}
    # 计算分数
    PRO_GDHY_RESULT['SCORE'] = PRO_GDHY_RESULT['RISK_LABEL'].map(lambda x: ratio['过高'] if x == '耗用比过高' else (ratio['过低'] if x == '耗用比过低' else 0))
    
    # 更新ID; 模型运行时间
    PRO_GDHY_RESULT['ID'] = range(len(PRO_GDHY_RESULT))
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    PRO_GDHY_RESULT['CHECK_TIME'] = datetime.datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
    
    # 整理结果表
    PRO_GDHY_RESULT = PRO_GDHY_RESULT[['ID','ORG_CODE','BATCH_NO','WO_NO','RISK_LABEL','SCORE','CHECK_TIME']]
    
    # 读入数据库
    Write_Oracle().write_oracle('BD_RISK_RESULT_PRODUCTION_PD1',PRO_GDHY_RESULT, org_code='91310000132612172J')
    
    return PRO_GDHY_RESULT

In [24]:
result = model_pd1()
result

2021-05-11 10:43:07.854 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 9.353s
  IQR = Q3 - Q1
  x2 = take(ap, indices_above, axis=axis) * weights_above
2021-05-11 10:44:15.192 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.122s
2021-05-11 10:44:21.351 | INFO     | risk_models.config.write_config.write_func:write_oracle:128 - Processing... Writing 445967 rows into database
2021-05-11 10:44:48.682 | INFO     | risk_models.config.write_config.write_func:write_oracle:135 - Insert data into BD_RISK_RESULT_PRODUCTION_PD1 successfully! Total write time spent 33.92s


Unnamed: 0,ID,ORG_CODE,BATCH_NO,WO_NO,RISK_LABEL,SCORE,CHECK_TIME
0,0,91310000132612172J,000100106762,000100106762,耗用比正常,0.0,2021-05-11 10:44:14
1,1,91310000132612172J,000100031085,000100031085,耗用比正常,0.0,2021-05-11 10:44:14
2,2,91310000132612172J,000100031085,000100031085,耗用比正常,0.0,2021-05-11 10:44:14
3,3,91310000132612172J,000100031085,000100031085,耗用比正常,0.0,2021-05-11 10:44:14
4,4,91310000132612172J,000100106762,000100106762,耗用比正常,0.0,2021-05-11 10:44:14
...,...,...,...,...,...,...,...
445962,445962,91310000132612172J,000100153967,000100153967,耗用比正常,0.0,2021-05-11 10:44:14
445963,445963,91310000132612172J,000100153967,000100153967,耗用比正常,0.0,2021-05-11 10:44:14
445964,445964,91310000132612172J,000100153967,000100153967,耗用比正常,0.0,2021-05-11 10:44:14
445965,445965,91310000132612172J,000100153967,000100153967,耗用比正常,0.0,2021-05-11 10:44:14
