## 配置文件

In [1]:
import sys
sys.path.append('C:\\Users\\Administrator\\Desktop\\风控产品\\risk_project')
from risk_models.config.read_config.read_func import Read_Oracle
from risk_models.config.write_config.write_func import Write_Oracle
import datetime
import pandas as pd
import numpy as np

## 生产工时模型
### 清洗模块

In [41]:
def clean_pd2(org_code='91310000132612172J'):
    """Aggregate production working hours per work order and store the detail table.

    Reads the time-cost rows of one enterprise, converts the text field
    TOTAL_ACT_TIMECOST to float, sums it per
    (ORG_CODE, BATCH_NO, WO_TYPE, WO_NO) and writes the aggregated rows to
    BD_RISK_DETAIL_PRODUCTION_PD2.

    Parameters
    ----------
    org_code : str, optional
        Enterprise code used in the SQL filter and passed to the writer.
        Defaults to the enterprise code that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Columns ID, ORG_CODE, BATCH_NO, WO_TYPE, WO_NO, TIMECOST, CHECK_TIME,
        one row per work order.

    Raises
    ------
    ValueError
        If ``org_code`` contains a single quote, or if a cleaned
        TOTAL_ACT_TIMECOST value cannot be parsed as a float.
    """
    org_code = str(org_code)
    # The code is interpolated into the SQL text below, so refuse anything
    # that could terminate the quoted literal.
    if "'" in org_code:
        raise ValueError("org_code must not contain a single quote: {!r}".format(org_code))

    def to_hours(text):
        # Drop thousands separators and hyphens, then parse the number.
        # NOTE(review): removing '-' also strips a leading minus sign, so a
        # negative value becomes positive; kept as in the original - confirm intent.
        return float(text.replace(',', '').replace('-', ''))

    # Read the time-cost rows, excluding deleted records and rows where any used field is empty.
    timecost_raw = Read_Oracle().read_oracle(sql = """ Select org_code, batch_no, wo_no, wo_type, TOTAL_ACT_TIMECOST from ods_zmxpq.EMS_TIMECOST_INFO where CAPXACTION != 'D' 
                                                        and org_code = '{code}' and batch_no is not null and wo_no is not null and wo_type is not null 
                                                        and TOTAL_ACT_TIMECOST is not null""".format(code = org_code), database = 'dbods')

    # Clean the "total actual working hours" field in a single pass,
    # without modifying the frame returned by the reader.
    timecost_clean = timecost_raw.assign(
        TOTAL_ACT_TIMECOST=timecost_raw['TOTAL_ACT_TIMECOST'].map(to_hours)
    )

    # Sum the working hours at enterprise / batch / work-order-type / work-order granularity
    # and rename the aggregated field.
    detail = (
        timecost_clean
        .groupby(['ORG_CODE', 'BATCH_NO', 'WO_TYPE', 'WO_NO'], as_index=False)['TOTAL_ACT_TIMECOST']
        .sum()
        .rename(columns={'TOTAL_ACT_TIMECOST': 'TIMECOST'})
    )

    # Sequential ID and model run time (truncated to whole seconds).
    detail['ID'] = range(len(detail))
    detail['CHECK_TIME'] = datetime.datetime.now().replace(microsecond=0)

    # Arrange the result table columns.
    detail = detail[['ID', 'ORG_CODE', 'BATCH_NO', 'WO_TYPE', 'WO_NO', 'TIMECOST', 'CHECK_TIME']]

    # Write the result to the database.
    Write_Oracle().write_oracle('BD_RISK_DETAIL_PRODUCTION_PD2', detail, org_code=org_code)

    return detail

In [42]:
# Run the cleaning step (reads the time-cost rows and writes
# BD_RISK_DETAIL_PRODUCTION_PD2), then display the aggregated detail table.
detail = clean_pd2()
detail

2021-05-12 15:12:57.178 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 10.65s
2021-05-12 15:13:09.230 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.102s
2021-05-12 15:13:13.043 | INFO     | risk_models.config.write_config.write_func:write_oracle:128 - Processing... Writing 872896 rows into database
2021-05-12 15:14:05.363 | INFO     | risk_models.config.write_config.write_func:write_oracle:135 - Insert data into BD_RISK_DETAIL_PRODUCTION_PD2 successfully! Total write time spent 56.88s


Unnamed: 0,ID,ORG_CODE,BATCH_NO,WO_TYPE,WO_NO,TIMECOST,CHECK_TIME
0,0,91310000132612172J,0-0,1,000100155718,48.2,2021-05-12 15:13:07
1,1,91310000132612172J,00001,1,000100262102,0.0,2021-05-12 15:13:07
2,2,91310000132612172J,00001,1,000100341468,0.0,2021-05-12 15:13:07
3,3,91310000132612172J,00001,1,000100383022,0.0,2021-05-12 15:13:07
4,4,91310000132612172J,00001,1,000100383023,0.0,2021-05-12 15:13:07
...,...,...,...,...,...,...,...
872891,872891,91310000132612172J,SY49,1,000100320632,3.4,2021-05-12 15:13:07
872892,872892,91310000132612172J,SY49,1,000100323459,7.9,2021-05-12 15:13:07
872893,872893,91310000132612172J,SY49,1,000100325141,11.9,2021-05-12 15:13:07
872894,872894,91310000132612172J,SY49,2,000010112738,30.0,2021-05-12 15:13:07


### 计算模块

In [2]:
def model_pd2(org_code='91310000132612172J'):
    """Label abnormal production working hours and store a penalty score per work order.

    Reads the current rows of BD_RISK_DETAIL_PRODUCTION_PD2 for one
    enterprise, computes outlier thresholds per (ORG_CODE, BATCH_NO, WO_TYPE)
    group as Q3 + 1.5 * IQR and Q1 - 1.5 * IQR of TIMECOST, labels every
    work order against the thresholds of its group, assigns a penalty of
    -100 / row_count to each abnormal row and writes the result to
    BD_RISK_RESULT_PRODUCTION_PD2.

    Parameters
    ----------
    org_code : str, optional
        Enterprise code used in the SQL filter and passed to the writer.
        Defaults to the enterprise code that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Columns ID, ORG_CODE, BATCH_NO, WO_TYPE, WO_NO, RISK_LABEL, SCORE,
        CHECK_TIME. When the detail table has no rows for the enterprise an
        empty frame with these columns is returned and nothing is written.

    Raises
    ------
    ValueError
        If ``org_code`` contains a single quote.
    """
    org_code = str(org_code)
    # The code is interpolated into the SQL text below, so refuse anything
    # that could terminate the quoted literal.
    if "'" in org_code:
        raise ValueError("org_code must not contain a single quote: {!r}".format(org_code))

    group_keys = ['ORG_CODE', 'BATCH_NO', 'WO_TYPE']
    result_columns = ['ID', 'ORG_CODE', 'BATCH_NO', 'WO_TYPE', 'WO_NO', 'RISK_LABEL', 'SCORE', 'CHECK_TIME']
    label_high = '生产工时高于阈值'
    label_low = '生产工时低于阈值'
    label_normal = '生产工时正常'

    # Read the detail table.
    detail = Read_Oracle().read_oracle(sql= """ select * from BD_RISK_DETAIL_PRODUCTION_PD2 where org_code = '{code}' 
                                                            and iscurrent = 1 """.format(code = org_code), database = 'dbods')

    # Without rows there are no thresholds to compute and the per-row penalty
    # (-100 / row_count) would divide by zero; return an empty result instead.
    if len(detail) == 0:
        return pd.DataFrame(columns=result_columns)

    def lower_quartile(hours):
        return np.percentile(hours, 25)

    def upper_quartile(hours):
        return np.percentile(hours, 75)

    # Upper and lower thresholds per group.
    cutoff = (
        detail.groupby(group_keys)['TIMECOST']
        .agg(Q1=lower_quartile, Q3=upper_quartile)
        .reset_index()
    )
    outlier_step = 1.5 * (cutoff['Q3'] - cutoff['Q1'])
    cutoff['CUTOFF_HIGH'] = cutoff['Q3'] + outlier_step
    cutoff['CUTOFF_LOW'] = cutoff['Q1'] - outlier_step
    cutoff = cutoff[group_keys + ['CUTOFF_HIGH', 'CUTOFF_LOW']]

    # Attach the thresholds to every work order and label it; the "high"
    # condition is listed first so it takes precedence, and rows matching
    # neither condition (including NaN comparisons) are labelled normal.
    scored = pd.merge(detail, cutoff, on=group_keys, how='left')
    timecost = scored['TIMECOST']
    scored['RISK_LABEL'] = np.select(
        [
            (timecost > scored['CUTOFF_HIGH']).to_numpy(),
            (timecost < scored['CUTOFF_LOW']).to_numpy(),
        ],
        [label_high, label_low],
        default=label_normal,
    )

    # Penalty per abnormal row; high and low are kept as separate entries so
    # they can be tuned independently. Normal rows score 0.
    penalty = {
        label_high: -100.0 / len(scored),
        label_low: -100.0 / len(scored),
    }
    scored['SCORE'] = scored['RISK_LABEL'].map(penalty).fillna(0.0)

    # Sequential ID and model run time (truncated to whole seconds).
    scored['ID'] = range(len(scored))
    scored['CHECK_TIME'] = datetime.datetime.now().replace(microsecond=0)

    # Arrange the result table columns.
    scored = scored[result_columns]

    # Write the result to the database.
    Write_Oracle().write_oracle('BD_RISK_RESULT_PRODUCTION_PD2', scored, org_code=org_code)

    return scored


In [3]:
# Run the scoring model (reads the current detail rows and writes
# BD_RISK_RESULT_PRODUCTION_PD2), then display the labelled result table.
result = model_pd2()
result

2021-06-04 10:18:06.328 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 21.12s
2021-06-04 10:19:10.622 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 2.981s
2021-06-04 10:20:43.095 | INFO     | risk_models.config.write_config.write_func:write_oracle:141 - Processing... Writing 872896 rows into database
2021-06-04 10:21:39.617 | INFO     | risk_models.config.write_config.write_func:write_oracle:148 - Insert data into BD_RISK_RESULT_PRODUCTION_PD2 successfully! Total write time spent 152.3s


Unnamed: 0,ID,ORG_CODE,BATCH_NO,WO_TYPE,WO_NO,RISK_LABEL,SCORE,CHECK_TIME
0,0,91310000132612172J,158,1,000100580581,生产工时正常,0.0,2021-06-04 10:19:06
1,1,91310000132612172J,165,2,000010338288,生产工时正常,0.0,2021-06-04 10:19:06
2,2,91310000132612172J,10108,1,000100365377,生产工时正常,0.0,2021-06-04 10:19:06
3,3,91310000132612172J,10108,1,000100365378,生产工时正常,0.0,2021-06-04 10:19:06
4,4,91310000132612172J,158,1,000100580961,生产工时正常,0.0,2021-06-04 10:19:06
...,...,...,...,...,...,...,...,...
872891,872891,91310000132612172J,5170-5170,1,000100487367,生产工时正常,0.0,2021-06-04 10:19:06
872892,872892,91310000132612172J,5170-5170,1,000100487401,生产工时正常,0.0,2021-06-04 10:19:06
872893,872893,91310000132612172J,5170-5170,1,000100488062,生产工时正常,0.0,2021-06-04 10:19:06
872894,872894,91310000132612172J,5170-5170,1,000100488120,生产工时正常,0.0,2021-06-04 10:19:06


In [39]:
# Show only the work orders whose label is not '生产工时正常' (normal).
# NOTE(review): the recorded output of this cell carries CHECK_TIME 2021-05-12,
# while the model run recorded above shows 2021-06-04, so it appears to come
# from an earlier execution - re-run the notebook top to bottom before relying on it.
result[result['RISK_LABEL']!='生产工时正常']

Unnamed: 0,ID,ORG_CODE,BATCH_NO,WO_TYPE,WO_NO,RISK_LABEL,SCORE,CHECK_TIME
9,9,91310000132612172J,00001-99999,1,000100156938,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
11,11,91310000132612172J,00001-99999,1,000100163929,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
13,13,91310000132612172J,00001-99999,1,000100163938,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
16,16,91310000132612172J,00001-99999,1,000100163992,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
19,19,91310000132612172J,00001-99999,1,000100163995,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
...,...,...,...,...,...,...,...,...
872870,872870,91310000132612172J,SY43,1,000100625756,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
872874,872874,91310000132612172J,SY43,1,000100625760,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
872877,872877,91310000132612172J,SY43,1,000100625763,生产工时高于阈值,-0.000115,2021-05-12 14:37:37
872878,872878,91310000132612172J,SY43,1,000100625764,生产工时高于阈值,-0.000115,2021-05-12 14:37:37


In [4]:
# Number of abnormal work orders per risk label (normal rows excluded).
risk_count = (
    result.loc[result['RISK_LABEL'].ne('生产工时正常')]
    .groupby('RISK_LABEL', as_index=False)['ID']
    .count()
)
risk_count

Unnamed: 0,RISK_LABEL,ID
0,生产工时低于阈值,1181
1,生产工时高于阈值,77346


In [40]:
# Total penalty over all work orders: normal rows contribute 0 and every
# abnormal row contributes -100 / row_count (see model_pd2).
result['SCORE'].sum()

-8.996146161742066