In [1]:
import sys
sys.path.append('C:\\Users\\Administrator\\Desktop\\风控产品\\risk_project')
from risk_models.config.read_config.read_func import Read_Oracle
from risk_models.config.write_config.write_func import Write_Oracle
import datetime
import pandas as pd
import numpy as np

In [3]:
def cleanData(org_code='9527777'):
    """Build the per-batch labor-cost detail table and write it to Oracle.

    Reads the finance detail table (EMS_FINANCE_INFO) and the manufacturing
    consumption table (EMS_MANUFACTURE_TOTAL), keeps the rows of one
    organisation, sums the '制造费用-工资' amounts per batch and attaches the
    batch type found in the manufacturing table.

    Args:
        org_code: Organisation code used to filter both source tables and
            passed through to the writer. Defaults to the code that was
            previously hard-coded.

    Returns:
        pandas.DataFrame with columns
        ID, ORG_CODE, BATCH_TYPE, BATCH_NO, TRADE_TOTAL, CHECK_TIME.
        A batch that maps to several batch types yields one row per type.
        Batches with no match in the manufacturing table are kept with a
        missing BATCH_TYPE.

    Side effects:
        Writes the returned frame to BD_RISK_DETAIL_FINANCE_FN2 via
        Write_Oracle, even when the frame is empty (unchanged behaviour).
    """
    # Both tables are read in full (minus rows flagged CAPXACTION = 'D') and filtered in pandas.
    finance_raw = Read_Oracle().read_oracle(sql= """ select * from EMS_FINANCE_INFO where CAPXACTION != 'D' """, database = 'dbods')
    manufacture_raw = Read_Oracle().read_oracle(sql= """ select * from EMS_MANUFACTURE_TOTAL where CAPXACTION != 'D' """, database = 'dbods')

    # Finance rows: only the wage account of the requested organisation, and
    # only rows that carry a batch code. Every step builds a new frame so no
    # filtered slice is mutated in place (avoids SettingWithCopy warnings).
    is_wage_row = (finance_raw['KSTAR'] == '制造费用-工资') & (finance_raw['ORG_CODE'] == org_code)
    finance = finance_raw[is_wage_row].dropna(subset=['OBJ_CODE'])

    # Amounts may arrive as text with thousands separators ('1,342,132').
    # Strip the commas from strings only, so values that are already numeric
    # or missing no longer crash the conversion. Malformed text still raises.
    amount_text = finance['TRADE_TOTAL'].map(
        lambda value: value.replace(',', '') if isinstance(value, str) else value
    )
    finance = finance.assign(TRADE_TOTAL=pd.to_numeric(amount_text).astype(float))
    # A row without an amount carries no cost information; drop it instead of
    # letting it turn into a zero-cost batch in the sum below.
    finance = finance.dropna(subset=['TRADE_TOTAL'])

    # One row per (organisation, batch): a batch can have several finance records.
    # OBJ_CODE is the batch number, so rename it up front. Taking BATCH_NO from
    # the finance side keeps the identifier for batches that have no match in
    # the manufacturing table (previously those rows ended up with a null BATCH_NO).
    cost_per_batch = (
        finance.groupby(['ORG_CODE', 'OBJ_CODE'])['TRADE_TOTAL']
        .sum()
        .reset_index()
        .rename(columns={'OBJ_CODE': 'BATCH_NO'})
    )

    # Distinct (batch, type) pairs of the same organisation.
    batch_types = (
        manufacture_raw.loc[manufacture_raw['ORG_CODE'] == org_code, ['BATCH_NO', 'BATCH_TYPE']]
        .dropna()
        .drop_duplicates()
    )

    labor_cost = cost_per_batch.merge(batch_types, on='BATCH_NO', how='left')

    # Sequential ID plus the model run time, truncated to whole seconds.
    check_time = datetime.datetime.now().replace(microsecond=0)
    FINANCE_LABOR_COST_DETAIL = labor_cost.assign(
        ID=range(len(labor_cost)),
        CHECK_TIME=check_time,
    )[['ID', 'ORG_CODE', 'BATCH_TYPE', 'BATCH_NO', 'TRADE_TOTAL', 'CHECK_TIME']]

    Write_Oracle().write_oracle('BD_RISK_DETAIL_FINANCE_FN2', FINANCE_LABOR_COST_DETAIL, org_code=org_code)
    return FINANCE_LABOR_COST_DETAIL

In [4]:
def calData():
    """Label each batch's labor cost as low / normal / high and write the result.

    The detail table is obtained from cleanData(), which re-reads the source
    tables and re-writes the detail table on every call. For each
    (ORG_CODE, BATCH_TYPE) group the fences Q1 - 1.5*IQR and Q3 + 1.5*IQR are
    computed over TRADE_TOTAL; a batch below the lower fence is labelled
    '人工总成本偏低', above the upper fence '人工总成本偏高', otherwise
    '人工总成本正常'. Rows without a BATCH_TYPE belong to no group, get no
    fences and are therefore labelled normal.

    Each abnormal row scores -100 / (number of rows); normal rows score 0.0.

    Returns:
        pandas.DataFrame with columns
        ID, ORG_CODE, BATCH_TYPE, BATCH_NO, RISK_LABEL, SCORE, CHECK_TIME.

    Side effects:
        Writes the result to BD_RISK_RESULT_FINANCE_FN2 via Write_Oracle.
        When there is nothing to score, an empty frame with the result
        columns is returned and nothing is written (the previous code raised
        before reaching the write in that case).
    """
    org_code = '9527777'
    label_low, label_high, label_normal = '人工总成本偏低', '人工总成本偏高', '人工总成本正常'
    group_keys = ['ORG_CODE', 'BATCH_TYPE']
    result_columns = ['ID', 'ORG_CODE', 'BATCH_TYPE', 'BATCH_NO', 'RISK_LABEL', 'SCORE', 'CHECK_TIME']

    def iqr_fences(amounts):
        """Return (lower, upper) Tukey fences, 1.5 * IQR beyond the quartiles."""
        q1, q3 = np.percentile(amounts, [25, 75])
        step = 1.5 * (q3 - q1)
        return q1 - step, q3 + step

    # Load the labor-cost detail table and keep the rows of this organisation.
    detail = cleanData()
    detail = detail[detail['ORG_CODE'] == org_code]

    # Nothing to score: return early, because the per-row penalty below divides by the row count.
    if detail.empty:
        return pd.DataFrame(columns=result_columns)

    # One fence pair per (organisation, batch type), derived from the history in the table.
    fence_rows = [
        (*key, *iqr_fences(amounts))
        for key, amounts in detail.groupby(group_keys)['TRADE_TOTAL']
    ]
    cutoffs = pd.DataFrame(
        fence_rows, columns=group_keys + ['cutoff_low', 'cutoff_high']
    ).astype({'cutoff_low': float, 'cutoff_high': float})

    scored = detail.merge(cutoffs, on=group_keys, how='left')

    # Comparisons against a missing fence are False, so such rows fall through to "normal".
    amount = scored['TRADE_TOTAL']
    labels = np.select(
        [(amount < scored['cutoff_low']).to_numpy(), (amount > scored['cutoff_high']).to_numpy()],
        [label_low, label_high],
        default=label_normal,
    )

    # Too-high and too-low batches carry the same penalty.
    penalty = -100 / len(scored)
    scores = np.where(labels == label_normal, 0.0, penalty)

    # Fresh sequential ID and model run time (whole seconds) replace the ones from the detail table.
    check_time = datetime.datetime.now().replace(microsecond=0)
    FINANCE_LABOR_COST_RESULT = scored.assign(
        RISK_LABEL=labels,
        SCORE=scores,
        ID=range(len(scored)),
        CHECK_TIME=check_time,
    )[result_columns]

    Write_Oracle().write_oracle('BD_RISK_RESULT_FINANCE_FN2', FINANCE_LABOR_COST_RESULT, org_code=org_code)
    return FINANCE_LABOR_COST_RESULT
    

In [5]:
# Build the labor-cost detail table. Note that cleanData() does not only return the
# frame: it also writes it to BD_RISK_DETAIL_FINANCE_FN2 every time this cell runs.
FINANCE_LABOR_COST_DETAIL = cleanData()
# Bare last expression so the notebook renders the frame.
FINANCE_LABOR_COST_DETAIL

2021-04-30 11:22:11.273 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.095s
2021-04-30 11:22:11.372 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.096s
2021-04-30 11:22:11.535 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.088s
2021-04-30 11:22:13.111 | INFO     | risk_models.config.write_config.write_func:write_oracle:128 - Processing... Writing 12 rows into database
2021-04-30 11:22:13.147 | INFO     | risk_models.config.write_config.write_func:write_oracle:135 - Insert data into BD_RISK_DETAIL_FINANCE_FN2 successfully! Total write time spent 1.705s


Unnamed: 0,ID,ORG_CODE,BATCH_TYPE,BATCH_NO,TRADE_TOTAL,CHECK_TIME
0,0,9527777,ARJ21,WBS132,654324.0,2021-04-30 11:22:11
1,1,9527777,ARJ21,WBS133,1342132.0,2021-04-30 11:22:11
2,2,9527777,ARJ22,WBS133,1342132.0,2021-04-30 11:22:11
3,3,9527777,ARJ21,WBS144,3421351.0,2021-04-30 11:22:11
4,4,9527777,ARJ22,WBS144,3421351.0,2021-04-30 11:22:11
5,5,9527777,ARJ21,WBS154,3421351.0,2021-04-30 11:22:11
6,6,9527777,ARJ21,WBS155,654321.0,2021-04-30 11:22:11
7,7,9527777,ARJ21,WBS156,153241.0,2021-04-30 11:22:11
8,8,9527777,ARJ21,WBS157,5432653.0,2021-04-30 11:22:11
9,9,9527777,ARJ21,WBS158,3412552.0,2021-04-30 11:22:11


In [6]:
# Score the batches. calData() calls cleanData() itself, so the source tables are
# re-read and the detail table is written again before the scored result is written
# to BD_RISK_RESULT_FINANCE_FN2.
FINANCE_LABOR_COST_RESULT = calData()
# Bare last expression so the notebook renders the frame.
FINANCE_LABOR_COST_RESULT

2021-04-30 11:22:13.286 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.100s
2021-04-30 11:22:13.388 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.099s
2021-04-30 11:22:13.521 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.083s
2021-04-30 11:22:13.669 | INFO     | risk_models.config.write_config.write_func:write_oracle:128 - Processing... Writing 12 rows into database
2021-04-30 11:22:13.692 | INFO     | risk_models.config.write_config.write_func:write_oracle:135 - Insert data into BD_RISK_DETAIL_FINANCE_FN2 successfully! Total write time spent 0.258s
2021-04-30 11:22:13.833 | INFO     | risk_models.config.read_config.read_func:read_oracle:82 - Read Table successfully! , Total read time spent 0.089s
2021-04-30 11:22:14.020 | INFO     | risk_models.config.write_conf

Unnamed: 0,ID,ORG_CODE,BATCH_TYPE,BATCH_NO,RISK_LABEL,SCORE,CHECK_TIME
0,0,9527777,ARJ21,WBS132,人工总成本正常,0.0,2021-04-30 11:22:13
1,1,9527777,ARJ21,WBS133,人工总成本正常,0.0,2021-04-30 11:22:13
2,2,9527777,ARJ22,WBS133,人工总成本正常,0.0,2021-04-30 11:22:13
3,3,9527777,ARJ21,WBS144,人工总成本正常,0.0,2021-04-30 11:22:13
4,4,9527777,ARJ22,WBS144,人工总成本正常,0.0,2021-04-30 11:22:13
5,5,9527777,ARJ21,WBS154,人工总成本正常,0.0,2021-04-30 11:22:13
6,6,9527777,ARJ21,WBS155,人工总成本正常,0.0,2021-04-30 11:22:13
7,7,9527777,ARJ21,WBS156,人工总成本正常,0.0,2021-04-30 11:22:13
8,8,9527777,ARJ21,WBS157,人工总成本正常,0.0,2021-04-30 11:22:13
9,9,9527777,ARJ21,WBS158,人工总成本正常,0.0,2021-04-30 11:22:13
