In [631]:
# 获取包
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import roc_curve,auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt

import xgboost as xgb

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',300)

In [373]:
# 读取训练集数据
tr_user=pd.read_csv('./data/train/user_info_train.txt',header=None,names=['id','性别','职业','教育程度','婚姻状态','户口类型'])
tr_bank=pd.read_csv('./data/train/bank_detail_train.txt',header=None,names=['id','时间戳','交易类型','交易金额','工资收入标记'])
tr_browse=pd.read_csv('./data/train/browse_history_train.txt',header=None,names=['id','时间戳','浏览行为数据','浏览子行为编号'])
tr_bill=pd.read_csv('./data/train/bill_detail_train.txt',header=None,names=['id','时间戳','银行id','上期账单金额','上期还款金额','信用卡额度','本期账单余额','本期账单最低还款额','消费笔数','本期账单金额','调整金额','循环利息','可用金额','预借现金额度','还款状态'])
tr_loan=pd.read_csv('./data/train/loan_time_train.txt',header=None,names=['id','放款时间'])
tr_overdue=pd.read_csv('./data/train/overdue_train.txt',header=None,names=['id','label'])

# 读取测试集数据
te_user=pd.read_csv('./data/test/user_info_test.txt',header=None,names=['id','性别','职业','教育程度','婚姻状态','户口类型'])
te_bank=pd.read_csv('./data/test/bank_detail_test.txt',header=None,names=['id','时间戳','交易类型','交易金额','工资收入标记'])
te_browse=pd.read_csv('./data/test/browse_history_test.txt',header=None,names=['id','时间戳','浏览行为数据','浏览子行为编号'])
te_bill=pd.read_csv('./data/test/bill_detail_test.txt',header=None,names=['id','时间戳','银行id','上期账单金额','上期还款金额','信用卡额度','本期账单余额','本期账单最低还款额','消费笔数','本期账单金额','调整金额','循环利息','可用金额','预借现金额度','还款状态'])
te_loan=pd.read_csv('./data/test/loan_time_test.txt',header=None,names=['id','放款时间'])
te_id=pd.read_csv('./data/test/usersID_test.txt',header=None,names=['id'])

# 时间转换
for i in [tr_bank,tr_browse,tr_bill,te_bank,te_browse,te_bill]:
    i['时间戳']=i['时间戳']/86400
tr_loan['放款时间']=tr_loan['放款时间']/86400
te_loan['放款时间']=te_loan['放款时间']/86400

In [697]:
# 计算ks值，作图
def plot_ks_curve(df):
    df.columns=['labels','pred']
    df=df.sort_values(by=['pred','labels'],axis=0,ascending=[False,True])
    df=df.reset_index(drop=True)
    bad_cnt=df['labels'].sum()
    good_cnt=len(df)-bad_cnt

    df['acc_bad']=df['labels'].cumsum()
    df['acc_good']=(1-df['labels']).cumsum()
    df['acc_bad_rate']=df['acc_bad']/bad_cnt
    df['acc_good_rate']=df['acc_good']/good_cnt
    df['ks']=df['acc_bad_rate']-df['acc_good_rate']

    ks_max=df['ks'].max()
#     print(df.loc[df['ks']==ks_max,:])

    # auc
    fpr,tpr,threshold=roc_curve(df['labels'],df['pred'],pos_label=1)
    roc_auc = auc(fpr, tpr)
    print('auc_value:',roc_auc)
    print('ks_values:',ks_max)

#     x_=np.linspace(0,1,len(df))
#     plt.figure(figsize=(8, 8))
#     plt.plot(x_,df['acc_bad_rate'],color='red',label='tpr')
#     plt.plot(x_,df['acc_good_rate'],color='blue',label='fpr')
#     plt.plot(x_,df['ks'],color='yellow',label='ks_values (max = %0.2f)' % ks_max)
#     plt.xlabel('Customer cumulative proportion')
#     plt.title('KS curve')
#     plt.legend(loc='upper left')
#     plt.show()
    return roc_auc,ks_max

def mon_distance(x):
    if x is np.nan:
        return 100
    else:
        return int(x/30)

In [719]:
# 用户表特征
def user(df):
    fea_u=df[['id']].copy()
    gender_dummy=pd.get_dummies(df[['性别']].astype(str))
    work_dummy=pd.get_dummies(df[['职业']].astype(str))
    edu_dummy=pd.get_dummies(df[['教育程度']].astype(str))
    marital_dummy=pd.get_dummies(df[['婚姻状态']].astype(str))
    reg_dummy=pd.get_dummies(df[['户口类型']].astype(str))

    fea_u=pd.concat([fea_u,gender_dummy,work_dummy,edu_dummy,marital_dummy,reg_dummy],axis=1)
    
#     print('feature of user',fea_u.columns.tolist())
    return fea_u

In [674]:
# 贷前银行流水特征
def bef_bank(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id',suffixes=['','_loan'])
    df=df[df['放款时间']>df['时间戳']]
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    
    # id 列表
    fea_bank=df[['id']].drop_duplicates().copy()
    
    # 平均每次工资收入金额
    b1=df[(df['mon_dist']>=0) & (df['工资收入标记']==1)][['id','交易金额']].copy()
    b1=b1.groupby('id',as_index=False).agg('mean')
    b1=b1.rename(columns={'交易金额':'bef_bank_salary_ave'})
    # 收入总额
    b2=df[(df['mon_dist']>=0) & (df['交易类型']==0)][['id','交易金额']].copy()
    b2=b2.groupby('id',as_index=False).agg('sum')
    b2=b2.rename(columns={'交易金额':'bef_bank_inc_sum'})

    # 支出总额
    b3=df[(df['mon_dist']>=0) & (df['交易类型']==1)][['id','交易金额']].copy()
    b3=b3.groupby('id',as_index=False).agg('sum')
    b3=b3.rename(columns={'交易金额':'bef_bank_out_sum'})
    
    # 近3月总收入
    b4=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b4=b4.groupby('id',as_index=False).agg('sum')
    b4=b4.rename(columns={'交易金额':'bef_bank_inc_m3_sum'})
    
    # 近3月总支出
    b5=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==1)][['id','交易金额']].copy()
    b5=b5.groupby('id',as_index=False).agg('sum')
    b5=b5.rename(columns={'交易金额':'bef_bank_out_m3_sum'})   
    
    # 工资收入总额
    b6=df[(df['mon_dist']>=0) & (df['工资收入标记']==1)][['id','交易金额']].copy()
    b6=b6.groupby('id',as_index=False).agg('sum')
    b6=b6.rename(columns={'交易金额':'bef_bank_salary_sum'})    
    
    # 近3月收入笔数
    b7=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b7=b7.groupby('id',as_index=False).agg('count')
    b7=b7.rename(columns={'交易金额':'bef_bank_inc_m3_count'})

    # 近3月支出笔数
    b8=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==1)][['id','交易金额']].copy()
    b8=b8.groupby('id',as_index=False).agg('count')
    b8=b8.rename(columns={'交易金额':'bef_bank_out_m3_count'}) 
    
    # 近3月收入单次最大额
    b9=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b9=b9.groupby('id',as_index=False).agg('max')
    b9=b9.rename(columns={'交易金额':'bef_bank_inc_m3_max'}) 

    # 近3月收入单次最小额
    b10=df[(df['mon_dist']>=0) & (df['mon_dist']<=3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b10=b10.groupby('id',as_index=False).agg('min')
    b10=b10.rename(columns={'交易金额':'bef_bank_inc_m3_min'}) 
        
    # 近1月总收入
    b11=df[(df['mon_dist']>=0) & (df['mon_dist']<=1) & (df['交易类型']==0)][['id','交易金额']].copy()
    b11=b11.groupby('id',as_index=False).agg('sum')
    b11=b11.rename(columns={'交易金额':'bef_bank_inc_m1_sum'})
    
    # 近1月总支出
    b12=df[(df['mon_dist']>=0) & (df['mon_dist']<=1) & (df['交易类型']==1)][['id','交易金额']].copy()
    b12=b12.groupby('id',as_index=False).agg('sum')
    b12=b12.rename(columns={'交易金额':'bef_bank_out_m1_sum'})   
    
    # 近3月收入方差
    b13=df[(df['mon_dist']>=0) & (df['mon_dist']<=3)&(df['交易类型']==0)][['id','交易金额']].copy()
    b13=b13.groupby('id',as_index=False).agg('std')
    b13=b13.rename(columns={'交易金额':'bef_bank_inc_m3_std'})
    
       
    # 合并特征
    for i in [b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13]:
        fea_bank=pd.merge(fea_bank,i,how='left',on='id')
    
    # 近3月净收入
    fea_bank['bef_bank_ninc_m3_sum']=fea_bank['bef_bank_inc_m3_sum']-fea_bank['bef_bank_out_m3_sum']
    # 收入中工资占比
    fea_bank['bef_bank_salary_inc_rate']=fea_bank['bef_bank_salary_sum']/fea_bank['bef_bank_inc_sum']
    
    fea_bank=fea_bank.fillna(-1)
      
    
#     print('feature of bef bank detail',fea_bank.columns.tolist())
    return fea_bank

In [675]:
# 贷后银行流水特征
def aft_bank(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id',suffixes=['','_loan'])
    df=df[df['放款时间']<=df['时间戳']]
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    
    # id 列表
    fea_bank=df[['id']].drop_duplicates().copy()
    
#     # 平均每次工资收入金额
#     b1=df[(df['mon_dist']<=0) & (df['工资收入标记']==1)][['id','交易金额']].copy()
#     b1=b1.groupby('id',as_index=False).agg('mean')
#     b1=b1.rename(columns={'交易金额':'aft_bank_salary_ave'})
    # 收入总额
    b2=df[(df['mon_dist']<=0) & (df['交易类型']==0)][['id','交易金额']].copy()
    b2=b2.groupby('id',as_index=False).agg('sum')
    b2=b2.rename(columns={'交易金额':'aft_bank_inc_sum'})

    # 支出总额
    b3=df[(df['mon_dist']<=0) & (df['交易类型']==1)][['id','交易金额']].copy()
    b3=b3.groupby('id',as_index=False).agg('sum')
    b3=b3.rename(columns={'交易金额':'aft_bank_out_sum'})
    
    # 近3月总收入
    b4=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b4=b4.groupby('id',as_index=False).agg('sum')
    b4=b4.rename(columns={'交易金额':'aft_bank_inc_m3_sum'})
    
    # 近3月总支出
    b5=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==1)][['id','交易金额']].copy()
    b5=b5.groupby('id',as_index=False).agg('sum')
    b5=b5.rename(columns={'交易金额':'aft_bank_out_m3_sum'})   
    
    # 工资收入总额
    b6=df[(df['mon_dist']<=0) & (df['工资收入标记']==1)][['id','交易金额']].copy()
    b6=b6.groupby('id',as_index=False).agg('sum')
    b6=b6.rename(columns={'交易金额':'aft_bank_salary_sum'})    
    
    # 近3月收入笔数
    b7=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b7=b7.groupby('id',as_index=False).agg('count')
    b7=b7.rename(columns={'交易金额':'aft_bank_inc_m3_count'})

    # 近3月支出笔数
    b8=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==1)][['id','交易金额']].copy()
    b8=b8.groupby('id',as_index=False).agg('count')
    b8=b8.rename(columns={'交易金额':'aft_bank_out_m3_count'}) 
    
    # 近3月收入单次最大额
    b9=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b9=b9.groupby('id',as_index=False).agg('max')
    b9=b9.rename(columns={'交易金额':'aft_bank_inc_m3_max'}) 

    # 近3月收入单次最小额
    b10=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) & (df['交易类型']==0)][['id','交易金额']].copy()
    b10=b10.groupby('id',as_index=False).agg('min')
    b10=b10.rename(columns={'交易金额':'aft_bank_inc_m3_min'}) 
    
    # 近1月总收入
    b11=df[(df['mon_dist']>=-1) & (df['mon_dist']<=0) & (df['交易类型']==0)][['id','交易金额']].copy()
    b11=b11.groupby('id',as_index=False).agg('sum')
    b11=b11.rename(columns={'交易金额':'aft_bank_inc_m1_sum'})
    
    # 近1月总支出
    b12=df[(df['mon_dist']>=-1) & (df['mon_dist']<=0) & (df['交易类型']==1)][['id','交易金额']].copy()
    b12=b12.groupby('id',as_index=False).agg('sum')
    b12=b12.rename(columns={'交易金额':'aft_bank_out_m1_sum'}) 
        
    # 近3月收入方差
    b13=df[(df['mon_dist']>=-3) & (df['mon_dist']<=0)&(df['交易类型']==0)][['id','交易金额']].copy()
    b13=b13.groupby('id',as_index=False).agg('std')
    b13=b13.rename(columns={'交易金额':'aft_bank_inc_m3_std'})
    
    # 合并特征
    for i in [b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13]:
        fea_bank=pd.merge(fea_bank,i,how='left',on='id')
    
    # 近3月净收入
    fea_bank['aft_bank_ninc_m3_sum']=fea_bank['aft_bank_inc_m3_sum']-fea_bank['aft_bank_out_m3_sum']
    # 收入中工资占比
    fea_bank['aft_bank_salary_inc_rate']=fea_bank['aft_bank_salary_sum']/fea_bank['aft_bank_inc_sum']
    
    fea_bank=fea_bank.fillna(-1)
    
    
#     print('feature of aft bank detail',fea_bank.columns.tolist())
    return fea_bank

In [721]:
# 贷前浏览行为特征
def bef_browse(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id')
    df=df[df['放款时间']>df['时间戳']]
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    fea_browse = df[['id']].drop_duplicates().copy()
    
    # 浏览行为次数
    b1=df[['id']].copy()
    b1['bef_browse_bro_act_cnt']=1
    b1=b1.groupby('id').agg(sum).reset_index()
    
    # 浏览次数，按时间去重
    b2=df[['id','时间戳']].drop_duplicates().copy()
    b2=b2.groupby('id').agg('count').reset_index()
    b2=b2.rename(columns={'时间戳':'bef_browse_bro_cnt'})
    
    # 3个月内浏览子行为编号 
    b2=df[(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id','浏览子行为编号']].copy()
    b2['num']=1
    b2=pd.pivot_table(b2,index='id',columns='浏览子行为编号',aggfunc='sum').reset_index()
    b2.columns=['bef_browse_浏览子行为编号_m3_'+str(i) for i in  b2.columns.droplevel()]
    b2=b2.rename(columns={'bef_browse_浏览子行为编号_m3_':'id'})
  
    # 所有浏览子行为编号 
    b3=df[['id','浏览子行为编号']].copy()
    b3['num']=1
    b3=pd.pivot_table(b3,index='id',columns='浏览子行为编号',aggfunc='sum').reset_index()
    b3.columns=['bef_browse_浏览子行为编号_'+str(i) for i in  b3.columns.droplevel()]
    b3=b3.rename(columns={'bef_browse_浏览子行为编号_':'id'})
    
    # 3个月内所有浏览行为编号
    b4=df[(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id','浏览行为数据']].copy()
    b4['num']=1
    b4=pd.pivot_table(b4,index='id',columns='浏览行为数据',aggfunc='sum').reset_index()
    b4.columns=['bef_browse_浏览行为数据_m3_'+str(i) for i in b4.columns.droplevel()]
    b4=b4.rename(columns={'bef_browse_浏览行为数据_m3_':'id'})
    b4=b4[['id','bef_browse_浏览行为数据_m3_101','bef_browse_浏览行为数据_m3_110','bef_browse_浏览行为数据_m3_118','bef_browse_浏览行为数据_m3_164','bef_browse_浏览行为数据_m3_173','bef_browse_浏览行为数据_m3_189']]
    
    # 10天个内浏览子行为编号 
    b5=df[(df['时间戳']<=df['放款时间']) & (df['时间戳']>=df['放款时间']-10)][['id','浏览子行为编号']].copy()
    b5['num']=1
    b5=pd.pivot_table(b5,index='id',columns='浏览子行为编号',aggfunc='sum').reset_index()
    b5.columns=['bef_browse_浏览子行为编号_d10_'+str(i) for i in  b5.columns.droplevel()]
    b5=b5.rename(columns={'bef_browse_浏览子行为编号_d10_':'id'})    
    b5=b5[['id','bef_browse_浏览子行为编号_d10_1', 'bef_browse_浏览子行为编号_d10_3', 'bef_browse_浏览子行为编号_d10_4', 'bef_browse_浏览子行为编号_d10_5', 'bef_browse_浏览子行为编号_d10_6', 'bef_browse_浏览子行为编号_d10_7', 'bef_browse_浏览子行为编号_d10_8', 'bef_browse_浏览子行为编号_d10_9', 'bef_browse_浏览子行为编号_d10_10']]
    
    # 贷前浏览行为var
    b6=df[['id','浏览行为数据']].groupby('id',as_index=False)
    b6=b6['bef_browse_浏览行为数据'].agg({'bef_browse_浏览行为数据_var':'var'})
    
    for i in [b1,b2,b3,b4,b5,b6]:
        fea_browse=pd.merge(fea_browse,i,how='left',on='id')
    
    # test_set缺失此字段
    if  'bef_browse_浏览子行为编号_11' in fea_browse.columns:
        fea_browse=fea_browse.drop('bef_browse_浏览子行为编号_11',axis=1)
    if  'bef_browse_浏览子行为编号_m3_11' in fea_browse.columns:
        fea_browse=fea_browse.drop('bef_browse_浏览子行为编号_m3_11',axis=1)      
        
    fea_browse=fea_browse.fillna(-1)
#     print('feature of bef browse',fea_browse.columns.tolist())
    return fea_browse

In [677]:
# 贷后浏览行为特征
def aft_browse(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id')
    df=df[df['放款时间']<=df['时间戳']]
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    fea_browse = df[['id']].drop_duplicates().copy()
    
    # 浏览行为次数
    b1=df[['id']].copy()
    b1['aft_browse_bro_act_cnt']=1
    b1=b1.groupby('id').agg(sum).reset_index()
    
    # 浏览次数，按时间去重
    b2=df[['id','时间戳']].drop_duplicates().copy()
    b2=b2.groupby('id').agg('count').reset_index()
    b2=b2.rename(columns={'时间戳':'bef_browse_bro_cnt'})
    
    # 3个月内浏览子行为编号 
    b2=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3)][['id','浏览子行为编号']].copy()
    b2['num']=1
    b2=pd.pivot_table(b2,index='id',columns='浏览子行为编号',aggfunc='sum').reset_index()
    b2.columns=['aft_browse_浏览子行为编号_m3_'+str(i) for i in  b2.columns.droplevel()]
    b2=b2.rename(columns={'aft_browse_浏览子行为编号_m3_':'id'})
    if 'aft_browse_浏览子行为编号_m3_2' in b2.columns:
        b2=b2.drop('aft_browse_浏览子行为编号_m3_2',axis=1)
    if 'aft_browse_浏览子行为编号_m3_11' in b2.columns:
        b2=b2.drop('aft_browse_浏览子行为编号_m3_11',axis=1)
        
  
    # 所有浏览子行为编号 
    b3=df[['id','浏览子行为编号']].copy()
    b3['num']=1
    b3=pd.pivot_table(b3,index='id',columns='浏览子行为编号',aggfunc='sum').reset_index()
    b3.columns=['aft_browse_浏览子行为编号_'+str(i) for i in  b3.columns.droplevel()]
    b3=b3.rename(columns={'aft_browse_浏览子行为编号_':'id'})
    if 'aft_browse_浏览子行为编号_2' in b3.columns:
        b3=b3.drop('aft_browse_浏览子行为编号_2',axis=1)
    if 'aft_browse_浏览子行为编号_11' in b3.columns:
        b3=b3.drop('aft_browse_浏览子行为编号_11',axis=1)
    
    # 3个月内所有浏览行为编号
    b4=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3)][['id','浏览行为数据']].copy()
    b4['num']=1
    b4=pd.pivot_table(b4,index='id',columns='浏览行为数据',aggfunc='sum').reset_index()
    b4.columns=['aft_browse_浏览行为数据_m3_'+str(i) for i in b4.columns.droplevel()]
    b4=b4.rename(columns={'aft_browse_浏览行为数据_m3_':'id'})
    b4=b4[['id','aft_browse_浏览行为数据_m3_101','aft_browse_浏览行为数据_m3_110','aft_browse_浏览行为数据_m3_118','aft_browse_浏览行为数据_m3_164','aft_browse_浏览行为数据_m3_173','aft_browse_浏览行为数据_m3_189']]

    for i in [b1,b2,b3,b4]:
        fea_browse=pd.merge(fea_browse,i,how='left',on='id')
            
    fea_browse=fea_browse.fillna(-1)
#     print('feature of aft browse',fea_browse.columns.tolist())
    return fea_browse

In [678]:
# 贷前信用卡账单特征
def bef_bill(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id',suffixes=['','_loan'])
    df=df[df['放款时间']>df['时间戳']]   
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    fea_bill=df[['id']].drop_duplicates().copy()
    
    # 信用卡总银行数
    b1=df[['id','银行id']].drop_duplicates()[['id']].copy()
    b1['bef_bill_cre_bank_cnt']=1
    b1=b1.groupby('id').agg('sum').reset_index()
    
    # 信用卡在用的总银行数
    b2=df[df['上期账单金额']>0][['id','银行id']].drop_duplicates().copy()
    b2=b2.groupby('id').agg('count').reset_index()
    b2=b2.rename(columns={'银行id':'bef_bill_creused_bank_cnt'})
    
    # 信用卡在用的授信额度总和
    b3=df[df['上期账单金额']>0][['id','银行id','信用卡额度']].drop_duplicates().copy()
    b3=b3.drop('银行id',axis=1).groupby('id').agg('sum').reset_index()
    b3=b3.rename(columns={'信用卡额度':'bef_bill_creused_gramo_sum'})
    
    # 信用卡在用的单张最大授信额度
    b4=df[df['上期账单金额']>0][['id','信用卡额度']].copy()
    b4=b4.groupby('id').agg('max').reset_index()
    b4=b4.rename(columns={'信用卡额度':'bef_bill_creused_gramo_max'})
    
    # 信用卡在用的单张最小授信额度
    b5=df[df['上期账单金额']>0][['id','信用卡额度']].copy()
    b5=b5.groupby('id').agg('min').reset_index()
    b5=b5.rename(columns={'信用卡额度':'bef_bill_creused_gramo_min'})    
    
    # 信用卡消费笔数和
    b6=df[['id','消费笔数']].copy()
    b6=b6.groupby('id').agg('sum').reset_index()
    b6=b6.rename(columns={'消费笔数':'bef_bill_con_cnt'})
    
    # 信用卡消费笔数单月最大值
    b7=df[['id','消费笔数']].copy()
    b7=b7.groupby('id').agg('max').reset_index()
    b7=b7.rename(columns={'消费笔数':'bef_bill_con_max'})    
    
    # 循环利息和
    b8=df[['id','循环利息']].copy()
    b8=b8.groupby('id').agg('sum').reset_index()
    b8=b8.rename(columns={'循环利息':'bef_bill_inter_sum'})
    
    # 循环利息最大额
    b9=df[['id','循环利息']].copy()
    b9=b9.groupby('id').agg('max').reset_index()
    b9=b9.rename(columns={'循环利息':'bef_bill_inter_max'})    
    
    # 循环利息次数
    b10=df[df['循环利息']>0][['id']].copy()
    b10['bef_bill_inter_cnt']=1
    b10=b10.groupby('id').agg(sum).reset_index()
    
    # 可用金额和
    b11=df[['id','可用金额']].copy()
    b11=b11.groupby('id').agg('sum').reset_index()
    b11=b11.rename(columns={'可用金额':'bef_bill_amo_ava'})
    
    # 每期账单上期未还款次数和
    b12=df[(df['上期账单金额']>0) & (df['上期还款金额']==0)][['id']].copy()
    b12['bef_bill_ovdu_cnt']=1
    b12=b12.groupby('id').agg('sum').reset_index()
    
    # 每期账单上期未结清次数和
    b13=df[(df['上期账单金额']>df['上期还款金额']) & (df['上期还款金额']>0)][['id']].copy()
    b13['bef_bill_unc_cnt']=1
    b13=b13.groupby('id').agg('sum').reset_index()
    
    # 还款状态
    b14=df[['id','还款状态']].copy()
    b14=b14.groupby('id').agg('sum').reset_index()
    b14=b14.rename(columns={'还款状态':'bef_bill_payst_cnt'})
    
    # 近3月预借现金额度
    b15=df[(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id','预借现金额度']].copy()
    b15=b15.groupby('id').agg('sum').reset_index()
    b15=b15.rename(columns={'预借现金额度':'bef_bill_preloan_m3_sum'})
    
    # 近3月预借现金次数
    b16=df[(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id','预借现金额度']].copy()
    b16= b16.groupby('id').agg('count').reset_index()
    b16=b16.rename(columns={'预借现金额度':'bef_bill_preloan_m3_cnt'})
    
    # 近3月预借现金银行数
    b17=df[(df['mon_dist']<=3) & (df['mon_dist']>=0) &(df['预借现金额度']>0)][['id','银行id']].drop_duplicates().copy()
    b17=b17.groupby('id').agg('count').reset_index()
    b17=b17.rename(columns={'银行id':'bef_bill_preloan_bank_cnt'})
    
    # 信用卡当前使用额度
    b18=df[['id','时间戳','银行id','本期账单余额']].copy()
    b18=b18.loc[df.groupby(['id','银行id']).apply(lambda x: x['时间戳'].idxmax())].reset_index()
    b18=b18.pivot_table(index='id',values='本期账单余额',aggfunc='sum').reset_index()
    b18=b18.rename(columns={'本期账单余额':'bef_bill_creamo_used'})
    
    # 近3月每期账单上期未还款次数和
    b19=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id']].copy()
    b19['bef_bill_ovdu_m3_cnt']=1
    b19=b19.groupby('id').agg('sum').reset_index()
    
    # 近3月每期账单上期未结清次数和
    b20=df[(df['上期账单金额']>df['上期还款金额']) & (df['上期还款金额']>0)&(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id']].copy()
    b20['bef_bill_unc_m3_cnt']=1
    b20=b20.groupby('id').agg('sum').reset_index()
    
    # 老段子特征1
    b21=df[df['时间戳']<=df['放款时间']][['id','银行id']].groupby('id',as_index=False).agg('count')
    b21=b21.rename(columns={'银行id':'bef_bill_t0'})
    
    # 老段子特征2
    b22=df[df['时间戳']<=df['放款时间']-1][['id','银行id']].groupby('id',as_index=False).agg('count')
    b22=b22.rename(columns={'银行id':'bef_bill_t1'})
    
    # 老段子特征3
    b23=df[df['时间戳']<=df['放款时间']-2][['id','银行id']].groupby('id',as_index=False).agg('count')
    b23=b23.rename(columns={'银行id':'bef_bill_t2'})
    
    # 近3月信用卡本期账单余额方差
    b24=df[(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id','本期账单余额']].copy()
    b24=b24.groupby('id',as_index=False).agg('std')
    b24=b24.rename(columns={'本期账单余额':'bef_bill_amo_std'})
    
    for i in [b1,b2,b3,b4,b5,b6,b7,b7,b9,b10,b11,b12,b13,b14,b15,b16,b17,b18,b19,b20,b21,b22,b23,b24]:
        fea_bill=pd.merge(fea_bill,i,how='left',on='id')
    
    fea_bill['bef_bill_preloan_bank_rate']=fea_bill['bef_bill_cre_bank_cnt']/fea_bill['bef_bill_preloan_bank_cnt']
    fea_bill['bef_bill_creamo_use_rate']=(fea_bill['bef_bill_creamo_used']/fea_bill['bef_bill_creused_gramo_sum'])
    fea_bill.loc[np.isinf(fea_bill['bef_bill_creamo_use_rate']),'bef_bill_creamo_use_rate']=0
    
    fea_bill=fea_bill.fillna(-1)
        
#     print('feature of bef bill',fea_bill.columns.tolist())    
    
    return fea_bill

In [679]:
# 贷后信用卡账单特征
def aft_bill(df,df_loan):
    df=pd.merge(df,df_loan,how='left',on='id',suffixes=['','_loan'])
    df=df[df['放款时间']<=df['时间戳']]   
    df['mon_dist']=(df['放款时间']-df['时间戳']).map(mon_distance)
    fea_bill=df[['id']].drop_duplicates().copy()
    
    # 信用卡总银行数
    b1=df[['id','银行id']].drop_duplicates()[['id']].copy()
    b1['aft_bill_cre_bank_cnt']=1
    b1=b1.groupby('id').agg('sum').reset_index()
    
    # 信用卡在用的总银行数
    b2=df[df['上期账单金额']>0][['id','银行id']].drop_duplicates().copy()
    b2=b2.groupby('id').agg('count').reset_index()
    b2=b2.rename(columns={'银行id':'aft_bill_creused_bank_cnt'})
    
    # 信用卡在用的授信额度总和
    b3=df[df['上期账单金额']>0][['id','银行id','信用卡额度']].drop_duplicates().copy()
    b3=b3.drop('银行id',axis=1).groupby('id').agg('sum').reset_index()
    b3=b3.rename(columns={'信用卡额度':'aft_bill_creused_gramo_sum'})
    
    # 信用卡在用的单张最大授信额度
    b4=df[df['上期账单金额']>0][['id','信用卡额度']].copy()
    b4=b4.groupby('id').agg('max').reset_index()
    b4=b4.rename(columns={'信用卡额度':'aft_bill_creused_gramo_max'})
    
    # 信用卡在用的单张最小授信额度
    b5=df[df['上期账单金额']>0][['id','信用卡额度']].copy()
    b5=b5.groupby('id').agg('min').reset_index()
    b5=b5.rename(columns={'信用卡额度':'aft_bill_creused_gramo_min'})    
    
    # 信用卡消费笔数和
    b6=df[['id','消费笔数']].copy()
    b6=b6.groupby('id').agg('sum').reset_index()
    b6=b6.rename(columns={'消费笔数':'aft_bill_con_cnt'})
    
    # 信用卡消费笔数单月最大值
    b7=df[['id','消费笔数']].copy()
    b7=b7.groupby('id').agg('max').reset_index()
    b7=b7.rename(columns={'消费笔数':'aft_bill_con_max'})    
    
    # 循环利息和
    b8=df[['id','循环利息']].copy()
    b8=b8.groupby('id').agg('sum').reset_index()
    b8=b8.rename(columns={'循环利息':'aft_bill_inter_sum'})
    
    # 循环利息最大额
    b9=df[['id','循环利息']].copy()
    b9=b9.groupby('id').agg('max').reset_index()
    b9=b9.rename(columns={'循环利息':'aft_bill_inter_max'})    
    
    # 循环利息次数
    b10=df[df['循环利息']>0][['id']].copy()
    b10['aft_bill_inter_cnt']=1
    b10=b10.groupby('id').agg(sum).reset_index()
    
    # 可用金额和
    b11=df[['id','可用金额']].copy()
    b11=b11.groupby('id').agg('sum').reset_index()
    b11=b11.rename(columns={'可用金额':'aft_bill_amo_ava'})
    
    # 每期账单上期未还款次数和
    b12=df[(df['上期账单金额']>0) & (df['上期还款金额']==0)][['id']].copy()
    b12['aft_bill_ovdu_cnt']=1
    b12=b12.groupby('id').agg('sum').reset_index()
    
    # 每期账单上期未结清次数和
    b13=df[(df['上期账单金额']>df['上期还款金额']) & (df['上期还款金额']>0)][['id']].copy()
    b13['aft_bill_unc_cnt']=1
    b13=b13.groupby('id').agg('sum').reset_index()
    
    # 还款状态
    b14=df[['id','还款状态']].copy()
    b14=b14.groupby('id').agg('sum').reset_index()
    b14=b14.rename(columns={'还款状态':'aft_bill_payst_cnt'})
    
    # 近3月预借现金额度
    b15=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3)][['id','预借现金额度']].copy()
    b15=b15.groupby('id').agg('sum').reset_index()
    b15=b15.rename(columns={'预借现金额度':'aft_bill_preloan_m3_sum'})
    
    # 近3月预借现金次数
    b16=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3)][['id','预借现金额度']].copy()
    b16= b16.groupby('id').agg('count').reset_index()
    b16=b16.rename(columns={'预借现金额度':'aft_bill_preloan_m3_cnt'})
    
    # 近3月预借现金银行数
    b17=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3) &(df['预借现金额度']>0)][['id','银行id']].drop_duplicates().copy()
    b17=b17.groupby('id').agg('count').reset_index()
    b17=b17.rename(columns={'银行id':'aft_bill_preloan_bank_cnt'})
    
    # 信用卡当前使用额度
    b18=df[['id','时间戳','银行id','本期账单余额']].copy()
    b18=b18.loc[df.groupby(['id','银行id']).apply(lambda x: x['时间戳'].idxmax())].reset_index()
    b18=b18.pivot_table(index='id',values='本期账单余额',aggfunc='sum').reset_index()
    b18=b18.rename(columns={'本期账单余额':'aft_bill_creamo_used'})
    
    # 近3月每期账单上期未还款次数和
    b19=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id']].copy()
    b19['aft_bill_ovdu_m3_cnt']=1
    b19=b19.groupby('id').agg('sum').reset_index()
    
    # 近3月每期账单上期未结清次数和
    b20=df[(df['上期账单金额']>df['上期还款金额']) & (df['上期还款金额']>0)&(df['mon_dist']<=3) & (df['mon_dist']>=0)][['id']].copy()
    b20['aft_bill_unc_m3_cnt']=1
    b20=b20.groupby('id').agg('sum').reset_index()
    
    # 老段子特征1
    b21=df[df['时间戳']>=df['放款时间']][['id','银行id']].groupby('id',as_index=False).agg('count')
    b21=b21.rename(columns={'银行id':'aft_bill_t0'})
    
    # 老段子特征2
    b22=df[df['时间戳']>=df['放款时间']+1][['id','银行id']].groupby('id',as_index=False).agg('count')
    b22=b22.rename(columns={'银行id':'aft_bill_t1'})
    
    # 老段子特征3
    b23=df[df['时间戳']>=df['放款时间']+2][['id','银行id']].groupby('id',as_index=False).agg('count')
    b23=b23.rename(columns={'银行id':'aft_bill_t2'})
    
    # 老段子特征4
    b24=df[df['时间戳']>=df['放款时间']+10][['id','银行id']].groupby('id',as_index=False).agg('count')
    b24=b24.rename(columns={'银行id':'aft_bill_t10'})    
    
    # 老段子特征5
    b25=df[df['时间戳']>=df['放款时间']+20][['id','银行id']].groupby('id',as_index=False).agg('count')
    b25=b25.rename(columns={'银行id':'aft_bill_t20'})    
    
    # 老段子特征6
    b26=df[df['时间戳']>=df['放款时间']+30][['id','银行id']].groupby('id',as_index=False).agg('count')
    b26=b26.rename(columns={'银行id':'aft_bill_t30'})    
    
    # 近3月信用卡本期账单余额方差
    b27=df[(df['mon_dist']<=0) & (df['mon_dist']>=-3)][['id','本期账单余额']].copy()
    b27=b27.groupby('id',as_index=False).agg('std')
    b27=b27.rename(columns={'本期账单余额':'aft_bill_amo_std'})
    
    # 0天后预借现金额度
    b28=df[(df['时间戳']>=df['放款时间'])][['id','预借现金额度']].copy()
    b28=b28.groupby('id').agg('sum').reset_index()
    b28=b28.rename(columns={'预借现金额度':'aft_bill_preloan_d0_sum'})    
    
    # 1天后预借现金额度
    b29=df[(df['时间戳']>=df['放款时间']+1)][['id','预借现金额度']].copy()
    b29=b29.groupby('id').agg('sum').reset_index()
    b29=b29.rename(columns={'预借现金额度':'aft_bill_preloan_d1_sum'}) 
    
    # 2天后预借现金额度
    b30=df[(df['时间戳']>=df['放款时间']+2)][['id','预借现金额度']].copy()
    b30=b30.groupby('id').agg('sum').reset_index()
    b30=b30.rename(columns={'预借现金额度':'aft_bill_preloan_d2_sum'}) 
    
    # 10天后预借现金额度
    b31=df[(df['时间戳']>=df['放款时间']+10)][['id','预借现金额度']].copy()
    b31=b31.groupby('id').agg('sum').reset_index()
    b31=b31.rename(columns={'预借现金额度':'aft_bill_preloan_d10_sum'}) 
    
    # 20天后预借现金额度
    b32=df[(df['时间戳']>=df['放款时间']+20)][['id','预借现金额度']].copy()
    b32=b32.groupby('id').agg('sum').reset_index()
    b32=b32.rename(columns={'预借现金额度':'aft_bill_preloan_d20_sum'})
    
    # 30天后预借现金额度
    b33=df[(df['时间戳']>=df['放款时间']+30)][['id','预借现金额度']].copy()
    b33=b33.groupby('id').agg('sum').reset_index()
    b33=b33.rename(columns={'预借现金额度':'aft_bill_preloan_d30_sum'}) 
    
    # 0天后每期账单上期未还款金额和
    b34=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+0)][['id','上期账单金额']].copy()
    b34=b34.groupby('id',as_index=False).agg('sum')
    b34=b34.rename(columns={'上期账单金额':'aft_bill_ovdu_d0_sum'})
    
    # 1天后每期账单上期未还款金额和
    b35=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+1)][['id','上期账单金额']].copy()
    b35=b35.groupby('id',as_index=False).agg('sum')
    b35=b35.rename(columns={'上期账单金额':'aft_bill_ovdu_d1_sum'})    
    
    # 2天后每期账单上期未还款金额和
    b36=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+2)][['id','上期账单金额']].copy()
    b36=b36.groupby('id',as_index=False).agg('sum')
    b36=b36.rename(columns={'上期账单金额':'aft_bill_ovdu_d2_sum'})    
    
    # 10天后每期账单上期未还款金额和
    b37=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+10)][['id','上期账单金额']].copy()
    b37=b37.groupby('id',as_index=False).agg('sum')
    b37=b37.rename(columns={'上期账单金额':'aft_bill_ovdu_d10_sum'})    
    
    # 20天后每期账单上期未还款金额和
    b38=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+20)][['id','上期账单金额']].copy()
    b38=b38.groupby('id',as_index=False).agg('sum')
    b38=b38.rename(columns={'上期账单金额':'aft_bill_ovdu_d20_sum'})    
    
    # 30天后每期账单上期未还款金额和
    b39=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+30)][['id','上期账单金额']].copy()
    b39=b39.groupby('id',as_index=False).agg('sum')
    b39=b39.rename(columns={'上期账单金额':'aft_bill_ovdu_d30_sum'})    
    
    # 0天后信用卡总银行数
    b40=df[(df['时间戳']>=df['放款时间']+0)][['id','银行id']].drop_duplicates()[['id']].copy()
    b40['aft_bill_cre_bank_d0_cnt']=1
    b40=b40.groupby('id').agg('sum').reset_index() 
    
    # 10天后信用卡总银行数
    b41=df[(df['时间戳']>=df['放款时间']+10)][['id','银行id']].drop_duplicates()[['id']].copy()
    b41['aft_bill_cre_bank_d10_cnt']=1
    b41=b41.groupby('id').agg('sum').reset_index()
    
    # 30天后信用卡总银行数
    b42=df[(df['时间戳']>=df['放款时间']+30)][['id','银行id']].drop_duplicates()[['id']].copy()
    b42['aft_bill_cre_bank_d30_cnt']=1
    b42=b42.groupby('id').agg('sum').reset_index()
    
    # 0天后每期账单上期未还款金额次数
    b43=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+0)][['id','上期账单金额']].copy()
    b43=b43.groupby('id',as_index=False).agg('count')
    b43=b43.rename(columns={'上期账单金额':'aft_bill_ovdu_d0_count'})
    
    # 1天后每期账单上期未还款金额和
    b44=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+1)][['id','上期账单金额']].copy()
    b44=b44.groupby('id',as_index=False).agg('count')
    b44=b44.rename(columns={'上期账单金额':'aft_bill_ovdu_d1_count'})    
    
    # 2天后每期账单上期未还款金额和
    b45=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+2)][['id','上期账单金额']].copy()
    b45=b45.groupby('id',as_index=False).agg('count')
    b45=b45.rename(columns={'上期账单金额':'aft_bill_ovdu_d2_count'})    
    
    # 10天后每期账单上期未还款金额和
    b46=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+10)][['id','上期账单金额']].copy()
    b46=b46.groupby('id',as_index=False).agg('count')
    b46=b46.rename(columns={'上期账单金额':'aft_bill_ovdu_d10_count'})    
    
    # 20天后每期账单上期未还款金额和
    b47=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+20)][['id','上期账单金额']].copy()
    b47=b47.groupby('id',as_index=False).agg('count')
    b47=b47.rename(columns={'上期账单金额':'aft_bill_ovdu_d20_count'})    
    
    # 30天后每期账单上期未还款金额和
    b48=df[(df['上期账单金额']>0) & (df['上期还款金额']==0) &(df['时间戳']>=df['放款时间']+30)][['id','上期账单金额']].copy()
    b48=b48.groupby('id',as_index=False).agg('count')
    b48=b48.rename(columns={'上期账单金额':'aft_bill_ovdu_d30_count'})    
    
   
    for i in [b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16,b17,b18,b19,b20,b21,b22,b23,b24,b25,b26,b27,b28,b29,b30,b31,b32,b33,b34,b35,b36,b37,b38,b39,b40,b41,b42,b43,b44,b45,b46,b47,b48]:
        fea_bill=pd.merge(fea_bill,i,how='left',on='id')
        
    fea_bill['aft_bill_preloan_bank_rate']=fea_bill['aft_bill_cre_bank_cnt']/fea_bill['aft_bill_preloan_bank_cnt']
    fea_bill['aft_bill_creamo_use_rate']=fea_bill['aft_bill_creamo_used']/fea_bill['aft_bill_creused_gramo_sum']
    fea_bill.loc[np.isinf(fea_bill['aft_bill_creamo_use_rate']),'aft_bill_creamo_use_rate']=0
    fea_bill['aft_bill_cre_bank_add']=fea_bill['aft_bill_cre_bank_d30_cnt']/fea_bill['aft_bill_cre_bank_d0_cnt']
    
    
    fea_bill=fea_bill.fillna(-1)
        
#     print('feature of aft bill',fea_bill.columns.tolist())    
    
    return fea_bill

In [680]:
fea_user=user(tr_user)
fea_user.to_csv('data/feature_data/fea_user.csv')
feat_user=user(te_user)
feat_user.to_csv('data/feature_data/feat_user.csv')

In [681]:
fea_bef_bank=bef_bank(tr_bank,tr_loan)
fea_bef_bank.to_csv('data/feature_data/fea_bef_bank.csv')
feat_bef_bank=bef_bank(te_bank,te_loan)
feat_bef_bank.to_csv('data/feature_data/feat_bef_bank.csv')

In [682]:
fea_aft_bank=aft_bank(tr_bank,tr_loan)
fea_aft_bank.to_csv('data/feature_data/fea_aft_bank.csv')
feat_aft_bank=aft_bank(te_bank,te_loan)
feat_aft_bank.to_csv('data/feature_data/feat_aft_bank.csv')

In [714]:
fea_bef_browse=bef_browse(tr_browse,tr_loan)
fea_bef_browse.to_csv('data/feature_data/fea_bef_browse.csv')
feat_bef_browse=bef_browse(te_browse,te_loan)
feat_bef_browse.to_csv('data/feature_data/feat_bef_browse.csv')

   id   浏览行为数据_var
0   2  3390.796753
1   3  2536.790245
2   4  2942.068840
3   6     0.000000
4   7  3021.163081
      id   浏览行为数据_var
0  55597  2503.160574
1  55598  3573.716033
2  55601  3050.158869
3  55602  2929.986097
4  55603  3278.076146


In [684]:
fea_aft_browse=aft_browse(tr_browse,tr_loan)
fea_aft_browse.to_csv('data/feature_data/fea_aft_browse.csv')
feat_aft_browse=aft_browse(te_browse,te_loan)
feat_aft_browse.to_csv('data/feature_data/feat_aft_browse.csv')

In [685]:
fea_bef_bill=bef_bill(tr_bill,tr_loan)
fea_bef_bill.to_csv('data/feature_data/fea_bef_bill.csv')
feat_bef_bill=bef_bill(te_bill,te_loan)
feat_bef_bill.to_csv('data/feature_data/feat_bef_bill.csv')

In [686]:
fea_aft_bill=aft_bill(tr_bill,tr_loan)
fea_aft_bill.to_csv('data/feature_data/fea_aft_bill.csv')
feat_aft_bill=aft_bill(te_bill,te_loan)
feat_aft_bill.to_csv('data/feature_data/feat_aft_bill.csv')

In [715]:
train_set=tr_overdue.merge(fea_user,on='id',how='left').copy()
train_set=train_set.merge(fea_bef_bank,on='id',how='left')
train_set=train_set.merge(fea_bef_browse,on='id',how='left')
train_set=train_set.merge(fea_bef_bill,on='id',how='left')
train_set=train_set.merge(fea_aft_bank,on='id',how='left')
train_set=train_set.merge(fea_aft_browse,on='id',how='left')
train_set=train_set.merge(fea_aft_bill,on='id',how='left')

train_set=train_set.fillna(-1)

test_set=te_id.merge(feat_user,on='id',how='left').copy()
test_set=test_set.merge(feat_bef_bank,on='id',how='left')
test_set=test_set.merge(feat_bef_browse,on='id',how='left')
test_set=test_set.merge(feat_bef_bill,on='id',how='left')
test_set=test_set.merge(feat_aft_bank,on='id',how='left')
test_set=test_set.merge(feat_aft_browse,on='id',how='left')
test_set=test_set.merge(feat_aft_bill,on='id',how='left')

test_set=test_set.fillna(-1)

print('训练集大小',train_set.shape)
print('测试集大小',test_set.shape)
print('feature_list')
print(train_set.columns.tolist()[2:])

训练集大小 (55596, 194)
测试集大小 (13899, 193)
feature_list
['性别_0', '性别_1', '性别_2', '职业_0', '职业_1', '职业_2', '职业_3', '职业_4', '教育程度_0', '教育程度_1', '教育程度_2', '教育程度_3', '教育程度_4', '婚姻状态_0', '婚姻状态_1', '婚姻状态_2', '婚姻状态_3', '婚姻状态_4', '婚姻状态_5', '户口类型_0', '户口类型_1', '户口类型_2', '户口类型_3', '户口类型_4', 'bef_bank_salary_ave', 'bef_bank_inc_sum', 'bef_bank_out_sum', 'bef_bank_inc_m3_sum', 'bef_bank_out_m3_sum', 'bef_bank_salary_sum', 'bef_bank_inc_m3_count', 'bef_bank_out_m3_count', 'bef_bank_inc_m3_max', 'bef_bank_inc_m3_min', 'bef_bank_inc_m1_sum', 'bef_bank_out_m1_sum', 'bef_bank_inc_m3_std', 'bef_bank_ninc_m3_sum', 'bef_bank_salary_inc_rate', 'bef_browse_bro_act_cnt', 'bef_browse_浏览子行为编号_m3_1', 'bef_browse_浏览子行为编号_m3_2', 'bef_browse_浏览子行为编号_m3_3', 'bef_browse_浏览子行为编号_m3_4', 'bef_browse_浏览子行为编号_m3_5', 'bef_browse_浏览子行为编号_m3_6', 'bef_browse_浏览子行为编号_m3_7', 'bef_browse_浏览子行为编号_m3_8', 'bef_browse_浏览子行为编号_m3_9', 'bef_browse_浏览子行为编号_m3_10', 'bef_browse_浏览子行为编号_1', 'bef_browse_浏览子行为编号_2', 'bef_browse_浏览子行为编号_3', 'bef_b

In [707]:
# 此步跳过，数据归一化处理会过拟合
fea_list=train_set.columns.tolist()[2:]
scaler1 = MinMaxScaler()
train_set[fea_list]=scaler1.fit_transform(train_set[fea_list])
scaler2 = MinMaxScaler()
test_set[fea_list]=scaler2.fit_transform(test_set[fea_list])

In [717]:
# 拆分训练集和验证集
trainsub,validsub=train_test_split(train_set,test_size = 0.2, stratify = train_set['label'], random_state=100)

# 模型输入特征
trainsub_dm=xgb.DMatrix(trainsub.drop(['id','label'],axis=1),label=trainsub[['label']])
validsub_dm=xgb.DMatrix(validsub.drop(['id','label'],axis=1),label=validsub[['label']])
test_dm=xgb.DMatrix(test_set.drop(['id'],axis=1))

# 模型1训练调参
params= {'booster': 'gbtree', 'objective': 'binary:logistic',
         'eval_metric': 'error', 'gamma': 2,'min_child_weight': 1.1,
         'max_depth': 5, 'lambda': 20, 'subsample': 0.8, 'colsample_bytree': 0.7,
         'colsample_bylevel': 0.7, 'eta': 0.01, 'tree_method': 'exact', 
         'seed': 0, 'nthread': 12,'early_stop':100,'scale_pos_weight':7}
watchlist = [(trainsub_dm, 'train'),(validsub_dm, 'val')]
boost_round=2000
model=xgb.train(params,trainsub_dm,num_boost_round=boost_round,evals=watchlist,early_stopping_rounds=2000)

# 模型2训练调参
# params={'booster':'gbtree','objective': 'rank:pairwise','eval_metric':'auc','gamma':0.1,'min_child_weight':1.1,
#     'max_depth':7,'lambda':10,'subsample':0.7,'colsample_bytree':0.7,'colsample_bylevel':0.7,'eta': 0.01,
#     'tree_method':'exact','seed':1000,'nthread':12}
# watchlist = [(trainsub_dm, 'train'),(validsub_dm, 'val')]
# model = xgb.train(params, trainsub_dm,num_boost_round=2000,evals=watchlist,early_stopping_rounds=500)

# 训练集ks值
df_ks=trainsub[['label']].copy()
df_ks['pred']=model.predict(trainsub_dm)
tr_ks,tr_auc=plot_ks_curve(df_ks)

# 验证集ks值
vres=validsub[['label']].copy()
vres['pred']=model.predict(validsub_dm)
# vres.loc[vres['pred']>1,'pred']=1
# vres.loc[vres['pred']<0,'pred']=0
te_ks,te_auc=plot_ks_curve(vres)

para_txt_path='data/para.txt'
with open(para_txt_path,'a') as f:
    params['num_boost_round']=boost_round
    f.write(str(params)+'\n')
    f.write('train_ks_value:'+str(tr_ks)+', train_auc_value:'+str(tr_auc)+'\n')
    f.write('valid_ks_value:'+str(te_ks)+', valid_auc_value:'+str(te_auc)+'\n')
f.close()

[0]	train-error:0.33056	val-error:0.339748
Multiple eval metrics have been passed: 'val-error' will be used for early stopping.

Will train until val-error hasn't improved in 2000 rounds.
[1]	train-error:0.324512	val-error:0.333723
[2]	train-error:0.321477	val-error:0.331205
[3]	train-error:0.325029	val-error:0.335432
[4]	train-error:0.322668	val-error:0.331745
[5]	train-error:0.322736	val-error:0.331565
[6]	train-error:0.320645	val-error:0.330126
[7]	train-error:0.320375	val-error:0.329766
[8]	train-error:0.321342	val-error:0.330036
[9]	train-error:0.321005	val-error:0.329496
[10]	train-error:0.322084	val-error:0.330396
[11]	train-error:0.321499	val-error:0.329766
[12]	train-error:0.321207	val-error:0.329406
[13]	train-error:0.321567	val-error:0.329856
[14]	train-error:0.321477	val-error:0.330306
[15]	train-error:0.321589	val-error:0.330576
[16]	train-error:0.321837	val-error:0.330396
[17]	train-error:0.322196	val-error:0.330755
[18]	train-error:0.322601	val-error:0.330665
[19]	train-

[178]	train-error:0.318869	val-error:0.328147
[179]	train-error:0.318689	val-error:0.327608
[180]	train-error:0.318779	val-error:0.327608
[181]	train-error:0.318621	val-error:0.327698
[182]	train-error:0.318599	val-error:0.327428
[183]	train-error:0.318509	val-error:0.327428
[184]	train-error:0.318486	val-error:0.327428
[185]	train-error:0.318486	val-error:0.327338
[186]	train-error:0.318531	val-error:0.327698
[187]	train-error:0.318441	val-error:0.327518
[188]	train-error:0.318419	val-error:0.327428
[189]	train-error:0.318396	val-error:0.327428
[190]	train-error:0.318239	val-error:0.327428
[191]	train-error:0.318082	val-error:0.327428
[192]	train-error:0.317947	val-error:0.327158
[193]	train-error:0.317767	val-error:0.327158
[194]	train-error:0.317699	val-error:0.326799
[195]	train-error:0.317587	val-error:0.326889
[196]	train-error:0.317587	val-error:0.326978
[197]	train-error:0.317295	val-error:0.326889
[198]	train-error:0.31734	val-error:0.326799
[199]	train-error:0.317272	val-erro

[358]	train-error:0.309111	val-error:0.319335
[359]	train-error:0.309111	val-error:0.319424
[360]	train-error:0.309066	val-error:0.319155
[361]	train-error:0.308931	val-error:0.319155
[362]	train-error:0.308706	val-error:0.319245
[363]	train-error:0.308706	val-error:0.319155
[364]	train-error:0.308616	val-error:0.319065
[365]	train-error:0.308593	val-error:0.318795
[366]	train-error:0.308121	val-error:0.318615
[367]	train-error:0.308054	val-error:0.318435
[368]	train-error:0.308009	val-error:0.318345
[369]	train-error:0.307986	val-error:0.318525
[370]	train-error:0.307806	val-error:0.318525
[371]	train-error:0.307784	val-error:0.318705
[372]	train-error:0.307739	val-error:0.318435
[373]	train-error:0.307627	val-error:0.318255
[374]	train-error:0.307582	val-error:0.318345
[375]	train-error:0.307514	val-error:0.318345
[376]	train-error:0.307424	val-error:0.318165
[377]	train-error:0.307334	val-error:0.318165
[378]	train-error:0.307312	val-error:0.318255
[379]	train-error:0.307199	val-err

[537]	train-error:0.295845	val-error:0.309263
[538]	train-error:0.295755	val-error:0.309173
[539]	train-error:0.295643	val-error:0.309083
[540]	train-error:0.295598	val-error:0.308903
[541]	train-error:0.295575	val-error:0.308723
[542]	train-error:0.295463	val-error:0.308453
[543]	train-error:0.295215	val-error:0.308543
[544]	train-error:0.295103	val-error:0.308543
[545]	train-error:0.295125	val-error:0.308453
[546]	train-error:0.295013	val-error:0.308453
[547]	train-error:0.294923	val-error:0.308273
[548]	train-error:0.294856	val-error:0.308363
[549]	train-error:0.294653	val-error:0.308363
[550]	train-error:0.294563	val-error:0.308543
[551]	train-error:0.294428	val-error:0.308453
[552]	train-error:0.294473	val-error:0.308543
[553]	train-error:0.294406	val-error:0.308453
[554]	train-error:0.294361	val-error:0.308453
[555]	train-error:0.294339	val-error:0.308453
[556]	train-error:0.294316	val-error:0.308453
[557]	train-error:0.294271	val-error:0.308543
[558]	train-error:0.294226	val-err

[716]	train-error:0.287211	val-error:0.302968
[717]	train-error:0.287211	val-error:0.303327
[718]	train-error:0.287166	val-error:0.303237
[719]	train-error:0.287054	val-error:0.302968
[720]	train-error:0.287054	val-error:0.302788
[721]	train-error:0.286986	val-error:0.302698
[722]	train-error:0.286941	val-error:0.302518
[723]	train-error:0.286919	val-error:0.302698
[724]	train-error:0.286806	val-error:0.302788
[725]	train-error:0.286806	val-error:0.302518
[726]	train-error:0.286739	val-error:0.302338
[727]	train-error:0.286716	val-error:0.302428
[728]	train-error:0.286739	val-error:0.302518
[729]	train-error:0.286761	val-error:0.302698
[730]	train-error:0.286761	val-error:0.302788
[731]	train-error:0.286671	val-error:0.302698
[732]	train-error:0.286739	val-error:0.302698
[733]	train-error:0.286649	val-error:0.302608
[734]	train-error:0.286694	val-error:0.302698
[735]	train-error:0.286694	val-error:0.302428
[736]	train-error:0.286716	val-error:0.302428
[737]	train-error:0.286604	val-err

[897]	train-error:0.279701	val-error:0.297932
[898]	train-error:0.279679	val-error:0.297842
[899]	train-error:0.279566	val-error:0.297842
[900]	train-error:0.279589	val-error:0.297932
[901]	train-error:0.279544	val-error:0.297752
[902]	train-error:0.279544	val-error:0.297662
[903]	train-error:0.279477	val-error:0.297932
[904]	train-error:0.279454	val-error:0.297932
[905]	train-error:0.279252	val-error:0.297752
[906]	train-error:0.279252	val-error:0.297842
[907]	train-error:0.279207	val-error:0.297932
[908]	train-error:0.279162	val-error:0.297842
[909]	train-error:0.279184	val-error:0.297752
[910]	train-error:0.279139	val-error:0.297752
[911]	train-error:0.279027	val-error:0.297842
[912]	train-error:0.278982	val-error:0.297662
[913]	train-error:0.279049	val-error:0.297572
[914]	train-error:0.279072	val-error:0.297662
[915]	train-error:0.278937	val-error:0.297482
[916]	train-error:0.278914	val-error:0.297662
[917]	train-error:0.278802	val-error:0.297392
[918]	train-error:0.27878	val-erro

[1074]	train-error:0.273541	val-error:0.294245
[1075]	train-error:0.273451	val-error:0.294065
[1076]	train-error:0.273428	val-error:0.294155
[1077]	train-error:0.273316	val-error:0.294245
[1078]	train-error:0.273316	val-error:0.294245
[1079]	train-error:0.273338	val-error:0.294245
[1080]	train-error:0.273383	val-error:0.294065
[1081]	train-error:0.273451	val-error:0.294065
[1082]	train-error:0.273496	val-error:0.294065
[1083]	train-error:0.273383	val-error:0.294155
[1084]	train-error:0.273383	val-error:0.294155
[1085]	train-error:0.273338	val-error:0.294155
[1086]	train-error:0.273316	val-error:0.294245
[1087]	train-error:0.273271	val-error:0.294155
[1088]	train-error:0.273226	val-error:0.294155
[1089]	train-error:0.273159	val-error:0.294245
[1090]	train-error:0.273136	val-error:0.294245
[1091]	train-error:0.273069	val-error:0.294245
[1092]	train-error:0.273159	val-error:0.294245
[1093]	train-error:0.273091	val-error:0.294245
[1094]	train-error:0.273024	val-error:0.294335
[1095]	train-

[1249]	train-error:0.267403	val-error:0.290468
[1250]	train-error:0.267403	val-error:0.290558
[1251]	train-error:0.267425	val-error:0.290558
[1252]	train-error:0.267448	val-error:0.290647
[1253]	train-error:0.267448	val-error:0.290647
[1254]	train-error:0.267425	val-error:0.290558
[1255]	train-error:0.26729	val-error:0.290558
[1256]	train-error:0.26729	val-error:0.290468
[1257]	train-error:0.267268	val-error:0.290378
[1258]	train-error:0.267245	val-error:0.290378
[1259]	train-error:0.267245	val-error:0.290288
[1260]	train-error:0.26729	val-error:0.290468
[1261]	train-error:0.267223	val-error:0.290558
[1262]	train-error:0.267223	val-error:0.290647
[1263]	train-error:0.2672	val-error:0.290647
[1264]	train-error:0.267088	val-error:0.290378
[1265]	train-error:0.266998	val-error:0.290378
[1266]	train-error:0.266886	val-error:0.290468
[1267]	train-error:0.266908	val-error:0.290378
[1268]	train-error:0.266886	val-error:0.290378
[1269]	train-error:0.266908	val-error:0.290378
[1270]	train-error

[1426]	train-error:0.261692	val-error:0.2875
[1427]	train-error:0.261692	val-error:0.28741
[1428]	train-error:0.261669	val-error:0.2875
[1429]	train-error:0.261647	val-error:0.2875
[1430]	train-error:0.261624	val-error:0.2875
[1431]	train-error:0.261579	val-error:0.2875
[1432]	train-error:0.261512	val-error:0.2875
[1433]	train-error:0.261489	val-error:0.28741
[1434]	train-error:0.261489	val-error:0.28759
[1435]	train-error:0.261444	val-error:0.28741
[1436]	train-error:0.261444	val-error:0.28741
[1437]	train-error:0.261422	val-error:0.28732
[1438]	train-error:0.261444	val-error:0.28741
[1439]	train-error:0.261399	val-error:0.28741
[1440]	train-error:0.26122	val-error:0.28732
[1441]	train-error:0.261197	val-error:0.28741
[1442]	train-error:0.261242	val-error:0.28732
[1443]	train-error:0.261175	val-error:0.28723
[1444]	train-error:0.261175	val-error:0.28723
[1445]	train-error:0.261062	val-error:0.28714
[1446]	train-error:0.260882	val-error:0.28705
[1447]	train-error:0.260837	val-error:0.2

[1602]	train-error:0.256723	val-error:0.284083
[1603]	train-error:0.256723	val-error:0.284083
[1604]	train-error:0.256768	val-error:0.284083
[1605]	train-error:0.256768	val-error:0.284083
[1606]	train-error:0.256723	val-error:0.283903
[1607]	train-error:0.256678	val-error:0.283633
[1608]	train-error:0.256678	val-error:0.283543
[1609]	train-error:0.25661	val-error:0.283633
[1610]	train-error:0.256588	val-error:0.283723
[1611]	train-error:0.25652	val-error:0.283723
[1612]	train-error:0.25643	val-error:0.283813
[1613]	train-error:0.25643	val-error:0.283813
[1614]	train-error:0.256385	val-error:0.283813
[1615]	train-error:0.256363	val-error:0.283903
[1616]	train-error:0.256385	val-error:0.283813
[1617]	train-error:0.256161	val-error:0.283813
[1618]	train-error:0.256116	val-error:0.283633
[1619]	train-error:0.256183	val-error:0.283993
[1620]	train-error:0.256206	val-error:0.283723
[1621]	train-error:0.256161	val-error:0.283813
[1622]	train-error:0.256093	val-error:0.283633
[1623]	train-erro

[1777]	train-error:0.251057	val-error:0.281745
[1778]	train-error:0.250944	val-error:0.281565
[1779]	train-error:0.251102	val-error:0.281655
[1780]	train-error:0.251079	val-error:0.281835
[1781]	train-error:0.251079	val-error:0.281835
[1782]	train-error:0.251012	val-error:0.281655
[1783]	train-error:0.250944	val-error:0.281565
[1784]	train-error:0.250899	val-error:0.281655
[1785]	train-error:0.250854	val-error:0.281655
[1786]	train-error:0.250787	val-error:0.281565
[1787]	train-error:0.250809	val-error:0.281655
[1788]	train-error:0.250854	val-error:0.281835
[1789]	train-error:0.250832	val-error:0.281924
[1790]	train-error:0.250787	val-error:0.282014
[1791]	train-error:0.250832	val-error:0.282014
[1792]	train-error:0.250787	val-error:0.281924
[1793]	train-error:0.250787	val-error:0.281835
[1794]	train-error:0.250652	val-error:0.281835
[1795]	train-error:0.250607	val-error:0.281655
[1796]	train-error:0.250472	val-error:0.281745
[1797]	train-error:0.25054	val-error:0.281655
[1798]	train-e

[1952]	train-error:0.24674	val-error:0.279406
[1953]	train-error:0.246717	val-error:0.279317
[1954]	train-error:0.246672	val-error:0.279137
[1955]	train-error:0.24665	val-error:0.279227
[1956]	train-error:0.24674	val-error:0.279047
[1957]	train-error:0.246695	val-error:0.279227
[1958]	train-error:0.246695	val-error:0.279317
[1959]	train-error:0.246605	val-error:0.279317
[1960]	train-error:0.246537	val-error:0.279227
[1961]	train-error:0.246448	val-error:0.279137
[1962]	train-error:0.246358	val-error:0.279227
[1963]	train-error:0.24638	val-error:0.279227
[1964]	train-error:0.24629	val-error:0.279317
[1965]	train-error:0.246403	val-error:0.279227
[1966]	train-error:0.246313	val-error:0.279317
[1967]	train-error:0.246335	val-error:0.278957
[1968]	train-error:0.24629	val-error:0.279047
[1969]	train-error:0.246245	val-error:0.279047
[1970]	train-error:0.246178	val-error:0.279227
[1971]	train-error:0.246223	val-error:0.279227
[1972]	train-error:0.246268	val-error:0.279137
[1973]	train-error:

In [720]:
sorted(model.get_fscore().items(),key=lambda x: x[1],reverse=True) 

[('浏览行为数据_var', 1282),
 ('bef_bill_creamo_used', 1124),
 ('bef_bill_creamo_use_rate', 1026),
 ('bef_bill_preloan_m3_sum', 979),
 ('bef_bill_unc_cnt', 962),
 ('aft_bill_creamo_use_rate', 958),
 ('aft_bill_creamo_used', 870),
 ('bef_bill_creused_gramo_min', 821),
 ('bef_bill_creused_gramo_sum', 811),
 ('bef_bill_creused_gramo_max', 796),
 ('bef_browse_浏览子行为编号_10', 750),
 ('bef_browse_浏览子行为编号_7', 731),
 ('aft_bill_creused_gramo_max', 724),
 ('bef_browse_浏览子行为编号_6', 712),
 ('bef_browse_浏览子行为编号_8', 694),
 ('bef_bill_con_cnt', 676),
 ('bef_bill_inter_max', 648),
 ('bef_bill_preloan_m3_cnt', 643),
 ('bef_browse_浏览子行为编号_4', 636),
 ('bef_bill_ovdu_cnt', 636),
 ('bef_browse_浏览行为数据_m3_110', 634),
 ('bef_bill_t0', 622),
 ('aft_bill_creused_gramo_min', 615),
 ('aft_bill_creused_gramo_sum', 601),
 ('bef_browse_浏览行为数据_m3_101', 591),
 ('bef_bank_inc_m3_max', 585),
 ('bef_browse_浏览子行为编号_5', 584),
 ('bef_browse_bro_act_cnt', 538),
 ('bef_bank_inc_m3_min', 538),
 ('bef_browse_浏览子行为编号_1', 528),
 ('bef_bil

In [718]:
sorted(model.get_score(importance_type='gain').items(),key=lambda x: x[1],reverse=True) 

[('aft_bill_t10', 644.3514395405398),
 ('aft_bill_cre_bank_d10_cnt', 424.91575102564104),
 ('aft_bill_preloan_d10_sum', 408.3525258100561),
 ('aft_bill_t20', 404.1807284302319),
 ('aft_bill_preloan_d20_sum', 219.0763839882697),
 ('aft_bill_ovdu_d1_count', 172.36657249999996),
 ('aft_bill_ovdu_d0_count', 142.8900344117647),
 ('aft_bill_t2', 125.9886111926606),
 ('aft_bill_cre_bank_d30_cnt', 125.78780339130431),
 ('aft_bill_t30', 95.7914392192192),
 ('aft_bill_ovdu_cnt', 84.07204827956988),
 ('性别_0', 80.00189228105903),
 ('aft_bill_preloan_d1_sum', 71.1346036616162),
 ('aft_bill_t1', 61.287117392996166),
 ('aft_bill_ovdu_d0_sum', 60.90524017064845),
 ('aft_bill_preloan_d2_sum', 59.56505759146334),
 ('aft_browse_浏览行为数据_m3_173', 57.92125),
 ('aft_bill_ovdu_d10_sum', 57.87658961722487),
 ('aft_bill_cre_bank_add', 57.12216047999994),
 ('aft_bill_ovdu_d10_count', 56.70222860465117),
 ('aft_bank_inc_m3_max', 52.59229012658227),
 ('aft_bill_ovdu_d2_count', 52.05010619047618),
 ('aft_browse_浏览行为

In [722]:
# 预测及输出文件
pred=model.predict(test_dm)

res=test_set.copy()
res['pred']=pred
res=res[['id','pred']]
res.loc[res['pred']>1,'pred']=1
res.loc[res['pred']<0,'pred']=0

res.columns=['userid','probability']
res.to_csv('./data/predict_result/RES_TEST.csv',sep=',',index=False)


In [464]:
# fea_list
fea_list=['aft_bill_t2'
 ,'aft_bill_t1'
 ,'aft_bill_t0'
 ,'aft_bill_cre_bank_cnt'
 ,'性别_0'
 ,'aft_bill_ovdu_cnt'
 ,'aft_bill_con_cnt'
 ,'aft_bill_ovdu_m3_cnt'
 ,'aft_browse_浏览行为数据_m3_164'
 ,'aft_browse_浏览行为数据_m3_173'
 ,'aft_bill_con_max_x'
 ,'aft_bill_preloan_m3_cnt'
 ,'bef_browse_浏览行为数据_m3_101'
 ,'bef_browse_浏览行为数据_m3_110'
 ,'性别_2'
 ,'aft_bank_inc_m3_max'
 ,'bef_browse_浏览行为数据_m3_164'
 ,'bef_browse_浏览行为数据_m3_173'
 ,'bef_bill_ovdu_m3_cnt'
 ,'aft_browse_浏览行为数据_m3_101'
 ,'aft_bill_creamo_used'
 ,'性别_1'
 ,'教育程度_2'
 ,'bef_browse_浏览行为数据_m3_118'
 ,'aft_bank_inc_m3_sum'
 ,'aft_bank_out_m3_sum'
 ,'bef_browse_浏览子行为编号_m3_6'
 ,'aft_browse_浏览行为数据_m3_118'
 ,'aft_browse_浏览行为数据_m3_110'
 ,'aft_bill_preloan_m3_sum'
 ,'aft_bank_inc_m1_sum'
 ,'aft_bank_out_sum'
 ,'aft_bank_inc_sum'
 ,'bef_browse_浏览子行为编号_10'
 ,'aft_browse_浏览子行为编号_m3_7'
 ,'bef_bill_unc_cnt'
 ,'aft_browse_浏览子行为编号_6'
 ,'bef_browse_浏览子行为编号_6'
 ,'aft_bill_unc_cnt'
 ,'aft_browse_浏览子行为编号_m3_6'
 ,'bef_browse_浏览子行为编号_m3_7'
 ,'aft_browse_浏览子行为编号_1'
 ,'aft_browse_浏览子行为编号_7'
 ,'aft_bank_out_m1_sum'
 ,'bef_browse_浏览行为数据_m3_189'
 ,'bef_browse_浏览子行为编号_m3_10'
 ,'aft_browse_浏览子行为编号_m3_1'
 ,'教育程度_4'
 ,'bef_bank_inc_m3_count'
 ,'bef_bank_out_m3_count'
 ,'aft_browse_浏览子行为编号_4'
 ,'aft_browse_bro_act_cnt'
 ,'bef_browse_浏览子行为编号_8'
 ,'bef_bill_unc_m3_cnt'
 ,'aft_bill_creused_gramo_sum'
 ,'aft_bank_out_m3_count'
 ,'bef_bank_inc_m3_max'
 ,'aft_bill_creused_gramo_max'
 ,'aft_browse_浏览子行为编号_m3_10'
 ,'bef_bank_inc_m3_min'
 ,'aft_bill_unc_m3_cnt'
 ,'bef_bill_ovdu_cnt'
 ,'bef_bank_salary_ave'
 ,'bef_bill_creused_gramo_max'
 ,'bef_browse_浏览子行为编号_m3_4'
 ,'bef_bill_creused_gramo_sum'
 ,'aft_bill_creused_bank_cnt'
 ,'bef_browse_浏览子行为编号_m3_8'
 ,'bef_browse_浏览子行为编号_7'
 ,'bef_browse_浏览子行为编号_4'
 ,'aft_bill_creamo_use_rate'
 ,'aft_browse_浏览子行为编号_m3_4'
 ,'bef_browse_浏览子行为编号_m3_1'
 ,'aft_browse_浏览子行为编号_10'
 ,'bef_bill_creused_gramo_min'
 ,'aft_bill_preloan_bank_cnt'
 ,'bef_bill_creamo_used'
 ,'bef_bill_preloan_m3_sum'
 ,'bef_bank_inc_sum'
 ,'aft_browse_浏览子行为编号_m3_3'
 ,'bef_bank_out_m3_sum'
 ,'bef_bank_inc_m3_sum'
 ,'aft_bill_preloan_bank_rate'
 ,'bef_browse_浏览子行为编号_5'
 ,'bef_bill_cre_bank_cnt'
 ,'婚姻状态_1'
 ,'bef_bank_out_sum'
 ,'aft_bill_creused_gramo_min'
 ,'bef_bill_preloan_m3_cnt'
 ,'aft_browse_浏览子行为编号_3'
 ,'婚姻状态_2'
 ,'bef_bank_ninc_m3_sum'
 ,'bef_bill_inter_max'
 ,'bef_bill_t1'
 ,'户口类型_2'
 ,'bef_bill_inter_cnt'
 ,'bef_bill_preloan_bank_cnt'
 ,'bef_browse_浏览子行为编号_1'
 ,'aft_browse_浏览子行为编号_5'
 ,'bef_bill_t0'
 ,'bef_browse_bro_act_cnt'
 ,'bef_bank_out_m1_sum'
 ,'bef_bill_t2'
 ,'bef_bill_con_cnt'
 ,'bef_bill_con_max_y'
 ,'aft_browse_浏览行为数据_m3_189'
 ,'bef_bill_amo_ava'
 ,'bef_browse_浏览子行为编号_m3_5'
 ,'aft_bill_inter_max'
 ,'aft_browse_浏览子行为编号_m3_5'
 ,'户口类型_1'
 ,'bef_browse_浏览子行为编号_3'
 ,'bef_browse_浏览子行为编号_9'
 ,'bef_bill_creamo_use_rate'
 ,'aft_bill_amo_ava'
 ,'bef_bank_salary_inc_rate'
 ,'职业_2'
 ,'户口类型_3'
 ,'aft_browse_浏览子行为编号_8'
 ,'bef_bill_con_max_x'
 ,'aft_bill_con_max_y'
 ,'aft_browse_浏览子行为编号_m3_8'
 ,'aft_bill_inter_cnt'
]