In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import datetime,gc,math
import random
import lightgbm as lgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder,MinMaxScaler, StandardScaler
from sklearn.metrics import recall_score, auc, accuracy_score, f1_score, precision_score, classification_report, roc_auc_score,mean_squared_error
from pandas.api.types import is_categorical_dtype
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
%matplotlib inline

In [2]:
def active_time_transform(df):
    df['role_created_active_time'].fillna('"0-8,0","8-12,0","12-14,0","14-18,0","18-24,0"', inplace=True)
    df['role_created_active_time'] = df['role_created_active_time'].apply(lambda x:str(x)[1:-1])
    temp = df['role_created_active_time'].str.split(',',expand=True).iloc[:,[1,3,5,7,9]].rename(columns={1:'active_0-8',
                                        3:'active_8-12',5:'active_12-14',7:'active_14-18',9:'active_18-24'})
    for i in range(len(temp.columns)):
        temp.iloc[:,i] = temp.iloc[:,i].apply(lambda x:str(x)[:-1])
    df = df.join(temp).drop(['role_created_active_time'], axis=1)
    return df

In [3]:
def pay_grade_transform(df):
    df['pay_grade'].fillna('[0,0,0,0,0,0,0]',inplace=True)
    df['pay_grade'] = df['pay_grade'].apply(lambda x:str(x)[1:-1])
    temp = df['pay_grade'].str.split(',',expand=True).rename(columns={0:'pay_grade_1',
                1:'pay_grade_2',2:'pay_grade_3',3:'pay_grade_4',4:'pay_grade_5',5:'pay_grade_6',6:'pay_grade_7'})
    df = df.join(temp).drop(['pay_grade'], axis=1)
    return df

In [4]:
%%time
role_info = pd.read_csv('./data/mr_role_1d.csv')
role_info.drop_duplicates(subset=['user_id','cp_server_no','cp_role_id'],inplace=True)
role_info = active_time_transform(role_info)
role_info = pay_grade_transform(role_info)
role_info.to_pickle('./role_info_1d.pickle')

Wall time: 1min 46s


In [5]:
pd.set_option('display.max_columns',None)
role_info.head()

Unnamed: 0,user_id,mgame_id,cp_server_no,cp_role_id,create_role_time,role_name,role_id,p_model,model_money,sys_ver,role_created_login_num,role_created_active,role_created_online,max_role_level,ip_num,device_num,city_num,pay_num,pay_sum,role_created_30_pay_sum,active_0-8,active_8-12,active_12-14,active_14-18,active_18-24,pay_grade_1,pay_grade_2,pay_grade_3,pay_grade_4,pay_grade_5,pay_grade_6,pay_grade_7
0,96859161,1056,13313637,13313637,2020-02-16 15:48:52,13313637,11436054,iPhone 6s Plus,0.0,12.4.1,1.0,1.0,0.0,0.0,1.0,1.0,1.0,,,,0,0,0,1,0,0,0,0,0,0,0,0
1,20332688,1056,13542741,13542741,2020-02-12 20:10:30,無極,11430635,iPhone XR,3828.66,12.2,12.0,1.0,6.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,12,0,0,0,0,0,0,0
5,99489283,1056,13697851,13697851,2020-02-14 15:35:07,13697851,11390804,iPhone XS Max,6625.0,13.1.3,128.0,2.0,39824.0,1.0,2.0,2.0,1.0,,,,33,39,9,59,82,0,0,0,0,0,0,0
7,100142067,1056,13800336,13800336,2020-02-16 15:44:03,13800336,11435951,iPhone 6,4950.88,12.4.4,36.0,2.0,12910.0,1.0,3.0,1.0,1.0,,,,8,3,0,39,20,0,0,0,0,0,0,0
8,101067630,1056,13974238,13974238,2020-02-13 18:01:47,13974238,11371861,iPhone 6s Plus,0.0,12.4.1,7.0,1.0,7.0,1.0,1.0,1.0,1.0,,,,0,0,0,6,0,0,0,0,0,0,0,0


In [6]:
role_info = pd.read_pickle('./role_info_1d.pickle')
# 删除2020-7-19日前30天的数据
role_info['create_role_time'] = pd.to_datetime(role_info['create_role_time'], format='%Y-%m-%d %H:%M:%S')
role_info.drop(role_info[role_info['create_role_time'] > (role_info['create_role_time'].max() + 
                                                          datetime.timedelta(-30))].index,inplace=True)

In [24]:
# print(role_info.shape)
# role_info.head(10)
# 删除没有心跳数据的样本(1、没有登录;2、没有拿到心跳)
role_info.dropna(subset=['role_created_login_num'], inplace=True)
role_info.dropna(subset=['role_created_online'], inplace=True)
role_info['pay_num'].fillna(0, inplace=True)
role_info['pay_sum'].fillna(0, inplace=True)
role_info['role_created_active'] = role_info['role_created_active'].clip(0,1)
role_info['pay_rate'] = role_info['pay_num'] / (role_info['role_created_active'] + 1e-4)
role_info['pay_avg'] = role_info['pay_sum'] / (role_info['pay_num'] + 1e-4)

In [17]:
df = role_info['p_model'].value_counts().reset_index()

In [18]:
df.head()

Unnamed: 0,index,p_model
0,iPhone X,129934
1,iPhone 7 Plus,112694
2,iPhone 8 Plus,103022
3,iPhone 6s Plus,82897
4,iPhone XR,77284


In [25]:
# 清洗设备金额
role_info.loc[role_info['p_model'] == 'iPhone 6s Plus','model_money'] = 3500
role_info.loc[role_info['p_model'] == 'iPhone12,1','model_money'] = 5000
role_info.loc[role_info['p_model'] == 'iPhone12,1','model_money'] = 2500
role_info.loc[role_info['p_model'] == 'iPhone12,1','model_money'] = 2000
role_info['model_money'] = role_info['model_money'].replace(0, np.nan)
role_info['model_money'].fillna(role_info[role_info['model_money'] !=0]['model_money'].mean(),inplace=True)
bins=[0,1000,1500,2000,2500,3000,3500,4000,4500,5000,6000,8000,10000]
role_info['model_money_level'] = pd.cut(role_info['model_money'],bins,labels=False)

In [26]:
# 清洗login_num
# 截取1登陆次数大于40的样本
role_info['role_created_login_num'].clip(1,40,inplace=True)
tmp = role_info[role_info['role_created_login_num'] <2]
tmp['role_created_login_num'] = tmp['max_role_level'] * 0.44
role_info['role_created_login_num'].update(tmp['role_created_login_num'])
role_info['role_created_login_num'].clip(1,40,inplace=True)

In [27]:
# 清洗max_role_level
# 按前1天在线时长更新等级小于5级的
tmp = role_info[role_info['max_role_level'] <5]
tmp['max_role_level'] = tmp['role_created_online'] * 0.0031
role_info['max_role_level'].update(tmp['max_role_level'])
# 截取等级大于65级的
role_info['max_role_level'].clip(1,55,inplace=True)

In [28]:
role_info['role_created_online'].mean()/role_info['max_role_level'].mean()

293.94706004586703

In [35]:
role_info['p_model'].str.contains('iPhone12')

0          False
1          False
5          False
7          False
8          False
           ...  
6042396    False
6042397    False
6042398    False
6042399    False
6042400     True
Name: p_model, Length: 5151293, dtype: object

In [36]:
role_info[role_info['p_model']=='iPhone12,3']

Unnamed: 0,user_id,mgame_id,cp_server_no,cp_role_id,create_role_time,role_name,role_id,p_model,model_money,sys_ver,role_created_login_num,role_created_active,role_created_online,max_role_level,ip_num,device_num,city_num,pay_num,pay_sum,role_created_30_pay_sum,active_0-8,active_8-12,active_12-14,active_14-18,active_18-24,pay_grade_1,pay_grade_2,pay_grade_3,pay_grade_4,pay_grade_5,pay_grade_6,pay_grade_7
89,102892907,1056,14491147,14491147,2020-02-11 15:46:23,14491147,11324265,"iPhone12,3",0.0,13.3,2.0,1.0,1.0,0.0,1.0,1.0,1.0,,,,0,0,0,2,0,0,0,0,0,0,0,0
394,103144114,1056,14551717,14551717,2020-02-13 20:23:34,14551717,11375162,"iPhone12,3",0.0,13.3.1,35.0,2.0,19369.0,1.0,4.0,1.0,1.0,,,,2,8,4,5,35,0,0,0,0,0,0,0
540,103253521,1056,14575431,14575431,2020-02-14 19:57:00,14575431,11396525,"iPhone12,3",0.0,13.3,1.0,1.0,0.0,0.0,1.0,1.0,1.0,,,,0,0,0,0,1,0,0,0,0,0,0,0
810,103461403,1056,14619771,14619771,2020-02-16 19:33:32,14619771,11440755,"iPhone12,3",0.0,13.3.1,13.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,13,0,0,0,0,0,0,0
22681,103573278,1056,660310,14644297,2020-02-17 20:15:53,14644297,11461414,"iPhone12,3",0.0,13.3,12.0,1.0,7.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,12,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6042102,115010040,1056,S911,18200707,2020-06-16 19:35:37,18200707,13554358,"iPhone12,3",0.0,13.5.1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,1,0,0,0,0,0,0,0
6042217,115175407,1056,S919,18247821,2020-06-20 13:27:26,18247821,13573165,"iPhone12,3",0.0,13.5.1,1.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,0,1,0,0,0,0,0,0,0,0,0
6042277,115241839,1056,S923,18271047,2020-06-21 18:19:33,18271047,13581974,"iPhone12,3",0.0,13.5.1,6.0,1.0,1.0,5.0,1.0,1.0,1.0,,,,0,0,0,6,0,0,0,0,0,0,0,0
6042353,115310926,1056,S927,18289855,2020-06-22 23:30:28,18289855,13590019,"iPhone12,3",0.0,13.2,47.0,2.0,19488.0,44.0,5.0,1.0,1.0,1.0,12.0,12.0,37,7,13,9,30,0,1,0,0,0,0,0


In [38]:
role_info['p_model'].str.contains('iPhone12')

0          False
1          False
5          False
7          False
8          False
           ...  
6042396    False
6042397    False
6042398    False
6042399    False
6042400     True
Name: p_model, Length: 5151293, dtype: object

In [40]:
role_info[(role_info['p_model'].str.contains('iPhone12'))&(role_info['p_model'].notna())].sort_values('create_role_time')

Unnamed: 0,user_id,mgame_id,cp_server_no,cp_role_id,create_role_time,role_name,role_id,p_model,model_money,sys_ver,role_created_login_num,role_created_active,role_created_online,max_role_level,ip_num,device_num,city_num,pay_num,pay_sum,role_created_30_pay_sum,active_0-8,active_8-12,active_12-14,active_14-18,active_18-24,pay_grade_1,pay_grade_2,pay_grade_3,pay_grade_4,pay_grade_5,pay_grade_6,pay_grade_7
705583,102857188,1056,14481620,14481620,2020-02-11 09:35:45,14481620,11316952,"iPhone12,1",0.0,13.3.1,5.0,1.0,1.0,0.0,1.0,1.0,1.0,,,,0,5,0,0,0,0,0,0,0,0,0,0
4635029,101385533,1056,14065779,14065779,2020-02-11 09:35:48,14065779,11316953,"iPhone12,1",0.0,13.3,7.0,1.0,303.0,1.0,1.0,1.0,1.0,,,,0,8,0,0,0,0,0,0,0,0,0,0
907240,102857215,1056,14481640,14481640,2020-02-11 09:36:56,14481640,11316968,"iPhone12,1",0.0,13.3.1,2.0,1.0,2.0,0.0,1.0,1.0,1.0,,,,0,2,0,0,0,0,0,0,0,0,0,0
2319737,102857336,1056,14481641,14481641,2020-02-11 09:37:01,14481641,11316970,"iPhone12,1",0.0,13.3,4.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,4,0,0,0,0,0,0,0,0,0,0
4434321,102857355,1056,14481667,14481667,2020-02-11 09:38:54,夜风轻拂,11316994,"iPhone12,1",0.0,13.3.1,46.0,2.0,35286.0,1.0,2.0,1.0,1.0,,,,2,38,22,27,36,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1712627,115167879,1056,S930,18301591,2020-06-23 23:39:44,18301591,13596063,"iPhone12,3",0.0,13.3.1,1.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,1,0,0,0,0,0,0,0
3224695,115376533,1056,S930,18301639,2020-06-23 23:44:23,18301639,13596084,"iPhone12,1",0.0,13.5.1,39.0,2.0,25509.0,39.0,3.0,1.0,1.0,,,170.0,1,20,17,18,31,0,0,0,0,0,0,0
2821928,115376749,1056,S930,18301707,2020-06-23 23:53:36,18301707,13596115,"iPhone12,3",0.0,13.5.1,2.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,2,0,0,0,0,0,0,0
3325315,115376759,1056,S930,18301713,2020-06-23 23:53:58,18301713,13596119,"iPhone12,8",0.0,13.5,2.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,2,0,0,0,0,0,0,0


In [27]:
role_info[role_info['role_created_online'] <1800].sort_values('pay_sum',ascending=False)

Unnamed: 0,user_id,mgame_id,cp_server_no,cp_role_id,create_role_time,role_name,role_id,p_model,model_money,sys_ver,role_created_login_num,role_created_active,role_created_online,max_role_level,ip_num,device_num,city_num,pay_num,pay_sum,role_created_30_pay_sum,active_0-8,active_8-12,active_12-14,active_14-18,active_18-24,pay_grade_1,pay_grade_2,pay_grade_3,pay_grade_4,pay_grade_5,pay_grade_6,pay_grade_7
2212946,103845649,1056,S566,长歌～,2020-05-25 21:34:49,14719895,13417562,iPhone XS Max,6625.00,13.3,2.0,1.0,602.0,129.0,1.0,1.0,1.0,3.0,1544.0,10616.0,0,0,0,0,4,0,0,0,0,0,3,0
5643365,91743563,1056,660017,12958355,2019-08-28 18:07:18,12958355,132541215,TNY-AL00,0.00,28,1.0,1.0,1529.0,1.0,1.0,1.0,1.0,5.0,1424.0,1424.0,0,0,0,6,0,0,2,1,0,0,2,0
2421718,92163872,1056,660021,12974699,2020-01-17 06:50:51,歼37,140542884,vivo X20A,1998.00,27,2.0,2.0,1034.0,52.0,2.0,2.0,2.0,13.0,1092.0,14008.0,1,0,0,0,4,1,8,3,0,0,1,0
5240718,91889044,1056,660018,12963544,2019-08-30 09:52:23,12963544,132688736,BLA-AL00,3793.50,28,1.0,1.0,613.0,1.0,1.0,1.0,1.0,15.0,666.0,1164.0,0,3,0,0,0,1,8,6,0,0,0,0
3730955,91434204,1056,660016,12950837,2019-08-25 20:52:27,12950837,132230663,ASUS_Z01QD,0.00,27,2.0,1.0,1206.0,1.0,1.0,1.0,1.0,1.0,648.0,648.0,0,0,0,0,6,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6042393,115368505,1056,S930,18299270,2020-06-23 21:14:41,18299270,13595095,"iPhone12,3",0.00,13.5.1,1.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,1,0,0,0,0,0,0,0
6042395,115371548,1056,S930,18300097,2020-06-23 21:58:43,18300097,13595392,iPhone 7,2747.41,12.4.1,2.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,0,0,0,0,2,0,0,0,0,0,0,0
6042398,115373264,1056,S930,18300592,2020-06-23 22:30:09,18300592,13595580,iPhone X,6019.95,13.5.1,14.0,1.0,305.0,3.0,1.0,1.0,1.0,,,,0,0,0,0,15,0,0,0,0,0,0,0
6042399,115374690,1056,S930,18301020,2020-06-23 22:59:31,18301020,13595795,iPhone XS,0.00,13.3.1,2.0,1.0,0.0,2.0,1.0,1.0,1.0,,,,0,0,0,0,2,0,0,0,0,0,0,0


In [31]:
pd.to_numeric(role_info['role_id'],errors='raise')

0          11436054
1          11430635
5          11390804
7          11435951
8          11371861
             ...   
6042396    13595419
6042397    13595427
6042398    13595580
6042399    13595795
6042400    13595816
Name: role_id, Length: 5151293, dtype: int64

In [28]:
role_info['user_id'].value_counts()

103528836    615
102096756    276
109749597    210
99779395     203
100139061    191
            ... 
104780617      1
95927403       1
108495980      1
104295537      1
104859649      1
Name: user_id, Length: 4904426, dtype: int64

In [29]:
# 清洗online
# 按等级补全在线时长
tmp = role_info[role_info['role_created_online'] <1800]
tmp['role_created_online'] = tmp['max_role_level'] * 294
role_info['role_created_online'].update(tmp['role_created_online'])
# 截断 最小5分钟，最大8小时
role_info['role_created_online'].clip(300,28800,inplace=True)

In [30]:
# 处理时间特征create_role_time
role_info = role_info.sort_values('create_role_time').reset_index(drop=True)
role_info['hour'] = role_info['create_role_time'].dt.hour
role_info['weekend'] = role_info['create_role_time'].dt.weekday
holidays = ['2019-09-13','2019-09-14','2019-09-15','2019-10-01','2019-10-02','2019-10-03','2019-10-04','2019-10-05','2019-10-06','2019-10-07',
            '2020-01-01','2020-01-24','2020-01-25','2020-01-26','2020-01-27','2020-01-28','2020-01-29','2020-01-30','2020-04-04','2020-04-05',
            '2020-04-06','2020-04-04','2020-05-01','2020-05-02','2020-05-03','2020-05-04','2020-05-05','2020-06-25','2020-06-27']
role_info['timestamp'] = role_info['create_role_time'].dt.date.apply(lambda x:str(x))
role_info['is_holidays'] = (role_info['timestamp'].isin(holidays)).astype(int)
role_info.drop(['timestamp'], axis=1, inplace=True)

In [31]:
# 合并role_user
user_info = pd.read_csv('./user_info.csv')
user_info.drop_duplicates(inplace=True)
data = pd.merge(role_info,user_info,on='user_id',how='left',validate='many_to_one')
# 标记用户时否绑定了手机，绑定;1    未绑定：0
data['mobile'] = pd.to_numeric(data['mobile'],errors='coerce')
data['mobile'] = data['mobile'].apply(lambda x:0 if np.isnan(x) == True else 1)
# 用户注册时间与角色注册时间的时间差,并进行分桶
data['reg_date'] = pd.to_datetime(data['reg_date'])
data['time_interval'] = (data['create_role_time'] - data['reg_date']).dt.total_seconds()/3600
data['time_interval'] = data['time_interval'].clip(0,2000)
data['time_interval'] = pd.cut(data['time_interval'], bins=[0,1,3,12,24,72,168,720,2000],labels=False)

data['active_0-8'] = pd.to_numeric(data['active_0-8'], errors='coerce')
data['active_8-12'] = pd.to_numeric(data['active_8-12'], errors='coerce')
data['active_12-14'] = pd.to_numeric(data['active_12-14'], errors='coerce')
data['active_14-18'] = pd.to_numeric(data['active_14-18'], errors='coerce')
data['active_18-24'] = pd.to_numeric(data['active_18-24'], errors='coerce')

data['pay_grade_1'] = pd.to_numeric(data['pay_grade_1'], errors='coerce')
data['pay_grade_2'] = pd.to_numeric(data['pay_grade_2'], errors='coerce')
data['pay_grade_3'] = pd.to_numeric(data['pay_grade_3'], errors='coerce')
data['pay_grade_4'] = pd.to_numeric(data['pay_grade_4'], errors='coerce')
data['pay_grade_5'] = pd.to_numeric(data['pay_grade_5'], errors='coerce')
data['pay_grade_6'] = pd.to_numeric(data['pay_grade_6'], errors='coerce')
data['pay_grade_7'] = pd.to_numeric(data['pay_grade_7'], errors='coerce')


In [32]:
data.to_pickle('./data_1d.pickle')

In [17]:
data.head()

Unnamed: 0,user_id,mgame_id,cp_server_no,cp_role_id,create_role_time,role_name,role_id,p_model,model_money,sys_ver,role_created_login_num,role_created_active,role_created_online,max_role_level,ip_num,device_num,city_num,pay_num,pay_sum,role_created_30_pay_sum,active_0-8,active_8-12,active_12-14,active_14-18,active_18-24,pay_grade_1,pay_grade_2,pay_grade_3,pay_grade_4,pay_grade_5,pay_grade_6,pay_grade_7,pay_rate,pay_avg,model_money_level,hour,weekend,is_holidays,reg_date,mobile,user_type,user_flag,platform,channel_id,source_id,user_creates_3_server_num,user_creates_3_role_num,time_interval
0,90001783,1056,660011,12927595,2019-08-16 13:46:01,12927595,130788015,OPPO A59s,690.0,22,1.0,1.0,7288.0,22.5928,1.0,1.0,1.0,0.0,0.0,,0,0,15,10,0,0,0,0,0,0,0,0,0.0,0.0,0.0,13,4,0,2019-08-16 13:46:00,0,18.0,1.0,1.0,6867.0,198844.0,1.0,2.0,0.0
1,90001801,1056,660011,12927596,2019-08-16 13:46:17,12927596,130788044,OPPO R11st,2526.345777,25,1.0,1.0,300.0,1.0,1.0,1.0,1.0,0.0,0.0,,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,4.0,13,4,0,2019-08-16 13:46:17,0,18.0,1.0,1.0,6867.0,198844.0,1.0,1.0,
2,89947274,1056,660011,12927597,2019-08-16 13:46:30,12927597,130788072,PE-TL00M,2526.345777,4.4.2,2.64,1.0,1764.0,6.0,1.0,1.0,1.0,0.0,0.0,,0,0,2,0,0,0,0,0,0,0,0,0,0.0,0.0,4.0,13,4,0,2019-08-15 20:59:33,0,1.0,1.0,1.0,20762.0,199152.0,2.0,2.0,3.0
3,90001815,1056,660011,12927598,2019-08-16 13:46:34,12927598,130788079,Redmi Note 7 Pro,1207.63,9,1.0,1.0,300.0,1.0,1.0,1.0,1.0,0.0,0.0,,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,13,4,0,2019-08-16 13:46:26,0,1.0,1.0,1.0,6770.0,188588.0,1.0,1.0,0.0
4,90001842,1056,660011,12927599,2019-08-16 13:46:46,12927599,130788105,OPPO R9km,2499.0,5.1,7.92,1.0,2437.0,18.0,1.0,1.0,1.0,0.0,0.0,,0,0,9,0,0,0,0,0,0,0,0,0,0.0,0.0,3.0,13,4,0,2019-08-16 13:46:41,0,1.0,1.0,1.0,6770.0,191645.0,1.0,1.0,0.0


In [33]:
del role_info,tmp,data
gc.collect()

76

In [37]:
data[['user_id','role_created_30_pay_sum']].sort_values('role_created_30_pay_sum',ascending=False)

Unnamed: 0,user_id,role_created_30_pay_sum
3280397,109256440,156788.0
1094156,102471878,129240.0
538044,96579628,127774.0
1455458,104031742,115140.0
290434,96115482,114846.0
...,...,...
5141445,115376912,
5141446,115261590,
5141447,115376919,
5141448,115009251,


In [35]:
data = pd.read_pickle('./data_1d.pickle')