# 数据处理

In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import gc
import matplotlib.pyplot as plt
import zipfile
import os
import json

from sklearn.linear_model import SGDRegressor

In [None]:
# Pick the face with the largest area and append the total number of faces.
def get_face_feature(row):
    """Return the largest-area face of a JSON face list plus the face count.

    Args:
        row: JSON string encoding a list of faces. Each face is a list whose
            first element is compared as its area (the sample call below uses
            4-element faces).

    Returns:
        A new list holding the entries of the face with the largest positive
        first element, followed by ``len(faces)``. If the list is empty, or no
        face has a first element above 0, the face part is ``[0, 0, 0, 0]`` so
        the result has the same length as for a 4-element face.
    """
    faces = json.loads(row)
    # max() keeps the first of several equally large faces, which matches a
    # left-to-right scan that only replaces on a strictly larger area.
    best = max(faces, key=lambda face: face[0], default=None)
    if best is None or best[0] <= 0:
        best = [0, 0, 0, 0]
    # Concatenate into a fresh list so the parsed face itself is not mutated.
    return best + [len(faces)]

get_face_feature('[[0.0849, 0, 13, 54], [0.1221, 0, 22, 88]]')

In [None]:
# Sort the faces in descending order, keep the first five and flatten them
# into a single comma-separated string.
def get_face_features(row):
    """Serialize the top five faces of a JSON face list as one CSV string.

    Args:
        row: JSON string encoding a list of faces (each face is a list).

    Returns:
        The values of at most five faces joined by commas. Faces are sorted
        in descending order; lists compare element by element, so the first
        entry of each face is the primary sort key. An empty list gives ''.
    """
    faces = json.loads(row)
    faces.sort(reverse=True)
    pieces = []
    for face in faces[:5]:
        pieces.append(','.join(map(str, face)))
    return ','.join(pieces)

get_face_features('[[0.0849, 0, 13, 54], [0.1221, 0, 22, 88]]')

In [None]:
# Sample weight of one interaction row.
def get_sample_weight(row):
    """Compute the training weight of a single interaction.

    Args:
        row: Mapping-like object with the keys 'click', 'like', 'follow',
            'playing_time' and 'duration_time'.

    Returns:
        0.1 for a non-clicked row. For a clicked row: 0.1, plus 0.3 if liked,
        plus 0.3 if followed, plus 0.3 scaled by the played fraction
        (playing_time / duration_time, capped at 1). A duration of 0 counts
        as fully played.
    """
    weight = 0.1
    # Guard clause: rows without a click keep the base weight.
    if row['click'] == 0:
        return weight

    if row['like'] == 1:
        weight = weight + 0.3
    if row['follow'] == 1:
        weight = weight + 0.3

    if row['duration_time'] == 0:
        # Avoid dividing by zero; treat as fully played.
        play_bonus = 0.3
    else:
        play_bonus = 0.3 * min(1, row['playing_time'] / row['duration_time'])
    return weight + play_bonus

In [None]:
# One-hot encode some columns of a DataFrame and return the encoded DataFrame.
def pandas_onehot(df, select_cols, onehot_cols):
    """Select columns from ``df`` and one-hot encode a subset of them.

    Args:
        df: Source DataFrame.
        select_cols: Columns to keep in the result.
        onehot_cols: Columns (among ``select_cols``) passed to
            ``pd.get_dummies`` for one-hot expansion.

    Returns:
        A new DataFrame with the selected columns, the ``onehot_cols``
        replaced by their dummy columns.
    """
    selected = df[select_cols]
    return pd.get_dummies(selected, columns=onehot_cols)

In [None]:
# One-hot encode DataFrame columns and return the encoded 2-D array.
def sklearn_onehot(df, cols):
    """One-hot encode ``df[cols]`` with scikit-learn.

    Args:
        df: Source DataFrame.
        cols: Columns to encode.

    Returns:
        The dense 2-D array produced by ``OneHotEncoder`` for ``df[cols]``.
    """
    # Imported locally: `preprocessing` is not imported anywhere in the
    # visible part of this notebook, so the previous reference to
    # `preprocessing.OneHotEncoder` failed with NameError.
    from sklearn.preprocessing import OneHotEncoder

    encoder = OneHotEncoder()
    return encoder.fit_transform(df[cols]).toarray()

In [None]:
# Export a DataFrame to a libffm-format text file.
# libffm format:
# label field_1:index_1:value_1 field_2:index_2:value_2 ...
def export_libffm(df, file):
    """Write ``df`` to ``file`` in libffm text format.

    The first column is written as the integer label. Every other column is
    a feature whose index is its column position. The field name of a column
    is the text before its first underscore, and consecutive columns with the
    same field name share one field index (e.g. "a_1" and "a_2"). Features
    whose value equals 0 are omitted. Integral values are written with
    ``%d``, all others with ``%f``.

    Tokens are separated by exactly one space with no trailing space, as in
    the format line above.

    Args:
        df: DataFrame whose first column is the label.
        file: Path of the output file; it is overwritten.
    """
    columns = [str(col) for col in df.columns.values]
    fields = [col.split('_')[0] for col in columns]

    # Field indices start at 0 and advance whenever the field name changes
    # between neighbouring columns.
    field_index = []
    index = -1
    for i, name in enumerate(fields):
        if i == 0 or name != fields[i - 1]:
            index += 1
        field_index.append(index)

    with open(file, 'w') as f:
        for row in df.values:
            tokens = [str(int(row[0]))]  # label
            for i in range(1, len(row)):
                value = row[i]
                if value == 0:
                    # Zero features are left out (sparse representation).
                    continue
                if int(value) == value:
                    tokens.append("%d:%d:%d" % (field_index[i], i, value))
                else:
                    tokens.append("%d:%d:%f" % (field_index[i], i, value))
            f.write(' '.join(tokens) + '\n')

    print('finishing......')

## 交互数据

In [None]:
# Column layout shared by the train and test interaction tables.
# '_flag_' is set further below: 0 for training rows, -1 for test rows.
columns = ['user_id', 'photo_id', 'click', 'like', 'follow', 'time', 'playing_time', 'duration_time', '_flag_']

In [None]:
# Path of the training interaction file (the name suggests a 10000-row sample — TODO confirm).
train_interaction_txt = '../final_contest/train/train_interaction_10000.txt'

In [None]:
# Load the training interactions; the file has no header row, so the shared column names are supplied.
df_train = pd.read_table(train_interaction_txt, names=columns)

In [None]:
# Chronological 80/20 split: the first 80% of rows (by 'time') form the
# training part, the remaining rows the validation part.
df_tmp = df_train.sort_values('time')
split_at = int(len(df_tmp) * 0.8)
df_tmp_tra = df_tmp.iloc[:split_at].copy()
df_tmp_val = df_tmp.iloc[split_at:].copy()

In [None]:
df_tmp_tra.shape

In [None]:
df_tmp_val.shape

In [None]:
# photo_ids that occur in the validation part but never in the training part.
val_set = set(df_tmp_val['photo_id']) - set(df_tmp_tra['photo_id'])

In [None]:
7413761 in val_set

In [None]:
df_train['_flag_'] = 0  # mark every training row with _flag_ = 0

In [None]:
# Path of the test interaction file (the name suggests a 1000-row sample — TODO confirm).
test_interaction_txt = '../final_contest/test/test_interaction_1000.txt'

In [None]:
# Load the test interactions; only four columns are read because the label columns are not part of this file's column list.
df_test = pd.read_table(test_interaction_txt, names=['user_id', 'photo_id', 'time', 'duration_time'])

In [None]:
# Give the test frame the same column layout as the training frame; columns it lacks are created with missing values.
df_test = df_test.reindex(columns=columns)

In [None]:
# Fill the columns that the test file does not provide with constant placeholders.
df_test['click'] = 0
df_test['like'] = 0
df_test['follow'] = 0
df_test['playing_time'] = 0
df_test['_flag_'] = -1  # mark every test row with _flag_ = -1

In [None]:
# Stack training and test rows into one frame. ignore_index is not set, so
# each part keeps its own row labels and labels can repeat in the result.
df_interaction = pd.concat([df_train, df_test], axis=0)

In [None]:
df_interaction.head()

In [None]:
df_interaction.tail()

## 基础特征

In [None]:
# time => hour、minute
# duration_time => photo_type(图片、短视频<10、中短视频10~30、中视频30~60、中长视频60~120、长视频>120)

## 统计特征

In [None]:
# view_cn、click_cn、like_cn、follow_cn
# view_cn_0、click_cn_0、like_cn_0、follow_cn_0 (type=0，图片)
# view_cn_1、click_cn_1、like_cn_1、follow_cn_1 (type=1，短视频)
# ... 
# view_cn_5、click_cn_5、like_cn_5、follow_cn_5 (type=5，长视频)
# 
# -- 4*7 = 28
# 
# click_cn/view_cn、like_cn/view_cn、follow_cn/view_cn、like_cn/click_cn、follow_cn/click_cn、follow_cn/like_cn
# click_cn_0/view_cn_0、like_cn_0/view_cn_0、follow_cn_0/view_cn_0、like_cn_0/click_cn_0、follow_cn_0/click_cn_0、follow_cn_0/like_cn_0
# ...
# click_cn_5/view_cn_5、like_cn_5/view_cn_5、follow_cn_5/view_cn_5、like_cn_5/click_cn_5、follow_cn_5/click_cn_5、follow_cn_5/like_cn_5
#
# -- 6*7 = 42
# 
# 以下均为video统计特征，不含picture
# playing_time_view_sum、duration_time_view_sum
# playing_time_view_avg、duration_time_view_avg
# playing_time_view_mode、duration_time_view_mode
# playing_time_view_max、duration_time_view_max
# playing_time_view_min、duration_time_view_min
#
# playing_time_click_sum、duration_time_click_sum
# playing_time_click_avg、duration_time_click_avg
# playing_time_click_mode、duration_time_click_mode
# playing_time_click_max、duration_time_click_max
# playing_time_click_min、duration_time_click_min
# 
# playing_time_like_sum、duration_time_like_sum
# playing_time_like_avg、duration_time_like_avg
# playing_time_like_mode、duration_time_like_mode
# playing_time_like_max、duration_time_like_max
# playing_time_like_min、duration_time_like_min
# 
# playing_time_follow_sum、duration_time_follow_sum
# playing_time_follow_avg、duration_time_follow_avg
# playing_time_follow_mode、duration_time_follow_mode
# playing_time_follow_max、duration_time_follow_max
# playing_time_follow_min、duration_time_follow_min
# 
# -- 10*4 = 40

# time_max 用户最晚活跃时间
# time_min 用户最早活跃时间
# time_len = 用户活跃时长(time_max - time_min)

## 时间特征

In [None]:
# 最后3次点击的photo_type、duration_time、hour、minute
# 最初3次点击的photo_type、duration_time、hour、minute


In [None]:
# Load the previously exported base-feature table.
df = pd.read_csv('../out/base_feature.csv')

In [None]:
df.head()

In [None]:
# Keep only the user_id column (still a DataFrame because of the double brackets).
df = df[['user_id']]

In [None]:
# Rename the single remaining column to 'aaa' (looks like a scratch experiment — TODO confirm it is still needed).
df.columns = ['aaa']

In [None]:
df.head()

### 添加面部特征：

In [None]:
# Load the extended interaction table (its first line is used as the header) and preview it.
df = pd.read_table('../out/train_interaction_ext.txt')
df.head()

In [None]:
# Load the headerless face file: one photo_id per row plus a 'data' column that is parsed as JSON below.
df_face = pd.read_table('../../data/train_face.txt', names=['photo_id', 'data'])

In [None]:
# Replace each JSON face list with the comma-separated string of its top five faces.
df_face.data = df_face.data.apply(get_face_features)

In [None]:
# Split the comma-separated string into one column per value (columns are
# labelled 0, 1, 2, ...); rows with fewer values get missing entries in the
# trailing columns.
df_face = df_face[['photo_id']].join(df_face.data.str.split(',', expand=True))

In [None]:
df_face.head()

In [None]:
# df_face = pd.read_table('../../data/train_face.txt', names=['photo_id', 'data'])
# df_face['data'] = df_face['data'].apply(get_face_feature)
# # data列拆分：
# df_face_details = pd.DataFrame(data=list(df_face['data'].values), columns=['area', 'gender', 'age', 'score', 'face_count'])
# df_face = pd.concat([df_face, df_face_details], axis=1).drop(['data'], axis=1)

In [None]:
# Join the face features onto the interaction rows by photo_id; a left join keeps interactions whose photo has no face record.
df = pd.merge(df, df_face, on='photo_id', how='left')

In [None]:
df.head()

In [None]:
# Export interactions + face features as tab-separated text without header or index column.
df.to_csv('../out/train_interaction_face.txt', index=False, header=False, sep='\t')

In [None]:
# Reload the extended interaction table (first line used as header) for the sample-weight computation and preview it.
train_interaction_ext = pd.read_table('../out/train_interaction_ext.txt')
train_interaction_ext.head()

In [None]:
# Per-row sample weights for the full training table.
# .copy() makes an independent frame, so adding the 'weight' column does not
# assign into a slice of train_interaction_ext. The earlier reindex() call
# was dropped because its result was never used.
train_interaction_ext_weight = train_interaction_ext[['click', 'like', 'follow', 'playing_time', 'duration_time']].copy()
train_interaction_ext_weight['weight'] = train_interaction_ext_weight.apply(get_sample_weight, axis=1)
train_interaction_ext_weight.head()

In [None]:
# Write one weight per line; the file name is the exported data file name plus '.weight'.
train_interaction_ext_weight[['weight']].to_csv('../out/train_interaction_face.txt.weight', index=False, header=False, sep='\t')

## 验证集

In [None]:
# Chronological 80/20 split of the extended interaction table: the first 80%
# of rows (by 'time') go to the training part, the rest to validation.
train_interaction_ext = train_interaction_ext.sort_values('time')
n_train_rows = int(len(train_interaction_ext) * 0.8)
train_interaction_ext_tra = train_interaction_ext.iloc[:n_train_rows].copy()
train_interaction_ext_val = train_interaction_ext.iloc[n_train_rows:].copy()

In [None]:
# Per-row sample weights for the training part of the split.
# .copy() makes an independent frame, so adding the 'weight' column does not
# assign into a slice of train_interaction_ext_tra. The earlier reindex()
# call was dropped because its result was never used.
train_interaction_ext_tra_weight = train_interaction_ext_tra[['click', 'like', 'follow', 'playing_time', 'duration_time']].copy()
train_interaction_ext_tra_weight['weight'] = train_interaction_ext_tra_weight.apply(get_sample_weight, axis=1)
train_interaction_ext_tra_weight.head()

In [None]:
# Write the training-part weights, one per line, next to the '.train' data file.
train_interaction_ext_tra_weight[['weight']].to_csv('../out/train_interaction_face.train.weight', index=False, header=False)

In [None]:
# Per-row sample weights for the validation part of the split.
# The '.valid' data file is written only for rows whose photo_id never occurs
# in the training part, so the same filter is applied here. Without it the
# '.valid.weight' file would hold more rows than the '.valid' file.
# Applying the filter again later is harmless.
_unseen_photo = ~train_interaction_ext_val['photo_id'].isin(train_interaction_ext_tra['photo_id'])
# .copy() makes an independent frame, so adding the 'weight' column does not
# assign into a slice. The earlier reindex() call was dropped because its
# result was never used.
train_interaction_ext_val_weight = train_interaction_ext_val.loc[
    _unseen_photo, ['click', 'like', 'follow', 'playing_time', 'duration_time']
].copy()
train_interaction_ext_val_weight['weight'] = train_interaction_ext_val_weight.apply(get_sample_weight, axis=1)
train_interaction_ext_val_weight.head()

In [None]:
# Write the validation-part weights, one per line, next to the '.valid' data file.
train_interaction_ext_val_weight[['weight']].to_csv('../out/train_interaction_face.valid.weight', index=False, header=False)

In [None]:
# Keep only validation rows whose photo never appears in the training part.
val_photo_ids = list(set(train_interaction_ext_val['photo_id'].values) - set(train_interaction_ext_tra['photo_id'].values))
train_interaction_ext_val = train_interaction_ext_val.loc[train_interaction_ext_val['photo_id'].isin(val_photo_ids)]

In [None]:
# Attach face features to the training part (left join on photo_id).
train_interaction_face_tra = pd.merge(train_interaction_ext_tra, df_face, on='photo_id', how='left')

In [None]:
# Export the training part as tab-separated text without header or index column.
train_interaction_face_tra.to_csv('../out/train_interaction_face.train', index=False, header=False, sep='\t')

In [None]:
# Attach face features to the (filtered) validation part (left join on photo_id).
train_interaction_face_val = pd.merge(train_interaction_ext_val, df_face, on='photo_id', how='left')

In [None]:
# Export the validation part as tab-separated text without header or index column.
train_interaction_face_val.to_csv('../out/train_interaction_face.valid', index=False, header=False, sep='\t')

## 用户分群

In [None]:
# Reload the train/validation tables from disk with explicit column names.
# NOTE(review): these paths ('..._face_tra.txt' / '..._face_val.txt') differ
# from the files written above ('..._face.train' / '..._face.valid'), and the
# five face columns named here match the single-face layout of the
# commented-out get_face_feature path. Presumably these files come from an
# earlier version of the export — confirm before relying on them.
train_interaction_face_tra = pd.read_table(
    '../out/train_interaction_face_tra.txt',
    names=['user_id', 'photo_id', 'click', 'like', 'follow', 'time', 'playing_time', 'duration_time', 'activity', 'mean', 'area', 'gender', 'age', 'score', 'face_count']
)
train_interaction_face_val = pd.read_table(
    '../out/train_interaction_face_val.txt',
    names=['user_id', 'photo_id', 'click', 'like', 'follow', 'time', 'playing_time', 'duration_time', 'activity', 'mean', 'area', 'gender', 'age', 'score', 'face_count']
)

### 尾号为0的用户

In [None]:
# Training rows of users whose user_id ends in 0 (user_id % 10 == 0).
train_interaction_face_tra_0 = train_interaction_face_tra[train_interaction_face_tra['user_id'] % 10 == 0]

In [None]:
# Export the last-digit-0 training subset (tab-separated, no header, no index).
train_interaction_face_tra_0.to_csv('../out/train_interaction_face_tra_0.txt', index=False, header=False, sep='\t')

In [None]:
# Validation rows of users whose user_id ends in 0 (user_id % 10 == 0).
train_interaction_face_val_0 = train_interaction_face_val[train_interaction_face_val['user_id'] % 10 == 0]

In [None]:
# Export the last-digit-0 validation subset (tab-separated, no header, no index).
train_interaction_face_val_0.to_csv('../out/train_interaction_face_val_0.txt', index=False, header=False, sep='\t')

### 尾号为1的用户

In [None]:
# Training rows of users whose user_id ends in 1 (user_id % 10 == 1).
train_interaction_face_tra_1 = train_interaction_face_tra[train_interaction_face_tra['user_id'] % 10 == 1]

In [None]:
# Export the last-digit-1 training subset (tab-separated, no header, no index).
train_interaction_face_tra_1.to_csv('../out/train_interaction_face_tra_1.txt', index=False, header=False, sep='\t')

In [None]:
# Validation rows of users whose user_id ends in 1 (user_id % 10 == 1).
train_interaction_face_val_1 = train_interaction_face_val[train_interaction_face_val['user_id'] % 10 == 1]

In [None]:
# Export the last-digit-1 validation subset (tab-separated, no header, no index).
train_interaction_face_val_1.to_csv('../out/train_interaction_face_val_1.txt', index=False, header=False, sep='\t')

### 尾号为2的用户

In [None]:
# Training rows of users whose user_id ends in 2 (user_id % 10 == 2).
train_interaction_face_tra_2 = train_interaction_face_tra[train_interaction_face_tra['user_id'] % 10 == 2]

In [None]:
# Export the last-digit-2 training subset (tab-separated, no header, no index).
train_interaction_face_tra_2.to_csv('../out/train_interaction_face_tra_2.txt', index=False, header=False, sep='\t')

In [None]:
# Validation rows of users whose user_id ends in 2 (user_id % 10 == 2).
train_interaction_face_val_2 = train_interaction_face_val[train_interaction_face_val['user_id'] % 10 == 2]

In [None]:
# Export the last-digit-2 validation subset (tab-separated, no header, no index).
train_interaction_face_val_2.to_csv('../out/train_interaction_face_val_2.txt', index=False, header=False, sep='\t')

### 用户活跃度、平均点击率分布

In [None]:
# One row per user holding the minimum 'activity' and 'mean' over that user's rows.
# NOTE(review): presumably both values are constant per user, so min() just picks that value — confirm.
users_tra = train_interaction_face_tra[['user_id', 'activity', 'mean']].groupby('user_id').min()
# Scatter of users as red dots: activity on the x axis, mean on the y axis.
plt.plot(users_tra['activity'].values, users_tra['mean'].values, 'ro')

In [None]:
# Fourth quadrant: high activity (> 0.5) and low mean click rate (<= 0.5) — described as long-time, picky users.
train_interaction_face_tra[(train_interaction_face_tra['activity'] > 0.5) & (train_interaction_face_tra['mean'] <= 0.5)]\
    .to_csv('../out/train_interaction_face_tra_4.txt', index=False, header=False, sep='\t')
train_interaction_face_val[(train_interaction_face_val['activity'] > 0.5) & (train_interaction_face_val['mean'] <= 0.5)]\
    .to_csv('../out/train_interaction_face_val_4.txt', index=False, header=False, sep='\t')

## 测试集

### 添加活跃度和平均点击率

In [None]:
df.head()

In [None]:
# Load the headerless test interaction file with its four column names.
df_test = pd.read_table('../../data/test_interaction.txt', names=['user_id', 'photo_id', 'time', 'duration_time'])

In [None]:
# Arrange the test frame in the training column order; click, like, follow and playing_time are created with missing values.
df_test = df_test.reindex(columns=['user_id', 'photo_id', 'click', 'like', 'follow', 'time', 'playing_time', 'duration_time'])

In [None]:
# Attach 'activity' and 'mean' to each test row by user_id (left join).
# `df` must be a frame that holds 'user_id', 'activity' and 'mean' columns.
# NOTE(review): drop_duplicates() yields one row per user only if both values are constant per user — confirm.
df_test = pd.merge(df_test, df[['user_id', 'activity', 'mean']].drop_duplicates(), on='user_id', how='left')

In [None]:
df_test.head()

In [None]:
# Load the headerless test face file: one photo_id per row plus a 'data' column that is parsed as JSON below.
df_test_face = pd.read_table('../../data/test_face.txt', names=['photo_id', 'data'])

In [None]:
# Replace each JSON face list with the comma-separated string of its top five faces.
df_test_face.data = df_test_face.data.apply(get_face_features)

In [None]:
# Split the comma-separated string into one column per value (columns are
# labelled 0, 1, 2, ...); rows with fewer values get missing entries in the
# trailing columns.
df_test_face = df_test_face[['photo_id']].join(df_test_face.data.str.split(',', expand=True))

In [None]:
df_test_face.head()

In [None]:
# # 添加面部特征
# df_test_face = pd.read_table('../../data/test_face.txt', names=['photo_id', 'data'])
# df_test_face['data'] = df_test_face['data'].apply(get_face_features)
# df_test_face_details = pd.DataFrame(data=list(df_test_face['data'].values), columns=['area', 'gender', 'age', 'score', 'face_count'])
# df_test_face = pd.concat([df_test_face, df_test_face_details], axis=1).drop(['data'], axis=1)

In [None]:
# Join the face features onto the test interactions by photo_id; a left join keeps rows whose photo has no face record.
df_test = pd.merge(df_test, df_test_face, on='photo_id', how='left')

In [None]:
df_test.head()

In [None]:
# Export the test table as tab-separated text.
# NOTE(review): this writes a header row (header=True), unlike the training exports above which use header=False — confirm this is intended.
df_test.to_csv('../out/test_interaction_face.txt', index=False, header=True, sep='\t')

In [None]:
df_test.columns.size

In [6]:
# Load the exported base-feature table.
df_base = pd.read_csv('../out/base_feature.csv')

In [7]:
df_base.head()

Unnamed: 0,user_id,photo_id,click,like,follow,time,playing_time,duration_time,_flag_,f_hour,f_minute,f_photo_type
0,1637,6484142,0,0,0,761036604603,0,11,0,12,691,2
1,1637,4647664,0,0,0,761035501863,0,11,0,11,672,2
2,1637,4994626,1,0,0,761036933553,11,10,0,12,696,2
3,1637,4416881,0,0,0,761119247892,0,9,1,10,628,1
4,1637,2597756,0,0,0,761119421332,0,11,1,11,631,2


In [11]:
# Print photo_id and _flag_ for every row. apply() is used only for the
# printing side effect, so the returned Series holds None for each row.
df_base.apply(lambda row: print(row['photo_id'], row['_flag_']), axis=1)

6484142 0
4647664 0
4994626 0
4416881 1
2597756 1
4977481 0
5089000 0
693195 0
163901 0
6472236 0
4509888 0
3678099 1
3856567 1
4937691 0
1140809 1
730381 1
5810278 0
99180 0
5103484 0
6394919 1
3911274 0
7300221 0
5677060 0
3018228 0
3896621 0
1663638 0
119781 1
5088204 0
4538593 1
2463556 0
7090577 0
1592688 1
31597 0
1564376 0
1244325 0
6485638 1
1072675 0
223191 0
5882308 0
1433106 0
3389328 0
4253625 0
2227206 0
5227605 0
5309646 0
5856176 0
3190983 0
7427964 1
1252093 0
5453338 1
4126342 0
6332981 0
7115682 0
6577026 0
3347783 1
1989927 1
7089041 0
2163018 0
1723330 0
2092857 1
1083737 0
7537504 0
2766231 0
4411829 0
830961 0
2632859 0
5569939 0
6920465 0
1721473 0
7006026 0
6992221 0
2277615 1
7323419 0
777406 0
2902846 0
959455 0
4466146 1
5091030 0
7201267 0
6880128 1
3629251 0
914877 0
3613939 0
1524925 0
2724592 1
7127433 1
5287265 0
5676746 0
818284 0
3024146 0
1267514 0
137980 0
7314185 0
561158 0
342841 0
4929331 1
4485899 0
198460 1
4161662 0
5510554 1
3599952 0
4191524 

5759607 1
2163104 0
3954761 0
1466479 0
205375 0
2153722 0
2842111 0
7290140 1
6715294 0
3591485 0
5793369 0
6831323 0
2968232 0
2838072 1
2584127 0
2585527 0
2684567 0
582744 0
3704988 0
6179498 0
1151017 0
1219816 0
1265826 0
1063250 0
2470978 0
1482741 0
3083704 0
1528528 1
501049 1
3020686 1
2245994 0
3159566 1
3699410 1
5328081 0
6617489 0
4217953 0
5261148 0
7436405 0
6846575 0
1192442 1
82166 1
363348 0
6300723 0
3686745 1
1469094 0
5528019 0
5033742 0
5111260 0
3145072 0
559923 0
4407834 0
1063108 1
1008761 0
2152990 0
1012329 0
5958481 0
2202262 0
5853591 0
5561411 0
3304346 0
3677456 0
2999507 0
72090 0
3292038 0
2688257 0
6005481 1
7029980 0
6454593 0
6624920 0
7549322 0
6712059 0
1679097 0
4293874 0
5406913 0
5803350 0
5331200 0
4773361 1
1638591 0
4109849 0
5672829 0
3993606 0
3672237 0
6104341 0
3992895 0
5656744 0
1332705 0
7482168 1
543555 0
3234873 0
4564981 0
2057175 0
4079010 0
4006277 0
14885 1
3287471 0
2413697 1
4441143 0
5482512 0
4499834 1
153500 0
4680017 1
495

877311 1
6809423 1
6284976 0
5189157 1
1693199 0
6266990 1
518365 1
4743955 0
5518443 1
802920 1
1240001 0
5026117 1
57903 0
1180891 1
4727193 0
6084373 0
4169821 1
331059 0
3512955 1
1266902 1
2818353 1
4144749 0
5207063 1
4432136 1
1864607 1
172264 1
6944460 1
2767676 0
2922658 1
3404005 0
551100 0
5935197 1
840164 1
721024 1
2922744 1
1670609 1
4957591 1
3506604 0
4993297 1
3330244 1
2846736 1
3014270 1
6669761 1
6285538 1
1131442 1
4703301 0
5302029 1
3056023 0
4995390 1
5159396 0
2072054 1
7120796 0
1390388 1
5365460 1
548865 1
4988108 0
1072554 0
2726870 0
5440358 0
6345269 0
5864392 0
6091112 0
6245534 0
5010852 0
4299297 0
2885419 0
6075172 0
2254740 0
4576490 0
3803605 0
7483925 0
1149780 0
2424792 0
4961642 0
4462632 0
5529018 0
2672388 0
1896478 0
3862023 0
933617 0
2217731 0
351407 0
1597934 0
6117325 0
6041834 0
307367 0
3865236 0
887236 0
5024794 0
1669337 0
6046799 0
6152467 0
4998077 0
5591151 0
3909072 0
5618090 0
4114448 0
6191520 0
5508625 0
7084180 0
5069151 0
24451

3957975 0
1741342 0
346977 0
1299792 0
3187099 0
7400298 0
932784 0
2116072 0
1429811 0
6668101 0
6099631 0
86147 0
1428092 0
4106989 0
7164045 0
2608620 0
251726 0
1360564 0
2274122 0
7392386 0
1110263 0
1879260 0
2542317 0
2421220 0
2777933 0
789280 0
5790537 0
3573824 0
2046422 0
4451350 0
6005925 0
6965132 0
3973428 0
1584046 0
1084201 0
4745897 0
7226683 0
1519380 0
3832317 0
4889028 0
2354001 0
5263232 0
4152560 0
6439993 0
1320815 0
2043849 0
1282228 0
34174 0
3056023 0
3252903 0
7225048 0
871939 0
5373313 0
4183403 0
7000470 0
3119223 0
2926789 0
6546578 0
7431805 0
260807 0
4689177 0
6299303 0
2295835 0
4153662 0
7126821 0
4082699 0
3108580 0
7067519 0
2667765 0
5128487 0
1469135 0
6994912 0
613445 0
4945448 0
1855916 0
2873859 0
1683664 0
7206549 0
4084449 0
4478781 0
6779434 0
7522837 0
5202973 0
4773074 0
541892 0
321902 0
4799891 0
2438878 0
1440994 0
1319424 0
4885938 0
4880695 0
5098218 0
2176043 0
2792694 0
1633407 0
6872555 0
2262237 0
3543054 0
4607296 0
5009935 0
965

3933455 0
422830 0
717437 1
5324695 0
2527893 0
6354529 0
2169476 0
5287059 0
3309703 0
3953202 0
4634209 0
4090443 0
3954215 1
2481083 1
2446187 0
7212708 0
2387944 0
3949690 0
3220250 0
3034421 0
1375204 0
638279 0
6370187 0
3145280 0
3203298 0
2904593 0
3939027 1
3375058 0
2861352 0
4937024 1
6599593 0
748354 0
5580116 1
1449074 0
6307475 0
261628 0
6704425 0
1810389 0
4965657 0
2451999 0
6017152 0
4150644 0
2544333 0
3470908 0
7337571 0
5250810 0
4016578 0
1101769 0
3477346 0
5057663 0
1649721 0
1343442 1
34334 0
7141129 0
3285662 0
929312 1
3758038 1
540525 0
6202642 1
3968926 0
3204452 0
884540 1
4268778 0
2249631 0
2455161 0
131701 0
6180918 0
14817 0
86483 0
115893 0
3489970 0
3788720 0
6284768 0
4733250 1
6436378 0
7088443 0
1338235 0
3754846 0
5709795 1
5421739 0
3798041 0
3446608 0
5354925 0
175743 0
2838983 0
4914741 0
3154964 0
2824865 0
3549471 0
1250872 0
7264188 0
4437682 0
3006434 0
3348468 0
6062526 0
5817136 0
1871537 0
3909077 0
1854625 0
5069702 0
3378913 0
5745755

502413 0
200303 0
4229727 0
5617422 0
3015527 1
5056921 0
3419393 0
4277774 0
7075612 0
6763599 0
2488022 0
5235715 0
341685 0
2508171 0
6939430 0
4079380 0
5583024 0
382866 0
6325431 0
4034221 0
3316913 0
6726889 0
6148915 0
1520835 0
4244357 0
3877127 0
468812 0
3638048 0
2435807 0
1714768 0
3593780 0
4387183 0
1921874 0
2092231 0
570405 0
597046 0
6786751 1
1357219 1
3278602 0
5708620 0
2349067 1
1645317 0
3116709 1
3515668 0
4637596 0
2792530 0
2039173 0
521147 0
1496230 0
3224151 0
3024728 0
723887 0
709010 0
5293437 0
3864131 0
5626176 0
1427391 0
6825762 1
7177722 0
1790641 0
7536629 0
5884815 0
5740643 0
6215235 0
4427126 0
4500328 0
2771156 0
5494169 0
5119649 0
6213745 0
4480779 0
6756563 0
3400620 0
203758 0
7247043 0
653573 0
4092703 0
738099 0
31430 0
533217 0
2313228 0
5062644 0
3493100 0
6665137 0
2461772 0
1246701 0
5614230 0
4596539 0
2465521 1
4285245 0
6282318 0
3600561 0
2687246 0
5390072 0
7083158 0
3274879 0
7432297 0
4312187 0
1528203 0
1628969 0
1052303 0
424669

2425907 1
4675567 1
7154073 1
4484491 1
1462702 0
1859520 1
2890967 0
4734549 0
5609678 0
6181837 1
5349523 0
299968 1
3784185 0
5949263 0
1138935 1
3108725 0
1860818 1
6959256 1
1247729 0
3295235 1
1723283 0
5224520 1
2379075 0
4888762 1
1037449 1
5247616 0
2595535 0
1823117 1
15269 0
4336157 1
1156501 1
1839243 0
678826 1
1466953 1
4439670 0
773930 1
1504991 0
6042602 1
440631 1
765936 0
5301280 0
1236945 1
3685543 0
6291093 0
1837306 1
3090006 1
4347911 1
6062817 0
875941 1
1993388 0
2912180 1
1233410 0
4844703 1
4264079 1
6218018 0
1408174 1
4680102 1
5028981 1
2954364 0
2865154 0
2988822 1
1697354 0
2322198 0
890200 1
2858753 0
997286 1
6493704 0
4768530 1
4962190 0
247163 0
4360919 1
5267334 0
6785942 1
3771667 0
5188752 0
6037915 1
6168860 0
2655137 0
5633256 1
2865038 1
6408344 1
4231306 1
7413761 1
4051371 1
6910552 1
3807731 1
1854288 1
281936 1
5861564 1
1525940 1
1953189 0
6939557 1
7148664 1
6283729 1
1066839 0
5590023 1
2551140 1
5906688 1
6973938 1
4332522 1
3591090 1
85

6838250 1
5698933 1
406928 1
4299297 0
3667903 1
6666389 1
6326165 1
4923175 1
230645 1
5485774 1
2947071 0
1584337 1
3504583 1
2480661 1
6168405 0
4478450 1
2750160 1
2385903 1
7384518 0
3405609 1
6708518 0
7388206 0
1349007 0
2826590 1
796621 0
3932742 0
57891 0
5266279 1
4783338 1
3679316 1
1682134 1
5635139 1
1004855 1
1692674 1
6166713 0
880641 1
5839640 1
7176628 1
4089246 0
5161841 1
5434766 1
5392315 1
4225370 0
6704537 1
3832457 0
2510340 0
4993297 1
2334301 0
1345858 1
4516366 0
3123917 1
2447774 1
4314228 0
6263271 0
1782265 0
3107112 0
3692041 1
2398427 0
7001046 1
1627237 0
638915 0
8159 1
229794 1
1175212 0
6607090 0
6206758 1
6830283 0
5269052 0
1852511 1
1486162 1
6410008 0
2410969 1
1881377 1
4971359 1
5776253 0
220265 0
2303721 1
7146675 0
5340444 0
3863704 0
6453552 0
4614755 0
6994912 0
5514637 1
1408084 0
5968430 1
620222 0
3132574 1
1227274 1
6303041 0
6113470 1
4962271 0
3504525 0
810540 0
622740 1
3062034 0
5220622 1
644244 1
179162 1
3003393 0
3113218 1
6353681

1458393 0
6758133 0
6985164 0
3268057 0
7078168 0
686337 0
6141615 0
4089246 0
3138337 0
2816982 0
3562769 0
4191348 0
3720451 0
3946369 0
4716015 0
2627212 0
196230 0
4632150 0
554972 0
4437172 0
6869994 0
6749298 0
3095455 0
4896509 0
6642055 0
1642637 0
6134226 0
5103308 0
1588825 0
3913954 0
5510314 0
3048420 0
5503444 0
4092854 0
1605843 0
7233093 0
6902277 0
2178766 0
4996007 0
5984856 0
106893 0
5207565 0
7338151 0
1112814 0
3357554 0
1058 0
1277342 0
7015700 0
544807 0
7389533 0
3470073 0
6324997 0
7388466 0
2113988 0
108845 1
4592247 0
3691186 0
6439547 0
4924490 0
6512516 0
3755365 0
1323 0
4869333 0
6280710 0
3417354 0
7265606 0
5865703 0
3309852 0
1236049 0
33524 0
4918243 0
3656667 0
6603445 0
3862043 0
7056970 0
1978648 0
4700999 0
711327 0
2331137 0
2505842 0
3300832 0
6515819 1
6282933 0
3033015 0
3119309 0
3178376 0
2945898 0
5072263 0
53017 0
6764485 0
6139683 0
6046101 0
5287133 0
6639077 0
6193328 0
1404649 0
2202114 0
768398 0
830132 0
2771343 0
1658594 0
3045303 0

3107406 0
5445453 0
4839928 0
4347549 0
3770857 0
3381159 0
4793883 0
542997 0
6313677 0
1278455 0
3100136 0
5764253 0
423420 1
5383944 0
420181 0
1121190 0
35278 0
6620918 0
6440631 0
5675784 0
1889296 0
2307742 0
6014003 0
2406426 1
4790724 0
5825415 0
7234001 0
2508067 0
6349269 0
1639983 0
570695 0
3738188 1
4441611 0
7485850 0
1624014 0
1497127 0
1108484 0
3711275 0
2749044 0
2612898 1
7243417 0
6740270 0
3694588 0
4992797 0
4695520 0
2607814 0
4302774 0
1030136 0
4752576 0
1902854 0
6680790 0
6410244 0
4206860 0
6990578 0
4683543 0
588514 0
2076821 0
650360 0
5572328 0
3914735 0
6362566 0
1051930 1
4641449 0
3895500 0
6676008 0
5983292 0
2149327 0
884660 0
977308 0
713794 1
3371572 0
3717533 0
350219 1
1912517 0
1931425 0
4501623 0
3878642 0
3995560 0
2894427 0
5372471 0
1403760 0
3998053 0
1170434 0
1872547 0
2455342 0
6947290 0
543797 0
5034784 0
2188449 0
229412 0
5266047 0
5677841 0
3125845 0
3132538 0
4577917 0
5423837 0
6946304 0
2873528 0
1476832 0
3687771 1
4185766 1
3309

7705569 -1
7741100 -1
8938041 -1
8231690 -1
8313615 -1
8689207 -1
9086274 -1
7874112 -1
7746367 -1
7832644 -1
7638605 -1
7673330 -1
7782815 -1
8690602 -1
8235606 -1
8276602 -1
7726206 -1
8627797 -1
7903757 -1
8619238 -1
8407955 -1
8079906 -1
8184856 -1
9141289 -1
7778049 -1
7814590 -1
8364382 -1
8449616 -1
8545497 -1
9166255 -1
8174092 -1
8517251 -1
8054201 -1
7794536 -1
7591228 -1
7769702 -1
8232903 -1
8671758 -1
9025756 -1
7700256 -1
8579316 -1
7723886 -1
8847829 -1
8171235 -1
9054951 -1
8132710 -1
8656074 -1
8624332 -1
7893685 -1
9149179 -1
8883746 -1
8324928 -1
7771535 -1
8167458 -1
7627134 -1
8673225 -1
8703140 -1
7899763 -1
8240324 -1
8963256 -1
8256665 -1
8526584 -1
7819365 -1
8631059 -1
8963977 -1
8448807 -1
8797352 -1
8711905 -1
7814333 -1
8185791 -1
8662261 -1
8722509 -1
7653152 -1
8184661 -1
7730888 -1
8654212 -1
7966869 -1
8677748 -1
8686849 -1
9187646 -1
7831895 -1
7808066 -1
8007049 -1
8326470 -1
8817626 -1
7796114 -1
7963120 -1
7578894 -1
7957658 -1
8802674 -1
8099993 -1

0        None
1        None
2        None
3        None
4        None
5        None
6        None
7        None
8        None
9        None
10       None
11       None
12       None
13       None
14       None
15       None
16       None
17       None
18       None
19       None
20       None
21       None
22       None
23       None
24       None
25       None
26       None
27       None
28       None
29       None
         ... 
10970    None
10971    None
10972    None
10973    None
10974    None
10975    None
10976    None
10977    None
10978    None
10979    None
10980    None
10981    None
10982    None
10983    None
10984    None
10985    None
10986    None
10987    None
10988    None
10989    None
10990    None
10991    None
10992    None
10993    None
10994    None
10995    None
10996    None
10997    None
10998    None
10999    None
Length: 11000, dtype: object