forked from YouChouNoBB/2018-tencent-ad-competition-baseline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
user_feature_tocsv.py
29 lines (28 loc) · 1.19 KB
/
user_feature_tocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# coding=utf-8
# @author:bryan
# blog: https://blog.csdn.net/bryan__
# github: https://github.com/YouChouNoBB/2018-tencent-ad-competition-baseline
import pandas as pd
def _parse_user_feature_line(line):
    """Turn one raw line into a ``{feature_name: value_string}`` dict.

    Fields are separated by ``|``. Inside a field, the text before the first
    space is the feature name and everything after it is kept verbatim as a
    single string value (empty string when the field has no space).
    """
    record = {}
    for field in line.strip().split('|'):
        name, _, value = field.partition(' ')
        record[name] = value
    return record


def _write_chunk(records, out_prefix, chunk_index):
    """Write ``records`` to ``<out_prefix>_<chunk_index>.csv``; return the path."""
    path = out_prefix + '_' + str(chunk_index) + '.csv'
    pd.DataFrame(records).to_csv(path, index=False)
    return path


def convert_user_feature(src_path='../data/userFeature.data',
                         out_prefix='../data/userFeature',
                         chunk_size=1000000,
                         log_every=100000):
    """Convert the ``|``-delimited user feature file into one CSV.

    The input is parsed line by line and buffered; the buffer is dumped to
    numbered intermediate files ``<out_prefix>_<n>.csv`` so that the list of
    per-row dicts never holds much more than ``chunk_size`` rows.  The
    intermediate files are then read back, concatenated and written to
    ``<out_prefix>.csv``.

    NOTE: the chunks are read back with ``pd.read_csv`` defaults, so column
    types in the final frame are whatever pandas infers per chunk.

    Args:
        src_path: path of the raw feature file.
        out_prefix: path prefix (without ``.csv``) for intermediate and
            final output files.
        chunk_size: a chunk is flushed whenever the 0-based line index is a
            multiple of this value.
        log_every: the line index is printed whenever it is a multiple of
            this value.

    Returns:
        The concatenated ``pandas.DataFrame`` that was written to
        ``<out_prefix>.csv``.

    Raises:
        ValueError: if ``src_path`` contains no lines.
    """
    chunk_paths = []
    records = []
    with open(src_path, 'r') as f:
        for i, line in enumerate(f):
            records.append(_parse_user_feature_line(line))
            if i % log_every == 0:
                print(i)
            # Flush points are kept at multiples of chunk_size (which makes
            # the very first chunk a single row) so intermediate files stay
            # identical to those produced by earlier runs of this script.
            if i % chunk_size == 0:
                chunk_paths.append(
                    _write_chunk(records, out_prefix, len(chunk_paths)))
                records = []

    # Only write the tail when it has rows: an empty DataFrame produces a CSV
    # with no columns, which pd.read_csv rejects with EmptyDataError.
    if records:
        chunk_paths.append(_write_chunk(records, out_prefix, len(chunk_paths)))
        records = []

    if not chunk_paths:
        raise ValueError('no user feature records found in ' + src_path)

    user_feature = pd.concat(
        [pd.read_csv(path) for path in chunk_paths]).reset_index(drop=True)
    user_feature.to_csv(out_prefix + '.csv', index=False)
    return user_feature


if __name__ == '__main__':
    user_feature = convert_user_feature()