In [87]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from pandas.tseries.offsets import *
warnings.filterwarnings('ignore')
%matplotlib inline

In [88]:
# description of the feature:
# Traffic Volume through the Tollgates
# time           datatime        the time when a vehicle passes the tollgate
# tollgate_id    string          ID of the tollgate
# direction      string           0:entry, 1:exit
# vehicle_model  int             this number ranges from 0 to 7, which indicates the capacity of the vehicle(bigger the higher)
# has_etc        string          does the vehicle use ETC (Electronic Toll Collection) device? 0: No, 1: Yes
# vehicle_type   string          vehicle type: 0-passenger vehicle, 1-cargo vehicle
volume_df = pd.read_csv("volume(table 6)_training.csv")
volume_df.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-09-19 23:09:25,2,0,1,0,
1,2016-09-19 23:11:53,2,0,1,0,
2,2016-09-19 23:13:54,2,0,1,0,
3,2016-09-19 23:17:48,1,0,1,1,
4,2016-09-19 23:16:07,2,0,1,0,


In [89]:
# 替换所有有标签含义的数字
volume_df['tollgate_id'] = volume_df['tollgate_id'].replace({1:"1S", 2:"2S", 3:"3S"})
volume_df['direction'] = volume_df['direction'].replace({0:"entry", 1:"exit"})
volume_df['has_etc'] = volume_df['has_etc'].replace({0:"No", 1:"Yes"})
volume_df['vehicle_type'] = volume_df['vehicle_type'].replace({0:"passenger", 1:"carge"})
volume_df['time'] = volume_df['time'].apply(lambda x: pd.Timestamp(x))
volume_df.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-09-19 23:09:25,2S,entry,1,No,
1,2016-09-19 23:11:53,2S,entry,1,No,
2,2016-09-19 23:13:54,2S,entry,1,No,
3,2016-09-19 23:17:48,1S,entry,1,Yes,
4,2016-09-19 23:16:07,2S,entry,1,No,


In [90]:
# 承载量：1-默认客车，2-默认货车，3-默认货车，4-默认客车
# 承载量大于等于5的为货运汽车，所有承载量为0的车都类型不明
volume_df = volume_df.sort_values(by="vehicle_model")
vehicle_model0 = volume_df[volume_df['vehicle_model'] == 0].fillna("No")
vehicle_model1 = volume_df[volume_df['vehicle_model'] == 1].fillna("passenger")
vehicle_model2 = volume_df[volume_df['vehicle_model'] == 2].fillna("carge")
vehicle_model3 = volume_df[volume_df['vehicle_model'] == 3].fillna("carge")
vehicle_model4 = volume_df[volume_df['vehicle_model'] == 4].fillna("passenger")
vehicle_model5 = volume_df[volume_df['vehicle_model'] >= 5].fillna("carge")
volume_df = pd.concat([vehicle_model0, vehicle_model1, vehicle_model2, vehicle_model3, vehicle_model4, vehicle_model5])
volume_df[volume_df.isnull()].count()

time             0
tollgate_id      0
direction        0
vehicle_model    0
has_etc          0
vehicle_type     0
dtype: int64

In [94]:
# 创建之和流量，20分钟跨度有关系的训练集
# volume_time_entry = pd.Series(data = 1, index=volume_df.loc[(volume_df['tollgate_id']=="1S") & (volume_df['direction']=="entry"), :]['time'])
# volume_time_entry = volume_time_entry.resample("20T").sum()
# volume_entry = pd.DataFrame(index=volume_time_entry.index)
# volume_entry['volume'] = np.log(volume_time_entry)

# volume_time_exit = pd.Series(data = 1, index=volume_df.loc[(volume_df['tollgate_id']=="1S") & (volume_df['direction']=="exit"), :]['time'])
# volume_time_exit = volume_time_exit.resample("20T").sum()
# volume_exit = pd.DataFrame(index=volume_time_exit.index)
# volume_exit['volume'] = np.log(volume_time_exit)
# volume_exit
volume_all_entry = volume_df.loc[(volume_df['tollgate_id']=='1S') & (volume_df['direction']=='entry'), ["time", "vehicle_model"]]
volume_all_entry['volume'] = 1
volume_all_entry['cargo_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "carge" else 0)
volume_all_entry['passenger_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "passenger" else 0)
volume_all_entry['no_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "No" else 0)
volume_all_entry["cargo_model"] = volume_all_entry["cargo_count"] * volume_all_entry["vehicle_model"]
volume_all_entry["passenger_model"] = volume_all_entry["passenger_count"] * volume_all_entry["vehicle_model"]
volume_all_entry.index = volume_all_entry["time"]
del volume_all_entry["time"]
volume_all_entry = volume_all_entry.resample("20T").sum()
volume_all_entry["cargo_model_avg"] = volume_all_entry["cargo_model"] / volume_all_entry["cargo_count"]
volume_all_entry["passenger_model_avg"] = volume_all_entry["passenger_model"] / volume_all_entry["passenger_count"]
volume_all_entry["vehicle_model_avg"] = volume_all_entry["vehicle_model"] / volume_all_entry["volume"]
volume_all_entry = volume_all_entry.fillna(0)

volume_all_exit = volume_df.loc[(volume_df['tollgate_id']=='1S') & (volume_df['direction']=='exit'), ["time", "vehicle_model"]]
volume_all_exit["volume"] = 1
volume_all_exit["cargo_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "carge" else 0)
volume_all_exit["passenger_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "passenger" else 0)
volume_all_exit["no_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "No" else 0)
volume_all_exit["cargo_model"] = volume_all_exit["cargo_count"] * volume_all_exit["vehicle_model"]
volume_all_exit["passenger_model"] = volume_all_exit["passenger_count"] * volume_all_exit["vehicle_model"]
volume_all_exit.index = volume_all_exit["time"]
del volume_all_exit["time"]
volume_all_exit = volume_all_exit.resample("20T").sum()
volume_all_exit["cargo_model_avg"] = volume_all_exit["cargo_model"] / volume_all_exit["cargo_count"]
volume_all_exit["passenger_model_avg"] = volume_all_exit["passenger_model"] / volume_all_exit["passenger_count"]
volume_all_exit["vehicle_model_avg"] = volume_all_exit["vehicle_model"] / volume_all_exit["volume"]
volume_all_exit = volume_all_exit.fillna(0)

volume_all_exit

Unnamed: 0_level_0,vehicle_model,volume,cargo_count,passenger_count,no_count,cargo_model,passenger_model,cargo_model_avg,passenger_model_avg,vehicle_model_avg
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-09-19 00:00:00,181.0,140.0,31.0,109.0,0.0,63.0,118.0,2.032258,1.082569,1.292857
2016-09-19 00:20:00,97.0,56.0,16.0,40.0,0.0,39.0,58.0,2.437500,1.450000,1.732143
2016-09-19 00:40:00,46.0,31.0,11.0,20.0,0.0,21.0,25.0,1.909091,1.250000,1.483871
2016-09-19 01:00:00,5.0,4.0,2.0,2.0,0.0,3.0,2.0,1.500000,1.000000,1.250000
2016-09-19 01:20:00,25.0,14.0,6.0,8.0,0.0,17.0,8.0,2.833333,1.000000,1.785714
2016-09-19 01:40:00,10.0,7.0,3.0,4.0,0.0,6.0,4.0,2.000000,1.000000,1.428571
2016-09-19 02:00:00,16.0,7.0,5.0,2.0,0.0,14.0,2.0,2.800000,1.000000,2.285714
2016-09-19 02:20:00,15.0,7.0,5.0,2.0,0.0,13.0,2.0,2.600000,1.000000,2.142857
2016-09-19 02:40:00,15.0,10.0,4.0,6.0,0.0,9.0,6.0,2.250000,1.000000,1.500000
2016-09-19 03:00:00,14.0,10.0,6.0,4.0,0.0,10.0,4.0,1.666667,1.000000,1.400000


In [None]:
# 创建训练集，总的要求就是以前两个小时数据为训练集，用迭代式预测方法
# 例如8点-10点的数据预测10点20,8点-10点20预测10点40……，每一次预测使用的都是独立的（可能模型一样）的模型
# 现在开始构建训练集
# 第一个训练集特征是所有两个小时（以20分钟为一个单位）的数据，因变量是该两小时之后20分钟的流量
# 第二个训练集，特征是所有两个小时又20分钟（以20分钟为一个单位）的数据，因变量是该两个小时之后20分钟的流量
# 以此类推训练12个GBDT模型，其中entry 6个，exit 6个（没调参）
old_index = volume_all_entry.columns
new_index = []
for i in range(6):
    new_index += [item + "%d" % (i) for item in old_index]
new_index.append("y")
models_entry = []
for j in range(1):
    train_df = pd.DataFrame()
    for i in range(len(volume_all_entry) - 6 - j):
#         df_temp = volume_entry.iloc[i:i + 6, 0]
#         df_temp = df_temp.append(pd.Series(volume_entry.iloc[i + 6 + j, 0]))
#         df_temp.index = range(7)
#         train_df = train_df.append(df_temp, ignore_index=True)
        se_temp = pd.Series()
        for k in range(6):
            se_temp = se_temp.append(volume_all_entry.iloc[i + k, :])
        se_temp = se_temp.append(pd.Series(volume_all_entry.iloc[i + 6 + j]["volume"]))
        se_temp.index = new_index
        se_temp.name = str(volume_all_entry.iloc[i, :].index)
        print se_temp
        train_df = train_df.append(se_temp)
    train_X = train_df.iloc[:, :-1].fillna(0)
    train_y = train_df["y"].fillna(0)
    model = GradientBoostingRegressor()
    model.fit(train_X, train_y)
    models_entry.append(model)

models_exit = []
for j in range(1):
    train_df = pd.DataFrame()
    for i in range(len(volume_all_exit) - 6 - j):
#         df_temp = volume_exit.iloc[i:i + 6, 0]
#         df_temp = df_temp.append(pd.Series(volume_exit.iloc[i + 6 + j, 0]))
#         df_temp.index = range(7)
#         train_df = train_df.append(df_temp, ignore_index=True)
        se_temp = pd.Series()
        for k in range(6):
            se_temp = se_temp.append(volume_all_exit.iloc[i + k, :])
        se_temp = se_temp.append(pd.Series(volume_all_exit.iloc[i + 6 + j]["volume"]))
        se_temp.index = new_index
        se_temp.name = str(volume_all_exit.iloc[i, :].index)
        train_df = train_df.append(se_temp)
    train_X = train_df.iloc[:, :-1].fillna(0)
    train_y = train_df.iloc[:, -1].fillna(0)
    model = GradientBoostingRegressor()
    model.fit(train_X, train_y)
    models_exit.append(model)

vehicle_model0          22.000000
volume0                 13.000000
cargo_count0             3.000000
passenger_count0        10.000000
no_count0                0.000000
cargo_model0            12.000000
passenger_model0        10.000000
cargo_model_avg0         4.000000
passenger_model_avg0     1.000000
vehicle_model_avg0       1.692308
vehicle_model1           6.000000
volume1                  6.000000
cargo_count1             0.000000
passenger_count1         6.000000
no_count1                0.000000
cargo_model1             0.000000
passenger_model1         6.000000
cargo_model_avg1         0.000000
passenger_model_avg1     1.000000
vehicle_model_avg1       1.000000
vehicle_model2          17.000000
volume2                  9.000000
cargo_count2             2.000000
passenger_count2         7.000000
no_count2                0.000000
cargo_model2             7.000000
passenger_model2        10.000000
cargo_model_avg2         3.500000
passenger_model_avg2     1.428571
vehicle_model_

In [105]:
volume_test = pd.read_csv("../testing_phase1/volume(table 6)_test1.csv")
volume_test.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-10-18 07:59:04,2,0,1,1,
1,2016-10-18 07:59:31,2,0,1,1,
2,2016-10-18 07:59:50,2,0,1,1,
3,2016-10-18 07:32:33,3,0,1,1,
4,2016-10-18 07:32:46,3,0,1,1,


In [106]:
# 替换所有有标签含义的数字
volume_test['tollgate_id'] = volume_test['tollgate_id'].replace({1:"1S", 2:"2S", 3:"3S"})
volume_test['direction'] = volume_test['direction'].replace({0:"entry", 1:"exit"})
volume_test['has_etc'] = volume_test['has_etc'].replace({0:"No", 1:"Yes"})
volume_test['vehicle_type'] = volume_test['vehicle_type'].replace({0:"passenger", 1:"carge"})
volume_test['time'] = volume_test['time'].apply(lambda x: pd.Timestamp(x))
volume_test.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-10-18 07:59:04,2S,entry,1,Yes,
1,2016-10-18 07:59:31,2S,entry,1,Yes,
2,2016-10-18 07:59:50,2S,entry,1,Yes,
3,2016-10-18 07:32:33,3S,entry,1,Yes,
4,2016-10-18 07:32:46,3S,entry,1,Yes,


In [107]:
# 承载量：1-默认客车，2-默认货车，3-默认货车，4-默认客车
# 承载量大于等于5的为货运汽车，所有承载量为0的车都类型不明
volume_test = volume_test.sort_values(by="vehicle_model")
vehicle_model0 = volume_test[volume_test['vehicle_model'] == 0].fillna("No")
vehicle_model1 = volume_test[volume_test['vehicle_model'] == 1].fillna("passenger")
vehicle_model2 = volume_test[volume_test['vehicle_model'] == 2].fillna("carge")
vehicle_model3 = volume_test[volume_test['vehicle_model'] == 3].fillna("carge")
vehicle_model4 = volume_test[volume_test['vehicle_model'] == 4].fillna("passenger")
vehicle_model5 = volume_test[volume_test['vehicle_model'] >= 5].fillna("carge")
volume_test = pd.concat([vehicle_model0, vehicle_model1, vehicle_model2, vehicle_model3, vehicle_model4, vehicle_model5])
volume_test[volume_test.isnull()].count()

time             0
tollgate_id      0
direction        0
vehicle_model    0
has_etc          0
vehicle_type     0
dtype: int64

In [108]:
# 创建之和流量，20分钟跨度有关系的预测集
# volume_time_entry_test = pd.Series(data = 1, 
#                                    index=volume_test.loc[(volume_test['tollgate_id']=="2S") & (volume_test['direction']=="entry"), :]['time'])
# volume_time_entry_test = volume_time_entry_test.resample("20T").sum()
# volume_entry_test = pd.DataFrame(index=volume_time_entry_test.index)
# volume_entry_test['volume'] = volume_time_entry_test
# volume_entry_test = volume_entry_test.dropna()

# volume_time_exit_test = pd.Series(data = 1, 
#                                   index=volume_test.loc[(volume_test['tollgate_id']=="2S") & (volume_test['direction']=="exit"), :]['time'])
# volume_time_exit_test = volume_time_exit_test.resample("20T").sum()
# volume_exit_test = pd.DataFrame(index=volume_time_exit_test.index)
# volume_exit_test['volume'] = volume_time_exit_test
# volume_exit_test = volume_exit_test.dropna()
volume_entry_test = volume_test.loc[(volume_test['tollgate_id']=="1S") & (volume_test["direction"]=="entry"), ["time", "vehicle_model"]]
volume_entry_test["volume"] = 1
volume_entry_test["cargo_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "cargo" else 0)
volume_entry_test["passenger_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "passenger" else 0)
volume_entry_test["no_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "No" else 0)
volume_entry_test["cargo_model"] = volume_entry_test["cargo_count"] * volume_entry_test["vehicle_model"]
volume_entry_test["passenger_model"] = volume_entry_test["passenger_count"] * volume_entry_test["vehicle_model"]
volume_entry_test.index = volume_entry_test["time"]
del volume_entry_test["time"]
volume_entry_test = volume_all_entry.resample("20T").sum()
volume_entry_test["cargo_model_avg"] = volume_entry_test["cargo_model"] / volume_entry_test["cargo_count"]
volume_entry_test["passenger_model_avg"] = volume_entry_test["passenger_model"] / volume_entry_test["passenger_count"]
volume_entry_test["vehicle_model_avg"] = volume_entry_test["vehicle_model"] / volume_entry_test["volume"]
volume_entry_test = volume_entry_test.fillna(0)

volume_exit_test = volume_test.loc[(volume_test['tollgate_id']=="1S") & (volume_test["direction"]=="entry"), ["time", "vehicle_model"]]
volume_exit_test["volume"] = 1
volume_exit_test["cargo_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "cargo" else 0)
volume_exit_test["passenger_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "passenger" else 0)
volume_exit_test["no_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "No" else 0)
volume_exit_test["cargo_model"] = volume_exit_test["cargo_count"] * volume_exit_test["vehicle_model"]
volume_exit_test["passenger_model"] = volume_exit_test["passenger_count"] * volume_exit_test["vehicle_model"]
volume_exit_test.index = volume_exit_test["time"]
del volume_exit_test["time"]
volume_exit_test = volume_exit_test.resample("20T").sum()
volume_exit_test["cargo_model_avg"] = volume_exit_test["cargo_model"] / volume_exit_test["cargo_count"]
volume_exit_test["passenger_model_avg"] = volume_exit_test["passenger_model"] / volume_exit_test["passenger_count"]
volume_exit_test["vehicle_model_avg"] = volume_exit_test["vehicle_model"] / volume_exit_test["volume"]
volume_exit_test = volume_exit_test.fillna(0)

In [109]:
volume_entry_test

Unnamed: 0_level_0,vehicle_model,volume,cargo_count,passenger_count,no_count,cargo_model,passenger_model,cargo_model_avg,passenger_model_avg,vehicle_model_avg
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-09-19 00:00:00,22.0,13.0,3.0,10.0,0.0,12.0,10.0,4.000000,1.000000,1.692308
2016-09-19 00:20:00,6.0,6.0,0.0,6.0,0.0,0.0,6.0,0.000000,1.000000,1.000000
2016-09-19 00:40:00,17.0,9.0,2.0,7.0,0.0,7.0,10.0,3.500000,1.428571,1.888889
2016-09-19 01:00:00,22.0,10.0,3.0,7.0,0.0,15.0,7.0,5.000000,1.000000,2.200000
2016-09-19 01:20:00,20.0,14.0,3.0,10.0,1.0,10.0,10.0,3.333333,1.000000,1.428571
2016-09-19 01:40:00,14.0,10.0,1.0,9.0,0.0,5.0,9.0,5.000000,1.000000,1.400000
2016-09-19 02:00:00,19.0,7.0,3.0,4.0,0.0,12.0,7.0,4.000000,1.750000,2.714286
2016-09-19 02:20:00,23.0,10.0,7.0,3.0,0.0,20.0,3.0,2.857143,1.000000,2.300000
2016-09-19 02:40:00,20.0,6.0,5.0,1.0,0.0,16.0,4.0,3.200000,4.000000,3.333333
2016-09-19 03:00:00,19.0,9.0,4.0,5.0,0.0,14.0,5.0,3.500000,1.000000,2.111111


In [114]:
# 转换预测集，将预测集训练成和预测集第一个模型的格式相同（entry方向）
# se_temp = pd.Series()
#         for k in range(6):
#             se_temp = se_temp.append(volume_all_entry.iloc[i + k, :])
#         se_temp = se_temp.append(pd.Series(volume_entry.iloc[i + 6 + j, 0]))
#         se_temp.index = new_index
#         se_temp.name = str(volume_entry.iloc[i, :].index)
#         train_df = train_df.append(se_temp)
test_entry_df = pd.DataFrame()
i = 0
while i < len(volume_entry_test) - 5:
#     df_temp = volume_entry_test.iloc[i:i + 6, 0].T
#     df_temp.index = range(6)
#     df_temp.name = volume_entry_test.index[i]
#     test_entry_df = test_entry_df.append(df_temp)
    se_temp = pd.Series()
    for k in range(6):
        se_temp = se_temp.append(volume_entry_test.iloc[i + k, :])
    se_temp.index = new_index[:-1]
    se_temp.name = volume_entry_test.index[i]
    test_entry_df = test_entry_df.append(se_temp)
    i = i + 6

predict_test_entry = pd.DataFrame()
for i in range(6):
    test_y = models_entry[i].predict(test_entry_df)
    predict_test_entry[i] = test_y
predict_test_entry

Unnamed: 0,0,1,2,3,4,5
0,8.943952,10.795692,13.889421,15.775883,19.456906,19.881573
1,10.059445,12.674009,13.630082,12.549288,18.247452,21.155938
2,13.045941,14.011648,24.217503,29.212668,27.410039,30.532368
3,40.804474,45.408814,42.296246,40.568806,42.935512,42.455880
4,46.967083,45.590510,45.388459,45.480782,43.371761,44.742197
5,36.833993,37.207814,34.974916,36.837064,36.395174,35.887015
6,53.348900,50.756499,49.639424,47.582438,47.097479,45.361158
7,45.750596,45.240058,46.880816,44.711315,45.725399,44.874567
8,25.414466,26.585184,25.680466,23.746784,23.903188,22.923752
9,25.106788,27.134283,28.305288,28.756075,29.036104,26.549723


In [116]:
# 转换预测集，将预测集训练成和预测集第一个模型的格式相同（exit方向）
test_exit_df = pd.DataFrame()
i = 0
while i < len(volume_exit_test) - 5:
#     df_temp = volume_exit_test.iloc[i:i + 6, 0].T
#     df_temp.index = range(6)
#     df_temp.name = volume_exit_test.index[i]
#     test_exit_df = test_exit_df.append(df_temp)
    se_temp = pd.Series()
    for k in range(6):
        se_temp = se_temp.append(volume_exit_test.iloc[i + k, :])
    se_temp.index = new_index[:-1]
    se_temp.name = volume_exit_test.index[i]
    test_exit_df = test_exit_df.append(se_temp)
    i = i + 6

predict_test_exit = pd.DataFrame()
for i in range(6):
    test_y = models_exit[i].predict(test_exit_df)
    predict_test_exit[i + 6] = test_y
predict_test_exit

Unnamed: 0,6,7,8,9,10,11
0,55.547774,22.185411,31.948173,22.779954,52.815177,53.494851
1,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
2,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
3,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
4,42.635392,0.541804,34.684789,21.892374,19.689519,26.679867
5,43.049443,30.909601,21.543476,51.614100,40.058943,17.318972
6,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
7,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
8,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936
9,31.494431,4.592343,8.127071,11.591925,8.133314,14.456936


In [72]:
def transform_predict(predict_original, direction, tollgate_id):
    result = pd.DataFrame()
    for i in range(len(predict_original)):
        time_basic = predict_original.index[i]
        for j in range(6, 12, 1):
            time_window = "[" + str(time_basic + DateOffset(minutes=j * 20)) + "," + str(time_basic + DateOffset(minutes=(j + 1) * 20)) + ")"
            series = pd.Series({"tollage_id": tollgate_id, 
                                "time_window": time_window, 
                                "direction": direction, 
                                "volume": predict_original.iloc[i, j]})
            series.name = i + j - 6
            result = result.append(series)
    return result

print transform_predict(predict_test_entry, "entry", "1S")

   direction                                time_window tollage_id     volume
0      entry  [2016-10-18 08:00:00,2016-10-18 08:20:00)         1S  45.806886
1      entry  [2016-10-18 08:20:00,2016-10-18 08:40:00)         1S  52.958557
2      entry  [2016-10-18 08:40:00,2016-10-18 09:00:00)         1S  57.946432
3      entry  [2016-10-18 09:00:00,2016-10-18 09:20:00)         1S  61.429576
4      entry  [2016-10-18 09:20:00,2016-10-18 09:40:00)         1S  65.694853
5      entry  [2016-10-18 09:40:00,2016-10-18 10:00:00)         1S  77.994429
1      entry  [2016-10-18 17:00:00,2016-10-18 17:20:00)         1S  49.460976
2      entry  [2016-10-18 17:20:00,2016-10-18 17:40:00)         1S  48.004843
3      entry  [2016-10-18 17:40:00,2016-10-18 18:00:00)         1S  47.042836
4      entry  [2016-10-18 18:00:00,2016-10-18 18:20:00)         1S  43.703965
5      entry  [2016-10-18 18:20:00,2016-10-18 18:40:00)         1S  43.210875
6      entry  [2016-10-18 18:40:00,2016-10-18 19:00:00)         