In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from pandas.tseries.offsets import *
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# description of the feature:
# Traffic Volume through the Tollgates
# time           datatime        the time when a vehicle passes the tollgate
# tollgate_id    string          ID of the tollgate
# direction      string           0:entry, 1:exit
# vehicle_model  int             this number ranges from 0 to 7, which indicates the capacity of the vehicle(bigger the higher)
# has_etc        string          does the vehicle use ETC (Electronic Toll Collection) device? 0: No, 1: Yes
# vehicle_type   string          vehicle type: 0-passenger vehicle, 1-cargo vehicle
volume_df = pd.read_csv("volume(table 6)_training.csv")
volume_df.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-09-19 23:09:25,2,0,1,0,
1,2016-09-19 23:11:53,2,0,1,0,
2,2016-09-19 23:13:54,2,0,1,0,
3,2016-09-19 23:17:48,1,0,1,1,
4,2016-09-19 23:16:07,2,0,1,0,


In [3]:
# 替换所有有标签含义的数字
volume_df['tollgate_id'] = volume_df['tollgate_id'].replace({1:"1S", 2:"2S", 3:"3S"})
volume_df['direction'] = volume_df['direction'].replace({0:"entry", 1:"exit"})
volume_df['has_etc'] = volume_df['has_etc'].replace({0:"No", 1:"Yes"})
volume_df['vehicle_type'] = volume_df['vehicle_type'].replace({0:"passenger", 1:"carge"})
volume_df['time'] = volume_df['time'].apply(lambda x: pd.Timestamp(x))
volume_df.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-09-19 23:09:25,2S,entry,1,No,
1,2016-09-19 23:11:53,2S,entry,1,No,
2,2016-09-19 23:13:54,2S,entry,1,No,
3,2016-09-19 23:17:48,1S,entry,1,Yes,
4,2016-09-19 23:16:07,2S,entry,1,No,


In [4]:
# 承载量：1-默认客车，2-默认货车，3-默认货车，4-默认客车
# 承载量大于等于5的为货运汽车，所有承载量为0的车都类型不明
volume_df = volume_df.sort_values(by="vehicle_model")
vehicle_model0 = volume_df[volume_df['vehicle_model'] == 0].fillna("No")
vehicle_model1 = volume_df[volume_df['vehicle_model'] == 1].fillna("passenger")
vehicle_model2 = volume_df[volume_df['vehicle_model'] == 2].fillna("carge")
vehicle_model3 = volume_df[volume_df['vehicle_model'] == 3].fillna("carge")
vehicle_model4 = volume_df[volume_df['vehicle_model'] == 4].fillna("passenger")
vehicle_model5 = volume_df[volume_df['vehicle_model'] >= 5].fillna("carge")
volume_df = pd.concat([vehicle_model0, vehicle_model1, vehicle_model2, vehicle_model3, vehicle_model4, vehicle_model5])
volume_df[volume_df.isnull()].count()

time             0
tollgate_id      0
direction        0
vehicle_model    0
has_etc          0
vehicle_type     0
dtype: int64

In [5]:
# 创建之和流量，20分钟跨度有关系的训练集
# volume_time_entry = pd.Series(data = 1, index=volume_df.loc[(volume_df['tollgate_id']=="1S") & (volume_df['direction']=="entry"), :]['time'])
# volume_time_entry = volume_time_entry.resample("20T").sum()
# volume_entry = pd.DataFrame(index=volume_time_entry.index)
# volume_entry['volume'] = np.log(volume_time_entry)

# volume_time_exit = pd.Series(data = 1, index=volume_df.loc[(volume_df['tollgate_id']=="1S") & (volume_df['direction']=="exit"), :]['time'])
# volume_time_exit = volume_time_exit.resample("20T").sum()
# volume_exit = pd.DataFrame(index=volume_time_exit.index)
# volume_exit['volume'] = np.log(volume_time_exit)
# volume_exit
volume_all_entry = volume_df.loc[(volume_df['tollgate_id']=='1S') & (volume_df['direction']=='entry'), ["time", "vehicle_model"]]
volume_all_entry['volume'] = 1
volume_all_entry['carge_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "carge" else 0)
volume_all_entry['passenger_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "passenger" else 0)
volume_all_entry['no_count'] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "No" else 0)
volume_all_entry["cargo_model"] = volume_all_entry["cargo_count"] * volume_all_entry["vehicle_model"]
volume_all_entry["passenger_model"] = volume_all_entry["passenger_count"] * volume_all_entry["vehicle_model"]
volume_all_entry.index = volume_all_entry["time"]
del volume_all_entry["time"]
volume_all_entry = volume_all_entry.resample("20T").sum()
volume_all_entry["cargo_model_avg"] = volume_all_entry["cargo_model"] / volume_all_entry["cargo_count"]
volume_all_entry["passenger_model_avg"] = volume_all_entry["passenger_model"] / volume_all_entry["passenger_count"]
volume_all_entry["vehicle_model_avg"] = volume_all_entry["vehicle_model"] / volume_all_entry["volume"]
volume_all_entry = volume_all_entry.fillna(0)

volume_all_exit = volume_df.loc[(volume_df['tollgate_id']=='1S') & (volume_df['direction']=='exit'), ["time", "vehicle_model"]]
volume_all_exit["volume"] = 1
volume_all_exit["carge_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "carge" else 0)
volume_all_exit["passenger_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "passenger" else 0)
volume_all_exit["no_count"] = volume_df['vehicle_type'].apply(lambda x: 1 if x == "No" else 0)
volume_all_exit["cargo_model"] = volume_all_exit["cargo_count"] * volume_all_exit["vehicle_model"]
volume_all_exit["passenger_model"] = volume_all_exit["passenger_count"] * volume_all_exit["vehicle_model"]
volume_all_exit.index = volume_all_exit["time"]
del volume_all_exit["time"]
volume_all_exit = volume_all_exit.resample("20T").sum()
volume_all_exit["cargo_model_avg"] = volume_all_exit["cargo_model"] / volume_all_exit["cargo_count"]
volume_all_exit["passenger_model_avg"] = volume_all_exit["passenger_model"] / volume_all_exit["passenger_count"]
volume_all_exit["vehicle_model_avg"] = volume_all_exit["vehicle_model"] / volume_all_exit["volume"]
volume_all_exit = volume_all_exit.fillna(0)

volume_all_exit

Unnamed: 0_level_0,vehicle_model,volume,cargo_count,passenger_count,no_count,cargo_model,passenger_model,cargo_model_avg,passenger_model_avg,vehicle_model_avg
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-09-19 00:00:00,181.0,140.0,31.0,109.0,0.0,63.0,118.0,2.032258,1.082569,1.292857
2016-09-19 00:20:00,97.0,56.0,16.0,40.0,0.0,39.0,58.0,2.437500,1.450000,1.732143
2016-09-19 00:40:00,46.0,31.0,11.0,20.0,0.0,21.0,25.0,1.909091,1.250000,1.483871
2016-09-19 01:00:00,5.0,4.0,2.0,2.0,0.0,3.0,2.0,1.500000,1.000000,1.250000
2016-09-19 01:20:00,25.0,14.0,6.0,8.0,0.0,17.0,8.0,2.833333,1.000000,1.785714
2016-09-19 01:40:00,10.0,7.0,3.0,4.0,0.0,6.0,4.0,2.000000,1.000000,1.428571
2016-09-19 02:00:00,16.0,7.0,5.0,2.0,0.0,14.0,2.0,2.800000,1.000000,2.285714
2016-09-19 02:20:00,15.0,7.0,5.0,2.0,0.0,13.0,2.0,2.600000,1.000000,2.142857
2016-09-19 02:40:00,15.0,10.0,4.0,6.0,0.0,9.0,6.0,2.250000,1.000000,1.500000
2016-09-19 03:00:00,14.0,10.0,6.0,4.0,0.0,10.0,4.0,1.666667,1.000000,1.400000


In [22]:
# 创建训练集，总的要求就是以前两个小时数据为训练集，用迭代式预测方法
# 例如8点-10点的数据预测10点20,8点-10点20预测10点40……，每一次预测使用的都是独立的（可能模型一样）的模型
# 现在开始构建训练集
# 第一个训练集特征是所有两个小时（以20分钟为一个单位）的数据，因变量是该两小时之后20分钟的流量
# 第二个训练集，特征是所有两个小时又20分钟（以20分钟为一个单位）的数据，因变量是该两个小时之后20分钟的流量
# 以此类推训练12个GBDT模型，其中entry 6个，exit 6个（没调参）
old_index = volume_all_entry.columns
new_index = []
for i in range(6):
    new_index += [item + "%d" % (i) for item in old_index]
new_index.append("y")
models_entry = []
for j in range(6):
    train_df = pd.DataFrame()
    for i in range(len(volume_all_entry) - 6 - j):
#         df_temp = volume_entry.iloc[i:i + 6, 0]
#         df_temp = df_temp.append(pd.Series(volume_entry.iloc[i + 6 + j, 0]))
#         df_temp.index = range(7)
#         train_df = train_df.append(df_temp, ignore_index=True)
        se_temp = pd.Series()
        for k in range(6):
            se_temp = se_temp.append(volume_all_entry.iloc[i + k, :])
        se_temp = se_temp.append(pd.Series(volume_all_entry.iloc[i + 6 + j, :]["volume"]))
        se_temp.index = new_index
        se_temp.name = str(volume_all_entry.iloc[i, :].index)
        train_df = train_df.append(se_temp)
    train_X = train_df.iloc[:, :-1].fillna(0)
    train_y = train_df["y"].fillna(0)
    model = GradientBoostingRegressor()
    model.fit(train_X, train_y)
    models_entry.append(model)

models_exit = []
for j in range(6):
    train_df = pd.DataFrame()
    for i in range(len(volume_all_exit) - 6 - j):
#         df_temp = volume_exit.iloc[i:i + 6, 0]
#         df_temp = df_temp.append(pd.Series(volume_exit.iloc[i + 6 + j, 0]))
#         df_temp.index = range(7)
#         train_df = train_df.append(df_temp, ignore_index=True)
        se_temp = pd.Series()
        for k in range(6):
            se_temp = se_temp.append(volume_all_exit.iloc[i + k, :])
        se_temp = se_temp.append(pd.Series(volume_all_exit.iloc[i + 6 + j, :]["volume"]))
        se_temp.index = new_index
        se_temp.name = str(volume_all_exit.iloc[i, :].index)
        train_df = train_df.append(se_temp)
    train_X = train_df.iloc[:, :-1].fillna(0)
    train_y = train_df["y"].fillna(0)
    model = GradientBoostingRegressor()
    model.fit(train_X, train_y)
    models_exit.append(model)

In [8]:
volume_test = pd.read_csv("../testing_phase1/volume(table 6)_test1.csv")
volume_test.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-10-18 07:59:04,2,0,1,1,
1,2016-10-18 07:59:31,2,0,1,1,
2,2016-10-18 07:59:50,2,0,1,1,
3,2016-10-18 07:32:33,3,0,1,1,
4,2016-10-18 07:32:46,3,0,1,1,


In [9]:
# 替换所有有标签含义的数字
volume_test['tollgate_id'] = volume_test['tollgate_id'].replace({1:"1S", 2:"2S", 3:"3S"})
volume_test['direction'] = volume_test['direction'].replace({0:"entry", 1:"exit"})
volume_test['has_etc'] = volume_test['has_etc'].replace({0:"No", 1:"Yes"})
volume_test['vehicle_type'] = volume_test['vehicle_type'].replace({0:"passenger", 1:"carge"})
volume_test['time'] = volume_test['time'].apply(lambda x: pd.Timestamp(x))
volume_test.head()

Unnamed: 0,time,tollgate_id,direction,vehicle_model,has_etc,vehicle_type
0,2016-10-18 07:59:04,2S,entry,1,Yes,
1,2016-10-18 07:59:31,2S,entry,1,Yes,
2,2016-10-18 07:59:50,2S,entry,1,Yes,
3,2016-10-18 07:32:33,3S,entry,1,Yes,
4,2016-10-18 07:32:46,3S,entry,1,Yes,


In [10]:
# 承载量：1-默认客车，2-默认货车，3-默认货车，4-默认客车
# 承载量大于等于5的为货运汽车，所有承载量为0的车都类型不明
volume_test = volume_test.sort_values(by="vehicle_model")
vehicle_model0 = volume_test[volume_test['vehicle_model'] == 0].fillna("No")
vehicle_model1 = volume_test[volume_test['vehicle_model'] == 1].fillna("passenger")
vehicle_model2 = volume_test[volume_test['vehicle_model'] == 2].fillna("carge")
vehicle_model3 = volume_test[volume_test['vehicle_model'] == 3].fillna("carge")
vehicle_model4 = volume_test[volume_test['vehicle_model'] == 4].fillna("passenger")
vehicle_model5 = volume_test[volume_test['vehicle_model'] >= 5].fillna("carge")
volume_test = pd.concat([vehicle_model0, vehicle_model1, vehicle_model2, vehicle_model3, vehicle_model4, vehicle_model5])
volume_test[volume_test.isnull()].count()

time             0
tollgate_id      0
direction        0
vehicle_model    0
has_etc          0
vehicle_type     0
dtype: int64

In [25]:
# 创建之和流量，20分钟跨度有关系的预测集
# volume_time_entry_test = pd.Series(data = 1, 
#                                    index=volume_test.loc[(volume_test['tollgate_id']=="2S") & (volume_test['direction']=="entry"), :]['time'])
# volume_time_entry_test = volume_time_entry_test.resample("20T").sum()
# volume_entry_test = pd.DataFrame(index=volume_time_entry_test.index)
# volume_entry_test['volume'] = volume_time_entry_test
# volume_entry_test = volume_entry_test.dropna()

# volume_time_exit_test = pd.Series(data = 1, 
#                                   index=volume_test.loc[(volume_test['tollgate_id']=="2S") & (volume_test['direction']=="exit"), :]['time'])
# volume_time_exit_test = volume_time_exit_test.resample("20T").sum()
# volume_exit_test = pd.DataFrame(index=volume_time_exit_test.index)
# volume_exit_test['volume'] = volume_time_exit_test
# volume_exit_test = volume_exit_test.dropna()
volume_entry_test = volume_test.loc[(volume_test['tollgate_id']=="1S") & (volume_test["direction"]=="entry"), ["time", "vehicle_model"]]
volume_entry_test["volume"] = 1
volume_entry_test["carge_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "carge" else 0)
volume_entry_test["passenger_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "passenger" else 0)
volume_entry_test["no_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "No" else 0)
volume_entry_test["carge_model"] = volume_entry_test["carge_count"] * volume_entry_test["vehicle_model"]
volume_entry_test["passenger_model"] = volume_entry_test["passenger_count"] * volume_entry_test["vehicle_model"]
volume_entry_test.index = volume_entry_test["time"]
del volume_entry_test["time"]
volume_entry_test = volume_entry_test.resample("20T").sum()
volume_entry_test = volume_entry_test.dropna()
volume_entry_test["carge_model_avg"] = volume_entry_test["carge_model"] / volume_entry_test["carge_count"]
volume_entry_test["passenger_model_avg"] = volume_entry_test["passenger_model"] / volume_entry_test["passenger_count"]
volume_entry_test["vehicle_model_avg"] = volume_entry_test["vehicle_model"] / volume_entry_test["volume"]
volume_entry_test = volume_entry_test.fillna(0)

volume_exit_test = volume_test.loc[(volume_test['tollgate_id']=="1S") & (volume_test["direction"]=="exit"), ["time", "vehicle_model"]]
volume_exit_test["volume"] = 1
volume_exit_test["carge_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "carge" else 0)
volume_exit_test["passenger_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "passenger" else 0)
volume_exit_test["no_count"] = volume_test["vehicle_type"].apply(lambda x: 1 if x == "No" else 0)
volume_exit_test["carge_model"] = volume_exit_test["carge_count"] * volume_exit_test["vehicle_model"]
volume_exit_test["passenger_model"] = volume_exit_test["passenger_count"] * volume_exit_test["vehicle_model"]
volume_exit_test.index = volume_exit_test["time"]
del volume_exit_test["time"]
volume_exit_test = volume_exit_test.resample("20T").sum()
volume_exit_test = volume_exit_test.dropna()
volume_exit_test["carge_model_avg"] = volume_exit_test["carge_model"] / volume_exit_test["carge_count"]
volume_exit_test["passenger_model_avg"] = volume_exit_test["passenger_model"] / volume_exit_test["passenger_count"]
volume_exit_test["vehicle_model_avg"] = volume_exit_test["vehicle_model"] / volume_exit_test["volume"]
volume_exit_test = volume_exit_test.fillna(0)

In [30]:
volume_entry_test

Unnamed: 0_level_0,vehicle_model,volume,carge_count,passenger_count,no_count,carge_model,passenger_model,carge_model_avg,passenger_model_avg,vehicle_model_avg
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-10-18 06:00:00,13.0,13.0,0.0,13.0,0.0,0.0,13.0,0.000000,1.000000,1.000000
2016-10-18 06:20:00,21.0,17.0,4.0,13.0,0.0,8.0,13.0,2.000000,1.000000,1.235294
2016-10-18 06:40:00,28.0,21.0,5.0,16.0,0.0,12.0,16.0,2.400000,1.000000,1.333333
2016-10-18 07:00:00,44.0,31.0,7.0,24.0,0.0,14.0,30.0,2.000000,1.250000,1.419355
2016-10-18 07:20:00,31.0,28.0,3.0,25.0,0.0,6.0,25.0,2.000000,1.000000,1.107143
2016-10-18 07:40:00,54.0,47.0,6.0,41.0,0.0,13.0,41.0,2.166667,1.000000,1.148936
2016-10-18 15:00:00,56.0,52.0,4.0,48.0,0.0,8.0,48.0,2.000000,1.000000,1.076923
2016-10-18 15:20:00,40.0,38.0,2.0,36.0,0.0,4.0,36.0,2.000000,1.000000,1.052632
2016-10-18 15:40:00,35.0,35.0,0.0,35.0,0.0,0.0,35.0,0.000000,1.000000,1.000000
2016-10-18 16:00:00,59.0,57.0,2.0,55.0,0.0,4.0,55.0,2.000000,1.000000,1.035088


In [31]:
# 转换预测集，将预测集训练成和预测集第一个模型的格式相同（entry方向）
# se_temp = pd.Series()
#         for k in range(6):
#             se_temp = se_temp.append(volume_all_entry.iloc[i + k, :])
#         se_temp = se_temp.append(pd.Series(volume_entry.iloc[i + 6 + j, 0]))
#         se_temp.index = new_index
#         se_temp.name = str(volume_entry.iloc[i, :].index)
#         train_df = train_df.append(se_temp)
test_entry_df = pd.DataFrame()
i = 0
while i < len(volume_entry_test) - 5:
#     df_temp = volume_entry_test.iloc[i:i + 6, 0].T
#     df_temp.index = range(6)
#     df_temp.name = volume_entry_test.index[i]
#     test_entry_df = test_entry_df.append(df_temp)
    se_temp = pd.Series()
    for k in range(6):
        se_temp = se_temp.append(volume_entry_test.iloc[i + k, :])
    se_temp.index = new_index[:-1]
    se_temp.name = volume_entry_test.index[i]
    test_entry_df = test_entry_df.append(se_temp)
    i = i + 6

predict_test_entry = pd.DataFrame()
for i in range(6):
    test_y = models_entry[i].predict(test_entry_df)
    predict_test_entry[i] = test_y
predict_test_entry.index = test_entry_df.index
predict_test_entry

Unnamed: 0,0,1,2,3,4,5
2016-10-18 06:00:00,42.198929,47.731464,45.547857,48.998597,45.9128,46.361487
2016-10-18 15:00:00,50.063449,45.398834,43.618682,43.022126,43.363674,37.567023
2016-10-19 06:00:00,39.272157,43.316087,45.036237,46.333962,43.617404,50.034161
2016-10-19 15:00:00,40.779073,39.519213,39.960858,38.215779,36.057685,38.604606
2016-10-20 06:00:00,37.682464,38.303025,38.846318,37.412759,43.519615,41.95053
2016-10-20 15:00:00,39.840045,36.545085,36.174826,35.942027,34.952565,33.660654
2016-10-21 06:00:00,42.204678,42.501872,45.526669,46.115939,46.035806,43.920121
2016-10-21 15:00:00,56.587748,52.349114,50.125544,48.192973,44.261718,41.412782
2016-10-22 06:00:00,36.079005,38.093565,39.396312,45.108319,38.682095,45.469108
2016-10-22 15:00:00,53.73984,50.869219,47.880927,45.814197,46.231233,44.226184


In [32]:
# 转换预测集，将预测集训练成和预测集第一个模型的格式相同（exit方向）
test_exit_df = pd.DataFrame()
i = 0
while i < len(volume_exit_test) - 5:
#     df_temp = volume_exit_test.iloc[i:i + 6, 0].T
#     df_temp.index = range(6)
#     df_temp.name = volume_exit_test.index[i]
#     test_exit_df = test_exit_df.append(df_temp)
    se_temp = pd.Series()
    for k in range(6):
        se_temp = se_temp.append(volume_exit_test.iloc[i + k, :])
    se_temp.index = new_index[:-1]
    se_temp.name = volume_exit_test.index[i]
    test_exit_df = test_exit_df.append(se_temp)
    i = i + 6

predict_test_exit = pd.DataFrame()
for i in range(6):
    test_y = models_exit[i].predict(test_exit_df)
    predict_test_exit[i + 6] = test_y
predict_test_exit.index = test_exit_df.index
predict_test_exit

Unnamed: 0,6,7,8,9,10,11
2016-10-18 06:00:00,106.882572,115.110885,114.905444,119.686593,116.678623,116.090787
2016-10-18 15:00:00,100.694801,100.254568,98.786356,90.804477,86.277552,88.319049
2016-10-19 06:00:00,101.264903,108.386817,108.071692,118.030173,110.520891,109.056746
2016-10-19 15:00:00,85.763441,78.584154,77.23049,74.050966,66.700992,56.802325
2016-10-20 06:00:00,101.737046,111.244933,117.551241,115.814593,111.008602,100.601402
2016-10-20 15:00:00,100.944119,99.261909,93.450259,83.602863,84.941295,83.974078
2016-10-21 06:00:00,105.972998,101.717525,103.191332,114.916207,108.08264,95.90405
2016-10-21 15:00:00,99.159808,98.159019,83.612844,81.424298,69.341833,66.892465
2016-10-22 06:00:00,86.795713,92.72916,95.868054,103.640067,113.609076,100.721501
2016-10-22 15:00:00,80.368514,74.618847,67.497068,65.992392,55.829948,56.260255


In [72]:
def transform_predict(predict_original, direction, tollgate_id):
    result = pd.DataFrame()
    for i in range(len(predict_original)):
        time_basic = predict_original.index[i]
        for j in range(6, 12, 1):
            time_window = "[" + str(time_basic + DateOffset(minutes=j * 20)) + "," + str(time_basic + DateOffset(minutes=(j + 1) * 20)) + ")"
            series = pd.Series({"tollage_id": tollgate_id, 
                                "time_window": time_window, 
                                "direction": direction, 
                                "volume": predict_original.iloc[i, j]})
            series.name = i + j - 6
            result = result.append(series)
    return result

print transform_predict(predict_test_entry, "entry", "1S")

   direction                                time_window tollage_id     volume
0      entry  [2016-10-18 08:00:00,2016-10-18 08:20:00)         1S  45.806886
1      entry  [2016-10-18 08:20:00,2016-10-18 08:40:00)         1S  52.958557
2      entry  [2016-10-18 08:40:00,2016-10-18 09:00:00)         1S  57.946432
3      entry  [2016-10-18 09:00:00,2016-10-18 09:20:00)         1S  61.429576
4      entry  [2016-10-18 09:20:00,2016-10-18 09:40:00)         1S  65.694853
5      entry  [2016-10-18 09:40:00,2016-10-18 10:00:00)         1S  77.994429
1      entry  [2016-10-18 17:00:00,2016-10-18 17:20:00)         1S  49.460976
2      entry  [2016-10-18 17:20:00,2016-10-18 17:40:00)         1S  48.004843
3      entry  [2016-10-18 17:40:00,2016-10-18 18:00:00)         1S  47.042836
4      entry  [2016-10-18 18:00:00,2016-10-18 18:20:00)         1S  43.703965
5      entry  [2016-10-18 18:20:00,2016-10-18 18:40:00)         1S  43.210875
6      entry  [2016-10-18 18:40:00,2016-10-18 19:00:00)         