In [1]:
pwd

'e:\\two_stage_model\\data_processing\\Speed_Processing'

In [2]:
cd ..

e:\two_stage_model\data_processing


In [3]:
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm

In [4]:
# Settings shared by every step of this data-processing notebook.

model_path = 'E:/two_stage_model'  # project root; inputs and outputs live under <model_path>/data/
country_name = 'TSMO'  # dataset name, used both as a folder name and as a file-name prefix
# Upstream range (in miles) used for the slowdown speed. A dict for this exact value
# must already exist in \processed_data\upstream_rage_dict.
slow_down_speed_upstream_range_mile = 0.3

In [5]:
# Normalise the range to one decimal place (as a float) so that it renders
# consistently when it is interpolated into the upstream-dict file name.
slow_down_speed_upstream_range_mile = float(f"{slow_down_speed_upstream_range_mile:.1f}")

## Part 1. Read Files

In [6]:
# TMC Speed Data
# TMC speed data: a table with one column per TMC id, indexed by measurement timestamp.
# The file is opened in a `with` block so the handle is closed as soon as loading ends.
# NOTE: pickle can execute arbitrary code while loading; only read files produced by this pipeline.
with open(f"{model_path}/data/{country_name}/processed_data/{country_name}_df_spd_tmc_5min_all_from_1_min.pkl", "rb") as spd_record_file:
    spd_record_all = pickle.load(spd_record_file)

In [7]:
# Every TMC id that has a speed column in the loaded table.
speed_available_tmc_list = spd_record_all.columns.tolist()

In [8]:
# Inspect the speed series of a single TMC id ('110+04483'), indexed by measurement timestamp.
spd_record_all['110+04483']

measurement_tstamp
2022-02-14 05:30:00    67.0
2022-02-14 05:35:00    71.0
2022-02-14 05:40:00    68.0
2022-02-14 05:45:00    66.0
2022-02-14 05:50:00    69.0
                       ... 
2023-02-10 20:35:00    67.0
2023-02-10 20:40:00    70.0
2023-02-10 20:45:00    67.0
2023-02-10 20:50:00    64.0
2023-02-10 20:55:00    67.0
Name: 110+04483, Length: 48360, dtype: float64

In [9]:
# NOTE(review): this repeats the lookup of the preceding cell unchanged; it can probably be dropped.
spd_record_all['110+04483']

measurement_tstamp
2022-02-14 05:30:00    67.0
2022-02-14 05:35:00    71.0
2022-02-14 05:40:00    68.0
2022-02-14 05:45:00    66.0
2022-02-14 05:50:00    69.0
                       ... 
2023-02-10 20:35:00    67.0
2023-02-10 20:40:00    70.0
2023-02-10 20:45:00    67.0
2023-02-10 20:50:00    64.0
2023-02-10 20:55:00    67.0
Name: 110+04483, Length: 48360, dtype: float64

In [10]:
# Same inspection for '110P04483', which is compared against '110+04483' in the next cell.
spd_record_all['110P04483']

measurement_tstamp
2022-02-14 05:30:00    68.00
2022-02-14 05:35:00    63.00
2022-02-14 05:40:00    64.39
2022-02-14 05:45:00    61.00
2022-02-14 05:50:00    59.00
                       ...  
2023-02-10 20:35:00    65.00
2023-02-10 20:40:00    60.00
2023-02-10 20:45:00    65.00
2023-02-10 20:50:00    57.00
2023-02-10 20:55:00    65.00
Name: 110P04483, Length: 48360, dtype: float64

In [11]:
# Mean absolute gap between the speed series of '110+04483' and '110P04483'.
difference = spd_record_all['110+04483'].sub(spd_record_all['110P04483'])
absolute_difference = abs(difference)
mean_absolute_difference = absolute_difference.mean()

print(mean_absolute_difference)

8.281341397849461


## Part 2. Read Upstream Dict
- Be careful to check that the dict has already been generated for this specific range length; otherwise, add the value in Part 4 of `\data_processing\geo_processing\generate_up_down_stream.ipynb`.

In [12]:
# Upstream lookup for the configured range. Part 3 indexes it by TMC id and treats
# each value as the list of link ids whose speeds are compared against that TMC.
# Opened in a `with` block so the file handle is closed right after loading.
# NOTE: pickle can execute arbitrary code while loading; only read files produced by this pipeline.
with open(f"{model_path}/data/{country_name}/processed_data/upstream_rage_dict/{country_name}_upstream_{slow_down_speed_upstream_range_mile}_mile.pkl", "rb") as upstream_dict_file:
    upstream_k_mile_dict = pickle.load(upstream_dict_file)

## Part 3. Generate Slowdown Speed

In [15]:
# TMC ids that have an entry in the upstream lookup ...
slowdown_speed_list = [*upstream_k_mile_dict.keys()]
# ... and the speed columns restricted to exactly those ids.
has_upstream_entry = spd_record_all.columns.isin(slowdown_speed_list)
selected_link_spd = spd_record_all.loc[:, has_upstream_entry]

In [16]:
# Zero-filled float table with the same timestamps (rows) and TMC ids (columns)
# as the speed table; the loop in the next cell fills it column by column.
df_slowdown_speed = pd.DataFrame(0.0, index=spd_record_all.index, columns=spd_record_all.columns)

In [17]:
# Slowdown speed of a link at each timestamp:
#     max(lowest speed among its upstream links - the link's own speed, 0)
# NOTE(review): earlier variable names called the upstream aggregate an "average",
# but the computation has always been a row-wise minimum; confirm which is intended.
no_upstream_selected_link_list = []
for tmc_id in tqdm(slowdown_speed_list):
    affected_link_list = upstream_k_mile_dict[tmc_id]
    if len(affected_link_list) == 0:
        # Recorded for inspection only. The link is still processed below, so its
        # column ends up all-NaN (min over zero columns) instead of keeping its zeros.
        no_upstream_selected_link_list.append(tmc_id)
    affected_link_speed = spd_record_all.loc[:, spd_record_all.columns.isin(affected_link_list)]
    # Work on float arrays: vectorised arithmetic replaces the per-timestamp Python
    # list/zip loop and yields the same values (NaN inputs stay NaN).
    upstream_min_speed = affected_link_speed.min(axis=1).to_numpy(dtype=float)
    analysis_link_speed = spd_record_all[tmc_id].to_numpy(dtype=float)
    df_slowdown_speed[tmc_id] = np.maximum(upstream_min_speed - analysis_link_speed, 0)

100%|██████████| 1431/1431 [00:23<00:00, 60.53it/s]


In [18]:
# TMC ids whose entry in the upstream lookup was an empty list.
no_upstream_selected_link_list

['110+05136',
 '110+09550',
 '110+09566',
 '110+11769',
 '110+12401',
 '110+12702',
 '110+12741',
 '110+12751',
 '110+18370',
 '110+52787',
 '110+52796',
 '110+52850',
 '110+52895',
 '110+52899',
 '110+53771',
 '110+53774',
 '110+53784',
 '110+53829',
 '110+53855',
 '110+53864',
 '110+53867',
 '110+53871',
 '110+55328',
 '110+55667',
 '110-04521',
 '110-04565',
 '110-06173',
 '110-06183',
 '110-09723',
 '110-12365',
 '110-12712',
 '110-12718',
 '110-12720',
 '110-13754',
 '110-52789',
 '110-52821',
 '110-52850',
 '110-52901',
 '110-53770',
 '110-53828',
 '110-53848',
 '110-53860',
 '110-53867',
 '110N52828',
 '110P04383',
 '110P04419',
 '110P04582',
 '110P12746',
 '110P15035',
 '110P15037',
 '110P15093',
 '110P15249',
 '110P52821',
 '110P54308',
 '110P54310',
 '110P54856',
 '110P54868',
 '110P55263',
 '110P55325',
 '110P55632',
 '110P55701',
 '110P55703',
 '110P57062',
 '110P57064']

In [19]:
# Number of TMC ids that have no upstream link in the lookup.
len(no_upstream_selected_link_list)

64

In [20]:
# Preview the slowdown-speed table. Columns of links without upstream links are NaN:
# the loop overwrites their initial zeros with the minimum over an empty column selection.
df_slowdown_speed

tmc_code,110+04384,110+04385,110+04386,110+04387,110+04388,110+04389,110+04390,110+04409,110+04410,110+04411,...,110P55709,110P57062,110P57064,110P57066,110P57070,110P57072,110P57074,110P57076,110P57078,110P57082
measurement_tstamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-14 05:30:00,0.0,0.0,0.00,0.0,1.0,0.0,4.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,22.48,0.00,28.33,0.0,8.00
2022-02-14 05:35:00,0.0,0.0,0.00,0.0,4.0,0.0,6.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,21.48,0.00,28.33,0.0,4.02
2022-02-14 05:40:00,0.0,0.0,7.91,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,17.48,0.00,28.33,0.0,6.00
2022-02-14 05:45:00,0.0,0.0,0.00,0.0,0.8,0.0,6.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,21.08,0.00,28.33,0.0,0.00
2022-02-14 05:50:00,0.0,0.0,1.00,0.0,0.0,0.0,5.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,21.48,0.00,28.33,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-10 20:35:00,0.0,0.0,0.00,0.0,1.0,0.0,3.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,23.22,2.81,24.78,0.0,0.00
2023-02-10 20:40:00,0.0,0.0,0.00,0.0,0.0,0.0,10.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,23.59,0.82,24.00,0.0,0.00
2023-02-10 20:45:00,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,17.00,0.00,31.00,0.0,5.80
2023-02-10 20:50:00,0.0,0.0,0.00,0.0,2.0,0.0,3.0,0.0,0.0,0.0,...,0.0,,,0.0,0.0,17.00,0.00,31.00,0.0,2.59


In [21]:
# Persist the slowdown-speed table. The `with` block guarantees the file is flushed
# and closed even if pickling fails part-way through.
with open(f"{model_path}/data/{country_name}/processed_data/{country_name}_new_slowdown_speed.pkl", "wb") as slowdown_speed_file:
    pickle.dump(df_slowdown_speed, slowdown_speed_file)

## Part 4. Generate Travel Time Index

In [15]:
# Travel time index per TMC and timestamp: the TMC's 85th-percentile speed
# (taken over all timestamps) divided by the observed speed at that timestamp.
# `rdiv` computes other / frame and matches the percentile Series to the columns.
df_all_tti_tmc_5min = spd_record_all.rdiv(spd_record_all.quantile(q=0.85, axis=0), axis="columns")

In [16]:
# Preview the travel-time-index table (one column per TMC id, one row per timestamp).
df_all_tti_tmc_5min 

tmc_code,110+04384,110+04385,110+04386,110+04387,110+04388,110+04389,110+04390,110+04409,110+04410,110+04411,...,110P55709,110P57062,110P57064,110P57066,110P57070,110P57072,110P57074,110P57076,110P57078,110P57082
measurement_tstamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-14 05:30:00,0.797619,0.942857,0.942857,1.000000,1.062500,1.061538,1.133333,1.071429,0.976056,1.019942,...,1.042424,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2022-02-14 05:35:00,1.175439,1.178571,1.178571,1.014706,1.172414,1.061538,1.152542,1.111111,1.032258,0.997024,...,0.926154,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2022-02-14 05:40:00,1.098361,1.016792,1.157895,1.210526,1.236364,1.277778,1.283019,0.952381,1.103448,1.116667,...,0.936965,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2022-02-14 05:45:00,1.046875,1.047619,1.047619,1.029851,1.114754,1.061538,1.236364,1.176471,1.103448,1.116667,...,0.959363,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2022-02-14 05:50:00,1.080645,1.100000,1.118644,1.078125,1.062500,1.078125,1.152542,1.000000,0.941176,0.943662,...,0.963200,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-10 20:35:00,1.000000,1.047619,1.118644,1.029851,1.062500,1.061538,1.096774,1.090909,1.032258,1.030769,...,1.081033,1.0,1.044813,1.139269,1.159700,1.081695,1.154061,1.009048,1.000000,1.093023
2023-02-10 20:40:00,1.135593,1.157895,1.064516,1.112903,1.000000,0.985714,1.046154,0.983607,1.049180,1.091205,...,1.038155,1.0,1.027755,1.154545,1.170732,1.100444,1.122454,1.009048,1.000000,1.044444
2023-02-10 20:45:00,0.971014,1.031250,0.985075,1.095238,1.114754,1.061538,1.046154,1.111111,0.984615,1.000000,...,1.070222,1.0,0.996438,1.170777,1.170732,0.952308,0.942776,1.009048,1.004249,1.063348
2023-02-10 20:50:00,0.985294,0.956522,0.929577,1.000000,1.046154,1.000000,1.062500,0.869565,1.032258,1.000000,...,1.070222,1.0,0.972201,1.181395,1.090909,0.952308,0.953654,1.009048,1.031951,1.044444


In [17]:
# Persist the travel-time-index table. The `with` block guarantees the file is flushed
# and closed even if pickling fails part-way through.
with open(f"{model_path}/data/{country_name}/processed_data/{country_name}_5min_tti.pkl", "wb") as tti_file:
    pickle.dump(df_all_tti_tmc_5min, tti_file)