In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
FILE_OD_TRIP_ATTR = r"T:\SLD\Danville_OD_premium_trip_4712_TravelCount\Danville_OD_premium_4712_od_trip_attributes_counts_all.csv"

In [108]:
FILE_PA_AUTO_TRIPS = r"T:\SLD\Model_PA_Auto_Trips.csv"
FILE_DISTANCE_SKIM = r"T:\SLD\Model_Skim_Distance.csv"
FILE_TIME_SKIM = r"T:\SLD\Model_Skim_Time.csv"

In [109]:
def coincidence_ratio(x, y):
    coincidence = np.minimum(x, y).sum()
    total = np.maximum(x, y).sum()
    return coincidence*1.0 / total

### Process Streetlight Data

In [4]:
df_SLD = pd.read_csv(FILE_OD_TRIP_ATTR)

In [5]:
df_SLD.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178542 entries, 0 to 178541
Data columns (total 68 columns):
Type of Travel                          178542 non-null object
Origin Zone ID                          178542 non-null int64
Origin Zone Name                        178542 non-null int64
Origin Zone Is Pass-Through             178542 non-null object
Origin Zone Direction (degrees)         0 non-null float64
Origin Zone is Bi-Direction             178542 non-null object
Destination Zone ID                     178542 non-null int64
Destination Zone Name                   178542 non-null int64
Destination Zone Is Pass-Through        178542 non-null object
Destination Zone Direction (degrees)    0 non-null float64
Destination Zone is Bi-Direction        178542 non-null object
Day Type                                178542 non-null object
Day Part                                178542 non-null object
O-D Traffic (Trip Counts)               178542 non-null int64
O-D Traffic (StL Ind

In [6]:
selection = (df_SLD['Day Type'] == '1: Average Weekday (M-F)') & (df_SLD['Day Part'] == '0: All Day (12am-12am)') 
useful_cols = [
    'Origin Zone ID',
    'Destination Zone ID',
    'O-D Traffic (StL Index)',
    'Trip Length 0-1 mi (percent)',
    'Trip Length 1-2 mi (percent)',
    'Trip Length 2-5 mi (percent)',
    'Trip Length 5-10 mi (percent)',
    'Trip Length 10-20 mi (percent)',
    'Trip Length 20-30 mi (percent)',
    'Trip Length 30-40 mi (percent)',
    'Trip Length 40-50 mi (percent)',
    'Trip Length 50-60 mi (percent)',
    'Trip Length 60-70 mi (percent)',
    'Trip Length 70-80 mi (percent)',
    'Trip Length 80-90 mi (percent)',
    'Trip Length 90-100 mi (percent)',
    'Trip Length 100+ mi (percent)',
    
]
df_SLD_daily = df_SLD.loc[selection,useful_cols]

In [7]:
df_SLD_daily.head(10)

Unnamed: 0,Origin Zone ID,Destination Zone ID,O-D Traffic (StL Index),Trip Length 0-1 mi (percent),Trip Length 1-2 mi (percent),Trip Length 2-5 mi (percent),Trip Length 5-10 mi (percent),Trip Length 10-20 mi (percent),Trip Length 20-30 mi (percent),Trip Length 30-40 mi (percent),Trip Length 40-50 mi (percent),Trip Length 50-60 mi (percent),Trip Length 60-70 mi (percent),Trip Length 70-80 mi (percent),Trip Length 80-90 mi (percent),Trip Length 90-100 mi (percent),Trip Length 100+ mi (percent)
6,1,10,8,0.667,0.0,0.333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,1,100,3,0.0,0.6,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,1,101,4,0.167,0.5,0.333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41,1,102,7,0.455,0.455,0.091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49,1,103,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55,1,104,7,0.0,0.545,0.091,0.091,0.091,0.182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
67,1,105,7,0.0,0.667,0.222,0.0,0.111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76,1,108,1,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
82,1,109,1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90,1,11,12,0.333,0.417,0.167,0.083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
print('Total number of trips in the SLD sample = {:,.0f}'.format(df_SLD_daily['O-D Traffic (StL Index)'].sum()))

Total number of trips in the SLD sample = 177,351


In [8]:
trip_length_percent = df_SLD_daily.values[:,3:]

In [11]:
trip_length_percent

array([[0.667, 0.   , 0.333, ..., 0.   , 0.   , 0.   ],
       [0.   , 0.6  , 0.4  , ..., 0.   , 0.   , 0.   ],
       [0.167, 0.5  , 0.333, ..., 0.   , 0.   , 0.   ],
       ...,
       [0.537, 0.317, 0.098, ..., 0.   , 0.   , 0.   ],
       [0.625, 0.188, 0.063, ..., 0.   , 0.   , 0.   ],
       [0.7  , 0.167, 0.067, ..., 0.   , 0.   , 0.   ]])

In [12]:
trips = df_SLD_daily.values[:,2]
trips

array([ 8.,  3.,  4., ..., 34., 17., 27.])

In [36]:
trips_by_distance = np.tile(trips.reshape(-1,1), (1,trip_length_percent.shape[1])) * trip_length_percent
trips_by_distance

array([[ 5.336,  0.   ,  2.664, ...,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  1.8  ,  1.2  , ...,  0.   ,  0.   ,  0.   ],
       [ 0.668,  2.   ,  1.332, ...,  0.   ,  0.   ,  0.   ],
       ...,
       [18.258, 10.778,  3.332, ...,  0.   ,  0.   ,  0.   ],
       [10.625,  3.196,  1.071, ...,  0.   ,  0.   ,  0.   ],
       [18.9  ,  4.509,  1.809, ...,  0.   ,  0.   ,  0.   ]])

In [37]:
trips_by_distance = trips_by_distance.sum(axis=0)
trips_by_distance

array([1.7403619e+04, 3.1733101e+04, 6.9690129e+04, 4.3428948e+04,
       1.3156385e+04, 1.3403490e+03, 3.4597700e+02, 1.2861300e+02,
       6.5267000e+01, 3.1914000e+01, 1.5367000e+01, 6.1560000e+00,
       2.6600000e+00, 9.2610000e+00])

In [58]:
df_trip_length_distribution_SLD = pd.DataFrame(data=trips_by_distance, index=useful_cols[3:], columns=['Trips_SLD'])
df_trip_length_distribution_SLD['Percent_SLD'] = df_trip_length_distribution_SLD.Trips_SLD / df_trip_length_distribution_SLD.values.sum()
df_trip_length_distribution_SLD

Unnamed: 0,Trips_SLD,Percent_SLD
Trip Length 0-1 mi (percent),17403.619,0.098127
Trip Length 1-2 mi (percent),31733.101,0.178921
Trip Length 2-5 mi (percent),69690.129,0.392935
Trip Length 5-10 mi (percent),43428.948,0.244866
Trip Length 10-20 mi (percent),13156.385,0.07418
Trip Length 20-30 mi (percent),1340.349,0.007557
Trip Length 30-40 mi (percent),345.977,0.001951
Trip Length 40-50 mi (percent),128.613,0.000725
Trip Length 50-60 mi (percent),65.267,0.000368
Trip Length 60-70 mi (percent),31.914,0.00018


In [50]:
print('Total number of trips in the SLD sample = {:,.0f}'.format(df_trip_length_distribution_SLD.sum()[0]))

Total number of trips in the SLD sample = 177,358


### Process Model Data

In [53]:
model_trips_columns = ['I', 'J', 'HBW', 'HBO', 'NHB', 'XIIX', 'AUTOTRIPS']
df_model_trips = pd.read_csv(FILE_PA_AUTO_TRIPS, header=None, names=model_trips_columns)

In [54]:
model_skim_columns = ['I', 'J', 'DISTANCE']
df_model_skims = pd.read_csv(FILE_DISTANCE_SKIM, header=None, names=model_skim_columns)

In [55]:
df_model_trips.head()

Unnamed: 0,I,J,HBW,HBO,NHB,XIIX,AUTOTRIPS
0,1,1,0.0,0.0,0.625359,0.0,0.625359
1,1,2,0.0,0.0,0.509751,0.0,0.509751
2,1,3,0.0,0.0,0.691367,0.0,0.691367
3,1,4,0.0,0.0,0.616767,0.0,0.616767
4,1,5,0.0,0.0,0.174565,0.0,0.174565


In [56]:
df_model_skims.head()

Unnamed: 0,I,J,DISTANCE
0,1,1,0.0
1,1,2,0.262192
2,1,3,0.108211
3,1,4,0.477902
4,1,5,0.591185


In [57]:
df_model_trip_merge = pd.merge(left=df_model_trips, right=df_model_skims, left_on=['I', 'J'], right_on=['I', 'J'])
df_model_trip_merge.head()

Unnamed: 0,I,J,HBW,HBO,NHB,XIIX,AUTOTRIPS,DISTANCE
0,1,1,0.0,0.0,0.625359,0.0,0.625359,0.0
1,1,2,0.0,0.0,0.509751,0.0,0.509751,0.262192
2,1,3,0.0,0.0,0.691367,0.0,0.691367,0.108211
3,1,4,0.0,0.0,0.616767,0.0,0.616767,0.477902
4,1,5,0.0,0.0,0.174565,0.0,0.174565,0.591185


In [61]:
bins = [0, 1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000]

In [62]:
df_model_trip_merge['dist_bin'] = pd.cut(df_model_trip_merge.DISTANCE, bins=bins, right=False)
df_model_trip_merge.head()

Unnamed: 0,I,J,HBW,HBO,NHB,XIIX,AUTOTRIPS,DISTANCE,dist_bin
0,1,1,0.0,0.0,0.625359,0.0,0.625359,0.0,"[0, 1)"
1,1,2,0.0,0.0,0.509751,0.0,0.509751,0.262192,"[0, 1)"
2,1,3,0.0,0.0,0.691367,0.0,0.691367,0.108211,"[0, 1)"
3,1,4,0.0,0.0,0.616767,0.0,0.616767,0.477902,"[0, 1)"
4,1,5,0.0,0.0,0.174565,0.0,0.174565,0.591185,"[0, 1)"


In [63]:
df_trip_length_distribution_model = df_model_trip_merge.groupby('dist_bin')['DISTANCE'].sum()
df_trip_length_distribution_model

dist_bin
[0, 1)            444.069250
[1, 2)           3108.566211
[2, 5)          34010.334058
[5, 10)         98087.402040
[10, 20)       120798.864001
[20, 30)         6794.562562
[30, 40)            0.000000
[40, 50)            0.000000
[50, 60)            0.000000
[60, 70)            0.000000
[70, 80)            0.000000
[80, 90)            0.000000
[90, 100)           0.000000
[100, 1000)         0.000000
Name: DISTANCE, dtype: float64

### Process SLD with Model Skim

In [None]:
df_SLD_trips_daily = df_SLD_daily.loc[:,['Origin Zone ID', 'Destination Zone ID', 'O-D Traffic (StL Index)']]

In [77]:
df_SLD_trips_daily = pd.merge(left=df_SLD_trips_daily, right=df_model_skims, left_on=['Origin Zone ID', 'Destination Zone ID'],
                             right_on=['I','J'])
df_SLD_trips_daily.head()

Unnamed: 0,Origin Zone ID,Destination Zone ID,O-D Traffic (StL Index),I,J,DISTANCE
0,1,10,8,1,10,0.441919
1,1,100,3,1,100,1.602933
2,1,101,4,1,101,1.10048
3,1,102,7,1,102,0.739425
4,1,103,1,1,103,0.681908


In [78]:
df_SLD_trips_daily['dist_bin'] = pd.cut(df_SLD_trips_daily.DISTANCE, bins=bins, right=False)

In [79]:
df_trip_length_distribution_SLD_model_skim = df_SLD_trips_daily.groupby('dist_bin')['DISTANCE'].sum()

In [80]:
df_trip_length_distribution_SLD_model_skim

dist_bin
[0, 1)           432.134786
[1, 2)          2930.543665
[2, 5)         25667.526925
[5, 10)        45454.856108
[10, 20)       16471.288431
[20, 30)           0.000000
[30, 40)           0.000000
[40, 50)           0.000000
[50, 60)           0.000000
[60, 70)           0.000000
[70, 80)           0.000000
[80, 90)           0.000000
[90, 100)          0.000000
[100, 1000)        0.000000
Name: DISTANCE, dtype: float64

### Comparison

In [82]:
df_trip_length_distribution = df_trip_length_distribution_SLD.copy()
df_trip_length_distribution['Trips_Model'] = df_trip_length_distribution_model.values
df_trip_length_distribution['Percent_Model'] = df_trip_length_distribution['Trips_Model'] / sum(df_trip_length_distribution['Trips_Model'])

In [85]:
df_trip_length_distribution['Trips_TLD_Model_Skim'] = df_trip_length_distribution_SLD_model_skim.values
df_trip_length_distribution['Percent_TLD_Model_Skim'] = df_trip_length_distribution['Trips_TLD_Model_Skim'] / sum(df_trip_length_distribution['Trips_TLD_Model_Skim'])

In [86]:
df_trip_length_distribution

Unnamed: 0,Trips_SLD,Percent_SLD,Trips_Model,Percent_Model,Trips_TLD_Model_Skim,Percent_TLD_Model_Skim
Trip Length 0-1 mi (percent),17403.619,0.098127,444.06925,0.001687,432.134786,0.004751
Trip Length 1-2 mi (percent),31733.101,0.178921,3108.566211,0.011809,2930.543665,0.032219
Trip Length 2-5 mi (percent),69690.129,0.392935,34010.334058,0.129197,25667.526925,0.282196
Trip Length 5-10 mi (percent),43428.948,0.244866,98087.40204,0.37261,45454.856108,0.499744
Trip Length 10-20 mi (percent),13156.385,0.07418,120798.864001,0.458886,16471.288431,0.18109
Trip Length 20-30 mi (percent),1340.349,0.007557,6794.562562,0.025811,0.0,0.0
Trip Length 30-40 mi (percent),345.977,0.001951,0.0,0.0,0.0,0.0
Trip Length 40-50 mi (percent),128.613,0.000725,0.0,0.0,0.0,0.0
Trip Length 50-60 mi (percent),65.267,0.000368,0.0,0.0,0.0,0.0
Trip Length 60-70 mi (percent),31.914,0.00018,0.0,0.0,0.0,0.0


In [84]:
print("Coincidence Ratio between Model and SLD = {:.2f}".format(coincidence_ratio(
    df_trip_length_distribution.Percent_Model, df_trip_length_distribution.Percent_SLD
)))

Coincidence Ratio between Model and SLD = 0.31


In [87]:
print("Coincidence Ratio between Model and SLD with Model Skim = {:.2f}".format(coincidence_ratio(
    df_trip_length_distribution.Percent_Model, df_trip_length_distribution.Percent_TLD_Model_Skim
)))

Coincidence Ratio between Model and SLD with Model Skim = 0.53


### Comparison using Different Bins

In [88]:
df_model_trip_bins = df_model_trip_merge.copy()

In [102]:
bins2 = np.arange(0, 52, 2)
df_model_trip_bins['dist_bin'] = pd.cut(df_model_trip_bins.DISTANCE, bins=bins2, right=False)

In [103]:
df_TLD = pd.DataFrame(df_model_trip_bins.groupby('dist_bin')['DISTANCE'].sum())
df_TLD.rename(columns={'DISTANCE':'Trips_Model'}, inplace=True)
df_TLD['Percent_Model'] = df_TLD['Trips_Model'] / sum(df_TLD['Trips_Model']) 
df_TLD

Unnamed: 0_level_0,Trips_Model,Percent_Model
dist_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
"[0, 2)",3552.635461,0.013496
"[2, 4)",19216.65377,0.072999
"[4, 6)",32232.918849,0.122445
"[6, 8)",38888.583357,0.147728
"[8, 10)",41759.580122,0.158635
"[10, 12)",39911.509621,0.151614
"[12, 14)",33890.675442,0.128743
"[14, 16)",25493.982475,0.096846
"[16, 18)",14006.054027,0.053206
"[18, 20)",7496.642436,0.028478


In [104]:
df_SLD_trips_daily['dist_bin'] = pd.cut(df_SLD_trips_daily.DISTANCE, bins=bins2, right=False)

In [105]:
df_TLD_SLD_Model_Skim = df_SLD_trips_daily.groupby('dist_bin')['DISTANCE'].sum()
df_TLD['Trips_SLD'] = df_TLD_SLD_Model_Skim.values
df_TLD['Percent_SLD'] = df_TLD['Trips_SLD'] / sum(df_TLD['Trips_SLD'])

In [106]:
print("Coincidence Ratio between Model and SLD with Model Skim = {:.2f}".format(coincidence_ratio(
    df_TLD.Percent_Model, df_TLD.Percent_SLD
)))

Coincidence Ratio between Model and SLD with Model Skim = 0.53


In [107]:
df_TLD

Unnamed: 0_level_0,Trips_Model,Percent_Model,Trips_SLD,Percent_SLD
dist_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"[0, 2)",3552.635461,0.013496,3362.678451,0.03697
"[2, 4)",19216.65377,0.072999,15413.447659,0.16946
"[4, 6)",32232.918849,0.122445,21175.007238,0.232804
"[6, 8)",38888.583357,0.147728,19628.746799,0.215804
"[8, 10)",41759.580122,0.158635,14905.181337,0.163872
"[10, 12)",39911.509621,0.151614,9136.885658,0.100454
"[12, 14)",33890.675442,0.128743,4702.623601,0.051702
"[14, 16)",25493.982475,0.096846,1883.441559,0.020707
"[16, 18)",14006.054027,0.053206,652.815171,0.007177
"[18, 20)",7496.642436,0.028478,95.522442,0.00105


## Travel Times as Skims

In [110]:
df_SLD = pd.read_csv(FILE_OD_TRIP_ATTR)

In [111]:
selection = (df_SLD['Day Type'] == '1: Average Weekday (M-F)') & (df_SLD['Day Part'] == '0: All Day (12am-12am)') 
useful_cols = [
    'Origin Zone ID',
    'Destination Zone ID',
    'O-D Traffic (StL Index)',
    'Trip Duration 0-10 min (percent)'   ,    
    'Trip Duration 10-20 min (percent)'  ,    
    'Trip Duration 20-30 min (percent)'  ,    
    'Trip Duration 30-40 min (percent)'  ,    
    'Trip Duration 40-50 min (percent)'  ,    
    'Trip Duration 50-60 min (percent)'  ,    
    'Trip Duration 60-70 min (percent)'  ,    
    'Trip Duration 70-80 min (percent)'  ,    
    'Trip Duration 80-90 min (percent)'  ,    
    'Trip Duration 90-100 min (percent)' ,    
    'Trip Duration 100-110 min (percent)',    
    'Trip Duration 110-120 min (percent)',    
    'Trip Duration 120-130 min (percent)',    
    'Trip Duration 130-140 min (percent)',    
    'Trip Duration 140-150 min (percent)',    
    'Trip Duration 150+ min (percent)'   ,
    
]
df_SLD_daily = df_SLD.loc[selection,useful_cols]

In [112]:
trip_duration_percent = df_SLD_daily.values[:,3:]

In [113]:
trips = df_SLD_daily.values[:,2]

In [114]:
trips_by_duration = np.tile(trips.reshape(-1,1), (1,trip_duration_percent.shape[1])) * trip_duration_percent
trips_by_duration

array([[ 2.664,  2.664,  1.336, ...,  0.   ,  0.   ,  0.   ],
       [ 1.2  ,  1.2  ,  0.6  , ...,  0.   ,  0.   ,  0.   ],
       [ 1.332,  1.332,  0.   , ...,  0.   ,  0.   ,  0.   ],
       ...,
       [12.444, 13.26 ,  4.148, ...,  0.   ,  0.   ,  0.   ],
       [ 5.321,  7.446,  0.   , ...,  0.   ,  0.   ,  0.   ],
       [10.8  ,  8.991,  1.809, ...,  0.   ,  0.   ,  0.   ]])

In [116]:
trips_by_duration = trips_by_duration.sum(axis=0)
trips_by_duration

array([29895.949, 63415.034, 39378.094, 20094.44 , 10422.131,  5584.652,
        3175.624,  1856.675,  1095.468,   702.041,   462.819,   336.76 ,
         230.719,   176.788,   113.185,   418.715])

In [117]:
df_trip_duration_distribution_SLD = pd.DataFrame(data=trips_by_duration, index=useful_cols[3:], columns=['Trips_SLD'])
df_trip_duration_distribution_SLD['Percent_SLD'] = df_trip_duration_distribution_SLD.Trips_SLD / df_trip_duration_distribution_SLD.values.sum()
df_trip_duration_distribution_SLD

Unnamed: 0,Trips_SLD,Percent_SLD
Trip Duration 0-10 min (percent),29895.949,0.168562
Trip Duration 10-20 min (percent),63415.034,0.357552
Trip Duration 20-30 min (percent),39378.094,0.222025
Trip Duration 30-40 min (percent),20094.44,0.113298
Trip Duration 40-50 min (percent),10422.131,0.058763
Trip Duration 50-60 min (percent),5584.652,0.031488
Trip Duration 60-70 min (percent),3175.624,0.017905
Trip Duration 70-80 min (percent),1856.675,0.010468
Trip Duration 80-90 min (percent),1095.468,0.006177
Trip Duration 90-100 min (percent),702.041,0.003958


### Process Model

In [118]:
model_tt_columns = ['I', 'J', 'TIME']
df_model_tt = pd.read_csv(FILE_TIME_SKIM, header=None, names=model_tt_columns)

In [119]:
df_model_tt_merge = pd.merge(left=df_model_trips, right=df_model_tt, left_on=['I', 'J'], right_on=['I', 'J'])

In [126]:
bins_tt = np.arange(0, 170, 10)

In [127]:
df_model_tt_merge['tt_bin'] = pd.cut(df_model_tt_merge.TIME, bins=bins_tt, right=False)

In [128]:
df_TLD_TT = df_model_tt_merge.groupby('tt_bin')['TIME'].sum()
df_TLD_TT

tt_bin
[0, 10)       102954.023022
[10, 20)      256684.338552
[20, 30)       59505.065787
[30, 40)        1397.172485
[40, 50)           0.000000
[50, 60)           0.000000
[60, 70)           0.000000
[70, 80)           0.000000
[80, 90)           0.000000
[90, 100)          0.000000
[100, 110)         0.000000
[110, 120)         0.000000
[120, 130)         0.000000
[130, 140)         0.000000
[140, 150)         0.000000
[150, 160)         0.000000
Name: TIME, dtype: float64

In [129]:
df_trip_duration_distribution = df_trip_duration_distribution_SLD.copy()
df_trip_duration_distribution['Trips_Model'] = df_TLD_TT.values
df_trip_duration_distribution['Percent_Model'] = df_trip_duration_distribution['Trips_Model'] / sum(df_trip_duration_distribution['Trips_Model'])

In [130]:
print("Coincidence Ratio between Model and SLD Using Travel Times = {:.2f}".format(coincidence_ratio(
    df_trip_duration_distribution.Percent_Model, df_trip_duration_distribution.Percent_SLD
)))

Coincidence Ratio between Model and SLD Using Travel Times = 0.50


### Process SLD with Model Travel Times

In [131]:
df_SLD_trips_daily = df_SLD_daily.loc[:,['Origin Zone ID', 'Destination Zone ID', 'O-D Traffic (StL Index)']]

In [132]:
df_SLD_trips_daily = pd.merge(left=df_SLD_trips_daily, right=df_model_tt, left_on=['Origin Zone ID', 'Destination Zone ID'],
                             right_on=['I','J'])

In [133]:
df_model_tt_merge['tt_bin'] = pd.cut(df_SLD_trips_daily.TIME, bins=bins_tt, right=False)

In [134]:
df_TLD_TT = df_model_tt_merge.groupby('tt_bin')['TIME'].sum()
df_TLD_TT

tt_bin
[0, 10)       111023.190290
[10, 20)       67903.375975
[20, 30)        2088.171013
[30, 40)           0.000000
[40, 50)           0.000000
[50, 60)           0.000000
[60, 70)           0.000000
[70, 80)           0.000000
[80, 90)           0.000000
[90, 100)          0.000000
[100, 110)         0.000000
[110, 120)         0.000000
[120, 130)         0.000000
[130, 140)         0.000000
[140, 150)         0.000000
[150, 160)         0.000000
Name: TIME, dtype: float64

In [135]:
df_trip_duration_distribution['Trips_SLD_Model_TT'] = df_TLD_TT.values
df_trip_duration_distribution['Percent_SLD_Model_TT'] = df_trip_duration_distribution['Trips_SLD_Model_TT'] / sum(df_trip_duration_distribution['Trips_SLD_Model_TT'])

In [136]:
print("Coincidence Ratio between Model and SLD Using Travel Times = {:.2f}".format(coincidence_ratio(
    df_trip_duration_distribution.Percent_Model, df_trip_duration_distribution.Percent_SLD_Model_TT
)))

Coincidence Ratio between Model and SLD Using Travel Times = 0.46


### Scratch

In [93]:
df_model_trip_bins['DISTANCE'].describe()

count    35344.000000
mean         7.448048
std          4.463948
min          0.000000
25%          3.907394
50%          6.685855
75%         10.315343
max         26.835344
Name: DISTANCE, dtype: float64

In [15]:
a = np.array([2,3])
b = np.array([[4,1,5],[2,5,7]])

In [22]:
a.reshape(2,1)

array([[2],
       [3]])

In [25]:
prod = np.tile(a.reshape(2,1), (1,b.shape[1])) * b
prod

array([[ 8,  2, 10],
       [ 6, 15, 21]])

In [27]:
prod.sum(axis=0)

array([14, 17, 31])