In [1]:
import numpy as np
import pandas as pd
from dbfread import DBF

In [78]:
# Input file locations. All are absolute Windows paths on specific drives
# (T:, R:, C:), so the notebook only runs where those locations are available.
# O-D trip attribute counts CSV; read into df_od_trip.
FILE_OD_TRIP_ATTR = r"T:\SLD\Danville_OD_premium_trip_4712_TravelCount\Danville_OD_premium_4712_od_trip_attributes_counts_all.csv"
# TAZ DBF table; supplies the TAZ_2018 -> DistName (district) lookup.
FILE_ZONE_DISTRICT_DBF = r"R:\Danville\DATA\Data_Received\DanvilleMPO_TAZ_RevDist_06182018\DanvilleMPO_TAZ_RevDist_06182018.dbf"
# O-D trip purpose counts CSV; read into df_od_purp.
FILE_OD_TRIP_PURP = r"T:\SLD\Danville_OD_premium_trip_4712_TravelCount\Danville_OD_premium_4712_od_trip_purpose_counts.csv"
# Model O-D trips CSV, read with column names O, D, Trips.
FILE_MODEL_TRIP = r"C:\Projects\2018\Danville\SLD_Comparison\Model_OD_Trips.csv"
# Model skim distance CSV; read into df_model_dist.
FILE_MODEL_DIST = r"C:\Projects\2018\Danville\SLD_Comparison\Model_Skim_Distance.csv"
# Model trips by purpose CSV, read with column names O, D, HBW_Trip, HBO_Trip, NHB_Trip.
FILE_MODEL_PA_TRIP = r"T:\TransCAD_Conventional_UI\2016_test\Outputs\Auto_Trips.csv"

In [7]:
# Load the raw O-D trip attribute table.
df_od_trip = pd.read_csv(filepath_or_buffer=FILE_OD_TRIP_ATTR)

In [34]:
# Columns of the O-D attribute table that the analysis keeps.
useful_col = [
    # what kind of trip and between which zones
    'Type of Travel',
    'Origin Zone ID',
    'Destination Zone ID',
    # time slice of the record
    'Day Type',
    'Day Part',
    # volumes
    'O-D Traffic (Trip Counts)',
    'O-D Traffic (StL Index)',
    # duration, length, speed and circuity
    'Avg Trip Duration (sec)',
    'Avg All Trip Duration (sec)',
    'Avg Trip Length (mi)',
    'Avg All Trip Length (mi)',
    'Avg Trip Speed (mph)',
    'Avg All Trip Speed (mph)',
    'Avg All Trip Circuity',
]

In [10]:
# Keep only the columns listed in useful_col. This rebinds df_od_trip, so the
# other columns read from the CSV are no longer reachable through this name.
df_od_trip = df_od_trip.loc[:, useful_col]

In [11]:
# Show the rows at positions 0 and 6 side by side, one field per line.
df_od_trip.iloc[[0, 6]].transpose()

Unnamed: 0,0,6
Type of Travel,Personal,Personal
Origin Zone ID,1,1
Origin Zone Name,1,1
Origin Zone Is Pass-Through,no,no
Origin Zone Direction (degrees),,
Origin Zone is Bi-Direction,no,no
Destination Zone ID,10,10
Destination Zone Name,10,10
Destination Zone Is Pass-Through,no,no
Destination Zone Direction (degrees),,


In [12]:
# Read the zone DBF table into a DataFrame, one row per DBF record.
df_zones = pd.DataFrame(iter(DBF(FILE_ZONE_DISTRICT_DBF)))

In [13]:
# Lookup Series: TAZ_2018 zone id (index) -> DistName district label (value).
zone_district_mapper = df_zones.set_index('TAZ_2018')['DistName']

In [14]:
# Tag every O-D record with the district of its origin and destination zone.
df_od_trip['O_District'] = df_od_trip['Origin Zone ID'].map(zone_district_mapper)
df_od_trip['D_District'] = df_od_trip['Destination Zone ID'].map(zone_district_mapper)
# Trip count times average length for the record; summing this and dividing
# by summed counts yields a count-weighted average length.
df_od_trip['Trip_Length'] = df_od_trip['O-D Traffic (Trip Counts)'].mul(df_od_trip['Avg All Trip Length (mi)'])

In [17]:
# Restrict the table to the average-weekday, all-day records.
selection = (
    df_od_trip['Day Type'].eq('1: Average Weekday (M-F)')
    & df_od_trip['Day Part'].eq('0: All Day (12am-12am)')
)
df_od_trip = df_od_trip.loc[selection]

In [35]:
# Number of trips between districts, with the count-weighted average trip length.
# df_od_trip has already been restricted to the weekday/all-day records, so no
# further row filter is applied here.
# The columns to aggregate are selected with a list: selecting several groupby
# columns with a bare tuple (['a', 'b'] without the inner brackets) is rejected
# by current pandas.
df_od_traffic = (
    df_od_trip
    .groupby(['O_District', 'D_District'])[['O-D Traffic (Trip Counts)', 'Trip_Length']]
    .sum()
    .reset_index()
)
# Summed (count x length) divided by summed counts = weighted mean length.
df_od_traffic['Avg_Trip_Len'] = df_od_traffic['Trip_Length'] / df_od_traffic['O-D Traffic (Trip Counts)']

In [54]:
# Write the district-to-district average trip length matrix
# (rows = origin district, columns = destination district).
(
    df_od_traffic
    .pivot(index='O_District', columns='D_District', values='Avg_Trip_Len')
    .to_csv('Avg_Trip_Length.csv')
)

In [28]:
# Average trip length to Danville Mall
# df_od_trip is already limited to the weekday/all-day records, so only the
# destination filter is needed. The global `selection` mask is deliberately not
# re-applied: that name is rebound later to a mask built on a different table,
# which would make this cell depend on execution order.
df_mall = df_od_trip.loc[df_od_trip.D_District == "Danville Mall"]
# Count-weighted mean: total (count x length) over total count.
print("Average trip length for trips to Danville Mall = {0:.2f} miles".format(sum(df_mall.Trip_Length)/sum(df_mall['O-D Traffic (Trip Counts)'])))

Average trip length for trips to Danville Mall = 3.97 miles


In [146]:
# Average trip length for trips that start OR end in the Danville Mall district.
# df_od_trip is already limited to the weekday/all-day records. The global
# `selection` mask is not re-applied because that name is rebound later to a
# mask built on a different table, which made this cell order-dependent.
df_mall = df_od_trip.loc[(df_od_trip.D_District == "Danville Mall") | (df_od_trip.O_District == "Danville Mall")]
# The label states both directions, matching the filter above.
print("Average trip length for trips from and to Danville Mall = {0:.2f} miles".format(sum(df_mall.Trip_Length)/sum(df_mall['O-D Traffic (Trip Counts)'])))

Average trip length for trips to Danville Mall = 4.40 miles


#### By Purpose

In [45]:
# Load the O-D trip purpose table.
df_od_purp = pd.read_csv(filepath_or_buffer=FILE_OD_TRIP_PURP)

In [46]:
# Print the column names, non-null counts and dtypes of the purpose table.
df_od_purp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178542 entries, 0 to 178541
Data columns (total 18 columns):
Type of Travel                          178542 non-null object
Origin Zone ID                          178542 non-null int64
Origin Zone Name                        178542 non-null int64
Origin Zone Is Pass-Through             178542 non-null object
Origin Zone Direction (degrees)         0 non-null float64
Origin Zone is Bi-Direction             178542 non-null object
Destination Zone ID                     178542 non-null int64
Destination Zone Name                   178542 non-null int64
Destination Zone Is Pass-Through        178542 non-null object
Destination Zone Direction (degrees)    0 non-null float64
Destination Zone is Bi-Direction        178542 non-null object
Day Type                                178542 non-null object
Day Part                                178542 non-null object
O-D Traffic (Trip Counts)               178542 non-null int64
O-D Traffic (StL Ind

In [47]:
# Split the StL index of each record by purpose share (HBW / HBO / NHB).
# NOTE(review): the share columns are labelled "(percent)" and are used as-is;
# confirm whether they are fractions or 0-100 values.
df_od_purp['HBW_Trip'] = df_od_purp['Purpose HBW (percent)'].mul(df_od_purp['O-D Traffic (StL Index)'])
df_od_purp['HBO_Trip'] = df_od_purp['Purpose HBO (percent)'].mul(df_od_purp['O-D Traffic (StL Index)'])
df_od_purp['NHB_Trip'] = df_od_purp['Purpose NHB (percent)'].mul(df_od_purp['O-D Traffic (StL Index)'])

In [48]:
# Attach origin and destination district names to the purpose records.
df_od_purp['O_District'] = df_od_purp['Origin Zone ID'].map(zone_district_mapper)
df_od_purp['D_District'] = df_od_purp['Destination Zone ID'].map(zone_district_mapper)

In [49]:
# Average-weekday, all-day records of the purpose table.
selection = (df_od_purp['Day Type'] == '1: Average Weekday (M-F)') & (df_od_purp['Day Part'] == '0: All Day (12am-12am)')
# District-to-district totals by purpose. The columns are selected with a list:
# selecting several groupby columns with a bare tuple is rejected by current
# pandas.
df_od_purp_gb = (
    df_od_purp.loc[selection]
    .groupby(['O_District', 'D_District'])[['HBW_Trip', 'HBO_Trip', 'NHB_Trip']]
    .sum()
    .reset_index()
)

In [50]:
# Write one origin-district x destination-district matrix per trip purpose.
(
    df_od_purp_gb
    .sort_values(by=['O_District', 'D_District'])
    .pivot(index='O_District', columns='D_District', values='HBW_Trip')
    .to_csv('HBW_Trips_STL.csv')
)
(
    df_od_purp_gb
    .sort_values(by=['O_District', 'D_District'])
    .pivot(index='O_District', columns='D_District', values='HBO_Trip')
    .to_csv('HBO_Trips_STL.csv')
)
(
    df_od_purp_gb
    .sort_values(by=['O_District', 'D_District'])
    .pivot(index='O_District', columns='D_District', values='NHB_Trip')
    .to_csv('NHB_Trips_STL.csv')
)

In [51]:
# Spot check: StL index recorded in the attribute table for zone pair 1 -> 55,
# for comparison against the other data file.
df_od_trip.loc[
    df_od_trip['Origin Zone ID'].eq(1) & df_od_trip['Destination Zone ID'].eq(55),
    'O-D Traffic (StL Index)',
]

559    22
Name: O-D Traffic (StL Index), dtype: int64

### Model District Flows - INTERNAL TRIPS ONLY

In [192]:
# Load the model trips by purpose; column names are supplied here rather than
# taken from the file.
df_model_pa_trips = pd.read_csv(
    FILE_MODEL_PA_TRIP,
    names=['O', 'D', 'HBW_Trip', 'HBO_Trip', 'NHB_Trip'],
)
df_model_pa_trips.head(5)

Unnamed: 0,O,D,HBW_Trip,HBO_Trip,NHB_Trip
0,1,1,0.0,0.0,0.684369
1,1,2,0.0,0.0,0.575456
2,1,3,0.0,0.0,0.752683
3,1,4,0.0,0.0,0.690665
4,1,5,0.0,0.0,0.196167


In [193]:
# Attach origin and destination district names to the model zone pairs.
df_model_pa_trips['O_District'] = df_model_pa_trips['O'].map(zone_district_mapper)
df_model_pa_trips['D_District'] = df_model_pa_trips['D'].map(zone_district_mapper)

In [194]:
# Keep internal trips only: both zone ids must be below 167.
# NOTE(review): 167 is presumably the first external zone id — confirm.
df_model_pa_trips = df_model_pa_trips.loc[
    df_model_pa_trips.O.lt(167) & df_model_pa_trips.D.lt(167)
]

In [195]:
# Ten zone pairs with the most HBW trips into the Danville Mall district.
(
    df_model_pa_trips
    .loc[df_model_pa_trips.D_District == 'Danville Mall']
    .sort_values(by='HBW_Trip', ascending=False)
    .head(10)
)

Unnamed: 0,O,D,HBW_Trip,HBO_Trip,NHB_Trip,O_District,D_District
24883,133,68,111.657669,140.110352,30.837294,Mount Cross,Danville Mall
24884,133,69,84.858253,200.981476,48.041981,Mount Cross,Danville Mall
24695,132,68,69.858673,102.266235,16.203678,Mount Cross,Danville Mall
14168,76,69,56.398529,167.490662,60.572048,Woodbury,Danville Mall
14167,76,68,53.147587,73.875542,24.250967,Woodbury,Danville Mall
16424,88,69,51.37043,135.952789,61.482494,Nordan,Danville Mall
16423,88,68,51.330391,65.653931,27.036629,Nordan,Danville Mall
17176,92,69,51.328625,137.259369,40.604816,Nordan,Danville Mall
24885,133,70,51.253105,124.258377,29.578182,Mount Cross,Danville Mall
17175,92,68,50.49287,64.868721,17.462715,Nordan,Danville Mall


In [196]:
# Total HBW trips whose destination district is Danville Mall.
df_model_pa_trips.HBW_Trip[df_model_pa_trips.D_District == 'Danville Mall'].sum()

11185.82582

In [197]:
# Total HBW trips whose origin district is Danville Mall.
df_model_pa_trips.HBW_Trip[df_model_pa_trips.O_District == 'Danville Mall'].sum()

968.8217480000001

In [198]:
# District-to-district trip matrices by purpose
# (rows = O_District, columns = D_District).
df_model_hbw = df_model_pa_trips.groupby(['O_District', 'D_District'])['HBW_Trip'].sum().unstack('D_District')
df_model_hbo = df_model_pa_trips.groupby(['O_District', 'D_District'])['HBO_Trip'].sum().unstack('D_District')
df_model_nhb = df_model_pa_trips.groupby(['O_District', 'D_District'])['NHB_Trip'].sum().unstack('D_District')

In [199]:
# Convert from PA to OD
def _pa_to_od(pa_matrix):
    """Average a district matrix with its transpose: OD = (PA + PA^T) / 2.

    Parameters
    ----------
    pa_matrix : pandas.DataFrame
        Square matrix whose row labels and column labels are the same
        districts in the same order.

    Returns
    -------
    pandas.DataFrame
        Symmetric matrix carrying ``pa_matrix``'s own index and columns.

    Raises
    ------
    ValueError
        If the row and column labels differ. The transpose is positional, so
        differing labels would add unrelated districts together.
    """
    if not pa_matrix.index.equals(pa_matrix.columns):
        raise ValueError("PA matrix must have identical row and column districts to be transposed")
    od_values = (pa_matrix.values.T + pa_matrix.values) / 2.0
    return pd.DataFrame(data=od_values, index=pa_matrix.index, columns=pa_matrix.columns)


# Each matrix is labelled with its own index/columns rather than borrowing
# the HBW frame's labels.
df_model_hbw = _pa_to_od(df_model_hbw)
df_model_hbo = _pa_to_od(df_model_hbo)
df_model_nhb = _pa_to_od(df_model_nhb)


In [200]:
# Write the three model district matrices to CSV in the working directory.
df_model_hbw.to_csv(path_or_buf='HBW_Trips_Model.csv')
df_model_hbo.to_csv(path_or_buf='HBO_Trips_Model.csv')
df_model_nhb.to_csv(path_or_buf='NHB_Trips_Model.csv')

#### Model Stats

In [60]:
# Model O-D trips: column names are supplied here. The distance table is read
# with pandas' default header handling.
df_model_trip = pd.read_csv(
    FILE_MODEL_TRIP,
    names=['O', 'D', 'Trips'],
)
df_model_dist = pd.read_csv(filepath_or_buffer=FILE_MODEL_DIST)

In [62]:
# Attach origin and destination district names to both model tables.
df_model_trip['O_District'] = df_model_trip['O'].map(zone_district_mapper)
df_model_trip['D_District'] = df_model_trip['D'].map(zone_district_mapper)
df_model_dist['O_District'] = df_model_dist['O'].map(zone_district_mapper)
df_model_dist['D_District'] = df_model_dist['D'].map(zone_district_mapper)

#### Find the zones that have the most trips going to Danville Mall

In [73]:
# Largest zone-to-zone flows into the Danville Mall district, skipping origins
# that have no district.
# NOTE(review): despite the "top10" in the name, 20 rows are kept — confirm
# which number is intended.
df_top10_zones_to_mall = (
    df_model_trip
    .loc[(df_model_trip.D_District == "Danville Mall") & df_model_trip.O_District.notnull()]
    .sort_values(by='Trips', ascending=False)
    .head(20)
)
df_top10_zones_to_mall

Unnamed: 0,O,D,Trips,O_District,D_District
12852,69,69,222.84317,Danville Mall,Danville Mall
24884,133,69,217.515366,Mount Cross,Danville Mall
12851,69,68,206.047119,Danville Mall,Danville Mall
12664,68,69,206.047119,Danville Mall,Danville Mall
12476,67,69,202.301727,Danville Mall,Danville Mall
12850,69,67,202.301727,Danville Mall,Danville Mall
14168,76,69,187.790039,Woodbury,Danville Mall
16424,88,69,169.758209,Nordan,Danville Mall
12853,69,70,160.078094,Danville Mall,Danville Mall
13040,70,69,160.078094,Danville Mall,Danville Mall


In [74]:
# Total trips carried by the selected zone pairs.
df_top10_zones_to_mall['Trips'].sum()

3310.2477870000007

In [147]:
# Distinct origin zones among the selected zone pairs, in ascending order.
sorted(df_top10_zones_to_mall['O'].unique())

[37, 46, 48, 62, 67, 68, 69, 70, 71, 76, 88, 92, 133]

In [75]:
# Look up the distance-table rows for each selected (O, D) pair.
df_model_dist.set_index(['O', 'D']).loc[
    df_top10_zones_to_mall.set_index(['O', 'D']).index
]

Unnamed: 0_level_0,Unnamed: 1_level_0,Distance,O_District,D_District
O,D,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
69,69,1e-06,Danville Mall,Danville Mall
133,69,6.47144,Mount Cross,Danville Mall
69,68,1.32558,Danville Mall,Danville Mall
68,69,1.3279,Danville Mall,Danville Mall
67,69,1.63075,Danville Mall,Danville Mall
69,67,1.63686,Danville Mall,Danville Mall
76,69,1.92224,Woodbury,Danville Mall
88,69,3.872111,Nordan,Danville Mall
69,70,0.40215,Danville Mall,Danville Mall
70,69,0.40215,Danville Mall,Danville Mall


In [103]:
# Zone ids that make up the Danville Mall district on the destination side.
df_model_trip.D[df_model_trip.D_District == "Danville Mall"].unique()

array([ 55,  64,  65,  67,  68,  69,  70,  71,  72,  73,  78, 130],
      dtype=int64)

In [64]:
# Trip-weighted average distance of model trips ending in the Danville Mall district.
df_model_trip_mall = df_model_trip.loc[df_model_trip.D_District == "Danville Mall"]
df_model_dist_mall = df_model_dist.loc[df_model_dist.D_District == "Danville Mall"]
# Pair every trip record with its distance by (O, D) key. The two tables come
# from separate files, so relying on identical row order would silently
# mis-weight the average if the files ever differ in order or coverage.
mall_trip_dist = df_model_trip_mall.merge(
    df_model_dist_mall[['O', 'D', 'Distance']],
    on=['O', 'D'],
    how='inner',
    validate='many_to_one',  # each (O, D) pair must have a single distance
)
if len(mall_trip_dist) != len(df_model_trip_mall):
    raise ValueError("Some Danville Mall zone pairs have no matching distance record")
print("Average trip length for trips to Danville Mall = {0:.2f} miles".format((mall_trip_dist.Trips * mall_trip_dist.Distance).sum()/mall_trip_dist.Trips.sum()))

Average trip length for trips to Danville Mall = 5.69 miles


In [145]:
# Trip-weighted average distance of model trips that start OR end in the
# Danville Mall district.
df_model_trip_mall = df_model_trip.loc[(df_model_trip.D_District == "Danville Mall") | (df_model_trip.O_District == "Danville Mall")]
# The distance table is filtered with its own district columns; a mask built
# from df_model_trip only selects the right rows if both tables happen to
# share the same row labels.
df_model_dist_mall = df_model_dist.loc[(df_model_dist.D_District == "Danville Mall") | (df_model_dist.O_District == "Danville Mall")]
# Pair trips and distances by (O, D) key instead of by row position.
mall_trip_dist = df_model_trip_mall.merge(
    df_model_dist_mall[['O', 'D', 'Distance']],
    on=['O', 'D'],
    how='inner',
    validate='many_to_one',  # each (O, D) pair must have a single distance
)
if len(mall_trip_dist) != len(df_model_trip_mall):
    raise ValueError("Some Danville Mall zone pairs have no matching distance record")
print("Average trip length for trips from and to Danville Mall = {0:.2f} miles".format((mall_trip_dist.Trips * mall_trip_dist.Distance).sum()/mall_trip_dist.Trips.sum()))

Average trip length for trips from and to Danville Mall = 6.11 miles


In [100]:
# Index arrays addressing the main diagonal of a 4x4 array.
di = np.diag_indices(n=4)

In [101]:
# 4x4 array holding 0..15 in row-major order.
a = np.reshape(np.arange(16), (4, 4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [102]:
# Overwrite the main diagonal of `a` in place through the diagonal index arrays.
a[di] = np.asarray([10, 20, 30, 40])
a

array([[10,  1,  2,  3],
       [ 4, 20,  6,  7],
       [ 8,  9, 30, 11],
       [12, 13, 14, 40]])