In [1]:
import numpy as np
import pandas as pd
from dbfread import DBF

In [2]:
FILE_OD_TRIP_ATTR = r"T:\SLD\Danville_OD_premium_trip_4712_TravelCount\Danville_OD_premium_4712_od_trip_attributes_counts_all.csv"
FILE_ZONE_DISTRICT_DBF = r"R:\Danville\DATA\Data_Received\DanvilleMPO_TAZ_RevDist_06182018\DanvilleMPO_TAZ_RevDist_06182018.dbf"
FILE_OD_TRIP_PURP = r"T:\SLD\Danville_OD_premium_trip_4712_TravelCount\Danville_OD_premium_4712_od_trip_purpose_counts.csv"
FILE_MODEL_TRIP = r"T:\TransCAD_Conventional_UI\2016_test\Outputs\Auto_Trips.csv"
FILE_MODEL_DIST = r"C:\Projects\2018\Danville\SLD_Comparison\Model_Skim_Distance.csv"

#### Zone District Mapper

In [3]:
df_zones = pd.DataFrame(iter(DBF(FILE_ZONE_DISTRICT_DBF)))

In [4]:
zone_district_mapper = df_zones.set_index('TAZ_2018').loc[:,'DistName']

#### SLD Data

In [7]:
df_od_trip = pd.read_csv(FILE_OD_TRIP_ATTR)

In [8]:
df_od_trip.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178542 entries, 0 to 178541
Data columns (total 68 columns):
Type of Travel                          178542 non-null object
Origin Zone ID                          178542 non-null int64
Origin Zone Name                        178542 non-null int64
Origin Zone Is Pass-Through             178542 non-null object
Origin Zone Direction (degrees)         0 non-null float64
Origin Zone is Bi-Direction             178542 non-null object
Destination Zone ID                     178542 non-null int64
Destination Zone Name                   178542 non-null int64
Destination Zone Is Pass-Through        178542 non-null object
Destination Zone Direction (degrees)    0 non-null float64
Destination Zone is Bi-Direction        178542 non-null object
Day Type                                178542 non-null object
Day Part                                178542 non-null object
O-D Traffic (Trip Counts)               178542 non-null int64
O-D Traffic (StL Ind

In [34]:
useful_col = ['Type of Travel',
 'Origin Zone ID',
 'Destination Zone ID',
 'Day Type',
 'Day Part',
 'O-D Traffic (Trip Counts)',
 'O-D Traffic (StL Index)',
 'Avg Trip Duration (sec)',
 'Avg All Trip Duration (sec)',
 'Avg Trip Length (mi)',
 'Avg All Trip Length (mi)',
 'Avg Trip Speed (mph)',
 'Avg All Trip Speed (mph)',
 'Avg All Trip Circuity']

In [10]:
df_od_trip = df_od_trip.loc[:, useful_col]

In [11]:
df_od_trip.iloc[[0,6]].T

Unnamed: 0,0,6
Type of Travel,Personal,Personal
Origin Zone ID,1,1
Origin Zone Name,1,1
Origin Zone Is Pass-Through,no,no
Origin Zone Direction (degrees),,
Origin Zone is Bi-Direction,no,no
Destination Zone ID,10,10
Destination Zone Name,10,10
Destination Zone Is Pass-Through,no,no
Destination Zone Direction (degrees),,


In [14]:
df_od_trip.loc[:,'O_District'] = df_od_trip['Origin Zone ID'].map(zone_district_mapper)
df_od_trip.loc[:,'D_District'] = df_od_trip['Destination Zone ID'].map(zone_district_mapper)
df_od_trip.loc[:,'Trip_Length'] = df_od_trip['Avg All Trip Length (mi)'] * df_od_trip['O-D Traffic (Trip Counts)']

In [17]:
selection = (df_od_trip['Day Type'] == '1: Average Weekday (M-F)') & (df_od_trip['Day Part'] == '0: All Day (12am-12am)') 
df_od_trip = df_od_trip.loc[selection]

In [35]:
# Number of trips between districts 
df_od_traffic = df_od_trip.loc[selection].groupby(['O_District', 'D_District'])['O-D Traffic (Trip Counts)','Trip_Length'].sum().reset_index()
df_od_traffic['Avg_Trip_Len'] = df_od_traffic['Trip_Length'] / df_od_traffic['O-D Traffic (Trip Counts)']

In [54]:
df_od_traffic.loc[:,['O_District','D_District','Avg_Trip_Len']].pivot(index='O_District', columns='D_District', values='Avg_Trip_Len').to_csv('Avg_Trip_Length.csv')

In [28]:
# Average trip length to Danville Mall
df_mall = df_od_trip.loc[selection&(df_od_trip.D_District == "Danville Mall")]
print("Average trip length for trips to Danville Mall = {0:.2f} miles".format(sum(df_mall.Trip_Length)/sum(df_mall['O-D Traffic (Trip Counts)'])))

Average trip length for trips to Danville Mall = 3.97 miles


#### By Purpose

In [45]:
df_od_purp = pd.read_csv(FILE_OD_TRIP_PURP)

In [46]:
df_od_purp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178542 entries, 0 to 178541
Data columns (total 18 columns):
Type of Travel                          178542 non-null object
Origin Zone ID                          178542 non-null int64
Origin Zone Name                        178542 non-null int64
Origin Zone Is Pass-Through             178542 non-null object
Origin Zone Direction (degrees)         0 non-null float64
Origin Zone is Bi-Direction             178542 non-null object
Destination Zone ID                     178542 non-null int64
Destination Zone Name                   178542 non-null int64
Destination Zone Is Pass-Through        178542 non-null object
Destination Zone Direction (degrees)    0 non-null float64
Destination Zone is Bi-Direction        178542 non-null object
Day Type                                178542 non-null object
Day Part                                178542 non-null object
O-D Traffic (Trip Counts)               178542 non-null int64
O-D Traffic (StL Ind

In [47]:
df_od_purp['HBW_Trip'] = df_od_purp['O-D Traffic (StL Index)'] * df_od_purp['Purpose HBW (percent)']
df_od_purp['HBO_Trip'] = df_od_purp['O-D Traffic (StL Index)'] * df_od_purp['Purpose HBO (percent)']
df_od_purp['NHB_Trip'] = df_od_purp['O-D Traffic (StL Index)'] * df_od_purp['Purpose NHB (percent)']

In [48]:
df_od_purp.loc[:,'O_District'] = df_od_purp['Origin Zone ID'].map(zone_district_mapper)
df_od_purp.loc[:,'D_District'] = df_od_purp['Destination Zone ID'].map(zone_district_mapper)

In [49]:
selection = (df_od_purp['Day Type'] == '1: Average Weekday (M-F)') & (df_od_purp['Day Part'] == '0: All Day (12am-12am)') 
df_od_purp_gb = df_od_purp.loc[selection].groupby(['O_District', 'D_District'])['HBW_Trip','HBO_Trip','NHB_Trip'].sum().reset_index()

In [50]:
df_od_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','HBW_Trip']].pivot(index='O_District', columns='D_District', values='HBW_Trip').to_csv('HBW_Trips_STL.csv')
df_od_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','HBO_Trip']].pivot(index='O_District', columns='D_District', values='HBO_Trip').to_csv('HBO_Trips_STL.csv')
df_od_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','NHB_Trip']].pivot(index='O_District', columns='D_District', values='NHB_Trip').to_csv('NHB_Trips_STL.csv')

In [51]:
# See if two data files have the same number of trips in a certain zone pair
df_od_trip.loc[(df_od_trip['Origin Zone ID'] == 1) & (df_od_trip['Destination Zone ID'] == 55),'O-D Traffic (StL Index)']

559    22
Name: O-D Traffic (StL Index), dtype: int64

In [54]:
df_od_purp.loc[(df_od_purp['Origin Zone ID'] == 1) & (df_od_purp['Destination Zone ID'] == 55)]

Unnamed: 0,Type of Travel,Origin Zone ID,Origin Zone Name,Origin Zone Is Pass-Through,Origin Zone Direction (degrees),Origin Zone is Bi-Direction,Destination Zone ID,Destination Zone Name,Destination Zone Is Pass-Through,Destination Zone Direction (degrees),...,O-D Traffic (Trip Counts),O-D Traffic (StL Index),Purpose HBW (percent),Purpose HBO (percent),Purpose NHB (percent),HBW_Trip,HBO_Trip,NHB_Trip,O_District,D_District
554,Personal,1,1,no,,no,55,55,no,,...,37,18,0.0,0.0,1.0,0.0,0.0,18.0,CBD,Danville Mall
555,Personal,1,1,no,,no,55,55,no,,...,1,1,0.0,0.0,1.0,0.0,0.0,1.0,CBD,Danville Mall
556,Personal,1,1,no,,no,55,55,no,,...,22,9,0.0,0.0,1.0,0.0,0.0,9.0,CBD,Danville Mall
557,Personal,1,1,no,,no,55,55,no,,...,7,5,0.0,0.0,1.0,0.0,0.0,5.0,CBD,Danville Mall
558,Personal,1,1,no,,no,55,55,no,,...,7,3,0.0,0.0,1.0,0.0,0.0,3.0,CBD,Danville Mall
559,Personal,1,1,no,,no,55,55,no,,...,33,22,0.0,0.0,1.0,0.0,0.0,22.0,CBD,Danville Mall
560,Personal,1,1,no,,no,55,55,no,,...,1,1,0.0,0.0,1.0,0.0,0.0,1.0,CBD,Danville Mall
561,Personal,1,1,no,,no,55,55,no,,...,19,11,0.0,0.0,1.0,0.0,0.0,11.0,CBD,Danville Mall
562,Personal,1,1,no,,no,55,55,no,,...,7,7,0.0,0.0,1.0,0.0,0.0,7.0,CBD,Danville Mall
563,Personal,1,1,no,,no,55,55,no,,...,6,4,0.0,0.0,1.0,0.0,0.0,4.0,CBD,Danville Mall


In [None]:
df_mall = df_od_purp.loc[selection&(df_od_trip.D_District == "Danville Mall")]

#### Model Trip District Distribution

In [28]:
df_model_trip = pd.read_csv(FILE_MODEL_TRIP, names=['O', 'D', 'HBW_Trip', 'HBO_Trip', 'NHB_Trip'])
df_model_trip.head()

Unnamed: 0,O,D,HBW_Trip,HBO_Trip,NHB_Trip
0,1,1,0.0,0.0,0.683564
1,1,2,0.0,0.0,0.574935
2,1,3,0.0,0.0,0.751772
3,1,4,0.0,0.0,0.689432
4,1,5,0.0,0.0,0.195913


In [29]:
df_model_trip.loc[:,'O_District'] = df_model_trip['O'].map(zone_district_mapper)
df_model_trip.loc[:,'D_District'] = df_model_trip['D'].map(zone_district_mapper)

In [30]:
df_model_purp_gb = df_model_trip.groupby(['O_District', 'D_District'])['HBW_Trip','HBO_Trip','NHB_Trip'].sum().reset_index()
df_model_purp_gb.head()

Unnamed: 0,O_District,D_District,HBW_Trip,HBO_Trip,NHB_Trip
0,Airport Industrial,Airport Industrial,150.298569,182.063174,126.468439
1,Airport Industrial,Averett,8.048068,17.210042,10.297805
2,Airport Industrial,Berry Hill,0.124813,4.931917,3.614509
3,Airport Industrial,Blairs,31.255012,89.46099,76.759
4,Airport Industrial,Brosville,3.902006,11.270106,10.708159


In [31]:
HBW_district_distribution_file = r"T:\TransCAD_Conventional_UI\2016_test\HBW_Model_District_Distribution.csv"
df_hbw_pa = df_model_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','HBW_Trip']].pivot(index='O_District', columns='D_District', values='HBW_Trip')
df_hbw_od = pd.DataFrame(data = (df_hbw_pa.values + df_hbw_pa.T.values)/2, columns=df_hbw_pa.columns, index=df_hbw_pa.index)
df_hbw_od.to_csv(HBW_district_distribution_file)

In [32]:
HBO_district_distribution_file = r"T:\TransCAD_Conventional_UI\2016_test\HBO_Model_District_Distribution.csv"
df_hbo_pa = df_model_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','HBO_Trip']].pivot(index='O_District', columns='D_District', values='HBO_Trip')
df_hbo_od = pd.DataFrame(data = (df_hbo_pa.values + df_hbo_pa.T.values)/2, columns=df_hbo_pa.columns, index=df_hbo_pa.index)
df_hbo_od.to_csv(HBO_district_distribution_file)

In [33]:
NHB_district_distribution_file = r"T:\TransCAD_Conventional_UI\2016_test\NHB_Model_District_Distribution.csv"
df_nhb_pa = df_model_purp_gb.sort_values(by=['O_District','D_District']).loc[:,['O_District','D_District','NHB_Trip']].pivot(index='O_District', columns='D_District', values='NHB_Trip')
df_nhb_od = pd.DataFrame(data = (df_nhb_pa.values + df_nhb_pa.T.values)/2, columns=df_nhb_pa.columns, index=df_nhb_pa.index)
df_nhb_od.to_csv(NHB_district_distribution_file)

#### Model Trip Distance Distribution

In [None]:
df_model_dist_mall = df_model_dist.loc[df_model_dist.D_District == "Danville Mall"]
print("Average trip length for trips to Danville Mall = {0:.2f} miles".format(np.dot(df_model_trip_mall.Trips, df_model_dist_mall.Distance)/sum(df_model_trip_mall.Trips)))

#### Scracth

In [19]:
s = 'TITLE\t\t\tConvertTrips'

In [20]:
s1 = s.replace("\t", "    ")
s1

'TITLE            ConvertTrips'

In [21]:
import re

In [22]:
re.split(r"[\s]{2,}", s1)

['TITLE', 'ConvertTrips']

In [18]:
re.split(r"[\s]{2,}", "title   value")

['title', 'value']

In [25]:
line = "TITLE\t\t\tConvertTrips"
line2 = line.replace("\t", "    ")
print(line2)
res = re.split(r"[\s]{2+}", line2)
print(res)

TITLE            ConvertTrips
['TITLE            ConvertTrips']


In [27]:
def foo():
    return 2,3

res = foo()
print(res)

(2, 3)
