In [55]:
# Import Libraries
import numpy as np
import pandas as pd
import os

In [56]:
## Set working directory
# The read_csv calls in this notebook use paths relative to this directory.
# It defaults to the original workstation path; set the TNC_DEMAND_MODEL_DIR
# environment variable to run from a different checkout without editing the
# notebook.
os.chdir(os.environ.get('TNC_DEMAND_MODEL_DIR', 'E:/CR2/Repos/TNC-Demand-Model-Southeast/'))

In [57]:
## Load the predicted trips table (one row per origin/destination pair)
trips = pd.read_csv(filepath_or_buffer='Cleaned_Inputs/Intermediate/neg_bin_pred_trips.csv')
# Preview the first five rows
trips.head(5)

Unnamed: 0,origin,destination,NT_TRIPS,AM_TRIPS,MD_TRIPS,PM_TRIPS,EV_TRIPS
0,21067000101,21067004007,0.00099,0.023043,0.087519,0.0048,0.003503
1,21067000101,21067003802,0.019028,0.237334,0.946659,0.147911,0.063125
2,21067000101,21067003918,0.01402,0.16049,0.895582,0.072979,0.039691
3,21067000101,21067004207,0.270208,1.453685,4.184008,0.940822,0.063206
4,21067000101,21067004006,0.000429,0.003241,0.033683,0.002471,0.001592


In [58]:
## Load shared travel time and fare for each origin/destination pair
# Only the merge keys and the two shared-ride attributes are kept.
traveltime_columns = ['origin', 'destination', 'SHARED_TRAVEL_TIME', 'SHARED_TRIP_FARES']
traveltime = pd.read_csv('Cleaned_Inputs/Intermediate/pred_trips.csv')[traveltime_columns]
traveltime.head(5)

Unnamed: 0,origin,destination,SHARED_TRAVEL_TIME,SHARED_TRIP_FARES
0,21067003802,21067004007,44.016667,20.105323
1,21067004007,21067003802,40.616667,23.364816
2,21067004007,21067003701,44.533333,22.134277
3,21067003701,21067004007,42.75,21.829425
4,21067004207,21067003918,36.583333,19.425726


In [59]:
## Load the ACS median household income table (B19013)
# The file is read as-is; no rows are skipped here and filtering happens later.
median_income = pd.read_csv(filepath_or_buffer='Raw_Inputs/Median Income/ACSDT5Y2019.B19013-Data.csv')
median_income.head(5)

Unnamed: 0,GEO_ID,NAME,B19013_001E,B19013_001M,Unnamed: 4
0,Geography,Geographic Area Name,Estimate!!Median household income in the past ...,Margin of Error!!Median household income in th...,
1,0400000US21,Kentucky,50589,294,
2,1400000US21001970100,"Census Tract 9701, Adair County, Kentucky",32071,9867,
3,1400000US21001970200,"Census Tract 9702, Adair County, Kentucky",41741,9141,
4,1400000US21001970300,"Census Tract 9703, Adair County, Kentucky",33413,3756,


In [60]:
## Give the key columns readable names
# GEO_ID becomes the tract identifier, B19013_001E the income estimate.
income_column_names = {'GEO_ID': 'tract', 'B19013_001E': 'median_income'}
median_income = median_income.rename(columns=income_column_names)
median_income.head(5)

Unnamed: 0,tract,NAME,median_income,B19013_001M,Unnamed: 4
0,Geography,Geographic Area Name,Estimate!!Median household income in the past ...,Margin of Error!!Median household income in th...,
1,0400000US21,Kentucky,50589,294,
2,1400000US21001970100,"Census Tract 9701, Adair County, Kentucky",32071,9867,
3,1400000US21001970200,"Census Tract 9702, Adair County, Kentucky",41741,9141,
4,1400000US21001970300,"Census Tract 9703, Adair County, Kentucky",33413,3756,


In [61]:
## Clean Income Data
# Pull the county out of NAME ("Census Tract ..., <County>, <State>").
# Taking element 1 of the split list (rather than assigning an expanded frame
# to three fixed columns) also tolerates rows with fewer or more
# comma-separated parts, e.g. the label row and the state-level row, which
# simply yield NaN and are filtered out below.
county = median_income['NAME'].str.split(',').str[1].str.strip()
# Filter for Fayette County and keep relevant columns. The explicit .copy()
# makes the result an independent frame, so the column assignments below do
# not trigger SettingWithCopyWarning.
median_income = median_income.loc[county == 'Fayette County', ['tract', 'median_income']].copy()
# Clean tract column by stripping the GEO_ID prefix and converting it to a number
median_income['tract'] = (
    median_income['tract']
    .str.replace('1400000US', '', regex=False)  # literal match, not a regex
    .astype(np.int64)
)
## Convert income into units of $10,000
# The strict integer cast is kept on purpose: a non-numeric income value
# raises here instead of silently becoming NaN.
median_income['median_income'] = median_income['median_income'].astype(np.int64) / 10000
median_income.head()

Unnamed: 0,tract,median_income
264,21067000101,25363
265,21067000102,26477
266,21067000200,26725
267,21067000300,32089
268,21067000400,27031


In [62]:
## Count the distinct tract ids remaining in the income table
median_income['tract'].nunique(dropna=True)

82

In [63]:
## Create origin and destination income tables
# Same income table, renamed so it can be joined once on each end of a trip.
origin = median_income.rename(columns={"tract": "origin", "median_income": "origin_median_income"})
destination = median_income.rename(columns={"tract": "destination", "median_income": "destination_median_income"})

## Merge them with trips table
# validate="many_to_one" makes pandas raise a MergeError if the right-hand
# table has duplicate keys, instead of silently multiplying trip rows.
trips = pd.merge(trips, origin, how="left", on=["origin"], validate="many_to_one")
trips = pd.merge(trips, destination, how="left", on=["destination"], validate="many_to_one")

## Merge trips table with travel times
trips = pd.merge(trips, traveltime, how="left", on=["origin", "destination"], validate="many_to_one")

## Add airport indicator
# 1 when either end of the trip is the airport tract, otherwise 0.
airport_census_tract = 21067004207
touches_airport = (trips['origin'] == airport_census_tract) | (trips['destination'] == airport_census_tract)
trips['airport'] = np.where(touches_airport, 1, 0)

# Only the last expression of a cell is displayed, so preview the fully
# merged table once here rather than mid-cell.
trips.head()

In [64]:
## Apply Table 5.7
## Use logit model to get the probability that a trip is shared
# The utility is built once and reused, so the numerator and denominator can
# never drift apart when a coefficient is edited. Terms are kept in the
# original order, so the result is numerically identical.
# Income enters in units of $10,000; income left in dollars drives exp() to 0
# for every row.
share_utility = (
    -0.85
    - 0.08*trips['SHARED_TRAVEL_TIME']
    - 0.14*trips['SHARED_TRIP_FARES']
    - 0.06*trips['origin_median_income']
    - 0.06*trips['destination_median_income']
    - 2.88*trips['airport']
)
share_odds = np.exp(share_utility)
## Store the probability on each trip row
trips['share_prob'] = share_odds / (1 + share_odds)
trips.head(10)

Unnamed: 0,origin,destination,NT_TRIPS,AM_TRIPS,MD_TRIPS,PM_TRIPS,EV_TRIPS,origin_median_income,destination_median_income,SHARED_TRAVEL_TIME,SHARED_TRIP_FARES,airport,share_prob
0,21067000101,21067004007,0.00099,0.023043,0.087519,0.0048,0.003503,25363,81250,31.566667,13.710748,0,0.0
1,21067000101,21067003802,0.019028,0.237334,0.946659,0.147911,0.063125,25363,67212,23.533333,11.379667,0,0.0
2,21067000101,21067003918,0.01402,0.16049,0.895582,0.072979,0.039691,25363,87656,23.266667,10.216036,0,0.0
3,21067000101,21067004207,0.270208,1.453685,4.184008,0.940822,0.063206,25363,127798,27.933333,10.938764,1,0.0
4,21067000101,21067004006,0.000429,0.003241,0.033683,0.002471,0.001592,25363,228125,29.566667,10.854516,0,0.0
5,21067000101,21067004005,0.002499,0.00325,0.079174,0.010141,0.007625,25363,128250,25.35,10.437584,0,0.0
6,21067000101,21067003913,0.02432,0.117139,0.629957,0.102294,0.063996,25363,109490,23.333333,9.525969,0,0.0
7,21067000101,21067004107,0.026545,0.234296,0.918269,0.142341,0.07704,25363,93438,24.516667,9.197809,0,0.0
8,21067000101,21067003701,0.01361,0.182846,0.809629,0.108785,0.043808,25363,93750,22.966667,9.182138,0,0.0
9,21067000101,21067004106,0.016251,0.12807,0.537855,0.087813,0.048942,25363,103871,24.75,9.557934,0,0.0


In [65]:
## Summary statistics for the share probability
trips['share_prob'].to_frame().describe()

Unnamed: 0,share_prob
count,6724.0
mean,0.0
std,0.0
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,0.0
