In [1]:
from collections import OrderedDict
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim_templates.models import SmallMultinomialLogitStep
from urbansim.utils import misc
import orca
import os; os.chdir('../')
import warnings; warnings.simplefilter('ignore')

import pandas as pd
# import pandana as pdna
import time
import numpy as np
import copy
import matplotlib.pyplot as plt
import os
from functools import reduce

import scipy.stats as st
from scipy.stats import skewnorm

from scripts import datasources, models, variables

# import matplotlib
# matplotlib.style.use('ggplot')

%matplotlib inline

pd.options.display.max_columns = 80

Registering model step 'auto_ownership'
Registering model step 'dwell_work'
Registering model step 'TOD_choice'
Registering model step 'work_TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'mode_to_work'
Registering model step 'WLCM'


In [None]:
# MTC_acc = pd.read_csv('/home/jayne/ual_model_workspace/fall-2018-models/data/MTC_TAZ_accessibility.csv')

In [None]:
# MTC_acc.head()

In [None]:
# am_acc = pd.read_csv('/home/data/urbanaccess_transit/access_indicators_ampeak.csv',dtype = {'block_id':str})

In [None]:
# am_acc.head()

In [2]:
# Load the BEAM skims excerpt into a notebook-level DataFrame for inspection.
# NOTE(review): absolute, machine-specific path; it lies in the same directory
# that is later assigned to `local_data_dir`.
# NOTE(review): presumably the orca `beam_skims` table consumed by the
# accessibility columns is registered elsewhere (e.g. scripts.datasources)
# and not from this variable -- confirm.
beam_skims = pd.read_csv('/home/data/spring_2019/base/baseline-calibration-smart-rw-55-20.skimsExcerpt.csv')

In [3]:
# Preview the first rows of the skims excerpt to check its columns.
beam_skims.head()

Unnamed: 0,period,mode,origTaz,destTaz,travelTimeInS,generalizedTimeInS,cost,generalizedCost,distanceInM,numObservations
0,AM,CAR,1453,1453,538.0,538.0,0.416894,1.612449,3801.0,2.0
1,PM,CAR,1453,1453,538.0,538.0,0.416894,1.612449,3801.0,2.0
2,OffPeak,CAR,1453,1453,538.0,538.0,0.416894,1.612449,3801.0,20.0
3,AM,CAR,1453,293,5241.0,5241.0,4.015385,15.662051,36610.0,2.0
4,PM,CAR,1453,293,5241.0,5241.0,4.015385,15.662051,36610.0,2.0


In [4]:
# Run the two network-initialisation orca steps by name.
# NOTE(review): neither step is defined in this notebook; presumably they are
# registered when `scripts.models` is imported -- confirm.
orca.run(['initialize_network_small', 'initialize_network_walk'])

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [5]:
# Silence every Python warning from here on.
warnings.simplefilter('ignore')
# accessibilities_mode = 'compute'
# year = 2010
# Data-source switch: the local-CSV injectables are only configured when this
# is 'csv'.
data_mode = 'csv'
# output_tables = [
#     'zones','parcels', 'buildings', 'jobs', 'persons', 'households',
#     'establishments', 'rentals', 'units']
# output_bucket = 'urbansim-outputs'
# beam_bucket = 'urbansim-beam'
# Directory handed to orca as the `local_data_dir` injectable
# (absolute, machine-specific path).
local_data_dir = '/home/data/spring_2019/base/'
# fname_walk = 'walk_net_vars.csv'
# fname_drive = 'drive_net_vars.csv'

# Expose the chosen data mode to orca as an injectable.
orca.add_injectable('data_mode', data_mode)

In [6]:
# Local-CSV mode: register an empty store and S3 URL, plus the local data
# directory, as orca injectables.
if data_mode == 'csv':
    for injectable_name, injectable_value in (
            ('store', None),
            ('s3_input_data_url', None),
            ('local_data_dir', local_data_dir)):
        orca.add_injectable(injectable_name, injectable_value)

In [7]:
def compute_range(travel_data, attr, travel_time_attr, dist, agg=np.sum):
    """
    Aggregate a zonal attribute over all destinations within an impedance
    cut-off of each origin zone.

    Parameters
    ----------
    travel_data : dataframe
        Zone-to-zone travel data with a two-level index.  The second index
        level must be named ``to_zone_id``; the first level identifies the
        origin zone.  Impedances between zones are stored as columns.
    attr : series
        Values to aggregate, looked up by destination zone id.
    travel_time_attr : string
        Name of the ``travel_data`` column used as the impedance.
    dist : float
        Cut-off: only rows whose impedance is strictly below this value
        are kept.
    agg : function, optional, np.sum by default
        Function applied to the attribute values of each origin zone.

    Returns
    -------
    series
        One aggregated value per origin zone that has at least one
        destination within the cut-off.
    """
    # Move the destination level out of the index so it can be used for the
    # attribute lookup; the origin level stays as the index.
    by_origin = travel_data.reset_index(level=1)
    reachable = by_origin.loc[by_origin[travel_time_attr] < dist]

    # Attach the destination zone's attribute value to every remaining pair.
    destination_values = attr[reachable["to_zone_id"]].values
    reachable = reachable.assign(attr=destination_values)

    return reachable.groupby(level=0)["attr"].apply(agg)

In [8]:
def register_skim_access_variable(
        column_name, variable_to_summarize, impedance_measure,
        distance, mode_name, period, log=False):
    """
    Register a skim-based accessibility column on the orca ``zones`` table.

    The registered column filters the ``beam_skims`` table to one mode and
    one period, then calls ``misc.compute_range`` to sum
    ``variable_to_summarize`` using ``impedance_measure`` and ``distance``.

    Parameters
    ----------
    column_name : str
        Name of the orca column to register this variable as.
    variable_to_summarize : str
        Name of the ``zones`` column to summarize.
    impedance_measure : str
        Name of the skims column to use to measure inter-zone impedance.
    distance : int
        Cut-off passed to ``misc.compute_range`` (e.g. 30 minutes travel
        time).
    mode_name : str
        Value of the skims ``mode`` column to keep.
    period : str
        Value of the skims ``period`` column to keep (AM, PM, OffPeak).
    log : bool, optional
        If True, ``np.log1p`` is applied to the result.  False by default.

    Returns
    -------
    column_func : function
        The function registered with orca.  It takes the ``zones`` and
        ``beam_skims`` tables and returns the accessibility series.
    """
    @orca.column('zones', column_name, cache=True, cache_scope='iteration')
    def column_func(zones, beam_skims):
        df = beam_skims.to_frame()
        df = df[(df['period'] == period) & (df['mode'] == mode_name)]
        results = misc.compute_range(
            df, zones.get_column(variable_to_summarize),
            impedance_measure, distance, agg=np.sum)

        # When fewer values than zones come back, align the result to the
        # zones index and give the missing zones an explicit 0.
        if len(results) < len(zones):
            results = results.reindex(zones.index).fillna(0)

        # The zone's own value is intentionally not added
        # (`results + zones[variable_to_summarize]` is disabled) so that
        # n/a zone combinations can be identified.

        if log:
            # Direct ufunc call; no need to eval() a constant string.
            results = np.log1p(results)

        return results

    return column_func

In [9]:
# Register the `total_jobs` accessibility columns (cut-off 30 on `gen_tt_min`)
# for every skim mode and period.
# Each entry is (column label, skim mode, periods).  The per-mode period order
# is deliberate: it fixes the order in which the columns are registered and
# therefore the column order of the zones table.
skim_access_specs = [
    ('walk', 'WALK', ['AM', 'OffPeak', 'PM']),
    ('bike', 'BIKE', ['PM', 'AM', 'OffPeak']),
    ('WTran', 'WALK_TRANSIT', ['AM', 'PM', 'OffPeak']),
    ('DTran', 'DRIVE_TRANSIT', ['AM', 'PM', 'OffPeak']),
    ('drive', 'CAR', ['AM', 'PM', 'OffPeak']),
]

for mode_label, skim_mode, skim_periods in skim_access_specs:
    for skim_period in skim_periods:
        register_skim_access_variable(
            'total_jobs_{}_30_{}'.format(mode_label, skim_period),
            'total_jobs', 'gen_tt_min', 30, skim_mode, skim_period)

<function __main__.register_skim_access_variable.<locals>.column_func(zones, beam_skims)>

In [10]:
# Materialise the orca `zones` table as a DataFrame; this evaluates the
# registered zone columns, including the skim-based accessibility columns.
zones = orca.get_table('zones').to_frame()

In [11]:
# Preview the first zones together with their accessibility columns.
zones.head()

Unnamed: 0_level_0,gid,area,acres,total_jobs,sum_residential_units,sum_persons,sum_income,total_jobs_walk_30_AM,total_jobs_walk_30_OffPeak,total_jobs_walk_30_PM,total_jobs_bike_30_PM,total_jobs_bike_30_AM,total_jobs_bike_30_OffPeak,total_jobs_WTran_30_AM,total_jobs_WTran_30_PM,total_jobs_WTran_30_OffPeak,total_jobs_DTran_30_AM,total_jobs_DTran_30_PM,total_jobs_DTran_30_OffPeak,total_jobs_drive_30_AM,total_jobs_drive_30_PM,total_jobs_drive_30_OffPeak
zone_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1442,525,5272042.0,121.028999,2194.0,2567.0,5839.0,477004000.0,2186.0,1093.0,19752.0,0.0,8509.0,4341.0,530.0,4341.0,3697.0,4168.0,0.0,0.0,123819.0,14153.0,145124.0
1434,508,7820341.5,179.529999,771.0,2425.0,5237.0,313941043.0,0.0,678.0,0.0,0.0,0.0,0.0,7969.0,4168.0,5619.0,8550.0,0.0,10306.0,31856.0,5075.0,85010.0
246,784,4247867.5,97.517998,4260.0,2946.0,7469.0,511362913.0,0.0,0.0,0.0,0.0,13090.0,0.0,0.0,0.0,0.0,0.0,0.0,10128.0,131659.0,18980.0,195530.0
1312,560,759732480.0,17441.058594,220.0,1018.0,1591.0,65422447.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
705,1205,27835386.0,639.013,948.0,3682.0,9915.0,372431913.0,0.0,1438.0,0.0,0.0,0.0,1570.0,0.0,0.0,0.0,0.0,0.0,1316.0,24570.0,36756.0,51493.0


In [12]:
# Share of zones whose WALK job accessibility is exactly 0, per period.
zone_count = len(zones['total_jobs_walk_30_AM'])
for period_label in ('AM', 'PM', 'OffPeak'):
    zero_access = zones['total_jobs_walk_30_' + period_label] == 0
    print(len(zones[zero_access]) / zone_count)

0.4759284731774415
0.5213204951856947
0.4298486932599725


In [13]:
# Share of zones whose BIKE job accessibility is exactly 0, per period.
zone_count = len(zones['total_jobs_walk_30_AM'])
for period_label in ('AM', 'PM', 'OffPeak'):
    zero_access = zones['total_jobs_bike_30_' + period_label] == 0
    print(len(zones[zero_access]) / zone_count)

0.5612104539202201
0.5405777166437414
0.3170563961485557


In [14]:
# Share of zones whose WALK_TRANSIT job accessibility is exactly 0, per period.
zone_count = len(zones['total_jobs_walk_30_AM'])
for period_label in ('AM', 'PM', 'OffPeak'):
    zero_access = zones['total_jobs_WTran_30_' + period_label] == 0
    print(len(zones[zero_access]) / zone_count)

0.4016506189821183
0.5605226960110041
0.48762035763411277


In [15]:
# Share of zones whose DRIVE_TRANSIT job accessibility is exactly 0, per period.
zone_count = len(zones['total_jobs_walk_30_AM'])
for period_label in ('AM', 'PM', 'OffPeak'):
    zero_access = zones['total_jobs_DTran_30_' + period_label] == 0
    print(len(zones[zero_access]) / zone_count)

0.13411279229711143
0.3858321870701513
0.14580467675378267


In [16]:
# Share of zones whose CAR job accessibility is exactly 0, per period.
zone_count = len(zones['total_jobs_walk_30_AM'])
for period_label in ('AM', 'PM', 'OffPeak'):
    zero_access = zones['total_jobs_drive_30_' + period_label] == 0
    print(len(zones[zero_access]) / zone_count)

0.0577716643741403
0.07152682255845942
0.04676753782668501


In [None]:
# Calculate skim-based accessibility variables for several zonal attributes
# and travel-time cut-offs, collecting the registered column names in
# `skim_access_vars`.
variables_to_aggregate = [
    'total_jobs', 'sum_residential_units', 'sum_persons', 'sum_income']
skim_access_vars = []

# Transit skim variables.
# NOTE(review): register_skim_access_variable requires a mode and a period.
# WALK_TRANSIT / AM are assumptions chosen to match the "transit" intent of
# this cell -- confirm before relying on these columns.
skim_mode = 'WALK_TRANSIT'
skim_period = 'AM'

# Cut-offs applied to `gen_tt_min` (presumably minutes -- confirm).
travel_times = [15, 45]

# The loop variable is `travel_time`, not `time`, so the imported `time`
# module is not shadowed.
for travel_time in travel_times:
    for variable in variables_to_aggregate:
        var_name = '_'.join([variable, str(travel_time), 'gen_tt_min'])
        skim_access_vars.append(var_name)
        register_skim_access_variable(
            var_name, variable, 'gen_tt_min', travel_time,
            skim_mode, skim_period)