- To do: Create more time-based accessibilities

In [1]:
import numpy as np
import pandas as pd
import orca
from matplotlib import pyplot as plt
import matplotlib.animation as animation

import os; os.chdir('../../')
import warnings; warnings.simplefilter('ignore')

import pandana as pdna
from collections import OrderedDict
import pylogit as pl
from pylogit import nested_logit as nl
import math 
import pickle
import dill
import time
import random
import scipy.stats as st
from sklearn.neighbors import BallTree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier
    
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim_templates.models import BinaryLogitStep
from urbansim.utils import misc
import urbansim_templates

from scripts import datasources, models, variables, utils

Registering model step 'TOD_choice'
Registering model step 'auto_ownership'
Registering model step 'WLCM_gen_tt'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'
Registering model step 'Public_vs_private'


In [2]:
# File format (extension) of the input tables and the S3 prefix they live under.
input_file_format = 'csv'
input_data_dir = 's3://baus-data/spring_2019/'

# Maps a logical table name to a filename template. The '{0}' placeholder is
# filled with the file extension by format_fname_dict(); entries without a
# placeholder (e.g. 'beam_skims_raw') pass through formatting unchanged.
formattable_fname_dict = {
    'parcels': 'parcels.{0}',
    'buildings': 'buildings.{0}',
    'jobs': 'jobs.{0}',
    'establishments': 'establishments.{0}',
    'households': 'households.{0}',
    'persons': 'persons.{0}',
    'rentals': 'craigslist.{0}',
    'units': 'units.{0}',
    'mtc_skims': 'mtc_skims.{0}',
    'beam_skims_raw': '30.skims-smart-23April2019-baseline.csv.gz',
    'beam_skims_imputed': 'beam_skims_imputed.{0}',
    # the following nodes and edges .csv's will be phased out and
    # replaced by travel model skims entirely
    'drive_nodes': 'drive_nodes.{0}',
    'drive_edges': 'drive_edges.{0}',
    'drive_access_vars': 'drive_net_vars.{0}',
    'walk_nodes': 'walk_nodes.{0}',
    'walk_edges': 'walk_edges.{0}',
    'walk_access_vars': 'walk_net_vars.{0}',
    'zones': 'zones.{0}',
    'zone_access_vars': 'zones_w_access_vars.{0}',
    'students_k12': 'students_k12.{0}',
    'schools_k12': 'schools_bay_area.{0}'
}

def format_fname_dict(formattable_fname_dict, format='csv'):
    """Fill the file-extension placeholder of every filename template.

    Parameters
    ----------
    formattable_fname_dict : dict
        Maps a table name to a filename template. Templates may contain a
        ``{0}`` placeholder for the file extension; templates without a
        placeholder are returned unchanged.
    format : str, optional
        Extension substituted for the placeholder (default ``'csv'``).

    Returns
    -------
    dict
        New dict with the same keys and the formatted filenames. The input
        dict is not modified.
    """
    # Use the caller-supplied extension; it used to be hard-coded to 'csv',
    # which made the ``format`` argument a no-op.
    return {
        table_name: template.format(format)
        for table_name, template in formattable_fname_dict.items()}

# Resolve every filename template to a concrete name for the configured input format.
input_fnames = format_fname_dict(formattable_fname_dict, format=input_file_format)

In [3]:
# Register the I/O settings as orca injectables.
# NOTE(review): presumably the steps in `scripts` read these by name — confirm.
orca.add_injectable('input_file_format', input_file_format)
orca.add_injectable('input_data_dir', input_data_dir)
orca.add_injectable('input_fnames', input_fnames)
# 'store' is registered explicitly as None.
orca.add_injectable('store', None)

In [4]:
# Run the initialization steps one at a time, each as its own orca.run call.
# In the recorded output below, 'initialize_imputed_skims' took ~84 s while
# the other steps reported 0.00 s.
orca.run(['initialize_network_small']) 
orca.run(['initialize_network_walk']) 
orca.run(['initialize_imputed_skims']) 
orca.run(['skims_aggregations']) 

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s
Running step 'initialize_imputed_skims'
Time to execute step 'initialize_imputed_skims': 83.67 s
Total time to execute iteration 1 with iteration value None: 83.67 s
Running step 'skims_aggregations'
Time to execute step 'skims_aggregations': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [5]:
# These tables (distance-based accessibilities) were saved to disk just for convenience.
# NOTE(review): the absolute /home/ubuntu paths tie this cell to one machine;
# consider a configurable data directory.
# Both CSVs are indexed by their 'osmid' column.
nodessmall = pd.read_csv('/home/ubuntu/activitysynth/activitysynth/data/nodessmall.csv').set_index('osmid')
nodeswalk = pd.read_csv('/home/ubuntu/activitysynth/activitysynth/data/nodeswalk.csv').set_index('osmid')

# Register the frames as orca tables under the same names.
orca.add_table('nodessmall', nodessmall)
orca.add_table('nodeswalk', nodeswalk)

<orca.orca.DataFrameWrapper at 0x7f318e14ac50>

In [6]:
# Declare how the node and zone tables join onto 'persons': each cast table is
# matched by its index (cast_index=True) against the persons column named in
# `onto_on`. These relationships are what orca.merge_tables relies on.
orca.broadcast('nodessmall', 'persons', cast_index = True, onto_on = 'node_id_small')
orca.broadcast('nodeswalk', 'persons', cast_index = True, onto_on = 'node_id_walk')
orca.broadcast('zones', 'persons', cast_index = True, onto_on = 'zone_id_home')

In [9]:
@orca.column('nodessmall')
def mean9(public_schools_50):
    """Column ``mean9`` of the 'nodessmall' table.

    Returns the ``mean9`` attribute of the injected ``public_schools_50``.
    NOTE(review): what ``public_schools_50`` resolves to is not visible in
    this notebook — confirm it is registered with orca and exposes ``mean9``.
    """
    return public_schools_50.mean9

In [None]:
@orca.column('households')
def income_rank(households):
    """Column ``income_rank`` of the 'households' table.

    Bins ``households.income`` into ten brackets labelled 1 (lowest) to 10
    (highest) with ``pd.cut``; intervals are right-closed (``pd.cut``
    default), so an income of exactly 10000 falls in bracket 1.
    """
    # Bracket edges; the outermost edges are infinite so every income is binned.
    income_edges = [-np.inf, 10000, 25000, 35000, 50000, 75000,
                    100000, 150000, 200000, 250000, np.inf]
    # One label per interval between consecutive edges: 1..10.
    bracket_labels = list(range(1, len(income_edges)))
    return pd.cut(households.income, bins=income_edges, labels=bracket_labels)

In [28]:
# Merge the node- and zone-level tables onto 'persons' using the broadcasts
# registered earlier in the notebook.
merged_full = orca.merge_tables('persons', 
                  ['persons', 'nodessmall', 'nodeswalk', 'zones'])

In [29]:
# Sanity check: (rows, columns) of the merged frame.
merged_full.shape

(6571488, 195)

In [30]:
# Materialize the orca 'households' table as a pandas DataFrame.
households = orca.get_table('households').to_frame()

In [33]:
# Materialize the orca 'nodessmall' table as a DataFrame; this rebinds the
# `nodessmall` name that earlier held the frame read from CSV.
nodessmall = orca.get_table('nodessmall').to_frame()

In [35]:
# Materialize the orca 'nodeswalk' table as a DataFrame; this rebinds the
# `nodeswalk` name that earlier held the frame read from CSV.
nodeswalk = orca.get_table('nodeswalk').to_frame()

In [37]:
# Materialize the orca 'zones' table as a pandas DataFrame.
zones = orca.get_table('zones').to_frame()

In [38]:
# Display the zones frame.
# NOTE(review): consider zones.head() to keep the rendered output small.
zones

Unnamed: 0_level_0,gid,area,acres,total_jobs,sum_residential_units,sum_persons,sum_income,avg_income,total_jobs_gen_tt_WALK_TRANSIT_15,total_jobs_gen_tt_WALK_TRANSIT_45,...,avg_income_gen_tt_WALK_TRANSIT_30,total_jobs_gen_tt_CAR_15,total_jobs_gen_tt_CAR_45,sum_persons_gen_tt_CAR_15,sum_persons_gen_tt_CAR_45,sum_income_gen_tt_CAR_15,sum_income_gen_tt_CAR_45,sum_residential_units_gen_tt_CAR_15,sum_residential_units_gen_tt_CAR_45,avg_income_gen_tt_CAR_30
zone_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1442,525,5.272042e+06,121.028999,2194.0,2567.0,5839.0,477004000.0,207557.460533,6535.0,15529.0,...,389859.575835,77852.0,817038.0,172198.0,1905753.0,1.061663e+10,9.200540e+10,82479.0,894586.0,324956.320230
1434,508,7.820342e+06,179.529999,771.0,2425.0,5237.0,313941043.0,141307.478037,2645.0,11639.0,...,278758.962238,32692.0,652413.0,84295.0,1681229.0,5.005057e+09,7.959563e+10,37851.0,791987.0,262874.802420
246,784,4.247868e+06,97.517998,4260.0,2946.0,7469.0,511362913.0,218191.372815,4260.0,4260.0,...,218191.372815,173884.0,1373518.0,360301.0,2984104.0,1.812744e+10,1.412099e+11,151778.0,1333556.0,347640.029933
1312,560,7.597325e+08,17441.058594,220.0,1018.0,1591.0,65422447.0,86841.113816,220.0,220.0,...,86841.113816,440.0,20452.0,3182.0,27961.0,1.308449e+08,1.370876e+09,2036.0,16441.0,189294.231209
705,1205,2.783539e+07,639.013000,948.0,3682.0,9915.0,372431913.0,117368.284928,1667.0,2497.0,...,214184.516958,19076.0,134984.0,76673.0,635582.0,2.694645e+09,2.362599e+10,26407.0,221699.0,237441.486169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,1446,2.359150e+07,541.585999,4535.0,5456.0,8644.0,357485410.0,146363.284644,4535.0,4535.0,...,146363.284644,180444.0,1491176.0,518457.0,3577729.0,1.820532e+10,1.584445e+11,196638.0,1476888.0,261174.238894
874,1447,9.820885e+06,225.455994,1929.0,0.0,0.0,0.0,0.000000,1929.0,1929.0,...,0.000000,215299.0,1345925.0,598565.0,3292059.0,1.908649e+10,1.486743e+11,264320.0,1439477.0,103176.296058
1146,1452,1.179568e+06,27.079000,7496.0,2017.0,3164.0,140636020.0,80476.864185,7496.0,12239.0,...,80476.864185,151800.0,1171730.0,391180.0,3038364.0,1.971251e+10,1.280333e+11,170094.0,1326306.0,186845.728063
1201,1453,2.534543e+06,58.185001,695.0,1232.0,4130.0,92060546.0,80262.027899,1043.0,3763.0,...,135007.626519,62977.0,460112.0,297331.0,1375562.0,8.686066e+09,5.585676e+10,113794.0,582751.0,184010.124049


In [32]:
# List the columns available on the households frame.
households.columns

Index(['serialno', 'persons', 'building_type', 'cars', 'income',
       'race_of_head', 'hispanic_head', 'age_of_head', 'workers', 'state',
       'county', 'tract', 'block_group', 'children', 'tenure', 'recent_mover',
       'block_group_id', 'single_family', 'unit_id', 'building_id',
       'node_id_small', 'node_id_walk', 'hh_inc_under_25k', 'hh_inc_25_to_75k',
       'hh_inc_75_to_200k', 'income_2', 'income_4', 'income_6', 'income_8',
       'income_10', 'income_12', 'income_12p', 'tenure_1', 'tenure_2',
       'tenure_3', 'tenure_4', 'single_family_int', 'building_type_2',
       'hh_inc_150kplus', 'hh_size_1per', 'income_rank', 'hh_size_over_4',
       'hispanic_head_SMC', 'tenure_SMC', 'income_1', 'race_head_white',
       'race_head_african_american', 'race_head_indian/alaska',
       'race_head_asian', 'race_head_hawaii/pacific', 'race_head_other',
       'race_head_2+races'],
      dtype='object')

In [7]:
# Pull the core tables out of orca as pandas DataFrames for inspection.
persons = orca.get_table('persons').to_frame()
households = orca.get_table('households').to_frame()
units = orca.get_table('units').to_frame()
buildings = orca.get_table('buildings').to_frame()
parcels = orca.get_table('parcels').to_frame()

In [None]:
# Re-fetch the persons table as a DataFrame.
# NOTE(review): duplicates the fetch in the cell above; unexecuted (In [None]).
persons = orca.get_table('persons').to_frame()

In [None]:
# List the columns available on the persons frame.
persons.columns

In [None]:
# Re-fetch households and persons as DataFrames.
# NOTE(review): both were already materialized in earlier cells; unexecuted (In [None]).
households = orca.get_table('households').to_frame()
persons = orca.get_table('persons').to_frame()

In [None]:
# Preview the first rows of the households frame.
households.head()

In [None]:
# Attach household attributes to each person: inner join of the persons
# 'household_id' column against the households index.
merge = persons.merge(
    households,
    how='inner',
    left_on='household_id',
    right_index=True,
)

In [None]:
# (rows, columns) of the persons frame.
persons.shape

In [None]:
# Show the names of all tables currently registered with orca.
orca.list_tables()

In [None]:
# Merge households onto persons through orca and display the result.
# NOTE(review): the result is not assigned to a name; no persons/households
# broadcast is registered in this notebook, so presumably one is defined in
# `scripts` — confirm.
orca.merge_tables('persons', ['persons', 'households'])