In [2]:
import os; os.chdir('..')
import numpy as np, pandas as pd 
import matplotlib.pyplot as plt
import warnings;
import glob
warnings.simplefilter('ignore')

from collections import OrderedDict
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import SmallMultinomialLogitStep
import orca
import seaborn as sns
%matplotlib notebook

import glob

from scripts import datasources, models, variables

mm.initialize()

Loading model step 'auto_ownership'
Loading model step 'WLCM'


In [79]:
trips = pd.read_csv('notebooks-jayne/mode_choice/trips_101218.csv',index_col =0)
trips.rename(columns = {'LIC_y':'LIC','HOURS_y':'HOURS'},inplace = True)

In [None]:
# Each row is a single trip.
# attributes wanted:
# Time of day leaving home for work, time of day leaving work
# home zone, work zone
# mode (recode)
# gender, age, race, ntvty, lic, jobs (>1 binary), full_time (hours<40 binary), education, disability, flexible work schedule (to dummies?)
# attributes from skims

In [80]:
hh_df = pd.read_csv('/home/data/CHTS_csv_format/data/Deliv_HH.csv')[
  ['SAMPN','HHVEH','HHBIC','VEHNEW','OWN','INCOM','HHSIZ']]
trips = trips.merge(hh_df,on = 'SAMPN',how = 'left')

In [81]:
na_dict = {
    'GEND':[9],
    'AGE':[998,999],
    'RACE':[97,98,99],
    'NTVTY':[8,9],
    'LIC':[8,9],
    'JOBS':[98,99],
    'HOURS':[998,999],
    'EDUCA':[7,8,9],
    'WSCHED':[8,9],
    'DISAB':[8,9],
    'HHVEH':[98,99],
    'HHBIC':[98,99],
    'VEHNEW':[8,9],
    'OWN':[7,8,9],
    'INCOM':[98,99],
    'HHSIZ':[98,99]
}

for col in na_dict:
    for vals in na_dict[col]:
        trips[col] = trips[col].replace(vals,np.nan)
        
trips.dropna(inplace = True)

In [168]:
TOD_list = ['EA','AM','MD','PM','EV']
var_list = ['Distance','Cost','Time']
skim_dir = '/home/data/fall_2018/mtc_skims'
skim_dict = dict()
for var in var_list:
    df_base = pd.read_csv(skim_dir + f'/{var}SkimsDatabaseEA.csv').replace(-999, 9999)
    df_base['TOD'] = 'EA'
    for TOD in TOD_list[1:]:
        df_tod = pd.read_csv(skim_dir + f'/{var}SkimsDatabase{TOD}.csv').replace(-999, 9999)
        df_tod['TOD'] = TOD
        df_base = pd.concat([df_base, df_tod])
    skim_dict[var] = df_base
        


# Time of day definitions

• Early (3:00 AM to 5:59 AM)
• AM peak (6:00 AM to 8:59 AM)
• Midday (9:00 AM to 3:29 PM)
• PM peak (3:30 PM to 6:29 PM)
• Late (6:30 PM to 2:59 AM)
EA, AM, MD, PM, EV

In [87]:
mode_dict = dict(zip(['drive', 'walk_transit', 'bike', 'drive_transit', 'walk'],range(1,6)))
trips['MODE'] = trips['MODE'].replace(mode_dict)

In [134]:
trips['TOD'] = (
    ((trips.loc_ET.between(3,6,inclusive = False)) | (trips.loc_ET==3))*1 +
    ((trips.loc_ET.between(6,9,inclusive = False)) | (trips.loc_ET==6))*2 +
    ((trips.loc_ET.between(9,15.5,inclusive = False)) | (trips.loc_ET==9))*3 +
    ((trips.loc_ET.between(15.5,18.5,inclusive = False)) | (trips.loc_ET==15.5))*4 +
    ((trips.loc_ET>=18.5))*5 +
    ((trips.loc_ET.between(0,3,inclusive = False)) | (trips.loc_ET==0))*5
)
TOD_dict = dict(zip(range(1,6),TOD_list))
trips['TOD'] = trips['TOD'].replace(TOD_dict)


In [160]:
for var in var_list: trips[var] = 0
trips['O'] = trips.place == 'home' * trips.zone_id_home + trips.place == 'work' * trips.zone_id_work
trips['D'] = trips.place == 'work' * trips.zone_id_home + trips.place == 'home' * trips.zone_id_work

In [174]:
trips.columns

Index(['SAMPN', 'PERNO', 'HHPER', 'HHPERTRIP', 'place', 'dwell', 'loc_ST',
       'loc_ET', 'trip_ET', 'TT', 'MODE', 'zone_id_home', 'zone_id_work',
       'HXCORD', 'HYCORD', 'WXCORD_lookup', 'WYCORD_lookup', 'GEND', 'AGE',
       'RACE', 'NTVTY', 'LIC', 'JOBS', 'HOURS', 'WSCHED', 'EDUCA', 'DISAB',
       'HHVEH', 'HHBIC', 'VEHNEW', 'OWN', 'INCOM', 'HHSIZ', 'TOD', 'Distance',
       'Cost', 'Time'],
      dtype='object')

In [197]:
trips1 = trips.copy()
for var in var_list:
    trips1 = trips1.merge(skim_dict[var].rename(
        columns = dict(zip([i for i in skim_dict[var].columns if i not in ['orig','dest','TOD']],
         [i+ f'_{var}' for i in skim_dict[var].columns if i not in ['orig','dest','TOD']]))                                          
                                           ), how = 'left',left_on = ['O','D','TOD'], 
    right_on = ['orig','dest','TOD'])
    trips1.drop(['orig','dest'],axis = 1,inplace = True)

In [200]:
@orca.table(cache=True)
def trips():
    return trips1

In [202]:
trips1.columns

Index(['SAMPN', 'PERNO', 'HHPER', 'HHPERTRIP', 'place', 'dwell', 'loc_ST',
       'loc_ET', 'trip_ET', 'TT', 'MODE', 'zone_id_home', 'zone_id_work',
       'HXCORD', 'HYCORD', 'WXCORD_lookup', 'WYCORD_lookup', 'GEND', 'AGE',
       'RACE', 'NTVTY', 'LIC', 'JOBS', 'HOURS', 'WSCHED', 'EDUCA', 'DISAB',
       'HHVEH', 'HHBIC', 'VEHNEW', 'OWN', 'INCOM', 'HHSIZ', 'TOD', 'Distance',
       'Cost', 'Time', 'O', 'D', 'da_Distance', 'daToll_Distance',
       's2_Distance', 's2Toll_Distance', 's3_Distance', 's3Toll_Distance',
       'walk_Distance', 'bike_Distance', 'da_Cost', 'daToll_Cost', 's2_Cost',
       's2Toll_Cost', 's3_Cost', 's3Toll_Cost', 'wTrnW_Cost', 'dTrnW_Cost',
       'wTrnD_Cost', 'da_Time', 'daToll_Time', 's2_Time', 's2Toll_Time',
       's3_Time', 's3Toll_Time', 'walk_Time', 'bike_Time', 'wTrnW_Time',
       'dTrnW_Time', 'wTrnD_Time'],
      dtype='object')

In [215]:
# mode_dict = dict(zip(['drive', 'walk_transit', 'bike', 'drive_transit', 'walk'],range(1,6)))
m = SmallMultinomialLogitStep()
m.name = 'primary_mode_choice'
m.tables = ['trips']
m.choice_column = 'MODE'
m.model_expression = OrderedDict([
    ('intercept', [0,1,2,3]), 
    ('da_Distance',[0]),
    ('da_Cost',[0]),
    ('da_Time',[0]),
    ('wTrnW_Cost',[1]),
    ('wTrnW_Time',[1]),
    ('bike_Distance',[2]),
    ('bike_Time',[2]),    
    ('wTrnD_Cost',[3]),
    ('dTrnW_Cost',[3]),
    ('wTrnD_Time',[3]),
    ('dTrnW_Time',[3]),
    ('walk_Distance',[4]),
    ('walk_Time',[4]),  
    
    
])

In [216]:
m.fit()

Log-likelihood at zero: -13,377.6479
Initial Log-likelihood: -13,377.6479
Estimation Time for Point Estimation: 0.98 seconds.
Final log-likelihood: -4,625.8747
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                     _chosen   No. Observations:                8,496
Model:             Multinomial Logit Model   Df Residuals:                    8,479
Method:                                MLE   Df Model:                           17
Date:                     Wed, 17 Oct 2018   Pseudo R-squ.:                   0.654
Time:                             21:51:28   Pseudo R-bar-squ.:               0.653
AIC:                             9,285.749   Log-Likelihood:             -4,625.875
BIC:                             9,405.554   LL-Null:                   -13,377.648
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
