In [1]:
from collections import OrderedDict
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim_templates.models import SmallMultinomialLogitStep
import orca
import os; os.chdir('../')
import warnings; warnings.simplefilter('ignore')

import pandas as pd
# import pandana as pdna
import time
import numpy as np
import matplotlib.pyplot as plt
import os
from functools import reduce

import scipy.stats as st
from scipy.stats import skewnorm

# import matplotlib
# matplotlib.style.use('ggplot')

%matplotlib inline

pd.options.display.max_columns = 80

## Load Data

In [2]:
# Location of the home/work trip extract read into `trips`.
TRIPS_CSV = '/home/emma/ual_model_workspace/fall-2018-models/notebooks-emma/HWtrips_031418.csv'

trips = pd.read_csv(TRIPS_CSV)

# Preview the first rows of the raw trip table.
trips.head()

Unnamed: 0.1,Unnamed: 0,HHPER,HHPERTRIP,origin,origin_dwell,origin_ST,origin_ET,trip_ET,TT,MODE
0,0,10351981,1035198000.0,home,14.0,17.333333,7.333333,7.7,0.366667,drive_alone
1,1,10351981,1035198000.0,work,9.383333,7.7,17.083333,17.333333,0.25,drive_alone
2,2,10351982,1035198000.0,home,10.416667,19.583333,6.0,6.25,0.25,drive_alone
3,3,10351982,1035198000.0,work,10.25,6.25,16.5,19.583333,3.083333,drive_alone
4,4,10352742,1035274000.0,home,13.583333,19.166667,8.75,9.166667,0.416667,drive_alone


## Prepare TOD and Dwell columns

In [3]:
# Keep only persons (HHPER) that have exactly two trip rows, i.e. those
# recorded with both a home-work and a work-home trip.
rows_per_person = trips.groupby('HHPER')['HHPER'].transform('size')
trips = trips[rows_per_person == 2]

In [4]:
# Order rows by person and then by origin label, so that within each person
# the 'home'-origin row sorts ahead of the 'work'-origin row.
trips = trips.sort_values(by=['HHPER', 'origin'])
# reset_index() without drop=True keeps the previous row labels as an 'index' column.
trips = trips.reset_index()

In [5]:
# Copy each person's second row (the work-origin leg) onto the first row
# (the home-origin leg) by shifting values up one position within each person.
# Pairs are (source column, new column holding the shifted value).
work_leg_columns = [
    ('origin_dwell', 'work_dwell'),
    ('origin_ST', 'work_ST'),
    ('origin_ET', 'WH_trip_ST'),
    ('trip_ET', 'WH_trip_ET'),
    ('TT', 'WH_TT'),
    ('MODE', 'WH_mode'),
]

by_person = trips.groupby('HHPER', group_keys=False)

# Compute every shifted series first, then attach them in the original order.
shifted_values = [
    (new_name, by_person[source_name].shift(-1))
    for source_name, new_name in work_leg_columns
]
for new_name, values in shifted_values:
    trips[new_name] = values

In [6]:
# Collapse to one row per person: the first row of each HHPER, which is the
# home-origin leg after the sort above.
#
# drop_duplicates(keep='first') is used instead of groupby(...).first() because
# GroupBy.first() returns the first *non-null* value of every column; a missing
# value in the home row would be silently filled from the work row, mixing the
# two legs. Here a missing home-row value stays missing.
home_rows = trips.drop_duplicates(subset='HHPER', keep='first')

# Keep the layout the groupby/reset_index form produced: HHPER as the leading
# column, remaining columns in their existing order, and a fresh 0..n-1 index.
leading = ['HHPER']
trailing = [col for col in home_rows.columns if col != 'HHPER']
trips = home_rows[leading + trailing].reset_index(drop=True)

In [7]:
# The remaining per-person row carries the home-origin leg, so relabel its
# generic trip columns with home / home-to-work (HW) names.
home_leg_names = {
    'origin_dwell': 'home_dwell',
    'origin_ST': 'home_ST',
    'origin_ET': 'HW_trip_ST',
    'trip_ET': 'HW_trip_ET',
    'TT': 'HW_TT',
    'MODE': 'HW_mode',
}
trips = trips.rename(columns=home_leg_names)

trips.head()

Unnamed: 0.1,HHPER,index,Unnamed: 0,HHPERTRIP,origin,home_dwell,home_ST,HW_trip_ST,HW_trip_ET,HW_TT,HW_mode,work_dwell,work_ST,WH_trip_ST,WH_trip_ET,WH_TT,WH_mode
0,10351981,0,0,1035198000.0,home,14.0,17.333333,7.333333,7.7,0.366667,drive_alone,9.383333,7.7,17.083333,17.333333,0.25,drive_alone
1,10351982,2,2,1035198000.0,home,10.416667,19.583333,6.0,6.25,0.25,drive_alone,10.25,6.25,16.5,19.583333,3.083333,drive_alone
2,10352742,4,4,1035274000.0,home,13.583333,19.166667,8.75,9.166667,0.416667,drive_alone,7.583333,9.166667,16.75,19.166667,2.416667,drive_alone
3,10353643,6,6,1035364000.0,home,11.333333,19.583333,6.916667,7.416667,0.5,drive_alone,8.633333,7.416667,16.05,19.583333,3.533333,drive_alone
4,10372952,8,8,1037295000.0,home,17.166667,21.833333,15.0,15.416667,0.416667,drive_alone,6.0,15.416667,21.416667,21.833333,0.416667,drive_alone


In [8]:
# Bin the home-to-work trip end time into five time-of-day categories.
# NOTE(review): HW_trip_ET looks like decimal hours on a 24h clock -- confirm.
#
# Explicit half-open comparisons [low, high) replace
# Series.between(low, high, inclusive=False) | (x == low): the boolean
# `inclusive` argument is deprecated since pandas 1.3 and rejected by
# pandas 2.x, while plain comparisons behave the same on every version.
hw_end = trips.HW_trip_ET

trips['TOD'] = (
    ((hw_end >= 3) & (hw_end < 6)) * 1 +
    ((hw_end >= 6) & (hw_end < 9)) * 2 +
    ((hw_end >= 9) & (hw_end < 15.5)) * 3 +
    ((hw_end >= 15.5) & (hw_end < 18.5)) * 4 +
    # Evening (>= 18.5) and after-midnight [0, 3) share the same category.
    (hw_end >= 18.5) * 5 +
    ((hw_end >= 0) & (hw_end < 3)) * 5
)

# Shift the codes to 0-4. Rows that match no bin (missing or negative end
# times) sum to 0 above and therefore end up as -1.
trips['TOD'] = trips['TOD'] - 1

In [9]:
# Coerce TOD to a numeric dtype. TOD is built from integer arithmetic on boolean
# masks, so this is presumably a no-op safeguard -- TODO confirm it is still needed.
trips['TOD'] = pd.to_numeric(trips['TOD'])

In [10]:
# Bin time spent at work into five duration categories coded 1-5.
# NOTE(review): work_dwell looks like decimal hours -- confirm.
#
# Explicit half-open comparisons [low, high) replace
# Series.between(low, high, inclusive=False) | (x == low): the boolean
# `inclusive` argument is deprecated since pandas 1.3 and rejected by
# pandas 2.x, while plain comparisons behave the same on every version.
work_stay = trips.work_dwell

# Rows that match no bin (missing or negative durations) are coded 0.
trips['dwell_work'] = (
    ((work_stay >= 0) & (work_stay < 4.5)) * 1 +
    ((work_stay >= 4.5) & (work_stay < 7.75)) * 2 +
    ((work_stay >= 7.75) & (work_stay < 9.0)) * 3 +
    ((work_stay >= 9.0) & (work_stay < 10.5)) * 4 +
    (work_stay >= 10.5) * 5)

## Add the demographic variables

In [11]:
# Person-level survey file read into `person`.
PERSON_CSV = '/home/data/CHTS_csv_format/data/Deliv_PER.csv'

person = pd.read_csv(PERSON_CSV)

# Preview the first rows of the person table.
person.head()

Unnamed: 0,SAMPN,PERNO,RELAT,GEND,AGE,AGEB,HISP,RACE1,RACE2,RACE3,RACE4,O_RACE,NTVTY,CNTRY,LIC,USER,TRANS,TPTYP1,TPTYP2,TPTYP3,TPTYP4,TPTYP5,TPTYP6,TPTYP7,O_TPTYP,CLIP1,CLIP2,CLIP3,COMP,MET,PASSTL,FLEX,EMPLY,WKSTAT,O_WKSTAT,JOBS,WLOC,WNAME,WCITY,WSTAT,...,HVLOG,PTRIPS,TOLLF,TOLLR1,TOLLR2,TOLLR3,TOLLR4,TOLLR5,TOLLR6,TOLLR7,TOLLR8,TOLLR9,TOLLR10,TOLLB1,TOLLB2,TOLLB3,TOLLB4,TOLLB5,TOLLB6,TOLLB7,TOLLB8,TOLLB9,TOLLB10,HOVL,NOGOWHY,NOGOWHY_O,InComplete,Moto_trip,WCTFIP,WTRACT,SCTFIP,STRACT,WPrimaryCity,WSTFIP,W2PrimaryCity,W2STFIP,SPrimaryCity,SSTFIP,PERWGT,EXPPERWGT
0,1031985,1,1,1,74,,2,1.0,,,,,1,,1.0,1.0,2.0,,,,,,,,,,,,,,2.0,2.0,2.0,1.0,,,,,,,...,1.0,2.0,3.0,,,,,,,,,,,,,,,,,,,,,2.0,,,,1.0,,,,,,,,,,,0.052086,17.647568
1,1031985,2,2,2,73,,2,1.0,,,,,1,,1.0,1.0,2.0,,,,,,,,,,,,,,2.0,2.0,2.0,1.0,,,,,,,...,1.0,2.0,3.0,,,,,,,,,,,,,,,,,,,,,2.0,,,,1.0,,,,,,,,,,,0.052086,17.647568
2,1032036,1,1,1,46,,2,1.0,,,,,1,,1.0,1.0,2.0,,,,,,,,,,,,,,2.0,2.0,1.0,,,1.0,1.0,HIDDEN,SAN DIEGO,CA,...,,5.0,3.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,73.0,17032.0,,,SAN DIEGO,6.0,,,,,1.223974,414.701494
3,1032036,2,2,2,47,,2,1.0,97.0,,,MULTI-RACIAL,1,,1.0,1.0,2.0,,,,,,,,,,,,,,2.0,2.0,2.0,3.0,,,,,,,...,,18.0,3.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,0.863473,292.558373
4,1032036,3,3,1,15,,2,1.0,97.0,,,MULTI-RACIAL,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,4.0,3.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,73.0,17030.0,,,,,SAN DIEGO,6.0,0.941412,318.9651


In [12]:
# Keep the household/person ids plus the demographic fields used further down.
# .copy() makes the subset an independent frame: a new column is assigned to
# `person` right after this, and writing to a slice of the original frame is a
# chained-assignment hazard (SettingWithCopyWarning, silenced in this notebook).
person = person[['SAMPN','PERNO','GEND','AGE','HISP','RACE1','RACE2','RACE3','RACE4',
                 'HOURS','EDUCA','INDUS']].copy()

In [13]:
# Person key: household id (SAMPN) and person number (PERNO) concatenated as text.
household_id_text = person['SAMPN'].astype(str)
person_no_text = person['PERNO'].astype(str)
person['HHPER'] = household_id_text + person_no_text

In [14]:
# Household-level survey file; only the id and the attributes listed are kept.
HH_CSV = '/home/data/CHTS_csv_format/data/Deliv_HH.csv'
hh_columns = ['SAMPN', 'HHVEH', 'OWN', 'INCOM', 'HHSIZ']

hh_all = pd.read_csv(HH_CSV)
hh_df = hh_all[hh_columns]

hh_df.head()

Unnamed: 0,SAMPN,HHVEH,OWN,INCOM,HHSIZ
0,1031985,2,1,3,2
1,1032036,1,1,7,5
2,1032053,2,2,2,6
3,1032425,2,1,7,2
4,1032558,0,2,1,1


In [15]:
# Attach household attributes to every person row; the left join keeps all persons.
demo = pd.merge(person, hh_df, how='left', on='SAMPN')

demo.head()

Unnamed: 0,SAMPN,PERNO,GEND,AGE,HISP,RACE1,RACE2,RACE3,RACE4,HOURS,EDUCA,INDUS,HHPER,HHVEH,OWN,INCOM,HHSIZ
0,1031985,1,1,74,2,1.0,,,,,6,,10319851,2,1,3,2
1,1031985,2,2,73,2,1.0,,,,,6,,10319852,2,1,3,2
2,1032036,1,1,46,2,1.0,,,,40.0,6,54.0,10320361,1,1,7,5
3,1032036,2,2,47,2,1.0,97.0,,,,6,,10320362,1,1,7,5
4,1032036,3,1,15,2,1.0,97.0,,,,1,,10320363,1,1,7,5


## Get rid of null values

In [16]:
# Column -> list of coded values to treat as missing.
# NOTE(review): these look like the survey's don't-know / refused codes -- confirm
# against the data dictionary.
na_dict = dict(
    GEND=[9],
    AGE=[998, 999],
    HOURS=[998, 999],
    EDUCA=[8, 9],
    HHVEH=[98, 99],
    OWN=[7, 8, 9],
    INCOM=[98, 99],
    HHSIZ=[98, 99],
    INDUS=[98, 99],
)

# Blank out every listed code in its column in a single replace per column.
for column, missing_codes in na_dict.items():
    demo[column] = demo[column].replace(missing_codes, np.nan)

In [17]:
# Drop persons with a missing value in any of these fields.
required_fields = [
    'GEND', 'AGE', 'HOURS', 'EDUCA',
    'HHVEH', 'OWN', 'INCOM', 'HHSIZ', 'INDUS',
]
demo = demo.dropna(subset=required_fields)

In [18]:
# demo's HHPER key is text, so cast the trip-side key to text before joining.
trips['HHPER'] = trips['HHPER'].astype(str)

# Default (inner) join: persons without a complete demographic record drop out.
trips1 = trips.merge(demo, on='HHPER')

# Row counts before and after the join show how many persons were lost.
print(len(trips))
print(len(trips1))

21285
17943


## Prepare data for use in MNL estimation (make dummy columns)

In [19]:
race_fields = ['RACE1', 'RACE2', 'RACE3', 'RACE4']


def _any_race_in(codes):
    """Return a boolean Series: True where any of RACE1-RACE4 holds one of `codes`."""
    return trips1[race_fields].isin(codes).any(axis=1)


# Capture the HISP == 1 flag before the HISP column is overwritten below.
hisp_flag = trips1['HISP'].isin([1.0])

# minority: HISP == 1, or any race field coded 2, 3, 4, 5 or 97.
trips1['minority'] = np.where(hisp_flag | _any_race_in([2.0, 3.0, 4.0, 5.0, 97.0]), 1, 0)

# HISP is recoded in place to a 1/0 dummy.
trips1['HISP'] = np.where(hisp_flag, 1, 0)

# One dummy per race code, set when any of the four race fields carries it.
trips1['black'] = np.where(_any_race_in([2.0]), 1, 0)
trips1['native'] = np.where(_any_race_in([3.0]), 1, 0)
trips1['asian'] = np.where(_any_race_in([4.0]), 1, 0)
trips1['PI'] = np.where(_any_race_in([5.0]), 1, 0)



In [20]:
# (dummy column, INCOM codes that switch it on). The 150k+ dummy overlaps the
# two finer ones (150k-250k and 250k+) by construction.
income_dummies = [
    ('hh_inc_less75k', [1.0, 2.0, 3.0, 4.0, 5.0]),
    ('hh_inc_75kless100k', [6.0]),
    ('hh_inc_150kplus', [8.0, 9.0, 10.0]),
    ('hh_inc_150kless250k', [8.0, 9.0]),
    ('hh_inc_250kplus', [10.0]),
]
for dummy_name, income_codes in income_dummies:
    trips1[dummy_name] = np.where(trips1['INCOM'].isin(income_codes), 1, 0)

In [21]:
# (dummy column, EDUCA codes that switch it on).
education_dummies = [
    ('lessGED', [1.0]),
    ('GED', [2.0]),
    ('somebach', [3.0]),
    ('Assoc', [4.0]),
    ('Bach', [5.0]),
    # Combined category covering codes 1 and 2.
    ('lessGED_GED', [1.0, 2.0]),
]
for dummy_name, educa_codes in education_dummies:
    trips1[dummy_name] = np.where(trips1['EDUCA'].isin(educa_codes), 1, 0)

# 1 for every EDUCA code below 5, else 0.
trips1['no_higher_ed'] = trips1['EDUCA'].lt(5).astype(int)

In [22]:
# Age-band dummies on half-open intervals [low, high). Ages outside 16-59 get
# no dummy set.
#
# Explicit comparisons replace Series.between(low, high, inclusive=False) |
# (x == low): the boolean `inclusive` argument is deprecated since pandas 1.3
# and rejected by pandas 2.x, while plain comparisons behave the same on every
# version.
age = trips1.AGE

trips1['age_16less25'] = np.where((age >= 16) & (age < 25), 1, 0)
trips1['age_25less40'] = np.where((age >= 25) & (age < 40), 1, 0)
trips1['age_40less50'] = np.where((age >= 40) & (age < 50), 1, 0)
trips1['age_50less60'] = np.where((age >= 50) & (age < 60), 1, 0)

In [23]:
# Lower-case name for the hours column, as used in the model expressions.
trips1 = trips1.rename(columns={'HOURS': 'hours'})

# GEND and OWN are shifted down by one so that code 1 becomes 0 and code 2 becomes 1.
# NOTE(review): presumably GEND 2 = female and OWN 2 = renter -- confirm against the codebook.
for dummy_name, coded_field in [('female', 'GEND'), ('tenure_2', 'OWN')]:
    trips1[dummy_name] = trips1[coded_field] - 1

# Households with zero vehicles.
zero_vehicles = trips1['HHVEH'].isin([0.0])
trips1['noveh'] = np.where(zero_vehicles, 1, 0)

# Households of size one.
single_person = trips1['HHSIZ'].isin([1.0])
trips1['hh_size_1per'] = np.where(single_person, 1, 0)

In [24]:
# (dummy column, INDUS codes that switch it on).
# NOTE(review): the codes look like 2-digit NAICS sectors -- confirm against the codebook.
sector_dummies = [
    ('sector_constr', [23]),
    ('sector_mfg', [31]),
    ('sector_retail', [44, 45]),
    ('sector_transport', [48]),
    ('info', [51]),
    ('finance', [52]),
    ('scitech', [54]),
    ('sector_edu_serv', [61]),
    ('sector_healthcare', [62]),
    ('sector_oth_serv', [81]),
    ('sector_gov', [92]),
]
for dummy_name, indus_codes in sector_dummies:
    trips1[dummy_name] = np.where(trips1['INDUS'].isin(indus_codes), 1, 0)

In [25]:
# (dummy column, TOD code that switches it on). The 6-9 period (code 1) is left
# without a dummy -- presumably the reference category; confirm.
tod_dummies = [
    ('TOD_3to6', 0),
    # ('TOD_6to9', 1),
    ('TOD_9to1530', 2),
    ('TOD_1530to1830', 3),
    ('TOD_1830up', 4),
]
for dummy_name, tod_code in tod_dummies:
    trips1[dummy_name] = np.where(trips1['TOD'].isin([tod_code]), 1, 0)

## Estimate the model for dwell time at work

In [26]:
@orca.table(cache=True)
def tripsA():
    """Register the prepared `trips1` frame as the orca table 'tripsA'.

    Returns the module-level `trips1` DataFrame as it exists when orca
    evaluates this function; `cache=True` is passed to the orca decorator.
    Both model steps below name 'tripsA' in their `tables` list.
    """
    return trips1

In [27]:
# Configure the multinomial logit step for the work dwell-time category.
# The choice column `dwell_work` holds the binned categories (1-5) built above.
m = SmallMultinomialLogitStep()
m.name = 'dwell_work'
m.tables = ['tripsA']
m.choice_column = 'dwell_work'
# Each entry pairs an explanatory column with a list of alternative ids.
# NOTE(review): presumably each listed alternative gets its own coefficient for
# that variable, and a nested list such as [[1,2],5] shares one coefficient
# across the grouped alternatives -- confirm against the estimation library docs.
# Commented-out entries mark categories deliberately left out of the spec.
m.model_expression = OrderedDict([
    ('intercept', [1,2,3,5]), 
    
    # time-of-day dummies for the home-to-work trip end
    ('TOD_3to6',[2,5]),
#     ('TOD_6to9'),
    ('TOD_9to1530',[1,2,4,5]),
    ('TOD_1530to1830',[1,2,4]),
    ('TOD_1830up',[1,4]),
    
    # industry-sector dummies
    ('sector_mfg',[1,2]),
    ('sector_retail',[1,5]),
    ('sector_transport',[4,5]),
    ('info',[1]),
    ('finance',[1,4,5]),
    ('scitech',[1,2]),
    ('sector_edu_serv',[2,4,5]),
    ('sector_healthcare',[1,2,4,5]),
    ('sector_gov',[1,2]),
    
    # age-band dummies
    ('age_16less25',[1,2]),
    ('age_25less40',[1]),
    ('age_40less50',[1]),
    ('age_50less60',[1]),
    
    ('female',[[1,2],5]),
        
    ('minority',[1,2]),
    
    # household-income dummies
    ('hh_inc_less75k',[1,4,5]), 
    ('hh_inc_75kless100k',[5]),
#     ('100kless150k')
    ('hh_inc_150kplus',[1,2,4]),
    
    # education dummies
    ('lessGED_GED',[1]),
    ('Assoc',[1,4]),
       
    ('hours',[1,2,4,5]),
    
    ('noveh',[4]),
   
    ('hh_size_1per',[4]),
    
    ('tenure_2',[4]),
    
])

In [28]:
# Estimate the dwell_work model; the fit prints the log-likelihoods and the
# regression summary shown in the output below.
m.fit()

Log-likelihood at zero: -28,878.1445
Initial Log-likelihood: -28,878.1445
Estimation Time for Point Estimation: 9.98 seconds.
Final log-likelihood: -24,197.0957
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                     _chosen   No. Observations:               17,943
Model:             Multinomial Logit Model   Df Residuals:                   17,881
Method:                                MLE   Df Model:                           62
Date:                     Fri, 29 Mar 2019   Pseudo R-squ.:                   0.162
Time:                             13:50:23   Pseudo R-bar-squ.:               0.160
AIC:                            48,518.191   Log-Likelihood:            -24,197.096
BIC:                            49,001.479   LL-Null:                   -28,878.144
                           coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

In [29]:
# Re-assigns the same name that was set when the step was configured.
# NOTE(review): looks redundant unless fit() changes the name -- confirm.
m.name = 'dwell_work'

In [30]:
# Initialise the model manager; per the output below, this registers the model
# steps that are already saved (including an earlier 'dwell_work').
mm.initialize()

Registering model step 'auto_ownership'
Registering model step 'dwell_work'
Registering model step 'TOD_choice'
Registering model step 'work_TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'


In [31]:
# Tag the fitted step and hand it to the model manager; per the output below,
# this saves 'dwell_work.yaml' and a pickled model object under configs/ and
# registers the step.
m.tags = ['dwell_work','emma']
mm.register(m)

Saving 'dwell_work.yaml': /home/emma/activitysynth/activitysynth/configs
Model saved to configs/dwell_work-model-object.pkl
Registering model step 'dwell_work'


## Estimate the model for Home-to-Work Trip End Times

In [32]:
# Model specification keeping terms with p-values below .01 (except hours for alternative 4).

# Configure the multinomial logit step for the home-to-work trip end period.
# The choice column `TOD` holds the time-of-day codes (0-4) built above.
m = SmallMultinomialLogitStep()
m.name = 'work_TOD_choice'
m.tables = ['tripsA']
m.choice_column = 'TOD'
# Each entry pairs an explanatory column with a list of alternative ids.
# NOTE(review): presumably each listed alternative gets its own coefficient for
# that variable -- confirm against the estimation library docs.
# Commented-out entries mark categories deliberately left out of the spec.
m.model_expression = OrderedDict([
    ('intercept', [0,1,3,4]), 
    
    # industry-sector dummies
    ('sector_constr',[2,3]),
    ('sector_mfg',[0,2,3]),
    ('sector_retail',[2]),
    ('sector_transport',[0]),
    ('info',[0,2,3]),
    ('finance',[0,2,3]),
    ('scitech',[0,3]),
    ('sector_edu_serv',[0,2,3]),
    ('sector_healthcare',[0,2,3,4]),
    ('sector_oth_serv',[0,3]),
    ('sector_gov',[2,3]),
    
    # age-band dummies
    ('age_16less25',[2,3,4]),
    ('age_25less40',[0]),
    
    ('female',[0,3,4]),
    
    ('minority',[0,4]),
    ('asian',[2]),
    
    # household-income dummies
    ('hh_inc_less75k',[4]), 
#     ('75kless150k')
    ('hh_inc_150kless250k',[0]),
    ('hh_inc_250kplus',[0,2]),

    # education dummies
    ('lessGED',[0,2,3]),
    ('GED',[0,2,3]),
    ('somebach',[0,2]),
    ('Assoc',[0,2]),
    ('no_higher_ed',[4]),
    ('Bach',[0,2,4]),
#     ('Grad')
       
    ('hours',[0,2,3,4]),

    ('noveh',[2]),

    ('hh_size_1per',[2]),
    
    ('tenure_2',[2]),
    
])

In [33]:
# Estimate the work_TOD_choice model; the fit prints the log-likelihoods and the
# regression summary shown in the output below.
m.fit()

Log-likelihood at zero: -28,878.1445
Initial Log-likelihood: -28,878.1445
Estimation Time for Point Estimation: 15.62 seconds.
Final log-likelihood: -16,303.6766
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                     _chosen   No. Observations:               17,943
Model:             Multinomial Logit Model   Df Residuals:                   17,878
Method:                                MLE   Df Model:                           65
Date:                     Fri, 29 Mar 2019   Pseudo R-squ.:                   0.435
Time:                             13:51:59   Pseudo R-bar-squ.:               0.433
AIC:                            32,737.353   Log-Likelihood:            -16,303.677
BIC:                            33,244.025   LL-Null:                   -28,878.144
                            coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------

In [34]:
# Re-assigns the same name that was set when the step was configured.
# NOTE(review): looks redundant unless fit() changes the name -- confirm.
m.name = 'work_TOD_choice'

In [35]:
# Tag the fitted step and hand it to the model manager; per the output below,
# this saves 'work_TOD_choice.yaml' and a pickled model object under configs/
# and registers the step.
m.tags = ['work_TOD_choice','emma']
mm.register(m)

Saving 'work_TOD_choice.yaml': /home/emma/activitysynth/activitysynth/configs
Model saved to configs/work_TOD_choice-model-object.pkl
Registering model step 'work_TOD_choice'
