In [1]:
import numpy as np
import pandas as pd
import orca
import os; os.chdir('../'); os.getcwd()
import warnings; warnings.simplefilter('ignore')

# Set data directory

d = '/home/data/fall_2018/'

if 'data_directory' in orca.list_injectables():
    d = orca.get_injectable('data_directory')
    

In [2]:
@orca.table("activity_freq")
def activity_freq():
    nact_df = pd.read_csv("data/hhper_purp_nact.csv", 
                        dtype={'HHPER': 'S',
                               'nact': 'i'})
    # A tibble: 20 x 2
    #    nact      n
    #   <int>  <int>
    # 1     0 263942
    # 2     1  38581
    # 3     2  18125
    # 4     3   7868
    # 5     4   3680
    # 6     5   1584
    # 7     6    803
    # 8     7    314
    # 9     8    151
    #10     9     84
    #11    10     40
    #12    11     20
    #13    12      6
    #14    13      4
    #15    14      4
    #16    15      5
    #17    16      2
    #18    17      1
    #19    18      1
    #20    21      1
    
    # truncate nact>5 to 5
    nact_df = (nact_df
           .assign(nact = lambda x: np.where(x["nact"]>5, 5, x["nact"]))
          )
    return nact_df

@orca.table("activities")
def activities():
    act_df = pd.read_csv("data/activities.csv", 
                        dtype={'HHPER': 'S'})
    act_df = (act_df
              .loc[:, ["HHPER", "purp", "age", "sex", "income", "edu", 
                       "worker", "student", "persons", "nchildren", 
                       "n65plus","start_mins", "act_duration"]]
              .dropna()
              .assign(female=lambda x: np.where(x["sex"]==2, 1, 0))
             )

    return act_df


# MNL models of activity frequency by Purpose

In [3]:
from urbansim_templates import modelmanager
from urbansim_templates.models import SmallMultinomialLogitStep, \
     LargeMultinomialLogitStep, SegmentedLargeMultinomialLogitStep

from collections import OrderedDict

modelmanager.initialize()

#nact_df = orca.get_table("activity_freq").to_frame()

#nact_df_shp = nact_df.query("purp=='Shopping'")

m0_shp = SmallMultinomialLogitStep(
    tables=["activity_freq"],
    #tables=["persons_tod"],
    choice_column = "nact",
    filters = "purp=='Shopping'",
    name = "shopping_activity_freq_choice_model",
)
#m0.model_expression = ("HW_EA")  #not working
m0_shp.model_expression = OrderedDict([
    ('intercept', [1, ]),
    #('sex', [1, 2, 3, 4, 5])
    ] )
    
m0_shp.fit()

Registering model step 'auto_ownership'
Registering model step 'WLCM_gen_tt'
Registering model step 'TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'
Log-likelihood at zero: 0.0000
Initial Log-likelihood: 0.0000
Estimation Time for Point Estimation: 0.18 seconds.
Final log-likelihood: 0.0000
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                     _chosen   No. Observations:               23,944
Model:             Multinomial Logit Model   Df Residuals:                   23,943
Method:                                MLE   Df Model:                            1
Date:                     Fri, 27 Sep 2019   Pseudo R-squ.:                     nan
Time:                             11:45:42   Pseudo R-bar-squ.:                 inf
AIC:                                 2.000   Log-Likelihood:                  0.000
BIC:                                10.083   LL-Null:                         0.0

# Regression models of activity start time 

In [35]:
from urbansim_templates.models import OLSRegressionStep

m_eatout = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='EatOut'"
)
m_eatout.model_expression = 'np.log1p(start_mins) ~ age + worker + student + n65plus'
m_eatout.fit()

                            OLS Regression Results                            
Dep. Variable:     np.log(start_mins)   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     4.515
Date:                Fri, 27 Sep 2019   Prob (F-statistic):            0.00122
Time:                        13:01:09   Log-Likelihood:                -1155.7
No. Observations:                4737   AIC:                             2321.
Df Residuals:                    4732   BIC:                             2354.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.7315      0.009    709.370      0.0

In [21]:
m_escort = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Escort'"
)
m_escort.model_expression = 'np.log1p(start_mins) ~ nchildren'
m_escort.fit()

                             OLS Regression Results                             
Dep. Variable:     np.log1p(start_mins)   R-squared:                       0.002
Model:                              OLS   Adj. R-squared:                  0.002
Method:                   Least Squares   F-statistic:                     9.760
Date:                  Fri, 27 Sep 2019   Prob (F-statistic):            0.00179
Time:                          11:54:16   Log-Likelihood:                -3015.6
No. Observations:                  5576   AIC:                             6035.
Df Residuals:                      5574   BIC:                             6049.
Df Model:                             1                                         
Covariance Type:              nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.6327      0.009

In [27]:
m_personalbus = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='PersonalBus'"
)
m_personalbus.model_expression = 'np.log1p(start_mins) ~ income + n65plus'
m_personalbus.fit()

                             OLS Regression Results                             
Dep. Variable:     np.log1p(start_mins)   R-squared:                       0.007
Model:                              OLS   Adj. R-squared:                  0.005
Method:                   Least Squares   F-statistic:                     3.203
Date:                  Fri, 27 Sep 2019   Prob (F-statistic):           0.000408
Time:                          11:56:48   Log-Likelihood:                -2337.8
No. Observations:                  4847   AIC:                             4698.
Df Residuals:                      4836   BIC:                             4769.
Df Model:                            10                                         
Covariance Type:              nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept     

In [31]:
m_rec = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Recreation'"
)
m_rec.model_expression = 'np.log1p(start_mins) ~ worker + student + n65plus'
m_rec.fit()

                             OLS Regression Results                             
Dep. Variable:     np.log1p(start_mins)   R-squared:                       0.006
Model:                              OLS   Adj. R-squared:                  0.006
Method:                   Least Squares   F-statistic:                     13.57
Date:                  Fri, 27 Sep 2019   Prob (F-statistic):           7.95e-09
Time:                          12:58:33   Log-Likelihood:                -3827.4
No. Observations:                  6513   AIC:                             7663.
Df Residuals:                      6509   BIC:                             7690.
Df Model:                             3                                         
Covariance Type:              nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.5620      0.010

In [38]:
m_soc = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='SocialRec'"
)
m_soc.model_expression = 'np.log1p(start_mins) ~ age + worker + n65plus'
m_soc.fit()

                             OLS Regression Results                             
Dep. Variable:     np.log1p(start_mins)   R-squared:                       0.007
Model:                              OLS   Adj. R-squared:                  0.006
Method:                   Least Squares   F-statistic:                     11.97
Date:                  Fri, 27 Sep 2019   Prob (F-statistic):           8.34e-08
Time:                          13:02:20   Log-Likelihood:                -4291.9
No. Observations:                  5372   AIC:                             8592.
Df Residuals:                      5368   BIC:                             8618.
Df Model:                             3                                         
Covariance Type:              nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.6221      0.013

In [39]:
m_shp = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Shopping'"
)
m_shp.model_expression = 'np.log1p(start_mins) ~ age + worker + income + student + n65plus'
m_shp.fit()

                            OLS Regression Results                            
Dep. Variable:     np.log(start_mins)   R-squared:                       0.017
Model:                            OLS   Adj. R-squared:                  0.015
Method:                 Least Squares   F-statistic:                     9.585
Date:                Fri, 27 Sep 2019   Prob (F-statistic):           3.81e-20
Time:                        13:02:39   Log-Likelihood:                -158.43
No. Observations:                7351   AIC:                             344.9
Df Residuals:                    7337   BIC:                             441.5
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept               6.7093    

# Regression models of activity duration

In [87]:
m_eatout = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='EatOut'"
)
m_eatout.model_expression = 'np.log(start_mins) ~ student + n65plus'
m_eatout.fit()

                            OLS Regression Results                            
Dep. Variable:     np.log(start_mins)   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     6.589
Date:                Fri, 27 Sep 2019   Prob (F-statistic):            0.00139
Time:                        13:16:57   Log-Likelihood:                -1158.2
No. Observations:                4737   AIC:                             2322.
Df Residuals:                    4734   BIC:                             2342.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.7196      0.005   1262.306      0.0

In [78]:
m_escort = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Escort'"
)
m_escort.model_expression = 'np.log1p(start_mins) ~ n65plus + nchildren'
m_escort.fit()

                             OLS Regression Results                             
Dep. Variable:     np.log1p(start_mins)   R-squared:                       0.003
Model:                              OLS   Adj. R-squared:                  0.003
Method:                   Least Squares   F-statistic:                     8.278
Date:                  Fri, 27 Sep 2019   Prob (F-statistic):           0.000257
Time:                          13:15:21   Log-Likelihood:                -3012.2
No. Observations:                  5576   AIC:                             6030.
Df Residuals:                      5573   BIC:                             6050.
Df Model:                             2                                         
Covariance Type:              nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.6437      0.010

In [69]:
m_personalbus = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='PersonalBus'"
)
m_personalbus.model_expression = 'np.log1p(act_duration) ~ edu + income + n65plus + nchildren'
m_personalbus.fit()

                              OLS Regression Results                              
Dep. Variable:     np.log1p(act_duration)   R-squared:                       0.018
Model:                                OLS   Adj. R-squared:                  0.015
Method:                     Least Squares   F-statistic:                     5.609
Date:                    Fri, 27 Sep 2019   Prob (F-statistic):           3.75e-12
Time:                            13:13:07   Log-Likelihood:                -8487.6
No. Observations:                    4847   AIC:                         1.701e+04
Df Residuals:                        4830   BIC:                         1.712e+04
Df Model:                              16                                         
Covariance Type:                nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------

In [64]:
m_rec = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Recreation'"
)
m_rec.model_expression = 'np.log1p(act_duration) ~  edu + female + student + n65plus + nchildren'
m_rec.fit()

                              OLS Regression Results                              
Dep. Variable:     np.log1p(act_duration)   R-squared:                       0.010
Model:                                OLS   Adj. R-squared:                  0.009
Method:                     Least Squares   F-statistic:                     6.802
Date:                    Fri, 27 Sep 2019   Prob (F-statistic):           1.23e-10
Time:                            13:11:00   Log-Likelihood:                -10551.
No. Observations:                    6513   AIC:                         2.112e+04
Df Residuals:                        6502   BIC:                         2.120e+04
Df Model:                              10                                         
Covariance Type:                nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------

In [50]:
m_soc = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='SocialRec'"
)
m_soc.model_expression = 'np.log1p(act_duration) ~ age + income + persons +  n65plus'
m_soc.fit()

                              OLS Regression Results                              
Dep. Variable:     np.log1p(act_duration)   R-squared:                       0.009
Model:                                OLS   Adj. R-squared:                  0.006
Method:                     Least Squares   F-statistic:                     3.889
Date:                    Fri, 27 Sep 2019   Prob (F-statistic):           5.66e-06
Time:                            13:07:37   Log-Likelihood:                -9146.5
No. Observations:                    5372   AIC:                         1.832e+04
Df Residuals:                        5359   BIC:                         1.840e+04
Df Model:                              12                                         
Covariance Type:                nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------

In [65]:
m_shp = OLSRegressionStep(
    tables=["activities"],
    filters="purp=='Shopping'"
)
m_shp.model_expression = 'np.log1p(act_duration) ~ female + worker + edu + n65plus'
m_shp.fit()

                              OLS Regression Results                              
Dep. Variable:     np.log1p(act_duration)   R-squared:                       0.028
Model:                                OLS   Adj. R-squared:                  0.027
Method:                     Least Squares   F-statistic:                     23.27
Date:                    Fri, 27 Sep 2019   Prob (F-statistic):           1.35e-39
Time:                            13:11:21   Log-Likelihood:                -9322.2
No. Observations:                    7351   AIC:                         1.866e+04
Df Residuals:                        7341   BIC:                         1.873e+04
Df Model:                               9                                         
Covariance Type:                nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------