# Estimating Tour Mode Choice

Integration with [larch](https://larch.newman.me) for model estimation. See [estimation tools review](https://github.com/ActivitySim/activitysim/wiki/Estimation-Tools-Review) for more information about larch.

# Run the Example

Output an estimation data bundle (EDB), which contains:
  - model settings - tour_mode_choice_model_settings.yaml
  - coefficients - tour_mode_choice_coefficients.csv
  - utilities specification - tour_mode_choice_SPEC.csv
  - ...

# Read EDB 

In [1]:
import larch  # !conda install larch #for estimation
import pandas as pd
import numpy as np
import yaml 
import larch.util.excel
import larch_asim  # utility functions in a local module
import os

from larch import P,X

In [2]:
# Folder holding the estimation data bundle (EDB) files for this model.
edb_directory = "estimation_data_bundle/tour_mode_choice/"


def read_csv(filename, **kwargs):
    """Load a CSV file located under the EDB directory.

    `filename` is joined onto `edb_directory`; every extra keyword
    argument is forwarded unchanged to `pandas.read_csv`.
    """
    csv_path = os.path.join(edb_directory, filename)
    return pd.read_csv(csv_path, **kwargs)

In [3]:
# The coefficient values and the coefficient template are both indexed by
# coefficient name, so they are read with the same options.
coefficients, coef_template = (
    read_csv(csv_name, index_col='coefficient_name')
    for csv_name in (
        "tour_mode_choice_coefficients.csv",
        "tour_mode_choice_coefficients_template.csv",
    )
)

# The utility specification and the combined values table keep the default index.
spec, values = (
    read_csv(csv_name)
    for csv_name in (
        "tour_mode_choice_SPEC.csv",
        "tour_mode_choice_values_combined.csv",
    )
)

## settings

In [4]:
# Parse the model settings YAML from the EDB directory.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes (previously the open file object was never closed).
with open(
    os.path.join(edb_directory, "tour_mode_choice_model_settings.yaml"), "r"
) as settings_file:
    settings = yaml.load(settings_file, Loader=yaml.SafeLoader)

settings

{'LOGIT_TYPE': 'NL',
 'NESTS': {'name': 'root',
  'coefficient': 1.0,
  'alternatives': [{'name': 'AUTO',
    'coefficient': 0.72,
    'alternatives': [{'name': 'DRIVEALONE',
      'coefficient': 0.35,
      'alternatives': ['DRIVEALONEFREE', 'DRIVEALONEPAY']},
     {'name': 'SHAREDRIDE2',
      'coefficient': 0.35,
      'alternatives': ['SHARED2FREE', 'SHARED2PAY']},
     {'name': 'SHAREDRIDE3',
      'coefficient': 0.35,
      'alternatives': ['SHARED3FREE', 'SHARED3PAY']}]},
   {'name': 'NONMOTORIZED',
    'coefficient': 0.72,
    'alternatives': ['WALK', 'BIKE']},
   {'name': 'TRANSIT',
    'coefficient': 0.72,
    'alternatives': [{'name': 'WALKACCESS',
      'coefficient': 0.5,
      'alternatives': ['WALK_LOC',
       'WALK_LRF',
       'WALK_EXP',
       'WALK_HVY',
       'WALK_COM']},
     {'name': 'DRIVEACCESS',
      'coefficient': 0.5,
      'alternatives': ['DRIVE_LOC',
       'DRIVE_LRF',
       'DRIVE_EXP',
       'DRIVE_HVY',
       'DRIVE_COM']}]},
   {'name': 'MAAS'

## coefficients

In [5]:
# Show the coefficients table read from the EDB (value and constrain columns).
coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
drive_transit_ASC_auto_sufficient_atwork,-999.214660,F
drive_transit_ASC_auto_deficient_atwork,-998.819600,F
joint_walk_transit_ASC_auto_sufficient_all,-18.264534,F
joint_drive_transit_ASC_auto_sufficient_all,-8.045285,F
joint_tnc_shared_ASC_auto_deficient_all,-7.160000,F
...,...,...
coef_topology_walk_multiplier_eatout_escort_othdiscr_othmaint_school_shopping_social_univ_work,15.000000,T
bike_ASC_auto_sufficient_atwork,15.720170,F
walk_ASC_no_auto_school,18.414557,F
coef_topology_bike_multiplier_eatout_escort_othdiscr_othmaint_school_shopping_social_univ_work,20.000000,T


## coef_template

In [6]:
# Show the coefficient template: one column per tour purpose, one row per
# coefficient name.
coef_template

Unnamed: 0_level_0,eatout,escort,othdiscr,othmaint,school,shopping,social,univ,work,atwork
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
coef_ivt,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_school_univ,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_eatout_escort_othdiscr_othmaint_shopp...,coef_ivt_school_univ,coef_ivt_work,coef_ivt_atwork
coef_topology_walk_multiplier,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_eatout_escort_ot...,coef_topology_walk_multiplier_atwork
coef_topology_bike_multiplier,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_eatout_escort_ot...,coef_topology_bike_multiplier_atwork
coef_topology_trn_multiplier,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_eatout_escort_oth...,coef_topology_trn_multiplier_atwork
coef_age1619_da_multiplier,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_school_univ,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_school_univ,coef_age1619_da_multiplier_eatout_escort_othdi...,coef_age1619_da_multiplier_atwork
...,...,...,...,...,...,...,...,...,...,...
express_bus_ASC,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_school_univ,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_eatout_escort_othdiscr_othmain...,express_bus_ASC_school_univ,express_bus_ASC_work,express_bus_ASC_eatout_escort_othdiscr_othmain...
heavy_rail_ASC,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_school_univ,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...,heavy_rail_ASC_school_univ,heavy_rail_ASC_work,heavy_rail_ASC_eatout_escort_othdiscr_othmaint...
commuter_rail_ASC,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_school_univ,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_eatout_escort_othdiscr_othma...,commuter_rail_ASC_school_univ,commuter_rail_ASC_work,commuter_rail_ASC_eatout_escort_othdiscr_othma...
walk_transit_CBD_ASC,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_school_univ,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_eatout_escort_othdiscr_ot...,walk_transit_CBD_ASC_school_univ,walk_transit_CBD_ASC_work,walk_transit_CBD_ASC_atwork


## spec

In [7]:
# Remove apostrophes from Label names
# regex=False makes this an explicit literal replacement, so the result does
# not depend on the pandas version's default for `regex`.
spec['Label'] = spec['Label'].str.replace("'", "", regex=False)

In [8]:
# Show the utility specification: Label / Description / Expression followed by
# one column per alternative.
spec

Unnamed: 0,Label,Description,Expression,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,...,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED
0,#,Drive alone no toll,,,,,,,,,...,,,,,,,,,,
1,util_DRIVEALONEFREE_Unavailable,DRIVEALONEFREE - Unavailable,sov_available == False,-999,,,,,,,...,,,,,,,,,,
2,util_DRIVEALONEFREE_Unavailable_for_zero_auto_...,DRIVEALONEFREE - Unavailable for zero auto hou...,auto_ownership == 0,-999,,,,,,,...,,,,,,,,,,
3,util_DRIVEALONEFREE_Unavailable_for_persons_le...,DRIVEALONEFREE - Unavailable for persons less ...,age < 16,-999,,,,,,,...,,,,,,,,,,
4,util_DRIVEALONEFREE_Unavailable_for_joint_tours,DRIVEALONEFREE - Unavailable for joint tours,is_joint == True,-999,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,#,FIXME - skims aren't symmetrical,so we have to make sure they can get back,,,,,,,,...,,,,,,,,,,
341,util_Walk_not_available_for_long_distances,Walk not available for long distances,@od_skims.max('DISTWALK') > 3,,,,,,,-999,...,,,,,,,,,,
342,util_Bike_not_available_for_long_distances,Bike not available for long distances,@od_skims.max('DISTBIKE') > 8,,,,,,,,...,,,,,,,,,,
343,util_Drive_alone_not_available_for_escort_tours,Drive alone not available for escort tours,is_escort,-999,-999,,,,,,...,,,,,,,,,,


In [9]:
# Check for double-parameters
# Take every cell of the alternative columns (position 3 onward) on rows whose
# Label is not '#', split each cell on "*", and require that no cell splits
# into more than one piece.
ss = (
    spec
    .query("Label!='#'")
    .iloc[:, 3:]
    .stack()
    .str.split("*")
)
st = ss.map(len) > 1
assert not st.any()

## values

In [10]:
# Remove apostrophes from column names
# regex=False makes this an explicit literal replacement, so the result does
# not depend on the pandas version's default for `regex`.
values.columns = values.columns.str.replace("'", "", regex=False)
values

Unnamed: 0,tour_id,model_choice,util_DRIVEALONEFREE_Unavailable,util_DRIVEALONEFREE_Unavailable_for_zero_auto_households,util_DRIVEALONEFREE_Unavailable_for_persons_less_than_16,util_DRIVEALONEFREE_Unavailable_for_joint_tours,util_DRIVEALONEFREE_Unavailable_if_didnt_drive_to_work,util_DRIVEALONEFREE_In_vehicle_time,util_DRIVEALONEFREE_Terminal_time,util_DRIVEALONEFREE_Operating_cost,...,walk_heavyrail_available,walk_lrf_available,walk_ferry_available,drive_local_available,drive_commuter_available,drive_express_available,drive_heavyrail_available,drive_lrf_available,drive_ferry_available,destination_in_cbd
0,1277304,SHARED3FREE,0.0,0.0,0.0,0.0,0.0,10.750000,13.98212,6.780743,...,False,False,False,True,True,False,True,False,False,0
1,1314168,WALK,0.0,1.0,0.0,0.0,0.0,7.510000,15.45016,26.996040,...,False,False,False,False,False,False,False,False,False,1
2,1325643,WALK_LRF,0.0,1.0,0.0,0.0,0.0,11.129999,21.84808,29.480405,...,False,True,False,False,False,False,False,False,False,1
3,4521267,WALK_LOC,0.0,1.0,0.0,0.0,0.0,1.480000,22.08588,1.292095,...,False,False,False,False,False,False,False,False,False,1
4,4521273,WALK,0.0,1.0,0.0,0.0,0.0,3.750000,23.39484,3.230239,...,False,False,False,False,False,False,False,False,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,305767406,WALK,0.0,0.0,0.0,0.0,0.0,8.020000,14.72624,15.189854,...,False,False,False,True,True,False,True,False,False,0
239,305886167,WALK,0.0,0.0,0.0,0.0,0.0,5.300000,6.47160,16.288516,...,False,False,False,True,True,False,True,False,False,0
240,308478785,SHARED3FREE,0.0,0.0,0.0,0.0,0.0,18.230000,11.34704,12.087705,...,False,True,False,True,True,False,True,True,False,0
241,308641383,WALK_LRF,0.0,0.0,0.0,0.0,0.0,16.049999,20.54720,56.095187,...,False,True,False,True,True,False,True,False,False,0


# Data Setup

## Alternatives

In [11]:
# Alternatives are the spec columns from position 3 onward; they are numbered
# 1..N in column order, with lookups built in both directions.
alt_names = spec.columns[3:].tolist()
alt_codes = np.arange(1, len(alt_names) + 1)
alt_names_to_codes = {name: code for name, code in zip(alt_names, alt_codes)}
alt_codes_to_names = {code: name for name, code in zip(alt_names, alt_codes)}
alt_names_to_codes

{'DRIVEALONEFREE': 1,
 'DRIVEALONEPAY': 2,
 'SHARED2FREE': 3,
 'SHARED2PAY': 4,
 'SHARED3FREE': 5,
 'SHARED3PAY': 6,
 'WALK': 7,
 'BIKE': 8,
 'WALK_LOC': 9,
 'WALK_LRF': 10,
 'WALK_EXP': 11,
 'WALK_HVY': 12,
 'WALK_COM': 13,
 'DRIVE_LOC': 14,
 'DRIVE_LRF': 15,
 'DRIVE_EXP': 16,
 'DRIVE_HVY': 17,
 'DRIVE_COM': 18,
 'TAXI': 19,
 'TNC_SINGLE': 20,
 'TNC_SHARED': 21}

## Nesting Tree

In [12]:
# Build the nesting tree from the alternative names and the NESTS section of
# the model settings, using the helper in the local larch_asim module.
tree = larch_asim.construct_nesting_tree(alt_names, settings['NESTS'])

tree

In [13]:
# List the tree's elemental alternatives as a code -> name mapping.
tree.elemental_names()

{1: 'DRIVEALONEFREE',
 2: 'DRIVEALONEPAY',
 3: 'SHARED2FREE',
 4: 'SHARED2PAY',
 5: 'SHARED3FREE',
 6: 'SHARED3PAY',
 7: 'WALK',
 8: 'BIKE',
 9: 'WALK_LOC',
 10: 'WALK_LRF',
 11: 'WALK_EXP',
 12: 'WALK_HVY',
 13: 'WALK_COM',
 14: 'DRIVE_LOC',
 15: 'DRIVE_LRF',
 16: 'DRIVE_EXP',
 17: 'DRIVE_HVY',
 18: 'DRIVE_COM',
 19: 'TAXI',
 20: 'TNC_SINGLE',
 21: 'TNC_SHARED'}

## Purposes

In [14]:
# The tour purposes are the column names of the coefficient template.
purposes = coef_template.columns.tolist()
purposes

['eatout',
 'escort',
 'othdiscr',
 'othmaint',
 'school',
 'shopping',
 'social',
 'univ',
 'work',
 'atwork']

## Purpose-specific Models

In [15]:
# One larch model per tour purpose, each constructed with the same nesting tree.
m = dict(
    (purpose, larch.Model(graph=tree))
    for purpose in purposes
)

In [16]:
for alt_code, alt_name in tree.elemental_names().items():
    # Base utility function for this alternative, built from the spec column
    # that carries the alternative's name.
    # NOTE(review): ignore_x=('#',) presumably skips the '#' comment rows — confirm.
    u = larch_asim.linear_utility_from_spec(
        spec,
        x_col='Label',
        p_col=alt_name,
        ignore_x=('#',),
    )
    for purpose in purposes:
        # Template column for this purpose; a parameter name that has no entry
        # in the template is kept as-is.
        purpose_names = coef_template[purpose]
        terms = [
            P(purpose_names.get(term.param, term.param)) * term.data * term.scale
            for term in u
        ]
        u_purp = sum(terms)
        m[purpose].utility_co[alt_code] = u_purp


## Set Parameter Values

In [17]:
# Run the larch_asim helper on every purpose model.
# NOTE(review): presumably this fixes parameters whose names are numeric
# literals (e.g. '-999', '0.35') at those values — the estimation output shows
# them with holdfast=1; confirm against larch_asim.
for model in m.values():
    larch_asim.explicit_value_parameters(model)

In [18]:
# Show the coefficients table again before it is applied to the models.
coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
drive_transit_ASC_auto_sufficient_atwork,-999.214660,F
drive_transit_ASC_auto_deficient_atwork,-998.819600,F
joint_walk_transit_ASC_auto_sufficient_all,-18.264534,F
joint_drive_transit_ASC_auto_sufficient_all,-8.045285,F
joint_tnc_shared_ASC_auto_deficient_all,-7.160000,F
...,...,...
coef_topology_walk_multiplier_eatout_escort_othdiscr_othmaint_school_shopping_social_univ_work,15.000000,T
bike_ASC_auto_sufficient_atwork,15.720170,F
walk_ASC_no_auto_school,18.414557,F
coef_topology_bike_multiplier_eatout_escort_othdiscr_othmaint_school_shopping_social_univ_work,20.000000,T


In [19]:
# Pass the coefficients table and the dict of purpose models to the larch_asim helper.
# NOTE(review): presumably sets initial values / constraints on each model — confirm.
larch_asim.apply_coefficients(coefficients, m)

## DataFrames

In [21]:
# Translate each chosen alternative name into its integer code.
# Names missing from alt_names_to_codes map to NaN.
values['model_choice_code'] = values.model_choice.map(alt_names_to_codes)

In [22]:
# Wrap the values table, indexed by tour_id, in a larch DataFrames object
# together with the alternative codes and names.
# NOTE(review): av=True presumably marks every alternative as available — confirm.
d = larch.DataFrames(
    co=values.set_index('tour_id'),
    av=True,
    alt_codes=alt_codes,
    alt_names=alt_names,
)

In [23]:
# For each purpose, attach a selection of `d` made with the expression
# tour_type == <purpose>, and name the column holding the coded choice.
for purpose, model in m.items():
    model.dataservice = d.selector_co(f"tour_type=='{purpose}'")
    model.choice_co_code = 'model_choice_code'

In [24]:
# Collect the per-purpose models into a single ModelGroup.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top.
from larch.model.model_group import ModelGroup
mg = ModelGroup(m.values())

In [25]:
# Load data for the model group. The repeated "req_data does not request
# avail_ca or avail_co ..." messages in the output come from this call.
mg.load_data()

req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided


# Estimate

Note: The demo test data here is 100 households and the model has 
57 estimated parameters -- the result is a very over-specified
model which does not have a numerically stable likelihood maximizing
solution.

In [26]:
# Estimate the model group; the resulting parameter table is shown in the output.
mg.estimate()

req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided
req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
-999,-999.000000,-999.00,-999.00,-999.00,-999.00,1,,-999.000000
0.35,0.350000,0.35,0.35,0.35,0.35,1,,0.350000
0.5,0.500000,0.50,0.50,0.50,0.50,1,,0.500000
0.72,0.720000,0.72,0.72,0.72,0.72,1,,0.720000
1,1.000000,1.00,1.00,1.00,1.00,1,,1.000000
...,...,...,...,...,...,...,...,...
walk_ASC_no_auto_atwork,6.669213,0.00,0.00,-inf,inf,0,,6.669213
walk_transit_ASC_auto_deficient_atwork,-2.998829,0.00,0.00,-inf,inf,0,,-2.998829
walk_transit_ASC_auto_sufficient_atwork,-3.401027,0.00,0.00,-inf,inf,0,,-3.401027
walk_transit_ASC_no_auto_atwork,2.704188,0.00,0.00,-inf,inf,0,,2.704188


  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
-999,-999.000000
0.35,0.350000
0.5,0.500000
0.72,0.720000
1,1.000000
bike_ASC_auto_deficient_eatout,-1.569111
bike_ASC_auto_sufficient_eatout,-440.432192
bike_ASC_no_auto_eatout,-6.333044
coef_age010_trn_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.000000
coef_age1619_da_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.000000

Unnamed: 0,0
-999,-999.0
0.35,0.35
0.5,0.5
0.72,0.72
1,1.0
bike_ASC_auto_deficient_eatout,-1.569111
bike_ASC_auto_sufficient_eatout,-440.432192
bike_ASC_no_auto_eatout,-6.333044
coef_age010_trn_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.0
coef_age1619_da_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.0

Unnamed: 0,0
-999,0.0
0.35,0.0
0.5,0.0
0.72,0.0
1,0.0
bike_ASC_auto_deficient_eatout,0.0
bike_ASC_auto_sufficient_eatout,-4.377882e-145
bike_ASC_no_auto_eatout,-4.740254e-234
coef_age010_trn_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.0
coef_age1619_da_multiplier_eatout_escort_othdiscr_othmaint_shopping_social_work,0.0


# Outputs

In [27]:
# The test model is wildly overspecified.
#
# mg.possible_overspecification 

In [30]:
# Coefficient names from the EDB file that also appear in the group's
# parameter frame, kept in coefficient-file order.
estimated_index = mg.pf.index
est_names = [name for name in coefficients.index if name in estimated_index]

In [31]:
# Write re-estimated value back into the coefficients file.
# This overwrites the 'value' column of `coefficients` in place for the names
# in est_names; other rows keep their original values.
coefficients.loc[est_names, 'value'] = mg.pf.loc[est_names, 'value']

In [35]:
# Write out replacement coefficients file and model summaries
# Everything goes into an 'estimated' subfolder of the EDB directory, which is
# created if it does not exist yet.
estimated_dir = os.path.join(edb_directory, 'estimated')
os.makedirs(estimated_dir, exist_ok=True)

# Revised coefficients, with coefficient_name restored as a regular column.
revised_csv = os.path.join(estimated_dir, "tour_mode_choice_coefficients_revised.csv")
coefficients.reset_index().to_csv(revised_csv, index=False)

# One Excel estimation report per purpose model.
for purpose, model in m.items():
    report_path = os.path.join(
        estimated_dir,
        f"tour_mode_choice_{purpose}_model_estimation.xlsx",
    )
    model.to_xlsx(report_path)