# Estimating Workplace Location

Integration with [larch](https://larch.newman.me) for model estimation. See [estimation tools review](https://github.com/ActivitySim/activitysim/wiki/Estimation-Tools-Review) for more information about larch.

# Run the Example

Output an estimation data bundle (EDB), which contains:
  - model settings - workplace_location_model_settings.yaml
  - coefficients - workplace_location_coefficients.csv
  - utilities specification - workplace_location_SPEC.csv
  - alternatives values - workplace_location_alternatives_combined.csv
  - chooser data - workplace_location_choosers_combined.csv
  - choices made - workplace_location_choices.csv

# Read EDB 

In [1]:
import larch  # !conda install larch #for estimation
import pandas as pd
import yaml 
import larch.util.excel
import larch_asim  # utility functions in a local module

In [2]:
# Folder holding the workplace-location estimation data bundle (EDB).
# The trailing slash matters: file names are appended to it directly.
directory = "estimation_data_bundle/workplace_location/"

# Load each EDB table into its own DataFrame.
coefficients = pd.read_csv(f"{directory}workplace_location_coefficients.csv")
spec = pd.read_csv(f"{directory}workplace_location_SPEC.csv")
alt_values = pd.read_csv(f"{directory}workplace_location_alternatives_combined.csv")
chooser_data = pd.read_csv(f"{directory}workplace_location_choosers_combined.csv")
choices = pd.read_csv(f"{directory}workplace_location_choices.csv")

In [3]:
# Load the model settings shipped in the EDB.
# The context manager closes the file handle once parsing is done (the bare
# open() call previously left it open), and safe_load restricts parsing to
# plain YAML types, the same as passing Loader=yaml.SafeLoader.
with open(directory + "workplace_location_model_settings.yaml", "r") as settings_file:
    settings = yaml.safe_load(settings_file)

# Display the parsed settings dictionary.
settings

{'SAMPLE_SIZE': 30,
 'SIMULATE_CHOOSER_COLUMNS': ['income_segment', 'TAZ'],
 'SAMPLE_SPEC': 'workplace_location_sample.csv',
 'SPEC': 'workplace_location.csv',
 'COEFFICIENTS': 'workplace_location_coeffs.csv',
 'LOGSUM_SETTINGS': 'tour_mode_choice.yaml',
 'LOGSUM_PREPROCESSOR': 'nontour_preprocessor',
 'LOGSUM_TOUR_PURPOSE': 'work',
 'CHOOSER_ORIG_COL_NAME': 'TAZ',
 'ALT_DEST_COL_NAME': 'alt_dest',
 'IN_PERIOD': 17,
 'OUT_PERIOD': 8,
 'DEST_CHOICE_COLUMN_NAME': 'workplace_taz',
 'annotate_persons': {'SPEC': 'annotate_persons_workplace',
  'DF': 'persons',
  'TABLES': ['land_use']},
 'annotate_households': {'SPEC': 'annotate_households_workplace',
  'DF': 'households',
  'TABLES': ['persons']},
 'CHOOSER_TABLE_NAME': 'persons_merged',
 'MODEL_SELECTOR': 'workplace',
 'CHOOSER_SEGMENT_COLUMN_NAME': 'income_segment',
 'CHOOSER_FILTER_COLUMN_NAME': 'is_worker',
 'SEGMENT_IDS': {'work_low': 1,
  'work_med': 2,
  'work_high': 3,
  'work_veryhigh': 4},
 'CONSTANTS': {'WORK_HIGH_SEGMENT_ID': 3

In [4]:
# Coefficient table from the EDB: coefficient_name, value and constrain columns.
coefficients

Unnamed: 0,coefficient_name,value,constrain
0,coef_dist_0_1,-0.8428,F
1,coef_dist_1_2,-0.3104,F
2,coef_dist_2_5,-0.3783,F
3,coef_dist_5_15,-0.1285,F
4,coef_dist_15_up,-0.0917,F
5,coef_dist_0_5_high,0.15,F
6,coef_dist_5_up_high,0.02,F
7,coef_mode_logsum,0.3,F


In [5]:
# Utility specification from the EDB: Label, Description, Expression and
# the coefficient each expression is multiplied by.
spec

Unnamed: 0,Label,Description,Expression,coefficient
0,local_dist,,_DIST@skims['DIST'],1
1,util_dist_0_1,"Distance, piecewise linear from 0 to 1 miles","@_DIST.clip(0,1)",coef_dist_0_1
2,util_dist_1_2,"Distance, piecewise linear from 1 to 2 miles","@(_DIST-1).clip(0,1)",coef_dist_1_2
3,util_dist_2_5,"Distance, piecewise linear from 2 to 5 miles","@(_DIST-2).clip(0,3)",coef_dist_2_5
4,util_dist_5_15,"Distance, piecewise linear from 5 to 15 miles","@(_DIST-5).clip(0,10)",coef_dist_5_15
5,util_dist_15_up,"Distance, piecewise linear for 15+ miles",@(_DIST-15.0).clip(0),coef_dist_15_up
6,util_dist_0_5_high,"Distance 0 to 5 mi, high and very high income",@(df['income_segment']>=WORK_HIGH_SEGMENT_ID) ...,coef_dist_0_5_high
7,util_dist_15_up_high,"Distance 5+ mi, high and very high income",@(df['income_segment']>=WORK_HIGH_SEGMENT_ID) ...,coef_dist_5_up_high
8,util_size_variable,Size variable,@(df['size_term'] * df['shadow_price_size_term...,1
9,util_utility_adjustment,utility adjustment,@df['shadow_price_utility_adjustment'],1


In [6]:
# Alternatives table from the EDB: one row per (person_id, variable) pair,
# with one numbered column per alternative.
alt_values

Unnamed: 0,person_id,variable,1,2,3,4,5,6,7,8,...,181,182,183,184,185,186,187,188,189,190
0,110274,TAZ,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,...,181.0,182.0,183.0,184.0,185.0,186.0,187.0,188.0,189.0,190.0
1,110274,mode_choice_logsum,-0.47944851909784375,-0.28552521143267007,-0.30761367235529846,-0.24837207220778174,-0.2027951273777935,-0.5146191347308435,-0.3942615298576022,0.10708464401436528,...,-0.8152745809007619,-0.9352596221642304,-0.8396785283151357,-0.7454278382169685,-0.6642367603325747,-0.7360307976703867,-0.8922738873708044,-0.7536225578783141,-0.8524490550078891,-1.0103232877706299
2,110274,pick_count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,110274,prob,0.057150840242627544,0.08469690499898065,0.005246768453568192,0.05174135498396009,0.036268359625744574,0.006904411205017171,0.022177275014310764,0.006808200370770835,...,8.074495966703502e-05,7.572843672313463e-05,0.0003166497489420263,0.00037108729605378374,0.0005699251275273905,0.00046081884065046075,0.0001799514663573611,0.0012246927616559123,5.2129276044611955e-05,0.00031078806541221486
4,110274,shadow_price_size_term_adjustment,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2522,7348239,util_mode_logsum,-1.1618321687208726,-1.0036209190477476,-1.6443615938430654,-0.9985735768846415,-1.618989524235128,-1.7269280452466291,-1.6705924063455906,-1.0125002778100034,...,-0.8786959280980746,-0.7551187435158331,-0.6818291367725213,-0.7301824836938693,-0.6588210025797607,-0.6218341269365818,-0.5424107164994928,-0.5237309882940743,-0.6440006321969093,-0.6918554535160348
2523,7348239,util_no_attractions,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2524,7348239,util_sample_of_corrections_factor,3.3747705061837348,2.934932920568512,5.829959008655943,3.568024502775243,4.020399259983128,5.443360043508121,4.2410951554815,5.283915342569453,...,8.246769750329333,8.190553778588678,6.976066460838195,6.66636517375826,6.327797452252676,6.407835558942262,7.315643685720587,5.498201483963222,8.333471316031241,6.902273720621597
2525,7348239,util_size_variable,8.849766838403042,9.283094427998961,6.380473322824815,8.62938784109069,8.165078121147992,6.750797329123684,7.9273160630170745,6.838835976217513,...,4.25552759981349,4.218433599118909,5.401455915465108,5.687287172127004,5.965094904087839,5.903501808626576,4.9316786630945435,6.695955888668583,3.939891058770619,5.4079786207575555


In [7]:
# Chooser table from the EDB: person_id, income_segment, TAZ, plus the
# model_choice and override_choice columns.
chooser_data

Unnamed: 0,person_id,income_segment,TAZ,model_choice,override_choice
0,110274,1,13,164,2
1,112798,1,19,160,19
2,115213,1,40,36,101
3,121226,1,88,123,148
4,125886,1,113,131,17
...,...,...,...,...,...
128,6368322,4,101,132,2
129,6918939,4,121,141,15
130,6918940,4,121,169,9
131,6918943,4,121,6,9


In [8]:
# Choices table from the EDB: person_id and model_choice.
choices

Unnamed: 0,person_id,model_choice
0,110274,164
1,112798,160
2,115213,36
3,121226,123
4,125886,131
...,...,...
261,6368322,132
262,6918939,141
263,6918940,169
264,6918943,6


# Data Setup

In [9]:
from larch import P, X

In [10]:
# Create an empty larch model; utility, parameters and data are attached in
# the following cells.
m = larch.Model()

In [11]:
# Build the utility function from the spec table with the local helper:
# 'Label' supplies the data term, 'coefficient' the parameter, and the
# 'local_dist' row is skipped.
utility_from_spec = larch_asim.linear_utility_from_spec(
    spec,
    x_col='Label',
    p_col='coefficient',
    ignore_x=('local_dist',),
)
m.utility_ca = utility_from_spec
print(m.utility_ca)

  P.coef_dist_0_1 * X.util_dist_0_1
+ P.coef_dist_1_2 * X.util_dist_1_2
+ P.coef_dist_2_5 * X.util_dist_2_5
+ P.coef_dist_5_15 * X.util_dist_5_15
+ P.coef_dist_15_up * X.util_dist_15_up
+ P.coef_dist_0_5_high * X.util_dist_0_5_high
+ P.coef_dist_5_up_high * X.util_dist_15_up_high
+ P('1') * X.util_size_variable
+ P('1') * X.util_utility_adjustment
+ P('-999') * X.util_no_attractions
+ P.coef_mode_logsum * X.util_mode_logsum
+ P('1') * X.util_sample_of_corrections_factor


In [12]:
# Pass the coefficient table read from the EDB to model m via the local
# larch_asim helper.
larch_asim.apply_coefficients(coefficients, m)

In [13]:
# Display the model's parameter frame (value, initvalue, nullvalue, bounds,
# holdfast flag and note for each parameter).
m.pf

Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note
-999,-999.0,-999.0,-999.0,-999.0,-999.0,1,
1,1.0,1.0,1.0,1.0,1.0,1,
coef_dist_0_1,-0.8428,0.0,0.0,-inf,inf,0,
coef_dist_0_5_high,0.15,0.0,0.0,-inf,inf,0,
coef_dist_15_up,-0.0917,0.0,0.0,-inf,inf,0,
coef_dist_1_2,-0.3104,0.0,0.0,-inf,inf,0,
coef_dist_2_5,-0.3783,0.0,0.0,-inf,inf,0,
coef_dist_5_15,-0.1285,0.0,0.0,-inf,inf,0,
coef_dist_5_up_high,0.02,0.0,0.0,-inf,inf,0,
coef_mode_logsum,0.3,0.0,0.0,-inf,inf,0,


In [14]:
# Chooser-level data, keyed by person.
chooser_frame = chooser_data.set_index('person_id')

# Alternative-level data: index by (person, variable) and convert with the
# local cv_to_ca helper.
alternatives_frame = larch_asim.cv_to_ca(
    alt_values.set_index(['person_id', 'variable'])
)

# Bundle both frames for larch; av=True is passed through unchanged.
d = larch.DataFrames(co=chooser_frame, ca=alternatives_frame, av=True)

In [15]:
# Attach the DataFrames bundle built above as the model's data source.
m.dataservice = d

In [16]:
# Name the chooser-data (co) column that holds the choice; chooser_data has
# both model_choice and override_choice, and override_choice is used here.
m.choice_co_code = 'override_choice'

# Estimate

Note: The demo test data here is only 100 households, which is very
little data for the 8 estimated parameters in this model -- the result
may be an over-specified model which does not have a numerically stable
likelihood maximizing solution.

In [17]:
# Run the estimation on the attached data; the result is displayed below.
m.estimate()

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
-999,-999.0,-999.0,-999.0,-999.0,-999.0,1,,-999.0
1,1.0,1.0,1.0,1.0,1.0,1,,1.0
coef_dist_0_1,-1.273173,0.0,0.0,-inf,inf,0,,-1.273173
coef_dist_0_5_high,0.240198,0.0,0.0,-inf,inf,0,,0.240198
coef_dist_15_up,-0.0917,0.0,0.0,-inf,inf,0,,-0.0917
coef_dist_1_2,-0.560784,0.0,0.0,-inf,inf,0,,-0.560784
coef_dist_2_5,-0.593488,0.0,0.0,-inf,inf,0,,-0.593488
coef_dist_5_15,-0.17467,0.0,0.0,-inf,inf,0,,-0.17467
coef_dist_5_up_high,-0.142405,0.0,0.0,-inf,inf,0,,-0.142405
coef_mode_logsum,0.224803,0.0,0.0,-inf,inf,0,,0.224803




Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
-999,-999.000000
1,1.000000
coef_dist_0_1,-1.273173
coef_dist_0_5_high,0.240198
coef_dist_15_up,-0.091700
coef_dist_1_2,-0.560784
coef_dist_2_5,-0.593488
coef_dist_5_15,-0.174670
coef_dist_5_up_high,-0.142405
coef_mode_logsum,0.224803

Unnamed: 0,0
-999,-999.0
1,1.0
coef_dist_0_1,-1.273173
coef_dist_0_5_high,0.240198
coef_dist_15_up,-0.0917
coef_dist_1_2,-0.560784
coef_dist_2_5,-0.593488
coef_dist_5_15,-0.17467
coef_dist_5_up_high,-0.142405
coef_mode_logsum,0.224803

Unnamed: 0,0
-999,0.0
1,0.0
coef_dist_0_1,-7.9e-05
coef_dist_0_5_high,-0.001208
coef_dist_15_up,0.0
coef_dist_1_2,-0.000485
coef_dist_2_5,-0.001257
coef_dist_5_15,-0.000107
coef_dist_5_up_high,-0.000263
coef_mode_logsum,0.000736


In [18]:
# Inspect the model's possible_overspecification attribute after estimation.
m.possible_overspecification

# Outputs

In [19]:
# Look up each coefficient's value in the model's parameter frame, in the
# row order of the coefficients table, and write it back by position.
estimated_values = m.pf.loc[coefficients['coefficient_name'], 'value']
coefficients['value'] = estimated_values.values

In [20]:
# Save the updated coefficients next to the original EDB files, without the
# DataFrame index.
revised_coefficients_path = directory + "workplace_location_coefficients_revised.csv"
coefficients.to_csv(revised_coefficients_path, index=False)

In [21]:
# Write the model to an Excel workbook inside the EDB folder.
estimation_report_path = directory + "workplace_location_model_estimation.xlsx"
m.to_xlsx(estimation_report_path)

<larch.util.excel.ExcelWriter at 0x7fd040aa4d90>