# Demo of MNL and LCCM for mode choice

In [1]:
import lccm
import numpy as np
import pandas as pd
import pylogit
from collections import OrderedDict

### Load data

In [2]:
df = pd.DataFrame.from_csv('data/valueOfGreen.dat', sep='\t', index_col=None)

In [3]:
df.columns.values

array(['ID', 'Question', 'CHOICE', 'A1_AV', 'A2_AV', 'B1_AV', 'B2_AV',
       'T_AV', 'K_AV', 'W_AV', 'TT_A1', 'C_A1', 'GG_A1', 'TT_A2', 'C_A2',
       'GG_A2', 'TT_B1', 'C_B1', 'GG_B1', 'TT_B2', 'C_B2', 'GG_B2', 'TT_T',
       'C_T', 'GG_T', 'TT_K', 'TT_W', 'GENDER', 'VEG', 'CAR_OWNER',
       'INCOME', 'INDENV1', 'INDENV2', 'INDENV3', 'INDENV4', 'INDENV5',
       'INDENV6', 'INDENV7', 'INDENV8', 'INDENV9', 'INDENV10', 'INDENV11',
       'INDENV12', 'INDPT1', 'INDPT2', 'INDPT3', 'INDPT4', 'INDPT5',
       'INDAUTO1', 'INDAUTO2', 'INDAUTO3', 'INDAUTO4', 'INDAUTO5',
       'INDAUTO6', 'INDAUTO7'], dtype=object)

### Alternative id's

- 1 = Auto 1
- 2 = Auto 2
- 3 = Bus 1
- 4 = Bus 2
- 5 = Train
- 6 = Walk
- 7 = Bicycle

### Convert from wide format to long format

In [5]:
ind_vars = ['ID', 'GENDER', 'VEG', 'CAR_OWNER', 'INCOME']

alt_varying_vars = {'travel_time': dict([(1, 'TT_A1'), (2, 'TT_A2'), (3, 'TT_B1'), 
                                (4, 'TT_B2'), (5, 'TT_T'), (6, 'TT_K'), (7, 'TT_W')]),
                    'travel_cost': dict([(1, 'C_A1'), (2, 'C_A2'), (3, 'C_B1'),
                                (4, 'C_B2'), (5, 'C_T')]),
                    'emissions': dict([(1, 'GG_A1'), (2, 'GG_A2'), (3, 'GG_B1'), 
                                (4, 'GG_B2'), (5, 'GG_T')])}

availability_vars = {1: 'A1_AV', 2: 'A2_AV', 3: 'B1_AV', 4: 'B2_AV', 5: 'T_AV', 6: 'K_AV', 7: 'W_AV'}

alt_id_col = 'ALT_ID'

# 'ID' is the decision-maker, but there are multiple observations (choice scenarios) for each
df['OBS_ID'] = np.arange(df.shape[0], dtype=int) + 1
obs_id_col = 'OBS_ID'

choice_col = 'CHOICE'

data = pylogit.convert_wide_to_long(df, ind_vars, alt_varying_vars, 
                availability_vars, obs_id_col, choice_col, new_alt_id_name=alt_id_col)

In [6]:
data.describe()

Unnamed: 0,OBS_ID,ALT_ID,CHOICE,ID,GENDER,VEG,CAR_OWNER,INCOME,travel_time,emissions,travel_cost
count,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0,5010.0
mean,835.5,3.833333,0.333333,167.5,1.586826,0.062874,1.847305,1.254491,46.968663,2.670619,1.901547
std,482.135508,2.034629,0.471452,96.426687,0.498497,0.242761,0.428145,1.642938,63.85364,2.652327,1.727593
min,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,8.0,0.0,0.0
25%,418.0,2.0,0.0,84.0,1.0,0.0,2.0,1.0,15.0,0.0,0.0
50%,835.5,3.5,0.0,167.5,2.0,0.0,2.0,1.0,30.0,2.3,1.5
75%,1253.0,6.0,1.0,251.0,2.0,0.0,2.0,1.0,60.0,4.5,3.0
max,1670.0,7.0,1.0,334.0,3.0,1.0,3.0,9.0,400.0,10.0,6.0


### Label important columns

In [7]:
ind_id_col = 'ID'
obs_id_col = 'OBS_ID'
alt_id_col = 'ALT_ID'
choice_col = 'CHOICE'

### Multinomial logit

In [14]:
# base case is alt_id = 7 (walking)

spec =  OrderedDict([
            ('intercept', [1, 2, 3, 4, 5, 6]),
            ('travel_time', [[1, 2, 3, 4, 5, 6, 7]]),
            ('travel_cost', [[1, 2, 3, 4, 5, 6, 7]]),
            ('emissions', [[1, 2, 3, 4, 5, 6, 7]])
        ])

labels = OrderedDict([
            ('intercept', ['asc_drive1', 'asc_drive2', 'asc_bus1', 'asc_bus2', 'asc_train', 'asc_bike']),
            ('travel_time', ['travel time']), 
            ('travel_cost', ['travel cost']),
            ('emissions', ['emissions'])
        ])

In [15]:
m = pylogit.create_choice_model(data = data, 
                                alt_id_col = alt_id_col, 
                                obs_id_col = obs_id_col, 
                                choice_col = choice_col, 
                                specification = spec, 
                                model_type = "MNL", 
                                names=labels)

m.fit_mle(init_vals = np.random.rand(9))
print m.get_statsmodels_summary()

Log-likelihood at zero: -1,834.6825
Initial Log-likelihood: -36,851.2566
Estimation Time: 0.05 seconds.
Final log-likelihood: -1,834.6825
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                      CHOICE   No. Observations:                1,670
Model:             Multinomial Logit Model   Df Residuals:                    1,661
Method:                                MLE   Df Model:                            9
Date:                     Wed, 09 Nov 2016   Pseudo R-squ.:                   0.000
Time:                             13:37:37   Pseudo R-bar-squ.:              -0.005
converged:                           False   Log-Likelihood:             -1,834.683
                                             LL-Null:                    -1,834.683
                  coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
asc_drive1      1.5005      0.

### LCCM

In [16]:
# Class membership model

n_classes = 2

class_membership_spec = ['intercept', 'GENDER', 'VEG', 'INCOME', 'CAR_OWNER']
class_membership_labels = ['ASC', 'Gender', 'Vegetarian', 'Income', 'Car owner']

In [17]:
# Class-specific choice model

class_specific_specs = [spec, spec]
class_specific_labels = [labels, labels]

In [18]:
lccm.lccm_fit(data = data,
              ind_id_col = ind_id_col,
              obs_id_col = obs_id_col,
              alt_id_col = alt_id_col,
              choice_col = choice_col,
              n_classes = n_classes,
              class_membership_spec = class_membership_spec,
              class_membership_labels = class_membership_labels,
              class_specific_specs = class_specific_specs,
              class_specific_labels = class_specific_labels)

Processing data
Initializing EM Algorithm...

<Wed, 09 Nov 2016 13:37:43> Iteration 0: -1531.8303
<Wed, 09 Nov 2016 13:37:43> Iteration 1: -1401.3461
<Wed, 09 Nov 2016 13:37:43> Iteration 2: -1393.8490
<Wed, 09 Nov 2016 13:37:43> Iteration 3: -1390.8795
<Wed, 09 Nov 2016 13:37:43> Iteration 4: -1388.9969
<Wed, 09 Nov 2016 13:37:43> Iteration 5: -1387.6853
<Wed, 09 Nov 2016 13:37:43> Iteration 6: -1386.7621
<Wed, 09 Nov 2016 13:37:43> Iteration 7: -1386.1138
<Wed, 09 Nov 2016 13:37:43> Iteration 8: -1385.6626
<Wed, 09 Nov 2016 13:37:43> Iteration 9: -1385.3536
<Wed, 09 Nov 2016 13:37:43> Iteration 10: -1385.1463
<Wed, 09 Nov 2016 13:37:43> Iteration 11: -1385.0105
<Wed, 09 Nov 2016 13:37:43> Iteration 12: -1384.9234
<Wed, 09 Nov 2016 13:37:43> Iteration 13: -1384.8686
<Wed, 09 Nov 2016 13:37:43> Iteration 14: -1384.8348
<Wed, 09 Nov 2016 13:37:44> Iteration 15: -1384.8141
<Wed, 09 Nov 2016 13:37:44> Iteration 16: -1384.8016
<Wed, 09 Nov 2016 13:37:44> Iteration 17: -1384.7941
<Wed, 09 N