In [1]:
"""
File v423_binaryLogitSM.py

Michel Bierlaire
Wed Aug  5 08:11:37 2020
"""

# Import the packages
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta

# Read the data
df = pd.read_csv('swissmetro.dat', '\t')
database = db.Database('swissmetro', df)

# The following statement allows you to use the names of the
# variable as Python variable.
globals().update(database.variables)

# Define new variables
CAR_AV_SP = CAR_AV * (SP != 0)
TRAIN_AV_SP = TRAIN_AV * (SP != 0)

# If the person has a GA (season ticket) her incremental cost is actually 0
# rather than the cost value gathered from the
# network data.

TRAIN_COST = TRAIN_CO * (GA == 0)

# For numerical reasons, it is good practice to scale the data to
# that the values of the parameters are around 1.0.
# A previous estimation with the unscaled data has generated
# parameters around -0.01 for both cost and time. Therefore, time and
# cost are multipled my 0.01.
TRAIN_TT_SCALED = TRAIN_TT / 100
TRAIN_COST_SCALED = TRAIN_COST / 100
CAR_TT_SCALED = CAR_TT / 100
CAR_CO_SCALED = CAR_CO / 100

# Removing some observations
# All observations verifying the following expression will not be
# considered for estimation
# The modeler here has developed the model only for trips to work.
# Observations such that the dependent variable CHOICE is 0 and
# CHOICE is 2 are also removed.
exclude = (TRAIN_AV_SP == 0) + \
    (CAR_AV_SP == 0) + \
    (CHOICE == 0) + \
    (CHOICE == 2) + \
    ((PURPOSE != 1) * (PURPOSE != 3)) > 0
database.remove(exclude)

# Parameters to be estimated
# Arguments:
#   - 1  Name for report; Typically, the same as the variable.
#   - 2  Starting value.
#   - 3  Lower bound.
#   - 4  Upper bound.
#   - 5  0: estimate the parameter, 1: keep it fixed.

ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 1)
B_CAR_TIME = Beta('B_CAR_TIME', 0, None, None, 0)
B_TRAIN_TIME = Beta('B_TRAIN_TIME', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)

# A parameter associated with the train headway is defined here
B_HE = Beta('B_HE', 0, None, None, 0)

# Utility functions
# The train headway is included in the utility
# function of the train alternative
V1 = ASC_TRAIN + \
    B_CAR_TIME * TRAIN_TT_SCALED + \
    B_CAR_COST * TRAIN_COST_SCALED + \
    B_HE * TRAIN_HE
V3 = ASC_CAR + \
    B_TRAIN_TIME * CAR_TT_SCALED + \
    B_TRAIN_COST * CAR_CO_SCALED

# Associate utility functions with the numbering of alternatives
V = {1: V1,
     3: V3}

# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP,
      3: CAR_AV_SP}

# The choice model is a logit, with availability conditions
logprob = models.loglogit(V, av, CHOICE)

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'binaryLogitSM'

# Estimate the parameters
results = biogeme.estimate()

print(results.getEstimatedParameters())


                 Value   Std err     t-test       p-value  Rob. Std err  \
ASC_CAR      -1.239884  0.196062  -6.323924  2.550034e-10      0.203660   
B_CAR_COST   -2.401644  0.160354 -14.977145  0.000000e+00      0.273974   
B_CAR_TIME   -1.134900  0.156750  -7.240175  4.480860e-13      0.207983   
B_HE         -0.005812  0.001706  -3.406657  6.576373e-04      0.001638   
B_TRAIN_COST -1.114288  0.215321  -5.175016  2.278915e-07      0.292248   
B_TRAIN_TIME -0.394345  0.122658  -3.214996  1.304464e-03      0.293468   

              Rob. t-test  Rob. p-value  
ASC_CAR         -6.088014  1.143200e-09  
B_CAR_COST      -8.765969  0.000000e+00  
B_CAR_TIME      -5.456698  4.850704e-08  
B_HE            -3.548716  3.871149e-04  
B_TRAIN_COST    -3.812818  1.373913e-04  
B_TRAIN_TIME    -1.343742  1.790319e-01  
