In [3]:
"""
File v423_binaryLogitSM.py

Michel Bierlaire
Wed Aug  5 08:11:37 2020
"""

# Import the packages
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta, bioNormalCdf, Elem, log

# Read the data
df = pd.read_csv('swissmetro.dat', '\t')
database = db.Database('swissmetro', df)

# The following statement allows you to use the names of the
# variable as Python variable.
globals().update(database.variables)

# Define new variables
CAR_AV_SP = CAR_AV * (SP != 0)
TRAIN_AV_SP = TRAIN_AV * (SP != 0)

# If the person has a GA (season ticket) her incremental cost is actually 0
# rather than the cost value gathered from the
# network data.

TRAIN_COST = TRAIN_CO * (GA == 0)

# For numerical reasons, it is good practice to scale the data to
# that the values of the parameters are around 1.0.
# A previous estimation with the unscaled data has generated
# parameters around -0.01 for both cost and time. Therefore, time and
# cost are multipled my 0.01.
TRAIN_TT_SCALED = TRAIN_TT / 100
TRAIN_COST_SCALED = TRAIN_COST / 100
CAR_TT_SCALED = CAR_TT / 100
CAR_CO_SCALED = CAR_CO / 100

# Removing some observations
# All observations verifying the following expression will not be
# considered for estimation
# The modeler here has developed the model only for trips to work.
# Observations such that the dependent variable CHOICE is 0 and
# CHOICE is 2 are also removed.
exclude = (TRAIN_AV_SP == 0) + \
    (CAR_AV_SP == 0) + \
    (CHOICE == 0) + \
    (CHOICE == 2) + \
    ((PURPOSE != 1) * (PURPOSE != 3)) > 0
database.remove(exclude)

# Parameters to be estimated
# Arguments:
#   - 1  Name for report; Typically, the same as the variable.
#   - 2  Starting value.
#   - 3  Lower bound.
#   - 4  Upper bound.
#   - 5  0: estimate the parameter, 1: keep it fixed.

ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 1)
B_CAR_TIME = Beta('B_CAR_TIME', 0, None, None, 0)
B_TRAIN_TIME = Beta('B_TRAIN_TIME', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)

# A parameter associated with the train headway is defined here
B_HE = Beta('B_HE', 0, None, None, 0)

# Utility functions
# The train headway is included in the utility
# function of the train alternative
V1 = ASC_TRAIN + \
    B_CAR_TIME * TRAIN_TT_SCALED + \
    B_CAR_COST * TRAIN_COST_SCALED + \
    B_HE * TRAIN_HE
V3 = ASC_CAR + \
    B_TRAIN_TIME * CAR_TT_SCALED + \
    B_TRAIN_COST * CAR_CO_SCALED

# Associate utility functions with the numbering of alternatives
V = {1: V1,
     3: V3}

# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP,
      3: CAR_AV_SP}

# Choice probability of alternative 1 according to the probit model
prob1 = bioNormalCdf(V1 - V3)

# The choice model is a probit
P = {1: prob1,
     3: 1 - prob1}

logprob = log(Elem(P, CHOICE))


# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'binaryLogitSM'

# Estimate the parameters
results = biogeme.estimate()

print(results.getEstimatedParameters())


                 Value   Std err     t-test       p-value  Rob. Std err  \
ASC_CAR      -0.550257  0.108710  -5.061689  4.155578e-07      0.122601   
B_CAR_COST   -0.985465  0.065120 -15.133158  0.000000e+00      0.147313   
B_CAR_TIME   -0.650757  0.079822  -8.152604  4.440892e-16      0.095668   
B_HE         -0.003323  0.000940  -3.535164  4.075216e-04      0.000942   
B_TRAIN_COST -0.543100  0.105871  -5.129824  2.900127e-07      0.134447   
B_TRAIN_TIME -0.194785  0.044027  -4.424187  9.680632e-06      0.076928   

              Rob. t-test  Rob. p-value  
ASC_CAR         -4.488195  7.182920e-06  
B_CAR_COST      -6.689586  2.238032e-11  
B_CAR_TIME      -6.802281  1.029754e-11  
B_HE            -3.527409  4.196483e-04  
B_TRAIN_COST    -4.039504  5.356435e-05  
B_TRAIN_TIME    -2.532057  1.133956e-02  
