In [1]:
"""
File v422_binary_SM_base.py

Michel Bierlaire
Tue Aug  4 20:09:59 2020
"""

# Import the packages
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta

# Read the data
# The file is tab-separated. The separator is passed by keyword because
# recent pandas versions accept only the file path positionally in
# read_csv (a positional separator raises a TypeError).
df = pd.read_csv('swissmetro.dat', sep='\t')
database = db.Database('swissmetro', df)

# The following statement allows you to use the names of the
# variables as Python variables.
# Every entry of database.variables is injected into the module namespace;
# names such as CAR_AV, SP, GA or CHOICE used below are defined this way.
globals().update(database.variables)

# Define new variables
# Each availability indicator is multiplied by (SP != 0), so the resulting
# value is zero whenever SP is zero, regardless of the raw availability.
CAR_AV_SP = CAR_AV * (SP != 0)
TRAIN_AV_SP = TRAIN_AV * (SP != 0)

# If the person has a GA (season ticket) her incremental cost is actually 0
# rather than the cost value gathered from the
# network data.
# The factor (GA == 0) acts as a mask that zeroes the train cost whenever
# GA is non-zero.

TRAIN_COST = TRAIN_CO * (GA == 0)

# For numerical reasons, it is good practice to scale the data so
# that the values of the parameters are around 1.0.
# A previous estimation with the unscaled data has generated
# parameters around -0.01 for both cost and time. Therefore, time and
# cost are multiplied by 0.01 (i.e. divided by 100).
TRAIN_TT_SCALED = TRAIN_TT / 100
TRAIN_COST_SCALED = TRAIN_COST / 100
CAR_TT_SCALED = CAR_TT / 100
CAR_CO_SCALED = CAR_CO / 100

# Observation filter
# Every observation for which `exclude` evaluates to true is removed from
# the database and therefore ignored during estimation. The individual
# conditions are summed, so an observation is removed as soon as at least
# one of them holds:
#   - TRAIN_AV_SP is 0,
#   - CAR_AV_SP is 0,
#   - CHOICE is 0 or CHOICE is 2 (only alternatives 1 and 3 are modeled),
#   - PURPOSE is neither 1 nor 3 (the original author describes the
#     retained sample as trips to work).
exclude = (
    (TRAIN_AV_SP == 0)
    + (CAR_AV_SP == 0)
    + (CHOICE == 0)
    + (CHOICE == 2)
    + ((PURPOSE != 1) * (PURPOSE != 3))
) > 0
database.remove(exclude)


# Parameters to be estimated
# Arguments:
#   - 1  Name for report; Typically, the same as the variable.
#   - 2  Starting value.
#   - 3  Lower bound.
#   - 4  Upper bound.
#   - 5  0: estimate the parameter, 1: keep it fixed.
#
# All parameters start at 0 and are unbounded (None, None).
# ASC_TRAIN has status 1, so it stays fixed at its starting value 0 and only
# ASC_CAR, B_TIME and B_COST are estimated.

ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 1)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)


# Systematic utilities, one per alternative. Both are linear in the
# parameters and share the same time and cost coefficients; they differ
# only in their alternative specific constant.
V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V3 = ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED

# Utility of each alternative, keyed by the alternative number
# (1 for the train utility, 3 for the car utility).
V = {1: V1, 3: V3}

# Availability condition of each alternative, keyed the same way.
av = {1: TRAIN_AV_SP, 3: CAR_AV_SP}

# Logit model with availability conditions: log of the probability of the
# alternative recorded in CHOICE.
logprob = models.loglogit(V, av, CHOICE)

# Create the Biogeme object
# It is built from the database (after the removal of excluded
# observations) and the log-probability expression defined above.
biogeme = bio.BIOGEME(database, logprob)
# NOTE(review): modelName is presumably used by Biogeme to name its report
# files -- confirm against the Biogeme documentation.
biogeme.modelName = 'binary_SM_base'

# Estimate the parameters
results = biogeme.estimate()

# Print the table of estimated parameters (values, standard errors, t-tests
# and p-values, including their robust counterparts).
print(results.getEstimatedParameters())

            Value   Std err     t-test       p-value  Rob. Std err  \
ASC_CAR  1.032710  0.071479  14.447732  0.000000e+00      0.136347   
B_COST  -1.704828  0.121025 -14.086536  0.000000e+00      0.178442   
B_TIME  -0.889785  0.134468  -6.617067  3.663958e-11      0.370767   

         Rob. t-test  Rob. p-value  
ASC_CAR     7.574103  3.619327e-14  
B_COST     -9.553982  0.000000e+00  
B_TIME     -2.399847  1.640192e-02  
