In [1]:
"""
File v715_optimal_model.py

Michel Bierlaire
Mon Aug 17 11:44:41 2020
"""
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta

# Read the data (tab-separated text file).
# The separator is passed by keyword: pandas deprecated positional arguments
# after the file path and made them keyword-only in pandas 2.0, so the
# positional form no longer runs on current pandas.
df = pd.read_csv('optima.dat', sep='\t')

# Label the Biogeme database after the data set that is actually loaded
# (the previous label, 'boeing', did not match 'optima.dat').
database = db.Database('optima', df)

# The following statement makes each variable of the database available
# as a Python variable of the same name, so the model specification below
# can refer to TimePT, Choice, ... directly.
globals().update(database.variables)

# Divisors used to rescale the raw variables
TIME_SCALE = 200
COST_SCALE = 10
DISTANCE_SCALE = 5

# Rescaled travel times, costs and distance
TimePT_scaled = TimePT / TIME_SCALE
TimeCar_scaled = TimeCar / TIME_SCALE
MarginalCostPT_scaled = MarginalCostPT / COST_SCALE
CostCarCHF_scaled = CostCarCHF / COST_SCALE
distance_km_scaled = distance_km / DISTANCE_SCALE

# Indicators derived from the Gender code (1, 2, or -1 when unreported)
male = Gender == 1
female = Gender == 2
unreportedGender = Gender == -1

# Indicators derived from the occupation status: code 1 versus any other
fulltime = OccupStat == 1
notfulltime = OccupStat != 1

# Observations to drop from the database:
#  - those with Choice equal to -1;
#  - those with CarAvail equal to 3 that nevertheless chose alternative 1
#    (presumably "no car available" but car chosen -- confirm the coding
#    of CarAvail against the data description).
choice_is_minus_one = Choice == -1
alt_1_chosen_with_car_avail_3 = (CarAvail == 3) * (Choice == 1)
exclude = (choice_is_minus_one + alt_1_chosen_with_car_avail_3) != 0
database.remove(exclude)

# Parameters to be estimated
def _free_parameter(name):
    """Build the Beta called `name` with the arguments shared by every
    parameter of this model: value 0, both bounds None, status 0."""
    return Beta(name, 0, None, None, 0)


# Alternative specific constants
ASC_CAR = _free_parameter('ASC_CAR')
ASC_SM = _free_parameter('ASC_SM')

# Travel time coefficients, one per occupation segment
BETA_TIME_FULLTIME = _free_parameter('BETA_TIME_FULLTIME')
BETA_TIME_OTHER = _free_parameter('BETA_TIME_OTHER')

# Distance coefficients, one per gender segment
BETA_DIST_MALE = _free_parameter('BETA_DIST_MALE')
BETA_DIST_FEMALE = _free_parameter('BETA_DIST_FEMALE')
BETA_DIST_UNREPORTED = _free_parameter('BETA_DIST_UNREPORTED')

# Cost coefficient, shared by the alternatives that have a cost
BETA_COST = _free_parameter('BETA_COST')

# Utility functions, one per alternative.
# PT: segment-specific time terms plus cost (no constant).
V_PT = (
    BETA_TIME_FULLTIME * TimePT_scaled * fulltime
    + BETA_TIME_OTHER * TimePT_scaled * notfulltime
    + BETA_COST * MarginalCostPT_scaled
)

# Car: constant, segment-specific time terms and cost.
V_CAR = (
    ASC_CAR
    + BETA_TIME_FULLTIME * TimeCar_scaled * fulltime
    + BETA_TIME_OTHER * TimeCar_scaled * notfulltime
    + BETA_COST * CostCarCHF_scaled
)

# SM: constant and gender-specific distance terms.
V_SM = (
    ASC_SM
    + BETA_DIST_MALE * distance_km_scaled * male
    + BETA_DIST_FEMALE * distance_km_scaled * female
    + BETA_DIST_UNREPORTED * distance_km_scaled * unreportedGender
)

# Map each alternative number to its utility: 0 = PT, 1 = car, 2 = SM
V = {0: V_PT, 1: V_CAR, 2: V_SM}

# The choice model is a logit, without availability conditions:
# None is passed in place of the availability conditions, and Choice is
# the variable matched against the keys of V (0, 1, 2).
logprob = models.loglogit(V, None, Choice)

# Create the Biogeme object from the (filtered) database and the
# log likelihood expression. The model name also names the HTML report
# written at estimation time (v715_optima_model.html).
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'v715_optima_model'

# Estimate the parameters
results = biogeme.estimate()

# Get the results in a pandas table (one row per parameter, with the
# estimate, standard errors, t-tests and p-values) and print it as text
pandasResults = results.getEstimatedParameters()
print(pandasResults)

                         Value   Std err     t-test       p-value  \
ASC_CAR               0.296341  0.087779   3.375996  7.354894e-04   
ASC_SM               -0.020574  0.168723  -0.121937  9.029489e-01   
BETA_COST            -0.762979  0.075582 -10.094685  0.000000e+00   
BETA_DIST_FEMALE     -1.112262  0.136382  -8.155491  4.440892e-16   
BETA_DIST_MALE       -0.920033  0.108435  -8.484649  0.000000e+00   
BETA_DIST_UNREPORTED -0.912456  0.257352  -3.545550  3.917943e-04   
BETA_TIME_FULLTIME   -1.619867  0.321095  -5.044818  4.539522e-07   
BETA_TIME_OTHER      -0.509193  0.266876  -1.907977  5.639416e-02   

                      Rob. Std err  Rob. t-test  Rob. p-value  
ASC_CAR                   0.100041     2.962180  3.054689e-03  
ASC_SM                    0.290873    -0.070730  9.436124e-01  
BETA_COST                 0.140440    -5.432775  5.548434e-08  
BETA_DIST_FEMALE          0.283604    -3.921892  8.785622e-05  
BETA_DIST_MALE            0.270984    -3.395152  6.859049e

In [2]:
# Print the plain-text summary of the estimation results: general
# statistics (sample size, log likelihoods, rho square, AIC/BIC), then the
# parameter estimates, then statistics for each pair of parameters.
print(results)


Results for model v715_optima_model
Output file (HTML):			v715_optima_model.html
Nbr of parameters:		8
Sample size:			1899
Excluded data:			366
Init log likelihood:		-2086.265
Final log likelihood:		-1299.017
Likelihood ratio test:		1574.495
Rho square:			0.377
Rho bar square:			0.374
Akaike Information Criterion:	2614.034
Bayesian Information Criterion:	2658.427
Final gradient norm:		0.007507272
ASC_CAR        : 0.296[0.0878 3.38 0.000735][0.1 2.96 0.00305]
ASC_SM         : -0.0206[0.169 -0.122 0.903][0.291 -0.0707 0.944]
BETA_COST      : -0.763[0.0756 -10.1 0][0.14 -5.43 5.55e-08]
BETA_DIST_FEMALE: -1.11[0.136 -8.16 4.44e-16][0.284 -3.92 8.79e-05]
BETA_DIST_MALE : -0.92[0.108 -8.48 0][0.271 -3.4 0.000686]
BETA_DIST_UNREPORTED: -0.912[0.257 -3.55 0.000392][0.275 -3.32 0.000897]
BETA_TIME_FULLTIME: -1.62[0.321 -5.04 4.54e-07][0.339 -4.78 1.72e-06]
BETA_TIME_OTHER: -0.509[0.267 -1.91 0.0564][0.299 -1.7 0.0889]
('ASC_SM', 'ASC_CAR'):	0.00553	0.374	-2	0.0455	0.00157	0.0538	-1.05	0.295
('

In [6]:
# Summary statistics of the Choice column.
# NOTE(review): the count shown (1899) equals the estimation sample size,
# not the raw file size, so `df` no longer contains the excluded rows at
# this point -- presumably database.remove() filtered this same DataFrame
# in place; confirm before relying on `df` as the raw data.
df['Choice'].describe()

count    1899.000000
mean        0.777778
std         0.541347
min         0.000000
25%         0.000000
50%         1.000000
75%         1.000000
max         2.000000
Name: Choice, dtype: float64