In [1]:
# Translated to .py by Meritxell Pacheco
# 2017
# Adapted to PandasBiogeme by Michel Bierlaire
# Sun Oct 21 23:15:31 2018

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit

# Read the tab-separated survey file and hand the resulting frame to Biogeme.
# NOTE(review): the frame is bound to the name `pandas`; the library itself is
# only imported as `pd`, so nothing is shadowed, but the name is easy to misread.
pandas = pd.read_table("airline.dat")
database = db.Database("airline", pandas)

# Render floats with three significant digits in pandas output.
pd.set_option('display.float_format', '{:.3g}'.format)

# Publish every entry of database.variables as a module-level name, so the
# data columns can be written directly in the expressions that follow.
globals().update(database.variables)

# Exclude
# The condition below is handed to database.remove().
exclude = ArrivalTimeHours_1 == -1
database.remove(exclude)
  
# Choice
# Weighted sum of the three BestAlternative_* columns with weights 1, 2, 3
# (presumably 0/1 indicators, giving the index of the chosen alternative --
# TODO confirm against the data description).
chosenAlternative = (
    BestAlternative_1 * 1
    + BestAlternative_2 * 2
    + BestAlternative_3 * 3
)

# Parameters to be estimated.
# Beta arguments, in positional order:
#   1  name for the report (typically the same as the Python variable)
#   2  starting value
#   3  lower bound
#   4  upper bound
#   5  status -- 0: estimate the parameter, 1: keep it fixed

# Constants of the three alternatives; Constant1 has status 1 (kept fixed).
Constant1 = Beta('Constant1', 0, None, None, 1)
Constant2 = Beta('Constant2', 0, None, None, 0)
Constant3 = Beta('Constant3', 0, None, None, 0)

# Coefficients shared by the three utility functions.
Fare = Beta('Fare', 0, None, None, 0)
Legroom = Beta('Legroom', 0, None, None, 0)
SchedDE = Beta('SchedDE', 0, None, None, 0)
SchedDL = Beta('SchedDL', 0, None, None, 0)
Total_TT = Beta('Total_TT', 0, None, None, 0)

# Arithmetic expressions for variables that are not directly available from
# the data. Each DefineVariable call receives a name, an expression and the
# database.


def _scheduled_delay(departure_mins, arrival_mins):
    """Return the schedule-delay expression of one alternative.

    The departure gap is weighted by DepartureTimeSensitive and the arrival
    gap by ArrivalTimeSensitive; the two weighted gaps are added.
    """
    return (
        DepartureTimeSensitive * (departure_mins - DesiredDepartureTime)
        + ArrivalTimeSensitive * (arrival_mins - DesiredArrivalTime)
    )


def _early_part(delay):
    """Return the negated delay where it is negative (0 elsewhere), over 60."""
    return -delay * (delay < 0) / 60


def _late_part(delay):
    """Return the delay where it is positive (0 elsewhere), over 60."""
    return delay * (delay > 0) / 60


# Indicators derived from the answer to question 11.
DepartureTimeSensitive = DefineVariable(
    'DepartureTimeSensitive', q11_DepartureOrArrivalIsImportant == 1, database
)
ArrivalTimeSensitive = DefineVariable(
    'ArrivalTimeSensitive', q11_DepartureOrArrivalIsImportant == 2, database
)
Missing = DefineVariable(
    'Missing',
    (q11_DepartureOrArrivalIsImportant != 1)
    * (q11_DepartureOrArrivalIsImportant != 2),
    database,
)

# Desired times, copied from questions 12 and 13 under new names.
DesiredDepartureTime = DefineVariable(
    'DesiredDepartureTime', q12_IdealDepTime, database
)
DesiredArrivalTime = DefineVariable(
    'DesiredArrivalTime', q13_IdealArrTime, database
)

# Schedule delay of each alternative.
ScheduledDelay_1 = DefineVariable(
    'ScheduledDelay_1',
    _scheduled_delay(DepartureTimeMins_1, ArrivalTimeMins_1),
    database,
)
ScheduledDelay_2 = DefineVariable(
    'ScheduledDelay_2',
    _scheduled_delay(DepartureTimeMins_2, ArrivalTimeMins_2),
    database,
)
ScheduledDelay_3 = DefineVariable(
    'ScheduledDelay_3',
    _scheduled_delay(DepartureTimeMins_3, ArrivalTimeMins_3),
    database,
)

# Early and late components of the delay. The division by 60 presumably
# converts minutes to hours (judging by the *Mins column names -- TODO confirm).
Opt1_SchedDelayEarly = DefineVariable(
    'Opt1_SchedDelayEarly', _early_part(ScheduledDelay_1), database
)
Opt2_SchedDelayEarly = DefineVariable(
    'Opt2_SchedDelayEarly', _early_part(ScheduledDelay_2), database
)
Opt3_SchedDelayEarly = DefineVariable(
    'Opt3_SchedDelayEarly', _early_part(ScheduledDelay_3), database
)
Opt1_SchedDelayLate = DefineVariable(
    'Opt1_SchedDelayLate', _late_part(ScheduledDelay_1), database
)
Opt2_SchedDelayLate = DefineVariable(
    'Opt2_SchedDelayLate', _late_part(ScheduledDelay_2), database
)
Opt3_SchedDelayLate = DefineVariable(
    'Opt3_SchedDelayLate', _late_part(ScheduledDelay_3), database
)

# Utilities


def _utility(constant, fare, legroom, sched_early, sched_late, trip_time):
    """Return the utility expression of one alternative.

    The alternative's constant is followed by one term per attribute, each
    multiplied by the matching coefficient (Fare, Legroom, SchedDE, SchedDL,
    Total_TT), in that order.
    """
    return (
        constant
        + Fare * fare
        + Legroom * legroom
        + SchedDE * sched_early
        + SchedDL * sched_late
        + Total_TT * trip_time
    )


Opt1 = _utility(
    Constant1, Fare_1, Legroom_1,
    Opt1_SchedDelayEarly, Opt1_SchedDelayLate, TripTimeHours_1,
)
Opt2 = _utility(
    Constant2, Fare_2, Legroom_2,
    Opt2_SchedDelayEarly, Opt2_SchedDelayLate, TripTimeHours_2,
)
Opt3 = _utility(
    Constant3, Fare_3, Legroom_3,
    Opt3_SchedDelayEarly, Opt3_SchedDelayLate, TripTimeHours_3,
)

# Utility expression per alternative id, and an availability of 1 for each id.
V = {1: Opt1, 2: Opt2, 3: Opt3}
av = {alternative: 1 for alternative in V}

# Logit choice model built from the utilities, the availability conditions
# and the chosen alternative.
logprob = loglogit(V, av, chosenAlternative)

# Set up the estimation under the model name "logit_airline_generic",
# then run it.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_airline_generic"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print(pandasResults)

# Summary figures taken from the database and from the estimation results.
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

            Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
Constant2   -1.31    0.127   -10.3        0         0.126        -10.4   
Constant3   -1.54    0.128     -12        0         0.126        -12.1   
Fare      -0.0194  0.00069   -28.2        0      0.000796        -24.4   
Legroom     0.225   0.0249    9.02        0        0.0266         8.45   
SchedDE    -0.139   0.0155   -8.94        0        0.0163        -8.55   
SchedDL    -0.104   0.0128   -8.15 4.44e-16        0.0137        -7.59   
Total_TT     -0.3   0.0668   -4.49 7.05e-06         0.067        -4.48   

           Rob. p-value  
Constant2             0  
Constant3             0  
Fare                  0  
Legroom               0  
SchedDE               0  
SchedDL        3.18e-14  
Total_TT       7.47e-06  
Nbr of observations: 3609
LL(0) =    -3964.892
LL(beta) = -2321.153
rho bar square = 0.413
Output file: logit_airline_generic.html


In [2]:
# Summary statistics of the `pandas` DataFrame. The output below also lists
# the columns created through DefineVariable (e.g. ScheduledDelay_1).
pandas.describe()

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,DesiredArrivalTime,ScheduledDelay_1,ScheduledDelay_2,ScheduledDelay_3,Opt1_SchedDelayEarly,Opt2_SchedDelayEarly,Opt3_SchedDelayEarly,Opt1_SchedDelayLate,Opt2_SchedDelayLate,Opt3_SchedDelayLate
count,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,...,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0,3610.0
mean,1810.0,382.0,397.0,0.911,2.04,1.2,1.33,401.0,322.0,2.29,...,322.0,26.3,68.7,67.5,1.95,1.79,1.8,2.39,2.94,2.93
std,1040.0,82.1,82.9,0.869,0.77,0.46,0.635,440.0,423.0,8.31,...,423.0,341.0,361.0,362.0,3.34,3.3,3.29,3.44,3.85,3.88
min,1.0,300.0,300.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1020.0,-1060.0,-1020.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0
25%,904.0,300.0,300.0,0.0,2.0,1.0,1.0,-1.0,-1.0,1.0,...,-1.0,-162.0,-120.0,-120.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
50%,1810.0,360.0,420.0,1.0,2.0,1.0,1.0,375.0,-1.0,1.0,...,-1.0,10.0,60.0,60.0,-0.0,-0.0,-0.0,0.167,1.0,1.0
75%,2710.0,480.0,480.0,2.0,2.0,1.0,2.0,660.0,720.0,2.0,...,720.0,240.0,306.0,300.0,2.7,2.0,2.0,4.0,5.1,5.0
max,3610.0,480.0,480.0,2.0,4.0,3.0,2.0,1420.0,1420.0,99.0,...,1420.0,1280.0,1410.0,1380.0,17.0,17.8,17.0,21.4,23.5,23.0


In [3]:
# Display the `pandas` DataFrame itself as the cell output.
# NOTE(review): consider `pandas.head()` to keep the rendered output small.
pandas

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,DesiredArrivalTime,ScheduledDelay_1,ScheduledDelay_2,ScheduledDelay_3,Opt1_SchedDelayEarly,Opt2_SchedDelayEarly,Opt3_SchedDelayEarly,Opt1_SchedDelayLate,Opt2_SchedDelayLate,Opt3_SchedDelayLate
0,1,300,360,0,2,1,2,-1,480,2,...,480,157,367,337,-0,-0,-0,2.62,6.12,5.62
1,2,480,480,1,2,1,2,-1,540,1,...,540,62,572,452,-0,-0,-0,1.03,9.53,7.53
2,3,480,480,1,2,1,1,-1,-1,2,...,-1,451,541,721,-0,-0,-0,7.52,9.02,12
3,4,480,480,1,2,3,1,630,-1,2,...,-1,450,450,90,-0,-0,-0,7.5,7.5,1.5
4,5,300,480,0,2,1,0,-1,-1,3,...,-1,0,0,0,-0,-0,-0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604,3609,480,300,2,3,1,2,-1,660,3,...,660,496,496,436,-0,-0,-0,8.27,8.27,7.27
3605,3610,480,300,2,2,1,2,-1,480,1,...,480,740,710,500,-0,-0,-0,12.3,11.8,8.33
3606,3611,300,360,0,1,1,2,-1,870,1,...,870,-252,258,-12,4.2,-0,0.2,-0,4.3,-0
3607,3612,300,480,0,2,1,2,-1,1320,1,...,1.32e+03,-531,-321,-381,8.85,5.35,6.35,-0,-0,-0


In [5]:
# Summary statistics of the single column TripTimeHours_1, which enters the
# first utility function through the Total_TT coefficient.
pandas['TripTimeHours_1'].describe()

count   3.61e+03
mean        3.74
std         1.59
min        0.667
25%         2.42
50%         4.02
75%          5.3
max         6.35
Name: TripTimeHours_1, dtype: float64