In [1]:
##
#Yin Ma
# 2020-04-27: changed the panel identifier from id to csid; results are identical to LG
#logit, panel data
 

In [2]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable, bioDraws, PanelLikelihoodTrajectory, MonteCarlo, log

In [3]:
# Read the estimation data into a pandas DataFrame.
# Earlier extract, kept for reference:
#df = pd.read_csv("agree0623_all_0331.csv")

# 04-27 extract: introduces the scale variables (the ones that are significant in essay 2)
df = pd.read_csv("agree0623_all_0427.csv")

# Optional sanity checks: preview the first rows and count missing values per column.
#df.head(10)
#check missing values
#df.isnull().sum()

In [4]:
# Wrap the DataFrame in a Biogeme Database object (named after the data file)
# so that it can be used for model specification and estimation.
database = db.Database("agree0623_all_0427",df)

In [5]:
# The observations are organized as panel data. The column "csid" identifies each individual.
# NOTE(review): PanelLikelihoodTrajectory is imported at the top of the notebook, but the
# likelihood expression passed to BIOGEME is a plain per-observation loglogit — confirm that
# declaring the panel structure without PanelLikelihoodTrajectory is what is intended.
database.panel("csid")

In [6]:
# Use the names of the database variables as Python variables, so the utility
# functions can refer to columns directly by name.
# Caution: this writes into the notebook's global namespace and silently replaces
# any existing Python name that matches a column name.
globals().update(database.variables)

In [7]:
# Missing values have already been removed, so no observations need to be excluded here.
# Note that the weights were created before the variables were selected and the missing values were removed.

In [8]:
# using block specific data
#exclude=(block1==0)
#exclude=(block1==1)

#database.remove(exclude)
#next step, change weight accordingly

In [9]:
# Parameters to be estimated.
# Each Beta is declared as Beta(name, starting value, lower bound, upper bound, status);
# no bounds are imposed (None, None) and every parameter uses status 0.
#
# Attribute coefficients.
# The NZone variables are already scaled: zone/100.
# The LG estimates are used as starting values.
B_NZONE1 = Beta('B_NZONE1',-0.0682,None,None,0)
B_NZONE2 = Beta('B_NZONE2',-0.0372,None,None,0)
B_NZONE3 = Beta('B_NZONE3',-0.0283,None,None,0)
B_TOWNDOWN = Beta('B_TOWNDOWN',0.0274,None,None,0)
B_TOWNMID = Beta('B_TOWNMID',0.11435,None,None,0)
B_TOWNUP = Beta('B_TOWNUP',0.205,None,None,0)
B_ECO = Beta('B_ECO',0.053,None,None,0)
B_REC = Beta('B_REC',0.0089,None,None,0)
B_DRY = Beta('B_DRY',-0.2146,None,None,0)
B_TAX = Beta('B_TAX',-0.0257,None,None,0)
# Covariate coefficients; starting values come from the essay 2 scaled MNL.
B_ASCEND = Beta('B_ASCEND',0.2997,None,None,0)
B_DESCEND = Beta('B_DESCEND',0.2851,None,None,0)
B_INCPP50K = Beta('B_INCPP50K',0.317,None,None,0)
B_INCPP75K = Beta('B_INCPP75K',0.5522,None,None,0)
B_INCPP75KM = Beta('B_INCPP75KM',0.1951,None,None,0)
B_F1FLOW = Beta('B_F1FLOW',-0.6279,None,None,0)
B_F3PROTUP = Beta('B_F3PROTUP',0.0066,None,None,0)
B_F4CTH2O = Beta('B_F4CTH2O',-0.5495,None,None,0)
B_TAXGRT = Beta('B_TAXGRT',0.3198,None,None,0)

In [12]:
# Definition of the utility functions.
#
# Each utility is the product of two linear indices:
#   * a scale term built from the respondent covariates, identical for all
#     three alternatives, and
#   * an attribute index built from the alternative-specific attribute levels
#     (suffix _sq = status quo, _a = alternative A, _b = alternative B).
#
# The scale term is written once and shared, so the three utilities cannot
# drift apart when the covariate list is edited.
#
# NOTE(review): all coefficients in both indices are free, so multiplying every
# scale coefficient by a constant c and dividing every attribute coefficient by
# c leaves the utilities unchanged — confirm the intended normalization.
scale_term = (
    B_ASCEND * ascend
    + B_DESCEND * descend
    + B_INCPP50K * incpp_50k
    + B_INCPP75K * incpp_75k
    + B_INCPP75KM * incpp_75km
    + B_F1FLOW * f1_flow
    + B_F3PROTUP * f3_protup
    + B_F4CTH2O * f4_CTh2o
    + B_TAXGRT * taxgrt
)

# Status quo
VSQ = scale_term * (
    B_NZONE1 * nzone1_sq
    + B_NZONE2 * nzone2_sq
    + B_NZONE3 * nzone3_sq
    + B_TOWNDOWN * towndown_sq
    + B_TOWNMID * townmid_sq
    + B_TOWNUP * townup_sq
    + B_ECO * eco_sq
    + B_REC * rec_sq
    + B_DRY * dry_sq
    + B_TAX * tax_sq
)

# Alternative A
VA = scale_term * (
    B_NZONE1 * nzone1_a
    + B_NZONE2 * nzone2_a
    + B_NZONE3 * nzone3_a
    + B_TOWNDOWN * towndown_a
    + B_TOWNMID * townmid_a
    + B_TOWNUP * townup_a
    + B_ECO * eco_a
    + B_REC * rec_a
    + B_DRY * dry_a
    + B_TAX * tax_a
)

# Alternative B
VB = scale_term * (
    B_NZONE1 * nzone1_b
    + B_NZONE2 * nzone2_b
    + B_NZONE3 * nzone3_b
    + B_TOWNDOWN * towndown_b
    + B_TOWNMID * townmid_b
    + B_TOWNUP * townup_b
    + B_ECO * eco_b
    + B_REC * rec_b
    + B_DRY * dry_b
    + B_TAX * tax_b
)

In [13]:
# Map each alternative number to its utility function:
# 1 = status quo, 2 = alternative A, 3 = alternative B.
V = {1: VSQ, 2: VA, 3: VB}

# Availability conditions, keyed by the same alternative numbers.
# Every alternative is available to every individual, so each flag is 1.
av = dict.fromkeys(V, 1)

In [14]:
# Definition of the model. This is the contribution of each observation to the
# log likelihood function: a logit over the utilities V with availabilities av,
# evaluated for the alternative recorded in the `choice` column.
logprob = models.loglogit(V,av,choice)

In [15]:
# Define level of verbosity.
# NOTE(review): this import sits in the middle of the notebook; consider moving
# it to the import cell at the top so all dependencies are visible in one place.
import biogeme.messaging as msg
logger = msg.bioMessage()
# Debug level is active: the estimation output then includes "< Debug >" lines
# (e.g. the parameter values). The alternatives below are kept for quick switching.
logger.setDebug()
#logger.setWarning()
#logger.setGeneral()
#logger.setDetailed()


In [16]:
# Create the Biogeme object from the database and the log likelihood expression
biogeme  = bio.BIOGEME(database,logprob)
# Model name; presumably used by Biogeme to name the files it writes — confirm.
biogeme.modelName = "01logit_0427_scale"


[17:11:27] < General >   Remove 22 unused variables from the database as only 41 are used.


In [17]:
# Estimate the parameters.
#
# This cell reuses the BIOGEME object created and named in the previous cell.
# Re-creating the object here would discard its modelName, so the estimation
# and the log file would run under Biogeme's default model name. The model is
# also estimated only once; a second estimate() call repeats the same
# optimization without adding information.
results = biogeme.estimate(saveIterations=True)
biogeme.createLogFile()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)

print("Results=",results)

[17:11:31] < General >   Remove 0 unused variables from the database as only 41 are used.
[17:11:34] < General >   Log likelihood (N=7414):  -8057.964
[17:11:34] < General >   Minimize with tol 1e-07
[17:11:34] < Debug >     B_ASCEND:     0.2997
[17:11:34] < Debug >     B_DESCEND:     0.2851
[17:11:34] < Debug >     B_DRY:    -0.2146
[17:11:34] < Debug >     B_ECO:      0.053
[17:11:34] < Debug >     B_F1FLOW:    -0.6279
[17:11:34] < Debug >     B_F3PROTUP:     0.0066
[17:11:34] < Debug >     B_F4CTH2O:    -0.5495
[17:11:34] < Debug >     B_INCPP50K:      0.317
[17:11:34] < Debug >     B_INCPP75K:     0.5522
[17:11:34] < Debug >     B_INCPP75KM:     0.1951
[17:11:34] < Debug >     B_NZONE1:    -0.0682
[17:11:34] < Debug >     B_NZONE2:    -0.0372
[17:11:34] < Debug >     B_NZONE3:    -0.0283
[17:11:34] < Debug >     B_REC:     0.0089
[17:11:34] < Debug >     B_TAX:    -0.0257
[17:11:34] < Debug >     B_TAXGRT:     0.3198
[17:11:34] < Debug >     B_TOWNDOWN:     0.0274
[17:11:34] < Debu