In [1]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta
from biogeme.expressions import log
import math

In [None]:
df = pd.read_csv("data/us_estdata_expanded.csv")
df

In [3]:
df["ORIGIN"].value_counts()

603700     4015
2500390    2045
1703400    1672
400100     1554
800190     1414
           ... 
1302900      27
2000700      27
2701800      27
1304400      25
2201600      21
Name: ORIGIN, Length: 975, dtype: int64

In [4]:
df["MED"] = np.where(df["NAICS"] == "MED", 1, 0)
df["MFG"] = np.where(df["NAICS"] == "MFG", 1, 0)
df["RET"] = np.where(df["NAICS"] == "RET", 1, 0)
df["EDU"] = np.where(df["NAICS"] == "EDU", 1, 0)
df["ADM"] = np.where(df["NAICS"] == "ADM", 1, 0)
df["FOD"] = np.where(df["NAICS"] == "FOD", 1, 0)
df["PRF"] = np.where(df["NAICS"] == "PRF", 1, 0)
df["TRN"] = np.where(df["NAICS"] == "TRN", 1, 0)
df["SRV"] = np.where(df["NAICS"] == "SRV", 1, 0)
df["FIN"] = np.where(df["NAICS"] == "FIN", 1, 0)
df["WHL"] = np.where(df["NAICS"] == "WHL", 1, 0)
df["AGR"] = np.where(df["NAICS"] == "AGR", 1, 0)
df["PUB"] = np.where(df["NAICS"] == "PUB", 1, 0)
df["INF"] = np.where(df["NAICS"] == "INF", 1, 0)
df["ENT"] = np.where(df["NAICS"] == "ENT", 1, 0)
df["REL"] = np.where(df["NAICS"] == "REL", 1, 0)
df["UTL"] = np.where(df["NAICS"] == "UTL", 1, 0)
df["EXT"] = np.where(df["NAICS"] == "EXT", 1, 0)
df["MNG"] = np.where(df["NAICS"] == "MNG", 1, 0)
df["CON"] = np.where(df["NAICS"] == "CON", 1, 0)
df["NO_IND"] = np.where(df["NAICS"].isna(), 1, 0)

In [5]:
df["NO_IND"].sum()

35066

In [6]:
# clean up the database (Biogeme Database can only have numerical values)
df = df.select_dtypes(['number'])
df = df.fillna(0)

In [7]:
df["CHOSEN"].value_counts()

102500     190
5500100    169
5310200    164
5500700    153
1200500    140
          ... 
4703203     18
5541001     18
2701409     17
4804619     17
2601703     17
Name: CHOSEN, Length: 2336, dtype: int64

In [8]:
# defining the chosen alterantive for each person explicitly (0 to 35, corresponding to staying and moving to one of the many PUMAs)
df['CHOSEN_PUMA'] = df['CHOSEN']
df['CHOSEN'] = 0
for i in range(1, 201): 
    var = 'ALT' + str(i) + '_PUMA'
    df['CHOSEN'] = np.where(df[var]==df['CHOSEN_PUMA'], i, df['CHOSEN'])
df["CHOSEN"] = np.where(df["STAY"] == 1, 0, df["CHOSEN"])

In [9]:
df["CHOSEN"].value_counts()

0    110249
1     16296
Name: CHOSEN, dtype: int64

In [10]:
df["IN_COLLEGE"] = np.where((df["SCHG"] == 15) | (df["SCHG"] == 16), 1, 0)
df["IN_COLLEGE"]

0         0
1         0
2         0
3         0
4         0
         ..
126540    0
126541    0
126542    0
126543    0
126544    0
Name: IN_COLLEGE, Length: 126545, dtype: int32

In [11]:
df["CHOSEN"].value_counts()

0    110249
1     16296
Name: CHOSEN, dtype: int64

In [12]:
df["STAY"].value_counts()

1    110249
0     16296
Name: STAY, dtype: int64

In [13]:
df["AGE_18_34"] = np.where(df["AGEP"] <= 34, 1, 0)
df["AGE_35_64"] = np.where((df["AGEP"] >= 35) & (df["AGEP"] <= 64), 1, 0)
df["AGE_OVER_65"] = np.where((df["AGEP"] >= 65), 1, 0)
df["FOREIGN"] = np.where(df["NATIVITY"] == 2, 1, 0)

In [14]:
df["AGE_18_22"] = np.where(df["AGEP"] <= 22, 1, 0)
df["AGE_23_29"] = np.where((df["AGEP"] >= 23) & (df["AGEP"] <= 29), 1, 0)
df["AGE_30_39"] = np.where((df["AGEP"] >= 30) & (df["AGEP"] <= 39), 1, 0)
df["AGE_40_49"] = np.where((df["AGEP"] >= 40) & (df["AGEP"] <= 49), 1, 0)
df["AGE_50_64"] = np.where((df["AGEP"] >= 50) & (df["AGEP"] <= 64), 1, 0)

In [15]:
df["AGEP"].describe()

count    126545.000000
mean         50.049437
std          18.884426
min          18.000000
25%          34.000000
50%          51.000000
75%          64.000000
max          96.000000
Name: AGEP, dtype: float64

In [16]:
df["EDU_LESS_HIGH"] = np.where(df["SCHL"] <= 15, 1, 0)
df["EDU_HIGH"] = np.where((df["SCHL"] <= 20) & (df["SCHL"] >= 16), 1, 0) 
df["EDU_COLLEGE"] = np.where(df["SCHL"] >= 21, 1, 0)
df["EDU_NOCOLLEGE"] = np.where(df["EDU_COLLEGE"] == 0, 1, 0)

In [17]:
df["WOMAN_CHILD"] = np.where((df["PAOC"] >= 1) & (df["PAOC"] <= 3), 1, 0)
df["UNEMPLOYED"] = np.where(df["ESR"] == 3, 1, 0)

In [18]:
df["MALE"] = np.where(df["SEX"] == 1, 1, 0)
df["FEMALE"] = np.where(df["SEX"] == 0, 1, 0)

In [19]:
df["MARRIED"] = np.where(df["MAR"] == 1, 1, 0)

In [20]:
# df["child_old"] = np.where(df["child"] == df["REC_CHILD"], 0, df["child"])
# df["child_old"].value_counts()
df["child"].value_counts()

0    87946
1    38599
Name: child, dtype: int64

In [21]:
df["REC_NO_MAR"] = np.where((df["MARHD"] == 1) | (df["MARHW"] == 1), 1, 0)
df["REC_NO_MAR"].value_counts()

0    124815
1      1730
Name: REC_NO_MAR, dtype: int64

In [22]:
df["MARHM_new"] = np.where(df["MARHM"] == 2, 0, df["MARHM"])
df["MARHM_new"].value_counts()

0.0    124522
1.0      2023
Name: MARHM_new, dtype: int64

In [23]:
df["married_old"] = np.where((df["MARHM"] == df["MARRIED"]), 0, df["MARRIED"])
df["married_old"].value_counts()

1    65863
0    60682
Name: married_old, dtype: int64

In [24]:
df["MILITARY"] = np.where(df["MIL"] == 1, 1, 0)

In [25]:
# df["HH_COST"] = np.where(df["TEN"] == 3, df["GRNTP"], df["SMOCP"])

In [26]:
# up to debate still
df["AGR_EXT"] = np.where((df["AGR"] == 1) | (df["EXT"] == 1), 1, 0)
df["HIGH_ED"] = np.where((df["MED"] == 1) | (df["EDU"] == 1) | (df["PRF"] == 1) | (df["MED"] == 1) | (df["FIN"] == 1) | (df["INF"] == 1) | (df["MED"] == 1), 1, 0)
df["LICENSE"] = np.where((df["SRV"] == 1) | (df["REL"] == 1), 1, 0)
df["OTHER_JOB"] = np.where((df["AGR_EXT"] == 0) & (df["HIGH_ED"] == 0) & (df["LICENSE"] == 0) & (df["NO_IND"] == 0), 1, 0)

In [27]:
df["MICRO_adj_ORIG"] = np.where((df["TYPE_ORIG"] == 3) | (df["TYPE_ORIG"] == 2), 1, 0)
df["METRO"] = np.where(df["TYPE_ORIG"] == 1, 1, 0)
df["T34"] = np.where(df["TYPE_ORIG"] == 0, 1, 0)

In [28]:
for i in range(1, 201):
    key = "ALT{0}_".format(i)
    df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
    df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
    df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)

  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where

In [29]:
df["UNEMPLOYED"] = np.where((df["ESR"] == 3) | (df["ESR"] == 6), 1, 0)
df["IN_LF"] = np.where(df["ESR"] == 6, 0, 1)

  df["IN_LF"] = np.where(df["ESR"] == 6, 0, 1)


In [30]:
df["WORK2_MAR"] = np.where(df["FES"] == 1, 1, 0)
df["WORK1_MAR"] = np.where((df["FES"] <= 4) & (df["FES"] >= 2), 1, 0)
df["OTHER_FAMILY"] = np.where((df["HHT"] == 2) | (df["HHT"] == 3), 1, 0)

  df["WORK2_MAR"] = np.where(df["FES"] == 1, 1, 0)
  df["WORK1_MAR"] = np.where((df["FES"] <= 4) & (df["FES"] >= 2), 1, 0)
  df["OTHER_FAMILY"] = np.where((df["HHT"] == 2) | (df["HHT"] == 3), 1, 0)


In [31]:
df.loc[df["CBSA_NAME_ORIG"] == -1, "CBSA_NAME_ORIG"] = -2

In [32]:
# df["INTERNAL"] = df["ORIGIN"] == df["CHOSEN"]

In [33]:
# making the Biogeme Database that is used for the model estimation
database = db.Database('us_data', df)

In [34]:
# The following statement allows you to use the names of the
# variable as Python variable. (in the utility functions)
globals().update(database.variables)

In [35]:
c_move = Beta("c_move", 0, None, None, 0)

In [36]:
# Staying Choice Parameters to be Estimated
c_stay_age_18_22 = Beta("c_stay_age_18_22", 0.0621, None, None, 0)
c_stay_age_23_29 = Beta("c_stay_age_23_29", -0.113, None, None, 0)
c_stay_age_30_39 = Beta("c_stay_age_30_39", -0.0233, None, None, 0)
c_stay_age_40_49 = Beta("c_stay_age_40_49", 0, None, None, 1)
c_stay_age_50_64 = Beta("c_stay_age_50_64", 0.149, None, None, 0)
c_stay_age_65 = Beta("c_stay_age_65", 1.43, None, None, 0)
c_stay_child = Beta("c_stay_child", -0.278, None, None, 0)
c_stay = Beta("c_stay", 17.4, None, None, 0)
c_stay_foreign = Beta("c_stay_foreign", 0.185, None, None, 0)
c_stay_dens = Beta("c_stay_dens", 0.00000437, None, None, 0)
c_stay_college = Beta("c_stay_college", 0.706, None, None, 0)
c_stay_rec_child = Beta("c_stay_rec_child", 0.0288, None, None, 0)
c_stay_rec_mar = Beta("c_stay_rec_mar", -0.856, None, None, 0)
c_stay_rec_nomar = Beta("c_stay_rec_nomar", -0.359, None, None, 0)
c_stay_mil = Beta("c_stay_mil", -0.165, None, None, 0)
c_stay_2work_mar = Beta("c_stay_2work_mar", 0.713, None, None, 0)
c_stay_1work_mar = Beta("c_stay_1work_mar", 0.573, None, None, 0)
c_stay_otherfamily = Beta("c_stay_otherfamily", 0.497, None, None, 0)
c_stay_ownjob = Beta("c_stay_ownjob", 1.78, None, None, 0)
c_stay_rentcost = Beta("c_stay_rentcost", -4.21, None, None, 0)
c_stay_owncost = Beta("c_stay_owncost", 3.85, None, None, 0)
c_stay_unemp_rate = Beta("c_stay_unemp_rate", -0.772, None, None, 0)
c_stay_hh_val = Beta("c_stay_hh_val", 0.000000947, None, None, 0)

In [37]:
c_stay_T34 = Beta("c_stay_T34", 0, None, None, 1)
c_stay_metro = Beta("c_stay_metro", 0, None, None, 0)
c_stay_micro = Beta("c_stay_micro", 0, None, None, 0)
c_stay_edu_college = Beta("c_stay_edu_college", 0, None, None, 0)
c_stay_edu_high = Beta("c_stay_edu_high", 0, None, None, 0)
c_stay_edu_nohigh = Beta("c_stay_edu_nohigh", 0, None, None, 1)

In [38]:
# defining the staying utility function
V0 = c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_40_49 * AGE_40_49 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_child * child + c_stay + c_stay_metro * METRO + c_stay_micro * MICRO_adj_ORIG + c_stay_college * IN_COLLEGE + c_stay_rec_mar * MARHM_new + c_stay_rec_nomar * REC_NO_MAR + c_stay_rec_child * REC_CHILD + c_stay_mil * MILITARY + c_stay_hh_val * HH_MED_VAL_ORIG + MARRIED * (c_stay_2work_mar * WORK2_MAR + c_stay_1work_mar * WORK1_MAR) + c_stay_otherfamily * OTHER_FAMILY + c_stay_ownjob * OWN_JOB_ORIG / TOT_JOBS_ORIG + (c_stay_rentcost * GRNTP_ORIG_ADJ + c_stay_owncost * SMOCP_ORIG_ADJ) / HINCP_ORIG_ADJ * 12 + c_stay_foreign * FOREIGN + c_stay_edu_college * EDU_COLLEGE + c_stay_edu_high * EDU_HIGH

In [None]:
# defining the staying utility function
V0 = c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_40_49 * AGE_40_49 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_child * child + c_stay + c_stay_college * IN_COLLEGE + c_stay_rec_mar * MARHM_new + c_stay_rec_nomar * REC_NO_MAR + c_stay_rec_child * REC_CHILD + c_stay_mil * MILITARY + c_stay_hh_val * HH_MED_VAL_ORIG + MARRIED * (c_stay_2work_mar * WORK2_MAR + c_stay_1work_mar * WORK1_MAR) + (MAR != 5) * c_stay_otherfamily * OTHER_FAMILY + c_stay_ownjob * OWN_JOB_ORIG / TOT_JOBS_ORIG + c_stay_rentcost * GRNTP_ORIG_ADJ + c_stay_owncost * SMOCP_ORIG_ADJ + c_stay_foreign * FOREIGN +  c_stay_edu_college * EDU_COLLEGE + c_stay_edu_nohigh * EDU_LESS_HIGH + IN_LF * (c_stay_hhinc * PINCP_ORIG_ADJ)

In [39]:
print(V0)

(((((((((((((((((((((((c_stay_age_18_22(0.0621) * AGE_18_22) + (c_stay_age_23_29(-0.113) * AGE_23_29)) + (c_stay_age_30_39(-0.0233) * AGE_30_39)) + (c_stay_age_40_49(0) * AGE_40_49)) + (c_stay_age_50_64(0.149) * AGE_50_64)) + (c_stay_age_65(1.43) * AGE_OVER_65)) + (c_stay_child(-0.278) * child)) + c_stay(17.4)) + (c_stay_metro(0) * METRO)) + (c_stay_micro(0) * MICRO_adj_ORIG)) + (c_stay_college(0.706) * IN_COLLEGE)) + (c_stay_rec_mar(-0.856) * MARHM_new)) + (c_stay_rec_nomar(-0.359) * REC_NO_MAR)) + (c_stay_rec_child(0.0288) * REC_CHILD)) + (c_stay_mil(-0.165) * MILITARY)) + (c_stay_hh_val(9.47e-07) * HH_MED_VAL_ORIG)) + (MARRIED * ((c_stay_2work_mar(0.713) * WORK2_MAR) + (c_stay_1work_mar(0.573) * WORK1_MAR)))) + (c_stay_otherfamily(0.497) * OTHER_FAMILY)) + ((c_stay_ownjob(1.78) * OWN_JOB_ORIG) / TOT_JOBS_ORIG)) + ((((c_stay_rentcost(-4.21) * GRNTP_ORIG_ADJ) + (c_stay_owncost(3.85) * SMOCP_ORIG_ADJ)) / HINCP_ORIG_ADJ) * `12`)) + (c_stay_foreign(0.185) * FOREIGN)) + (c_stay_edu_colleg

In [40]:
# Destination Choice Parameters to be estimated
# Beta(name of the factor, initial value of the coefficient, lower bound, upper bound, whether or not
c_destchoice_dist=Beta('c_destchoice_dist', -0.0000000241, None, None, 0)
c_destchoice_logdist=Beta('c_destchoice_logdist', -0.257, None, None, 0)
c_destchoice_unemp_rate = Beta("c_destchoice_unemp_rate", -2.14, None, None, 0)
c_destchoice_hhcost = Beta("c_destchoice_hhcost", -1.09, None, None, 0)
c_destchoice_college = Beta("c_destchoice_college", 0.0000679, None, None, 0)
c_destchoice_age_18_34 = Beta("c_destchoice_age_18_34", 3.25, None, None, 0)
c_destchoice_age_35_64 = Beta("c_destchoice_age_35_64", 0.00000439, None, None, 0)
c_destchoice_age_over_65 = Beta("c_destchoice_age_over_65", 7.19, None, None, 0)
c_destchoice_foreign = Beta("c_destchoice_foreign", 1.57, None, None, 0)
c_destchoice_pincp = Beta("c_destchoice_pincp", 0.00000305, None, None, 0)
c_destchoice_entscore_18_34 = Beta("c_destchoice_entscore_18_34", 1.68, None, None, 0)
c_destchoice_entscore_35_64 = Beta("c_destchoice_entscore_35_64", 2.24, None, None, 0)
c_destchoice_entscore_65 = Beta("c_destchoice_entscore_65", 2.52, None, None, 0)
c_destchoice_military = Beta("c_destchoice_military", 0.000245, None, None, 0)

In [41]:
c_destchoice_T34_T34 = Beta("c_destchoice_T34_T34", 0, None, None, 1) 
c_destchoice_T34_Metro = Beta('c_destchoice_T34_Metro', 0.481, None, None, 0)
c_destchoice_T34_Micro = Beta("c_destchoice_T34_Micro", 0.582, None, None, 0)
c_destchoice_Metro_T34 = Beta("c_destchoice_Metro_T34", 0.165, None, None, 0)
c_destchoice_Metro_Metro = Beta('c_destchoice_Metro_Metro', 0.881, None, None, 0)
c_destchoice_Metro_Micro = Beta('c_destchoice_Metro_Micro', 1.06, None, None, 0)
c_destchoice_Micro_T34 = Beta("c_destchoice_Micro_T34", -0.199, None, None, 0)
c_destchoice_Micro_Metro = Beta("c_destchoice_Micro_Metro", 0.724, None, None, 0)
c_destchoice_Micro_Micro = Beta("c_destchoice_Micro_Micro", 1.52, None, None, 0)

In [42]:
c_destchoice_geo_spec_job = Beta("c_destchoice_geo_spec_job", 5.36, None, None, 0)
c_destchoice_high_ed_job = Beta("c_destchoice_high_ed_job", 2.51, None, None, 0)
c_destchoice_license_job = Beta("c_destchoice_license_job", 5.97, None, None, 0)
c_destchoice_other = Beta("c_destchoice_other", 2.43, None, None, 0)

In [43]:
c_destchoice_samestate = Beta("c_destchoice_samestate", 6.33, None, None, 0)
c_destchoice_birthstate = Beta("c_destchoice_birthstate", 0.918, None, None, 0)
c_destchoice_samecbsa = Beta("c_destchoice_samecbsa", 0.511, None, None, 0)
c_destchoice_cbsa_dist = Beta("c_destchoice_cbsa_dist", -0.0000278, None, None, 0)

In [44]:
# birthstate > cbsa > state

In [45]:
# defining the utility functions for each of the moving PUMA alternatives
# defined using the exec to parse a string to save space
# can also use a loop to print out the statements and then copy/paste them to run
# can also just write each one manually
for i in range(200):
    num = i + 1
    initialization = "V{0} = log(ALT{0}_POP) + (CBSA_NAME_ORIG != ALT{0}_CBSA) * (c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1)) + (CBSA_NAME_ORIG == ALT{0}_CBSA) * c_destchoice_cbsa_dist * ALT{0}_DIST + \
        \
    c_destchoice_hhcost * ALT{0}_HH_COST + c_destchoice_college * IN_COLLEGE * ALT{0}_COLLEGE + c_destchoice_foreign * FOREIGN * ALT{0}_FOREIGN / ALT{0}_POP + \
        \
    c_destchoice_age_18_34 * AGE_18_34 * ALT{0}_18_34 / ALT{0}_POP + c_destchoice_age_35_64 * AGE_35_64 * ALT{0}_35_64 + c_destchoice_age_over_65 * AGE_OVER_65 * ALT{0}_65 / ALT{0}_POP + \
        \
    ALT{0}_ENT / (ALT{0}_EMP + 1) * (c_destchoice_entscore_18_34 * AGE_18_34 + c_destchoice_entscore_35_64 * AGE_35_64 + c_destchoice_entscore_65 * AGE_OVER_65) + \
        \
    IN_LF * (c_destchoice_unemp_rate * ALT{0}_UNEMP_PCT + c_destchoice_pincp * ALT{0}_PINCP) + \
        \
    (c_destchoice_T34_T34 * T34 + c_destchoice_Metro_T34 * METRO +  c_destchoice_Micro_T34 * MICRO_adj_ORIG) * ALT{0}_T34 + \
    (c_destchoice_T34_Metro * T34 + c_destchoice_Metro_Metro * METRO + c_destchoice_Micro_Metro * MICRO_adj_ORIG) * ALT{0}_METRO + \
    (c_destchoice_T34_Micro * T34  + c_destchoice_Metro_Micro * METRO + c_destchoice_Micro_Micro * MICRO_adj_ORIG) * ALT{0}_MICRO + \
        \
    (c_destchoice_geo_spec_job * AGR_EXT + c_destchoice_high_ed_job * HIGH_ED + c_destchoice_license_job * LICENSE + c_destchoice_other * OTHER_JOB) * ALT{0}_OWN_JOB / (ALT{0}_EMP + 1) + c_destchoice_military * MILITARY * ALT{0}_MIL_NUM + \
        \
    c_destchoice_birthstate * (POBP == ALT{0}_State) * (CBSA_NAME_ORIG != ALT{0}_CBSA) + c_destchoice_samecbsa * (CBSA_NAME_ORIG == ALT{0}_CBSA) + c_destchoice_samestate * (MIGSP == ALT{0}_State)".format(num)
    exec(initialization)
print(V100)

# full model specification (takes a bit longer to run):
# "V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_destchoice_hhcost * ALT{0}_HH_COST + c_destchoice_college * IN_COLLEGE * ALT{0}_COLLEGE + c_destchoice_foreign * FOREIGN * ALT{0}_FOREIGN / ALT{0}_POP + c_destchoice_age_18_34 * ALT{0}_18_34 / ALT{0}_POP + c_destchoice_age_35_64 * ALT{0}_35_64 / ALT{0}_POP + c_destchoice_age_over_65 * ALT{0}_65 / ALT{0}_POP + c_destchoice_entscore * ALT{0}_ENT / AGEP / ALT{0}_EMP + c_destchoice_unemp * ALT{0}_UNEMP + c_destchoice_pincp * ALT{0}_PINCP + \
# (c_destchoice_T34_T34 * T34 + c_destchoice_Metro_T34 * METRO +  c_destchoice_Micro_T34 * MICRO_adj_ORIG) * ALT{0}_T34 + \
# (c_destchoice_T34_Metro * T34 + c_destchoice_Metro_Metro * METRO + c_destchoice_Micro_Metro * MICRO_adj_ORIG) * ALT{0}_METRO + \
# (c_destchoice_T34_Micro * T34  + c_destchoice_Metro_Micro * METRO + c_destchoice_Micro_Micro * MICRO_adj_ORIG) * ALT{0}_MICRO + \
# (c_destchoice_geo_spec_job * AGR_EXT + c_destchoice_high_ed_job * HIGH_ED + c_destchoice_license_job * LICENSE + c_destchoice_other * OTHER) * ALT{0}_OWN_JOB / ALT{0}_EMP".format(num)

# "V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_adm * ADM * ALT{0}_ADM/ALT{0}_EMP + c_agr * AGR * ALT{0}_AGR/ALT{0}_EMP + c_con * CON * ALT{0}_CON/ALT{0}_EMP + c_edu * EDU * ALT{0}_EDU/ALT{0}_EMP + c_ent * ENT * ALT{0}_ENT/ALT{0}_EMP + c_ext * EXT * ALT{0}_EXT/ALT{0}_EMP + c_fin * FIN * ALT{0}_FIN/ALT{0}_EMP + c_fod * FOD * ALT{0}_FOD/ALT{0}_EMP + c_inf * INF * ALT{0}_INF/ALT{0}_EMP + c_med * MED * ALT{0}_MED/ALT{0}_EMP + c_mfg * MFG * ALT{0}_MFG/ALT{0}_EMP + c_mng * MNG * ALT{0}_MNG/ALT{0}_EMP + c_prf * PRF * ALT{0}_PRF/ALT{0}_EMP + c_pub * PUB * ALT{0}_PUB/ALT{0}_EMP + c_rel * REL * ALT{0}_REL/ALT{0}_EMP + c_ret * RET * ALT{0}_RET/ALT{0}_EMP + c_srv * SRV * ALT{0}_SRV/ALT{0}_EMP + c_trn * TRN * ALT{0}_TRN/ALT{0}_EMP + c_utl * UTL * ALT{0}_UTL/ALT{0}_EMP + c_whl * WHL * ALT{0}_WHL/ALT{0}_EMP"

# for the fields already in the Biogeme db.Database, can explicitly refer to them; also used a few references to other databases using .loc and fields in the Biogeme database

((((((((((((((((((log(ALT100_POP) + ((CBSA_NAME_ORIG != ALT100_CBSA) * ((c_destchoice_dist(-2.41e-08) * ALT100_DIST) + (c_destchoice_logdist(-0.257) * log((ALT100_DIST + `1`)))))) + (((CBSA_NAME_ORIG == ALT100_CBSA) * c_destchoice_cbsa_dist(-2.78e-05)) * ALT100_DIST)) + (c_destchoice_hhcost(-1.09) * ALT100_HH_COST)) + ((c_destchoice_college(6.79e-05) * IN_COLLEGE) * ALT100_COLLEGE)) + (((c_destchoice_foreign(1.57) * FOREIGN) * ALT100_FOREIGN) / ALT100_POP)) + (((c_destchoice_age_18_34(3.25) * AGE_18_34) * ALT100_18_34) / ALT100_POP)) + ((c_destchoice_age_35_64(4.39e-06) * AGE_35_64) * ALT100_35_64)) + (((c_destchoice_age_over_65(7.19) * AGE_OVER_65) * ALT100_65) / ALT100_POP)) + ((ALT100_ENT / (ALT100_EMP + `1`)) * (((c_destchoice_entscore_18_34(1.68) * AGE_18_34) + (c_destchoice_entscore_35_64(2.24) * AGE_35_64)) + (c_destchoice_entscore_65(2.52) * AGE_OVER_65)))) + (IN_LF * ((c_destchoice_unemp_rate(-2.14) * ALT100_UNEMP_PCT) + (c_destchoice_pincp(3.05e-06) * ALT100_PINCP)))) + ((((c

In [46]:
W0 = PWGTP * 1

In [47]:
utilities = {}
for i in range(201):
    init = "utilities[{0}] = V{0}".format(i)
    exec(init)

In [48]:
import biogeme.messaging as msg
logger = msg.bioMessage()
logger.setDetailed()
logger.allMessages()

'*** Messages from biogeme 3.2.8 [2022-07-20]\n'

In [49]:
df.values.nbytes

5549757520

In [50]:
# Associate utility functions with the numbering of alternatives (corresponds to the CHOSEN field created earlier)
V = utilities

# Associate the availability conditions with the alternatives
# for this model, all migrants had all alternatives theoretically available so all are equal to 1 (available)
# if individual people had different availability for alterantives, could pass in a column of the dataframe to account for that availability
av = {}
for i in range(0, 201):
    av[i] = 1

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
# estimating the CHOSEN field
logprob = models.loglogit(V, av, CHOSEN)

# formulas = {"loglike": logprob, "weight": W0}

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob, suggestScales=False)
biogeme.modelName = 'us_full_final'

biogeme.saveIterations = True

# Calculate the null log likelihood for reporting. (likelihood of predicting every entry's alterantive correctly if alternatives are randomly chosen)
biogeme.calculateNullLoglikelihood(av)

# Estimate the parameters
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)

[01:52:50] < General >   Remove 1643 unused variables from the database as only 3839 are used.


In [None]:
# occupation-based coefficients

c_adm = Beta("c_adm", 0, None, None, 0)
c_agr = Beta("c_agr", 0, None, None, 0)
c_con = Beta("c_con", 0, None, None, 0)
c_edu = Beta("c_edu", 0, None, None, 0)
c_ent = Beta("c_ent", 0, None, None, 0)
c_ext = Beta("c_ext", 0, None, None, 0)
c_fin = Beta("c_fin", 0, None, None, 0)
c_fod = Beta("c_fod", 0, None, None, 0)
c_inf = Beta("c_inf", 0, None, None, 0)
c_med = Beta("c_med", 0, None, None, 0)
c_mfg = Beta("c_mfg", 0, None, None, 0)
c_mng = Beta("c_mng", 0, None, None, 0)
c_prf = Beta("c_prf", 0, None, None, 0)
c_pub = Beta("c_pub", 0, None, None, 0)
c_rel = Beta("c_rel", 0, None, None, 0)
c_ret = Beta("c_ret", 0, None, None, 0)
c_srv = Beta("c_srv", 0, None, None, 0)
c_trn = Beta("c_trn", 0, None, None, 0)
c_utl = Beta("c_utl", 0, None, None, 0)
c_whl = Beta("c_whl", 0, None, None, 0)

In [None]:
# Staying Choice Parameters with starting values of 0
c_stay_married = Beta("c_stay_married", 0, None, None, 0)
c_stay_age_18_22 = Beta("c_stay_age_18_22", 0, None, None, 0)
c_stay_age_23_29 = Beta("c_stay_age_23_29", 0, None, None, 0)
c_stay_age_30_39 = Beta("c_stay_age_30_39", 0, None, None, 0)
c_stay_age_40_49 = Beta("c_stay_age_40_49", 0, None, None, 1)
c_stay_age_50_64 = Beta("c_stay_age_50_64", 0, None, None, 0)
c_stay_age_65 = Beta("c_stay_age_65", 0, None, None, 0)
c_stay_edu_nohigh = Beta("c_stay_edu_nohigh", 0, None, None, 0)
c_stay_edu_somecollege = Beta("c_stay_edu_somecollege", 0, None, None, 0)
c_stay_edu_college = Beta("c_stay_edu_college", 0, None, None, 0)
c_stay_edu_beyondhs = Beta('c_stay_edu_beyondhs', 0, None, None, 0)
c_stay_child = Beta("c_stay_child", 0, None, None, 0)
c_stay_unemployed = Beta("c_stay_unemployed", 0, None, None, 0)
c_stay = Beta("c_stay", 15, None, None, 0)
c_stay_foreign = Beta("c_stay_foreign", 0, None, None, 0)
c_stay_dens = Beta("c_stay_dens", 0, None, None, 0)
c_stay_college = Beta("c_stay_college", 0, None, None, 0)
c_stay_rec_child = Beta("c_stay_rec_child", 0, None, None, 0)
c_stay_rec_mar = Beta("c_stay_rec_mar", 0, None, None, 0)
c_stay_rec_nomar = Beta("c_stay_rec_nomar", 0, None, None, 0)
c_stay_mil = Beta("c_stay_mil", 0, None, None, 0)
c_stay_2work_mar = Beta("c_stay_2work_mar", 0, None, None, 0)
c_stay_1work_mar = Beta("c_stay_1work_mar", 0, None, None, 0)
c_stay_otherfamily = Beta("c_stay_otherfamily", 0, None, None, 0)
c_stay_income = Beta("c_stay_income", 0, None, None, 0)
c_stay_hhinc = Beta("c_stay_hhinc", 0, None, None, 0)
c_stay_hurent = Beta('c_stay_hurent', 0, None, None, 0)
c_stay_vac = Beta('c_stay_vac', 0, None, None, 0)
c_stay_hhcost = Beta("c_stay_hhcost", 0, None, None, 0)
c_stay_unemp_rate = Beta("c_stay_unemp_rate", 0, None, None, 0)
c_stay_hh_val = Beta("c_stay_hh_val", 0, None, None, 0)
c_stay_partcp = Beta("c_stay_partcp", 0, None, None, 0)
c_stay_rentcost = Beta("c_stay_rentcost", 0, None, None, 0)
c_stay_owncost = Beta("c_stay_owncost", 0, None, None, 0)

In [None]:
# Bounded staying choice parameters
c_stay_income = Beta("c_stay_income", 0, 0, None, 0)
c_stay_hhinc = Beta("c_stay_hhinc", 0, 0, None, 0)
c_stay_hurent = Beta('c_stay_hurent', 0, None, 0, 0)
c_stay_vac = Beta('c_stay_vac', 0, None, None, 0)
c_stay_hhcost = Beta("c_stay_hhcost", 0, None, 0, 0)
c_stay_unemp_rate = Beta("c_stay_unemp_rate", 0, None, 0, 0)
c_stay_hh_val = Beta("c_stay_hh_val", 0, 0, None, 0)

In [None]:
# Destination Choice Parameters to be estimated
# Beta(name of the factor, initial value of the coefficient, lower bound, upper bound, whether or not
c_destchoice_dist=Beta('c_destchoice_dist', 0, None, None, 0)
c_destchoice_logdist=Beta('c_destchoice_logdist', 0, None, None, 0)
c_destchoice_unemp_rate = Beta("c_destchoice_unemp", 0, None, None, 0)
c_destchoice_hhcost = Beta("c_destchoice_hhcost", 0, None, None, 0)
c_destchoice_college = Beta("c_destchoice_college", 0, None, None, 0)
c_destchoice_age_18_34 = Beta("c_destchoice_age_18_34", 0, None, None, 0)
c_destchoice_age_35_64 = Beta("c_destchoice_age_35_64", 0, None, None, 0)
c_destchoice_age_over_65 = Beta("c_destchoice_age_over_65", 0, None, None, 0)
c_destchoice_foreign = Beta("c_destchoice_foreign", 0, None, None, 0)
c_destchoice_pincp = Beta("c_destchoice_pincp", 0, None, None, 0)

c_destchoice_entscore_18_34 = Beta("c_destchoice_entscore_18_34", 0, None, None, 0)
c_destchoice_entscore_35_64 = Beta("c_destchoice_entscore_35_64", 0, None, None, 0)
c_destchoice_entscore_65 = Beta("c_destchoice_entscore_65", 0, None, None, 0)

c_destchoice_T34_T34 = Beta("c_destchoice_T34_T34", 0, None, None, 1) 
c_destchoice_T34_Metro = Beta('c_destchoice_T34_Metro', 0, None, None, 0)
c_destchoice_T34_Micro = Beta("c_destchoice_T34_Micro", 0, None, None, 0)
c_destchoice_Metro_T34 = Beta("c_destchoice_Metro_T34", 0, None, None, 0)
c_destchoice_Metro_Metro = Beta('c_destchoice_Metro_Metro', 0, None, None, 0)
c_destchoice_Metro_Micro = Beta('c_destchoice_Metro_Micro', 0, None, None, 0)
c_destchoice_Micro_T34 = Beta("c_destchoice_Micro_T34", 0, None, None, 0)
c_destchoice_Micro_Metro = Beta("c_destchoice_Micro_Metro", 0, None, None, 0)
c_destchoice_Micro_Micro = Beta("c_destchoice_Micro_Micro", 0, None, None, 0)

c_destchoice_geo_spec_job = Beta("c_destchoice_geo_spec_job", 0, None, None, 0)
c_destchoice_high_ed_job = Beta("c_destchoice_high_ed_job", 0, None, None, 0)
c_destchoice_license_job = Beta("c_destchoice_license_job", 0, None, None, 0)
c_destchoice_other = Beta("c_destchoice_other", 0, None, None, 0)

c_destchoice_samestate = Beta("c_destchoice_samestate", 0, None, None, 0)
c_destchoice_birthstate = Beta("c_destchoice_birthstate", 0, None, None, 0)
c_destchoice_samecbsa = Beta("c_destchoice_samecbsa", 0, None, None, 0)
c_destchoice_cbsa_dist = Beta("c_destchoice_cbsa_dist", 0, None, None, 0)
c_destchoice_military = Beta("c_destchoice_military", 0, None, None, 0)