In [1]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta
from biogeme.expressions import log
import math

In [75]:
df = pd.read_csv("data/us_estdata.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,RT,SERIALNO,DIVISION,SPORDER,PUMA,REGION,ST,ADJINC,PWGTP,...,ENT_ORIG,FOD_ORIG,SRV_ORIG,PUB_ORIG,JOBS_EDU_NOHS_ORIG,JOBS_EDU_HS_ORIG,JOBS_EDU_NOBACH_ORIG,JOBS_EDU_BACH_ORIG,OWN_JOB_ORIG,TYPE_ORIG
0,0,P,2018GQ0103142,2,1,902,1,34,1013097,27,...,4668.0,25243.0,13545.0,12088.0,44574.0,75277.0,96405.0,139711.0,0.0,0
1,1,P,2018HU1096416,9,1,10200,4,53,1013097,341,...,2445.0,7125.0,2552.0,3823.0,6449.0,14893.0,18922.0,14092.0,0.0,1
2,2,P,2018HU0306186,7,2,1400,3,48,1013097,43,...,179.0,3216.0,886.0,688.0,4016.0,6496.0,7050.0,4215.0,0.0,0
3,3,P,2018HU0641097,7,1,1200,3,48,1013097,24,...,97.0,2808.0,898.0,988.0,3882.0,7306.0,7491.0,4446.0,0.0,2
4,4,P,2018HU1246748,5,1,7300,3,12,1013097,75,...,2060.0,17590.0,6016.0,35219.0,15006.0,31118.0,38494.0,34392.0,0.0,1


In [76]:
df["MED"] = np.where(df["NAICS"] == "MED", 1, 0)
df["MFG"] = np.where(df["NAICS"] == "MFG", 1, 0)
df["RET"] = np.where(df["NAICS"] == "RET", 1, 0)
df["EDU"] = np.where(df["NAICS"] == "EDU", 1, 0)
df["ADM"] = np.where(df["NAICS"] == "ADM", 1, 0)
df["FOD"] = np.where(df["NAICS"] == "FOD", 1, 0)
df["PRF"] = np.where(df["NAICS"] == "PRF", 1, 0)
df["TRN"] = np.where(df["NAICS"] == "TRN", 1, 0)
df["SRV"] = np.where(df["NAICS"] == "SRV", 1, 0)
df["FIN"] = np.where(df["NAICS"] == "FIN", 1, 0)
df["WHL"] = np.where(df["NAICS"] == "WHL", 1, 0)
df["AGR"] = np.where(df["NAICS"] == "AGR", 1, 0)
df["PUB"] = np.where(df["NAICS"] == "PUB", 1, 0)
df["INF"] = np.where(df["NAICS"] == "INF", 1, 0)
df["ENT"] = np.where(df["NAICS"] == "ENT", 1, 0)
df["REL"] = np.where(df["NAICS"] == "REL", 1, 0)
df["UTL"] = np.where(df["NAICS"] == "UTL", 1, 0)
df["EXT"] = np.where(df["NAICS"] == "EXT", 1, 0)
df["MNG"] = np.where(df["NAICS"] == "MNG", 1, 0)
df["CON"] = np.where(df["NAICS"] == "CON", 1, 0)

In [77]:
df["ALT3_TYPE"]

0        0.0
1        0.0
2        1.0
3        0.0
4        1.0
        ... 
63267    0.0
63268    2.0
63269    0.0
63270    0.0
63271    0.0
Name: ALT3_TYPE, Length: 63272, dtype: float64

In [78]:
# clean up the database (Biogeme Database can only have numerical values)
df = df.select_dtypes(['number'])
df = df.fillna(0)

In [79]:
df["CHOSEN"].value_counts()

102500     106
5500100     85
5310200     84
5500700     74
1700202     73
          ... 
4804619      7
2701404      7
5541001      6
2601703      5
3500806      4
Name: CHOSEN, Length: 2336, dtype: int64

In [80]:
# defining the chosen alterantive for each person explicitly (0 to 35, corresponding to staying and moving to one of the many PUMAs)
df['CHOSEN_PUMA'] = df['CHOSEN']
df['CHOSEN'] = 0
for i in range(1, 101): 
    var = 'ALT' + str(i) + '_PUMA'
    df['CHOSEN'] = np.where(df[var]==df['CHOSEN_PUMA'], i, df['CHOSEN'])
df["CHOSEN"] = np.where(df["STAY"] == 1, 0, df["CHOSEN"])

In [81]:
df["CHOSEN"].value_counts()

0    55261
1     8011
Name: CHOSEN, dtype: int64

In [82]:
df["IN_COLLEGE"] = np.where((df["SCHG"] == 15) | (df["SCHG"] == 16), 1, 0)
df["IN_COLLEGE"]

0        0
1        0
2        0
3        0
4        0
        ..
63267    0
63268    0
63269    0
63270    0
63271    0
Name: IN_COLLEGE, Length: 63272, dtype: int32

In [83]:
df["AGE_18_34"] = np.where(df["AGEP"] <= 34, 1, 0)
df["AGE_35_64"] = np.where((df["AGEP"] >= 35) & (df["AGEP"] <= 64), 1, 0)
df["AGE_OVER_65"] = np.where((df["AGEP"] >= 65), 1, 0)
df["FOREIGN"] = np.where(df["NATIVITY"] == 2, 1, 0)

In [84]:
df["AGE_18_22"] = np.where(df["AGEP"] <= 22, 1, 0)
df["AGE_23_29"] = np.where((df["AGEP"] >= 23) & (df["AGEP"] <= 29), 1, 0)
df["AGE_30_39"] = np.where((df["AGEP"] >= 30) & (df["AGEP"] <= 39), 1, 0)
df["AGE_40_49"] = np.where((df["AGEP"] >= 40) & (df["AGEP"] <= 49), 1, 0)
df["AGE_50_64"] = np.where((df["AGEP"] >= 50) & (df["AGEP"] <= 64), 1, 0)

In [85]:
df["EDU_LESS_HIGH"] = np.where(df["SCHL"] <= 15, 1, 0)
df["EDU_HIGHONLY"] = np.where((df["SCHL"] >= 16) & (df["SCHL"] <= 17), 1, 0)
df["EDU_SOMECOLLEGE"] = np.where((df["SCHL"] >= 18) & (df["SCHL"] <= 20), 1, 0)
df["EDU_COLLEGE"] = np.where(df["SCHL"] >= 21, 1, 0)
df["EDU_NOCOLLEGE"] = np.where(df["EDU_COLLEGE"] == 0, 1, 0)

In [86]:
df["WOMAN_CHILD"] = np.where((df["PAOC"] >= 1) & (df["PAOC"] <= 3), 1, 0)
df["UNEMPLOYED"] = np.where(df["ESR"] == 3, 1, 0)

In [87]:
df["MALE"] = np.where(df["SEX"] == 1, 1, 0)
df["FEMALE"] = np.where(df["SEX"] == 0, 1, 0)

In [88]:
df["MARRIED"] = np.where(df["MAR"] == 1, 1, 0)

In [89]:
df["child_old"] = np.where(df["child"] == df["REC_CHILD"], 0, df["child"])
df["child_old"].value_counts()

0    44611
1    18661
Name: child_old, dtype: int64

In [90]:
df["REC_NO_MAR"] = np.where((df["MARHD"] == 1) | (df["MARHW"] == 1), 1, 0)
df["REC_NO_MAR"].value_counts()

0    62389
1      883
Name: REC_NO_MAR, dtype: int64

In [91]:
df["MARHM_new"] = np.where(df["MARHM"] == 2, 0, df["MARHM"])
df["MARHM_new"].value_counts()

0.0    62261
1.0     1011
Name: MARHM_new, dtype: int64

In [92]:
df["MARRIED"].value_counts()

1    34051
0    29221
Name: MARRIED, dtype: int64

In [93]:
df["married_old"] = np.where((df["MARHM"] == df["MARRIED"]), 0, df["MARRIED"])
df["married_old"].value_counts()

1    33059
0    30213
Name: married_old, dtype: int64

In [94]:
df["MILITARY"] = np.where(df["MIL"] == 1, 1, 0)

In [95]:
df["HH_COST"] = np.where(df["TEN"] == 3, df["GRNTP"], df["SMOCP"])

In [96]:
# group quarters or nonfamily >= person household
df["NONFAMILY"] = np.where((df["TYPE"] >= 2) | (df["HHT"] == 5) | (df["HHT"] == 7), 1, 0)

In [97]:
df["OWN_HOME"] = np.where(df["TEN"] <= 2, 1, 0)

In [98]:
df["ONE_PERSON_HH"] = np.where(df["NP"] == 1, 1, 0)

In [99]:
# update to debate still
df["AGR_EXT"] = np.where((df["AGR"] == 1) | (df["EXT"] == 1), 1, 0)
df["HIGH_ED"] = np.where((df["MED"] == 1) | (df["EDU"] == 1) | (df["PRF"] == 1) | (df["MED"] == 1) | (df["FIN"] == 1) | (df["INF"] == 1) | (df["MED"] == 1), 1, 0)
df["LICENSE"] = np.where((df["SRV"] == 1) | (df["REL"] == 1), 1, 0)
df["OTHER"] = np.where((df["AGR_EXT"] == 0) & (df["HIGH_ED"] == 0) & (df["LICENSE"] == 0), 1, 0)

In [113]:
df["MICRO_adj_ORIG"] = np.where((df["TYPE_ORIG"] == 3) | (df["TYPE_ORIG"] == 2), 1, 0)
df["METRO"] = np.where(df["TYPE_ORIG"] == 1, 1, 0)
df["T34"] = np.where(df["TYPE_ORIG"] == 0, 1, 0)

In [115]:
for i in range(1, 101):
    key = "ALT{0}_".format(i)
    df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
    df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
    df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)

  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (df[key + "TYPE"] == 3), 1, 0)
  df[key + "METRO"] = np.where(df[key + "TYPE"] == 1, 1, 0)
  df[key + "T34"] = np.where(df[key + "TYPE"] == 0, 1, 0)
  df[key + "MICRO"] = np.where((df[key + "TYPE"] == 2) | (

In [122]:
df["UNEMP"] = np.where((df["ESR"] == 3) | (df["ESR"] == 6), 1, 0)

In [123]:
# making the Biogeme Database that is used for the model estimation
database = db.Database('us_data', df)

In [124]:
# The following statement allows you to use the names of the
# variable as Python variable. (in the utility functions)
globals().update(database.variables)

In [125]:
c_move = Beta("c_move", 0, None, None, 0)

In [126]:
# Staying Choice Parameters to be Estimated
c_stay_married = Beta("c_stay_married", 0, None, None, 0)
c_stay_age_18_22 = Beta("c_stay_age_18_22", 0, None, None, 0)
c_stay_age_23_29 = Beta("c_stay_age_23_29", 0, None, None, 0)
c_stay_age_30_39 = Beta("c_stay_age_30_39", 0, None, None, 0)
c_stay_age_50_64 = Beta("c_stay_age_50_64", 0, None, None, 0)
c_stay_age_65 = Beta("c_stay_age_65", 0, None, None, 0)
c_stay_edu_nohigh = Beta("c_stay_edu_nohigh", 0, None, None, 0)
c_stay_edu_somecollege = Beta("c_stay_edu_somecollege", 0, None, None, 0)
c_stay_edu_college = Beta("c_stay_edu_college", 0, None, None, 0)
c_stay_child = Beta("c_stay_child", 0, None, None, 0)
c_stay_unemployed = Beta("c_stay_unemployed", 0, None, None, 0)
c_stay = Beta("c_stay", 0, None, None, 0)
c_stay_foreign = Beta("c_stay_foreign", 0, None, None, 0)
c_stay_dens = Beta("c_stay_dens", 0, None, None, 0)
c_stay_college = Beta("c_stay_college", 0, None, None, 0)
c_stay_own_house = Beta("c_stay_own_house", 0, None, None, 0)
c_stay_gq = Beta("c_stay_gq", 0, None, None, 0)
c_stay_one_person_hh = Beta("c_stay_one_person_hh", 0, None, None, 0)
c_stay_rec_child = Beta("c_stay_rec_child", 0, None, None, 0)
c_stay_rec_mar = Beta("c_stay_rec_mar", 0, None, None, 0)
c_stay_rec_nomar = Beta("c_stay_rec_nomar", 0, None, None, 0)
c_stay_hhcost = Beta("c_stay_hhcost", 0, None, None, 0)
c_stay_mil = Beta("c_stay_mil", 0, None, None, 0)
c_stay_vac = Beta('c_stay_vac', 0, None, None, 0)

In [127]:
# occupation-based coefficients

c_adm = Beta("c_adm", 0, None, None, 0)
c_agr = Beta("c_agr", 0, None, None, 0)
c_con = Beta("c_con", 0, None, None, 0)
c_edu = Beta("c_edu", 0, None, None, 0)
c_ent = Beta("c_ent", 0, None, None, 0)
c_ext = Beta("c_ext", 0, None, None, 0)
c_fin = Beta("c_fin", 0, None, None, 0)
c_fod = Beta("c_fod", 0, None, None, 0)
c_inf = Beta("c_inf", 0, None, None, 0)
c_med = Beta("c_med", 0, None, None, 0)
c_mfg = Beta("c_mfg", 0, None, None, 0)
c_mng = Beta("c_mng", 0, None, None, 0)
c_prf = Beta("c_prf", 0, None, None, 0)
c_pub = Beta("c_pub", 0, None, None, 0)
c_rel = Beta("c_rel", 0, None, None, 0)
c_ret = Beta("c_ret", 0, None, None, 0)
c_srv = Beta("c_srv", 0, None, None, 0)
c_trn = Beta("c_trn", 0, None, None, 0)
c_utl = Beta("c_utl", 0, None, None, 0)
c_whl = Beta("c_whl", 0, None, None, 0)

In [128]:
# defining the staying utility function
V0 = c_stay_married * married_old + c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_edu_college * EDU_COLLEGE + c_stay_child * child_old + c_stay + c_stay_foreign * FOREIGN +  c_stay_dens * DENS_ORIG + c_stay_college * IN_COLLEGE + c_stay_rec_mar * MARHM_new + c_stay_rec_nomar * REC_NO_MAR + c_stay_rec_child * REC_CHILD + c_stay_mil * MILITARY + c_stay_hhcost * UNEMP * HH_COST / (HINCP + 1) + c_stay_own_house * OWN_HOME + c_stay_gq * NONFAMILY + c_stay_one_person_hh * ONE_PERSON_HH + c_stay_unemployed * UNEMP + c_stay_vac * HU_VAC_ORIG / HU_TOT_ORIG

# c_stay_married * MARRIED + c_stay_income * PINCP + c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_edu_nohigh * EDU_LESS_HIGH + c_stay_edu_somecollege * EDU_SOMECOLLEGE + c_stay_edu_college * EDU_COLLEGE + c_stay_child * child + c_stay + c_stay_hhinc_orig * HH_MED_INC_ORIG + c_stay_foreign * FOREIGN + c_stay_hurent * HH_MED_RENT_ORIG + c_stay_dens * DENS_ORIG + c_stay_origin_unemp_rate * UNEMP_ORIG + c_stay_college * IN_COLLEGE

In [129]:
# Destination Choice Parameters to be estimated
# Beta(name of the factor, initial value of the coefficient, lower bound, upper bound, whether or not
c_destchoice_dist=Beta('c_destchoice_dist', 0, None, None, 0)
c_destchoice_logdist=Beta('c_destchoice_logdist', 0, None, None, 0)
c_destchoice_unemp = Beta("c_destchoice_unemp", 0, None, None, 0)
c_destchoice_hhinc = Beta("c_destchoice_hhinc", 0, None, None, 0)
c_destchoice_internal = Beta("c_destchoice_internal", 0, None, None, 0)
c_destchoice_urban = Beta("c_destchoice_urban", 0, None, None, 0)
c_destchoice_hurent = Beta("c_destchoice_hurent", 0, None, None, 0)
c_destchoice_college = Beta("c_destchoice_college", 0, None, None, 0)
c_destchoice_age_18_34 = Beta("c_destchoice_age_18_34", 0, None, None, 0)
c_destchoice_age_35_64 = Beta("c_destchoice_age_35_64", 0, None, None, 0)
c_destchoice_age_over_65 = Beta("c_destchoice_age_over_65", 0, None, None, 0)
c_destchoice_foreign = Beta("c_destchoice_foreign", 0, None, None, 0)
c_destchoice_unemp = Beta("c_destchoice_unemp", 0, None, None, 0)
c_destchoice_pctownind = Beta("c_destchoice_pctownind", 0, None, None, 0)
c_destchoice_entscore = Beta("c_destchoice_entscore", 0, None, None, 0)

In [130]:
c_destchoice_T34_T34 = Beta("c_destchoice_T34_T34", 0, None, None, 0)
c_destchoice_T34_Metro = Beta('c_destchoice_T34_Metro', 0, None, None, 0)
c_destchoice_T34_Micro = Beta("c_destchoice_T34_Micro", 0, None, None, 0)
c_destchoice_Metro_T34 = Beta("c_destchoice_Metro_T34", 0, None, None, 0)
c_destchoice_Metro_Metro = Beta('c_destchoice_Metro_Metro', 0, None, None, 0)
c_destchoice_Metro_Micro = Beta('c_destchoice_Metro_Micro', 0, None, None, 0)
c_destchoice_Micro_T34 = Beta("c_destchoice_Micro_T34", 0, None, None, 0)
c_destchoice_Micro_Metro = Beta("c_destchoice_Micro_Metro", 0, None, None, 0)
c_destchoice_Micro_Micro = Beta("c_destchoice_Micro_Micro", 0, None, None, 0)

In [147]:
c_destchoice_geo_spec_job = Beta("c_destchoice_geo_spec_job", 0, None, None, 0)
c_destchoice_high_ed_job = Beta("c_destchoice_high_ed_job", 0, None, None, 0)
c_destchoice_license_job = Beta("c_destchoice_license_job", 0, None, None, 0)
c_destchoice_other = Beta("c_destchoice_other", 0, None, None, 0)

In [155]:
# defining the utility functions for each of the moving PUMA alternatives
# defined using the exec to parse a string to save space
# can also use a loop to print out the statements and then copy/paste them to run
# can also just write each one manually
for i in range(100):
    num = i + 1
    initialization = "V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_destchoice_hurent * ALT{0}_HURENT + c_destchoice_college * IN_COLLEGE * ALT{0}_COLLEGE + c_destchoice_foreign * FOREIGN * ALT{0}_FOREIGN / ALT{0}_POP + c_destchoice_age_18_34 * ALT{0}_18_34 / ALT{0}_POP + c_destchoice_age_over_65 * ALT{0}_65 / ALT{0}_POP + c_destchoice_entscore * ALT{0}_ENT / AGEP / ALT{0}_EMP + c_destchoice_hhinc * ALT{0}_HHINC + \
    (c_destchoice_T34_T34 * T34 + c_destchoice_Metro_T34 * METRO +  c_destchoice_Micro_T34 * MICRO_adj_ORIG) * ALT{0}_T34 + \
    (c_destchoice_T34_Metro * T34 + c_destchoice_Metro_Metro * METRO + c_destchoice_Micro_Metro * MICRO_adj_ORIG) * ALT{0}_METRO + \
    (c_destchoice_T34_Micro * T34  + c_destchoice_Metro_Micro * METRO + c_destchoice_Micro_Micro * MICRO_adj_ORIG) * ALT{0}_MICRO + \
    (c_destchoice_geo_spec_job * AGR_EXT + c_destchoice_high_ed_job * HIGH_ED + c_destchoice_license_job * LICENSE + c_destchoice_other * OTHER) * ALT{0}_OWN_JOB".format(num)
    exec(initialization)
print(V100)

# full model specification (takes a bit longer to run):
# "V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_destchoice_urban / DENS_ORIG * (ALT{0}_DENS - DENS_ORIG) + c_destchoice_hurent * ALT{0}_HURENT + c_destchoice_college * IN_COLLEGE * ALT{0}_COLLEGE + c_destchoice_vacancy * ALT{0}_VAC + c_destchoice_foreign * FOREIGN * ALT{0}_FOREIGN / ALT{0}_POP + c_destchoice_age_18_34 * ALT{0}_18_34 / ALT{0}_POP + c_destchoice_age_35_64 * ALT{0}_35_64 / ALT{0}_POP + c_destchoice_age_over_65 * ALT{0}_65 / ALT{0}_POP + c_destchoice_pctnobach * EDU_NOCOLLEGE * (ALT{0}_EDU_NOBACH + ALT{0}_EDU_NOHS + ALT{0}_EDU_HS) / ALT{0}_EMP + c_destchoice_pctbach * EDU_COLLEGE * ALT{0}_EDU_BACH / ALT{0}_EMP + c_destchoice_entscore * ALT{0}_ENT / AGEP / ALT{0}_EMP"

# "V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_adm * ADM * ALT{0}_ADM/ALT{0}_EMP + c_agr * AGR * ALT{0}_AGR/ALT{0}_EMP + c_con * CON * ALT{0}_CON/ALT{0}_EMP + c_edu * EDU * ALT{0}_EDU/ALT{0}_EMP + c_ent * ENT * ALT{0}_ENT/ALT{0}_EMP + c_ext * EXT * ALT{0}_EXT/ALT{0}_EMP + c_fin * FIN * ALT{0}_FIN/ALT{0}_EMP + c_fod * FOD * ALT{0}_FOD/ALT{0}_EMP + c_inf * INF * ALT{0}_INF/ALT{0}_EMP + c_med * MED * ALT{0}_MED/ALT{0}_EMP + c_mfg * MFG * ALT{0}_MFG/ALT{0}_EMP + c_mng * MNG * ALT{0}_MNG/ALT{0}_EMP + c_prf * PRF * ALT{0}_PRF/ALT{0}_EMP + c_pub * PUB * ALT{0}_PUB/ALT{0}_EMP + c_rel * REL * ALT{0}_REL/ALT{0}_EMP + c_ret * RET * ALT{0}_RET/ALT{0}_EMP + c_srv * SRV * ALT{0}_SRV/ALT{0}_EMP + c_trn * TRN * ALT{0}_TRN/ALT{0}_EMP + c_utl * UTL * ALT{0}_UTL/ALT{0}_EMP + c_whl * WHL * ALT{0}_WHL/ALT{0}_EMP"

# for the fields already in the Biogeme db.Database, can explicitly refer to them; also used a few references to other databases using .loc and fields in the Biogeme database

(((((((((((((log(ALT100_POP) + (c_destchoice_dist(0) * ALT100_DIST)) + (c_destchoice_logdist(0) * log((ALT100_DIST + `1`)))) + (c_destchoice_hurent(0) * ALT100_HURENT)) + ((c_destchoice_college(0) * IN_COLLEGE) * ALT100_COLLEGE)) + (((c_destchoice_foreign(0) * FOREIGN) * ALT100_FOREIGN) / ALT100_POP)) + ((c_destchoice_age_18_34(0) * ALT100_18_34) / ALT100_POP)) + ((c_destchoice_age_over_65(0) * ALT100_65) / ALT100_POP)) + (((c_destchoice_entscore(0) * ALT100_ENT) / AGEP) / ALT100_EMP)) + (c_destchoice_hhinc(0) * ALT100_HHINC)) + ((((c_destchoice_T34_T34(0) * T34) + (c_destchoice_Metro_T34(0) * METRO)) + (c_destchoice_Micro_T34(0) * MICRO_adj_ORIG)) * ALT100_T34)) + ((((c_destchoice_T34_Metro(0) * T34) + (c_destchoice_Metro_Metro(0) * METRO)) + (c_destchoice_Micro_Metro(0) * MICRO_adj_ORIG)) * ALT100_METRO)) + ((((c_destchoice_T34_Micro(0) * T34) + (c_destchoice_Metro_Micro(0) * METRO)) + (c_destchoice_Micro_Micro(0) * MICRO_adj_ORIG)) * ALT100_MICRO)) + (((((c_destchoice_geo_spec_job(0

In [156]:
utilities = {}
for i in range(101):
    init = "utilities[{0}] = V{0}".format(i)
    exec(init)

In [157]:
import biogeme.messaging as msg
logger = msg.bioMessage()
logger.setDetailed()

In [158]:
logger.allMessages()

'*** Messages from biogeme 3.2.8 [2022-06-01]\n[10:06:33] < General >   Remove 2778 unused variables from the database as only 1521 are used.\n[10:06:35] < Detailed >  It is suggested to scale the following variables.\n[10:06:35] < Detailed >  Multiply ALT16_ENT by\t1e-05 because the largest (abs) value is\t80042.0\n[10:06:35] < Detailed >  Multiply ALT8_DENS by\t1e-05 because the largest (abs) value is\t102846.33928097\n[10:06:35] < Detailed >  Multiply ALT29_EDU_BACH by\t1e-06 because the largest (abs) value is\t371563.0\n[10:06:35] < Detailed >  Multiply ALT98_EDU_HS by\t1e-05 because the largest (abs) value is\t150000.0\n[10:06:35] < Detailed >  Multiply ALT10_POP by\t1e-05 because the largest (abs) value is\t301019.0\n[10:06:35] < Detailed >  Multiply ALT14_EDU_NOHS by\t1e-05 because the largest (abs) value is\t96283.0\n[10:06:35] < Detailed >  Multiply ALT54_DIST by\t1e-07 because the largest (abs) value is\t5587229.0\n[10:06:35] < Detailed >  Multiply ALT57_DENS by\t1e-05 becaus

In [159]:
# Associate utility functions with the numbering of alternatives (corresponds to the CHOSEN field created earlier)
V = utilities

# Associate the availability conditions with the alternatives
# for this model, all migrants had all alternatives theoretically available so all are equal to 1 (available)
# if individual people had different availability for alterantives, could pass in a column of the dataframe to account for that availability
av = {}
for i in range(0, 101):
    av[i] = 1

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
# estimating the CHOSEN field
logprob = models.loglogit(V, av, CHOSEN)


# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'us_full_basic'

# Calculate the null log likelihood for reporting. (likelihood of predicting every entry's alterantive correctly if alternatives are randomly chosen)
biogeme.calculateNullLoglikelihood(av)

# Estimate the parameters
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)

[06:01:17] < General >   Remove 3299 unused variables from the database as only 1432 are used.
[06:01:19] < Detailed >  It is suggested to scale the following variables.
[06:01:19] < Detailed >  Multiply ALT16_ENT by	1e-05 because the largest (abs) value is	80042.0
[06:01:19] < Detailed >  Multiply ALT10_POP by	1e-05 because the largest (abs) value is	301019.0
[06:01:19] < Detailed >  Multiply ALT54_DIST by	1e-07 because the largest (abs) value is	5587229.0
[06:01:19] < Detailed >  Multiply ALT54_HHINC by	1e-05 because the largest (abs) value is	181965.0
[06:01:19] < Detailed >  Multiply ALT72_OWN_JOB by	1e-05 because the largest (abs) value is	177406.0
[06:01:19] < Detailed >  Multiply ALT50_HHINC by	1e-05 because the largest (abs) value is	181965.0
[06:01:19] < Detailed >  Multiply ALT83_COLLEGE by	1e-05 because the largest (abs) value is	59710.0
[06:01:19] < Detailed >  Multiply ALT89_EMP by	1e-06 because the largest (abs) value is	1107358.0
[06:01:19] < Detailed >  Multiply ALT26_H

In [None]:
df[["UNEMP_ORIG", "ORIGIN"]]

Unnamed: 0,UNEMP_ORIG,ORIGIN
0,0.048183,3400900
1,0.057408,5310200
2,0.068618,4801400
3,0.054169,4801200
4,0.063164,1207300
...,...,...
63267,0.056103,603700
63268,0.060474,607700
63269,0.053630,3704990
63270,0.039232,608500


In [None]:
migpuma_acs_data = pd.read_csv("data/ACS_MIGPUMA_2018.csv").set_index("MIGPUMA")

In [None]:
migpuma_acs_data.loc[3400900, ["Population 16 Years and Over in Labor Force Civilian Unemployed", "Population 16 Years and Over in Labor Force Civilian"]]

Population 16 Years and Over in Labor Force Civilian Unemployed     20995.0
Population 16 Years and Over in Labor Force Civilian               435731.0
Name: 3400900, dtype: float64