In [182]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta
from biogeme.expressions import log
import math

In [183]:
# read the data, is in a wide format (every person gets one unique entry with all alternatives specified)
df = pd.read_csv("data/ky_estdata_full.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,RT,SERIALNO,DIVISION,SPORDER,PUMA,REGION,ST,ADJINC,PWGTP,...,EDU_ORIG,MED_ORIG,ENT_ORIG,FOD_ORIG,SRV_ORIG,PUB_ORIG,JOBS_EDU_NOHS_ORIG,JOBS_EDU_HS_ORIG,JOBS_EDU_NOBACH_ORIG,JOBS_EDU_BACH_ORIG
0,0,P,2018GQ0000237,6,1,2300,3,21,1013097,12,...,4143,5538,624,4127,932,1907,4661,13989,13899,9628
1,1,P,2018GQ0000289,6,1,800,3,21,1013097,69,...,3259,5613,161,3103,578,1686,3315,10113,8855,5408
2,2,P,2018GQ0000306,6,1,2200,3,21,1013097,46,...,6917,4500,347,4366,774,1574,2982,8714,8775,7263
3,3,P,2018GQ0000424,6,1,2000,3,21,1013097,24,...,5180,4347,840,4443,1265,12683,4416,13305,14194,11786
4,4,P,2018GQ0000448,6,1,2600,3,21,1013097,5,...,5668,5529,867,6068,1521,1952,3595,11890,11642,8691


In [184]:
# reading some more data
puma_acs_data = pd.read_csv("data/ACS_2018.csv")
puma_lodes_data = pd.read_csv("data/wac_puma.csv")

puma_acs_data["Geo_FIPS"] = puma_acs_data["Geo_FIPS"].astype(str).str.zfill(7)
puma_acs_data = puma_acs_data.set_index("Geo_FIPS")

puma_lodes_data["puma"] = puma_lodes_data["puma"].astype(str).str.zfill(7)
puma_lodes_data = puma_lodes_data.set_index("puma")

In [185]:
puma_acs_data["UNEMP"] = puma_acs_data["Population 16 Years and Over in Labor Force Civilian Unemployed"] / puma_acs_data["Population 16 Years and Over in Labor Force Civilian"]
puma_acs_data["COLLEGE"] = puma_acs_data["Population 3 Years and Over Enrolled in School Private School College"] + puma_acs_data["Population 3 Years and Over Enrolled in School Public School College"]
puma_acs_data["VACANCY_PCT"] = puma_acs_data["Housing Units Vacant"] / puma_acs_data["Housing Units"]

In [186]:
df["MED"] = np.where(df["NAICS"] == "MED", 1, 0)
df["MFG"] = np.where(df["NAICS"] == "MFG", 1, 0)
df["RET"] = np.where(df["NAICS"] == "RET", 1, 0)
df["EDU"] = np.where(df["NAICS"] == "EDU", 1, 0)
df["ADM"] = np.where(df["NAICS"] == "ADM", 1, 0)
df["FOD"] = np.where(df["NAICS"] == "FOD", 1, 0)
df["PRF"] = np.where(df["NAICS"] == "PRF", 1, 0)
df["TRN"] = np.where(df["NAICS"] == "TRN", 1, 0)
df["SRV"] = np.where(df["NAICS"] == "SRV", 1, 0)
df["FIN"] = np.where(df["NAICS"] == "FIN", 1, 0)
df["WHL"] = np.where(df["NAICS"] == "WHL", 1, 0)
df["AGR"] = np.where(df["NAICS"] == "AGR", 1, 0)
df["PUB"] = np.where(df["NAICS"] == "PUB", 1, 0)
df["INF"] = np.where(df["NAICS"] == "INF", 1, 0)
df["ENT"] = np.where(df["NAICS"] == "ENT", 1, 0)
df["REL"] = np.where(df["NAICS"] == "REL", 1, 0)
df["UTL"] = np.where(df["NAICS"] == "UTL", 1, 0)
df["EXT"] = np.where(df["NAICS"] == "EXT", 1, 0)
df["MNG"] = np.where(df["NAICS"] == "MNG", 1, 0)

In [187]:
# importing a list of the Kentucky PUMAs
distances = pd.read_csv("data/ky_distances.csv").set_index("Unnamed: 0")
distances.columns

Index(['2100100', '2100200', '2101500', '2100300', '2101400', '2100600',
       '2101100', '2100700', '2102000', '2101200', '2100900', '2101000',
       '2102800', '2101800', '2102300', '2102700', '2100800', '2102600',
       '2101600', '2102100', '2102500', '2102200', '2101901', '2101902',
       '2102400', '2101704', '2101705', '2101703', '2101702', '2101701',
       '2101706', '2100400', '2101300', '2100500'],
      dtype='object')

In [188]:
df["NAICS_code"] = df["NAICS"].astype("category").cat.codes

In [189]:
naics_dict = dict(enumerate(df["NAICS"].astype("category").cat.categories))
naics_dict[-1] = "filler"
naics_df = pd.DataFrame(list(naics_dict.items())).set_index(0)
naics_df

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
0,ADM
1,AGR
2,CON
3,EDU
4,ENT
5,EXT
6,FIN
7,FOD
8,INF
9,MED


In [190]:
df["ALT1_PUMA"]

0        2100100
1        2100100
2        2100100
3        2100100
4        2100100
          ...   
35068    2100100
35069    2100100
35070    2100100
35071    2100100
35072    2100100
Name: ALT1_PUMA, Length: 35073, dtype: int64

In [191]:
# clean up the database (Biogeme Database can only have numerical values)
df = df.select_dtypes(['number'])
df = df.fillna(0)

In [192]:
# defining the chosen alterantive for each person explicitly (0 to 35, corresponding to staying and moving to one of the many PUMAs)
df['CHOSEN_PUMA'] = df['CHOSEN']
df['CHOSEN'] = 0
for i in range(1, 35): 
    var = 'ALT' + str(i) + '_PUMA'
    df['CHOSEN'] = np.where(df[var]==df['CHOSEN_PUMA'], i, df['CHOSEN'])
df["CHOSEN"] = np.where(df["STAY"] == 1, 0, df["CHOSEN"])

In [193]:
df["CHOSEN"].value_counts()

0     31080
23      224
24      188
32      162
29      160
34      155
1       155
30      149
15      146
16      145
25      141
18      140
22      137
28      129
33      128
14      123
20      110
2       109
9       108
11      107
26       98
17       96
10       96
31       95
5        88
7        88
27       88
4        87
6        87
3        84
13       83
19       80
12       75
8        66
21       66
Name: CHOSEN, dtype: int64

In [194]:
df["IN_COLLEGE"] = np.where((df["SCHG"] == 15) | (df["SCHG"] == 16), 1, 0)
df["IN_COLLEGE"]

0        1
1        0
2        1
3        0
4        0
        ..
35068    0
35069    0
35070    0
35071    0
35072    0
Name: IN_COLLEGE, Length: 35073, dtype: int32

In [195]:
df["AGE_18_34"] = np.where(df["AGEP"] <= 34, 1, 0)
df["AGE_35_64"] = np.where((df["AGEP"] >= 35) & (df["AGEP"] <= 64), 1, 0)
df["AGE_OVER_65"] = np.where((df["AGEP"] >= 65), 1, 0)
df["FOREIGN"] = np.where(df["NATIVITY"] == 2, 1, 0)

In [196]:
df["AGE_18_22"] = np.where(df["AGEP"] <= 22, 1, 0)
df["AGE_23_29"] = np.where((df["AGEP"] >= 23) & (df["AGEP"] <= 29), 1, 0)
df["AGE_30_39"] = np.where((df["AGEP"] >= 30) & (df["AGEP"] <= 39), 1, 0)
df["AGE_40_49"] = np.where((df["AGEP"] >= 40) & (df["AGEP"] <= 49), 1, 0)
df["AGE_50_64"] = np.where((df["AGEP"] >= 50) & (df["AGEP"] <= 64), 1, 0)

In [197]:
df["EDU_LESS_HIGH"] = np.where(df["SCHL"] <= 15, 1, 0)
df["EDU_HIGHONLY"] = np.where((df["SCHL"] >= 16) & (df["SCHL"] <= 17), 1, 0)
df["EDU_SOMECOLLEGE"] = np.where((df["SCHL"] >= 18) & (df["SCHL"] <= 20), 1, 0)
df["EDU_COLLEGE"] = np.where(df["SCHL"] >= 21, 1, 0)
df["EDU_NOCOLLEGE"] = np.where(df["EDU_COLLEGE"] == 0, 1, 0)

In [198]:
df["WOMAN_CHILD"] = np.where((df["PAOC"] >= 1) & (df["PAOC"] <= 3), 1, 0)
df["UNEMPLOYED"] = np.where(df["ESR"] == 3, 1, 0)

In [199]:
df["MALE"] = np.where(df["SEX"] == 1, 1, 0)
df["FEMALE"] = np.where(df["SEX"] == 0, 1, 0)

In [200]:
df["MARRIED"] = np.where(df["MAR"] == 1, 1, 0)

In [201]:
df["filler"] = 0

In [202]:
df["child_old"] = np.where(df["child"] == df["REC_CHILD"], 0, df["child"])

In [203]:
df["child_old"].value_counts()

0    25368
1     9705
Name: child_old, dtype: int64

In [204]:
df["REC_NO_MAR"] = np.where((df["MARHD"] == 1) | (df["MARHW"] == 1), 1, 0)

In [205]:
df["REC_NO_MAR"].value_counts()

0    34454
1      619
Name: REC_NO_MAR, dtype: int64

In [206]:
df["REC_CHILD"].value_counts()

0.0    33995
1.0     1078
Name: REC_CHILD, dtype: int64

In [207]:
df["MARHM_new"] = np.where(df["MARHM"] == 2, 0, df["MARHM"])
df["MARHM_new"].value_counts()

0.0    34457
1.0      616
Name: MARHM_new, dtype: int64

In [208]:
df["MARRIED"].value_counts()

1    19271
0    15802
Name: MARRIED, dtype: int64

In [209]:
df["married_old"] = np.where((df["MARHM"] == df["MARRIED"]), 0, df["MARRIED"])
df["married_old"].value_counts()

1    18676
0    16397
Name: married_old, dtype: int64

In [210]:
df["DENS_ORIG_NORM"] = (df["DENS_ORIG"] - np.mean(df["DENS_ORIG"]))/(np.std(df["DENS_ORIG"]))
df["PINCP_NORM"] = (df["PINCP"] - np.mean(df["PINCP"])) / np.std(df["PINCP"])
df["HH_MED_INC_ORIG_NORM"] = (df["HH_MED_INC_ORIG"] - np.mean(df["HH_MED_INC_ORIG"])) / np.std(df["HH_MED_INC_ORIG"])
df["HH_MED_RENT_ORIG_NORM"] = (df["HH_MED_RENT_ORIG"] - np.mean(df["HH_MED_RENT_ORIG"])) / np.std(df["HH_MED_RENT_ORIG"])
df["UNEMP_ORIG_NORM"] = (df["UNEMP_ORIG"] - np.mean(df["UNEMP_ORIG"])) / np.std(df["UNEMP_ORIG"])

In [211]:
puma_acs_data["HH_MED_INC_NORM"] = (puma_acs_data["Median Household Income (In 2019 Inflation Adjusted Dollars)"] - np.mean(puma_acs_data["Median Household Income (In 2019 Inflation Adjusted Dollars)"])) / np.std(puma_acs_data["Median Household Income (In 2019 Inflation Adjusted Dollars)"])
puma_acs_data["DENS_NORM"] = (puma_acs_data["Population Density (Per Sq. Mile)"] - np.mean(puma_acs_data["Population Density (Per Sq. Mile)"])) / np.std(puma_acs_data["Population Density (Per Sq. Mile)"])
puma_acs_data["HURENT_NORM"] = (puma_acs_data["Median Gross Rent"] - np.mean(puma_acs_data["Median Gross Rent"])) / np.std(puma_acs_data["Median Gross Rent"])
puma_acs_data["COLLEGE_NORM"] = (puma_acs_data["COLLEGE"] - np.mean(puma_acs_data["COLLEGE"])) / np.std(puma_acs_data["COLLEGE"])
puma_acs_data["VACANCY_NORM"] = (puma_acs_data["VACANCY_PCT"] - np.mean(puma_acs_data["VACANCY_PCT"])) / np.std(puma_acs_data["VACANCY_PCT"])
puma_acs_data["FOREIGN_NORM"] = (puma_acs_data["Total Population Foreign Born"] - np.mean(puma_acs_data["Total Population Foreign Born"])) / np.std(puma_acs_data["Total Population Foreign Born"])


In [212]:
# making the Biogeme Database that is used for the model estimation
database = db.Database('ky_data', df)

In [213]:
# The following statement allows you to use the names of the
# variable as Python variable. (in the utility functions)
globals().update(database.variables)

In [214]:
# Destination Choice Parameters to be estimated
# Beta(name of the factor, initial value of the coefficient, lower bound, upper bound, whether or not the coefficinet should be fixed to initial value value)
c_destchoice_emp=Beta('c_destchoice_emp', 0, None, None, 0)
c_destchoice_dist=Beta('c_destchoice_dist', -0.000005, None, None, 0)
c_destchoice_logdist=Beta('c_destchoice_logdist', -0.139344, None, None, 0)
c_destchoice_time=Beta('c_destchoice_time', 0, None, None, 0)
c_destchoice_unemp = Beta("c_destchoice_unemp", 0, None, None, 0)
c_destchoice_hhinc = Beta("c_destchoice_hhinc", 0.000014, None, None, 0)
c_destchoice_internal = Beta("c_destchoice_internal", 0, None, None, 0)
c_destchoice_urban = Beta("c_destchoice_urban", -0.008602, None, None, 0)
c_destchoice_huprice = Beta("c_destchoice_huprice", 0, None, None, 0)
c_destchoice_hurent = Beta("c_destchoice_hurent", -0.001804, None, None, 0)
c_destchoice_vacancy = Beta("c_destchoice_vacancy", 0.560615, None, None, 0)
c_destchoice_college = Beta("c_destchoice_college", 0.000081, None, None, 0)
c_destchoice_age_18_34 = Beta("c_destchoice_age_18_34", 2.779809, None, None, 0)
c_destchoice_age_35_64 = Beta("c_destchoice_age_35_64", 0.926137, None, None, 0)
c_destchoice_age_over_65 = Beta("c_destchoice_age_over_65", 0.028123, None, None, 0)
c_destchoice_foreign = Beta("c_destchoice_foreign", 4.160805, None, None, 0)
c_destchoice_unemp = Beta("c_destchoice_unemp", 0, None, None, 0)
c_destchoice_pctbach = Beta("c_destchoice_pctbach", 3.042734, None, None, 0)
c_destchoice_pctnobach = Beta("c_destchoice_pctnobach", 2.565847, None, None, 0)
c_destchoice_pctownind = Beta("c_destchoice_pctownind", 0, None, None, 0)
c_destchoice_entscore = Beta("c_destchoice_entscore", 0.051888, None, None, 0)

In [215]:
c_move = Beta("c_move", -7.985678, None, None, 0)

In [216]:
# Staying Choice Parameters to be Estimated
c_stay_married = Beta("c_stay_married", 0.662462, None, None, 0)
c_stay_income = Beta("c_stay_income", 0.000002, None, None, 0)
c_stay_origin_unemp_rate = Beta("c_stay_origin_unemp", -2.134128, None, None, 0)
c_stay_age_18_22 = Beta("c_stay_age_18_22", -0.532322, None, None, 0)
c_stay_age_23_29 = Beta("c_stay_age_23_29", -0.733181, None, None, 0)
c_stay_age_30_39 = Beta("c_stay_age_30_39", -0.385103, None, None, 0)
c_stay_age_50_64 = Beta("c_stay_age_50_64", 0.302110, None, None, 0)
c_stay_age_65 = Beta("c_stay_age_65", 0.381940, None, None, 0)
c_stay_edu_nohigh = Beta("c_stay_edu_nohigh", -0.163364, None, None, 0)
c_stay_edu_somecollege = Beta("c_stay_edu_somecollege", -0.042263, None, None, 0)
c_stay_edu_college = Beta("c_stay_edu_college", -0.802324, None, None, 0)
c_stay_child = Beta("c_stay_child", -0.394293, None, None, 0)
c_stay_unemployed = Beta("c_stay_unemployed", 0, None, None, 0)
c_stay = Beta("c_stay", 7.985678, None, None, 0)
c_stay_hhinc_orig = Beta("c_stay_hhinc_orig", 0.000014, None, None, 0)
c_stay_foreign = Beta("c_stay_foreign", 0.144379, None, None, 0)
c_stay_rent = Beta("c_stay_rent", 0, None, None, 0)
c_stay_hurent = Beta("c_stay_hurent", -0.001525, None, None, 0)
c_stay_dens = Beta("c_stay_dens", 0.000402, None, None, 0)
c_stay_college = Beta("c_stay_college", 0.650010, None, None, 0)

In [217]:
c_stay_rec_child = Beta("c_stay_rec_child", -0.28057, None, None, 0)
c_stay_rec_mar = Beta("c_stay_rec_mar", -0.130613, None, None, 0)
c_stay_rec_nomar = Beta("c_stay_rec_nomar", -0.725205, None, None, 0)

In [218]:
# defining the staying utility function
V0 = c_stay + c_stay_married * married_old + c_stay_income * PINCP + c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_edu_nohigh * EDU_LESS_HIGH + c_stay_edu_somecollege * EDU_SOMECOLLEGE + c_stay_edu_college * EDU_COLLEGE + c_stay_child * child_old + c_stay_hhinc_orig * HH_MED_INC_ORIG + c_stay_foreign * FOREIGN + c_stay_hurent * HH_MED_RENT_ORIG + c_stay_dens * DENS_ORIG + c_stay_origin_unemp_rate * UNEMP_ORIG + c_stay_college * IN_COLLEGE + c_stay_rec_child * REC_CHILD + c_stay_rec_mar * MARHM + c_stay_rec_nomar * REC_NO_MAR

# c_stay_married * MARRIED + c_stay_income * PINCP + c_stay_age_18_22 * AGE_18_22 + c_stay_age_23_29 * AGE_23_29 + c_stay_age_30_39 * AGE_30_39 + c_stay_age_50_64 * AGE_50_64 + c_stay_age_65 * AGE_OVER_65 + c_stay_edu_nohigh * EDU_LESS_HIGH + c_stay_edu_somecollege * EDU_SOMECOLLEGE + c_stay_edu_college * EDU_COLLEGE + c_stay_child * child + c_stay + c_stay_hhinc_orig * HH_MED_INC_ORIG + c_stay_foreign * FOREIGN + c_stay_hurent * HH_MED_RENT_ORIG + c_stay_dens * DENS_ORIG + c_stay_origin_unemp_rate * UNEMP_ORIG + c_stay_college * IN_COLLEGE

In [219]:
# constants only model (all utilities equal to unique constant)

# for i in range(35):
#     key = "c_" + str(i)
#     print("{0} = Beta('{0}', 0, None, None, 0)".format(key))
#     print ("V{0} = {1}".format(i, key))

In [220]:
# defining the utility functions for each of the moving PUMA alternatives
# defined using the exec to parse a string to save space
# can also use a loop to print out the statements and then copy/paste them to run
# can also just write each one manually
for i, puma in enumerate(distances.columns):
    num = i + 1
    initialization = "V{0} = c_move + log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_destchoice_hhinc * int(puma_acs_data.loc['{1}', 'Median Household Income (In 2019 Inflation Adjusted Dollars)']) + c_destchoice_urban / DENS_ORIG * (puma_acs_data.loc['{1}', 'Population Density (Per Sq. Mile)'] - DENS_ORIG) + c_destchoice_hurent * int(puma_acs_data.loc['{1}', 'Median Gross Rent']) + c_destchoice_college * IN_COLLEGE * int(puma_acs_data.loc['{1}', 'COLLEGE']) + c_destchoice_vacancy * float(puma_acs_data.loc['{1}', 'VACANCY_PCT']) + c_destchoice_foreign * FOREIGN * int(puma_acs_data.loc['{1}', 'Total Population Foreign Born']) / ALT{0}_POP + c_destchoice_age_18_34 * AGE_18_34 * int(puma_acs_data.loc['{1}', 'Total Population 18 to 34 Years']) / ALT{0}_POP + c_destchoice_age_35_64 *AGE_35_64 * int(puma_acs_data.loc['{1}', 'Total Population 35 to 64 Years']) / ALT{0}_POP + c_destchoice_age_over_65 * AGE_OVER_65 * int(puma_acs_data.loc['{1}', 'Total Population 65 and Over']) / ALT{0}_POP + c_destchoice_pctnobach * EDU_NOCOLLEGE * int(puma_lodes_data.loc['{1}', ['JOBS_EDU_HS', 'JOBS_EDU_NOBACH', 'JOBS_EDU_NOHS']].sum()) / ALT{0}_EMP  + c_destchoice_pctbach * EDU_COLLEGE * int(puma_lodes_data.loc['{1}', 'JOBS_EDU_BACH']) / ALT{0}_EMP + c_destchoice_entscore * ALT{0}_ENT / ALT{0}_EMP / AGEP".format(num, puma)
    exec(initialization)
print(V1)

# full model specification (takes a bit longer to run):
# V{0} = log(ALT{0}_POP) + c_destchoice_dist * ALT{0}_DIST + c_destchoice_logdist * log(ALT{0}_DIST + 1) + c_destchoice_hhinc * int(puma_acs_data.loc['{1}', 'Median Household Income (In 2019 Inflation Adjusted Dollars)']) + c_destchoice_urban / DENS_ORIG * (puma_acs_data.loc['{1}', 'Population Density (Per Sq. Mile)'] - DENS_ORIG) + c_destchoice_hurent * int(puma_acs_data.loc['{1}', 'Median Gross Rent']) + c_destchoice_college * IN_COLLEGE * int(puma_acs_data.loc['{1}', 'COLLEGE']) + c_destchoice_vacancy * float(puma_acs_data.loc['{1}', 'VACANCY_PCT']) + c_destchoice_foreign * FOREIGN * int(puma_acs_data.loc['{1}', 'Total Population Foreign Born']) / ALT{0}_POP + c_destchoice_age_18_34 * AGE_18_34 * int(puma_acs_data.loc['{1}', 'Total Population 18 to 34 Years']) / ALT{0}_POP + c_destchoice_age_35_64 *AGE_35_64 * int(puma_acs_data.loc['{1}', 'Total Population 35 to 64 Years']) / ALT{0}_POP + c_destchoice_age_over_65 * AGE_OVER_65 * int(puma_acs_data.loc['{1}', 'Total Population 65 and Over']) / ALT{0}_POP + c_destchoice_pctnobach * EDU_NOCOLLEGE * int(puma_lodes_data.loc['{1}', ['JOBS_EDU_HS', 'JOBS_EDU_NOBACH', 'JOBS_EDU_NOHS']].sum()) / ALT{0}_EMP  + c_destchoice_pctbach * EDU_COLLEGE * int(puma_lodes_data.loc['{1}', 'JOBS_EDU_BACH']) / ALT{0}_EMP + c_destchoice_unemp * float(puma_acs_data.loc['{1}', 'UNEMP']) + c_destchoice_entscore * ALT{0}_ENT / ALT{0}_EMP / AGEP

# for the fields already in the Biogeme db.Database, can explicitly refer to them; also used a few references to other databases using .loc and fields in the Biogeme database

(((((((((((((((c_move(-7.985678) + log(ALT1_POP)) + (c_destchoice_dist(-5e-06) * ALT1_DIST)) + (c_destchoice_logdist(-0.139344) * log((ALT1_DIST + `1`)))) + (c_destchoice_hhinc(1.4e-05) * `45561`)) + ((c_destchoice_urban(-0.008602) / DENS_ORIG) * (`236.724896654487` - DENS_ORIG))) + (c_destchoice_hurent(-0.001804) * `700`)) + ((c_destchoice_college(8.1e-05) * IN_COLLEGE) * `12344`)) + (c_destchoice_vacancy(0.560615) * `0.1783103294515363`)) + (((c_destchoice_foreign(4.160805) * FOREIGN) * `2733`) / ALT1_POP)) + (((c_destchoice_age_18_34(2.779809) * AGE_18_34) * `41282`) / ALT1_POP)) + (((c_destchoice_age_35_64(0.926137) * AGE_35_64) * `74145`) / ALT1_POP)) + (((c_destchoice_age_over_65(0.028123) * AGE_OVER_65) * `39562`) / ALT1_POP)) + (((c_destchoice_pctnobach(2.565847) * EDU_NOCOLLEGE) * `48119`) / ALT1_EMP)) + (((c_destchoice_pctbach(3.042734) * EDU_COLLEGE) * `13511`) / ALT1_EMP)) + (((c_destchoice_entscore(0.051888) * ALT1_ENT) / ALT1_EMP) / AGEP))


In [221]:
# occupation-based coefficients

# c_adm = Beta("c_adm", 0, None, None, 0)
# c_agr = Beta("c_agr", 0, None, None, 0)
# c_con = Beta("c_con", 0, None, None, 0)
# c_edu = Beta("c_edu", 0, None, None, 0)
# c_ent = Beta("c_ent", 0, None, None, 0)
# c_ext = Beta("c_ext", 0, None, None, 0)
# c_fin = Beta("c_fin", 0, None, None, 0)
# c_fod = Beta("c_fod", 0, None, None, 0)
# c_inf = Beta("c_inf", 0, None, None, 0)
# c_med = Beta("c_med", 0, None, None, 0)
# c_mfg = Beta("c_mfg", 0, None, None, 0)
# c_mng = Beta("c_mng", 0, None, None, 0)
# c_prf = Beta("c_prf", 0, None, None, 0)
# c_pub = Beta("c_pub", 0, None, None, 0)
# c_rel = Beta("c_rel", 0, None, None, 0)
# c_ret = Beta("c_ret", 0, None, None, 0)
# c_srv = Beta("c_srv", 0, None, None, 0)
# c_trn = Beta("c_trn", 0, None, None, 0)
# c_utl = Beta("c_utl", 0, None, None, 0)
# c_whl = Beta("c_whl", 0, None, None, 0)

In [39]:
# Associate utility functions with the numbering of alternatives (corresponds to the CHOSEN field created earlier)
V = {0: V0, 1: V1, 2: V2, 3: V3, 4: V4, 5: V5, 6: V6, 7: V7, 8: V8, 9: V9, 10: V10, 11: V11, 12: V12, 13: V13, 14: V14, 15: V15, 16: V16, 17: V17, 18: V18, 19: V19, 20: V20, 21: V21, 22: V22, 23: V23, 24: V24, 25: V25, 26: V26, 27: V27, 28: V28, 29: V29, 30: V30, 31: V31, 32: V32, 33: V33, 34: V34}

# Associate the availability conditions with the alternatives
# for this model, all migrants had all alternatives theoretically available so all are equal to 1 (available)
# if individual people had different availability for alterantives, could pass in a column of the dataframe to account for that availability
av = {}
for i in range(0, 35):
    av[i] = 1

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
# estimating the CHOSEN field
logprob = models.loglogit(V, av, CHOSEN)

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'ky_full_updated'

# Calculate the null log likelihood for reporting. (likelihood of predicting every entry's alterantive correctly if alternatives are randomly chosen)
biogeme.calculateNullLoglikelihood(av)

# Estimate the parameters
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)

KeyboardInterrupt: 

In [69]:
# V0 = c_stay_married * MARRIED + c_stay_income * PINCP + c_stay_child * child + c_stay

In [222]:
# Associate utility functions with the numbering of alternatives (corresponds to the CHOSEN field created earlier)
V = {0: V0, 1: V1, 2: V2, 3: V3, 4: V4, 5: V5, 6: V6, 7: V7, 8: V8, 9: V9, 10: V10, 11: V11, 12: V12, 13: V13, 14: V14, 15: V15, 16: V16, 17: V17, 18: V18, 19: V19, 20: V20, 21: V21, 22: V22, 23: V23, 24: V24, 25: V25, 26: V26, 27: V27, 28: V28, 29: V29, 30: V30, 31: V31, 32: V32, 33: V33, 34: V34}

# Associate the availability conditions with the alternatives
# for this model, all migrants had all alternatives theoretically available so all are equal to 1 (available)
# if individual people had different availability for alterantives, could pass in a column of the dataframe to account for that availability
av = {}
for i in range(35):
    av[i] = 1

In [223]:
move = Beta("move", 1.497034, 1, None, 0)

In [224]:
nest_move = move, list(range(1, 35))
nest_stay = 1.0, [0]
nests = nest_move, nest_stay

In [225]:
nest_logprob = models.lognested(V, av, nests, CHOSEN)

# Create the Biogeme object
biogeme_nest = bio.BIOGEME(database, nest_logprob)
biogeme_nest.modelName = "nested_full_lifeevents"

# Calculate the null log likelihood for reporting.
biogeme_nest.calculateNullLoglikelihood(av)

# Estimate the parameters
results_nest = biogeme_nest.estimate()
pandasResults_nest = results_nest.getEstimatedParameters()
print(pandasResults_nest)

                             Value       Std err     t-test       p-value  \
c_destchoice_age_18_34    2.791708  4.596354e-01   6.073744  1.249616e-09   
c_destchoice_age_35_64    0.908286  3.195224e-01   2.842636  4.474219e-03   
c_destchoice_age_over_65 -0.006997  1.372659e-01  -0.050971  9.593487e-01   
c_destchoice_college      0.000080  6.195227e-06  12.964654  0.000000e+00   
c_destchoice_dist        -0.000005  3.632288e-07 -14.389533  0.000000e+00   
c_destchoice_entscore     0.070380  3.609004e-03  19.501121  0.000000e+00   
c_destchoice_foreign      4.120548  5.485917e-02  75.111377  0.000000e+00   
c_destchoice_hhinc        0.000015  2.208224e-06   6.581960  4.642864e-11   
c_destchoice_hurent      -0.001797  2.247501e-04  -7.996005  1.332268e-15   
c_destchoice_logdist     -0.139013  7.474589e-03 -18.598095  0.000000e+00   
c_destchoice_pctbach      2.986815  7.799591e-01   3.829451  1.284295e-04   
c_destchoice_pctnobach    2.598586  6.955415e-01   3.736062  1.869245e-04   

In [None]:
# start from there and add -- for destination choice
# UNEMPLOYMENT RATE
# MED RENT
# MED HOUSE PRICE
# RURAL to RURAL
# RURAL to URBAN
# URBAN to URBAN 
# URBAN to RURAL
# MEDIAN INCOME of alternative PUMA
# VACANCY RATE (VACANCY / HOUSING UNITS)
# RENT as PERCENT OF INCOME in DEST -- better to do median rent as a percent of my income
# OWNERSHIP COST as PERCENT OF INCOME -- median home price as a percent of my income



# % of POP AGE 18-30 - if I"m 18-30, am I more likely to go there
# % of POP AGE 65+ - if I"m age 65+
# INCOME(of the person) * URBAN (of the alternative)

In [None]:
# then add one more alternative for: Didn't move.  