In [118]:
import pandas as pd
import numpy as np
import os

In [126]:
# path
path = r"C:\Users\USYS671257\OneDrive - WSP O365\21_31000110.002_Chattanooga TPO Model\model outputs"

# inputs
countycorr = pd.read_csv(os.path.join(path, "county_districts_chattanooga.csv"))
hhdata  = pd.read_csv(os.path.join(path, "_household_2.dat"), sep = '\t')
perdata = pd.read_csv(os.path.join(path, "_person_2.dat"), sep = '\t')

# parameters

In [127]:
def prep_vehavail(hhdata,perdata):
    
    """ prepare hhdata
        add person age in years from perdata
    """

    hhdata["hhvehcat"] = np.where(hhdata.hhvehs>4, 4, hhdata.hhvehs)
    perdata["hh16cat"] = np.where(perdata.pagey>=16, 1, 0)  #potential drivers
    aggper = perdata.groupby("hhno")["hh16cat"].sum() 
    hhdata = pd.merge(hhdata, aggper, on="hhno", how="left")
    hhdata["hh16cat"] = np.where(hhdata.hh16cat>4, 4, hhdata.hh16cat)
    hhdata["inccat"] = pd.cut(hhdata["hhincome"], 
                          bins=[0,15000,50000,75000,float("inf")], 
                          labels=["0K-15K", "15K-50K", "50K-75K", ">75K"], 
                          right=True)
    hhdata = (hhdata.merge(countycorr, 
                          left_on="hhtaz", 
                          right_on="TAZID", 
                          how="left").
                          rename(columns={"District": "hhcounty"}))
    
    return hhdata



def summary_vehavail(dshhdata, sum_by_var):
    
    """ summarize number of vehicles by county, set sum_by_var = "hhcounty"
        summarize number of vehicles by income, set sum_by_var = "inccat"
        summarize number of vehicles by HHdrivers, set sum_by_var = "hh16cat"
    """
    
    summary = (dshhdata.groupby([sum_by_var,"hhvehcat"])["hhexpfac"].
                        sum().
                        reset_index().
                        pivot_table(values='hhexpfac', 
                                    index= sum_by_var,
                                    columns="hhvehcat",
                                    fill_value=0))
    
    return summary

In [128]:
# clean up household data
dshhdata = prep_vehavail(hhdata,perdata)

In [129]:
# number of vehicles by income
summary_vehavail(dshhdata, "inccat")

hhvehcat,0,1,2,3,4
inccat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0K-15K,7350,11902,3591,865,240
15K-50K,3348,29217,22841,10550,6174
50K-75K,404,4905,14664,8782,6163
>75K,613,4427,22734,16117,13945


In [130]:
# number of vehicles by county
summary_vehavail(dshhdata, "hhcounty")

hhvehcat,0,1,2,3,4
hhcounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,16448,44585,49267,27693,19927
2,1146,7002,9475,5716,4396
3,99,934,487,221,195
4,785,4308,5106,2782,2018


In [131]:
# number of vehicles by HHdrivers
summary_vehavail(dshhdata, "hh16cat")

hhvehcat,0,1,2,3,4
hh16cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,24,3,3,0,0
1,15746,47296,7020,1593,0
2,2200,8586,51497,19465,10682
3,215,944,5086,11019,6707
4,293,0,729,4335,9147
