In [146]:
import pandas as pd
import numpy as np
import os

In [147]:
# path
# The model-output folder can be overridden with the MODEL_OUTPUT_DIR
# environment variable so the notebook can run on machines other than the
# author's; the original location stays the default.
path = os.environ.get(
    "MODEL_OUTPUT_DIR",
    r"C:\Users\USYS671257\OneDrive - WSP O365\21_31000110.002_Chattanooga TPO Model\model outputs",
)

# inputs
# countycorr: lookup with "TAZID" and "District" columns (used by prep_wrkschloc)
countycorr = pd.read_csv(os.path.join(path, "county_districts_chattanooga.csv"))
# household and person files are tab-delimited
hhdata  = pd.read_csv(os.path.join(path, "_household_2.dat"), sep = '\t')
perdata = pd.read_csv(os.path.join(path, "_person_2.dat"), sep = '\t')

# parameters

In [154]:
def _bin_whole_units(values, is_external):
    """Bin `values` into 1-unit-wide categories and flag external rows.

    Values in (k, k+1] get label k for k = 0..88; values outside (0, 89]
    (including negatives and exact zeros) become NaN.  Rows where
    `is_external` is True are then assigned category 91.

    Returns a plain NumPy array (np.where drops the categorical dtype).
    """
    upper_edge = 89       # last bin is (88, 89]
    external_cat = 91     # category assigned where the location TAZ is negative
    binned = pd.cut(values,
                    bins=range(0, upper_edge + 1),
                    right=True,
                    labels=list(range(0, upper_edge)))
    return np.where(is_external, external_cat, binned)


def prep_wrkschloc(hhdata, perdata, countycorr_df=None):

    """Prepare person data for work/school location summaries.

    Parameters
    ----------
    hhdata : DataFrame
        Household table; joined to persons on "hhno".  The columns
        "hhparcel" and "hhtaz" are read after the join.
    perdata : DataFrame
        Person table.  Reads pwtyp, pwtaz, pwpcl, pptyp, pwaudist, pwautime,
        pstaz, pspcl, psaudist and psautime.  The input frame is not
        modified; a merged copy is returned.
    countycorr_df : DataFrame, optional
        TAZ-to-district lookup with "TAZID" and "District" columns.  When
        omitted, the notebook-level `countycorr` table is used, which keeps
        existing two-argument calls working.

    Returns
    -------
    DataFrame
        Person records with household columns plus the derived flags
        (wrkr, outhmwrkr, stud, outhmstud, wfh, sfh), type labels
        (wrkrtyp, stutyp), distance/time categories and district codes.
    """

    if countycorr_df is None:
        # Backward-compatible fallback to the table loaded at notebook level.
        countycorr_df = countycorr

    # District code given to locations with a negative TAZ
    # (presumably "external" -- confirm against the district table).
    external_district = 13

    perdata = perdata.merge(hhdata, on="hhno", how="left")

    # --- workers ---------------------------------------------------------
    work_external = perdata.pwtaz < 0
    perdata["wrkr"] = np.where((perdata.pwtyp>0) & (perdata.pwtaz!=0), 1, 0)
    perdata["outhmwrkr"] = np.where((perdata.pwtaz>0) & (perdata.hhparcel!=perdata.pwpcl), 1, 0)
    perdata["wrkrtyp"] = np.where(perdata.pptyp==1, "FT",
                                  np.where(perdata.pptyp==2, "PT","NotFTPT"))
    perdata["wrkdistcat"] = _bin_whole_units(perdata["pwaudist"], work_external)
    perdata["wrktimecat"] = _bin_whole_units(perdata["pwautime"], work_external)

    # --- students --------------------------------------------------------
    school_external = perdata.pstaz < 0
    perdata["stud"] = np.where((perdata.pptyp.isin([5,6,7])) & (perdata.pstaz!=0), 1, 0)
    perdata["outhmstud"] = np.where((perdata.pstaz>0) & (perdata.hhparcel!=perdata.pspcl), 1, 0)
    perdata["stutyp"] = np.where(perdata.pptyp==5, "UniStu",
                                 np.where(perdata.pptyp==6, "Stu16",
                                 np.where(perdata.pptyp==7, "Ch515", "NotStdu")))
    perdata["schdistcat"] = _bin_whole_units(perdata["psaudist"], school_external)
    perdata["schtimecat"] = _bin_whole_units(perdata["psautime"], school_external)

    # --- districts -------------------------------------------------------
    countycorr_dict = countycorr_df.set_index("TAZID")["District"].to_dict()
    perdata["hhcounty"] = perdata["hhtaz"].map(countycorr_dict)
    perdata["pwcounty"] = perdata["pwtaz"].map(countycorr_dict)
    perdata["pscounty"] = perdata["pstaz"].map(countycorr_dict)
    perdata["pwcounty"] = np.where(work_external, external_district, perdata.pwcounty)
    perdata["pscounty"] = np.where(school_external, external_district, perdata.pscounty)

    # --- work / school at home: usual location parcel equals home parcel --
    perdata["wfh"] = np.where((perdata.wrkr==1) & (perdata.hhparcel==perdata.pwpcl), 1, 0)
    perdata["sfh"] = np.where((perdata.stud==1) & (perdata.hhparcel==perdata.pspcl), 1, 0)

    # Negative times/distances are replaced with missing values.
    # np.nan (lowercase) is used because the np.NaN alias was removed in NumPy 2.0.
    for skim_col in ("pwautime", "pwaudist", "psautime", "psaudist"):
        perdata[skim_col] = np.where(perdata[skim_col]<0, np.nan, perdata[skim_col])

    return perdata


def summary_trip_length_duration(dsperdata, per_type, sum_by_var1, sum_by_var2):

    """Trip length/duration distribution by person type.

    Parameters
    ----------
    dsperdata : DataFrame
        Person records as returned by prep_wrkschloc.
    per_type : str
        "wrkr" or "stud"; only rows where this flag column equals 1 are used.
    sum_by_var1 : str
        Length or duration category column (becomes the rows),
        e.g. "wrkdistcat", "wrktimecat", "schdistcat", "schtimecat".
    sum_by_var2 : str
        Person-type label column (becomes the columns): "wrkrtyp" or "stutyp".

    Returns
    -------
    DataFrame
        Sum of "psexpfac" for each category/type pair.  Rows are always
        0..91 and columns always follow the fixed type order; combinations
        with no records are 0.

    Raises
    ------
    ValueError
        If per_type is not "wrkr" or "stud".
    """

    # Fixed column order per person flag.  Validate up front so an
    # unsupported per_type fails with a clear message instead of an
    # unbound-variable error further down.
    column_orders = {
        "wrkr": ["FT","PT","NotFTPT"],
        "stud": ["Ch515","Stu16","UniStu","NotStdu"],
    }
    if per_type not in column_orders:
        raise ValueError(
            "per_type must be one of %s, got %r" % (sorted(column_orders), per_type)
        )
    column_order = column_orders[per_type]

    dsperdata = dsperdata[dsperdata[per_type]==1]

    # Rows 0..91 cover the category labels produced by prep_wrkschloc,
    # including 91, the category it assigns to locations with a negative TAZ.
    index_order = list(range(0, 92))
    summary = (dsperdata.groupby([sum_by_var1,sum_by_var2])["psexpfac"].
                         sum().
                         reset_index().
                         pivot_table(values='psexpfac',
                                     index= sum_by_var1,
                                     columns=sum_by_var2,
                                     fill_value=0).
                          reindex(columns=column_order,
                                  index=index_order).
                          fillna(0))
    return summary


def summary_work_school_flow(dsperdata, per_type, des_county):

    """Home-to-work / home-to-school flow matrix.

    dsperdata:  person records as returned by prep_wrkschloc
    per_type:   "wrkr" or "stud" -- flag column; rows where it equals 1 are kept
    des_county: destination district column ("pwcounty" or "pscounty")

    Returns a table with "hhcounty" as rows, des_county as columns and the
    summed "psexpfac" as values; missing pairs are filled with 0.
    """

    # keep only the persons flagged for this summary
    selected = dsperdata.loc[dsperdata[per_type] == 1]

    # total psexpfac per (home district, destination district) pair
    od_keys = ["hhcounty", des_county]
    od_totals = selected.groupby(od_keys)["psexpfac"].sum()
    od_long = od_totals.reset_index()

    # reshape long -> wide (origin rows x destination columns)
    flow_matrix = od_long.pivot_table(
        values="psexpfac",
        index="hhcounty",
        columns=des_county,
        fill_value=0,
    )
    return flow_matrix


def summary_work_school_at_home(dsperdata, at_home_type, per_type):

    """Work / school at home by home district and person type.

    Parameters
    ----------
    dsperdata : DataFrame
        Person records as returned by prep_wrkschloc.
    at_home_type : str
        "wfh" or "sfh"; only rows where this flag column equals 1 are used.
    per_type : str
        Person-type label column (becomes the columns): "wrkrtyp" or "stutyp".

    Returns
    -------
    DataFrame
        Sum of "psexpfac" with "hhcounty" as rows and the fixed type order
        as columns; combinations with no records are 0.

    Raises
    ------
    ValueError
        If per_type is not "wrkrtyp" or "stutyp".
    """

    # Fixed column order per label column.  Validate up front so an
    # unsupported per_type fails with a clear message instead of an
    # unbound-variable error further down.
    column_orders = {
        "wrkrtyp": ["FT","PT","NotFTPT"],
        "stutyp": ["Ch515","Stu16","UniStu","NotStdu"],
    }
    if per_type not in column_orders:
        raise ValueError(
            "per_type must be one of %s, got %r" % (sorted(column_orders), per_type)
        )
    column_order = column_orders[per_type]

    dsperdata = dsperdata[dsperdata[at_home_type]==1]

    summary = (dsperdata.groupby(["hhcounty",per_type])["psexpfac"].
                         sum().
                         reset_index().
                         pivot_table(values='psexpfac',
                                     index= "hhcounty",
                                     columns=per_type,
                                     fill_value=0).
                          reindex(columns=column_order).
                          fillna(0))
    return summary

In [152]:
# clean up person data: join households onto persons and derive the
# worker/student flags, distance/time categories and district codes
# NOTE(review): this cell's execution count (152) is lower than that of the
# definitions cell above (154), so dsperdata may predate the current
# prep_wrkschloc -- re-run after any change to the function.
dsperdata = prep_wrkschloc(hhdata,perdata)

In [155]:
# work trip length: psexpfac summed by distance category (rows) and worker type (columns)
summary_trip_length_duration(dsperdata, "wrkr", "wrkdistcat", "wrkrtyp")

wrkrtyp,FT,PT,NotFTPT
wrkdistcat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6336.0,2041.0,699.0
1,5432.0,908.0,521.0
2,6450.0,1269.0,760.0
3,7037.0,1502.0,848.0
4,7956.0,1607.0,917.0
...,...,...,...
87,0.0,0.0,0.0
88,0.0,0.0,0.0
89,0.0,0.0,0.0
90,0.0,0.0,0.0


In [156]:
# work trip duration: psexpfac summed by time category (rows) and worker type (columns)
summary_trip_length_duration(dsperdata, "wrkr", "wrktimecat", "wrkrtyp")

wrkrtyp,FT,PT,NotFTPT
wrktimecat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,4774.0,1782.0,466.0
1,1726.0,303.0,192.0
2,2242.0,394.0,258.0
3,2722.0,458.0,318.0
4,2872.0,578.0,364.0
...,...,...,...
87,0.0,0.0,0.0
88,0.0,0.0,0.0
89,0.0,0.0,0.0
90,0.0,0.0,0.0


In [157]:
# school trip length: psexpfac summed by distance category (rows) and student type (columns)
summary_trip_length_duration(dsperdata, "stud", "schdistcat", "stutyp")

stutyp,Ch515,Stu16,UniStu,NotStdu
schdistcat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10686.0,1367.0,3136.0,0.0
1,10903.0,1820.0,675.0,0.0
2,9855.0,2522.0,793.0,0.0
3,6029.0,2305.0,1151.0,0.0
4,3361.0,1977.0,1221.0,0.0
...,...,...,...,...
87,0.0,0.0,0.0,0.0
88,0.0,0.0,0.0,0.0
89,0.0,0.0,0.0,0.0
90,0.0,0.0,0.0,0.0


In [158]:
# school trip duration: psexpfac summed by time category (rows) and student type (columns)
summary_trip_length_duration(dsperdata, "stud", "schtimecat", "stutyp")

stutyp,Ch515,Stu16,UniStu,NotStdu
schtimecat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,5894.0,814.0,2150.0,0.0
1,5770.0,624.0,682.0,0.0
2,5664.0,958.0,557.0,0.0
3,5618.0,1071.0,426.0,0.0
4,5328.0,1255.0,308.0,0.0
...,...,...,...,...
87,0.0,0.0,0.0,0.0
88,0.0,0.0,0.0,0.0
89,0.0,0.0,0.0,0.0
90,0.0,0.0,0.0,0.0


In [159]:
# home to work flow: psexpfac summed by home district (rows) and work district (columns);
# district 13 is the code prep_wrkschloc assigns to work locations with a negative TAZ
summary_work_school_flow(dsperdata,"wrkr", "pwcounty")

pwcounty,1.0,2.0,3.0,4.0,13.0
hhcounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,155899,2662,111,1583,8372
2,16356,8969,77,982,5409
3,1376,46,79,40,118
4,8797,2249,55,2217,1709


In [136]:
# home to school flow: psexpfac summed by home district (rows) and school district (columns)
summary_work_school_flow(dsperdata,"stud", "pscounty")

pscounty,1.0,2.0,3.0,4.0
hhcounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,76012,1490,373,958
2,6531,8355,30,853
3,704,27,262,103
4,3087,866,93,3660


In [142]:
# work from home: workers whose work parcel equals their home parcel,
# psexpfac summed by home district (rows) and worker type (columns)
summary_work_school_at_home(dsperdata,"wfh","wrkrtyp")

wrkrtyp,FT,PT,NotFTPT
hhcounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,3133,1238,342
2,683,246,29
3,47,15,3
4,410,189,23


In [145]:
# school from home: students whose school parcel equals their home parcel,
# psexpfac summed by home district (rows) and student type (columns)
summary_work_school_at_home(dsperdata,"sfh","stutyp")

stutyp,Ch515,Stu16,UniStu,NotStdu
hhcounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2624,458,1016,0.0
2,504,66,66,0.0
3,28,1,0,0.0
4,252,23,43,0.0
