In [56]:
import pandas as pd
import numpy as np
import os
import math

In [57]:
# path
# Folder that holds the model output files. Set the CHATTANOOGA_MODEL_OUTPUTS
# environment variable to run the notebook from another machine or folder;
# when it is unset the original project location below is used.
path = os.environ.get(
    "CHATTANOOGA_MODEL_OUTPUTS",
    r"C:\Users\USYS671257\OneDrive - WSP O365\21_31000110.002_Chattanooga TPO Model\model outputs",
)

# inputs
# Comma-separated correspondence table; the prep_* functions read its
# "TAZID" and "District" columns.
countycorr = pd.read_csv(os.path.join(path, "county_districts_chattanooga.csv"))
# Tab-delimited model output tables.
tripdata = pd.read_csv(os.path.join(path, "_trip_2.dat"), sep="\t")
tourdata = pd.read_csv(os.path.join(path, "_tour_2.dat"), sep="\t")
hhdata = pd.read_csv(os.path.join(path, "_household_2.dat"), sep="\t")
perdata = pd.read_csv(os.path.join(path, "_person_2.dat"), sep="\t")
pdaydata = pd.read_csv(os.path.join(path, "_person_day_2.dat"), sep="\t")

# parameters
# When True, the prep_* functions keep only records whose pptyp is below 8.
excludeChildren5 = True

In [65]:
def prep_perdata(perdata, hhdata, county_lookup=None):
    """Attach household-level attributes to every person record.

    Neither input frame is modified; all derived columns are built on copies.

    Parameters
    ----------
    perdata : pandas.DataFrame
        Person records. Must contain ``hhno`` and ``pagey``.
    hhdata : pandas.DataFrame
        Household records. Must contain ``hhno``, ``hhtaz``, ``hhincome``
        and ``hhvehs``.
    county_lookup : pandas.DataFrame, optional
        Correspondence table with ``TAZID`` and ``District`` columns. When
        omitted, the notebook-level ``countycorr`` frame is used.

    Returns
    -------
    pandas.DataFrame
        ``perdata`` with these extra columns:

        * ``hh16cat``  - 1 when the person's ``pagey`` is >= 16, else 0
        * ``hhcounty`` - ``District`` looked up from the household's ``hhtaz``
        * ``inccat``   - household income band: "0K-15K", "15K-50K",
          "50K-75K" or ">75K"
        * ``vehsuf``   - household vehicles compared with the number of
          household members aged 16+ (that count is capped at 4):
          1 = no vehicles, 2 = fewer vehicles than 16+ members,
          3 = equal, 4 = more vehicles; 0 when none of these applies
          (e.g. missing values)
    """
    if county_lookup is None:
        county_lookup = countycorr
    taz_to_district = county_lookup.set_index("TAZID")["District"].to_dict()

    # Flag persons aged 16+ on a copy so the caller's frame is left untouched.
    persons = perdata.assign(hh16cat=np.where(perdata["pagey"] >= 16, 1, 0))
    adults_per_hh = persons.groupby("hhno", as_index=False)["hh16cat"].sum()

    # Only the household columns needed here are copied.
    households = hhdata[["hhno", "hhtaz", "hhincome", "hhvehs"]].copy()
    households["hhcounty"] = households["hhtaz"].map(taz_to_district)
    # include_lowest=True closes the first band on the left, so a household
    # with an income of exactly 0 is classed as "0K-15K" instead of NaN.
    # Negative or missing incomes still yield NaN.
    households["inccat"] = pd.cut(
        households["hhincome"],
        bins=[0, 15000, 50000, 75000, float("inf")],
        labels=["0K-15K", "15K-50K", "50K-75K", ">75K"],
        right=True,
        include_lowest=True,
    )

    households = households.merge(adults_per_hh, on="hhno", how="left")
    households["hh16cat"] = households["hh16cat"].clip(upper=4)

    vehicles = households["hhvehs"]
    adults = households["hh16cat"]
    owns_vehicle = vehicles > 0
    households["vehsuf"] = np.select(
        [
            vehicles == 0,
            owns_vehicle & (vehicles < adults),
            owns_vehicle & (vehicles == adults),
            owns_vehicle & (vehicles > adults),
        ],
        [1, 2, 3, 4],
        default=0,
    )

    household_attrs = households[["hhno", "hhcounty", "inccat", "vehsuf"]]
    return persons.merge(household_attrs, on="hhno", how="left")



def _sum_columns(frame, names):
    """Add the named columns of ``frame`` left to right (NaN propagates)."""
    total = frame[names[0]]
    for name in names[1:]:
        total = total + frame[name]
    return total


def prep_pdaydata(pdaydata, perdata):
    """Join person attributes onto person-day records and derive category columns.

    Reads two notebook-level names: ``hhdata`` (passed on to ``prep_perdata``)
    and ``excludeChildren5`` (when true, only rows with ``pptyp`` < 8 are kept).

    Parameters
    ----------
    pdaydata : pandas.DataFrame
        Person-day records keyed by ``hhno``/``pno`` with ``<p>tours`` and
        ``<p>stops`` count columns for the prefixes wk, sc, es, pb, sh, ml,
        so, me and re.
    perdata : pandas.DataFrame
        Person records, as accepted by ``prep_perdata``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``pdaydata`` itself is not modified) with:

        * ``pb*``/``so*`` counts increased by the ``me*``/``re*`` counts
        * ``tottours`` (sum over the seven purposes, capped at 3) and
          ``totstops`` (uncapped sum)
        * ``tourstop`` - 0 when no (tottours, totstops) rule matches, otherwise
          1-12: four codes per tour count 1, 2, 3, for 0, 1, 2 and 3+ stops
        * ``<p>tostp`` per purpose - 1 = no tours/no stops, 2 = stops only,
          3 = tours only, 4 = tours and stops (0 if nothing matches)
        * ``<p>topt`` per purpose - tour count capped at 3
    """
    purposes = ("wk", "sc", "es", "pb", "sh", "ml", "so")
    person_cols = ["hhno", "pno", "pptyp", "hhcounty", "inccat", "vehsuf", "psexpfac"]

    persons = prep_perdata(perdata, hhdata)[person_cols]

    pdaydata = pd.merge(pdaydata, persons, on=["hhno", "pno"], how="left")
    if excludeChildren5:
        # .copy() detaches the filtered rows from the merged frame so the
        # column assignments below do not raise SettingWithCopyWarning.
        pdaydata = pdaydata[pdaydata["pptyp"] < 8].copy()

    # Fold the me* counts into pb* and the re* counts into so*.
    for kind in ("tours", "stops"):
        pdaydata["pb" + kind] = pdaydata["pb" + kind] + pdaydata["me" + kind]
        pdaydata["so" + kind] = pdaydata["so" + kind] + pdaydata["re" + kind]

    tour_total = _sum_columns(pdaydata, [p + "tours" for p in purposes])
    pdaydata["tottours"] = tour_total.clip(upper=3)
    pdaydata["totstops"] = _sum_columns(pdaydata, [p + "stops" for p in purposes])

    # Combined tour/stop pattern. Codes are handed out in order:
    # (1 tour; 0, 1, 2, 3+ stops) -> 1..4, (2 tours; ...) -> 5..8,
    # (3 tours; ...) -> 9..12. Anything else keeps the default 0.
    pdaydata["tourstop"] = 0
    total_stops = pdaydata["totstops"]
    stop_rules = (total_stops == 0, total_stops == 1, total_stops == 2, total_stops >= 3)
    code = 0
    for n_tours in (1, 2, 3):
        has_n_tours = pdaydata["tottours"] == n_tours
        for stop_rule in stop_rules:
            code += 1
            pdaydata.loc[has_n_tours & stop_rule, "tourstop"] = code

    # Per-purpose tour/stop participation pattern.
    for purpose in purposes:
        tours = pdaydata[purpose + "tours"]
        stops = pdaydata[purpose + "stops"]
        target = purpose + "tostp"
        pdaydata[target] = 0
        patterns = (
            (tours == 0) & (stops == 0),
            (tours == 0) & (stops >= 1),
            (tours >= 1) & (stops == 0),
            (tours >= 1) & (stops >= 1),
        )
        for pattern_code, pattern in enumerate(patterns, start=1):
            pdaydata.loc[pattern, target] = pattern_code

    # Per-purpose tour counts capped at 3.
    for purpose in purposes:
        pdaydata[purpose + "topt"] = pdaydata[purpose + "tours"].clip(upper=3)

    return pdaydata




def prep_tourdata(tourdata, perdata):
    """Join person attributes onto tour records and derive category columns.

    Reads the notebook-level ``excludeChildren5`` flag: when true, only rows
    with ``pptyp`` < 8 are kept.

    Parameters
    ----------
    tourdata : pandas.DataFrame
        Tour records with ``hhno``, ``pno``, ``pdpurp``, ``subtrs``,
        ``tripsh1``, ``tripsh2`` and ``parent``.
    perdata : pandas.DataFrame
        Person records with ``hhno``, ``pno`` and ``pptyp``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``tourdata`` itself is not modified) with:

        * ``pdpurp`` recoded 8 -> 7 and 9 -> 4
        * ``ftwind`` - 1 when ``pptyp`` == 1, else 2
        * ``stcat`` - ``subtrs`` capped at 3
        * ``stops`` - ``tripsh1 + tripsh2 - 2``; ``stopscat`` caps it at 6
        * ``h1stopscat`` / ``h2stopscat`` - ``tripsh1`` / ``tripsh2`` capped at 6
          (NOTE(review): these cap the raw half-tour trip counts, not
          trips minus one - confirm that is intended)
        * ``pdpurp2`` - recoded ``pdpurp`` when ``parent`` == 0, otherwise 8
    """
    tourdata = pd.merge(tourdata, perdata, on=["hhno", "pno"], how="left")
    if excludeChildren5:
        # .copy() detaches the filtered rows so the assignments below do not
        # raise SettingWithCopyWarning.
        tourdata = tourdata[tourdata["pptyp"] < 8].copy()

    tourdata["pdpurp"] = tourdata["pdpurp"].replace({8: 7, 9: 4})
    tourdata["ftwind"] = np.where(tourdata["pptyp"] == 1, 1, 2)

    tourdata["stcat"] = tourdata["subtrs"].clip(upper=3)
    tourdata["stops"] = tourdata["tripsh1"] + tourdata["tripsh2"] - 2
    tourdata["stopscat"] = tourdata["stops"].clip(upper=6)
    tourdata["h1stopscat"] = tourdata["tripsh1"].clip(upper=6)
    tourdata["h2stopscat"] = tourdata["tripsh2"].clip(upper=6)
    tourdata["pdpurp2"] = np.where(tourdata["parent"] == 0, tourdata["pdpurp"], 8)

    return tourdata
    

def prep_tripdata(tripdata, perdata):
    """Join person attributes onto trip records, recode purpose, add origin district.

    Reads two notebook-level names: ``excludeChildren5`` (when true, only rows
    with ``pptyp`` < 8 are kept) and ``countycorr`` (``TAZID`` -> ``District``
    correspondence).

    Parameters
    ----------
    tripdata : pandas.DataFrame
        Trip records with ``hhno``, ``pno``, ``dpurp`` and ``otaz``.
    perdata : pandas.DataFrame
        Person records with ``hhno``, ``pno`` and ``pptyp``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``tripdata`` itself is not modified) with ``dpurp``
        recoded (8 -> 7, 9 -> 4, 0 -> 8) and ``ocounty`` holding the
        ``District`` of ``otaz`` (NaN when the zone is not in ``countycorr``).
    """
    tripdata = pd.merge(tripdata, perdata, on=["hhno", "pno"], how="left")
    if excludeChildren5:
        # .copy() detaches the filtered rows so the assignments below do not
        # raise SettingWithCopyWarning.
        tripdata = tripdata[tripdata["pptyp"] < 8].copy()

    # One mapping applied to the original codes: an original 0 becomes 8 and
    # is not folded into 7 afterwards.
    tripdata["dpurp"] = tripdata["dpurp"].replace({8: 7, 9: 4, 0: 8})

    taz_to_district = countycorr.set_index("TAZID")["District"].to_dict()
    tripdata["ocounty"] = tripdata["otaz"].map(taz_to_district)

    return tripdata


In [66]:
# Clean up person-day data: prep_pdaydata joins person and household
# attributes onto pdaydata and derives the tour/stop category columns
# (e.g. wktostp) that the summary tables are built from.
dspdaydata = prep_pdaydata(pdaydata, perdata)

In [69]:
# Tours&StopsByPurpose
# Sum of psexpfac for every (wktostp, pptyp) pair, laid out with wktostp
# categories as rows and pptyp values as columns; pairs that never occur
# are shown as 0.
summary = (
    dspdaydata
    .groupby(["wktostp", "pptyp"], as_index=False)["psexpfac"]
    .sum()
    .pivot_table(
        values="psexpfac",
        index="wktostp",
        columns="pptyp",
        fill_value=0,
    )
)
summary

pptyp,1,2,3,4,5,6,7
wktostp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,36374,8857,62045,85734,23214,15352,60090
2,869,0,0,0,781,1177,0
3,93423,14382,0,0,1312,609,0
4,44994,4212,0,0,542,327,0
