In [1]:
import pandas as pd
import numpy as np
import os

In [9]:
# path
# Folder holding the model outputs. The default is the original analyst's
# location; set the CHATTANOOGA_MODEL_OUTPUT_DIR environment variable to run
# the notebook on another machine without editing this cell.
path = os.environ.get(
    "CHATTANOOGA_MODEL_OUTPUT_DIR",
    r"C:\Users\USYS671257\OneDrive - WSP O365\21_31000110.002_Chattanooga TPO Model\model outputs",
)

# inputs
# TAZ-to-district lookup; the TAZID and District columns are used by the prep functions.
countycorr = pd.read_csv(os.path.join(path, "county_districts_chattanooga.csv"))
# Tour, household and person tables, read as tab-delimited text.
tourdata = pd.read_csv(os.path.join(path, "_tour_2.dat"), sep="\t")
hhdata = pd.read_csv(os.path.join(path, "_household_2.dat"), sep="\t")
perdata = pd.read_csv(os.path.join(path, "_person_2.dat"), sep="\t")

# parameters
# When True, prep_tourdata keeps only tours made by persons with pptyp < 8.
excludeChildren5 = True

In [52]:
def prep_perdata(perdata, hhdata):
    """Attach household attributes, including the home district, to each person.

    Parameters
    ----------
    perdata : pandas.DataFrame
        Person records; must contain ``hhno``.
    hhdata : pandas.DataFrame
        Household records; must contain ``hhno`` and ``hhtaz``. Left unmodified.

    Returns
    -------
    pandas.DataFrame
        ``perdata`` left-joined to ``hhdata`` on ``hhno``, plus an ``hhcounty``
        column holding the ``District`` of the household TAZ (NaN when
        ``hhtaz`` is not present in the lookup).

    Notes
    -----
    Reads the module-level ``countycorr`` table (columns ``TAZID`` and
    ``District``).
    """
    taz_to_district = countycorr.set_index("TAZID")["District"].to_dict()

    # assign() returns a new frame, so the caller's hhdata does not silently
    # gain an hhcounty column as a side effect of calling this function.
    households = hhdata.assign(hhcounty=hhdata["hhtaz"].map(taz_to_district))

    return perdata.merge(households, on="hhno", how="left")



def prep_tourdata(tourdata, perdata, hhdata):
    """Join person/household attributes onto tours and derive the summary fields.

    Parameters
    ----------
    tourdata : pandas.DataFrame
        Tour records; must contain ``hhno``, ``pno``, ``pdpurp``, ``parent``,
        ``totaz``, ``tdtaz``, ``tautodist`` and ``tautotime``.
    perdata : pandas.DataFrame
        Person records; must contain ``hhno``, ``pno``, ``pptyp``, ``pwtaz``
        and ``psexpfac``.
    hhdata : pandas.DataFrame
        Household records; must contain ``hhno`` and ``hhtaz``.

    Returns
    -------
    pandas.DataFrame
        A new tour table with the person columns ``pptyp``, ``hhtaz``,
        ``hhcounty``, ``pwtaz``, ``psexpfac`` joined on, plus:

        * ``pdpurp``   - purpose with 8 folded into 7 and 9 folded into 4
        * ``pdpurp2``  - ``pdpurp`` where ``parent == 0``, otherwise 8 (work-based)
        * ``ocounty`` / ``dcounty`` - district of ``totaz`` / ``tdtaz``
        * ``distcat`` / ``timecat`` - 1-unit bins of ``tautodist`` / ``tautotime``
        * ``wrkrtyp``  - "FT" (pptyp 1), "PT" (pptyp 2) or "NotFTPT"
        * ``tautodist`` / ``tautotime`` - negative values replaced by NaN

    Notes
    -----
    Reads the module-level ``countycorr`` table and the ``excludeChildren5``
    flag. The input ``tourdata`` frame is not modified.
    """
    taz_to_district = countycorr.set_index("TAZID")["District"].to_dict()

    persons = prep_perdata(perdata, hhdata)
    persons = persons[["hhno", "pno", "pptyp", "hhtaz", "hhcounty", "pwtaz", "psexpfac"]]

    tours = pd.merge(tourdata, persons, on=["hhno", "pno"], how="left")
    if excludeChildren5:
        # .copy() gives an independent frame, so the column assignments below do
        # not write into a slice of the merged table (SettingWithCopyWarning).
        tours = tours[tours["pptyp"] < 8].copy()

    # combine recreational 8 with social 7
    tours["pdpurp"] = np.where(tours["pdpurp"] == 8, 7, tours["pdpurp"])
    # combine medical 9 with personal business 4
    tours["pdpurp"] = np.where(tours["pdpurp"] == 9, 4, tours["pdpurp"])
    # tours with a parent tour are work-based subtours and get purpose 8
    tours["pdpurp2"] = np.where(tours["parent"] == 0, tours["pdpurp"], 8)

    tours["ocounty"] = tours["totaz"].map(taz_to_district)
    tours["dcounty"] = tours["tdtaz"].map(taz_to_district)

    # Negative distance/time values are treated as missing. np.nan is used
    # because the np.NaN alias was removed in NumPy 2.0.
    tours["tautodist"] = np.where(tours["tautodist"] < 0, np.nan, tours["tautodist"])
    tours["tautotime"] = np.where(tours["tautotime"] < 0, np.nan, tours["tautotime"])

    # Bins are (0, 1], (1, 2], ..., (89, 90], labelled 0..89 by their lower edge.
    # NOTE: values of exactly 0, values above 90 and NaN fall outside every bin
    # and therefore get a NaN category.
    bin_edges = range(0, 91)
    bin_labels = list(range(0, 90))
    tours["distcat"] = pd.cut(tours["tautodist"], bins=bin_edges, right=True, labels=bin_labels)
    tours["timecat"] = pd.cut(tours["tautotime"], bins=bin_edges, right=True, labels=bin_labels)

    tours["wrkrtyp"] = np.where(tours["pptyp"] == 1, "FT",
                                np.where(tours["pptyp"] == 2, "PT", "NotFTPT"))

    return tours
    
    

def summary_tour_destination(dstourdata, purpose, sum_by_var):
    """Total ``psexpfac`` of one tour purpose for each value of a binning column.

    Purpose codes matched against ``pdpurp2``:
        1 Work
        2 School
        3 Escort
        4 Personal_Business
        5 Shop
        6 Meal
        7 SocRec
        8 Workbased

    ``sum_by_var`` names the column to group on (e.g. "distcat" or "timecat").
    The grouped totals are reindexed to the labels 0 through 90, bins without
    any tours are filled with 0, and the result is returned as a two-column
    frame (the grouping column and ``psexpfac``).
    """
    wanted_bins = list(range(0, 91))

    of_purpose = dstourdata[dstourdata["pdpurp2"] == purpose]
    weight_per_bin = of_purpose.groupby([sum_by_var])["psexpfac"].sum()

    # Pad to the full label range so every purpose yields the same set of rows.
    padded = weight_per_bin.reindex(index=wanted_bins).fillna(0)
    return padded.reset_index()



def summary_tour_flow(dstourdata, purpose):
    """Origin-by-destination matrix of ``psexpfac`` for one tour purpose.

    Purpose codes matched against ``pdpurp2``:
        1 Work
        2 School
        3 Escort
        4 Personal_Business
        5 Shop
        6 Meal
        7 SocRec
        8 Workbased

    Returns a frame indexed by ``ocounty`` with one column per ``dcounty``;
    origin/destination pairs with no tours are filled with 0.
    """
    of_purpose = dstourdata.loc[dstourdata["pdpurp2"] == purpose]

    # One row per (origin, destination) pair with its summed expansion factor.
    pair_totals = of_purpose.groupby(["ocounty", "dcounty"])["psexpfac"].sum()
    pair_totals = pair_totals.reset_index()

    # Each pair occurs once at this point, so the pivot only reshapes long -> wide.
    flow_matrix = pair_totals.pivot_table(index="ocounty",
                                          columns="dcounty",
                                          values='psexpfac',
                                          fill_value=0)
    return flow_matrix

In [46]:
# Build the analysis-ready tour table: person/household attributes joined on,
# derived purpose, district and bin columns added.
dstourdata = prep_tourdata(tourdata=tourdata, perdata=perdata, hhdata=hhdata)

In [53]:
# Escort tours (purpose 3): expanded tours by auto-distance bin (distcat)
summary_tour_destination(dstourdata, purpose=3, sum_by_var="distcat")

Unnamed: 0,distcat,psexpfac
0,0,2220.0
1,1,4133.0
2,2,6641.0
3,3,9075.0
4,4,9283.0
...,...,...
86,86,0.0
87,87,0.0
88,88,0.0
89,89,0.0


In [54]:
# Escort tours (purpose 3): expanded tours by auto-travel-time bin (timecat)
summary_tour_destination(dstourdata, purpose=3, sum_by_var="timecat")

Unnamed: 0,timecat,psexpfac
0,0,1277.0
1,1,1341.0
2,2,2008.0
3,3,2815.0
4,4,3543.0
...,...,...
86,86,0.0
87,87,0.0
88,88,0.0
89,89,0.0


In [55]:
# Escort tours (purpose 3): origin district (rows) by destination district (columns)
summary_tour_flow(dstourdata, 3)

dcounty,1,2,3,4
ocounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,46756,3335,362,1355
2,970,8362,6,1120
3,360,2,54,30
4,315,2156,30,2975
