In [2]:
import os
import sys
import gzip

import pandas as pd
import numpy as np
import geopandas as gpd

import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
#Constants:

# Fields for DaySim output files.
# Each *_FIELDS mapping below goes from a column name to a short description.

# Household file columns.
HH_FIELDS = dict(
    hhno="Household id",
    fraction_with_jobs_outside="Residence zone worker IX fraction",
    hhsize="Household size ",
    hhvehs="Vehicles available",
    hhwkrs="Household workers",
    hhftw="HH full time workers (type 1)",
    hhptw="HH part time workers (type 2)",
    hhret="HH retired adults (type 3)",
    hhoad="HH other adults (type 4)",
    hhuni="HH college students (type 5)",
    hhhsc="HH high school students (type 6)",
    hh515="HH kids age 5-15 (type 7)",
    hhcu5="HH kids age 0-4 (type 8)",
    hhincome="Household income ($)",
    hownrent="Household own or rent",
    hrestype="Household residence type",
    hhparcel="Residence parcel id",
    zone_id="Internal id based on parcel id",
    hhtaz="Based on parcel id",
    hhexpfac="HH expansion factor",
    samptype="Sample type",
)

# Person file columns: column name -> short description.
PER_FIELDS = {
    "id": "internal daysim record ID",
    "hhno": "hh id",
    "pno": "person seq no on file",
    "pptyp": "person type",
    "pagey": "age in years",
    "pgend": "gender",
    "pwtyp": "worker type",
    "pwpcl": "usual work parcel id",
    "pwtaz": "usual work TAZ",
    "pwautime": "auto time to usual work",
    "pwaudist": "auto distance to usual work",
    "pstyp": "student type",
    "pspcl": "usual school parcel id",
    "pstaz": "usual school TAZ",
    # The ps* columns are the school-side counterparts of the pw* columns.
    "psautime": "auto time to usual school",
    "psaudist": "auto distance to usual school",
    "puwmode": "usual mode to work",
    "puwarrp": "Usual arrival period to work",
    "puwdepp": "Usual depart period from work",
    "ptpass": "transit pass?",
    "ppaidprk": "paid parking at workplace?",
    "pdiary": "Person used paper diary?",
    "pproxy": "proxy response?",
    "psexpfac": "Person expansion factor",
}

# Person-day file columns: column name -> short description.
PER_DAY_FIELDS = {
    "id": "internal daysim record ID",
    "person_id": "internal daysim record ID",
    "household_day_id": "internal daysim record ID",
    "hhno": "Household id",
    "pno": "person seq no on file",
    "day": "Diary / simulation day ID",
    "beghom": "diary day begins at home?",
    "endhom": "diary day ends at home?",
    "hbtours": "home based tours in day",
    "wbtours": "work based tours in day",
    "uwtours": "tours to usual workplace in day",
    "wktours": "work tours",
    "sctours": "school tours",
    "estours": "escort tours",
    "pbtours": "pers.bus. Tours",
    "shtours": "shopping tours",
    "mltours": "meal tours",
    "sotours": "social tours",
    "retours": "recreation tours",
    "metours": "medical tours",
    "wkstops": "work stops in day (?)",
    "scstops": "school stops in day (?)",
    "esstops": "escort stops in day (?)",
    "pbstops": "pers.bus stops in day (?)",
    "shstops": "shopping stops in day (?)",
    "mlstops": "meal stops  in day (?)",
    "sostops": "social stops  in day (?)",
    "restops": "recreation stops  in day (?)",
    "mestops": "medical stops  in day (?)",
    "wkathome": "Minutes worked at home in day",
    "pdexpfac": "Person-day expansion factor",
}

# Tour file columns: column name -> short description.
TOUR_FIELDS = {
    "id": "internal daysim record ID",
    "person_id": "internal daysim record ID",
    "person_day_id": "internal daysim record ID",
    "hhno": "Household id",
    "pno": "person seq no on file",
    "day": "Diary / simulation day ID",
    "tour": "tour id",
    "jtindex": "hh joint tour index",
    "parent": "parent tour id",
    "subtrs": "number of subtours",
    "pdpurp": "prim.dest.purpose",
    "tlvorig": "time leave tour origin",
    "tardest": "time arrive tour dest",
    "tlvdest": "time leave tour dest",
    "tarorig": "time arrive tour origin",
    "toadtyp": "tour origin address type",
    "tdadtyp": "tour destination address type",
    "topcl": "tour origin parcel",
    "totaz": "tour origin TAZ",
    "tdpcl": "tour dest parcel",
    "tdtaz": "tour destination TAZ",
    "tmodetp": "tour main mode type",
    "tpathtp": "tour main mode path type",
    "tautotime": "tour 1-way auto time",
    # cost/dist descriptions follow the column-name suffix.
    "tautocost": "tour 1-way auto cost",
    "tautodist": "tour 1-way auto distance",
    "tripsh1": "1st half tour # of trips",
    "tripsh2": "2nd half tour # of trips",
    "phtindx1": "1st half-partial joint half tour index",
    "phtindx2": "2nd half-partial joint half tour index",
    "fhtindx1": "1st half- fully joint half tour index",
    "fhtindx2": "2nd half- fully joint half tour index",
    "toexpfac": "tour expansion factor",
}

# Trip file columns: column name -> short description.
TRIP_FIELDS = {
    "id": "internal daysim record ID",
    "tour_id": "internal daysim record ID",
    "hhno": "Household id",
    "pno": "person seq no on file",
    "day": "Diary / simulation day ID",
    "tour": "tour id",
    "half": "tour half",
    "tseg": "trip segment no within half tour",
    "tsvid": "original survey trip id no.",
    "opurp": "trip origin purpose",
    "dpurp": "trip dest purpose",
    "oadtyp": "trip origin address type",
    "dadtyp": "trip destination address type",
    "opcl": "trip origin parcel",
    "otaz": "trip origin zone",
    "dpcl": "trip dest parcel",
    "dtaz": "trip dest zone",
    "mode": "trip mode",
    "pathtype": "transit submode",
    "dorp": "trip driver or passenger",
    "deptm": "trip departure time (min after 3 am)",
    "arrtm": "trip arrival time (min after 3 am)",
    "endacttm": "trip dest activity end time",
    "travtime": "network travel time, min (by sov)",
    # NOTE(review): cost units are not stated anywhere in this file - confirm
    # against the DaySim data dictionary before adding them to the label.
    "travcost": "network travel cost (by sov)",
    "travdist": "network travel distance, miles (by sov)",
    "vot": "trip value of time (cents/minute)",
    "trexpfac": "trip expansion factor",
}

# DaySim output file names, keyed by a short table identifier.
DAYSIM_FILES = dict(
    HH="_household_2.dat",
    PER="_person_2.dat",
    PER_DAY="_person_day_2.dat",
    TOUR="_tour_2.dat",
    TRIP="_trip_2.dat",
)

# Parallel lists of the identifiers and file names, in definition order.
DAYSIM_FILEKEYS = [*DAYSIM_FILES]
DAYSIM_FILENAMES = [*DAYSIM_FILES.values()]


In [None]:
#

class DaySimOutputs():

    """
    Accessor for the DaySim output files stored in a single folder.

    The constructor checks that every file named in ``DAYSIM_FILES`` is
    present in the folder; the ``*_data`` methods then load one table each
    as a pandas DataFrame.

    Attributes:
        files: dict mapping each ``DAYSIM_FILES`` key to the path of that
            file inside the output folder.
        dists: DataFrame read from the ``geo`` CSV, or None when ``geo`` was
            not supplied.
    """

    def __init__(self, output_folder, geo=None):
        """
        Locate the DaySim output files and optionally load a geography table.

        Args:
            output_folder: folder that holds the DaySim output files.
            geo: optional CSV file name, resolved relative to
                ``output_folder`` (an absolute path is used as given).

        Raises:
            AssertionError: if any file listed in ``DAYSIM_FILES`` is missing
                from ``output_folder``.
        """
        all_files_n_folders = os.listdir(output_folder)
        # Subset test: every expected file name must appear in the listing.
        missing = sorted(set(DAYSIM_FILENAMES) - set(all_files_n_folders))
        assert not missing, (
            "DaySim output files missing from {}: {}".format(output_folder, missing)
        )

        # A dictionary with paths to various DaySim output files
        self.files = {key: os.path.join(output_folder, val)
                      for key, val in DAYSIM_FILES.items()}

        # Always define the attribute so later code can test it for None.
        self.dists = None
        if geo:
            self.dists = pd.read_csv(os.path.join(output_folder, geo))

    def _read_output_file(self, prefix):
        """
        Load one DaySim output table.

        Args:
            prefix: one of the keys of ``DAYSIM_FILES`` (e.g. ``"HH"``).

        Returns:
            The table as a DataFrame, or None when the file cannot be opened
            or parsed.
        """
        assert prefix in DAYSIM_FILEKEYS, "unknown DaySim file key: {!r}".format(prefix)

        try:
            # The delimiter of the .dat files is not fixed anywhere in this
            # file, so pandas is asked to sniff it (this requires the python
            # engine). TODO: pin `sep` once the format is confirmed - the C
            # engine is considerably faster on large trip files.
            return pd.read_csv(self.files[prefix], sep=None, engine="python")
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            # Best effort, as before: unreadable files yield None. Programming
            # errors are no longer swallowed.
            return None

    def household_data(self):
        """Return the household table (``DAYSIM_FILES["HH"]``) or None."""
        return self._read_output_file('HH')

    def person_data(self):
        """Return the person table (``DAYSIM_FILES["PER"]``) or None."""
        return self._read_output_file('PER')

    def person_day_data(self):
        """Return the person-day table (``DAYSIM_FILES["PER_DAY"]``) or None."""
        return self._read_output_file('PER_DAY')

    def tour_data(self):
        """Return the tour table (``DAYSIM_FILES["TOUR"]``) or None."""
        return self._read_output_file('TOUR')

    def trip_data(self):
        """Return the trip table (``DAYSIM_FILES["TRIP"]``) or None."""
        return self._read_output_file('TRIP')

    def prep_vehavail(self, hhdata, perdata, countycorr=None):

        """
        Build the household table used for vehicle-availability summaries.

        Adds these columns to a copy of ``hhdata`` (the inputs are not
        modified):

        * ``hhvehcat`` - ``hhvehs`` capped at 4.
        * ``hh16cat``  - number of household members with ``pagey >= 16``
          (potential drivers), capped at 4; NaN for households that have no
          rows in ``perdata``.
        * ``inccat``   - ``hhincome`` binned into "0K-15K", "15K-50K",
          "50K-75K" and ">75K". An income of exactly 0 falls in the first
          bin; negative incomes are left as NaN.
        * ``hhcounty`` - the ``District`` column of the correspondence table,
          joined on ``hhtaz == TAZID``.

        Args:
            hhdata: household table with ``hhno``, ``hhvehs``, ``hhincome``
                and ``hhtaz`` columns.
            perdata: person table with ``hhno`` and ``pagey`` columns.
            countycorr: correspondence table with ``TAZID`` and ``District``
                columns. Defaults to ``self.dists``.
                NOTE(review): using the ``geo`` table here is an assumption -
                confirm it is the intended TAZ-to-district correspondence.

        Returns:
            A new DataFrame with the columns above appended, together with
            every other column of the correspondence table.

        Raises:
            ValueError: if no correspondence table is available.
        """
        if countycorr is None:
            countycorr = getattr(self, "dists", None)
        if countycorr is None:
            raise ValueError(
                "prep_vehavail needs a TAZ correspondence table: pass "
                "`countycorr` or construct DaySimOutputs with `geo`."
            )

        # Work on a copy so the caller's frame is left untouched.
        hh = hhdata.copy()
        hh["hhvehcat"] = hh["hhvehs"].clip(upper=4)

        # Potential drivers per household, computed without adding a column
        # to the caller's person table.
        drivers = (
            (perdata["pagey"] >= 16)
            .astype(int)
            .groupby(perdata["hhno"])
            .sum()
            .clip(upper=4)
            .rename("hh16cat")
            .reset_index()
        )
        hh = hh.merge(drivers, on="hhno", how="left")

        # include_lowest keeps zero-income households in the first bin.
        hh["inccat"] = pd.cut(hh["hhincome"],
                              bins=[0, 15000, 50000, 75000, float("inf")],
                              labels=["0K-15K", "15K-50K", "50K-75K", ">75K"],
                              right=True,
                              include_lowest=True)

        hh = (hh.merge(countycorr,
                       left_on="hhtaz",
                       right_on="TAZID",
                       how="left")
                .rename(columns={"District": "hhcounty"}))

        return hh
        

