In [1]:
import sys
# Extend the module search path with the parent directory; presumably this is
# what lets the project-local `input` module be imported — TODO confirm.
sys.path.append('../')

In [2]:
import os
import pandas as pd
import numpy as np
from pydantic import ValidationError

from input import PersonValidator, HouseholdValidator, TazValidator, Person, Household
from input import TravelAnalysisZoneData as InputTAZ


## Input File Paths

In [3]:
# Directory holding the example inputs; each file path below is built from it.
test_data_dir = "../example_input/"
person_file, household_file, taz_file = (
    os.path.join(test_data_dir, file_name)
    for file_name in (
        "synthetic_persons_formatted.csv",
        "synthetic_households_formatted.csv",
        "land_use_formatted.csv",
    )
)

## Data Reads

In [4]:
# Read the person CSV into a DataFrame and preview its first rows.
person_df = pd.read_csv(person_file)
person_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,areaenum_idx,sa2,sexp,age_yrs,person_type,occupation,lfsp,schg,person_id,PNUM,household_id,tz
0,0,0,1,1,2.0,75.0,7,6,3.0,-8,1,1,1,1679
1,1,1,1,1,1.0,61.0,8,6,3.0,-8,2,1,2,1679
2,2,2,1,1,2.0,55.0,6,2,1.0,-8,3,2,2,1679
3,3,3,1,1,1.0,20.0,8,6,2.0,-8,4,3,3,1679
4,4,4,1,1,1.0,45.0,5,2,1.0,-8,5,1,3,1679


In [5]:
# Read the household CSV into a DataFrame and preview its first rows.
household_df = pd.read_csv(household_file)
household_df.head()

Unnamed: 0,household_id,areaenum_idx,sa2,hhsize,n_workers,hh_income_quartile,household_income,Householder_Sex,min_HH_age,num_adults,Family_HH,hht,tz
0,1,1,1,1,0.0,2,59800,2.0,75.0,1.0,0,6,1679
1,2,1,1,2,1.0,1,20800,2.0,55.0,2.0,0,7,1679
2,3,1,1,5,2.0,3,104000,1.0,8.0,3.0,1,1,1679
3,4,1,1,5,2.0,3,104000,2.0,0.0,2.0,1,1,1679
4,5,1,1,1,1.0,2,59800,2.0,51.0,1.0,0,6,1679


## Persons
### Validation

In [6]:
# Rename the raw columns to `id`, `age` and `sex`, then keep only the first
# 1000 rows as an independent copy for the rest of the walkthrough.
p_df = (
    person_df
    .rename(columns={"person_id": "id", "age_yrs": "age", "sexp": "sex"})
    .head(1000)
    .copy()
)
p_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,areaenum_idx,sa2,sex,age,person_type,occupation,lfsp,schg,id,PNUM,household_id,tz
0,0,0,1,1,2.0,75.0,7,6,3.0,-8,1,1,1,1679
1,1,1,1,1,1.0,61.0,8,6,3.0,-8,2,1,2,1679
2,2,2,1,1,2.0,55.0,6,2,1.0,-8,3,2,2,1679
3,3,3,1,1,1.0,20.0,8,6,2.0,-8,4,3,3,1679
4,4,4,1,1,1.0,45.0,5,2,1.0,-8,5,1,3,1679


In [7]:
# Convert each row to a plain dict so the records can be validated as a batch.
p_list = p_df.to_dict(orient="records")
try:
    person_validator = PersonValidator(list_of_persons=p_list)
except ValidationError as validation_error:
    # Show the concise pydantic report, then re-raise: later cells read
    # `person_validator`, so carrying on would either hit a NameError or
    # silently reuse a stale validator left in the kernel by an earlier run.
    print(validation_error)
    raise

#### Demonstrate a failed validation

In [8]:
# Corrupt one record on purpose: the person with id 2 gets person_type 11 so
# that validation of the modified records fails.
p_bad_df = p_df.copy()
p_bad_df.loc[p_bad_df["id"] == 2, "person_type"] = 11
p_bad_list = p_bad_df.to_dict(orient="records")
try:
    bad_person_validator = PersonValidator(list_of_persons=p_bad_list)
except ValidationError as expected_error:
    # The failure is the point of this cell, so report it instead of raising.
    print(expected_error)

1 validation error
list_of_persons -> 1 -> person_type
  value is not a valid enumeration member (type=type_error.enum)


### Make ActivitySim Input

In [9]:
# One row per validated person, built from the mapping view of each record.
person_fields_df = pd.DataFrame(
    [dict(person) for person in person_validator.list_of_persons]
).reset_index(drop=True)
# `gender` is read as an attribute of each person rather than taken from
# dict(person), so it is collected separately and appended as its own column.
person_gender_df = pd.DataFrame(
    [person.gender for person in person_validator.list_of_persons],
    columns=["gender"],
)
input_person_df = pd.concat([person_fields_df, person_gender_df], axis="columns")
input_person_df.head()

Unnamed: 0,id,age,sex,person_type,occupation,gender
0,1,75,2,7,6,2
1,2,61,1,8,6,1
2,3,55,2,6,2,2
3,4,20,1,8,6,1
4,5,45,1,5,2,1


## Households
### Validation

In [10]:
# Preview the raw household columns before they are renamed for validation.
household_df.head()

Unnamed: 0,household_id,areaenum_idx,sa2,hhsize,n_workers,hh_income_quartile,household_income,Householder_Sex,min_HH_age,num_adults,Family_HH,hht,tz
0,1,1,1,1,0.0,2,59800,2.0,75.0,1.0,0,6,1679
1,2,1,1,2,1.0,1,20800,2.0,55.0,2.0,0,7,1679
2,3,1,1,5,2.0,3,104000,1.0,8.0,3.0,1,1,1679
3,4,1,1,5,2.0,3,104000,2.0,0.0,2.0,1,1,1679
4,5,1,1,1,1.0,2,59800,2.0,51.0,1.0,0,6,1679


In [11]:
# Rename the raw columns to `id`, `income_in_aud2019` and `home_location`, then
# keep only the first 1000 rows as an independent copy.
h_df = (
    household_df
    .rename(
        columns={
            "household_id": "id",
            "household_income": "income_in_aud2019",
            "tz": "home_location",
        }
    )
    .head(1000)
    .copy()
)
h_df.head()

Unnamed: 0,id,areaenum_idx,sa2,hhsize,n_workers,hh_income_quartile,income_in_aud2019,Householder_Sex,min_HH_age,num_adults,Family_HH,hht,home_location
0,1,1,1,1,0.0,2,59800,2.0,75.0,1.0,0,6,1679
1,2,1,1,2,1.0,1,20800,2.0,55.0,2.0,0,7,1679
2,3,1,1,5,2.0,3,104000,1.0,8.0,3.0,1,1,1679
3,4,1,1,5,2.0,3,104000,2.0,0.0,2.0,1,1,1679
4,5,1,1,1,1.0,2,59800,2.0,51.0,1.0,0,6,1679


In [12]:
# One dict per household row; these dicts are what gets validated later.
h_list = h_df.to_dict(orient = "records")

In [13]:
# Group the person records by household once, so attaching them costs
# O(households + persons) instead of rescanning every person per household.
# Insertion order is preserved, so each group keeps the order of `p_list`.
persons_by_household = {}
for person in p_list:
    persons_by_household.setdefault(person["household_id"], []).append(person)

for household in h_list:
    # Copy the group so every household owns a distinct list; households with
    # no matching person get an empty list.
    person_list = list(persons_by_household.get(household["id"], []))
    household["persons"] = person_list


In [14]:
# Validate all household records (each carrying its `persons` list) as a batch.
try:
    household_validator = HouseholdValidator(list_of_households=h_list)
except ValidationError as validation_error:
    # Show the concise pydantic report, then re-raise: later cells read
    # `household_validator`, so carrying on would either hit a NameError or
    # silently reuse a stale validator left in the kernel by an earlier run.
    print(validation_error)
    raise

### Make ActivitySim Input

In [15]:
validated_households = household_validator.list_of_households

# One row per validated household, built from the mapping view of each record.
household_fields_df = pd.DataFrame(
    [dict(household) for household in validated_households]
).reset_index(drop=True)
# `income_quartile` and `household_size` are read as attributes of each
# household, so they are collected separately and appended as extra columns.
# NOTE(review): the recorded preview shows income_quartile == 1 for incomes of
# 20800, 59800 and 104000 alike, while the raw `hh_income_quartile` column
# holds 2, 1 and 3 for those households — confirm how the quartile is derived.
income_quartile_df = pd.DataFrame(
    [household.income_quartile for household in validated_households],
    columns=["income_quartile"],
)
household_size_df = pd.DataFrame(
    [household.household_size for household in validated_households],
    columns=["household_size"],
)

# The nested `persons` column is dropped from the final table.
input_household_df = (
    pd.concat(
        [household_fields_df, income_quartile_df, household_size_df],
        axis="columns",
    )
    .drop(columns=["persons"])
    .copy()
)
input_household_df.head()

Unnamed: 0,id,income_in_aud2019,home_location,income_quartile,household_size
0,1,59800.0,1679,1,1
1,2,20800.0,1679,1,2
2,3,104000.0,1679,1,5
3,4,104000.0,1679,1,5
4,5,59800.0,1679,1,1


## Zonal Data
### Validation

In [16]:
# Read the zonal land-use CSV and list its columns so the rename mapping can be
# checked against them.
# NOTE(review): the listing includes 'Unnamed: 0.1' and 'Unnamed: 0'; these look
# like index columns written out by earlier saves — confirm and consider dropping.
taz_df = pd.read_csv(taz_file)
taz_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'Year', 'FA', 'Pop_in_private_dwellings',
       'Private_dwellings', 'emp_agriculture', 'emp_mining', 'emp_utilities',
       'emp_construction', 'emp_manufacturing', 'emp_wholesale', 'emp_retail',
       'emp_transport', 'emp_communication', 'emp_finance', 'emp_rental',
       'emp_professionals', 'N55', 'emp_admin', 'emp_education', 'emp_health',
       'emp_social', 'emp_accommodation', 'emp_other', 'emp_pubadmin',
       'retail', 'service', 'emp', 'univ', 'EnrollDS', 'EnrollPD', 'PARKTOT',
       'PARKLNG', 'PROPFREE', 'PARKRATE', 'PARKING_ZONE', 'areatype',
       'RetailEmp30', 'I_PCTLT10K', 'I_PCT10TO20', 'I_PCT20TO40', 'I_PCTGT40',
       'TZ'],
      dtype='object')

In [17]:
# Raw land-use column -> name used for the zone records built from `t_df`.
# NOTE(review): the recorded TazValidator output reports `households`,
# `household_population` and `parking_cost_per_hour_usd2019` as missing for
# every zone shown, so the dwelling, population and parking-cost names produced
# here do not match what the validator requires — reconcile with the models.
taz_column_renames = {
    # Identifier and zone attributes
    "TZ": "id",
    "areatype": "area_type",
    "PARKRATE": "parking_cost_per_hour_aud2019",
    # Dwellings and population
    "Private_dwellings": "private_dwellings",
    "Pop_in_private_dwellings": "population_in_private_dwellings",
    # Employment by industry
    "emp_agriculture": "employment_agriculture",
    "emp_mining": "employment_mining",
    "emp_utilities": "employment_utilities",
    "emp_construction": "employment_construction",
    "emp_manufacturing": "employment_manufacturing",
    "emp_wholesale": "employment_wholesale",
    "emp_retail": "employment_retail",
    "emp_transport": "employment_transport",
    "emp_communication": "employment_communication",
    "emp_finance": "employment_finance",
    "emp_rental": "employment_rental",
    "emp_professionals": "employment_professional",
    "emp_admin": "employment_administrative",
    "emp_education": "employment_education",
    "emp_health": "employment_health",
    "emp_social": "employment_social",
    "emp_accommodation": "employment_accommodation",
    "emp_pubadmin": "employment_public_administration",
    "emp_other": "employment_other",
    # Enrollment
    "univ": "enrollment_tertiary",
    "EnrollDS": "enrollment_secondary",
    "EnrollPD": "enrollment_primary",
}
t_df = taz_df.rename(columns=taz_column_renames).copy()
t_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Year,FA,population_in_private_dwellings,private_dwellings,employment_agriculture,employment_mining,employment_utilities,employment_construction,...,PROPFREE,parking_cost_per_hour_aud2019,PARKING_ZONE,area_type,RetailEmp30,I_PCTLT10K,I_PCT10TO20,I_PCT20TO40,I_PCTGT40,id
0,0,0,2016,38,0,0,0.0,0.0,0.0,0.0,...,0,0,0,4,0,0,0,0,0,1
1,1,1,2016,38,0,0,0.0,0.0,0.0,0.0,...,0,0,0,4,0,0,0,0,0,2
2,2,2,2016,38,234,88,0.0,0.0,0.0,0.0,...,0,0,0,4,0,0,0,0,0,3
3,3,3,2016,38,0,0,0.0,0.0,0.0,0.0,...,0,0,0,4,0,0,0,0,0,4
4,4,4,2016,38,0,0,0.0,0.0,0.0,0.0,...,0,0,0,4,0,0,0,0,0,5


In [18]:
# One dict per zone row; these dicts are what gets validated later.
t_list = t_df.to_dict(orient = "records")

In [19]:
# Validate every zone record in one pass and report any schema problems.
try:
    taz_validator = TazValidator(list_of_zones=t_list)
except ValidationError as zone_errors:
    # NOTE(review): on failure `taz_validator` is never assigned here, so any
    # later use of it would not see a freshly built validator.
    print(zone_errors)

5100 validation errors
list_of_zones -> 0 -> households
  field required (type=value_error.missing)
list_of_zones -> 0 -> household_population
  field required (type=value_error.missing)
list_of_zones -> 0 -> parking_cost_per_hour_usd2019
  field required (type=value_error.missing)
list_of_zones -> 1 -> households
  field required (type=value_error.missing)
list_of_zones -> 1 -> household_population
  field required (type=value_error.missing)
list_of_zones -> 1 -> parking_cost_per_hour_usd2019
  field required (type=value_error.missing)
list_of_zones -> 2 -> households
  field required (type=value_error.missing)
list_of_zones -> 2 -> household_population
  field required (type=value_error.missing)
list_of_zones -> 2 -> parking_cost_per_hour_usd2019
  field required (type=value_error.missing)
list_of_zones -> 3 -> households
  field required (type=value_error.missing)
list_of_zones -> 3 -> household_population
  field required (type=value_error.missing)
list_of_zones -> 3 -> parking_cos