# Travel Demand Model Data Inputs
> 

## Setup 

In [2]:
import pandas as pd
import pathlib
import os
from utils import *

In [3]:
# current working directory
local_path = pathlib.Path().absolute()
# set workspace
arcpy.env.workspace = os.path.join(local_path, 'Workspace.gdb')
# overwrite true
arcpy.env.overwriteOutput = True

#### Notes
* old inputs: F:\Research and Analysis\misc\Reid_Haefer\Model\model_update_2018\data_inputs\lodging_occupancy
* Final inputs to produce: F:\Transportation\model\scenario_base\zonal

### Base Data

In [4]:
# Get TAZ data
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)
#set spatial reference
sdf_taz.spatial.sr = 4326

In [5]:
block_groups_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
sdf_block_groups = get_fs_data_spatial(block_groups_url)
sdf_block_groups = sdf_block_groups[(sdf_block_groups['YEAR'] == 2020) & (sdf_block_groups['GEOGRAPHY'] == 'Block Group')]
sdf_block_groups.spatial.sr = 4326

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data[self.name] = self._data[self.name].geom.project_as(ref)


In [6]:
units_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_units = get_fs_data_spatial_query(units_url, "Year = 2022")
sdf_units.spatial.sr = 4326

In [36]:
def make_taz_crosswalk(parcel_fc, taz_fc, geography_fc):
        # Define in-memory feature class names
    geo_feature_class = r"in_memory\geo"
    taz_feature_class = r"in_memory\taz_geo"

    # Perform first spatial join - order doesn't matter
    arcpy.analysis.SpatialJoin(
        target_features=parcel_fc,
        join_features=taz_fc,
        out_feature_class=taz_feature_class,
        join_operation="JOIN_ONE_TO_MANY",
        join_type="KEEP_ALL",
        match_option="HAVE_THEIR_CENTER_IN"
    )

    # Perform second spatial join
    arcpy.analysis.SpatialJoin(
        target_features=taz_feature_class,
        join_features=geography_fc,
        out_feature_class=geo_feature_class,
        join_operation="JOIN_ONE_TO_MANY",
        join_type="KEEP_ALL",
        match_option="HAVE_THEIR_CENTER_IN"
    )

    # Convert the final joined feature class to a Spatially enabled DataFrame
    sdf_taz_geo = pd.DataFrame.spatial.from_featureclass(geo_feature_class)

    # Select and rename necessary columns
    sdf_taz_geo = sdf_taz_geo[['APN', 'GEOID', 'TRPAID', 'TAZ_1', 'Residential_Units',
                            'TouristAccommodation_Units', 'CommercialFloorArea_SqFt']]
    sdf_taz_geo = sdf_taz_geo.rename(columns={'TAZ_1': 'TAZ'})

    # Group by and aggregate data
    df_parcels_grouped = sdf_taz_geo.groupby(['TAZ', 'TRPAID']).agg({'Residential_Units': 'sum',
                                                                    'TouristAccommodation_Units': 'sum',
                                                                    'CommercialFloorArea_SqFt': 'sum'}).reset_index()

    # Calculate totals and proportions
    df_parcels_grouped['Total_Res_Units'] = df_parcels_grouped.groupby('TAZ')['Residential_Units'].transform('sum')
    df_parcels_grouped['Total_TA_Units'] = df_parcels_grouped.groupby('TAZ')['TouristAccommodation_Units'].transform('sum')
    df_parcels_grouped['Total_CommercialFloorArea_SqFt'] = df_parcels_grouped.groupby('TAZ')['CommercialFloorArea_SqFt'].transform('sum')
    
    # Calculate proportions with checks for zero totals
    df_parcels_grouped['Residential_Units_Proportion'] = df_parcels_grouped.apply(
        lambda row: row['Residential_Units'] / row['Total_Res_Units'] if row['Total_Res_Units'] != 0 else 0, axis=1
    )
    df_parcels_grouped['TouristAccommodation_Units_Proportion'] = df_parcels_grouped.apply(
        lambda row: row['TouristAccommodation_Units'] / row['Total_TA_Units'] if row['Total_TA_Units'] != 0 else 0, axis=1
    )
    df_parcels_grouped['CommercialFloorArea_SqFt_Proportion'] = df_parcels_grouped.apply(
        lambda row: row['CommercialFloorArea_SqFt'] / row['Total_CommercialFloorArea_SqFt'] if row['Total_CommercialFloorArea_SqFt'] != 0 else 0, axis=1
    )

 
    # Fill NaN values with 0
    df_parcels_grouped.fillna(0, inplace=True)
    return df_parcels_grouped
    
    

In [37]:
taz_block_group_crosswalk = make_taz_crosswalk(sdf_units, sdf_taz, sdf_block_groups)
taz_block_group_crosswalk.to_csv('taz_block_group_crosswalk.csv', index=False)

## Socio Econ
> TAZ by total_residential_units,census_occ_rate,total_occ_units,occ_units_low_inc,occ_units_med_inc,occ_units_high_inc,persons_per_occ_unit,total_persons,emp_retail,emp_srvc,emp_rec,emp_game,emp_other

In [40]:
census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
df_census = get_fs_data(census_url)
df_census_2022 = df_census[(df_census['year_sample'] == 2022) & (df_census['sample_level'] == 'block group')]

In [17]:
occupancy_codes = ['B25002_003E','B25002_002E']
df_census_occupancy = df_census_2022[df_census_2022['variable_code'].isin(occupancy_codes)]
df_census_occupancy = df_census_occupancy[['TRPAID', 'variable_code', 'value']]
df_census_occupancy = df_census_occupancy.pivot(index='TRPAID', columns='variable_code', values='value').reset_index()
df_census_occupancy['total_units'] = df_census_occupancy['B25002_003E'] + df_census_occupancy['B25002_002E']
df_census_occupancy['occupancy_rate'] = df_census_occupancy['B25002_002E'] / df_census_occupancy['total_units']

In [18]:
df_census_household_size = df_census_2022[df_census_2022['variable_code'] == 'B25010_001E']
df_census_household_size = df_census_household_size[['TRPAID', 'variable_code', 'value']]
df_census_household_size = df_census_household_size.pivot(index='TRPAID', columns='variable_code', values='value').reset_index()
df_census_household_size['household_size'] = df_census_household_size['B25010_001E']

# Categorize the income variables from the census

In [19]:
code_lookup = pd.read_csv('Lookup_Lists/occupancy_census_codes.csv')
#Filter census so only variable codes in the code lookup are included
df_census_income = df_census_2022[df_census_2022['variable_code'].isin(code_lookup['variable_code'])]
#Create a new column that has a value from code lookup based on the variable code
df_census_income['income_category'] = df_census_income['variable_code'].map(code_lookup.set_index('variable_code')['category'])
#group by block group and income category and sum the values
df_census_income = df_census_income.groupby(['TRPAID','income_category'])['value'].sum().reset_index()
df_census_income = df_census_income.pivot(index='TRPAID', columns='income_category', values='value').reset_index()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_census_income['income_category'] = df_census_income['variable_code'].map(code_lookup.set_index('variable_code')['category'])


In [20]:
df_census_household_size['TRPAID']= df_census_household_size['TRPAID'].astype(str).str.zfill(16)
df_census_income['TRPAID']= df_census_income['TRPAID'].astype(str).str.zfill(16)
df_census_occupancy_all = pd.merge(df_census_occupancy, df_census_household_size, on='TRPAID', how='left')
df_census_all = pd.merge(df_census_occupancy_all, df_census_income, on='TRPAID', how='left')
# rename columns of df_census_all
column_rename = {
    'B25002_003E': 'vacant_units',
    'B25002_002E': 'occupied_units',
    'High Income': 'high_income',
    'Low Income': 'low_income',
    'Medium Income': 'middle_income',
}
df_census_all.rename(columns=column_rename, inplace=True)
df_census_all.drop(columns=['B25010_001E'], inplace=True)
df_census_all['high_income_proportion'] = df_census_all['high_income'] / df_census_all['occupied_units']
df_census_all['middle_income_proportion'] = df_census_all['middle_income'] / df_census_all['occupied_units']
df_census_all['low_income_proportion'] = df_census_all['low_income'] / df_census_all['occupied_units']

In [41]:
# use the proportions in sdf_taz_bg to assign proportion of block group values to taz
taz_values_acs = taz_block_group_crosswalk.merge(df_census_all, left_on='TRPAID', right_on='TRPAID', how='left')
taz_values_acs['adjusted_occupancy'] = taz_values_acs['occupancy_rate'] * taz_values_acs['Residential_Units_Proportion']
taz_values_acs['adjusted_high_income'] = taz_values_acs['high_income_proportion'] * taz_values_acs['Residential_Units_Proportion']
taz_values_acs['adjusted_middle_income'] = taz_values_acs['middle_income_proportion'] * taz_values_acs['Residential_Units_Proportion']
taz_values_acs['adjusted_low_income'] = taz_values_acs['low_income_proportion'] * taz_values_acs['Residential_Units_Proportion']
#Group by TAZ and sum the values
taz_values_grouped_occupancy = taz_values_acs.groupby('TAZ').agg({'adjusted_occupancy': 'sum',
                                                                  'adjusted_high_income':'sum',
                                                                  'adjusted_middle_income': 'sum',
                                                                  'adjusted_low_income': 'sum'}).reset_index()
#This is just the input values for the TAZs
taz_values_grouped_occupancy.to_csv('taz_calibration_values.csv', index=False)
#This provides the raw data for troubleshooting
taz_values_acs.to_csv('taz_values_acs.csv', index=False)

In [47]:
# Multiply the proportions by the total number of units in the TAZ
taz_residential_units = taz_block_group_crosswalk.groupby('TAZ')['Residential_Units'].sum().reset_index()
taz_residential_units = taz_residential_units.merge(taz_values_grouped_occupancy, on='TAZ', how='left')
taz_residential_units['occupancy'] = taz_residential_units['adjusted_occupancy'] * taz_residential_units['Residential_Units']
# Adjust income proportions by the total number of residential units and adjusted occupancy in each TAZ
taz_residential_units['high_income'] = (taz_residential_units['adjusted_high_income'] * 
                                                    taz_residential_units['Residential_Units'] * 
                                                    taz_residential_units['adjusted_occupancy'])

taz_residential_units['middle_income'] = (taz_residential_units['adjusted_middle_income'] * 
                                                    taz_residential_units['Residential_Units'] * 
                                                    taz_residential_units['adjusted_occupancy'])

taz_residential_units['low_income'] = (taz_residential_units['adjusted_low_income'] * 
                                                taz_residential_units['Residential_Units'] * 
                                                taz_residential_units['adjusted_occupancy'])
taz_residential_units.to_csv('taz_calibration_values_final.csv', index=False)


## Make a feature layer for the TDM variables. Not sure we need or want this

In [None]:
tahoe_geography_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
# Create a FeatureLayer object directly from the REST map service URL
feature_layer = FeatureLayer(tahoe_geography_url)

# Create a spatial DataFrame from the FeatureLayer
sdf = GeoAccessor.from_layer(feature_layer)

merged_df = pd.merge(sdf, df_census_all, on='TRPAID', how='inner')
columns_drop=['GlobalID', 'YEAR', 'created_date',  'created_user', 'last_edited_date', 'last_edited_user', 'Shape.STArea()', 'Shape.STLength()']
merged_df = merged_df.drop(columns=columns_drop)
workspace = r"F:\GIS\PROJECTS\ResearchAnalysis\Demographics\Workspace.gdb"
## Export spatial dataframes to feature class to use in Spatial join
merged_df.spatial.to_featureclass(os.path.join(workspace, "Tahoe_BlockGroup_TDM_Values"), sanitize_columns=False)

### Employment

> Purpose: Assign employees to TAZs
* Data Axle data and CBP data sources

In [None]:
employment_url = ''


### Income by Residential Unit

> Purpose: Calculate #/% of residential units in low/mid/high income in each TAZ
* Get # of HH in each income bin by census block group from ACS
* Combine to low/mid/high income, with breaks at 60k and 100k
* Assign to TAZ


In [None]:
# place holder URL - Demographics URL will have a new Feature Service titled "Block Group 2022 - Travel Demand Model Inputs"
census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/1'
sdf_census = get_fs_data_spatial(census_url)


## Overnight Visitation
>TAZ by hotelmotel,resort,casino,campground,percentHouseSeasonal,beach

### Tourist Accommodation Units

### Campgrounds

> Purpose: calculate campground occupancy on model day and assign to TAZs
* in-process data: "F:\Research and Analysis\Transportation\Travel_Demand_Model\2023 Update\Input Data\Campgrounds\Campground_Visitation.xlsx"
* USFS data: https://apps.fs.usda.gov/arcx/rest/services/EDW/EDW_RecreationAreaActivities_01/MapServer/0

In [None]:
# Get the data
campgroundu_url = ''
# join the data to parcels? or create points? 


## Occupancy Rates
> TAZ by hotelmotel,resort,casino,campground,house,seasonal

### Lodging Occupancy


> Purpose: calculate occupancy rates for hotels and VHRs and assign to TAZs

* Placer has Occupancy data at the TOT tax district. Josh will get the spatial file from Placer County GIS. 
* City of South Lake has hotel level occupancy data that needs to be converted to a spatial file. Reports live here: https://www.cityofslt.us/805/Zone-Detail-Reports 
* Douglas County has occupancy rates from the Casino reports: https://gaming.nv.gov/about/abstract/report/
* Washoe County has occupancy rates at the District level from RSCVA (single number by type of occupancy) "F:\Research and Analysis\Visitation\occupancy\Washoe B Occupied Rooms by Market Segment - 2022.xlsx"
* Rest of El Dorado County? is only VHRs, estimating Occupancy Rates based on TOT rates F:\Research and Analysis\Visitation\occupancy

In [None]:
# Get VHR data - this is current, no way to get historical data outside of the City of South Lake Tahoe
vhr_url = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
sdf_vhr = get_fs_data_spatial(vhr_url)

# Get Tourist Accomodation Units data
tau_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_tau = get_fs_data_spatial_query(tau_url, "Year = 2022")

# get occupancy rate data - see above for sources


### Housing Occupancy


> Purpose: estimate household size in each occupied housing unit
* get total Residential Units aggregated to TAZ
* get total VHRs aggregated to TAZ
* apply occupancy rate from ACS
* ACS 2022 by Block Group - household size

In [50]:
# Get Houshold Block Group data

df_census_household_size = df_census_2022[df_census_2022['variable_code'] == 'B25010_001E']
df_census_household_size = df_census_household_size[['TRPAID', 'variable_code', 'value']]
df_census_household_size_taz = taz_block_group_crosswalk.merge(df_census_household_size, on='TRPAID', how='left')
df_census_household_size_taz['household_size_proportion'] = df_census_household_size_taz['value']*df_census_household_size_taz['Residential_Units_Proportion']
taz_household_size = df_census_household_size_taz.groupby('TAZ')['household_size_proportion'].sum().reset_index()
taz_household_size.to_csv('taz_household_size.csv', index=False)

## School Enrollment
> TAZ by elementary_school_enrollment,middle_school_enrollment,high_school_enrollment,college_enrollment

> Purpose: Collate school enrollment files
* create spatial file 

In [None]:
# Get School Enrollment data
school_url_table     = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/32'
df_school_enrollment = get_fs_data(school_url_table)

# Get School Enrollment data - spatial
school_url_spatial = 'https://maps.trpa.org/server/rest/services/Datadownloader_PlanningandJurisdictions/MapServer/14'
sdf_school         = get_fs_data_spatial(school_url_spatial)

# Get TAZ data
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)


In [None]:
# set Type to Null
sdf_school['TYPE'] = None
# set SchoolType to 'elementary' if it contains 'elementary' or 'magnet' or 'academy'
sdf_school.loc[sdf_school['NAME'].str.contains('elementary', case=False), 'TYPE'] = 'Elementary School'
# set SchoolType to 'middle' if it contains 'middle'
sdf_school.loc[sdf_school['NAME'].str.contains('middle', case=False), 'TYPE'] = 'Middle School'
# set SchoolType to 'high' if it contains 'high'
sdf_school.loc[sdf_school['NAME'].str.contains('high', case=False), 'TYPE'] = 'High School'
# set SchoolType to 'college' if it contains 'college'
sdf_school.loc[sdf_school['NAME'].str.contains('college', case=False), 'TYPE'] = 'College'
# set SchoolType to 'other' if it it does not contain any of the above
sdf_school.loc[sdf_school['TYPE'].isnull(), 'TYPE'] = 'Elementary School'

In [None]:
# spatial join TAZs to School points
sdf_school_taz = sdf_school.spatial.join(sdf_taz, how='inner')
# group by TYPE and sum of Enrollment within TAZ 
sdf_school_taz_grouped = sdf_school_taz.groupby(['TYPE', 'TAZ']).agg(
                                                {'ENROLLMENT': 'sum'}).reset_index()
# unstack by TYPE as columns and TAZ as a column
sdf_school_taz_grouped_pivot = sdf_school_taz_grouped.pivot(index='TAZ', 
                                                            columns='TYPE', 
                                                            values='ENROLLMENT').reset_index()
# merge to sdf_taz to get all tazs
sdf_taz_school = pd.merge(sdf_taz, sdf_school_taz_grouped_pivot, how='left', on='TAZ')

# drop SHAPE column
sdf_taz_school = sdf_taz_school.drop(columns='SHAPE')
# fill NA with 0 for all rows
sdf_taz_school = sdf_taz_school.fillna(0)
# cast all fields to int
sdf_taz_school = sdf_taz_school.astype(int)
# rename columns
sdf_taz_school.rename(columns={'Elementary School':'elementary_school_enrollment',
                               'Middle School':'middle_school_enrollment',
                               'High School':'high_school_enrollment',
                               'College':'college_enrollment'}, inplace=True)

# export to csv
sdf_taz_school.to_csv(os.path.join('SchoolEnrollment.csv'), index=False)


In [None]:
sdf_taz_school

#### other way 

In [None]:
df_school_enrollment_22 = df_school_enrollment[df_school_enrollment['Year'] == '2022-2023']
# Add a row for LTCC - Lake Tahoe Community College
ltcc = {'School_Name': 'Lake Tahoe Community College', 'Level_': 'College', 'Enrollment': 2909}
df_school_enrollment_22 = pd.concat([df_school_enrollment_22, pd.DataFrame([ltcc])], ignore_index=True)
# join school spatial to school table
sdf_school_enroll = pd.merge(sdf_school, df_school_enrollment, left_on='SchoolID', how='left')

In [None]:
# Get TAZ data
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)

# Get School Enrollment data - spatial
school_url_spatial = 'https://maps.trpa.org/server/rest/services/Datadownloader_PlanningandJurisdictions/MapServer/14'
sdf_school         = get_fs_data_spatial(school_url_spatial)


# keep only the columns we need; SHAPE and TAZ for TAZ and SHAPE and ENROLLMENT for schools
sdf_taz = sdf_taz[['TAZ', 'SHAPE']]
sdf_school = sdf_school[['ENROLLMENT', 'SHAPE']]

# convert to feature class
sdf_taz.spatial.to_featureclass(location=os.path.join(arcpy.env.workspace, 'TAZ'), overwrite=True, sanitize_columns=False)
sdf_school.spatial.to_featureclass(location=os.path.join(arcpy.env.workspace, 'Schools'), overwrite=True, sanitize_columns=False)

In [None]:
field_mappings = arcpy.FieldMappings()
# add tables to field mappings
field_mappings.addTable('TAZ')
field_mappings.addTable('Schools')


# print names of fields in field mappings
for field in field_mappings.fields:
    print(field.name)

In [None]:
# create field mappings
field_mappings = arcpy.FieldMappings()

# add tables to field mappings
field_mappings.addTable('TAZ')
field_mappings.addTable('Schools')

# for the output.
enrollment = field_mappings.findFieldMapIndex("ENROLLMENT")
fieldmap = field_mappings.getFieldMap(enrollment)
 
# Get the output field's properties as a field object
field = fieldmap.outputField
 
# Rename the field and pass the updated field object back into the field map
field.name = "Total_Enrollment"
field.aliasName = "Total Enrollment"
fieldmap.outputField = field
 
# Set the merge rule to mean and then replace the old fieldmap in the mappings object
# with the updated one
fieldmap.mergeRule = "sum"
field_mappings.replaceFieldMap(enrollment, fieldmap)

# spatial join that sums enrollment for each TAZ
arcpy.analysis.SpatialJoin(
    target_features   =sdf_taz,
    join_features     =sdf_school,
    out_feature_class ="TAZ_School_Enrollment",
    join_operation    ="JOIN_ONE_TO_ONE",
    join_type         ="KEEP_ALL",
    field_mapping     =field_mappings,
    match_option      ="INTERSECT",
)

In [None]:
# get the data 
sdf_taz_school = pd.DataFrame.spatial.from_featureclass("TAZ_School_Enrollment")



### Forecasts
> Forecast 2040 and 2050 development and population change

In [None]:
# 