In [12]:
import pandas as pd
import os
import arcpy
from utils import *

# Get Data

In [13]:
# --- Database connection files ---
# Network folder that holds the .sde connection files.
filePath = "F:/GIS/PARCELUPDATE/Workspace/"
# One connection file per database, each resolved relative to filePath.
sdeBase, sdeCollect, sdeTabular = (
    os.path.join(filePath, sde_name)
    for sde_name in ("Vector.sde", "Collection.sde", "Tabular.sde")
)

# --- arcpy environment ---
# Let geoprocessing outputs overwrite existing datasets.
arcpy.env.overwriteOutput = True
# Use the 'memory' workspace as the default workspace.
arcpy.env.workspace = 'memory'
# To clear the memory workspace first, uncomment the next line:
# arcpy.management.Delete('memory')

# Spatial reference NAD 1983 UTM Zone 10N (WKID 26910).
sr = arcpy.SpatialReference(26910)

We need to get the parcel data, join the census data to it, and assign the results to geographies.

In [14]:
# Read the whole SDE.Census_Demographics table into a DataFrame.
# NOTE(review): get_conn is not defined in the visible cells — presumably it
# is supplied by `from utils import *` and returns a database engine; confirm.
engine = get_conn('sde')
with engine.begin() as conn:
    df_census = pd.read_sql(
        sql="SELECT * FROM SDE.Census_Demographics",
        con=conn,
    )

In [15]:
# Lookup list of demographic variables, read from the housing lookup CSV.
variables_demographics = pd.read_csv(
    filepath_or_buffer='Lookup_Lists/demographic_variables_housing.csv',
)

In [16]:
# Keep only the census rows that have a matching entry in the lookup list.
# The inner join pairs year_sample / sample_level / variable_name on the
# census side with year / geography / 'Variable Name' on the lookup side.
df_census_demographics = pd.merge(
    df_census,
    variables_demographics,
    how='inner',
    left_on=['year_sample', 'sample_level', 'variable_name'],
    right_on=['year', 'geography', 'Variable Name'],
)

In [17]:
# Sum 'value' for every combination of sample year, tract, state, county,
# variable name/code and census geometry year, then flatten the group keys
# back into ordinary columns.
df_census_demographics_grouped = (
    df_census_demographics
    .groupby([
        'year_sample', 'tract', 'state', 'county',
        'variable_name', 'variable_code', 'census_geom_year_x',
    ])['value']
    .sum()
    .reset_index()
)

In [18]:
# Convert the year columns to strings, build the trpa_id key and export the
# summary table to CSV.
#
# The rename runs first and the result is built as a new frame rather than
# mutated with inplace=True. Re-running this cell therefore no longer raises a
# KeyError on 'census_geom_year_x' once that column has already been renamed.
# NOTE(review): assumes state, county and tract already hold strings, since
# they are concatenated with the stringified year — confirm.
df_census_demographics_grouped = (
    df_census_demographics_grouped
    # No-op when the column was already renamed by an earlier run of this cell.
    .rename(columns={'census_geom_year_x': 'census_geom_year'})
    .assign(
        # Going through int before str yields plain integer text.
        year_sample=lambda d: d['year_sample'].astype(int).astype(str),
        census_geom_year=lambda d: d['census_geom_year'].astype(int).astype(str),
        # state + county + tract + census geometry year; this lambda sees the
        # string-typed census_geom_year assigned just above.
        trpa_id=lambda d: d['state'] + d['county'] + d['tract'] + d['census_geom_year'],
    )
)
df_census_demographics_grouped.to_csv('Summarized_Data/Demographics_Data.csv', index=False)

In [None]:
# TODO: clean up the demographics data and fill in the blank values.
# TODO: investigate what is happening with group quarters.
# TODO: add age categories by tract.

In [None]:
# Parcel development polygons, reached through the Collection.sde connection.
parcel_db = f"{sdeCollect}\\SDE.Parcel\\SDE.Parcel_History_Attributed"
# Load the whole feature class, then keep only the rows whose YEAR is 2022.
# NOTE(review): the .spatial accessor is not registered by any import visible
# in this notebook — presumably utils pulls it in; confirm.
sdf_units = pd.DataFrame.spatial.from_featureclass(parcel_db)
sdf_units = sdf_units[sdf_units['YEAR'].eq(2022)]
# Assign the spatial reference held in sr to the filtered frame.
sdf_units.spatial.sr = sr

In [None]:
# Variable codes to keep, read from the census lookup CSV.
census_variable_list = pd.read_csv('Lookup_Lists/census_variables.csv')
# Restrict the 2022 census rows to the listed variable codes.
# NOTE(review): df_census_2022 is not created in any cell visible here —
# confirm it is defined before this cell runs on a fresh kernel.
df_census_2022_include = df_census_2022[
    df_census_2022['variable_code'].isin(census_variable_list['variable_code'])
]
# Reshape to one row per block_group and one column per variable_code.
# pivot raises ValueError if a block_group/variable_code pair occurs twice.
block_group_pivot = df_census_2022_include.pivot(
    index='block_group',
    columns='variable_code',
    values='value',
)


In [None]:
# Left-join the pivoted census values onto the 2022 parcel rows, keyed on TRPAID.
# NOTE(review): block_group_pivot is built with index='block_group' and one
# column per variable_code, so the visible code never creates a 'TRPAID'
# column on the right-hand side — confirm the intended join key before running.
units_attributed = pd.merge(sdf_units, block_group_pivot, left_on='TRPAID', right_on='TRPAID', how='left')