In [3]:
from utils import *
import pandas as pd
import numpy as np
import arcpy
from arcgis.features import FeatureLayer

In [4]:

# Layer 27 of the TRPA Demographics map service; queried below for the Tahoe geometry attributes.
service_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'

feature_layer = FeatureLayer(service_url)
# Only these attribute fields are requested; they are sent to the service as one comma-separated string.
tahoe_geometry_fields = ['YEAR', 'STATE', 'GEOGRAPHY', 'GEOID', 'TRPAID', 'NEIGHBORHOOD']
query_result = feature_layer.query(out_fields=",".join(tahoe_geometry_fields))
# Keep the list of features returned by the query (each one exposes an `attributes` mapping)
feature_list = query_result.features

# Build a pandas DataFrame with one row per feature from the features' attribute mappings
tahoe_geometry = pd.DataFrame([feature.attributes for feature in feature_list])

# County code (3-character string) -> display label for the Tahoe Basin part of that county.
# NOTE(review): the keys look like county FIPS codes -- confirm against the data source.
county_lookup = {'005': 'Douglas County (Tahoe Basin)',
                 '017': 'El Dorado County (Tahoe Basin)',
                 '031': 'Washoe County (Tahoe Basin)',
                 '061': 'Placer County (Tahoe Basin)'}

# State code (2-character string) -> state name.
# NOTE(review): the keys look like state FIPS codes -- confirm against the data source.
state_lookup = {'32': 'Nevada', '06': 'California'}

# Multiplier keyed by year (as a string).
# NOTE(review): presumably converts dollar amounts from that year to a later base year --
# the base year and the source of these factors are not stated here; confirm.
inflation_adjustment = {'2000': 1.57, '2010': 1.24}

In [5]:
def categorize_values(census_df, categories_df, category_column, grouping_prefix_column, variable_category_column):
    """
    Collapses census rows into the categories defined by a variable-code lookup table.

    Every row of census_df is matched to categories_df on 'variable_code' (left join). Rows that
    share the same category, variable category, prefix and remaining census columns are then
    combined: 'value' is summed and the member variable codes are joined into one
    comma-separated string.

    Parameters:
    census_df (DataFrame): DataFrame containing census data. Must contain 'variable_code',
        'variable_category', 'value' (castable to float) and 'dataset'. It is not modified.
    categories_df (DataFrame): DataFrame keyed by 'variable_code' that provides the three
        columns named by the arguments below.
    category_column (str): Name of the column in categories_df that contains the category
        labels; it becomes 'variable_name' in the result.
    grouping_prefix_column (str): Name of the column in categories_df whose value is prepended
        (as "<prefix>: ") to the grouped 'variable_code' and to 'dataset'.
    variable_category_column (str): Name of the column in categories_df that holds the variable
        category. It is a grouping key and supplies the result's 'variable_category' when no
        grouping column already carries that name.

    Returns:
    DataFrame: One row per group, with the columns of census_df (minus 'OBJECTID') in
    census_df's column order. 'MarginOfError' is set to an empty string (it is not aggregated).
    """
    census_df_copy = census_df.copy()
    census_df_copy['value'] = census_df_copy['value'].astype(float)
    # The incoming category is discarded; the category comes from categories_df instead.
    census_df_copy = census_df_copy.drop(columns=['variable_category'])

    # Sorting by code before grouping makes the joined code list of each group ordered.
    joined_data = (
        census_df_copy
        .merge(categories_df, on='variable_code', how='left')
        .sort_values(by='variable_code')
    )

    # Every census column that is not aggregated, rebuilt or dropped is a grouping key.
    non_key_columns = {'value', 'variable_code', 'variable_name', 'MarginOfError',
                       'variable_category', 'OBJECTID'}
    group_columns = [column for column in census_df_copy.columns if column not in non_key_columns]
    group_columns += [category_column, variable_category_column, grouping_prefix_column]

    # dropna=False keeps rows whose variable_code had no match in categories_df
    # (their category columns are NaN after the left join).
    grouped_data = joined_data.groupby(group_columns, as_index=False, dropna=False).agg({
        'value': 'sum',
        'variable_code': lambda x: ', '.join(x)
    })

    prefix = grouped_data[grouping_prefix_column]
    grouped_data['variable_code'] = prefix + ": " + grouped_data['variable_code']
    grouped_data['dataset'] = prefix + ": " + grouped_data['dataset']
    grouped_data['variable_name'] = grouped_data[category_column]
    grouped_data['MarginOfError'] = ''

    # The result must expose 'variable_category' (it is one of census_df's columns). It is
    # only present already when a grouping column has that exact name; otherwise take it
    # from the caller-designated column instead of failing on the selection below.
    if 'variable_category' not in grouped_data.columns:
        grouped_data['variable_category'] = grouped_data[variable_category_column]

    # Column positions are fixed here, so no positional inserts are needed above.
    columns_to_keep = [column for column in census_df.columns if column != 'OBJECTID']
    return grouped_data[columns_to_keep]
def create_or_append_df(df, summary_df):
    """
    Helper used to accumulate census results.

    Returns a copy of summary_df when df has no data (df.empty); otherwise returns df with the
    rows of summary_df concatenated below it. Row indexes are kept as they are.
    """
    return summary_df.copy() if df.empty else pd.concat([df, summary_df])

def sum_multiple_variables(df, variable_list):
    """
    Runs sum_across_levels for several variables and stacks the results.

    Parameters:
    df (DataFrame): Data handed unchanged to sum_across_levels.
    variable_list (iterable): Pairs of (variable code, category name), one per variable.

    Returns:
    DataFrame: The per-variable summaries accumulated with create_or_append_df, in the order
    of variable_list. An empty DataFrame when variable_list is empty.
    """
    stacked = pd.DataFrame()
    for code, category in variable_list:
        # Echo the code currently being processed.
        print(code)
        stacked = create_or_append_df(stacked, sum_across_levels(df, code, category))
    return stacked

def sum_across_levels(df, variable_code, category_name):
    """
    Sums 'value' for one variable code at basin, county, north/south and state level.

    Parameters:
    df (DataFrame): Data to aggregate. Must contain 'dataset', 'sample_level', 'variable_name',
        'variable_code', 'variable_category', 'year_sample', 'county_name', 'north_south',
        'state_name' and a numeric 'value' column.
    variable_code (str): The variable code to filter and aggregate.
    category_name (str): Accepted for interface compatibility but not used; the 'Category'
        column of the result is copied from the data's 'variable_category'.
        NOTE(review): confirm whether this argument was meant to override the category.

    Returns:
    DataFrame: Basin rows first, then county, north/south and state rows, with a fresh
    0..n-1 index and the columns 'variable_code', 'variable_name', 'value', 'Geography',
    'year_sample', 'dataset', 'sample_level', 'Category'.
    """
    base_keys = ['dataset', 'sample_level', 'variable_name', 'variable_code',
                 'variable_category', 'year_sample']
    columns_to_keep = ['variable_code', 'variable_name', 'variable_category', 'value',
                       'Geography', 'year_sample', 'dataset', 'sample_level']
    # None is the whole basin; every other entry is an extra grouping column whose values
    # become the Geography label.
    geography_levels = [None, 'county_name', 'north_south', 'state_name']

    filtered_df = df.loc[df['variable_code'] == variable_code]

    summaries = []
    for level_column in geography_levels:
        keys = base_keys if level_column is None else base_keys + [level_column]
        # Aggregate only 'value'. numeric_only is spelled out because the first positional
        # argument of GroupBy.sum is numeric_only, not a column selection.
        summary = filtered_df.groupby(keys, as_index=False)[['value']].sum(numeric_only=True)
        summary['Geography'] = 'Basin' if level_column is None else summary[level_column]
        summaries.append(summary[columns_to_keep])

    combined_summary = pd.concat(summaries, ignore_index=True)

    # Assign-then-drop (rather than rename) so 'Category' ends up as the last column.
    combined_summary['Category'] = combined_summary['variable_category']
    combined_summary = combined_summary.drop(columns=['variable_category'])

    return combined_summary


In [6]:
# Fetch layer 28 of the TRPA Demographics map service into census_data.
# NOTE(review): get_fs_data is not defined in this notebook; presumably it is provided by
# `from utils import *` -- confirm where it lives and what type it returns.
census_data = get_fs_data('https://maps.trpa.org/server/rest/services/Demographics/MapServer/28')