# Setup and define variables

In [None]:
import os

import pandas as pd
import numpy as np
import requests
import pyodbc
import arcpy
from arcgis.features import FeatureLayer
# Census API key. Set the CENSUS_API_KEY environment variable to use your own
# key; otherwise this falls back to Andy's key that the notebook has always used.
# NOTE(review): a key committed to source control should be rotated and removed.
census_api_key = os.environ.get('CENSUS_API_KEY') or '9a73d08c296b844e58f1c70bd19c831826da5cbf'

# Need to define datatypes so that FIPS code doesn't get cast as int and drop leading 0s
# NOTE(review): nothing in this cell applies `dtypes` to tahoe_geometry - confirm
# the feature service already returns these fields as text.
dtypes = {
    'YEAR' : str,
    'STATE': str,
    'GEOGRAPHY': str,
    'GEOID': str,
    'TRPAID':str,
    'NEIGHBORHOOD': str
}

# The census geographies to keep are read from this TRPA Demographics feature layer
service_url = 'https://maps.trpa.org/server/rest/services/Demographics/FeatureServer/27'

feature_layer = FeatureLayer(service_url)
tahoe_geometry_fields = ['YEAR', 'STATE', 'GEOGRAPHY', 'GEOID', 'TRPAID', 'NEIGHBORHOOD']
# Only the attribute table is used below, so skip downloading the shapes
query_result = feature_layer.query(
    out_fields=",".join(tahoe_geometry_fields),
    return_geometry=False,
)
# The query result exposes one feature object per record
feature_list = query_result.features

# Create a pandas DataFrame with one row per feature from each feature's attribute dictionary
tahoe_geometry = pd.DataFrame([feature.attributes for feature in feature_list])



## Define the census download functions

In [11]:
#Helper used to accumulate the frames returned by successive census requests
def create_or_append_df(df, summary_df):
    """Return ``summary_df`` stacked underneath ``df``.

    Args:
        df: Frame accumulated so far; may be an empty DataFrame.
        summary_df: Newly downloaded frame to add.

    Returns:
        An independent copy of ``summary_df`` when ``df`` is empty, otherwise
        the row-wise concatenation of ``df`` and ``summary_df`` (original
        index labels are kept).
    """
    if not df.empty:
        return pd.concat([df, summary_df])
    return summary_df.copy()

#This gets the result of the get request and does some data wrangling to make it fit our structure
def get_request_census(request_url, sample_level, geo_name):
    """Fetch one Census API URL and return its rows as a labelled DataFrame.

    Args:
        request_url: Fully built Census API request URL.
        sample_level: Value written to the ``sample_level`` column of every row.
        geo_name: Value written to the ``Geo_Name`` column of every row.

    Returns:
        DataFrame whose column names come from the first row of the JSON
        payload and whose data are the remaining rows (index starts at 1),
        plus the ``sample_level`` and ``Geo_Name`` columns.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(request_url, timeout=60)
    # Surface the HTTP status instead of an opaque JSON decode error
    response.raise_for_status()

    df = pd.DataFrame(response.json())
    #The json returns column names in the first row
    df.columns = df.iloc[0]
    # .copy() so the assignments below write to an independent frame, not a slice
    df = df[1:].copy()
    df['sample_level'] = sample_level
    df['Geo_Name'] = geo_name
    return df



def get_variable_data(year, dataset, geometry_return, variable, variablename, census_api_key, census_geom_year, tahoe_geometry, variable_category):
    """Download one census variable for the five Tahoe counties and keep the Tahoe rows.

    One request is made per county (El Dorado and Placer in CA; Douglas,
    Washoe and Carson City in NV) because of how the API's geography
    hierarchy is structured.

    Args:
        year: Sample year used in the API path, e.g. ``'2020'``.
        dataset: Dataset path, e.g. ``'acs/acs5'`` or ``'dec/sf1'``.
        geometry_return: Geography to return, e.g. ``'tract'`` or
            ``'block group'`` (spaces are URL-encoded here).
        variable: Variable code. For ``acs/acs5`` datasets pass the code
            WITHOUT the trailing ``E``/``M``: both the estimate (``E``) and
            the margin of error (``M``) are requested.
        variablename: Label stored in the ``variable_name`` column.
        census_api_key: Census API key appended to every request.
        census_geom_year: Geometry vintage; appended to the GEO_ID to build
            ``TRPAID``.
        tahoe_geometry: DataFrame with ``TRPAID`` and ``NEIGHBORHOOD``
            columns; only rows whose ``TRPAID`` appears in it are returned.
        variable_category: Label stored in the ``variable_category`` column.

    Returns:
        DataFrame with the API columns (second column renamed ``value`` and
        cast to float, third column ``MarginOfError``), the metadata columns
        added below, and ``NEIGHBORHOOD`` merged in from ``tahoe_geometry``.

    Raises:
        requests.HTTPError: If any request answers with a 4xx/5xx status.
    """
    #Need to make five seperate api calls because of the geometry structure
    county_states = {
        '06': ['017', '061'],
        '32': ['005', '031', '510']
    }
    base_url = 'https://api.census.gov/data'
    is_acs5 = 'acs/acs5' in dataset
    #Formatting to match html get request
    geometry_return = geometry_return.replace(" ", "%20")
    #This adds tract level to make block groups or blocks get request valid
    if geometry_return == 'tract':
        geometry_level = ''
    else:
        geometry_level = '%20tract:*'
    if is_acs5:
        variable = variable + 'E,' + variable + 'M'

    frames = []
    for state, counties in county_states.items():
        for county in counties:
            request_url = f'{base_url}/{year}/{dataset}?get=GEO_ID,{variable}&for={geometry_return}:*&in=state:{state}%20county:{county}{geometry_level}&key={census_api_key}'
            print(request_url)
            # A timeout keeps a stalled connection from hanging the notebook forever
            response = requests.get(request_url, timeout=60)
            # Surface the HTTP status instead of an opaque JSON decode error
            response.raise_for_status()

            df = pd.DataFrame(response.json())
            #The json returns column names in the first row
            df.columns = df.iloc[0]
            frames.append(df[1:])

    # Stack the county frames once; fall back to the last (empty) frame so the
    # column layout is still available when no county returned any rows.
    non_empty = [frame for frame in frames if not frame.empty]
    df_total = pd.concat(non_empty) if non_empty else frames[-1].copy()

    df_total['variable_code'] = variable
    df_total['variable_name'] = variablename
    df_total['variable_category'] = variable_category
    df_total['year_sample'] = year
    df_total['sample_level'] = geometry_return.replace("%20", " ")
    df_total['dataset'] = dataset
    df_total['census_geom_year'] = census_geom_year
    # Keep only the part of the GEO_ID after 'US'
    df_total['GEO_ID'] = df_total['GEO_ID'].str.split('US').str[1]
    df_total['TRPAID'] = df_total['GEO_ID'] + df_total['census_geom_year'].astype(str)

    # Rename by position through a fresh label list: writing into
    # `columns.values` mutates the Index in place, which pandas does not
    # support (the array is read-only under Copy-on-Write).
    column_names = df_total.columns.tolist()
    column_names[1] = 'value'
    if is_acs5:
        column_names[2] = 'MarginOfError'
    df_total.columns = column_names

    df_total['value'] = df_total['value'].astype(float)
    if is_acs5:
        # Only the estimate code is reported, not the 'E,M' pair that was requested
        df_total['variable_code'] = df_total['variable_code'].str.split(',').str[0]
    else:
        # np.nan, not np.NaN: the alias was removed in NumPy 2.0
        df_total.insert(2, 'MarginOfError', np.nan)
    if geometry_return == 'tract':
        # Tract requests have no 'block group' column; add an empty one before 'tract'
        tract_col_loc = df_total.columns.get_loc('tract')
        df_total.insert(tract_col_loc, 'block group', np.nan)

    #filter to just the geographies listed in tahoe_geometry
    df_total = df_total[df_total['TRPAID'].isin(tahoe_geometry['TRPAID'])]
    df_total = pd.merge(df_total, tahoe_geometry[['TRPAID', 'NEIGHBORHOOD']], on='TRPAID', how='left')

    return df_total

def get_non_tahoe_data(year,dataset, variable, variablename, census_api_key, census_geom_year, variable_category):
    """Download one census variable for the comparison regions outside the basin.

    Requests, in order: the Reno-Sparks and Sacramento MSAs and the San
    Francisco combined statistical area (skipped when ``year == "2000"``),
    the five counties, then the two states (CA, NV).

    Args:
        year: Sample year used in the API path, as a string.
        dataset: Dataset path, e.g. ``'acs/acs5'`` or ``'dec/sf1'``.
        variable: Variable code, sent to the API exactly as given.
        variablename: Label stored in the ``variable_name`` column.
        census_api_key: Census API key appended to every request.
        census_geom_year: Geometry vintage; appended to the GEO_ID to build
            ``GEO_CODE``.
        variable_category: Label stored in the ``variable_category`` column.

    Returns:
        DataFrame with one row per requested region: the API columns (second
        column renamed ``value``), ``sample_level``, ``Geo_Name`` and the
        metadata columns added below.
    """
    base_url = 'https://api.census.gov/data'
    query_prefix = f'{base_url}/{year}/{dataset}?get=GEO_ID,{variable}'
    key_suffix = f'&key={census_api_key}'
    county_states = {
        '06': ['017', '061'],
        '32': ['005', '031', '510']
    }
    state_names = {
        '06': 'CA',
        '32': 'NV'
    }
    county_names = {
        '017': 'El Dorado County',
        '061': 'Placer County',
        '005': 'Douglas County',
        '031': 'Washoe County',
        '510': 'Carson City County'
    }
    #Need to update this so that it handles the different years - are 2010 and 2020 the same?
    urban_centers = {
        'Reno-Sparks MSA': '39900',
        'Sacramento MSA': '40900',
    }
    # NOTE(review): 'Sanfranciso' is misspelled, but it is written to Geo_Name;
    # confirm nothing downstream matches on it before correcting.
    combined_metro_areas = {
        'Sanfranciso CMSA': '488'
    }
    # Year-2000 metro areas are not requested yet. Codes noted for when they
    # are: Reno-Sparks MSA 6720, Sacramento MSA 6922, San Francisco CMSA 7362.

    # (url, sample_level, geo_name) for every region, in request order
    region_requests = []
    if year != "2000":
        for urban_center, urban_center_code in urban_centers.items():
            region_requests.append((
                f'{query_prefix}&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:{urban_center_code}{key_suffix}',
                'MSA',
                urban_center,
            ))
        for cma, cma_code in combined_metro_areas.items():
            region_requests.append((
                f'{query_prefix}&for=combined%20statistical%20area:{cma_code}{key_suffix}',
                'MSA',
                cma,
            ))
    for state, counties in county_states.items():
        for county in counties:
            region_requests.append((
                f'{query_prefix}&for=county:{county}&in=state:{state}{key_suffix}',
                'County',
                county_names[county],
            ))
    for state in county_states:
        region_requests.append((
            f'{query_prefix}&for=state:{state}{key_suffix}',
            'State',
            state_names[state],
        ))

    df_total = pd.DataFrame()
    for request_url, sample_level, geo_name in region_requests:
        print(request_url)
        df = get_request_census(request_url, sample_level, geo_name)
        df_total = create_or_append_df(df_total, df)

    df_total['variable_code'] = variable
    df_total['variable_name'] = variablename
    df_total['variable_category'] = variable_category
    df_total['year_sample'] = year
    df_total['dataset'] = dataset
    df_total['census_geom_year'] = census_geom_year
    # Keep only the part of the GEO_ID after 'US'
    df_total['GEO_ID'] = df_total['GEO_ID'].str.split('US').str[1]
    df_total['GEO_CODE'] = df_total['GEO_ID'] + df_total['census_geom_year'].astype(str)

    # Rename by position through a fresh label list: writing into
    # `columns.values` mutates the Index in place, which pandas does not
    # support (the array is read-only under Copy-on-Write).
    column_names = df_total.columns.tolist()
    column_names[1] = 'value'
    df_total.columns = column_names
    return df_total

def census_download_wrapper(variable_file):
    """Download every variable listed in a CSV for the Tahoe geographies.

    Args:
        variable_file: Path to a CSV with one variable per row. The columns
            read are ``Year``, ``Datasource Name``, ``GeometryLevel``,
            ``CodeNumber``, ``Variable``, ``census_geom_year`` and
            ``Category``.

    Returns:
        The ``get_variable_data`` results for all rows, stacked into one
        DataFrame. Uses the notebook-level ``census_api_key`` and
        ``tahoe_geometry``.
    """
    # Read these columns as text instead of letting pandas infer numeric types.
    # NOTE(review): 'Datasource' is listed here but the column read below is
    # 'Datasource Name' - confirm which header the CSVs use.
    text_columns = (
        'Variable',
        'Code',
        'Category',
        'Datasource',
        'CodeNumber',
        'Year',
        'census_geom_year',
        'GeometryLevel',
    )
    variables = pd.read_csv(variable_file, dtype=dict.fromkeys(text_columns, str))

    df_values = pd.DataFrame()
    for row_number, spec in variables.iterrows():
        # Progress indicator: one line per variable
        print(row_number)
        downloaded = get_variable_data(
            spec['Year'],
            spec['Datasource Name'],
            spec['GeometryLevel'],
            spec['CodeNumber'],
            spec['Variable'],
            census_api_key,
            spec['census_geom_year'],
            tahoe_geometry,
            spec['Category'],
        )
        df_values = create_or_append_df(df_values, downloaded)
    return df_values

def census_download_wrapper_non_tahoe(variable_file):
    """Download every variable listed in a CSV for the non-Tahoe comparison regions.

    Args:
        variable_file: Path to a CSV with one variable per row. The columns
            read are ``Year``, ``Datasource Name``, ``CodeNumber``,
            ``Variable``, ``census_geom_year`` and ``Category``.

    Returns:
        The ``get_non_tahoe_data`` results for all rows, stacked into one
        DataFrame. Uses the notebook-level ``census_api_key``.
    """
    # Read these columns as text instead of letting pandas infer numeric types.
    # NOTE(review): 'Datasource' is listed here but the column read below is
    # 'Datasource Name' - confirm which header the CSVs use.
    text_columns = (
        'Variable',
        'Code',
        'Category',
        'Datasource',
        'CodeNumber',
        'Year',
        'census_geom_year',
        'GeometryLevel',
    )
    variables = pd.read_csv(variable_file, dtype=dict.fromkeys(text_columns, str))

    df_values = pd.DataFrame()
    for row_number, spec in variables.iterrows():
        # Progress indicator: one line per variable
        print(row_number)
        downloaded = get_non_tahoe_data(
            spec['Year'],
            spec['Datasource Name'],
            spec['CodeNumber'],
            spec['Variable'],
            census_api_key,
            spec['census_geom_year'],
            spec['Category'],
        )
        df_values = create_or_append_df(df_values, downloaded)
    return df_values

def load_variable_multiple_year(year_range, dataset, geometry_return, variable, variablename, census_api_key, census_geom_year, tahoe_geometry, variable_category):
    """Download one variable for several sample years and stack the results.

    Args:
        year_range: Iterable of sample years (strings such as ``'2015'``).
        dataset: Dataset path passed through to ``get_variable_data``.
        geometry_return: Geography level passed through to ``get_variable_data``.
        variable: Variable code passed through to ``get_variable_data``.
        variablename: Variable label passed through to ``get_variable_data``.
        census_api_key: Census API key.
        census_geom_year: Ignored. The geometry year is chosen per sample
            year below; the parameter is kept so existing calls still work.
        tahoe_geometry: Tahoe geography table passed through to
            ``get_variable_data``.
        variable_category: Category label passed through to ``get_variable_data``.

    Returns:
        The per-year ``get_variable_data`` frames concatenated in
        ``year_range`` order (an empty DataFrame if ``year_range`` is empty).
    """
    df_return = pd.DataFrame()
    for year in year_range:
        # Sample years from 2020 onward use 2020 geometry, earlier ones 2010.
        # Comparing numerically also covers years after 2022 and int inputs.
        geom_year = '2020' if int(year) >= 2020 else '2010'
        df = get_variable_data(year, dataset, geometry_return, variable, variablename, census_api_key, geom_year, tahoe_geometry, variable_category)
        if df_return.empty:
            df_return = df
        else:
            df_return = pd.concat([df_return, df])
    return df_return


# Download data sets

## Download Data Sets

### Download core acs 2020 block group data

In [None]:
# Download the variables listed in the 2020 ACS block group variable file
acs_bg_2020 = census_download_wrapper(
    variable_file='census_variables_acs_blockgroup_2020.csv'
)

In [None]:
# Download the variables listed in the 2020 ACS tract variable file
acs_tract_2020 = census_download_wrapper(
    variable_file='census_variables_acs_tract_2020.csv'
)

In [None]:
# Block group median home values.
# Forward slash in the path: '\c' is an invalid escape sequence and a
# backslash separator only resolves on Windows.
acs_bg_homevalue_2020 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_blockgroup_Median Home Values.csv')
acs_bg_homevalue_2020.to_excel("acs_bg_homevalue_2020.xlsx")

In [None]:
# Tract home values.
# Forward slash in the path: '\c' is an invalid escape sequence and a
# backslash separator only resolves on Windows.
acs_tract_homevalue_2020 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_tract_home_values.csv')
acs_tract_homevalue_2020.to_excel("acs_tract_homevalue_2020.xlsx")

## Download 2021 acs tract data

In [None]:
# ACS tract variables, list 2020_1 (forward-slash path: '\c' is an invalid escape)
acs_tract_2020_1 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_tract_2020_1.csv')
acs_tract_2020_1.to_excel("Census_Data_Downloads/acs_tract_2020_1.xlsx", index=False)

In [None]:
# Internet variables (forward-slash path: '\c' is an invalid escape)
acs_internet_2021 = census_download_wrapper('Census_Variable_Lists/census_variables_internet.csv')
acs_internet_2021.to_excel("Census_Data_Downloads/acs_internet_2021.xlsx", index=False)

In [None]:
# ACS tract variables, list 2020_2 (forward-slash path: '\c' is an invalid escape)
acs_tract_2020_2 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_tract_2020_2.csv')
acs_tract_2020_2.to_excel("Census_Data_Downloads/acs_tract_2020_2.xlsx", index=False)

In [None]:
# ACS tract variables, list 2010_3 (forward-slash path: '\c' is an invalid escape)
acs_tract_2010_3 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_tract_2010_3.csv')
acs_tract_2010_3.to_excel("Census_Data_Downloads/acs_tract_2010_3.xlsx", index=False)

In [None]:
# ACS tract variables, list 2010_4 (forward-slash path: '\c' is an invalid escape)
acs_tract_2010_4 = census_download_wrapper('Census_Variable_Lists/census_variables_acs_tract_2010_4.csv')
acs_tract_2010_4.to_excel("Census_Data_Downloads/acs_tract_2010_4.xlsx", index=False)

## Download 2000 Census Data

In [None]:
# Demographic variables from the 2000 list, first half (forward-slash path: '\d' is an invalid escape).
# NOTE(review): the variable and output file are named 2020 although the
# variable list is the 2000 one - confirm the intended naming.
dem_tract_2020 = census_download_wrapper('Census_Variable_Lists/demographic_variables_2000.csv')
dem_tract_2020.to_excel("dem_tract_2020_first_half.xlsx")

In [None]:
# Demographic variables from the 2000 list, second half (forward-slash path: '\d' is an invalid escape).
# NOTE(review): the variable and output file are named 2020 although the
# variable list is the 2000 one - confirm the intended naming.
dem_tract_2020_2 = census_download_wrapper('Census_Variable_Lists/demographic_variables_2000_second.csv')
dem_tract_2020_2.to_excel("dem_tract_2020_second.xlsx")

## Download 2010 DEC Census Data

In [None]:
# 2010 decennial tract variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dem_tract_2010 = census_download_wrapper('Census_Variable_Lists/census_variables_dec_tract_2010.csv')
dem_tract_2010.to_excel("Census_Data_Downloads/dem_tract_2010.xlsx")

In [None]:
# 2010 decennial tract age variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dem_tract_2010_age = census_download_wrapper('Census_Variable_Lists/census_variables_dec_tract_2010_age.csv')
dem_tract_2010_age.to_excel("Census_Data_Downloads/dem_tract_2010_age.xlsx")

In [None]:
# 2000 decennial tract age variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dem_tract_2000_age = census_download_wrapper('Census_Variable_Lists/census_variables_dec_tract_2000_age.csv')
dem_tract_2000_age.to_excel("Census_Data_Downloads/dem_tract_2000_age.xlsx")

## Download 2020 Dec Census Data

In [None]:
# 2020 decennial tract variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dem_tract_2020 = census_download_wrapper('Census_Variable_Lists/census_variables_dec_tract_2020.csv')
dem_tract_2020.to_excel("Census_Data_Downloads/dem_tract_2020.xlsx", index=False)

In [None]:
# 2020 decennial tract age variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dem_tract_2020_age = census_download_wrapper('Census_Variable_Lists/census_variables_dec_tract_2020_age.csv')
dem_tract_2020_age.to_excel("Census_Data_Downloads/dem_tract_2020_age.xlsx", index=False)

In [None]:
# Additional housing variables (forward-slash paths: '\c' and '\h' are invalid escapes)
housing_additional = census_download_wrapper('Census_Variable_Lists/census_variables_housing_add.csv')
housing_additional.to_excel("Census_Data_Downloads/housing_additional.xlsx", index=False)

In [None]:
# Additional housing variables, full list (forward-slash paths: '\c' and '\h' are invalid escapes)
housing_additional_acs = census_download_wrapper('Census_Variable_Lists/census_variables_housing_add_full.csv')
housing_additional_acs.to_excel("Census_Data_Downloads/housing_additional_acs.xlsx", index=False)

In [None]:
# 2020 decennial block variables (forward-slash paths: '\c' and '\d' are invalid escapes)
dec_bloc_2020_additional = census_download_wrapper('Census_Variable_Lists/census_variables_dec_block_2020.csv')
dec_bloc_2020_additional.to_excel("Census_Data_Downloads/dec_bloc_2020_additional.xlsx", index=False)

In [None]:
# 2020 decennial block variables, v2 list (forward-slash paths: '\c' and '\d' are invalid escapes)
dec_bloc_2020_additional2 = census_download_wrapper('Census_Variable_Lists/census_variables_dec_block_2020_v2.csv')
dec_bloc_2020_additional2.to_excel("Census_Data_Downloads/dec_bloc_2020_additional_2.xlsx", index=False)

In [None]:
# Load the vehicles variable list from the working directory
demographic_variables_vehicles = pd.read_csv(
    filepath_or_buffer="demographic_variables_Vehicles.csv"
)

In [None]:

#Load 2011-2019 Variables - this takes a long time
variables = pd.read_csv("demographic_variables.csv")
# Only the ACS 5-year rows are downloaded here
acs_variables = variables.loc[(variables['Datasource Name'] == 'acs/acs5')]
acs_variables = acs_variables.reset_index()
df_test_acs = pd.DataFrame()
year_range_acs = [str(num) for num in range(2011, 2020)]

for index, row in acs_variables.iterrows():
    # load_variable_multiple_year chooses the geometry year for each sample
    # year itself, so the '2010' argument is only a placeholder.
    df = load_variable_multiple_year(year_range_acs, row['Datasource Name'], 'tract', row['CodeNumber'], row['Variable'], census_api_key, '2010', tahoe_geometry, row['Category'])
    # df_test_acs starts as an empty DataFrame and is never None, so use the
    # emptiness-aware helper rather than an `is None` test that cannot fire.
    df_test_acs = create_or_append_df(df_test_acs, df)
# NOTE(review): the file name says 2020 but the years requested are 2011-2019
df_test_acs.to_csv('acs_demographic_data_2020.csv')

In [None]:
# Work-from-home counts for the non-Tahoe comparison regions, 2011-2021
variable_datasource = 'acs/acs5'
variable_year_range = [str(sample_year) for sample_year in range(2011, 2022)]
census_geom_year = '2010'
variable_category = 'Employment'
df_work_from_home = pd.DataFrame()
# Each (name, code) pair is downloaded for every year, in this order
for variable_name, variable_code in (
    ('Work From Home', 'B99084_005E'),
    ('Did Not Work From Home', 'B99084_002E'),
):
    for year in variable_year_range:
        df = get_non_tahoe_data(
            year,
            variable_datasource,
            variable_code,
            variable_name,
            census_api_key,
            census_geom_year,
            variable_category,
        )
        df_work_from_home = create_or_append_df(df_work_from_home, df)


In [6]:
# 2020 age variables for the non-Tahoe comparison regions.
# Forward slash in the path: '\c' is an invalid escape sequence and a
# backslash separator only resolves on Windows.
test_non_tahoe = census_download_wrapper_non_tahoe('Census_Variable_Lists/census_variables_dec_tract_2020_age.csv')

0
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:39900&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:40900&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=combined%20statistical%20area:488&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=county:017&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=county:061&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=county:005&in=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2020/dec/dhc?get=GEO_ID,P12_002N&for=county:031&in=state:32

In [8]:
# 2010 age variables for the non-Tahoe comparison regions.
# Forward slash in the path: '\c' is an invalid escape sequence and a
# backslash separator only resolves on Windows.
non_tahoe_age_2010 = census_download_wrapper_non_tahoe('Census_Variable_Lists/census_variables_dec_tract_2010_age.csv')

0
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:39900&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:40900&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=combined%20statistical%20area:488&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=county:017&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=county:061&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=county:005&in=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2010/dec/sf1?get=GEO_ID,P012002&for=county:031&in=state:32&key=9a

In [12]:
# 2000 age variables for the non-Tahoe comparison regions.
# Forward slash in the path: '\c' is an invalid escape sequence and a
# backslash separator only resolves on Windows.
non_tahoe_age_2000 = census_download_wrapper_non_tahoe('Census_Variable_Lists/census_variables_dec_tract_2000_age.csv')

0
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=county:017&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=county:061&in=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=county:005&in=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=county:031&in=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=county:510&in=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=state:06&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012002&for=state:32&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
1
https://api.census.gov/data/2000/dec/sf1?get=GEO_ID,P012003&for=county:017&in=state:06&key=9a73d08c2

In [15]:
# Stack the 2000, 2010 and 2020 non-Tahoe age tables under a fresh 0..n-1 index
non_tahoe_age_data = pd.concat(
    (non_tahoe_age_2000, non_tahoe_age_2010, test_non_tahoe),
    ignore_index=True,
)
non_tahoe_age_data.to_excel('non_tahoe_age_data.xlsx')

In [None]:
#Template for manual download
variable_name = 'Population'
# For acs/acs5, give the code WITHOUT the E/M suffix: get_variable_data appends
# 'E' and 'M' itself, so 'B08134_002E' would be requested as 'B08134_002EE'.
variable_code = 'B08134_002'
variable_geometry_level = 'tract'
variable_datasource = 'acs/acs5'
variable_year_range = ['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
census_geom_year = '2010'
variable_category = 'Transportation'
df_variable = pd.DataFrame()
for year in variable_year_range:
    df = get_variable_data(year, variable_datasource, variable_geometry_level, variable_code, variable_name, census_api_key, census_geom_year, tahoe_geometry, variable_category)
    # Same accumulate-or-start pattern as the download wrappers
    df_variable = create_or_append_df(df_variable, df)
print(df_variable.head())

df_variable.to_csv('acs_travel_time.csv')