In [None]:
import pandas as pd
import numpy as np
import requests
import pyodbc
import arcpy
from arcgis.features import FeatureLayer
# This is using Andy's Census API KEy
census_api_key = '9a73d08c296b844e58f1c70bd19c831826da5cbf'

# Need to define datatypes so that FIPS code doesn't get cast as int and drop leading 0s
dtypes = {
    'YEAR' : str,
    'STATE': str,
    'GEOGRAPHY': str,
    'GEOID': str,
    'TRPAID':str,
    'NEIGHBORHOOD': str
}

#Manually defined list of census tracts that are within the basin
 
service_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'

feature_layer = FeatureLayer(service_url)
tahoe_geometry_fields = ['YEAR', 'STATE', 'GEOGRAPHY', 'GEOID', 'TRPAID', 'NEIGHBORHOOD']
query_result = feature_layer.query(out_fields=",".join(tahoe_geometry_fields))
# Convert the query result to a list of dictionaries
feature_list = query_result.features

# Create a pandas DataFrame from the list of dictionaries
tahoe_geometry = pd.DataFrame([feature.attributes for feature in feature_list])

In [None]:
#Download HIN data
# get spatially enabled dataframe
def download_sdf(service_url):
    feature_layer = FeatureLayer(service_url)
    query_result = feature_layer.query()
    # Convert the query result to a list of dictionaries
    sdf = query_result.sdf
    return sdf

zipcodes = download_sdf('https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_ZIP_Code_Areas_anaylsis/FeatureServer/0')


In [4]:
def get_jobs_data(year, variable, variablename, census_api_key, zipcodelist):
    base_url = 'https://api.census.gov/data'
    df_total=pd.DataFrame()
    
    #Formatting to match html get request
    #get the zipcodes for inclusion from tahoe_geometry
    #zipcodes = zipcodelist['ZIP'].loc[(zipcodelist['Basin']=='Y')&(zipcodelist['Year']==year)]
    zipcodes = zipcodelist['ZIP'].loc[zipcodelist['Year']==year]
    zipcodes = [int(num) for num in zipcodes]
    print(zipcodes)
    
    for zipcode in zipcodes:
        #print(f'{base_url}/{year}/cbp?get={variable}&for={geometry_return}:*&in=state:{state}%20county:{county}{geometry_level}&key={census_api_key}')
        request_url = f'{base_url}/{year}/cbp?get=NAME,{variable}&for=zip%20code:{zipcode}&key={census_api_key}'
        print(request_url) 
        response = requests.get(request_url)
        
        df = pd.DataFrame(response.json())
        #The json returns column names in the first row
        df.columns = df.iloc[0]
        df = df[1:]
        
        #Might as well add counties and states at this stage
        if df_total.empty:
            df_total=df
        else:
            df_total=pd.concat([df_total, df])
    df_total['year']=year
    df_total['variable_name'] = variablename
    df_total.columns.values[1]='value'
    df_total.columns.values[2]='value_flag'
    return df_total

In [5]:
lookup_csv='Census_Category_Lists/ZipCode_List.csv'
zip_lookup = pd.read_csv(lookup_csv)
variable_csv = 'Census_Category_Lists/Employment_Variables.csv'
variables = pd.read_csv(variable_csv)

In [6]:
df_total = pd.DataFrame()
for index, row in variables.iterrows():
    variable_name = row['Variable_Name']
    variable_code = row['Variable_Code']
    variable_flag = row['Noise_Flag']
    variable_code_nf = variable_code + ',' + variable_flag if variable_code and variable_code else variable_code
    start_year = 2018
    end_year = 2021
    year_list = list(range(start_year,end_year+1))
    for year in year_list:
        df = get_jobs_data(year, variable_code_nf,variable_name, census_api_key, zip_lookup)
        if df_total.empty:
            df_total=df
        else:
            df_total=pd.concat([df_total, df])

        

[89402, 89413, 89448, 89449, 89450, 89451, 89452, 96140, 96141, 96142, 96143, 96145, 96146, 96148, 96150, 96151, 96158, 96160, 96161, 96162]
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89402&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89413&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89448&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89449&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89450&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89451&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,EMP,EMP_N_F&for=zip%20code:89452&key=9a73d08c296b844e58f1c70bd19c8318

In [7]:
df_total.to_excel('employment_data.xlsx')

In [8]:
def get_est_data(year, variable, variablename, census_api_key, zipcodelist):
    base_url = 'https://api.census.gov/data'
    df_total=pd.DataFrame()
    
    #Formatting to match html get request
    #get the zipcodes for inclusion from tahoe_geometry
    #zipcodes = zipcodelist['ZIP'].loc[(zipcodelist['Basin']=='Y')&(zipcodelist['Year']==year)]
    zipcodes = zipcodelist['ZIP'].loc[zipcodelist['Year']==year]
    zipcodes = [int(num) for num in zipcodes]
    print(zipcodes)
    
    for zipcode in zipcodes:
        #print(f'{base_url}/{year}/cbp?get={variable}&for={geometry_return}:*&in=state:{state}%20county:{county}{geometry_level}&key={census_api_key}')
        request_url = f'{base_url}/{year}/cbp?get=NAME,{variable}&for=zip%20code:{zipcode}&key={census_api_key}'
        print(request_url) 
        response = requests.get(request_url)
        
        df = pd.DataFrame(response.json())
        #The json returns column names in the first row
        df.columns = df.iloc[0]
        df = df[1:]
        
        #Might as well add counties and states at this stage
        if df_total.empty:
            df_total=df
        else:
            df_total=pd.concat([df_total, df])
    df_total['year']=year
    df_total['variable_name'] = variablename
    df_total.columns.values[1]='value'
    return df_total

In [9]:
df_total_est = pd.DataFrame()
variable_code_nf='ESTAB'
variable_name = 'Number of Establishments'
for year in year_list:
        df = get_est_data(year, variable_code_nf,variable_name, census_api_key, zip_lookup)
        if df_total_est.empty:
            df_total_est=df
        else:
            df_total_est=pd.concat([df_total_est, df])

[89402, 89413, 89448, 89449, 89450, 89451, 89452, 96140, 96141, 96142, 96143, 96145, 96146, 96148, 96150, 96151, 96158, 96160, 96161, 96162]
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89402&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89413&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89448&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89449&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89450&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89451&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/cbp?get=NAME,ESTAB&for=zip%20code:89452&key=9a73d08c296b844e58f1c70bd19c831826da5cbf
https://api.census.gov/data/2018/

In [23]:
print(State_Lookup)
print(df_total[['zip code','year']])

{(89402, 2020): 'NV', (89413, 2020): 'NV', (89448, 2020): 'NV', (89449, 2020): 'NV', (89450, 2020): 'NV', (89451, 2020): 'NV', (89452, 2020): 'NV', (96140, 2020): 'CA', (96141, 2020): 'CA', (96142, 2020): 'CA', (96143, 2020): 'CA', (96145, 2020): 'CA', (96146, 2020): 'CA', (96148, 2020): 'CA', (96150, 2020): 'CA', (96151, 2020): 'CA', (96158, 2020): 'CA', (96160, 2020): 'CA', (96161, 2020): 'CA', (96162, 2020): 'CA', (89402, 2019): 'NV', (89413, 2019): 'NV', (89448, 2019): 'NV', (89449, 2019): 'NV', (89450, 2019): 'NV', (89451, 2019): 'NV', (89452, 2019): 'NV', (96140, 2019): 'CA', (96141, 2019): 'CA', (96142, 2019): 'CA', (96143, 2019): 'CA', (96145, 2019): 'CA', (96146, 2019): 'CA', (96148, 2019): 'CA', (96150, 2019): 'CA', (96151, 2019): 'CA', (96158, 2019): 'CA', (96160, 2019): 'CA', (96161, 2019): 'CA', (96162, 2019): 'CA', (89402, 2018): 'NV', (89413, 2018): 'NV', (89448, 2018): 'NV', (89449, 2018): 'NV', (89450, 2018): 'NV', (89451, 2018): 'NV', (89452, 2018): 'NV', (96140, 2018

In [33]:
State_Lookup = zip_lookup.set_index(['ZIP','Year'])['State'].to_dict()
County_Lookup = zip_lookup.set_index(['ZIP','Year'])['County'].to_dict()
Basin_Lookup = zip_lookup.set_index(['ZIP','Year'])['Basin'].to_dict()
zip_lookup_dict = zip_lookup.set_index(['ZIP','Year']).to_dict(orient='index')
df_total['zip code']=df_total['zip code'].astype(int)
# df_total['State'] = df_total.apply(lambda row: State_Lookup.get((row['zip code'],row['year'])), axis=1)
# df_total['County'] = df_total.apply(lambda row: County_Lookup.get((row['zip code'],row['year'])), axis=1)
# df_total['Basin'] = df_total.apply(lambda row: Basin_Lookup.get((row['zip code'],row['year'])), axis=1)
df_total['State'] = df_total.apply(lambda row: zip_lookup_dict.get((row['zip code'], row['year']), {}).get('State'), axis=1)
df_total['County'] = df_total.apply(lambda row: zip_lookup_dict.get((row['zip code'], row['year']), {}).get('County'), axis=1)
df_total['Basin'] = df_total.apply(lambda row: zip_lookup_dict.get((row['zip code'], row['year']), {}).get('Basin'), axis=1)


In [28]:
# Sample dictionary
my_dict = {'A': 10, 'B': 20, 'C': 30}

# Get the data type of the keys
keys_data_type = type(list(my_dict.keys())[0])

# Display the data type of the keys
print(f"Data type of dictionary keys: {keys_data_type}")


Data type of dictionary keys: <class 'str'>


In [30]:
print(df_total['zip code'].dtype)
keys_data_type = type(list(County_Lookup.keys())[0])

# Display the data type of the keys
print(f"Data type of dictionary keys: {keys_data_type}")

object
Data type of dictionary keys: <class 'int'>


In [None]:
jobs_2021 = get_jobs_data('2021', 2021, 'EMP_N_F,EMP', 'Employees', census_api_key, zip_lookup, 'test')

In [None]:
jobs_2020 = get_jobs_data('2020', 2020, 'EMP_N_F,EMP', 'Employees', census_api_key, zip_lookup, 'test')