In [28]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
import pandas as pd
import us
from us import states
from census import Census
import json
# Census API Key
from config import api_key
c = Census(api_key, year=2021)

In [2]:
# Years targeted for collection in census data
years=[2018,2019,2020]

complete_years=pd.DataFrame()
for curr_year in years:
    census_data = c.acs5.state_county(fields = ('NAME', "B01003_001E", "B01002_001E", "B19301_001E", "B17001_002E", "B27019_001E"),
                                      state_fips = "*",
                                      county_fips = "*", year = curr_year)

    census_data_curr_year=pd.DataFrame(census_data)

    # Column Reordering and assign the data-types appropriate names
    census_data_curr_year.rename(columns={"B01003_001E": "Population",
                                     "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                     "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                     "B27019_001E" : "Educational Attainment"
                                     },inplace=True)

    census_data_curr_year["Year"]=curr_year
    frames=[complete_years,census_data_curr_year]
    complete_years=pd.concat(frames)
complete_years.reset_index()
complete_years.head()

Unnamed: 0,NAME,Population,Median Age,Per Capita Income,Poverty Count,Educational Attainment,state,county,Year
0,"Mineral County, Montana",4211.0,52.6,23744.0,705.0,3105.0,30,61,2018
1,"Petroleum County, Montana",432.0,52.1,32565.0,35.0,336.0,30,69,2018
2,"Powell County, Montana",6861.0,45.3,25060.0,666.0,3790.0,30,77,2018
3,"Sanders County, Montana",11521.0,53.2,23822.0,2210.0,8678.0,30,89,2018
4,"Wibaux County, Montana",1175.0,46.0,23137.0,180.0,777.0,30,109,2018


In [3]:
# Convert to Data to Json
complete_years.to_json("complete_years.json",orient="records")

In [4]:
# Open Census Json Data, store in varaible
states_json_file = open("complete_years.json")
state_json_data = json.load(states_json_file)

# Open GeoJson Data, store in varaible
geojson_file = open("static/data/GeoJsons.json")
geojson_data = json.load(geojson_file)

In [5]:
## This block is to condense the State Census Jsons. 
## Outputs a single Json per county, with a key-value pair for each year of data

# Utility List to track which counties have been collected
counties_collected = []

# List to store the outputed dictionaries
counties_data = []


for county in state_json_data:
    
    #If this is the first time encountering a particular county, creates a new dictionary for it, stores data
    if county["NAME"] not in counties_collected:
        counties_collected.append(county["NAME"])
        json = {}
        json["name"] = county["NAME"],
        json["state"] = county["state"]
        json["county"] = county["county"]
        information = {}
        information["Population"] = county["Population"],
        information["Median Age"] = county["Median Age"]
        information["Per Capita Income"] = county["Per Capita Income"]
        information["Poverty Count"] = county["Poverty Count"]
        information['Educational Attainment'] = county['Educational Attainment']
        json[county["Year"]] = information
        counties_data.append(json)
        
    # If it has arleady encounterd the county, appends the standing entry and stores the new year's data
    else:
        location = counties_collected.index(county["NAME"])
        json = counties_data[location]
        information = {}
        information["Population"] = county["Population"],
        information["Median Age"] = county["Median Age"]
        information["Per Capita Income"] = county["Per Capita Income"]
        information["Poverty Count"] = county["Poverty Count"]
        information['Educational Attainment'] = county['Educational Attainment']
        json[county["Year"]] = information
        
#Sample Output
counties_data[0]

{'name': ('Mineral County, Montana',),
 'state': '30',
 'county': '061',
 2018: {'Population': (4211.0,),
  'Median Age': 52.6,
  'Per Capita Income': 23744.0,
  'Poverty Count': 705.0,
  'Educational Attainment': 3105.0},
 2019: {'Population': (4251.0,),
  'Median Age': 51.7,
  'Per Capita Income': 23621.0,
  'Poverty Count': 778.0,
  'Educational Attainment': 3126.0},
 2020: {'Population': (4330.0,),
  'Median Age': 52.3,
  'Per Capita Income': 28644.0,
  'Poverty Count': 814.0,
  'Educational Attainment': 3236.0}}

In [6]:
# Create a list containing unique identifies for each county. 
# These values are a string of state+county (EX: State: '20', County: "173" >>> 20173)

county_ids = []
for county in counties_data:
    county_id = county["state"]+county["county"]
    county_ids.append(county_id)

In [20]:
#Iterate through each GeoJson, pair the corresponding CensusDataJson

#Track Failed which could not pair
fails =[]


for county in geojson_data:
    
    #Generate the countyID as above for each GeoJson
    geojson_county_id = county["properties"]['STATE'] + county["properties"]['COUNTY']
    try:
        #Attempts to locate any county which has the same county_id in the censusJson List
        location = county_ids.index(str(geojson_county_id))
        matching_county_data = counties_data[location]
        county["properties"][2018] = matching_county_data[2018]
        county["properties"][2019] = matching_county_data[2019]
        county["properties"][2020] = matching_county_data[2020]
    except Exception:
        
        #If it cannot locate it stores the id for this county in the fail list
        fails.append(geojson_county_id)

In [21]:
#number of fail counties
print(len(geojson_data))
print(len(fails))
print(fails)

3217
0
[]


In [22]:
#Remove counties from GeoJson list which failed

for county in geojson_data:
    #Generate the countyID as above for each GeoJson
    geojson_county_id = county["properties"]['STATE'] + county["properties"]['COUNTY']
    if geojson_county_id in fails:
        geojson_data.remove(county)

#Length of final geojson_data list
print(len(geojson_data))

3217


In [26]:
geojson_data

[{'type': 'Feature',
  'properties': {'GEO_ID': '0500000US02275',
   'STATE': '02',
   'COUNTY': '275',
   'NAME': 'Wrangell',
   'LSAD': 'Cty&Bor',
   'CENSUSAREA': 2541.483,
   2018: {'Population': (2484.0,),
    'Median Age': 49.5,
    'Per Capita Income': 31489.0,
    'Poverty Count': 192.0,
    'Educational Attainment': 1829.0},
   2019: {'Population': (2502.0,),
    'Median Age': 49.4,
    'Per Capita Income': 32286.0,
    'Poverty Count': 194.0,
    'Educational Attainment': 1841.0},
   2020: {'Population': (2510.0,),
    'Median Age': 47.8,
    'Per Capita Income': 32159.0,
    'Poverty Count': 200.0,
    'Educational Attainment': 1811.0}},
  'geometry': {'type': 'MultiPolygon',
   'coordinates': [[[[-132.569885, 56.633502],
      [-132.570351, 56.633696],
      [-132.570123, 56.633934],
      [-132.541203, 56.664034],
      [-132.518265, 56.670282],
      [-132.444501, 56.653626],
      [-132.349323, 56.651246],
      [-132.335975, 56.642619],
      [-132.40022, 56.604548],
  

In [29]:
with open('FinalData.json', 'w', encoding='utf-8') as f:
    json.dump(geojson_data, f, ensure_ascii=False, indent=4)