## Importing Packages ##

In [1]:
# Import packages from Python standard library
import numpy as np
import pandas as pd
import re
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from shapely import wkt
import geopandas as gpd
import geodatasets
import ast
import folium

## Loading Datasets ##

In [2]:
#Reading in clean Internet Demographics dataset 
clean_internet_demographics = pd.read_csv('../data/clean/internet_demographics.csv')
clean_internet_demographics.head(2)

Unnamed: 0,geoid,qualifying name,current land area (sq. m),current water area (sq. m),Total Surveyed Computer Ownership,Std. Error: Total Surveyed Computer Ownership,total: has a computer,std. error: total: has a computer,total: no computer,std. error: total: no computer,...,std. error: total: satellite internet service,total: satellite internet service satellite internet service with no other type of internet subscription,std. error: total: satellite internet service satellite internet service with no other type of internet subscription,total: other service with no other type of internet subscription,std. error: total: other service with no other type of internet subscription,total: internet access without a subscription,std. error: total: internet access without a subscription,total: no internet access,std. error: total: no internet access,geometry
0,17031823303,"Census Tract 8233.03, Cook County, Illinois",2008920,0,1617,113.9394,1476,125.4545,141,54.54546,...,23.63636,0,7.272727,0,7.272727,129,61.21212,184,57.57576,"POLYGON ((-87.720963 41.683952, -87.720304 41...."
1,17031823304,"Census Tract 8233.04, Cook County, Illinois",3553267,0,2634,176.9697,2075,133.9394,559,134.5455,...,59.39394,10,7.272727,0,10.30303,117,62.42424,634,127.2727,MULTIPOLYGON (((-87.72003618903081 41.68396731...


**The way in which the raw data file was compiled into the clean data file makes it confusing to understand the census data (numbers don't seem to add up correctly), so I am going to read in the raw data and only pull out the 'total' columns.**

In [3]:
#Reading in raw Internet Demographics Census Dataset
full_raw_internet_demographics = pd.read_csv('../../can-summer-2023/data/raw/demographics/R13395758_SL140.csv')
raw_internet_demographics = full_raw_internet_demographics[['FIPS','Qualifying Name',
'Total: With An Internet Subscription',
'Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription',
'Total: With An Internet Subscription Broadband Of Any Type',
'Total: With An Internet Subscription Cellular Data Plan',
'Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription',
'Total:  Broadband Such As Cable Fiber Optic Or Dsl',
'Total:  Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription',
'Total:  Satellite Internet Service',
'Total:  Satellite Internet Service Satellite Internet Service With No Other Type Of Internet Subscription',
'Total:  Other Service With No Other Type Of Internet Subscription',
'Total: Internet Access Without A Subscription',
'Total: No Internet Access',
'Total: Has A Computer',
'Total: Has A Computer: With Dial-Up Internet Subscription Alone',
'Total: Has A Computer: With A Broadband Internet Subscription',
'Total: Has A Computer: Without An Internet Subscription',
'Total: No Computer',
'Total',
'Total: With An Internet Subscription Dial-Up Alone',
'Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl',
'Total: With An Internet Subscription Satellite Internet Service',
'Total: With An Internet Subscription Other Service',]]

raw_internet_demographics.head(3)

Unnamed: 0,FIPS,Qualifying Name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer,Total: Has A Computer: With Dial-Up Internet Subscription Alone,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service
0,Geo_FIPS,Geo_QName,ACS21_5yr_B28002002,ACS21_5yr_B28002003,ACS21_5yr_B28002004,ACS21_5yr_B28002005,ACS21_5yr_B28002006,ACS21_5yr_B28002007,ACS21_5yr_B28002008,ACS21_5yr_B28002009,...,ACS21_5yr_B28003002,ACS21_5yr_B28003003,ACS21_5yr_B28003004,ACS21_5yr_B28003005,ACS21_5yr_B28003006,ACS21_5yr_B28002001,ACS21_5yr_B28011003,ACS21_5yr_B28011004,ACS21_5yr_B28011005,ACS21_5yr_B28011006
1,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,2112,17,1870,225,299,2411,17,1710,62,0
2,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,2726,0,2468,258,270,2996,0,2234,128,31


In the raw_internet_demographics dataset that I made, it does not include the 'geometry' column that is in the clean_internet_demographics dataset. So, I need to merge the 'geometry' column into the raw_internet_demographics. So, I must first rename the 'Qualifying Name' column to 'qualifying name' so that the merge will work. I am also going to change the column name 'FIPS' to 'geoid', because the clean_internet_demographics dataset calls it 'geiod'.

In [4]:
#Renaming 'Qualifying Name' to 'qualifying name' so that I can merge the geometry column into the raw_internet_demographics from clean_internet_demographics
raw_internet_demographics_rename = raw_internet_demographics.rename(columns={'Qualifying Name': 'qualifying name','FIPS':'geoid'})

#Dropping first row that contains unnecessary descriptions
raw_internet_demographics_drop = raw_internet_demographics_rename.drop(labels=0,axis=0)
raw_internet_demographics_drop.head(2)

Unnamed: 0,geoid,qualifying name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer,Total: Has A Computer: With Dial-Up Internet Subscription Alone,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service
1,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,2112,17,1870,225,299,2411,17,1710,62,0
2,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,2726,0,2468,258,270,2996,0,2234,128,31


In [5]:
#Merging: to ensure that the geometry matches the correct census tracts, I merge using the 'qualifying name' column
to_merge = clean_internet_demographics[['qualifying name','geometry']]
to_merge.head()

Unnamed: 0,qualifying name,geometry
0,"Census Tract 8233.03, Cook County, Illinois","POLYGON ((-87.720963 41.683952, -87.720304 41...."
1,"Census Tract 8233.04, Cook County, Illinois",MULTIPOLYGON (((-87.72003618903081 41.68396731...
2,"Census Tract 7404, Cook County, Illinois","POLYGON ((-87.72008 41.691274, -87.719372 41.6..."
3,"Census Tract 7403, Cook County, Illinois","POLYGON ((-87.720431 41.698569, -87.720066 41...."
4,"Census Tract 8228.02, Cook County, Illinois","MULTIPOLYGON (((-87.720752 41.702042, -87.7207..."


In [6]:
#Merging the geometry column into the raw_internet_demographics_rename and renaming it
raw_internet_demo = raw_internet_demographics_drop.merge(to_merge, on='qualifying name')
raw_internet_demo.head(2)

Unnamed: 0,geoid,qualifying name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer: With Dial-Up Internet Subscription Alone,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service,geometry
0,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,17,1870,225,299,2411,17,1710,62,0,"POLYGON ((-87.676277 42.022965, -87.675968 42...."
1,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,0,2468,258,270,2996,0,2234,128,31,"POLYGON ((-87.684317 42.019481, -87.683901 42...."


In [8]:
#Removing census tracts that have 0's across all columns
raw_internet_demos = raw_internet_demo.drop(index=[411,863,864,865])
raw_internet_demos

Unnamed: 0,geoid,qualifying name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer: With Dial-Up Internet Subscription Alone,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service,geometry
0,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,17,1870,225,299,2411,17,1710,62,0,"POLYGON ((-87.676277 42.022965, -87.675968 42...."
1,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,0,2468,258,270,2996,0,2234,128,31,"POLYGON ((-87.684317 42.019481, -87.683901 42...."
2,17031010202,"Census Tract 102.02, Cook County, Illinois",1040,0,1040,862,151,852,161,56,...,0,1022,137,75,1234,0,852,56,0,"POLYGON ((-87.676745 42.01941, -87.67651175102..."
3,17031010300,"Census Tract 103, Cook County, Illinois",2548,0,2548,2320,352,2112,185,183,...,0,2508,327,160,2995,0,2112,183,100,"POLYGON ((-87.671212 42.019373, -87.670858 42...."
4,17031010400,"Census Tract 104, Cook County, Illinois",1487,0,1487,1423,216,1238,64,47,...,0,1487,187,360,2034,0,1238,47,0,"POLYGON ((-87.663213 42.012834, -87.662494 42...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,17031844600,"Census Tract 8446, Cook County, Illinois",742,0,742,722,182,512,0,51,...,0,722,19,68,809,0,512,51,40,"POLYGON ((-87.628928 41.816453, -87.628494 41...."
862,17031844700,"Census Tract 8447, Cook County, Illinois",583,0,583,521,166,417,62,16,...,0,583,22,48,653,0,417,16,0,"POLYGON ((-87.719891 41.852741, -87.71989 41.8..."
866,17043840000,"Census Tract 8400, DuPage County, Illinois",818,6,812,778,81,681,16,134,...,0,812,53,72,937,6,681,134,0,"POLYGON ((-87.938967 41.993515, -87.938927 41...."
867,17043840801,"Census Tract 8408.01, DuPage County, Illinois",642,0,642,566,47,575,71,34,...,0,620,38,298,956,0,575,34,5,"MULTIPOLYGON (((-87.939058 41.958485, -87.9390..."


When trying to take a percentage (dividing one column over another), Python does not see it as division for each numerical value. So, I have to convert the list of numbers in every column to be considered integers that can be divided.

In [9]:
#Convert list of numbers to integers
for i in range(2,23):
    
    raw_internet_demos.iloc[:, i] = pd.to_numeric(raw_internet_demos.iloc[:, i])

raw_internet_demos.head(2)

Unnamed: 0,geoid,qualifying name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer: With Dial-Up Internet Subscription Alone,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service,geometry
0,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,17,1870,225,299,2411,17,1710,62,0,"POLYGON ((-87.676277 42.022965, -87.675968 42...."
1,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,0,2468,258,270,2996,0,2234,128,31,"POLYGON ((-87.684317 42.019481, -87.683901 42...."


In [11]:
#Adding Column that calculates (Total: With an Internet Subscription) - (No Internet Access)/(Total: With an Internet Subscription)
raw_internet_demos['Percentage of People with Internet Access'] = ((raw_internet_demos['Total: With An Internet Subscription'] - (raw_internet_demos['Total: No Internet Access']))/ (raw_internet_demos['Total: With An Internet Subscription']))*100
raw_internet_demos


Unnamed: 0,geoid,qualifying name,Total: With An Internet Subscription,Total: With An Internet Subscription Dial-Up With No Other Type Of Internet Subscription,Total: With An Internet Subscription Broadband Of Any Type,Total: With An Internet Subscription Cellular Data Plan,Total: With An Internet Subscription Cellular Data Plan Cellular Data Plan With No Other Type Of Internet Subscription,Total: Broadband Such As Cable Fiber Optic Or Dsl,Total: Broadband Such As Cable Fiber Optic Or Dsl Broadband Such As Cable Fiber Optic Or Dsl With No Other Type Of Internet Subscription,Total: Satellite Internet Service,...,Total: Has A Computer: With A Broadband Internet Subscription,Total: Has A Computer: Without An Internet Subscription,Total: No Computer,Total,Total: With An Internet Subscription Dial-Up Alone,Total: With An Internet Subscription Broadband Such As Cable Fiber Optic Or Dsl,Total: With An Internet Subscription Satellite Internet Service,Total: With An Internet Subscription Other Service,geometry,Percentage of People with Internet Access
0,17031010100,"Census Tract 101, Cook County, Illinois",1952,17,1935,1761,225,1710,174,62,...,1870,225,299,2411,17,1710,62,0,"POLYGON ((-87.676277 42.022965, -87.675968 42....",81.096311
1,17031010201,"Census Tract 102.01, Cook County, Illinois",2543,0,2543,2231,266,2234,312,128,...,2468,258,270,2996,0,2234,128,31,"POLYGON ((-87.684317 42.019481, -87.683901 42....",88.596146
2,17031010202,"Census Tract 102.02, Cook County, Illinois",1040,0,1040,862,151,852,161,56,...,1022,137,75,1234,0,852,56,0,"POLYGON ((-87.676745 42.01941, -87.67651175102...",87.019231
3,17031010300,"Census Tract 103, Cook County, Illinois",2548,0,2548,2320,352,2112,185,183,...,2508,327,160,2995,0,2112,183,100,"POLYGON ((-87.671212 42.019373, -87.670858 42....",85.596546
4,17031010400,"Census Tract 104, Cook County, Illinois",1487,0,1487,1423,216,1238,64,47,...,1487,187,360,2034,0,1238,47,0,"POLYGON ((-87.663213 42.012834, -87.662494 42....",69.872226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,17031844600,"Census Tract 8446, Cook County, Illinois",742,0,742,722,182,512,0,51,...,722,19,68,809,0,512,51,40,"POLYGON ((-87.628928 41.816453, -87.628494 41....",90.97035
862,17031844700,"Census Tract 8447, Cook County, Illinois",583,0,583,521,166,417,62,16,...,583,22,48,653,0,417,16,0,"POLYGON ((-87.719891 41.852741, -87.71989 41.8...",87.993139
866,17043840000,"Census Tract 8400, DuPage County, Illinois",818,6,812,778,81,681,16,134,...,812,53,72,937,6,681,134,0,"POLYGON ((-87.938967 41.993515, -87.938927 41....",85.452323
867,17043840801,"Census Tract 8408.01, DuPage County, Illinois",642,0,642,566,47,575,71,34,...,620,38,298,956,0,575,34,5,"MULTIPOLYGON (((-87.939058 41.958485, -87.9390...",54.82866
