In [1]:
import numpy as np
import pandas as pd
import urllib.request
from bs4 import BeautifulSoup

In [2]:
# Load New Jersey county data from Wikipedia: county name, population
# density, and total population.
url = "https://en.wikipedia.org/wiki/List_of_counties_in_New_Jersey"
# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results could vary between machines.
soup = BeautifulSoup(html, "html.parser")

# All <tr> elements of the first table whose class is "wikitable sortable".
tablerows = soup.find("table", {"class": "wikitable sortable"}).find_all("tr")

# One (county name, population density, population) tuple per table row.
njcounties = []

# The first row is skipped (presumably the header row -- confirm against the
# live page if the table layout changes).
for row in tablerows[1:]:
    cells = row.find_all("td")
    countyname = row.find("th").find("a").string
    # The seventh <td> is read as the density; drop the trailing newline and
    # thousands separators before converting.
    popdensity = float(cells[6].string.replace('\n', '').replace(',', ''))
    # The first <span> in the row is read as the population. Convert it to
    # int so the column is numeric, like the density next to it.
    population = int(row.find("span").string.replace(',', ''))
    njcounties.append((countyname, popdensity, population))

# Columns are left with pandas' default integer labels (0, 1, 2).
njdf = pd.DataFrame(njcounties)

In [3]:
# Load New Jersey COVID-19 figures per county from Wikipedia: total cases,
# deaths, and recoveries.
url = "https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_New_Jersey"
# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results could vary between machines.
soup = BeautifulSoup(html, "html.parser")

# All <tr> elements of the first table whose class is "wikitable sortable".
tablerows = soup.find("table", {"class": "wikitable sortable"}).find_all("tr")

# Collect rows in a separate list so `covidbycounty` is only ever bound to
# the resulting DataFrame.
covidrows = []
# The first two and last two rows are skipped (presumably header and
# totals/footer rows -- confirm against the live page).
for row in tablerows[2:-2]:
    alltds = row.find_all("td")
    # Cells 1, 2 and 3 are read as cases, deaths and recoveries; each has its
    # newline and thousands separators removed before conversion to int.
    totalcovidcases, totaldeaths, totalrecovered = (
        int(td.string.replace('\n', '').replace(',', '')) for td in alltds[1:4]
    )
    covidrows.append((totalcovidcases, totaldeaths, totalrecovered))

# Columns are left with pandas' default integer labels (0, 1, 2).
covidbycounty = pd.DataFrame(covidrows)

In [4]:
# Load New Jersey median household income by county from Wikipedia.
url = "https://en.wikipedia.org/wiki/List_of_New_Jersey_locations_by_per_capita_income"
# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results could vary between machines.
soup = BeautifulSoup(html, "html.parser")

# All <tr> elements of the first table whose class is "wikitable sortable".
tablerows = soup.find("table", {"class": "wikitable sortable"}).find_all("tr")

# One (county name, median household income) tuple per county row.
listoftups = []
# The first row is skipped (presumably the header row -- confirm against the
# live page).
for row in tablerows[1:]:
    cells = row.find_all("td")
    countyname = cells[1].find("a").string
    # The table also carries state- and nation-level rows; keep counties only.
    if countyname in ('New Jersey', 'United States'):
        continue
    # The fourth <td> is read as the income; strip separators, the newline
    # and the dollar sign, then convert to int so the column is numeric.
    medianhousehold = int(
        cells[3].string.replace(',', '').replace('\n', '').replace('$', '')
    )
    listoftups.append((countyname, medianhousehold))

# Sort by county name; the frames are later joined purely by row position.
sorted_by_alpha = sorted(listoftups, key=lambda tup: tup[0])
# Columns are left with pandas' default integer labels (0, 1).
byincome = pd.DataFrame(sorted_by_alpha)

In [5]:
# Load New Jersey 2016 election results by county from the New York Times:
# Democratic and Republican vote counts.
def _vote_count(candidate_cell, party):
    """Return the integer vote count found inside one candidate <td>.

    Looks first for the <div> whose class attribute is exactly
    "eln-vote-count eln-swatch-light eln-<party>"; when the cell has no such
    <div>, falls back to the first <div> matching class "eln-vote-count".

    Args:
        candidate_cell: bs4 Tag for the candidate's table cell.
        party: "democrat" or "republican", the suffix of the swatch class.

    Returns:
        The vote count as an int, with newlines and thousands separators
        removed.
    """
    count_div = candidate_cell.find(
        "div", {"class": "eln-vote-count eln-swatch-light eln-" + party}
    )
    if count_div is None:
        count_div = candidate_cell.find("div", {"class": "eln-vote-count"})
    return int(count_div.string.replace('\n', '').replace(',', ''))


url = "https://www.nytimes.com/elections/2016/results/new-jersey"
# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results could vary between machines.
soup = BeautifulSoup(html, "html.parser")

# Rows with class "eln-row" inside the county results container.
nonhidden = soup.find("div", {"class": "eln-county-table-container"}).find_all("tr", {"class": "eln-row"})

# One (county name, Democratic votes, Republican votes) tuple per row.
votinglines = []
for row in nonhidden:
    countyname = row.find("td", {"class": "eln-cell eln-name"}).string
    # The cell with class "eln-cell eln-candidate" is read as the Democratic
    # candidate, the "eln-last-candidate" cell as the Republican candidate.
    demvotercount = _vote_count(
        row.find("td", {"class": "eln-cell eln-candidate"}), "democrat"
    )
    repvotercount = _vote_count(
        row.find("td", {"class": "eln-cell eln-candidate eln-last-candidate"}),
        "republican",
    )
    votinglines.append((countyname, demvotercount, repvotercount))

# Sort by county name; the frames are later joined purely by row position.
sort_by_alpha = sorted(votinglines, key=lambda tup: tup[0])
# Columns are left with pandas' default integer labels (0, 1, 2).
byvotinglines = pd.DataFrame(sort_by_alpha)

In [6]:
# Concatenate all the scraped frames side by side and label the columns.
#
# The frames are joined by row position only (none of them is keyed by
# county), so every frame must list the counties in the same order.
frames = [
    # Only the three scraped county columns: this keeps the cell idempotent,
    # because on a re-run `njdf` already holds the widened nine-column frame.
    njdf.iloc[:, :3],
    covidbycounty,
    # Positional slices: the scraped frames have integer column labels, so
    # string label slices such as '1':'1' are rejected by current pandas.
    byincome.iloc[:, 1:2],        # median income (drop the county name)
    byvotinglines.iloc[:, 1:3],   # Democratic and Republican vote counts
]

# pd.concat(axis=1) would silently pad shorter frames with NaN; fail loudly
# instead, since a length mismatch means rows no longer line up by county.
row_counts = [len(frame) for frame in frames]
if len(set(row_counts)) != 1:
    raise ValueError(
        "Cannot combine frames with differing row counts: %s" % row_counts
    )

njdf = pd.concat(frames, axis=1)
njdf.columns = ['CountyName', 'popDensity',
                'TotalPopulation', 'totalcovidcases',
                'totaldeaths', 'totalrecovered', 'medianincome', 'votedblue', 'votedred']
print(njdf)

           CountyName  popDensity TotalPopulation  totalcovidcases  \
0     Atlantic County      489.39          265429              211   
1       Bergen County     3868.02          936692             8928   
2   Burlington County      557.43          445384              954   
3       Camden County     2313.77          507078             1072   
4     Cape May County      381.43           92560              116   
5   Cumberland County      320.85          150972              138   
6        Essex County     6221.98          799767             6580   
7   Gloucester County      887.04          291408              447   
8       Hudson County    13495.02          676061             6411   
9    Hunterdon County      298.49          124714              293   
10      Mercer County     1621.74          369811             1282   
11   Middlesex County     2604.05          829685             5060   
12    Monmouth County     1335.55          621354             3496   
13      Morris Count