# Electoral College Data Mapping

The purpose of this project is to analyze how electoral college votes were distributed after the 2010 census, examine how the population (by estimate) has shifted between states since then, and determine what that means for the number of voters per electoral college vote in each state. I will also analyze how we expect the electoral college to be redistributed after the 2020 census, given Census Bureau predictions.

An analysis will also be performed on the likelihood (as a percentage) of each state giving its electoral college votes to a particular party and its nominee, based purely on historical data. The aim is to demonstrate which states have the greatest power per vote, given both their currently allotted electoral votes and the likelihood of that state assigning its votes to either candidate.

### Things to get

- Current apportionment of house seats
- formula for apportionment of 2019 numbers
- understand priority number creation


In [16]:
#import necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import requests
import json

import sqlite3
%matplotlib inline

In [2]:
#define how to retrieve api keys

def get_keys(path):
    """Load API credentials from a JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The parsed JSON document (callers in this notebook index it
        with ``'api_key'``).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which differs between platforms.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

In [3]:
#get key for census bureau api
# The credentials live outside the repository so the key is never committed.
# NOTE(review): the absolute path below is specific to one machine/user.
keys = get_keys("/Users/flatironschool/.secret/census_api.json")

# The JSON file is expected to hold the key under 'api_key'.
api_key = keys['api_key']

In [5]:
#make and print request for census count and estimates from 2010-2019 for all states
year = '2019'

# Population Estimates Program endpoint for the chosen vintage year.
url = 'https://api.census.gov/data/{}/pep/population'.format(year)

# Columns requested from the API.
variables = ['DATE_CODE',
             'DATE_DESC',
             'POP',
             'NAME']

# 'state:*' asks for one set of rows per state-level geography.
granularity = 'state:*'

params = {'get': ','.join(variables), 'for': granularity, 'key': api_key}

# A timeout keeps a stalled connection from hanging the notebook forever.
r = requests.get(url, params=params, timeout=30)

# The key is sent in the query string, so mask it before echoing the URL;
# otherwise the secret ends up in the saved notebook output.
print(r.url.replace(api_key, '<API_KEY>'))
print(r)

# Stop here on an HTTP error instead of failing later while parsing the body.
r.raise_for_status()

print(type(r.text))
print(r.text[:1000])

https://api.census.gov/data/2019/pep/population?get=DATE_CODE%2CDATE_DESC%2CPOP%2CNAME&for=state%3A%2A&key=<API_KEY>
<Response [200]>
<class 'str'>
[["DATE_CODE","DATE_DESC","POP","NAME","state"],
["1","4/1/2010 Census population","5303925","Minnesota","27"],
["2","4/1/2010 population estimates base","5303927","Minnesota","27"],
["3","7/1/2010 population estimate","5310828","Minnesota","27"],
["4","7/1/2011 population estimate","5346143","Minnesota","27"],
["5","7/1/2012 population estimate","5376643","Minnesota","27"],
["6","7/1/2013 population estimate","5413479","Minnesota","27"],
["7","7/1/2014 population estimate","5451079","Minnesota","27"],
["8","7/1/2015 population estimate","5482032","Minnesota","27"],
["9","7/1/2016 population estimate","5522744","Minnesota","27"],
["10","7/1/2017 population estimate","5566230","Minnesota","27"],
["11","7/1/2018 population estimate","5606249","Minnesota","27"],
["12","7/1/2019 population estimate","5639632","Min

In [115]:
#clean data (only need population, state, and datetime) with pandas before putting into SQL database
data = r.json()

# The first element of the payload is the header row; the rest are records.
main_df = pd.DataFrame(data[1:], columns=data[0])

# Pull the four-digit year out of descriptions such as
# '7/1/2011 population estimate'. Matching on the pattern (rather than a fixed
# character slice) keeps this correct for two-digit months or days as well.
main_df['YEAR'] = main_df['DATE_DESC'].str.extract(r'/(\d{4})', expand=False)

# DATE_CODE '2' (4/1/2010 estimates base) and '3' (7/1/2010 estimate) are
# additional 2010 figures; keep only the 4/1/2010 census count for that year.
is_duplicate_2010 = main_df['DATE_CODE'].isin(['2', '3'])

main_df = (
    main_df.loc[~is_duplicate_2010, ['YEAR', 'NAME', 'POP']]
    .rename(columns={'YEAR': 'Year', 'NAME': 'State', 'POP': 'Population'})
    .astype({'Population': 'int64'})  # the API delivers every value as a string
    .reset_index(drop=True)
)

display(main_df.head(15))
# info() prints its report itself and returns None, so it is not wrapped in
# display() (which would render a stray "None").
main_df.info()

Unnamed: 0,Year,State,Population
0,2010,Minnesota,5303925
1,2011,Minnesota,5346143
2,2012,Minnesota,5376643
3,2013,Minnesota,5413479
4,2014,Minnesota,5451079
5,2015,Minnesota,5482032
6,2016,Minnesota,5522744
7,2017,Minnesota,5566230
8,2018,Minnesota,5606249
9,2019,Minnesota,5639632


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 3 columns):
Year          520 non-null object
State         520 non-null object
Population    520 non-null int64
dtypes: int64(1), object(2)
memory usage: 12.3+ KB


None

In [116]:
# Load the 2010 overseas population counts. The first 7 rows of the sheet are
# skipped, and rows containing any missing value are discarded.
overseas_df = (
    pd.read_excel('Overseas Population 2010.xls', skiprows=7)
    .dropna()
    .reset_index(drop=True)
)
overseas_df.columns = ['State', 'Overseas_pop']

# Tag every row with the census year as a string so it can be joined against
# the string 'Year' column of the population estimates.
overseas_df['Year'] = '2010'
overseas_df['Overseas_pop'] = overseas_df['Overseas_pop'].astype('int64')

display(overseas_df.head())
# info() prints its report itself and returns None, so it is not wrapped in
# display() (which would render a stray "None").
overseas_df.info()

Unnamed: 0,State,Overseas_pop,Year
0,Alabama,23246,2010
1,Alaska,11292,2010
2,Arizona,20683,2010
3,Arkansas,10311,2010
4,California,88033,2010


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
State           52 non-null object
Overseas_pop    52 non-null int64
Year            52 non-null object
dtypes: int64(1), object(2)
memory usage: 1.3+ KB


None

In [160]:
#merge the overseas data with the main population estimates table and extrapolate overseas estimates based on 
#percentage change in the population year over year

#merge the population estimates with overseas data on State and Year
merged_df = main_df.merge(overseas_df, how='left', on=['State','Year'])

#year-over-year population change, computed strictly within each state so the
#first row of a state is never compared against the last row of the previous one
previous_pop = merged_df.groupby('State', sort=False)['Population'].shift(1)
percent_change = ((merged_df['Population'] - previous_pop) / previous_pop).values

#walk forward through the rows, filling each missing overseas value from the
#previous row of the same state (either the 2010 census figure or the value
#calculated one step earlier), scaled by that state's population change.
#Rows are assumed to be ordered by year within each state, as delivered by the API.
overseas = merged_df['Overseas_pop'].astype('float64').values.copy()
for i in range(1, len(overseas)):
    # percent_change is NaN on the first row of every state, which stops the
    # extrapolation from reaching back into a different state's data
    if np.isnan(overseas[i]) and not np.isnan(percent_change[i]):
        overseas[i] = round(percent_change[i] * overseas[i - 1] + overseas[i - 1], 0)

#states with no 2010 overseas count have nothing to extrapolate from; report
#them and record 0 instead of silently borrowing a neighbouring state's figure
no_baseline = np.isnan(overseas)
if no_baseline.any():
    print('No 2010 overseas count for: {} (Overseas_pop set to 0)'.format(
        ', '.join(merged_df.loc[no_baseline, 'State'].unique())))
    overseas[no_baseline] = 0

merged_df['Overseas_pop'] = overseas.astype('int64')

display(merged_df.head(30))
# info() prints its report itself and returns None, so it is not wrapped in
# display() (which would render a stray "None").
merged_df.info()

Unnamed: 0,Year,State,Population,Overseas_pop
0,2010,Minnesota,5303925,10954
1,2011,Minnesota,5346143,11041
2,2012,Minnesota,5376643,11104
3,2013,Minnesota,5413479,11180
4,2014,Minnesota,5451079,11258
5,2015,Minnesota,5482032,11322
6,2016,Minnesota,5522744,11406
7,2017,Minnesota,5566230,11496
8,2018,Minnesota,5606249,11579
9,2019,Minnesota,5639632,11648


<class 'pandas.core.frame.DataFrame'>
Int64Index: 520 entries, 0 to 519
Data columns (total 4 columns):
Year            520 non-null object
State           520 non-null object
Population      520 non-null int64
Overseas_pop    520 non-null int64
dtypes: int64(2), object(2)
memory usage: 40.3+ KB


None

In [81]:
#put pop data into SQL database for easy retrieval
# sqlite3.connect creates the database file if it does not exist yet.
conn = sqlite3.connect('census_pop_data.db')
# Cursor used by the following cells to run SQL statements.
c = conn.cursor()

In [82]:
# (Re)create an empty POPULATION table.
create_sql = 'CREATE TABLE POPULATION (Year text, State text, Population integer)'

try:
    c.execute(create_sql)
except sqlite3.OperationalError:
    # sqlite3 raises OperationalError when the table already exists: drop the
    # old one and start fresh. Any other kind of failure is no longer swallowed
    # by a bare except and will surface normally.
    c.execute('DROP TABLE POPULATION')
    print('Population table dropped')
    c.execute(create_sql)
    conn.commit()
    print('Population table created (Year, State, Population)')
else:
    conn.commit()
    print('Population Table created (Year, State, Population)')

Population table dropped
Population Table created (Year, State, Population)


In [83]:
# Write the cleaned estimates into the POPULATION table.
# if_exists='replace' drops any existing table of that name and recreates it
# from the DataFrame, so the table's columns come from main_df. The DataFrame
# index is written as well (to_sql's default), giving each row a leading
# index value.
main_df.to_sql('POPULATION', conn, if_exists='replace')

In [84]:
# Read the table back in full and print each stored row as a sanity check.
stored_rows = c.execute('SELECT * FROM population').fetchall()
for row in stored_rows:
    print(row)

(0, '4/1/2010', 'Minnesota', 5303925)
(1, '4/1/2010', 'Minnesota', 5303927)
(2, '7/1/2010', 'Minnesota', 5310828)
(3, '7/1/2011', 'Minnesota', 5346143)
(4, '7/1/2012', 'Minnesota', 5376643)
(5, '7/1/2013', 'Minnesota', 5413479)
(6, '7/1/2014', 'Minnesota', 5451079)
(7, '7/1/2015', 'Minnesota', 5482032)
(8, '7/1/2016', 'Minnesota', 5522744)
(9, '7/1/2017', 'Minnesota', 5566230)
(10, '7/1/2018', 'Minnesota', 5606249)
(11, '7/1/2019', 'Minnesota', 5639632)
(12, '4/1/2010', 'Mississippi', 2967297)
(13, '4/1/2010', 'Mississippi', 2968130)
(14, '7/1/2010', 'Mississippi', 2970548)
(15, '7/1/2011', 'Mississippi', 2978731)
(16, '7/1/2012', 'Mississippi', 2983816)
(17, '7/1/2013', 'Mississippi', 2988711)
(18, '7/1/2014', 'Mississippi', 2990468)
(19, '7/1/2015', 'Mississippi', 2988471)
(20, '7/1/2016', 'Mississippi', 2987938)
(21, '7/1/2017', 'Mississippi', 2988510)
(22, '7/1/2018', 'Mississippi', 2981020)
(23, '7/1/2019', 'Mississippi', 2976149)
(24, '4/1/2010', 'Missouri', 5988927)
(25, '4/1/20

In [85]:
# Show only California's rows. The table is written from main_df, whose state
# column is named 'State', so filter on that column. The value is passed as a
# bound parameter rather than a double-quoted token, which SQLite would first
# try to interpret as a column name.
c.execute('SELECT * FROM population WHERE State = ?', ('California',))
for row in c.fetchall():
    print(row)

(396, '4/1/2010', 'California', 37253956)
(397, '4/1/2010', 'California', 37254519)
(398, '7/1/2010', 'California', 37319502)
(399, '7/1/2011', 'California', 37638369)
(400, '7/1/2012', 'California', 37948800)
(401, '7/1/2013', 'California', 38260787)
(402, '7/1/2014', 'California', 38596972)
(403, '7/1/2015', 'California', 38918045)
(404, '7/1/2016', 'California', 39167117)
(405, '7/1/2017', 'California', 39358497)
(406, '7/1/2018', 'California', 39461588)
(407, '7/1/2019', 'California', 39512223)


In [77]:
# Release the cursor first, then the database connection itself.
c.close()
conn.close()

In [93]:
def reciprocal_geometric_mean(next_house_seat):
    """Return 1 / sqrt(n * (n - 1)) for n = ``next_house_seat``.

    This is the reciprocal of the geometric mean of a state's current seat
    count (n - 1) and the seat it would receive next (n).
    """
    current_seat = next_house_seat - 1
    geometric_mean = np.sqrt(next_house_seat * current_seat)
    return 1 / geometric_mean

In [100]:
def priority_value(state_pop, next_house_seat):
    """Return a state's priority value for its next House seat, as an int.

    The value is ``state_pop`` multiplied by the reciprocal geometric mean
    for ``next_house_seat``, rounded to the nearest whole number.
    """
    multiplier = reciprocal_geometric_mean(next_house_seat)
    unrounded = state_pop * multiplier
    return int(round(unrounded, 0))

The population count used for apportionment is each state's resident population from the census plus that state's overseas population.

In [101]:
priority_value(37341989, 2)

26404774

In [96]:
88033 + 37253956

37341989

# Working Zone