In [1]:
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import folium
import json
import os
from branca.colormap import linear
import branca.colormap

In [2]:
# US States GeoJSON file (expected in the notebook's working directory).
us_states = 'us-states.json'
# Use a context manager so the file handle is closed as soon as the JSON is
# parsed; JSON is UTF-8 by specification, so do not rely on the platform default.
with open(us_states, encoding='utf-8') as geo_file:
    geo_json_data = json.load(geo_file)

# Valid States: the 50 two-letter state abbreviations used to filter donation
# rows and census tables throughout the notebook.
valid_states = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI',
       'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI',
       'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC',
       'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT',
       'VT', 'VA', 'WA', 'WV', 'WI', 'WY']

In [3]:
# Donation Data
# Individual donation records for 1980, read from a CSV in the working directory.
# The cells below filter on its STATE column and count its SUB_ID column.
all_donations = pd.read_csv("1980 DONOR DATA.csv")

In [7]:
# Keep only rows whose STATE is one of the 50 abbreviations in valid_states.
# .copy() makes `cleaned` an independent frame, so assigning columns to it
# later does not raise pandas' SettingWithCopyWarning.
cleaned = all_donations[all_donations['STATE'].isin(valid_states)].copy()

In [8]:
# Number of political donations by individuals per state in 1980.
# groupby(...).count() counts non-null cells per column; the SUB_ID column is
# kept as the per-state donation count and renamed to "Count".
num_donations = (
    cleaned
    .groupby("STATE")
    .count()
    .rename(columns={"SUB_ID": "Count"})
    .loc[:, ["Count"]]
)
# {state abbreviation: donation count}, looked up by GeoJSON feature id below.
donation_dict = num_donations["Count"].to_dict()

In [9]:
# Building Base Map: state outlines only, no data-driven shading yet.
def _outline_style(feature):
    """Return the same white fill / dashed black border for every feature."""
    return {
        'fillColor': 'white',
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }


m = folium.Map(location=[43, -100], zoom_start=4)

folium.GeoJson(geo_json_data, style_function=_outline_style).add_to(m)

<folium.features.GeoJson at 0x140c9f70>

In [10]:
# Orange-red colour scale spanning the smallest to the largest per-state count.
colormap = linear.OrRd_03.scale(
    vmin=num_donations["Count"].min(),
    vmax=num_donations["Count"].max(),
)

colormap

In [11]:
# Geographic Distribution of Donations
def _donation_count_style(feature):
    """Shade a state by its donation count, looked up via the feature id."""
    return {
        'fillColor': colormap(donation_dict[feature['id']]),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
    }


m = folium.Map(location=[43, -100], zoom_start=4)

folium.GeoJson(
    geo_json_data,
    name='donations',
    style_function=_donation_count_style,
).add_to(m)

folium.LayerControl().add_to(m)

m

It makes sense that the states with the largest populations (e.g., California, New York, Texas) have the most individual political donations. Let's control for that by finding the number of political donations per capita. To compute this per capita metric, we divide the total number of political donations in each state by that state's population according to the 1980 US Census.

In [12]:
from bs4 import BeautifulSoup
import requests
from pandas.io.html import read_html

In [13]:
# Web-scrape the 1980 Census page on Wikipedia: read_html returns one DataFrame
# per matching <table>; only tables with class "wikitable" are requested and
# the first of them is kept.
url = "https://en.wikipedia.org/wiki/1980_United_States_Census"
wikitable = read_html(io=url, attrs=dict([('class', 'wikitable')]))
census1980 = wikitable[0]

In [16]:
# Election years for which donor CSVs are expected.
years = ["1980", "1984", "1988",
        "1992", "1996",
        "2000","2004", "2008",
        "2012" ,"2016"]

# Upper-cased full state name -> two-letter abbreviation, used to convert the
# "State" column of the scraped census tables.
abbrev_dict = {
    'WASHINGTON': 'WA', 'VIRGINIA': 'VA', 'DELAWARE': 'DE', 'WISCONSIN': 'WI',
    'WEST VIRGINIA': 'WV', 'HAWAII': 'HI', 'FLORIDA': 'FL', 'WYOMING': 'WY',
    'NEW HAMPSHIRE': 'NH', 'NEW JERSEY': 'NJ', 'NEW MEXICO': 'NM', 'TEXAS': 'TX',
    'LOUISIANA': 'LA', 'NORTH CAROLINA': 'NC',
    'NORTH DAKOTA': 'ND', 'NEBRASKA': 'NE', 'TENNESSEE': 'TN', 'NEW YORK': 'NY',
    'PENNSYLVANIA': 'PA', 'CALIFORNIA': 'CA',
    'NEVADA': 'NV', 'COLORADO': 'CO', 'ALASKA': 'AK', 'ALABAMA': 'AL',
    'ARKANSAS': 'AR', 'VERMONT': 'VT', 'ILLINOIS': 'IL', 'GEORGIA': 'GA',
    'INDIANA': 'IN', 'IOWA': 'IA',
    'OKLAHOMA': 'OK', 'ARIZONA': 'AZ', 'IDAHO': 'ID', 'CONNECTICUT': 'CT',
    'MAINE': 'ME', 'MARYLAND': 'MD', 'MASSACHUSETTS': 'MA',
    'OHIO': 'OH', 'UTAH': 'UT', 'MISSOURI': 'MO', 'MINNESOTA': 'MN', 'MICHIGAN': 'MI',
    'RHODE ISLAND': 'RI', 'KANSAS': 'KS', 'MONTANA': 'MT', 'MISSISSIPPI': 'MS',
    'SOUTH CAROLINA': 'SC', 'KENTUCKY': 'KY', 'OREGON': 'OR', 'SOUTH DAKOTA': 'SD',
}


def _state_population(table, population_col):
    """Reduce a scraped census table to one population column per state.

    Parameters
    ----------
    table : pandas.DataFrame
        A table returned by read_html that has a "State" column holding state
        names and a column named ``population_col``.
    population_col : str
        Exact header of the population column in ``table``.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (``population_col``) indexed by two-letter state
        abbreviation, restricted to the entries of ``valid_states``.
    """
    state = table["State"].str.upper()
    # Names that are not in abbrev_dict keep their upper-cased text and are
    # then removed by the valid_states filter below.
    state = state.map(abbrev_dict).fillna(state)
    tidy = table.assign(State=state)
    tidy = tidy[tidy["State"].isin(valid_states)]
    return tidy[["State", population_col]].set_index("State")


# NOTE(review): the table positions ([0], [1], [2]) and the column headers
# (including footnote markers such as "[4]" and "[50]") mirror the Wikipedia
# page layout at the time of writing and will break if the pages are edited.

# Use 1980's Census data for (1980, 1984, 1988).
# Non-state rows are removed with the same valid_states filter as the other
# decades rather than by dropping a hard-coded positional row label.
url = "https://en.wikipedia.org/wiki/1980_United_States_Census"
wikitable = read_html(url, attrs={'class' : 'wikitable'})
census1980 = _state_population(wikitable[0], "Population as of1980 Census")

# Use 1990's Census data for (1992, 1996)
url1990 = "https://en.wikipedia.org/wiki/1990_United_States_Census"
wikitable1990 = read_html(url1990)
census1990 = _state_population(wikitable1990[1], "Population as of1990 Census")

# Use 2000's Census data for (2000, 2004, 2008)
url2000 = "https://en.wikipedia.org/wiki/2000_United_States_Census"
wikitable2000 = read_html(url2000)
census2000 = _state_population(wikitable2000[1], "Population as of2000 Census[4]")

# Use 2010's Census data for (2012, 2016)
url2010 = "https://en.wikipedia.org/wiki/2010_United_States_Census"
wikitable2010 = read_html(url2010)
census2010 = _state_population(wikitable2010[2], "Population as of2010 Census[50]")

In [17]:
def _census_for_year(year):
    """Return ``(census_frame, population_column)`` for the decade of ``year``.

    Each census covers its own decade: 1980-1989 -> census1980,
    1990-1999 -> census1990, 2000-2009 -> census2000, 2010-2020 -> census2010.
    The frames are the module-level census tables indexed by state abbreviation.

    Raises
    ------
    ValueError
        If ``year`` cannot be converted to an int or lies outside 1980-2020.
    """
    year = int(year)
    if not 1980 <= year <= 2020:
        raise ValueError(
            "year must be between 1980 and 2020 (inclusive), got {!r}".format(year)
        )
    if year < 1990:
        return census1980, "Population as of1980 Census"
    if year < 2000:
        return census1990, "Population as of1990 Census"
    if year < 2010:
        return census2000, "Population as of2000 Census[4]"
    return census2010, "Population as of2010 Census[50]"


def per_capita_donations_map(year):
    """Build a folium choropleth of individual donations per capita by state.

    Reads ``"<year> DONOR DATA.csv"`` from the working directory, counts the
    non-null SUB_ID values per state (rows restricted to ``valid_states``),
    divides each count by the state's population from the census covering
    ``year``, and shades every feature of ``geo_json_data`` on a PuBu scale.

    Parameters
    ----------
    year : int or str
        Year of the donor file, between 1980 and 2020 inclusive.

    Returns
    -------
    folium.Map
        Map with one 'donations' GeoJson layer and a layer control. States
        without a per-capita value are drawn in light grey.

    Raises
    ------
    ValueError
        If ``year`` is outside 1980-2020 (see ``_census_for_year``).
    """
    # Pull Donor Data CSV's and clean for donations per state
    all_donations = pd.read_csv(str(year) + " DONOR DATA.csv", encoding='windows-1252')
    cleaned = all_donations[all_donations['STATE'].isin(valid_states)]
    num_donations = cleaned.groupby("STATE").count().rename(columns={"SUB_ID": "Count"})[["Count"]]

    # Exactly one census table applies per year. Every decade uses the same
    # join direction (census on the left), so the result has one row per
    # census state and NaN where a state has no donation rows.
    census, population_col = _census_for_year(year)
    per_cap_df = (
        census
        .join(num_donations, how="left")[[population_col, "Count"]]
        .assign(Donations_Per_Capita=lambda df: df["Count"] / df[population_col])
    )
    percap_dict = per_cap_df["Donations_Per_Capita"].to_dict()

    # Build Color Scale for Per Capita Donations (min/max skip NaN by default)
    colormap = linear.PuBu_06.scale(
        per_cap_df.Donations_Per_Capita.min(),
        per_cap_df.Donations_Per_Capita.max())

    no_data_fill = '#d3d3d3'

    def style_function(feature):
        # A feature id missing from the table, or a NaN ratio, must not abort
        # rendering of the whole map; draw that state in a neutral colour.
        value = percap_dict.get(feature['id'])
        has_value = value is not None and pd.notna(value)
        return {
            'fillColor': colormap(value) if has_value else no_data_fill,
            'color': 'black',
            'weight': 1,
            'dashArray': '5, 5',
            'fillOpacity': 0.9,
        }

    # Geographic Distribution of Donations
    m = folium.Map([43, -100], zoom_start=4)

    folium.GeoJson(
        geo_json_data,
        name='donations',
        style_function=style_function,
    ).add_to(m)

    folium.LayerControl().add_to(m)

    return m

In [18]:
# Render the per-capita donation map for the 2004 donor file.
per_capita_donations_map(year=2004)

In [113]:
# 2012 donor data, kept under year-suffixed names. Reusing `all_donations`,
# `cleaned` and `num_donations` here would overwrite the 1980 frames that the
# per-capita (census1980) and average-donation cells further down read from
# when the notebook is run top to bottom.
all_donations_2012 = pd.read_csv("2012 DONOR DATA.csv", encoding='windows-1252')
cleaned_2012 = all_donations_2012[all_donations_2012['STATE'].isin(valid_states)].copy()
num_donations_2012 = (
    cleaned_2012
    .groupby("STATE")
    .count()
    .rename(columns={"SUB_ID": "Count"})[["Count"]]
)

In [86]:
# Per-capita donation map for 2012.
# This cell used to read `per_cap_df` and `percap_dict`, which are only created
# in a cell further down the notebook, so it failed with a NameError on a fresh
# kernel. per_capita_donations_map builds the colour scale and the choropleth
# itself from the 2012 donor file and the 2010 census table.
m = per_capita_donations_map(2012)

m

In [34]:
# Political donations per capita by state: donation count divided by the
# 1980 census population. The left join keeps one row per census state.
per_cap_df = (
    census1980
    .join(num_donations, how="left")
    .loc[:, ["Population as of1980 Census", "Count"]]
    .assign(
        Donations_Per_Capita=lambda df: df["Count"] / df["Population as of1980 Census"]
    )
)
# {state abbreviation: donations per capita}, looked up by GeoJSON feature id.
percap_dict = per_cap_df["Donations_Per_Capita"].to_dict()

In [35]:
# Five states with the highest donations per capita.
(
    per_cap_df
    .loc[:, ["Donations_Per_Capita"]]
    .sort_values(by="Donations_Per_Capita", ascending=False)
    .iloc[:5]
)

Unnamed: 0_level_0,Donations_Per_Capita
State,Unnamed: 1_level_1
NV,0.002833
CT,0.002704
AK,0.002496
TX,0.00245
OK,0.002317


In [36]:
# Build Color Scale for Per Capita Donations: purple-blue, spanning the lowest
# to the highest per-capita value.
colormap = linear.PuBu_06.scale(
    vmin=per_cap_df["Donations_Per_Capita"].min(),
    vmax=per_cap_df["Donations_Per_Capita"].max(),
)

colormap

In [37]:
# Geographic Distribution of Donations (per capita)
def _per_capita_style(feature):
    """Shade a state by its donations per capita, looked up via the feature id."""
    return {
        'fillColor': colormap(percap_dict[feature['id']]),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
    }


m = folium.Map(location=[43, -100], zoom_start=4)

folium.GeoJson(
    geo_json_data,
    name='donations',
    style_function=_per_capita_style,
).add_to(m)

folium.LayerControl().add_to(m)

m

In [61]:
# Average donation amount per state.
# `cleaned` may be a filtered slice of another frame; building a new frame with
# .assign (instead of assigning a column in place) avoids pandas'
# SettingWithCopyWarning. pd.to_numeric still raises on unparseable amounts.
cleaned = cleaned.assign(TRANSACTION_AMT=pd.to_numeric(cleaned["TRANSACTION_AMT"]))
avg_donation_amount = cleaned.groupby("STATE")[["TRANSACTION_AMT"]].mean()
# {state abbreviation: mean TRANSACTION_AMT}, looked up by GeoJSON feature id.
avg_don_dict = avg_donation_amount["TRANSACTION_AMT"].to_dict()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned["TRANSACTION_AMT"] = pd.to_numeric(cleaned["TRANSACTION_AMT"])


In [63]:
# Five states with the highest average donation amount.
(
    avg_donation_amount
    .sort_values(by="TRANSACTION_AMT", ascending=False)
    .iloc[:5]
)

Unnamed: 0_level_0,TRANSACTION_AMT
STATE,Unnamed: 1_level_1
DE,845.181818
NY,727.465075
OK,720.093451
LA,711.524288
TX,701.985256


In [64]:
# Blue-green colour scale spanning the lowest to the highest state average.
colormap = linear.BuGn_03.scale(
    vmin=avg_donation_amount["TRANSACTION_AMT"].min(),
    vmax=avg_donation_amount["TRANSACTION_AMT"].max(),
)

colormap

In [65]:
# Geographic Distribution of Donations (average amount per donation)
def _avg_donation_style(feature):
    """Shade a state by its average donation amount, looked up via the feature id."""
    return {
        'fillColor': colormap(avg_don_dict[feature['id']]),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
    }


m = folium.Map(location=[43, -100], zoom_start=4)

folium.GeoJson(
    geo_json_data,
    name='avg donations',
    style_function=_avg_donation_style,
).add_to(m)

folium.LayerControl().add_to(m)

m

In [None]:
# Use median to control for Nevada and Arkansas
# Breakdown for SuperPAC data