<div align="right"><i>Peter Norvig<br>12 August 2019</i></div>

# Data and Code for [Tracking Trump: Electoral Votes Edition](Electoral%20Votes.ipynb)

First fetch the state-by-state, month-by-month approval data from the **[Tracking Trump](https://morningconsult.com/tracking-trump/)** web page at *Morning Consult*
  and cache it locally: 

In [1]:
! curl -s -o evs.html https://morningconsult.com/tracking-trump-2/

Now some imports: 

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import re
import ast
from collections import namedtuple
from IPython.display import display, Markdown
from statistics import stdev

Additional data: the variable `state_data` contains the [electoral votes by state](https://www.britannica.com/topic/United-States-Electoral-College-Votes-by-State-1787124) and the [partisan lean by state](https://github.com/fivethirtyeight/data/tree/master/partisan-lean) (how much more Republican (plus) or Democratic (minus) leaning the state is compared to the country as a whole, across  recent elections). The variable `net_usa` has the [country-wide net presidential approval](https://projects.fivethirtyeight.com/trump-approval-ratings/) by month.

In [3]:
# Electoral votes per the Britannica votes-by-state table; partisan lean from
# https://github.com/fivethirtyeight/data/tree/master/partisan-lean
# A dict of {"state name": (electoral_votes, partisan_lean)}.
# A positive lean is more Republican than the country as a whole, a negative lean more Democratic.
state_data = {        
  "Alabama": (9, +27),         "Alaska": (3, +15),          "Arizona": (11, +9),         
  "Arkansas": (6, +24),        "California": (55, -24),     "Colorado": (9, -1),          
  "Connecticut": (7, -11),     "Delaware": (3, -14),        "District of Columbia": (3, -43),
  "Florida": (29, +5),         "Georgia": (16, +12),        "Hawaii": (4, -36),           
  "Idaho": (4, +35),           "Illinois": (20, -13),       "Indiana": (11, +18),        
  "Iowa": (6, +6),             "Kansas": (6, +23),          "Kentucky": (8, +23),        
  "Louisiana": (8, +17),       "Maine": (4, -5),            "Maryland": (10, -23),        
  "Massachusetts": (11, -29),  "Michigan": (16, -1),        "Minnesota": (10, -2),        
  "Mississippi": (6, +15),     "Missouri": (10, +19),       "Montana": (3, +18),         
  "Nebraska": (5, +24),        "Nevada": (6, +1),           "New Hampshire": (4, +2),    
  "New Jersey": (14, -13),     "New Mexico": (5, -7),       "New York": (29, -22),        
  "North Carolina": (15, +5),  "North Dakota": (3, +33),    "Ohio": (18, +7),            
  "Oklahoma": (7, +34),        "Oregon": (7, -9),           "Pennsylvania": (20, +1),    
  "Rhode Island": (4, -26),    "South Carolina": (9, +17),  "South Dakota": (3, +31),    
  "Tennessee": (11, +28),      "Texas": (38, +17),          "Utah": (6, +31),            
  "Vermont": (3, -24),         "Virginia": (13, 0),         "Washington": (12, -12),      
  "West Virginia": (5, +30),   "Wisconsin": (10, +1),       "Wyoming": (3, +47)}

# From https://projects.fivethirtyeight.com/trump-approval-ratings/
# A dict of {'date': country-wide-net-approval}, one entry per month.
# Keys use the same '1-Mon-YY' strings as the date headers of the parsed page,
# because the display code looks up net_usa[date] for each parsed date.
net_usa = {        
 '1-Jan-17': +10,  
 '1-Feb-17':   0, '1-Mar-17':  -6, '1-Apr-17': -13, '1-May-17': -11,
 '1-Jun-17': -16, '1-Jul-17': -15, '1-Aug-17': -19, '1-Sep-17': -20,
 '1-Oct-17': -17, '1-Nov-17': -19, '1-Dec-17': -18, '1-Jan-18': -18,
 '1-Feb-18': -15, '1-Mar-18': -14, '1-Apr-18': -13, '1-May-18': -12,
 '1-Jun-18': -11, '1-Jul-18': -10, '1-Aug-18': -12, '1-Sep-18': -14,
 '1-Oct-18': -11, '1-Nov-18': -11, '1-Dec-18': -10, '1-Jan-19': -12,
 '1-Feb-19': -16, '1-Mar-19': -11, '1-Apr-19': -11, '1-May-19': -12,
 '1-Jun-19': -12, '1-Jul-19': -11, '1-Aug-19': -10, '1-Sep-19': -13,
 '1-Oct-19': -13}


Now the code to parse and manipulate the data:

In [4]:
class State(namedtuple('_', ['name', 'ev', 'lean', 'approvals', 'disapprovals'])):
    """One state's record: its name, its electoral-vote count (`ev`), its
    partisan `lean`, and two {date: percent} dicts, `approvals` and `disapprovals`."""

def parse_page(filename='evs.html', data=state_data):
    """Read data from the file and return (list of `State`s, list of dates, last date).

    `data` maps each state name to (electoral_votes, partisan_lean); a state name
    found in the page but missing from `data` raises KeyError. If the file contains
    no `[[...]]` table, indexing the (empty) list of matches raises IndexError.
    """
    # File format: Date headers, then [state, approval, disapproval ...]
    # [["Demographic","1-Jan-17","","1-Feb-17","", ... "1-Apr-19",""],
    #  ["Alabama","62","26","65","29", ... "61","35"], ... ] =>
    # State("Alabama", 9, +27, approvals={"1-Jan-17": 62, ...}, 
    #                          disapprovals={"1-Jan-17": 26, ...}), ...
    with open(filename) as page:  # `with` closes the file even if reading fails
        contents = page.read()
    # Non-greedy match: take the first `[[ ... ]]` literal found in the page.
    text = re.findall(r'\[\[.*?\]\]', contents)[0]
    head_row, *table = ast.literal_eval(text)
    dates = head_row[1::2] # Every other header entry is a date
    # Each row alternates approval, disapproval for successive dates.
    states = [State(name, *data[name],
                    approvals=dict(zip(dates, map(int, numbers[0::2]))),
                    disapprovals=dict(zip(dates, map(int, numbers[1::2]))))
              for (name, *numbers) in table]
    return states, dates, dates[-1]

# Parse the cached page once; `now` is the last date in the table header.
states, dates, now = parse_page()

def EV(states, date=now, swing=0) -> float:
    """Total electoral votes of the states whose net approval on `date`, shifted by
    `swing` points, is positive (plus half the votes for a shifted net of zero).

    Because a tied state contributes half its votes, the total can be fractional
    (e.g. 269.5); it is an int only when no state is exactly tied.
    """
    def share(s):
        "Fraction of the state's votes won: 1, 0, or 1/2 on an exact tie."
        shifted = net(s, date) + swing  # computed once per state
        return 1/2 if shifted == 0 else int(shifted > 0)
    return sum(s.ev * share(s) for s in states)

def margin(states, date=now) -> int:
    """What's the least swing that would lead to a majority (at least 270 EVs)?

    Only whole-point swings from -50 to +49 are tried. Raises ValueError if none
    of them reaches 270. (A bare `next()` would raise StopIteration instead, which
    turns into an opaque RuntimeError when this is called from inside a generator.)
    """
    for swing in range(-50, 50):
        if EV(states, date, swing) >= 270:
            return swing
    raise ValueError(f'No swing in -50..49 reaches 270 electoral votes on {date!r}')

def net(state, date=now) -> int:
    "Net approval of `state` on `date`: percent approving minus percent disapproving."
    approve, disapprove = state.approvals[date], state.disapprovals[date]
    return approve - disapprove
def undecided(state, date=now) -> int:
    "Percent of `state` on `date` left over after approvals and disapprovals."
    not_approving = 100 - state.approvals[date]
    return not_approving - state.disapprovals[date]
def movement(state, date=now) -> float:
    "One fifth of the undecided percent on `date`, plus two standard deviations of recent net approval."
    from_undecided = undecided(state, date) / 5
    from_volatility = 2 * 𝝈(state)
    return from_undecided + from_volatility
def 𝝈(state, recent=dates[-12:]) -> float:
    "Sample standard deviation of the state's net approval over `recent` (default: the last 12 dates)."
    recent_nets = (net(state, d) for d in recent)
    return stdev(recent_nets)
def is_swing(state) -> bool:
    "Is the state's current net approval (in absolute value) smaller than its expected movement?"
    lead = abs(net(state))
    return lead < movement(state)

Various functions for displaying data:

In [5]:
def labels(xlab, ylab):
    "Label both axes of the current plot, then turn on its grid and legend."
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.grid(True)
    plt.legend()

def grid():
    "Turn on minor ticks and draw a dotted, slightly transparent minor grid."
    plt.minorticks_on()
    plt.grid(which='minor', ls=':', alpha=0.7)
    
def header(head) -> str:
    "Return `head` followed by a rule line that has one '|' per '|' in `head`, joined by '-'."
    bars = ['|'] * head.count('|')
    rule = '-'.join(bars)
    return f'{head}\n{rule}'

def markdown(fn) -> callable:
    """Decorator: make `fn`, which yields lines of Markdown text, display them instead.

    The wrapper joins the lines yielded by `fn` with newlines and passes the result
    to `display(Markdown(...))`. Positional and keyword arguments are both forwarded,
    so the decorated function's keyword parameters can be passed by name.
    """
    import functools  # local import: keeps this cell independent of the import cell

    @functools.wraps(fn)  # keep fn's name and docstring on the wrapper
    def displayed(*args, **kwargs):
        return display(Markdown('\n'.join(fn(*args, **kwargs))))
    return displayed

def parp(state, date=now) -> int:
    "Net approval of `state` on `date`, minus the state's partisan lean."
    approval = net(state, date)
    return approval - state.lean

In [6]:
def show_months(states=states, dates=dates, swing=3):
    """Plot electoral votes for each date, against a 270-EV reference line.

    The error bars show how the total would change if every state's net approval
    were shifted down or up by `swing` points; the legend reports the swing
    actually used rather than a fixed 3.
    """
    plt.rcParams["figure.figsize"] = [10, 7]
    plt.style.use('fivethirtyeight')
    N = len(dates)
    evs = [EV(states, date) for date in dates]  # baseline, computed once per date
    # errorbar's yerr wants [distances below, distances above] the baseline.
    err = [[ev - EV(states, date, -swing) for ev, date in zip(evs, dates)],
           [EV(states, date, swing) - ev for ev, date in zip(evs, dates)]]
    grid()
    plt.plot(range(N), [270] * N, color='darkorange', label="270 EVs", lw=2)
    plt.errorbar(range(N), evs, fmt='D-',
                 yerr=err, ecolor='grey', capsize=7, label=f'Trump EVs ±{swing}% swing', lw=2)
    labels('Months into term', 'Electoral Votes')

In [7]:
def show_approval(states=states, dates=dates):
    "Plot, for each date, the negated margin to 270 EVs and the country-wide net approval, with a zero line."
    plt.rcParams["figure.figsize"] = [10, 7]
    plt.style.use('fivethirtyeight')
    months = range(len(dates))
    grid()
    plt.plot(months, [0 for _ in months], label='Net zero', color='darkorange')
    to_270 = [-margin(states, date) for date in dates]
    plt.plot(months, to_270, 'D-', label='Margin to 270')
    country = [net_usa[date] for date in dates]
    plt.plot(months, country, 'go-', label='Country-wide Net')
    labels('Months into term', 'Net popularity')

In [8]:
@markdown
def by_month(states, dates=dates[::-1]):
    "Yield a Markdown table with one row per date (default: latest date first)."
    yield header('|Month|EVs|Margin|Country|Undecided|')
    for date in dates:
        # '1-Jan-17' => 'Jan 2017'
        month = date.replace('1-', '').replace('-', ' 20')
        evs = int(EV(states, date))
        to_270 = margin(states, date)
        country = net_usa[date]
        # Undecided percent averaged over states, weighted by electoral votes.
        avg_undecided = sum(s.ev * undecided(s, date) for s in states) / 538
        very_undecided = sum(undecided(s, date) > 5 for s in states)
        yield (f'|{month}|{evs}|{to_270}%|{country:+d}%'
               f'|{avg_undecided:.0f}% ({very_undecided} states)')

In [9]:
@markdown
def show_states(states=states, d=now, ref='1-Jan-17'):
    """Yield a Markdown table with one row per state, ordered by net approval on date `d`.

    Columns: state name, net approval on `d`, expected movement, electoral votes,
    running total of electoral votes, approval, disapproval, undecided, standard
    deviation of recent net approval, and change in net approval since `ref`.
    Every per-date column, and the sort order, uses `d`. The swing-state emphasis
    comes from `is_swing`/`swing_name`, which take no date and use their own default.
    """
    total = 0
    yield header('|State|Net|Move|EV|ΣEV|+|−|?|𝝈|Δ|')
    for s in sorted(states, key=lambda s: net(s, d), reverse=True):
        total += s.ev
        b = '**' * is_swing(s)  # bold markers, or '' for a non-swing state
        yield (f'|{swing_name(s)}|{b}{net(s, d):+d}%{b}|{b}±{movement(s, d):.0f}%{b}|{s.ev}|{total}'
               f'|{s.approvals[d]}%|{s.disapprovals[d]}%|{undecided(s, d)}%|±{𝝈(s):3.1f}%'
               f'|{net(s, d) - net(s, ref):+d}%|')
        
def swing_name(s) -> str:
    "The state's name; bold and upper-case if it is a swing state."
    if is_swing(s):
        return f'**{s.name.upper()}**'
    return s.name

In [10]:
@markdown
def show_parp(states=states, dates=(now, '1-Jan-19', '1-Jan-18', '1-Jan-17')):
    "Yield a Markdown table of each state's lean, EVs, and PARP and net approval on each of `dates`, sorted by current PARP."
    # Column labels carry a two-digit year, except for the current date.
    years = ['' if date == now else date[-2:] for date in dates]
    columns = '|'.join(f'PARP {yy}|Net {yy}' for yy in years)
    yield header(f'|State|Lean|EV|{columns}|')
    for s in sorted(states, key=parp, reverse=True):
        cells = '|'.join(f'{parp(s, date):+d}|{net(s, date):+d}' for date in dates)
        yield f'|{swing_name(s)}|{s.lean:+d}|{s.ev}|{cells}|'

I really should have some more tests.

In [11]:
# Sanity checks on the parsed `states` list.
assert len(states) == 51,                "50 states plus DC"
assert all(s.ev >= 3 for s in states),   "All states have two senators and at least one rep."
assert sum(s.ev for s in states) == 538, "Total of 538 electoral votes."