## parse_elections 

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

The table is read so that the entry at (row, column) gives the transfers _from_ the row party _to_ the column party.

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import zipfile
import os.path
import numpy as np 
import pandas as pd 
from glob import glob 
from urllib.request import urlretrieve

pd.options.display.max_columns = 100

%matplotlib inline

ImportError: No module named request

Download and unzip.

In [2]:
# Download the archive only when it is not already cached locally.
downloaded = not os.path.exists('mla2016.zip')
if downloaded:
    urlretrieve(
        'http://electionsni.org.s3-website-eu-west-1.amazonaws.com/2016/2016_archive_datapackage.zip',
        'mla2016.zip'
    )

# Extract after a fresh download, and also when the archive is present but
# the 'data' directory is missing (e.g. an earlier run was interrupted), so
# that the later cells always find the extracted files.
if downloaded or not os.path.isdir('data'):
    with zipfile.ZipFile('mla2016.zip') as zip_ref:
        zip_ref.extractall('data')

In [3]:
def process_transfer_table(df):
    """Extract the party-to-party transfer matrix from one constituency count.

    Parameters
    ----------
    df : pandas.DataFrame
        Count table with one row per candidate per count. The columns
        ``Count_Number``, ``Firstname``, ``Surname``, ``Transfers``,
        ``Total_Votes`` and ``Party_Name`` must be present.

    Returns
    -------
    pandas.DataFrame
        Square matrix labelled on both axes by the sorted party names plus
        ``'votes_lost'``. The entry at (row, column) is the number of votes
        transferred *from* the row party *to* the column party; the
        ``'votes_lost'`` column holds votes that left a party's candidates
        without being gained by any candidate in that count.

    Raises
    ------
    KeyError
        If a candidate appears in the table but has no row with
        ``Count_Number == 1`` (the party lookup is built from that count).
    """
    df = df.assign(Fullname=df.Firstname + ' ' + df.Surname)
    # One row per count, one column per candidate, values = votes moved.
    transfers = (df.loc[:, ['Count_Number', 'Fullname',
                            'Transfers', 'Total_Votes', 'Party_Name']]
                   .pivot(index='Count_Number',
                          columns='Fullname',
                          values='Transfers'))
    party_map = dict(df.loc[df.Count_Number == 1].set_index('Fullname')['Party_Name'])
    party_map['votes_lost'] = 'votes_lost'

    # Candidate-to-candidate matrix with an extra 'votes_lost' row/column.
    candidates = transfers.columns.tolist()
    labels = candidates + ['votes_lost']
    profile = pd.DataFrame(0.0, index=labels, columns=labels)

    for _, row in transfers.iterrows():
        donators = row.index[row < 0]
        n_donators = len(donators)
        if not n_donators:
            continue

        # Missing transfer values count as "no votes gained" so that a NaN
        # cannot poison the accumulated totals.
        gains = row.clip(lower=0).fillna(0)
        unallocated = -row[row < 0].sum() - gains.sum()

        # If there are multiple candidates losing votes, credit is
        # distributed evenly since it is impossible to figure out who
        # donated where. Fortunately this is rare. The non-transferable
        # remainder is split the same way so that the votes credited across
        # all donators add up to the votes actually lost in the count.
        for donator in donators:
            profile.loc[donator, candidates] += gains / n_donators
            profile.loc[donator, 'votes_lost'] += unallocated / n_donators

    # Collapse candidates into parties. Sorting gives a reproducible label
    # order (iterating a set does not).
    party_labels = sorted(set(df.Party_Name) | {'votes_lost'})
    party_prof = pd.DataFrame(0.0, index=party_labels, columns=party_labels)
    to_party = [party_map[name] for name in profile.columns]

    for candidate, row in profile.iterrows():
        by_party = row.copy()
        by_party.index = to_party
        by_party = by_party.groupby(level=0).sum()
        from_party = party_map[candidate]
        # fill_value=0 keeps parties with no candidate in this row at zero
        # instead of turning them into NaN.
        party_prof.loc[from_party] = party_prof.loc[from_party].add(
            by_party, fill_value=0)

    return party_prof


def get_total_fps(df):
    """Return the first-count vote totals per candidate.

    Only rows with ``Count_Number == 1`` are used. Candidates are identified
    by ``"<Firstname> <Surname>"`` and their ``Total_Votes`` are summed per
    name, giving a Series indexed by ``Fullname``.
    """
    first_count = df.loc[df.Count_Number == 1]
    first_count = first_count.assign(
        Fullname=first_count.Firstname + ' ' + first_count.Surname)
    return first_count.groupby('Fullname')['Total_Votes'].sum()

Build the _total_ matrix of party-to-party transfers across all constituencies. It is saved both as absolute vote totals (`transfers.csv`) and as row-normalised relative shares (`transfers_relative.csv`).

In [4]:
def row_normalise(M):
    """Return a copy of ``M`` in which every row is divided by its own sum.

    The input is not modified. A row whose sum is zero becomes NaN (0 / 0),
    which callers can detect afterwards. The division is done per row
    position in a single vectorised step, so rows sharing an index label are
    each normalised exactly once and integer inputs yield a float result.
    """
    return M.div(M.sum(axis=1), axis=0)

def set_nans_to_lost(M):
    """Return a copy of ``M`` where rows containing NaN count as fully lost.

    Every row whose index label belongs to a row with at least one null
    value is reset to 0 in all columns and to 1 in the ``'votes_lost'``
    column. Other rows, and the input itself, are left untouched.
    """
    result = M.copy()
    incomplete = result.index[result.isnull().any(axis=1)]
    if len(incomplete):
        # Select by label, so every row carrying an affected label is reset.
        affected = result.index.isin(incomplete)
        result.loc[affected] = 0
        result.loc[affected, 'votes_lost'] = 1
    return result

# Per-constituency results, keyed by the path of each Count.csv file.
profiles = {}            # absolute party-to-party transfer matrices
relative_profiles = {}   # row-normalised matrices, NaN rows marked as lost
fps = {}                 # first-count vote totals per candidate

# glob returns paths in arbitrary order; sorting keeps runs reproducible.
for filename in sorted(glob('data/2016_archive_datapackage/constituency/*/Count.csv')):
    # DataFrame.from_csv has been removed from pandas; read_csv with
    # index_col=0 and parse_dates=True is its documented equivalent.
    df = pd.read_csv(filename, index_col=0, parse_dates=True)
    profiles[filename] = process_transfer_table(df)
    relative_profiles[filename] = set_nans_to_lost(row_normalise(profiles[filename]))
    fps[filename] = get_total_fps(df)

In [5]:
# Sum the absolute transfer matrices of all constituencies. fill_value=0
# lets parties that stood in only some constituencies align without
# producing NaN for the others.
all_transfers = pd.DataFrame()

for constituency_votes in profiles.values():
    all_transfers = all_transfers.add(constituency_votes, fill_value=0)

all_transfers = all_transfers.fillna(0)

# Save the absolute vote totals before they are replaced by relative shares.
all_transfers.to_csv('transfers.csv')

# Row-normalise; parties that transferred nothing (zero row sum -> NaN)
# are marked as entirely 'votes_lost'.
all_transfers = set_nans_to_lost(row_normalise(all_transfers))

all_transfers.to_csv('transfers_relative.csv')