## parse_elections 

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

Table is read whereby row, column indicates transfers _from_ row and _to_ column. 

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import zipfile
import os.path
import numpy as np 
import pandas as pd 
from glob import glob 
from urllib.request import urlretrieve

pd.options.display.max_columns = 100

%matplotlib inline

ImportError: No module named request

Download and unzip.

In [2]:
if not os.path.exists('mla2016.zip'):
    urlretrieve (
        'http://electionsni.org.s3-website-eu-west-1.amazonaws.com/2016/2016_archive_datapackage.zip',
        'mla2016.zip'
    )
    with zipfile.ZipFile('mla2016.zip') as zip_ref:
        zip_ref.extractall('data')

In [2]:
def process_transfer_table(df):
    """Parse an Elections NI constituency count DataFrame to extract
    the matrix of party-to-party transfers.
    """
    df = df.assign(Fullname=df.Firstname + ' ' + df.Surname)
    transfers = (df.loc[:, ['Count_Number', 'Fullname',
                            'Transfers', 'Total_Votes', 'Party_Name']]
                   .pivot(index='Count_Number', 
                         columns='Fullname', 
                         values='Transfers'))
    party_map = dict(df.loc[df.Count_Number == 1].set_index('Fullname')['Party_Name'])
    party_map['votes_lost'] = 'votes_lost'

    profile = pd.DataFrame(np.zeros((len(transfers.columns) + 1, len(transfers.columns) + 1)))
    profile.index = transfers.columns.tolist() + ['votes_lost']
    profile.columns = transfers.columns.tolist() + ['votes_lost']

    for _, row in transfers.iterrows():
        donators = row.index[row < 0]
        n_donators = len(donators)
        
        votes_lost = -1 * row[row < 0].sum() 
        votes_gained = row[row > 0].sum() 
        
        # If there are multiple parties losing votes, credit is distributed
        # evenly since it is impossible to figure out who donated where.
        # Fortunately this is rare.
        if n_donators:
            for donator in donators:
                row_maxed = np.maximum(row, 0)
                profile.loc[donator, profile.columns != 'votes_lost'] += row_maxed / n_donators
                profile.loc[donator, 'votes_lost'] += votes_lost - votes_gained 
    
    # return profile
    parties = set(df.Party_Name)
    parties.add('votes_lost')
    party_prof = pd.DataFrame(np.zeros((len(parties), len(parties))))
    party_prof.index = parties 
    party_prof.columns = parties

    for candidate, row in profile.iterrows():
        cand_party = party_map[candidate]
        row_mapped = row.copy() 
        row_mapped.index = [party_map[x] for x in row.index]
        row_grpd = row_mapped.groupby(by = row_mapped.index).sum()
        party_prof[cand_party] += row_grpd

    return party_prof.T


def get_total_fps(df):
    df = df.assign(Fullname=df.Firstname + ' ' + df.Surname)
    fps = df.loc[df.Count_Number == 1].set_index('Fullname')['Total_Votes']
    return fps.groupby(fps.index).sum()

Make the _total_ matrix of party-to-party transfers countrywide. This is saved both as absolute vote totals (`transfers.csv`) and as a row-normalised relative amount (`transfers_rel.csv`).

In [3]:
def row_normalise(M):
    M = M.copy()
    for party, row in M.iterrows():
        M.loc[M.index == party] /= row.sum()
    return M

def set_nans_to_lost(M):
    M = M.copy()
    for party, row in M.iterrows():
        if np.any(row.isnull()):
            M.loc[M.index == party] = 0 
            M.loc[M.index == party, 'votes_lost'] = 1
    return M

profiles = {}
relative_profiles = {}
fps = {}
for filename in glob('data/2016_archive_datapackage/constituency/*/Count.csv'):
    df = pd.DataFrame.from_csv(filename)
    profiles[filename] = process_transfer_table(df)
    relative_profiles[filename] = set_nans_to_lost(row_normalise(profiles[filename]))
    fps[filename] = get_total_fps(df)

In [4]:
def stv_quota(votes, seats):
    return np.floor(votes / (seats + 1)) + 1


def add_dicts(super_, sub_, multiplier):
    for subk, subv in sub_.items():
        super_[subk] += subv * multiplier
    return super_


test_const = 'data/2016_archive_datapackage/constituency/belfast-east/Count.csv'
test_prof = relative_profiles[test_const]
test_fps = fps[test_const]

fps_dict = dict(test_fps)
prof = test_prof.copy()
quota = stv_quota(test_fps.sum(), 6)
through = []
through_this_round = []

while len(through) < 6:
    for candidate, votes in fps_dict.items():
        if votes > quota:
            through_this_round.append(candidate)

    remaining_candidates = set(fps_dict.keys())
    remaining_candidates.add('votes_lost')

    for victor in through_this_round:
        excess = fps_dict[victor]

        transfer_profile = prof[victor, np.in1d(prof.columns, remaining_candidates)]
        transfer_profile /= transfer_profile.sum()
        transfer_dict = dict(transfer_profile)

        fps_dict = add_dicts(fps_dict, transfer_dict, excess)

        del fps_dict[victor]

    through.extend(through_this_round)
    through_this_round = []

print(through)

TypeError: unhashable type: 'numpy.ndarray'

In [67]:
prof

Unnamed: 0,Amy DOHERTY,Andrew GIRVIN,Andy ALLEN,Chris LYTTLE,Chris MCGIMPSEY,Courtney ROBINSON,Erskine HOLMES,Joanne BUNTING,John KYLE,Jonny LAVERY,Maggie HUTTON,Naomi LONG,Neil WILSON,Niall O DONNGHAILE,Robin NEWTON,Ross BROWN,Sammy DOUGLAS,Tim MORROW,votes_lost
Amy DOHERTY,0.0,0.00848,0.033921,0.224729,0.016961,0.122965,0.0,0.0,0.042571,0.00848,0.029681,0.0,0.021201,0.072083,0.013399,0.161126,0.031716,0.076323,0.136364
Andrew GIRVIN,0.0,0.0,0.226045,0.020306,0.087028,0.0,0.0,0.0,0.138217,0.0,0.08211,0.0,0.0,0.0,0.102619,0.039754,0.175396,0.006749,0.121777
Andy ALLEN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Chris LYTTLE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Chris MCGIMPSEY,0.0,0.0,0.726949,0.02863,0.0,0.0,0.0,0.0,0.035843,0.0,0.0,0.0,0.0,0.0,0.063351,0.024054,0.049771,0.021922,0.049481
Courtney ROBINSON,0.0,0.007187,0.04508,0.136209,0.023429,0.0,0.0,0.0,0.050416,0.041396,0.055932,0.0,0.0,0.034245,0.014374,0.384031,0.035988,0.068599,0.103114
Erskine HOLMES,0.0,0.00848,0.033921,0.224729,0.016961,0.122965,0.0,0.0,0.042571,0.00848,0.029681,0.0,0.021201,0.072083,0.013399,0.161126,0.031716,0.076323,0.136364
Joanne BUNTING,0.003348,0.011982,0.045286,0.004053,0.014273,0.000881,0.0,0.0,0.017621,0.006696,0.00652,0.0,0.001586,0.0,0.096388,0.006167,0.755595,0.001586,0.028018
John KYLE,0.0,0.0,0.22732,0.07204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.188514,0.095668,0.164176,0.027053,0.225229
Jonny LAVERY,0.0,0.175839,0.113415,0.025307,0.046152,0.0,0.0,0.0,0.156416,0.0,0.06439,0.0,0.0,0.004183,0.067207,0.086574,0.162189,0.012772,0.085556
