## parse_elections 

Adapted from DexGroves's code.

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

The table is read so that the entry at (row, column) gives the transfers _from_ the row's party _to_ the column's party.

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os.path
import numpy as np 
import pandas as pd 
from glob import glob 

pd.options.display.max_columns = 100

%matplotlib inline

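Make the _total_ matrix of party-to-party transfers countrywide. This is saved both as absolute vote totals (`transfers.csv`) and as a row-normalised relative amount (`transfers_rel.csv`). The cells below build a first, rough version of the relative matrix: for each donor party, the fraction of its transferred votes that went to each destination party, averaged over the constituencies where that transfer was observed, and written to `transferMatrix_rough.csv`.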

In [2]:
def getConstitDestinations(file):
    """Work out where each party's transferred votes went in one constituency.

    The count file is expected to provide the columns ``Candidate_Id``,
    ``Count_Number``, ``Occurred_On_Count``, ``Party_Name`` and ``Transfers``.

    For every count ``c >= 2``, rows with a negative ``Transfers`` value are
    treated as candidates giving votes away.  Those whose
    ``Occurred_On_Count`` is ``c - 1`` or ``c - 2`` are removed from the pool
    of candidates still able to receive votes.  When exactly one candidate
    gives votes away with ``Occurred_On_Count == c - 1``, the share of those
    votes picked up by each party still in the pool is recorded.  Only the
    first such transfer per party is recorded; later ones are skipped.

    Parameters
    ----------
    file : str or file-like
        Path (or open handle) of a constituency ``Count.csv``.

    Returns
    -------
    dict
        ``{donor_party: {destination_party: fraction, ..., 'votes_lost': fraction}}``
        where ``'votes_lost'`` is the share of the donor's transferred votes
        not received by any candidate still in the pool.
    """
    # DataFrame.from_csv was removed in pandas 1.0.  index_col=0 keeps the
    # column layout it produced; the index itself is never consulted below.
    df = pd.read_csv(file, index_col=0)

    voteDestinations = {}
    if df.empty:
        # No counts at all: nothing was transferred.
        return voteDestinations

    stillIn = set(df['Candidate_Id'])
    partiesSeen = set()  # parties whose first transfer has been recorded

    for c in range(2, int(df.Count_Number.max()) + 1):
        thisCount = df[df.Count_Number == c]
        givingAway = thisCount[thisCount.Transfers < 0]

        #TODO: some transfers occur more than 1 count later, currently missed
        donors = givingAway[givingAway.Occurred_On_Count == c - 1]
        lateDonors = givingAway[givingAway.Occurred_On_Count == c - 2]

        # discard() rather than remove(): a candidate id that was already
        # taken out of the pool must not abort the whole constituency.
        for cid in donors.Candidate_Id:
            stillIn.discard(cid)
        for cid in lateDonors.Candidate_Id:
            print('2 counts later removing %s' % cid)
            stillIn.discard(cid)

        # Only unambiguous counts (exactly one donor) can be attributed.
        if len(donors) != 1:
            continue

        transferring = donors.Party_Name.iloc[0]
        if transferring in partiesSeen:
            #TODO handle this case of two transfers from same party.
            #for now skip the second case
            continue
        partiesSeen.add(transferring)

        # Scalar on purpose: dividing by a one-row Series would align on the
        # index and give NaN unless every row happened to share one label.
        totalVotesAvailable = -donors.Transfers.iloc[0]

        # Recipients: non-negative transfers (excludes the donor) to
        # candidates that are still in the pool.
        received = thisCount[(thisCount.Transfers >= 0)
                             & thisCount.Candidate_Id.isin(stillIn)]
        fracReceived = received.Transfers / totalVotesAvailable
        fracLost = 1 - fracReceived.sum()

        # setdefault guarantees the donor entry exists even when nobody
        # received anything, so 'votes_lost' can always be stored.
        partyShares = voteDestinations.setdefault(transferring, {})
        for party, frac in zip(received.Party_Name, fracReceived):
            partyShares[party] = partyShares.get(party, 0) + frac
        partyShares['votes_lost'] = fracLost

        accounted = sum(partyShares.values())
        if accounted < 0.9999:
            print('missing some %s' % accounted)

    return voteDestinations
        
#TODO: not correct yet. Have a SF self transfer = 0 in Mid Ulster.
    
#TODO: could also note count number applying to each transferring. Vote more likely to be lost
#  if it is being passed from the party at a high count number


In [3]:
# Combine the per-constituency results into
#   overallDests[donor][dest] = [number of constituencies contributing,
#                                sum of the fractions transferred]
# so that a mean fraction can be taken afterwards.
overallDests = {}
for filename in glob('data/2016_archive_datapackage/constituency/*/Count.csv'):
    voteDestinations = getConstitDestinations(filename)

    # Diagnostic: show the Sinn Fein -> Sinn Fein fraction wherever one was recorded.
    # Single-argument print() emits the same text on Python 2 and Python 3.
    if 'Sinn Fein' in voteDestinations.get('Sinn Fein', {}):
        print('%s %s' % (filename, voteDestinations['Sinn Fein']['Sinn Fein']))

    for donor, destinations in voteDestinations.items():
        donorTotals = overallDests.setdefault(donor, {})
        for dest, fraction in destinations.items():
            # First sighting starts from [0, 0]; then count it and add its fraction.
            tally = donorTotals.setdefault(dest, [0, 0])
            tally[0] += 1
            tally[1] += fraction

#for dest in overallDests['Sinn Fein'].keys():
#    print dest,overallDests['Sinn Fein'][dest][1] / overallDests['Sinn Fein'][dest][0]

2 counts later removing 226
2 counts later removing 217
data/2016_archive_datapackage/constituency/south-down/Count.csv 0.886484568996
2 counts later removing 96
2 counts later removing 150
data/2016_archive_datapackage/constituency/mid-ulster/Count.csv 0.0
data/2016_archive_datapackage/constituency/belfast-west/Count.csv 0.930916312469
2 counts later removing 7
data/2016_archive_datapackage/constituency/fermanagh-south-tyrone/Count.csv 0.911226811964
data/2016_archive_datapackage/constituency/foyle/Count.csv 0.925497752087
2 counts later removing 159
2 counts later removing 168
data/2016_archive_datapackage/constituency/belfast-north/Count.csv 0.74314893617


In [4]:
# Write the mean donor -> destination fractions to CSV, one row per donor party.
# Columns are the donor parties plus 'votes_lost'.
# list(...) is required on Python 3, where dict.keys() is a view without append().
dests = list(overallDests.keys())
dests.append('votes_lost')
# NOTE(review): the header row has no cell for the leading donor-name column,
# so it is one field shorter than the data rows - confirm this is intended.
#dests.insert(0,'DONOR')

import csv
with open('transferMatrix_rough.csv','w') as csvfile:
    mywriter = csv.writer(csvfile)
    mywriter.writerow(dests)
    for donor in overallDests:
        donorTotals = overallDests[donor]
        newlist = [donor]
        for dest in dests:
            if dest in donorTotals:
                # Each entry is [number of constituencies, summed fraction].
                count, total = donorTotals[dest]
                newlist.append(total / count)
            else:
                # This donor never transferred to this destination.
                newlist.append('NaN')
        mywriter.writerow(newlist)