## parse_elections 

Adapted from DexGroves's code.

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

The table is read so that the entry at (row, column) gives the transfers _from_ the row party _to_ the column party. 

In [5]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os.path
import numpy as np 
import pandas as pd 
from glob import glob 
import csv

pd.options.display.max_columns = 100

%matplotlib inline

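Build the matrix of party-to-party transfers, first for each constituency and then countrywide. Each entry is the fraction of the transferring party's available votes that went to the receiving party, with a `votes_lost` column for votes that did not transfer. The per-constituency matrices are saved as `transferMatrix_rough_<constituency>.csv`, and the countrywide matrix (the mean of the per-constituency fractions) as `transferMatrix_rough_nationwide.csv`.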

In [3]:
def getConstitDestinations(file):
    """Return the party-to-party transfer fractions for one constituency.

    The count table is read from ``file`` (a path or file-like object
    holding CSV data).  The first CSV column is used as the row index and is
    otherwise ignored; the columns read are ``Candidate_Id``,
    ``Party_Name``, ``Count_Number``, ``Occurred_On_Count`` and
    ``Transfers``.

    For every count ``c`` from 2 upwards, a row with ``Count_Number == c``,
    ``Occurred_On_Count == c - 1`` and negative ``Transfers`` is treated as
    a candidate giving votes away.  When exactly one such row exists, the
    non-negative ``Transfers`` of the candidates still in the running are
    divided by the number of votes given away and summed per receiving
    party.

    Known limitations (kept from the original implementation):
      * only the first transfer made by each party is recorded; later
        transfers from the same party are skipped;
      * counts with zero or several transferring candidates are skipped.

    Returns:
        dict mapping transferring party name -> dict mapping receiving
        party name -> fraction of the transferred votes received.  Each
        inner dict also has a ``'votes_lost'`` entry holding the fraction
        that reached no remaining candidate.  An empty table gives ``{}``.

    Raises:
        KeyError: if a candidate is removed from the running twice.
    """
    # DataFrame.from_csv was removed from pandas; read_csv with index_col=0
    # keeps the same column layout (first column becomes the index).
    df = pd.read_csv(file, index_col=0)
    if df.empty:
        return {}

    stillIn = set(df['Candidate_Id'])
    voteDestinations = {}
    transferringCounts = {}
    lastCount = int(df.Count_Number.max())

    for c in range(2, lastCount + 1):
        atThisCount = df.Count_Number == c
        givingVotes = df.Transfers < 0

        #TODO: some transfers occur more than 1 count later, currently missed
        donors = df[atThisCount & givingVotes & (df.Occurred_On_Count == c - 1)]
        lateDonors = df[atThisCount & givingVotes & (df.Occurred_On_Count == c - 2)]

        # Candidates giving votes away can no longer receive any.
        for cid in donors.Candidate_Id:
            stillIn.remove(cid)
        for cid in lateDonors.Candidate_Id:
            print('2 counts later removing %s' % cid)
            stillIn.remove(cid)

        # Only counts with a single transferring candidate can be attributed
        # to one party.
        if len(donors) != 1:
            continue

        transferring = donors.Party_Name.iloc[0]
        transferringCounts[transferring] = transferringCounts.get(transferring, 0) + 1
        if transferringCounts[transferring] > 1:
            #TODO handle this case of two transfers from same party.
            #for now skip the second case
            continue

        # Use a plain float rather than a one-element Series: dividing by a
        # Series aligns on index labels and produces NaN whenever the
        # donor's label differs from the receivers' labels.
        totalVotesAvailable = -float(donors.Transfers.iloc[0])

        received = df.loc[atThisCount, ['Candidate_Id', 'Party_Name', 'Transfers']]
        received = received[received.Transfers >= 0]  #exclude the transferrer
        received = received[received.Candidate_Id.isin(stillIn)]  #exclude those not available
        fracReceived = received.Transfers / totalVotesAvailable

        partyShares = voteDestinations.setdefault(transferring, {})
        for party, frac in zip(received.Party_Name, fracReceived):
            partyShares[party] = partyShares.get(party, 0) + frac
        partyShares['votes_lost'] = 1 - fracReceived.sum()

        accountedFor = sum(partyShares.values())
        if accountedFor < 0.9999:
            print('missing some %s' % accountedFor)

    return voteDestinations
        
#TODO: not correct yet. Have a SF self transfer = 0 in Mid Ulster.
    
#TODO: could also note count number applying to each transferring. Vote more likely to be lost
#  if it is being passed from the party at a high count number


In [19]:
# Column/row labels of every transfer matrix: all parties plus the
# 'votes_lost' bucket for votes that reached no remaining candidate.
dests = [
    'Alliance Party',
    'Animal Welfare Party',
    'Cannabis Is Safer Than Alcohol',
    'Cross-Community Labour Alternative',
    'Democracy First',
    'Democratic Unionist Party',
    'Green Party',
    'Independent',
    'Labour Alternative',
    'NI Conservatives',
    'NI Labour Representation Committee',
    'Northern Ireland First',
    'People Before Profit Alliance',
    'Progressive Unionist Party',
    'Sinn Fein',
    'Social Democratic and Labour Party',
    'South Belfast Unionists',
    'Traditional Unionist Voice',
    'UK Independence Party',
    'Ulster Unionist Party',
    'Workers Party',
    'votes_lost',
]

# overallDests[donor][dest] is a two-item list:
# [number of constituencies seen, sum of the fractions seen].
overallDests = {}

for countFile in glob('data/2016_archive_datapackage/constituency/*/Count.csv'):
    voteDestinations = getConstitDestinations(countFile)

    # The constituency name is the directory component after 'constituency/'.
    constituency = countFile.split('/')[3]

    # One matrix per constituency; cells with no recorded transfer are 'NaN'.
    with open('transferMatrix_rough_%s.csv' % constituency, 'w') as outFile:
        writer = csv.writer(outFile)
        writer.writerow(dests)
        for donor in dests:
            shares = voteDestinations.get(donor, {})
            cells = []
            for dest in dests:
                if dest in shares:
                    cells.append('%.5f' % shares[dest])
                else:
                    cells.append('NaN')
            writer.writerow([donor] + cells)

    # Fold this constituency's fractions into the running totals.
    for donor, shares in voteDestinations.items():
        totals = overallDests.setdefault(donor, {})
        for dest, frac in shares.items():
            if dest in totals:
                totals[dest][0] += 1
                totals[dest][1] += frac
            else:
                totals[dest] = [1, frac]

2 counts later removing 226
2 counts later removing 217
2 counts later removing 96
2 counts later removing 150
2 counts later removing 7
2 counts later removing 159
2 counts later removing 168


In [11]:
# Spot check: the 'Alliance Party' share of 'Democratic Unionist Party'
# transfers. voteDestinations is presumably the result for the last
# constituency file processed (notebook cells may have run out of order).
voteDestinations['Democratic Unionist Party']['Alliance Party']

0.011230907457322551

In [17]:
# Write the countrywide matrix: each cell is the sum of the per-constituency
# fractions divided by the number of constituencies that contributed one.
# Destinations never seen for a donor are written as 'NaN'.
with open('transferMatrix_rough_nationwide.csv', 'w') as outFile:
    writer = csv.writer(outFile)
    writer.writerow(dests)
    for donor, tallies in overallDests.items():
        row = [donor]
        for dest in dests:
            if dest not in tallies:
                row.append('NaN')
                continue
            timesSeen, fracSum = tallies[dest]
            row.append('%.5f' % (fracSum / timesSeen))
        writer.writerow(row)