## parse_elections 

Adapted from DexGroves's code.

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

The table is read so that the entry at (row, column) gives the transfers _from_ the row party _to_ the column party.

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os.path
import numpy as np 
import pandas as pd 
from glob import glob 
import csv

pd.options.display.max_columns = 100

%matplotlib inline

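Make the _total_ matrix of party-to-party transfers countrywide. The code below writes row-normalised transfer fractions to `transferProbs/transferMatrix_*_nationwide.csv`: one matrix for transfers where a candidate of the donor's own party was still available, one for transfers where none was, and one for all transfers together. (This note previously described the output as absolute vote totals in `transfers.csv` and row-normalised relative amounts in `transfers_rel.csv`; the code below does not write those files.)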

In [52]:
def getConstitDestinations(file, whichSituation='all'):
    """Estimate party-to-party transfer fractions from one constituency count file.

    For every count ``c >= 2`` in which exactly one row has a negative
    ``Transfers`` value and ``Occurred_On_Count == c - 1``, the votes given up
    by that candidate are split between the parties of the candidates still in
    the race and a ``'votes_lost'`` bucket holding whatever fraction nobody
    picked up.  Counts with no such row, or with more than one, are skipped.

    Parameters
    ----------
    file : str
        Path to a count CSV.  The columns read are ``Candidate_Id``,
        ``Party_Name``, ``Status``, ``Occurred_On_Count``, ``Count_Number``
        and ``Transfers``; the first column of the file is used as the index
        and is otherwise ignored.
    whichSituation : str
        ``'self'`` keeps only transfers where a candidate of the donor's own
        party was still available to receive votes, ``'noself'`` keeps only
        transfers where none was, and any other value keeps every transfer.

    Returns
    -------
    dict
        ``{donor party: {recipient party or 'votes_lost': fraction}}`` where
        each fraction is the mean over the donor party's transfers in which
        that recipient appeared.
    """
    # pd.DataFrame.from_csv is deprecated and gone from pandas >= 1.0;
    # read_csv with index_col=0 keeps the same column layout.  from_csv also
    # tried to parse the index as dates, but the index is never consulted
    # below, so that step is dropped.
    df = pd.read_csv(file, index_col=0)

    stillIn = set(df['Candidate_Id'])
    voteDestinations = {}
    transferringCounts = {}

    for c in range(2, df.Count_Number.max() + 1):
        thisCount = df[df.Count_Number == c]

        #TODO: some transfers occur more than 1 count later, currently missed
        donors = thisCount[(thisCount.Occurred_On_Count == c - 1) & (thisCount.Transfers < 0)]

        # Candidates elected or excluded on an earlier count cannot receive
        # votes any more.
        finished = thisCount[thisCount.Status.isin(['Elected', 'Excluded'])
                             & (thisCount.Occurred_On_Count < c)]
        stillIn.difference_update(finished.Candidate_Id)

        if len(donors) != 1:
            #TODO: counts with no donor, or with several, are not handled yet
            continue

        donorParty = donors.Party_Name.iloc[0]
        donorCounts = transferringCounts.setdefault(donorParty, {})

        # Take a plain scalar.  Dividing by the one-row Series instead would
        # align on the index and only yields numbers when the receivers happen
        # to share the donor's index label; otherwise every fraction is NaN.
        totalVotesAvailable = float(-donors.Transfers.iloc[0])

        # Transfers >= 0 excludes the donor; isin(stillIn) excludes candidates
        # who are no longer available.
        received = thisCount[(thisCount.Transfers >= 0) & thisCount.Candidate_Id.isin(stillIn)]
        recipientParties = received.Party_Name.tolist()

        selfAvailable = donorParty in recipientParties
        if whichSituation == 'self' and not selfAvailable:
            continue
        if whichSituation == 'noself' and selfAvailable:
            continue

        fracReceived = received.Transfers / totalVotesAvailable
        fracLost = 1 - fracReceived.sum()

        # setdefault also covers a transfer with no eligible recipient, which
        # would otherwise hit a missing key when recording votes_lost.
        donorDests = voteDestinations.setdefault(donorParty, {})
        for party, frac in zip(recipientParties, fracReceived.tolist()):
            donorDests[party] = donorDests.get(party, 0.0) + frac
        donorDests['votes_lost'] = donorDests.get('votes_lost', 0.0) + fracLost

        # Keep track of how many of this donor party's transfers each
        # recipient appeared in, so the sums can be turned into means below.
        for party in set(recipientParties + ['votes_lost']):
            donorCounts[party] = donorCounts.get(party, 0) + 1

        runningTotal = sum(donorDests.values())
        if runningTotal < 0.9999:
            # Single-argument form prints identically on Python 2 and 3.
            print('missing some %s' % runningTotal)

    for donor, donorDests in voteDestinations.items():
        for recip in donorDests:
            donorDests[recip] /= transferringCounts[donor][recip]

    return voteDestinations
           
#TODO: could also note count number applying to each transferring. Vote more likely to be lost
#  if it is being passed from the party at a high count number


In [53]:
# Sanity check on one constituency: print, for each donor party, the sum of
# its transfer fractions as returned by getConstitDestinations.
mine = getConstitDestinations('data/2016_archive_datapackage/constituency/belfast-west/Count.csv', whichSituation='noself')
for fractions in mine.values():
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(sum(fractions.values()))
#for filename in glob('data/2016_archive_datapackage/constituency/*/Count.csv'):
#    voteDestinations = getConstitDestinations(filename, whichSituation='self')
    #print voteDestinations


1.0
1.0


In [57]:
#TODO: normalisation OK? UUP self prob seems low at 0.63

#Separate matrices for when self transfer available and not

def makeOverallMatrix(whichSituation):
    """Write the nationwide transfer matrix for one situation to a CSV file.

    Every ``data/2016_archive_datapackage/constituency/*/Count.csv`` file is
    passed to ``getConstitDestinations`` with the given ``whichSituation``.
    For each (donor, destination) pair the number of constituencies it
    appeared in and the sum of its values are accumulated.  Each written cell
    is the per-constituency mean, rescaled so that a donor's means add up
    to 1.

    The output path is chosen from ``whichSituation`` (``'self'``,
    ``'noself'``, anything else meaning all transfers).  The ``transferProbs``
    directory is not created here.

    File layout: the first row holds the destination names only, with no cell
    for the donor column.  Each following row is the donor name followed by
    one value per destination, with ``'NaN'`` where the donor never
    transferred to it.  Destinations missing from the fixed list below are not
    written, but still count towards the donor's normalisation factor.
    """
    dests = ['Alliance Party','Animal Welfare Party','Cannabis Is Safer Than Alcohol',
         'Cross-Community Labour Alternative','Democracy First','Democratic Unionist Party','Green Party',
         'Independent','Labour Alternative','NI Conservatives','NI Labour Representation Committee',
         'Northern Ireland First','People Before Profit Alliance','Progressive Unionist Party','Sinn Fein',
         'Social Democratic and Labour Party','South Belfast Unionists','Traditional Unionist Voice',
         'UK Independence Party','Ulster Unionist Party','Workers Party','votes_lost']

    outfileBySituation = {
        'self': 'transferProbs/transferMatrix_whenSelfAvailable_nationwide.csv',
        'noself': 'transferProbs/transferMatrix_whenSelfNotAvailable_nationwide.csv',
    }
    outfile = outfileBySituation.get(whichSituation,
                                     'transferProbs/transferMatrix_all_nationwide.csv')

    # overallDests[donor][dest] == [number of constituencies, summed value]
    overallDests = {}
    for filename in glob('data/2016_archive_datapackage/constituency/*/Count.csv'):
        constitDests = getConstitDestinations(filename, whichSituation=whichSituation)

        for donor, fractions in constitDests.items():
            if donor not in overallDests:
                overallDests[donor] = {}
            donorTally = overallDests[donor]
            for dest, frac in fractions.items():
                if dest in donorTally:
                    donorTally[dest][0] += 1
                    donorTally[dest][1] += frac
                else:
                    donorTally[dest] = [1, frac]

    with open(outfile,'w') as csvfile:
        mywriter = csv.writer(csvfile)
        mywriter.writerow(dests)
        for donor, donorTally in overallDests.items():
            # Rescale so the donor's per-constituency means add up to 1.
            meanTotal = sum([total / seen for seen, total in donorTally.values()])
            normFactor = 1 / meanTotal

            row = [donor]
            for dest in dests:
                if dest not in donorTally:
                    row.append('NaN')
                    continue
                seen, total = donorTally[dest]
                row.append('%.5f' % (normFactor * total / seen))
            mywriter.writerow(row)
        
# Write all three nationwide matrices: own-party candidate available, not
# available, and every transfer regardless.
for situation in ('self', 'noself', 'all'):
    makeOverallMatrix(situation)