## parse_elections 

Adapted from DexGroves's code.

Find the party-to-party transfer matrix for Northern Ireland's 2016 MLA election, using data from [Elections NI](http://electionsni.org.s3-website-eu-west-1.amazonaws.com/data/).

The table is read so that the entry at (row, column) gives the transfers _from_ the row party _to_ the column party.

In [1]:
# Notebook setup: display behaviour, imports and plotting backend.
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import os.path
import numpy as np 
import pandas as pd 
from glob import glob 
import csv

# Allow wide frames (up to 100 columns) to be displayed without truncation.
pd.options.display.max_columns = 100

# IPython magic (not plain Python): render matplotlib figures inside the notebook.
%matplotlib inline

Make the _total_ matrix of party-to-party transfers countrywide. This is saved both as absolute vote totals (`transfers.csv`) and as a row-normalised relative amount (`transfers_rel.csv`).

In [2]:
def getConstitDestinations(file, whichSituation='all', fileToSaveEachRow=None):
    """Summarise where each party's transferred votes went in one constituency.

    The count file is walked count by count. A count is used only when exactly
    one candidate, whose election/exclusion occurred on an earlier count, has a
    negative ``Transfers`` value (the donor). The donor's votes are attributed
    to the candidates that are still eligible to receive them.

    Parameters
    ----------
    file : str
        Path to a ``Count.csv`` file. The name of its parent directory is used
        as the constituency name. The columns read are ``Candidate_Id``,
        ``Count_Number``, ``Occurred_On_Count``, ``Status``, ``Party_Name``,
        ``Transfers`` and ``Total_Votes``; the first column of the file is
        taken as the index and never read.
    whichSituation : str
        ``'self'`` keeps only counts in which the donor's own party still has
        an eligible candidate; ``'noself'`` keeps only counts in which it does
        not; any other value keeps every count.
    fileToSaveEachRow : str or None
        When given, one CSV row per (donor, receiving candidate) is appended to
        this file. The column order is taken from the module-level list
        ``header``, which must be defined before the call.

    Returns
    -------
    dict
        ``{donor party: {target: [fraction, partiesLeft]}}`` where ``target``
        is a party name or ``'votes_lost'``. ``fraction`` is the share of the
        donor's transferred votes that reached the target and ``partiesLeft``
        is the number of distinct parties that could still receive votes; both
        are averaged over the counts in which that donor/target pair occurred.
    """
    # DataFrame.from_csv was removed in pandas 1.0; it used the first column
    # as the index, which index_col=0 reproduces.
    df = pd.read_csv(file, index_col=0)

    constituency = os.path.basename(os.path.dirname(file))
    stillIn = set(df['Candidate_Id'])
    voteDestinations = {}
    transferringCounts = {}

    if df.empty:
        return voteDestinations

    for c in range(1, int(df.Count_Number.max()) + 1):
        thisCount = df[df.Count_Number == c]
        decidedEarlier = thisCount.Occurred_On_Count < c

        # Anyone elected or excluded on an earlier count can no longer receive.
        finished = thisCount[decidedEarlier
                             & thisCount.Status.isin(['Elected', 'Excluded'])]
        stillIn.difference_update(finished.Candidate_Id)

        donors = thisCount[decidedEarlier & (thisCount.Transfers < 0)]
        if len(donors) != 1:
            # No transfer on this count, or several donors whose votes cannot
            # be told apart.
            # TODO: handle counts with more than one donor.
            continue

        transferring = donors.Party_Name.iloc[0]
        transferringCounts.setdefault(transferring, {})
        totalVotesAvailable = -float(donors.Transfers.iloc[0])

        transfersReceived = thisCount[['Candidate_Id', 'Party_Name',
                                       'Transfers', 'Total_Votes']]
        transfersReceived = transfersReceived[
            (transfersReceived.Transfers >= 0)                  # exclude the transferrer
            & transfersReceived.Candidate_Id.isin(stillIn)]     # exclude those not available
        transfersReceived = transfersReceived.assign(
            fracReceived=transfersReceived.Transfers / totalVotesAvailable)

        self_available = transferring in set(transfersReceived.Party_Name)
        if whichSituation == 'self' and not self_available:
            continue
        if whichSituation == 'noself' and self_available:
            continue

        if transfersReceived.empty:
            # No eligible recipient, so there is no transfer pattern to record.
            continue

        fracLost = 1 - transfersReceived.fracReceived.sum()

        records = transfersReceived.to_dict('records')
        candsPerParty = {}
        for rec in records:
            candsPerParty[rec['Party_Name']] = candsPerParty.get(rec['Party_Name'], 0) + 1
        numPartiesLeft = len(candsPerParty)

        # Output individual transfer records.
        if fileToSaveEachRow is not None:
            with open(fileToSaveEachRow, 'a', newline='') as csvfile:
                mywriter = csv.writer(csvfile)
                for rec in records:
                    outRow = dict(rec)
                    outRow['Constituency'] = constituency
                    outRow['Self_Available'] = self_available
                    outRow['Count_Number'] = c
                    outRow['Transferrer'] = transferring
                    outRow['Target'] = rec['Party_Name']
                    outRow['Transferred_Votes'] = totalVotesAvailable
                    outRow['Target_Total_Votes_So_Far'] = rec['Total_Votes']
                    outRow['Number_Of_Target_Parties_Left'] = numPartiesLeft
                    mywriter.writerow([outRow[k] for k in header])

        donorDests = voteDestinations.setdefault(transferring, {})
        for rec in records:
            party = rec['Party_Name']
            totals = donorDests.setdefault(party, [0.0, 0.0])
            totals[0] += rec['fracReceived']
            # Each candidate adds an equal share, so a party contributes
            # numPartiesLeft in total for this count.
            totals[1] += numPartiesLeft / candsPerParty[party]

        lost = donorDests.setdefault('votes_lost', [0.0, 0])
        lost[0] += fracLost
        lost[1] += numPartiesLeft

        # Keep track of how many times the donor party fed each target.
        timesSeen = transferringCounts[transferring]
        for party in set(candsPerParty) | {'votes_lost'}:
            timesSeen[party] = timesSeen.get(party, 0) + 1

    # Turn the per-target sums into averages over the contributing counts.
    for donor, destinations in voteDestinations.items():
        for recip, totals in destinations.items():
            occurrences = transferringCounts[donor][recip]
            totals[0] /= occurrences
            totals[1] /= occurrences

    return voteDestinations

#OK to forget transferringCounts as it will almost always be 1, sometimes 2-3,
#  but higher number doesn't really mean more confidence.
    
#TODO: could also note count number applying to each transferring. Vote more likely to be lost
#  if it is being passed from the party at a high count number


In [4]:
# Spot-check a single constituency: print each donor party with its destinations.
mine = getConstitDestinations('data/2007_archive_datapackage/constituency/east-antrim/Count.csv', whichSituation='all')
for donorParty, destinations in mine.items():
    print(donorParty, destinations)

In [12]:
#TODO: normalisation OK? UUP self prob seems low at 0.63

#Separate matrices for when self transfer available and not

def _writeTransferTable(path, dests, table, formatCell, missing):
    """Write one donor-by-destination table to ``path`` as CSV.

    The first row lists ``dests``. Every following row starts with the donor
    name and then has one cell per entry of ``dests``, so data rows are one
    field longer than the first row. ``'votes_lost'`` is a destination only
    and never gets a donor row. ``formatCell`` turns a stored entry into the
    cell value; ``missing`` is written where ``table`` has no entry.
    """
    with open(path, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        mywriter.writerow(dests)
        for donor in dests:
            if donor == 'votes_lost':
                continue
            if donor in table:
                row = table[donor]
                cells = [formatCell(row[dest]) if dest in row else missing
                         for dest in dests]
            else:
                cells = [missing] * len(dests)
            mywriter.writerow([donor] + cells)


def makeOverallMatrix(year, whichSituation='all', fileToSaveEachRow=None):
    """Build and save the party-to-party transfer tables for one election year.

    Every ``data/<year>_archive_datapackage/constituency/*/Count.csv`` file is
    passed to ``getConstitDestinations``. For each constituency a matrix of
    transfer fractions is written, and the per-constituency results are then
    averaged into three nationwide files: the row-normalised transfer matrix,
    the number of constituencies behind each cell (``counts``) and the average
    number of parties left to transfer to (``numPartiesLeft``).

    Only the parties listed in ``dests`` appear in the output files. Parties
    missing from that list are left out of the rows and columns, although
    their fractions still enter the row normalisation of the nationwide
    matrix.

    Parameters
    ----------
    year : int
        Election year; selects the input data package and the
        ``transferProbs<year>`` output directory (created if missing).
    whichSituation : str
        Forwarded to ``getConstitDestinations`` and used to pick the output
        file names: ``'self'``, ``'noself'`` or anything else for all counts.
    fileToSaveEachRow : str or None
        Forwarded unchanged to ``getConstitDestinations``.
    """
    dests = ['Alliance Party','Animal Welfare Party','Cannabis Is Safer Than Alcohol',
         'Cross-Community Labour Alternative','Democracy First','Democratic Unionist Party','Green Party',
         'Independent','Labour Alternative','NI Conservatives','NI Labour Representation Committee',
         'Northern Ireland First','People Before Profit Alliance','Progressive Unionist Party','Sinn Fein',
         'Social Democratic and Labour Party','South Belfast Unionists','Traditional Unionist Voice',
         'UK Independence Party','Ulster Unionist Party','Workers Party', #2016
             'Socialist Party','British Nationalist Party','Procapitalism', #2011
             'UK Unionist Party','Socialist Environmental Alliance','Republican Sinn Fein',
             'Labour Party','Make Politicians History', #2007
             'votes_lost']

    if whichSituation == 'self':
        label = 'whenSelfAvailable'
    elif whichSituation == 'noself':
        label = 'whenSelfNotAvailable'
    else:
        label = 'all'
    outfile = 'transferProbs%i/transferMatrix_%s_nationwide.csv' % (year, label)

    # The original assumed this directory already existed and crashed otherwise.
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    # overallDests[donor][dest] = [number of constituencies,
    #                              summed fraction, summed parties-left]
    overallDests = {}

    # Sorted so that floating-point sums are accumulated in a reproducible order.
    for filename in sorted(glob('data/%i_archive_datapackage/constituency/*/Count.csv' % year)):

        voteDestinations = getConstitDestinations(filename, whichSituation=whichSituation,
                                                  fileToSaveEachRow=fileToSaveEachRow)
        #TODO use length-1 of voteDestinations[donor] dictionaries to record how many
        #  different targets were available each time (constit average)

        constit = os.path.basename(os.path.dirname(filename))

        _writeTransferTable('transferProbs%i/transferMatrix_%s_%s.csv' % (year, label, constit),
                            dests, voteDestinations,
                            lambda cell: '%.5f' % cell[0], 'NaN')

        for donor, destinations in voteDestinations.items():
            donorTotals = overallDests.setdefault(donor, {})
            for dest, (fraction, partiesLeft) in destinations.items():
                totals = donorTotals.setdefault(dest, [0, 0, 0])
                totals[0] += 1
                totals[1] += fraction
                totals[2] += partiesLeft

    # Normalise rows so they sum to 1 (over every destination seen for the donor).
    normalised = {}
    for donor in dests:
        if donor not in overallDests:
            continue
        row = overallDests[donor]
        normFactor = 1 / sum([cell[1] / cell[0] for cell in row.values()])
        normalised[donor] = {dest: normFactor * cell[1] / cell[0]
                             for dest, cell in row.items()}
    _writeTransferTable(outfile, dests, normalised,
                        lambda value: '%.5f' % value, 'NaN')

    #also save counts of how many went into each transfer fraction
    _writeTransferTable(outfile.replace('transferMatrix','counts'), dests, overallDests,
                        lambda cell: cell[0], 0)

    #and average number of parties left
    _writeTransferTable(outfile.replace('transferMatrix','numPartiesLeft'), dests, overallDests,
                        lambda cell: '%.5f' % (cell[2] / cell[0]), 'NaN')
        
year = 2017
makeOverallMatrix(year, whichSituation='self')
makeOverallMatrix(year, whichSituation='noself')

# Column order of the per-transfer rows. getConstitDestinations reads this
# module-level name when it appends rows, so it must be defined before the
# run below that passes fileToSaveEachRow.
header = ['Transferrer','Target','Constituency','Count_Number','Self_Available','Number_Of_Target_Parties_Left',
          'Transferred_Votes','Target_Total_Votes_So_Far','fracReceived']

# Build the path once so the header and the appended rows cannot diverge.
rowsFile = 'transferProbs%i/transfersByRow_%i_all.csv' % (year, year)
# Start a fresh file; csv.writer gives the header the same line terminator
# as the rows appended later.
with open(rowsFile, 'w', newline='') as f:
    csv.writer(f).writerow(header)
makeOverallMatrix(year, whichSituation='all', fileToSaveEachRow=rowsFile)

147