## Election Prediction 2022 - Argentina - Dataset Creation

In [2]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from datetime import datetime

In [3]:
# No dataset in a remotely usable form is available, so every value in this
# notebook is entered by hand from the pdf reports.

# One entry per election year
years = [1983, 1989, 1995, 1999, 2003, 2007, 2011, 2015, 2019]

# The same years expressed as datetime objects (January 1st of each year)
year_dates = [datetime(year=election_year, month=1, day=1) for election_year in years]

In [4]:
# Number of registered voters at each election
number_registered_voters = [
    17929951,  # 1983
    20034252,  # 1989
    22178201,  # 1995
    24111270,  # 1999
    25481410,  # 2003
    27137536,  # 2007
    28916183,  # 2011
    32130853,  # 2015
    34231721,  # 2019
]

# Being registered does not mean actually voting, so the percentage of
# registered voters who turned out is recorded as well
percentage_voted = [
    85.61,  # 1983
    85.31,  # 1989
    82.08,  # 1995
    82.32,  # 1999
    78.22,  # 2003
    76.20,  # 2007
    79.39,  # 2011
    81.07,  # 2015
    80.42,  # 2019
]

In [5]:
# The goal is to predict who wins the 2022 elections. With so few samples the
# candidate's name cannot be predicted, so the prediction is made on the party's
# position on the political spectrum instead (there tends to be one
# representative from each party in the final round of the elections).

# The spectrum has 3 main categories (Left, Center, Right); the in-between
# categories such as Left-Center, Center-Right, etc. are used as well.

# Political side of the winning party, one entry per election
political_side_winner = [
    "Center to center-left",   # 1983
    "Center to center-right",  # 1989
    "Center to center-right",  # 1995
    "Center-left",             # 1999
    "Center-right",            # 2003
    "Left to center-left",     # 2007
    "Left to center-left",     # 2011
    "Left to center-left",     # 2015
    "Left to center-left",     # 2019
]

# Political side of the party that finished second
political_side_second = [
    "Center to center-right",  # 1983
    "Center to center-left",   # 1989
    "Left to center-left",     # 1995
    "Center to center-right",  # 1999
    "Left to center-left",     # 2003
    "Center to center-right",  # 2007
    "Center-left",             # 2011
    "Center-right",            # 2015
    "Center-right",            # 2019
]

# Political side of the party that finished third
political_side_third = [
    "Center-left",             # 1983
    "Center",                  # 1989
    "Center to center-left",   # 1995
    "Right",                   # 1999
    "Center",                  # 2003
    "Center",                  # 2007
    "Center to center-right",  # 2011
    "Center",                  # 2015
    "Center",                  # 2019
]

In [6]:
# Parties that ran in each election: one inner list of four names per election
# year, in chronological order (1983 ... 2019).
# The names are kept exactly as written for each election, which is why the
# casing differs between the older and the two most recent entries.
parties = [
    # 1983
    ["Union Civica Radical",
     "Justicialista",  # was misspelled "Justicilista", which did not match the other lists
     "Intransigente",
     "Movimiento de Integración y Desarrollo"],

    # 1989
    ["Alianza Frente Justicialista de Unidad Popular",
     "Union Civica Radical",
     "Alianza de Centro",
     "Alianza Izquierda Unida"],

    # 1995
    ["Justicialista - Union de Centro Democrático Federal - Frente de Recuperación Ética",
     "Alianza Frente País Solidario",
     "Union Civica Radical",
     "Movimiento por la Dignidad y la Independencia"],

    # 1999
    ["Alianza para el Trabajo, la Justicia y la Educación",
     "Alianza Concertación Justicialista para el Cambio Unión del Centro Democrático - Alianza Frente de Integración Federal",
     "Alianza Acción por la República",
     "Alianza Izquierda Unida"],

    # 2003
    ["Alianza Frente por la Lealtad - Unión del Centro Democrático",
     "Alianza Frente para la Victoria",
     "Alianza Movimiento Federal para Recrear el Crecimiento",
     "Alianza Frente Movimiento Popular - Unión y Libertad"],

    # 2007
    ["Alianza Frente para la Victoria",
     "Alianza Confederación Coalición Cívica",
     "Alianza Concertación para una Nación Avanzada",
     "Alianza Frente Justicia, Unión y Libertad"],

    # 2011
    ["Alianza Frente para la Victoria",
     "Alianza Frente Amplio Progresista",
     "Alianza Unión para el Desarrollo Social",
     "Alianza Compromiso Federal"],

    # 2015
    ["ALIANZA FRENTE PARA LA VICTORIA",
     "ALIANZA CAMBIEMOS",
     "ALIANZA UNIDOS POR UNA NUEVA ALTERNATIVA (UNA)",
     "ALIANZA FRENTE DE IZQUIERDA Y DE LOS TRABAJADORES"],

    # 2019
    ["FRENTE DE TODOS",
     "JUNTOS POR EL CAMBIO",
     "CONSENSO FEDERAL",
     "FRENTE DE IZQUIERDA Y DE TRABAJADORES - UNIDAD"],
]

In [7]:
# Winning party of each election, named as it appeared on that year's ballot
winners = [
    "Union Civica Radical",
    "Alianza Frente Justicialista de Unidad Popular",
    "Justicialista - Union de Centro Democrático Federal - Frente de Recuperación Ética",
    "Alianza para el Trabajo, la Justicia y la Educación",
    "Alianza Frente por la Lealtad - Unión del Centro Democrático",
    "Alianza Frente para la Victoria",
    "Alianza Frente para la Victoria",
    "Alianza Frente para la Victoria",
    "FRENTE DE TODOS",
]

# Some winning parties are the same party (candidate & political views wise)
# under a slightly different name; this table maps each such name to the single
# label used for it. Names that are not listed are kept unchanged.
_winner_aliases = {
    "Alianza Frente Justicialista de Unidad Popular": "Justicialista",
    "Justicialista - Union de Centro Democrático Federal - Frente de Recuperación Ética": "Justicialista",
    "FRENTE DE TODOS": "Alianza Frente para la Victoria",
}

winners_unified = [_winner_aliases.get(ballot_name, ballot_name) for ballot_name in winners]

In [8]:
# Same process for the second place: first the names as they appeared at each
# election, then a unified version where renamed parties share one label.
second_position = [
    "Justicialista",
    "Union Civica Radical",
    "Alianza Frente País Solidario",
    "Alianza Concertación Justicialista para el Cambio Unión del Centro Democrático - Alianza Frente de Integración Federal",
    "Alianza Frente para la Victoria",
    "Alianza Confederación Coalición Cívica",
    "Alianza Frente Amplio Progresista",
    "ALIANZA CAMBIEMOS",
    "JUNTOS POR EL CAMBIO",
]

# Runner-up names that are treated as the same party as an earlier entry.
_runner_up_aliases = {
    "JUNTOS POR EL CAMBIO": "ALIANZA CAMBIEMOS",
}

# Derived from `second_position` rather than retyped: the hand-written copy
# spelled the first entry "Justicilista", which made it a different label from
# the "Justicialista" used for the same party in the winners' list.
second_position_unified = [
    _runner_up_aliases.get(ballot_name, ballot_name) for ballot_name in second_position
]

In [9]:
# Party that finished third at each election
third_position = [
    "Intransigente",                                           # 1983
    "Alianza de Centro",                                       # 1989
    "Union Civica Radical",                                    # 1995
    "Alianza Acción por la República",                         # 1999
    "Alianza Movimiento Federal para Recrear el Crecimiento",  # 2003
    "Alianza Concertación para una Nación Avanzada",           # 2007
    "Alianza Unión para el Desarrollo Social",                 # 2011
    "ALIANZA UNIDOS POR UNA NUEVA ALTERNATIVA (UNA)",          # 2015
    "CONSENSO FEDERAL",                                        # 2019
]

# These are all somewhat different parties, so no unified version is needed

In [10]:
# Votes obtained by the four listed parties at each election: one inner list per
# election year, ordered from most to fewest votes (presumably matching the
# order of the entries in `parties` - verify against the source reports).
# Every count is a plain digit string. The original mixed three notations
# ("7.724.559", "7954191" and the malformed "12946.037"), which could not be
# converted to numbers uniformly.
votes = [
    ["7724559", "5995402", "347654", "177426"],      # 1983
    ["7954191", "6202163", "1093398", "409250"],     # 1989
    ["8687511", "5096104", "2956137", "310069"],     # 1995
    ["9167220", "7255586", "1937544", "151977"],     # 1999
    ["4741202", "4313131", "3173584", "2736091"],    # 2003
    ["8652293", "4403642", "3230236", "1459174"],    # 2007
    ["11865055", "3684970", "2443016", "1745354"],   # 2011
    ["9338490", "8601131", "5386977", "812530"],     # 2015
    ["12946037", "10811586", "1649322", "579228"],   # 2019
]

# Integer vote totals of the first, second and third placed parties. They are
# taken from the matching column of `votes` so the figures cannot drift apart.
vote_winners = [int(election_row[0]) for election_row in votes]
vote_second_position = [int(election_row[1]) for election_row in votes]
vote_third_position = [int(election_row[2]) for election_row in votes]

In [11]:
# Per-district results are added next

districts = [
    "BUENOS_AIRES", "CAPITAL_FEDERAL", "CATAMARCA", "CHACO", "CHUBUT", "CÓRDOBA",
    "CORRIENTES", "ENTRE_RÍOS", "FORMOSA", "JUJUY", "LA_PAMPA", "LA_RIOJA",
    "MENDOZA", "MISIONES", "NEUQUÉN", "RIO_NEGRO", "SALTA", "SAN_JUAN",
    "SAN_LUIS", "SANTA_CRUZ", "SANTA_FE", "SANTIAGO_DEL_ESTERO",
    "TIERRA_DEL_FUEGO", "TUCUMÁN",
]

# For every district: did it vote for the party that ended up winning?
# 1 = yes, 0 = no. Each list holds one flag per election.
voting_center_bsas       = [1, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_cf         = [1, 1, 0, 1, 0, 0, 1, 1, 1]
voting_center_catamarca  = [1, 1, 1, 1, 1, 1, 1, 1, 1]
voting_center_chaco      = [0, 1, 1, 1, 1, 1, 1, 1, 1]
voting_center_chubut     = [1, 0, 1, 1, 0, 1, 1, 1, 1]
voting_center_cba        = [1, 0, 1, 1, 1, 0, 1, 1, 0]
voting_center_corrientes = [1, 1, 1, 1, 1, 1, 1, 1, 1]
voting_center_entre_rios = [1, 1, 1, 1, 1, 1, 1, 1, 0]
voting_center_formosa    = [0, 1, 1, 0, 0, 1, 1, 1, 1]
voting_center_jujuy      = [0, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_la_pampa   = [1, 1, 1, 1, 1, 1, 1, 1, 1]
voting_center_la_rioja   = [0, 1, 1, 0, 1, 1, 1, 1, 1]
voting_center_mendoza    = [1, 1, 1, 1, 0, 1, 1, 1, 0]
voting_center_misiones   = [1, 1, 1, 0, 1, 1, 1, 1, 1]
voting_center_neuquen    = [1, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_rio_negro  = [1, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_salta      = [0, 0, 1, 1, 1, 1, 1, 1, 1]
voting_center_san_juan   = [1, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_san_luis   = [1, 1, 1, 1, 0, 0, 0, 1, 0]
voting_center_santa_cruz = [0, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_santa_fe   = [1, 1, 1, 1, 1, 1, 1, 1, 0]
voting_center_santiago   = [0, 1, 1, 0, 1, 1, 1, 1, 1]
voting_center_tierra     = [1, 1, 1, 1, 0, 1, 1, 1, 1]
voting_center_tucuman    = [0, 1, 1, 1, 1, 1, 1, 1, 1]

In [12]:
# Nationwide tallies per election, stored as digit strings:
# positive votes, blank votes and annulled votes.
positive_votes_nationwide = [
    "14927512", "16749128", "17395284", "18953704", "19388697",
    "19107140", "21927282", "25184257", "24659333",
]
blank_votes_nationwide = [
    "334946", "222048", "653443", "708876", "196563",
    "1331011", "803362", "664740", "882633",
]
annuled_votes_nationwide = [
    "87728", "116049", "125112", "186761", "345651",
    "241175", "225741", "199449", "318001",
]

# Integer counterparts of the three tallies above
positive_votes_nationwide_int = [int(tally) for tally in positive_votes_nationwide]
blank_votes_nationwide_int = [int(tally) for tally in blank_votes_nationwide]
annuled_votes_nationwide_int = [int(tally) for tally in annuled_votes_nationwide]

In [13]:
# The dataset is assembled from three groups of columns. A dict keeps its
# insertion order and no key appears in more than one group, so merging the
# groups in sequence gives the columns in the order they are listed below.

# Election year plus the nationwide registration / turnout / tally figures
_nationwide_columns = {
    "Year": years,
    "Number_Registered_Voters": number_registered_voters,
    "Percentage_who_voted": percentage_voted,
    "Positive_Votes": positive_votes_nationwide_int,
    "Blank_votes": blank_votes_nationwide_int,
    "Annuled_votes": annuled_votes_nationwide_int,
}

# One 0/1 flag per district: whether it voted for the eventual winner
_district_columns = {
    "Buenos_Aires_voted_in_favor": voting_center_bsas,
    "Ciudad_Federal_voted_in_favor": voting_center_cf,
    "Catamarca_voted_in_favor": voting_center_catamarca,
    "Chaco_voted_in_favor": voting_center_chaco,
    "Chubut_voted_in_favor": voting_center_chubut,
    "Cordoba_voted_in_favor": voting_center_cba,
    "Corrientes_voted_in_favor": voting_center_corrientes,
    "Entre_Rios_voted_in_favor": voting_center_entre_rios,
    "Formosa_voted_in_favor": voting_center_formosa,
    "Jujuy_voted_in_favor": voting_center_jujuy,
    "La_Pampa_voted_in_favor": voting_center_la_pampa,
    "La_Rioja_voted_in_favor": voting_center_la_rioja,
    "Mendoza_voted_in_favor": voting_center_mendoza,
    "Misiones_voted_in_favor": voting_center_misiones,
    "Neuquen_voted_in_favor": voting_center_neuquen,
    "Rio_Negro_voted_in_favor": voting_center_rio_negro,
    "Salta_voted_in_favor": voting_center_salta,
    "San_Juan_voted_in_favor": voting_center_san_juan,
    "San_luis_voted_in_favor": voting_center_san_luis,
    "Santa_Cruz_voted_in_favor": voting_center_santa_cruz,
    "Santa_Fe_voted_in_favor": voting_center_santa_fe,
    "Santiago_voted_in_favor": voting_center_santiago,
    "Tierra_del_Fuego_voted_in_favor": voting_center_tierra,
    "Tucuman_voted_in_favor": voting_center_tucuman,
}

# Third place, runner-up and winner: party, vote total and political side
_podium_columns = {
    "Third_position": third_position,
    "Third_position_total_votes": vote_third_position,
    "Political_Side_Third": political_side_third,
    "Runner_up": second_position_unified,
    "Runner_up_total_votes": vote_second_position,
    "Political_Side_Second": political_side_second,
    "Winner": winners_unified,
    "Winner_total_votes": vote_winners,
    "Political_Side_First": political_side_winner,
}

election_df = pd.DataFrame(
    {**_nationwide_columns, **_district_columns, **_podium_columns}
)

In [14]:
# The final dataset is now ready to be used.
# Leaving the frame as the last expression of the cell renders it as the cell
# output (one row per election year).
election_df

Unnamed: 0,Year,Number_Registered_Voters,Percentage_who_voted,Positive_Votes,Blank_votes,Annuled_votes,Buenos_Aires_voted_in_favor,Ciudad_Federal_voted_in_favor,Catamarca_voted_in_favor,Chaco_voted_in_favor,...,Tucuman_voted_in_favor,Third_position,Third_position_total_votes,Political_Side_Third,Runner_up,Runner_up_total_votes,Political_Side_Second,Winner,Winner_total_votes,Political_Side_First
0,1983,17929951,85.61,14927512,334946,87728,1,1,1,0,...,0,Intransigente,347654,Center-left,Justicilista,5995402,Center to center-right,Union Civica Radical,7724559,Center to center-left
1,1989,20034252,85.31,16749128,222048,116049,1,1,1,1,...,1,Alianza de Centro,1093398,Center,Union Civica Radical,6202163,Center to center-left,Justicialista,7954191,Center to center-right
2,1995,22178201,82.08,17395284,653443,125112,1,0,1,1,...,1,Union Civica Radical,2956137,Center to center-left,Alianza Frente País Solidario,5096104,Left to center-left,Justicialista,8687511,Center to center-right
3,1999,24111270,82.32,18953704,708876,186761,1,1,1,1,...,1,Alianza Acción por la República,1937544,Right,Alianza Concertación Justicialista para el Cam...,7255586,Center to center-right,"Alianza para el Trabajo, la Justicia y la Educ...",9167220,Center-left
4,2003,25481410,78.22,19388697,196563,345651,0,0,1,1,...,1,Alianza Movimiento Federal para Recrear el Cre...,3173584,Center,Alianza Frente para la Victoria,4313131,Left to center-left,Alianza Frente por la Lealtad - Unión del Cent...,4741202,Center-right
5,2007,27137536,76.2,19107140,1331011,241175,1,0,1,1,...,1,Alianza Concertación para una Nación Avanzada,3230236,Center,Alianza Confederación Coalición Cívica,4403642,Center to center-right,Alianza Frente para la Victoria,8652293,Left to center-left
6,2011,28916183,79.39,21927282,803362,225741,1,1,1,1,...,1,Alianza Unión para el Desarrollo Social,2443016,Center to center-right,Alianza Frente Amplio Progresista,3684970,Center-left,Alianza Frente para la Victoria,11865055,Left to center-left
7,2015,32130853,81.07,25184257,664740,199449,1,1,1,1,...,1,ALIANZA UNIDOS POR UNA NUEVA ALTERNATIVA (UNA),5386977,Center,ALIANZA CAMBIEMOS,8601131,Center-right,Alianza Frente para la Victoria,9338490,Left to center-left
8,2019,34231721,80.42,24659333,882633,318001,1,1,1,1,...,1,CONSENSO FEDERAL,1649322,Center,ALIANZA CAMBIEMOS,10811586,Center-right,Alianza Frente para la Victoria,12946037,Left to center-left
