In [1]:
from matplotlib import pyplot as plt
from Levenshtein import distance
import ipywidgets as widgets
import pandas as pd
import numpy as np
import unicodedata

import itertools
import os

In [2]:
# Data directories, expressed relative to the current working directory.
DATA_PATH = os.path.join(os.pardir, 'data')
RAW_PATH, INTERMEDIATE_PATH = (
    os.path.join(DATA_PATH, subdir) for subdir in ('raw', 'intermediate')
)

In [3]:
def get_levenstein_dataframe(series, threshold=3):
    """Find pairs of distinct values in ``series`` with a small Levenshtein distance.

    Parameters
    ----------
    series : pandas.Series
        Values to compare with each other. Missing values are ignored.
    threshold : int, default 3
        Exclusive upper bound: only pairs with ``0 < distance < threshold``
        are kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``a``, ``b`` and ``distance``, one row per unordered pair,
        sorted by ``distance``, then ``a``, then ``b``, with a fresh index.
        Empty when ``series`` holds fewer than two distinct values.
    """
    # Missing values cannot be ordered against strings, so drop them before sorting.
    uniques = sorted(series.dropna().unique())
    # Zero or one distinct value leaves nothing to pair up.
    if len(uniques) < 2:
        return pd.DataFrame(columns=['a', 'b', 'distance'])
    # combinations() over a sorted, duplicate-free list already yields every
    # unordered pair exactly once, so no extra de-duplication pass is needed.
    levenstein = pd.DataFrame(
        list(itertools.combinations(uniques, 2)),
        columns=['a', 'b']
    )
    levenstein['distance'] = [
        distance(a, b) for a, b in zip(levenstein['a'], levenstein['b'])
    ]
    is_close = (levenstein['distance'] > 0) & (levenstein['distance'] < threshold)
    return (
        levenstein[is_close]
        .sort_values(['distance', 'a', 'b'])
        .reset_index(drop=True)
    )

In [4]:
# Read one parquet file per (election year, round) and tag each frame with
# the year and the first character of the round label.
dfs = []
for eleccion, tipo in itertools.product([2013, 2017, 2021], ['1ra', '2da']):
    parquet_file = os.path.join(INTERMEDIATE_PATH, f'pres-{eleccion}-{tipo}.parquet')
    round_results = pd.read_parquet(parquet_file)
    round_results.insert(0, 'eleccion', eleccion)
    round_results.insert(1, 'tipo', tipo[0])
    dfs.append(round_results)

# Stack every round into a single frame with a fresh 0..n-1 index.
df_new = pd.concat(dfs, ignore_index=True)
object_cols = df_new.select_dtypes(include=['object']).columns
df_new[['eleccion', 'tipo']] = df_new[['eleccion', 'tipo']].astype('int')

# Total the votes per candidate within each electoral district; columns that
# are neither grouping keys nor 'votos' are discarded by this aggregation.
group_keys = ['eleccion', 'tipo', 'region', 'electoral', 'candidato']
df_new = df_new.groupby(group_keys)['votos'].sum().reset_index()
df_new

Unnamed: 0,eleccion,tipo,region,electoral,candidato,votos
0,2013,1,ANTOFAGASTA,ANTOFAGASTA NORTE,ALFREDO SFEIR,431
1,2013,1,ANTOFAGASTA,ANTOFAGASTA NORTE,EVELYN MATTHEI,7320
2,2013,1,ANTOFAGASTA,ANTOFAGASTA NORTE,FRANCO ALDO PARISI,10249
3,2013,1,ANTOFAGASTA,ANTOFAGASTA NORTE,MARCEL CLAUDE,1196
4,2013,1,ANTOFAGASTA,ANTOFAGASTA NORTE,MARCO ENRIQUEZ-OMINAMI,6569
...,...,...,...,...,...,...
26192,2021,2,ÑUBLE,VEGAS DE ITATA,VOTOS NULOS,4
26193,2021,2,ÑUBLE,YUNGAY,GABRIEL BORIC,3458
26194,2021,2,ÑUBLE,YUNGAY,JOSE ANTONIO KAST,3693
26195,2021,2,ÑUBLE,YUNGAY,VOTOS EN BLANCO,25


In [5]:
# Load the results stored in 'pres-1990-2014.parquet', leaving out the rows
# whose eleccion is 2014 and the 'porcentaje' column.
# drop() is chained without inplace=True so the filtered frame returned by
# query() is never mutated in place; that pattern can trigger pandas'
# SettingWithCopyWarning on the column assignments below.
df_old = (
    pd.read_parquet(os.path.join(INTERMEDIATE_PATH, 'pres-1990-2014.parquet'))
    .query('eleccion != 2014')
    .drop(columns=['porcentaje'])
)

# Whole-value renames applied to the upper-cased region names.
replace_region = {
    'RIOS': 'LOS RIOS',
    'LAGOS': 'LOS LAGOS',
}
df_old['region'] = df_old['region'].str.upper().replace(replace_region)

# Whole-value renames applied to 'electoral' once the text clean-up is done.
# NOTE(review): the keys ending in 'AA'/'AAN' look like residue of stripped
# non-ASCII characters in the source data - confirm against the raw file.
replace_electoral = {
    'AISEN': 'AYSEN',
    'PELCHUQUAAN': 'PELCHUQUIN',
    'CURRINAA': 'CURRINE',
}
df_old['electoral'] = (
    df_old['electoral']
    # Decompose accented characters, then discard everything that is not ASCII.
    .str.normalize('NFKD')
    .str.encode('ascii', errors='ignore')
    .str.decode('utf-8')
    .str.replace("O'HIGGINS", 'OHIGGINS', regex=False)
    # Collapse runs of whitespace into a single space.
    .str.replace(r'\s+', ' ', regex=True)
    # Remove parentheses but keep the text they enclose.
    .str.replace(r'\((.*?)\)', r'\1', regex=True)
    # Text enclosed between two hyphens keeps the text and loses the hyphens.
    .str.replace(r'-\s*([^\-]+?)\s*-', r'\1', regex=True)
    # Any remaining hyphen gets exactly one space on each side.
    .str.replace(r'\s*-\s*', ' - ', regex=True)
    # Trim BEFORE the whole-value replace: Series.replace with a dict only
    # matches entire values, so leftover padding would hide a match.
    .str.strip()
    .replace(replace_electoral)
)
df_old['candidato'] = df_old['candidato'].str.upper()
df_old

Unnamed: 0,eleccion,tipo,region,electoral,candidato,votos
0,1990,0,TARAPACA,ARICA,FRANCISCO J. ERRÁZURIZ,14981
1,1990,0,TARAPACA,ARICA,HERNÁN BÜCHI,25786
2,1990,0,TARAPACA,ARICA,PATRICIO AYLWIN,44556
3,1990,0,TARAPACA,BELEN,FRANCISCO J. ERRÁZURIZ,81
4,1990,0,TARAPACA,BELEN,HERNÁN BÜCHI,143
...,...,...,...,...,...,...
15110,2010,2,MAGALLANES,SAN GREGORIO,SEBASTIÁN PIÑERA,253
15111,2010,2,MAGALLANES,TIMAUKEL,EDUARDO FREI,111
15112,2010,2,MAGALLANES,TIMAUKEL,SEBASTIÁN PIÑERA,106
15113,2010,2,MAGALLANES,TORRES DEL PAINE C. CASTILLO,EDUARDO FREI,345


In [6]:
# Stack the historical and the recent results. Each frame keeps its own index
# labels, so labels repeat in the combined frame.
# Then harmonise one region spelling and reduce candidate names to plain
# ASCII (accents are decomposed and the non-ASCII parts dropped).
df = pd.concat([df_old, df_new]).assign(
    region=lambda frame: frame['region'].replace({'AISEN': 'AYSEN'}),
    candidato=lambda frame: (
        frame['candidato']
        .str.normalize('NFKD')
        .str.encode('ascii', errors='ignore')
        .str.decode('utf-8')
    ),
)
df

Unnamed: 0,eleccion,tipo,region,electoral,candidato,votos
0,1990,0,TARAPACA,ARICA,FRANCISCO J. ERRAZURIZ,14981
1,1990,0,TARAPACA,ARICA,HERNAN BUCHI,25786
2,1990,0,TARAPACA,ARICA,PATRICIO AYLWIN,44556
3,1990,0,TARAPACA,BELEN,FRANCISCO J. ERRAZURIZ,81
4,1990,0,TARAPACA,BELEN,HERNAN BUCHI,143
...,...,...,...,...,...,...
26192,2021,2,ÑUBLE,VEGAS DE ITATA,VOTOS NULOS,4
26193,2021,2,ÑUBLE,YUNGAY,GABRIEL BORIC,3458
26194,2021,2,ÑUBLE,YUNGAY,JOSE ANTONIO KAST,3693
26195,2021,2,ÑUBLE,YUNGAY,VOTOS EN BLANCO,25


In [7]:
# Election year -> candidate name.
# NOTE(review): presumably the overall winner of each election - confirm.
winners = {
    1990: 'PATRICIO AYLWIN',
    1994: 'EDUARDO FREI',
    2000: 'RICARDO LAGOS',
    2006: 'MICHELLE BACHELET',
    2010: 'SEBASTIAN PINERA',
    2013: 'MICHELLE BACHELET',
    2017: 'SEBASTIAN PINERA',
    2021: 'GABRIEL BORIC',
}

# Candidate name -> category ('left', 'right', 'blanco' or 'nulo').
# Some keys carry trailing spaces or alternative hyphen spacing; they are kept
# exactly as written here because other cells may look names up in this dict.
leaning = {
    'FRANCISCO J. ERRAZURIZ': 'right',
    'HERNAN BUCHI': 'right',
    'PATRICIO AYLWIN': 'left',
    'CRISTIAN REITZE': 'left',
    'EUGENIO PIZARRO': 'left',
    'MANFRED MAX NEEF': 'left',
    'JOSE PINERA': 'right',
    'ARTURO ALESSANDRI': 'right',
    'EDUARDO FREI': 'left',
    'ARTURO FREI': 'left',
    'SARA LARRAIN': 'left',
    'GLADYS MARIN': 'left',
    'TOMAS HIRSCH': 'left',
    'RICARDO LAGOS': 'left',
    'JOAQUIN LAVIN': 'right',
    'SEBASTIAN PINERA': 'right',
    'MICHELLE BACHELET': 'left',
    'JORGE ARRATE': 'left',
    'MARCO ENRIQUEZ - OMINAMI ': 'left',
    'ALFREDO SFEIR': 'left',
    'EVELYN MATTHEI': 'right',
    'FRANCO ALDO PARISI': 'right',
    'MARCEL CLAUDE': 'left',
    'MARCO ENRIQUEZ-OMINAMI': 'left',
    'RICARDO ISRAEL': 'right',
    'ROXANA MIRANDA': 'left',
    'TOMAS JOCELYN-HOLT': 'left',
    'VOTOS EN BLANCO ': 'blanco',
    'VOTOS NULOS ': 'nulo',
    'ALEJANDRO GUILLIER': 'left',
    'ALEJANDRO NAVARRO': 'left',
    'BEATRIZ SANCHEZ': 'left',
    'CAROLINA GOIC': 'left',
    'EDUARDO ARTES': 'left',
    'JOSE ANTONIO KAST': 'right',
    'GABRIEL BORIC': 'left',
    'SEBASTIAN SICHEL': 'right',
    'YASNA PROVOSTE': 'left'
}

# Share of the votes each row represents within its
# (eleccion, tipo, region, electoral) group.
df['p'] = df['votos'] / (
    df.groupby(['eleccion', 'tipo', 'region', 'electoral'])['votos'].transform('sum')
)

# Match names ignoring leading/trailing whitespace on both sides, so a name
# that differs from its key only by padding is still classified instead of
# silently becoming NaN. df.candidato itself is left unchanged.
leaning_lookup = {name.strip(): side for name, side in leaning.items()}
df['leaning'] = df['candidato'].str.strip().map(leaning_lookup)

In [11]:
# Both tables share the same layout: one row per (region, electoral, leaning),
# one column per (eleccion, tipo); cells with no data are filled with 0.
pivot_layout = dict(
    index=['region', 'electoral', 'leaning'],
    columns=['eleccion', 'tipo'],
    aggfunc='sum',
    fill_value=0,
)
dfl = df.pivot_table(values='votos', **pivot_layout)   # summed votes
dflp = df.pivot_table(values='p', **pivot_layout)      # summed vote shares

# Flatten the two-level column keys into '<eleccion>-<tipo>' labels.
for pivoted in (dfl, dflp):
    pivoted.columns = pivoted.columns.map(lambda key: f'{key[0]}-{key[1]}')
dflp

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1990-0,1994-0,2000-1,2000-2,2006-1,2006-2,2010-1,2010-2,2013-1,2013-2,2017-1,2017-2,2021-1,2021-2
region,electoral,leaning,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ANTOFAGASTA,ANTOFAGASTA NORTE,blanco,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.004312,0.004810,0.003265,0.001749,0.003041,0.002566
ANTOFAGASTA,ANTOFAGASTA NORTE,left,0.599568,0.738009,0.644276,0.63013,0.620314,0.647236,0.664198,0.567665,0.593594,0.657669,0.598367,0.474205,0.378251,0.632176
ANTOFAGASTA,ANTOFAGASTA NORTE,nulo,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.010625,0.016821,0.008721,0.007984,0.007896,0.010294
ANTOFAGASTA,ANTOFAGASTA NORTE,right,0.400432,0.261991,0.355724,0.36987,0.379686,0.352764,0.335802,0.432335,0.391469,0.320700,0.389647,0.516061,0.610812,0.354964
ANTOFAGASTA,ANTOFAGASTA SUR,blanco,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.002503,0.005822,0.002474,0.002091,0.003164,0.003150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ÑUBLE,VEGAS DE ITATA,right,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.644898,0.741036
ÑUBLE,YUNGAY,blanco,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.007029,0.003465
ÑUBLE,YUNGAY,left,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.396485,0.479279
ÑUBLE,YUNGAY,nulo,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.009372,0.005405
