In [1]:
from bs4 import BeautifulSoup
from tqdm import tqdm
import pandas as pd
import htmlmin

import re
import os

In [2]:
pd.set_option('future.no_silent_downcasting', True)

In [3]:
DATA_PATH = os.path.join('..', 'data')
PRIMARY_PATH = os.path.join(DATA_PATH, 'primary')

CONTENT_PATH = os.path.join('..', 'contenido')
GRAPHS_PATH = os.path.join(CONTENT_PATH, 'graficos')
WINNERS_ELECTORAL_PATH = os.path.join(GRAPHS_PATH, 'ganadores-electoral')

# Winners Electoral

In [4]:
df = pd.read_parquet(os.path.join(PRIMARY_PATH, 'pres-1990-2014.parquet'))
winners = df.sort_values(
    ['eleccion', 'tipo', 'region', 'electoral', 'votos'],
    ascending=[True, True, True, True, False]
)
winners.drop_duplicates(['eleccion', 'tipo', 'region', 'electoral'], keep='first', inplace=True)
support ={
    'Michelle Bachelet': 'izquierda',
    'Eduardo Frei': 'izquierda',
    'Sebastián Piñera': 'derecha',
    'Patricio Aylwin': 'izquierda',
    'Joaquín Lavín': 'derecha',
    'Ricardo Lagos': 'izquierda',
    'Evelyn Matthei': 'derecha',
    'Hernán Büchi': 'derecha',
    'Arturo Alessandri': 'derecha',
    'Francisco J. Errázuriz': 'derecha',
    'Marco Enríquez Ominami': 'izquierda',
    'Tomás Hirsch': 'izquierda'
}
winners.candidato = winners.candidato.str.replace(' - ', ' ').str.strip()
winners['initials'] = winners['candidato'].apply(
    lambda x: ''.join([i[0] for i in x.split(' ')])
)
winners['inclinacion'] = winners['candidato'].map(support)
winners.sample(3)

Unnamed: 0,eleccion,tipo,region,electoral,candidato,votos,porcentaje,initials,inclinacion
17940,2014,1,biobio,COPIULEMU,Michelle Bachelet,529,0.629762,MB,izquierda
14659,2010,2,biobio,NACIMIENTO,Eduardo Frei,6443,0.508444,EF,izquierda
6674,2000,1,araucania,TRAIGUEN,Joaquín Lavín,6345,0.56576,JL,derecha


In [5]:
winner_class = 'winner'
left_class = 'left'
right_class = 'right'
empty_class = 'empty'

years = winners.value_counts(['eleccion', 'tipo']).index.sort_values()
electorals = winners.value_counts(['region', 'electoral']).index.sort_values()

In [6]:
with open(os.path.join(WINNERS_ELECTORAL_PATH, 'index.html')) as file:
    html = BeautifulSoup(file, 'html.parser')

table = html.find('table')
thead_tr = table.find('thead').find('tr')
tbody = html.find('tbody')
tbody.clear()

with open(os.path.join(WINNERS_ELECTORAL_PATH, 'index.html'), 'w') as file:
    file.write(html.prettify())

In [7]:
tbody.clear()
thead_tr.clear()
new_th = BeautifulSoup('<th>Región</th>', 'html.parser')
thead_tr.append(new_th)
new_th = BeautifulSoup('<th>Circunscripción Electoral</th>', 'html.parser')
thead_tr.append(new_th)

for year_tipo in years:
    year = str(year_tipo[0])+('' if year_tipo[1] == 0 else '-'+str(year_tipo[1]))
    new_th = BeautifulSoup(f'<th>{year}</th>', 'html.parser')
    thead_tr.append(new_th)

for region_electoral in tqdm(electorals, total=len(electorals)):
    region = region_electoral[0]
    electoral = region_electoral[1]
    electoral_df = winners.query('region == @region and electoral == @electoral')
    new_tr = BeautifulSoup('<tr></tr>', 'html.parser').find('tr')
    new_td = BeautifulSoup(f'<td>{region.upper()}</td>', 'html.parser').find('td')
    new_tr.append(new_td)
    new_td = BeautifulSoup(f'<td>{electoral.upper()}</td>', 'html.parser').find('td')
    new_tr.append(new_td)
    for year_tipo in years:
        year = year_tipo[0]
        tipo = year_tipo[1]
        td_year = str(year) + ('' if tipo == 0 else '-' + str(tipo))
        winner = electoral_df.query('eleccion == @year and tipo == @tipo').copy()
        votes = df.query(
            'region == @region and electoral == @electoral and eleccion == @year and tipo == @tipo'
        ).copy()
        votes.sort_values('votos', ascending=False, inplace=True)
        if not winner.empty:
            winner.loc[:, 'inclinacion'] = winner['inclinacion'].replace(
                {'izquierda': left_class, 'derecha': right_class}
            )
            leaning = winner['inclinacion'].values[0]
            initials = winner['initials'].values[0]
        else:
            leaning = empty_class
            initials = ''
        new_td = BeautifulSoup(
            f'<td class="{winner_class}">'
            f'<span class="{winner_class} {leaning}">{initials}</span>'
            f'</td>',
            'html.parser'
        ).find('td')
        if not votes.empty:
            span = new_td.find('span')
            new_ul = BeautifulSoup('<ul class="votes"></ul>', 'html.parser').find('ul')
            for candidate in votes.itertuples():
                new_li = BeautifulSoup((
                    f'<li><strong>{candidate.candidato}:</strong> '
                    f'{100*candidate.porcentaje:.2f}%</li>'
                ), 'html.parser').find('li')
                new_ul.append(new_li)
            span.append(new_ul)
        new_tr.append(new_td)
    tbody.append(new_tr)

min_html = htmlmin.minify(
    html.prettify(formatter='html'),
    remove_empty_space=True,
    remove_comments=True,
    remove_all_empty_space=True,
    reduce_empty_attributes=True,
    reduce_boolean_attributes=True,
    remove_optional_attribute_quotes=True,
    convert_charrefs=True,
)
min_html = min_html.replace('> ', '>').replace(' <', '<')
min_html = min_html.replace('</strong>', '</strong> ')
min_html = re.sub(r'\s+', ' ', min_html)

with open(os.path.join(WINNERS_ELECTORAL_PATH, 'index.html'), 'w', encoding='utf-8') as file:
    file.write(min_html)

100%|██████████| 606/606 [00:36<00:00, 16.51it/s]
