In [79]:
import os
import re

import pandas as pd
import numpy as np

from datos_demre.params import RAW_FILES_DEMRE_OPEN_PATH, RAW_DICTIONARIES_DEMRE_OPEN_PATH

In [80]:
DATABASES = ['inscripciones', 'resultados', 'postulaciones', 'matriculas']

VERBOSE = True

In [81]:
def load_raw_dataset(name: str, year: int, verbose=False) -> pd.DataFrame:
    if name not in DATABASES:
        raise ValueError(f'Invalid dataset name. Must be one of {DATABASES}.')
    df = pd.read_csv(
        os.path.join(RAW_FILES_DEMRE_OPEN_PATH, str(year), f'{name}.csv'), sep=';'
    )
    df.insert(0, 'year_id', year)
    df.columns = (
        df.columns
        .str.replace(r'[ \-]', '_', regex=True)
        .str.lower()
        .str.replace(r'_+', '_', regex=True)
    )
    df.set_index(df.id_aux.str.lstrip('id_').astype(np.int64), inplace=True)
    df.drop(columns=['id_aux'], inplace=True)
    numeric = df.select_dtypes(include=[np.number]).columns
    for col in numeric:
        uints = [np.uint8, np.uint16, np.uint32, np.uint64]
        ints = [np.int8, np.int16, np.int32, np.int64]
        for dtype in uints+ints:
            if df[col].min() >= np.iinfo(dtype).min and df[col].max() < np.iinfo(dtype).max:
                df[col] = df[col].astype(dtype, errors='ignore')
                break
    if verbose:
        print(f'Loaded dataset {name} for year {year}.')
        print(f'Rows: {df.shape[0]} | Columns: {df.shape[1]}')
    ############################################################################
    xlsx = pd.ExcelFile(
        os.path.join(RAW_DICTIONARIES_DEMRE_OPEN_PATH, str(year), f'dict_{name}.xlsx')
    )
    df_d = {
        re.sub(r'_+', '_', re.sub(r'[ \-]', '_', sheet_name.lower())):
            xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names
    }
    if verbose:
        print(f'Dictionary: {df_d.keys()}')
    for key, value in df_d.items():
        df_d[key].dropna(how='all', inplace=True)
        df_d[key].insert(0, 'year_id', year)
        df_d[key].columns = (
            df_d[key].columns
            .str.replace(r'[ \-]', '_', regex=True)
            .str.lower()
            .str.replace(r'_+', '_', regex=True)
        )
        if verbose:
            print(f'\t- {key}: {value.columns.tolist()}')
    first_sheet_name = re.sub(r'_+', '_', re.sub(r'[ \-]', '_', xlsx.sheet_names[0].lower()))
    df_d[first_sheet_name]['variable'] = df_d[first_sheet_name]['variable'].ffill()
    return df, df_d

# 2004

In [82]:
year = 2004
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2004.
Rows: 153982 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'reg', 'nombre_region', 'prov', 'nombre_provincia', 'com', 'nombre_comuna']


In [83]:
dfr.info(memory_usage='deep'), dfr.memory_usage(deep=True)/1024**2

<class 'pandas.core.frame.DataFrame'>
Index: 153982 entries, 1160071115 to 999809975987
Data columns (total 12 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   year_id            153982 non-null  uint16 
 1   grupo_dependencia  153982 non-null  uint8  
 2   rama               153982 non-null  object 
 3   situacion_egreso   153982 non-null  uint8  
 4   codigo_region      153982 non-null  uint8  
 5   codigo_comuna      152913 non-null  float64
 6   leng_actual        153982 non-null  uint16 
 7   mate_actual        153982 non-null  uint16 
 8   hcso_actual        153982 non-null  uint16 
 9   cien_actual        153982 non-null  uint16 
 10  prom_notas         153982 non-null  uint8  
 11  ptje_nem           153982 non-null  uint16 
dtypes: float64(1), object(1), uint16(6), uint8(4)
memory usage: 12.2 MB


(None,
 Index                1.174789
 year_id              0.293697
 grupo_dependencia    0.146849
 rama                 7.489283
 situacion_egreso     0.146849
 codigo_region        0.146849
 codigo_comuna        1.174789
 leng_actual          0.293697
 mate_actual          0.293697
 hcso_actual          0.293697
 cien_actual          0.293697
 prom_notas           0.146849
 ptje_nem             0.293697
 dtype: float64)

# 2005

In [102]:
year = 2005
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2005.
Rows: 169888 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'nombre_region', 'cpr', 'nombre_provincia', 'c_com', 'nombre_comuna']


# 2006

In [86]:
year = 2006
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2006.
Rows: 176778 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'nombre_region', 'cpr', 'nombre_provincia', 'c_com', 'nombre_comuna']


# 2007

In [87]:
year = 2007
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2007.
Rows: 212889 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'nombre_region', 'cpr', 'nombre_provincia', 'c_com', 'nombre_comuna']


# 2008

In [88]:
year = 2008
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2008.
Rows: 218261 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2009

In [89]:
year = 2009
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2009.
Rows: 243809 | Columns: 12
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2010

In [90]:
year = 2010
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2010.
Rows: 253236 | Columns: 14
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2011

In [91]:
year = 2011
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2011.
Rows: 252402 | Columns: 14
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2012

In [92]:
year = 2012
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2012.
Rows: 242661 | Columns: 19
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2013

In [93]:
year = 2013
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2013.
Rows: 244707 | Columns: 20
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2014

In [94]:
year = 2014
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2014.
Rows: 243596 | Columns: 20
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2015

In [95]:
year = 2015
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2015.
Rows: 257852 | Columns: 20
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2016

In [109]:
year = 2016
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2016.
Rows: 266494 | Columns: 20
Dictionary: dict_keys(['rinden', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


In [110]:
dfr

Unnamed: 0_level_0,year_id,rbd,grupo_dependencia,rama,situacion_egreso,codigo_region,codigo_comuna,leng_actual,mate_actual,hcso_actual,cien_actual,modulo_actual,leng_anterior,mate_anterior,hcso_anterior,cien_anterior,modulo_anterior,prom_notas,ptje_nem,ptje_ranking
id_aux,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
900070029,2016,6897,3,H1,5,14,14103.0,394,244,270,0,,0,0,0,0,,530,476,481
900070053,2016,6897,3,H1,5,14,14103.0,567,539,0,583,BIO,0,0,0,0,,543,502,507
900070059,2016,22634,2,H1,5,14,14101.0,514,342,421,0,,0,0,0,0,,520,455,455
900070070,2016,14299,2,H1,5,5,5801.0,507,532,0,515,BIO,0,0,0,0,,520,455,455
900070075,2016,26061,2,H2,1,13,13128.0,385,425,356,0,,0,0,0,0,,567,555,588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9992599479234,2016,8488,3,H1,1,13,13101.0,439,406,458,539,BIO,0,0,0,0,,615,651,705
9992999579254,2016,25557,2,H2,1,13,13401.0,465,458,469,0,,0,0,0,0,,580,582,654
9996599679690,2016,10313,2,H1,1,13,13127.0,416,364,374,407,BIO,0,0,0,0,,605,631,764
9997599679773,2016,15745,2,H1,5,6,6301.0,493,406,0,391,BIO,0,0,0,0,,558,534,532


# 2017

In [97]:
year = 2017
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2017.
Rows: 269351 | Columns: 23
Dictionary: dict_keys(['rinden', 'anexo_cod_ens', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_cod_ens: ['year_id', 'código', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2018

In [98]:
year = 2018
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2018.
Rows: 274498 | Columns: 25
Dictionary: dict_keys(['rinden', 'anexo_cod_ens', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_cod_ens: ['year_id', 'código', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cr', 'reg_orden', 'cpr', 'prv_nombre', 'com', 'com_nombre']


# 2019

In [104]:
year = 2019
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

Loaded dataset resultados for year 2019.
Rows: 278287 | Columns: 25
Dictionary: dict_keys(['rinden', 'anexo_cod_ens', 'anexo_comunasregiones'])
	- rinden: ['year_id', 'variable', 'descripción']
	- anexo_cod_ens: ['year_id', 'código', 'descripción']
	- anexo_comunasregiones: ['year_id', 'cod_reg.', 'region_nombre', 'cod.prv', 'prv_nombre', 'cod.comuna', 'com_nombre']


In [108]:
dfr

Unnamed: 0_level_0,year_id,rbd,cod_ens,grupo_dependencia,rama_educacional,situacion_egreso,codigo_region,codigo_comuna,promedio_notas,ptje_nem,ptje_ranking,leng_actual,mate_actual,hcso_actual,cien_actual,modulo_actual,promedio_lm_actual,percentil_lm_actual,leng_anterior,mate_anterior,hcso_anterior,cien_anterior,modulo_anterior,promedio_lm_anterior,percentil_lm_anterior
id_aux,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
900070019,2019,22388.0,363.0,2.0,H2,5.0,14.0,14101.0,633,691,731,572,0,0,0,,0,0,0,0,0,0,,0,0
900270084,2019,1.0,510.0,3.0,T2,5.0,15.0,15101.0,593,607,664,362,403,0,402,BIO,3825,12,0,0,0,0,,0,0
900370016,2019,14210.0,310.0,2.0,H1,5.0,5.0,5502.0,588,596,601,598,403,696,565,BIO,5005,53,0,0,0,0,,0,0
900370067,2019,1437.0,310.0,2.0,H1,5.0,5.0,5502.0,548,513,513,618,329,587,449,BIO,4735,42,0,0,0,0,,0,0
900370092,2019,1422.0,363.0,3.0,H2,1.0,5.0,5502.0,505,426,426,380,446,336,367,QUI,413,20,0,0,0,0,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9996599179618,2019,5567.0,510.0,3.0,T2,1.0,9.0,9101.0,588,597,776,309,426,446,463,BIO,3675,9,0,0,0,0,,0,0
9997599679743,2019,8938.0,610.0,2.0,T3,1.0,13.0,13123.0,6,622,693,517,567,354,0,,542,67,0,0,0,0,,0,0
9997599779750,2019,10970.0,410.0,3.0,T1,1.0,2.0,2201.0,595,612,657,517,355,526,0,,436,28,0,0,0,0,,0,0
9997939341287,2019,10663.0,310.0,3.0,H1,1.0,13.0,13404.0,63,685,697,372,426,0,463,BIO,399,16,0,0,0,0,,0,0


In [106]:
dfr.info(memory_usage='deep'), dfr.memory_usage(deep=True)/1024**2

<class 'pandas.core.frame.DataFrame'>
Index: 278287 entries, 900070019 to 9998599379869
Data columns (total 25 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   year_id                278287 non-null  uint16 
 1   rbd                    274987 non-null  float64
 2   cod_ens                274987 non-null  float64
 3   grupo_dependencia      274987 non-null  float64
 4   rama_educacional       274987 non-null  object 
 5   situacion_egreso       278286 non-null  float64
 6   codigo_region          277208 non-null  float64
 7   codigo_comuna          274987 non-null  float64
 8   promedio_notas         278287 non-null  object 
 9   ptje_nem               278287 non-null  uint16 
 10  ptje_ranking           278287 non-null  uint16 
 11  leng_actual            278287 non-null  uint16 
 12  mate_actual            278287 non-null  uint16 
 13  hcso_actual            278287 non-null  uint16 
 14  cien_actual            278

(None,
 Index                     2.123161
 year_id                   0.530790
 rbd                       2.123161
 cod_ens                   2.123161
 grupo_dependencia         2.123161
 rama_educacional         13.475358
 situacion_egreso          2.123161
 codigo_region             2.123161
 codigo_comuna             2.123161
 promedio_notas           13.977691
 ptje_nem                  0.530790
 ptje_ranking              0.530790
 leng_actual               0.530790
 mate_actual               0.530790
 hcso_actual               0.530790
 cien_actual               0.530790
 modulo_actual            11.569561
 promedio_lm_actual       14.020638
 percentil_lm_actual       0.265395
 leng_anterior             0.530790
 mate_anterior             0.530790
 hcso_anterior             0.530790
 cien_anterior             0.530790
 modulo_anterior           9.090637
 promedio_lm_anterior     13.392700
 percentil_lm_anterior     0.265395
 dtype: float64)

# 2020

In [100]:
year = 2020
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

ValueError: invalid literal for int() with base 10: '.9+85.97e+83'

# 2021

In [None]:
year = 2021
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

# 2022

In [None]:
year = 2022
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

# 2023

In [None]:
year = 2023
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

# 2024

In [None]:
year = 2024
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)

# 2025

In [None]:
year = 2025
# dfi, di = load_raw_dataset('inscripciones', year=year, verbose=VERBOSE)
dfr, dr = load_raw_dataset('resultados', year=year, verbose=VERBOSE)
# dfp, dp = load_raw_dataset('postulaciones', year=year, verbose=VERBOSE)
# dfm, dm = load_raw_dataset('matriculas', year=year, verbose=VERBOSE)