# Obteniendo el **número de muertes** confirmadas por COVID-19 por **semana epidemiológica** de cada uno de los **24 departamentos** del Perú

## 0. Cargar librerías

In [65]:
import pandas as pd
import numpy as np

import functions as fn

## 1. Cargamos direcciones de RawData

In [66]:
# Relative paths to the raw CSV inputs used by this notebook.
vac_url = 'RawData/TB_VACUNACION_COVID19.csv'  # vaccination dataset (read by vac_department)
fal_url = 'RawData/fallecidos_covid.csv'       # deaths dataset (read by just_cities)

## 2. Procesamos el dataset de fallecidos

### 2.1. Añadimos el año, semana epidemiológica y departamento de cada fallecido

In [67]:
def just_cities(fal_url):
    """
    Load the deaths dataset and add a unit counter to every record.

    Parameters
    ----------
    fal_url : str
        Path to the ';'-separated deaths CSV. Only the columns
        'FECHA_FALLECIMIENTO' and 'DEPARTAMENTO' are requested.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``fn.read_largeCSV_file`` after it has been
        passed through ``fn.variable_fecha`` and ``fn.date_to_epiweek``, plus
        a 'fallecido' column (dtype int8) that is 1 for every row so that the
        rows can be counted with a crosstab.
    """
    fal_col = ['FECHA_FALLECIMIENTO', 'DEPARTAMENTO']
    df_fal = fn.read_largeCSV_file(fal_url, ';', fal_col)

    # Both helpers are called only for their effect on df_fal (their return
    # values are ignored). NOTE(review): presumably they parse the date column
    # and replace it with 'epi_year'/'epi_week' in place -- confirm in functions.py.
    fn.variable_fecha(df_fal, 'FECHA_FALLECIMIENTO')
    fn.date_to_epiweek(df_fal, 'FECHA_FALLECIMIENTO')

    # Unit counter stored directly as int8. The previous
    # `df_fal['fallecido'].apply(np.int8)` discarded its result, so the column
    # silently stayed int64.
    df_fal['fallecido'] = np.ones(len(df_fal), dtype=np.int8)

    return df_fal

In [68]:
# One row per reported death, with a constant 'fallecido' = 1 counter column.
falxdep = just_cities(fal_url)

In [69]:
# Preview the first rows of the per-death table.
print(falxdep.head())

  DEPARTAMENTO  epi_year  epi_week  fallecido
0   LAMBAYEQUE      2021        23          1
1        PIURA      2021        11          1
2          ICA      2021        22          1
3     AREQUIPA      2021        26          1
4         LIMA      2021        18          1


**IMPORTANTE:** No tenemos el total de fallecidos por departamento y semana epidemiológica, sino el departamento y semana epidemiológica de cada caso.

### 2.2. Realizamos una crosstab de fallecidos por COVID-19 por departamento de Perú, para cada semana epidemiológica y año

Esto para obtener el TOTAL de fallecidos por cada departamento y semana epidemiológica

In [70]:
# Count deaths per (epi_year, epi_week) row and per department column.
# Because 'fallecido' (always 1) is also passed as a column key, the result has
# a two-level column header (fallecido, DEPARTAMENTO), which is flattened
# further below. margins = False means NO total row/column is added.
ct_falxdep = pd.crosstab(index=[falxdep['epi_year'], falxdep['epi_week']],
                       columns=[falxdep['fallecido'], falxdep['DEPARTAMENTO']],
                       margins = False)

In [71]:
# Preview the weekly counts (note the two-level column header).
print(ct_falxdep.head())

fallecido                1                  ...                     
DEPARTAMENTO      AMAZONAS ANCASH APURIMAC  ... TACNA TUMBES UCAYALI
epi_year epi_week                           ...                     
2020     10              0      0        0  ...     0      0       0
         11              0      0        0  ...     0      0       0
         12              0      0        0  ...     0      0       2
         13              0      4        0  ...     0      0       3
         14              0      0        0  ...     1      3      11

[5 rows x 25 columns]


Ahora tenemos el TOTAL de fallecidos por cada departamento y semana epidemiológica

In [72]:
# Write the crosstab (multi-level index and columns included) to disk; the
# file is read back and overwritten with a flattened version further below.
ct_falxdep.to_csv('Data/fallecidos_x_departamentos_x_semanasEpi.csv')

### 2.3. Arreglamos los índices
(Buscar una manera más eficiente de hacerlo, en lugar de guardar y leer el dataset nuevamente)

In [73]:
# Re-read the file with default settings: the extra header levels come back as
# ordinary data rows, which filtering_data_dep() then cleans up.
# NOTE(review): this rebinds 'ct_falxdep' from the crosstab to the raw re-read frame.
ct_falxdep = pd.read_csv('Data/fallecidos_x_departamentos_x_semanasEpi.csv')

In [74]:
def filtering_data_dep(falxdep_df):
    """Flatten the re-read deaths crosstab into a plain table.

    The input is the crosstab CSV read back with default ``pd.read_csv``
    settings, so:

    * the columns "fallecido" and "Unnamed: 1" hold the two index levels;
      their row 1 contains the real names ('epi_year', 'epi_week');
    * every other column is a department whose real name is stored in row 0;
    * rows 0 and 1 are therefore header residue, not data.

    Parameters
    ----------
    falxdep_df : pandas.DataFrame
        Raw frame as described above (default integer index).

    Returns
    -------
    pandas.DataFrame
        Columns 'epi_year', 'epi_week' followed by one column per department,
        one row per epidemiological week, index reset to 0..n-1. Values keep
        whatever dtype ``read_csv`` produced (no numeric conversion is done).

    Notes
    -----
    A trailing totals row is dropped only when it is actually present, i.e.
    when the first column of the last row is 'All' (the default label pandas
    uses for crosstab margins). Previously the last row was dropped
    unconditionally, which discarded the most recent week whenever the
    crosstab was built with ``margins=False``.
    """
    week_cols = ["fallecido", "Unnamed: 1"]

    # Detect an optional margins ("All") row at the bottom of the table.
    has_totals = (len(falxdep_df) > 2
                  and str(falxdep_df["fallecido"].iloc[-1]) == "All")
    stop = len(falxdep_df) - 1 if has_totals else len(falxdep_df)

    # Year/week columns: row 1 carries their real names.
    time = falxdep_df[week_cols]
    time = time.rename(columns=time.iloc[1].to_dict())
    time = time.iloc[2:stop].reset_index(drop=True)   # skip the two header rows

    # Department columns: row 0 carries the department names.
    departments = falxdep_df.drop(week_cols, axis=1)
    departments = departments.rename(columns=departments.iloc[0].to_dict())
    departments = departments.iloc[2:stop].reset_index(drop=True)

    return pd.concat([time, departments], axis=1)

In [77]:
# Flatten the re-read crosstab into: epi_year, epi_week, one column per department.
ct_falxdep_fix = filtering_data_dep(ct_falxdep)
print(ct_falxdep_fix.head())

  epi_year epi_week AMAZONAS ANCASH  ... SAN MARTIN TACNA TUMBES UCAYALI
0     2020       10        0      0  ...          2     0      0       0
1     2020       11        0      0  ...          0     0      0       0
2     2020       12        0      0  ...          0     0      0       2
3     2020       13        0      4  ...          0     0      0       3
4     2020       14        0      0  ...          3     1      3      11

[5 rows x 27 columns]


In [78]:
# Overwrite the intermediate file written earlier with the flattened table.
# NOTE(review): because the same path is reused, re-running only the
# read_csv + filtering_data_dep cells after this one would feed the already
# flattened file to filtering_data_dep.
ct_falxdep_fix.to_csv('Data/fallecidos_x_departamentos_x_semanasEpi.csv', index = False)

# Obteniendo el número **total de personas completamente vacunadas (2 dosis)** y el número **total de muertes** confirmadas por COVID-19 por cada uno de los **24 departamentos** del Perú 

## 0. Crosstab of the TOTAL confirmed deaths from COVID-19 by department.
Use 'falxdep' dataframe

In [7]:
# Count deaths per department: 'fallecido' is always 1, so the crosstab has a
# single column, which is then renamed to 'fallecidos'.
dep_deaths = pd.crosstab(index = falxdep['DEPARTAMENTO'], columns = falxdep['fallecido'])
dep_deaths.columns = ['fallecidos']

# Adding an extra row of the total deaths for the whole country
dep_deaths.loc['PERU']= dep_deaths.sum()

## 1. Get the TOTAL confirmed DEATHS from COVID-19 by each of the 24 departments of Peru

In [8]:
# The label used to read "fully vaccinated people ... per place of vaccination"
# (copied from the vaccination section); the frame printed here is the table of
# deaths per department, so the label now says so.
print("Total confirmed COVID-19 deaths per department of Peru (last row = national total):")
print(dep_deaths)

Head of a SUMMARY dataframe of all fully vaccinated people in Peru (2 doses) per place of vaccination:
               fallecidos
DEPARTAMENTO             
AMAZONAS             1273
ANCASH               6728
APURIMAC             1519
AREQUIPA             9718
AYACUCHO             2148
CAJAMARCA            4149
CALLAO              10055
CUSCO                4812
HUANCAVELICA         1176
HUANUCO              2701
ICA                  8569
JUNIN                7061
LA LIBERTAD         10324
LAMBAYEQUE           8642
LIMA                89006
LORETO               4266
MADRE DE DIOS         772
MOQUEGUA             1526
PASCO                1046
PIURA               12163
PUNO                 4228
SAN MARTIN           3030
TACNA                1970
TUMBES               1586
UCAYALI              3059
PERU               201527


## 2. Obtener el total de personas COMPLETAMENTE VACUNADAS (2 dosis) por los 24 departamentos del Perú

No existe una forma directa de calcular el total de personas completamente vacunadas por departamento. Para lograrlo se prevé lo siguiente:

1. The vaccination dataset **(RawData/TB_VACUNACION_COVID19.csv)** only gives information about the vaccination center called *id_centro_vacunacion*. NOT the department or another relevant location.

2. The vaccination centers dataset **(RawData/TB_CENTRO_VACUNACION.csv)** can be used to "match" the *id_centro_vacunacion* with the *id_ubigeo*, which is a numeric variable from 0 to 1894 that identifies the specific district.

3.  Finally with the UBIGEO dataset **(RawData/TB_UBIGEOS.csv)** it is possible to "match" each *id_ubigeo* with the correct department.


In [12]:
def vac_department(vac_url):
    """
    Load the vaccination dataset and keep only the second-dose records.

    Parameters
    ----------
    vac_url : str
        Path to the ','-separated vaccination CSV. Only the columns
        'id_centro_vacunacion' and 'dosis' are requested.

    Returns
    -------
    list of pandas.DataFrame
        One frame per chunk produced by ``fn.df_into_chunks``. Each frame holds
        the rows with ``dosis == 2``, without the 'dosis' column, plus a
        'vacunado' column (dtype int8) equal to 1 on every row so the records
        can be counted later. Note that this is NOT yet a per-department
        total; the aggregation happens in later cells.
    """
    vac_col = ['id_centro_vacunacion', 'dosis']
    df_vac = fn.read_largeCSV_file(vac_url, ',', vac_col)
    lst_vac = fn.df_into_chunks(df_vac)

    # Build a fresh frame per chunk instead of mutating the filtered slice:
    #  * avoids SettingWithCopyWarning from assigning into a `.loc` selection;
    #  * actually stores 'vacunado' as int8 (the old `.apply(np.int8)` result
    #    was discarded);
    #  * works for an empty chunk list (the old trailing `del chunk` raised
    #    when the loop never ran).
    return [
        chunk.loc[chunk['dosis'] == 2]
             .drop(columns='dosis')          # constant (2) after filtering
             .assign(vacunado=1)
             .astype({'vacunado': np.int8})
        for chunk in lst_vac
    ]

In [13]:
# (Re)define the vaccination CSV path, build the list of second-dose chunks,
# then remove the path variable from the namespace.
vac_url = "RawData/TB_VACUNACION_COVID19.csv"
vacxdep = vac_department(vac_url)
del vac_url

In [14]:
# 'vacxdep' is a list of DataFrames; preview only the first chunk.
print("Head of the first chunk of all fully vaccinated people in Peru (2 doses) per place of vaccination:")
print(vacxdep[0].head())

Head of the first chunk of all fully vaccinated people in Peru (2 doses) per place of vaccination:
    fecha_vacunacion  id_centro_vacunacion  vacunado
0           20210701                    62         1
1           20210703                   752         1
3           20211126                  3138         1
13          20211028                     3         1
16          20211120                     0         1


Note how each fully vaccinated person has a location id. Now it is necessary to aggregate the fully vaccinated cases by this id.

## 2. From a list of chunks to a summary dataframe of people fully vaccinated (2 doses) per department

**IMPORTANT:** "vacxdep" variable is actually a list of *dataframes* or *chunks*.

In [15]:
def vacxdep_chunks(dfs_vac):
    """
    Collapse a list of vaccination chunks into one table of counts per
    vaccination center.

    Parameters
    ----------
    dfs_vac : list of pandas.DataFrame
        Chunks with the columns 'id_centro_vacunacion' and 'vacunado'.

    Returns
    -------
    pandas.DataFrame
        Columns 'id_centro_vacunacion' and 'vacunados' (int64): the number of
        rows found for each center across all chunks. The mapping from center
        to department is done outside this function.
    """
    # One small count table per chunk (rows: center id, column: 'vacunado' value).
    per_chunk_counts = [
        pd.crosstab(index=[part['id_centro_vacunacion']],
                    columns=part['vacunado'])
        for part in dfs_vac
    ]

    # Align the tables on center id; a center absent from a chunk becomes NaN,
    # which the row-wise sum skips.
    side_by_side = pd.concat(per_chunk_counts, axis=1)
    totals = side_by_side.sum(numeric_only=True, axis=1)

    epi_vac = totals.astype(np.int64).to_frame('vacunados')
    epi_vac = epi_vac.reset_index(level=0)  # center id: index -> regular column

    return epi_vac

In [16]:
# Aggregate every chunk into a single table: one row per vaccination center.
vacxdep_sum = vacxdep_chunks(vacxdep)

print("Head of a SUMMARY dataframe of all fully vaccinated people in Peru (2 doses) per place of vaccination:")
print(vacxdep_sum.head())
print("\n")

# Grand total of second-dose records across all centers.
print('Fully vaccinated (2 doses): ' + str(vacxdep_sum['vacunados'].sum()))

Head of a SUMMARY dataframe of all fully vaccinated people in Peru (2 doses) per place of vaccination:
   id_centro_vacunacion  vacunados
0                     0    5039407
1                     1        344
2                     3     182818
3                     5      10024
4                     6      40121


Fully vaccinated (2 doses): 19291391


## 3. Getting the department of each vaccination center
Read the 2 other csv with the directions

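For this case it is necessary to match each vaccination center (*id_centro_vacunacion*) to its district (*id_ubigeo*), and each district to its department.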

In [17]:
# Lookup tables: district id -> department, and vaccination center -> district id.
ubigeo_url = 'RawData/TB_UBIGEOS.csv'
vaccenter_url = 'RawData/TB_CENTRO_VACUNACION.csv'

ubigeo = pd.read_csv(ubigeo_url, usecols=['id_ubigeo', 'departamento'])
vaccenter = pd.read_csv(vaccenter_url, usecols=['id_centro_vacunacion', 'id_ubigeo'])

del ubigeo_url, vaccenter_url

# Resolve each center's department through its district id, then discard the
# district id, leaving only: id_centro_vacunacion, departamento.
vaccenter = (
    vaccenter
    .merge(ubigeo, on='id_ubigeo', how='left')
    .drop(columns='id_ubigeo')
)

print("Head of the merged dataframe (vaccenter) with: 'id_centro_vacunacion' and 'departamento'")
print(vaccenter.head(10))

Head of the merged dataframe (vaccenter) with: 'id_centro_vacunacion' and 'departamento'
   id_centro_vacunacion departamento
0                  2021         PUNO
1                  3699   SAN MARTIN
2                   154   SAN MARTIN
3                   155   SAN MARTIN
4                  3260   SAN MARTIN
5                  2906       ANCASH
6                  2907       ANCASH
7                  2909       ANCASH
8                  2910       ANCASH
9                  2912       ANCASH


Note that the new dataframe **'vaccenter'** can be used to get the department of each vaccination center

## 4. Find the department of all fully vaccinated people (2 doses)

In [18]:
# Attach the department of every vaccination center. With a left join, centers
# that are missing from 'vaccenter' keep their counts but get a NaN department.
# NOTE(review): this cell rebinds 'vacxdep_sum' and then deletes its join key,
# so running the cell a second time fails on the merge; re-run the
# vacxdep_chunks cell first.
vacxdep_sum = vacxdep_sum.merge(vaccenter, on = 'id_centro_vacunacion', how = 'left')
del vacxdep_sum['id_centro_vacunacion']

# NOTE(review): in the recorded output, center id 0 carries 5,039,407 records
# and is mapped to AMAZONAS, which later yields a vaccination percentage above
# 100% for that department -- verify whether id 0 is a placeholder for
# "unknown center" rather than a real location.
print(vacxdep_sum.head())

   vacunados departamento
0    5039407     AMAZONAS
1        344         LIMA
2     182818         LIMA
3      10024         LIMA
4      40121         LIMA


Finally just get the total of vaccinated grouping by department.

In [19]:
# Total second-dose records per department.
# NOTE(review): rows whose department is NaN are presumably left out by the
# groupby (the PERU total in the recorded output is lower than the grand total
# printed earlier) -- confirm.
ct_vacxdep = vacxdep_sum.groupby(['departamento']).sum()

# Append the national total as an extra 'PERU' row, then cast the column back
# to a 64-bit integer.
ct_vacxdep.loc['PERU', :] = ct_vacxdep.sum(axis=0)
ct_vacxdep['vacunados'] = ct_vacxdep['vacunados'].astype(np.int64)

print(ct_vacxdep)

               vacunados
departamento            
AMAZONAS         5142271
ANCASH            463675
APURIMAC          149049
AREQUIPA          843156
AYACUCHO          156990
CAJAMARCA         428833
CALLAO            715241
CUSCO               2003
HUANCAVELICA       62718
HUANUCO           163078
ICA               548606
JUNIN             519661
LA LIBERTAD       834518
LAMBAYEQUE        567017
LIMA             6354311
LORETO            259498
MADRE DE DIOS      53349
MOQUEGUA           96511
PASCO              81308
PIURA             694375
PUNO              303010
SAN MARTIN        350882
TACNA             190169
TUMBES             83342
UCAYALI           157200
PERU            19220771


# 5. Merge fully vaccinated and confirmed deaths from COVID-19 per departments

In [20]:
# Put vaccinated and deaths side by side (aligned on the department index),
# then turn the index into a regular column named 'departamento'.
vndxdep = (
    pd.concat([ct_vacxdep, dep_deaths], axis=1)
    .reset_index(level=0)
    .rename(columns={'index': 'departamento'})
)

print(vndxdep)

     departamento  vacunados  fallecidos
0        AMAZONAS    5142271        1273
1          ANCASH     463675        6728
2        APURIMAC     149049        1519
3        AREQUIPA     843156        9718
4        AYACUCHO     156990        2148
5       CAJAMARCA     428833        4149
6          CALLAO     715241       10055
7           CUSCO       2003        4812
8    HUANCAVELICA      62718        1176
9         HUANUCO     163078        2701
10            ICA     548606        8569
11          JUNIN     519661        7061
12    LA LIBERTAD     834518       10324
13     LAMBAYEQUE     567017        8642
14           LIMA    6354311       89006
15         LORETO     259498        4266
16  MADRE DE DIOS      53349         772
17       MOQUEGUA      96511        1526
18          PASCO      81308        1046
19          PIURA     694375       12163
20           PUNO     303010        4228
21     SAN MARTIN     350882        3030
22          TACNA     190169        1970
23         TUMBE

# 6. Adding the number of inhabitants per department

Population based on: https://es.wikipedia.org/wiki/Anexo:Departamentos_del_Per%C3%BA_por_poblaci%C3%B3n

In [21]:
# Number of inhabitants per department (keys match the upper-case department
# names used in the tables above); 'PERU' holds the national total.
dic_dep = {
    "AMAZONAS": 426806,
    "ANCASH": 1180638,
    "APURIMAC": 430736,
    "AREQUIPA": 1497438,
    "AYACUCHO": 668213,
    "CAJAMARCA": 1453711,
    "CALLAO": 1129854,
    "CUSCO": 1357075,
    "HUANCAVELICA": 365317,
    "HUANUCO": 760267,
    "ICA": 975182,
    "JUNIN": 1361467,
    "LA LIBERTAD": 2016771,
    "LAMBAYEQUE": 1310785,
    "LIMA": 10628470,
    "LORETO": 1027559,
    "MADRE DE DIOS": 173811,
    "MOQUEGUA": 192740,
    "PASCO": 271904,
    "PIURA": 2047954,
    "PUNO": 1237997,
    "SAN MARTIN": 899648,
    "TACNA": 370974,
    "TUMBES": 251521,
    "UCAYALI": 589110,
    "PERU": 32625948,
}

In [22]:
# Attach the population of each department and fix the column order.
# .copy() makes the reordered frame independent of the original, so the column
# assignments below do not raise SettingWithCopyWarning.
vndxdep['no_habitantes'] = vndxdep['departamento'].map(dic_dep)
vndxdep = vndxdep[['departamento', 'no_habitantes', 'vacunados', 'fallecidos']].copy()

# Mortality rate per 100k per department
vndxdep['tasa_mortalidad'] = ((vndxdep['fallecidos'] / vndxdep['no_habitantes']) * 100000).round(2)

# % of people fully vaccinated per department
# NOTE(review): the recorded output shows AMAZONAS above 100% (1204.83), which
# points at a problem in the center -> department mapping upstream, not in
# this formula.
vndxdep['vac_porcentaje'] = ((vndxdep['vacunados'] * 100) / vndxdep['no_habitantes']).round(2)

print(vndxdep)

     departamento  no_habitantes  ...  tasa_mortalidad  vac_porcentaje
0        AMAZONAS         426806  ...           298.26         1204.83
1          ANCASH        1180638  ...           569.86           39.27
2        APURIMAC         430736  ...           352.65           34.60
3        AREQUIPA        1497438  ...           648.98           56.31
4        AYACUCHO         668213  ...           321.45           23.49
5       CAJAMARCA        1453711  ...           285.41           29.50
6          CALLAO        1129854  ...           889.94           63.30
7           CUSCO        1357075  ...           354.59            0.15
8    HUANCAVELICA         365317  ...           321.91           17.17
9         HUANUCO         760267  ...           355.27           21.45
10            ICA         975182  ...           878.71           56.26
11          JUNIN        1361467  ...           518.63           38.17
12    LA LIBERTAD        2016771  ...           511.91           41.38
13    

Guardamos:

In [21]:
# Persist the final per-department summary (population, vaccinated, deaths, rates).
vndxdep.to_csv('Data/TOTAL_vacunados_y_fallecidos_x_departamento.csv',index = False)