# Getting the TOTAL DEATHS from COVID-19 of the 24 departments of Peru by each epidemiological week since the pandemic began

## 0. Run libraries and functions
Empezamos cargando las librerías que necesitamos y leyendo nuestro csv actualizado de fallecidos por COVID-19.

In [2]:
import pandas as pd
import numpy as np

import functions as fn

In [3]:
def just_cities(fal_url):
    """
    Función para filtrar a cada fallecido reportado en su año
    y semana epidemiológica en ciudad (Fallecidos = fal, Vacunados = vac)
    """         
    fal_col = ['FECHA_FALLECIMIENTO', 'DEPARTAMENTO']   # Get only date and department 
    df_fal = fn.read_largeCSV_file(fal_url, ';', fal_col)
    fn.variable_fecha(df_fal, 'FECHA_FALLECIMIENTO')
    fn.date_to_epiweek(df_fal,'FECHA_FALLECIMIENTO')
    df_fal['fallecido'] = 1     # To count cases                                   
    df_fal['fallecido'].apply(np.int8) 
    
    return df_fal

## 1. Add the year and epidemiological week per each death case

In [3]:
fal_url = "RawData/fallecidos_covid.csv"
dxpnw = just_cities(fal_url)
del fal_url

Print the result of the dxpnw (**d**eaths confirmed from COVID-19 by de**p**artment a**n**d by Epidemiological **w**eek)

In [4]:
print("Head of the 'dxpnw' dataframe of each death confirmed case from COVID-19 in Peru:")
print(dxpnw.head()), print("\n")

print('TOTAL of confirmed death cases from COVID-19 in Peru: ' + str(dxpnw['fallecido'].sum())) 

Head of the 'dxpnw' dataframe of each death confirmed case from COVID-19 in Peru:
  DEPARTAMENTO  epi_year  epi_week  fallecido
0         LIMA      2021        17          1
1         LIMA      2021        17          1
2     AYACUCHO      2021        17          1
3         LIMA      2021        16          1
4     AREQUIPA      2021        31          1


TOTAL of confirmed death cases from COVID-19 in Peru: 200246


Crosstab of the total deaths from COVID-19 by department of Peru, for each epidemiological week and year

In [5]:
ct_dxpnw = pd.crosstab(index=[dxpnw['epi_year'], dxpnw['epi_week']],
                       columns=[dxpnw['fallecido'], dxpnw['DEPARTAMENTO']],
                       margins = True)

ct_dxpnw.to_csv('Data/fallecidos_departamentos_semanasEpi.csv')

Fix the indexs and print the **c**ross**t**ab result of the **dxpnw**

In [6]:
ct_dxpnw = pd.read_csv('Data/fallecidos_departamentos_semanasEpi.csv')

def filtering_data_dep(falxdep_df):
    """Function to fix the indexes of the data of deceased by department of Peru.
    IMPORTANT: There are more efficient ways to modify indexes using 'loc' and 
    'iloc' but this method at least 'works'"""

    time = falxdep_df[["fallecido", "Unnamed: 1"]]   # Get the col of epidemiological weeks
    time = time.rename(columns=time.iloc[1])     # Put the first row (epi_week) as header
    time = time.drop([0,1, len(time)-1],axis=0)  # Drop the first and last row (header, nan and total)
    time = time.reset_index(drop=True)           # Reset index

    departments = falxdep_df.drop(["Unnamed: 1", 'fallecido'], axis=1)    # Drop cols that are not departments
    departments = departments.rename(columns=departments.iloc[0])     # Put the first row (epi_week) as header
    departments = departments.drop([0,1, len(departments)-1],axis=0)  # Drop the first and last row (header, nan and total)
    departments = departments.reset_index(drop=True)                  # Reset index

    falxdep_df = pd.concat([time, departments], axis=1)
    return falxdep_df

ct_dxpnw_fix = filtering_data_dep(ct_dxpnw)

print("Head of the crosstab of each death confirmed case from COVID-19 by year and epidemiological week:")
print(ct_dxpnw_fix.head(10))

del ct_dxpnw
ct_dxpnw_fix.to_csv('Data/fallecidos_departamentos_semanasEpi.csv', index = False)

Head of the crosstab of each death confirmed case from COVID-19 by year and epidemiological week:
  epi_year epi_week AMAZONAS ANCASH APURIMAC AREQUIPA AYACUCHO CAJAMARCA  \
0     2020       10        0      0        0        0        0         0   
1     2020       11        0      0        0        0        0         0   
2     2020       12        0      0        0        0        1         0   
3     2020       13        0      3        0        0        0         0   
4     2020       14        0      0        0       15        0         2   
5     2020       15        1      9        1        7        1         4   
6     2020       16        0     24        0        7        2         4   
7     2020       17        0     47        3       13        3         4   
8     2020       18        2     67        0       14        1         5   
9     2020       19        7    102        1       18        7         4   

  CALLAO CUSCO  ... MADRE DE DIOS MOQUEGUA PASCO PIURA PUNO SAN M

# Getting the TOTAL of people FULLY VACCINATED (2 doses) and the TOTAL confirmed DEATHS from COVID-19 by each of the 24 departments of Peru 

## 0. Crosstab of the TOTAL confirmed deaths from COVID-19 by department.
Use 'dxpnw' dataframe

In [None]:
dep_deaths = pd.crosstab(index = dxpnw['DEPARTAMENTO'], columns = dxpnw['fallecido'])
dep_deaths.columns = ['fallecidos']

# Adding an extra row of the total deaths for the whole country
dep_deaths.loc['PERU']= dep_deaths.sum()

## 1. Get the TOTAL confirmed DEATHS from COVID-19 by each of the 24 departments of Peru

In [None]:
print("Head of a SUMMARY dataframe of all fully vaccinated people in Peru (2 doses) per place of vaccination:")
print(dep_deaths)

## 2. Get the total of people FULLY VACCINATED (2 doses) by the 24 departments of Peru

There are no direct way to found the total of people fully vaccinated per departments. To achieve this the following is planned:

1. The vaccination dataset **(RawData/TB_VACUNACION_COVID19.csv)** only gives information about the vaccination center called *id_centro_vacunacion*. NOT the department or another relevant location.

2. The vaccination centers dataset **(RawData/TB_CENTRO_VACUNACION.csv)** can be used to "match" the *id_centro_vacunacion* with the *id_ubigeo*. Which is a numeric variable from 0 to 1894 of the specifict district.

3.  Finally with the UBIGEO dataset **(RawData/TB_UBIGEOS.csv)** it is possible to "match" each *id_ubigeo* with the correct department.


In [8]:
def vac_department(vac_url):
    """
    Función que toma la dirección del dataset de vacunados y devuelve el número de VACUNADOS 
    por los 24 departamentos del Perú
    """
    vac_col = ['id_centro_vacunacion', 'dosis','fecha_vacunacion']                     
    df_vac = fn.read_largeCSV_file(vac_url, ',', vac_col)    
    lst_vac = fn.df_into_chunks(df_vac)               
    
    for df in lst_vac:                                           
        df = df.drop(df[df["dosis"] == 1].index,  inplace=True)     # Drop non fully vaccinated (1 dose)

    for df in lst_vac:
        df['vacunado'] = 1  # To count each case
        df['vacunado'] = df['vacunado'].apply(np.int8)
        del df['dosis']     # Dose var is no needed anymore

    return lst_vac

In [9]:
vac_url = "RawData/TB_VACUNACION_COVID19.csv"
vacxdep = vac_department(vac_url)
del vac_url

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats i

In [7]:
print("Head of the first chunk of all fully vaccinated people in Peru (2 doses) per place of vaccination:")
print(vacxdep[0].head())

Head of the first chunk of all fully vaccinated people in Peru (2 doses) per place of vaccination:
   id_centro_vacunacion  vacunado
0                    17         1
1                  1828         1
2                103617         1
3                   891         1
7                108281         1


Note how each fully vaccinated person has a location id. Now it is necessary to collect each fully vaccine case by this id.

## 2. From a list of chunks to a summary dataframe of people fully vaccinated (2 doses) per department

**IMPORTANT:** "vacxdep" variable is actually a list of *dataframes* or *chunks*.

In [None]:
def vacxdep_chunks(dfs_vac):
    """
    Devuelve un dataframe con el TOTAL DE VACUNADOS por DEPARTAMENTO 
    del PERÚ (recibe una lista de dataframes o chunks)
    """
    var_holder = {}     # Diccionario para guardar nombres                                             
    lst_epi_vac = []    # Lista de dfs para cada sumatoria de chunks
                                         
    for i, chunk in enumerate(dfs_vac):
        var_holder['epi_vac_' + str(i)]= pd.crosstab(index=[chunk['id_centro_vacunacion']],
                                                     columns=chunk['vacunado'])
        lst_epi_vac.append(var_holder['epi_vac_' + str(i)])
    
    merged_epivac = pd.concat(lst_epi_vac, axis=1)  # Merge all dfs
    epi_vac = pd.DataFrame(merged_epivac.sum(numeric_only=True, axis=1))
    epi_vac.columns = ['vacunados']
    epi_vac['vacunados'] = epi_vac['vacunados'].astype(np.int64)
    epi_vac.reset_index(level=0, inplace=True)

    return epi_vac

In [None]:
vacxdep_sum = vacxdep_chunks(vacxdep)
print("Head of a SUMMARY dataframe of all fully vaccinated people in Peru (2 doses) per place of vaccination:")
print(vacxdep_sum.head()), print("\n")

print('Fully vaccinated (2 doses): ' + str(vacxdep_sum['vacunados'].sum())) 

## 3. Getting the department of each vaccination center
Read the 2 other csv with the directions

For this case is necesary to match ...

In [None]:
ubigeo_url = 'RawData/TB_UBIGEOS.csv'
vaccenter_url = 'RawData/TB_CENTRO_VACUNACION.csv'

ubigeo = pd.read_csv(ubigeo_url, usecols = ['id_ubigeo', 'departamento'])
vaccenter = pd.read_csv(vaccenter_url, usecols= ['id_centro_vacunacion','id_ubigeo'])

del ubigeo_url, vaccenter_url

vaccenter = vaccenter.merge(ubigeo, on = 'id_ubigeo', how = 'left')
del vaccenter['id_ubigeo']

print("Head of the merged dataframe (vaccenter) with: 'id_centro_vacunacion' and 'departamento'") 
print(vaccenter.head(10))



Note that the new dataframe **'vaccenter'** can be used to get the department of each vaccination center

## 4. Found the deparment of all people fully vaccinated (2 doses)

In [None]:
vacxdep_sum = vacxdep_sum.merge(vaccenter, on = 'id_centro_vacunacion', how = 'left')
del vacxdep_sum['id_centro_vacunacion']

print(vacxdep_sum.head())

Finally just get the total of vaccinated grouping by department.

In [None]:
ct_vacxdep = vacxdep_sum.groupby(['departamento']).sum()    # Sum by departments
ct_vacxdep.loc['PERU',:] = ct_vacxdep.sum(axis = 0)         # Total of fully vaccinated
ct_vacxdep['vacunados'] = ct_vacxdep['vacunados'].apply(np.int64) 

print(ct_vacxdep)

# 5. Merge fully vaccinated and confirmed deaths from COVID-19 per departments

In [None]:
vndxdep = pd.concat([ct_vacxdep, dep_deaths], axis=1)
vndxdep.reset_index(level=0, inplace=True)
vndxdep.rename(columns = {'index':'departamento'}, inplace = True)

print(vndxdep)

# 6. Adding the number of inhabitants per department

Population based on: https://es.wikipedia.org/wiki/Anexo:Departamentos_del_Per%C3%BA_por_poblaci%C3%B3n

In [None]:
dic_dep = {
    "AMAZONAS"	:	426806,
    "ANCASH"	:	1180638,
    "APURIMAC"	:	430736,
    "AREQUIPA"	:	1497438,
    "AYACUCHO"	:	668213,
    "CAJAMARCA"	:	1453711,
    "CALLAO"	:	1129854,
    "CUSCO"	    :	1357075,
    "HUANCAVELICA":	365317,
    "HUANUCO"	:	760267,
    "ICA"	    :	975182,
    "JUNIN"	    :	1361467,
    "LA LIBERTAD":	2016771,
    "LAMBAYEQUE":	1310785,
    "LIMA"	    :	10628470,
    "LORETO"	:	1027559,
    "MADRE DE DIOS":173811,
    "MOQUEGUA"	:	192740,
    "PASCO"	    :	271904,
    "PIURA"	    :	2047954,
    "PUNO"	    :	1237997,
    "SAN MARTIN":	899648,
    "TACNA"	    :	370974,
    "TUMBES"	:	251521,
    "UCAYALI"	:	589110,
    "PERU"  	:	32625948,
}

In [None]:
vndxdep['no_habitantes'] = vndxdep['departamento'].map(dic_dep)
vndxdep = vndxdep[['departamento', 'no_habitantes', 'vacunados', 'fallecidos']]

# Mortality rate per 100k per department
vndxdep['tasa_mortalidad'] = (vndxdep['fallecidos']/vndxdep['no_habitantes'])*100000

# % of people fully vaccinated per department
vndxdep['vac_porcentaje'] = (vndxdep['vacunados']*100)/vndxdep['no_habitantes']

print(vndxdep)

vndxdep.to_csv('Data/vac_fal_x_departamento.csv',index = False)

# Número de vacunados por semana epidemiológica de cada departamento

## 0. 
