In [None]:
# Import the necessary libraries
import pandas as pd
import geopandas as gpd
import gmaps
import gmaps.datasets
# For improved table display in the notebook
from IPython.display import display
import os
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Render matplotlib figures at high resolution.
mpl.rcParams['figure.dpi'] = 300
# Root folder of every dataset used in this notebook.
DATA_DIR = os.path.join('data', 'argentina')
# Security: the Google Maps API key must not be committed with the notebook.
# It is read from the GMAPS_API_KEY environment variable instead.
GMAPS_API_KEY = os.environ.get('GMAPS_API_KEY')
if not GMAPS_API_KEY:
    raise RuntimeError(
        "Set the GMAPS_API_KEY environment variable to a valid Google Maps API key "
        "before running this notebook."
    )
gmaps.configure(api_key=GMAPS_API_KEY)

In [None]:
# Import plotly and cufflinks and switch cufflinks to offline mode
import cufflinks as cf
import plotly.offline
cf.go_offline()
# NOTE(review): this call passes offline=False and world_readable=True right after
# go_offline(), which looks at odds with the "offline mode" intent stated above —
# confirm which configuration is actually wanted.
cf.set_config_file(offline=False, world_readable=True)

# Get all databases

In [None]:
# Project-local module; store_all() presumably (re)builds the data files that the
# cells below read — TODO confirm against generate_databases.py.
import generate_databases
generate_databases.store_all()

# pxdpto geo test

In [None]:
from utils.utils import normalize_dpto_name, validate_dpto_indexes

# Read the department shapefile stored under <DATA_DIR>/indec.
PXLOC = os.path.join(DATA_DIR, 'indec', 'pxdptodatosok.shp')
geodata = gpd.read_file(PXLOC, encoding='utf-8')
# Normalise every department name and cast the 'link' ids to int.
geodata['departamen'] = list(map(normalize_dpto_name, geodata['departamen']))
geodata['link'] = list(map(int, geodata['link']))
geodata

In [None]:
# Display the geometry stored at label 1 of the 'geometry' column
geodata['geometry'][1]

In [None]:
# Print the centroid of the geometry stored at label 1
print(geodata['geometry'][1].centroid)

In [None]:
# Build [latitude, longitude] pairs from each geometry's centroid (y, x),
# then split them into the two coordinate lists.
puntos = [[c.y, c.x] for c in (dpto.centroid for dpto in geodata['geometry'])]
latitudes = [punto[0] for punto in puntos]
longitudes = [punto[1] for punto in puntos]
# Heatmap weights: the 'hogares' column.
pesos = geodata['hogares']

# Draw the weighted heatmap on a Google map.
m = gmaps.Map()
m.add_layer(
    gmaps.heatmap_layer(
        puntos,
        weights=pesos,
        max_intensity=float(max(pesos)),
        point_radius=10.0,
    )
)
m

# Densidad

In [None]:
# Load the country-wide population-density GeoJSON.
DENS_FILE = os.path.join(DATA_DIR, 'datosgobar-densidad-poblacion', 'pais.geojson')
db_densidad = gpd.read_file(DENS_FILE, encoding='utf-8')
db_densidad['area'] = [float(a) for a in db_densidad['area']]
# 'hogares' values appear to be float-formatted text such as "123.0"; empty values count as 0.
# Parse through float() instead of the previous regex r'(\d+).0+': its unescaped dot
# matched any character, so a plain integer string was silently truncated
# (e.g. "1000" became 10).
db_densidad['hogares'] = [int(float(x)) if x else 0 for x in db_densidad['hogares']]
db_densidad

In [None]:
# Total of the 'poblacion' column; empty entries count as zero
f"population: {sum(float(s) if s else 0.0 for s in db_densidad['poblacion'])}"

In [None]:
# Render three geometries drawn at random (with replacement)
_ = [display(geom) for geom in random.choices(db_densidad['geometry'], k=3)]

In [None]:
# Histogram of the 'area' column
db_densidad["area"].iplot(
    kind="histogram",
    bins=10000,
    theme="white",
    title="Histograma de areas de secciones del pais",
    xTitle='Area',
    yTitle='Cant.',
)

In [None]:
# Histogram of the 'densidad' column
db_densidad["densidad"].iplot(
    kind="histogram",
    bins=10000,
    theme="white",
    title="Histograma de densidades de secciones del pais",
    xTitle='Densidad',
    yTitle='Cant.',
)

In [None]:
# Department ids that appear in only one of the two datasets
diff = set(map(int, db_densidad['dpto_id'])).symmetric_difference(map(int, geodata['link']))
diff

In [None]:
# List the geodata rows whose 'link' id is in the mismatch set
print("Missing deparments in density")
geodata.loc[geodata['link'].isin(diff), ['link', 'departamen', 'provincia']]

In [None]:
# Map of every geometry, coloured by the 'densidad' column
db_densidad.plot(column='densidad', cmap='hot')

In [None]:
# Same density map, restricted to x in [-58.5, -58.3] and y in [-34.8, -34.5]
ax = db_densidad.plot(column='densidad', cmap='hot')
ax.set_xlim(-58.5, -58.3)
ax.set_ylim(-34.8, -34.5)

In [None]:
# Build [latitude, longitude] pairs from each geometry's centroid (y, x)
puntos = [[c.y, c.x] for c in (dpto.centroid for dpto in db_densidad['geometry'])]
# Heatmap weights: the 'hogares' column.
pesos = db_densidad['hogares']

# Draw the weighted heatmap on a Google map.
m = gmaps.Map()
m.add_layer(
    gmaps.heatmap_layer(
        puntos,
        weights=pesos,
        max_intensity=float(max(pesos)),
        point_radius=5.0,
    )
)
m

# Ministerio educacion database

In [None]:
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)
# Read the 'matricula_y_secciones' table from its HDF file
SCHOOL_HDF = os.path.join(DATA_DIR, 'ministerio-educacion', 'matricula_y_secciones.hdf')
schooldb = pd.read_hdf(SCHOOL_HDF, key='matricula_y_secciones')
schooldb

In [None]:
# Columns whose name starts with one of the student-count prefixes
count_cols = [
    col for col in schooldb.columns
    if col.startswith(('Alumnos con Sobreedad', 'Repitentes', 'Matrícula.'))
]

In [None]:
# Row-wise sum of the selected count columns
schooldb['total_alumnos'] = schooldb[count_cols].sum(axis=1)

In [None]:
# Histogram of total_alumnos for rows whose 'Ámbito' is 'Rural'
schooldb.loc[schooldb['Ámbito'] == 'Rural', "total_alumnos"].iplot(
    kind="histogram",
    bins=1000,
    theme="white",
    title="Histograma de cantidad de alumnos por escuela",
    xTitle='Cantidad de alumnos',
    yTitle='Cant.',
)


In [None]:
# Histogram of total_alumnos for rows whose 'Ámbito' is 'Urbano'
schooldb.loc[schooldb['Ámbito'] == 'Urbano', "total_alumnos"].iplot(
    kind="histogram",
    bins=1000,
    theme="white",
    title="Histograma de cantidad de alumnos por escuela",
    xTitle='Cantidad de alumnos',
    yTitle='Cant.',
)

In [None]:
# Histogram of total_alumnos for 'Urbano' rows in the 'Buenos Aires' province
schooldb.loc[
    (schooldb['Ámbito'] == 'Urbano') & (schooldb['Provincia'] == 'Buenos Aires'),
    "total_alumnos",
].iplot(
    kind="histogram",
    bins=1000,
    theme="white",
    title="Histograma de cantidad de alumnos por escuela",
    xTitle='Cantidad de alumnos',
    yTitle='Cant.',
)

In [None]:
# Mean of total_alumnos over every row of schooldb
schooldb['total_alumnos'].mean()

In [None]:
# Mean total_alumnos for each (Provincia, Ámbito) pair
grouped = schooldb.groupby(['Provincia', 'Ámbito'])[['total_alumnos']].mean()
grouped

In [None]:
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)
# Read the 'matricula_por_edad' table; this replaces the schooldb loaded earlier
SCHOOL_HDF = os.path.join(DATA_DIR, 'ministerio-educacion', 'matricula_por_edad.hdf')
schooldb = pd.read_hdf(SCHOOL_HDF, key='matricula_por_edad')
schooldb

In [None]:
# Sum every column per 'Ámbito', then keep the groups with a non-zero 'De 20 a 24 años' total
group_id = schooldb.groupby(['Ámbito']).sum()
group_id.loc[group_id['De 20 a 24 años'].ne(0)]

In [None]:
# Age-bracket column names, grouped by education level.
# `cols` originally contained empty slots (`[, , ,, , ...]`), which is a syntax error;
# only the two entries that were still present are kept.
cols = ['25 años y más', '30 años y más']
# Kindergarten
jardin = ['0 años', '1 año', '2 años', '3 años', '4 años', '5 años', '6 años y más']
# Primary school
primaria = ['6 años', '7 años', '8 años', '9 años', '10 años', '11 años']
# Secondary school
secundaria = ['11 años y menos', '12 años', '13 años', '14 años', '15 años', '16 años', '17 años', '18 años y más']
# NOTE(review): presumably the adult-education brackets — confirm what "empa" stands for.
empa = ['Menos de 18 años', '19 años', '20 años', '21 años', '22 años', '23 años', '24 años', 'De 20 a 24 años', 'De 25 a 29 años']

In [None]:
# Sum every column for each (ID, Provincia, Ámbito) group.
# The statement was duplicated verbatim; it only needs to run once.
grouped = schooldb.groupby(['ID', 'Provincia', 'Ámbito']).sum()

# Fake population generator

In [None]:
# Path of the HDF file holding the generated (fake) population
FAKE_POP_HDF = os.path.join(DATA_DIR, 'fake_population.hdf')

In [None]:
# Read the 'population' table from the fake-population HDF file and display it
fake_pop = pd.read_hdf(FAKE_POP_HDF, 'population')
fake_pop

# Basic infection model

## Model

### Equations

\begin{equation}
\begin{split}
\dot{S} &= -\beta_1 I_1 S -\beta_2 I_2 S - \beta_3 I_3 S\\
\dot{E} &=\beta_1 I_1 S +\beta_2 I_2 S + \beta_3 I_3 S - a E \\
\dot{I_1} &= a E - \gamma_1 I_1 - p_1 I_1 \\
\dot{I_2} &= p_1 I_1 -\gamma_2 I_2 - p_2 I_2 \\
\dot{I_3} & = p_2 I_2 -\gamma_3 I_3 - \mu I_3 \\
\dot{R} & = \gamma_1 I_1 + \gamma_2 I_2 + \gamma_3 I_3 \\
\dot{D} & = \mu I_3
\end{split}
\end{equation}

### Variables
* $S$: Susceptible individuals
* $E$: Exposed individuals - infected but not yet infectious or symptomatic
* $I_i$: Infected individuals in severity class $i$. Severity increases with $i$, and we assume individuals must pass through all previous classes
  * $I_1$: Mild infection (hospitalization not required)
  * $I_2$: Severe infection (hospitalization required)
  * $I_3$: Critical infection (ICU required)
* $R$: individuals who have recovered from disease and are now immune
* $D$: Dead individuals
* $N=S+E+I_1+I_2+I_3+R+D$: Total population size (constant)

### Parameters
* $\beta_i$ rate at which infected individuals in class $I_i$ contact susceptibles and infect them
* $a$ rate of progression from the exposed to infected class
* $\gamma_i$ rate at which infected individuals in class $I_i$ recover from disease and become immune
* $p_i$ rate at which infected individuals in class $I_i$ progress to class $I_{i+1}$
* $\mu$ death rate for individuals in the most severe stage of disease

### Basic reproductive ratio

Idea: $R_0$ is the sum of 
1. the average number of secondary infections generated from an individual in stage $I_1$
2. the probability that an infected individual progresses to $I_2$ multiplied by the average number of secondary infections generated from an individual in stage $I_2$
3.  the probability that an infected individual progresses to $I_3$ multiplied by the average number of secondary infections generated from an individual in stage $I_3$

\begin{equation}
\begin{split}
R_0 & = N\frac{\beta_1}{p_1+\gamma_1} + \frac{p_1}{p_1 + \gamma_1} \left( \frac{N \beta_2}{p_2+\gamma_2} + \frac{p_2}{p_2 + \gamma_2} \frac{N \beta_3}{\mu+\gamma_3}\right)\\
&= N\frac{\beta_1}{p_1+\gamma_1} \left(1 + \frac{p_1}{p_2 + \gamma_2}\frac{\beta_2}{\beta_1} \left( 1 + \frac{p_2}{\mu + \gamma_3} \frac{\beta_3}{\beta_2} \right) \right)
\end{split}
\end{equation}

In [None]:

from scipy.integrate import odeint

In [None]:
# Clinical parameters of the infection model.
#
# TODO: add sources. Candidate reference:
# https://github.com/midas-network/COVID-19/tree/master/parameter_estimates/2019_novel_coronavirus

IncubPeriod = 5       # Incubation period, days
DurMildInf = 10       # Duration of mild infections, days
FracMild = 0.8        # Fraction of infections that are mild
FracSevere = 0.15     # Fraction of infections that are severe
FracCritical = 0.05   # Fraction of infections that are critical
CFR = 0.02            # Case fatality rate (fraction of infections resulting in death)
TimeICUDeath = 7      # Time from ICU admission to death, days
DurHosp = 11          # Duration of hospitalization, days
