In [5]:
import pandas as pd
import geopandas as gpd
from IPython.display import display
import os
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.spatial import cKDTree
# Notebook-wide configuration: figure resolution and root folder of the data files.
mpl.rcParams.update({'figure.dpi': 300})
DATA_DIR = os.path.join('data', 'argentina')

In [2]:
# Import plotly and cufflinks, then switch cufflinks to offline plotting.
import cufflinks as cf
import plotly.offline
cf.go_offline()
# NOTE(review): the persisted config is written with offline=False and
# world_readable=True, which looks at odds with the offline intent above —
# confirm this is what is wanted.
cf.set_config_file(offline=False, world_readable=True)

In [3]:
import gmaps
import gmaps.datasets
# The Google Maps API key is kept out of the notebook, in a local text file.
with open('google_maps_key.txt') as key_file:
    gmaps.configure(api_key=key_file.read().strip())

In [4]:
from gen_dbs.utils.utils import normalize_dpto_name, validate_dpto_indexes

# Get all databases

In [8]:
# Run the project's database generation script through the IPython shell escape.
# The captured output below shows it downloading a CSV into data/argentina.
!gen_dbs/generate_databases.py

Downloading data/argentina/covid19ardata.csv...


# pxdpto geo test

In [None]:
from gen_dbs.utils.utils import normalize_dpto_name, validate_dpto_indexes

# Read the department shapefile, normalize the department names and turn the
# 'link' identifiers into integers.
PXLOC = os.path.join(DATA_DIR, 'indec', 'pxdptodatosok.shp')
geodata = gpd.read_file(PXLOC, encoding='utf-8')
geodata['departamen'] = list(map(normalize_dpto_name, geodata['departamen']))
geodata['link'] = list(map(int, geodata['link']))
geodata

In [None]:
# Rows whose province is Salta.
geodata.loc[geodata['provincia'] == 'Salta']

In [None]:
# Mean of every numeric column per department of Santa Fe.
# numeric_only=True is required: the frame also holds string and geometry
# columns, and recent pandas raises a TypeError instead of silently dropping
# them when averaging.
(
    geodata.loc[geodata['provincia'] == 'Santa Fe']
    .groupby('departamen')
    .mean(numeric_only=True)
)

In [None]:
# Display the geometry stored under index label 1.
geodata.loc[1, 'geometry']

In [None]:
# Print the centroid of the geometry stored under index label 1.
print(geodata.loc[1, 'geometry'].centroid)

In [None]:
# Centroid coordinates of every department geometry; the heatmap takes
# [latitude, longitude] pairs, weighted here by the 'hogares' column.
latitudes = [dpto.centroid.y for dpto in geodata['geometry']]
longitudes = [dpto.centroid.x for dpto in geodata['geometry']]
puntos = [[lat, lon] for lat, lon in zip(latitudes, longitudes)]
pesos = geodata['hogares']

heat_layer = gmaps.heatmap_layer(
    puntos,
    weights=pesos,
    max_intensity=float(max(pesos)),
    point_radius=10.0,
)
m = gmaps.Map()
m.add_layer(heat_layer)
m

# Densidad

In [None]:
# Load the population-density GeoJSON and convert its numeric columns.
DENS_FILE = os.path.join(DATA_DIR, 'datosgobar-densidad-poblacion', 'pais.geojson')
db_densidad = gpd.read_file(DENS_FILE, encoding='utf-8')
db_densidad['area'] = pd.to_numeric(db_densidad['area'])
db_densidad['poblacion'] = pd.to_numeric(db_densidad['poblacion'])
db_densidad['viviendas'] = pd.to_numeric(db_densidad['viviendas'])
# 'hogares' presumably arrives as decimal text such as "123.0" (TODO confirm).
# Parse it through float: the previous regex used an unescaped '.', which also
# matched digits and turned values like "100" into 1. Empty/missing values map to 0.
db_densidad['hogares'] = db_densidad['hogares'].apply(lambda x: int(float(x)) if x else 0)
print(db_densidad.crs)
db_densidad

In [None]:
# Interactive histogram of the population of each section.
db_densidad['poblacion'].iplot(
    kind="histogram",
    bins=100,
    theme="white",
    title="Histograma de poblacion de secciones del pais",
    xTitle='Poblacion',
    yTitle='Cant.',
)

In [None]:
# Total population over all sections.
db_densidad['poblacion'].sum()

In [None]:
# Same total, expressed in thousands.
f"population: {db_densidad['poblacion'].sum() / 1000:.0f}k"

In [None]:
# Render three randomly chosen section geometries (sampled with replacement).
_ = [display(geom) for geom in random.choices(db_densidad['geometry'], k=3)]

In [None]:
# Interactive histogram of section areas.
db_densidad["area"].iplot(
    kind="histogram",
    bins=10000,
    theme="white",
    title="Histograma de areas de secciones del pais",
    xTitle='Area',
    yTitle='Cant.',
)

In [None]:
# Interactive histogram of section densities.
db_densidad["densidad"].iplot(
    kind="histogram",
    bins=10000,
    theme="white",
    title="Histograma de densidades de secciones del pais",
    xTitle='Densidad',
    yTitle='Cant.',
)

In [None]:
# Department ids present in only one of the two datasets (symmetric difference).
diff = set(map(int, db_densidad['dpto_id'])).symmetric_difference(
    map(int, geodata['link'])
)
diff

In [None]:
# List the departments of `geodata` whose id appears in `diff`.
# The printed label had a typo ("deparments"); row and column selection now
# happens in a single .loc call instead of chained indexing.
print("Missing departments in density")
geodata.loc[geodata['link'].isin(diff), ['link', 'departamen', 'provincia']]

In [None]:
# Density map of the sections of department 82084, zoomed to a fixed window.
ax = db_densidad.loc[db_densidad['dpto_id'] == '82084'].plot(cmap='hot', column='densidad')
ax.set_title('Densidad de secciones de Rosario')
ax.set_xlim(left=-60.7, right=-60.6)
ax.set_ylim(bottom=-33.0, top=-32.9)

In [None]:
# Density map of every section with province id 82.
ax = db_densidad.loc[db_densidad['prov_id'] == '82'].plot(cmap='hot', column='densidad')
ax.set_title('Densidad de secciones de Santa Fe')
ax

In [None]:
# [latitude, longitude] of each section centroid, weighted by the 'hogares' column.
puntos = [[seccion.centroid.y, seccion.centroid.x] for seccion in db_densidad['geometry']]
pesos = db_densidad['hogares']

heat_layer = gmaps.heatmap_layer(
    puntos,
    weights=pesos,
    max_intensity=float(max(pesos)),
    point_radius=5.0,
)
m = gmaps.Map()
m.add_layer(heat_layer)
m

In [None]:
# Aggregate population and area per department and derive its density.
asdf = db_densidad.groupby('dpto_id')[['poblacion', 'area']].sum().reset_index()
asdf['density'] = asdf['poblacion'] / asdf['area']
# Take the first matching row by position: a trailing [0] is a label lookup
# and only works when the matching row happens to carry index label 0.
asdf.loc[asdf['dpto_id'] == '02001', 'density'].iloc[0]

# pxlocdatos

In [None]:
# Reproject the density layer in place to EPSG:4326.
db_densidad.to_crs(inplace=True, epsg=4326)

In [None]:
# Load the companion 'datos.geojson' file of the density dataset.
DENSIDAD_DATOS_FILE = os.path.join(DATA_DIR, 'datosgobar-densidad-poblacion', 'datos.geojson')
densidad_datos = gpd.read_file(DENSIDAD_DATOS_FILE, encoding='utf-8')
densidad_datos

In [None]:
# Load the INDEC 'pxlocdatos' shapefile.
PXLOCDATOS_FILE = os.path.join(DATA_DIR, 'indec', 'pxlocdatos.shp')
pxlocdatos = gpd.read_file(PXLOCDATOS_FILE, encoding='utf-8')
# Normalization steps currently disabled:
# pxlocdatos['departamen'] = [normalize_dpto_name(n) for n in pxlocdatos['departamen']]
# pxlocdatos['link'] = [int(n) for n in pxlocdatos['link']]
pxlocdatos

In [None]:
# Label pxlocdatos with the same CRS as db_densidad.
# NOTE(review): assigning .crs only relabels the layer, it does not reproject
# coordinates — presumably both layers are already in the same CRS; confirm.
pxlocdatos.crs = db_densidad.crs

In [None]:
# Left spatial join: keep every pxlocdatos row and attach the matching
# db_densidad attributes.
joined = gpd.sjoin(pxlocdatos, db_densidad, how="left")
joined

# Distancias entre secciones

In [None]:
# Reproject in place to EPSG:5349, a projection for Argentina; the distance
# cells below divide by 1000 to report km.
# Reference: https://www.eye4software.com/hydromagic/documentation/supported-map-grids/Argentina
db_densidad.to_crs(inplace=True, epsg=5349)

In [None]:
# Centroid of the first section listed for each of the two departments, and
# the distance between them expressed in km.
santafe = db_densidad.loc[db_densidad['dpto_id'] == '82063', 'geometry'].iloc[0].centroid
rosario = db_densidad.loc[db_densidad['dpto_id'] == '82084', 'geometry'].iloc[0].centroid
f"{rosario.distance(santafe) / 1000:.0f} km"

In [None]:
# Build a KD-tree over the section centroids and query, for every centroid, up
# to 2000 nearest neighbours no farther than `upper_bound`.
upper_bound = 3000
centroids = np.column_stack([
    db_densidad.geometry.centroid.x,
    db_densidad.geometry.centroid.y,
])
btree = cKDTree(centroids)
dist, idx = btree.query(centroids, k=2000, distance_upper_bound=upper_bound)

In [None]:
# Per section, count the neighbours strictly within the bound; distances at or
# below 1e-9 (the query point itself) are excluded.
nearests_count = pd.DataFrame(
    np.count_nonzero((dist > 1e-9) & (dist < upper_bound), axis=1)
)
nearests_count.iplot(
    kind="histogram",
    bins=100,
    theme="white",
    title=f"Histograma de cantidad de secciones vecinas a menos de {upper_bound/1000:.0f} km",
    xTitle='Cantidad de zonas cercanas',
    yTitle='Cant. de zonas con esa cantidad de zonas cercanas',
)

In [None]:
# Total number of neighbour pairs found.
# The built-in sum() iterates a DataFrame by column label, so it added up the
# labels (just 0) instead of the counts; sum the single data column instead.
int(nearests_count[0].sum())

# Ministerio de Educación database

In [None]:
# Show every column when displaying frames, then load the
# 'matricula_y_secciones' table and unify the name used for Buenos Aires city.
pd.set_option('display.max_columns', None)
SCHOOL_HDF = os.path.join(DATA_DIR, 'ministerio-educacion', 'matricula_y_secciones.hdf')
schooldb = pd.read_hdf(SCHOOL_HDF, 'matricula_y_secciones').replace(
    to_replace="Ciudad de Buenos Aires",
    value="Ciudad Autónoma de Buenos Aires",
)
schooldb

In [None]:
# Columns holding student counts, selected by their name prefix.
count_cols = [
    col for col in schooldb.columns
    if col.startswith(('Alumnos con Sobreedad', 'Repitentes', 'Matrícula.'))
]

In [None]:
# Row-wise total over the selected count columns.
schooldb['total_alumnos'] = schooldb[count_cols].sum(axis=1)

In [None]:
# Load the 'matricula_por_edad' table; this rebinds `schooldb` and `SCHOOL_HDF`.
pd.set_option('display.max_columns', None)
SCHOOL_HDF = os.path.join(DATA_DIR, 'ministerio-educacion', 'matricula_por_edad.hdf')
schooldb = pd.read_hdf(SCHOOL_HDF, key='matricula_por_edad')
schooldb

In [None]:
# Columns whose name contains 'años'.
count_cols = [col for col in schooldb.columns if 'años' in col]

In [None]:
# Row-wise total over the selected count columns.
schooldb['total_alumnos'] = schooldb.loc[:, count_cols].sum(axis=1)

In [None]:
# Student totals of the rows whose 'Ámbito' is 'Rural': report how many there
# are and plot their distribution.
graph = schooldb.loc[schooldb['Ámbito'] == 'Rural', 'total_alumnos']
print(f"Escuelas rurales {len(graph)}")
graph.iplot(
    kind="histogram",
    bins=1000,
    theme="white",
    title="Histograma de cantidad de alumnos por escuela, rurales",
    xTitle='Cantidad de alumnos',
    yTitle='Cant.',
)

In [None]:
# Student totals of the 'Urbano' rows with fewer than 800 students: report how
# many there are and plot their distribution.
graph = schooldb.loc[
    (schooldb['Ámbito'] == 'Urbano') & (schooldb['total_alumnos'] < 800),
    'total_alumnos',
]
print(f"Escuelas urbanas {len(graph)}")
graph.iplot(
    kind="histogram",
    bins=1000,
    theme="white",
    title="Histograma de cantidad de alumnos por escuela, urbanas",
    xTitle='Cantidad de alumnos',
    yTitle='Cant.',
)

# TDMA
Tránsito medio diario de Argentina

In [None]:
# Load the 2017 traffic GeoJSON.
TDMA_FILE = os.path.join(DATA_DIR, 'transporte', 'tdma2017.geojson')
tdma = gpd.read_file(TDMA_FILE, encoding='utf-8')
tdma

In [None]:
# Map of the traffic layer coloured by its 'valor' column.
# Optional zoom window, currently disabled:
#ax.set_xlim(-60.7, -60.6)
#ax.set_ylim(-33.0, -32.9)
ax = tdma.plot(cmap='hot', column='valor')
ax.set_title('TDMA Argentina')

# Buses interurbanos

In [None]:
#

In [6]:
import pandas as pd
# CSV export endpoint of the Google Sheets document, read straight into a frame.
url = (
    'https://docs.google.com/spreadsheets/d/16-bnsDdmmgtSxdWbVMboIHo5FRuz76DBxsz_BbsEVWA'
    '/export?format=csv&id=16-bnsDdmmgtSxdWbVMboIHo5FRuz76DBxsz_BbsEVWA&gid=0'
)
df = pd.read_csv(url)

In [7]:
# Display the frame read from the spreadsheet export.
df

Unnamed: 0,fecha,dia_inicio,dia_cuarentena_dnu260,osm_admin_level_2,osm_admin_level_4,osm_admin_level_8,tot_casosconf,nue_casosconf_diff,tot_fallecidos,nue_fallecidos_diff,tot_recuperados,tot_terapia,test_RT-PCR_negativos,test_RT-PCR_total,transmision_tipo,informe_tipo,informe_link,observacion,covid19argentina_admin_level_4
0,2/03/2020,1,,Argentina,CABA,,1,1,0,0,,,,,importado,,https://www.infobae.com/coronavirus/2020/03/03...,no hay informe,capital-federal
1,3/03/2020,2,,Argentina,Indeterminado,,1,0,0,0,,,,,importado,,,no hay informe,no-data
2,4/03/2020,3,,Argentina,Indeterminado,,1,0,0,0,,,,,importado,,,no hay informe,no-data
3,5/03/2020,4,,Argentina,Buenos Aires,,2,1,2,2,,,,,importado,informe nacional,https://www.argentina.gob.ar/sites/default/fil...,,buenos-aires
4,6/03/2020,5,,Argentina,Indeterminado,,2,0,2,0,,,,,importado,informe nacional,https://www.argentina.gob.ar/sites/default/fil...,,no-data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3437,5/09/2020,185,169.0,Argentina,Santa Fe,,471408,698,9733,6,,,,,,,,,
3438,5/09/2020,185,169.0,Argentina,Santiago del Estero,,471475,67,9736,3,,,,,,,,,
3439,5/09/2020,185,169.0,Argentina,Tierra del Fuego,,471499,24,9738,2,,,,,,,,,
3440,5/09/2020,185,169.0,Argentina,Tucumán,,471806,307,9739,1,,,,,,,,,
