In [14]:
from dotenv import load_dotenv
import os
import sys
from pathlib import Path
from aves.config import setup_style

# Run the dotenv loader first so the environment lookups below can see any
# values it provides, then run the aves style setup.
load_dotenv()
setup_style()

# Both root directories are required: a missing variable raises KeyError.
AVES_ROOT = Path(os.environ["AVES_ROOT"])
EOD_PATH = AVES_ROOT.joinpath("data", "external", "EOD_STGO")
CENSUS_GEO_ROOT = Path(os.environ["CENSUS_GEO_ROOT"])

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import geopandas as gpd
from aves.data import eod, census
import matplotlib as mpl

In [None]:
# Trip table read from the survey folder.
viajes = eod.read_trips(EOD_PATH)

# Discard trips whose origin or destination sector is not relevant here:
# the two excluded sector labels, plus rows where either sector is missing.
viajes = viajes.loc[
    lambda df: ~df["SectorOrigen"].isin(["Exterior a RM", "Extensión Sur-Poniente"])
    & ~df["SectorDestino"].isin(["Exterior a RM", "Extensión Sur-Poniente"])
    & df["SectorOrigen"].notna()
    & df["SectorDestino"].notna()
]

print(len(viajes))

In [25]:
personas = eod.read_people(EOD_PATH)
hogares = eod.read_homes(EOD_PATH)

# Chain trips -> people -> households using merge's default keys (the columns
# the frames have in common). 'TipoDia' is dropped from the household table
# before it takes part in the join.
tabla = (
    viajes
    .merge(personas)
    .merge(hogares.drop(columns="TipoDia"))
)

In [26]:
# Trip weight: element-wise product of the two expansion-factor columns.
tabla["Peso"] = tabla["FactorExpansion"].mul(tabla["FactorPersona"])

In [None]:
# Zone geometries, indexed by their 'ID' column.
zones = (
    gpd.read_file(AVES_ROOT.joinpath("data", "processed", "scl_zonas_urbanas.json"))
    .set_index("ID")
)
zones.head()

In [15]:
from aves.data.census.loading import read_census_map

# 'comuna'-level census map, read from the "R13" folder under CENSUS_GEO_ROOT.
comunas = read_census_map(
    "comuna",
    path=CENSUS_GEO_ROOT.joinpath("R13"),
)

In [None]:
# Keep only the comunas whose code appears in the zones table, and replace the
# census name column with the name used by the zones ('Com' -> 'Comuna').
comunas_urbanas = (
    comunas.loc[lambda df: df["COMUNA"].isin(zones["Com"].unique())]
    .drop(columns="NOM_COMUNA")
    .copy()
)
comunas_urbanas["NombreComuna"] = comunas_urbanas["COMUNA"].map(
    dict(zip(zones["Com"], zones["Comuna"]))
)
comunas_urbanas.plot(facecolor="none", edgecolor="#abacab")

In [None]:
from aves.features.geo import clip_area_geodataframe

# Clip the comunas to the extent of the zones; the 0.02 buffer is forwarded
# to the clipping helper.
bounding_box = zones.total_bounds
comunas_urbanas = clip_area_geodataframe(
    comunas_urbanas,
    bounding_box,
    buffer=0.02,
)

# Quick visual check: each call draws on its own axes.
comunas_urbanas.plot()
zones.plot()

In [None]:
from aves.features.utils import normalize_rows

# Weighted trip totals per (purpose, origin comuna, destination comuna,
# origin zone, destination zone), largest first. Only trips that are not
# "volver a casa", that start and end in one of the urban comunas, and that
# cross between two different comunas are counted.
matrix = (
    tabla.loc[
        lambda df: (df["Proposito"] != "volver a casa")
        & df["ComunaOrigen"].isin(comunas_urbanas["NombreComuna"])
        & df["ComunaDestino"].isin(comunas_urbanas["NombreComuna"])
        & (df["ComunaOrigen"] != df["ComunaDestino"])
    ]
    .groupby(
        ["Proposito", "ComunaOrigen", "ComunaDestino", "ZonaOrigen", "ZonaDestino"]
    )
    .agg(n_viajes=("Peso", "sum"))
    .sort_values("n_viajes", ascending=False)
    .reset_index()
)

matrix

In [None]:
# Sum of the 'n_viajes' column over every row of the matrix.
matrix['n_viajes'].sum()

In [47]:
# Dissolve the zones by 'ID' so that each ID ends up as a single row, with
# 'ID' restored as a regular column.
fixed_zones = (
    zones.reset_index()
    .dissolve(by="ID")
    .reset_index()
)

In [None]:
from aves.models.network import Network

# Zone-to-zone network for the "Al estudio" purpose. Edges are restricted to
# pairs of distinct zones that both exist in fixed_zones, and only the edges
# at or above the 0.975 quantile of 'n_viajes' are kept.
network = Network.from_edgelist(
    matrix.loc[
        lambda df: (df["Proposito"] == "Al estudio")
        & (df["ZonaOrigen"] != df["ZonaDestino"])
        & df["ZonaOrigen"].isin(fixed_zones["ID"])
        & df["ZonaDestino"].isin(fixed_zones["ID"])
    ]
    .sort_values("n_viajes", ascending=False)
    # cumulative share of trips; kept as a column of the edge list
    .assign(
        cumsum_viajes=lambda df: df["n_viajes"].cumsum() / df["n_viajes"].sum()
    )
    .loc[lambda df: df["n_viajes"] >= df["n_viajes"].quantile(0.975)],
    source="ZonaOrigen",
    target="ZonaDestino",
    weight="n_viajes",
)
network

In [None]:
from aves.visualization.networks import NodeLink

# Node-link view of the network; nodes are laid out with the "geographical"
# method, matched to fixed_zones through its 'ID' column.
nodelink = NodeLink(network)
nodelink.layout_nodes(
    method="geographical",
    geodataframe=fixed_zones,
    node_column="ID",
)

In [203]:
# Select the "plain" node drawing mode for the plots that follow.
nodelink.set_node_drawing("plain")


In [None]:
from aves.visualization.figures import figure_from_geodataframe

fig, ax = figure_from_geodataframe(zones, height=7)

# Context layers: filled zones at the bottom, comuna outlines above them.
zones.plot(ax=ax, facecolor='#efefef', edgecolor='white', zorder=0)
comunas_urbanas.plot(ax=ax, facecolor='none', edgecolor='#abacab', zorder=1)

# The network goes on top of the context layers.
nodelink.plot(ax, nodes=dict(palette='PuRd', edgecolor='black', node_size=1, alpha=0.95), edges=dict(alpha=0.5), zorder=2)

# The network is built from the trips whose purpose is "Al estudio", so the
# title names that purpose (it previously said "al trabajo").
ax.set_title('Viajes al estudio en Santiago (en días laborales, EOD 2012)')

fig.tight_layout()

In [102]:
from aves.features.geo import to_point_geodataframe

# Point layers for trip origins and destinations. The coordinate columns are
# declared as EPSG:32719 and then reprojected to the CRS of the zones.
origenes_viajes, destinos_viajes = (
    to_point_geodataframe(tabla, x_col, y_col, crs="epsg:32719").to_crs(zones.crs)
    for x_col, y_col in (
        ("OrigenCoordX", "OrigenCoordY"),
        ("DestinoCoordX", "DestinoCoordY"),
    )
)

In [None]:
from aves.models.grid import H3Grid

# H3 grid built from the zones (grid level 7, extra margin 0.025).
hex_grid = H3Grid.from_geodf(
    zones,
    grid_level=7,
    extra_margin=0.025,
)
hex_grid.geodf

In [None]:
# Trip purpose -> activity group. The lookup is declared group by group and
# then inverted, so the resulting dict keeps one entry per purpose.
activities = {
    proposito: grupo
    for grupo, propositos in {
        "Recurrentes": ["Al estudio", "Al trabajo", "Por trabajo", "Por estudio"],
        "N/A": ["volver a casa"],
        "Mantención": ["De compras", "Trámites"],
        "Salud": ["De salud"],
        "Discrecional": [
            "Buscar o Dejar a alguien",
            "Visitar a alguien",
            "Recreación",
            "Otra actividad (especifique)",
            "Comer o Tomar algo",
            "Buscar o dejar algo",
        ],
    }.items()
    for proposito in propositos
}

activities


In [160]:
# Attach to every trip destination the grid cell it falls within, keep the
# trips whose purpose is a key of `activities`, and label each one with its
# activity group.
grid_destinos = (
    gpd.sjoin(destinos_viajes, hex_grid.geodf, predicate="within")
    .loc[lambda df: df["Proposito"].isin(list(activities))]
    .assign(actividad=lambda df: df["Proposito"].map(activities))
)

In [None]:
from aves.features.utils import logodds_ratio_with_uninformative_dirichlet_prior

# Total weight per (activity group, cell), reshaped to one row per cell and
# one column per activity group; missing combinations count as 0. The table
# is then passed through the log-odds helper.
grid_actividades = (
    grid_destinos.groupby(["actividad", "h3_cell_id"])["Peso"]
    .sum()
    .unstack()
    .transpose()
    .fillna(0)
    .pipe(logodds_ratio_with_uninformative_dirichlet_prior)
)

grid_actividades

In [None]:
from aves.visualization.figures import small_multiples_from_geodataframe
from aves.visualization.maps import choropleth_map

# Order in which the activity groups are shown, one panel each.
activity_order = ["Recurrentes", "Mantención", "Salud", "Discrecional"]

fig, axes = small_multiples_from_geodataframe(zones, len(activity_order))

# One choropleth per activity group: the grid cells joined with the
# per-cell activity table, coloured by that group's column.
for ax, col in zip(axes, activity_order):
    choropleth_map(
        ax,
        hex_grid.geodf.join(grid_actividades, on="h3_cell_id"),
        col,
        k=7,
        binning="fisher_jenks",
    )
    ax.set_title(col)

In [None]:
fig, axes = plt.subplots(1, 4, figsize=(9, 4))

# Weighted density histogram of 'TiempoViaje' for each activity group.
for ax, col in zip(axes, activity_order):
    sns.histplot(
        data=grid_destinos.loc[grid_destinos["actividad"].eq(col)],
        x="TiempoViaje",
        weights="Peso",
        stat="density",
        bins=[0, 15, 30, 45, 60, 90, 120],
        ax=ax,
    )
    ax.set_title(col)


In [None]:
import matplotlib.gridspec as gridspec

# NOTE(review): h_space and w_space are not passed to the GridSpec below,
# which hardcodes 0.05 for both spacings — confirm which values are intended.
h_space = 0.1
w_space = 0.05

# Width/height ratio of the zones' bounding box (minx, miny, maxx, maxy).
bounding_box = zones.total_bounds
map_aspect_ratio = (bounding_box[2] - bounding_box[0]) / (bounding_box[3] - bounding_box[1])

# Relative heights of the grid rows.
height_ratios = [1.0, 0.5]

# Figure size: 3 units per activity column (scaled by the map aspect ratio)
# and 3 units per unit of row height.
y_size = sum(height_ratios) * 3
x_size = map_aspect_ratio * len(activity_order) * 3

fig = plt.figure(figsize=(x_size, y_size), facecolor="#efefef")

gs = gridspec.GridSpec(
    nrows=len(height_ratios),
    ncols=len(activity_order),
    figure=fig,
    hspace=0.05,
    wspace=0.05,
    height_ratios=height_ratios,
)

# Layout check only: create an empty axes in both rows of every column.
for i, col in enumerate(activity_order):
    for row in (0, 1):
        fig.add_subplot(gs[row, i])




In [None]:
# Two rows per activity group: a choropleth map on top and a travel-time
# histogram below.
height_ratios = [1.1, 0.3]

y_size = sum(height_ratios) * 3
x_size = map_aspect_ratio * len(activity_order) * 3

fig = plt.figure(figsize=(x_size, y_size), facecolor="#efefef")

gs = gridspec.GridSpec(
    nrows=len(height_ratios),
    ncols=len(activity_order),
    figure=fig,
    hspace=0.05,
    wspace=0.05,
    height_ratios=height_ratios,
)

# Bin edges for 'TiempoViaje'; the same values are reused as x ticks.
time_bins = [0, 15, 30, 45, 60, 90, 120]

for i, col in enumerate(activity_order):
    ax_map = fig.add_subplot(gs[0, i])
    ax_hist = fig.add_subplot(gs[1, i])

    # --- map row ---
    choropleth_map(
        ax_map,
        hex_grid.geodf.join(grid_actividades, on="h3_cell_id"),
        col,
        k=7,
        binning="fisher_jenks",
        edgecolor="none",
        cbar_args=dict(
            label="log-odds",
            height="60%",
            width="4%",
            orientation="vertical",
            location="center left",
            label_size="small",
            bbox_to_anchor=(0.0, 0.0, 0.9, 1.0),
        ),
    )
    ax_map.set_title(col)
    ax_map.set_aspect("equal")
    ax_map.set_axis_off()

    # Plotting with geopandas changes the axis limits, so remember them
    # before drawing the comuna outlines and restore them afterwards.
    _xlim, _ylim = ax_map.get_xlim(), ax_map.get_ylim()
    comunas.plot(ax=ax_map, edgecolor="grey", facecolor="none", linewidth=0.5)
    ax_map.set_xlim(_xlim)
    ax_map.set_ylim(_ylim)

    # --- histogram row ---
    sns.histplot(
        data=grid_destinos.loc[grid_destinos["actividad"].eq(col)],
        x="TiempoViaje",
        weights="Peso",
        stat="density",
        bins=time_bins,
        ax=ax_hist,
    )
    ax_hist.set_ylim([0, 0.025])
    ax_hist.set_xticks(time_bins)

    # Only the first column keeps its y tick labels and y label.
    if i > 0:
        ax_hist.set_yticklabels([])
        ax_hist.set_ylabel("")

fig.tight_layout()

In [222]:
# Collapse the 'ModoDifusion' labels into aggregated mode groups. The lookup
# is declared group by group and inverted into a label -> group dict; labels
# that are not listed map to NaN.
grid_destinos["ModoAgregado"] = grid_destinos["ModoDifusion"].map(
    {
        modo: grupo
        for grupo, modos in {
            "Taxi": ["Taxi", "Taxi Colectivo"],
            "Público": ["Bip! - Otros Privado", "Bip!", "Bip! - Otros Público"],
            "Activo": ["Bicicleta", "Caminata"],
            "Auto": ["Auto"],
            "Otros": ["Otros"],
        }.items()
        for modo in modos
    }
)

In [None]:
# Total weight per (activity group, aggregated mode), with one column per
# 'Sexo' value; missing combinations are 0 and totals are cast to int.
modo_x_actividad = (
    grid_destinos.groupby(["actividad", "ModoAgregado", "Sexo"])["Peso"]
    .sum()
    .unstack(fill_value=0)
    .astype(int)
)

# Stacked horizontal bars for the "Recurrentes" group.
(
    modo_x_actividad.loc["Recurrentes"]
    .sort_index(ascending=False)
    .plot.barh(stacked=True)
)

In [None]:
# Three rows per activity group: choropleth map, travel-time histogram and
# stacked bars of trips by aggregated mode.
height_ratios = [1.2, 0.3, 0.5]

y_size = sum(height_ratios) * 3
x_size = map_aspect_ratio * len(activity_order) * 3

fig = plt.figure(figsize=(x_size, y_size), facecolor="#efefef")

gs = gridspec.GridSpec(
    nrows=len(height_ratios),
    ncols=len(activity_order),
    figure=fig,
    hspace=0.15,
    wspace=0.05,
    height_ratios=height_ratios,
)

# Bin edges for 'TiempoViaje'; the same values are reused as x ticks.
time_bins = [0, 15, 30, 45, 60, 90, 120]

for i, col in enumerate(activity_order):
    ax_map = fig.add_subplot(gs[0, i])
    ax_hist = fig.add_subplot(gs[1, i])

    # --- map row ---
    choropleth_map(
        ax_map,
        hex_grid.geodf.join(grid_actividades, on="h3_cell_id"),
        col,
        k=7,
        binning="fisher_jenks",
        edgecolor="none",
        cbar_args=dict(
            label="log-odds",
            height="60%",
            width="4%",
            orientation="vertical",
            location="center left",
            label_size="small",
            bbox_to_anchor=(0.0, 0.0, 0.9, 1.0),
        ),
    )
    ax_map.set_title(col)
    ax_map.set_aspect("equal")
    ax_map.set_axis_off()

    # Plotting with geopandas changes the axis limits, so remember them
    # before drawing the comuna outlines and restore them afterwards.
    _xlim, _ylim = ax_map.get_xlim(), ax_map.get_ylim()
    comunas.plot(ax=ax_map, edgecolor="grey", facecolor="none", linewidth=0.5)
    ax_map.set_xlim(_xlim)
    ax_map.set_ylim(_ylim)

    # --- histogram row ---
    sns.histplot(
        data=grid_destinos.loc[grid_destinos["actividad"].eq(col)],
        x="TiempoViaje",
        weights="Peso",
        stat="density",
        bins=time_bins,
        ax=ax_hist,
    )
    ax_hist.set_ylim([0, 0.025])
    ax_hist.set_xticks(time_bins)
    ax_hist.set_xlabel('Tiempo de viaje [minutos]')
    # Ticks and tick labels are drawn on the top edge instead of the bottom.
    ax_hist.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)

    # Only the first column keeps its y tick labels and y label.
    if i > 0:
        ax_hist.set_yticklabels([])
        ax_hist.set_ylabel("")

    # --- mode row: counts divided by 1000; legend only on the last column ---
    ax_modes = fig.add_subplot(gs[2, i])

    (
        modo_x_actividad.loc[col]
        .sort_index(ascending=False)
        .div(1000)
        .plot.barh(
            stacked=True,
            ax=ax_modes,
            legend=(i == (len(activity_order) - 1)),
            edgecolor="none",
        )
    )

    if i > 0:
        ax_modes.set_ylabel("")
        ax_modes.set_yticklabels([])

    ax_modes.set_xlabel('# de viajes [miles]')

fig.tight_layout()
fig.align_ylabels()