In [None]:
import sys
from pathlib import Path

# Resolve the repository root: when the google.colab module is loaded the
# project is expected in a local "aves_git" folder; otherwise the root is two
# levels above the current working directory.
AVES_ROOT = Path("aves_git") if "google.colab" in sys.modules else Path("../..")

# Folder that holds the EOD_STGO external data set.
EOD_PATH = AVES_ROOT / "data" / "external" / "EOD_STGO"
EOD_PATH


In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import geopandas as gpd

# Global matplotlib settings for every figure in this notebook.
mpl.rcParams.update(
    {
        # Image quality; the right value depends on your screen resolution.
        "figure.dpi": 150,
        # Depends on the fonts installed on the system.
        "font.family": "Fira Sans Extra Condensed",
    }
)


In [None]:
# Load the zones layer (scl_zonas_urbanas.json) from the processed data folder.
zones_path = AVES_ROOT / "data" / "processed" / "scl_zonas_urbanas.json"
zones = gpd.read_file(zones_path)
zones.head()


In [None]:
from aves.data import eod

# Combine the trips, people and homes tables using pandas' default merge keys.
viajes_completos = (
    eod.read_trips(EOD_PATH)
    .merge(eod.read_people(EOD_PATH))
    .merge(eod.read_homes(EOD_PATH))
)

# "FactorLaboralNormal" and "Factor_LaboralNormal" are two distinct columns
# (note the underscore); the weight is their product.
# NOTE(review): presumably one factor comes from the trips table and the other
# from the people table — confirm against the eod readers.
viajes = viajes_completos.assign(
    PesoLaboral=lambda df: df["FactorLaboralNormal"] * df["Factor_LaboralNormal"]
).dropna(subset=["PesoLaboral"])  # keep only rows that have a weight

viajes.columns


In [None]:
from aves.features.geo import to_point_geodataframe

# Keep only trips whose destination zone id appears in the zones layer, then
# hand them to to_point_geodataframe with the destination coordinate columns.
viajes_en_zonas = viajes[viajes["ZonaDestino"].isin(zones["ID"])]
destinos_viajes = to_point_geodataframe(
    viajes_en_zonas, "DestinoCoordX", "DestinoCoordY", crs="epsg:5361"
)
destinos_viajes.plot()


In [None]:
# Reproject the zones to the CRS of the destination points so both layers
# share the same coordinates.
target_crs = destinos_viajes.crs
zones_utm = zones.to_crs(target_crs)


Leer:

Kernel Density Estimation https://en.wikipedia.org/wiki/Kernel_density_estimation
Multivariate KDE https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation

In [None]:
from aves.visualization.maps import heat_map
from aves.visualization.figures import figure_from_geodataframe

# Destinations of trips whose purpose is "volver a casa".
retornos_a_casa = destinos_viajes[destinos_viajes["Proposito"] == "volver a casa"]

# Zone outlines as the base layer, with the weighted heat map drawn on top.
fig, ax = figure_from_geodataframe(zones_utm)
zones_utm.plot(ax=ax, color="none", edgecolor="grey")
heat_map(
    ax,
    retornos_a_casa,
    weight="PesoLaboral",
    bandwidth=1000,
    n_levels=7,
    kernel="gaussian",
    low_threshold=0.05,
    alpha=0.85,
)


In [None]:
# Count how many destination records fall under each "ModoDifusion" value.
destinos_viajes["ModoDifusion"].value_counts()


In [None]:
from aves.visualization.figures import small_multiples_from_geodataframe

# One panel per transport mode, each showing a heat map of the destinations of
# trips whose purpose is "volver a casa".
modos = ("Auto", "Bicicleta", "Bip!", "Caminata")
retornos = destinos_viajes[destinos_viajes["Proposito"] == "volver a casa"]

fig, axes = small_multiples_from_geodataframe(
    zones_utm, n_variables=len(modos), col_wrap=2
)

for ax, modo in zip(axes, modos):
    # Zone outlines first, so the density is drawn over them.
    zones_utm.plot(ax=ax, color="none", edgecolor="grey")
    heat_map(
        ax,
        retornos[retornos["ModoDifusion"] == modo],
        weight="PesoLaboral",
        bandwidth=1000,
        n_levels=7,
        kernel="gaussian",
        low_threshold=0.05,
        alpha=0.85,
    )
    ax.set_title(modo)


In [None]:
# Clustering

import hdbscan


In [None]:
# Take an independent copy of the "De salud" trips so that later column
# assignments on `group` do not touch destinos_viajes.
es_salud = destinos_viajes["Proposito"] == "De salud"
group = destinos_viajes[es_salud].copy()
group.plot()


In [None]:
# Cluster the points on their (x, y) coordinates and store the label per row.
coords = np.vstack([group.geometry.x.values, group.geometry.y.values]).T
clusterer = hdbscan.HDBSCAN(min_cluster_size=50, min_samples=15)
group["cluster"] = clusterer.fit_predict(coords)

# Plot only the rows with a non-negative cluster label, over the zone outlines.
ax = zones_utm.plot(color="none", edgecolor="grey", linewidth=0.5)
clustered_points = group[group["cluster"] >= 0]
clustered_points.plot(
    ax=ax,
    column="cluster",
    categorical=True,
    legend=True,
    alpha=0.9,
    markersize=1,
    marker=".",
)


In [None]:
import libpysal as ps


In [None]:
# Build one alpha shape per cluster; rows with a negative cluster label are
# left out before grouping.
labelled = group[group["cluster"] >= 0]
shapes = gpd.GeoSeries(
    data=[
        ps.cg.alpha_shape_auto(
            np.vstack([members.geometry.x.values, members.geometry.y.values]).T,
            step=1,
        )
        for _, members in labelled.groupby("cluster")
    ],
    crs=zones_utm.crs,
)
shapes.plot()


In [None]:
# Draw the zone outlines, then overlay the cluster shapes in purple on the same axes.
ax = zones_utm.plot(color="none", edgecolor="grey", linewidth=0.5)
shapes.plot(ax=ax, color="purple")


In [None]:
# Binary indicator: 1 when the "ModoDifusion" text contains "Bip!", else 0.
contiene_bip = destinos_viajes["ModoDifusion"].str.contains("Bip!")
destinos_viajes["usa_tp"] = contiene_bip.astype(int)
destinos_viajes["usa_tp"].describe()


In [None]:
# List the available columns, now including the "usa_tp" indicator.
destinos_viajes.columns


In [None]:
# Inspect the "DistManhattan" column before using it as a regressor.
destinos_viajes["DistManhattan"]


In [None]:
# Inspect the geometry column of the destinations.
destinos_viajes["geometry"]


In [None]:
# Count records per trip purpose ("Proposito").
destinos_viajes["Proposito"].value_counts()


In [None]:
# Regression sample: trips for the three selected purposes whose
# "DistManhattan" value is greater than 500.
proposito_ok = destinos_viajes["Proposito"].isin(
    ["Al trabajo", "Al estudio", "De salud"]
)
distancia_ok = destinos_viajes["DistManhattan"] > 500
viajes_regresion = destinos_viajes[proposito_ok & distancia_ok]
len(viajes_regresion)


In [None]:
# List the columns available in the regression sample.
viajes_regresion.columns


In [None]:
# Compare the three income-related columns side by side.
viajes_regresion[["TramoIngresoFinal", "TramoIngreso", "IngresoHogar"]]


In [None]:
# One indicator column per "Sector" value, with the first level dropped as the
# baseline. dtype=float keeps the indicators numeric on every pandas version:
# since pandas 2.0 get_dummies emits bool columns by default, and selecting
# bool and float columns together yields an object-dtype array from `.values`.
sector_dummies = pd.get_dummies(
    viajes_regresion["Sector"], drop_first=True, dtype=float
)
viajes_regresion_sector = viajes_regresion.join(sector_dummies)

# Square root of the distance divided by 1000.
# NOTE(review): presumably metres converted to kilometres — confirm the units.
viajes_regresion_sector["DistManhattan"] = np.sqrt(
    viajes_regresion_sector["DistManhattan"] / 1000
)
# log(income + 1), so a zero income maps to 0 instead of -inf.
viajes_regresion_sector["IngresoHogar"] = np.log(
    viajes_regresion_sector["IngresoHogar"] + 1
)
viajes_regresion_sector.columns


In [None]:
from scipy.spatial import KDTree

# Index the (x, y) coordinates of the regression trips in a k-d tree.
regresion_xy = np.vstack(
    [viajes_regresion.geometry.x.values, viajes_regresion.geometry.y.values]
).T
kdtree_viajes = KDTree(regresion_xy, balanced_tree=True)


In [None]:
from libpysal.weights import DistanceBand

# Distance-band spatial weights over the k-d tree, with a threshold of 1000
# in the units of the point coordinates.
w = DistanceBand(kdtree_viajes, threshold=1000)
w.n


In [None]:
# "r" is libpysal's row-standardization transform: each row of weights sums to 1.
w.transform = "r"

In [None]:
from spreg import OLS

# Explanatory columns mapped to the labels shown in the regression summary.
ols_variables = {
    "DistManhattan": "distancia",
    "IngresoHogar": "ingreso",
    "Extensión Sur-Poniente": "sector sur poniente",
    "Norte": "norte",
    "Oriente": "oriente",
    "Poniente": "poniente",
    "Sur": "sur",
    "Sur-Oriente": "sur oriente",
}

# OLS of "usa_tp" on the variables above; the weights are passed so that the
# spatial diagnostics can be reported.
model = OLS(
    viajes_regresion_sector["usa_tp"].values,
    viajes_regresion_sector[list(ols_variables)].values,
    w=w,
    spat_diag=True,
    name_y="usa_tp",
    name_x=list(ols_variables.values()),
    name_w="spatial lag",
    name_ds="eod_2012",
)

print(model.summary)

In [None]:
from spreg import GM_Lag

# Explanatory columns mapped to the labels shown in the regression summary.
lag_variables = {
    "DistManhattan": "distancia",
    "IngresoHogar": "ingreso",
    "Extensión Sur-Poniente": "sector sur poniente",
    "Norte": "norte",
    "Oriente": "oriente",
    "Poniente": "poniente",
    "Sur": "sur",
    "Sur-Oriente": "sur oriente",
}

# GM_Lag model of "usa_tp" on the variables above, using the weights `w`.
model_lag = GM_Lag(
    viajes_regresion_sector["usa_tp"].values,
    viajes_regresion_sector[list(lag_variables)].values,
    w=w,
    spat_diag=True,
    name_y="usa_tp",
    name_x=list(lag_variables.values()),
    name_w="spatial lag",
    name_ds="eod_2012",
)

print(model_lag.summary)


¿Con Kernels?

Ver: https://www.researchgate.net/figure/Cross-sections-of-different-types-of-spatial-kernels-A-diagram-depicting-the-general_fig15_296695387