In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
import pytest
import shapely
import xarray as xr
from geopandas.testing import assert_geodataframe_equal
from pandas.testing import assert_frame_equal

import xvec  # noqa
from xvec import GeometryIndex

In [2]:
# Raster fixture: a 10 x 10 x 5 cube of zeros on (x, y, time), stored as the
# single variable "test" of a Dataset.
time_index = pd.date_range("2023-01-01", periods=5)
da = xr.DataArray(
    np.zeros((10, 10, 5)),
    coords={"x": range(10), "y": range(20, 30), "time": time_index},
).to_dataset(name="test")

# Two rectangular zones that lie inside the raster extent.
polygon1 = shapely.geometry.Polygon([(1, 22), (4, 22), (4, 26), (1, 26)])
polygon2 = shapely.geometry.Polygon([(6, 22), (9, 22), (9, 26), (6, 26)])
polygons = gpd.GeoSeries([polygon1, polygon2], crs="EPSG:4326")

# Expected vector cube: one all-zero series per polygon, indexed by a
# geometry coordinate carrying the same CRS as the polygons.
expected = (
    xr.DataArray(
        np.zeros((2, 5)),
        coords={"geometry": polygons, "time": time_index},
    )
    .xvec.set_geom_indexes("geometry", crs="EPSG:4326")
    .to_dataset(name="test")
    .set_coords("geometry")
)

# Aggregate the raster over the polygons and compare with the expectation.
actual = da.xvec.zonal_stats(polygons, "x", "y", stat="sum")
xr.testing.assert_identical(actual, expected)

  0%|                                                     | 0/1 [00:00<?, ?it/s]


TypeError: iteration over a 0-d array

In [3]:
# Display the GeoSeries to confirm that both polygons were built as intended.
polygons

0    POLYGON ((1.00000 22.00000, 4.00000 22.00000, ...
1    POLYGON ((6.00000 22.00000, 9.00000 22.00000, ...
dtype: geometry

In [4]:
# Inspect the expected Dataset that the assertion compares against.
expected

In [None]:
# NOTE(review): this looks like a call site pasted from the xvec accessor for
# reference - confirm. The cell has never been executed, and `self` is not
# defined anywhere in this notebook, so it cannot run as written. `stat`,
# `x_coords`, `y_coords` and `n_jobs` are only assigned in other cells.
vec_cube = self._obj.xvec._spatial_agg(
    polygons,
    stat=stat,
    x_coords=x_coords,
    y_coords=y_coords,
    chunk_size=2,
    n_jobs=n_jobs,
)

In [5]:
# Alias the test polygons under the name used by the aggregation cells.
geometries = polygons

In [None]:
# NOTE(review): scratch fragment - the argument list of the `zonal_stats` call
# from the test cell. It is not a valid statement on its own (a keyword
# argument outside of a call) and this cell has never been executed.
polygons, "x", "y", stat="sum"

In [28]:
# Inputs for stepping through the aggregation by hand. They mirror the
# arguments of the `zonal_stats` call in the test cell, except that the
# statistic here is "mean" rather than "sum".
geometries = polygons
x_coords, y_coords = "x", "y"
stat = "mean"


In [12]:
def _missing_dependency(package):
    """Build the ImportError raised when a required package is not installed."""
    return ImportError(
        f"The {package} package is required for `xvec._spatial_agg()`. "
        f"You can install it using 'conda install -c conda-forge {package}' or "
        f"'pip install {package}'."
    )


# Each dependency is imported behind a guard so that a missing package is
# reported together with installation instructions.
try:
    import geopandas as gpd
except ImportError as err:
    raise _missing_dependency("geopandas") from err

try:
    import rioxarray  # noqa
except ImportError as err:
    raise _missing_dependency("rioxarray") from err

try:
    from joblib import Parallel, delayed
except ImportError as err:
    raise _missing_dependency("joblib") from err

try:
    from tqdm import tqdm
except ImportError as err:
    raise _missing_dependency("tqdm") from err

import gc


# Transform of the raster, obtained through the `.rio` accessor; it is passed
# to `_agg_geom` as `trans`.
transform = da.rio.transform()

In [9]:
# Split the geometries into consecutive slices of at most two items each.
geometry_chunks = []
for start in range(0, len(geometries), 2):
    geometry_chunks.append(geometries[start : start + 2])

In [18]:
# Number of geometries placed in each chunk; every chunk is handed to one
# `Parallel(...)` call in the aggregation loop.
chunk_size = 2
# Forwarded to joblib's `Parallel`; -1 asks joblib to use all available CPUs.
n_jobs = -1

In [23]:
# Inspect the current chunk. `chunk` is only bound by the
# `for chunk in tqdm(geometry_chunks)` loop in another cell, so this cell
# depends on that loop having been run first.
chunk

0    POLYGON ((1.00000 22.00000, 4.00000 22.00000, ...
1    POLYGON ((6.00000 22.00000, 9.00000 22.00000, ...
dtype: geometry

In [30]:
# Aggregate every geometry of the current chunk, one joblib task per geometry.
# `var` and `chunk` are whatever the aggregation loop last left behind.
agg_tasks = (
    delayed(_agg_geom)(geom, transform, var, x_coords, y_coords, stat=stat)
    for geom in chunk
)
chunk_results = Parallel(n_jobs=n_jobs)(agg_tasks)

In [43]:
# Slice the geometries into consecutive chunks of `chunk_size` items.
chunk_starts = range(0, len(geometries), chunk_size)
geometry_chunks = [geometries[start : start + chunk_size] for start in chunk_starts]

# For every data variable, collect one aggregated array per geometry.
stats_dic = {}
aggregate = delayed(_agg_geom)
for var in da.data_vars:
    stats_dic[var] = []

    computed_results = []
    for chunk in tqdm(geometry_chunks):
        # One joblib task per geometry in the current chunk.
        chunk_results = Parallel(n_jobs=n_jobs)(
            aggregate(geom, transform, var, x_coords, y_coords, stat=stat)
            for geom in chunk
        )
        computed_results += chunk_results
    stats_dic[var] = computed_results


  0%|                                                     | 0/1 [00:00<?, ?it/s]


TypeError: 'Polygon' object is not iterable

In [41]:
# Compare two ways of turning a single geometry into an array; only the first
# geometry of the chunk is examined.
for geom in chunk:
    print(geom)

    # Direct conversion of the bare geometry with NumPy.
    object_array = np.asarray(geom, dtype=object)
    print(object_array)
    print('-----------------------')

    # Wrapping the geometry in a GeoSeries first and taking its array.
    geometry_array = gpd.GeoSeries(geom).geometry.array
    print(geometry_array)

    break

POLYGON ((1 22, 4 22, 4 26, 1 26, 1 22))
POLYGON ((1 22, 4 22, 4 26, 1 26, 1 22))
-----------------------
<GeometryArray>
[<POLYGON ((1 22, 4 22, 4 26, 1 26, 1 22))>]
Length: 1, dtype: geometry


In [None]:
# NOTE(review): unfinished scratch input (presumably the start of
# `np.asarray`); this cell has never been executed.
np.as

In [None]:
# Same conversion as the one commented out inside `_agg_geom`: np.asarray
# applied to a single geometry. `geom` is whatever an earlier loop left bound.
# NOTE(review): presumably the origin of the recorded
# "iteration over a 0-d array" error - confirm.
geometry_array = np.asarray(geom)

In [42]:
def _agg_geom(
    geom,
    trans,
    var: str,
    x_coords: str = None,
    y_coords: str = None,
    stat: str = "mean",
):
    """Aggregate the values of one dataset variable over a single geometry.

    The variable is read from the notebook-level Dataset ``da``. Pixels that
    fall outside ``geom`` are set to NaN before the reduction, so they do not
    contribute to the statistic.

    The raster and the geometry must share the same CRS (the notebook uses
    EPSG:4326). This function does not verify that.

    Parameters
    ----------
    geom : shapely.Geometry
        A single shapely geometry (e.g. a Polygon) to aggregate over.
    trans : affine.Affine
        Affine transform mapping pixel indices of the raster to coordinates.
    var : Hashable
        Name of the variable in the dataset whose values are aggregated.
    x_coords : Hashable
        Name of the dimension containing ``x`` coordinates.
    y_coords : Hashable
        Name of the dimension containing ``y`` coordinates.
    stat : str
        Spatial aggregation statistic, by default "mean". Supported
        statistics: ['mean', 'median', 'min', 'max', 'sum'].

    Returns
    -------
    numpy.ndarray
        Aggregated values over the geometry, one value per element of the
        remaining (non-spatial) dimensions.

    Raises
    ------
    ImportError
        If rasterio is not installed.
    ValueError
        If ``stat`` is not supported or ``x_coords`` / ``y_coords`` is missing.
    """
    try:
        # `features` is a submodule and is not guaranteed to be loaded by a
        # bare `import rasterio`.
        import rasterio.features
    except ImportError as err:
        raise ImportError(
            "The rasterio package is required for `xvec._agg_geom()`. "
            "You can install it using 'conda install -c conda-forge rasterio' or "
            "'pip install rasterio'."
        ) from err

    supported_stats = ("mean", "median", "min", "max", "sum")
    if stat not in supported_stats:
        raise ValueError(
            f"Unsupported stat {stat!r}; expected one of {list(supported_stats)}."
        )
    if x_coords is None or y_coords is None:
        raise ValueError("Both `x_coords` and `y_coords` must be provided.")

    xar_chunk = da[var]

    # rasterio expects (rows, cols), i.e. (y, x). `invert=True` makes the mask
    # True for pixels inside the geometry; the default marks the outside.
    mask = rasterio.features.geometry_mask(
        [geom],
        out_shape=(xar_chunk.sizes[y_coords], xar_chunk.sizes[x_coords]),
        transform=trans,
        invert=True,
    )

    # Naming the mask dimensions lets xarray broadcast it against the data
    # regardless of the order of the data's dimensions. Pixels outside the
    # geometry become NaN and are skipped by the reductions below.
    inside = xr.DataArray(mask, dims=(y_coords, x_coords))
    masked_data = xar_chunk.where(inside)

    # `stat` was validated above, so it names an existing reduction method.
    stat_within_polygons = getattr(masked_data, stat)(dim=[y_coords, x_coords])

    return stat_within_polygons.values