# Explore Metadata Options

## Load Data from API Demo

In [1]:
from typing import Dict

import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Polygon

from lonboard.colormap import apply_continuous_cmap
from lonboard import Map, ScatterplotLayer, PolygonLayer
from palettable.cartocolors.sequential import BurgYl_2
from geojson_pydantic import Feature 
from geojson_pydantic import Polygon as Polygon_

import h3
from shapely.geometry import Polygon, Point

In [2]:
# Root of the Space2Stats API and the two endpoints this notebook calls.
BASE_URL = "https://space2stats.ds.io"
FIELDS_ENDPOINT = BASE_URL + "/fields"
SUMMARY_ENDPOINT = BASE_URL + "/summary"

In [3]:
# Ask the API which summary fields it exposes.
# The timeout (seconds) keeps the cell from hanging forever if the service
# is unreachable; requests has no default timeout.
response = requests.get(FIELDS_ENDPOINT, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to get fields: {response.text}")

# The endpoint returns a JSON list of field names (see the printed output).
available_fields = response.json()
print("Available Fields:", available_fields)

Available Fields: ['sum_pop_2020', 'ogc_fid', 'sum_pop_f_0_2020', 'sum_pop_f_10_2020', 'sum_pop_f_15_2020', 'sum_pop_f_1_2020', 'sum_pop_f_20_2020', 'sum_pop_f_25_2020', 'sum_pop_f_30_2020', 'sum_pop_f_35_2020', 'sum_pop_f_40_2020', 'sum_pop_f_45_2020', 'sum_pop_f_50_2020', 'sum_pop_f_55_2020', 'sum_pop_f_5_2020', 'sum_pop_f_60_2020', 'sum_pop_f_65_2020', 'sum_pop_f_70_2020', 'sum_pop_f_75_2020', 'sum_pop_f_80_2020', 'sum_pop_m_0_2020', 'sum_pop_m_10_2020', 'sum_pop_m_15_2020', 'sum_pop_m_1_2020', 'sum_pop_m_20_2020', 'sum_pop_m_25_2020', 'sum_pop_m_30_2020', 'sum_pop_m_35_2020', 'sum_pop_m_40_2020', 'sum_pop_m_45_2020', 'sum_pop_m_50_2020', 'sum_pop_m_55_2020', 'sum_pop_m_5_2020', 'sum_pop_m_60_2020', 'sum_pop_m_65_2020', 'sum_pop_m_70_2020', 'sum_pop_m_75_2020', 'sum_pop_m_80_2020', 'sum_pop_m_2020', 'sum_pop_f_2020']


In [4]:
# GeoJSON Feature model specialised to a Polygon geometry with free-form
# dict properties; used below to validate the area of interest.
AOIModel = Feature[Polygon_, Dict]

# Area of interest: a GeoJSON Feature named "Kenya".
# Coordinates are [longitude, latitude] pairs; the first and last vertex are
# identical so the ring is closed.
aoi = {
      "type": "Feature",
      "geometry": {
        "type": "Polygon",
        "coordinates": [
          [
            [41.85508309264397, -1.68325],
            [40.98105, -2.49979],
            [40.993, -3.444],
            [41.58513, -3.91909],
            [40.88477, -4.95913],
            [39.55938425876585, -4.437641590288629],
            [39.25451, -3.42206],
            [37.7669, -3.67712],
            [37.69869, -3.09699],
            [34.07262, -1.05982],
            [33.90371119710453, -0.95],
            [33.893568969666944, 0.109813537861896],
            [34.18, 0.515],
            [34.6721, 1.17694],
            [35.03599, 1.90584],
            [34.59607, 3.05374],
            [34.47913, 3.5556],
            [35.298007118232946, 4.77696566346189],
            [35.817447662353516, 4.77696566346189],
            [36.159078632855646, 4.447864127672769],
            [36.85509323800812, 4.447864127672769],
            [38.120915, 3.598605],
            [38.43697, 3.58851],
            [38.67114, 3.61607],
            [38.89251, 3.50074],
            [39.55938425876585, 3.42206],
            [39.85494, 3.83879],
            [40.76848, 4.25702],
            [41.1718, 3.91909],
            [41.85508309264397, 2.97959],
            [41.58513, 2.09],
            [40.993, 1.657],
            [40.98105, 1.002],
            [41.85508309264397, -1.68325]
          ]
        ]
      },
      "properties": {
        "name": "Kenya"
      }
    }


# Building the model checks that `aoi` is a well-formed polygon Feature
# (pydantic validates on construction). The summary request sends the raw
# `aoi` dict, not `feat`.
feat = AOIModel(**aoi)

In [5]:
# Define the Request Payload
request_payload = {
    "aoi": aoi,
    "spatial_join_method": "centroid",
    "fields": available_fields[0:5],  # fails if all fields are requested
    "geometry": "point"
}

# Get Summary Data
response = requests.post(SUMMARY_ENDPOINT, json=request_payload)
if response.status_code != 200:
    raise Exception(f"Failed to get summary: {response.text}")

summary_data = response.json()
df = pd.DataFrame(summary_data)

In [6]:
# Preview the first rows of the summary table returned by the API.
df.head()

Unnamed: 0,hex_id,geometry,sum_pop_2020,ogc_fid,sum_pop_f_0_2020,sum_pop_f_10_2020,sum_pop_f_15_2020
0,866a4a48fffffff,"{'type': 'Point', 'coordinates': [39.797757107...",399.860905,6157365,4.405487,29.409952,26.127678
1,866a4a497ffffff,"{'type': 'Point', 'coordinates': [39.747949876...",582.555159,6157366,6.41833,42.847198,38.065269
2,866a4a49fffffff,"{'type': 'Point', 'coordinates': [36.919304623...",749.911237,6157367,8.262184,55.156311,49.000641
3,866a4a4d7ffffff,"{'type': 'Point', 'coordinates': [36.666909750...",863.88829,6157373,11.218388,64.547882,49.363289
4,866a5820fffffff,"{'type': 'Point', 'coordinates': [37.794834208...",525.085147,6161418,5.785151,38.620255,34.310074


In [7]:
def _hex_boundary(hex_id):
    """Return the boundary of H3 cell `hex_id` as a shapely Polygon."""
    return Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))


# Swap the point geometry returned by the API for the full hexagon outline,
# then promote the table to a GeoDataFrame in WGS84.
gdf = gpd.GeoDataFrame(
    df.assign(geometry=df['hex_id'].map(_hex_boundary)),
    geometry='geometry',
    crs='epsg:4326',
)

# Persist the hexagons as GeoJSON; this file is the source for the STAC item.
out_file = "./test.geojson"
gdf.to_file(out_file, driver='GeoJSON')

## Create STAC

In [8]:
import shutil
import tempfile
from pathlib import Path

from pystac import Catalog, Item, Asset, CatalogType, get_stac_version
import fio_stac

In [9]:
# Show which STAC specification version pystac is set to use.
print(get_stac_version())

1.0.0


### Catalog  

The Catalog holds a basic description of the project and dataset.

In [10]:
Catalog?

Init signature:
Catalog(
    id: 'str',
    description: 'str',
    title: 'str | None' = None,
    stac_extensions: 'list[str] | None' = None,
    extra_fields: 'dict[str, Any] | None' = None,
    href: 'str | None' = None,
    catalog_type: 'CatalogType' = ABSOLUTE_PUBLISHED,
    strategy: 'HrefLayoutStrategy | None' = None,
)
Docstring:
A PySTAC Catalog re

In [11]:
# Root STAC catalog describing the project as a whole.
# The description states H3 level 6 to match the data: the hex ids returned by
# the API start with "86", and the second hex digit of an H3 index string is
# the cell resolution.
catalog = Catalog(
    id="space2stats", 
    description="Global dataset of geospatial variables at the grid level (hexagon H3 level 6).",
    title="Space2Stats"
    )

### STAC Item

A STAC Item can represent the global H3 parquet file, with a description of each column (variable) stored in the item's properties.

In [12]:
# Human-readable description of each column of `gdf`, listed in column order.
# NOTE(review): the age bands "10-15" and "15-20" share a boundary year;
# confirm the intended ranges against the source population dataset.
descriptions = [
    "Hexagon unique identifier",
    "Geometry (shapely polygon) of hexagon",
    "Total population in 2020",
    "OGC FID",
    "Total female population, age 0-1",
    "Total female population, age 10-15",
    "Total female population, age 15-20"
]

# zip() silently stops at the shorter input, so a missing or extra description
# would leave columns undocumented without any error. Fail loudly instead.
if len(descriptions) != len(gdf.columns):
    raise ValueError(
        f"Expected {len(gdf.columns)} column descriptions, got {len(descriptions)}"
    )

# One entry per column, in the shape used for the "table:columns" property:
# name, description and dtype (as a string).
data_dict = [
    {
        "name": column,
        "description": description,
        "type": str(gdf[column].dtype),
    }
    for column, description in zip(gdf.columns, descriptions)
]

In [13]:
# Item-level metadata, including the table-extension fields that name the
# geometry column and list the per-column descriptions.
item_properties = {
    "name": "Space2Stats Population",
    "description": "Testing metadata with population variables",
    "table:primary_geometry": "geometry",
    "table:columns": data_dict,
}

# Build the STAC item from the exported GeoJSON file.
item = fio_stac.create_stac_item(
    source=out_file,
    id="space2stats-population",
    extensions=["https://stac-extensions.github.io/table/v1.2.0/schema.json"],
    properties=item_properties,
)

In [14]:
# Display the generated STAC item.
item

In [15]:
# Attach the item directly to the catalog.
catalog.add_item(item)

In [16]:
# List what the catalog contains: child catalogs/collections first, then items.
child_entries = list(catalog.get_children())
item_entries = list(catalog.get_items())
print(child_entries)
print(item_entries)

[]
[<Item id=space2stats-population>]


In [17]:
# Print the catalog tree (the catalog and the items under it).
catalog.describe()

* <Catalog id=space2stats>
  * <Item id=space2stats-population>


### Asset

An Asset can link to a World Bank metadata page that follows an appropriate schema (DDH or NADA).  
That page can store additional information such as the authors, the source of the input data, and how the data was processed.

In [18]:
Asset?

Init signature:
Asset(
    href: 'str',
    title: 'str | None' = None,
    description: 'str | None' = None,
    media_type: 'str | None' = None,
    roles: 'list[str] | None' = None,
    extra_fields: 'dict[str, Any] | None' = None,
) -> 'None'
Docstring:
An object that contains a link to data associated with an Item or Collection that
can be downloaded or streamed.

Args:
    href : Link to the asset object. Relative and absolute links are both
        al

In [19]:
# Example metadata page (an entry on the NADA demo site) used as the asset link.
example_url = 'https://nada-demo.ihsn.org/index.php/catalog/55'

In [20]:
# Asset pointing at the external HTML metadata page.
# The title previously read "Additional Medata" (typo); it ends up in the saved
# STAC JSON, so it is corrected here.
asset = Asset(
    href=example_url,
    title="Additional Metadata",
    media_type="text/html",
    roles=["metadata"]
)

In [21]:
# Register the asset on the item under the key "metadata".
item.add_asset("metadata", asset)

### Save Demo

In [24]:
from os.path import join

In [25]:
# Before normalize_hrefs is called, neither object has a self href.
for stac_object in (catalog, item):
    print(stac_object.get_self_href() is None)

True
True


In [26]:
# Root every STAC href under a "stac" directory relative to the working directory.
stac_root = join(".", "stac")
catalog.normalize_hrefs(stac_root)

In [27]:
# Show the hrefs assigned to the catalog and the item, one per line.
print(catalog.get_self_href(), item.get_self_href(), sep="\n")

c:/Users/WB514197/Repos/DECAT_Space2Stats/notebooks/METADATA/stac/catalog.json
c:/Users/WB514197/Repos/DECAT_Space2Stats/notebooks/METADATA/stac/space2stats-population/space2stats-population.json


In [28]:
# Write the catalog and its item to disk at the hrefs set by normalize_hrefs,
# using pystac's SELF_CONTAINED catalog type.
catalog.save(catalog_type=CatalogType.SELF_CONTAINED)