In [1]:
import pystac
from datetime import datetime
from earthcode.static import create_project_collection
from pathlib import Path

In [None]:
# ---------------------------------------------------------------------------
# Project metadata: identity, status and license
# ---------------------------------------------------------------------------
# Custom identifier for the project; it may mirror the title (e.g. 4datlantic-ohc).
project_id = "polar-science-cluster-data"
# Human-readable title of the project (e.g. 4DAtlantic-OHC).
project_title = "Polar Science Cluster Combined Datasets"
# Free-text description of the project.
project_description = "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. As part of work, there are multiple datasets generated for scientific exploitation. This project aims to combine and preprocess these datasets in order to make them more accessible and useful."
# Project status: choose either "ongoing" or "completed".
project_status = "ongoing"

# License that applies to all data uploaded for the project (e.g. CC-BY-4.0).
# Use 'various' when more than one license applies.
project_license = 'various'

# ---------------------------------------------------------------------------
# Spatial extent of the study area (EPSG:4326)
# ---------------------------------------------------------------------------
# For several disjoint study areas, give the single bounding box covering all.
# NOTE: the four values are later placed in the bounding box in the order
# (project_s, project_w, project_n, project_e). A STAC bbox is read as
# [west, south, east, north], so despite the names the values below are
# min longitude, min latitude, max longitude, max latitude.
project_s = -180.0
project_w = -90.0
project_n = 180.0
project_e = 90.0

# ---------------------------------------------------------------------------
# Temporal extent: first and last day of the project
# ---------------------------------------------------------------------------
project_start_year = 2026
project_start_month = 1
project_start_day = 1

project_end_year = 2028
project_end_month = 12
project_end_day = 31

# ---------------------------------------------------------------------------
# Links, themes and contacts
# ---------------------------------------------------------------------------
# Project website and EO4Society page.
website_link = "https://eo4society.esa.int/communities/scientists/esa-polar-science-cluster/"
eo4socity_link = "https://eo4society.esa.int/communities/scientists/esa-polar-science-cluster/"

# One or more themes chosen from:
# atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.
project_themes = ["cryosphere"]

# Name and e-mail of the ESA Technical Officer (TO).
to_name = 'Martin Weaving'
to_email = 'earth-code@esa.int'

# Consortium members as (name, contact_email) tuples,
# e.g. ('University A', "contact@universitya.fr").
consortium_members = [
    ('European Space Agency', 'earth-code@esa.int'),
]

In [None]:
# Assemble the project extent: one bounding box and one [start, end] interval.
spatial_extent = pystac.SpatialExtent(
    bboxes=[[project_s, project_w, project_n, project_e]],
)
temporal_extent = pystac.TemporalExtent(
    intervals=[
        [
            datetime(project_start_year, project_start_month, project_start_day),
            datetime(project_end_year, project_end_month, project_end_day),
        ]
    ],
)
project_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

# Build the project collection from the metadata defined in the cell above.
# Arguments are positional, in the order the helper is called with here;
# only the EO4Society link is passed by keyword.
project_collection = create_project_collection(
    project_id, project_title, project_description,
    project_status, project_license,
    project_extent,
    project_themes,
    to_name, to_email,
    consortium_members,
    website_link,
    eo4socity_link=eo4socity_link,
)

# Validate the collection; as the cell's last expression, the result is displayed.
project_collection.validate()

['https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json',
 'https://stac-extensions.github.io/osc/v1.0.0/schema.json',
 'https://stac-extensions.github.io/themes/v1.0.0/schema.json',
 'https://stac-extensions.github.io/contacts/v0.1.1/schema.json']

In [4]:
# Root of the local open-science-catalog-metadata checkout.
# Set the OSC_CATALOG_ROOT environment variable to point at your own clone;
# the previous hard-coded location is kept as the default so existing runs
# behave as before.
import os

catalog_root = Path(
    os.environ.get('OSC_CATALOG_ROOT', '/home/krasen/open-science-catalog-metadata/')
)

In [5]:
# Hand the project collection and the catalog checkout path to the earthcode helper.
# NOTE(review): presumably this writes the project entry into the checkout at
# catalog_root — confirm against earthcode.git_add before re-running.
from earthcode.git_add import save_project_collection_to_osc
save_project_collection_to_osc(project_collection, catalog_root)

In [6]:
# Run the earthcode OSC validator on the project collection's dict form.
# The recorded output for this cell is an empty list.
# NOTE(review): presumably an empty list means no validation errors — confirm.
from earthcode.validator import validateOSCEntry
validateOSCEntry(project_collection.to_dict(), catalog_root)

[]

In [7]:
# Display the project collection for a visual check.
project_collection

### Product

In [8]:
# ---------------------------------------------------------------------------
# Product metadata: identity, status and license
# ---------------------------------------------------------------------------
product_id = "antarctic-data-cube"
product_title = "Antarctic Data Cube"
product_description = "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. This collection focuses on the Antarctica region and contains combined outputs from these activities analysis ready, cloud-native format."
product_status = "ongoing"

# License of the product.
product_license = 'various'

# Up to five keywords describing the product.
product_keywords = ["ice", "Antarctica", 'Data cube']

# ---------------------------------------------------------------------------
# Spatial extent (EPSG:4326)
# ---------------------------------------------------------------------------
# One list entry per bounding box; add more entries when the dataset covers
# discontinuous regions. A global dataset would use
# [-180.0], [-90.0], [180.0], [90.0].
# NOTE: the lists are zipped in the order
# (product_s, product_w, product_n, product_e) when the extent is built.
# A STAC bbox is read as [west, south, east, north], so despite the names the
# values are min longitude, min latitude, max longitude, max latitude.
product_s = [-180.0]
product_w = [-90.0]
product_n = [180.0]
product_e = [-60.0]

# ---------------------------------------------------------------------------
# Temporal extent: reuse the project's start and end dates
# ---------------------------------------------------------------------------
product_start_year = project_start_year
product_start_month = project_start_month
product_start_day = project_start_day

product_end_year = project_end_year
product_end_month = project_end_month
product_end_day = project_end_day

# Semantic region covered by this product (e.g. Belgium).
product_region = "Antarctic"

# One or more themes (e.g. land) chosen from:
# atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.
product_themes = ["cryosphere", 'oceans']

# DOI when available (e.g. "https://doi.org/10.57780/s3d-83ad619"), else None.
product_doi = None

# Id and title of the related project. They must match a new or already
# existing project in the catalog; here the values defined earlier in the
# notebook are reused unchanged.
project_id = project_id
project_title = project_title

In [9]:
# Gather every variable, mission and CF parameter name declared by the
# products linked as children of the 4d-antarctica project.
ant_project = pystac.Collection.from_file(catalog_root / 'projects/4d-antarctica/collection.json')

all_variables = []
all_missions = []
all_parameters = []

for child in ant_project.get_children():
    # Load the product's own collection file from the catalog checkout.
    product = pystac.Collection.from_file(catalog_root / 'products' / child.id / 'collection.json')
    product_dict = product.to_dict()
    # Use .get(..., []) so a product that does not declare one of these
    # fields contributes nothing instead of aborting the loop with KeyError.
    all_parameters.extend(p['name'] for p in product_dict.get('cf:parameter', []))
    all_variables.extend(product_dict.get('osc:variables', []))
    all_missions.extend(product_dict.get('osc:missions', []))

# De-duplicate and sort. Plain list(set(...)) yields an order that can change
# between kernel sessions, which would create spurious diffs in the generated
# catalog files.
product_variables = sorted(set(all_variables))
product_missions = sorted(set(all_missions))
# NOTE(review): product_parameters is not passed to create_product_collection
# in the visible cells — confirm whether it should be.
product_parameters = sorted(set(all_parameters))

In [10]:
# Assemble the product extent.
# Each position across the four coordinate lists forms one bounding box.
product_bboxes = list(map(list, zip(product_s, product_w, product_n, product_e)))
spatial_extent = pystac.SpatialExtent(bboxes=product_bboxes)

# A single [start, end] interval built from the product's date components.
temporal_extent = pystac.TemporalExtent(
    intervals=[
        [
            datetime(product_start_year, product_start_month, product_start_day),
            datetime(product_end_year, product_end_month, product_end_day),
        ]
    ],
)
product_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

In [11]:
from earthcode.static import create_product_collection, manually_add_product_links

# Build the product collection from the metadata cells above. Arguments are
# positional, in the order the helper is called with here.
product_collection = create_product_collection(
    product_id,
    product_title,
    product_description,
    product_extent,
    product_license,
    product_keywords,
    product_status,
    product_region,
    product_themes,
    product_missions,
    product_variables,
    project_id,
    project_title,
)

# The access link and the documentation link both point at the same forum thread.
product_forum_link = 'https://discourse-earthcode.eox.at/t/antartica-insync-data-cubes/107'
manually_add_product_links(
    product_collection,
    access_link=product_forum_link,
    documentation_link=product_forum_link,
)

In [12]:
# Validate the product collection; the recorded output lists the schema URLs it was checked against.
product_collection.validate()

['https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json',
 'https://stac-extensions.github.io/osc/v1.0.0/schema.json',
 'https://stac-extensions.github.io/themes/v1.0.0/schema.json',
 'https://stac-extensions.github.io/cf/v0.2.0/schema.json']

In [13]:
# Hand the product collection and the catalog checkout path to the earthcode helper.
# NOTE(review): presumably this writes the product entry into the checkout at
# catalog_root — confirm against earthcode.git_add before re-running.
from earthcode.git_add import save_product_collection_to_catalog
save_product_collection_to_catalog(product_collection, catalog_root)

In [14]:
# Run the earthcode OSC validator on the product collection's dict form.
# The recorded output for this cell is an empty list.
validateOSCEntry(product_collection.to_dict(), catalog_root)

[]

### Add items

In [183]:

from xstac import xarray_to_stac
import json
import shapely
import xarray as xr
import numpy as np
# WARNING: this rebinds the name `datetime` from the class imported at the top
# of the notebook (`from datetime import datetime`) to the module. Earlier
# cells that call `datetime(...)` directly will fail if re-run after this
# cell; the item cells below rely on the module form (`datetime.datetime`).
import datetime

# Bounding box shared by every item, with the same values as the product extent.
bbox = [-180.0, -90.0, 180.0, -60.0, ]
# GeoJSON geometry of that box; the JSON round trip yields plain JSON types
# (lists rather than tuples).
geometry = json.loads(json.dumps(shapely.box(*bbox).__geo_interface__))

In [190]:
# Stand-alone STAC collection that will hold the data-cube items.
# Id and title are taken from the product collection.
# NOTE(review): the description below is the project description text, not
# product_description — confirm this is intended.
preview_extent = {
    "spatial": {"bbox": [bbox]},
    "temporal": {
        "interval": [["1980-01-01T18:07:12Z", "2022-01-12T18:59:59Z"]],
    },
}

collection = pystac.Collection.from_dict({
    "type": "Collection",
    "id": product_collection.id,
    "stac_version": "1.0.0",
    "title": product_collection.title,
    "description": "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. As part of work, there are multiple datasets generated for scientific exploitation. This project aims to combine and preprocess these datasets in order to make them more accessible and useful.",
    "extent": preview_extent,
    "license": "various",
    "links": [],
})

In [191]:
# STAC item for the combined Antarctica data cube.
combined_link = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/antarctica-combined.zarr'
# NOTE(review): `ds` is opened but not used again in this cell.
ds = xr.open_zarr(combined_link)

# STAC expects RFC 3339 timestamps ("T" separator, explicit UTC offset).
# str(datetime) would produce a space-separated value, so use isoformat().
# NOTE(review): start/end are set to the time this cell runs, not to the
# temporal coverage of the data — confirm this is intended.
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-antarctica_combined",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,

    "assets": {
        "data": {
            "href": combined_link,
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes DataCube'
        }
    },
    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes data for the Antarctica Region',
        "title": 'Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes DataCube',
        "license": 'various',
    }
}

combined_item = pystac.Item.from_dict(template)

collection.add_item(combined_item)

In [192]:
# STAC item for the ice temperature data cube.
icetemp = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/icetemp.zarr'
# NOTE(review): `ds` is opened but not used again in this cell.
ds = xr.open_zarr(icetemp)

# STAC expects RFC 3339 timestamps ("T" separator, explicit UTC offset).
# str(datetime) would produce a space-separated value, so use isoformat().
# NOTE(review): start/end are set to the time this cell runs, not to the
# temporal coverage of the data — confirm this is intended.
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-icetemp_profiles",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,

    "assets": {
        "data": {
            "href": icetemp,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Ice Temperature Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Temperature Profiles from ESA science Activities',
        "title": 'Ice Temperature Data Cube',
        "license": 'various',
    },
}

icetemp_item = pystac.Item.from_dict(template)

collection.add_item(icetemp_item)

In [193]:
# STAC item for the ice velocity data cube.
# The original cell pointed this asset at icetemp.zarr (copied from the ice
# temperature cell), so the velocity item advertised the temperature store.
# FIXME(review): the store name below is assumed to follow the "<name>.zarr"
# pattern of the sibling items — confirm the object exists in the bucket.
icevel = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/icevel.zarr'
# NOTE(review): `ds` is opened but not used again in this cell.
ds = xr.open_zarr(icevel)

# STAC expects RFC 3339 timestamps ("T" separator, explicit UTC offset).
# str(datetime) would produce a space-separated value, so use isoformat().
# NOTE(review): start/end are set to the time this cell runs, not to the
# temporal coverage of the data — confirm this is intended.
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-icevel",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,
    "assets": {
        "data": {
            "href": icevel,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Ice Velocity Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Velocity from ESA Polar Science Activities',
        "title": 'Ice Velocity Data Cube',
        'license': 'various',
    }
}

icevel_item = pystac.Item.from_dict(template)

collection.add_item(icevel_item)

In [194]:
# STAC item for the surface elevation change data cube.
sec = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/sec.zarr'
# NOTE(review): `ds` is opened but not used again in this cell.
ds = xr.open_zarr(sec)

# STAC expects RFC 3339 timestamps ("T" separator, explicit UTC offset).
# str(datetime) would produce a space-separated value, so use isoformat().
# NOTE(review): start/end are set to the time this cell runs, not to the
# temporal coverage of the data — confirm this is intended.
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-sec",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,
    "assets": {
        "data": {
            "href": sec,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Surface elevation change Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Surface elevation change from ESA Polar Science Activities',
        "title": 'Surface elevation change Data Cube',
        'license': 'various',
    }
}

sec_item = pystac.Item.from_dict(template)

collection.add_item(sec_item)

In [195]:
# Write the collection and its items as a self-contained catalog under a
# preview directory named after the product collection id.
preview_dir = f'../../prr_preview/{product_collection.id}'
collection.normalize_and_save(
    preview_dir,
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)

0. Install git
1. Fork the repository on github
2. Clone the repository 
3. Create a new branch - `git checkout -b project_branch`
4. Make the changes below ...
5. Open a pull request against the main open science catalog repository

- check for duplicates
- check for themes, missions, variables backward links
- check that products do not have backward links from themes, missions, variables
- check that all links, except `via` links and catalog `self` links, are relative (to be confirmed)
- no duplicate keys