In [1]:
import pystac
from datetime import datetime
from earthcode.static import create_project_collection

In [2]:
# Define id, title, description, project status and license of the project.
# A custom id of the project; it can be derived from the title, e.g. 4datlantic-ohc
project_id = "polar-science-cluster-data" 
# The title of your project, e.g. 4DAtlantic-OHC
project_title = "Polar Science Cluster Combined Datasets" 
# A description of the project
project_description = "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. As part of work, there are multiple datasets generated for scientific exploitation. This project aims to combine and preprocess these datasets in order to make them more accessible and useful."
# Project status: pick one of 'ongoing' or 'completed'
project_status = "ongoing"

# Overall license for all related data that will be uploaded from the project, e.g. CC-BY-4.0.
# If you have multiple licenses, you can pick 'various'.
project_license = 'various' 

# Define the spatial extent of the project study area in EPSG:4326.
# If you have multiple disjoint study areas, specify the bounding box that covers all of them,
# e.g. project_s, project_w, project_n, project_e = -180.0, -90.0, 180.0, 90.0
# NOTE(review): the four values are passed in exactly this order as one bounding box to
# pystac.SpatialExtent in a later cell. The values read as [min lon, min lat, max lon, max lat],
# which does not match the s/w/n/e suffixes of the variable names — confirm before editing.
project_s, project_w, project_n, project_e = -180.0, -90.0, 180.0, 90.0 

# The project start and end dates
project_start_year, project_start_month, project_start_day = 2026, 1, 1
project_end_year, project_end_month, project_end_day = 2028,12,31

# Define the links to the project website and to the EO4Society page
# (both currently point at the same URL)
website_link = "https://eo4society.esa.int/communities/scientists/esa-polar-science-cluster/"
eo4socity_link = "https://eo4society.esa.int/communities/scientists/esa-polar-science-cluster/"

# Define project themes. Pick one or more from:
# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.
project_themes = ["cryosphere"]

# Provide the name and email of the ESA TO (presumably the Technical Officer — TODO confirm)
to_name = 'Martin Weaving'
to_email = 'earth-code@esa.int'

# List the consortium members as (name, contact_email) tuples, for example - ('University A', "contact@universitya.fr")
consortium_members = [('European Space Agency', 'earth-code@esa.int')]

In [3]:
# Bounding box of the whole project, using the four values in the order defined above
project_bbox = [project_s, project_w, project_n, project_e]
spatial_extent = pystac.SpatialExtent([project_bbox])

# One time interval running from the project start date to the project end date
project_interval = [
    datetime(project_start_year, project_start_month, project_start_day),
    datetime(project_end_year, project_end_month, project_end_day),
]
temporal_extent = pystac.TemporalExtent([project_interval])

project_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

# Build the project collection from the metadata defined in the configuration cell
project_collection = create_project_collection(
    project_id,
    project_title,
    project_description,
    project_status,
    project_license,
    project_extent,
    project_themes,
    to_name,
    to_email,
    consortium_members,
    website_link,
    eo4socity_link=eo4socity_link,
)

# Validate the collection; the result is shown as the cell output
project_collection.validate()

['https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json',
 'https://stac-extensions.github.io/osc/v1.0.0/schema.json',
 'https://stac-extensions.github.io/themes/v1.0.0/schema.json',
 'https://stac-extensions.github.io/contacts/v0.1.1/schema.json']

In [4]:
from earthcode.validator import check_project_rules, check_global_rules, check_extension
from pathlib import Path

# Local clone of the open-science-catalog-metadata repository.
# pathlib does not expand "~" by itself: without expanduser() the path would be
# a relative directory literally named "~" below the current working directory.
catalog_root = Path('~/open-science-catalog-metadata/').expanduser()

# Run the global rules first, then the project rules, collecting all messages in one list
errors = check_global_rules(project_collection.to_dict(), catalog_root)
errors.extend(check_project_rules(project_collection.to_dict(), catalog_root))
# Display the collected messages as the cell output
errors

[]

In [6]:
# --- Common Utilities ---

def _assert(ctx, condition, message):
    if not condition:
        ctx["errors"].append(message)

def _resolve(ctx, href):
    """Turn a link ``href`` into a resolved filesystem path.

    A leading ``LINK_PREFIX`` is stripped first; the remainder is joined onto
    the folder that contains ``ctx["file_path"]`` and then resolved.
    """
    relative = href[len(LINK_PREFIX):] if href.startswith(LINK_PREFIX) else href
    return (ctx["file_path"].parent / relative).resolve()

def _get_title_for_file(path: Path):
    try:
        with open(path, 'r', encoding='utf-8') as f:
            stac = json.load(f)
            if stac.get("type") == "Feature":
                return stac.get("properties", {}).get("title")
            return stac.get("title")
    except (FileNotFoundError, json.JSONDecodeError):
        return None

def _get_link_with_rel(data, rel):
    links = data.get("links", [])
    if isinstance(links, list):
        for link in links:
            if link.get("href") and link.get("rel") == rel:
                return link
    return None

def _has_link_with_rel(ctx, rel):
    """Require a link with relation ``rel`` and return it.

    An error is recorded when no such link exists; the lookup result (the link
    or ``None``) is returned either way so callers can continue checking it.
    """
    found = _get_link_with_rel(ctx["data"], rel)
    is_present = isinstance(found, dict)
    _assert(ctx, is_present, f"must have {rel} link")
    return found

def _has_extensions(ctx, extensions):
    """Check that the record declares every extension named in ``extensions``.

    Each name is looked up in ``EXTENSION_SCHEMES``; an unknown name and a
    schema URL missing from ``stac_extensions`` are both recorded as errors.
    When ``stac_extensions`` is not a list, one combined error is recorded.
    """
    declared = ctx["data"].get("stac_extensions", [])
    if not isinstance(declared, list):
        _assert(ctx, False, f"must implement extensions: {', '.join(extensions)}")
        return

    for name in extensions:
        schema_url = EXTENSION_SCHEMES.get(name)
        if not schema_url:
            _assert(ctx, False, f"Extension definition missing for {name}")
            continue
        _assert(ctx, schema_url in declared, f"must implement extension: {name}")

def _ensure_id_is_folder_name(ctx):
    """Record an error unless the record ``id`` equals the name of its parent folder."""
    folder = ctx["file_path"].parent.name
    declared_id = ctx["data"].get("id")
    _assert(ctx, declared_id == folder, "parent folder name must match id")

def _check_stac_links_rel_abs(ctx, include_item_child=True):
    """Check which links are absolute and which are relative.

    The ``self`` link must start with ``LINK_PREFIX``. Links with relation
    ``related`` or ``parent`` (and ``item`` / ``child`` when
    ``include_item_child`` is true) must not contain ``://``.
    """
    if include_item_child:
        relative_rels = ['related', 'parent', 'item', 'child']
    else:
        relative_rels = ['related', 'parent']

    for link in ctx["data"].get("links", []):
        href = link.get("href", "")
        rel = link.get("rel")
        if rel == 'self':
            _assert(ctx, href.startswith(LINK_PREFIX), f"Link 'self' must start with '{LINK_PREFIX}'")
            continue
        if rel in relative_rels:
            _assert(ctx, "://" not in href, f"Link '{rel}' to '{href}' must be relative")

def _check_link_title(ctx, link, prefix=''):
    """Check that the link title matches the title of the file it points to.

    The expected title is the linked file's title, preceded by ``prefix`` when
    one is given. Nothing is checked when no string title can be read from the
    linked file.
    """
    target = _resolve(ctx, link['href'])
    title = _get_title_for_file(target)
    if not isinstance(title, str):
        return

    if prefix:
        expected = f"{prefix}{title}"
        msg = f"'{expected}'"
    else:
        expected = title
        msg = f"title of linked file {target}"

    _assert(ctx, link.get("title") == expected, f"Title of link to {link['href']} (rel: {link['rel']}) must be {msg}")

def _require_parent_link(ctx, expected_path):
    """Run the shared STAC link check for the ``parent`` relation."""
    _check_stac_link(ctx, rel_type='parent', expected_path=expected_path)

def _require_root_link(ctx, expected_path):
    """Run the shared STAC link check for the ``root`` relation."""
    _check_stac_link(ctx, rel_type='root', expected_path=expected_path)

def _require_via_link(ctx):
    """Record an error unless the record has a ``via`` link."""
    _has_link_with_rel(ctx, rel="via")

def _check_stac_link(ctx, rel_type, expected_path):
    """Check the link with relation ``rel_type`` against ``expected_path``.

    The link must exist, resolve to the same file as ``expected_path``, have
    media type ``application/json`` and carry the title of the linked file.
    When the link is missing only that error is recorded.
    """
    link = _has_link_with_rel(ctx, rel_type)
    if link:
        actual_target = _resolve(ctx, link['href'])
        expected_target = _resolve(ctx, expected_path)
        points_to_expected = actual_target == expected_target
        _assert(ctx, points_to_expected, f"{rel_type} link must point to {expected_path}")

        is_json = link.get("type") == "application/json"
        _assert(ctx, is_json, f"{rel_type} link must be application/json")

        _check_link_title(ctx, link)

def _check_preview_image(ctx):
    """Check the ``preview`` link and the image file it points to.

    The link must exist, have type ``image/webp`` and no ``proj:epsg`` value.
    The image file must exist; when ``Image`` is truthy the file is opened and
    ``proj:shape`` must equal ``[height, width]`` of the image.

    NOTE(review): ``Image`` is not defined in the visible code — presumably
    PIL's ``Image`` module or ``None`` when Pillow is missing; TODO confirm.
    """
    link = _has_link_with_rel(ctx, "preview")
    if not link:
        return

    _assert(ctx, link.get("type") == "image/webp", "Preview type must be image/webp")
    _assert(ctx, link.get("proj:epsg") is None, "proj:epsg must be null")

    preview_path = _resolve(ctx, link['href'])
    can_open_images = bool(Image)

    if not preview_path.exists():
        _assert(ctx, False, f"Preview image doesn't exist: {preview_path}")
        return
    if not can_open_images:
        return

    try:
        with Image.open(preview_path) as img:
            width, height = img.size
            _assert(ctx, link.get("proj:shape") == [height, width], f"proj:shape mismatch for {preview_path}")
    except Exception:
        # Any failure while opening or reading the image is reported as corruption
        _assert(ctx, False, f"Preview image corrupt: {preview_path}")

def _check_child_links(ctx, expected_type="products", expected_filename="collection"):
    """Check every ``child`` link of the record.

    Each child link must have type ``application/json``, point to
    ``<expected_type>/<id>/<expected_filename>.json``, carry the title of the
    linked file and refer to a file that exists.
    """
    required_name = f"{expected_filename}.json"

    for link in ctx["data"].get("links", []):
        if link.get("rel") != "child":
            continue

        href = link['href']
        _assert(ctx, link.get("type") == "application/json", f"Link child to {href} type must be json")

        # <type folder>/<id folder>/<file>: the type folder sits two levels above the file
        href_path = Path(href)
        type_folder = href_path.parent.parent.name
        _assert(ctx, type_folder == expected_type, f"Child link to {href} must point to folder '{expected_type}'")
        _assert(ctx, href_path.name == required_name, f"Child link must point to '{expected_filename}.json'")
        _check_link_title(ctx, link)

        resolved = _resolve(ctx, href)
        _assert(ctx, resolved.exists(), f"must have file for link {resolved}")

def _require_child_links_for_other_json(ctx, files_to_check=None, filename="collection", link_rel='child'):
    """Require a ``link_rel`` link for every JSON file found below the record's folder.

    Args:
        ctx: validation context; ``ctx["file_path"]`` locates the record,
            ``ctx["data"]`` holds its content and ``ctx["errors"]`` collects messages.
        files_to_check: optional list of sub-folder names. For each one,
            ``catalog.json`` is used if it exists, otherwise ``collection.json``
            if that exists. When empty or ``None`` the record's folder is scanned.
        filename: when scanning, the base name (without ``.json``) expected in
            every sub-folder; a falsy value selects every ``*.json`` file instead.
        link_rel: relation of the links that must point to the files found.

    Recorded errors: a link with a type other than ``application/json``, a link
    title that does not match the linked file, a file without a link, and a
    link whose file does not exist.
    """
    current_folder = ctx["file_path"].parent
    target_files = []

    if files_to_check:
        for name in files_to_check:
            candidate = current_folder / name / "catalog.json"
            if not candidate.exists():
                candidate = current_folder / name / "collection.json"
            if candidate.exists():
                target_files.append(candidate)
    elif current_folder.is_dir():
        # pathlib is used for the scan: `os` is not imported by the visible code
        for entry in current_folder.iterdir():
            if not entry.is_dir():
                continue
            if filename:
                candidate = entry / f"{filename}.json"
                if candidate.exists():
                    target_files.append(candidate)
            else:
                target_files.extend(sub for sub in entry.iterdir() if sub.name.endswith(".json"))

    links = [l for l in ctx["data"].get("links", []) if l.get("href") and l.get("rel") == link_rel]
    link_hrefs = [_resolve(ctx, l['href']) for l in links]
    linked_files = set(link_hrefs)

    for link in links:
        _assert(ctx, link.get("type") == "application/json", f"{link_rel} link type error")
        _check_link_title(ctx, link)

    for tf in target_files:
        # Link targets are resolved paths, so the file must be resolved as well;
        # otherwise a relative or symlinked catalog root yields false "missing link" errors
        if tf.resolve() not in linked_files:
            _assert(ctx, False, f"must have link with relation {link_rel} to {tf}")

    for lh in link_hrefs:
        # If we have a link, the file MUST exist
        if not lh.exists():
            _assert(ctx, False, f"must have file for link {lh}")

def _check_themes(ctx):
    """Check the ``themes`` field and the links that belong to it.

    ``themes`` must be a list, the ``themes`` extension must be declared, and
    one entry must use ``THEMES_SCHEME``. Every concept of that entry must have
    a theme catalog on disk and a matching ``related`` link.
    """
    themes = ctx["data"].get("themes")
    themes_is_list = isinstance(themes, list)
    _assert(ctx, themes_is_list, "'themes' must be an array")
    _has_extensions(ctx, ["themes"])
    if not themes_is_list:
        return

    # First entry that uses the OSC theme scheme, if any
    theme_obj = None
    for candidate in themes:
        if candidate.get("scheme") == THEMES_SCHEME:
            theme_obj = candidate
            break
    _assert(ctx, theme_obj is not None, f"must have theme with scheme '{THEMES_SCHEME}'")
    if not theme_obj:
        return

    concepts = theme_obj.get("concepts")
    concepts_is_list = isinstance(concepts, list)
    _assert(ctx, concepts_is_list, "concepts must be an array")
    if not concepts_is_list:
        return

    for concept in concepts:
        theme_path = _resolve(ctx, f"../../themes/{concept['id']}/catalog.json")
        _assert(ctx, theme_path.exists(), f"Referenced theme '{concept['id']}' must exist at {theme_path}")
        _check_related_link(ctx, "themes", concept['id'], "catalog")

def _check_related_link(ctx, type_name, id_val, filename="collection"):
    """Require a ``related`` link ending in ``/<type_name>/<id_val>/<filename>.json``.

    When the link exists it must have type ``application/json`` and a title of
    the form ``"<prefix>: <title of linked file>"``, where the prefix comes from
    ``RELATED_TITLE_PREFIX`` (empty for an unknown ``type_name``).
    """
    suffix = f"/{type_name}/{id_val}/{filename}.json"

    link = None
    for candidate in ctx["data"].get("links", []):
        if candidate.get("rel") == "related" and candidate.get("href", "").endswith(suffix):
            link = candidate
            break

    _assert(ctx, link is not None, f"must have 'related' link to {type_name} with id '{id_val}'")
    if not link:
        return

    _assert(ctx, link.get("type") == "application/json", "related link type must be json")
    title_prefix = RELATED_TITLE_PREFIX.get(type_name, "") + ": "
    _check_link_title(ctx, link, title_prefix)

def _check_osc_cross_ref_array(ctx, field, type_name, required=False):
    """Check every entry of the list stored under ``field`` as a cross reference.

    With ``required`` set, a value that is not a list is recorded as an error.
    Each entry of a list is checked as a required reference to ``type_name``.
    """
    vals = ctx["data"].get(field)
    is_array = isinstance(vals, list)
    if required:
        _assert(ctx, is_array, f"'{field}' must be array")
    if not is_array:
        return
    for entry in vals:
        _check_osc_cross_ref(ctx, entry, type_name, True)

def _check_osc_cross_ref(ctx, value, type_name, required=False):
    """Check that ``value`` refers to an existing ``type_name`` entry with a ``related`` link.

    A falsy ``value`` is skipped unless ``required`` is set. The referenced file
    is ``record.json`` for experiments and workflows, ``collection.json`` for
    projects and products, and ``catalog.json`` for every other type.
    """
    if not (value or required):
        return

    if type_name in ("experiments", "workflows"):
        fname = "record"
    elif type_name in ("projects", "products"):
        fname = "collection"
    else:
        fname = "catalog"

    path_ref = _resolve(ctx, f"../../{type_name}/{value}/{fname}.json")
    _assert(ctx, path_ref.exists(), f"Referenced {type_name} '{value}' must exist")
    _check_related_link(ctx, type_name, value, fname)

def _require_technical_officer(ctx):
    """Require a contact with the ``technical_officer`` role.

    ``contacts`` must be a list. The first contact carrying the role must have
    a name longer than one character and a first email entry whose ``value`` is
    longer than one character.
    """
    contacts = ctx["data"].get("contacts")
    has_contacts = isinstance(contacts, list)
    _assert(ctx, has_contacts, "must have contacts")
    if not has_contacts:
        return

    officer = None
    for contact in contacts:
        if "technical_officer" in contact.get("roles", []):
            officer = contact
            break

    if not officer:
        _assert(ctx, False, "must have technical officer contact")
        return

    _assert(ctx, len(officer.get("name", "")) > 1, "tech officer must have name")
    emails = officer.get("emails", [])
    has_email = len(emails) > 0 and len(emails[0].get("value", "")) > 1
    _assert(ctx, has_email, "tech officer must have email")

In [7]:
# Local clone of the open-science-catalog-metadata repository, expected directly
# in the current user's home directory. Derived from Path.home() rather than a
# hard-coded /home/<user> path so the notebook also runs on other machines.
catalog_root = Path.home() / 'open-science-catalog-metadata'

In [8]:
# save project to catalog

def save_project_collection_to_osc(project_collection, catalog_root):
    """Write a project collection into the catalog clone and link it from the projects catalog.

    A folder named after the project id is created below ``projects`` (this
    fails if the folder already exists), the collection is saved there as
    ``collection.json``, and a ``child`` link to it is appended to
    ``projects/catalog.json``.
    """
    # Folder and file for the new project
    project_dir = catalog_root / 'projects' / project_collection.id
    project_dir.mkdir()
    collection_href = str(project_dir / 'collection.json')
    project_collection.save_object(dest_href=collection_href)

    # Link from the parent projects catalog to the new collection
    projects_catalog_path = catalog_root / 'projects/catalog.json'
    projects_catalog = pystac.Catalog.from_file(projects_catalog_path)
    child_link = pystac.Link(
        rel='child',
        target=f'./{project_collection.id}/collection.json',
        media_type="application/json",
        title=project_collection.title,
    )
    projects_catalog.add_link(child_link)
    projects_catalog.save_object(include_self_link=False, dest_href=projects_catalog_path)

In [12]:
# save_project_collection_to_osc(project_collection, catalog_root)

In [13]:
def _validate_project(ctx):
    """Run every project rule against ``ctx``; failures are collected in ``ctx["errors"]``."""
    record = ctx["data"]

    # Identity of the record
    _assert(ctx, record.get("type") == "Collection", "type must be 'Collection'")
    _ensure_id_is_folder_name(ctx)

    # Link structure
    _require_via_link(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    _check_child_links(ctx)
    _check_stac_links_rel_abs(ctx)

    # OSC specific fields and cross references
    _assert(ctx, record.get("osc:type") == "project", "'osc:type' must be 'project'")
    _check_osc_cross_ref_array(ctx, "osc:workflows", "workflows")
    _check_themes(ctx)

    # Extensions and contacts
    _has_extensions(ctx, ["osc", "contacts"])
    _require_technical_officer(ctx)

In [14]:
# Schema URL for each extension name that _has_extensions can be asked to check
EXTENSION_SCHEMES = {
    "osc": "https://stac-extensions.github.io/osc/v1.0.0/schema.json",
    "themes": "https://stac-extensions.github.io/themes/v1.0.0/schema.json",
    "contacts": "https://stac-extensions.github.io/contacts/v0.1.1/schema.json",
}
# Prefix that 'self' links must start with; _resolve strips it before resolving an href
LINK_PREFIX = "https://esa-earthcode.github.io/open-science-catalog-metadata/"
# Scheme value that identifies the theme entry inspected by _check_themes
THEMES_SCHEME = "https://github.com/stac-extensions/osc#theme"
# Expected title prefix of 'related' links per catalog section; _check_related_link appends ": "
RELATED_TITLE_PREFIX = {
    "projects": "Project",
    "products": "Product",
    "eo-missions": "EO Mission",
    "themes": "Theme",
    "variables": "Variable",
    "workflows": "Workflow",
    "experiments": "Experiment"
}


# Validation context for the project collection:
#   data      - the collection as a plain dict
#   file_path - projects/<id>/collection.json below catalog_root; relative link
#               hrefs are resolved against the folder of this path
#   root      - the catalog root folder
#   errors    - list that the checks append their messages to
ctx = {
    "data": project_collection.to_dict(),
    "file_path": catalog_root / 'projects' / project_collection.id / 'collection.json',
    "root": catalog_root,
    "errors": []
}


In [15]:
import json
# Run the project checks; every failed check appends a message to ctx['errors']
_validate_project(ctx)
# Display the collected messages (an empty list means no check failed)
ctx['errors']

[]

In [16]:
# Display the generated project collection as the cell output
project_collection

### Product

In [17]:
# Define id, title, description and status of the product
product_id = "antarctic-data-cube"
product_title = "Antarctic Data Cube"
product_description = "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. This collection focuses on the Antarctica region and contains combined outputs from these activities analysis ready, cloud-native format."
product_status = "ongoing"

# Define the product license
product_license = 'various'

# Define at most five keywords for the product
product_keywords = [ 
    "ice",
    "Antarctica",
    'Data cube'
] 

# Define the spatial extent in EPSG:4326. If the dataset covers discontinuous regions,
# add the bounding box boundaries of each region to the lists below,
# e.g. a dataset with global coverage is: product_s, product_w, product_n, product_e = [-180.0], [-90.0], [180.0], [90.0]
# NOTE(review): a later cell zips the four lists in this order into bounding boxes. The values
# read as [min lon, min lat, max lon, max lat], which does not match the s/w/n/e suffixes of
# the variable names — confirm before editing.
product_s =  [-180.0]
product_w = [-90.0]
product_n = [180.0]
product_e = [-60.0]

# Define the temporal extent (here: the same start and end dates as the project)
product_start_year, product_start_month, product_start_day = project_start_year, project_start_month, project_start_day,
product_end_year, product_end_month, product_end_day = project_end_year, project_end_month, project_end_day,


# Define the semantic region covered by this product, e.g. Belgium
product_region = "Antarctic"

# Define product themes, e.g. land. Pick one or more from:
# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.
product_themes = ["cryosphere", 'oceans']


# Define the DOI if available, e.g. "https://doi.org/10.57780/s3d-83ad619", else None
product_doi = None

# Define the related project id and title.
# These have to match the new or an already existing project in the catalog.
# The self-assignments below keep the values defined in the project configuration cell;
# replace the right-hand sides to link the product to a different project.
project_id = project_id
project_title = project_title

In [18]:
# Collect the variables, missions and CF parameter names of every product that is
# linked as a child of the 4d-antarctica project collection.
all_variables = []
all_missions = []
all_parameters = []

ant_project = pystac.Collection.from_file(catalog_root / 'projects/4d-antarctica/collection.json')

for child in ant_project.get_children():
    product = pystac.Collection.from_file(catalog_root / 'products' / child.id / 'collection.json')
    product_dict = product.to_dict()
    # A product may lack any of these fields (or hold null); treat that as "nothing to add"
    # instead of aborting the whole loop with a KeyError
    all_parameters.extend(p['name'] for p in product_dict.get('cf:parameter') or [])
    all_variables.extend(product_dict.get('osc:variables') or [])
    all_missions.extend(product_dict.get('osc:missions') or [])

# De-duplicate and sort: a plain list(set(...)) changes order between runs, which
# would make the generated collection differ from one execution to the next
product_variables = sorted(set(all_variables))
product_missions = sorted(set(all_missions))
product_parameters = sorted(set(all_parameters))

In [19]:
# One bounding box per region: the i-th entries of the four coordinate lists belong together
product_bboxes = [
    [first, second, third, fourth]
    for first, second, third, fourth in zip(product_s, product_w, product_n, product_e)
]
spatial_extent = pystac.SpatialExtent(product_bboxes)

# One time interval running from the product start date to the product end date
product_interval = [
    datetime(product_start_year, product_start_month, product_start_day),
    datetime(product_end_year, product_end_month, product_end_day),
]
temporal_extent = pystac.TemporalExtent([product_interval])

product_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

In [33]:
from earthcode.static import create_product_collection, manually_add_product_links

# The same forum thread is used as both the access link and the documentation link
discourse_thread_url = 'https://discourse-earthcode.eox.at/t/antartica-insync-data-cubes/107'

# Build the product collection from the metadata defined in the cells above
product_collection = create_product_collection(
    product_id,
    product_title,
    product_description,
    product_extent,
    product_license,
    product_keywords,
    product_status,
    product_region,
    product_themes,
    product_missions,
    product_variables,
    project_id,
    project_title,
)

manually_add_product_links(
    product_collection,
    access_link=discourse_thread_url,
    documentation_link=discourse_thread_url,
)

In [34]:
# Validate the product collection; the result is shown as the cell output
product_collection.validate()

['https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json',
 'https://stac-extensions.github.io/osc/v1.0.0/schema.json',
 'https://stac-extensions.github.io/themes/v1.0.0/schema.json',
 'https://stac-extensions.github.io/cf/v0.2.0/schema.json']

In [35]:
def _add_product_child_link(stac_cls, stac_path, link_target, link_title):
    """Load the STAC object at ``stac_path``, append a ``child`` link and save it in place.

    ``stac_cls`` is the pystac class used to read the file (``pystac.Catalog``
    or ``pystac.Collection``).
    """
    stac_object = stac_cls.from_file(stac_path)
    stac_object.add_link(
        pystac.Link(
            rel='child',
            target=link_target,
            media_type="application/json",
            title=link_title,
        )
    )
    stac_object.save_object(include_self_link=False, dest_href=stac_path)


def save_product_collection_to_catalog(product_collection, catalog_root):
    """Write a product collection into the catalog clone and add all return links.

    Steps, in order:
      1. create ``products/<product id>/`` (fails if the folder already exists);
      2. add a ``child`` link to the product in the products catalog, in the
         collection of the project named by ``osc:project``, and in the catalog
         of every theme, variable and mission the product references;
      3. set the titles of the product's ``related`` links to variables and
         missions to ``"Variable: <title>"`` / ``"EO Mission: <title>"``;
      4. save the product as ``products/<product id>/collection.json``.

    Raises whatever pystac raises when one of the referenced catalog files
    cannot be read, and ``FileExistsError`` when the product folder exists.
    """
    product_dict = product_collection.to_dict()
    project_id = product_dict['osc:project']
    # Every concept of every theme entry is a theme of the product, not just the
    # first concept of each entry; dict.fromkeys drops duplicates but keeps order
    product_themes = list(dict.fromkeys(
        concept['id']
        for theme in product_dict['themes']
        for concept in theme['concepts']
    ))
    product_variables = list(product_dict['osc:variables'])
    product_missions = list(product_dict['osc:missions'])

    # create a directory under /products with the same ID as the product ID
    product_dir = catalog_root / 'products' / product_collection.id
    product_dir.mkdir()

    link_title = f'{product_collection.title}'
    # Path to the product as seen from <section>/<id>/catalog.json or collection.json
    return_link_target = f'../../products/{product_collection.id}/collection.json'

    # create a link from the parent products catalog to the new product
    _add_product_child_link(
        pystac.Catalog,
        catalog_root / 'products/catalog.json',
        f'./{product_collection.id}/collection.json',
        link_title,
    )

    # add to project
    _add_product_child_link(
        pystac.Collection,
        catalog_root / f'projects/{project_id}/collection.json',
        return_link_target,
        link_title,
    )

    # add theme return links
    for theme in product_themes:
        _add_product_child_link(
            pystac.Catalog, catalog_root / f'themes/{theme}/catalog.json', return_link_target, link_title
        )

    # add variable return links
    for var in product_variables:
        _add_product_child_link(
            pystac.Catalog, catalog_root / f'variables/{var}/catalog.json', return_link_target, link_title
        )

    # add mission return links
    for mission in product_missions:
        _add_product_child_link(
            pystac.Catalog, catalog_root / f'eo-missions/{mission}/catalog.json', return_link_target, link_title
        )

    # update link titles
    title_prefixes = {'variables': 'Variable: ', 'eo-missions': 'EO Mission: '}
    for link in product_collection.get_links('related'):
        # Expected shape: ../../<section>/<id>/catalog.json
        link_elements = link.href.split('/')
        if len(link_elements) < 4:
            # Too short to name a section and an id; leave such links untouched
            continue
        section, ref_id = link_elements[2], link_elements[3]
        if section in title_prefixes:
            catalog_title = pystac.Catalog.from_file(catalog_root / f'{section}/{ref_id}/catalog.json').title
            link.title = title_prefixes[section] + catalog_title

    # save the collection in the new folder
    product_collection.save_object(
        dest_href=str(product_dir / 'collection.json'),
    )

In [36]:
# Write the product into the catalog clone. This reads projects/<osc:project>/collection.json,
# so the related project must already exist in the catalog clone when this cell runs.
save_product_collection_to_catalog(product_collection, catalog_root)

In [37]:
def _validate_product(ctx):
    """Run every product rule against ``ctx``; failures are collected in ``ctx["errors"]``."""
    record = ctx["data"]

    # Identity of the record
    _assert(ctx, record.get("type") == "Collection", "type must be 'Collection'")
    _has_extensions(ctx, ["osc"])
    _ensure_id_is_folder_name(ctx)

    # Link structure (item and child links are exempt from the relative-link rule)
    _require_via_link(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    _check_stac_links_rel_abs(ctx, include_item_child=False)

    # OSC specific fields
    _assert(ctx, record.get("osc:type") == "product", "'osc:type' must be 'product'")
    project_ref = record.get("osc:project")
    _assert(ctx, isinstance(project_ref, str), "'osc:project' must be a string")

    # Cross references to other catalog sections
    _check_osc_cross_ref(ctx, project_ref, "projects", required=True)
    _check_osc_cross_ref_array(ctx, "osc:variables", "variables")
    _check_osc_cross_ref_array(ctx, "osc:missions", "eo-missions")
    _check_osc_cross_ref(ctx, record.get("osc:experiment"), "experiments")

    _check_themes(ctx)

In [38]:
# Validation context for the product collection: the collection as a dict, the path of
# its collection.json below the catalog root, the root itself and an empty error list
product_file_path = catalog_root / 'products' / product_collection.id / 'collection.json'
ctx = dict(
    data=product_collection.to_dict(),
    file_path=product_file_path,
    root=catalog_root,
    errors=[],
)

In [39]:
# Run the product checks; every failed check appends a message to ctx['errors']
_validate_product(ctx)

In [40]:
# Display the collected messages (an empty list means no check failed)
ctx['errors']

[]

### Add items

In [183]:

from xstac import xarray_to_stac
import json
import shapely
import xarray as xr
import numpy as np
import datetime

# Bounding box shared by all items; the four values are passed positionally to shapely.box
bbox = [-180.0, -90.0, 180.0, -60.0]
footprint = shapely.box(*bbox)
# Round-trip through JSON so the geometry mapping consists of plain JSON types only
geometry = json.loads(json.dumps(footprint.__geo_interface__))

In [190]:
# NOTE(review): this is the same text as project_description, not product_description —
# confirm that the project text is intended for the item collection.
collection_description = "ESA's Polar Science Cluster science activities aim to improve the understanding of Polar regions through advanced satellite Earth observation and scientific investigation. As part of work, there are multiple datasets generated for scientific exploitation. This project aims to combine and preprocess these datasets in order to make them more accessible and useful."

# Spatial extent: the shared item bounding box; temporal extent: one fixed interval
collection_extent = {
    "spatial": {"bbox": [bbox]},
    "temporal": {"interval": [["1980-01-01T18:07:12Z", "2022-01-12T18:59:59Z"]]},
}

# Collection that will hold the items; id and title are taken from the product collection
collection = pystac.Collection.from_dict({
    "type": "Collection",
    "id": product_collection.id,
    "stac_version": "1.0.0",
    "title": product_collection.title,
    "description": collection_description,
    "extent": collection_extent,
    "license": "various",
    "links": [],
})

In [191]:
combined_link = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/antarctica-combined.zarr'
# NOTE(review): `ds` is not used by the rest of this cell; presumably the store is
# opened only to confirm that it is reachable — TODO confirm
ds = xr.open_zarr(combined_link)

# STAC timestamps must be RFC 3339 formatted; isoformat() yields the 'T'-separated
# form, whereas str() puts a space between date and time. A single timestamp is
# taken so that start and end are identical.
# NOTE(review): "now" is used as a placeholder, not the real data coverage — TODO confirm
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-antarctica_combined",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,

    "assets": {
        "data": {
            "href": combined_link,
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes DataCube'
        }
    },
    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes data for the Antarctica Region',
        "title": 'Bedrock topography, Ice shelf Basal Melt, Groundlines, Subglacial Lakes and Supraglacial Lakes DataCube',
        "license": 'various',
    }
}

combined_item = pystac.Item.from_dict(template)

collection.add_item(combined_item)

In [192]:
icetemp = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/icetemp.zarr'
# NOTE(review): `ds` is not used by the rest of this cell; presumably the store is
# opened only to confirm that it is reachable — TODO confirm
ds = xr.open_zarr(icetemp)

# STAC timestamps must be RFC 3339 formatted; isoformat() yields the 'T'-separated
# form, whereas str() puts a space between date and time. A single timestamp is
# taken so that start and end are identical.
# NOTE(review): "now" is used as a placeholder, not the real data coverage — TODO confirm
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-icetemp_profiles",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,

    "assets": {
        "data": {
            "href": icetemp,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Ice Temperature Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Temperature Profiles from ESA science Activities',
        "title": 'Ice Temperature Data Cube',
        "license": 'various',
    },
}

icetemp_item = pystac.Item.from_dict(template)

collection.add_item(icetemp_item)

In [193]:
# FIXME(review): this URL ends in icetemp.zarr, i.e. it is the same store as the ice
# temperature item although this item is described as ice velocity — looks like a
# copy-paste slip. Left unchanged because the correct store name cannot be confirmed here.
icevel = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/icetemp.zarr'
# NOTE(review): `ds` is not used by the rest of this cell; presumably the store is
# opened only to confirm that it is reachable — TODO confirm
ds = xr.open_zarr(icevel)

# STAC timestamps must be RFC 3339 formatted; isoformat() yields the 'T'-separated
# form, whereas str() puts a space between date and time. A single timestamp is
# taken so that start and end are identical.
# NOTE(review): "now" is used as a placeholder, not the real data coverage — TODO confirm
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-icevel",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,
    "assets": {
        "data": {
            "href": icevel,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Ice Velocity Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Velocity from ESA Polar Science Activities',
        "title": 'Ice Velocity Data Cube',
        'license': 'various',
    }
}

icevel_item = pystac.Item.from_dict(template)

collection.add_item(icevel_item)

In [194]:
sec = 'https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/antarctica_cube/sec.zarr'
# NOTE(review): `ds` is not used by the rest of this cell; presumably the store is
# opened only to confirm that it is reachable — TODO confirm
ds = xr.open_zarr(sec)

# STAC timestamps must be RFC 3339 formatted; isoformat() yields the 'T'-separated
# form, whereas str() puts a space between date and time. A single timestamp is
# taken so that start and end are identical.
# NOTE(review): "now" is used as a placeholder, not the real data coverage — TODO confirm
item_timestamp = datetime.datetime.now(datetime.UTC).isoformat()

template = {
    "id": f"{product_id}-sec",
    "type": "Feature",
    "stac_version": "1.0.0",

    "geometry": geometry,
    "bbox": bbox,
    "assets": {
        "data": {
            "href": sec,  # or local path
            "type": "application/vnd+zarr",
            "roles": ["data"],
            "title": 'Surface elevation change Data Cube'
        }
    },

    'properties': {
        'start_datetime': item_timestamp,
        'end_datetime': item_timestamp,
        "description": 'Data cube with Ice Surface elevation change from ESA Polar Science Activities',
        "title": 'Surface elevation change Data Cube',
        'license': 'various',
    }
}

sec_item = pystac.Item.from_dict(template)

collection.add_item(sec_item)

In [195]:
# Save the collection as a self-contained catalog below ../../prr_preview/<product id>
collection.normalize_and_save(f'../../prr_preview/{product_collection.id}', catalog_type=pystac.CatalogType.SELF_CONTAINED)

0. Install git
1. Fork the repository on github
2. Clone the repository 
3. Create a new branch - `git checkout -b project_branch`
4. Make the changes below ...
5. Open a pull request against the main Open Science Catalog repository

- check for duplicates
- check for themes, missions, variables backward links
- check that products do not have backward links from themes, missions, variables
- check that 