In [None]:
import os
from io import BytesIO, StringIO

import polars as pl
import requests
from rdflib import Graph

def read_rdf_from_url(url: str, timeout: float = 30.0) -> pl.DataFrame:
    """Fetch a JSON-LD document from a URL and flatten it into a polars DataFrame.

    Args:
        url: Address of the JSON-LD resource to download.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        A DataFrame with the string columns "subject", "predicate" and
        "object", one row per triple in the parsed graph. The three columns
        are present even when the graph contains no triples.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # requests waits indefinitely unless a timeout is given explicitly.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Only JSON-LD is attempted; there is no fallback to other RDF serialisations.
    g = Graph()
    g.parse(data=response.text, format="json-ld")

    # One record per triple, with every RDF term rendered as plain text.
    data = [{"subject": str(s), "predicate": str(p), "object": str(o)} for s, p, o in g]

    # An explicit schema keeps the column layout stable for an empty graph,
    # where there are no records to infer the columns from.
    return pl.DataFrame(
        data,
        schema={"subject": pl.Utf8, "predicate": pl.Utf8, "object": pl.Utf8},
    )

def read_csv_from_url(url: str, timeout: float = 30.0) -> pl.DataFrame:
    """Download a CSV file from a URL and parse it into a polars DataFrame.

    Args:
        url: Address of the CSV resource to download.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The parsed CSV content.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # requests waits indefinitely unless a timeout is given explicitly.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Parse the raw bytes instead of response.text: for text/* responses that
    # carry no charset, requests decodes the body as ISO-8859-1, which turns
    # UTF-8 labels into mojibake. polars decodes the bytes as UTF-8 itself.
    return pl.read_csv(BytesIO(response.content))

# Working directories used by this notebook; creation is a no-op for
# directories that are already present.
folders = [
    "mappings",
    "model",
    "resources",
    "wmdr2",
    "autogen",
]
for folder in folders:
    os.makedirs(name=folder, exist_ok=True)

In [2]:
# I-ADOPT model (cf. https://umltool.ogc.org/index.php?m=7&o=2F3BC921-1482-4aab-B393-5FFDB6186EA9)

# Classes of the model, numbered consecutively starting at 1.
iadopt_class_names = ["Variable", "Property", "Entity", "Constraint", "VariableSet"]
df_iadopt_classes = pl.DataFrame(
    {
        "id": list(range(1, len(iadopt_class_names) + 1)),
        "name": iadopt_class_names,
    }
)
df_iadopt_classes.write_csv("model/iadopt_class.csv")

# Roles an entity can take; the 'unclear' role carries no value.
iadopt_role_names = ["unclear", "object_of_interest", "matrix", "context"]
df_iadopt_roles = pl.DataFrame(
    {
        "id": list(range(1, len(iadopt_role_names) + 1)),
        "name": iadopt_role_names,
        "value": [None, 1, 2, 4],
    }
)
df_iadopt_roles.write_csv("model/iadopt_role.csv")


In [None]:
# Retrieve code tables from codes.wmo.int/wmdr
WMDR_CODE_TABLE_URL = "https://codes.wmo.int/wmdr/{table}?_format=csv&status=valid"


def fetch_wmdr_code_table(table: str, resource_file: str) -> pl.DataFrame:
    """Download one WMDR code table as CSV and save a copy of it.

    Args:
        table: Name of the code table as it appears in the registry URL.
        resource_file: Path the unmodified table is written to.

    Returns:
        The downloaded table.
    """
    df_table = read_csv_from_url(WMDR_CODE_TABLE_URL.format(table=table))
    df_table.write_csv(file=resource_file)
    return df_table


def to_wmdr2_vocabulary(df_table: pl.DataFrame, autogen_file: str) -> pl.DataFrame:
    """Add "id" and "name" columns to a code table and save the result.

    "id" is a copy of "skos:notation" and "name" a copy of "rdfs:label";
    all original columns are kept.

    Args:
        df_table: Code table containing "skos:notation" and "rdfs:label".
        autogen_file: Path the extended table is written to.

    Returns:
        The extended table.
    """
    df_vocabulary = df_table.with_columns(
        pl.col("skos:notation").alias("id"),
        pl.col("rdfs:label").alias("name"),
    )
    df_vocabulary.write_csv(autogen_file)
    return df_vocabulary


# ObservedVariableAtmosphere types (only archived here; disaggregated later)
df_wmdr_ObservedVariableAtmosphere = fetch_wmdr_code_table(
    "ObservedVariableAtmosphere", "resources/wmdr_observed_variable_atmosphere.csv"
)

# Domain types
df_wmdr_Domain = fetch_wmdr_code_table("Domain", "resources/wmdr_domain.csv")
df_wmdr2_domain_types = to_wmdr2_vocabulary(df_wmdr_Domain, "autogen/wmdr2_domain.csv")

# Geometry types
df_wmdr_Geometry = fetch_wmdr_code_table("Geometry", "resources/wmdr_geometry.csv")
df_wmdr2_geometry_types = to_wmdr2_vocabulary(df_wmdr_Geometry, "autogen/wmdr2_geometry.csv")

# Matrix types
df_wmdr_Matrix = fetch_wmdr_code_table("Matrix", "resources/wmdr_matrix.csv")
df_wmdr2_matrix_types = to_wmdr2_vocabulary(df_wmdr_Matrix, "autogen/wmdr2_matrix.csv")

In [None]:
# Disaggregate existing WMDR ObservedVariableAtmosphere types using the mappings as suggested by Morgan and Gao, March 2025
df_mapping = pl.read_csv("mappings/WMDR_ObservedVariableAtmosphere_IADOPT_mapping_v01.csv")
# Remove the mapping file's own copies of these columns before combining it
# with the code table (presumably so they do not take part in the alignment
# below -- confirm against the mapping file).
df_mapping = df_mapping.drop(["SeqNum", "@id", "@notation", "@status", "dct:description", "rdf:type",])

# how="align" combines the two frames on the columns they have in common.
df = pl.concat([df_wmdr_ObservedVariableAtmosphere, df_mapping], how="align")
display(df.head())

# Convert 'I-ADOPT Type' to various role columns for further treatment.
# A missing 'I-ADOPT Type' would otherwise give null flags, and rows with null
# flags match none of the later has_ooi/has_property filters; filling with
# False keeps them visible as "neither OoI nor Property".
df = df.with_columns(
    pl.col('I-ADOPT Type').str.contains("OoI").fill_null(False).alias("has_ooi"),
    pl.col('I-ADOPT Type').str.contains("Property").fill_null(False).alias("has_property"),
)
df = df.drop(["@id", "rdf:type", "I-ADOPT Type", "OoI"])
display(df.head())

@id,dct:description,rdf:type,rdfs:label,skos:notation,I-ADOPT Type,Comment,OoI,Property,Constraints
str,str,str,str,str,str,str,str,str,str
"""<http://codes.wmo.int/wmdr/Obs…","""''@en""","""skos:Concept""","""Total lightning density""","""'12001'""","""Property""",,"""lightning""","""flash density?""",
"""<http://codes.wmo.int/wmdr/Obs…","""''@en""","""skos:Concept""","""Lightning density cloud-to-gro…","""'12002'""","""Property""",,"""lightning""","""flash density?""",
"""<http://codes.wmo.int/wmdr/Obs…","""'The direction of horizontal w…","""skos:Concept""","""Horizontal wind direction at s…","""'12005'""","""OoI&Property""",,"""wind""","""wind direction""",
"""<http://codes.wmo.int/wmdr/Obs…","""'Commonly refers to the speed …","""skos:Concept""","""Horizontal wind speed at speci…","""'12006'""","""OoI&Property""",,"""wind""","""wind speed""",
"""<http://codes.wmo.int/wmdr/Obs…","""'IUPAC: 1,1,1-trichloro-2,2,2-…","""skos:Concept""","""Cl3CCF3 (1,1,1-trichloro-2,2,2…","""'12016'""","""OoI""",,"""Cl3CCF3 (1,1,1-trichloro-2,2,2…",,


dct:description,rdfs:label,skos:notation,Comment,Property,Constraints,has_ooi,has_property
str,str,str,str,str,str,bool,bool
"""''@en""","""Total lightning density""","""'12001'""",,"""flash density?""",,False,True
"""''@en""","""Lightning density cloud-to-gro…","""'12002'""",,"""flash density?""",,False,True
"""'The direction of horizontal w…","""Horizontal wind direction at s…","""'12005'""",,"""wind direction""",,True,True
"""'Commonly refers to the speed …","""Horizontal wind speed at speci…","""'12006'""",,"""wind speed""",,True,True
"""'IUPAC: 1,1,1-trichloro-2,2,2-…","""Cl3CCF3 (1,1,1-trichloro-2,2,2…","""'12016'""",,,,True,False


In [None]:
# Extract information from the df dataframe, but leave additional columns untouched for now. Add columns "id" and "name" for use in vocabulary builder.

# Boolean role flags; used directly as filter masks instead of comparing
# them against True/False.
is_ooi = pl.col("has_ooi")
is_property = pl.col("has_property")

# extract property: rows flagged as Property only
df_wmdr2_property_types = df.filter(~is_ooi & is_property).drop(["has_ooi", "has_property"])
df_wmdr2_property_types = df_wmdr2_property_types.with_columns(
    # consecutive ids starting at 1, in row order
    pl.Series("id", range(1, df_wmdr2_property_types.height + 1)),
    pl.col("rdfs:label").alias("name"),
)
display(df_wmdr2_property_types.head())
df_wmdr2_property_types.write_csv(file="autogen/wmdr2_property.csv")

# extract OoI: rows flagged as object of interest only
df_wmdr2_object_of_interest_types = df.filter(is_ooi & ~is_property).drop(["has_ooi", "has_property", "Property"])
df_wmdr2_object_of_interest_types = df_wmdr2_object_of_interest_types.with_columns(
    # consecutive ids starting at 1, in row order
    pl.Series("id", range(1, df_wmdr2_object_of_interest_types.height + 1)),
    pl.col("rdfs:label").alias("name"),
)
display(df_wmdr2_object_of_interest_types.head())
df_wmdr2_object_of_interest_types.write_csv("autogen/wmdr2_object_of_interest.csv")

# extract *mixed* types. These will need to be addressed manually
# (rows carrying both flags, or neither of them)
df_wmdr2_mixed_types = df.filter((is_ooi & is_property) | (~is_ooi & ~is_property))
display(df_wmdr2_mixed_types.head())
df_wmdr2_mixed_types.write_csv(file="autogen/wmdr2_mixed.csv")


dct:description,rdfs:label,skos:notation,Comment,Property,Constraints,id,name
str,str,str,str,str,str,i64,str
"""''@en""","""Total lightning density""","""'12001'""",,"""flash density?""",,1,"""Total lightning density"""
"""''@en""","""Lightning density cloud-to-gro…","""'12002'""",,"""flash density?""",,2,"""Lightning density cloud-to-gro…"
"""'Atmospheric O2/N2 ratio'@en""","""O2/N2 ratio""","""'12021'""",,"""Ratio""",,3,"""O2/N2 ratio"""
"""'Height of vertically localize…","""Aerosol layer height""","""'12162'""",,"""Aerosol layer height""",,4,"""Aerosol layer height"""
"""'Height above the surface to w…","""Mixed layer height""","""'12163'""",,"""Mixed layer height""",,5,"""Mixed layer height"""


dct:description,rdfs:label,skos:notation,Comment,Constraints,id,name
str,str,str,str,str,i64,str
"""'IUPAC: 1,1,1-trichloro-2,2,2-…","""Cl3CCF3 (1,1,1-trichloro-2,2,2…","""'12016'""",,,1,"""Cl3CCF3 (1,1,1-trichloro-2,2,2…"
"""'IUPAC: dibromo(difluoro)metha…","""CBr2F2 (dibromodifluoromethane…","""'12017'""",,,2,"""CBr2F2 (dibromodifluoromethane…"
"""'IUPAC: 1,2-dibromo-1,1,2,2-te…","""C2Br2F4 (1,2-dibromo-1,1,2,2-t…","""'12018'""",,,3,"""C2Br2F4 (1,2-dibromo-1,1,2,2-t…"
"""'IUPAC: 1,1-dichloro-2-fluoroe…","""C2H3Cl2F (1,1-dichloro-2-fluor…","""'12019'""",,,4,"""C2H3Cl2F (1,1-dichloro-2-fluor…"
"""'All pollen taxa'@en""","""Total pollen""","""'12022'""","""with Constraint""",,5,"""Total pollen"""


dct:description,rdfs:label,skos:notation,Comment,Property,Constraints,has_ooi,has_property
str,str,str,str,str,str,bool,bool
"""'The direction of horizontal w…","""Horizontal wind direction at s…","""'12005'""",,"""wind direction""",,True,True
"""'Commonly refers to the speed …","""Horizontal wind speed at speci…","""'12006'""",,"""wind speed""",,True,True
"""'A measure of light attenuatio…","""Particle light extinction coef…","""'12145'""",,"""Extinction coefficient""",,True,True
"""'Ratio of particle scattering …","""Particle single scattering alb…","""'12146'""",,"""single scattering albedo""",,True,True
"""'A measure of dependence of th…","""ÃngstrÃ¶m exponent for partic…","""'12147'""",,"""Angstrom Exponent""","""extinction""",True,True


In [6]:
# Retrieve vocabularies from ACTRIS
ACTRIS_CONCEPT_URL = (
    "https://vocabulary.actris.nilu.no/skosmos/rest/v1/actris_vocab/data"
    "?uri=https%3A%2F%2Fvocabulary.actris.nilu.no%2Factris_vocab%2F{concept}"
    "&format=application/ld%2Bjson"
)


def fetch_actris_vocabulary(concept: str, resource_file: str) -> pl.DataFrame:
    """Download one ACTRIS vocabulary concept as JSON-LD and save its triples.

    Args:
        concept: Last path segment of the concept URI in the ACTRIS vocabulary.
        resource_file: Path the triple table is written to as CSV.

    Returns:
        The triples as returned by ``read_rdf_from_url``.
    """
    df_triples = read_rdf_from_url(ACTRIS_CONCEPT_URL.format(concept=concept))
    df_triples.write_csv(file=resource_file)
    return df_triples


# property types
df_actris_property_types = fetch_actris_vocabulary(
    "variablepropertyofinterest", "resources/actris_property.csv"
)

# object_of_interest types
df_actris_object_of_interest_types = fetch_actris_vocabulary(
    "objectofinterest", "resources/actris_object_of_interest.csv"
)

# (observation) geometry types
df_actris_geometry_types = fetch_actris_vocabulary(
    "variablegeometry", "resources/actris_geometry.csv"
)

# matrix types
df_actris_matrix_types = fetch_actris_vocabulary(
    "variablematrix", "resources/actris_matrix.csv"
)

# variable constraint types
df_actris_constraint_types = fetch_actris_vocabulary(
    "variableconstraints", "resources/actris_constraint.csv"
)