Skip to content

Commit 15272a7

Browse files
committed
[WIP] Add option to convert_ids to append provenance data to the converted IDS
Tests need to be finished once IMAS-5304 is merged.
1 parent 743f436 commit 15272a7

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

imaspy/ids_convert.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import copy
7+
import datetime
78
import logging
89
from functools import lru_cache
910
from pathlib import Path
@@ -13,6 +14,7 @@
1314
import numpy
1415
from packaging.version import InvalidVersion, Version
1516

17+
import imaspy
1618
from imaspy.dd_zip import parse_dd_version
1719
from imaspy.ids_base import IDSBase
1820
from imaspy.ids_data_type import IDSDataType
@@ -397,6 +399,7 @@ def convert_ids(
397399
version: Optional[str],
398400
*,
399401
deepcopy: bool = False,
402+
provenance_origin_uri: str = "",
400403
xml_path: Optional[str] = None,
401404
factory: Optional[IDSFactory] = None,
402405
target: Optional[IDSToplevel] = None,
@@ -427,6 +430,9 @@ def convert_ids(
427430
deepcopy: When True, performs a deep copy of all data. When False (default),
428431
numpy arrays are not copied and the converted IDS shares the same underlying
429432
data buffers.
433+
provenance_origin_uri: When nonempty, add an entry in the provenance data in
434+
``ids_properties`` to indicate that this IDS has been converted, and it was
435+
originally stored at the given uri.
430436
xml_path: Path to a data dictionary XML file that should be used instead of the
431437
released data dictionary version specified by ``version``.
432438
factory: Existing IDSFactory to use as target version.
@@ -468,9 +474,49 @@ def convert_ids(
468474

469475
_copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map)
470476
logger.info("Conversion of IDS %s finished.", ids_name)
477+
if provenance_origin_uri:
478+
_add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri)
471479
return target_ids
472480

473481

482+
def _add_provenance_entry(
    target_ids: IDSToplevel, source_version: str, provenance_origin_uri: str
) -> None:
    """Record in ``target_ids`` that it is the result of a DD version conversion.

    The entry is attached to the provenance node that describes the whole IDS
    (the node whose ``path`` is empty). If no such node exists yet, a new one is
    appended to ``ids_properties.provenance.node``. When the target IDS has no
    ``provenance`` structure in its ``ids_properties``, a warning is logged and
    the IDS is left untouched.

    Args:
        target_ids: Converted IDS that receives the provenance entry.
        source_version: DD version of the IDS before it was converted.
        provenance_origin_uri: URI where the original IDS was stored. It is placed
            at the start of the provenance text.
    """
    ids_properties = target_ids.ids_properties
    # provenance node was added in DD 3.34.0
    if not hasattr(ids_properties, "provenance"):
        logger.warning(
            "Cannot add provenance entry for DD conversion: "
            "target IDS does not have a provenance property."
        )
        return

    # Find the node corresponding to the whole IDS, or create one if there is none
    nodes = ids_properties.provenance.node
    for node in nodes:
        if node.path == "":
            break
    else:
        # No node found for the whole IDS, create a new one:
        nodes.resize(len(nodes) + 1, keep=True)
        node = nodes[-1]

    # Populate the node
    source_txt = (
        f"{provenance_origin_uri}; "
        f"This IDS has been converted from DD {source_version} to "
        f"DD {target_ids._dd_version} by IMASPy {imaspy.__version__}."
    )
    if hasattr(node, "reference"):
        # DD version after IMAS-5304
        node.reference.resize(len(node.reference) + 1, keep=True)
        reference = node.reference[-1]
        reference.name = source_txt
        # datetime.timezone.utc is used instead of the datetime.UTC alias, which
        # only exists on Python 3.11+; the resulting timestamp is identical.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        timestamp = now_utc.isoformat(timespec="seconds")
        # Use the compact "Z" suffix rather than the "+00:00" UTC offset
        reference.time = timestamp.replace("+00:00", "Z")
    else:
        # DD before IMAS-5304 (between 3.34.0 and 3.41.0)
        node.sources.append(source_txt)  # sources is a STR_1D (=list of strings)
518+
519+
474520
def _copy_structure(
475521
source: IDSStructure,
476522
target: IDSStructure,

imaspy/test/test_ids_convert.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,28 @@ def test_dbentry_autoconvert2(backend, worker_id, tmp_path):
174174
entry_331.close()
175175

176176

177+
def test_provenance_entry(factory):
    """Check the provenance entry written by ``convert_ids``.

    The IDS is first converted to DD 3.31.0, which has no provenance data
    structure, to verify that requesting a provenance entry does not raise an
    error there. It is then converted to DD 3.38.0, where exactly one entry for
    the whole IDS is expected.
    """
    original = factory.core_profiles()
    # DD 3.31.0 lacks the provenance structure: this call must simply not fail
    downgraded = convert_ids(original, "3.31.0", provenance_origin_uri="<testdata>")
    # Convert back to 3.38.0, which does have the provenance structure
    upgraded = convert_ids(downgraded, "3.38.0", provenance_origin_uri="<testdata>")

    provenance_nodes = upgraded.ids_properties.provenance.node
    assert len(provenance_nodes) == 1
    whole_ids_node = provenance_nodes[0]
    assert whole_ids_node.path == ""
    assert len(whole_ids_node.sources) == 1

    provenance_txt = whole_ids_node.sources[0]
    # The text must mention the origin URI, both DD versions and IMASPy
    for expected in ("<testdata>", "3.31.0", "3.38.0", "IMASPy"):
        assert expected in provenance_txt
195+
196+
# TODO: test logic branch for node.reference after IMAS-5304 is merged
197+
198+
177199
@pytest.fixture
178200
def dd4factory():
179201
try:

0 commit comments

Comments
 (0)