-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathdd_zip.py
297 lines (243 loc) · 10.1 KB
/
dd_zip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# This file is part of IMAS-Python.
# You should have received the IMAS-Python LICENSE file with this project.
""" Extract DD versions from a zip file.
The zip file contains files as
* `data-dictionary/3.30.0.xml`
* `data-dictionary/3.29.0.xml`
multiple paths are checked. See `ZIPFILE_LOCATIONS`.
First the environment variable IMAS_DDZIP is checked.
If that exists and points to a file we will attempt to open it.
Then, IDSDef.zip is searched in site-packages, the current folder,
in .config/imas/ (`$$XDG_CONFIG_HOME`) and in
the assets/ folder within the IMAS-Python package.
1. `$$IMAS_DDZIP`
2. The virtual environment
3. USER_BASE`imas/IDSDef.zip`
4. All `site-packages/imas/IDSDef.zip`
5. `./IDSDef.zip`
6. `~/.config/imas/IDSDef.zip`
7. `__file__/../../imas/assets/IDSDef.zip`
All files are checked, i.e. if your .config/imas/IDSDef.zip is outdated
the IMAS-Python-packaged version will be used.
The `assets/IDSDef.zip` provided with the package can be updated
with the `python setup.py build_DD` command, which is also performed on install
if you have access to the ITER data-dictionary git repo.
Reinstalling imas thus also will give you access to the latest DD versions.
"""
import logging
import os
import re
import xml.etree.ElementTree as ET
from contextlib import contextmanager, nullcontext
from functools import lru_cache
from pathlib import Path
from typing import Dict, Iterator, List, Tuple, Union
from zipfile import ZipFile
try:
from importlib.resources import as_file, files
try:
from importlib.resources.abc import Traversable
except ModuleNotFoundError: # Python 3.9/3.10 support
from importlib.abc import Traversable
except ImportError: # Python 3.8 support
from importlib_resources import as_file, files
from importlib_resources.abc import Traversable
from packaging.version import InvalidVersion, Version
import imas
from imas.exception import UnknownDDVersion
logger = logging.getLogger(__name__)
def _get_xdg_config_dir():
"""
Return the XDG config directory, according to the XDG base directory spec:
https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
"""
return os.environ.get("XDG_CONFIG_HOME") or str(Path.home() / ".config")
def _generate_zipfile_locations() -> Iterator[Union[Path, Traversable]]:
"""Build a list of potential data dictionary locations.
We start with the path (if any) of the IMAS_DDZIP env var.
Then we look for IDSDef.zip in the current folder, in the
default XDG config dir (~/.config/imas/IDSDef.zip) and
finally in the assets distributed with this package.
"""
zip_name = "IDSDef.zip"
environ = os.environ.get("IMAS_DDZIP")
if environ:
yield Path(environ).resolve()
yield Path(zip_name).resolve()
yield Path(_get_xdg_config_dir()).resolve() / "imas" / zip_name
yield files(imas) / "assets" / zip_name
def parse_dd_version(version: str) -> Version:
try:
return Version(version)
except InvalidVersion:
# This is probably a dev build of the DD, of which the version is obtained with
# `git describe` in the format X.Y.Z-<ncommits>-g<hash> with X.Y.Z the previous
# released version: try again after converting the first dash to a + and treat
# it like a `local` version specifier, which is recognized as newer.
# https://packaging.python.org/en/latest/specifications/version-specifiers/
return Version(version.replace("-", "+", 1))
# Expected use case is one, maximum two DD versions
# Cache is bigger than that: in pytest we currently use the following DD versions:
# - 3.22.0
# - 3.25.0
# - 3.28.0
# - 3.39.0
# - 4.0.0 (if available)
# - Environment default
# - IDS_fake_toplevel.xml
# - IDS_minimal.xml
# - IDS_minimal_2.xml
# - IDS_minimal_struct_array.xml
# - IDS_minimal_types.xml
_DD_CACHE_SIZE = 8
ZIPFILE_LOCATIONS = list(_generate_zipfile_locations())
def dd_etree(version=None, xml_path=None):
"""Return the DD element tree corresponding to the provided dd_version or xml_file.
By default (``dd_version`` and ``dd_xml`` are not supplied), this will attempt
to get the version from the environment (``IMAS_VERSION``) and use the latest
available version as fallback.
You can also specify a specific DD version to use (e.g. "3.38.1") or point to a
specific data-dictionary XML file. These options are exclusive.
Args:
version: DD version string, e.g. "3.38.1".
xml_path: XML file containing data dictionary definition.
"""
if version and xml_path:
raise ValueError("version and xml_path cannot be provided both.")
if not version and not xml_path:
# Figure out which DD version to use
if "IMAS_VERSION" in os.environ:
imas_version = os.environ["IMAS_VERSION"]
if imas_version in dd_xml_versions():
# Use bundled DD version when available
version = imas_version
elif "IMAS_PREFIX" in os.environ:
# Try finding the IDSDef.xml in this installation
imas_prefix = Path(os.environ["IMAS_PREFIX"]).resolve()
xml_file = imas_prefix / "include" / "IDSDef.xml"
if xml_file.exists():
xml_path = str(xml_file)
if not version and not xml_path:
logger.warning(
"Unable to load IMAS version %s, falling back to latest version.",
imas_version,
)
if not version and not xml_path:
# Use latest available from
version = latest_dd_version()
# Do the actual loading in a cached method:
return _load_etree(version, xml_path)
@lru_cache(_DD_CACHE_SIZE)
def _load_etree(version, xml_path):
if xml_path:
logger.info("Parsing data dictionary from file: %s", xml_path)
tree = ET.parse(xml_path)
else:
xml = get_dd_xml(version)
logger.info("Parsing data dictionary version %s", version)
tree = ET.ElementTree(ET.fromstring(xml))
return tree
@contextmanager
def _open_zipfile(path: Union[Path, Traversable]) -> Iterator[ZipFile]:
"""Open a zipfile, given a Path or Traversable."""
if isinstance(path, Path):
ctx = nullcontext(path)
else:
ctx = as_file(path)
with ctx as file:
with ZipFile(file) as zipfile:
yield zipfile
@lru_cache
def _read_dd_versions() -> Dict[str, Tuple[Union[Path, Traversable], str]]:
"""Traverse all possible DD zip files and return a map of known versions.
Returns:
version_map: version -> (zipfile path, filename)
"""
versions = {}
xml_re = re.compile(r"^data-dictionary/([0-9.]+)\.xml$")
for path in ZIPFILE_LOCATIONS:
if not path.is_file():
continue
with _open_zipfile(path) as zipfile:
for fname in zipfile.namelist():
match = xml_re.match(fname)
if match:
version = match.group(1)
if version not in versions:
versions[version] = (path, fname)
if not versions:
raise RuntimeError(
"Could not find any data dictionary definitions. "
f"Looked in: {', '.join(map(repr, ZIPFILE_LOCATIONS))}."
)
return versions
@lru_cache
def _read_identifiers() -> Dict[str, Tuple[Union[Path, Traversable], str]]:
"""Traverse all possible DD zip files and return a map of known identifiers.
Returns:
identifier_map: identifier -> (zipfile path, filename)
"""
identifiers = {}
xml_re = re.compile(r"^identifiers/\w+/(\w+_identifier).xml$")
for path in ZIPFILE_LOCATIONS:
if not path.is_file():
continue
with _open_zipfile(path) as zipfile:
for fname in zipfile.namelist():
match = xml_re.match(fname)
if match:
identifier_name = match.group(1)
if identifier_name not in identifiers:
identifiers[identifier_name] = (path, fname)
return identifiers
@lru_cache
def dd_xml_versions() -> List[str]:
"""Parse IDSDef.zip to find version numbers available"""
def sort_key(version):
try:
return parse_dd_version(version)
except InvalidVersion:
# Don't fail when a malformatted version is present in the DD zip
logger.error(
f"Could not convert DD XML version {version} to a Version.", exc_info=1
)
return Version(0)
return sorted(_read_dd_versions(), key=sort_key)
@lru_cache
def dd_identifiers() -> List[str]:
"""Parse IDSDef.zip to find available identifiers"""
return sorted(_read_identifiers())
def get_dd_xml(version):
"""Read XML file for the given data dictionary version."""
dd_versions = dd_xml_versions()
if version not in dd_versions:
raise UnknownDDVersion(version, dd_versions)
path, fname = _read_dd_versions()[version]
with _open_zipfile(path) as zipfile:
return zipfile.read(fname)
def get_dd_xml_crc(version):
"""Given a version string, return its CRC checksum"""
# Note, by this time get_dd_xml is already called, so we don't need to check if the
# version is known
path, fname = _read_dd_versions()[version]
with _open_zipfile(path) as zipfile:
return zipfile.getinfo(fname).CRC
def get_identifier_xml(identifier_name):
"""Get identifier XML for the given identifier name"""
path, fname = _read_identifiers()[identifier_name]
with _open_zipfile(path) as zipfile:
return zipfile.read(fname)
def print_supported_version_warning(version):
try:
if parse_dd_version(version) < imas.OLDEST_SUPPORTED_VERSION:
logger.warning(
"Version %s is below lowest supported version of %s.\
Proceed at your own risk.",
version,
imas.OLDEST_SUPPORTED_VERSION,
)
except InvalidVersion:
logging.warning("Ignoring version parsing error.", exc_info=1)
def latest_dd_version():
"""Find the latest version in data-dictionary/IDSDef.zip"""
return dd_xml_versions()[-1]