From 4806ebe574a41479a0e41cdddc3251cba49958d1 Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Mon, 10 Jun 2019 23:03:41 -0400
Subject: [PATCH] Cache the result of _identify_fields

This halves the runtime of scripts like in #2146 for MassiveFIRE
snapshots when run on HiPerGator's lustre filesystem. On such snapshots
yt used to redundantly scan the backing HDF5 files 243 times for what
Datasets they contain.
---
 yt/geometry/particle_geometry_handler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py
index 1bca51be42..739bcec502 100644
--- a/yt/geometry/particle_geometry_handler.py
+++ b/yt/geometry/particle_geometry_handler.py
@@ -207,8 +207,13 @@ def _detect_output_fields(self):
         dsl = []
         units = {}
         pcounts = self._get_particle_type_counts()
+        field_cache = {}
         for dom in self.data_files:
-            fl, _units = self.io._identify_fields(dom)
+            if dom.filename in field_cache:
+                fl, _units = field_cache[dom.filename]
+            else:
+                fl, _units = self.io._identify_fields(dom)
+                field_cache[dom.filename] = fl, _units
             units.update(_units)
             dom._calculate_offsets(fl, pcounts)
             for f in fl: