From 4806ebe574a41479a0e41cdddc3251cba49958d1 Mon Sep 17 00:00:00 2001
From: Ben Kimock <kimockb@gmail.com>
Date: Mon, 10 Jun 2019 23:03:41 -0400
Subject: [PATCH] Cache the result of _identify_fields

This halves the runtime of scripts like the one in #2146 for
MassiveFIRE snapshots when run on HiPerGator's Lustre filesystem.
On such snapshots, yt used to redundantly scan the backing HDF5
files 243 times to determine which Datasets they contain.
---
 yt/geometry/particle_geometry_handler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py
index 1bca51be42..739bcec502 100644
--- a/yt/geometry/particle_geometry_handler.py
+++ b/yt/geometry/particle_geometry_handler.py
@@ -207,8 +207,13 @@ def _detect_output_fields(self):
         dsl = []
         units = {}
         pcounts = self._get_particle_type_counts()
+        field_cache = {}
         for dom in self.data_files:
-            fl, _units = self.io._identify_fields(dom)
+            if dom.filename in field_cache:
+                fl, _units = field_cache[dom.filename]
+            else:
+                fl, _units = self.io._identify_fields(dom)
+                field_cache[dom.filename] = fl, _units
             units.update(_units)
             dom._calculate_offsets(fl, pcounts)
             for f in fl: