Merge branch 'main' into test_py310

zincware · Nov 30, 2022 · d9325e2 · d9325e2
2 parents bc02dce + cb07b6b
commit d9325e2
Show file tree

Hide file tree

Showing 8 changed files with 32 additions and 34 deletions.
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
@@ -46,7 +46,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
         with:
-          python-version: '3.x'
+          python-version: '3.9'
       - name: Install package
         run: |
           pip install .

diff --git a/CI/integration_tests/calculators/test_einstein_diffusion_coefficients.py b/CI/integration_tests/calculators/test_einstein_diffusion_coefficients.py
@@ -51,7 +51,7 @@ def test_calculator(tmp_path):
 
     n_part = 500
     n_step = 5000
-    msd_range = 100
+    msd_range = 200
 
     vel = np.sqrt(2 * diff_coeff / time_step) * tf.random.normal(
         shape=(n_step, n_part, 3), mean=0, stddev=1

diff --git a/CI/unit_tests/memory_manager/test_memory_manager.py b/CI/unit_tests/memory_manager/test_memory_manager.py
@@ -148,9 +148,7 @@ def test_get_batch_size(self):
         self.memory_manager.data_path = ["Test/Path"]
         self.memory_manager.memory_fraction = 0.5
         self.memory_manager.machine_properties["memory"] = 50000
-        batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size(
-            system=False
-        )
+        batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size()
         self.assertEqual(batch_size, 10)
         self.assertEqual(number_of_batches, 1)
         self.assertEqual(remainder, 0)
@@ -160,9 +158,7 @@ def test_get_batch_size(self):
         self.memory_manager.data_path = ["Test/Path"]
         self.memory_manager.memory_fraction = 1.0
         self.memory_manager.machine_properties["memory"] = 50
-        batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size(
-            system=False
-        )
+        batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size()
         self.assertEqual(batch_size, 1)
         self.assertEqual(number_of_batches, 13)
         self.assertEqual(remainder, 0)

diff --git a/mdsuite/calculators/einstein_diffusion_coefficients.py b/mdsuite/calculators/einstein_diffusion_coefficients.py
@@ -184,17 +184,22 @@ def ensemble_operation(self, ensemble):
         msd = tf.math.squared_difference(
             tf.gather(ensemble, self.args.tau_values, axis=1), ensemble[:, None, 0]
         )
+        self.count += msd.shape[0]
         # sum over particles and dimensions; normalised later via self.count
-        msd = tf.reduce_sum(tf.reduce_mean(msd, axis=0), axis=-1)
+        # msd = tf.reduce_sum(tf.reduce_mean(msd, axis=0), axis=-1)
+        msd = tf.reduce_sum(tf.reduce_sum(msd, axis=0), axis=-1)
+
         # sum up ensembles to average in post processing
-        self.msd_array += np.array(msd)
+        return np.array(msd)
 
     def fit_diff_coeff(self):
         """
         Apply unit conversion, fit line to the data, prepare for database storage
         """
 
-        self.msd_array /= int(self.n_batches) * self.ensemble_loop
+        # self.msd_array /= int(self.n_batches) * self.ensemble_loop
+        self.msd_array /= self.count
+
         self.msd_array *= self.experiment.units.length**2
         self.time *= self.experiment.units.time
 
@@ -225,13 +230,13 @@ def run_calculator(self):
             # species-wise
             self.time = None
             self.time = self._handle_tau_values()
-            self.msd_array = np.zeros(self.data_resolution)
             dict_ref = str.encode("/".join([species, self.loaded_property.name]))
             batch_ds = self.get_batch_dataset([species])
-
+            self.msd_array = np.zeros(self.data_resolution)
+            self.count = 0
             # loop over batches to get MSD
-            for batch in tqdm(
-                batch_ds,
+            for i, batch in tqdm(
+                enumerate(batch_ds),
                 ncols=70,
                 desc=species,
                 total=self.n_batches,
@@ -240,8 +245,13 @@ def run_calculator(self):
                 ensemble_ds = self.get_ensemble_dataset(batch, species)
 
                 for ensemble in ensemble_ds:
-                    self.ensemble_operation(ensemble[dict_ref])
+                    if not ensemble[dict_ref].shape[1] == self.args.data_range:
+                        continue
+                    else:
+                        self.msd_array += self.ensemble_operation(ensemble[dict_ref])
+                        self.count += 1
 
+            # self.msd_array = np.array(tf.reduce_sum(self.msd_array, axis=0))
             fit_results = self.fit_diff_coeff()
             self.queue_data(data=fit_results, subjects=[species])
 

diff --git a/mdsuite/calculators/radial_distribution_function.py b/mdsuite/calculators/radial_distribution_function.py
@@ -250,7 +250,6 @@ def check_input(self):
                 self.args.species = list(self.experiment.molecules)
             else:
                 self.args.species = list(self.experiment.species)
-
         self._initialize_rdf_parameters()
 
     def _initialize_rdf_parameters(self):
@@ -424,7 +423,6 @@ def _correct_batch_properties(self):
             self.minibatch = False
 
         self.remainder = 0
-        self.minibatch = False
 
     def run_minibatch_loop(self, atoms, stop, n_atoms, minibatch_start, positions_tensor):
         """
@@ -439,7 +437,6 @@ def run_minibatch_loop(self, atoms, stop, n_atoms, minibatch_start, positions_te
         positions_tensor : tf.Tensor
 
         """
-
         # Compute the number of atoms and configurations in the batch.
         atoms_per_batch, batch_size, _ = tf.shape(atoms)
 
@@ -509,7 +506,7 @@ def compute_species_values(
         particles_list = self.particles_list
         for tuples in itertools.combinations_with_replacement(self.index_list, 2):
             names = self._get_species_names(tuples)
-            start_ = tf.concat(
+            start_ = tf.stack(
                 [
                     sum(particles_list[: tuples[0]]) - start_batch,
                     sum(particles_list[: tuples[1]]),

diff --git a/mdsuite/calculators/trajectory_calculator.py b/mdsuite/calculators/trajectory_calculator.py
@@ -266,8 +266,7 @@ def _prepare_managers(self, data_path: list, correct: bool = False):
             self.batch_size,
             self.n_batches,
             self.remainder,
-        ) = self.memory_manager.get_batch_size(system=self.system_property)
-
+        ) = self.memory_manager.get_batch_size()
         self.ensemble_loop, self.minibatch = self.memory_manager.get_ensemble_loop(
             self.args.data_range, self.args.correlation_time
         )
@@ -281,7 +280,6 @@ def _prepare_managers(self, data_path: list, correct: bool = False):
 
         if correct:
             self._correct_batch_properties()
-
         self.data_manager = DataManager(
             data_path=data_path,
             database=self.database,
@@ -345,7 +343,6 @@ def get_batch_dataset(
         """
         path_list = [join_path(item, self.loaded_property.name) for item in subject_list]
         self._prepare_managers(path_list, correct=correct)
-
         type_spec = {}
         for item in subject_list:
             dict_ref = "/".join([item, self.loaded_property.name])

diff --git a/mdsuite/memory_management/memory_manager.py b/mdsuite/memory_management/memory_manager.py
@@ -31,6 +31,7 @@
 import tensorflow as tf
 
 from mdsuite.database.simulation_database import Database
+from mdsuite.utils import config
 from mdsuite.utils.meta_functions import get_machine_properties, gpu_available
 from mdsuite.utils.scale_functions import (
     linear_scale_function,
@@ -104,7 +105,7 @@ def __init__(
         self.data_path = data_path
         self.parallel = parallel
         self.database = database
-        self.memory_fraction = memory_fraction
+        self.memory_fraction = config.memory_fraction
         self.offset = offset
 
         self.machine_properties = get_machine_properties()
@@ -174,19 +175,13 @@ def _string_to_function(argument: str):
 
         return scale_function, scale_function_parameters
 
-    def get_batch_size(self, system: bool = False) -> tuple:
+    def get_batch_size(self) -> tuple:
         """
         Calculate the batch size of an operation.
 
         This method takes the tensor_values requirements of an operation and returns
         how big each batch of tensor_values should be for such an operation.
 
-
-        Parameters
-        ----------
-        system : bool
-                Tell the database what kind of tensor_values it is looking at,
-                atomistic, or system wide.
         Returns
         -------
         batch_size : int
@@ -199,7 +194,6 @@ def get_batch_size(self, system: bool = False) -> tuple:
         """
         if self.data_path is None:
             raise ValueError("No tensor_values have been requested.")
-
         per_configuration_memory: float = 0.0
         for item in self.data_path:
             n_particles, n_configs, n_bytes = self.database.get_data_size(item)

diff --git a/mdsuite/utils/config.py b/mdsuite/utils/config.py
@@ -36,11 +36,15 @@ class Config:
     bokeh_sizing_mode: str
         The way bokeh scales plots.
         see bokeh / sizing_mode for more information
+    jupyter : bool
+            If true, jupyter is being used.
+    memory_fraction: float
+            The portion of the available memory to be used.
     """
 
     jupyter: bool = False
-    GPU: bool = False
     bokeh_sizing_mode: str = "stretch_both"
+    memory_fraction: float = 0.5
 
 
 config = Config()