Merge pull request #101 from usnistgov/develop
Develop
knc6 committed May 5, 2023
2 parents a8e8c38 + 90ccb7a commit a4a1715
Showing 14 changed files with 564 additions and 57 deletions.
2 changes: 1 addition & 1 deletion alignn/__init__.py
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2023.04.30"
__version__ = "2023.05.03"
67 changes: 67 additions & 0 deletions alignn/examples/sample_data_ff/mlearn_data/all/config_example.json
@@ -0,0 +1,67 @@
{
"version": "112bbedebdaecf59fb18e11c929080fb2f358246",
"dataset": "user_data",
"target": "target",
"atom_features": "cgcnn",
"neighbor_strategy": "k-nearest",
"id_tag": "jid",
"random_seed": 123,
"classification_threshold": null,
"n_val": 164,
"n_test": 164,
"n_train":1402,
"train_ratio": 0.9,
"val_ratio": 0.05,
"test_ratio": 0.05,
"target_multiplication_factor": null,
"epochs": 50,
"batch_size": 5,
"weight_decay": 1e-05,
"learning_rate": 0.001,
"filename": "sample",
"warmup_steps": 2000,
"criterion": "l1",
"optimizer": "adamw",
"scheduler": "onecycle",
"pin_memory": false,
"save_dataloader": false,
"write_checkpoint": true,
"write_predictions": true,
"store_outputs": false,
"progress": true,
"log_tensorboard": false,
"standard_scalar_and_pca": false,
"use_canonize": false,
"num_workers": 0,
"cutoff": 8.0,
"max_neighbors": 12,
"keep_data_order": false,
"normalize_graph_level_loss": false,
"distributed": false,
"n_early_stopping": null,
"output_dir": "out_continue",
"model": {
"name": "alignn_atomwise",
"alignn_layers": 2,
"gcn_layers": 4,
"atom_input_features": 92,
"edge_input_features": 80,
"triplet_input_features": 40,
"embedding_features": 64,
"hidden_features": 256,
"output_features": 1,
"grad_multiplier": -1,
"calculate_gradient": true,
"atomwise_output_features": 0,
"graphwise_weight": 1.0,
"gradwise_weight": 1.0,
"stresswise_weight": 0.01,
"atomwise_weight": 0.0,
"link": "identity",
"zero_inflated": false,
"use_cutoff_function": true,
"energy_mult_natoms": true,
"classification": false,
"stress_multiplier":10
}
}
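As a quick reference, here is a minimal sketch of how this example config might be loaded and inspected with the standard library; it assumes the script is run from the repository root, and that the explicit n_train/n_val/n_test counts, when set, take precedence over the ratio fields.

import json

# Path added in this PR, relative to the repository root.
cfg_path = "alignn/examples/sample_data_ff/mlearn_data/all/config_example.json"
with open(cfg_path) as f:
    cfg = json.load(f)

# Explicit split sizes are set here, so the ratio fields are informational.
print("splits (train/val/test):", cfg["n_train"], cfg["n_val"], cfg["n_test"])
print("model:", cfg["model"]["name"],
      "| alignn_layers:", cfg["model"]["alignn_layers"],
      "| gradwise_weight:", cfg["model"]["gradwise_weight"])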

Large diffs are not rendered by default.

153 changes: 153 additions & 0 deletions alignn/examples/sample_data_ff/mlearn_data/all/prepare_mlearn.py
@@ -0,0 +1,153 @@
"""Module for generating mlearn dataset."""
# for m in Ni Cu Mo Ge Si Li; do
#   wget https://github.com/materialsvirtuallab/mlearn/raw/master/data/${m}/training.json
#   mv training.json ${m}_train.json
# done
from jarvis.core.atoms import pmg_to_atoms
from jarvis.db.jsonutils import dumpjson
from jarvis.db.jsonutils import loadjson
from pymatgen.core.structure import Structure
from collections import defaultdict
import os
import numpy as np
from ase.stress import voigt_6_to_full_3x3_stress

# Ref: https://github.com/materialsvirtuallab/mlearn

url = "wget https://github.com/materialsvirtuallab/mlearn/raw/master/data/"
mlearn_dat = []
els = ["Ni", "Cu", "Mo", "Ge", "Si", "Li"]
train_count = 0
val_count = 0
test_count = 0
for ii in els:
print(ii)
name = ii + "_train.json"
if not os.path.exists(name):
cmd = url + ii + "/training.json -O " + ii + "_train.json"
os.system(cmd)

data = loadjson(name)
cmd = "rm " + ii + "_train.json"
os.system(cmd)
train_structures = [d["structure"] for d in data]
train_energies = [d["outputs"]["energy"] for d in data]
train_forces = [d["outputs"]["forces"] for d in data]
train_stresses = np.array(
voigt_6_to_full_3x3_stress(
np.array([d["outputs"]["virial_stress"] for d in data])
)
).tolist()
print("train_structures", train_energies)
name = ii + "_test.json"
if not os.path.exists(name):
cmd = url + ii + "/test.json -O " + ii + "_test.json"
os.system(cmd)
data = loadjson(name)
cmd = "rm " + ii + "_test.json"
os.system(cmd)
test_structures = [d["structure"] for d in data]
test_energies = [d["outputs"]["energy"] for d in data]
test_forces = [d["outputs"]["forces"] for d in data]
# test_stresses = [d["outputs"]["virial_stress"] for d in data]
test_stresses = np.array(
voigt_6_to_full_3x3_stress(
np.array([d["outputs"]["virial_stress"] for d in data])
)
).tolist()

# For ALIGNN-FF
mem = []
count = 0
train_e = defaultdict()
test_e = defaultdict()
train_f = defaultdict()
test_f = defaultdict()
train_s = defaultdict()
test_s = defaultdict()

for i, j, k, z in zip(
train_structures, train_energies, train_forces, train_stresses
):
k = np.array(k)
z = np.array(z)
info = {}
atoms = pmg_to_atoms(Structure.from_dict(i))
count += 1
jid = ii + "-" + str(count)
info["jid"] = jid
info["atoms"] = atoms.to_dict()
info["energy"] = j # / atoms.num_atoms
info["total_energy"] = j / atoms.num_atoms
info["forces"] = k.tolist()
info["stresses"] = z.tolist()
mem.append(info)
mlearn_dat.append(info)
# train[jid]=json.dumps(info)
train_e[jid] = j
train_f[jid] = ";".join(map(str, k.flatten()))
train_s[jid] = ";".join(map(str, z.flatten()))
train_count += 1
for i, j, k, z in zip(
test_structures, test_energies, test_forces, test_stresses
):
k = np.array(k)
z = np.array(z)
info = {}
count += 1
jid = ii + "-" + str(count)
info["jid"] = ii + "-" + str(count)
# atoms = pmg_to_atoms(i)
atoms = pmg_to_atoms(Structure.from_dict(i))
info["atoms"] = atoms.to_dict()
info["energy"] = j # / atoms.num_atoms
info["total_energy"] = j / atoms.num_atoms
info["forces"] = k.tolist()
info["stresses"] = z.tolist()
# val[jid]=json.dumps(info)
mem.append(info)
mlearn_dat.append(info)
info["jid"] = info["jid"] + "a"
mlearn_dat.append(info) # For val set
test_e[jid] = j
test_f[jid] = ";".join(map(str, k.flatten()))
test_s[jid] = ";".join(map(str, z.flatten()))
test_count += 1
print(len(mem), len(train_structures), len(test_structures))
dat = {}
dat["train"] = train_e
dat["test"] = test_e
fname = "mlearn_" + ii + "_energy.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)

dat = {}
dat["train"] = train_f
dat["test"] = test_f
fname = "mlearn_" + ii + "_forces.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)

dat = {}
dat["train"] = train_s
dat["test"] = test_s
fname = "mlearn_" + ii + "_stresses.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)
# For Figshare
print("train", train_count)
print("test", test_count)
dumpjson(data=mlearn_dat, filename="mlearn.json")
cmd = "zip mlearn.json.zip mlearn.json"
os.system(cmd)
cmd = "rm mlearn.json"
# os.system(cmd)
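Once the script has run, a hedged sanity check of the combined mlearn.json might look like the sketch below (it assumes the file is kept in the working directory and that jarvis's Atoms.to_dict() stores coordinates under a "coords" key).

import json
from collections import Counter

with open("mlearn.json") as f:
    entries = json.load(f)

# Entries are tagged like "Ni-12"; test entries are duplicated with an
# "a"-suffixed jid so they can double as the validation split.
counts = Counter(e["jid"].split("-")[0] for e in entries)
print(len(entries), "entries:", dict(counts))

# Shape checks on one record: forces are n_atoms x 3, stresses 3 x 3.
e = entries[0]
n_atoms = len(e["atoms"]["coords"])
assert len(e["forces"]) == n_atoms and len(e["forces"][0]) == 3
assert len(e["stresses"]) == 3 and len(e["stresses"][0]) == 3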
13 changes: 11 additions & 2 deletions alignn/ff/ff.py
@@ -67,6 +67,15 @@ def default_path():
return dpath


def revised_path():
"""Get defaukt model path."""
dpath = os.path.abspath(
str(os.path.join(os.path.dirname(__file__), "revised"))
)
print("model_path", dpath)
return dpath


# print("default_model_path", default_model_path)


@@ -178,14 +187,14 @@ def calculate(self, atoms, properties=None, system_changes=None):
mult = num_atoms
else:
mult = 1

# print('result["stresses"]',result["stresses"],result["stresses"].shape)
self.results = {
"energy": result["out"].detach().cpu().numpy() * num_atoms,
"forces": result["grad"].detach().cpu().numpy()
* mult
* self.force_multiplier,
"stress": full_3x3_to_voigt_6_stress(
result["stress"].detach().cpu().numpy()
result["stresses"][:3].reshape(3, 3).detach().cpu().numpy()
)
* self.stress_wt
# * num_atoms,
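With the new revised_path() helper, the revised force field can be attached to an ASE calculator much like the existing default model. A minimal sketch, assuming AlignnAtomwiseCalculator accepts a path keyword as in current ALIGNN-FF usage:

from ase.build import bulk
from alignn.ff.ff import AlignnAtomwiseCalculator, revised_path

# Point the ASE calculator at the bundled revised model directory.
calc = AlignnAtomwiseCalculator(path=revised_path())

atoms = bulk("Cu", "fcc", a=3.6)
atoms.calc = calc
print("energy (eV):", atoms.get_potential_energy())
print("forces (eV/Angstrom):", atoms.get_forces())
# Stress comes back in ASE's 6-component Voigt form via the conversion above.
print("stress:", atoms.get_stress())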
1 change: 1 addition & 0 deletions alignn/ff/revised/__init__.py
@@ -0,0 +1 @@
"""Provide revised force-field file."""
Binary file added alignn/ff/revised/best_model.pt
Binary file not shown.
64 changes: 64 additions & 0 deletions alignn/ff/revised/config.json
@@ -0,0 +1,64 @@
{
"version": "112bbedebdaecf59fb18e11c929080fb2f358246",
"dataset": "user_data",
"target": "target",
"atom_features": "cgcnn",
"neighbor_strategy": "k-nearest",
"id_tag": "jid",
"random_seed": 123,
"classification_threshold": null,
"n_val": null,
"n_test": null,
"n_train": null,
"train_ratio": 0.9,
"val_ratio": 0.05,
"test_ratio": 0.05,
"target_multiplication_factor": null,
"epochs": 100,
"batch_size": 16,
"weight_decay": 1e-05,
"learning_rate": 0.001,
"filename": "sample",
"warmup_steps": 2000,
"criterion": "l1",
"optimizer": "adamw",
"scheduler": "onecycle",
"pin_memory": false,
"save_dataloader": false,
"write_checkpoint": true,
"write_predictions": true,
"store_outputs": false,
"progress": true,
"log_tensorboard": false,
"standard_scalar_and_pca": false,
"use_canonize": false,
"num_workers": 0,
"cutoff": 8.0,
"max_neighbors": 12,
"keep_data_order": false,
"normalize_graph_level_loss": false,
"distributed": false,
"n_early_stopping": null,
"output_dir": "out_continue",
"model": {
"name": "alignn_atomwise",
"alignn_layers": 4,
"gcn_layers": 4,
"atom_input_features": 92,
"edge_input_features": 80,
"triplet_input_features": 40,
"embedding_features": 64,
"hidden_features": 256,
"output_features": 1,
"grad_multiplier": -1,
"calculate_gradient": true,
"atomwise_output_features": 0,
"graphwise_weight": 1.0,
"gradwise_weight": 10.0,
"stresswise_weight": 0.0,
"atomwise_weight": 0.0,
"link": "identity",
"zero_inflated": false,
"classification": false
}
}
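For comparison with the example config above, here is a small sketch (paths as added in this PR, run from the repository root) that reports which settings differ between the two files, e.g. the layer count and the loss weights:

import json

paths = {
    "example": "alignn/examples/sample_data_ff/mlearn_data/all/config_example.json",
    "revised": "alignn/ff/revised/config.json",
}
cfgs = {name: json.load(open(p)) for name, p in paths.items()}

# Compare top-level settings (excluding the nested model dict) and model settings.
for label, a, b in (
    ("top-level",
     {k: v for k, v in cfgs["example"].items() if k != "model"},
     {k: v for k, v in cfgs["revised"].items() if k != "model"}),
    ("model", cfgs["example"]["model"], cfgs["revised"]["model"]),
):
    diffs = {k: (a.get(k), b.get(k))
             for k in sorted(set(a) | set(b)) if a.get(k) != b.get(k)}
    print(label, diffs)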
