Merge pull request #101 from usnistgov/develop
Develop
knc6 committed May 5, 2023
2 parents a8e8c38 + 90ccb7a commit a4a1715
Showing 14 changed files with 564 additions and 57 deletions.
2 changes: 1 addition & 1 deletion alignn/__init__.py
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2023.04.30"
__version__ = "2023.05.03"
67 changes: 67 additions & 0 deletions alignn/examples/sample_data_ff/mlearn_data/all/config_example.json
@@ -0,0 +1,67 @@
{
"version": "112bbedebdaecf59fb18e11c929080fb2f358246",
"dataset": "user_data",
"target": "target",
"atom_features": "cgcnn",
"neighbor_strategy": "k-nearest",
"id_tag": "jid",
"random_seed": 123,
"classification_threshold": null,
"n_val": 164,
"n_test": 164,
"n_train":1402,
"train_ratio": 0.9,
"val_ratio": 0.05,
"test_ratio": 0.05,
"target_multiplication_factor": null,
"epochs": 50,
"batch_size": 5,
"weight_decay": 1e-05,
"learning_rate": 0.001,
"filename": "sample",
"warmup_steps": 2000,
"criterion": "l1",
"optimizer": "adamw",
"scheduler": "onecycle",
"pin_memory": false,
"save_dataloader": false,
"write_checkpoint": true,
"write_predictions": true,
"store_outputs": false,
"progress": true,
"log_tensorboard": false,
"standard_scalar_and_pca": false,
"use_canonize": false,
"num_workers": 0,
"cutoff": 8.0,
"max_neighbors": 12,
"keep_data_order": false,
"normalize_graph_level_loss": false,
"distributed": false,
"n_early_stopping": null,
"output_dir": "out_continue",
"model": {
"name": "alignn_atomwise",
"alignn_layers": 2,
"gcn_layers": 4,
"atom_input_features": 92,
"edge_input_features": 80,
"triplet_input_features": 40,
"embedding_features": 64,
"hidden_features": 256,
"output_features": 1,
"grad_multiplier": -1,
"calculate_gradient": true,
"atomwise_output_features": 0,
"graphwise_weight": 1.0,
"gradwise_weight": 1.0,
"stresswise_weight": 0.01,
"atomwise_weight": 0.0,
"link": "identity",
"zero_inflated": false,
"use_cutoff_function": true,
"energy_mult_natoms": true,
"classification": false,
"stress_multiplier":10
}
}
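As a quick reference, here is a minimal sketch of how this example config might be loaded and inspected with the standard library; it assumes the script is run from the repository root, and that the explicit n_train/n_val/n_test counts, when set, take precedence over the ratio fields.

import json

# Path added in this PR, relative to the repository root.
cfg_path = "alignn/examples/sample_data_ff/mlearn_data/all/config_example.json"
with open(cfg_path) as f:
    cfg = json.load(f)

# Explicit split sizes are set here, so the ratio fields are informational.
print("splits (train/val/test):", cfg["n_train"], cfg["n_val"], cfg["n_test"])
print("model:", cfg["model"]["name"],
      "| alignn_layers:", cfg["model"]["alignn_layers"],
      "| gradwise_weight:", cfg["model"]["gradwise_weight"])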

Large diffs are not rendered by default.

153 changes: 153 additions & 0 deletions alignn/examples/sample_data_ff/mlearn_data/all/prepare_mlearn.py
@@ -0,0 +1,153 @@
"""Module for generating mlearn dataset."""
# for m in Ni Cu Mo Ge Si Li; do
#   wget https://github.com/materialsvirtuallab/mlearn/raw/master/data/${m}/training.json
#   mv training.json ${m}_train.json
# done
from jarvis.core.atoms import pmg_to_atoms
from jarvis.db.jsonutils import dumpjson
from jarvis.db.jsonutils import loadjson
from pymatgen.core.structure import Structure
from collections import defaultdict
import os
import numpy as np
from ase.stress import voigt_6_to_full_3x3_stress

# Ref: https://github.com/materialsvirtuallab/mlearn

url = "wget https://github.com/materialsvirtuallab/mlearn/raw/master/data/"
mlearn_dat = []
els = ["Ni", "Cu", "Mo", "Ge", "Si", "Li"]
train_count = 0
val_count = 0
test_count = 0
for ii in els:
print(ii)
name = ii + "_train.json"
if not os.path.exists(name):
cmd = url + ii + "/training.json -O " + ii + "_train.json"
os.system(cmd)

data = loadjson(name)
cmd = "rm " + ii + "_train.json"
os.system(cmd)
train_structures = [d["structure"] for d in data]
train_energies = [d["outputs"]["energy"] for d in data]
train_forces = [d["outputs"]["forces"] for d in data]
train_stresses = np.array(
voigt_6_to_full_3x3_stress(
np.array([d["outputs"]["virial_stress"] for d in data])
)
).tolist()
print("train_structures", train_energies)
name = ii + "_test.json"
if not os.path.exists(name):
cmd = url + ii + "/test.json -O " + ii + "_test.json"
os.system(cmd)
data = loadjson(name)
cmd = "rm " + ii + "_test.json"
os.system(cmd)
test_structures = [d["structure"] for d in data]
test_energies = [d["outputs"]["energy"] for d in data]
test_forces = [d["outputs"]["forces"] for d in data]
# test_stresses = [d["outputs"]["virial_stress"] for d in data]
test_stresses = np.array(
voigt_6_to_full_3x3_stress(
np.array([d["outputs"]["virial_stress"] for d in data])
)
).tolist()

# For ALIGNN-FF
mem = []
count = 0
train_e = defaultdict()
test_e = defaultdict()
train_f = defaultdict()
test_f = defaultdict()
train_s = defaultdict()
test_s = defaultdict()

for i, j, k, z in zip(
train_structures, train_energies, train_forces, train_stresses
):
k = np.array(k)
z = np.array(z)
info = {}
atoms = pmg_to_atoms(Structure.from_dict(i))
count += 1
jid = ii + "-" + str(count)
info["jid"] = jid
info["atoms"] = atoms.to_dict()
info["energy"] = j # / atoms.num_atoms
info["total_energy"] = j / atoms.num_atoms
info["forces"] = k.tolist()
info["stresses"] = z.tolist()
mem.append(info)
mlearn_dat.append(info)
# train[jid]=json.dumps(info)
train_e[jid] = j
train_f[jid] = ";".join(map(str, k.flatten()))
train_s[jid] = ";".join(map(str, z.flatten()))
train_count += 1
for i, j, k, z in zip(
test_structures, test_energies, test_forces, test_stresses
):
k = np.array(k)
z = np.array(z)
info = {}
count += 1
jid = ii + "-" + str(count)
info["jid"] = ii + "-" + str(count)
# atoms = pmg_to_atoms(i)
atoms = pmg_to_atoms(Structure.from_dict(i))
info["atoms"] = atoms.to_dict()
info["energy"] = j # / atoms.num_atoms
info["total_energy"] = j / atoms.num_atoms
info["forces"] = k.tolist()
info["stresses"] = z.tolist()
# val[jid]=json.dumps(info)
mem.append(info)
mlearn_dat.append(info)
info["jid"] = info["jid"] + "a"
mlearn_dat.append(info) # For val set
test_e[jid] = j
test_f[jid] = ";".join(map(str, k.flatten()))
test_s[jid] = ";".join(map(str, z.flatten()))
test_count += 1
print(len(mem), len(train_structures), len(test_structures))
dat = {}
dat["train"] = train_e
dat["test"] = test_e
fname = "mlearn_" + ii + "_energy.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)

dat = {}
dat["train"] = train_f
dat["test"] = test_f
fname = "mlearn_" + ii + "_forces.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)

dat = {}
dat["train"] = train_s
dat["test"] = test_s
fname = "mlearn_" + ii + "_stresses.json"
dumpjson(data=dat, filename=fname)
cmd = "zip " + fname + ".zip " + fname
os.system(cmd)
cmd = "rm " + fname
os.system(cmd)
# For Figshare
print("train", train_count)
print("test", test_count)
dumpjson(data=mlearn_dat, filename="mlearn.json")
cmd = "zip mlearn.json.zip mlearn.json"
os.system(cmd)
cmd = "rm mlearn.json"
# os.system(cmd)
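Once the script has run, a hedged sanity check of the combined mlearn.json might look like the sketch below (it assumes the file is kept in the working directory and that jarvis's Atoms.to_dict() stores coordinates under a "coords" key).

import json
from collections import Counter

with open("mlearn.json") as f:
    entries = json.load(f)

# Entries are tagged like "Ni-12"; test entries are duplicated with an
# "a"-suffixed jid so they can double as the validation split.
counts = Counter(e["jid"].split("-")[0] for e in entries)
print(len(entries), "entries:", dict(counts))

# Shape checks on one record: forces are n_atoms x 3, stresses 3 x 3.
e = entries[0]
n_atoms = len(e["atoms"]["coords"])
assert len(e["forces"]) == n_atoms and len(e["forces"][0]) == 3
assert len(e["stresses"]) == 3 and len(e["stresses"][0]) == 3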
13 changes: 11 additions & 2 deletions alignn/ff/ff.py
@@ -67,6 +67,15 @@ def default_path():
return dpath


def revised_path():
"""Get defaukt model path."""
dpath = os.path.abspath(
str(os.path.join(os.path.dirname(__file__), "revised"))
)
print("model_path", dpath)
return dpath


# print("default_model_path", default_model_path)


@@ -178,14 +187,14 @@ def calculate(self, atoms, properties=None, system_changes=None):
mult = num_atoms
else:
mult = 1

# print('result["stresses"]',result["stresses"],result["stresses"].shape)
self.results = {
"energy": result["out"].detach().cpu().numpy() * num_atoms,
"forces": result["grad"].detach().cpu().numpy()
* mult
* self.force_multiplier,
"stress": full_3x3_to_voigt_6_stress(
result["stress"].detach().cpu().numpy()
result["stresses"][:3].reshape(3, 3).detach().cpu().numpy()
)
* self.stress_wt
# * num_atoms,
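With the new revised_path() helper, the revised force field can be attached to an ASE calculator much like the existing default model. A minimal sketch, assuming AlignnAtomwiseCalculator accepts a path keyword as in current ALIGNN-FF usage:

from ase.build import bulk
from alignn.ff.ff import AlignnAtomwiseCalculator, revised_path

# Point the ASE calculator at the bundled revised model directory.
calc = AlignnAtomwiseCalculator(path=revised_path())

atoms = bulk("Cu", "fcc", a=3.6)
atoms.calc = calc
print("energy (eV):", atoms.get_potential_energy())
print("forces (eV/Angstrom):", atoms.get_forces())
# Stress comes back in ASE's 6-component Voigt form via the conversion above.
print("stress:", atoms.get_stress())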
1 change: 1 addition & 0 deletions alignn/ff/revised/__init__.py
@@ -0,0 +1 @@
"""Provide revised force-field file."""
Binary file added alignn/ff/revised/best_model.pt
Binary file not shown.
64 changes: 64 additions & 0 deletions alignn/ff/revised/config.json
@@ -0,0 +1,64 @@
{
"version": "112bbedebdaecf59fb18e11c929080fb2f358246",
"dataset": "user_data",
"target": "target",
"atom_features": "cgcnn",
"neighbor_strategy": "k-nearest",
"id_tag": "jid",
"random_seed": 123,
"classification_threshold": null,
"n_val": null,
"n_test": null,
"n_train": null,
"train_ratio": 0.9,
"val_ratio": 0.05,
"test_ratio": 0.05,
"target_multiplication_factor": null,
"epochs": 100,
"batch_size": 16,
"weight_decay": 1e-05,
"learning_rate": 0.001,
"filename": "sample",
"warmup_steps": 2000,
"criterion": "l1",
"optimizer": "adamw",
"scheduler": "onecycle",
"pin_memory": false,
"save_dataloader": false,
"write_checkpoint": true,
"write_predictions": true,
"store_outputs": false,
"progress": true,
"log_tensorboard": false,
"standard_scalar_and_pca": false,
"use_canonize": false,
"num_workers": 0,
"cutoff": 8.0,
"max_neighbors": 12,
"keep_data_order": false,
"normalize_graph_level_loss": false,
"distributed": false,
"n_early_stopping": null,
"output_dir": "out_continue",
"model": {
"name": "alignn_atomwise",
"alignn_layers": 4,
"gcn_layers": 4,
"atom_input_features": 92,
"edge_input_features": 80,
"triplet_input_features": 40,
"embedding_features": 64,
"hidden_features": 256,
"output_features": 1,
"grad_multiplier": -1,
"calculate_gradient": true,
"atomwise_output_features": 0,
"graphwise_weight": 1.0,
"gradwise_weight": 10.0,
"stresswise_weight": 0.0,
"atomwise_weight": 0.0,
"link": "identity",
"zero_inflated": false,
"classification": false
}
}
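For comparison with the example config above, here is a small sketch (paths as added in this PR, run from the repository root) that reports which settings differ between the two files, e.g. the layer count and the loss weights:

import json

paths = {
    "example": "alignn/examples/sample_data_ff/mlearn_data/all/config_example.json",
    "revised": "alignn/ff/revised/config.json",
}
cfgs = {name: json.load(open(p)) for name, p in paths.items()}

# Compare top-level settings (excluding the nested model dict) and model settings.
for label, a, b in (
    ("top-level",
     {k: v for k, v in cfgs["example"].items() if k != "model"},
     {k: v for k, v in cfgs["revised"].items() if k != "model"}),
    ("model", cfgs["example"]["model"], cfgs["revised"]["model"]),
):
    diffs = {k: (a.get(k), b.get(k))
             for k in sorted(set(a) | set(b)) if a.get(k) != b.get(k)}
    print(label, diffs)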
