Merge pull request #130 from pbenner/no-test-set
 Allowing training without test set
knc6 committed Aug 11, 2023
2 parents 46649be + e9a3299
commit 4f14235
Showing 5 changed files with 43 additions and 12 deletions.
alignn/data.py: 15 additions & 9 deletions

@@ -174,8 +174,8 @@ def get_id_train_val_test(
# full train/val test split
# ids = ids[::-1]
id_train = ids[:n_train]
- id_val = ids[-(n_val + n_test) : -n_test]  # noqa:E203
- id_test = ids[-n_test:]
+ id_val = ids[-(n_val + n_test) : -n_test] if n_test > 0 else ids[-(n_val + n_test) :]  # noqa:E203
+ id_test = ids[-n_test:] if n_test > 0 else []
return id_train, id_val, id_test
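
A quick standalone sketch of the new behavior (hypothetical toy values, not the full get_id_train_val_test signature): with n_test = 0 the old slices would have turned id_test into the whole list (ids[-0:] is ids[0:]) and emptied id_val, whereas the guarded version returns an empty test split.

    # Toy illustration of the guarded split logic (hypothetical values).
    ids = list(range(10))
    n_train, n_val, n_test = 7, 3, 0

    id_train = ids[:n_train]
    id_val = (
        ids[-(n_val + n_test) : -n_test] if n_test > 0 else ids[-(n_val + n_test) :]
    )
    id_test = ids[-n_test:] if n_test > 0 else []

    print(id_train)  # [0, 1, 2, 3, 4, 5, 6]
    print(id_val)    # [7, 8, 9]
    print(id_test)   # []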


@@ -489,7 +489,7 @@ def get_train_val_loaders(
classification=classification_threshold is not None,
output_dir=output_dir,
tmp_name="val_data",
- )
+ ) if len(dataset_val) > 0 else None
test_data = get_torch_dataset(
dataset=dataset_test,
id_tag=id_tag,
@@ -507,7 +507,7 @@ def get_train_val_loaders(
classification=classification_threshold is not None,
output_dir=output_dir,
tmp_name="test_data",
- )
+ ) if len(dataset_test) > 0 else None

collate_fn = train_data.collate
# print("line_graph,line_dih_graph", line_graph, line_dih_graph)
@@ -543,14 +543,20 @@ def get_train_val_loaders(
drop_last=False,
num_workers=workers,
pin_memory=pin_memory,
- )
+ ) if len(dataset_test) > 0 else None

if save_dataloader:
torch.save(train_loader, train_sample)
- torch.save(val_loader, val_sample)
- torch.save(test_loader, test_sample)
+ if val_loader is not None:
+     torch.save(val_loader, val_sample)
+ if test_loader is not None:
+     torch.save(test_loader, test_sample)

print("n_train:", len(train_loader.dataset))
print("n_val:", len(val_loader.dataset))
print("n_test:", len(test_loader.dataset))
print("n_val :", len(val_loader.dataset)
if val_loader is not None else 0)
print("n_test :", len(test_loader.dataset)
if test_loader is not None else 0)
return (
train_loader,
val_loader,
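The downstream effect on the loaders can be sketched as follows (a hypothetical minimal example, not the real get_torch_dataset pipeline): an empty test split leaves test_loader as None, and the guarded save/print paths skip it.

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset_test = []  # no test set requested
    test_loader = (
        DataLoader(TensorDataset(torch.zeros(4, 3)), batch_size=2)
        if len(dataset_test) > 0
        else None
    )

    # Mirrors the guarded print added above.
    print("n_test :", len(test_loader.dataset) if test_loader is not None else 0)  # n_test : 0
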
alignn/run_alignn_ff.py: 6 additions & 0 deletions

@@ -62,6 +62,12 @@
)
parser.add_argument("--interface_info", default=None, help=intf_line)

+ parser.add_argument(
+     "--device",
+     default=None,
+     help="set device for executing the model [e.g. cpu, cuda, cuda:2]"
+ )

if __name__ == "__main__":
args = parser.parse_args(sys.argv[1:])
model_path = args.model_path
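The new flag can be exercised in isolation as below (a hypothetical snippet; how run_alignn_ff.py consumes args.device downstream is not shown in this hunk):

    import argparse
    import torch

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--device",
        default=None,
        help="set device for executing the model [e.g. cpu, cuda, cuda:2]",
    )

    args = parser.parse_args(["--device", "cuda:2"])
    # Fall back to a default only when --device is not given.
    device = torch.device(args.device) if args.device is not None else torch.device("cpu")
    print(device)  # cuda:2
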
alignn/train.py: 9 additions & 3 deletions

@@ -59,17 +59,20 @@
import json
import pprint


# from accelerate import Accelerator
import os
import warnings


warnings.filterwarnings("ignore", category=RuntimeWarning)
# from sklearn.decomposition import PCA, KernelPCA
# from sklearn.preprocessing import StandardScaler

# torch config
torch.set_default_dtype(torch.float32)


device = "cpu"
if torch.cuda.is_available():
device = torch.device("cuda")
@@ -87,8 +90,9 @@ def make_standard_scalar_and_pca(output):
"""Use standard scalar and PCS for multi-output data."""
sc = pk.load(open(os.path.join(tmp_output_dir, "sc.pkl"), "rb"))
y_pred, y = output
- y_pred = torch.tensor(sc.transform(y_pred.cpu().numpy()), device=device)
- y = torch.tensor(sc.transform(y.cpu().numpy()), device=device)
+ y_pred = torch.tensor(sc.transform(y_pred.cpu().numpy()),
+                       device=y_pred.device)
+ y = torch.tensor(sc.transform(y.cpu().numpy()), device=y.device)
# pc = pk.load(open("pca.pkl", "rb"))
# y_pred = torch.tensor(pc.transform(y_pred), device=device)
# y = torch.tensor(pc.transform(y), device=device)
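
The change above rebuilds each rescaled tensor on that tensor's own device instead of the module-level device; a minimal sketch, assuming a fitted sklearn StandardScaler stands in for the pickled sc:

    import torch
    from sklearn.preprocessing import StandardScaler

    sc = StandardScaler().fit([[0.0, 1.0], [2.0, 3.0]])
    y_pred = torch.tensor([[1.0, 2.0]])  # may live on CPU or any GPU
    y_pred = torch.tensor(
        sc.transform(y_pred.cpu().numpy()),  # the scaler works on CPU numpy arrays
        device=y_pred.device,                # the result returns to the source device
    )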
@@ -1063,7 +1067,7 @@ def es_score(engine):
test_loss = evaluator.state.metrics["loss"]
tb_logger.writer.add_hparams(config, {"hparam/test_loss": test_loss})
tb_logger.close()
- if config.write_predictions and classification:
+ if config.write_predictions and classification and test_loader is not None:
net.eval()
f = open(
os.path.join(config.output_dir, "prediction_results_test_set.csv"),
@@ -1100,6 +1104,7 @@ def es_score(engine):
config.write_predictions
and not classification
and config.model.output_features > 1
+ and test_loader is not None
):
net.eval()
mem = []
@@ -1130,6 +1135,7 @@ def es_score(engine):
config.write_predictions
and not classification
and config.model.output_features == 1
+ and test_loader is not None
):
net.eval()
f = open(
alignn/train_folder.py: 6 additions & 0 deletions

@@ -57,6 +57,12 @@
help="Folder to save outputs",
)

+ parser.add_argument(
+     "--device",
+     default=None,
+     help="set device for training the model [e.g. cpu, cuda, cuda:2]"
+ )


def train_for_folder(
root_dir="examples/sample_data",
alignn/train_folder_ff.py: 7 additions & 0 deletions

@@ -107,6 +107,13 @@
)


+ parser.add_argument(
+     "--device",
+     default=None,
+     help="set device for training the model [e.g. cpu, cuda, cuda:2]"
+ )


def train_for_folder(
root_dir="examples/sample_data",
config_name="config.json",

