From fa877afa838579ec75117cdefd3fc757f8ea8651 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Mon, 18 Dec 2023 13:11:40 +0100 Subject: [PATCH] Change CLI API --- .gitignore | 3 +- docs/static/{parameters.yaml => options.yaml} | 1 - examples/{parameters.yaml => options.yaml} | 0 examples/usage.sh | 7 +-- src/metatensor/models/__main__.py | 8 ++-- src/metatensor/models/cli/eval_model.py | 38 +++++++--------- src/metatensor/models/cli/export_model.py | 17 +++---- src/metatensor/models/cli/train_model.py | 34 +++++++------- tests/cli/test_eval_model.py | 9 +--- tests/cli/test_train_model.py | 44 +++++++++++++++---- .../{parameters.yaml => options.yaml} | 8 ++-- 11 files changed, 88 insertions(+), 81 deletions(-) rename docs/static/{parameters.yaml => options.yaml} (86%) rename examples/{parameters.yaml => options.yaml} (100%) rename tests/resources/{parameters.yaml => options.yaml} (78%) diff --git a/.gitignore b/.gitignore index 4a4923fbe..c61424005 100644 --- a/.gitignore +++ b/.gitignore @@ -159,8 +159,9 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -# Models +# Don't save model outputs *.pt +!tests/resources/*.pt # model output directories outputs/ diff --git a/docs/static/parameters.yaml b/docs/static/options.yaml similarity index 86% rename from docs/static/parameters.yaml rename to docs/static/options.yaml index 8458de2b2..3331fbea7 100644 --- a/docs/static/parameters.yaml +++ b/docs/static/options.yaml @@ -1,5 +1,4 @@ defaults: - - _self_ # mandatory parameter to avoid hydra warnings - architecture: soap_bpnn # architecture used to train the model # Section defining the parameters for structure and target data diff --git a/examples/parameters.yaml b/examples/options.yaml similarity index 100% rename from examples/parameters.yaml rename to examples/options.yaml diff --git a/examples/usage.sh b/examples/usage.sh index 33fdb4e55..279d1bd23 100644 --- a/examples/usage.sh +++ b/examples/usage.sh @@ -1,15 +1,16 @@ #!\bin\bash -metatensor-models train --parameters=parameters.yaml +metatensor-models train options.yaml # The functions saves the final model `model.pt` to the current output folder for later # evaluation. All command line flags of the train sub-command can be listed via metatensor-models train --help -# We now evaluate the model on the training dataset +# We now evaluate the model on the training dataset, where the first arguments specifies +# the model and the second the structure file -metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz +metatensor-models eval model.pt qm9_reduced_100.xyz # The evaluation command predicts the property the model was trained against; here "U0". # The predictions together with the structures have been written in a file named diff --git a/src/metatensor/models/__main__.py b/src/metatensor/models/__main__.py index cb0a0f4ad..8df0f8b78 100644 --- a/src/metatensor/models/__main__.py +++ b/src/metatensor/models/__main__.py @@ -43,10 +43,10 @@ def main(): # override `sys.argv` to be compatible with our CLI architecture. argv = sys.argv[:1] - parameters_path = Path(args.parameters_path) - argv.append(f"--config-dir={parameters_path.parent}") - argv.append(f"--config-name={parameters_path.name}") - argv.append(f"+output_path={args.output_path}") + options = Path(args.options) + argv.append(f"--config-dir={options.parent}") + argv.append(f"--config-name={options.name}") + argv.append(f"+output_path={args.output}") if args.hydra_paramters is not None: argv += args.hydra_paramters diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py index b247d0dbb..43d4436f2 100644 --- a/src/metatensor/models/cli/eval_model.py +++ b/src/metatensor/models/cli/eval_model.py @@ -21,47 +21,39 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None: parser.set_defaults(callable="eval_model") parser.add_argument( - "-m", - "--model", - dest="model_path", + "model", type=str, - required=True, - help="Path to a saved model", + help="saved model to be evaluated", ) parser.add_argument( - "-s", - "--structures", - dest="structure_path", + "structures", type=str, - required=True, - help="Path to a structure file which should be considered for the evaluation.", + help="Structure file which should be considered for the evaluation.", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="output.xyz", - help="Path to save the predicted values.", + help="filenmae of the predictions", ) -def eval_model( - model_path: str, structure_path: str, output_path: str = "output.xyz" -) -> None: +def eval_model(model: str, structures: str, output: str = "output.xyz") -> None: """Evaluate a pretrained model. ``target_property`` wil be predicted on a provided set of structures. Predicted - values will be written ``output_path``. + values will be written ``output``. - :param model_path: Path to a saved model - :param structure_path: Path to a structure file which should be considered for the + :param model: Path to a saved model + :param structure: Path to a structure file which should be considered for the evaluation. - :param output_path: Path to save the predicted values + :param output: Path to save the predicted values """ - model = load_model(model_path) - structures = read_structures(structure_path) - predictions = model(structures) - write_predictions(output_path, predictions, structures) + loaded_model = load_model(model) + structure_list = read_structures(structures) + predictions = loaded_model(structure_list) + write_predictions(output, predictions, structure_list) diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py index 0ba8ee35d..8b96390d9 100644 --- a/src/metatensor/models/cli/export_model.py +++ b/src/metatensor/models/cli/export_model.py @@ -15,28 +15,25 @@ def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None: parser.set_defaults(callable="export_model") parser.add_argument( - "-m", - "--model", - dest="model_path", + "model", type=str, - required=True, - help="Path to a saved model", + help="Saved model which should be exprted", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="exported.pt", - help="Export path for the model.", + help="Filename of the exported model.", ) -def export_model(model_path: str, output_path: str) -> None: +def export_model(model: str, output: str) -> None: """Export a pretrained model to run MD simulations - :param model_path: Path to a saved model - :param output_path: Path to save the exported model + :param model: Path to a saved model + :param output: Path to save the exported model """ raise NotImplementedError("model exporting is not implemented yet.") diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py index 810e5f330..25c582af4 100644 --- a/src/metatensor/models/cli/train_model.py +++ b/src/metatensor/models/cli/train_model.py @@ -20,9 +20,7 @@ def _has_yaml_suffix(s: str) -> str: """Checks if a string has a .yaml suffix.""" if Path(s).suffix != ".yaml": - raise argparse.ArgumentTypeError( - f"Parameters file '{s}' must be a `.yaml` file." - ) + raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.") return s @@ -46,17 +44,14 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None: parser.set_defaults(callable="train_model") parser.add_argument( - "-p", - "--parameters", - dest="parameters_path", + "options", type=_has_yaml_suffix, - required=True, - help="Path to the parameter file", + help="Options file", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="model.pt", @@ -73,7 +68,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None: @hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None) -def train_model(config: DictConfig) -> None: +def train_model(options: DictConfig) -> None: """Train an atomistic machine learning model using configurations provided by Hydra. This function sets up the dataset and model architecture, then runs the training @@ -87,34 +82,35 @@ def train_model(config: DictConfig) -> None: https://hydra.cc/docs/advanced/hydra-command-line-flags/ and https://hydra.cc/docs/advanced/override_grammar/basic/ for details. - :param config: A dictionary-like object obtained from Hydra, containing all the - necessary parameters for dataset preparation, model instantiation, and training. + :param options: A dictionary-like object obtained from Hydra, containing all the + necessary options for dataset preparation, model hyperparameters, and training. """ logger.info("Setting up dataset") - structures = read_structures(config["dataset"]["structure_path"]) + structures = read_structures(options["dataset"]["structure_path"]) targets = read_targets( - config["dataset"]["targets_path"], - target_values=config["dataset"]["target_value"], + options["dataset"]["targets_path"], + target_values=options["dataset"]["target_value"], ) dataset = Dataset(structures, targets) logger.info("Setting up model") - architetcure_name = config["architecture"]["name"] + architetcure_name = options["architecture"]["name"] architecture = importlib.import_module(f"metatensor.models.{architetcure_name}") model = architecture.Model( all_species=dataset.all_species, - hypers=OmegaConf.to_container(config["architecture"]["model"]), + hypers=OmegaConf.to_container(options["architecture"]["model"]), ) logger.info("Run training") output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir + print(OmegaConf.to_container(options)) model = architecture.train( model=model, train_dataset=dataset, - hypers=OmegaConf.to_container(config["architecture"]["training"]), + hypers=OmegaConf.to_container(options["architecture"]["training"]), output_dir=output_dir, ) - save_model(model, config["output_path"]) + save_model(model, options["output_path"]) diff --git a/tests/cli/test_eval_model.py b/tests/cli/test_eval_model.py index 6356199c9..183ab24f3 100644 --- a/tests/cli/test_eval_model.py +++ b/tests/cli/test_eval_model.py @@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path): shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz") shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt") - command = [ - "metatensor-models", - "eval", - "-m", - "bpnn-model.pt", - "-s", - "qm9_reduced_100.xyz", - ] + command = ["metatensor-models", "eval", "bpnn-model.pt", "qm9_reduced_100.xyz"] if output is not None: command += ["-o", output] diff --git a/tests/cli/test_train_model.py b/tests/cli/test_train_model.py index 8b078bedb..627476a31 100644 --- a/tests/cli/test_train_model.py +++ b/tests/cli/test_train_model.py @@ -2,19 +2,47 @@ import subprocess from pathlib import Path +import pytest + RESOURCES_PATH = Path(__file__).parent.resolve() / ".." / "resources" -def test_train(monkeypatch, tmp_path): +@pytest.mark.parametrize("output", [None, "mymodel.pt"]) +def test_train(monkeypatch, tmp_path, output): """Test that training via the training cli runs without an error raise.""" monkeypatch.chdir(tmp_path) shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz") - shutil.copy(RESOURCES_PATH / "parameters.yaml", "parameters.yaml") - subprocess.check_call( - [ - "metatensor-models", - "train", - "--parameters=parameters.yaml", - ] + shutil.copy(RESOURCES_PATH / "options.yaml", "options.yaml") + + command = ["metatensor-models", "train", "options.yaml"] + + if output is not None: + command += ["-o", output] + else: + output = "model.pt" + + subprocess.check_call(command) + assert Path(output).is_file() + + +def test_yml_error(): + """Test error raise of the option file is not a .yaml file.""" + try: + subprocess.check_output( + ["metatensor-models", "train", "options.yml"], stderr=subprocess.STDOUT + ) + except subprocess.CalledProcessError as captured: + assert "Options file 'options.yml' must be a `.yaml` file." in str( + captured.output + ) + + +def test_hydra_arguments(): + """Test if hydra arguments work.""" + option_path = str(RESOURCES_PATH / "options.yaml") + out = subprocess.check_output( + ["metatensor-models", "train", option_path, "--hydra=--help"] ) + # Check that num_epochs is override is succesful + assert "num_epochs: 1" in str(out) diff --git a/tests/resources/parameters.yaml b/tests/resources/options.yaml similarity index 78% rename from tests/resources/parameters.yaml rename to tests/resources/options.yaml index 17d7274f6..1563cd69d 100644 --- a/tests/resources/parameters.yaml +++ b/tests/resources/options.yaml @@ -1,10 +1,10 @@ defaults: - - _self_ # mandatory parameter to avoid hydra warnings - architecture: soap_bpnn # architecture used to train the model -training: - batch_size: 8 - num_epochs: 1 +architecture: + training: + batch_size: 2 + num_epochs: 1 # Section defining the parameters for structure and target data dataset: