diff --git a/.gitignore b/.gitignore index 4a4923fbe..c61424005 100644 --- a/.gitignore +++ b/.gitignore @@ -159,8 +159,9 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -# Models +# Don't save model outputs *.pt +!tests/resources/*.pt # model output directories outputs/ diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst index c457689e7..b8456ec4f 100644 --- a/docs/src/getting-started/usage.rst +++ b/docs/src/getting-started/usage.rst @@ -1,5 +1,5 @@ -Usage - command line -==================== +Usage +===== `metatensor-models` is designed for an direct usage from the the command line (cli). The general help of `metatensor-models` can be accessed using @@ -31,24 +31,23 @@ The sub-command to start a model training is metatensor-models train -To train a model you have to define your parameters. This includes the specific -architecture you want to use, the files for obtaining training structures and target -values as well as probable changes +To train a model you have to define your options. This includes the specific +architecture you want to use and the data including the training structures and target +values The default model and training hyperparameter for each model are listed in their -corresponding documentation page. We will use the following config to run an example -training +corresponding documentation page. We will use these minimal options to run an example +training using the default hyperparameters of a SOAP BPNN model - -.. literalinclude:: ../../static/parameters.yaml +.. literalinclude:: ../../static/options.yaml :language: yaml -For each training run a new output directory is based on the current date and time is -created. By default, this output directory is used to store Hydra output for the run -(Configuration, Logs etc). You can `override +For each training run a new output directory based on the current date and time is +created. 
By default, this output directory is used to store Hydra's output for the run +(configuration, logs etc.). You can `override `_ this -behavior in the parameter file. To start the training using the ``parameters.yaml`` in -the current directory type. +behavior in the options file. To start the training create an ``options.yaml`` file in +the current directory and type .. literalinclude:: ../../../examples/usage.sh :language: bash diff --git a/docs/static/parameters.yaml b/docs/static/options.yaml similarity index 86% rename from docs/static/parameters.yaml rename to docs/static/options.yaml index 8458de2b2..3331fbea7 100644 --- a/docs/static/parameters.yaml +++ b/docs/static/options.yaml @@ -1,5 +1,4 @@ defaults: - - _self_ # mandatory parameter to avoid hydra warnings - architecture: soap_bpnn # architecture used to train the model # Section defining the parameters for structure and target data diff --git a/examples/parameters.yaml b/examples/options.yaml similarity index 100% rename from examples/parameters.yaml rename to examples/options.yaml diff --git a/examples/usage.sh b/examples/usage.sh index 33fdb4e55..279d1bd23 100644 --- a/examples/usage.sh +++ b/examples/usage.sh @@ -1,15 +1,16 @@ #!\bin\bash -metatensor-models train --parameters=parameters.yaml +metatensor-models train options.yaml # The functions saves the final model `model.pt` to the current output folder for later # evaluation. All command line flags of the train sub-command can be listed via metatensor-models train --help -# We now evaluate the model on the training dataset +# We now evaluate the model on the training dataset, where the first argument specifies +# the model and the second the structure file -metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz +metatensor-models eval model.pt qm9_reduced_100.xyz # The evaluation command predicts the property the model was trained against; here "U0". 
# The predictions together with the structures have been written in a file named diff --git a/src/metatensor/models/__main__.py b/src/metatensor/models/__main__.py index cb0a0f4ad..4793ad0dc 100644 --- a/src/metatensor/models/__main__.py +++ b/src/metatensor/models/__main__.py @@ -13,7 +13,7 @@ def main(): ap = argparse.ArgumentParser( description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=argparse.RawTextHelpFormatter, ) ap.add_argument( @@ -43,10 +43,10 @@ def main(): # override `sys.argv` to be compatible with our CLI architecture. argv = sys.argv[:1] - parameters_path = Path(args.parameters_path) - argv.append(f"--config-dir={parameters_path.parent}") - argv.append(f"--config-name={parameters_path.name}") - argv.append(f"+output_path={args.output_path}") + options = Path(args.options) + argv.append(f"--config-dir={options.parent}") + argv.append(f"--config-name={options.name}") + argv.append(f"+output_path={args.output}") if args.hydra_paramters is not None: argv += args.hydra_paramters diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py index b247d0dbb..1b8839b30 100644 --- a/src/metatensor/models/cli/eval_model.py +++ b/src/metatensor/models/cli/eval_model.py @@ -3,65 +3,58 @@ from ..utils.data.readers import read_structures from ..utils.data.writers import write_predictions from ..utils.model_io import load_model +from .formatter import CustomHelpFormatter def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None: """Add the `eval_model` paramaters to an argparse (sub)-parser""" if eval_model.__doc__ is not None: - description = eval_model.__doc__.split(r"\n:param")[0] + description = eval_model.__doc__.split(r":param")[0] else: description = None parser = subparser.add_parser( "eval", description=description, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=CustomHelpFormatter, ) parser.set_defaults(callable="eval_model") 
parser.add_argument( - "-m", - "--model", - dest="model_path", + "model", type=str, - required=True, - help="Path to a saved model", + help="saved model to be evaluated", ) parser.add_argument( - "-s", - "--structures", - dest="structure_path", + "structures", type=str, - required=True, - help="Path to a structure file which should be considered for the evaluation.", + help="Structure file which should be considered for the evaluation.", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="output.xyz", - help="Path to save the predicted values.", + help="filename of the predictions (default: %(default)s)", ) -def eval_model( - model_path: str, structure_path: str, output_path: str = "output.xyz" -) -> None: +def eval_model(model: str, structures: str, output: str = "output.xyz") -> None: """Evaluate a pretrained model. ``target_property`` wil be predicted on a provided set of structures. Predicted - values will be written ``output_path``. + values will be written to ``output``. - :param model_path: Path to a saved model - :param structure_path: Path to a structure file which should be considered for the + :param model: Path to a saved model + :param structures: Path to a structure file which should be considered for the evaluation. 
- :param output_path: Path to save the predicted values + :param output: Path to save the predicted values """ - model = load_model(model_path) - structures = read_structures(structure_path) - predictions = model(structures) - write_predictions(output_path, predictions, structures) + loaded_model = load_model(model) + structure_list = read_structures(structures) + predictions = loaded_model(structure_list) + write_predictions(output, predictions, structure_list) diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py index 0ba8ee35d..69ed17afe 100644 --- a/src/metatensor/models/cli/export_model.py +++ b/src/metatensor/models/cli/export_model.py @@ -1,42 +1,41 @@ import argparse +from .formatter import CustomHelpFormatter + def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None: if export_model.__doc__ is not None: - description = export_model.__doc__.split(r":param")[0] + description = export_model.__doc__.split(":param")[0] else: description = None parser = subparser.add_parser( "export", description=description, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=CustomHelpFormatter, ) parser.set_defaults(callable="export_model") parser.add_argument( - "-m", - "--model", - dest="model_path", + "model", type=str, - required=True, - help="Path to a saved model", + help="Saved model which should be exported", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="exported.pt", - help="Export path for the model.", + help="Filename of the exported model (default: %(default)s).", ) -def export_model(model_path: str, output_path: str) -> None: +def export_model(model: str, output: str) -> None: """Export a pretrained model to run MD simulations - :param model_path: Path to a saved model - :param output_path: Path to save the exported model + :param model: Path to a saved model + :param output: Path to save the exported model 
""" raise NotImplementedError("model exporting is not implemented yet.") diff --git a/src/metatensor/models/cli/formatter.py b/src/metatensor/models/cli/formatter.py new file mode 100644 index 000000000..736512389 --- /dev/null +++ b/src/metatensor/models/cli/formatter.py @@ -0,0 +1,26 @@ +import argparse + + +class CustomHelpFormatter(argparse.RawDescriptionHelpFormatter): + """Descriptions formatter showing positional arguments before optionals.""" + + def _format_usage(self, usage, actions, groups, prefix): + if usage is None: + # split optionals from positionals + optionals = [] + positionals = [] + for action in actions: + if action.option_strings: + optionals.append(action) + else: + positionals.append(action) + + prog = "%(prog)s" % dict(prog=self._prog) + + # build full usage string + format = self._format_actions_usage + action_usage = format(positionals + optionals, groups) + usage = " ".join([s for s in [prog, action_usage] if s]) + + # Call the superclass method to format the usage + return super()._format_usage(usage, actions, groups, prefix) diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py index 810e5f330..21ddc12d5 100644 --- a/src/metatensor/models/cli/train_model.py +++ b/src/metatensor/models/cli/train_model.py @@ -11,6 +11,7 @@ from .. import CONFIG_PATH from ..utils.model_io import save_model +from .formatter import CustomHelpFormatter logger = logging.getLogger(__name__) @@ -20,9 +21,7 @@ def _has_yaml_suffix(s: str) -> str: """Checks if a string has a .yaml suffix.""" if Path(s).suffix != ".yaml": - raise argparse.ArgumentTypeError( - f"Parameters file '{s}' must be a `.yaml` file." 
- ) + raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.") return s @@ -34,33 +33,30 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None: be parsed by the hydra CLI.""" if train_model.__doc__ is not None: - description = train_model.__doc__.split(r"\n:param")[0] + description = train_model.__doc__.split(r":param")[0] else: description = None parser = subparser.add_parser( "train", description=description, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=CustomHelpFormatter, ) parser.set_defaults(callable="train_model") parser.add_argument( - "-p", - "--parameters", - dest="parameters_path", + "options", type=_has_yaml_suffix, - required=True, - help="Path to the parameter file", + help="Options file", ) parser.add_argument( "-o", "--output", - dest="output_path", + dest="output", type=str, required=False, default="model.pt", - help="Path to save the final model.", + help="Path to save the final model (default: %(default)s).", ) parser.add_argument( "-y", @@ -73,7 +69,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None: @hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None) -def train_model(config: DictConfig) -> None: +def train_model(options: DictConfig) -> None: """Train an atomistic machine learning model using configurations provided by Hydra. This function sets up the dataset and model architecture, then runs the training @@ -87,34 +83,35 @@ def train_model(config: DictConfig) -> None: https://hydra.cc/docs/advanced/hydra-command-line-flags/ and https://hydra.cc/docs/advanced/override_grammar/basic/ for details. - :param config: A dictionary-like object obtained from Hydra, containing all the - necessary parameters for dataset preparation, model instantiation, and training. 
+ :param options: A dictionary-like object obtained from Hydra, containing all the + necessary options for dataset preparation, model hyperparameters, and training. """ logger.info("Setting up dataset") - structures = read_structures(config["dataset"]["structure_path"]) + structures = read_structures(options["dataset"]["structure_path"]) targets = read_targets( - config["dataset"]["targets_path"], - target_values=config["dataset"]["target_value"], + options["dataset"]["targets_path"], + target_values=options["dataset"]["target_value"], ) dataset = Dataset(structures, targets) logger.info("Setting up model") - architetcure_name = config["architecture"]["name"] + architetcure_name = options["architecture"]["name"] architecture = importlib.import_module(f"metatensor.models.{architetcure_name}") model = architecture.Model( all_species=dataset.all_species, - hypers=OmegaConf.to_container(config["architecture"]["model"]), + hypers=OmegaConf.to_container(options["architecture"]["model"]), ) logger.info("Run training") output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir + print(OmegaConf.to_container(options)) model = architecture.train( model=model, train_dataset=dataset, - hypers=OmegaConf.to_container(config["architecture"]["training"]), + hypers=OmegaConf.to_container(options["architecture"]["training"]), output_dir=output_dir, ) - save_model(model, config["output_path"]) + save_model(model, options["output_path"]) diff --git a/tests/cli/test_eval_model.py b/tests/cli/test_eval_model.py index 6356199c9..183ab24f3 100644 --- a/tests/cli/test_eval_model.py +++ b/tests/cli/test_eval_model.py @@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path): shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz") shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt") - command = [ - "metatensor-models", - "eval", - "-m", - "bpnn-model.pt", - "-s", - "qm9_reduced_100.xyz", - ] + command = ["metatensor-models", "eval", 
"bpnn-model.pt", "qm9_reduced_100.xyz"] if output is not None: command += ["-o", output] diff --git a/tests/cli/test_formatter.py b/tests/cli/test_formatter.py new file mode 100644 index 000000000..4c6836102 --- /dev/null +++ b/tests/cli/test_formatter.py @@ -0,0 +1,20 @@ +import argparse + +from metatensor.models.cli.formatter import CustomHelpFormatter + + +def test_formatter(capsys): + """Test that positonal arguments are displayed before optional in usage.""" + parser = argparse.ArgumentParser(prog="myprog", formatter_class=CustomHelpFormatter) + parser.add_argument("required_input") + parser.add_argument("required_input2") + parser.add_argument("-f", "--foo", help="optional argument") + parser.add_argument("-b", "--bar", help="optional argument 2") + + parser.print_help() + + captured = capsys.readouterr() + assert ( + "usage: myprog required_input required_input2 [-h] [-f FOO] [-b BAR]" + in captured.out + ) diff --git a/tests/cli/test_train_model.py b/tests/cli/test_train_model.py index 8b078bedb..627476a31 100644 --- a/tests/cli/test_train_model.py +++ b/tests/cli/test_train_model.py @@ -2,19 +2,47 @@ import subprocess from pathlib import Path +import pytest + RESOURCES_PATH = Path(__file__).parent.resolve() / ".." 
/ "resources" -def test_train(monkeypatch, tmp_path): +@pytest.mark.parametrize("output", [None, "mymodel.pt"]) +def test_train(monkeypatch, tmp_path, output): """Test that training via the training cli runs without an error raise.""" monkeypatch.chdir(tmp_path) shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz") - shutil.copy(RESOURCES_PATH / "parameters.yaml", "parameters.yaml") - subprocess.check_call( - [ - "metatensor-models", - "train", - "--parameters=parameters.yaml", - ] + shutil.copy(RESOURCES_PATH / "options.yaml", "options.yaml") + + command = ["metatensor-models", "train", "options.yaml"] + + if output is not None: + command += ["-o", output] + else: + output = "model.pt" + + subprocess.check_call(command) + assert Path(output).is_file() + + +def test_yml_error(): + """Test error raise if the options file is not a .yaml file.""" + try: + subprocess.check_output( + ["metatensor-models", "train", "options.yml"], stderr=subprocess.STDOUT + ) + except subprocess.CalledProcessError as captured: + assert "Options file 'options.yml' must be a `.yaml` file." 
in str( + captured.output + ) + + +def test_hydra_arguments(): + """Test if hydra arguments work.""" + option_path = str(RESOURCES_PATH / "options.yaml") + out = subprocess.check_output( + ["metatensor-models", "train", option_path, "--hydra=--help"] ) + # Check that the num_epochs override is successful + assert "num_epochs: 1" in str(out) diff --git a/tests/resources/parameters.yaml b/tests/resources/options.yaml similarity index 78% rename from tests/resources/parameters.yaml rename to tests/resources/options.yaml index 17d7274f6..1563cd69d 100644 --- a/tests/resources/parameters.yaml +++ b/tests/resources/options.yaml @@ -1,10 +1,10 @@ defaults: - - _self_ # mandatory parameter to avoid hydra warnings - architecture: soap_bpnn # architecture used to train the model -training: - batch_size: 8 - num_epochs: 1 +architecture: + training: + batch_size: 2 + num_epochs: 1 # Section defining the parameters for structure and target data dataset: