Change CLI API (#24)

metatensor · Jan 3, 2024 · 1a98445 · 1a98445
1 parent dc2299b
commit 1a98445
Show file tree

Hide file tree

Showing 14 changed files with 159 additions and 103 deletions.
diff --git a/.gitignore b/.gitignore
@@ -159,8 +159,9 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# Models
+# Don't save model outputs
 *.pt
+!tests/resources/*.pt
 
 # model output directories
 outputs/

diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst
@@ -1,5 +1,5 @@
-Usage - command line
-====================
+Usage
+=====
 
 `metatensor-models` is designed for direct usage from the command line (CLI). The
 general help of `metatensor-models` can be accessed using
@@ -31,24 +31,23 @@ The sub-command to start a model training is
 
     metatensor-models train
 
-To train a model you have to define your parameters. This includes the specific
-architecture you want to use, the files for obtaining training structures and target
-values as well as probable changes
+To train a model, you have to define your options. This includes the specific
+architecture you want to use and the data, including the training structures and target
+values.
 
 The default model and training hyperparameter for each model are listed in their
-corresponding documentation page. We will use the following config to run an example
-training
+corresponding documentation page. We will use these minimal options to run an example
+training using the default hyperparameters of a SOAP BPNN model
 
-
-.. literalinclude:: ../../static/parameters.yaml
+.. literalinclude:: ../../static/options.yaml
    :language: yaml
 
-For each training run a new output directory is based on the current date and time is
-created. By default, this output directory is used to store Hydra output for the run
-(Configuration, Logs etc). You can `override
+For each training run a new output directory based on the current date and time is
+created. By default, this output directory is used to store Hydra's output for the run
+(configuration, logs, etc.). You can `override
 <https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/>`_ this
-behavior in the parameter file. To start the training using the ``parameters.yaml`` in
-the current directory type.
+behavior in the options file. To start the training, create an ``options.yaml`` file in
+the current directory and type
 
 .. literalinclude:: ../../../examples/usage.sh
     :language: bash

diff --git a/docs/static/parameters.yaml → docs/static/options.yaml b/docs/static/parameters.yaml → docs/static/options.yaml
@@ -1,5 +1,4 @@
 defaults:
-  - _self_  # mandatory parameter to avoid hydra warnings
   - architecture: soap_bpnn  # architecture used to train the model
 
 # Section defining the parameters for structure and target data

diff --git a/examples/parameters.yaml → examples/options.yaml b/examples/parameters.yaml → examples/options.yaml
diff --git a/examples/usage.sh b/examples/usage.sh
@@ -1,15 +1,16 @@
 #!\bin\bash
 
-metatensor-models train --parameters=parameters.yaml
+metatensor-models train options.yaml
 
 # The function saves the final model `model.pt` to the current output folder for later
 # evaluation. All command line flags of the train sub-command can be listed via
 
 metatensor-models train --help
 
-# We now evaluate the model on the training dataset
+# We now evaluate the model on the training dataset, where the first argument specifies
+# the model and the second the structure file
 
-metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz
+metatensor-models eval model.pt qm9_reduced_100.xyz
 
 # The evaluation command predicts the property the model was trained against; here "U0".
 # The predictions together with the structures have been written in a file named

diff --git a/src/metatensor/models/__main__.py b/src/metatensor/models/__main__.py
@@ -13,7 +13,7 @@
 def main():
     ap = argparse.ArgumentParser(
         description=__doc__,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        formatter_class=argparse.RawTextHelpFormatter,
     )
 
     ap.add_argument(
@@ -43,10 +43,10 @@ def main():
         # override `sys.argv` to be compatible with our CLI architecture.
         argv = sys.argv[:1]
 
-        parameters_path = Path(args.parameters_path)
-        argv.append(f"--config-dir={parameters_path.parent}")
-        argv.append(f"--config-name={parameters_path.name}")
-        argv.append(f"+output_path={args.output_path}")
+        options = Path(args.options)
+        argv.append(f"--config-dir={options.parent}")
+        argv.append(f"--config-name={options.name}")
+        argv.append(f"+output_path={args.output}")
 
         if args.hydra_paramters is not None:
             argv += args.hydra_paramters

diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py
@@ -3,65 +3,58 @@
 from ..utils.data.readers import read_structures
 from ..utils.data.writers import write_predictions
 from ..utils.model_io import load_model
+from .formatter import CustomHelpFormatter
 
 
 def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None:
     """Add the `eval_model` paramaters to an argparse (sub)-parser"""
 
     if eval_model.__doc__ is not None:
-        description = eval_model.__doc__.split(r"\n:param")[0]
+        description = eval_model.__doc__.split(r":param")[0]
     else:
         description = None
 
     parser = subparser.add_parser(
         "eval",
         description=description,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        formatter_class=CustomHelpFormatter,
     )
     parser.set_defaults(callable="eval_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="saved model to be evaluated",
     )
     parser.add_argument(
-        "-s",
-        "--structures",
-        dest="structure_path",
+        "structures",
         type=str,
-        required=True,
-        help="Path to a structure file which should be considered for the evaluation.",
+        help="Structure file which should be considered for the evaluation.",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="output.xyz",
-        help="Path to save the predicted values.",
+        help="filenmae of the predictions (default: %(default)s)",
     )
 
 
-def eval_model(
-    model_path: str, structure_path: str, output_path: str = "output.xyz"
-) -> None:
+def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
     """Evaluate a pretrained model.
 
     ``target_property`` wil be predicted on a provided set of structures. Predicted
-    values will be written ``output_path``.
+    values will be written ``output``.
 
-    :param model_path: Path to a saved model
-    :param structure_path: Path to a structure file which should be considered for the
+    :param model: Path to a saved model
+    :param structure: Path to a structure file which should be considered for the
         evaluation.
-    :param output_path: Path to save the predicted values
+    :param output: Path to save the predicted values
     """
 
-    model = load_model(model_path)
-    structures = read_structures(structure_path)
-    predictions = model(structures)
-    write_predictions(output_path, predictions, structures)
+    loaded_model = load_model(model)
+    structure_list = read_structures(structures)
+    predictions = loaded_model(structure_list)
+    write_predictions(output, predictions, structure_list)
diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py
@@ -1,42 +1,41 @@
 import argparse
 
+from .formatter import CustomHelpFormatter
+
 
 def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None:
     if export_model.__doc__ is not None:
-        description = export_model.__doc__.split(r":param")[0]
+        description = export_model.__doc__.split(":param")[0]
     else:
         description = None
 
     parser = subparser.add_parser(
         "export",
         description=description,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        formatter_class=CustomHelpFormatter,
     )
     parser.set_defaults(callable="export_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="Saved model which should be exprted",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="exported.pt",
-        help="Export path for the model.",
+        help="Filename of the exported model (default: %(default)s).",
     )
 
 
-def export_model(model_path: str, output_path: str) -> None:
+def export_model(model: str, output: str) -> None:
     """Export a pretrained model to run MD simulations
 
-    :param model_path: Path to a saved model
-    :param output_path: Path to save the exported model
+    :param model: Path to a saved model
+    :param output: Path to save the exported model
     """
     raise NotImplementedError("model exporting is not implemented yet.")
diff --git a/src/metatensor/models/cli/formatter.py b/src/metatensor/models/cli/formatter.py
@@ -0,0 +1,26 @@
+import argparse
+
+
+class CustomHelpFormatter(argparse.RawDescriptionHelpFormatter):
+    """Descriptions formatter showing positional arguments before optionals."""
+
+    def _format_usage(self, usage, actions, groups, prefix):
+        if usage is None:
+            # split optionals from positionals
+            optionals = []
+            positionals = []
+            for action in actions:
+                if action.option_strings:
+                    optionals.append(action)
+                else:
+                    positionals.append(action)
+
+            prog = "%(prog)s" % dict(prog=self._prog)
+
+            # build full usage string
+            format = self._format_actions_usage
+            action_usage = format(positionals + optionals, groups)
+            usage = " ".join([s for s in [prog, action_usage] if s])
+
+        # Call the superclass method to format the usage
+        return super()._format_usage(usage, actions, groups, prefix)
diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py
@@ -11,6 +11,7 @@
 
 from .. import CONFIG_PATH
 from ..utils.model_io import save_model
+from .formatter import CustomHelpFormatter
 
 
 logger = logging.getLogger(__name__)
@@ -20,9 +21,7 @@ def _has_yaml_suffix(s: str) -> str:
     """Checks if a string has a .yaml suffix."""
 
     if Path(s).suffix != ".yaml":
-        raise argparse.ArgumentTypeError(
-            f"Parameters file '{s}' must be a `.yaml` file."
-        )
+        raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.")
 
     return s
 
@@ -34,33 +33,30 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
     be parsed by the hydra CLI."""
 
     if train_model.__doc__ is not None:
-        description = train_model.__doc__.split(r"\n:param")[0]
+        description = train_model.__doc__.split(r":param")[0]
     else:
         description = None
 
     parser = subparser.add_parser(
         "train",
         description=description,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        formatter_class=CustomHelpFormatter,
     )
     parser.set_defaults(callable="train_model")
 
     parser.add_argument(
-        "-p",
-        "--parameters",
-        dest="parameters_path",
+        "options",
         type=_has_yaml_suffix,
-        required=True,
-        help="Path to the parameter file",
+        help="Options file",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="model.pt",
-        help="Path to save the final model.",
+        help="Path to save the final model (default: %(default)s).",
     )
     parser.add_argument(
         "-y",
@@ -73,7 +69,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
 
 
 @hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None)
-def train_model(config: DictConfig) -> None:
+def train_model(options: DictConfig) -> None:
     """Train an atomistic machine learning model using configurations provided by Hydra.
 
     This function sets up the dataset and model architecture, then runs the training
@@ -87,34 +83,35 @@ def train_model(config: DictConfig) -> None:
     https://hydra.cc/docs/advanced/hydra-command-line-flags/ and
     https://hydra.cc/docs/advanced/override_grammar/basic/ for details.
 
-    :param config: A dictionary-like object obtained from Hydra, containing all the
-        necessary parameters for dataset preparation, model instantiation, and training.
+    :param options: A dictionary-like object obtained from Hydra, containing all the
+        necessary options for dataset preparation, model hyperparameters, and training.
     """
 
     logger.info("Setting up dataset")
-    structures = read_structures(config["dataset"]["structure_path"])
+    structures = read_structures(options["dataset"]["structure_path"])
     targets = read_targets(
-        config["dataset"]["targets_path"],
-        target_values=config["dataset"]["target_value"],
+        options["dataset"]["targets_path"],
+        target_values=options["dataset"]["target_value"],
     )
     dataset = Dataset(structures, targets)
 
     logger.info("Setting up model")
-    architetcure_name = config["architecture"]["name"]
+    architetcure_name = options["architecture"]["name"]
     architecture = importlib.import_module(f"metatensor.models.{architetcure_name}")
     model = architecture.Model(
         all_species=dataset.all_species,
-        hypers=OmegaConf.to_container(config["architecture"]["model"]),
+        hypers=OmegaConf.to_container(options["architecture"]["model"]),
     )
 
     logger.info("Run training")
     output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
 
+    print(OmegaConf.to_container(options))
     model = architecture.train(
         model=model,
         train_dataset=dataset,
-        hypers=OmegaConf.to_container(config["architecture"]["training"]),
+        hypers=OmegaConf.to_container(options["architecture"]["training"]),
         output_dir=output_dir,
     )
 
-    save_model(model, config["output_path"])
+    save_model(model, options["output_path"])
diff --git a/tests/cli/test_eval_model.py b/tests/cli/test_eval_model.py
@@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path):
     shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
     shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt")
 
-    command = [
-        "metatensor-models",
-        "eval",
-        "-m",
-        "bpnn-model.pt",
-        "-s",
-        "qm9_reduced_100.xyz",
-    ]
+    command = ["metatensor-models", "eval", "bpnn-model.pt", "qm9_reduced_100.xyz"]
 
     if output is not None:
         command += ["-o", output]