From fa877afa838579ec75117cdefd3fc757f8ea8651 Mon Sep 17 00:00:00 2001
From: Philip Loche <ploche@physik.fu-berlin.de>
Date: Mon, 18 Dec 2023 13:11:40 +0100
Subject: [PATCH] Change CLI API

---
 .gitignore                                    |  3 +-
 docs/static/{parameters.yaml => options.yaml} |  1 -
 examples/{parameters.yaml => options.yaml}    |  0
 examples/usage.sh                             |  7 +--
 src/metatensor/models/__main__.py             |  8 ++--
 src/metatensor/models/cli/eval_model.py       | 38 +++++++---------
 src/metatensor/models/cli/export_model.py     | 17 +++----
 src/metatensor/models/cli/train_model.py      | 34 +++++++-------
 tests/cli/test_eval_model.py                  |  9 +---
 tests/cli/test_train_model.py                 | 44 +++++++++++++++----
 .../{parameters.yaml => options.yaml}         |  8 ++--
 11 files changed, 88 insertions(+), 81 deletions(-)
 rename docs/static/{parameters.yaml => options.yaml} (86%)
 rename examples/{parameters.yaml => options.yaml} (100%)
 rename tests/resources/{parameters.yaml => options.yaml} (78%)

diff --git a/.gitignore b/.gitignore
index 4a4923fbe..c61424005 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,8 +159,9 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# Models
+# Don't save model outputs
 *.pt
+!tests/resources/*.pt
 
 # model output directories
 outputs/
diff --git a/docs/static/parameters.yaml b/docs/static/options.yaml
similarity index 86%
rename from docs/static/parameters.yaml
rename to docs/static/options.yaml
index 8458de2b2..3331fbea7 100644
--- a/docs/static/parameters.yaml
+++ b/docs/static/options.yaml
@@ -1,5 +1,4 @@
 defaults:
-  - _self_  # mandatory parameter to avoid hydra warnings
   - architecture: soap_bpnn  # architecture used to train the model
 
 # Section defining the parameters for structure and target data
diff --git a/examples/parameters.yaml b/examples/options.yaml
similarity index 100%
rename from examples/parameters.yaml
rename to examples/options.yaml
diff --git a/examples/usage.sh b/examples/usage.sh
index 33fdb4e55..279d1bd23 100644
--- a/examples/usage.sh
+++ b/examples/usage.sh
@@ -1,15 +1,16 @@
 #!\bin\bash
 
-metatensor-models train --parameters=parameters.yaml
+metatensor-models train options.yaml
 
 # The functions saves the final model `model.pt` to the current output folder for later
 # evaluation. All command line flags of the train sub-command can be listed via
 
 metatensor-models train --help
 
-# We now evaluate the model on the training dataset
+# We now evaluate the model on the training dataset, where the first arguments specifies
+# the model and the second the structure file
 
-metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz
+metatensor-models eval model.pt qm9_reduced_100.xyz
 
 # The evaluation command predicts the property the model was trained against; here "U0".
 # The predictions together with the structures have been written in a file named
diff --git a/src/metatensor/models/__main__.py b/src/metatensor/models/__main__.py
index cb0a0f4ad..8df0f8b78 100644
--- a/src/metatensor/models/__main__.py
+++ b/src/metatensor/models/__main__.py
@@ -43,10 +43,10 @@ def main():
         # override `sys.argv` to be compatible with our CLI architecture.
         argv = sys.argv[:1]
 
-        parameters_path = Path(args.parameters_path)
-        argv.append(f"--config-dir={parameters_path.parent}")
-        argv.append(f"--config-name={parameters_path.name}")
-        argv.append(f"+output_path={args.output_path}")
+        options = Path(args.options)
+        argv.append(f"--config-dir={options.parent}")
+        argv.append(f"--config-name={options.name}")
+        argv.append(f"+output_path={args.output}")
 
         if args.hydra_paramters is not None:
             argv += args.hydra_paramters
diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py
index b247d0dbb..43d4436f2 100644
--- a/src/metatensor/models/cli/eval_model.py
+++ b/src/metatensor/models/cli/eval_model.py
@@ -21,47 +21,39 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="eval_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="saved model to be evaluated",
     )
     parser.add_argument(
-        "-s",
-        "--structures",
-        dest="structure_path",
+        "structures",
         type=str,
-        required=True,
-        help="Path to a structure file which should be considered for the evaluation.",
+        help="Structure file which should be considered for the evaluation.",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="output.xyz",
-        help="Path to save the predicted values.",
+        help="filenmae of the predictions",
     )
 
 
-def eval_model(
-    model_path: str, structure_path: str, output_path: str = "output.xyz"
-) -> None:
+def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
     """Evaluate a pretrained model.
 
     ``target_property`` wil be predicted on a provided set of structures. Predicted
-    values will be written ``output_path``.
+    values will be written ``output``.
 
-    :param model_path: Path to a saved model
-    :param structure_path: Path to a structure file which should be considered for the
+    :param model: Path to a saved model
+    :param structure: Path to a structure file which should be considered for the
         evaluation.
-    :param output_path: Path to save the predicted values
+    :param output: Path to save the predicted values
     """
 
-    model = load_model(model_path)
-    structures = read_structures(structure_path)
-    predictions = model(structures)
-    write_predictions(output_path, predictions, structures)
+    loaded_model = load_model(model)
+    structure_list = read_structures(structures)
+    predictions = loaded_model(structure_list)
+    write_predictions(output, predictions, structure_list)
diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py
index 0ba8ee35d..8b96390d9 100644
--- a/src/metatensor/models/cli/export_model.py
+++ b/src/metatensor/models/cli/export_model.py
@@ -15,28 +15,25 @@ def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="export_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="Saved model which should be exprted",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="exported.pt",
-        help="Export path for the model.",
+        help="Filename of the exported model.",
     )
 
 
-def export_model(model_path: str, output_path: str) -> None:
+def export_model(model: str, output: str) -> None:
     """Export a pretrained model to run MD simulations
 
-    :param model_path: Path to a saved model
-    :param output_path: Path to save the exported model
+    :param model: Path to a saved model
+    :param output: Path to save the exported model
     """
     raise NotImplementedError("model exporting is not implemented yet.")
diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py
index 810e5f330..25c582af4 100644
--- a/src/metatensor/models/cli/train_model.py
+++ b/src/metatensor/models/cli/train_model.py
@@ -20,9 +20,7 @@ def _has_yaml_suffix(s: str) -> str:
     """Checks if a string has a .yaml suffix."""
 
     if Path(s).suffix != ".yaml":
-        raise argparse.ArgumentTypeError(
-            f"Parameters file '{s}' must be a `.yaml` file."
-        )
+        raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.")
 
     return s
 
@@ -46,17 +44,14 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="train_model")
 
     parser.add_argument(
-        "-p",
-        "--parameters",
-        dest="parameters_path",
+        "options",
         type=_has_yaml_suffix,
-        required=True,
-        help="Path to the parameter file",
+        help="Options file",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="model.pt",
@@ -73,7 +68,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
 
 
 @hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None)
-def train_model(config: DictConfig) -> None:
+def train_model(options: DictConfig) -> None:
     """Train an atomistic machine learning model using configurations provided by Hydra.
 
     This function sets up the dataset and model architecture, then runs the training
@@ -87,34 +82,35 @@ def train_model(config: DictConfig) -> None:
     https://hydra.cc/docs/advanced/hydra-command-line-flags/ and
     https://hydra.cc/docs/advanced/override_grammar/basic/ for details.
 
-    :param config: A dictionary-like object obtained from Hydra, containing all the
-        necessary parameters for dataset preparation, model instantiation, and training.
+    :param options: A dictionary-like object obtained from Hydra, containing all the
+        necessary options for dataset preparation, model hyperparameters, and training.
     """
 
     logger.info("Setting up dataset")
-    structures = read_structures(config["dataset"]["structure_path"])
+    structures = read_structures(options["dataset"]["structure_path"])
     targets = read_targets(
-        config["dataset"]["targets_path"],
-        target_values=config["dataset"]["target_value"],
+        options["dataset"]["targets_path"],
+        target_values=options["dataset"]["target_value"],
     )
     dataset = Dataset(structures, targets)
 
     logger.info("Setting up model")
-    architetcure_name = config["architecture"]["name"]
+    architetcure_name = options["architecture"]["name"]
     architecture = importlib.import_module(f"metatensor.models.{architetcure_name}")
     model = architecture.Model(
         all_species=dataset.all_species,
-        hypers=OmegaConf.to_container(config["architecture"]["model"]),
+        hypers=OmegaConf.to_container(options["architecture"]["model"]),
     )
 
     logger.info("Run training")
     output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
 
+    print(OmegaConf.to_container(options))
     model = architecture.train(
         model=model,
         train_dataset=dataset,
-        hypers=OmegaConf.to_container(config["architecture"]["training"]),
+        hypers=OmegaConf.to_container(options["architecture"]["training"]),
         output_dir=output_dir,
     )
 
-    save_model(model, config["output_path"])
+    save_model(model, options["output_path"])
diff --git a/tests/cli/test_eval_model.py b/tests/cli/test_eval_model.py
index 6356199c9..183ab24f3 100644
--- a/tests/cli/test_eval_model.py
+++ b/tests/cli/test_eval_model.py
@@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path):
     shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
     shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt")
 
-    command = [
-        "metatensor-models",
-        "eval",
-        "-m",
-        "bpnn-model.pt",
-        "-s",
-        "qm9_reduced_100.xyz",
-    ]
+    command = ["metatensor-models", "eval", "bpnn-model.pt", "qm9_reduced_100.xyz"]
 
     if output is not None:
         command += ["-o", output]
diff --git a/tests/cli/test_train_model.py b/tests/cli/test_train_model.py
index 8b078bedb..627476a31 100644
--- a/tests/cli/test_train_model.py
+++ b/tests/cli/test_train_model.py
@@ -2,19 +2,47 @@
 import subprocess
 from pathlib import Path
 
+import pytest
+
 
 RESOURCES_PATH = Path(__file__).parent.resolve() / ".." / "resources"
 
 
-def test_train(monkeypatch, tmp_path):
+@pytest.mark.parametrize("output", [None, "mymodel.pt"])
+def test_train(monkeypatch, tmp_path, output):
     """Test that training via the training cli runs without an error raise."""
     monkeypatch.chdir(tmp_path)
     shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
-    shutil.copy(RESOURCES_PATH / "parameters.yaml", "parameters.yaml")
-    subprocess.check_call(
-        [
-            "metatensor-models",
-            "train",
-            "--parameters=parameters.yaml",
-        ]
+    shutil.copy(RESOURCES_PATH / "options.yaml", "options.yaml")
+
+    command = ["metatensor-models", "train", "options.yaml"]
+
+    if output is not None:
+        command += ["-o", output]
+    else:
+        output = "model.pt"
+
+    subprocess.check_call(command)
+    assert Path(output).is_file()
+
+
+def test_yml_error():
+    """Test error raise of the option file is not a .yaml file."""
+    try:
+        subprocess.check_output(
+            ["metatensor-models", "train", "options.yml"], stderr=subprocess.STDOUT
+        )
+    except subprocess.CalledProcessError as captured:
+        assert "Options file 'options.yml' must be a `.yaml` file." in str(
+            captured.output
+        )
+
+
+def test_hydra_arguments():
+    """Test if hydra arguments work."""
+    option_path = str(RESOURCES_PATH / "options.yaml")
+    out = subprocess.check_output(
+        ["metatensor-models", "train", option_path, "--hydra=--help"]
     )
+    # Check that num_epochs is override is succesful
+    assert "num_epochs: 1" in str(out)
diff --git a/tests/resources/parameters.yaml b/tests/resources/options.yaml
similarity index 78%
rename from tests/resources/parameters.yaml
rename to tests/resources/options.yaml
index 17d7274f6..1563cd69d 100644
--- a/tests/resources/parameters.yaml
+++ b/tests/resources/options.yaml
@@ -1,10 +1,10 @@
 defaults:
-  - _self_  # mandatory parameter to avoid hydra warnings
   - architecture: soap_bpnn  # architecture used to train the model
 
-training:
-    batch_size: 8
-    num_epochs: 1
+architecture:
+  training:
+      batch_size: 2
+      num_epochs: 1
 
 # Section defining the parameters for structure and target data
 dataset: