diff --git a/docs/src/architectures/alchemical-model.rst b/docs/src/architectures/alchemical-model.rst
index 8576db608..bfdd55013 100644
--- a/docs/src/architectures/alchemical-model.rst
+++ b/docs/src/architectures/alchemical-model.rst
@@ -59,11 +59,32 @@ hyperparameters to tune are (in decreasing order of importance):
   This hyperparameter controls the size and depth of the descriptors and the neural
   network. In general, increasing this might lead to better accuracy, especially on
   larger datasets, at the cost of increased training and evaluation time.
-- ``loss_weights``: This controls the weighting of different contributions to the loss
-  (e.g., energy, forces, virial, etc.). The default values work well for most datasets,
-  but they might need to be adjusted. For example, to set a weight of 1.0 for the energy
-  and 0.1 for the forces, you can set the following in the ``options.yaml`` file:
-  ``loss_weights: {"energy": 1.0, "forces": 0.1}``.
+- ``loss``: This section describes the loss function to be used. It has three
+  subsections:
+
+  1. ``weights``: This controls the weighting of the different contributions to the
+     loss (e.g., energy, forces, virial, etc.). The default values work well for most
+     datasets, but they might need to be adjusted. For example, to set a weight of 1.0
+     for the energy and 0.1 for the forces, you can set the following in the
+     ``options.yaml`` file under ``loss``: ``weights: {"energy": 1.0, "forces": 0.1}``.
+  2. ``type``: This controls the type of loss to be used. The default value is
+     ``mse``, and the other options are ``mae`` and ``huber``. ``huber`` is a
+     subsection of its own, and it requires the user to specify a ``deltas``
+     dictionary in the same way as the ``weights`` (e.g.,
+     ``deltas: {"energy": 0.1, "forces": 0.01}``).
+  3. ``reduction``: This controls how the loss is reduced over the samples in a
+     batch. The default value is ``sum``, and the other allowed option is ``mean``.
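+
+  For example, a complete ``loss`` section using a Huber loss could look like this:
+
+  .. code-block:: yaml
+
+      loss:
+        weights: {"energy": 1.0, "forces": 0.1}
+        type:
+          huber:
+            deltas: {"energy": 0.1, "forces": 0.01}
+        reduction: sum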
 
 
 Architecture Hyperparameters
diff --git a/docs/src/architectures/soap-bpnn.rst b/docs/src/architectures/soap-bpnn.rst
index f0715081b..7ac009710 100644
--- a/docs/src/architectures/soap-bpnn.rst
+++ b/docs/src/architectures/soap-bpnn.rst
@@ -55,14 +55,35 @@ hyperparameters to tune are (in decreasing order of importance):
 - ``radial_scaling`` hyperparameters: These hyperparameters control the radial scaling
   of the SOAP descriptor. In general, the default values should work well, but they
   might need to be adjusted for specific datasets.
-- ``loss_weights``: This controls the weighting of different contributions to the loss
-  (e.g., energy, forces, virial, etc.). The default values work well for most datasets,
-  but they might need to be adjusted. For example, to set a weight of 1.0 for the energy
-  and 0.1 for the forces, you can set the following in the ``options.yaml`` file:
-  ``loss_weights: {"energy": 1.0, "forces": 0.1}``.
 - ``layernorm``: Whether to use layer normalization before the neural network. Setting
   this hyperparameter to ``false`` will lead to slower convergence of training, but
   might lead to better generalization outside of the training set distribution.
+- ``loss``: This section describes the loss function to be used. It has three
+  subsections:
+
+  1. ``weights``: This controls the weighting of the different contributions to the
+     loss (e.g., energy, forces, virial, etc.). The default values work well for most
+     datasets, but they might need to be adjusted. For example, to set a weight of 1.0
+     for the energy and 0.1 for the forces, you can set the following in the
+     ``options.yaml`` file under ``loss``: ``weights: {"energy": 1.0, "forces": 0.1}``.
+  2. ``type``: This controls the type of loss to be used. The default value is
+     ``mse``, and the other options are ``mae`` and ``huber``. ``huber`` is a
+     subsection of its own, and it requires the user to specify a ``deltas``
+     dictionary in the same way as the ``weights`` (e.g.,
+     ``deltas: {"energy": 0.1, "forces": 0.01}``).
+  3. ``reduction``: This controls how the loss is reduced over the samples in a
+     batch. The default value is ``sum``, and the other allowed option is ``mean``.
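+
+  For example, a complete ``loss`` section using a Huber loss could look like this:
+
+  .. code-block:: yaml
+
+      loss:
+        weights: {"energy": 1.0, "forces": 0.1}
+        type:
+          huber:
+            deltas: {"energy": 0.1, "forces": 0.01}
+        reduction: sum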
["mse", "mae"] + }, + { + "type": "object", + "properties": { + "huber": { + "type": "object", + "properties": { + "deltas": { + "type": "object", + "patternProperties": { + ".*": { + "type": "number" + } + }, + "additionalProperties": false + } + }, + "required": ["deltas"], + "additionalProperties": false + } + }, + "additionalProperties": false + } + ] } }, "additionalProperties": false - }, - "log_mae": { - "type": "boolean" } }, "additionalProperties": false diff --git a/src/metatrain/experimental/soap_bpnn/trainer.py b/src/metatrain/experimental/soap_bpnn/trainer.py index f70818759..6c51267d4 100644 --- a/src/metatrain/experimental/soap_bpnn/trainer.py +++ b/src/metatrain/experimental/soap_bpnn/trainer.py @@ -191,11 +191,11 @@ def train( loss_weights_dict = {} for output_name in outputs_list: loss_weights_dict[output_name] = ( - self.hypers["loss_weights"][ + self.hypers["loss"]["weights"][ to_external_name(output_name, train_targets) ] if to_external_name(output_name, train_targets) - in self.hypers["loss_weights"] + in self.hypers["loss"]["weights"] else 1.0 ) loss_weights_dict_external = { @@ -205,7 +205,11 @@ def train( logging.info(f"Training with loss weights: {loss_weights_dict_external}") # Create a loss function: - loss_fn = TensorMapDictLoss(loss_weights_dict, type=self.hypers["loss_type"]) + loss_fn = TensorMapDictLoss( + loss_weights_dict, + reduction=self.hypers["loss"]["reduction"], + type=self.hypers["loss"]["type"], + ) # Create an optimizer: optimizer = torch.optim.Adam( diff --git a/src/metatrain/utils/loss.py b/src/metatrain/utils/loss.py index b5af8cb7c..fe226212d 100644 --- a/src/metatrain/utils/loss.py +++ b/src/metatrain/utils/loss.py @@ -2,6 +2,9 @@ import torch from metatensor.torch import TensorMap +from omegaconf import DictConfig + +from metatrain.utils.external_naming import to_internal_name # This file defines losses for metatensor models. 
+        losses = {}
         if type == "mse":
-            self.loss = torch.nn.MSELoss(reduction=reduction)
+            losses["values"] = torch.nn.MSELoss(reduction=reduction)
+            for key in gradient_weights.keys():
+                losses[key] = torch.nn.MSELoss(reduction=reduction)
         elif type == "mae":
-            self.loss = torch.nn.L1Loss(reduction=reduction)
+            losses["values"] = torch.nn.L1Loss(reduction=reduction)
+            for key in gradient_weights.keys():
+                losses[key] = torch.nn.L1Loss(reduction=reduction)
+        elif isinstance(type, dict) and "huber" in type:
+            # Huber loss
+            deltas = type["huber"]["deltas"]
+            losses["values"] = torch.nn.HuberLoss(
+                reduction=reduction, delta=deltas["values"]
+            )
+            for key in gradient_weights.keys():
+                losses[key] = torch.nn.HuberLoss(reduction=reduction, delta=deltas[key])
         else:
             raise ValueError(f"Unknown loss type: {type}")
+
+        self.losses = losses
         self.weight = weight
-        self.gradient_weights = {} if gradient_weights is None else gradient_weights
+        self.gradient_weights = gradient_weights
 
     def __call__(
         self, tensor_map_1: TensorMap, tensor_map_2: TensorMap
     ) -> torch.Tensor:
@@ -103,12 +126,12 @@ def __call__(
 
         values_1 = tensor_map_1.block().values
         values_2 = tensor_map_2.block().values
-        loss += self.weight * self.loss(values_1, values_2)
+        loss += self.weight * self.losses["values"](values_1, values_2)
 
         for gradient_name, gradient_weight in self.gradient_weights.items():
             values_1 = tensor_map_1.block().gradient(gradient_name).values
             values_2 = tensor_map_2.block().gradient(gradient_name).values
-            loss += gradient_weight * self.loss(values_1, values_2)
+            loss += gradient_weight * self.losses[gradient_name](values_1, values_2)
 
         return loss
 
@@ -148,11 +171,12 @@ def __init__(
                     "_gradients", ""
                 )
                 gradient_weights[gradient_name] = weight
+            type_output = _process_type(type, output)
             self.losses[output] = TensorMapLoss(
                 reduction=reduction,
                 weight=value_weight,
                 gradient_weights=gradient_weights,
-                type=type,
+                type=type_output,
             )
 
     def __call__(
@@ -175,3 +199,30 @@ def __call__(
             loss += target_loss
 
         return loss
+
+
+def _process_type(type: Union[str, DictConfig], output: str) -> Union[str, dict]:
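+    """Convert the raw ``type`` hyperparameter into the per-output format
+    expected by ``TensorMapLoss``, i.e., with any Huber ``deltas`` keyed by
+    ``"values"`` and gradient names rather than by external target names."""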
+    if not isinstance(type, str):
+        assert "huber" in type
+        # we process the Huber loss delta dict to make it similar to the
+        # `weights` dict
+        type_output = {"huber": {"deltas": {}}}  # type: ignore
+        for key, delta in type["huber"]["deltas"].items():
+            key_internal = to_internal_name(key)
+            if key_internal == output:
+                type_output["huber"]["deltas"]["values"] = delta
+            elif key_internal.startswith(output) and key_internal.endswith(
+                "_gradients"
+            ):
+                gradient_name = key_internal.replace(f"{output}_", "").replace(
+                    "_gradients", ""
+                )
+                type_output["huber"]["deltas"][gradient_name] = delta
+            else:
+                pass
+    else:
+        type_output = type  # type: ignore
+    return type_output
diff --git a/tests/utils/test_loss.py b/tests/utils/test_loss.py
index 035480a32..8894fe537 100644
--- a/tests/utils/test_loss.py
+++ b/tests/utils/test_loss.py
@@ -246,3 +246,102 @@ def test_tmap_dict_loss_subset(tensor_map_with_grad_1, tensor_map_with_grad_3):
 
     loss_value = loss(output_dict, target_dict)
     torch.testing.assert_close(loss_value, expected_result)
+
+
+def test_tmap_loss_mae():
+    """Test that the MAE loss is computed correctly."""
+    loss = TensorMapLoss(type="mae", reduction="mean")
+
+    tensor_map_1 = TensorMap(
+        keys=Labels.single(),
+        blocks=[
+            TensorBlock(
+                values=torch.tensor([[2.0], [2.0], [3.0]]),
+                samples=Labels.range("samples", 3),
+                components=[],
+                properties=Labels("energy", torch.tensor([[0]])),
+            )
+        ],
+    )
+    tensor_map_2 = TensorMap(
+        keys=Labels.single(),
+        blocks=[
+            TensorBlock(
+                values=torch.tensor([[0.0], [3.0], [3.0]]),
+                samples=Labels.range("samples", 3),
+                components=[],
+                properties=Labels("energy", torch.tensor([[0]])),
+            )
+        ],
+    )
+
+    loss_value = loss(tensor_map_1, tensor_map_1)
+    torch.testing.assert_close(loss_value, torch.tensor(0.0))
+
+    # MAE between [2, 2, 3] and [0, 3, 3]: (2 + 1 + 0) / 3 = 1.0
+    loss_value = loss(tensor_map_1, tensor_map_2)
+    torch.testing.assert_close(loss_value, torch.tensor(1.0))
+
+
+def test_tmap_loss_huber():
+    """Test that the Huber loss is computed correctly."""
+    loss_mse = TensorMapLoss(type="mse", reduction="mean")
+    loss_huber = TensorMapLoss(
+        type={"huber": {"deltas": {"values": 3.0}}}, reduction="mean"
+    )
+
+    tensor_map_1 = TensorMap(
+        keys=Labels.single(),
+        blocks=[
+            TensorBlock(
+                values=torch.tensor([[2.0], [2.0], [3.0]]),
+                samples=Labels.range("samples", 3),
+                components=[],
+                properties=Labels("energy", torch.tensor([[0]])),
+            )
+        ],
+    )
+    tensor_map_2 = TensorMap(
+        keys=Labels.single(),
+        blocks=[
+            TensorBlock(
+                values=torch.tensor([[0.0], [3.0], [3.0]]),
+                samples=Labels.range("samples", 3),
+                components=[],
+                properties=Labels("energy", torch.tensor([[0]])),
+            )
+        ],
+    )
+
+    loss_value = loss_huber(tensor_map_1, tensor_map_1)
+    torch.testing.assert_close(loss_value, torch.tensor(0.0))
+
+    # No outliers, should be equal to MSE (scaled by 0.5 due to torch implementation)
+    loss_value_huber = loss_huber(tensor_map_1, tensor_map_2)
+    loss_value_mse = loss_mse(tensor_map_1, tensor_map_2)
+    torch.testing.assert_close(loss_value_huber, 0.5 * loss_value_mse)
+
+    tensor_map_with_outlier = TensorMap(
+        keys=Labels.single(),
+        blocks=[
+            TensorBlock(
+                values=torch.tensor([[0.0], [100.0], [3.0]]),
+                samples=Labels.range("samples", 3),
+                components=[],
+                properties=Labels("energy", torch.tensor([[0]])),
+            )
+        ],
+    )
+
+    loss_value_huber = loss_huber(tensor_map_1, tensor_map_with_outlier)
+    loss_value_mse = loss_mse(tensor_map_1, tensor_map_with_outlier)
+    # The Huber loss is lower because the outlier contributes only linearly
+    assert loss_value_huber < 0.5 * loss_value_mse
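+
+    # With a delta larger than every residual, the Huber loss stays in its
+    # quadratic branch, so it should match 0.5 * MSE even with the outlier
+    loss_huber_large_delta = TensorMapLoss(
+        type={"huber": {"deltas": {"values": 1000.0}}}, reduction="mean"
+    )
+    loss_value_huber = loss_huber_large_delta(tensor_map_1, tensor_map_with_outlier)
+    torch.testing.assert_close(loss_value_huber, 0.5 * loss_value_mse)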