diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 85207528..855b6b0c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -95,6 +95,11 @@ New features and enhancements * Conservative regridding now supports oblique mercator projections. (:pull:`467`). * The automatic name for the weight file in ``regrid_dataset`` is now more explicit to avoid errors, but now requires `cat:id` and `cat:domain` arguments for both the source and target datasets. (:pull:`467`). +Breaking changes +^^^^^^^^^^^^^^^^ +* Version facet is now optional in default filepath schemas for non-simulations with a "source_version" level. (:issue:`500`, :pull:`501`). +* Catalog attributes are removed by default in ``save_to_zarr`` and ``save_to_netcdf``. Catalog attributes are those added from the catalog columns by ``to_dataset``, ``to_dataset_dict`` and ``extract_dataset``, which have names prefixed with ``cat:``. (:issue:`499`, :pull:`501`). + Bug fixes ^^^^^^^^^ * Fixed bug with reusing weights. (:issue:`411`, :pull:`414`). diff --git a/src/xscen/catutils.py b/src/xscen/catutils.py index 7737a5bc..3941ceeb 100644 --- a/src/xscen/catutils.py +++ b/src/xscen/catutils.py @@ -1051,7 +1051,8 @@ def _get_needed_fields(schema: dict): needed.add(level) elif isinstance(level, list): for lvl in level: - needed.add(lvl) + if not (lvl.startswith("(") and lvl.endswith(")")): + needed.add(lvl) elif not (isinstance(level, dict) and list(level.keys()) == ["text"]): raise ValueError( f"Invalid schema with unknown {level} of type {type(level)}." diff --git a/src/xscen/data/file_schema.yml b/src/xscen/data/file_schema.yml index 36330e5b..29b4af13 100644 --- a/src/xscen/data/file_schema.yml +++ b/src/xscen/data/file_schema.yml @@ -13,7 +13,7 @@ # # There are four ways to specify a folder name to use: # - < facet > # The value of the facet. # - (< facet >) # Same, but if the facet is missing, this level is skipped, resulting in a tree of a different depth. 
-# - [< facet >, < facet >, ...]: # The folder name consists in more than one facet, concatenated with a "_" by default. They can't be optional. +# - [< facet >, < facet >, ...]: # The folder name consists in more than one facet, concatenated with a "_" by default. They can be optional. # - text: < value > # A fixed string # filename: # The file name schema, a list of facet names. If a facet is empty, it will be skipped. Elements will be separated by "_". # # The special "DATES" facet will be replaced by the most concise way found to define the temporal range covered by the file. @@ -33,7 +33,7 @@ original-non-sims: - type - domain - institution - - [ source, version ] + - [ source, (version) ] - (member) - frequency - variable @@ -92,7 +92,7 @@ original-hydro-reconstruction: - hydrology_source - (hydrology_member) - institution - - [ source, version ] + - [ source, (version) ] - (member) - frequency - variable @@ -199,7 +199,7 @@ derived-reconstruction: folders: - type - institution - - [ source, version ] + - [ source, (version) ] - (member) - domain - processing_level @@ -261,7 +261,7 @@ derived-hydro-reconstruction: - hydrology_source - (hydrology_member) - institution - - [ source, version ] + - [ source, (version) ] - (member) - domain - processing_level diff --git a/src/xscen/io.py b/src/xscen/io.py index e27f0c30..c7935f3e 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -24,7 +24,7 @@ from .config import parse_config from .scripting import TimeoutException -from .utils import TRANSLATOR, season_sort_key, translate_time_chunk +from .utils import TRANSLATOR, season_sort_key, strip_cat_attrs, translate_time_chunk logger = logging.getLogger(__name__) KEEPBITS = defaultdict(lambda: 12) @@ -374,6 +374,7 @@ def save_to_netcdf( bitround: bool | int | dict = False, compute: bool = True, netcdf_kwargs: dict | None = None, + strip_cat_metadata: bool = True, ): """Save a Dataset to NetCDF, rechunking or compressing if requested. 
@@ -399,6 +400,8 @@ def save_to_netcdf( Whether to start the computation or return a delayed object. netcdf_kwargs : dict, optional Additional arguments to send to_netcdf() + strip_cat_metadata : bool + If True (default), strips all catalog-added attributes before saving the dataset. Returns ------- @@ -425,6 +428,9 @@ def save_to_netcdf( # Remove original_shape from encoding, since it can cause issues with some engines. ds[var].encoding.pop("original_shape", None) + if strip_cat_metadata: + ds = strip_cat_attrs(ds) + _coerce_attrs(ds.attrs) for var in ds.variables.values(): _coerce_attrs(var.attrs) @@ -445,6 +451,7 @@ def save_to_zarr( # noqa: C901 mode: str = "f", itervar: bool = False, timeout_cleanup: bool = True, + strip_cat_metadata: bool = True, ): """ Save a Dataset to Zarr format, rechunking and compressing if requested. @@ -487,6 +494,8 @@ def save_to_zarr( # noqa: C901 If True (default) and a :py:class:`xscen.scripting.TimeoutException` is raised during the writing, the variable being written is removed from the dataset as it is incomplete. This does nothing if `compute` is False. + strip_cat_metadata : bool + If True (default), strips all catalog-added attributes before saving the dataset. 
Returns ------- @@ -561,6 +570,9 @@ def _skip(var): if len(ds.data_vars) == 0: return None + if strip_cat_metadata: + ds = strip_cat_attrs(ds) + _coerce_attrs(ds.attrs) for var in ds.variables.values(): _coerce_attrs(var.attrs) diff --git a/src/xscen/utils.py b/src/xscen/utils.py index c9e86701..f6b69475 100644 --- a/src/xscen/utils.py +++ b/src/xscen/utils.py @@ -633,6 +633,15 @@ def get_cat_attrs( return facets +def strip_cat_attrs(ds: xr.Dataset, prefix: str = "cat:"): + """Remove attributes added from the catalog by `to_dataset` or `extract_dataset`.""" + dsc = ds.copy() + for k in list(dsc.attrs): + if k.startswith(prefix): + del dsc.attrs[k] + return dsc + + @parse_config def maybe_unstack( ds: xr.Dataset, @@ -923,12 +932,10 @@ def clean_up( # noqa: C901 msg = f"Converting units: {variables_and_units}" logger.info(msg) ds = change_units(ds=ds, variables_and_units=variables_and_units) - # convert calendar if convert_calendar_kwargs: # create mask of grid point that should always be nan ocean = ds.isnull().all("time") - # if missing_by_var exist make sure missing data are added to time axis if missing_by_var: if not all(k in missing_by_var.keys() for k in ds.data_vars): diff --git a/tests/test_catutils.py b/tests/test_catutils.py index 40c7c857..f3e561f7 100644 --- a/tests/test_catutils.py +++ b/tests/test_catutils.py @@ -273,20 +273,28 @@ def test_pattern_from_schema(samplecat): assert any(res) -def test_build_path_ds(): +@pytest.mark.parametrize("hasver", [True, False]) +def test_build_path_ds(hasver): ds = xr.tutorial.open_dataset("air_temperature") ds = ds.assign(time=xr.cftime_range("0001-01-01", freq="6h", periods=ds.time.size)) ds.attrs.update(source="source", institution="institution") + if hasver: + ds.attrs["version"] = "v1" new_path = cu.build_path( ds, schemas={ - "folders": ["source", "institution", ["variable", "xrfreq"]], + "folders": [["source", "(version)"], "institution", ["variable", "xrfreq"]], "filename": ["source", "institution", 
"variable", "frequency", "DATES"], }, ) - assert new_path == Path( - "source/institution/air_6h/source_institution_air_6hr_0001-0002" - ) + if hasver: + assert new_path == Path( + "source_v1/institution/air_6h/source_institution_air_6hr_0001-0002" + ) + else: + assert new_path == Path( + "source/institution/air_6h/source_institution_air_6hr_0001-0002" + ) def test_build_path_multivar(samplecat): diff --git a/tests/test_utils.py b/tests/test_utils.py index 9b49b6c3..647ac33f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -241,6 +241,10 @@ def test_get_cat_attrs(self, ds, prefix, var_as_str): elif prefix == "dog:": assert out == {"source": "CanESM5"} + def test_strip_cat_attrs(self): + out = xs.utils.strip_cat_attrs(self.ds) + assert list(out.attrs.keys()) == ["dog:source"] + class TestStack: def test_no_nan(self):