From 484ba1888b446ee89ac182c2be9aad8aff3f3afb Mon Sep 17 00:00:00 2001
From: Ryan Soley <ryan.e.soley@gmail.com>
Date: Thu, 12 Oct 2023 14:31:33 -0400
Subject: [PATCH] add rubicon schema support (#393)

* port `rubicon_schema` source over
* formatting
* add tests
* add notebooks
* update docs
* add recent XGB changes
* add recent LGBM changes
* reset versions
* linting & formatting
---
 MANIFEST.in                                   |   3 +
 docs/source/api_reference.rst                 |   9 +
 docs/source/contribute-schema.rst             |  67 +++
 docs/source/index.rst                         |   5 +
 docs/source/schema-representation.rst         | 440 ++++++++++++++++
 environment.yml                               |   2 +
 .../logging-examples/log-with-schema.ipynb    | 474 ++++++++++++++++++
 .../register-custom-schema.ipynb              | 356 +++++++++++++
 notebooks/logging-examples/set-schema.ipynb   | 193 +++++++
 rubicon_ml/client/project.py                  |   3 +-
 rubicon_ml/schema/__init__.py                 |  10 +
 rubicon_ml/schema/logger.py                   | 221 ++++++++
 rubicon_ml/schema/registry.py                 |  77 +++
 .../schema/lightgbm__LGBMClassifier.yaml      |  15 +
 .../schema/schema/lightgbm__LGBMModel.yaml    |  75 +++
 .../schema/lightgbm__LGBMRegressor.yaml       |   9 +
 .../sklearn__RandomForestClassifier.yaml      |  61 +++
 .../schema/xgboost__DaskXGBClassifier.yaml    |   9 +
 .../schema/xgboost__DaskXGBRegressor.yaml     |   9 +
 .../schema/schema/xgboost__XGBClassifier.yaml |   9 +
 .../schema/schema/xgboost__XGBModel.yaml      | 120 +++++
 .../schema/schema/xgboost__XGBRegressor.yaml  |   9 +
 setup.cfg                                     |   1 +
 tests/fixtures.py                             | 280 ++++++++++-
 tests/integration/test_schema.py              |  60 +++
 tests/unit/schema/__init__.py                 |   0
 tests/unit/schema/test_schema_logger.py       | 355 +++++++++++++
 tests/unit/schema/test_schema_registry.py     |  56 +++
 28 files changed, 2921 insertions(+), 7 deletions(-)
 create mode 100644 docs/source/contribute-schema.rst
 create mode 100644 docs/source/schema-representation.rst
 create mode 100644 notebooks/logging-examples/log-with-schema.ipynb
 create mode 100644 notebooks/logging-examples/register-custom-schema.ipynb
 create mode 100644 notebooks/logging-examples/set-schema.ipynb
 create mode 100644 rubicon_ml/schema/__init__.py
 create mode 100644 rubicon_ml/schema/logger.py
 create mode 100644 rubicon_ml/schema/registry.py
 create mode 100644 rubicon_ml/schema/schema/lightgbm__LGBMClassifier.yaml
 create mode 100644 rubicon_ml/schema/schema/lightgbm__LGBMModel.yaml
 create mode 100644 rubicon_ml/schema/schema/lightgbm__LGBMRegressor.yaml
 create mode 100644 rubicon_ml/schema/schema/sklearn__RandomForestClassifier.yaml
 create mode 100644 rubicon_ml/schema/schema/xgboost__DaskXGBClassifier.yaml
 create mode 100644 rubicon_ml/schema/schema/xgboost__DaskXGBRegressor.yaml
 create mode 100644 rubicon_ml/schema/schema/xgboost__XGBClassifier.yaml
 create mode 100644 rubicon_ml/schema/schema/xgboost__XGBModel.yaml
 create mode 100644 rubicon_ml/schema/schema/xgboost__XGBRegressor.yaml
 create mode 100644 tests/integration/test_schema.py
 create mode 100644 tests/unit/schema/__init__.py
 create mode 100644 tests/unit/schema/test_schema_logger.py
 create mode 100644 tests/unit/schema/test_schema_registry.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 50ddf56a..ebcbfd06 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,7 @@
 graft rubicon_ml/viz/assets
 graft rubicon_ml/viz/assets/css
+
 include versioneer.py
 include rubicon_ml/_version.py
+
+recursive-include rubicon_ml/schema *.yaml
diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst
index 4a93aee5..a300d208 100644
--- a/docs/source/api_reference.rst
+++ b/docs/source/api_reference.rst
@@ -87,6 +87,15 @@ RubiconJSON
 
 .. _library-reference-sklearn:
 
+schema
+======
+
+.. automodule:: rubicon_ml.schema.logger
+   :members:
+
+.. automodule:: rubicon_ml.schema.registry
+   :members:
+
 sklearn
 =======
 ``rubicon_ml`` offers direct integration with **Scikit-learn** via our
diff --git a/docs/source/contribute-schema.rst b/docs/source/contribute-schema.rst
new file mode 100644
index 00000000..2b6c1a14
--- /dev/null
+++ b/docs/source/contribute-schema.rst
@@ -0,0 +1,67 @@
+.. _contribute-schema:
+
+Contribute a schema
+*******************
+
+Consider the following schema that was created in the "Register a custom schema" section:
+
+.. code-block:: python
+
+    extended_schema = {
+        "name": "sklearn__RandomForestClassifier__ext",
+        "extends": "sklearn__RandomForestClassifier",
+
+        "parameters": [
+            {"name": "runtime_environment", "value_env": "RUNTIME_ENV"},
+        ],
+    }
+
+To contribute "sklearn__RandomForestClassifier__ext" to the ``rubicon_ml.schema`` registry,
+first write the dictionary out to a YAML file.
+
+.. code-block:: python
+
+    import yaml
+
+    schema_filename = "sklearn__RandomForestClassifier__ext.yaml"
+
+    with open(schema_filename, "w") as file:
+        file.write(yaml.dump(extended_schema))
+
+Once "sklearn__RandomForestClassifier__ext.yaml" is created, follow the "Developer
+instructions" to fork the rubicon-ml GitHub repository and prepare to make a contribution.
+
+From the root of the forked repository, copy the new schema into the library's schema directory:
+
+.. code-block:: bash
+
+    cp [PATH_TO]/sklearn__RandomForestClassifier__ext.yaml rubicon_ml/schema/schema/
+
+Then update **rubicon_ml/schema/registry.py**, adding the new schema to the
+``RUBICON_SCHEMA_REGISTRY``:
+
+.. code-block:: python
+
+    RUBICON_SCHEMA_REGISTRY = {
+        # other schema entries...
+        "sklearn__RandomForestClassifier__ext": lambda: _load_schema(
+            os.path.join("schema", "sklearn__RandomForestClassifier__ext.yaml")
+        ),
+    }
+
+Finally refer back to the "Contribute" section of the "Developer instructions" to push your
+changes to GitHub and open a pull request. Once the pull request is merged,
+"sklearn__RandomForestClassifier__ext" will be available in the next release of
+``rubicon_ml``.
+
+Schema naming conventions
+=========================
+
+When naming a schema that extends a schema already made available by ``rubicon_ml.schema``, simply
+append a double-underscore and a unique identifier. The "sklearn__RandomForestClassifier__ext"
+above is named following this convention.
+
+When naming a schema that represents an object that is not yet present in schema,
+leverage the ``registry.get_schema_name`` function to generate a name. For example, if
+you are making a schema for an object ``my_obj`` of class ``Model`` from a module ``my_model``,
+``registry.get_schema_name(my_obj)`` will return the name "my_model__Model".
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 63b0b5d4..a3494283 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -121,6 +121,7 @@ To install all extra modules, use the ``all`` extra.
    logging-examples/logging-training-metadata
    logging-examples/logging-plots
    logging-examples/logging-concurrently
+   logging-examples/log-with-schema
    logging-examples/tagging
    logging-examples/rubiconJSON-querying
    visualizations.rst
@@ -136,6 +137,8 @@ To install all extra modules, use the ``all`` extra.
    integrations/integration-sklearn
    logging-examples/logging-feature-plots
    logging-examples/multiple-backend
+   logging-examples/register-custom-schema
+   logging-examples/set-schema
    logging-examples/visualizing-logged-dataframes
 
 .. toctree::
@@ -152,6 +155,7 @@ To install all extra modules, use the ``all`` extra.
    :caption: Reference
 
    api_reference.rst
+   schema-representation.rst
 
 .. toctree::
    :maxdepth: 2
@@ -159,6 +163,7 @@ To install all extra modules, use the ``all`` extra.
    :caption: Community
 
    contributing.rst
+   contribute-schema.rst
    Changelog<https://github.com/capitalone/rubicon-ml/releases>
    Feedback<https://github.com/capitalone/rubicon-ml/issues/new/choose>
    GitHub<https://github.com/capitalone/rubicon-ml>
diff --git a/docs/source/schema-representation.rst b/docs/source/schema-representation.rst
new file mode 100644
index 00000000..8653da5f
--- /dev/null
+++ b/docs/source/schema-representation.rst
@@ -0,0 +1,440 @@
+.. _schema-representation:
+
+Representing model metadata with a schema
+*****************************************
+
+A rubicon-ml schema is a YAML file defining how attributes of a Python object, generally
+representing a model, will be logged to a rubicon-ml experiment. Schema can
+be used to automatically instrument and standardize the rubicon-ml logging of commonly
+used model objects.
+
+Schema are used to log experiments to an existing rubicon-ml project.
+Experiments consist of features, parameters, metrics, artifacts, and dataframes. More info
+on each of these can be found in rubicon-ml's glossary.
+
+A simple schema
+===============
+
+Consider the following objects from a module called ``my_model``:
+
+.. code-block:: python
+
+    import pandas as pd
+    
+    class Optimizer:
+        def optimize(self, X, y, target):
+            self.optimized_ = True
+    
+            return "optimized"
+    
+    class Model:
+        def __init__(self, alpha=1e-3, gamma=1e-3):
+            self.alpha = alpha
+            self.gamma = gamma
+    
+        def fit(self, X, y):
+            self.optimizer = Optimizer()
+            self.target = "y"
+    
+            self.feature_names_in_ = X.columns
+            self.feature_importances_ = [1.0 / len(X.columns)] * len(X.columns)
+    
+            self.learned_attribute_ = self.optimizer.optimize(X, y, self.target)
+    
+            return self
+    
+        def score(self, X):
+            self.score_ = 1.0
+            self.summary_ = pd.DataFrame(
+                [[self.alpha, self.gamma, self.learned_attribute_, self.score_]],
+                columns=["alpha", "gamma", "learned_attribute", "score"],
+            )
+            
+            return self.score_
+
+The following is a complete YAML representation of the ``Model`` object's schema:
+
+.. code-block:: yaml
+
+    name: my_model__Model
+    version: 1.0.0
+    
+    compatibility:
+      pandas:
+        max_version:
+        min_version: 1.0.5
+    docs_url: https://my-docs.com/my-model/Model.html
+    
+    artifacts:
+      - self
+      - name: optimizer
+        data_object_attr: optimizer
+    dataframes:
+      - name: summary
+        df_attr: summary_
+    features:
+      - names_attr: feature_names_in_
+        importances_attr: feature_importances_
+        optional: true
+      - name_attr: target
+    metrics:
+      - name: learned_attribute
+        value_attr: learned_attribute_
+        optional: true
+      - name: score
+        value_attr: score_
+      - name: env_metric
+        value_env: METRIC
+    parameters:
+      - name: alpha
+        value_attr: alpha
+      - name: gamma
+        value_attr: gamma
+      - name: env_param
+        value_env: PARAMETER
+
+Schema metadata
+---------------
+
+The first section of the schema defines metadata about the schema itself,
+like the name and version. **The name of a schema should be the name of the
+library the class it represents comes from and the name of the Python class itself separated
+by a double underscore.**
+
+.. code-block:: yaml
+
+    name: my_model__Model
+    version: 1.0.0
+
+The next section defines any dependencies the model object has on external Python libraries.
+Generally, this will be at least the library the object is imported from. Reference documentation
+for the object to be logged can also be included in this section.
+
+.. code-block:: yaml
+
+    compatibility:
+      pandas:
+        max_version:
+        min_version: 1.0.5
+    docs_url: https://my-docs.com/my-model/Model.html
+
+The remaining sections define how the attributes of the object will be logged to the
+``rubicon-ml`` experiment. In general, each section is a list of attributes to log to
+``rubicon-ml`` with a name for the logged metadata and the name of the attribute
+containing the value to log.
+
+Artifacts
+---------
+
+Define a :ref:`rubicon_ml.Artifact<library-reference-artifact>` 
+for logging by providing a ``name`` for the logged artifact and the attribute ``data_object_attr``
+containing the object to log. The special keyword ``self`` will log the full object the schema
+represents as an artifact with the same name as the object's class.
+
+.. code-block:: yaml
+
+    artifacts:
+      - self             # logs this Model as an artifact named "Model"
+      - name: optimizer  # logs Optimizer in `optimizer` attribute as an artifact named "optimizer"
+        data_object_attr: optimizer
+
+Dataframes
+----------
+
+Define a :ref:`rubicon_ml.Dataframe<library-reference-dataframe>`
+for logging by providing a ``name`` for the logged dataframe and the attribute ``df_attr``
+containing the DataFrame to log.
+
+.. code-block:: yaml
+
+    dataframes:
+      - name: summary  # logs DataFrame in `summary_` attribute as a dataframe named "summary"
+        df_attr: summary_
+
+Features
+--------
+
+Define a single :ref:`rubicon_ml.Feature<library-reference-feature>`
+for logging by providing the attribute ``name_attr`` containing the name of the feature to log
+and optionally the attribute ``importance_attr`` containing the feature's importance.
+
+Lists of features can be defined for logging with the attributes ``names_attr`` containing a
+list of feature names to log and optionally ``importances_attr`` containing the corresponding
+importances.
+
+.. code-block:: yaml
+
+    features:
+      - names_attr: feature_names_in_  # for each value in the `feature_names_in_` attribute, logs a feature named that
+                                       # value with the corresponding importance in the `feature_importances_` attribute
+        importances_attr: feature_importances_
+        optional: true
+      - name_attr: target              # logs a feature named the value of the `target` attribute
+
+Metrics
+-------
+
+Define a :ref:`rubicon_ml.Metric<library-reference-metric>`
+for logging by providing a ``name`` for the logged metric and the attribute ``value_attr``
+containing the metric value to log.
+
+Metric values can also be extracted from the runtime environment. Replace ``value_attr`` with ``value_env`` to
+leverage ``os.environ`` to read the metric value from the available environment variables.
+
+.. code-block:: yaml
+
+    metrics:
+      - name: learned_attribute  # logs value in `learned_attribute_` attribute as a metric named "learned_attribute"
+        value_attr: learned_attribute_
+        optional: true
+      - name: score              # logs value in `score_` attribute as a metric named "score"
+        value_attr: score_
+      - name: env_metric         # logs value in `METRIC` environment variable as a metric named "env_metric"
+        value_env: METRIC
+
+Parameters
+----------
+
+Define a :ref:`rubicon_ml.Parameter<library-reference-parameter>`
+for logging by providing a ``name`` for the logged parameter and the attribute ``value_attr``
+containing the parameter value to log.
+
+Parameter values can also be extracted from the runtime environment. Replace ``value_attr`` with ``value_env`` to
+leverage ``os.environ`` to read the parameter value from the available environment variables.
+
+.. code-block:: yaml
+
+    parameters:
+      - name: alpha      # logs value in `alpha` attribute as a parameter named "alpha"
+        value_attr: alpha
+      - name: gamma      # logs value in `gamma` attribute as a parameter named "gamma"
+        value_attr: gamma
+      - name: env_param  # logs value in `PARAMETER` environment variable as a parameter named "env_param"
+        value_env: PARAMETER
+
+Optional attributes
+===================
+
+In some cases, the attribute containing the value to log may not always be set on the underlying object. A model
+may have been trained on a dataset with no feature names, or perhaps some learned attributes are only learned
+if certain parameters have certain values while fitting.
+
+By default, schema logging will raise an exception if the attribute to be logged is not set. To suppress the errors
+and simply move on, items in the ``artifacts``, ``dataframes``, ``features``, ``metrics``, ``parameters`` and
+``schema`` lists may optionally contain a key ``optional`` with a **true** value.
+
+The ``feature_names_in_`` and ``learned_attribute_`` attributes are both marked optional in the example schema
+above to handle cases where no feature names were present in the training data and ``learned_attribute_`` was
+not learned:
+
+.. code-block:: yaml
+
+    features:
+      - names_attr: feature_names_in_
+        importances_attr: feature_importances_
+        optional: true     # will not error if `feature_names_in_` or `feature_importances_` attributes are not set
+      - name_attr: target  # **will** error if `target` attribute is not set
+    metrics:
+      - name: learned_attribute
+        value_attr: learned_attribute_
+        optional: true     # will not error if `learned_attribute_` attribute is not set
+
+**Note:** Optional items in ``artifacts``, ``dataframes``, ``features``, and ``schema`` will omit the associated
+entity from logging entirely if an optional attribute is not set. Optional items in ``metrics`` and ``parameters``
+will log the associated entity with the given name and a value of **None** if an optional attribute is not set.
+
+Nested schema
+=============
+
+The following is a complete YAML representation of the ``Optimizer`` object's schema:
+
+.. code-block:: yaml
+
+    name: my_model__Optimizer
+    version: 1.0.0
+
+    metrics:
+      - name: optimized
+        value_attr: optimized_
+
+To apply another schema to one of the attributes of the original object, provide the schema ``name``
+to be retrieved via ``registry.get_schema`` and the attribute ``attr`` containing the
+object to apply the schema to.
+
+.. code-block:: yaml
+
+    schema:
+      - name: my_model__Optimizer  # logs a metric according to the above schema using the object in `optimizer`
+        attr: optimizer
+
+**Note:** Nested schema will add the logged entities to the original experiment created by the parent schema,
+not a new experiment. Nested schema cannot have names that conflict with the entities logged by the parent
+schema.
+
+The complete schema now looks like this and will log an additional metric ``optimized`` as defined by the
+``Optimizer`` schema to the original experiment:
+
+.. code-block:: yaml
+
+    name: my_model__Model
+    version: 1.0.0
+    
+    compatibility:
+      pandas:
+        max_version:
+        min_version: 1.0.5
+    docs_url: https://my-docs.com/my-model/Model.html
+    
+    artifacts:
+      - self
+      - name: optimizer
+        data_object_attr: optimizer
+    dataframes:
+      - name: summary
+        df_attr: summary_
+    features:
+      - names_attr: feature_names_in_
+        importances_attr: feature_importances_
+        optional: true
+      - name_attr: target
+    metrics:
+      - name: learned_attribute
+        value_attr: learned_attribute_
+        optional: true
+      - name: score
+        value_attr: score_
+      - name: env_metric
+        value_env: METRIC
+    parameters:
+      - name: alpha
+        value_attr: alpha
+      - name: gamma
+        value_attr: gamma
+      - name: env_param
+        value_env: PARAMETER
+    schema:
+      - name: my_model__Optimizer
+        attr: optimizer
+
+Hierarchical schema
+===================
+
+Some objects may contain a list of other objects that are already represented by a schema, like
+a feature eliminator or hyperparameter optimizer that trained multiple iterations of an underlying model
+object.
+
+The ``children`` key can be provided to log each of these underlying objects to a **new experiment**. This
+means that a single call to ``project.log_with_schema`` will log **1+n** experiments to ``project`` where
+**n** is the number of objects in the list specified by ``children``.
+
+Within the ``children`` key, provide the schema ``name`` for the children objects to be retrieved via
+``registry.get_schema`` and the attribute ``attr`` containing the list of child objects.
+
+.. code-block:: yaml
+
+    children:
+      - name: my_model__Optimizer  # defines the children's schema
+        attr: optimizers           # logs an experiment according to the schema for each object in `optimizers`
+
+If we replace the nested schema from the previous example with a list of children that adhere to the same
+``Optimizer`` schema, the complete schema now looks like this. It will log a single experiment for ``Model``
+containing all the information in the original ``Model`` schema, as well as an additional experiment as
+defined by the ``Optimizer`` schema for each of the objects in ``Model``'s ``optimizers`` list.
+
+.. code-block:: yaml
+
+    name: my_model__Model
+    version: 1.0.0
+    
+    compatibility:
+      pandas:
+        max_version:
+        min_version: 1.0.5
+    docs_url: https://my-docs.com/my-model/Model.html
+    
+    artifacts:
+      - self
+      - name: optimizer
+        data_object_attr: optimizer
+    children:
+      - name: my_model__Optimizer
+        attr: optimizers
+    dataframes:
+      - name: summary
+        df_attr: summary_
+    features:
+      - names_attr: feature_names_in_
+        importances_attr: feature_importances_
+        optional: true
+      - name_attr: target
+    metrics:
+      - name: learned_attribute
+        value_attr: learned_attribute_
+        optional: true
+      - name: score
+        value_attr: score_
+      - name: env_metric
+        value_env: METRIC
+    parameters:
+      - name: alpha
+        value_attr: alpha
+      - name: gamma
+        value_attr: gamma
+      - name: env_param
+        value_env: PARAMETER
+
+Extending a schema
+==================
+
+Consider an extension of ``Model`` named ``NewModel``:
+
+.. code-block:: python
+
+    class NewModel(Model):
+        def __init__(self, alpha=1e-3, gamma=1e-3, delta=1e-3):
+            super().__init__(alpha=alpha, gamma=gamma)
+    
+            self.delta = delta
+    
+        def fit(self, X, y):
+            super().fit(X, y)
+    
+            self.other_learned_attribute_ = self.delta * self.learned_attribute_
+    
+            return self
+
+To extend an existing schema, provide the name of the schema to extend as the
+``extends`` key's value after the new schema's name. This new schema will log everything
+in the schema represented by ``extends`` plus any additional values.
+
+.. code-block:: yaml
+
+    name: my_model__NewModel
+    extends: my_model__Model
+    version: 1.0.0
+
+The following is a complete YAML representation of the ``NewModel`` object's schema.
+This schema will log everything that the ``Model`` schema would with the addition of the
+``other_learned_attribute`` metric and ``delta`` parameter from ``NewModel``.
+
+.. code-block:: yaml
+
+    name: my_model__NewModel
+    extends: my_model__Model
+    version: 1.0.0
+    
+    compatibility:
+      pandas:
+        max_version:
+        min_version: 1.0.5
+    docs_url: https://my-docs.com/my-model/NewModel.html
+    
+    metrics:
+      - name: other_learned_attribute
+        value_attr: other_learned_attribute_
+    parameters:
+      - name: delta
+        value_attr: delta
+
+To see an extended schema in action, check out the "Register a custom
+schema" section.
diff --git a/environment.yml b/environment.yml
index 38be381b..852a9523 100644
--- a/environment.yml
+++ b/environment.yml
@@ -32,9 +32,11 @@ dependencies:
   - ipykernel
   - isort
   - jupyterlab
+  - lightgbm
   - nbconvert
   - pytest
   - pytest-cov
+  - xgboost
   
   # for versioning
   - versioneer
diff --git a/notebooks/logging-examples/log-with-schema.ipynb b/notebooks/logging-examples/log-with-schema.ipynb
new file mode 100644
index 00000000..37eb2a37
--- /dev/null
+++ b/notebooks/logging-examples/log-with-schema.ipynb
@@ -0,0 +1,474 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Logging with a schema\n",
+    "\n",
+    "Create a ``rubicon_ml`` project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<rubicon_ml.client.project.Project at 0x11c99e890>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from rubicon_ml import Rubicon\n",
+    "\n",
+    "rubicon = Rubicon(persistence=\"memory\", auto_git_enabled=True)\n",
+    "project = rubicon.create_project(name=\"apply schema\")\n",
+    "project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train a ``RandomForestClassifier``\n",
+    "\n",
+    "Load a training dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import load_wine\n",
+    "\n",
+    "X, y = load_wine(return_X_y=True, as_frame=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Train an instance of the model the schema represents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RandomForestClassifier(ccp_alpha=0.005, criterion='log_loss',\n",
+      "                       max_features='log2', n_estimators=24, oob_score=True,\n",
+      "                       random_state=121)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "rfc = RandomForestClassifier(\n",
+    "    ccp_alpha=5e-3,\n",
+    "    criterion=\"log_loss\",\n",
+    "    max_features=\"log2\",\n",
+    "    n_estimators=24,\n",
+    "    oob_score=True,\n",
+    "    random_state=121,\n",
+    ")\n",
+    "rfc.fit(X, y)\n",
+    "\n",
+    "print(rfc)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Infer schema and log model metadata\n",
+    "\n",
+    "Log the model metadata defined in the applied schema to a new experiment in ``project`` with ``project.log_with_schema``\n",
+    "\n",
+    "**Note:** ``project.log_with_schema`` will infer the correct schema based on the given object to log"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "inferred schema name: sklearn__RandomForestClassifier\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<rubicon_ml.client.experiment.Experiment at 0x16d392b10>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "experiment = project.log_with_schema(\n",
+    "    rfc,\n",
+    "    experiment_kwargs={  # additional kwargs to be passed to `project.log_experiment`\n",
+    "        \"name\": \"log with schema\",\n",
+    "        \"model_name\": \"RandomForestClassifier\",\n",
+    "        \"description\": \"logged with the `RandomForestClassifier` `rubicon_schema`\",\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "print(f\"inferred schema name: {project.schema_['name']}\")\n",
+    "experiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## View the experiment's logged metadata\n",
+    "\n",
+    "Each experiment contains all the data represented in the schema - more information on the data captured by\n",
+    "a ``rubicon_schema`` can be found in the \"Representing model metadata with a ``rubicon_schema``\" section"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'project_name': 'apply schema',\n",
+       " 'id': 'ec4c3ead-3337-4623-9a97-c61f48e8de3d',\n",
+       " 'name': 'log with schema',\n",
+       " 'description': 'logged with the `RandomForestClassifier` `rubicon_schema`',\n",
+       " 'model_name': 'RandomForestClassifier',\n",
+       " 'branch_name': 'schema',\n",
+       " 'commit_hash': 'c9f696408a03c6a6fbf2fbff39fa48bbf722bae1',\n",
+       " 'training_metadata': None,\n",
+       " 'tags': [],\n",
+       " 'created_at': datetime.datetime(2023, 9, 25, 15, 47, 37, 552091)}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "vars(experiment._domain)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The features and their importances are logged as defined in the schema's \"features\" section"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'names_attr': 'feature_names_in_',\n",
+       "  'importances_attr': 'feature_importances_',\n",
+       "  'optional': True}]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "project.schema_[\"features\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "alcohol (0.1276831830349219)\n",
+      "malic_acid (0.03863837532736449)\n",
+      "ash (0.006168227239831861)\n",
+      "alcalinity_of_ash (0.025490751927615605)\n",
+      "magnesium (0.02935763050777937)\n",
+      "total_phenols (0.058427899304369986)\n",
+      "flavanoids (0.15309812550131274)\n",
+      "nonflavanoid_phenols (0.007414542189797497)\n",
+      "proanthocyanins (0.012615187741781065)\n",
+      "color_intensity (0.13608806341133572)\n",
+      "hue (0.0892558912217226)\n",
+      "od280/od315_of_diluted_wines (0.15604181694153108)\n",
+      "proline (0.15972030565063608)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for feature in experiment.features():\n",
+    "    print(f\"{feature.name} ({feature.importance})\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each parameter and its value are logged as defined in the schema's \"parameters\" section"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'bootstrap', 'value_attr': 'bootstrap'},\n",
+       " {'name': 'ccp_alpha', 'value_attr': 'ccp_alpha'},\n",
+       " {'name': 'class_weight', 'value_attr': 'class_weight'},\n",
+       " {'name': 'criterion', 'value_attr': 'criterion'},\n",
+       " {'name': 'max_depth', 'value_attr': 'max_depth'},\n",
+       " {'name': 'max_features', 'value_attr': 'max_features'},\n",
+       " {'name': 'min_impurity_decrease', 'value_attr': 'min_impurity_decrease'},\n",
+       " {'name': 'max_leaf_nodes', 'value_attr': 'max_leaf_nodes'},\n",
+       " {'name': 'max_samples', 'value_attr': 'max_samples'},\n",
+       " {'name': 'min_samples_split', 'value_attr': 'min_samples_split'},\n",
+       " {'name': 'min_samples_leaf', 'value_attr': 'min_samples_leaf'},\n",
+       " {'name': 'min_weight_fraction_leaf',\n",
+       "  'value_attr': 'min_weight_fraction_leaf'},\n",
+       " {'name': 'n_estimators', 'value_attr': 'n_estimators'},\n",
+       " {'name': 'oob_score', 'value_attr': 'oob_score'},\n",
+       " {'name': 'random_state', 'value_attr': 'random_state'}]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "project.schema_[\"parameters\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bootstrap: True\n",
+      "ccp_alpha: 0.005\n",
+      "class_weight: None\n",
+      "criterion: log_loss\n",
+      "max_depth: None\n",
+      "max_features: log2\n",
+      "min_impurity_decrease: 0.0\n",
+      "max_leaf_nodes: None\n",
+      "max_samples: None\n",
+      "min_samples_split: 2\n",
+      "min_samples_leaf: 1\n",
+      "min_weight_fraction_leaf: 0.0\n",
+      "n_estimators: 24\n",
+      "oob_score: True\n",
+      "random_state: 121\n"
+     ]
+    }
+   ],
+   "source": [
+    "for parameter in experiment.parameters():\n",
+    "    print(f\"{parameter.name}: {parameter.value}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each metric and its value are logged as defined in the schema's \"metrics\" section"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'classes', 'value_attr': 'classes_'},\n",
+       " {'name': 'n_classes', 'value_attr': 'n_classes_'},\n",
+       " {'name': 'n_features_in', 'value_attr': 'n_features_in_'},\n",
+       " {'name': 'n_outputs', 'value_attr': 'n_outputs_'},\n",
+       " {'name': 'oob_decision_function',\n",
+       "  'value_attr': 'oob_decision_function_',\n",
+       "  'optional': True},\n",
+       " {'name': 'oob_score', 'value_attr': 'oob_score_', 'optional': True}]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "project.schema_[\"metrics\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "classes: ...\n",
+      "n_classes: 3\n",
+      "n_features_in: 13\n",
+      "n_outputs: 1\n",
+      "oob_decision_function: ...\n",
+      "oob_score: 0.9775280898876404\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "for metric in experiment.metrics():\n",
+    "    if np.isscalar(metric.value):\n",
+    "        print(f\"{metric.name}: {metric.value}\")\n",
+    "    else:  # don't print long metrics\n",
+    "        print(f\"{metric.name}: ...\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A copy of the trained model is logged as defined in the schema's \"artifacts\" section"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['self']"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "project.schema_[\"artifacts\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RandomForestClassifier:\n",
+      "RandomForestClassifier(ccp_alpha=0.005, criterion='log_loss',\n",
+      "                       max_features='log2', n_estimators=24, oob_score=True,\n",
+      "                       random_state=121)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for artifact in experiment.artifacts():\n",
+    "    print(f\"{artifact.name}:\\n{artifact.get_data(unpickle=True)}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/logging-examples/register-custom-schema.ipynb b/notebooks/logging-examples/register-custom-schema.ipynb
new file mode 100644
index 00000000..c3a95b1a
--- /dev/null
+++ b/notebooks/logging-examples/register-custom-schema.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6898ac1c-c6e3-40d8-a787-c70f2b4e0b03",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Register a custom schema\n",
+    "\n",
+    "``rubicon_schema`` can be constructed within a Python session in addition to being read from\n",
+    "the registry's YAML files\n",
+    "\n",
+    "## Define additional metadata to log\n",
+    "\n",
+    "Add an additional variable to the environment to record with our ``rubicon_schema``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "75a9fb48-2c0f-4fdc-91df-9105fde2892f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AWS\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"RUNTIME_ENV\"] = \"AWS\"\n",
+    "\n",
+    "! echo $RUNTIME_ENV"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6ce08db5-c532-4f1a-9357-61a6b3d1eadd",
+   "metadata": {},
+   "source": [
+    "## Construct a custom schema\n",
+    "\n",
+    "Create a dictionary representation of the new, custom schema. This new schema will extend\n",
+    "the existing ``RandomForestClassifier`` schema with an additional parameter that logs the\n",
+    "new environment variable\n",
+    "\n",
+    "**Note:** The ``extends`` key is not required - custom schema do not need to extend existing schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "020156b3-d0b2-4b99-8c5a-bfb60e02612d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'extends': 'sklearn__RandomForestClassifier',\n",
+      " 'name': 'sklearn__RandomForestClassifier__ext',\n",
+      " 'parameters': [{'name': 'runtime_environment', 'value_env': 'RUNTIME_ENV'}]}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pprint\n",
+    "\n",
+    "extended_schema = {\n",
+    "    \"name\": \"sklearn__RandomForestClassifier__ext\",\n",
+    "    \"extends\": \"sklearn__RandomForestClassifier\",    \n",
+    "\n",
+    "    \"parameters\": [\n",
+    "        {\"name\": \"runtime_environment\", \"value_env\": \"RUNTIME_ENV\"},\n",
+    "    ],\n",
+    "}\n",
+    "pprint.pprint(extended_schema)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae6d5dfa-e511-4901-ae61-f6f2aea55cb0",
+   "metadata": {},
+   "source": [
+    "## Apply a custom schema to a project\n",
+    "\n",
+    "Create a ``rubicon_ml`` project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6fde24fc-1ab3-49fc-8ea4-fb3573e3bb29",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<rubicon_ml.client.project.Project at 0x11251af90>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from rubicon_ml import Rubicon\n",
+    "\n",
+    "rubicon = Rubicon(persistence=\"memory\", auto_git_enabled=True)\n",
+    "project = rubicon.create_project(name=\"apply schema\")\n",
+    "project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b9c46f5b-27da-42bb-b2d2-4761e35cdd4a",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "Apply the custom schema to the project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "e91c3d60-806a-49d4-a4b8-ec13489e6a11",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "project.set_schema(extended_schema)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "885b3e61-7875-445e-993f-3359bd4bb7ad",
+   "metadata": {},
+   "source": [
+    "## Log model metadata with a custom schema\n",
+    "\n",
+    "Load a training dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f71158d7-208d-4094-92b9-94b49a45cb6b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import load_wine\n",
+    "\n",
+    "X, y = load_wine(return_X_y=True, as_frame=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7b779808-771f-4c40-8250-a347e3b67c19",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "Train an instance of the model the schema represents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "838d254b-de2a-4155-909b-707728f343d9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RandomForestClassifier(ccp_alpha=0.005, criterion='log_loss',\n",
+      "                       max_features='log2', n_estimators=24, oob_score=True,\n",
+      "                       random_state=121)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "rfc = RandomForestClassifier(\n",
+    "    ccp_alpha=5e-3,\n",
+    "    criterion=\"log_loss\",\n",
+    "    max_features=\"log2\",\n",
+    "    n_estimators=24,\n",
+    "    oob_score=True,\n",
+    "    random_state=121,\n",
+    ")\n",
+    "rfc.fit(X, y)\n",
+    "\n",
+    "print(rfc)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60e4c75b-1b92-4b8e-b938-81603162f2f4",
+   "metadata": {},
+   "source": [
+    "Log the model metadata defined in the base ``RandomForestClassifier`` schema plus the additional parameter\n",
+    "from the environment to a new experiment in ``project`` with ``project.log_with_schema``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "34341f0d-32a8-4a39-aaf6-dad8ccc8bf1b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<rubicon_ml.client.experiment.Experiment at 0x169f92b10>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "experiment = project.log_with_schema(\n",
+    "    rfc,\n",
+    "    experiment_kwargs={\n",
+    "        \"name\": \"log with extended schema\",\n",
+    "        \"model_name\": \"RandomForestClassifier\",\n",
+    "        \"description\": \"logged with an extended `rubicon_schema`\",\n",
+    "    },\n",
+    ")\n",
+    "experiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "793daa8e-693b-4d2e-8c31-c71cd236291e",
+   "metadata": {},
+   "source": [
+    "## View the experiment's logged metadata\n",
+    "\n",
+    "Each experiment contains all the data represented in the base ``RandomForestClassifier`` schema plus the\n",
+    "additional parameter from the environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c656f695-5a30-4333-9aa8-a206f52a6d31",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bootstrap: True\n",
+      "ccp_alpha: 0.005\n",
+      "class_weight: None\n",
+      "criterion: log_loss\n",
+      "max_depth: None\n",
+      "max_features: log2\n",
+      "min_impurity_decrease: 0.0\n",
+      "max_leaf_nodes: None\n",
+      "max_samples: None\n",
+      "min_samples_split: 2\n",
+      "min_samples_leaf: 1\n",
+      "min_weight_fraction_leaf: 0.0\n",
+      "n_estimators: 24\n",
+      "oob_score: True\n",
+      "random_state: 121\n",
+      "runtime_environment: AWS\n"
+     ]
+    }
+   ],
+   "source": [
+    "for parameter in experiment.parameters():\n",
+    "    print(f\"{parameter.name}: {parameter.value}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d33f1585-00b3-42e6-9741-ac849a6cc8a9",
+   "metadata": {},
+   "source": [
+    "Don't forget to clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "ec8e4159-0f97-4c4d-923a-b8f283184b66",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "del os.environ[\"RUNTIME_ENV\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d4757cb2-00e8-4ba1-aa64-04959dfea5d8",
+   "metadata": {},
+   "source": [
+    "## Persisting and sharing a custom schema\n",
+    "\n",
+    "To share custom schema with all ``rubicon_schema`` users, check out the \"Contribute a ``rubicon_schema``\" section"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/logging-examples/set-schema.ipynb b/notebooks/logging-examples/set-schema.ipynb
new file mode 100644
index 00000000..64285a53
--- /dev/null
+++ b/notebooks/logging-examples/set-schema.ipynb
@@ -0,0 +1,193 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Set a schema on a project\n",
+    "\n",
+    "\"Log a ``rubicon_ml`` experiment with a ``rubicon_schema``\" showed how ``rubicon_schema`` can\n",
+    "infer schema from the object to log - sometimes, this may not be possible and a schema may need to be set manually\n",
+    "\n",
+    "## Select a schema\n",
+    "\n",
+    "View all available schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['sklearn__RandomForestClassifier',\n",
+       " 'xgboost__XGBClassifier',\n",
+       " 'xgboost__DaskXGBClassifier']"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from rubicon_ml.schema import registry\n",
+    "\n",
+    "available_schema = registry.available_schema()\n",
+    "available_schema"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load a schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'artifacts': ['self'],\n",
+      " 'compatibility': {'scikit-learn': {'max_version': None,\n",
+      "                                    'min_version': '1.0.2'}},\n",
+      " 'docs_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html',\n",
+      " 'features': [{'importances_attr': 'feature_importances_',\n",
+      "               'names_attr': 'feature_names_in_',\n",
+      "               'optional': True}],\n",
+      " 'metrics': [{'name': 'classes', 'value_attr': 'classes_'},\n",
+      "             {'name': 'n_classes', 'value_attr': 'n_classes_'},\n",
+      "             {'name': 'n_features_in', 'value_attr': 'n_features_in_'},\n",
+      "             {'name': 'n_outputs', 'value_attr': 'n_outputs_'},\n",
+      "             {'name': 'oob_decision_function',\n",
+      "              'optional': True,\n",
+      "              'value_attr': 'oob_decision_function_'},\n",
+      "             {'name': 'oob_score',\n",
+      "              'optional': True,\n",
+      "              'value_attr': 'oob_score_'}],\n",
+      " 'name': 'sklearn__RandomForestClassifier',\n",
+      " 'parameters': [{'name': 'bootstrap', 'value_attr': 'bootstrap'},\n",
+      "                {'name': 'ccp_alpha', 'value_attr': 'ccp_alpha'},\n",
+      "                {'name': 'class_weight', 'value_attr': 'class_weight'},\n",
+      "                {'name': 'criterion', 'value_attr': 'criterion'},\n",
+      "                {'name': 'max_depth', 'value_attr': 'max_depth'},\n",
+      "                {'name': 'max_features', 'value_attr': 'max_features'},\n",
+      "                {'name': 'min_impurity_decrease',\n",
+      "                 'value_attr': 'min_impurity_decrease'},\n",
+      "                {'name': 'max_leaf_nodes', 'value_attr': 'max_leaf_nodes'},\n",
+      "                {'name': 'max_samples', 'value_attr': 'max_samples'},\n",
+      "                {'name': 'min_samples_split',\n",
+      "                 'value_attr': 'min_samples_split'},\n",
+      "                {'name': 'min_samples_leaf', 'value_attr': 'min_samples_leaf'},\n",
+      "                {'name': 'min_weight_fraction_leaf',\n",
+      "                 'value_attr': 'min_weight_fraction_leaf'},\n",
+      "                {'name': 'n_estimators', 'value_attr': 'n_estimators'},\n",
+      "                {'name': 'oob_score', 'value_attr': 'oob_score'},\n",
+      "                {'name': 'random_state', 'value_attr': 'random_state'}],\n",
+      " 'verison': '1.0.0'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pprint\n",
+    "\n",
+    "rfc_schema = registry.get_schema(\"sklearn__RandomForestClassifier\")\n",
+    "pprint.pprint(rfc_schema)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Apply the schema to a project\n",
+    "\n",
+    "Create a ``rubicon_ml`` project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<rubicon_ml.client.project.Project at 0x134d4fd50>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from rubicon_ml import Rubicon\n",
+    "\n",
+    "rubicon = Rubicon(persistence=\"memory\")\n",
+    "project = rubicon.create_project(name=\"apply schema\")\n",
+    "project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set the schema on the project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "project.set_schema(rfc_schema)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, ``log_with_schema`` will leverage the schema ``rfc_schema`` instead of trying to infer one"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/rubicon_ml/client/project.py b/rubicon_ml/client/project.py
index 62543b76..e5ddb067 100644
--- a/rubicon_ml/client/project.py
+++ b/rubicon_ml/client/project.py
@@ -12,6 +12,7 @@
 from rubicon_ml.client.utils.exception_handling import failsafe
 from rubicon_ml.client.utils.tags import filter_children
 from rubicon_ml.exceptions import RubiconException
+from rubicon_ml.schema.logger import SchemaMixin
 
 if TYPE_CHECKING:
     from rubicon_ml import Rubicon
@@ -19,7 +20,7 @@
     from rubicon_ml.domain import Project as ProjectDomain
 
 
-class Project(Base, ArtifactMixin, DataframeMixin):
+class Project(Base, ArtifactMixin, DataframeMixin, SchemaMixin):
     """A client project.
 
     A `project` is a collection of `experiments`,
diff --git a/rubicon_ml/schema/__init__.py b/rubicon_ml/schema/__init__.py
new file mode 100644
index 00000000..d8c2df5b
--- /dev/null
+++ b/rubicon_ml/schema/__init__.py
@@ -0,0 +1,10 @@
+"""``schema`` submodule initialization."""
+
+from rubicon_ml.schema.registry import (
+    available_schema,
+    get_schema,
+    get_schema_name,
+    register_schema,
+)
+
+__all__ = ["available_schema", "get_schema", "get_schema_name", "register_schema"]
diff --git a/rubicon_ml/schema/logger.py b/rubicon_ml/schema/logger.py
new file mode 100644
index 00000000..cfd83ae8
--- /dev/null
+++ b/rubicon_ml/schema/logger.py
@@ -0,0 +1,221 @@
+"""Methods and a mixin to enable schema logging.
+
+The functions available in the ``schema`` submodule are applied to
+``rubicon_ml.Project`` via the ``SchemaMixin`` class. They can be
+called directly as a method of an existing project.
+"""
+
+import os
+from contextlib import contextmanager
+from typing import Any, Dict, Optional
+
+from rubicon_ml.client.experiment import Experiment
+from rubicon_ml.exceptions import RubiconException
+from rubicon_ml.schema import registry
+
+
+def _get_value(obj, entity_schema):
+    """Resolve the value to log for a parameter or metric schema entry.
+
+    Each of the ``value_attr``, ``value_env`` and ``value_func`` keys that is
+    present in ``entity_schema`` is evaluated, in that order, and the result
+    of the last one present is returned. Returns ``None`` when none of the
+    keys are present.
+    """
+    is_optional = entity_schema.get("optional", False)
+    resolvers = (
+        ("value_attr", lambda source: _safe_getattr(obj, source, is_optional)),
+        ("value_env", lambda source: _safe_environ(source, is_optional)),
+        ("value_func", lambda source: _safe_call_func(obj, source, is_optional)),
+    )
+    resolved = None
+
+    for schema_key, resolve in resolvers:
+        if schema_key in entity_schema:
+            resolved = resolve(entity_schema[schema_key])
+
+    return resolved
+
+
+def _get_df(obj, entity_schema):
+    """Resolve the dataframe to log for a dataframe schema entry.
+
+    ``df_attr`` is read first and ``df_func`` is called second; when both keys
+    are present the result of ``df_func`` is the one returned. Returns
+    ``None`` when neither key is present.
+    """
+    is_optional = entity_schema.get("optional", False)
+    resolvers = (
+        ("df_attr", lambda source: _safe_getattr(obj, source, is_optional)),
+        ("df_func", lambda source: _safe_call_func(obj, source, is_optional)),
+    )
+    resolved_df = None
+
+    for schema_key, resolve in resolvers:
+        if schema_key in entity_schema:
+            resolved_df = resolve(entity_schema[schema_key])
+
+    return resolved_df
+
+
+def _get_data_object(obj, entity_schema):
+    """Resolve the data object to log for an artifact schema entry.
+
+    ``data_object_func`` takes precedence over ``data_object_attr``; only one
+    of the two is evaluated. Returns ``None`` when neither key is present.
+    """
+    is_optional = entity_schema.get("optional", False)
+
+    if "data_object_func" in entity_schema:
+        return _safe_call_func(obj, entity_schema["data_object_func"], is_optional)
+    if "data_object_attr" in entity_schema:
+        return _safe_getattr(obj, entity_schema["data_object_attr"], is_optional)
+
+    return None
+
+
+def _safe_getattr(obj, attr, optional, default=None):
+    """Get attribute ``attr`` of ``obj``, tolerating a missing optional attribute.
+
+    ``default`` is returned instead of raising when the lookup fails and
+    either ``optional`` is truthy or ``attr`` itself is ``None`` (the schema
+    did not name an attribute, which surfaces as a ``TypeError``). Any other
+    ``TypeError`` or ``AttributeError`` is re-raised.
+    """
+    try:
+        return getattr(obj, attr)
+    except (TypeError, AttributeError) as err:
+        attr_not_named = attr is None and isinstance(err, TypeError)
+
+        if not (optional or attr_not_named):
+            raise err
+
+    return default
+
+
+def _safe_environ(environ_var, optional, default=None):
+    """Read environment variable ``environ_var``.
+
+    Returns ``default`` when the variable is unset and ``optional`` is truthy.
+    Raises ``RubiconException``, chained from the ``KeyError``, when the
+    variable is unset and ``optional`` is falsy.
+    """
+    try:
+        return os.environ[environ_var]
+    except KeyError as err:
+        if not optional:
+            raise RubiconException(f"Environment variable '{environ_var}' not set.") from err
+
+    return default
+
+
+def _safe_call_func(obj, func, optional, default=None):
+    """Call the zero-argument method named ``func`` on ``obj`` and return its result.
+
+    The method is looked up with ``_safe_getattr``; when that lookup yields
+    ``None`` nothing is called and ``None`` is returned. If the call raises,
+    ``default`` is returned when ``optional`` is truthy and the exception is
+    re-raised otherwise.
+    """
+    method = _safe_getattr(obj, func, optional, default)
+
+    if method is None:
+        return None
+
+    try:
+        return method()
+    except Exception as err:
+        if optional:
+            return default
+
+        raise err
+
+
+@contextmanager
+def _set_temporary_schema(project, schema_name):
+    """Temporarily replace ``project``'s schema with the registered schema ``schema_name``.
+
+    The schema that was set on entry is restored on exit. The restore runs in
+    a ``finally`` clause so that an exception raised inside the ``with`` body
+    (for example a missing required attribute during a nested
+    ``log_with_schema`` call) cannot leave the temporary schema set on the
+    project.
+    """
+    original_schema = project.schema_
+    project.set_schema(registry.get_schema(schema_name))
+
+    try:
+        yield
+    finally:
+        project.set_schema(original_schema)
+
+
+class SchemaMixin:
+    """Adds schema logging support to a client object."""
+
+    def log_with_schema(
+        self,
+        obj: Any,
+        experiment: Optional[Experiment] = None,
+        experiment_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> Any:
+        """Log an experiment leveraging ``self.schema_``.
+
+        Parameters
+        ----------
+        obj : Any
+            The object to log, e.g. a trained model.
+        experiment : rubicon_ml.Experiment, optional
+            The experiment to log to. If omitted, a new experiment is
+            created with ``self.log_experiment(**experiment_kwargs)``.
+        experiment_kwargs : dict, optional
+            Keyword arguments passed to ``self.log_experiment`` whenever this
+            call creates an experiment (the main one or a child).
+
+        Returns
+        -------
+        rubicon_ml.Experiment
+            The experiment that ``obj`` was logged to.
+
+        Raises
+        ------
+        ValueError
+            If no schema is set and none can be inferred from ``obj``.
+        """
+
+        # Infer the schema from the object's class only when none was set.
+        if not hasattr(self, "schema_"):
+            try:
+                schema_name = registry.get_schema_name(obj)
+                self.schema_ = registry.get_schema(schema_name)
+            except ValueError as err:
+                raise ValueError(
+                    f"No schema set and no schema could be inferred from object {obj}. "
+                    f"Set a schema with `Project.set_schema(schema)`."
+                ) from err
+
+        if experiment_kwargs is None:
+            experiment_kwargs = {}
+
+        if experiment is None:
+            experiment = self.log_experiment(**experiment_kwargs)
+
+        # Log everything defined by the base schema to the same experiment
+        # before logging this schema's own entries.
+        base_schema_name = self.schema_.get("extends")
+        if base_schema_name is not None:
+            with _set_temporary_schema(self, base_schema_name):
+                self.log_with_schema(obj, experiment=experiment)
+
+        for feature in self.schema_.get("features", []):
+            is_optional = feature.get("optional", False)
+
+            if "names_attr" in feature:
+                feature_names = _safe_getattr(obj, feature["names_attr"], is_optional)
+
+                if feature_names is not None:
+                    # Fall back to one ``None`` importance per feature name so
+                    # that ``zip`` below still yields every feature.
+                    feature_importances = _safe_getattr(
+                        obj,
+                        feature.get("importances_attr"),
+                        is_optional,
+                        default=[None] * len(feature_names),
+                    )
+
+                    for name, importance in zip(feature_names, feature_importances):
+                        experiment.log_feature(name=name, importance=importance)
+
+            elif "name_attr" in feature:
+                feature_name = _safe_getattr(obj, feature["name_attr"], is_optional)
+
+                if feature_name is not None:
+                    feature_importance = _safe_getattr(
+                        obj, feature.get("importance_attr"), is_optional
+                    )
+
+                    experiment.log_feature(name=feature_name, importance=feature_importance)
+
+        for parameter in self.schema_.get("parameters", []):
+            experiment.log_parameter(
+                name=parameter["name"],
+                value=_get_value(obj, parameter),
+            )
+
+        for metric in self.schema_.get("metrics", []):
+            experiment.log_metric(
+                name=metric["name"],
+                value=_get_value(obj, metric),
+            )
+
+        for artifact in self.schema_.get("artifacts", []):
+            if isinstance(artifact, str):
+                # The only string entry handled is "self": log ``obj`` itself.
+                if artifact == "self":
+                    experiment.log_artifact(name=obj.__class__.__name__, data_object=obj)
+            elif isinstance(artifact, dict):
+                data_object = _get_data_object(obj, artifact)
+                if data_object is not None:
+                    experiment.log_artifact(name=artifact["name"], data_object=data_object)
+
+        for dataframe in self.schema_.get("dataframes", []):
+            df_value = _get_df(obj, dataframe)
+
+            if df_value is not None:
+                experiment.log_dataframe(df=df_value, name=dataframe["name"])
+
+        # Nested schema: log an attribute of ``obj`` to the same experiment
+        # using the registered schema named by the entry.
+        for schema in self.schema_.get("schema", []):
+            object_to_log = _safe_getattr(obj, schema["attr"], schema.get("optional", False))
+
+            if object_to_log is not None:
+                with _set_temporary_schema(self, schema["name"]):
+                    self.log_with_schema(object_to_log, experiment=experiment)
+
+        has_children = False
+
+        # Children: each element of the named attribute gets its own
+        # experiment, tagged with the ID of the parent experiment.
+        for children in self.schema_.get("children", []):
+            children_objects = _safe_getattr(
+                obj, children["attr"], children.get("optional", False), default=[]
+            )
+
+            for child in children_objects:
+                has_children = True
+
+                child_experiment = self.log_experiment(**experiment_kwargs)
+                child_experiment.add_tags(tags=["child", f"parent_id:{experiment.id}"])
+
+                with _set_temporary_schema(self, children["name"]):
+                    self.log_with_schema(child, experiment=child_experiment)
+
+        if has_children:
+            experiment.add_tags(tags=["parent"])
+
+        return experiment
+
+    def set_schema(self, schema: Dict[str, Any]) -> None:
+        """Set the schema for this client object.
+
+        Parameters
+        ----------
+        schema : dict
+            The schema that ``log_with_schema`` will use.
+        """
+
+        self.schema_ = schema
diff --git a/rubicon_ml/schema/registry.py b/rubicon_ml/schema/registry.py
new file mode 100644
index 00000000..ad73c4f1
--- /dev/null
+++ b/rubicon_ml/schema/registry.py
@@ -0,0 +1,77 @@
+"""Methods for interacting with the existing rubicon-ml ``schema``."""
+
+import os
+from typing import Any, List
+
+import yaml
+
+# Maps each schema name to a zero-argument loader. The YAML file is only read
+# when the loader is called, not when this module is imported. The file name
+# is bound as a lambda default so each loader keeps its own file.
+RUBICON_SCHEMA_REGISTRY = {
+    schema_name: (
+        lambda filename=f"{schema_name}.yaml": _load_schema(os.path.join("schema", filename))
+    )
+    for schema_name in (
+        "lightgbm__LGBMModel",
+        "lightgbm__LGBMClassifier",
+        "lightgbm__LGBMRegressor",
+        "sklearn__RandomForestClassifier",
+        "xgboost__XGBModel",
+        "xgboost__XGBClassifier",
+        "xgboost__XGBRegressor",
+        "xgboost__DaskXGBClassifier",
+        "xgboost__DaskXGBRegressor",
+    )
+}
+
+
+def _load_schema(path: str) -> Any:
+    """Read and parse the schema YAML file at ``path``.
+
+    ``path`` is resolved relative to the directory containing this module.
+    """
+
+    schema_file = os.path.join(os.path.dirname(__file__), path)
+
+    with open(schema_file, "r") as stream:
+        return yaml.safe_load(stream)
+
+
+def available_schema() -> List[str]:
+    """List the name of every schema currently in the registry."""
+
+    return list(RUBICON_SCHEMA_REGISTRY)
+
+
+def get_schema(name: str) -> Any:
+    """Return the schema registered under ``name``.
+
+    Raises
+    ------
+    ValueError
+        If ``name`` is not in the registry.
+    """
+
+    if name in RUBICON_SCHEMA_REGISTRY:
+        load_schema = RUBICON_SCHEMA_REGISTRY[name]
+
+        return load_schema()
+
+    raise ValueError(
+        f"'{name}' is not the name of an available rubicon schema. "
+        "For a list of schema names, use `registry.available_schema()`."
+    )
+
+
+def get_schema_name(obj: Any) -> str:
+    """Build the schema name for ``obj`` as ``<top-level module>__<class name>``."""
+
+    obj_type = obj.__class__
+    top_level_module, _, _ = obj_type.__module__.partition(".")
+
+    return "__".join((top_level_module, obj_type.__name__))
+
+
+def register_schema(name: str, schema: dict):
+    """Register ``schema`` under ``name``, replacing any existing entry.
+
+    The schema is stored behind a zero-argument callable, like the loaders of
+    the built-in schema, and that callable returns this same ``schema`` object
+    each time it is called.
+    """
+
+    def _return_schema():
+        return schema
+
+    RUBICON_SCHEMA_REGISTRY[name] = _return_schema
diff --git a/rubicon_ml/schema/schema/lightgbm__LGBMClassifier.yaml b/rubicon_ml/schema/schema/lightgbm__LGBMClassifier.yaml
new file mode 100644
index 00000000..b4990e09
--- /dev/null
+++ b/rubicon_ml/schema/schema/lightgbm__LGBMClassifier.yaml
@@ -0,0 +1,15 @@
+name: lightgbm__LGBMClassifier
+extends: lightgbm__LGBMModel
+version: 1.0.0
+
+compatibility:
+  lightgbm:
+    max_version:
+    min_version: 3.1.1
+docs_url: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html#lightgbm.LGBMClassifier
+
+metrics:
+  - name: classes
+    value_attr: classes_
+  - name: n_classes
+    value_attr: n_classes_
diff --git a/rubicon_ml/schema/schema/lightgbm__LGBMModel.yaml b/rubicon_ml/schema/schema/lightgbm__LGBMModel.yaml
new file mode 100644
index 00000000..a501c1f3
--- /dev/null
+++ b/rubicon_ml/schema/schema/lightgbm__LGBMModel.yaml
@@ -0,0 +1,75 @@
+name: lightgbm__LGBMModel
+version: 1.0.0
+
+compatibility:
+  lightgbm:
+    max_version:
+    min_version: 3.1.1
+docs_url: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMModel.html#lightgbm.LGBMModel
+
+artifacts:
+  - self
+  - name: booster
+    data_object_attr: booster_
+features: 
+  - names_attr: feature_name_
+    importances_attr: feature_importances_
+    optional: true
+metrics:
+  - name: best_iteration
+    value_attr: best_iteration_
+    optional: true
+  - name: best_score
+    value_attr: best_score_
+    optional: true
+  - name: evals_result
+    value_attr: evals_result_
+    optional: true
+  - name: n_features
+    value_attr: n_features_
+  - name: n_features_in
+    value_attr: n_features_in_
+  - name: n_iter
+    value_attr: n_iter_
+    optional: true
+  - name: objective
+    value_attr: objective_
+parameters:
+  - name: boosting_type
+    value_attr: boosting_type
+  - name: num_leaves
+    value_attr: num_leaves
+  - name: max_depth
+    value_attr: max_depth
+  - name: learning_rate
+    value_attr: learning_rate
+  - name: n_estimators
+    value_attr: n_estimators
+  - name: subsample_for_bin
+    value_attr: subsample_for_bin
+  - name: objective
+    value_attr: objective
+  - name: class_weight
+    value_attr: class_weight
+  - name: min_split_gain
+    value_attr: min_split_gain
+  - name: min_child_weight
+    value_attr: min_child_weight
+  - name: min_child_samples
+    value_attr: min_child_samples
+  - name: subsample
+    value_attr: subsample
+  - name: subsample_freq
+    value_attr: subsample_freq
+  - name: colsample_bytree
+    value_attr: colsample_bytree
+  - name: reg_alpha
+    value_attr: reg_alpha
+  - name: reg_lambda
+    value_attr: reg_lambda
+  - name: random_state
+    value_attr: random_state
+  - name: n_jobs
+    value_attr: n_jobs
+  - name: importance_type
+    value_attr: importance_type
diff --git a/rubicon_ml/schema/schema/lightgbm__LGBMRegressor.yaml b/rubicon_ml/schema/schema/lightgbm__LGBMRegressor.yaml
new file mode 100644
index 00000000..39c67532
--- /dev/null
+++ b/rubicon_ml/schema/schema/lightgbm__LGBMRegressor.yaml
@@ -0,0 +1,9 @@
+name: lightgbm__LGBMRegressor
+extends: lightgbm__LGBMModel
+version: 1.0.0
+
+compatibility:
+  lightgbm:
+    max_version:
+    min_version: 3.1.1
+docs_url: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMModel.html#lightgbm.LGBMRegressor
diff --git a/rubicon_ml/schema/schema/sklearn__RandomForestClassifier.yaml b/rubicon_ml/schema/schema/sklearn__RandomForestClassifier.yaml
new file mode 100644
index 00000000..028e9c00
--- /dev/null
+++ b/rubicon_ml/schema/schema/sklearn__RandomForestClassifier.yaml
@@ -0,0 +1,61 @@
+name: sklearn__RandomForestClassifier
+version: 1.0.0
+
+compatibility:
+  scikit-learn:
+    max_version:
+    min_version: 1.0.2
+docs_url: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
+
+artifacts:
+  - self
+features:
+  - names_attr: feature_names_in_
+    importances_attr: feature_importances_
+    optional: true
+metrics:
+  - name: classes
+    value_attr: classes_
+  - name: n_classes
+    value_attr: n_classes_
+  - name: n_features_in
+    value_attr: n_features_in_
+  - name: n_outputs
+    value_attr: n_outputs_
+  - name: oob_decision_function
+    value_attr: oob_decision_function_
+    optional: true
+  - name: oob_score
+    value_attr: oob_score_
+    optional: true
+parameters:
+  - name: bootstrap
+    value_attr: bootstrap
+  - name: ccp_alpha
+    value_attr: ccp_alpha
+  - name: class_weight
+    value_attr: class_weight
+  - name: criterion
+    value_attr: criterion
+  - name: max_depth
+    value_attr: max_depth
+  - name: max_features
+    value_attr: max_features
+  - name: min_impurity_decrease
+    value_attr: min_impurity_decrease
+  - name: max_leaf_nodes
+    value_attr: max_leaf_nodes
+  - name: max_samples
+    value_attr: max_samples
+  - name: min_samples_split
+    value_attr: min_samples_split
+  - name: min_samples_leaf
+    value_attr: min_samples_leaf
+  - name: min_weight_fraction_leaf
+    value_attr: min_weight_fraction_leaf
+  - name: n_estimators
+    value_attr: n_estimators
+  - name: oob_score
+    value_attr: oob_score
+  - name: random_state
+    value_attr: random_state
diff --git a/rubicon_ml/schema/schema/xgboost__DaskXGBClassifier.yaml b/rubicon_ml/schema/schema/xgboost__DaskXGBClassifier.yaml
new file mode 100644
index 00000000..ae54ebe1
--- /dev/null
+++ b/rubicon_ml/schema/schema/xgboost__DaskXGBClassifier.yaml
@@ -0,0 +1,9 @@
+name: xgboost__DaskXGBClassifier
+extends: xgboost__XGBModel
+version: 1.0.0
+
+compatibility:
+  xgboost:
+    max_version:
+    min_version: 1.7.0
+docs_url: https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.dask.DaskXGBClassifier
diff --git a/rubicon_ml/schema/schema/xgboost__DaskXGBRegressor.yaml b/rubicon_ml/schema/schema/xgboost__DaskXGBRegressor.yaml
new file mode 100644
index 00000000..be3e93e4
--- /dev/null
+++ b/rubicon_ml/schema/schema/xgboost__DaskXGBRegressor.yaml
@@ -0,0 +1,9 @@
+name: xgboost__DaskXGBRegressor
+extends: xgboost__XGBModel
+version: 1.0.0
+
+compatibility:
+  xgboost:
+    max_version:
+    min_version: 1.7.0
+docs_url: https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.dask.DaskXGBRegressor
diff --git a/rubicon_ml/schema/schema/xgboost__XGBClassifier.yaml b/rubicon_ml/schema/schema/xgboost__XGBClassifier.yaml
new file mode 100644
index 00000000..6d463629
--- /dev/null
+++ b/rubicon_ml/schema/schema/xgboost__XGBClassifier.yaml
@@ -0,0 +1,9 @@
+name: xgboost__XGBClassifier
+extends: xgboost__XGBModel
+version: 1.0.0
+
+compatibility:
+  xgboost:
+    max_version:
+    min_version: 1.7.0
+docs_url: https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBClassifier
diff --git a/rubicon_ml/schema/schema/xgboost__XGBModel.yaml b/rubicon_ml/schema/schema/xgboost__XGBModel.yaml
new file mode 100644
index 00000000..74844434
--- /dev/null
+++ b/rubicon_ml/schema/schema/xgboost__XGBModel.yaml
@@ -0,0 +1,120 @@
+name: xgboost__XGBModel
+version: 1.0.0
+
+compatibility:
+  xgboost:
+    max_version:
+    min_version: 1.7.0
+docs_url: https://xgboost.readthedocs.io/en/stable/python/python_api.html
+
+artifacts:
+  - self
+  - name: booster
+    data_object_func: get_booster
+features: 
+  - names_attr: feature_names_in_
+    importances_attr: feature_importances_
+    optional: true
+metrics:
+  - name: best_iteration
+    value_attr: best_iteration
+    optional: true
+  - name: best_score
+    value_attr: best_score
+    optional: true
+  - name: coef_
+    value_attr: coef_
+    optional: true
+  - name: intercept_
+    value_attr: intercept_
+    optional: true
+  - name: n_features_in_
+    value_attr: n_features_in_
+  - name: evals_result
+    value_func: evals_result
+    optional: true
+  - name: num_boosting_rounds
+    value_func: get_num_boosting_rounds
+parameters:
+  - name: n_estimators
+    value_attr: n_estimators
+  - name: max_depth
+    value_attr: max_depth
+  - name: max_leaves
+    value_attr: max_leaves
+  - name: max_bin
+    value_attr: max_bin
+  - name: grow_policy
+    value_attr: grow_policy
+  - name: learning_rate
+    value_attr: learning_rate
+  - name: verbosity
+    value_attr: verbosity
+  - name: objective
+    value_attr: objective
+  - name: booster
+    value_attr: booster
+  - name: tree_method
+    value_attr: tree_method
+  - name: n_jobs
+    value_attr: n_jobs
+  - name: gamma
+    value_attr: gamma
+  - name: min_child_weight
+    value_attr: min_child_weight
+  - name: max_delta_step
+    value_attr: max_delta_step
+  - name: subsample
+    value_attr: subsample
+  - name: sampling_method
+    value_attr: sampling_method
+  - name: colsample_bytree
+    value_attr: colsample_bytree
+  - name: colsample_bylevel
+    value_attr: colsample_bylevel
+  - name: colsample_bynode
+    value_attr: colsample_bynode
+  - name: reg_alpha
+    value_attr: reg_alpha
+  - name: reg_lambda
+    value_attr: reg_lambda
+  - name: scale_pos_weight
+    value_attr: scale_pos_weight
+  - name: base_score
+    value_attr: base_score
+  - name: random_state
+    value_attr: random_state
+  - name: missing
+    value_attr: missing
+  - name: num_parallel_tree
+    value_attr: num_parallel_tree
+  - name: monotone_constraints
+    value_attr: monotone_constraints
+  - name: interaction_constraints
+    value_attr: interaction_constraints
+  - name: importance_type
+    value_attr: importance_type
+  - name: gpu_id
+    value_attr: gpu_id
+    optional: true  # removed in xgboost 2.0.0
+  - name: device
+    value_attr: device
+    optional: true  # added in xgboost 2.0.0
+  - name: validate_parameters
+    value_attr: validate_parameters
+  - name: predictor
+    value_attr: predictor
+    optional: true  # removed in xgboost 2.0.0
+  - name: enable_categorical
+    value_attr: enable_categorical
+  - name: max_cat_to_onehot
+    value_attr: max_cat_to_onehot
+  - name: max_cat_threshold
+    value_attr: max_cat_threshold
+  - name: multi_strategy
+    value_attr: multi_strategy
+    optional: true  # added in xgboost 2.0.0
+  - name: eval_metric
+    value_attr: eval_metric
+  - name: early_stopping_rounds
+    value_attr: early_stopping_rounds
diff --git a/rubicon_ml/schema/schema/xgboost__XGBRegressor.yaml b/rubicon_ml/schema/schema/xgboost__XGBRegressor.yaml
new file mode 100644
index 00000000..2092d152
--- /dev/null
+++ b/rubicon_ml/schema/schema/xgboost__XGBRegressor.yaml
@@ -0,0 +1,9 @@
+name: xgboost__XGBRegressor
+extends: xgboost__XGBModel
+version: 1.0.0
+
+compatibility:
+  xgboost:
+    max_version:
+    min_version: 1.7.0
+docs_url: https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBRegressor
diff --git a/setup.cfg b/setup.cfg
index 6c941456..63a7dc09 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -110,6 +110,7 @@ deps =
 	pytest
 	pytest-cov
 	prefect
+	xgboost
 extras = 
 	all
 upgrade = 
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 71579d1b..b3807f9b 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -1,17 +1,59 @@
 import os
 import random
+import uuid
 
+import dask.array as da
+import dask.dataframe as dd
 import numpy as np
 import pandas as pd
 import pytest
+from dask.distributed import Client
+from sklearn.datasets import make_classification
 
+from rubicon_ml import Rubicon
 from rubicon_ml.repository import MemoryRepository
 
 
-class MockCompletedProcess:
-    """Use to mock a CompletedProcess result from
-    `subprocess.run()`.
-    """
+class _AnotherObject:
+    """Another object to log for schema testing."""
+
+    def __init__(self):
+        self.another_parameter = 100
+        self.another_metric = 100
+
+
+class _ObjectToLog:
+    """An object to log for schema testing."""
+
+    def __init__(self):
+        """Initialize an object to log."""
+
+        self.object_ = _AnotherObject()
+        self.feature_names_ = ["var_001", "var_002"]
+        self.other_feature_names_ = ["var_003", "var_004"]
+        self.feature_importances_ = [0.75, 0.25]
+        self.feature_name_ = "var_005"
+        self.other_feature_name_ = "var_006"
+        self.feature_importance_ = 1.0
+        self.dataframe = pd.DataFrame([[100, 0], [0, 100]], columns=["x", "y"])
+        self.parameter = 100
+        self.metric = 100
+
+    def metric_function(self):
+        return self.metric
+
+    def artifact_function(self):
+        return self
+
+    def dataframe_function(self):
+        return pd.DataFrame([[100, 0], [0, 100]], columns=["x", "y"])
+
+    def erroring_function(self):
+        raise RuntimeError("raised from `_ObjectToLog.erroring_function`")
+
+
+class _MockCompletedProcess:
+    """Use to mock a CompletedProcess result from `subprocess.run()`."""
 
     def __init__(self, stdout="", returncode=0):
         self.stdout = stdout
@@ -20,12 +62,12 @@ def __init__(self, stdout="", returncode=0):
 
 @pytest.fixture
 def mock_completed_process_empty():
-    return MockCompletedProcess(stdout=b"\n")
+    return _MockCompletedProcess(stdout=b"\n")
 
 
 @pytest.fixture
 def mock_completed_process_git():
-    return MockCompletedProcess(stdout=b"origin github.com (fetch)\n")
+    return _MockCompletedProcess(stdout=b"origin github.com (fetch)\n")
 
 
 @pytest.fixture
@@ -250,3 +292,229 @@ def viz_experiments(rubicon_and_project_client):
         experiment.log_dataframe(data_df, name="test dataframe")
 
     return project.experiments()
+
+
+@pytest.fixture
+def objects_to_log():
+    """Returns objects for testing."""
+
+    return _ObjectToLog(), _AnotherObject()
+
+
+@pytest.fixture
+def another_object_schema():
+    """Returns a schema representing ``_AnotherObject``."""
+
+    return {
+        "parameters": [{"name": "another_parameter", "value_attr": "another_parameter"}],
+        "metrics": [{"name": "another_metric", "value_attr": "another_metric"}],
+    }
+
+
+@pytest.fixture
+def artifact_schema():
+    """Returns a schema for testing artifacts."""
+
+    return {
+        "artifacts": [
+            "self",
+            {"name": "object_", "data_object_attr": "object_"},
+            {"name": "object_b", "data_object_func": "artifact_function"},
+        ]
+    }
+
+
+@pytest.fixture
+def dataframe_schema():
+    """Returns a schema for testing dataframes."""
+
+    return {
+        "dataframes": [
+            {"name": "dataframe", "df_attr": "dataframe"},
+            {"name": "dataframe_b", "df_func": "dataframe_function"},
+        ]
+    }
+
+
+@pytest.fixture
+def feature_schema():
+    """Returns a schema for testing features."""
+
+    return {
+        "features": [
+            {
+                "names_attr": "feature_names_",
+                "importances_attr": "feature_importances_",
+            },
+            {"names_attr": "other_feature_names_"},
+            {"name_attr": "feature_name_", "importance_attr": "feature_importance_"},
+            {"name_attr": "other_feature_name_"},
+        ]
+    }
+
+
+@pytest.fixture
+def metric_schema():
+    """Returns a schema for testing metrics."""
+
+    return {
+        "metrics": [
+            {"name": "metric_a", "value_attr": "metric"},
+            {"name": "metric_b", "value_env": "METRIC"},
+            {"name": "metric_c", "value_func": "metric_function"},
+        ],
+    }
+
+
+@pytest.fixture
+def parameter_schema():
+    """Returns a schema for testing parameters."""
+
+    return {
+        "parameters": [
+            {"name": "parameter_a", "value_attr": "parameter"},
+            {"name": "parameter_b", "value_env": "PARAMETER"},
+        ],
+    }
+
+
+@pytest.fixture
+def nested_schema():
+    """Returns a schema for testing nested schema."""
+
+    return {"schema": [{"name": "AnotherObject", "attr": "object_"}]}
+
+
+@pytest.fixture
+def optional_schema():
+    """Returns a schema for testing optional attributes."""
+
+    return {
+        "artifacts": [
+            {
+                "name": "object",
+                "data_object_attr": "missing_object",
+                "optional": "true",
+            },
+            {
+                "name": "object_b",
+                "data_object_func": "missing_object_func",
+                "optional": "true",
+            },
+        ],
+        "dataframes": [
+            {"name": "dataframe", "df_attr": "missing_dataframe", "optional": "true"},
+            {
+                "name": "dataframe_b",
+                "df_func": "missing_dataframe_func",
+                "optional": "true",
+            },
+        ],
+        "features": [
+            {"names_attr": "missing_feature_names", "optional": "true"},
+            {"name_attr": "missing_feature_name", "optional": "true"},
+        ],
+        "metrics": [
+            {"name": "metric_a", "value_attr": "missing_metric", "optional": "true"},
+            {"name": "metric_b", "value_env": "MISSING_METRIC", "optional": "true"},
+            {
+                "name": "metric_c",
+                "value_func": "missing_metric_func",
+                "optional": "true",
+            },
+        ],
+        "parameters": [
+            {
+                "name": "parameter_a",
+                "value_attr": "missing_parameter",
+                "optional": "true",
+            },
+            {
+                "name": "parameter_b",
+                "value_env": "MISSING_PARAMETER",
+                "optional": "true",
+            },
+        ],
+        "schema": [
+            {
+                "name": "MissingObject",
+                "attr": "another_missing_object",
+                "optional": "true",
+            }
+        ],
+    }
+
+
+@pytest.fixture
+def hierarchical_schema():
+    """Returns a schema for testing hierarchical schema."""
+
+    return {"children": [{"name": "AnotherObject", "attr": "children"}]}
+
+
+@pytest.fixture
+def rubicon_project():
+    """Returns an in-memory rubicon project for testing."""
+
+    rubicon = Rubicon(persistence="memory", root_dir="/tmp")
+
+    random_name = str(uuid.uuid4())
+    return rubicon.create_project(name=random_name)
+
+
+@pytest.fixture
+def make_classification_array():
+    """Returns classification data generated by scikit-learn as an array."""
+
+    X, y = make_classification(
+        n_samples=1000,
+        n_features=10,
+        n_informative=5,
+        n_redundant=5,
+        n_classes=2,
+        class_sep=1,
+        random_state=3211,
+    )
+
+    return X, y
+
+
+@pytest.fixture
+def make_classification_df(make_classification_array):
+    """Returns scikit-learn classification data with the features as a pandas dataframe."""
+
+    X, y = make_classification_array
+    X_df = pd.DataFrame(X, columns=[f"var_{i}" for i in range(10)])
+
+    return X_df, y
+
+
+@pytest.fixture
+def dask_client():
+    """Returns a dask client and shuts it down upon test completion."""
+
+    client = Client()
+
+    yield client
+
+    client.shutdown()
+
+
+@pytest.fixture
+def make_classification_dask_array(make_classification_array):
+    """Returns classification data generated by scikit-learn as a dask array."""
+
+    X, y = make_classification_array
+    X_da, y_da = da.from_array(X), da.from_array(y)
+
+    return X_da, y_da
+
+
+@pytest.fixture
+def make_classification_dask_df(make_classification_df):
+    """Returns scikit-learn classification data as a dask dataframe and a dask array."""
+
+    X, y = make_classification_df
+    X_df, y_da = dd.from_pandas(X, npartitions=1), da.from_array(y)
+
+    return X_df, y_da
diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py
new file mode 100644
index 00000000..065201f9
--- /dev/null
+++ b/tests/integration/test_schema.py
@@ -0,0 +1,60 @@
+import pytest
+from lightgbm import LGBMClassifier, LGBMRegressor
+from sklearn.ensemble import RandomForestClassifier
+from xgboost import XGBClassifier, XGBRegressor
+from xgboost.dask import DaskXGBClassifier, DaskXGBRegressor
+
+PANDAS_SCHEMA_CLS = [
+    LGBMClassifier,
+    LGBMRegressor,
+    RandomForestClassifier,
+    XGBClassifier,
+    XGBRegressor,
+]
+DASK_SCHEMA_CLS = [DaskXGBClassifier, DaskXGBRegressor]
+
+
+def _fit_and_log(X, y, schema_cls, rubicon_project):
+    model = schema_cls()
+    model.fit(X, y)
+
+    rubicon_project.log_with_schema(model)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("schema_cls", PANDAS_SCHEMA_CLS)
+def test_estimator_schema_fit_array(schema_cls, make_classification_array, rubicon_project):
+    X, y = make_classification_array
+
+    _fit_and_log(X, y, schema_cls, rubicon_project)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("schema_cls", PANDAS_SCHEMA_CLS)
+def test_estimator_schema_fit_df(schema_cls, make_classification_df, rubicon_project):
+    X, y = make_classification_df
+
+    _fit_and_log(X, y, schema_cls, rubicon_project)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("schema_cls", DASK_SCHEMA_CLS)
+def test_estimator_schema_fit_dask_array(
+    schema_cls,
+    make_classification_dask_array,
+    rubicon_project,
+    dask_client,
+):
+    X_da, y_da = make_classification_dask_array
+
+    _fit_and_log(X_da, y_da, schema_cls, rubicon_project)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("schema_cls", DASK_SCHEMA_CLS)
+def test_estimator_schema_fit_dask_df(
+    schema_cls, make_classification_dask_df, rubicon_project, dask_client
+):
+    X_df, y_da = make_classification_dask_df
+
+    _fit_and_log(X_df, y_da, schema_cls, rubicon_project)
diff --git a/tests/unit/schema/__init__.py b/tests/unit/schema/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/schema/test_schema_logger.py b/tests/unit/schema/test_schema_logger.py
new file mode 100644
index 00000000..0931a6d7
--- /dev/null
+++ b/tests/unit/schema/test_schema_logger.py
@@ -0,0 +1,355 @@
+"""Testing ``schema_logger``."""
+
+import os
+from copy import deepcopy
+from unittest import mock
+
+import pandas as pd
+import pytest
+
+from rubicon_ml.exceptions import RubiconException
+from rubicon_ml.schema import logger
+from rubicon_ml.schema.registry import RUBICON_SCHEMA_REGISTRY
+
+
+def test_safe_getattr_raises_error(objects_to_log):
+    """Testing ``_safe_getattr`` raises an error when not optional."""
+
+    object_to_log, _ = objects_to_log
+    missing_attr_name = "missing_attr"
+
+    with pytest.raises(AttributeError) as e:
+        logger._safe_getattr(
+            object_to_log,
+            missing_attr_name,
+            optional=False,
+        )
+
+    assert f"no attribute '{missing_attr_name}'" in str(e)
+
+
+def test_safe_call_func_raises_error(objects_to_log):
+    """Testing ``_safe_call_func`` raises an error when not optional."""
+
+    object_to_log, _ = objects_to_log
+    missing_func_name = "missing_func"
+
+    with pytest.raises(AttributeError) as e:
+        logger._safe_call_func(
+            object_to_log,
+            missing_func_name,
+            optional=False,
+        )
+
+    assert f"no attribute '{missing_func_name}'" in str(e)
+
+
+def test_safe_call_func_reraises_error(objects_to_log):
+    """Testing ``_safe_call_func`` reraises an error when not optional."""
+
+    object_to_log, _ = objects_to_log
+    erroring_func_name = "erroring_function"
+
+    with pytest.raises(Exception) as e:
+        logger._safe_call_func(
+            object_to_log,
+            erroring_func_name,
+            optional=False,
+        )
+
+    assert "raised from `_ObjectToLog.erroring_function`" in str(e)
+
+
+def test_safe_environ_raises_error(objects_to_log):
+    """Testing ``_safe_environ`` raises an error when not optional."""
+
+    object_to_log, _ = objects_to_log
+    missing_environ_name = "missing_environ"
+
+    with pytest.raises(RubiconException) as e:
+        logger._safe_environ(
+            missing_environ_name,
+            optional=False,
+        )
+
+    assert f"'{missing_environ_name}' not set" in str(e)
+
+
+def test_log_inferred_schema(objects_to_log, rubicon_project, another_object_schema):
+    """Testing ``Project.log_with_schema`` can log inferred schema."""
+
+    _, another_object = objects_to_log
+    schema_to_patch = {"tests___AnotherObject": lambda: another_object_schema}
+
+    with mock.patch.dict(RUBICON_SCHEMA_REGISTRY, schema_to_patch, clear=True):
+        experiment = rubicon_project.log_with_schema(another_object)
+
+    parameter = experiment.parameter(name=another_object_schema["parameters"][0]["name"])
+    metric = experiment.metric(name=another_object_schema["metrics"][0]["name"])
+
+    assert rubicon_project.schema_ == another_object_schema
+    assert parameter.value == getattr(
+        another_object,
+        another_object_schema["parameters"][0]["value_attr"],
+    )
+    assert metric.value == getattr(
+        another_object,
+        another_object_schema["metrics"][0]["value_attr"],
+    )
+
+
+def test_log_artifacts_with_schema(objects_to_log, rubicon_project, artifact_schema):
+    """Testing ``Project.log_with_schema`` can log artifacts."""
+
+    object_to_log, another_object = objects_to_log
+    object_b = object_to_log
+    otl_cls, ao_cls, obj_b_cls = (
+        object_to_log.__class__,
+        another_object.__class__,
+        object_b.__class__,
+    )
+
+    rubicon_project.set_schema(artifact_schema)
+    experiment = rubicon_project.log_with_schema(object_to_log)
+
+    otl_artifact = experiment.artifact(name=otl_cls.__name__)
+    ao_artifact = experiment.artifact(name=artifact_schema["artifacts"][1]["name"])
+    obj_b_artifact = experiment.artifact(name=artifact_schema["artifacts"][2]["name"])
+
+    assert isinstance(otl_artifact.get_data(unpickle=True), otl_cls)
+    assert isinstance(ao_artifact.get_data(unpickle=True), ao_cls)
+    assert isinstance(obj_b_artifact.get_data(unpickle=True), obj_b_cls)
+
+
+def test_log_dataframes_with_schema(objects_to_log, rubicon_project, dataframe_schema):
+    """Testing ``Project.log_with_schema`` can log dataframes from attributes and functions."""
+
+    object_to_log, _ = objects_to_log
+
+    rubicon_project.set_schema(dataframe_schema)
+    experiment = rubicon_project.log_with_schema(object_to_log)
+
+    dataframe = experiment.dataframe(name=dataframe_schema["dataframes"][0]["name"])
+    dataframe_b = experiment.dataframe(name=dataframe_schema["dataframes"][1]["name"])
+
+    assert isinstance(dataframe.get_data(), pd.DataFrame)
+    assert isinstance(dataframe_b.get_data(), pd.DataFrame)
+    assert dataframe.get_data().equals(object_to_log.dataframe)
+    assert dataframe_b.get_data().equals(object_to_log.dataframe_function())
+
+
+def test_log_features_with_schema(objects_to_log, rubicon_project, feature_schema):
+    """Testing ``Project.log_with_schema`` can log features."""
+
+    object_to_log, _ = objects_to_log
+
+    rubicon_project.set_schema(feature_schema)
+    experiment = rubicon_project.log_with_schema(object_to_log)
+
+    expected_feature_names = getattr(object_to_log, feature_schema["features"][0]["names_attr"])
+    expected_feature_names.extend(
+        getattr(object_to_log, feature_schema["features"][1]["names_attr"])
+    )
+    expected_feature_names.append(
+        getattr(object_to_log, feature_schema["features"][2]["name_attr"])
+    )
+    expected_feature_names.append(
+        getattr(object_to_log, feature_schema["features"][3]["name_attr"])
+    )
+
+    expected_feature_importances = getattr(
+        object_to_log, feature_schema["features"][0].get("importances_attr")
+    )
+    expected_feature_importances.extend([None, None])
+    expected_feature_importances.append(
+        getattr(object_to_log, feature_schema["features"][2].get("importance_attr"))
+    )
+    expected_feature_importances.append(None)
+
+    for name, importance in zip(expected_feature_names, expected_feature_importances):
+        feature = experiment.feature(name=name)
+
+        assert feature.importance == importance
+
+
+def test_log_metrics_with_schema(objects_to_log, rubicon_project, metric_schema):
+    """Testing ``Project.log_with_schema`` can log metrics."""
+
+    object_to_log, _ = objects_to_log
+
+    rubicon_project.set_schema(metric_schema)
+
+    with mock.patch.dict(os.environ, {"METRIC": "metric env value"}, clear=True):
+        experiment = rubicon_project.log_with_schema(object_to_log)
+
+    metric_a = experiment.metric(name=metric_schema["metrics"][0]["name"])
+    metric_b = experiment.metric(name=metric_schema["metrics"][1]["name"])
+    metric_c = experiment.metric(name=metric_schema["metrics"][2]["name"])
+
+    assert metric_a.value == getattr(object_to_log, metric_schema["metrics"][0]["value_attr"])
+    assert metric_b.value == "metric env value"
+
+    method = getattr(object_to_log, metric_schema["metrics"][2]["value_func"])
+    assert metric_c.value == method()
+
+
+def test_log_parameters_with_schema(objects_to_log, rubicon_project, parameter_schema):
+    """Testing ``Project.log_with_schema`` can log parameters."""
+
+    object_to_log, _ = objects_to_log
+
+    rubicon_project.set_schema(parameter_schema)
+
+    with mock.patch.dict(os.environ, {"PARAMETER": "param env value"}, clear=True):
+        experiment = rubicon_project.log_with_schema(object_to_log)
+
+    parameter_a = experiment.parameter(name=parameter_schema["parameters"][0]["name"])
+    parameter_b = experiment.parameter(name=parameter_schema["parameters"][1]["name"])
+
+    assert parameter_a.value == getattr(
+        object_to_log, parameter_schema["parameters"][0]["value_attr"]
+    )
+    assert parameter_b.value == "param env value"
+
+
+def test_log_nested_schema(objects_to_log, rubicon_project, another_object_schema, nested_schema):
+    """Testing ``Project.log_with_schema`` can log nested schema."""
+
+    object_to_log, another_object = objects_to_log
+    schema_to_patch = {"AnotherObject": lambda: another_object_schema}
+
+    with mock.patch.dict(RUBICON_SCHEMA_REGISTRY, schema_to_patch, clear=True):
+        rubicon_project.set_schema(nested_schema)
+        experiment = rubicon_project.log_with_schema(object_to_log)
+
+    parameter = experiment.parameter(name=another_object_schema["parameters"][0]["name"])
+    metric = experiment.metric(name=another_object_schema["metrics"][0]["name"])
+
+    assert parameter.value == getattr(
+        another_object,
+        another_object_schema["parameters"][0]["value_attr"],
+    )
+    assert metric.value == getattr(
+        another_object,
+        another_object_schema["metrics"][0]["value_attr"],
+    )
+
+
+def test_log_extended_schema(objects_to_log, rubicon_project, another_object_schema):
+    """Testing ``Project.log_with_schema`` can log extended schema."""
+
+    _, another_object = objects_to_log
+
+    feature_name_attr = "extended_schema_feature"
+    feature_name_value = "extended schema feature"
+    setattr(another_object, feature_name_attr, feature_name_value)
+
+    schema_to_patch = {"AnotherObject": lambda: another_object_schema}
+    extended_schema = {
+        "extends": "AnotherObject",
+        "features": [{"name_attr": feature_name_attr}],
+    }
+
+    with mock.patch.dict(RUBICON_SCHEMA_REGISTRY, schema_to_patch, clear=True):
+        rubicon_project.set_schema(extended_schema)
+        experiment = rubicon_project.log_with_schema(another_object)
+
+    feature = experiment.feature(name=feature_name_value)
+    parameter = experiment.parameter(name=another_object_schema["parameters"][0]["name"])
+    metric = experiment.metric(name=another_object_schema["metrics"][0]["name"])
+
+    assert feature.name == feature_name_value
+    assert parameter.value == getattr(
+        another_object,
+        another_object_schema["parameters"][0]["value_attr"],
+    )
+    assert metric.value == getattr(
+        another_object,
+        another_object_schema["metrics"][0]["value_attr"],
+    )
+
+
+def test_log_optional_schema(objects_to_log, rubicon_project, optional_schema):
+    """Testing ``Project.log_with_schema`` can log optional schema."""
+
+    object_to_log, _ = objects_to_log
+    schema_to_patch = {"MissingObject": lambda: {}}
+
+    with mock.patch.dict(RUBICON_SCHEMA_REGISTRY, schema_to_patch, clear=True):
+        rubicon_project.set_schema(optional_schema)
+        experiment = rubicon_project.log_with_schema(object_to_log)
+
+    assert len(experiment.artifacts()) == 0
+    assert len(experiment.dataframes()) == 0
+    assert len(experiment.features()) == 0
+
+    assert len(experiment.parameters()) == 2
+    for parameter in experiment.parameters():
+        assert parameter.value is None
+
+    assert len(experiment.metrics()) == 3
+    for metric in experiment.metrics():
+        assert metric.value is None
+
+
+def test_log_with_children(
+    objects_to_log, rubicon_project, another_object_schema, hierarchical_schema
+):
+    """Testing ``Project.log_with_schema`` can log hierarchical schema."""
+
+    object_to_log, another_object = objects_to_log
+    schema_to_patch = {"AnotherObject": lambda: another_object_schema}
+
+    num_children = 4
+    object_to_log.children = [deepcopy(another_object) for _ in range(num_children)]
+
+    with mock.patch.dict(RUBICON_SCHEMA_REGISTRY, schema_to_patch, clear=True):
+        rubicon_project.set_schema(hierarchical_schema)
+        parent_experiment = rubicon_project.log_with_schema(object_to_log)
+
+    assert len(rubicon_project.experiments()) == 1 + num_children
+    assert len(rubicon_project.experiments(tags=["parent"])) == 1
+
+    child_experiments = rubicon_project.experiments(tags=["child"])
+    assert len(child_experiments) == num_children
+
+    for child_experiment in child_experiments:
+        assert f"parent_id:{parent_experiment.id}" in child_experiment.tags
+
+
+def test_log_with_schema_and_experiment_kwargs(
+    objects_to_log,
+    rubicon_project,
+    artifact_schema,
+):
+    """Testing ``Project.log_with_schema`` can log experiment kwargs."""
+
+    object_to_log, _ = objects_to_log
+
+    rubicon_project.set_schema(artifact_schema)
+    experiment = rubicon_project.log_with_schema(
+        object_to_log,
+        experiment_kwargs={"name": "name", "description": "description"},
+    )
+
+    assert experiment.name == "name"
+    assert experiment.description == "description"
+
+
+def test_log_with_schema_raises_error(objects_to_log, rubicon_project):
+    """Testing ``Project.log_with_schema`` raises an error when no schema is set."""
+
+    object_to_log, _ = objects_to_log
+
+    with pytest.raises(ValueError) as err:
+        _ = rubicon_project.log_with_schema(object_to_log)
+
+    assert "No schema set and no schema could be inferred" in str(err)
+
+
+def test_set_schema(rubicon_project, artifact_schema):
+    """Testing ``Project.set_schema``."""
+
+    rubicon_project.set_schema(artifact_schema)
+
+    assert rubicon_project.schema_ == artifact_schema
diff --git a/tests/unit/schema/test_schema_registry.py b/tests/unit/schema/test_schema_registry.py
new file mode 100644
index 00000000..dcd1f865
--- /dev/null
+++ b/tests/unit/schema/test_schema_registry.py
@@ -0,0 +1,56 @@
+"""Testing ``schema.registry``."""
+
+import pytest
+
+from rubicon_ml.schema import registry
+
+
+def test_available_schema():
+    """Testing ``schema.registry.available_schema``."""
+
+    for schema_name in registry.available_schema():
+        assert schema_name in registry.RUBICON_SCHEMA_REGISTRY
+
+    assert len(registry.available_schema()) == len(registry.RUBICON_SCHEMA_REGISTRY)
+
+
+def test_get_schema():
+    """Testing ``schema.registry.get_schema``."""
+
+    for name, load_func in registry.RUBICON_SCHEMA_REGISTRY.items():
+        schema = registry.get_schema(name)
+
+        assert load_func() == schema
+
+
+def test_get_schema_raises_error():
+    """Testing ``schema.registry.get_schema`` raises an error when an invalid name is given."""
+
+    with pytest.raises(ValueError) as err:
+        registry.get_schema("InvalidSchemaClass")
+
+    assert "'InvalidSchemaClass' is not the name of an available rubicon schema." in str(err)
+
+
+def test_get_schema_name(objects_to_log):
+    """Testing ``schema.registry.get_schema_name``."""
+
+    object_to_log, _ = objects_to_log
+
+    schema_name = registry.get_schema_name(object_to_log)
+
+    assert schema_name == "tests___ObjectToLog"
+
+
+def test_register_schema():
+    """Testing ``schema.registry.register_schema``."""
+
+    schema_name = "c1-rubicon-schema__TestRegisterSchema"
+    schema = {"name": schema_name}
+
+    registry.register_schema(schema_name, schema)
+
+    assert schema_name in registry.RUBICON_SCHEMA_REGISTRY
+    assert registry.get_schema(schema_name) == schema
+
+    del registry.RUBICON_SCHEMA_REGISTRY[schema_name]