From 42a55140532a732dc34abff940a206179baba71f Mon Sep 17 00:00:00 2001
From: Thibault Derousseaux <6574550+tibdex@users.noreply.github.com>
Date: Tue, 4 Mar 2025 15:15:08 -0500
Subject: [PATCH] Add skeleton (#292)

---
 .github/workflows/test.yml                |   1 +
 Dockerfile                                |   5 +-
 app/__init__.py                           |   2 +-
 app/constants.py                          |  55 ------
 app/create_and_join_tables.py             |  54 +++---
 app/create_cubes.py                       |  79 ++++-----
 app/load_tables.py                        |  36 ++--
 app/skeleton.py                           | 128 ++++++++++++++
 app/util/__init__.py                      |   1 +
 app/util/skeleton/__init__.py             |  20 +++
 app/util/skeleton/_node.py                | 193 ++++++++++++++++++++++
 app/util/skeleton/column.py               |   9 +
 app/util/skeleton/contributors_count.py   |   1 +
 app/util/skeleton/fact_based_hierarchy.py |  24 +++
 app/util/skeleton/skeleton.py             | 132 +++++++++++++++
 pyproject.toml                            |   6 +-
 tests/docker/test_docker.py               |   9 +-
 tests/test_session.py                     |  26 +--
 uv.lock                                   |   2 +
 19 files changed, 618 insertions(+), 165 deletions(-)
 delete mode 100644 app/constants.py
 create mode 100644 app/skeleton.py
 create mode 100644 app/util/skeleton/__init__.py
 create mode 100644 app/util/skeleton/_node.py
 create mode 100644 app/util/skeleton/column.py
 create mode 100644 app/util/skeleton/contributors_count.py
 create mode 100644 app/util/skeleton/fact_based_hierarchy.py
 create mode 100644 app/util/skeleton/skeleton.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6e28673..84ae15b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -12,6 +12,7 @@ jobs:
       - uses: astral-sh/setup-uv@v3
         with:
           enable-cache: true
+          # Keep in sync with `Dockerfile`'s `builder`.
           version: "0.5.6"
       - run: uv python install 3.10
       - run: uv sync --locked
diff --git a/Dockerfile b/Dockerfile
index 1a35640..5470b54 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
 # Inspired from https://github.com/astral-sh/uv-docker-example/blob/dee88a8c43be3b16b0ad58f0daee5eaee7e2157a/multistage.Dockerfile.
 
-FROM ghcr.io/astral-sh/uv:0.4.10-python3.10-bookworm-slim AS builder
+# Keep in sync with `.github/workflows/test.yml`.
+FROM ghcr.io/astral-sh/uv:0.5.6-python3.10-bookworm-slim AS builder
 
 ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
 
@@ -25,4 +26,4 @@ ENV PORT=80
 
 EXPOSE $PORT
 
-CMD ["python", "-u", "-m", "app"]
+CMD ["python", "-O", "-u", "-m", "app"]
diff --git a/app/__init__.py b/app/__init__.py
index 362b5f3..13c9190 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -1,3 +1,3 @@
 from .config import Config as Config
-from .constants import *  # noqa: F403
+from .skeleton import SKELETON as SKELETON
 from .start_app import start_app as start_app
diff --git a/app/constants.py b/app/constants.py
deleted file mode 100644
index 8f6278b..0000000
--- a/app/constants.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from enum import Enum
-
-
-class Table(Enum):
-    STATION_DETAILS = "Station details"
-    STATION_STATUS = "Station status"
-
-
-class StationDetailsTableColumn(Enum):
-    ID = "ID"
-    NAME = "Name"
-    DEPARTMENT = "Department"
-    CITY = "City"
-    POSTCODE = "Postcode"
-    STREET = "Street"
-    HOUSE_NUMBER = "House number"
-    CAPACITY = "Capacity"
-
-
-class StationStatusTableColumn(Enum):
-    STATION_ID = "Station ID"
-    BIKE_TYPE = "Bike type"
-    BIKES = "Bikes"
-
-
-class Cube(Enum):
-    STATION = "Station"
-
-
-class StationCubeHierarchy(Enum):
-    BIKE_TYPE = StationStatusTableColumn.BIKE_TYPE.value
-    LOCATION = "Location"
-    STATION = "Station"
-
-
-class StationCubeBikeTypeLevel(Enum):
-    BIKE_TYPE = StationCubeHierarchy.BIKE_TYPE.value
-
-
-class StationCubeLocationLevel(Enum):
-    DEPARTMENT = StationDetailsTableColumn.DEPARTMENT.value
-    CITY = StationDetailsTableColumn.CITY.value
-    POSTCODE = StationDetailsTableColumn.POSTCODE.value
-    STREET = StationDetailsTableColumn.STREET.value
-    HOUSE_NUMBER = StationDetailsTableColumn.HOUSE_NUMBER.value
-
-
-class StationCubeStationLevel(Enum):
-    NAME = StationDetailsTableColumn.NAME.value
-    ID = StationDetailsTableColumn.ID.value
-
-
-class StationCubeMeasure(Enum):
-    CAPACITY = StationDetailsTableColumn.CAPACITY.value
-    BIKES = StationStatusTableColumn.BIKES.value
diff --git a/app/create_and_join_tables.py b/app/create_and_join_tables.py
index 2d8746d..6fed3b2 100644
--- a/app/create_and_join_tables.py
+++ b/app/create_and_join_tables.py
@@ -1,52 +1,54 @@
 import atoti as tt
 
-from .constants import StationDetailsTableColumn, StationStatusTableColumn, Table
+from .skeleton import SKELETON
+from .util.skeleton import column
 
 
 def create_station_status_table(session: tt.Session, /) -> None:
+    skeleton = SKELETON.tables.STATION_STATUS
+    columns = skeleton.columns
     session.create_table(
-        Table.STATION_STATUS.value,
+        skeleton.name,
         data_types={
-            StationStatusTableColumn.STATION_ID.value: tt.LONG,
-            StationStatusTableColumn.BIKE_TYPE.value: tt.STRING,
-            StationStatusTableColumn.BIKES.value: tt.INT,
+            columns.STATION_ID.name: tt.LONG,
+            columns.BIKE_TYPE.name: tt.STRING,
+            columns.BIKES.name: tt.INT,
         },
         keys={
-            StationStatusTableColumn.STATION_ID.value,
-            StationStatusTableColumn.BIKE_TYPE.value,
+            columns.STATION_ID.name,
+            columns.BIKE_TYPE.name,
         },
     )
 
 
 def create_station_details_table(session: tt.Session, /) -> None:
+    skeleton = SKELETON.tables.STATION_DETAILS
+    columns = skeleton.columns
     session.create_table(
-        Table.STATION_DETAILS.value,
+        skeleton.name,
         data_types={
-            StationDetailsTableColumn.ID.value: tt.LONG,
-            StationDetailsTableColumn.NAME.value: tt.STRING,
-            StationDetailsTableColumn.DEPARTMENT.value: tt.STRING,
-            StationDetailsTableColumn.CITY.value: tt.STRING,
-            StationDetailsTableColumn.POSTCODE.value: tt.INT,
-            StationDetailsTableColumn.STREET.value: tt.STRING,
-            StationDetailsTableColumn.HOUSE_NUMBER.value: tt.STRING,
-            StationDetailsTableColumn.CAPACITY.value: tt.INT,
+            columns.ID.name: tt.LONG,
+            columns.NAME.name: tt.STRING,
+            columns.DEPARTMENT.name: tt.STRING,
+            columns.CITY.name: tt.STRING,
+            columns.POSTCODE.name: tt.INT,
+            columns.STREET.name: tt.STRING,
+            columns.HOUSE_NUMBER.name: tt.STRING,
+            columns.CAPACITY.name: tt.INT,
         },
-        default_values={StationDetailsTableColumn.POSTCODE.value: 0},
+        default_values={columns.POSTCODE.name: 0},
         keys={
-            StationDetailsTableColumn.ID.value,
+            columns.ID.name,
         },
     )
 
 
 def join_tables(session: tt.Session, /) -> None:
-    session.tables[Table.STATION_STATUS.value].join(
-        session.tables[Table.STATION_DETAILS.value],
-        session.tables[Table.STATION_STATUS.value][
-            StationStatusTableColumn.STATION_ID.value
-        ]
-        == session.tables[Table.STATION_DETAILS.value][
-            StationDetailsTableColumn.ID.value
-        ],
+    tables = SKELETON.tables
+    session.tables[tables.STATION_STATUS.key].join(
+        session.tables[tables.STATION_DETAILS.key],
+        column(session, tables.STATION_STATUS.columns.STATION_ID)
+        == column(session, tables.STATION_DETAILS.columns.ID),
     )
 
 
diff --git a/app/create_cubes.py b/app/create_cubes.py
index c16650e..e041d2f 100644
--- a/app/create_cubes.py
+++ b/app/create_cubes.py
@@ -1,69 +1,48 @@
 import atoti as tt
 
-from .constants import (
-    Cube,
-    StationCubeBikeTypeLevel,
-    StationCubeHierarchy,
-    StationCubeLocationLevel,
-    StationCubeMeasure,
-    StationCubeStationLevel,
-    StationDetailsTableColumn,
-    StationStatusTableColumn,
-    Table,
-)
+from .skeleton import SKELETON
+from .util.skeleton import column, fact_based_hierarchy
 
 
 def create_station_cube(session: tt.Session, /) -> None:
-    station_details_table = session.tables[Table.STATION_DETAILS.value]
-    station_status_table = session.tables[Table.STATION_STATUS.value]
+    tables = SKELETON.tables
+    skeleton = SKELETON.cubes.STATION
 
-    cube = session.create_cube(station_status_table, Cube.STATION.value, mode="manual")
+    cube = session.create_cube(
+        session.tables[tables.STATION_STATUS.key],
+        skeleton.name,
+        mode="manual",
+    )
     h, l, m = cube.hierarchies, cube.levels, cube.measures
 
     h.update(
-        {
-            StationCubeHierarchy.BIKE_TYPE.value: {
-                StationCubeBikeTypeLevel.BIKE_TYPE.value: station_status_table[
-                    StationStatusTableColumn.BIKE_TYPE.value
+        dict(
+            [
+                fact_based_hierarchy(session, hierarchy)
+                for hierarchy in [
+                    skeleton.dimensions.STATION_STATUS.hierarchies.BIKE_TYPE,
+                    skeleton.dimensions.STATION_DETAILS.hierarchies.LOCATION,
+                    skeleton.dimensions.STATION_DETAILS.hierarchies.STATION,
                 ]
-            },
-            StationCubeHierarchy.LOCATION.value: {
-                StationCubeLocationLevel.DEPARTMENT.value: station_details_table[
-                    StationDetailsTableColumn.DEPARTMENT.value
-                ],
-                StationCubeLocationLevel.CITY.value: station_details_table[
-                    StationDetailsTableColumn.CITY.value
-                ],
-                StationCubeLocationLevel.POSTCODE.value: station_details_table[
-                    StationDetailsTableColumn.POSTCODE.value
-                ],
-                StationCubeLocationLevel.STREET.value: station_details_table[
-                    StationDetailsTableColumn.STREET.value
-                ],
-                StationCubeLocationLevel.HOUSE_NUMBER.value: station_details_table[
-                    StationDetailsTableColumn.HOUSE_NUMBER.value
-                ],
-            },
-            StationCubeHierarchy.STATION.value: {
-                StationCubeStationLevel.NAME.value: station_details_table[
-                    StationDetailsTableColumn.NAME.value
-                ],
-                StationCubeStationLevel.ID.value: station_status_table[
-                    StationStatusTableColumn.STATION_ID.value
-                ],
-            },
-        }
+            ]
+        )
     )
 
     with session.data_model_transaction():
-        m[StationCubeMeasure.BIKES.value] = tt.agg.sum(
-            station_status_table[StationStatusTableColumn.BIKES.value]
+        m[skeleton.measures.BIKES.key] = tt.agg.sum(
+            column(session, tables.STATION_STATUS.columns.BIKES)
         )
-        m[StationCubeMeasure.CAPACITY.value] = tt.agg.sum(
+        m[skeleton.measures.CAPACITY.key] = tt.agg.sum(
             tt.agg.single_value(
-                station_details_table[StationDetailsTableColumn.CAPACITY.value]
+                column(session, tables.STATION_DETAILS.columns.CAPACITY)
+            ),
+            scope=tt.OriginScope(
+                {
+                    l[
+                        skeleton.dimensions.STATION_DETAILS.hierarchies.STATION.levels.ID.key
+                    ]
+                }
             ),
-            scope=tt.OriginScope({l[StationCubeStationLevel.ID.value]}),
         )
 
 
diff --git a/app/load_tables.py b/app/load_tables.py
index 0bc3793..c1db43b 100644
--- a/app/load_tables.py
+++ b/app/load_tables.py
@@ -9,7 +9,7 @@
 from pydantic import HttpUrl
 
 from .config import Config
-from .constants import StationDetailsTableColumn, StationStatusTableColumn, Table
+from .skeleton import SKELETON
 from .util import read_json, reverse_geocode
 
 
@@ -19,6 +19,7 @@ async def read_station_details(
     reverse_geocoding_path: HttpUrl | Path,
     velib_data_base_path: HttpUrl | Path,
 ) -> pd.DataFrame:
+    columns = SKELETON.tables.STATION_DETAILS.columns
     stations_data: Any = cast(
         Any,
         await read_json(
@@ -31,9 +32,9 @@ async def read_station_details(
         ["station_id", "name", "capacity", "lat", "lon"]
     ].rename(
         columns={
-            "station_id": StationDetailsTableColumn.ID.value,
-            "name": StationDetailsTableColumn.NAME.value,
-            "capacity": StationDetailsTableColumn.CAPACITY.value,
+            "station_id": columns.ID.name,
+            "name": columns.NAME.name,
+            "capacity": columns.CAPACITY.name,
             "lat": "latitude",
             "lon": "longitude",
         }
@@ -52,11 +53,11 @@ async def read_station_details(
         coordinates, reverse_geocoding_path=reverse_geocoding_path
     ).rename(
         columns={
-            "department": StationDetailsTableColumn.DEPARTMENT.value,
-            "city": StationDetailsTableColumn.CITY.value,
-            "postcode": StationDetailsTableColumn.POSTCODE.value,
-            "street": StationDetailsTableColumn.STREET.value,
-            "house_number": StationDetailsTableColumn.HOUSE_NUMBER.value,
+            "department": columns.DEPARTMENT.name,
+            "city": columns.CITY.name,
+            "postcode": columns.POSTCODE.name,
+            "street": columns.STREET.name,
+            "house_number": columns.HOUSE_NUMBER.name,
         }
     )
 
@@ -71,6 +72,7 @@ async def read_station_status(
     *,
     http_client: httpx.AsyncClient,
 ) -> pd.DataFrame:
+    columns = SKELETON.tables.STATION_STATUS.columns
     stations_data = cast(
         Any,
         await read_json(
@@ -89,11 +91,9 @@ async def read_station_status(
             bike_type, bikes = next(iter(num_bikes_available_types.items()))
             station_statuses.append(
                 {
-                    StationStatusTableColumn.STATION_ID.value: station_status[
-                        "station_id"
-                    ],
-                    StationStatusTableColumn.BIKE_TYPE.value: bike_type,
-                    StationStatusTableColumn.BIKES.value: bikes,
+                    columns.STATION_ID.name: station_status["station_id"],
+                    columns.BIKE_TYPE.name: bike_type,
+                    columns.BIKES.name: bikes,
                 }
             )
     return pd.DataFrame(station_statuses)
@@ -120,6 +120,10 @@ async def load_tables(
 
     with session.tables.data_transaction():
         await asyncio.gather(
-            session.tables[Table.STATION_DETAILS.value].load_async(station_details_df),
-            session.tables[Table.STATION_STATUS.value].load_async(station_status_df),
+            session.tables[SKELETON.tables.STATION_DETAILS.key].load_async(
+                station_details_df
+            ),
+            session.tables[SKELETON.tables.STATION_STATUS.key].load_async(
+                station_status_df
+            ),
         )
diff --git a/app/skeleton.py b/app/skeleton.py
new file mode 100644
index 0000000..06f9143
--- /dev/null
+++ b/app/skeleton.py
@@ -0,0 +1,128 @@
+from .util.skeleton import (
+    Column,
+    Columns,
+    Cube,
+    Cubes,
+    Dimension,
+    Dimensions,
+    Hierarchies,
+    Hierarchy,
+    Level,
+    Levels,
+    Measure,
+    Measures,
+    Skeleton,
+    Table,
+    Tables,
+)
+
+
+class _StationDetailsTableColumns(Columns):  # Columns of "Station details".
+    ID = Column("ID")
+    NAME = Column("Name")
+    DEPARTMENT = Column("Department")
+    CITY = Column("City")
+    POSTCODE = Column("Postcode")
+    STREET = Column("Street")
+    HOUSE_NUMBER = Column("House number")
+    CAPACITY = Column("Capacity")
+
+
+class _StationDetailsTable(Table):  # Child attributes are named after their type.
+    name = "Station details"
+    columns = _StationDetailsTableColumns()
+
+
+class _StationStatusTableColumns(Columns):  # Columns of "Station status".
+    STATION_ID = Column("Station ID")
+    BIKE_TYPE = Column("Bike type")
+    BIKES = Column("Bikes")
+
+
+class _StationStatusTable(Table):
+    name = "Station status"
+    columns = _StationStatusTableColumns()
+
+
+class _Tables(Tables):  # Tables of the data model.
+    STATION_DETAILS = _StationDetailsTable()
+    STATION_STATUS = _StationStatusTable()
+
+
+class _StationCubeStationDetailsDimensionLocationHierarchyLevels(Levels):
+    DEPARTMENT = Level(_StationDetailsTableColumns.DEPARTMENT)  # Order is preserved.
+    CITY = Level(_StationDetailsTableColumns.CITY)
+    POSTCODE = Level(_StationDetailsTableColumns.POSTCODE)
+    STREET = Level(_StationDetailsTableColumns.STREET)
+    HOUSE_NUMBER = Level(_StationDetailsTableColumns.HOUSE_NUMBER)
+
+
+class _StationCubeStationDetailsDimensionLocationHierarchy(Hierarchy):
+    name = "Location"
+    levels = _StationCubeStationDetailsDimensionLocationHierarchyLevels()
+
+
+class _StationCubeStationDetailsDimensionStationHierarchyLevels(Levels):
+    NAME = Level(_StationDetailsTableColumns.NAME)
+    ID = Level(_StationDetailsTableColumns.ID)
+
+
+class _StationCubeStationDetailsDimensionStationHierarchy(Hierarchy):
+    name = "Station"
+    levels = _StationCubeStationDetailsDimensionStationHierarchyLevels()
+
+
+class _StationCubeStationDetailsDimensionHierarchies(Hierarchies):
+    LOCATION = _StationCubeStationDetailsDimensionLocationHierarchy()
+    STATION = _StationCubeStationDetailsDimensionStationHierarchy()
+
+
+class _StationCubeStationDetailsDimension(Dimension):
+    name = _StationDetailsTable.name  # Dimension named after the table.
+    hierarchies = _StationCubeStationDetailsDimensionHierarchies()
+
+
+class _StationCubeStationStatusDimensionBikeTypeHierarchyLevels(Levels):
+    BIKE_TYPE = Level(_StationStatusTableColumns.BIKE_TYPE)
+
+
+class _StationCubeStationStatusDimensionBikeTypeHierarchy(Hierarchy):
+    name = _StationStatusTableColumns.BIKE_TYPE.name  # Named after the column.
+    levels = _StationCubeStationStatusDimensionBikeTypeHierarchyLevels()
+
+
+class _StationCubeStationStatusDimensionHierarchies(Hierarchies):
+    BIKE_TYPE = _StationCubeStationStatusDimensionBikeTypeHierarchy()
+
+
+class _StationCubeStationStatusDimension(Dimension):
+    name = _StationStatusTable.name  # Dimension named after the table.
+    hierarchies = _StationCubeStationStatusDimensionHierarchies()
+
+
+class _StationCubeDimensions(Dimensions):
+    STATION_DETAILS = _StationCubeStationDetailsDimension()
+    STATION_STATUS = _StationCubeStationStatusDimension()
+
+
+class _StationCubeMeasures(Measures):  # Each measure reuses a column's name.
+    CAPACITY = Measure(_StationDetailsTableColumns.CAPACITY.name)
+    BIKES = Measure(_StationStatusTableColumns.BIKES.name)
+
+
+class _StationCube(Cube):
+    name = "Station"
+    dimensions = _StationCubeDimensions()
+    measures = _StationCubeMeasures()
+
+
+class _Cubes(Cubes):
+    STATION = _StationCube()
+
+
+class _Skeleton(Skeleton):
+    cubes = _Cubes()  # Paths are still set on `tables` first, see `Skeleton`.
+    tables = _Tables()
+
+
+SKELETON = _Skeleton()  # Instantiation sets the path of every node.
diff --git a/app/util/__init__.py b/app/util/__init__.py
index 5bee464..4f592a3 100644
--- a/app/util/__init__.py
+++ b/app/util/__init__.py
@@ -1,3 +1,4 @@
+from . import skeleton as skeleton
 from .normalize_postgres_dsn_for_atoti_jdbc import (
     normalize_postgres_dsn_for_atoti_jdbc as normalize_postgres_dsn_for_atoti_jdbc,
 )
diff --git a/app/util/skeleton/__init__.py b/app/util/skeleton/__init__.py
new file mode 100644
index 0000000..36d1dfd
--- /dev/null
+++ b/app/util/skeleton/__init__.py
@@ -0,0 +1,20 @@
+from .column import column as column
+from .contributors_count import CONTRIBUTORS_COUNT as CONTRIBUTORS_COUNT
+from .fact_based_hierarchy import fact_based_hierarchy as fact_based_hierarchy
+from .skeleton import (
+    Column as Column,
+    Columns as Columns,
+    Cube as Cube,
+    Cubes as Cubes,
+    Dimension as Dimension,
+    Dimensions as Dimensions,
+    Hierarchies as Hierarchies,
+    Hierarchy as Hierarchy,
+    Level as Level,
+    Levels as Levels,
+    Measure as Measure,
+    Measures as Measures,
+    Skeleton as Skeleton,
+    Table as Table,
+    Tables as Tables,
+)
diff --git a/app/util/skeleton/_node.py b/app/util/skeleton/_node.py
new file mode 100644
index 0000000..d2bc974
--- /dev/null
+++ b/app/util/skeleton/_node.py
@@ -0,0 +1,193 @@
+"""Module containing the node classes required to build a skeleton.
+
+The code is dense and makes heavy use of reflection, but expected invariants are enforced by many assertions (which Python skips when run with `-O`).
+
+This module does not need to be modified to add new nodes to the skeleton.
+"""
+
+from abc import ABC
+from typing import (
+    ClassVar,
+    Final,
+    Generic,
+    TypeVar,
+    cast,
+    final,
+    get_args,
+    get_origin,
+)
+
+from typing_extensions import TypeVarTuple, Unpack, get_original_bases, override
+
+
+def _camel_from_pascal(name: str, /) -> str:
+    """Return `name` with its first character lowercased.
+
+    The empty string is returned unchanged.
+    """
+    # Slicing (unlike indexing) is safe on an empty or single-character name.
+    head, tail = name[:1], name[1:]
+    return f"{head.lower()}{tail}"
+
+
+def _is_private(name: str, /) -> bool:
+    return name[:1] == "_"  # A leading underscore marks the name as private.
+
+
+_KeyT_co = TypeVar("_KeyT_co", bound=str | tuple[str, ...], covariant=True)
+
+_ChildT = TypeVar("_ChildT")
+_HeterogeneousChildT = TypeVarTuple("_HeterogeneousChildT")
+
+_LEAF_NODE_CLASS_NAME = "LeafNode"
+
+
+class HeterogeneousNode(Generic[_KeyT_co, Unpack[_HeterogeneousChildT]], ABC):
+    """Named node with typed children and a `key` cut from the end of its path."""
+
+    _child_types: tuple[Unpack[_HeterogeneousChildT]]
+    _key_length: ClassVar[int]
+    _path: tuple[str, ...] | None = None
+    name: str
+
+    @override
+    def __init_subclass__(cls) -> None:
+        super().__init_subclass__()
+
+        # Cannot directly reference `LeafNode` at this time.
+        if cls.__name__ == _LEAF_NODE_CLASS_NAME:
+            return
+
+        (orig_base,) = cls.__orig_bases__  # type: ignore[attr-defined]
+        key_type, *child_types = get_args(orig_base)
+
+        if get_origin(orig_base).__name__ == _LEAF_NODE_CLASS_NAME:
+            assert not child_types, (
+                f"Expected leaf {cls.__name__} to not have children but got {child_types}."
+            )
+            cls._child_types = ()
+        else:
+            assert child_types, (
+                f"Expected non-leaf {cls.__name__} to have children but got none."
+            )
+            cls._child_types = tuple(child_types)
+
+            if HeterogeneousNode not in cls.__bases__:
+                for child_type in cls._child_types:
+                    child = cls._child(child_type)
+                    assert isinstance(child, HomogeneousNode | None), (
+                        f"Expected {cls.__name__}'s {child_type.__name__} to be an {HomogeneousNode.__name__} but got {type(child).__name__}."
+                    )
+
+        attribute_names = {
+            name
+            for name in dir(cls)
+            if not _is_private(name) and name not in {"key", "name"}
+        }
+        if (
+            _LEAF_NODE_CLASS_NAME in {base.__name__ for base in cls.__bases__}
+        ) or HeterogeneousNode in cls.__bases__:
+            assert not attribute_names, (
+                f"Expected {cls.__name__} to have no attributes but got {attribute_names}."
+            )
+        else:
+            unexpected_attribute_names = attribute_names - {
+                _camel_from_pascal(child_type.__name__)
+                for child_type in cls._child_types
+            }
+            assert not unexpected_attribute_names, (
+                f"{cls.__name__} has some unexpected attributes: {unexpected_attribute_names}."
+            )
+
+        if key_type is str:
+            cls._key_length = 1
+        else:
+            assert get_origin(key_type) is tuple
+            key_length = len(get_args(key_type))
+            assert key_length != 1, "Use `str` instead of `tuple[str]`."
+            cls._key_length = key_length
+
+    @final
+    @classmethod
+    def _child(cls, child_type: type[_ChildT], /) -> _ChildT | None:
+        attribute_name = _camel_from_pascal(child_type.__name__)
+        child = getattr(cls, attribute_name, None)
+        assert isinstance(child, child_type | None), (
+            f"Expected {cls.__name__}.{attribute_name} to be a {child_type.__name__} but got {type(child).__name__}."
+        )
+        return child
+
+    @final
+    @property
+    def key(self) -> _KeyT_co:
+        """Return the last `_key_length` parts of `_path` (a `str` if just one)."""
+        assert self._path is not None, (
+            f"The `_path` of the {type(self).__name__} named `{self.name}` should have been set by now."
+        )
+        match self._key_length:
+            case 0:
+                return cast(_KeyT_co, ())
+            case 1:
+                return cast(_KeyT_co, self._path[-1])
+            case key_length:
+                return cast(_KeyT_co, self._path[-key_length:])
+
+    @final
+    def _set_path(self, *, parent_path: tuple[str, ...]) -> None:
+        self_part = () if self.name is None else (self.name,)
+        self._path = (*parent_path, *self_part)
+        for child_type in self._child_types:
+            assert isinstance(child_type, type)
+            assert issubclass(child_type, HomogeneousNode)
+            child = self._child(child_type)
+            if child is not None:
+                child._set_path(parent_path=self._path)  # noqa: SLF001
+
+
+class LeafNode(HeterogeneousNode[_KeyT_co]):  # A node without any child type.
+    def __init__(self, name: str, /) -> None:
+        super().__init__()
+        self.name: Final = name
+
+
+assert LeafNode.__name__ == _LEAF_NODE_CLASS_NAME
+
+_HomogenousChildT = TypeVar("_HomogenousChildT")
+
+
+class HomogeneousNode(Generic[_HomogenousChildT], ABC):
+    """Container whose public class attributes are all nodes of one type."""
+
+    _child_type: type[_HomogenousChildT]
+
+    @override
+    def __init_subclass__(cls) -> None:
+        super().__init_subclass__()
+        # Only direct subclasses declare the child type in their generic base.
+        if HomogeneousNode in cls.__bases__:
+            (generic_base,) = get_original_bases(cls)
+            assert get_origin(generic_base) is HomogeneousNode
+            (declared_type,) = get_args(generic_base)
+            assert issubclass(declared_type, HeterogeneousNode)
+            cls._child_type = declared_type
+
+    @final
+    @classmethod
+    def _children(cls) -> dict[str, _HomogenousChildT]:
+        """Return the public attributes declared directly on `cls`, by name."""
+        found: dict[str, _HomogenousChildT] = {}
+        for attribute_name, node in vars(cls).items():
+            if not _is_private(attribute_name):
+                assert isinstance(node, cls._child_type), (
+                    f"Expected {cls.__name__}.{attribute_name} to be a {cls._child_type.__name__} but got {type(node).__name__}."
+                )
+                found[attribute_name] = node
+        return found
+
+    @final
+    def _set_path(self, *, parent_path: tuple[str, ...]) -> None:
+        for child in self._children().values():
+            assert isinstance(child, HeterogeneousNode), (
+                f"Expected {type(child).__name__} to be an {HeterogeneousNode.__name__}."
+            )
+            child._set_path(parent_path=parent_path)  # noqa: SLF001
diff --git a/app/util/skeleton/column.py b/app/util/skeleton/column.py
new file mode 100644
index 0000000..720aa51
--- /dev/null
+++ b/app/util/skeleton/column.py
@@ -0,0 +1,9 @@
+import atoti as tt
+
+from .skeleton import Column as Column
+
+
+def column(session: tt.Session, column: Column, /) -> tt.Column:
+    """Resolve a skeleton column to an Atoti one (Atoti has no `Tables.columns`)."""
+    table_key, column_key = column.key
+    return session.tables[table_key][column_key]
diff --git a/app/util/skeleton/contributors_count.py b/app/util/skeleton/contributors_count.py
new file mode 100644
index 0000000..5adfdec
--- /dev/null
+++ b/app/util/skeleton/contributors_count.py
@@ -0,0 +1 @@
+CONTRIBUTORS_COUNT = "contributors.COUNT"
diff --git a/app/util/skeleton/fact_based_hierarchy.py b/app/util/skeleton/fact_based_hierarchy.py
new file mode 100644
index 0000000..d0cf4a1
--- /dev/null
+++ b/app/util/skeleton/fact_based_hierarchy.py
@@ -0,0 +1,24 @@
+import atoti as tt
+
+from .column import column
+from .skeleton import Column, Hierarchy, Level, Levels
+
+
+def _column(level: Level, /) -> Column:
+    backing = level._column  # noqa: SLF001  # `None` unless built from a `Column`.
+    assert backing is not None, (
+        f"Cannot use `{fact_based_hierarchy.__name__}()` with a hierarchy with level `{level.name}` not based on a column."
+    )
+    return backing
+
+
+def fact_based_hierarchy(
+    session: tt.Session, hierarchy: Hierarchy, /
+) -> tuple[tuple[str, str], dict[str, tt.Column]]:
+    """Return `hierarchy`'s key and its level-name-to-column mapping."""
+    level_nodes = hierarchy._child(Levels)  # noqa: SLF001
+    assert level_nodes is not None
+    return hierarchy.key, {
+        node.name: column(session, _column(node))
+        for node in level_nodes._children().values()  # noqa: SLF001
+    }
diff --git a/app/util/skeleton/skeleton.py b/app/util/skeleton/skeleton.py
new file mode 100644
index 0000000..f1ef2bd
--- /dev/null
+++ b/app/util/skeleton/skeleton.py
@@ -0,0 +1,132 @@
+from typing import Final, final
+
+from typing_extensions import override
+
+from ._node import HeterogeneousNode, HomogeneousNode, LeafNode
+
+
+@final
+class Column(LeafNode[tuple[str, str]]): ...  # Key: (table name, column name).
+
+
+class Columns(HomogeneousNode[Column]): ...  # A table's columns.
+
+
+class Table(HeterogeneousNode[str, Columns]): ...  # Key: the table name.
+
+
+class Tables(HomogeneousNode[Table]): ...  # The skeleton's tables.
+
+
+@final
+class Level(LeafNode[tuple[str, str, str]]):
+    """Hierarchy level named explicitly or after the column it is built from."""
+
+    _column: Column | None = None
+
+    def __init__(self, column_or_name: Column | str, /) -> None:
+        if isinstance(column_or_name, Column):
+            super().__init__("__pending__")
+            self._column = column_or_name
+        elif isinstance(column_or_name, str):
+            super().__init__(column_or_name)
+
+    @override  # type: ignore[misc]
+    def _set_path(self, *, parent_path: tuple[str, ...]) -> None:
+        if (based_on := self._column) is not None:
+            assert based_on.key is not None, (
+                "The column key should have been set by now."
+            )
+            self.name = based_on.name  # type: ignore[misc]
+        super()._set_path(parent_path=parent_path)
+
+
+class Levels(HomogeneousNode[Level]): ...  # A hierarchy's levels.
+
+
+class Hierarchy(HeterogeneousNode[tuple[str, str], Levels]): ...  # Key: 2-part tuple.
+
+
+class Hierarchies(HomogeneousNode[Hierarchy]): ...  # A dimension's hierarchies.
+
+
+class Dimension(HeterogeneousNode[str, Hierarchies]): ...  # Key: the dimension name.
+
+
+class Dimensions(HomogeneousNode[Dimension]): ...  # A cube's dimensions.
+
+
+@final
+class Measure(LeafNode[str]): ...  # Key: the measure name.
+
+
+class Measures(HomogeneousNode[Measure]): ...  # A cube's measures.
+
+
+class Cube(HeterogeneousNode[str, Dimensions, Measures]): ...  # Key: the cube name.
+
+
+class Cubes(HomogeneousNode[Cube]): ...  # The skeleton's cubes.
+
+
+class Skeleton(
+    HeterogeneousNode[
+        tuple[()],
+        # Before `Cubes` so that a `Level` referencing a `Column` can access the column's `_path`.
+        Tables,
+        Cubes,
+    ]
+):
+    """The skeleton of a data model.
+
+    It mirrors the structure of the data model but only declares the parent/child relationship between nodes and the name of each node.
+
+    Note:
+        Attaching other information to the skeleton is discouraged because this will end up duplicating the data model API already provided by Atoti.
+        For instance, it is discouraged to add a ``data_type`` attribute to ``Column``, or a ``keys`` attribute to ``Table``.
+
+    Skeletons scale well to large data models because IDEs can inspect them statically and thus offer:
+
+    * Autocompletion
+    * "Find all references"
+    * "Go to definition"
+    * Type checking
+    * Dead code detection
+
+    When instantiated, the skeleton will propagate the path from the root (i.e. this class) to all the nodes, providing easy access to unambiguous keys:
+
+    >>> class _MyCubeFooDimensionBarHierarchyLevels(Levels):
+    ...     BAZ = Level("baz")
+    >>> class _MyCubeFooDimensionBarHierarchy(Hierarchy):
+    ...     name = "bar"
+    ...     levels = _MyCubeFooDimensionBarHierarchyLevels()
+    >>> class _MyCubeFooDimensionHierarchies(Hierarchies):
+    ...     BAR = _MyCubeFooDimensionBarHierarchy()
+    >>> class _MyCubeFooDimension(Dimension):
+    ...     name = "foo"
+    ...     hierarchies = _MyCubeFooDimensionHierarchies()
+    >>> class _MyCubeDimensions(Dimensions):
+    ...     FOO = _MyCubeFooDimension()
+    >>> class _MyCube(Cube):
+    ...     name = "my cube"
+    ...     dimensions = _MyCubeDimensions()
+    >>> class _Cubes(Cubes):
+    ...     MY_CUBE = _MyCube()
+    >>> class _Skeleton(Skeleton):
+    ...     cubes = _Cubes()
+    >>> SKELETON = _Skeleton()
+    >>> SKELETON.cubes.MY_CUBE.dimensions.FOO.key
+    'foo'
+    >>> SKELETON.cubes.MY_CUBE.dimensions.FOO.hierarchies.BAR.key
+    ('foo', 'bar')
+    >>> SKELETON.cubes.MY_CUBE.dimensions.FOO.hierarchies.BAR.levels.BAZ.key
+    ('foo', 'bar', 'baz')
+
+    This works well with :func:`atoti.mapping_lookup` when ``check=False`` since that mode requires unambiguous keys.
+
+    """
+
+    name: Final = "__root__"  # First part of every node's path.
+
+    def __init__(self) -> None:
+        self._set_path(parent_path=())  # Propagate paths down the whole tree.
diff --git a/pyproject.toml b/pyproject.toml
index 22f6e62..0885a6a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ dependencies = [
     "pandas",
     "pydantic",
     "pydantic-settings",
+    "typing-extensions",
 ]
 
 [tool.mypy]
@@ -22,11 +23,14 @@ module = ["docker", "docker.*"]
 ignore_missing_imports = true
 
 [tool.pytest.ini_options]
-addopts = "--strict-markers"
+addopts = "--doctest-modules --strict-markers"
 asyncio_default_fixture_loop_scope = "session"
 asyncio_mode = "auto"
 filterwarnings = ["error"]
 
+[tool.ruff.format]
+docstring-code-format = true
+
 [tool.ruff.lint]
 ignore = [
     "COM812",  # Covered by the formatter.
diff --git a/tests/docker/test_docker.py b/tests/docker/test_docker.py
index 3bf7938..4ae5ff4 100644
--- a/tests/docker/test_docker.py
+++ b/tests/docker/test_docker.py
@@ -1,11 +1,12 @@
 import atoti as tt
 
-from app import Cube
+from app import SKELETON
+from app.util.skeleton import CONTRIBUTORS_COUNT
 
 
 def test_session_inside_docker_container(
     session_inside_docker_container: tt.Session,
 ) -> None:
-    cube = session_inside_docker_container.cubes[Cube.STATION.value]
-    result_df = cube.query(cube.measures["contributors.COUNT"])
-    assert result_df["contributors.COUNT"][0] > 0
+    cube = session_inside_docker_container.cubes[SKELETON.cubes.STATION.key]
+    result_df = cube.query(cube.measures[CONTRIBUTORS_COUNT])
+    assert result_df[CONTRIBUTORS_COUNT][0] > 0  # Smoke check: non-zero count.
diff --git a/tests/test_session.py b/tests/test_session.py
index 8e9cc6c..3544ebc 100644
--- a/tests/test_session.py
+++ b/tests/test_session.py
@@ -1,16 +1,16 @@
 import atoti as tt
 import pandas as pd
 
-from app import Cube, StationCubeLocationLevel, StationCubeMeasure
+from app import SKELETON
+from app.util.skeleton import CONTRIBUTORS_COUNT
 
 
 def test_total_capacity(session: tt.Session) -> None:
-    station_cube = session.cubes[Cube.STATION.value]
-    result = station_cube.query(
-        station_cube.measures[StationCubeMeasure.CAPACITY.value]
-    )
+    skeleton = SKELETON.cubes.STATION
+    cube = session.cubes[skeleton.key]
+    result = cube.query(cube.measures[skeleton.measures.CAPACITY.key])
     expected_result = pd.DataFrame(
-        columns=[StationCubeMeasure.CAPACITY.value],
+        columns=[skeleton.measures.CAPACITY.name],
         data=[
             (45_850),
         ],
@@ -20,10 +20,16 @@ def test_total_capacity(session: tt.Session) -> None:
 
 
 def test_departments(session: tt.Session) -> None:
-    station_cube = session.cubes[Cube.STATION.value]
-    result = station_cube.query(
-        station_cube.measures["contributors.COUNT"],
-        levels=[station_cube.levels[StationCubeLocationLevel.DEPARTMENT.value]],
+    skeleton = SKELETON.cubes.STATION
+    cube = session.cubes[skeleton.key]
+    l, m = cube.levels, cube.measures
+    result = cube.query(
+        m[CONTRIBUTORS_COUNT],
+        levels=[
+            l[
+                skeleton.dimensions.STATION_DETAILS.hierarchies.LOCATION.levels.DEPARTMENT.key
+            ]
+        ],
     )
     assert list(result.index) == [
         "75, Paris, Île-de-France",
diff --git a/uv.lock b/uv.lock
index 3303054..a2a5cb4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -40,6 +40,7 @@ dependencies = [
     { name = "pandas" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
+    { name = "typing-extensions" },
 ]
 
 [package.dev-dependencies]
@@ -59,6 +60,7 @@ requires-dist = [
     { name = "pandas" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
+    { name = "typing-extensions" },
 ]
 
 [package.metadata.requires-dev]