Add skeleton (#292)
tibdex authored Mar 4, 2025
1 parent 4dd21a4 commit 42a5514
Showing 19 changed files with 618 additions and 165 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
@@ -12,6 +12,7 @@ jobs:
      - uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
+         # Keep in sync with `Dockerfile`'s `builder`.
          version: "0.5.6"
      - run: uv python install 3.10
      - run: uv sync --locked
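The only substantive line in this hunk is the new comment: the uv version was already pinned to 0.5.6 here, and the comment ties this pin to the `builder` stage of the `Dockerfile`, which the same commit bumps to match (see below).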
5 changes: 3 additions & 2 deletions Dockerfile
@@ -1,6 +1,7 @@
 # Inspired from https://github.com/astral-sh/uv-docker-example/blob/dee88a8c43be3b16b0ad58f0daee5eaee7e2157a/multistage.Dockerfile.
 
-FROM ghcr.io/astral-sh/uv:0.4.10-python3.10-bookworm-slim AS builder
+# Keep in sync with `.github/workflows/test.yml`.
+FROM ghcr.io/astral-sh/uv:0.5.6-python3.10-bookworm-slim AS builder
 
 ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
 
@@ -25,4 +26,4 @@ ENV PORT=80
 
 EXPOSE $PORT
 
-CMD ["python", "-u", "-m", "app"]
+CMD ["python", "-O", "-u", "-m", "app"]
2 changes: 1 addition & 1 deletion app/__init__.py
@@ -1,3 +1,3 @@
 from .config import Config as Config
-from .constants import *  # noqa: F403
+from .skeleton import SKELETON as SKELETON
 from .start_app import start_app as start_app
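The `SKELETON as SKELETON` spelling is the explicit re-export idiom: under PEP 484 semantics (enforced by mypy's no-implicit-reexport strict mode) a bare import in an `__init__.py` is not considered part of the package's public API, while the redundant alias marks it as intentional. It also retires the star-import of the deleted `constants` module, so the package's public surface is now enumerable.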
55 changes: 0 additions & 55 deletions app/constants.py

This file was deleted.
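The 55 deleted lines are not shown, but judging from the imports removed elsewhere in this commit they held the string enums (`Table`, `Cube`, `StationDetailsTableColumn`, `StationStatusTableColumn`, and the `StationCube*` hierarchy, level, and measure enums) whose `.value` lookups are replaced throughout by the typed `SKELETON` object.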

54 changes: 28 additions & 26 deletions app/create_and_join_tables.py
@@ -1,52 +1,54 @@
 import atoti as tt
 
-from .constants import StationDetailsTableColumn, StationStatusTableColumn, Table
+from .skeleton import SKELETON
+from .util.skeleton import column
 
 
 def create_station_status_table(session: tt.Session, /) -> None:
+    skeleton = SKELETON.tables.STATION_STATUS
+    columns = skeleton.columns
     session.create_table(
-        Table.STATION_STATUS.value,
+        skeleton.name,
         data_types={
-            StationStatusTableColumn.STATION_ID.value: tt.LONG,
-            StationStatusTableColumn.BIKE_TYPE.value: tt.STRING,
-            StationStatusTableColumn.BIKES.value: tt.INT,
+            columns.STATION_ID.name: tt.LONG,
+            columns.BIKE_TYPE.name: tt.STRING,
+            columns.BIKES.name: tt.INT,
         },
         keys={
-            StationStatusTableColumn.STATION_ID.value,
-            StationStatusTableColumn.BIKE_TYPE.value,
+            columns.STATION_ID.name,
+            columns.BIKE_TYPE.name,
         },
     )
 
 
 def create_station_details_table(session: tt.Session, /) -> None:
+    skeleton = SKELETON.tables.STATION_DETAILS
+    columns = skeleton.columns
     session.create_table(
-        Table.STATION_DETAILS.value,
+        skeleton.name,
         data_types={
-            StationDetailsTableColumn.ID.value: tt.LONG,
-            StationDetailsTableColumn.NAME.value: tt.STRING,
-            StationDetailsTableColumn.DEPARTMENT.value: tt.STRING,
-            StationDetailsTableColumn.CITY.value: tt.STRING,
-            StationDetailsTableColumn.POSTCODE.value: tt.INT,
-            StationDetailsTableColumn.STREET.value: tt.STRING,
-            StationDetailsTableColumn.HOUSE_NUMBER.value: tt.STRING,
-            StationDetailsTableColumn.CAPACITY.value: tt.INT,
+            columns.ID.name: tt.LONG,
+            columns.NAME.name: tt.STRING,
+            columns.DEPARTMENT.name: tt.STRING,
+            columns.CITY.name: tt.STRING,
+            columns.POSTCODE.name: tt.INT,
+            columns.STREET.name: tt.STRING,
+            columns.HOUSE_NUMBER.name: tt.STRING,
+            columns.CAPACITY.name: tt.INT,
         },
-        default_values={StationDetailsTableColumn.POSTCODE.value: 0},
+        default_values={columns.POSTCODE.name: 0},
        keys={
-            StationDetailsTableColumn.ID.value,
+            columns.ID.name,
        },
    )
 
 
 def join_tables(session: tt.Session, /) -> None:
-    session.tables[Table.STATION_STATUS.value].join(
-        session.tables[Table.STATION_DETAILS.value],
-        session.tables[Table.STATION_STATUS.value][
-            StationStatusTableColumn.STATION_ID.value
-        ]
-        == session.tables[Table.STATION_DETAILS.value][
-            StationDetailsTableColumn.ID.value
-        ],
+    tables = SKELETON.tables
+    session.tables[tables.STATION_STATUS.key].join(
+        session.tables[tables.STATION_DETAILS.key],
+        column(session, tables.STATION_STATUS.columns.STATION_ID)
+        == column(session, tables.STATION_DETAILS.columns.ID),
     )


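The new `app/skeleton.py` is collapsed in this diff, so the exact shape of `SKELETON` is not visible. From the call sites above (`skeleton.name`, `table.key`, `columns.<NAME>.name`, and the `column(session, ...)` helper from `app/util/skeleton.py`), a minimal sketch of what it would need to provide could look like this; every name and string value below is inferred from usage, not copied from the real modules:

# Hypothetical reconstruction, inferred from call sites only; the actual
# app/skeleton.py and app/util/skeleton.py may differ.
from dataclasses import dataclass
from types import SimpleNamespace

import atoti as tt


@dataclass(frozen=True)
class ColumnSkeleton:
    table_name: str  # name of the owning table
    name: str  # column name used in data_types/keys mappings


@dataclass(frozen=True)
class TableSkeleton:
    name: str
    columns: SimpleNamespace  # namespace of ColumnSkeleton attributes

    @property
    def key(self) -> str:
        # Lookup key for session.tables[...].
        return self.name


_STATION_STATUS_NAME = "Station status"  # assumed; the real value lived in constants.py

STATION_STATUS = TableSkeleton(
    name=_STATION_STATUS_NAME,
    columns=SimpleNamespace(
        STATION_ID=ColumnSkeleton(_STATION_STATUS_NAME, "Station ID"),
        BIKE_TYPE=ColumnSkeleton(_STATION_STATUS_NAME, "Bike type"),
        BIKES=ColumnSkeleton(_STATION_STATUS_NAME, "Bikes"),
    ),
)


def column(session: tt.Session, skeleton: ColumnSkeleton, /):
    # Resolve a column skeleton to the live atoti column object.
    return session.tables[skeleton.table_name][skeleton.name]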
79 changes: 29 additions & 50 deletions app/create_cubes.py
@@ -1,69 +1,48 @@
 import atoti as tt
 
-from .constants import (
-    Cube,
-    StationCubeBikeTypeLevel,
-    StationCubeHierarchy,
-    StationCubeLocationLevel,
-    StationCubeMeasure,
-    StationCubeStationLevel,
-    StationDetailsTableColumn,
-    StationStatusTableColumn,
-    Table,
-)
+from .skeleton import SKELETON
+from .util.skeleton import column, fact_based_hierarchy
 
 
 def create_station_cube(session: tt.Session, /) -> None:
-    station_details_table = session.tables[Table.STATION_DETAILS.value]
-    station_status_table = session.tables[Table.STATION_STATUS.value]
+    tables = SKELETON.tables
+    skeleton = SKELETON.cubes.STATION
 
-    cube = session.create_cube(station_status_table, Cube.STATION.value, mode="manual")
+    cube = session.create_cube(
+        session.tables[tables.STATION_STATUS.key],
+        skeleton.name,
+        mode="manual",
+    )
     h, l, m = cube.hierarchies, cube.levels, cube.measures
 
     h.update(
-        {
-            StationCubeHierarchy.BIKE_TYPE.value: {
-                StationCubeBikeTypeLevel.BIKE_TYPE.value: station_status_table[
-                    StationStatusTableColumn.BIKE_TYPE.value
+        dict(
+            [
+                fact_based_hierarchy(session, hierarchy)
+                for hierarchy in [
+                    skeleton.dimensions.STATION_STATUS.hierarchies.BIKE_TYPE,
+                    skeleton.dimensions.STATION_DETAILS.hierarchies.LOCATION,
+                    skeleton.dimensions.STATION_DETAILS.hierarchies.STATION
                 ]
-            },
-            StationCubeHierarchy.LOCATION.value: {
-                StationCubeLocationLevel.DEPARTMENT.value: station_details_table[
-                    StationDetailsTableColumn.DEPARTMENT.value
-                ],
-                StationCubeLocationLevel.CITY.value: station_details_table[
-                    StationDetailsTableColumn.CITY.value
-                ],
-                StationCubeLocationLevel.POSTCODE.value: station_details_table[
-                    StationDetailsTableColumn.POSTCODE.value
-                ],
-                StationCubeLocationLevel.STREET.value: station_details_table[
-                    StationDetailsTableColumn.STREET.value
-                ],
-                StationCubeLocationLevel.HOUSE_NUMBER.value: station_details_table[
-                    StationDetailsTableColumn.HOUSE_NUMBER.value
-                ],
-            },
-            StationCubeHierarchy.STATION.value: {
-                StationCubeStationLevel.NAME.value: station_details_table[
-                    StationDetailsTableColumn.NAME.value
-                ],
-                StationCubeStationLevel.ID.value: station_status_table[
-                    StationStatusTableColumn.STATION_ID.value
-                ],
-            },
-        }
+            ]
+        )
     )
 
     with session.data_model_transaction():
-        m[StationCubeMeasure.BIKES.value] = tt.agg.sum(
-            station_status_table[StationStatusTableColumn.BIKES.value]
+        m[skeleton.measures.BIKES.key] = tt.agg.sum(
+            column(session, tables.STATION_STATUS.columns.BIKES)
        )
-        m[StationCubeMeasure.CAPACITY.value] = tt.agg.sum(
+        m[skeleton.measures.CAPACITY.key] = tt.agg.sum(
             tt.agg.single_value(
-                station_details_table[StationDetailsTableColumn.CAPACITY.value]
+                column(session, tables.STATION_DETAILS.columns.CAPACITY)
             ),
-            scope=tt.OriginScope({l[StationCubeStationLevel.ID.value]}),
+            scope=tt.OriginScope(
+                {
+                    l[
+                        skeleton.dimensions.STATION_DETAILS.hierarchies.STATION.levels.ID.key
+                    ]
+                }
+            ),
         )
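A note on the `CAPACITY` measure, whose logic this refactoring preserves: capacity is an attribute of a station, repeated on every fact row for that station (one row per bike type), so it is read with `tt.agg.single_value` at the station-ID level declared in the `OriginScope` and the per-station values are summed only above that level; a plain `tt.agg.sum` over the joined column would count each station's capacity once per bike type.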


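`fact_based_hierarchy` also lives in the collapsed `app/util/skeleton.py`. Since its results are collected into a `dict` passed to `cube.hierarchies.update(...)`, it plausibly returns one `(hierarchy key, {level name: column})` pair per hierarchy skeleton. A sketch under that assumption, reusing the `column` helper sketched earlier, with every attribute name inferred rather than taken from the real helper:

# Hypothetical sketch, inferred from the create_cubes.py call sites.
def fact_based_hierarchy(session, hierarchy_skeleton, /):
    # One entry for cube.hierarchies.update(): the hierarchy's key mapped
    # to an ordered {level name: backing table column} mapping.
    return (
        hierarchy_skeleton.key,
        {
            level.name: column(session, level.column)
            for level in hierarchy_skeleton.levels
        },
    )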
36 changes: 20 additions & 16 deletions app/load_tables.py
@@ -9,7 +9,7 @@
 from pydantic import HttpUrl
 
 from .config import Config
-from .constants import StationDetailsTableColumn, StationStatusTableColumn, Table
+from .skeleton import SKELETON
 from .util import read_json, reverse_geocode
@@ -19,6 +19,7 @@ async def read_station_details(
     reverse_geocoding_path: HttpUrl | Path,
     velib_data_base_path: HttpUrl | Path,
 ) -> pd.DataFrame:
+    columns = SKELETON.tables.STATION_DETAILS.columns
     stations_data: Any = cast(
         Any,
         await read_json(
@@ -31,9 +32,9 @@
         ["station_id", "name", "capacity", "lat", "lon"]
     ].rename(
         columns={
-            "station_id": StationDetailsTableColumn.ID.value,
-            "name": StationDetailsTableColumn.NAME.value,
-            "capacity": StationDetailsTableColumn.CAPACITY.value,
+            "station_id": columns.ID.name,
+            "name": columns.NAME.name,
+            "capacity": columns.CAPACITY.name,
             "lat": "latitude",
             "lon": "longitude",
         }
@@ -52,11 +53,11 @@
         coordinates, reverse_geocoding_path=reverse_geocoding_path
     ).rename(
         columns={
-            "department": StationDetailsTableColumn.DEPARTMENT.value,
-            "city": StationDetailsTableColumn.CITY.value,
-            "postcode": StationDetailsTableColumn.POSTCODE.value,
-            "street": StationDetailsTableColumn.STREET.value,
-            "house_number": StationDetailsTableColumn.HOUSE_NUMBER.value,
+            "department": columns.DEPARTMENT.name,
+            "city": columns.CITY.name,
+            "postcode": columns.POSTCODE.name,
+            "street": columns.STREET.name,
+            "house_number": columns.HOUSE_NUMBER.name,
         }
     )

@@ -71,6 +72,7 @@ async def read_station_status(
     *,
     http_client: httpx.AsyncClient,
 ) -> pd.DataFrame:
+    columns = SKELETON.tables.STATION_STATUS.columns
     stations_data = cast(
         Any,
         await read_json(
@@ -89,11 +91,9 @@ async def read_station_status(
             bike_type, bikes = next(iter(num_bikes_available_types.items()))
             station_statuses.append(
                 {
-                    StationStatusTableColumn.STATION_ID.value: station_status[
-                        "station_id"
-                    ],
-                    StationStatusTableColumn.BIKE_TYPE.value: bike_type,
-                    StationStatusTableColumn.BIKES.value: bikes,
+                    columns.STATION_ID.name: station_status["station_id"],
+                    columns.BIKE_TYPE.name: bike_type,
+                    columns.BIKES.name: bikes,
                 }
             )
     return pd.DataFrame(station_statuses)
@@ -120,6 +120,10 @@ async def load_tables(
 
     with session.tables.data_transaction():
         await asyncio.gather(
-            session.tables[Table.STATION_DETAILS.value].load_async(station_details_df),
-            session.tables[Table.STATION_STATUS.value].load_async(station_status_df),
+            session.tables[SKELETON.tables.STATION_DETAILS.key].load_async(
+                station_details_df
+            ),
+            session.tables[SKELETON.tables.STATION_STATUS.key].load_async(
+                station_status_df
+            ),
         )
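A usage note on this last hunk: the two `load_async` calls run concurrently under `asyncio.gather`, and wrapping them in `session.tables.data_transaction()` makes the refresh atomic from a query's point of view: readers see either both tables in their old state or both fully loaded, never a half-updated join.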
