diff --git a/tests/__resources__/station_information.json b/app/__resources__/station_information.json
similarity index 100%
rename from tests/__resources__/station_information.json
rename to app/__resources__/station_information.json
diff --git a/tests/__resources__/station_location.csv b/app/__resources__/station_location.csv
similarity index 100%
rename from tests/__resources__/station_location.csv
rename to app/__resources__/station_location.csv
diff --git a/tests/__resources__/station_status.json b/app/__resources__/station_status.json
similarity index 100%
rename from tests/__resources__/station_status.json
rename to app/__resources__/station_status.json
diff --git a/app/config.py b/app/config.py
index 092765e..dca3b68 100644
--- a/app/config.py
+++ b/app/config.py
@@ -1,13 +1,9 @@
-from datetime import timedelta
 from pathlib import Path
 from typing import Annotated
 
 from pydantic import (
     AliasChoices,
-    DirectoryPath,
     Field,
-    FilePath,
-    HttpUrl,
     PostgresDsn,
     TypeAdapter,
     field_validator,
@@ -25,15 +21,17 @@ class Config(BaseSettings):
 
     model_config = SettingsConfigDict(frozen=True)
 
-    data_refresh_period: timedelta | None = timedelta(minutes=1)
+    check_mapping_lookups: bool = __debug__
+
+    data_refresh_period: float | None = None
+    """How often the station status data should be refreshed, in seconds.
+
+    If ``None``, only local data will be used: no requests will be made to external APIs.
+    """
 
     # The $PORT environment variable is used by most PaaS to indicate the port the app server should bind to.
     port: int = 9090
 
-    reverse_geocoding_path: HttpUrl | FilePath = TypeAdapter(HttpUrl).validate_python(
-        "https://api-adresse.data.gouv.fr/reverse/csv/"
-    )
-
     user_content_storage: Annotated[
         PostgresDsn | Path | None,
         Field(
@@ -43,12 +41,6 @@ class Config(BaseSettings):
         ),
     ] = Path("content")
 
-    velib_data_base_path: HttpUrl | DirectoryPath = TypeAdapter(
-        HttpUrl
-    ).validate_python(
-        "https://velib-metropole-opendata.smovengo.cloud/opendata/Velib_Metropole"
-    )
-
     @field_validator("user_content_storage")
     @classmethod
     def normalize_postgres_dsn(cls, value: object) -> object:
diff --git a/app/load_tables.py b/app/load_tables.py
index 144079b..d82af9d 100644
--- a/app/load_tables.py
+++ b/app/load_tables.py
@@ -6,9 +6,10 @@
 import atoti as tt
 import httpx
 import pandas as pd
-from pydantic import HttpUrl
+from pydantic import DirectoryPath, FilePath, HttpUrl
 
 from .config import Config
+from .path import RESOURCES_DIRECTORY
 from .skeleton import SKELETON
 from .util import read_json, reverse_geocode
 
@@ -106,19 +107,35 @@ async def load_tables(
     config: Config,
     http_client: httpx.AsyncClient,
 ) -> None:
+    if config.data_refresh_period is None:
+        reverse_geocoding_path: HttpUrl | FilePath = (
+            RESOURCES_DIRECTORY / "station_location.csv"
+        )
+        velib_data_base_path: HttpUrl | DirectoryPath = RESOURCES_DIRECTORY
+    else:
+        reverse_geocoding_path = HttpUrl(
+            "https://api-adresse.data.gouv.fr/reverse/csv/"
+        )
+        velib_data_base_path = HttpUrl(
+            "https://velib-metropole-opendata.smovengo.cloud/opendata/Velib_Metropole"
+        )
+
     station_details_df, station_status_df = await asyncio.gather(
         read_station_details(
             http_client=http_client,
-            reverse_geocoding_path=config.reverse_geocoding_path,
-            velib_data_base_path=config.velib_data_base_path,
+            reverse_geocoding_path=reverse_geocoding_path,
+            velib_data_base_path=velib_data_base_path,
         ),
         read_station_status(
-            config.velib_data_base_path,
+            velib_data_base_path,
            http_client=http_client,
         ),
     )
 
-    with tt.mapping_lookup(check=__debug__), session.tables.data_transaction():
+    with (
+        tt.mapping_lookup(check=config.check_mapping_lookups),
+        session.tables.data_transaction(),
+    ):
         await asyncio.gather(
             session.tables[SKELETON.tables.STATION_DETAILS.key].load_async(
                 station_details_df
diff --git a/app/path.py b/app/path.py
new file mode 100644
index 0000000..c3ea661
--- /dev/null
+++ b/app/path.py
@@ -0,0 +1,4 @@
+from pathlib import Path
+
+APP_DIRECTORY = Path(__file__).parent
+RESOURCES_DIRECTORY = APP_DIRECTORY / "__resources__"
diff --git a/app/start_session.py b/app/start_session.py
index b9edb3b..7d58900 100644
--- a/app/start_session.py
+++ b/app/start_session.py
@@ -39,7 +39,7 @@ async def start_session(
     """Start the session, declare the data model and load the initial data."""
     session_config = get_session_config(config)
     with tt.Session.start(session_config) as session:
-        with tt.mapping_lookup(check=__debug__):
+        with tt.mapping_lookup(check=config.check_mapping_lookups):
             create_and_join_tables(session)
             create_cubes(session)
             await load_tables(session, config=config, http_client=http_client)
diff --git a/app/util/run_periodically.py b/app/util/run_periodically.py
index b8306a7..bcbd03a 100644
--- a/app/util/run_periodically.py
+++ b/app/util/run_periodically.py
@@ -1,7 +1,6 @@
 import asyncio
 from collections.abc import AsyncGenerator, Awaitable, Callable
 from contextlib import asynccontextmanager
-from datetime import timedelta
 
 
 @asynccontextmanager
@@ -9,15 +8,14 @@ async def run_periodically(
     callback: Callable[[], Awaitable[None]],
     /,
     *,
-    period: timedelta,
+    period: float,
 ) -> AsyncGenerator[None]:
-    period_in_seconds = period.total_seconds()
     stopped = asyncio.Event()
 
     async def loop() -> None:
         while not stopped.is_set():
             await callback()
-            await asyncio.sleep(period_in_seconds)
+            await asyncio.sleep(period)
 
     task = asyncio.create_task(loop())
 
diff --git a/pyproject.toml b/pyproject.toml
index 0885a6a..9e6182a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,7 @@ ignore = [
   "PLC0414", # Redundant imports are used for re-exporting (See https://peps.python.org/pep-0484/#stub-files).
   "S101", # `assert` is useful when used correctly (https://realpython.com/python-assert-statement).
   "TCH", # Pydantic needs to resolve type annotations at runtime.
+  "TID252", # Relative imports from parent modules are used in the tests.
   "TRY003", # Not worth the annoyance.
 ]
 select = ["ALL"]
diff --git a/task-definition.json b/task-definition.json
index f7f2e00..0a01392 100644
--- a/task-definition.json
+++ b/task-definition.json
@@ -3,6 +3,12 @@
   "containerDefinitions": [
     {
       "name": "atoti-session",
+      "environment": [
+        {
+          "name": "DATA_REFRESH_PERIOD",
+          "value": "60"
+        }
+      ],
       "essential": true,
       "logConfiguration": {
         "logDriver": "awslogs",
diff --git a/tests/conftest.py b/tests/conftest.py
index 8614fe6..b0e76f4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,7 +7,6 @@
 from app import Config, start_app
 
 _TESTS_DIRECTORY = Path(__file__).parent
-_TESTS_RESOURCES_DIRECTORY = _TESTS_DIRECTORY / "__resources__"
 _PROJECT_DIRECTORY = _TESTS_DIRECTORY.parent
 
 
@@ -19,11 +18,8 @@ def project_name_fixture() -> str:
 @pytest.fixture(name="config", scope="session")
 def config_fixture() -> Config:
     return Config(
-        data_refresh_period=None,
-        reverse_geocoding_path=_TESTS_RESOURCES_DIRECTORY / "station_location.csv",
         port=0,
         user_content_storage=None,
-        velib_data_base_path=_TESTS_RESOURCES_DIRECTORY,
     )
 
 
diff --git a/tests/docker/_docker_container.py b/tests/docker/_docker_container.py
index a7eb2a8..1823411 100644
--- a/tests/docker/_docker_container.py
+++ b/tests/docker/_docker_container.py
@@ -1,4 +1,4 @@
-from collections.abc import Generator
+from collections.abc import Generator, Mapping
 from contextlib import contextmanager
 from uuid import uuid4
 
@@ -13,10 +13,12 @@ def docker_container(
     *,
     client: docker.DockerClient,
     container_name: str | None = None,
+    env: Mapping[str, str] | None = None,
 ) -> Generator[Container, None, None]:
     container = client.containers.run(
         image_name,
         detach=True,
+        environment=env,
         name=container_name or str(uuid4()),
         publish_all_ports=True,
     )
diff --git a/tests/docker/conftest.py b/tests/docker/conftest.py
index 9b0374e..50020fb 100644
--- a/tests/docker/conftest.py
+++ b/tests/docker/conftest.py
@@ -54,7 +54,14 @@ def session_inside_docker_container_fixture(
 ) -> Generator[tt.Session, None, None]:
     timeout = Timeout(timedelta(minutes=1))
 
-    with docker_container(docker_image_name, client=docker_client) as container:
+    with docker_container(
+        docker_image_name,
+        client=docker_client,
+        env={
+            # Test external APIs.
+            "DATA_REFRESH_PERIOD": "30"
+        },
+    ) as container:
         logs = container.logs(stream=True)
 
         while "Session listening on port" not in next(logs).decode():
diff --git a/tests/docker/test_docker.py b/tests/docker/test_docker.py
index 4ae5ff4..16ee3de 100644
--- a/tests/docker/test_docker.py
+++ b/tests/docker/test_docker.py
@@ -1,12 +1,20 @@
 import atoti as tt
 
 from app import SKELETON
-from app.util.skeleton import CONTRIBUTORS_COUNT
+
+from ..total_capacity import TOTAL_CAPACITY
 
 
 def test_session_inside_docker_container(
     session_inside_docker_container: tt.Session,
 ) -> None:
-    cube = session_inside_docker_container.cubes[SKELETON.cubes.STATION.key]
-    result_df = cube.query(cube.measures[CONTRIBUTORS_COUNT])
-    assert result_df[CONTRIBUTORS_COUNT][0] > 0
+    skeleton = SKELETON.cubes.STATION
+    cube = session_inside_docker_container.cubes[skeleton.key]
+    result_df = cube.query(cube.measures[skeleton.measures.CAPACITY.key])
+    total_capacity = result_df[skeleton.measures.CAPACITY.name][0]
+    assert total_capacity > 0, (
+        "There should be at least one station with one dock or more."
+    )
+    assert total_capacity != TOTAL_CAPACITY, (
+        "The data fetched from the external API should lead to a different capacity than the local data, since new stations have been created after the local data was snapshotted."
+    )
diff --git a/tests/test_session.py b/tests/test_session.py
index 3544ebc..abee3a5 100644
--- a/tests/test_session.py
+++ b/tests/test_session.py
@@ -4,17 +4,17 @@
 from app import SKELETON
 from app.util.skeleton import CONTRIBUTORS_COUNT
 
+from .total_capacity import TOTAL_CAPACITY
+
 
 def test_total_capacity(session: tt.Session) -> None:
     skeleton = SKELETON.cubes.STATION
     cube = session.cubes[skeleton.key]
     result = cube.query(cube.measures[skeleton.measures.CAPACITY.key])
     expected_result = pd.DataFrame(
-        columns=[skeleton.measures.CAPACITY.name],
-        data=[
-            (45_850),
-        ],
-        dtype="Int32",
+        {
+            skeleton.measures.CAPACITY.name: pd.Series([TOTAL_CAPACITY], dtype="Int32"),
+        }
     )
     pd.testing.assert_frame_equal(result, expected_result)
 
diff --git a/tests/total_capacity.py b/tests/total_capacity.py
new file mode 100644
index 0000000..937b02f
--- /dev/null
+++ b/tests/total_capacity.py
@@ -0,0 +1,2 @@
+TOTAL_CAPACITY = 45_850
+"""The expected total capacity of the system when using local data."""