Skip to content

Commit

Permalink
Introduce Schema component -> more efficient serialization. Improve f…
Browse files Browse the repository at this point in the history
…ield restriction and support 2 levels.
  • Loading branch information
Etienne Jodry authored and Etienne Jodry committed Feb 6, 2025
1 parent 2ab81c6 commit d97be87
Show file tree
Hide file tree
Showing 36 changed files with 298 additions and 150 deletions.
1 change: 1 addition & 0 deletions src/biodm/components/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Explicit re-export for mypy strict.
from .schema import Schema as Schema
from .table import Base as Base
from .table import S3File as S3File
from .table import Versioned as Versioned
Expand Down
2 changes: 0 additions & 2 deletions src/biodm/components/controllers/admincontroller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from marshmallow import Schema

from biodm.utils.security import admin_required, login_required
from .resourcecontroller import ResourceController

Expand Down
3 changes: 2 additions & 1 deletion src/biodm/components/controllers/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
from io import BytesIO
from typing import Any, Iterable, List, Dict, TYPE_CHECKING, Optional

from marshmallow.schema import Schema
# from marshmallow.schema import Schema
from marshmallow.exceptions import ValidationError
from sqlalchemy.exc import MissingGreenlet
from starlette.requests import Request
from starlette.responses import Response
import starlette.routing as sr

from biodm import config
from biodm.components import Schema
from biodm.component import ApiComponent
from biodm.exceptions import (
DataError, PayloadJSONDecodingError, AsyncDBError, SchemaError
Expand Down
56 changes: 42 additions & 14 deletions src/biodm/components/controllers/resourcecontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ async def _extract_body(self, request: Request) -> bytes:
:rtype: bytes
"""
body = await request.body()
if body in (b'{}', b'[]', b'[{}]'):
if not body or body in (b'{}', b'[]', b'[{}]'):
raise PayloadEmptyError("No input data.")
return body

Expand All @@ -307,18 +307,41 @@ def _extract_fields(
fields = query_params.pop('fields', None)
fields = fields.split(',') if fields else None

if fields: # User input case, check and raise.
fields = set(fields) | self.table.pk
if fields: # User input case, check validity.
fields = set(fields)
nested = []
for field in fields:
if field not in self.schema.dump_fields.keys():
raise DataError(f"Requested field {field} does not exists.")
self.svc.check_allowed_nested(fields, user_info=user_info)

else: # Default case, gracefully populate allowed fields.
fields = [
k for k,v in self.schema.dump_fields.items()
]
fields = self.svc.takeout_unallowed_nested(fields, user_info=user_info)
chain = field.split('.')
if len(chain) > 2:
raise QueryError("Requested fields can be set only on two levels.")
if chain[0] not in self.schema.dump_fields:
raise QueryError(
f"Requested field {field} does not exists at {self.prefix}."
)
if len(chain) > 1:
nschema = self.schema.dump_fields[chain[0]]
match nschema:
case List():
nschema = nschema.inner.schema
case Nested():
nschema = nschema.schema
if chain[1] not in nschema.dump_fields:
raise QueryError(
f"Requested field {field} does not exist "
f"for child resource at {self.prefix}."
)
if chain[0] in self.table.relationships:
nested.append(chain[0])

self.svc.check_allowed_nested(nested, user_info=user_info)

else: # Default case, pass down all allowed dump_fields
fields = self.svc.takeout_unallowed_nested(
self.schema.dump_fields.keys(),
user_info=user_info
)

fields = fields | self.table.pk # fetch pk in any case.
return fields

def _extract_query_params(self, queryparams: QueryParams) -> Dict[str, Any]:
Expand Down Expand Up @@ -555,7 +578,7 @@ async def read_nested(self, request: Request) -> Response:
f"Unknown collection {nested_attribute} of {self.table.__class__.__name__}"
)

# Serialization and field extraction done by target controller.
# Serialization and field extraction done by target controller.
ctrl: ResourceController = (
target_rel
.mapper
Expand Down Expand Up @@ -720,12 +743,17 @@ async def filter(self, request: Request) -> Response:
"""
params = self._extract_query_params(request.query_params)
fields = self._extract_fields(params, user_info=request.user)

ser_fields = []
for f in fields:
ser_fields.append(f.split('.')[0])

count = bool(params.pop('count', 0))
result = await self.svc.filter(
fields=fields,
params=params,
user_info=request.user,
serializer=partial(self.serialize, many=True, only=fields),
serializer=partial(self.serialize, many=True, only=ser_fields),
)

# Prepare response object.
Expand Down
4 changes: 2 additions & 2 deletions src/biodm/components/controllers/s3controller.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import List, Type

from marshmallow import Schema, RAISE, ValidationError
from marshmallow import RAISE, ValidationError
import starlette.routing as sr
from starlette.requests import Request
from starlette.responses import Response, PlainTextResponse

from biodm.components import S3File
from biodm.components import S3File, Schema
from biodm.components.services import S3Service
from biodm.components.table import Base
from biodm.schemas import PartsEtagSchema
Expand Down
48 changes: 48 additions & 0 deletions src/biodm/components/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import marshmallow as ma

from marshmallow.utils import get_value as ma_get_value, missing
from sqlalchemy.orm import make_transient
from sqlalchemy.orm.exc import DetachedInstanceError


from biodm.utils.utils import to_it

"""Below is a way to check if data is properly loaded before running serialization."""


# Values considered "empty" in serialized output: Schema.remove_skip_values
# drops every dumped key whose value compares equal to one of these.
# The membership test is done with `in` on this tuple, i.e. by equality.
SKIP_VALUES = (None, [], {}, '', '[]', '{}',)


def gettattr_unbound(obj, key: int | str, default=missing):
    """Look up ``key`` on ``obj`` through marshmallow's ``get_value``.

    :param obj: Object (or mapping) to read from.
    :param key: Attribute name or index handed to ``get_value``.
    :param default: Fallback value, marshmallow's ``missing`` unless given.
    :return: The value found by ``get_value``, or ``default`` when the lookup
        raises ``DetachedInstanceError``.
    """
    # NOTE(review): presumably the error is raised when the lookup touches an
    # attribute that was not loaded on a detached ORM instance - confirm.
    try:
        found = ma_get_value(obj, key, default)
    except DetachedInstanceError:
        found = default
    return found


class Schema(ma.Schema):
    """Project base schema built on ``marshmallow.Schema``.

    Registers two dump hooks:

    * ``turn_to_transient`` (pre-dump) passes every object about to be
      serialized to SQLAlchemy's ``make_transient``.
    * ``remove_skip_values`` (post-dump) drops entries whose value is listed
      in ``SKIP_VALUES``.
    """

    # NOTE: disabled alternative, kept for reference - route every field's
    # accessor through ``gettattr_unbound``.
    # def __init__(self, *args, **kwargs):
    #     super().__init__(*args, **kwargs)
    #     for field in self.fields.values():
    #         field.get_value = partial(field.get_value, accessor=gettattr_unbound)

    @ma.pre_dump
    def turn_to_transient(self, data, **kwargs):
        """Avoid serialization fetching extra data from the database on the fly.

        :param data: Object(s) about to be dumped; iterated through ``to_it``.
        :return: ``data`` itself, after each item went through ``make_transient``.
        """
        for item in to_it(data):
            make_transient(item)
        return data

    @ma.post_dump
    def remove_skip_values(self, data, **kwargs):
        """Remove unnecessary empty values from the resulting dict.

        :param data: Dumped dictionary produced by marshmallow.
        :return: New dict holding only the entries whose value is not in
            ``SKIP_VALUES``.
        """
        kept = {}
        for name, content in data.items():
            if content in SKIP_VALUES:
                continue
            kept[name] = content
        return kept

    # NOTE: disabled alternative, kept for reference - swallow detached
    # instance errors at attribute access time instead.
    # def get_attribute(self, obj, attr, default):
    #     try:
    #         return super().get_attribute(obj, attr, default)
    #     except DetachedInstanceError:
    #         return None
Loading

0 comments on commit d97be87

Please sign in to comment.