From 81dc40743e3af3ca9433a9e8f7f651e81dd5edc4 Mon Sep 17 00:00:00 2001 From: Etienne Jodry Date: Tue, 8 Oct 2024 14:36:15 +0200 Subject: [PATCH] Refactor, add & doc submitter_username feature, file size checking + tests, support permissions schema discovery, flip FK naming convention --- README.md | 5 + docker/Dockerfile.biodm-test-api | 2 +- docs/developer_manual/advanced_use.rst | 2 +- docs/developer_manual/index.rst | 2 +- docs/developer_manual/table_schema.rst | 21 +++ docs/user_manual.rst | 4 +- src/biodm/api.py | 10 +- src/biodm/basics/k8scontroller.py | 2 - .../controllers/resourcecontroller.py | 154 ++++-------------- .../components/controllers/s3controller.py | 1 + src/biodm/components/services/dbservice.py | 37 +++-- src/biodm/components/services/s3service.py | 14 +- src/biodm/components/table.py | 26 ++- src/biodm/config.py | 1 - src/biodm/error.py | 10 +- src/biodm/exceptions.py | 2 + src/biodm/managers/s3manager.py | 3 +- src/biodm/schemas/__init__.py | 1 - src/biodm/schemas/group.py | 3 - src/biodm/schemas/k8sinstance.py | 10 -- src/biodm/schemas/listgroup.py | 2 +- src/biodm/schemas/upload.py | 2 +- src/biodm/schemas/user.py | 2 +- src/biodm/tables/__init__.py | 1 - src/biodm/tables/asso.py | 8 +- src/biodm/tables/group.py | 16 +- src/biodm/tables/history.py | 2 +- src/biodm/tables/k8sinstance.py | 20 --- src/biodm/tables/upload_part.py | 6 +- src/biodm/tables/user.py | 3 +- src/biodm/utils/apispec.py | 135 +++++++++++++++ src/biodm/utils/biodm.py | 17 ++ src/biodm/utils/security.py | 16 +- src/biodm/utils/sqla.py | 3 +- src/biodm/utils/utils.py | 24 ++- src/example/entities/controllers/file.py | 3 +- src/example/entities/schemas/dataset.py | 4 +- src/example/entities/schemas/file.py | 1 + src/example/entities/tables/dataset.py | 11 +- src/example/entities/tables/file.py | 4 +- src/example/entities/tables/project.py | 4 +- src/example/entities/tables/visualization.py | 4 +- src/example/manifests/visualization.py | 2 +- 
src/tests/integration/s3/test_files.py | 50 +++++- 44 files changed, 401 insertions(+), 249 deletions(-) delete mode 100644 src/biodm/schemas/k8sinstance.py delete mode 100644 src/biodm/tables/k8sinstance.py create mode 100644 src/biodm/utils/apispec.py create mode 100644 src/biodm/utils/biodm.py diff --git a/README.md b/README.md index c5d1ef9..3b8497b 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,11 @@ BioDM is a fast, modular, stateless and asynchronous REST API framework with the - Login and token retrieval system - OpenAPI schema generation through [apispec](https://github.com/marshmallow-code/apispec) + +It sits on the **F**indability and **A**ccessibility part of the **F.A.I.R** principles, +while remaining flexible for the remainder to be implemented. + + ## Quickstart ### Install ```bash diff --git a/docker/Dockerfile.biodm-test-api b/docker/Dockerfile.biodm-test-api index 9f0c985..53cf1e9 100644 --- a/docker/Dockerfile.biodm-test-api +++ b/docker/Dockerfile.biodm-test-api @@ -20,7 +20,7 @@ COPY ./src/biodm /biodm/src/biodm COPY ./src/example /biodm/src/example # conditionally remove .env to replace it with environment variables in compose file. -RUN if [[ -z "$KEEPENV" ]] ; then find /biodm/src/example -name '.env' | xargs rm -rf ; else : ; fi +RUN if [ -z "$KEEPENV" ] ; then find /biodm/src/example -name '.env' | xargs rm -rf ; else : ; fi RUN pip3 install . 
diff --git a/docs/developer_manual/advanced_use.rst b/docs/developer_manual/advanced_use.rst index 62fd3e5..d855c5f 100644 --- a/docs/developer_manual/advanced_use.rst +++ b/docs/developer_manual/advanced_use.rst @@ -317,7 +317,7 @@ A lot of that code has to do with retrieving async SQLAlchemy objects attributes key = await self.gen_key(file, session=session) parts.append( UploadPart( - id_upload=file.upload.id, + upload_id=file.upload.id, form=str( self.s3.create_presigned_post( object_name=key, diff --git a/docs/developer_manual/index.rst b/docs/developer_manual/index.rst index d528110..5ca55d8 100644 --- a/docs/developer_manual/index.rst +++ b/docs/developer_manual/index.rst @@ -22,7 +22,7 @@ metadata and relationships, to setup standard RESTful endpoints. Furthermore, it is providing a structure and toolkit in order to manage common Data Management problems such as: s3 protocol remote file storage, group based permissions access (on both resources and -endpoints), resource versioning (coming up), cluster jobs and so on. +endpoints), resource versioning, cluster jobs and so on. Moreover, the modular and flexible architecture allows you to easily extend base features for instance specific use cases. diff --git a/docs/developer_manual/table_schema.rst b/docs/developer_manual/table_schema.rst index bc810d7..0ece33c 100644 --- a/docs/developer_manual/table_schema.rst +++ b/docs/developer_manual/table_schema.rst @@ -30,6 +30,27 @@ In particular, if a relationship is one-armed (pointing in one direction only), be possible to create a nested resource in the other direction. +Special columns +~~~~~~~~~~~~~~~ + +Some special column will yield built-in behavior. + + +**Tracking resource submitter: submitter_username** + + +Setting up the following `foreign key`, in a table will automatically populate the field +with requesting user's username creating the resource. + + +.. 
code:: python + + class MyTable(Base): + id: Mapped[int] = mapped_column(Integer(), primary_key=True) + ... + submitter_username: Mapped[str] = mapped_column(ForeignKey("USER.username"), nullable=False) + + Schemas ------- diff --git a/docs/user_manual.rst b/docs/user_manual.rst index 29c156e..b1701e7 100644 --- a/docs/user_manual.rst +++ b/docs/user_manual.rst @@ -350,8 +350,8 @@ otherwise. "perm_files": { "write": { "groups": [ - {"name": "genomics_team"}, - {"name": "IT_team"}, + {"path": "genomics_team"}, + {"path": "IT_team"}, {"..."} ] }, diff --git a/src/biodm/api.py b/src/biodm/api.py index 0caf3c7..5933494 100644 --- a/src/biodm/api.py +++ b/src/biodm/api.py @@ -8,7 +8,7 @@ from typing import Callable, List, Optional, Dict, Any, Type from apispec import APISpec -from apispec.ext.marshmallow import MarshmallowPlugin +# from apispec.ext.marshmallow import MarshmallowPlugin from starlette_apispec import APISpecSchemaGenerator from starlette.applications import Starlette from starlette.middleware.base import BaseHTTPMiddleware @@ -21,6 +21,7 @@ from biodm import Scope, config from biodm.basics import CORE_CONTROLLERS, K8sController +from biodm.components.controllers.resourcecontroller import ResourceController from biodm.components.k8smanifest import K8sManifest from biodm.managers import DatabaseManager, KeycloakManager, S3Manager, K8sManager from biodm.components.controllers import Controller @@ -29,6 +30,7 @@ from biodm.exceptions import RequestError from biodm.utils.security import AuthenticationMiddleware, PermissionLookupTables from biodm.utils.utils import to_it +from biodm.utils.apispec import BDMarshmallowPlugin from biodm.tables import History, ListGroup, Upload, UploadPart from biodm import __version__ as CORE_VERSION @@ -64,7 +66,7 @@ async def dispatch(self, request: Request, call_next: Callable) -> Any: endpoint = str(request.url).rsplit(self.server_host, maxsplit=1)[-1] body = await request.body() entry = { - 'username_user': user_id, + 
'user_username': user_id, 'endpoint': endpoint, 'method': request.method, 'content': str(body) if body else "" @@ -150,7 +152,7 @@ def __init__( title=config.API_NAME, version=config.API_VERSION, openapi_version="3.0.0", - plugins=[MarshmallowPlugin()], + plugins=[BDMarshmallowPlugin()], info={"description": "", "backend": "biodm", "backend_version": CORE_VERSION}, security=[{'Authorization': []}] # Same name as security_scheme arg below. ) @@ -191,7 +193,7 @@ def __init__( # self.add_exception_handler(DatabaseError, on_error) @property - def server_endpoint(self) -> str: + def server_endpoint(cls) -> str: """Server address, useful to compute callbacks.""" return f"{config.SERVER_SCHEME}{config.SERVER_HOST}:{config.SERVER_PORT}/" diff --git a/src/biodm/basics/k8scontroller.py b/src/biodm/basics/k8scontroller.py index 7097953..7a7bd60 100644 --- a/src/biodm/basics/k8scontroller.py +++ b/src/biodm/basics/k8scontroller.py @@ -8,9 +8,7 @@ from biodm.components.controllers import Controller, HttpMethod# ResourceController from biodm.exceptions import ManifestError -from biodm.tables import K8sInstance from biodm.components import K8sManifest -from biodm.schemas import K8sinstanceSchema from biodm.utils.utils import json_response diff --git a/src/biodm/components/controllers/resourcecontroller.py b/src/biodm/components/controllers/resourcecontroller.py index 34c563a..4669327 100644 --- a/src/biodm/components/controllers/resourcecontroller.py +++ b/src/biodm/components/controllers/resourcecontroller.py @@ -1,7 +1,8 @@ """Controller class for Tables acting as a Resource.""" from __future__ import annotations -from functools import partial, wraps +from functools import partial +from inspect import getmembers, ismethod from types import MethodType from typing import TYPE_CHECKING, Callable, List, Set, Any, Dict, Type @@ -32,6 +33,7 @@ ) from biodm.utils.security import UserInfo from biodm.utils.utils import json_response +from biodm.utils.apispec import 
register_runtime_schema, process_apispec_docstrings from biodm.components import Base from .controller import HttpMethod, EntityController @@ -91,7 +93,10 @@ def __init__( self.pk = set(self.table.pk) self.svc: UnaryEntityService = self._infer_svc()(app=self.app, table=self.table) - self.__class__.schema = (schema if schema else self._infer_schema())(unknown=RAISE) + # Inst schema, and set custom registry for apispec. + schema_cls = schema if schema else self._infer_schema() + self.__class__.schema = schema_cls(unknown=RAISE) + register_runtime_schema(schema_cls, self.__class__.schema) self._infuse_schema_in_apispec_docstrings() @staticmethod @@ -110,112 +115,22 @@ async def mirror(self, *args, **kwargs): def _infuse_schema_in_apispec_docstrings(self): """Substitute endpoint documentation template bits with adapted ones for this resource. - - Essentially handling APIspec/Marshmallow/OpenAPISchema support for abstract endpoints. - - Current patterns for abstract documentation: - - Marshmallow Schema | - schema: Schema -> schema: self.Schema.__class__.__name__ - - key Attributes | - - in: path - name: id - -> - List of table primary keys, with their description from marshmallow schema if any. - - field conditions | - - in: query - name: field_conditions - -> - List of available fields to set conditions on. + Handling APIspec/Marshmallow/OpenAPISchema support for abstract endpoints. """ - def process_apispec_docstrings(self, abs_doc): - # Use intance schema. - abs_doc = abs_doc.replace( - 'schema: Schema', f"schema: {self.schema.__class__.__name__}" + for method, fct in getmembers( + self, predicate=lambda x: ( # Use typing anotations to identify endpoints. + ismethod(x) and hasattr(x, '__annotations__') and + x.__annotations__.get('request', '') == 'Request' and + x.__annotations__.get('return', '') == 'Response' + ) + ): + # Replace with processed docstrings. 
+ setattr(self, method, MethodType( + ResourceController.replace_method_docstrings( + method, process_apispec_docstrings(self, fct.__doc__ or "") + ), self + ) ) - - # Template replacement #1: path key. - path_key = [] - for key in self.pk: - attr = [] - attr.append("- in: path") - attr.append(f"name: {key}") - field = self.schema.declared_fields[key] - desc = field.metadata.get("description", None) - attr.append("description: " + (desc or f"{self.resource} {key}")) - path_key.append(attr) - - # Template replacement #2: field conditions. - field_conditions = [] - for col in self.table.__table__.columns: - condition = [] - condition.append("- in: query") - condition.append(f"name: {col.name}") - if col.type.python_type == str: - condition.append( - "description: text - key=val | key=pattern " - "where pattern may contain '*' for wildcards" - ) - elif col.type.python_type in (int, float): - condition.append( - "description: numeric - key=val | key=val1,val2.. | key.op(val) " - "for op in (le|lt|ge|gt)" - ) - else: - condition.append(f"description: {self.resource} {col.name}") - field_conditions.append(condition) - - # Split. - doc = abs_doc.split('---') - if len(doc) > 1: - sphinxdoc, apispec = doc - apispec = apispec.split('\n') - flattened = [] - # Search and replace templates. - for i in range(len(apispec)): - if '- in: path' in apispec[i-1] and 'name: id' in apispec[i]: - # Work out same indentation level in order not to break the yaml. 
- indent = len(apispec[i-1].split('- in: path')[0]) - for path_attribute in path_key: - path_attribute[0] = " " * indent + path_attribute[0] - path_attribute[1] = " " * (indent+2) + path_attribute[1] - path_attribute[2] = " " * (indent+2) + path_attribute[2] - flattened.extend(path_attribute) - break - if flattened: - apispec = apispec[:i-1] + flattened + apispec[i+1:] - - flattened = [] - for i in range(len(apispec)): - if '- in: query' in apispec[i-1] and 'name: fields_conditions' in apispec[i]: - indent = len(apispec[i-1].split('- in: query')[0]) - flattened = [] - for condition in field_conditions: - condition[0] = " " * indent + condition[0] - condition[1] = " " * (indent+2) + condition[1] - condition[2] = " " * (indent+2) + condition[2] - flattened.extend(condition) - break - if flattened: - apispec = apispec[:i-1] + flattened + apispec[i+1:] - # Join. - abs_doc = sphinxdoc + "\n---\n" + "\n".join(apispec) - return abs_doc - - for method in dir(self): - if not method.startswith('_'): - fct = getattr(self, method, {}) - if hasattr(fct, '__annotations__'): - if ( # Use typing anotations to identify endpoints. - fct.__annotations__.get('request', '') == 'Request' and - fct.__annotations__.get('return', '') == 'Response' - ): - # Replace with processed docstrings. - setattr(self, method, MethodType( - ResourceController.replace_method_docstrings( - method, process_apispec_docstrings(self, fct.__doc__ or "") - ), self - ) - ) def _infer_resource_name(self) -> str: """Infer entity name from controller name.""" @@ -440,7 +355,7 @@ async def read(self, request: Request) -> Response: e.g. /datasets/1_1?name,description,contact,files - in: path name: attribute - description: Optional, nested collection name. + description: nested collection name responses: 200: description: Found matching item @@ -526,21 +441,16 @@ async def update(self, request: Request) -> Response: # Plug in pk into the dict. 
validated_data.update(dict(zip(self.pk, pk_val))) # type: ignore [assignment] - try: - return json_response( - data=await self.svc.write( - data=validated_data, - stmt_only=False, - user_info=request.state.user_info, - serializer=partial(self.serialize, many=isinstance(validated_data, list)), - ), - status_code=201, - ) - except IntegrityError as ie: - if 'UNIQUE' in ie.args[0] and 'version' in ie.args[0]: # Versioned case. - raise UpdateVersionedError( - "Attempt at updating versioned resources via POST detected" - ) + return json_response( + data=await self.svc.write( + data=validated_data, + stmt_only=False, + user_info=request.state.user_info, + serializer=partial(self.serialize, many=isinstance(validated_data, list)), + ), + status_code=201, + ) + async def delete(self, request: Request) -> Response: """Delete resource. diff --git a/src/biodm/components/controllers/s3controller.py b/src/biodm/components/controllers/s3controller.py index b81442c..19d28dc 100644 --- a/src/biodm/components/controllers/s3controller.py +++ b/src/biodm/components/controllers/s3controller.py @@ -21,6 +21,7 @@ class S3Controller(ResourceController): """Controller for entities involving file management leveraging an S3Service.""" svc: S3Service + def __init__( self, app, diff --git a/src/biodm/components/services/dbservice.py b/src/biodm/components/services/dbservice.py index 884df50..8065fd7 100644 --- a/src/biodm/components/services/dbservice.py +++ b/src/biodm/components/services/dbservice.py @@ -2,7 +2,7 @@ from abc import ABCMeta from typing import Callable, List, Sequence, Any, Dict, overload, Literal, Type, Set -from sqlalchemy import select, delete, update, or_, func +from sqlalchemy import select, delete, update, or_, func, inspect from sqlalchemy.dialects import postgresql, sqlite from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.hybrid import hybrid_property @@ -189,6 +189,7 @@ async def _check_permissions( :type session: AsyncSession :raises 
UnauthorizedError: Insufficient permissions detected. """ + # Internal request. if not user_info: return @@ -197,6 +198,10 @@ async def _check_permissions( groups = user_info.info[1] if user_info.info else [] + # Special admin case. + if groups and 'admin' in groups: + return + if not self._group_required(verb, groups): raise UnauthorizedError("Insufficient group privileges for this operation.") @@ -269,11 +274,16 @@ def _apply_read_permissions( verb = "read" perms = self._get_permissions(verb) + # No restriction or internal request. if not perms or not user_info: return stmt groups = user_info.info[1] if user_info.info else [] + # Special admin case. + if groups and 'admin' in groups: + return stmt + # Build nested query to filter permitted results. for permission in perms: link, chain = permission['from'][-1], permission['from'][:-1] @@ -335,7 +345,7 @@ def __init__(self, app, table: Type[Base], *args, **kwargs) -> None: self.table = table self.pk = set(table.col(name) for name in table.pk) # Take a snapshot at declaration time, convenient to isolate runtime permissions. - self.relationships = table.relationships() + self._inst_relationships = inspect(self.table).mapper.relationships # Enable entity - service - table linkage so everything is conveniently available. 
setattr(table, 'svc', self) setattr(table.__table__, 'decl_class', table) @@ -362,7 +372,7 @@ def _svc_from_rel_name(self, key: str) -> DatabaseService: return rel.target.decl_class.svc def _check_allowed_nested(self, fields, user_info: UserInfo) -> None: - nested, _ = partition(fields, lambda x: x in self.relationships) + nested, _ = partition(fields, lambda x: x in self.table.relationships()) for name in nested: target_svc = self._svc_from_rel_name(name) if target_svc._login_required("read") and not user_info.info: @@ -374,7 +384,7 @@ def _check_allowed_nested(self, fields, user_info: UserInfo) -> None: raise UnauthorizedError(f"Insufficient group privileges to retrieve {name}.") def _takeout_unallowed_nested(self, fields, user_info: UserInfo) -> List[str]: - nested, fields = partition(fields, lambda x: x in self.relationships) + nested, fields = partition(fields, lambda x: x in self.table.relationships()) def ncheck(name): target_svc = self._svc_from_rel_name(name) @@ -463,7 +473,7 @@ async def write( await self.populate_ids_sqlite(data) futures = kwargs.pop('futures', None) - stmts = [self.upsert(one, futures=futures) for one in to_it(data)] + stmts = [self.upsert(one, futures=futures, user_info=user_info) for one in to_it(data)] if len(stmts) == 1: return stmts[0] if stmt_only else await self._insert(stmts[0], user_info=user_info, **kwargs) @@ -472,7 +482,8 @@ async def write( def upsert( self, data: Dict[Any, str], - futures: List[str] | None = None + futures: List[str] | None = None, + user_info: UserInfo | None = None, ) -> Insert | Update: """Generates an upsert (Insert + .on_conflict_do_x) depending on data population. OR an explicit Update statement for partial data with full primary key. 
@@ -518,6 +529,10 @@ def upsert( .returning(self.table) ) return stmt + elif missing_data == {'submitter_username'} and self.table.has_submitter_username: + if not user_info or not user_info.info: + raise UnauthorizedError("Requires authentication.") + data['submitter_username'] = user_info.info[0] else: raise DataError(f"{self.table.__name__} missing the following: {missing_data}.") @@ -569,7 +584,7 @@ def _restrict_select_on_fields( :return: statement restricted on field list :rtype: Select """ - nested, fields = partition(fields, lambda x: x in self.relationships) + nested, fields = partition(fields, lambda x: x in self.table.relationships()) # Exclude hybrid properties. _, fields = partition( fields, @@ -591,7 +606,7 @@ def _restrict_select_on_fields( ) if fields else stmt for n in nested: - relationship = self.relationships[n] + relationship = self.table.relationships()[n] target = relationship.target target = self.table if isinstance(target, Alias) else target.decl_class @@ -852,7 +867,7 @@ def permission_relationships(self) -> Dict[str, Relationship]: """ return { key: rel for key, rel in self.table.relationships().items() - if key not in self.relationships.keys() + if key not in self._inst_relationships.keys() } @DatabaseManager.in_session @@ -1008,9 +1023,9 @@ async def _parse_composite( delayed[key] = CompositeInsert(item=perm_stmt, nested={}, delayed=perm_listgroups) # Remaining table relationships. - for key in self.relationships.keys() & data.keys(): + for key in self._inst_relationships.keys() & data.keys(): svc, sub = self._svc_from_rel_name(key), data.pop(key) - rel = self.relationships[key] + rel = self._inst_relationships[key] # Infer fields that will get populated at insertion time (for error detection). 
nested_futures = None diff --git a/src/biodm/components/services/s3service.py b/src/biodm/components/services/s3service.py index ffc1199..2f8282a 100644 --- a/src/biodm/components/services/s3service.py +++ b/src/biodm/components/services/s3service.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from biodm.components.table import Base, S3File -from biodm.exceptions import FileNotUploadedError +from biodm.exceptions import FileNotUploadedError, FileTooLargeError from biodm.managers import DatabaseManager, S3Manager from biodm.tables import Upload, UploadPart from biodm.utils.utils import utcnow, classproperty @@ -61,6 +61,9 @@ async def gen_upload_form(self, file: S3File, session: AsyncSession): """ assert isinstance(file, S3File) # mypy. + if file.size > self.s3.file_size_limit * 1024 ** 3: + raise FileTooLargeError(f"File exceeding {self.s3.file_size_limit} GB") + file.upload = Upload() session.add(file.upload) await session.flush() @@ -75,7 +78,7 @@ async def gen_upload_form(self, file: S3File, session: AsyncSession): for i in range(1, n_chunks+1): parts.append( UploadPart( - id_upload=file.upload.id, + upload_id=file.upload.id, part_number=i, form=str( self.s3.create_upload_part( @@ -87,11 +90,12 @@ async def gen_upload_form(self, file: S3File, session: AsyncSession): else: parts.append( UploadPart( - id_upload=file.upload.id, + upload_id=file.upload.id, form=str( - self.s3.create_presigned_post( # TODO: use filesize. 
+ self.s3.create_presigned_post( object_name=key, - callback=self.post_callback(file) + file_size=file.size, + callback=self.post_callback(file), ) ) ) diff --git a/src/biodm/components/table.py b/src/biodm/components/table.py index 35035ad..3f9b08e 100644 --- a/src/biodm/components/table.py +++ b/src/biodm/components/table.py @@ -7,7 +7,7 @@ from uuid import uuid4 from sqlalchemy import ( - BOOLEAN, Integer, inspect, Column, String, TIMESTAMP, ForeignKey, + BOOLEAN, Integer, inspect, Column, String, TIMESTAMP, ForeignKey, BigInteger ) from sqlalchemy.ext.asyncio import AsyncAttrs from sqlalchemy.ext.declarative import declared_attr @@ -122,6 +122,24 @@ def required(cls) -> Set[str]: ) ) + @classproperty + def has_submitter_username(cls) -> bool: + """True if table has FK pointing to USER.username called 'submitter_username' + + :return: Flag + :rtype: bool + """ + return ( + 'submitter_username' in cls.__dict__ and + cls.submitter_username.foreign_keys and + len(cls.submitter_username.foreign_keys) == 1 and + next( + iter( + cls.submitter_username.foreign_keys + ) + ).target_fullname == 'USER.username' + ) + class S3File: """Class to use in order to have a file managed on S3 bucket associated to this table @@ -129,14 +147,14 @@ class S3File: filename = Column(String(100), nullable=False) extension = Column(String(10), nullable=False) ready = Column(BOOLEAN, nullable=False, server_default='0') - size = Column(Integer, nullable=False) + size = Column(BigInteger, nullable=False) # upload_form = Column(String(2000)) # , nullable=False+ - id_upload: Mapped[int] = mapped_column(ForeignKey("UPLOAD.id"), nullable=True) + upload_id: Mapped[int] = mapped_column(ForeignKey("UPLOAD.id"), nullable=True) @declared_attr def upload(cls) -> Mapped["Upload"]: - return relationship(backref="file", foreign_keys=[cls.id_upload]) + return relationship(backref="file", foreign_keys=[cls.upload_id]) dl_count = Column(Integer, nullable=False, server_default='0') diff --git 
a/src/biodm/config.py b/src/biodm/config.py index 780a981..543011b 100644 --- a/src/biodm/config.py +++ b/src/biodm/config.py @@ -44,7 +44,6 @@ 'verify_aud': False}) # Kubernetes. - K8_IP = config("K8_IP", cast=str, default=None) K8_PORT = config("K8_PORT", cast=str, default="8443") K8_HOST = config("K8_HOST", cast=str, default=None) diff --git a/src/biodm/error.py b/src/biodm/error.py index adabefe..0c550ba 100644 --- a/src/biodm/error.py +++ b/src/biodm/error.py @@ -17,6 +17,7 @@ TokenDecodingError, UpdateVersionedError, FileNotUploadedError, + FileTooLargeError, DataError ) @@ -41,15 +42,15 @@ async def onerror(_, exc): """Error event handler. Relevant documentation: https://restfulapi.net/http-status-codes/""" - status = 500 detail = None if issubclass(exc.__class__, RequestError): - detail = exc.detail + # TODO: investigate + detail = exc.detail + (str(exc.messages) if hasattr(exc, 'messages') else "") + match exc: - case ValidationError(): + case ValidationError() | FileTooLargeError(): status = 400 - detail = str(exc.messages) case DataError() | EndpointError() | PayloadJSONDecodingError(): status = 400 case FailedDelete() | FailedRead() | FailedUpdate(): @@ -65,6 +66,7 @@ async def onerror(_, exc): case UnauthorizedError(): status = 511 else: + status = 500 detail = "Server Error. Contact an administrator about it." 
return Error(status, detail).response diff --git a/src/biodm/exceptions.py b/src/biodm/exceptions.py index 370aa08..424ce1f 100644 --- a/src/biodm/exceptions.py +++ b/src/biodm/exceptions.py @@ -56,6 +56,8 @@ class UpdateVersionedError(RequestError): class FileNotUploadedError(RequestError): """Raised when trying to download a file that has not been uploaded yet.""" +class FileTooLargeError(RequestError): + """Raised when trying to create a too large file.""" class DataError(RequestError): """Raised when input data is incorrect.""" diff --git a/src/biodm/managers/s3manager.py b/src/biodm/managers/s3manager.py index 1606587..f940705 100644 --- a/src/biodm/managers/s3manager.py +++ b/src/biodm/managers/s3manager.py @@ -52,6 +52,7 @@ def endpoint(self) -> str: def create_presigned_post(self, object_name, + file_size, callback, ) -> Any: """ Generates a presigned url + form fiels to upload a given file on s3 bucket. @@ -71,7 +72,7 @@ def create_presigned_post(self, {"x-amz-date": t.isoformat()}, {"success_action_status": "201"}, ["starts-with", "$success_action_redirect", ""], # self.app.server_endpoint - ["content-length-range", 2, self.file_size_limit * 1024 ** 3], + ["content-length-range", 2, file_size], {"bucket": self.bucket_name}, ] diff --git a/src/biodm/schemas/__init__.py b/src/biodm/schemas/__init__.py index 841270c..d3d9443 100644 --- a/src/biodm/schemas/__init__.py +++ b/src/biodm/schemas/__init__.py @@ -4,7 +4,6 @@ from .user import UserSchema from .group import GroupSchema from .listgroup import ListGroupSchema -from .k8sinstance import K8sinstanceSchema from .upload import UploadSchema, PartsEtagSchema """ Headless schemas should be explicitely added to the register. """ diff --git a/src/biodm/schemas/group.py b/src/biodm/schemas/group.py index 35e9086..e507478 100644 --- a/src/biodm/schemas/group.py +++ b/src/biodm/schemas/group.py @@ -5,9 +5,6 @@ class GroupSchema(Schema): """Schema for Keycloak Groups. 
id field is purposefully left out as we manage it internally.""" path = String(metadata={"description": "Group name chain separated by '__'"}) - # Test - n_members = Integer() - users = List(Nested('UserSchema', exclude=['groups'])) # only=['username'] children = List(Nested('GroupSchema', exclude=['children', 'parent'])) # exclude=['users', 'children', 'parent'])) parent = Nested('GroupSchema', exclude=['children', 'parent'])# exclude=['users', 'children', 'parent'], dump_only=True) # parent', 'users', 'children diff --git a/src/biodm/schemas/k8sinstance.py b/src/biodm/schemas/k8sinstance.py deleted file mode 100644 index 05f5f4a..0000000 --- a/src/biodm/schemas/k8sinstance.py +++ /dev/null @@ -1,10 +0,0 @@ -from marshmallow import Schema -from marshmallow.fields import String, Integer - - -class K8sinstanceSchema(Schema): - """K8Instance Schema.""" - id = Integer() - username_user = String() - namespace = String() - manifest = String() diff --git a/src/biodm/schemas/listgroup.py b/src/biodm/schemas/listgroup.py index f4ebe5d..c3148cc 100644 --- a/src/biodm/schemas/listgroup.py +++ b/src/biodm/schemas/listgroup.py @@ -4,4 +4,4 @@ class ListGroupSchema(Schema): id = Integer() - groups = List(Nested('GroupSchema', only=('path', 'n_members',))) + groups = List(Nested('GroupSchema', exclude=['users', 'children', 'parent'])) diff --git a/src/biodm/schemas/upload.py b/src/biodm/schemas/upload.py index a6a3702..a5c2337 100644 --- a/src/biodm/schemas/upload.py +++ b/src/biodm/schemas/upload.py @@ -6,7 +6,7 @@ class PartsEtagSchema(Schema): ETag = String() class UploadPartSchema(Schema): - id_upload = Integer() + upload_id = Integer() part_number = Integer() form = String() diff --git a/src/biodm/schemas/user.py b/src/biodm/schemas/user.py index b169b41..f46c6b2 100644 --- a/src/biodm/schemas/user.py +++ b/src/biodm/schemas/user.py @@ -10,4 +10,4 @@ class UserSchema(Schema): firstName = String() lastName = String() - groups = List(Nested('GroupSchema', exclude=['users', 
'children', 'parent'])) + groups = List(Nested('GroupSchema', exclude=['users', 'children', 'parent'])) #'children', 'parent' diff --git a/src/biodm/tables/__init__.py b/src/biodm/tables/__init__.py index 494e567..e3fbb51 100644 --- a/src/biodm/tables/__init__.py +++ b/src/biodm/tables/__init__.py @@ -3,7 +3,6 @@ from .group import Group from .listgroup import ListGroup from .history import History -from .k8sinstance import K8sInstance from .upload import Upload from .upload_part import UploadPart from .asso import asso_user_group diff --git a/src/biodm/tables/asso.py b/src/biodm/tables/asso.py index d723050..d186405 100644 --- a/src/biodm/tables/asso.py +++ b/src/biodm/tables/asso.py @@ -6,13 +6,13 @@ asso_user_group = Table( "ASSO_USER_GROUP", Base.metadata, - Column("username_user", ForeignKey("USER.username"), primary_key=True), - Column("path_group", ForeignKey("GROUP.path"), primary_key=True) + Column("user_username", ForeignKey("USER.username"), primary_key=True), + Column("group_path", ForeignKey("GROUP.path"), primary_key=True) ) asso_list_group = Table( "ASSO_LIST_GROUP", Base.metadata, - Column("id_listgroup", ForeignKey("LISTGROUP.id"), primary_key=True), - Column("path_group", ForeignKey("GROUP.path"), primary_key=True) + Column("listgroup_id", ForeignKey("LISTGROUP.id"), primary_key=True), + Column("group_path", ForeignKey("GROUP.path"), primary_key=True) ) diff --git a/src/biodm/tables/group.py b/src/biodm/tables/group.py index c880d6c..3e97acf 100644 --- a/src/biodm/tables/group.py +++ b/src/biodm/tables/group.py @@ -21,11 +21,7 @@ class Group(Base): # id on creation is ensured by read_or_create method from KCService subclasses. # KC fields managed internally (not part of the Schema). 
id: Mapped[str] = mapped_column(nullable=True) - # path: Mapped[str] = mapped_column(String(500), primary_key=True) - # test - n_members: Mapped[int] = mapped_column(nullable=True) - # relationships users: Mapped[List["User"]] = relationship( secondary=asso_user_group, @@ -34,16 +30,16 @@ class Group(Base): ) @hybrid_property - def path_parent(self) -> str: + def parent_path(self) -> str: return self.path[:self.path.index('__', -1)] # @hybrid_property # TODO ? # def display_name(self) -> str: # return self.path[self.path.index('__', -1):] - @path_parent.inplace.expression + @parent_path.inplace.expression @classmethod - def _path_parent(cls) -> SQLColumnExpression[str]: + def _parent_path(cls) -> SQLColumnExpression[str]: sep = literal('__') if "postgresql" in config.DATABASE_URL: return func.substring( @@ -84,7 +80,7 @@ def __repr__(self): Group.parent = relationship( Group_alias, - primaryjoin=Group.path_parent == Group_alias.path, + primaryjoin=Group.parent_path == Group_alias.path, foreign_keys=[Group_alias.path], uselist=False, viewonly=True, @@ -93,8 +89,8 @@ def __repr__(self): Group.children = relationship( Group_alias, - primaryjoin=foreign(Group_alias.path_parent) == Group.path, - foreign_keys=[Group_alias.path_parent], + primaryjoin=foreign(Group_alias.parent_path) == Group.path, + foreign_keys=[Group_alias.parent_path], uselist=True, viewonly=True, ) diff --git a/src/biodm/tables/history.py b/src/biodm/tables/history.py index 89aa5c0..4ef7c90 100644 --- a/src/biodm/tables/history.py +++ b/src/biodm/tables/history.py @@ -8,7 +8,7 @@ class History(Base): """History table.""" timestamp = Column(TIMESTAMP(timezone=True), default=utcnow, nullable=False, primary_key=True) - username_user: Mapped[str] = mapped_column(String(100), primary_key=True) + user_username: Mapped[str] = mapped_column(String(100), primary_key=True) content = Column(Text, nullable=False) endpoint = Column(String(500), nullable=False) diff --git a/src/biodm/tables/k8sinstance.py 
b/src/biodm/tables/k8sinstance.py deleted file mode 100644 index 144d57b..0000000 --- a/src/biodm/tables/k8sinstance.py +++ /dev/null @@ -1,20 +0,0 @@ -from uuid import UUID - -from sqlalchemy import Column, String, ForeignKey, TIMESTAMP -from sqlalchemy.orm import Mapped, mapped_column - -from biodm.components import Base -from biodm.utils.utils import utcnow - -class K8sInstance(Base): - """K8Instance table.""" - # id = Column(Uuid, nullable=False, primary_key=True) - id: Mapped[UUID] = mapped_column(primary_key=True) - username_user = mapped_column(ForeignKey('USER.username'), nullable=False) - namespace = Column(String(50)) - manifest = Column(String(50)) - - emited_at = Column(TIMESTAMP(timezone=True), - default=utcnow, - nullable=False) - expiring_at = Column(TIMESTAMP(timezone=True)) diff --git a/src/biodm/tables/upload_part.py b/src/biodm/tables/upload_part.py index 6668b49..1051057 100644 --- a/src/biodm/tables/upload_part.py +++ b/src/biodm/tables/upload_part.py @@ -9,12 +9,12 @@ from .upload import Upload class UploadPart(Base): - id_upload: Mapped[int] = mapped_column(ForeignKey("UPLOAD.id"), primary_key=True) + upload_id: Mapped[int] = mapped_column(ForeignKey("UPLOAD.id"), primary_key=True) part_number: Mapped[int] = mapped_column(server_default='0', primary_key=True) form: Mapped[str] = mapped_column(nullable=False) - # etag: Mapped[str] = mapped_column(nullable=True) + upload: Mapped["Upload"] = relationship( back_populates="parts", - foreign_keys=[id_upload], + foreign_keys=[upload_id], single_parent=True ) diff --git a/src/biodm/tables/user.py b/src/biodm/tables/user.py index 6fdd3f0..6609932 100644 --- a/src/biodm/tables/user.py +++ b/src/biodm/tables/user.py @@ -16,9 +16,10 @@ class User(Base): # KC ENDPOINT: /auth/admin/realms/{realm-name}/users/{id} # nullable=False is a problem when creating parent entity with just the User.name. # id on creation is ensured by read_or_create method from KCService subclasses. 
- id: Mapped[str] = mapped_column(nullable=True) # unique=True + id: Mapped[str] = mapped_column(nullable=True) username = Column(String(50), nullable=False, primary_key=True) email = Column(String(100)) + # camelCase exception: direct mapping for keycloak fields firstName = Column(String(50)) lastName = Column(String(50)) diff --git a/src/biodm/utils/apispec.py b/src/biodm/utils/apispec.py new file mode 100644 index 0000000..54d5056 --- /dev/null +++ b/src/biodm/utils/apispec.py @@ -0,0 +1,135 @@ +from typing import TYPE_CHECKING, Type, Dict, List, Tuple + +from apispec.ext.marshmallow import MarshmallowPlugin +from marshmallow import Schema, class_registry + +if TYPE_CHECKING: + from biodm.components.controllers import ResourceController + + +"""Inspired by marshmallow registry. Maps classes to instances attached to controllers.""" +_runtime_schema_registry: Dict[Type[Schema], Schema] = {} + + +def register_runtime_schema(cls: Type[Schema], inst: Schema) -> None: + """Adds entry to register. Indexed by class, since we should not assume the name.""" + _runtime_schema_registry[cls] = inst + + +class BDMarshmallowPlugin(MarshmallowPlugin): + """Redefines schema_helper in order to fetch schema instances from our custom registry in order + to take runtime patches into account when outputting OpenAPI schema.""" + def schema_helper(self, name, _, schema=None, **kwargs): + """Definition helper that allows using a marshmallow + :class:`Schema ` to provide OpenAPI + metadata. + + :param type|Schema schema: A marshmallow Schema class or instance. + """ + if isinstance(schema, str): + schema_cls = class_registry.get_class(schema) + if schema_cls in _runtime_schema_registry: + schema = _runtime_schema_registry[schema_cls] + # Works because lower level calls are working with an instance.
+ return super().schema_helper(name, _, schema, **kwargs) + + +def replace_docstrings_pattern( + apispec: List[str], + pattern: Tuple[str, str], + blocks: List[List[str]] +) -> List[str]: + """Takes a 2 line pattern and replaces it with lines from blocks, matching indentation.""" + for i in range(1, len(apispec)): + if pattern[0] in apispec[i-1] and pattern[1] in apispec[i]: + flattened = [] + indent = len(apispec[i-1].split(pattern[0])[0]) + for part in blocks: + flattened.append(" " * indent + part[0]) + for line in part[1:]: + flattened.append(" " * (indent + 2) + line) + return apispec[:i-1] + flattened + apispec[i+1:] + return apispec + + +def process_apispec_docstrings(ctrl: 'ResourceController', abs_doc: str): + """Process an abstract documentation block to adapt it to a controller instance's characteristics. + + Current patterns for abstract documentation: + - Marshmallow Schema | + schema: Schema -> schema: self.Schema.__class__.__name__ + - key Attributes | + - in: path + name: id + -> + List of table primary keys, with their description from marshmallow schema if any. + - field conditions | + - in: query + name: field_conditions + -> + List of available fields to set conditions on. + + :param ctrl: ResourceController + :type ctrl: ResourceController + :param abs_doc: Abstract documentation block + :type abs_doc: str + :return: Processed documentation block + :rtype: str + """ + # Use instance schema. + abs_doc = abs_doc.replace( + 'schema: Schema', f"schema: {ctrl.schema.__class__.__name__}" + ) + + # Template replacement #1: path key. + path_key = [] + for key in ctrl.pk: + attr = [] + attr.append("- in: path") + attr.append(f"name: {key}") + field = ctrl.schema.declared_fields[key] + desc = field.metadata.get("description", f"{ctrl.resource} {key}") + attr.append("description: " + desc) + path_key.append(attr) + + # Template replacement #2: field conditions.
+ field_conditions = [] + load_cols = [ + col for col in ctrl.table.__table__.columns + if col.name in ctrl.schema.load_fields + ] + for col in load_cols: + condition = [] + condition.append("- in: query") + condition.append(f"name: {col.name}") + if col.type.python_type == str: + condition.append( + "description: text - key=val | key=pattern " + "where pattern may contain '*' for wildcards" + ) + elif col.type.python_type in (int, float): + condition.append( + "description: numeric - key=val | key=val1,val2.. | key.op(val) " + "for op in (le|lt|ge|gt)" + ) + else: + condition.append(f"description: {ctrl.resource} {col.name}") + field_conditions.append(condition) + + # Split. + doc = abs_doc.split('---') + if len(doc) > 1: + sphinxdoc, apispec = doc + apispec = apispec.split('\n') + # Search and replace templates. + apispec = replace_docstrings_pattern( + apispec=apispec, pattern=('- in: path', 'name: id'), blocks=path_key + ) + apispec = replace_docstrings_pattern( + apispec=apispec, + pattern=('- in: query', 'name: fields_conditions'), + blocks=field_conditions + ) + # Join. 
+ abs_doc = sphinxdoc + "\n---\n" + "\n".join(apispec) + return abs_doc diff --git a/src/biodm/utils/biodm.py b/src/biodm/utils/biodm.py new file mode 100644 index 0000000..4fcf87f --- /dev/null +++ b/src/biodm/utils/biodm.py @@ -0,0 +1,17 @@ +from typing import TYPE_CHECKING, Type + +from sqlalchemy.orm.relationships import _RelationshipDeclared + +if TYPE_CHECKING: + from biodm.components import Base + + +def gen_schema(table: Type['Base']): + for k in table.__table__.columns: + print(k.name, table.colinfo(k.name)[1]) + print("---") + for k, v in table.__dict__.items(): + if hasattr(v, 'prop'): + p = v.prop + if isinstance(p, _RelationshipDeclared): + print(k) diff --git a/src/biodm/utils/security.py b/src/biodm/utils/security.py index 7179383..5e65184 100644 --- a/src/biodm/utils/security.py +++ b/src/biodm/utils/security.py @@ -5,7 +5,6 @@ from inspect import getmembers, ismethod from typing import TYPE_CHECKING, List, Tuple, Callable, Awaitable, Set, ClassVar, Type, Any, Dict -# import marshmallow as ma from marshmallow import fields, Schema from starlette.middleware.base import BaseHTTPMiddleware from starlette.requests import Request @@ -166,8 +165,6 @@ def enabled_verbs(self) -> Set[str]: ) - - class PermissionLookupTables: """Holds lookup tables for group based access. @@ -302,7 +299,8 @@ def _gen_perm_schema(table: Type['Base'], fkey: str, verbs: List[str]): f"{verb}": fields.Nested("ListGroupSchema"), } ) - schema_columns['entity'] = fields.Nested(table.ctrl.schema) + # back reference - probably unnecessary. + # schema_columns['entity'] = fields.Nested(table.ctrl.schema) return type( f"AssoPerm{table.__name__.capitalize()}{fkey.capitalize()}Schema", @@ -379,11 +377,11 @@ def setup_permissions(cls, app: 'Api'): rel_name, NewAsso = cls._gen_perm_table(app, table, field_fullkey, verbs) NewAssoSchema = cls._gen_perm_schema(table, field_fullkey, verbs) - # Set extra load field onto associated schema. - # Load fields only -> permissions are not dumped. 
# TODO: think about it. - table.ctrl.schema.load_fields.update( - {rel_name: fields.Nested(NewAssoSchema)} - ) + # Set extra field onto associated schema. + patch = {rel_name: fields.Nested(NewAssoSchema)} + table.ctrl.schema.fields.update(patch) + table.ctrl.schema.load_fields.update(patch) + table.ctrl.schema.dump_fields.update(patch) # Set up look up table for incomming requests. entry = {'table': NewAsso, 'from': tchain, 'verbs': verbs} diff --git a/src/biodm/utils/sqla.py b/src/biodm/utils/sqla.py index 297a1e6..291f2bc 100644 --- a/src/biodm/utils/sqla.py +++ b/src/biodm/utils/sqla.py @@ -33,4 +33,5 @@ def stmt_to_dict(stmt: UpsertStmt) -> Dict[str, Any]: :return: Dict values. :rtype: Dict[str, Any] """ - return {k.name: v.effective_value for k, v in stmt._values.items()} + # Skip keys that lack a .name attribute. + return {k.name: v.effective_value for k, v in stmt._values.items() if hasattr(k, 'name')} diff --git a/src/biodm/utils/utils.py b/src/biodm/utils/utils.py index 89e3206..09d7a27 100644 --- a/src/biodm/utils/utils.py +++ b/src/biodm/utils/utils.py @@ -1,7 +1,7 @@ """Utils.""" import datetime as dt import json -from functools import reduce, update_wrapper +from functools import reduce, update_wrapper, wraps import operator from os import path, utime from typing import ( @@ -30,19 +30,29 @@ async def __new__(cls, *args, **kwargs) -> Self: # type: ignore [misc] class classproperty(Generic[_T]): """Descriptor combining @classmethod and @property behaviours for python v3.11+. - note: only implements the getter. + notes: only implements the getter and memoizes for subsequent calls.
- Coutesy of: https://stackoverflow.com/a/76378416/6847689 + Inspired by: https://stackoverflow.com/a/76378416/6847689 """ - def __init__(self, method: Callable[..., _T]): + def __init__(self, method: Callable[..., _T]) -> None: self.method = method + self.cache = {} + update_wrapper(self, method) # type: ignore [misc] - def __get__(self, obj, cls=None) -> _T: - if cls is None: - cls = type(obj) + def __call__(self, cls): + """Not necessary but suppresses Sphinx errors.""" return self.method(cls) + def __get__(self, slf, cls=None) -> _T: + if cls is None: + cls = type(slf) + + if cls not in self.cache: + self.cache[cls] = self.method(cls) + + return self.cache[cls] + def utcnow() -> dt.datetime: """Support for python==3.10 and below.""" diff --git a/src/example/entities/controllers/file.py b/src/example/entities/controllers/file.py index 166beed..df1263a 100644 --- a/src/example/entities/controllers/file.py +++ b/src/example/entities/controllers/file.py @@ -11,6 +11,7 @@ from entities import tables + class FileController(S3Controller): def routes(self, **_) -> List[Mount | Route] | List[Mount] | List[BaseRoute]: """Adds a /files/id/visualize route. 
@@ -48,7 +49,7 @@ async def visualize(self, request: Request) -> Response: if not user_info.info: raise UnauthorizedError("Visualizing requires authentication.") - vis_data["username_user"] = user_info.info[0] + vis_data["user_username"] = user_info.info[0] vis = await vis_svc.write(data=vis_data, stmt_only=False, user_info=user_info) diff --git a/src/example/entities/schemas/dataset.py b/src/example/entities/schemas/dataset.py index 219e4bd..7d05050 100644 --- a/src/example/entities/schemas/dataset.py +++ b/src/example/entities/schemas/dataset.py @@ -13,11 +13,11 @@ class DatasetSchema(Schema): # # [g.name for g in Group] # # ) # ) - username_user_contact = String() + username_contact = String() id_project = Integer() # owner_group = Nested('GroupSchema') # , only=('path', 'n_members',) - contact = Nested('UserSchema') # , only=('username', ) + contact = Nested('UserSchema', exclude=['groups']) # , only=('username', ) project = Nested('ProjectSchema', exclude=('datasets', )) tags = List(Nested('TagSchema')) files = List(Nested('FileSchema')) diff --git a/src/example/entities/schemas/file.py b/src/example/entities/schemas/file.py index ee49db4..2510f22 100644 --- a/src/example/entities/schemas/file.py +++ b/src/example/entities/schemas/file.py @@ -13,5 +13,6 @@ class FileSchema(Schema): id_dataset = Integer() version_dataset = Integer() + # submitter_username = String() upload = Nested("UploadSchema", dump_only=True) # dataset = Nested('DatasetSchema') # , load_only=True diff --git a/src/example/entities/tables/dataset.py b/src/example/entities/tables/dataset.py index a5f66bf..dd60376 100644 --- a/src/example/entities/tables/dataset.py +++ b/src/example/entities/tables/dataset.py @@ -41,12 +41,12 @@ class Dataset(Versioned, Base): # # supplementary_metadata = Column(JSONB, nullable=True) # # Foreign keys - username_user_contact: Mapped[int] = mapped_column(ForeignKey("USER.username"), nullable=False) + username_contact: Mapped[str] = 
mapped_column(ForeignKey("USER.username"), nullable=False) id_project: Mapped[int] = mapped_column(ForeignKey("PROJECT.id"), nullable=False) # # relationships # policy - cascade="save-update, merge" ? - contact: Mapped["User"] = relationship(foreign_keys=[username_user_contact]) + contact: Mapped["User"] = relationship(foreign_keys=[username_contact]) tags: Mapped[Set["Tag"]] = relationship(secondary=asso_dataset_tag, uselist=True) project: Mapped["Project"] = relationship(back_populates="datasets") files: Mapped[List["File"]] = relationship(back_populates="dataset") @@ -62,7 +62,6 @@ class Dataset(Versioned, Base): # ) #  Special parameters. - # __permissions__ = ( - # # Flag many-to-entity (composition pattern) with permissions. - # Permission(files, read=True, write=True, download=True), - # ) + __permissions__ = ( + Permission(files, read=True, write=True, download=True), + ) diff --git a/src/example/entities/tables/file.py b/src/example/entities/tables/file.py index 86bde3f..74e2a09 100644 --- a/src/example/entities/tables/file.py +++ b/src/example/entities/tables/file.py @@ -2,7 +2,7 @@ import uuid from sqlalchemy import Column, Integer, ForeignKey, Boolean, String, ForeignKeyConstraint, SmallInteger -from sqlalchemy.orm import Mapped, relationship +from sqlalchemy.orm import Mapped, relationship, mapped_column from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.ext.asyncio import AsyncSession @@ -19,6 +19,8 @@ class File(S3File, Base): id_dataset = Column(Integer, nullable=False) version_dataset = Column(SmallInteger, nullable=False) + # submitter_username: Mapped[str] = mapped_column(ForeignKey("USER.username"), nullable=False) + __table_args__ = ( ForeignKeyConstraint( ["id_dataset", "version_dataset"], diff --git a/src/example/entities/tables/project.py b/src/example/entities/tables/project.py index 7ace738..72e68e0 100644 --- a/src/example/entities/tables/project.py +++ b/src/example/entities/tables/project.py @@ -30,8 +30,8 @@ class 
Project(Base): # id_user_responsible = Column(Integer, nullable=False) __permissions__ = ( - # Permission(datasets, read=True, write=True, download=True, propagates_to=["files"]), + Permission(datasets, read=True, write=True, download=True, propagates_to=["files"]), #  propagates_to=[] - Permission("datasets.files", read=True, write=True, download=True), + # Permission("datasets.files", read=True, write=True, download=True), # Permission(visualizations, write=True) ) diff --git a/src/example/entities/tables/visualization.py b/src/example/entities/tables/visualization.py index 07fb43a..0f4b31d 100644 --- a/src/example/entities/tables/visualization.py +++ b/src/example/entities/tables/visualization.py @@ -15,13 +15,13 @@ class Visualization(Base): name = Column(String(200), nullable=True) # Foreign Keys - username_user: Mapped[str] = mapped_column(ForeignKey("USER.username")) + user_username: Mapped[str] = mapped_column(ForeignKey("USER.username")) # id_project: Mapped[int] = mapped_column(ForeignKey("PROJECT.id")) id_file: Mapped[int] = mapped_column(ForeignKey("FILE.id")) # id_k8sinstance: Mapped[int] = mapped_column(ForeignKey("K8SINSTANCE.id")) # Relationships - user: Mapped["User"] = relationship(foreign_keys=[username_user]) + user: Mapped["User"] = relationship(foreign_keys=[user_username]) # project: Mapped["Project"] = relationship(back_populates="visualizations", lazy="select") file: Mapped["File"] = relationship(foreign_keys=[id_file]) # k8sinstance: Mapped["K8sInstance"] = relationship(foreign_keys=[id_k8sinstance], lazy="select") diff --git a/src/example/manifests/visualization.py b/src/example/manifests/visualization.py index 36eaa56..099df59 100644 --- a/src/example/manifests/visualization.py +++ b/src/example/manifests/visualization.py @@ -42,7 +42,7 @@ async def gen_manifest( :rtype: Dict[str, str] """ # 1. Get vis.user.username - username = vis.username_user + username = vis.user_username # 2. 
Get vis.file.key file: tables.File = await vis.awaitable_attrs.file diff --git a/src/tests/integration/s3/test_files.py b/src/tests/integration/s3/test_files.py index 43ee28d..6b9ea28 100644 --- a/src/tests/integration/s3/test_files.py +++ b/src/tests/integration/s3/test_files.py @@ -46,7 +46,7 @@ def test_create_project_dataset(srv_endpoint, utils): def test_create_file(srv_endpoint, utils, tmpdir): global small_file_path, small_file_upload_form, small_file - small_file_path = Path(tmpdir) / big_file_name + small_file_path = Path(tmpdir) / small_file_name utils.rand_file(small_file_path, ceil(0.5*CHUNK_SIZE)) # -> 1 chunk. small_file = { @@ -90,6 +90,54 @@ def test_file_upload(): assert response.status_code == 201 +@pytest.mark.dependency(depends=["test_create_file"]) +def test_create_oversized_file(srv_endpoint, utils, tmpdir): + small_file = { + "filename": small_file_path.name.split('.')[0], + "extension": small_file_path.name.split('.')[1], + "size": 1000*1024**3, # 1000GB + "id_dataset": "1", + "version_dataset": "1", + } + response = requests.post(f"{srv_endpoint}/files", data=utils.json_bytes(small_file)) + + assert response.status_code == 400 + assert "File exceeding 100 GB" in response.text + + +@pytest.mark.dependency(depends=["test_create_file"]) +def test_create_and_upload_oversized_file(srv_endpoint, utils): + # create, with lower size. + small_file = { + "filename": small_file_path.name.split('.')[0], + "extension": small_file_path.name.split('.')[1], + "size": small_file_path.stat().st_size - 10, + "id_dataset": "1", + "version_dataset": "1", + } + response = requests.post(f"{srv_endpoint}/files", data=utils.json_bytes(small_file)) + assert response.status_code == 201 + # Get form.
+ json_rf = json.loads(response.text) + upload = json_rf['upload'] + assert 'parts' in upload + assert len(upload['parts']) == 1 + upload_form = upload['parts'][0]['form'] + # Upload + postv4 = json.loads(upload_form.replace("'", "\"")) + with open(small_file_path, 'rb') as f: + files = {'file': (small_file_name, f)} + response = requests.post( + postv4['url'], + data=postv4['fields'], + files=files, + verify=True, + allow_redirects=True + ) + assert response.status_code == 400 + assert "EntityTooLarge" in response.text + + @pytest.mark.dependency(name="test_file_upload") def test_file_readiness(srv_endpoint): response = requests.get(f"{srv_endpoint}/files/{small_file['id']}")