Skip to content

Commit

Permalink
Merge pull request #18 from bento-platform/modernize
Browse files Browse the repository at this point in the history
Fix issue with paths/viewing files & generate URIs for objects separate from file path
  • Loading branch information
davidlougheed authored Feb 7, 2023
2 parents 2fb7f19 + 6bec5ab commit b084621
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 177 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ these can also be read from a minIO instance (or AWS S3 for that matter).

## Environment Variables

Set `SERVICE_URL` to the base URL of the service (e.g. `https://bentov2.local/api/drop-box`).
This is used for file URI generation.

If using the current filesystem to serve files, you can use the `SERVICE_DATA`
environment variable to point to some location (`./data` by default).

Expand Down
1 change: 1 addition & 0 deletions bento_drop_box_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
SERVICE_ID=os.environ.get("SERVICE_ID", str(":".join(list(SERVICE_TYPE.values())[:2]))),
SERVICE_DATA_SOURCE="minio" if MINIO_URL else "local",
SERVICE_DATA=None if MINIO_URL else SERVICE_DATA,
SERVICE_URL=os.environ.get("SERVICE_URL", "http://127.0.0.1:5000"), # base URL to construct object URIs from
MINIO_URL=MINIO_URL,
MINIO_USERNAME=os.environ.get("MINIO_USERNAME") if MINIO_URL else None,
MINIO_PASSWORD=os.environ.get("MINIO_PASSWORD") if MINIO_URL else None,
Expand Down
4 changes: 2 additions & 2 deletions bento_drop_box_service/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ async def _get_backend() -> Optional[DropBoxBackend]:
# Make data directory/ies if needed
if current_app.config["SERVICE_DATA_SOURCE"] == "local":
os.makedirs(current_app.config["SERVICE_DATA"], exist_ok=True)
return LocalBackend()
return LocalBackend(logger=current_app.logger)

elif current_app.config["SERVICE_DATA_SOURCE"] == "minio":
return MinioBackend()
return MinioBackend(logger=current_app.logger)

return None

Expand Down
4 changes: 4 additions & 0 deletions bento_drop_box_service/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from abc import ABC, abstractmethod
from typing import Tuple
from werkzeug import Request, Response
Expand All @@ -7,6 +8,9 @@


class DropBoxBackend(ABC):
def __init__(self, logger: logging.Logger):
    """
    Store the logger that backend implementations report through.

    :param logger: Logger kept on the instance as ``self.logger``.
    """
    self.logger = logger

@abstractmethod
async def get_directory_tree(self) -> Tuple[dict]:
pass
Expand Down
59 changes: 45 additions & 14 deletions bento_drop_box_service/backends/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,69 @@
import aiofiles.os
import aiofiles.ospath
import os
import pathlib

from bento_lib.responses.quart_errors import quart_bad_request_error, quart_not_found_error
from typing import TypedDict
from typing import Tuple, TypedDict
from quart import current_app, send_file, Request, Response
from werkzeug.utils import secure_filename

from .base import DropBoxBackend


# TODO: py3.10: remove in favour of [str].removeprefix(...)
def _str_removeprefix_polyfill(s: str, prefix: str) -> str:
return s[len(prefix):] if s.startswith(prefix) else s


# TODO: py3.11: individual optional fields
class DropBoxEntry(TypedDict, total=False):
    # One node of the drop box directory tree. total=False makes every key
    # optional, since directories and files carry different keys.

    # Entry name as listed in its parent directory.
    name: str
    # Absolute filesystem path of the entry.
    # NOTE(review): the local backend now writes "filePath" instead; confirm whether this key is still emitted anywhere.
    path: str
    # Filesystem path of the entry as a string (data root joined with the entry's sub-path).
    filePath: str
    # Files only: SERVICE_URL + "/objects" + the entry's path relative to the data root.
    uri: str
    # Files only: file size as reported by getsize.
    size: int
    # Directories only: the entries found inside this directory.
    contents: tuple[DropBoxEntry, ...]


class LocalBackend(DropBoxBackend):
async def _get_directory_tree(
    self,
    root_path: pathlib.Path,
    sub_path: Tuple[str, ...],
    level: int = 0,
) -> tuple[DropBoxEntry, ...]:
    """
    Recursively list the contents of ``root_path / sub_path`` as drop box entries.

    Entries whose name starts with "." are skipped. Once ``level`` reaches the
    ``TRAVERSAL_LIMIT`` config value, sub-directories are no longer listed, but
    files in the current directory still are.

    :param root_path: Root of the drop box data; made absolute before use.
    :param sub_path: Path components, relative to ``root_path``, of the directory to list.
    :param level: Current recursion depth (0 for the initial call).
    :return: One entry per listed item. Directories carry ``contents``; files carry
        ``size`` and a ``uri`` built from ``SERVICE_URL`` and the path relative to the root.
    """
    root_path = root_path.absolute()
    sub_path_str: str = "/".join(sub_path)
    current_dir = (root_path / sub_path_str).absolute()

    traversal_limit = current_app.config["TRAVERSAL_LIMIT"]
    entries: list[DropBoxEntry] = []

    # noinspection PyUnresolvedReferences
    for entry in (await aiofiles.os.listdir(current_dir)):
        if entry.startswith("."):
            continue  # hidden files/directories are never listed

        entry_path = current_dir / entry
        # Check the entry itself, not the directory being listed: the depth limit
        # must only exclude sub-directories, never plain files.
        entry_is_dir: bool = await aiofiles.ospath.isdir(entry_path)

        if entry_is_dir and level >= traversal_limit:
            continue

        if "/" in entry:
            self.logger.warning(f"Skipped entry with a '/' in its name: {entry}")
            continue

        if entry_is_dir:
            entries.append({
                "name": entry,
                "filePath": str(entry_path),
                "contents": await self._get_directory_tree(root_path, (*sub_path, entry), level=level + 1),
            })
        else:
            entries.append({
                "name": entry,
                "filePath": str(entry_path),
                "size": await aiofiles.ospath.getsize(entry_path),
                # Object URI: service base URL + "/objects" + path relative to the data root
                "uri": (
                    current_app.config["SERVICE_URL"] +
                    "/objects" +
                    _str_removeprefix_polyfill(str(entry_path), str(root_path))
                ),
            })

    return tuple(entries)

async def get_directory_tree(self) -> tuple[DropBoxEntry, ...]:
    """
    Return the full drop box tree rooted at the ``SERVICE_DATA`` config path.

    :return: The top-level entries of the data directory, with nested ``contents``
        for sub-directories.
    """
    root_path: pathlib.Path = pathlib.Path(current_app.config["SERVICE_DATA"])
    # Start at the root itself; "." is the sub-path of the root relative to itself.
    return await self._get_directory_tree(root_path, (".",))

async def upload_to_path(self, request: Request, path: str, content_length: int) -> Response:
# TODO: This might not be secure (ok for now due to permissions check)
Expand All @@ -68,10 +92,14 @@ async def upload_to_path(self, request: Request, path: str, content_length: int)
return current_app.response_class(status=204)

async def retrieve_from_path(self, path: str) -> Response:
root_path: pathlib.Path = pathlib.Path(current_app.config["SERVICE_DATA"]).absolute()
directory_items: tuple[DropBoxEntry, ...] = await self.get_directory_tree()

# Manually crawl through the tree to only return items which are explicitly in the tree.

# Otherwise, find the file if it exists and return it.
path_parts: list[str] = path.split("/") # TODO: Deal with slashes in file names
# TODO: Deal with slashes in file names
path_parts: list[str] = _str_removeprefix_polyfill(path, str(root_path)).lstrip("/").split("/")

while len(path_parts) > 0:
part = path_parts[0]
Expand All @@ -87,8 +115,11 @@ async def retrieve_from_path(self, path: str) -> Response:
if len(path_parts) > 0:
return quart_bad_request_error("Cannot retrieve a directory")

return await send_file(node["path"], mimetype="application/octet-stream", as_attachment=True,
attachment_filename=node["name"])
return await send_file(
node["filePath"],
mimetype="application/octet-stream",
as_attachment=True,
attachment_filename=node["name"])

directory_items = node["contents"]

Expand Down
5 changes: 3 additions & 2 deletions bento_drop_box_service/backends/minio.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import boto3
import logging
from typing import Tuple

from botocore.exceptions import ClientError
Expand All @@ -11,8 +12,8 @@


class MinioBackend(DropBoxBackend):
def __init__(self, resource=None):
super(MinioBackend, self).__init__()
def __init__(self, logger: logging.Logger, resource=None):
super(MinioBackend, self).__init__(logger)

if resource:
self.minio = resource
Expand Down
36 changes: 32 additions & 4 deletions bento_drop_box_service/routes.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import asyncio
from quart import Blueprint, current_app, jsonify, request, Response
from bento_lib.auth.quart_decorators import quart_permissions_owner
from bento_lib.responses.quart_errors import (
quart_bad_request_error,
quart_internal_server_error
)
from bento_lib.types import GA4GHServiceInfo
from bento_lib.types import GA4GHServiceInfo, BentoExtraServiceInfo
from bento_drop_box_service import __version__
from bento_drop_box_service.backend import get_backend
from bento_drop_box_service.constants import BENTO_SERVICE_KIND, SERVICE_NAME, SERVICE_TYPE
Expand Down Expand Up @@ -46,9 +47,37 @@ async def drop_box_retrieve(path) -> Response:
return await backend.retrieve_from_path(path)


async def _git_stdout(*args) -> str:
    """
    Run ``git`` with the given arguments and return what it printed to stdout.

    The output is decoded and stripped of trailing whitespace. stderr is
    captured but discarded, and the exit code is not inspected.
    """
    process = await asyncio.create_subprocess_exec(
        "git",
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout_bytes, _ = await process.communicate()
    output = stdout_bytes.decode()
    return output.rstrip()


@drop_box_service.route("/service-info", methods=["GET"])
async def service_info() -> Response:
# Spec: https://github.com/ga4gh-discovery/ga4gh-service-info

bento_info: BentoExtraServiceInfo = {
"serviceKind": BENTO_SERVICE_KIND
}

debug_mode = current_app.config["BENTO_DEBUG"]
if debug_mode:
try:
if res_tag := await _git_stdout("describe", "--tags", "--abbrev=0"):
# noinspection PyTypeChecker
bento_info["gitTag"] = res_tag
if res_branch := await _git_stdout("branch", "--show-current"):
# noinspection PyTypeChecker
bento_info["gitBranch"] = res_branch
if res_commit := await _git_stdout("rev-parse", "HEAD"):
# noinspection PyTypeChecker
bento_info["gitCommit"] = res_commit

except Exception as e:
current_app.logger.error(f"Error retrieving git information: {type(e).__name__}")

# Do a little type checking
info: GA4GHServiceInfo = {
"id": current_app.config["SERVICE_ID"],
Expand All @@ -62,8 +91,7 @@ async def service_info() -> Response:
"contactUrl": "mailto:[email protected]",
"version": __version__,
"environment": "dev" if current_app.config["BENTO_DEBUG"] else "prod",
"bento": {
"serviceKind": BENTO_SERVICE_KIND,
},
"bento": bento_info,
}

return jsonify(info)
Loading

0 comments on commit b084621

Please sign in to comment.