Skip to content

Commit

Permalink
MIDRC-849 Add DB migrations setup and tests (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Nov 21, 2024
1 parent d2dad90 commit bd79886
Show file tree
Hide file tree
Showing 23 changed files with 929 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
name: Python Unit Test with Postgres
uses: uc-cdis/.github/.github/workflows/python_unit_test.yaml@master
with:
test-script: 'tests/test.sh'
test-script: 'bin/test.sh'
python-version: '3.9'
use-cache: true
# run-coveralls: true # TODO enable once the repo is public
Expand Down
40 changes: 38 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,51 @@
"line_number": 15
}
],
"alembic.ini": [
{
"type": "Basic Auth Credentials",
"filename": "alembic.ini",
"hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
"is_verified": false,
"line_number": 64
}
],
"gen3workflow/config-default.yaml": [
{
"type": "Secret Keyword",
"filename": "gen3workflow/config-default.yaml",
"hashed_secret": "afc848c316af1a89d49826c5ae9d00ed769415f3",
"is_verified": false,
"line_number": 27
}
],
"migrations/versions/e1886270d9d2_create_system_key_table.py": [
{
"type": "Hex High Entropy String",
"filename": "migrations/versions/e1886270d9d2_create_system_key_table.py",
"hashed_secret": "1df47988c41b70d5541f29636c48c6127cf593b8",
"is_verified": false,
"line_number": 16
}
],
"tests/conftest.py": [
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "0dd78d9147bb410f0cb0199c5037da36594f77d8",
"is_verified": false,
"line_number": 141
"line_number": 188
}
],
"tests/migrations/test_migration_e1886270d9d2.py": [
{
"type": "Hex High Entropy String",
"filename": "tests/migrations/test_migration_e1886270d9d2.py",
"hashed_secret": "1df47988c41b70d5541f29636c48c6127cf593b8",
"is_verified": false,
"line_number": 24
}
]
},
"generated_at": "2024-10-23T16:21:37Z"
"generated_at": "2024-11-19T19:43:31Z"
}
117 changes: 117 additions & 0 deletions alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
# Use forward slashes (/) also on windows to provide an os agnostic path
script_location = migrations

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to migrations/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are listed below. Keep comments on their own line:
# with configparser's default settings, text after the value becomes part of the value.
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
# version_path_separator = newline
version_path_separator = os

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARNING
handlers = console
qualname =

[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
50 changes: 50 additions & 0 deletions bin/_common_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
set -e

# Common setup for both tests and running the service
# Used in run.sh and test.sh
#
# Steps: install dependencies with poetry, read the DB settings printed by
# gen3workflow/config.py, create the database if it does not exist yet, then
# run the alembic migrations.

CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Source the environment variables from the metrics setup script
# source "${CURRENT_DIR}/setup_prometheus"

echo "installing dependencies with 'poetry install -vv'..."
poetry install -vv
poetry env info
echo "ensuring db exists"

# Get the DB settings. Only the last line printed by config.py is used; it is
# read as whitespace-separated fields in this order:
# host, port, username, password, database name
# The path is quoted so that a checkout directory containing spaces still works.
db_settings=$(poetry run python "${CURRENT_DIR}/../gen3workflow/config.py" | tail -1)
if [ -z "${db_settings}" ]; then
    echo "'gen3workflow/config.py' did not return DB settings"
    exit 1
fi
# Split on whitespace with `read -a` rather than an unquoted array assignment:
# the latter also performs pathname expansion, so a field containing `*` or `?`
# (e.g. a password) could be replaced by matching file names.
read -r -a db_settings_array <<< "${db_settings}"
HOST=${db_settings_array[0]}
PORT=${db_settings_array[1]}
USER=${db_settings_array[2]}
PASSWORD=${db_settings_array[3]}
DB_NAME=${db_settings_array[4]}

if [ -z "${HOST}" ] || [ -z "${PORT}" ] || [ -z "${USER}" ] || [ -z "${PASSWORD}" ] || [ -z "${DB_NAME}" ]; then
    echo "Failed to extract one or more components from DB settings"
    exit 1
fi

echo "Extracted database name: ${DB_NAME}"
echo "Extracted username: ${USER}"

# Check if the database exists
# Connect to the default "postgres" database, since the target database may not exist yet
if [ "$( PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -XtAc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" )" = '1' ]
then
    echo "Database ${DB_NAME} already exists."
else
    echo "Database ${DB_NAME} does not exist. Creating it..."
    # Connect to the default postgres database to create the new database
    PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -c "CREATE DATABASE \"${DB_NAME}\";"
fi

echo "running db migration with 'poetry run alembic upgrade head'..."
poetry run alembic upgrade head
9 changes: 9 additions & 0 deletions bin/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Test entry point: sources the common setup script, then runs the test suite
# with pytest, collecting coverage for the `gen3workflow` and `migrations` packages.
set -e

# Absolute path of the directory containing this script, so that the setup
# script is found regardless of the caller's working directory
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

source "${CURRENT_DIR}/_common_setup.sh"

echo "running tests with 'pytest'..."
# Coverage is reported on the terminal (fully covered files are skipped) and written as XML
poetry run pytest -vv --cov=gen3workflow --cov=migrations --cov-report term-missing:skip-covered --cov-report xml
6 changes: 6 additions & 0 deletions docs/local_installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ Update your configuration file:
- set `TES_SERVER_URL` to the TES server URL
- set `MOCK_AUTH` to `true`, so that no attempts to interact with Arborist are made.

Run database schema migration:

```bash
alembic upgrade head
```

Start the Gen3Workflow app:

```bash
Expand Down
39 changes: 22 additions & 17 deletions gen3workflow/aws_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,43 @@
iam_resp_err = "Unexpected response from AWS IAM"


def get_iam_user_name(user_id):
def get_safe_name_from_user_id(user_id):
"""
Generate a valid IAM user name for the specified user.
IAM user names can contain up to 64 characters. They can only contain alphanumeric characters
Generate a valid IAM user name or S3 bucket name for the specified user.
- IAM user names can contain up to 64 characters. They can only contain alphanumeric characters
and/or the following: +=,.@_- (not enforced here since user IDs and hostname should not contain
special characters).
- S3 bucket names can contain up to 63 characters.
Args:
user_id (str): The user's unique Gen3 ID
Returns:
str: IAM user name
str: safe name
"""
escaped_hostname = config["HOSTNAME"].replace(".", "-")
iam_user_name = f"gen3wf-{escaped_hostname}"
max = 64 - len(f"-{user_id}")
if len(iam_user_name) > max:
iam_user_name = iam_user_name[:max]
iam_user_name = f"{iam_user_name}-{user_id}"
return iam_user_name
safe_name = f"gen3wf-{escaped_hostname}"
max = 63 - len(f"-{user_id}")
if len(safe_name) > max:
safe_name = safe_name[:max]
safe_name = f"{safe_name}-{user_id}"
return safe_name


def get_user_bucket_info(user_id):
"""TODO
def create_user_bucket(user_id):
"""
Create an S3 bucket for the specified user and return information about the bucket.
Args:
user_id (str): The user's unique Gen3 ID
Returns:
tuple: (bucket name, prefix where the user stores objects in the bucket, bucket region)
"""
return "TODO", "ga4gh-tes", "us-east-1"
user_bucket_name = get_safe_name_from_user_id(user_id)
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket=user_bucket_name)
return user_bucket_name, "ga4gh-tes", config["USER_BUCKETS_REGION"]


def create_or_update_policy(policy_name, policy_document, path_prefix, tags):
Expand Down Expand Up @@ -94,7 +99,7 @@ def create_or_update_policy(policy_name, policy_document, path_prefix, tags):


def create_iam_user_and_key(user_id):
iam_user_name = get_iam_user_name(user_id)
iam_user_name = get_safe_name_from_user_id(user_id)
escaped_hostname = config["HOSTNAME"].replace(".", "-")
iam_tags = [
{
Expand All @@ -111,7 +116,7 @@ def create_iam_user_and_key(user_id):
raise

# grant the IAM user access to the user's s3 bucket
bucket_name, bucket_prefix, _ = get_user_bucket_info(user_id)
bucket_name, bucket_prefix, _ = create_user_bucket(user_id)
policy_document = {
"Version": "2012-10-17",
"Statement": [
Expand Down Expand Up @@ -145,7 +150,7 @@ def create_iam_user_and_key(user_id):


def list_iam_user_keys(user_id):
iam_user_name = get_iam_user_name(user_id)
iam_user_name = get_safe_name_from_user_id(user_id)
try:
response = iam_client.list_access_keys(UserName=iam_user_name)
except ClientError as e:
Expand All @@ -164,7 +169,7 @@ def list_iam_user_keys(user_id):
def delete_iam_user_key(user_id, key_id):
try:
iam_client.delete_access_key(
UserName=get_iam_user_name(user_id),
UserName=get_safe_name_from_user_id(user_id),
AccessKeyId=key_id,
)
except ClientError as e:
Expand Down
20 changes: 16 additions & 4 deletions gen3workflow/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,26 @@ HOSTNAME: localhost
DEBUG: false
DOCS_URL_PREFIX: /gen3workflow

MAX_IAM_KEYS_PER_USER: 2 # the default AWS AccessKeysPerUser quota is 2
IAM_KEYS_LIFETIME_DAYS: 30

# override the default Arborist URL; ignored if already set as an environment variable
ARBORIST_URL:

# /!\ only use for development! Allows running gen3workflow locally without Arborist interaction
MOCK_AUTH: false
MOCK_AUTH: false # TODO add to config validation. Also add "no unexpected props" to validation.

MAX_IAM_KEYS_PER_USER: 2 # the default AWS AccessKeysPerUser quota is 2
IAM_KEYS_LIFETIME_DAYS: 30
USER_BUCKETS_REGION: us-east-1

#############
# DATABASE #
#############

DB_DRIVER: postgresql+asyncpg
DB_HOST: localhost
DB_PORT: 5432
DB_USER: postgres
DB_PASSWORD: postgres
DB_DATABASE: gen3workflow_test



Expand Down
Loading

0 comments on commit bd79886

Please sign in to comment.