diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8fb8b311..a1c51cf7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -248,7 +248,7 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets[format('AWS_SECRET_ACCESS_KEY_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }} AWS_DEFAULT_REGION: us-west-2 - TF_VAR_hydrocronapi_api_docker_image: "ghcr.io/podaac/hydrocron:${{ env.THE_VERSION }}" + TF_VAR_hydrocron_api_api_docker_image: "ghcr.io/podaac/hydrocron:${{ env.THE_VERSION }}" run: | #source bin/config.sh ${{ env.THE_ENV }} diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..9b82e8d7 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,16 @@ +FROM node:10 +LABEL org.opencontainers.image.source="https://github.com/podaac/hydrocron-api" +RUN npm install forever -g + +ENV project_dir /project +ENV app_dir ${project_dir}/app +ENV config_dir ${project_dir}/config + +RUN mkdir ${project_dir} ${app_dir} ${config_dir} +WORKDIR ${app_dir} + +COPY package*.json ./ +RUN npm install +COPY . . + +CMD ${app_dir}/docker/docker-start-command \ No newline at end of file diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 00000000..7752ef27 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,48 @@ +# Hydrocron API Docker Image + +This directory contains the `Dockerfile` used to build the Docker image that runs the Hydrocron API as an AWS Lambda function. + +It also includes a number of helper scripts that are run by the CI/CD pipeline but can also be run locally to build and push the image. + +## Building + +Building the Hydrocron API Docker image requires a tar file distribution of the project. This can be created with `poetry build` or by downloading a previously built tar of the project. + +### Building from tar + +The `build-docker.sh` script builds the Docker image from the +local tar file. There are two required arguments: + +1. service-name: The name of the service being built (from pyproject.toml) +2. service-version: The version of the service being built (also from pyproject.toml) + +The script prints the docker tag of the built image. + +Example: + +```shell script +./docker/build-docker.sh -n podaac-hydrocron -v 1.0.0-alpha.3 +``` + +## Running + +The Docker image can be run directly using the `docker run` command. + +See [Testing Lambda container images locally](https://docs.aws.amazon.com/lambda/latest/dg/images-test.html) for details. + +## Pushing to ECR + +The `push-docker-ecr.sh` script can be used to push a docker image to AWS ECR. There are two required arguments: + +1. tf-venue: The target venue for uploading (sit, uat, or ops). +2. docker-tag: The docker tag of the image being pushed. + +The easiest way to use the `push-docker-ecr.sh` script is to first call `build-docker.sh` and save the output to the +`docker_tag` environment variable. Then call `push-docker-ecr.sh`. + +Example: + +```shell script +export docker_tag=$(./docker/build-docker.sh -n podaac-hydrocron -v 1.0.0-alpha.3) +./docker/push-docker-ecr.sh -v sit -t $docker_tag +``` \ No newline at end of file diff --git a/docker/build-docker.sh b/docker/build-docker.sh new file mode 100755 index 00000000..36a2f0f4 --- /dev/null +++ b/docker/build-docker.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash + +# This script is intended to be run by the CI/CD pipeline to build a specific version of the Hydrocron API.
+ +set -Eeo pipefail + +POSITIONAL=() +while [[ $# -gt 0 ]] +do +key="$1" + +case $key in + -n|--service-name) + service_name="$2" + shift # past argument + shift # past value + ;; + -v|--service-version) + service_version="$2" + shift # past argument + shift # past value + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; +esac +done +set -- "${POSITIONAL[@]}" # restore positional parameters + +USAGE="USAGE: build-docker.sh -n|--service-name service_name -v|--service-version service_version" + +# shellcheck disable=SC2154 +if [[ -z "${service_name}" ]]; then + echo "service_name required. Name of the service as found in pyproject.toml (e.g. podaac-staging)" >&2 + echo "$USAGE" >&2 + exit 1 +fi + +# shellcheck disable=SC2154 +if [[ -z "${service_version}" ]]; then + echo "service_version required. Version of software to install (e.g. 0.1.0-a1+12353)." >&2 + echo "$USAGE" >&2 + exit 1 +fi + +set -u + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +PROJECT_DIR="$(dirname "${SCRIPTPATH}")" + +repositoryName=podaac/podaac-cloud/${service_name} + +# Docker tags can't include '+' https://github.com/docker/distribution/issues/1201 +dockerTagVersion=$(echo "${service_version}" | tr "+" _) + +tar_filename="${service_name}-${service_version}.tar.gz" +docker build -t "${repositoryName}":"${dockerTagVersion}" --build-arg SOURCE="dist/${tar_filename}" -f "$SCRIPTPATH"/Dockerfile "$PROJECT_DIR" 1>&2 + +echo "${repositoryName}":"${dockerTagVersion}" \ No newline at end of file diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100644 index 00000000..97ed9d30 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +if test -f "logging.ini"; then + echo "Applying user-provided logging.ini" +else + echo "Using default logging.ini included with hydrocron. This can be overridden by mounting a python logging configuration file at $(pwd)/logging.ini" + python - <<'END' +import pkgutil +logging_conf = pkgutil.get_data('hydrocron', 'conf/logging.ini').decode("utf-8") +with open('logging.ini', 'w') as logging_ini: + logging_ini.write(logging_conf) +END +fi + +uvicorn hydrocron.api:app --proxy-headers --host 0.0.0.0 --port 80 --log-config logging.ini \ No newline at end of file diff --git a/docker/push-docker-ecr.sh b/docker/push-docker-ecr.sh new file mode 100755 index 00000000..997e2a3e --- /dev/null +++ b/docker/push-docker-ecr.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +# This script is intended to be run by the CI/CD pipeline to push a docker tag previously built by build-docker.sh + +set -Eeo pipefail + +POSITIONAL=() +while [[ $# -gt 0 ]] +do +key="$1" + +case $key in + -t|--docker-tag) + docker_tag="$2" + echo "--- docker_tag" + echo $docker_tag + shift # past argument + shift # past value + ;; + -v|--tf-venue) + tf_venue="$2" + echo "--- tf_venue" + echo $tf_venue + case $tf_venue in + sit|uat|ops) ;; + *) + echo "tf_venue must be sit, uat, or ops" + exit 1;; + esac + shift # past argument + shift # past value + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; +esac +done +set -- "${POSITIONAL[@]}" # restore positional parameters + +USAGE="push-docker-ecr.sh -t|--docker-tag docker_tag -v|--tf-venue tf_venue" + +# shellcheck disable=SC2154 +if [[ -z "${tf_venue}" ]]; then + echo "tf_venue required. 
One of sit, uat, ops" >&2 + echo "$USAGE" >&2 + exit 1 +fi + +# shellcheck disable=SC2154 +if [[ -z "${docker_tag}" ]]; then + echo "docker_tag required." >&2 + echo "$USAGE" >&2 + exit 1 +fi + +set -u + +repositoryName=$(echo "${docker_tag}" | awk -F':' '{print $1}') +tf_profile="ngap-service-${tf_venue}" + +# Get the AWS Account ID for this venue/profile +# shellcheck disable=SC2154 +aws_acct=$(aws sts get-caller-identity --profile "$tf_profile" | python -c "import sys, json; print(json.load(sys.stdin)['Account'])") +echo "aws_acct" +echo $aws_acct + +# Create repository if needed +aws ecr create-repository --repository-name "${repositoryName}" --profile "$tf_profile" || echo "No need to create, repository ${repositoryName} already exists" + +# Login to ECR +echo "aws ecr get-login-password --region us-west-2 --profile \"$tf_profile\" | docker login --username AWS --password-stdin \"$aws_acct\".dkr.ecr.us-west-2.amazonaws.com" +set +x +$(aws ecr get-login --no-include-email --region us-west-2 --profile "$tf_profile" 2> /dev/null) || \ + docker login --username AWS --password "$(aws ecr get-login-password --region us-west-2 --profile "$tf_profile")" "$aws_acct".dkr.ecr.us-west-2.amazonaws.com +set -x + +# Tag the image for this venue's ECR +docker tag "${docker_tag}" "$aws_acct".dkr.ecr.us-west-2.amazonaws.com/"${docker_tag}" + +# Push the tag +docker push "$aws_acct".dkr.ecr.us-west-2.amazonaws.com/"${docker_tag}" diff --git a/terraform/api-specification-templates/hydrocron_aws_api.yml b/terraform/api-specification-templates/hydrocron_aws_api.yml index 94815a9a..1a2bb186 100644 --- a/terraform/api-specification-templates/hydrocron_aws_api.yml +++ b/terraform/api-specification-templates/hydrocron_aws_api.yml @@ -1,15 +1,13 @@ -openapi: 3.0.1 +openapi: 3.0.0 info: - title: podaac-hydrocron + title: "Get time series data from SWOT observations for reaches, nodes, and/or lakes" + description: "Get time series data from SWOT observations for reaches, nodes, and/or\ + \ lakes" version: 1.0.0 - license: - name: Apache 2.0 - url: 'https://www.apache.org/licenses/LICENSE-2.0.html' servers: - - url: 'https://hydrocron.podaac.earthdata.nasa.gov/' - description: Production - - url: 'https://hydrocron.podaac.uat.earthdata.nasa.gov/' - description: User Acceptance +- url: https://virtserver.swaggerhub.com/hydrocron/HydroAPI/1.0.0 + description: "Get time series data from SWOT observations for reaches, nodes, and/or\ + \ lakes" paths: /timeseries: get: @@ -78,276 +76,36 @@ paths: default: feature_id, time_str, wse, geometry example: feature_id, time_str, wse, geometry responses: - '200': - $ref: '#/components/responses/Success' - '400': - $ref: '#/components/responses/ClientError' - '404': - $ref: '#/components/responses/NotFound' - '413': - $ref: '#/components/responses/ClientError' - '500': - $ref: '#/components/responses/ServerError' - x-amazon-apigateway-integration: - uri: ${hydrocron_api_lambda_arn_timeseries_test} - responses: - default: - statusCode: "200" - responseTemplates: - application/json: | - #set($inputRoot = $input.path('$')) - #if($inputRoot.toString().contains('206 PARTIAL CONTENT')) - #set($context.responseOverride.status = 206) - #end - $input.json('$') - ^400.*: - statusCode: "400" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - ^404.*: - statusCode: "404" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - ^413.*: - statusCode: "413" - responseTemplates: - application/json: |- - { - 
"error" : "$input.path('$.errorMessage')" - } - ^[^1-5].*: - statusCode: "500" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - requestTemplates: - application/json: |- - { - "body": { - "feature":"$input.params('feature')", - "reach_id":"$input.params('reach_id')", - "start_time":"$input.params('start_time')", - "end_time":"$input.params('end_time')", - "output": "$input.params('output')", - "fields": "$input.params('fields')", - "exact":"$input.params('exact')", - "page_number": "$input.params('page_number')" , - "page_size": "$input.params('page_size')" - } - } - passthroughBehavior: when_no_templates - httpMethod: POST - contentHandling: CONVERT_TO_TEXT - type: aws - /timeseriesSubset: - get: - summary: Subset by time series for a given spatial region - description: "Get Timeseries for a particular Reach, Node, or LakeID" - operationId: getsubset_get - parameters: - - name: feature - in: query - description: Data requested for Reach or Node or Lake - required: false - style: form - explode: true - schema: - type: string - enum: [ "Reach", "Lake", "Node"] - example: Reach - - name: subsetpolygon - in: query - description: GEOJSON of the subset area - required: false - style: form - explode: true - schema: - type: string - example: '{"features": [{"type": "Feature","geometry": {"coordinates": [[-95.6499095054704,50.323685647314554],[-95.3499095054704,50.323685647314554],[-95.3499095054704,50.19088502467528],[-95.6499095054704,50.19088502467528],[-95.6499095054704,50.323685647314554]],"type": "LineString"},"properties": {}}],"type": "FeatureCollection"}' - - name: start_time - in: query - description: Start time of the timeseries - required: true - style: form - explode: true - schema: - type: string - format: date-time - example: 2022-08-04T00:00:00Z - - name: end_time - in: query - description: End time of the timeseries - required: true - style: form - explode: true - schema: - type: string - format: date-time - example: 2022-08-23T00:00:00Z - - name: output - in: query - description: Format of the data returned - required: false - style: form - explode: true - schema: - type: string - enum: [ "csv", "geojson"] - default: geojson - example: geojson - - name: fields - in: query - description: Format of the data returned - required: false - style: form - explode: true - schema: - type: string - default: feature_id, time_str, wse, geometry - example: feature_id, time_str, wse, geometry - responses: - '200': - $ref: '#/components/responses/Success' - '400': - $ref: '#/components/responses/ClientError' - '404': - $ref: '#/components/responses/NotFound' - '413': - $ref: '#/components/responses/ClientError' - '500': - $ref: '#/components/responses/ServerError' - x-amazon-apigateway-integration: - uri: ${hydrocron_api_lambda_arn_subset_test} - responses: - default: - statusCode: "200" - responseTemplates: - application/json: | - #set($inputRoot = $input.path('$')) - #if($inputRoot.toString().contains('206 PARTIAL CONTENT')) - #set($context.responseOverride.status = 206) - #end - $input.json('$') - ^400.*: - statusCode: "400" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - ^404.*: - statusCode: "404" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - ^413.*: - statusCode: "413" - responseTemplates: - application/json: |- - { - "error" : "$input.path('$.errorMessage')" - } - ^[^1-5].*: - statusCode: "500" - responseTemplates: - application/json: |- - { 
- "error" : "$input.path('$.errorMessage')" - } - requestTemplates: - application/json: |- - { - "body": { - "feature":"$input.params('feature')", - "subsetpolygon":"$input.params('subsetpolygon')", - "start_time":"$input.params('start_time')", - "end_time":"$input.params('end_time')", - "output": "$input.params('output')", - "fields": "$input.params('fields')", - "exact":"$input.params('exact')", - "page_number": "$input.params('page_number')" , - "page_size": "$input.params('page_size')" - } - } - passthroughBehavior: when_no_templates - httpMethod: POST - contentHandling: CONVERT_TO_TEXT - type: aws -components: - responses: - Success: - description: Success Response - content: - application/json: - schema: - $ref: '#/components/schemas/SuccessResponse' - ClientError: - description: 400 response - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - NotFound: - description: 404 response - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - ServerError: - description: 500 response - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - schemas: - Empty: - title: Empty Schema - type: object - Error: - type: object - properties: - error: - type: string - SuccessResponse: - title: Success Response Body - type: object - properties: - status: - type: string - description: HTTP Status code returned by backend - time: - type: string - description: Time in milliseconds to complete request - hits: - type: integer - description: Total number of results - results_count: - type: integer - description: Number of result entries returned in this request. Only appears if hits > page_size - results: - type: object - description: Single object where each property of the object is a result -x-amazon-apigateway-policy: - Version: '2012-10-17' - Statement: - - Effect: Allow - Principal: '*' - Action: 'execute-api:Invoke' - Resource: '*' - - Effect: Deny - Principal: '*' - Action: 'execute-api:Invoke' - Resource: '*' - Condition: - StringNotEquals: - 'aws:SourceVpc': ${vpc_id} + "200": + description: OK + content: + text/csv: + schema: + type: array + items: + type: string + "400": + description: "400 error. The specified URL is invalid (does not exist)." + content: + text/csv: + schema: + type: array + items: + type: string + "404": + description: "404 error. An entry with the specified region was not found." + content: + text/csv: + schema: + type: array + items: + type: string + "413": + description: "413 error. Your query has returned is too large." 
+ content: + text/csv: + schema: + type: array + items: + type: string + x-openapi-router-controller: hydrocron_api.controllers.timeseries \ No newline at end of file diff --git a/terraform/bin/config.sh b/terraform/bin/config.sh index 157be9a9..a4972d6e 100644 --- a/terraform/bin/config.sh +++ b/terraform/bin/config.sh @@ -1,71 +1,15 @@ #!/usr/bin/env bash - -set -Eexo pipefail - -# Read in args from command line 4 - -POSITIONAL=() +set -eo pipefail while [[ $# -gt 0 ]] do -key="$1" - -case $key in - --ticket) - ticket="$2" - shift # past argument - shift # past value - ;; - --app-version) - app_version="$2" - shift # past argument - shift # past value - ;; - -v|--tf-venue) - tf_venue="$2" - case $tf_venue in - sit|uat|ops) ;; - *) - echo "tf_venue must be sit, uat, or ops" - exit 1;; - esac - shift # past argument - shift # past value - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; -esac -done -set -- "${POSITIONAL[@]}" # restore positional parameters - - - - -PACKAGE_NAME="hydrocron" -VERSION="0.0.1" - -ROOT_PATH="$PWD" -ZIP_PATH="$ROOT_PATH/$PACKAGE_NAME-$VERSION.zip" - -mkdir -p "$ROOT_PATH/" -rm -f "$ZIP_PATH" -zip -vr9 "$ZIP_PATH" . - - - - +VENUE="$1" +source "$(dirname $BASH_SOURCE)/../environments/$VENUE.env" +export TF_IN_AUTOMATION=true # https://www.terraform.io/cli/config/environment-variables#tf_in_automation +export TF_INPUT=false # https://www.terraform.io/cli/config/environment-variables#tf_input -# https://www.terraform.io/docs/commands/environment-variables.html#tf_in_automation -TF_IN_AUTOMATION=true +export TF_VAR_region="$REGION" +export TF_VAR_stage="$VENUE" -if [[ "${ticket}" ]]; then - set +e - terraform workspace new "${ticket}" - set -e - terraform workspace select "${ticket}" -else - terraform workspace select default -fi +terraform init -reconfigure -backend-config="bucket=$BUCKET" -backend-config="region=$REGION" diff --git a/terraform/hydrocron-main.tf b/terraform/hydrocron-main.tf index 5e4620d7..adf28000 100644 --- a/terraform/hydrocron-main.tf +++ b/terraform/hydrocron-main.tf @@ -32,11 +32,19 @@ resource "aws_api_gateway_deployment" "hydrocron-api-gateway-deployment-test" { } } +data "archive_file" "zip_the_python_code_timeseries" { +type = "zip" +source_dir = "${path.module}/" +output_path = "${path.module}/hydrocron-timeseries.zip" +} + + + resource "aws_lambda_function" "hydrocron_api_lambda_timeseries_test" { function_name = "${local.ec2_resources_name}-function-timeseries-test" - filename = "${path.module}/../dist/${local.name}-${local.version}-test.zip" - source_code_hash = filebase64sha256("${path.module}/../dist/${local.name}-${local.version}-test.zip") role = aws_iam_role.hydrocron-service-role-test.arn + package_type = "Image" + image_uri = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.docker_tag}" timeout = 5 handler = "hydrocron_api.controllers.timeseries.lambda_handler" runtime = "python3.8" @@ -59,31 +67,6 @@ resource "aws_lambda_function" "hydrocron_api_lambda_timeseries_test" { } -resource "aws_lambda_function" "hydrocron_api_lambda_subset_test" { - function_name = "${local.ec2_resources_name}-function-subset-test" - filename = "${path.module}/../dist/${local.name}-${local.version}-test.zip" - source_code_hash = filebase64sha256("${path.module}/../dist/${local.name}-${local.version}-test.zip") - role = aws_iam_role.hydrocron-service-role-test.arn - timeout = 5 - handler = "hydrocron_api.controllers.subset.lambda_handler" - runtime = "python3.8" - - vpc_config { - 
subnet_ids = var.private_subnets - security_group_ids = [var.default_vpc_sg] - } - - environment { - variables = { - DB_HOST=data.aws_ssm_parameter.hydrocron-db-host.value - DB_NAME=data.aws_ssm_parameter.hydrocron-db-name.value - DB_USERNAME=data.aws_ssm_parameter.hydrocron-db-user.value - DB_PASSWORD_SSM_NAME=data.aws_ssm_parameter.hydrocron-db-user-pass.name - } - } - - tags = var.default_tags -} resource "aws_lambda_permission" "allow_hydrocron-timeseries-test" { statement_id = "AllowAPIGatewayInvoke" @@ -96,18 +79,6 @@ resource "aws_lambda_permission" "allow_hydrocron-timeseries-test" { source_arn = "${aws_api_gateway_rest_api.hydrocron-api-gateway-test.execution_arn}/*/*/*" } -resource "aws_lambda_permission" "allow_hydrocron-subset-test" { - statement_id = "AllowAPIGatewayInvoke" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.hydrocron_api_lambda_subset_test.function_name - principal = "apigateway.amazonaws.com" - - # The "/*/*/*" portion grants access from any method on any resource - # within the API Gateway REST API. - source_arn = "${aws_api_gateway_rest_api.hydrocron-api-gateway-test.execution_arn}/*/*/*" -} - - # API Gateway resource "aws_api_gateway_rest_api" "hydrocron-api-gateway-test" { @@ -117,7 +88,6 @@ resource "aws_api_gateway_rest_api" "hydrocron-api-gateway-test" { "${path.module}/api-specification-templates/hydrocron_aws_api.yml", { hydrocron_api_lambda_arn_timeseries_test = aws_lambda_function.hydrocron_api_lambda_timeseries_test.invoke_arn - hydrocron_api_lambda_arn_subset_test = aws_lambda_function.hydrocron_api_lambda_subset_test.invoke_arn vpc_id = var.vpc_id }) parameters = { diff --git a/terraform/main.tf b/terraform/main.tf index ae4001ed..d1dcd768 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -38,5 +38,4 @@ locals { } : var.default_tags } -data "aws_caller_identity" "current" {} - +data "aws_caller_identity" "current" {} \ No newline at end of file
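The README, helper scripts, and Terraform changes above imply an end-to-end local workflow. The sketch below is illustrative only and is not part of the change itself: it assumes `poetry build` produces the expected tarball under `dist/`, that the image is built from an AWS Lambda base image (which the Terraform `package_type = "Image"` setting requires), that an `ngap-service-sit` AWS profile and a `terraform/environments/sit.env` file exist, and that the invocation payload and container name shown are hypothetical. `TF_VAR_docker_tag` is simply the environment-variable form of the `var.docker_tag` referenced in `hydrocron-main.tf`.

```shell script
# Illustrative local workflow implied by this change (see assumptions above); not part of the diff.

# 1. Build the source tarball that build-docker.sh expects under dist/.
poetry build

# 2. Build the image; build-docker.sh prints the local docker tag on stdout.
docker_tag=$(./docker/build-docker.sh -n podaac-hydrocron -v 1.0.0-alpha.3)

# 3. Optionally smoke-test the container with the Lambda Runtime Interface Emulator,
#    per "Testing Lambda container images locally" linked from the README.
docker run --rm -d -p 9000:8080 --name hydrocron-local "$docker_tag"   # container name is arbitrary
sleep 2
curl -s "http://localhost:9000/2015-03-31/functions/function/invocations" \
  -d '{"body": {"feature": "Reach", "start_time": "2022-08-04T00:00:00Z", "end_time": "2022-08-23T00:00:00Z"}}'   # hypothetical payload
docker rm -f hydrocron-local

# 4. Push the image to the venue's ECR repository (push-docker-ecr.sh creates the repository if needed).
./docker/push-docker-ecr.sh -v sit -t "$docker_tag"

# 5. Hand the tag to Terraform, which now deploys the Lambda from the ECR image via var.docker_tag.
#    Backend and venue configuration (terraform init) is handled by terraform/bin/config.sh in CI
#    and is omitted here.
export TF_VAR_docker_tag="$docker_tag"
terraform -chdir=terraform apply
```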