diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e94e4191e8a..cb896892d3c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -658,6 +658,8 @@ repos: ^docs/exts/removemarktransform.py$| ^newsfragments/41761.significant.rst$| ^scripts/ci/pre_commit/vendor_k8s_json_schema.py$| + ^scripts/ci/docker-compose/integration-keycloak.yml$| + ^scripts/ci/docker-compose/keycloak/keycloak-entrypoint.sh$| ^tests/| ^providers/tests/| ^.pre-commit-config\.yaml$| diff --git a/contributing-docs/testing/integration_tests.rst b/contributing-docs/testing/integration_tests.rst index 322298d4f00c..ea9dfb7e9529 100644 --- a/contributing-docs/testing/integration_tests.rst +++ b/contributing-docs/testing/integration_tests.rst @@ -49,39 +49,41 @@ The following integrations are available: .. BEGIN AUTO-GENERATED INTEGRATION LIST -+--------------+----------------------------------------------------+ -| Identifier | Description | -+==============+====================================================+ -| cassandra | Integration required for Cassandra hooks. | -+--------------+----------------------------------------------------+ -| celery | Integration required for Celery executor tests. | -+--------------+----------------------------------------------------+ -| drill | Integration required for drill operator and hook. | -+--------------+----------------------------------------------------+ -| kafka | Integration required for Kafka hooks. | -+--------------+----------------------------------------------------+ -| kerberos | Integration that provides Kerberos authentication. | -+--------------+----------------------------------------------------+ -| mongo | Integration required for MongoDB hooks. | -+--------------+----------------------------------------------------+ -| mssql | Integration required for mssql hooks. | -+--------------+----------------------------------------------------+ -| openlineage | Integration required for Openlineage hooks. 
| -+--------------+----------------------------------------------------+ -| otel | Integration required for OTEL/opentelemetry hooks. | -+--------------+----------------------------------------------------+ -| pinot | Integration required for Apache Pinot hooks. | -+--------------+----------------------------------------------------+ -| qdrant | Integration required for Qdrant tests. | -+--------------+----------------------------------------------------+ -| redis | Integration required for Redis tests. | -+--------------+----------------------------------------------------+ -| statsd | Integration required for Statsd hooks. | -+--------------+----------------------------------------------------+ -| trino | Integration required for Trino hooks. | -+--------------+----------------------------------------------------+ -| ydb | Integration required for YDB tests. | -+--------------+----------------------------------------------------+ ++--------------+-------------------------------------------------------+ +| Identifier | Description | ++==============+=======================================================+ +| cassandra | Integration required for Cassandra hooks. | ++--------------+-------------------------------------------------------+ +| celery | Integration required for Celery executor tests. | ++--------------+-------------------------------------------------------+ +| drill | Integration required for drill operator and hook. | ++--------------+-------------------------------------------------------+ +| kafka | Integration required for Kafka hooks. | ++--------------+-------------------------------------------------------+ +| kerberos | Integration that provides Kerberos authentication. | ++--------------+-------------------------------------------------------+ +| keycloak | Integration for manual testing of multi-team Airflow. | ++--------------+-------------------------------------------------------+ +| mongo | Integration required for MongoDB hooks. 
| ++--------------+-------------------------------------------------------+ +| mssql | Integration required for mssql hooks. | ++--------------+-------------------------------------------------------+ +| openlineage | Integration required for Openlineage hooks. | ++--------------+-------------------------------------------------------+ +| otel | Integration required for OTEL/opentelemetry hooks. | ++--------------+-------------------------------------------------------+ +| pinot | Integration required for Apache Pinot hooks. | ++--------------+-------------------------------------------------------+ +| qdrant | Integration required for Qdrant tests. | ++--------------+-------------------------------------------------------+ +| redis | Integration required for Redis tests. | ++--------------+-------------------------------------------------------+ +| statsd | Integration required for Statsd hooks. | ++--------------+-------------------------------------------------------+ +| trino | Integration required for Trino hooks. | ++--------------+-------------------------------------------------------+ +| ydb | Integration required for YDB tests. | ++--------------+-------------------------------------------------------+ .. END AUTO-GENERATED INTEGRATION LIST' diff --git a/dev/breeze/doc/images/output-commands.svg b/dev/breeze/doc/images/output-commands.svg index f80a72a9fbc5..f67cd5795e40 100644 --- a/dev/breeze/doc/images/output-commands.svg +++ b/dev/breeze/doc/images/output-commands.svg @@ -305,8 +305,8 @@ (>3.9< | 3.10 | 3.11 | 3.12)                                 [default: 3.9]                                               --integrationIntegration(s) to enable when running (can be more than one).                        
-(all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql  -| openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)                +(all | all-testable | cassandra | celery | drill | kafka | kerberos | keycloak |     +mongo | mssql | openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)  --standalone-dag-processorRun standalone dag processor for start-airflow. --database-isolationRun airflow in database isolation mode. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_shell.svg b/dev/breeze/doc/images/output_shell.svg index b8eb1713c7f6..58c359bd96b4 100644 --- a/dev/breeze/doc/images/output_shell.svg +++ b/dev/breeze/doc/images/output_shell.svg @@ -544,8 +544,8 @@ (>3.9< | 3.10 | 3.11 | 3.12)                                 [default: 3.9]                                               --integrationIntegration(s) to enable when running (can be more than one).                        -(all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql  -| openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)                +(all | all-testable | cassandra | celery | drill | kafka | kerberos | keycloak |     +mongo | mssql | openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)  --standalone-dag-processorRun standalone dag processor for start-airflow. --database-isolationRun airflow in database isolation mode. 
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_shell.txt b/dev/breeze/doc/images/output_shell.txt index 051dd34cd10f..348aac661fac 100644 --- a/dev/breeze/doc/images/output_shell.txt +++ b/dev/breeze/doc/images/output_shell.txt @@ -1 +1 @@ -fd70e0f17940f32fbc0579e8f77fc6c4 +fef5a76133d85a06a67932b9c394b0e1 diff --git a/dev/breeze/doc/images/output_start-airflow.svg b/dev/breeze/doc/images/output_start-airflow.svg index e4336a4f931e..4cdfd78ad47c 100644 --- a/dev/breeze/doc/images/output_start-airflow.svg +++ b/dev/breeze/doc/images/output_start-airflow.svg @@ -408,8 +408,8 @@ [default: 3.9]                                               --platformPlatform for Airflow image.(linux/amd64 | linux/arm64) --integrationIntegration(s) to enable when running (can be more than one).                        -(all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql  -| openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)                +(all | all-testable | cassandra | celery | drill | kafka | kerberos | keycloak |     +mongo | mssql | openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)  --standalone-dag-processorRun standalone dag processor for start-airflow. --database-isolationRun airflow in database isolation mode. --load-example-dags-eEnable configuration to load example DAGs when starting Airflow. 
diff --git a/dev/breeze/doc/images/output_start-airflow.txt b/dev/breeze/doc/images/output_start-airflow.txt index 5811c7ec6662..4618c5d3b44a 100644 --- a/dev/breeze/doc/images/output_start-airflow.txt +++ b/dev/breeze/doc/images/output_start-airflow.txt @@ -1 +1 @@ -e63a3289a1be34b82c28b606dee0c472 +02160e5d799a77830ac522c628e90aed diff --git a/dev/breeze/doc/images/output_testing_integration-tests.svg b/dev/breeze/doc/images/output_testing_integration-tests.svg index 07a2ef88b13e..7bac13970e42 100644 --- a/dev/breeze/doc/images/output_testing_integration-tests.svg +++ b/dev/breeze/doc/images/output_testing_integration-tests.svg @@ -218,8 +218,8 @@ ╭─ Test environment ───────────────────────────────────────────────────────────────────────────────────────────────────╮ --integrationIntegration(s) to enable when running (can be more than one).        (all | all-testable | cassandra | celery | drill | kafka | kerberos  -| mongo | mssql | openlineage | otel | pinot | qdrant | redis |      -statsd | trino | ydb)                                                +| keycloak | mongo | mssql | openlineage | otel | pinot | qdrant |   +redis | statsd | trino | ydb)                                        --backend-bDatabase backend to use. 
If 'none' is chosen, Breeze will start with an invalid database configuration, meaning there will be no database available, and any attempts to connect to the Airflow database will  diff --git a/dev/breeze/doc/images/output_testing_integration-tests.txt b/dev/breeze/doc/images/output_testing_integration-tests.txt index 81d45b246b52..3304c7f76020 100644 --- a/dev/breeze/doc/images/output_testing_integration-tests.txt +++ b/dev/breeze/doc/images/output_testing_integration-tests.txt @@ -1 +1 @@ -5436131180cd928292c8234d15e0496f +633eca64e9397259c7400814ce4c2877 diff --git a/dev/breeze/doc/images/output_testing_tests.svg b/dev/breeze/doc/images/output_testing_tests.svg index c20e2ef16b24..ac7e81bcbd5b 100644 --- a/dev/breeze/doc/images/output_testing_tests.svg +++ b/dev/breeze/doc/images/output_testing_tests.svg @@ -497,8 +497,8 @@ ╭─ Test environment ───────────────────────────────────────────────────────────────────────────────────────────────────╮ --integrationIntegration(s) to enable when running (can be more than one).        (all | all-testable | cassandra | celery | drill | kafka | kerberos  -| mongo | mssql | openlineage | otel | pinot | qdrant | redis |      -statsd | trino | ydb)                                                +| keycloak | mongo | mssql | openlineage | otel | pinot | qdrant |   +redis | statsd | trino | ydb)                                        --backend-bDatabase backend to use. 
If 'none' is chosen, Breeze will start with an invalid database configuration, meaning there will be no database available, and any attempts to connect to the Airflow database will  diff --git a/dev/breeze/doc/images/output_testing_tests.txt b/dev/breeze/doc/images/output_testing_tests.txt index d19f6cf5abda..bc12c1fb42a0 100644 --- a/dev/breeze/doc/images/output_testing_tests.txt +++ b/dev/breeze/doc/images/output_testing_tests.txt @@ -1 +1 @@ -15002aa129ce25039921f800fb1cf744 +4e128855ff2df624e0fc59e229b0973d diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 24a61dc651a3..03fbe55a2e27 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -62,10 +62,14 @@ ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] DEFAULT_PYTHON_MAJOR_MINOR_VERSION = ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS[0] ALLOWED_ARCHITECTURES = [Architecture.X86_64, Architecture.ARM] -# Database Backends used when starting Breeze. The "none" value means that invalid configuration -# Is set and no database started - access to a database will fail. -ALLOWED_BACKENDS = ["sqlite", "mysql", "postgres", "none"] -ALLOWED_PROD_BACKENDS = ["mysql", "postgres"] +# Database Backends used when starting Breeze. The "none" value means that the configuration is invalid. +# No database will be started - access to a database will fail. 
+SQLITE_BACKEND = "sqlite" +MYSQL_BACKEND = "mysql" +POSTGRES_BACKEND = "postgres" +NONE_BACKEND = "none" +ALLOWED_BACKENDS = [SQLITE_BACKEND, MYSQL_BACKEND, POSTGRES_BACKEND, NONE_BACKEND] +ALLOWED_PROD_BACKENDS = [MYSQL_BACKEND, POSTGRES_BACKEND] DEFAULT_BACKEND = ALLOWED_BACKENDS[0] CELERY_INTEGRATION = "celery" TESTABLE_INTEGRATIONS = [ @@ -85,7 +89,11 @@ DISABLE_TESTABLE_INTEGRATIONS_FROM_CI = [ "mssql", ] -OTHER_INTEGRATIONS = ["statsd", "otel", "openlineage"] +KEYCLOAK_INTEGRATION = "keycloak" +STATSD_INTEGRATION = "statsd" +OTEL_INTEGRATION = "otel" +OPENLINEAGE_INTEGRATION = "openlineage" +OTHER_INTEGRATIONS = [STATSD_INTEGRATION, OTEL_INTEGRATION, OPENLINEAGE_INTEGRATION, KEYCLOAK_INTEGRATION] ALLOWED_DEBIAN_VERSIONS = ["bookworm"] ALL_INTEGRATIONS = sorted( [ diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index 2d3cadbf1971..0c6026437517 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -43,6 +43,7 @@ EDGE_EXECUTOR, FASTAPI_API_HOST_PORT, FLOWER_HOST_PORT, + KEYCLOAK_INTEGRATION, MOUNT_ALL, MOUNT_PROVIDERS_AND_TESTS, MOUNT_REMOVE, @@ -50,6 +51,7 @@ MOUNT_TESTS, MSSQL_HOST_PORT, MYSQL_HOST_PORT, + POSTGRES_BACKEND, POSTGRES_HOST_PORT, REDIS_HOST_PORT, SSH_PORT, @@ -665,3 +667,14 @@ def __post_init__(self): self.airflow_constraints_reference = self.default_constraints_branch if self.providers_constraints_reference == "default": self.providers_constraints_reference = self.default_constraints_branch + + if ( + self.backend + and self.integration + and KEYCLOAK_INTEGRATION in self.integration + and not self.backend == POSTGRES_BACKEND + ): + get_console().print( + "[error]When using the Keycloak integration the backend must be Postgres![/]\n" + ) + sys.exit(2) diff --git a/scripts/ci/docker-compose/integration-keycloak.yml b/scripts/ci/docker-compose/integration-keycloak.yml new file mode 100644 index 
000000000000..7373c5fb6177
--- /dev/null
+++ b/scripts/ci/docker-compose/integration-keycloak.yml
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+# Keycloak integration: adds a "keycloak" service backed by the Postgres database.
+# The "postgres" and "airflow" entries below carry no image, so they are expected to be
+# merged with the compose files that define those services.
+services:
+  keycloak:
+    image: quay.io/keycloak/keycloak:23.0.6
+    labels:
+      breeze.description: "Integration for manual testing of multi-team Airflow."
+    # Custom entrypoint, bind-mounted into the container by the "volumes" entry below
+    entrypoint: /opt/keycloak/keycloak-entrypoint.sh
+    environment:
+      KC_HOSTNAME: localhost
+      # Numbers and booleans are quoted so that YAML passes them through as plain strings
+      KC_HOSTNAME_PORT: "48080"
+      KC_HOSTNAME_STRICT_BACKCHANNEL: "false"
+      KC_HTTP_ENABLED: "true"
+      KC_HOSTNAME_STRICT: "true"
+
+      KEYCLOAK_ADMIN: admin
+      KEYCLOAK_ADMIN_PASSWORD: admin
+
+      KC_DB: postgres
+      KC_DB_URL: jdbc:postgresql://postgres/keycloak
+      KC_DB_USERNAME: keycloak
+      KC_DB_PASSWORD: keycloak
+    ports:
+      - "48080:48080"
+    restart: always
+    depends_on:
+      postgres:
+        condition: service_healthy
+    volumes:
+      - ./keycloak/keycloak-entrypoint.sh:/opt/keycloak/keycloak-entrypoint.sh
+
+  postgres:
+    volumes:
+      - ./keycloak/init-keycloak-db.sh:/docker-entrypoint-initdb.d/init-keycloak-db.sh
+    environment:
+      # Read by init-keycloak-db.sh to create the Keycloak role and database
+      KC_POSTGRES_DB: keycloak
+      KC_POSTGRES_USER: keycloak
+      KC_POSTGRES_PASSWORD: keycloak
+    healthcheck:
+      # Passes only once psql can connect as the "keycloak" role
+      test: ["CMD", "psql", "-h", "localhost", "-U", "keycloak"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+
+  airflow:
+    depends_on:
+      - keycloak
diff --git a/scripts/ci/docker-compose/keycloak/init-keycloak-db.sh b/scripts/ci/docker-compose/keycloak/init-keycloak-db.sh
new file mode 100755
index 000000000000..47df6aede204
--- /dev/null
+++ b/scripts/ci/docker-compose/keycloak/init-keycloak-db.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu
+
+psql -v ON_ERROR_STOP=1 --username "${POSTGRES_USER}" > /dev/null <<-EOSQL
+  CREATE USER ${KC_POSTGRES_USER};
+  ALTER USER ${KC_POSTGRES_USER} WITH PASSWORD '${KC_POSTGRES_PASSWORD}';
+  CREATE DATABASE ${KC_POSTGRES_DB};
+  GRANT ALL PRIVILEGES ON DATABASE ${KC_POSTGRES_DB} TO ${KC_POSTGRES_USER};
+EOSQL
diff --git a/scripts/ci/docker-compose/keycloak/keycloak-entrypoint.sh b/scripts/ci/docker-compose/keycloak/keycloak-entrypoint.sh
new file mode 100755
index 000000000000..e699d858346a
--- /dev/null
+++ b/scripts/ci/docker-compose/keycloak/keycloak-entrypoint.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Entrypoint of the Keycloak container used by the Breeze "keycloak" integration. Starts Keycloak
+# in dev mode, waits for its HTTP port and then lifts the SSL requirement of the "master" realm.
+#
+# Environment:
+#   KC_HOSTNAME_PORT         (required) port Keycloak listens on
+#   KEYCLOAK_ADMIN           admin user passed to kcadm.sh (default: admin)
+#   KEYCLOAK_ADMIN_PASSWORD  admin password passed to kcadm.sh (default: admin)
+
+# Fail fast with a clear message instead of starting Keycloak with an empty --http-port
+http_port="${KC_HOSTNAME_PORT:?KC_HOSTNAME_PORT must be set}"
+admin_user="${KEYCLOAK_ADMIN:-admin}"
+admin_password="${KEYCLOAK_ADMIN_PASSWORD:-admin}"
+server_url="http://localhost:${http_port}"
+
+# We exit in case cd fails
+cd /opt/keycloak/bin/ || exit 1
+
+# Start Keycloak in the background
+./kc.sh start-dev --http-port="${http_port}" &
+keycloak_pid=$!
+
+# Wait for Keycloak to be ready
+echo "Waiting for Keycloak to start on port ${http_port}..."
+while ! (echo > "/dev/tcp/localhost/${http_port}") 2>/dev/null; do
+  # Stop waiting if Keycloak died during startup - otherwise this loop would never end
+  if ! kill -0 "${keycloak_pid}" 2>/dev/null; then
+    wait "${keycloak_pid}"
+    echo "Keycloak exited with status $? before it started listening" >&2
+    exit 1
+  fi
+  echo "keycloak still not started"
+  sleep 5
+done
+sleep 3
+echo "Keycloak is running (probably...)"
+
+# The below commands are used to disable the ssl requirement to use the admin panel of keycloak
+echo "Configuring admin console access without ssl/https"
+# Get credentials to make the below update to the realm settings
+if ./kcadm.sh config credentials --server "${server_url}" --realm master \
+    --user "${admin_user}" --password "${admin_password}" &&
+  ./kcadm.sh update realms/master -s sslRequired=NONE --server "${server_url}"; then
+  echo "Configuring complete!"
+else
+  # Best effort: keep Keycloak running, but do not claim that the configuration succeeded
+  echo "Configuring admin console access failed" >&2
+fi
+
+# Keep the container running for as long as Keycloak runs and propagate its exit status
+wait "${keycloak_pid}"