diff --git a/.github/actions/install-pre-commit/action.yml b/.github/actions/install-pre-commit/action.yml index aa1dee87aa5f..af8236fc4bd0 100644 --- a/.github/actions/install-pre-commit/action.yml +++ b/.github/actions/install-pre-commit/action.yml @@ -24,7 +24,7 @@ inputs: default: 3.9 uv-version: description: 'uv version to use' - default: 0.4.30 + default: 0.5.2 pre-commit-version: description: 'pre-commit version to use' default: 4.0.1 @@ -36,11 +36,10 @@ runs: steps: - name: Install pre-commit, uv, and pre-commit-uv shell: bash - run: > - pip install - pre-commit==${{inputs.pre-commit-version}} - uv==${{inputs.uv-version}} - pre-commit-uv==${{inputs.pre-commit-uv-version}} + run: | + pip install uv==${{inputs.uv-version}} || true + uv tool install pre-commit==${{inputs.pre-commit-version}} --with uv==${{inputs.uv-version}} \ + --with pre-commit-uv==${{inputs.pre-commit-uv-version}} - name: Cache pre-commit envs uses: actions/cache@v4 with: diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 22f5d0652c9b..14f0628e454b 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -196,10 +196,19 @@ jobs: working-directory: ./clients/python - name: "Install source version of required packages" run: | - breeze release-management prepare-provider-packages fab standard common.sql --package-format \ - wheel --skip-tag-check --version-suffix-for-pypi dev0 - pip install . dist/apache_airflow_providers_fab-*.whl \ - dist/apache_airflow_providers_standard-*.whl dist/apache_airflow_providers_common_sql-*.whl + breeze release-management prepare-provider-packages \ + fab \ + standard \ + common.sql \ + sqlite \ + --package-format wheel \ + --skip-tag-check \ + --version-suffix-for-pypi dev0 + pip install . 
\ + dist/apache_airflow_providers_fab-*.whl \ + dist/apache_airflow_providers_standard-*.whl \ + dist/apache_airflow_providers_common_sql-*.whl \ + dist/apache_airflow_providers_sqlite-*.whl breeze release-management prepare-task-sdk-package --package-format wheel pip install ./dist/apache_airflow_task_sdk-*.whl - name: "Install Python client" diff --git a/.github/workflows/check-providers.yml b/.github/workflows/check-providers.yml index 3faf19b61f53..a0bf2d316f82 100644 --- a/.github/workflows/check-providers.yml +++ b/.github/workflows/check-providers.yml @@ -40,7 +40,7 @@ on: # yamllint disable-line rule:truthy description: "Whether to upgrade to newer dependencies" required: true type: string - affected-providers-list-as-string: + selected-providers-list-as-string: description: "List of affected providers as string" required: false type: string @@ -54,7 +54,7 @@ on: # yamllint disable-line rule:truthy description: "List of parallel provider test types as string" required: true type: string - skip-provider-tests: + skip-providers-tests: description: "Whether to skip provider tests (true/false)" required: true type: string @@ -163,7 +163,7 @@ jobs: run: > breeze release-management prepare-provider-packages --include-not-ready-providers --version-suffix-for-pypi dev0 --package-format sdist - ${{ inputs.affected-providers-list-as-string }} + ${{ inputs.selected-providers-list-as-string }} - name: "Prepare airflow package: sdist" run: > breeze release-management prepare-airflow-package @@ -187,7 +187,7 @@ jobs: --providers-constraints-location /files/constraints-${{env.PYTHON_MAJOR_MINOR_VERSION}}/constraints-source-providers-${{env.PYTHON_MAJOR_MINOR_VERSION}}.txt --run-in-parallel - if: inputs.affected-providers-list-as-string == '' + if: inputs.selected-providers-list-as-string == '' - name: "Install affected provider packages and airflow via sdist files" run: > breeze release-management install-provider-packages @@ -198,7 +198,7 @@ jobs: --providers-constraints-location /files/constraints-${{env.PYTHON_MAJOR_MINOR_VERSION}}/constraints-source-providers-${{env.PYTHON_MAJOR_MINOR_VERSION}}.txt --run-in-parallel - if: inputs.affected-providers-list-as-string != '' + if: inputs.selected-providers-list-as-string != '' providers-compatibility-checks: timeout-minutes: 80 @@ -218,7 +218,7 @@ jobs: VERSION_SUFFIX_FOR_PYPI: "dev0" VERBOSE: "true" CLEAN_AIRFLOW_INSTALLATION: "${{ inputs.canary-run }}" - if: inputs.skip-provider-tests != 'true' + if: inputs.skip-providers-tests != 'true' steps: - name: "Cleanup repo" shell: bash @@ -268,7 +268,7 @@ jobs: Airflow ${{ matrix.airflow-version }}:Python ${{ matrix.python-version }} if: matrix.run-tests == 'true' run: > - breeze testing tests --run-in-parallel + breeze testing providers-tests --run-in-parallel --parallel-test-types "${{ inputs.providers-test-types-list-as-string }}" --use-packages-from-dist --package-format wheel diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9f61e356b3b2..96996a42ee18 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,86 +55,89 @@ jobs: env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: - image-tag: ${{ github.event.pull_request.head.sha || github.sha }} - docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} - disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} - affected-providers-list-as-string: >- - ${{ steps.selective-checks.outputs.affected-providers-list-as-string }} - upgrade-to-newer-dependencies: ${{ 
steps.selective-checks.outputs.upgrade-to-newer-dependencies }} - python-versions: ${{ steps.selective-checks.outputs.python-versions }} - python-versions-list-as-string: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} all-python-versions-list-as-string: >- ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }} - default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} - kubernetes-versions-list-as-string: >- - ${{ steps.selective-checks.outputs.kubernetes-versions-list-as-string }} - kubernetes-combos-list-as-string: >- - ${{ steps.selective-checks.outputs.kubernetes-combos-list-as-string }} - default-kubernetes-version: ${{ steps.selective-checks.outputs.default-kubernetes-version }} - postgres-versions: ${{ steps.selective-checks.outputs.postgres-versions }} - default-postgres-version: ${{ steps.selective-checks.outputs.default-postgres-version }} - mysql-versions: ${{ steps.selective-checks.outputs.mysql-versions }} - default-mysql-version: ${{ steps.selective-checks.outputs.default-mysql-version }} - default-helm-version: ${{ steps.selective-checks.outputs.default-helm-version }} - default-kind-version: ${{ steps.selective-checks.outputs.default-kind-version }} - force-pip: ${{ steps.selective-checks.outputs.force-pip }} - full-tests-needed: ${{ steps.selective-checks.outputs.full-tests-needed }} - parallel-test-types-list-as-string: >- - ${{ steps.selective-checks.outputs.parallel-test-types-list-as-string }} - providers-test-types-list-as-string: >- - ${{ steps.selective-checks.outputs.providers-test-types-list-as-string }} - separate-test-types-list-as-string: >- - ${{ steps.selective-checks.outputs.separate-test-types-list-as-string }} - include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }} - postgres-exclude: ${{ steps.selective-checks.outputs.postgres-exclude }} - mysql-exclude: ${{ steps.selective-checks.outputs.mysql-exclude }} - sqlite-exclude: ${{ steps.selective-checks.outputs.sqlite-exclude }} - skip-provider-tests: ${{ steps.selective-checks.outputs.skip-provider-tests }} - run-tests: ${{ steps.selective-checks.outputs.run-tests }} - run-amazon-tests: ${{ steps.selective-checks.outputs.run-amazon-tests }} - run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} - run-www-tests: ${{ steps.selective-checks.outputs.run-www-tests }} - run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} - run-task-sdk-tests: ${{ steps.selective-checks.outputs.run-task-sdk-tests }} basic-checks-only: ${{ steps.selective-checks.outputs.basic-checks-only }} + build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} + canary-run: ${{ steps.source-run-info.outputs.canary-run }} + chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} - prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} - docs-build: ${{ steps.selective-checks.outputs.docs-build }} - mypy-checks: ${{ steps.selective-checks.outputs.mypy-checks }} - needs-mypy: ${{ steps.selective-checks.outputs.needs-mypy }} - needs-helm-tests: ${{ steps.selective-checks.outputs.needs-helm-tests }} - needs-api-tests: ${{ steps.selective-checks.outputs.needs-api-tests }} - needs-api-codegen: ${{ steps.selective-checks.outputs.needs-api-codegen }} + core-test-types-list-as-string: >- + ${{ steps.selective-checks.outputs.core-test-types-list-as-string }} + debug-resources: 
${{ steps.selective-checks.outputs.debug-resources }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} + default-helm-version: ${{ steps.selective-checks.outputs.default-helm-version }} + default-kind-version: ${{ steps.selective-checks.outputs.default-kind-version }} + default-kubernetes-version: ${{ steps.selective-checks.outputs.default-kubernetes-version }} + default-mysql-version: ${{ steps.selective-checks.outputs.default-mysql-version }} + default-postgres-version: ${{ steps.selective-checks.outputs.default-postgres-version }} + default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} + disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} + docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} + docs-build: ${{ steps.selective-checks.outputs.docs-build }} docs-list-as-string: ${{ steps.selective-checks.outputs.docs-list-as-string }} - skip-pre-commits: ${{ steps.selective-checks.outputs.skip-pre-commits }} - providers-compatibility-checks: ${{ steps.selective-checks.outputs.providers-compatibility-checks }} excluded-providers-as-string: ${{ steps.selective-checks.outputs.excluded-providers-as-string }} + force-pip: ${{ steps.selective-checks.outputs.force-pip }} + full-tests-needed: ${{ steps.selective-checks.outputs.full-tests-needed }} + has-migrations: ${{ steps.selective-checks.outputs.has-migrations }} helm-test-packages: ${{ steps.selective-checks.outputs.helm-test-packages }} - debug-resources: ${{ steps.selective-checks.outputs.debug-resources }} - runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} - runs-on-as-json-docs-build: ${{ steps.selective-checks.outputs.runs-on-as-json-docs-build }} - runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} - runs-on-as-json-self-hosted: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted }} - runs-on-as-json-self-hosted-asf: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted-asf }} - is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} + image-tag: ${{ github.event.pull_request.head.sha || github.sha }} + in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} + include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }} + individual-providers-test-types-list-as-string: >- + ${{ steps.selective-checks.outputs.individual-providers-test-types-list-as-string }} is-airflow-runner: ${{ steps.selective-checks.outputs.is-airflow-runner }} is-amd-runner: ${{ steps.selective-checks.outputs.is-amd-runner }} is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }} - is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} + is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} + is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} + kubernetes-combos-list-as-string: >- + ${{ steps.selective-checks.outputs.kubernetes-combos-list-as-string }} + kubernetes-versions-list-as-string: >- + ${{ steps.selective-checks.outputs.kubernetes-versions-list-as-string }} latest-versions-only: ${{ steps.selective-checks.outputs.latest-versions-only }} - chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} - has-migrations: ${{ 
steps.selective-checks.outputs.has-migrations }} + mypy-checks: ${{ steps.selective-checks.outputs.mypy-checks }} + mysql-exclude: ${{ steps.selective-checks.outputs.mysql-exclude }} + mysql-versions: ${{ steps.selective-checks.outputs.mysql-versions }} + needs-api-codegen: ${{ steps.selective-checks.outputs.needs-api-codegen }} + needs-api-tests: ${{ steps.selective-checks.outputs.needs-api-tests }} + needs-helm-tests: ${{ steps.selective-checks.outputs.needs-helm-tests }} + needs-mypy: ${{ steps.selective-checks.outputs.needs-mypy }} only-new-ui-files: ${{ steps.selective-checks.outputs.only-new-ui-files }} - source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} + postgres-exclude: ${{ steps.selective-checks.outputs.postgres-exclude }} + postgres-versions: ${{ steps.selective-checks.outputs.postgres-versions }} + prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} + providers-compatibility-checks: ${{ steps.selective-checks.outputs.providers-compatibility-checks }} + providers-test-types-list-as-string: >- + ${{ steps.selective-checks.outputs.providers-test-types-list-as-string }} pull-request-labels: ${{ steps.source-run-info.outputs.pr-labels }} - in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} - build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} - testable-integrations: ${{ steps.selective-checks.outputs.testable-integrations }} - canary-run: ${{ steps.source-run-info.outputs.canary-run }} + python-versions-list-as-string: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} + python-versions: ${{ steps.selective-checks.outputs.python-versions }} + run-amazon-tests: ${{ steps.selective-checks.outputs.run-amazon-tests }} run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} + run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} + run-task-sdk-tests: ${{ steps.selective-checks.outputs.run-task-sdk-tests }} + run-system-tests: ${{ steps.selective-checks.outputs.run-system-tests }} + run-tests: ${{ steps.selective-checks.outputs.run-tests }} + run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} + run-www-tests: ${{ steps.selective-checks.outputs.run-www-tests }} + runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} + runs-on-as-json-docs-build: ${{ steps.selective-checks.outputs.runs-on-as-json-docs-build }} + runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} + runs-on-as-json-self-hosted-asf: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted-asf }} + runs-on-as-json-self-hosted: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted }} + selected-providers-list-as-string: >- + ${{ steps.selective-checks.outputs.selected-providers-list-as-string }} + skip-pre-commits: ${{ steps.selective-checks.outputs.skip-pre-commits }} + skip-providers-tests: ${{ steps.selective-checks.outputs.skip-providers-tests }} + source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} + sqlite-exclude: ${{ steps.selective-checks.outputs.sqlite-exclude }} + test-groups: ${{ steps.selective-checks.outputs.test-groups }} + testable-core-integrations: ${{ steps.selective-checks.outputs.testable-core-integrations }} + testable-providers-integrations: ${{ steps.selective-checks.outputs.testable-providers-integrations }} + upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} steps: - name: "Cleanup repo" 
shell: bash @@ -328,7 +331,7 @@ jobs: packages: read secrets: inherit if: > - needs.build-info.outputs.skip-provider-tests != 'true' && + needs.build-info.outputs.skip-providers-tests != 'true' && needs.build-info.outputs.latest-versions-only != 'true' with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} @@ -336,9 +339,9 @@ jobs: canary-run: ${{ needs.build-info.outputs.canary-run }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} - affected-providers-list-as-string: ${{ needs.build-info.outputs.affected-providers-list-as-string }} + selected-providers-list-as-string: ${{ needs.build-info.outputs.selected-providers-list-as-string }} providers-compatibility-checks: ${{ needs.build-info.outputs.providers-compatibility-checks }} - skip-provider-tests: ${{ needs.build-info.outputs.skip-provider-tests }} + skip-providers-tests: ${{ needs.build-info.outputs.skip-providers-tests }} python-versions: ${{ needs.build-info.outputs.python-versions }} providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} @@ -374,17 +377,19 @@ jobs: backend: "postgres" test-name: "Postgres" test-scope: "DB" + test-groups: ${{ needs.build-info.outputs.test-groups }} image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.postgres-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} excludes: ${{ needs.build-info.outputs.postgres-exclude }} - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-migration-tests: "true" run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.only-new-ui-files != 'true' + if: needs.build-info.outputs.run-tests == 'true' tests-mysql: name: "MySQL tests" @@ -399,17 +404,19 @@ jobs: backend: "mysql" test-name: "MySQL" test-scope: "DB" + test-groups: ${{ needs.build-info.outputs.test-groups }} image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.mysql-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} excludes: ${{ needs.build-info.outputs.mysql-exclude }} - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} run-migration-tests: "true" debug-resources: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.only-new-ui-files != 'true' + 
if: needs.build-info.outputs.run-tests == 'true' tests-sqlite: name: "Sqlite tests" @@ -425,18 +432,20 @@ jobs: test-name: "Sqlite" test-name-separator: "" test-scope: "DB" + test-groups: ${{ needs.build-info.outputs.test-groups }} image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for sqlite backend-versions: "['']" excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} excludes: ${{ needs.build-info.outputs.sqlite-exclude }} - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} run-migration-tests: "true" debug-resources: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.only-new-ui-files != 'true' + if: needs.build-info.outputs.run-tests == 'true' tests-non-db: name: "Non-DB tests" @@ -452,17 +461,19 @@ jobs: test-name: "" test-name-separator: "" test-scope: "Non-DB" + test-groups: ${{ needs.build-info.outputs.test-groups }} image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for non-db backend-versions: "['']" excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} excludes: ${{ needs.build-info.outputs.sqlite-exclude }} - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.only-new-ui-files != 'true' + if: needs.build-info.outputs.run-tests == 'true' tests-special: name: "Special tests" @@ -478,9 +489,11 @@ jobs: needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' || needs.build-info.outputs.full-tests-needed == 'true') with: + test-groups: ${{ needs.build-info.outputs.test-groups }} runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} image-tag: ${{ needs.build-info.outputs.image-tag }} - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} python-versions: ${{ needs.build-info.outputs.python-versions }} @@ -490,10 +503,10 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} - tests-integration: - name: Integration Tests + 
tests-integration-system: + name: Integration and System Tests needs: [build-info, wait-for-ci-images] - uses: ./.github/workflows/integration-tests.yml + uses: ./.github/workflows/integration-system-tests.yml permissions: contents: read packages: read @@ -501,17 +514,19 @@ jobs: with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} image-tag: ${{ needs.build-info.outputs.image-tag }} - testable-integrations: ${{ needs.build-info.outputs.testable-integrations }} + testable-core-integrations: ${{ needs.build-info.outputs.testable-core-integrations }} + testable-providers-integrations: ${{ needs.build-info.outputs.testable-providers-integrations }} + run-system-tests: ${{ needs.build-info.outputs.run-tests }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} default-postgres-version: ${{ needs.build-info.outputs.default-postgres-version }} default-mysql-version: ${{ needs.build-info.outputs.default-mysql-version }} - skip-provider-tests: ${{ needs.build-info.outputs.skip-provider-tests }} + skip-providers-tests: ${{ needs.build-info.outputs.skip-providers-tests }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} if: needs.build-info.outputs.run-tests == 'true' tests-with-lowest-direct-resolution: - name: "Lowest direct dependency resolution tests" + name: "Lowest direct dependency providers tests" needs: [build-info, wait-for-ci-images] uses: ./.github/workflows/run-unit-tests.yml permissions: @@ -525,13 +540,16 @@ jobs: test-name: "LowestDeps-Postgres" force-lowest-dependencies: "true" test-scope: "All" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: "['${{ needs.build-info.outputs.default-postgres-version }}']" excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ needs.build-info.outputs.separate-test-types-list-as-string }} + core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} + # yamllint disable rule:line-length + providers-test-types-list-as-string: ${{ needs.build-info.outputs.individual-providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} @@ -686,7 +704,7 @@ jobs: - tests-mysql - tests-postgres - tests-non-db - - tests-integration + - tests-integration-system uses: ./.github/workflows/finalize-tests.yml with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 8b26769ff4bc..4c1ec1023fc9 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -75,7 +75,7 @@ jobs: - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" uses: ./.github/actions/prepare_breeze_and_image - name: "Helm Unit Tests: ${{ matrix.helm-test-package }}" - run: breeze testing helm-tests --helm-test-package "${{ matrix.helm-test-package }}" + run: breeze testing helm-tests --test-type "${{ matrix.helm-test-package }}" tests-helm-release: timeout-minutes: 80 diff --git a/.github/workflows/integration-system-tests.yml 
b/.github/workflows/integration-system-tests.yml new file mode 100644 index 000000000000..7fde2ae96836 --- /dev/null +++ b/.github/workflows/integration-system-tests.yml @@ -0,0 +1,200 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: Integration and system tests +on: # yamllint disable-line rule:truthy + workflow_call: + inputs: + runs-on-as-json-public: + description: "The array of labels (in json form) determining public runners." + required: true + type: string + image-tag: + description: "Tag to set for the image" + required: true + type: string + testable-core-integrations: + description: "The list of testable core integrations as JSON array." + required: true + type: string + testable-providers-integrations: + description: "The list of testable providers integrations as JSON array." + required: true + type: string + run-system-tests: + description: "Run system tests (true/false)" + required: true + type: string + default-postgres-version: + description: "Default version of Postgres to use" + required: true + type: string + default-mysql-version: + description: "Default version of MySQL to use" + required: true + type: string + skip-providers-tests: + description: "Skip provider tests (true/false)" + required: true + type: string + run-coverage: + description: "Run coverage (true/false)" + required: true + type: string + default-python-version: + description: "Which version of python should be used by default" + required: true + type: string + debug-resources: + description: "Debug resources (true/false)" + required: true + type: string +jobs: + tests-core-integration: + timeout-minutes: 130 + if: inputs.testable-core-integrations != '[]' + name: "Integration core ${{ matrix.integration }}" + runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + strategy: + fail-fast: false + matrix: + integration: ${{ fromJSON(inputs.testable-core-integrations) }} + env: + IMAGE_TAG: "${{ inputs.image-tag }}" + BACKEND: "postgres" + BACKEND_VERSION: ${{ inputs.default-postgres-version }}" + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + JOB_ID: "integration-core-${{ matrix.integration }}" + SKIP_PROVIDERS_TESTS: "${{ inputs.skip-providers-tests }}" + ENABLE_COVERAGE: "${{ inputs.run-coverage}}" + DEBUG_RESOURCES: "${{ inputs.debug-resources }}" + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USERNAME: ${{ github.actor }} + VERBOSE: "true" + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + - name: 
"Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + uses: ./.github/actions/prepare_breeze_and_image + - name: "Integration: core ${{ matrix.integration }}" + # yamllint disable rule:line-length + run: ./scripts/ci/testing/run_integration_tests_with_retry.sh core "${{ matrix.integration }}" + - name: "Post Tests success" + uses: ./.github/actions/post_tests_success + with: + codecov-token: ${{ secrets.CODECOV_TOKEN }} + python-version: ${{ inputs.default-python-version }} + - name: "Post Tests failure" + uses: ./.github/actions/post_tests_failure + if: failure() + + tests-providers-integration: + timeout-minutes: 130 + if: inputs.testable-providers-integrations != '[]' && inputs.skip-providers-tests != 'true' + name: "Integration: providers ${{ matrix.integration }}" + runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + strategy: + fail-fast: false + matrix: + integration: ${{ fromJSON(inputs.testable-providers-integrations) }} + env: + IMAGE_TAG: "${{ inputs.image-tag }}" + BACKEND: "postgres" + BACKEND_VERSION: ${{ inputs.default-postgres-version }}" + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + JOB_ID: "integration-providers-${{ matrix.integration }}" + SKIP_PROVIDERS_TESTS: "${{ inputs.skip-providers-tests }}" + ENABLE_COVERAGE: "${{ inputs.run-coverage}}" + DEBUG_RESOURCES: "${{ inputs.debug-resources }}" + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USERNAME: ${{ github.actor }} + VERBOSE: "true" + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + uses: ./.github/actions/prepare_breeze_and_image + - name: "Integration: providers ${{ matrix.integration }}" + run: ./scripts/ci/testing/run_integration_tests_with_retry.sh providers "${{ matrix.integration }}" + - name: "Post Tests success" + uses: ./.github/actions/post_tests_success + with: + codecov-token: ${{ secrets.CODECOV_TOKEN }} + python-version: ${{ inputs.default-python-version }} + - name: "Post Tests failure" + uses: ./.github/actions/post_tests_failure + if: failure() + + tests-system: + timeout-minutes: 130 + if: inputs.run-system-tests == 'true' + name: "System Tests" + runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + env: + IMAGE_TAG: "${{ inputs.image-tag }}" + BACKEND: "postgres" + BACKEND_VERSION: ${{ inputs.default-postgres-version }}" + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + JOB_ID: "system" + SKIP_PROVIDERS_TESTS: "${{ inputs.skip-providers-tests }}" + ENABLE_COVERAGE: "${{ inputs.run-coverage}}" + DEBUG_RESOURCES: "${{ inputs.debug-resources }}" + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USERNAME: ${{ github.actor }} + VERBOSE: "true" + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + - name: "Prepare breeze & CI image: ${{ 
inputs.default-python-version }}:${{ inputs.image-tag }}" + uses: ./.github/actions/prepare_breeze_and_image + - name: "System Tests" + run: > + ./scripts/ci/testing/run_system_tests.sh + tests/system/example_empty.py providers/tests/system/example_empty.py + - name: "Post Tests success" + uses: ./.github/actions/post_tests_success + with: + codecov-token: ${{ secrets.CODECOV_TOKEN }} + python-version: ${{ inputs.default-python-version }} + - name: "Post Tests failure" + uses: ./.github/actions/post_tests_failure + if: failure() diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index 530d0f9fc563..000000000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,103 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: Integration tests -on: # yamllint disable-line rule:truthy - workflow_call: - inputs: - runs-on-as-json-public: - description: "The array of labels (in json form) determining public runners." - required: true - type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string - testable-integrations: - description: "The list of testable integrations as JSON array." 
- required: true - type: string - default-postgres-version: - description: "Default version of Postgres to use" - required: true - type: string - default-mysql-version: - description: "Default version of MySQL to use" - required: true - type: string - skip-provider-tests: - description: "Skip provider tests (true/false)" - required: true - type: string - run-coverage: - description: "Run coverage (true/false)" - required: true - type: string - default-python-version: - description: "Which version of python should be used by default" - required: true - type: string - debug-resources: - description: "Debug resources (true/false)" - required: true - type: string -jobs: - tests-integration: - timeout-minutes: 130 - if: inputs.testable-integrations != '[]' - name: "Integration Tests: ${{ matrix.integration }}" - runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} - strategy: - fail-fast: false - matrix: - integration: ${{ fromJSON(inputs.testable-integrations) }} - env: - IMAGE_TAG: "${{ inputs.image-tag }}" - BACKEND: "postgres" - BACKEND_VERSION: ${{ inputs.default-postgres-version }}" - PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" - JOB_ID: "integration-${{ matrix.integration }}" - SKIP_PROVIDER_TESTS: "${{ inputs.skip-provider-tests }}" - ENABLE_COVERAGE: "${{ inputs.run-coverage}}" - DEBUG_RESOURCES: "${{ inputs.debug-resources }}" - GITHUB_REPOSITORY: ${{ github.repository }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_USERNAME: ${{ github.actor }} - VERBOSE: "true" - steps: - - name: "Cleanup repo" - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" - uses: ./.github/actions/prepare_breeze_and_image - - name: "Integration Tests: ${{ matrix.integration }}" - run: ./scripts/ci/testing/run_integration_tests_with_retry.sh ${{ matrix.integration }} - - name: "Post Tests success: Integration Tests ${{ matrix.integration }}" - uses: ./.github/actions/post_tests_success - with: - codecov-token: ${{ secrets.CODECOV_TOKEN }} - python-version: ${{ inputs.default-python-version }} - - name: "Post Tests failure: Integration Tests ${{ matrix.integration }}" - uses: ./.github/actions/post_tests_failure - if: failure() diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index eb3e1a90707f..8b8568083501 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -24,6 +24,10 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." 
required: true type: string + test-groups: + description: "The json representing list of test test groups to run" + required: true + type: string backend: description: "The backend to run the tests on" required: true @@ -61,8 +65,12 @@ on: # yamllint disable-line rule:truthy description: "Excluded combos (stringified JSON array of python-version/backend-version dicts)" required: true type: string - parallel-test-types-list-as-string: - description: "The list of parallel test types to run separated by spaces" + core-test-types-list-as-string: + description: "The list of core test types to run separated by spaces" + required: true + type: string + providers-test-types-list-as-string: + description: "The list of providers test types to run separated by spaces" required: true type: string run-migration-tests: @@ -117,9 +125,9 @@ jobs: tests: timeout-minutes: 120 name: "\ - ${{ inputs.test-scope }}:\ + ${{ inputs.test-scope }}-${{ matrix.test-group }}:\ ${{ inputs.test-name }}${{ inputs.test-name-separator }}${{ matrix.backend-version }}:\ - ${{matrix.python-version}}: ${{ inputs.parallel-test-types-list-as-string }}" + ${{matrix.python-version}}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} strategy: fail-fast: false @@ -127,8 +135,8 @@ jobs: python-version: "${{fromJSON(inputs.python-versions)}}" backend-version: "${{fromJSON(inputs.backend-versions)}}" exclude: "${{fromJSON(inputs.excludes)}}" + test-group: "${{fromJSON(inputs.test-groups)}}" env: - # yamllint disable rule:line-length AIRFLOW_ENABLE_AIP_44: "${{ inputs.enable-aip-44 }}" BACKEND: "${{ inputs.backend }}" BACKEND_VERSION: "${{ matrix.backend-version }}" @@ -145,9 +153,10 @@ jobs: IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_SUCCESS_OUTPUTS: ${{ inputs.include-success-outputs }} # yamllint disable rule:line-length - JOB_ID: "${{ inputs.test-scope }}-${{ inputs.test-name }}-${{inputs.backend}}-${{ matrix.backend-version }}-${{ matrix.python-version }}" + JOB_ID: "${{ matrix.test-group }}-${{ inputs.test-scope }}-${{ inputs.test-name }}-${{inputs.backend}}-${{ matrix.backend-version }}-${{ matrix.python-version }}" MOUNT_SOURCES: "skip" - PARALLEL_TEST_TYPES: "${{ inputs.parallel-test-types-list-as-string }}" + # yamllint disable rule:line-length + PARALLEL_TEST_TYPES: ${{ matrix.test-group == 'core' && inputs.core-test-types-list-as-string || inputs.providers-test-types-list-as-string }} PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" UPGRADE_BOTO: "${{ inputs.upgrade-boto }}" AIRFLOW_MONITOR_DELAY_TIME_IN_SECONDS: "${{inputs.monitor-delay-time-in-seconds}}" @@ -165,33 +174,13 @@ jobs: - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{ inputs.image-tag }}" uses: ./.github/actions/prepare_breeze_and_image - name: > - Migration Tests: - ${{ matrix.python-version }}:${{ inputs.parallel-test-types-list-as-string }} + Migration Tests: ${{ matrix.python-version }}:${{ env.PARALLEL_TEST_TYPES }} uses: ./.github/actions/migration_tests - if: inputs.run-migration-tests == 'true' + if: inputs.run-migration-tests == 'true' && matrix.test-group == 'core' - name: > - ${{ inputs.test-scope }} Tests ${{ inputs.test-name }} ${{ matrix.backend-version }} - Py${{ matrix.python-version }}:${{ inputs.parallel-test-types-list-as-string}} - run: | - if [[ "${{ inputs.test-scope }}" == "DB" ]]; then - breeze testing db-tests \ - --parallel-test-types "${{ inputs.parallel-test-types-list-as-string }}" - elif [[ "${{ inputs.test-scope }}" == "Non-DB" ]]; then - breeze testing non-db-tests \ - --parallel-test-types 
"${{ inputs.parallel-test-types-list-as-string }}" - elif [[ "${{ inputs.test-scope }}" == "All" ]]; then - breeze testing tests --run-in-parallel \ - --parallel-test-types "${{ inputs.parallel-test-types-list-as-string }}" - elif [[ "${{ inputs.test-scope }}" == "Quarantined" ]]; then - breeze testing tests --test-type "All-Quarantined" || true - elif [[ "${{ inputs.test-scope }}" == "ARM collection" ]]; then - breeze testing tests --collect-only --remove-arm-packages - elif [[ "${{ inputs.test-scope }}" == "System" ]]; then - breeze testing tests tests/system/example_empty.py --system core - else - echo "Unknown test scope: ${{ inputs.test-scope }}" - exit 1 - fi + ${{ matrix.test-group}}:${{ inputs.test-scope }} Tests ${{ inputs.test-name }} ${{ matrix.backend-version }} + Py${{ matrix.python-version }}:${{ env.PARALLEL_TEST_TYPES }} + run: ./scripts/ci/testing/run_unit_tests.sh "${{ matrix.test-group }}" "${{ inputs.test-scope }}" - name: "Post Tests success" uses: ./.github/actions/post_tests_success with: diff --git a/.github/workflows/special-tests.yml b/.github/workflows/special-tests.yml index 78b4d928f7a9..012b619cba94 100644 --- a/.github/workflows/special-tests.yml +++ b/.github/workflows/special-tests.yml @@ -24,12 +24,20 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." required: true type: string + test-groups: + description: "The json representing list of test test groups to run" + required: true + type: string image-tag: description: "Tag to set for the image" required: true type: string - parallel-test-types-list-as-string: - description: "The list of parallel test types to run separated by spaces" + core-test-types-list-as-string: + description: "The list of core test types to run separated by spaces" + required: true + type: string + providers-test-types-list-as-string: + description: "The list of providers test types to run separated by spaces" required: true type: string run-coverage: @@ -77,14 +85,15 @@ jobs: downgrade-sqlalchemy: "true" test-name: "MinSQLAlchemy-Postgres" test-scope: "DB" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} - include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} @@ -100,13 +109,15 @@ jobs: upgrade-boto: "true" test-name: "LatestBoto-Postgres" test-scope: "All" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ 
inputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} @@ -123,13 +134,15 @@ jobs: downgrade-pendulum: "true" test-name: "Pendulum2-Postgres" test-scope: "All" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} @@ -146,13 +159,15 @@ jobs: enable-aip-44: "false" test-name: "InProgressDisabled-Postgres" test-scope: "All" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} @@ -168,13 +183,15 @@ jobs: runs-on-as-json-default: ${{ inputs.runs-on-as-json-default }} test-name: "Postgres" test-scope: "Quarantined" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} @@ -190,19 +207,21 @@ jobs: runs-on-as-json-default: ${{ inputs.runs-on-as-json-default }} test-name: "Postgres" test-scope: "ARM collection" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} 
include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} tests-system: - name: "System test" + name: "System test: ${{ matrix.test-group }}" uses: ./.github/workflows/run-unit-tests.yml permissions: contents: read @@ -212,13 +231,15 @@ jobs: runs-on-as-json-default: ${{ inputs.runs-on-as-json-default }} test-name: "SystemTest" test-scope: "System" + test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} excludes: "[]" - parallel-test-types-list-as-string: ${{ inputs.parallel-test-types-list-as-string }} + core-test-types-list-as-string: ${{ inputs.core-test-types-list-as-string }} + providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} diff --git a/Dockerfile b/Dockerfile index 5ca9949b0213..b8f2e24b2dc0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,7 +55,7 @@ ARG PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=24.3.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.1 +ARG AIRFLOW_UV_VERSION=0.5.2 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" @@ -606,6 +606,7 @@ function common::show_packaging_tool_version_and_location() { } function common::install_packaging_tools() { + : "${AIRFLOW_USE_UV:?Should be set}" if [[ "${VIRTUAL_ENV=}" != "" ]]; then echo echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}" @@ -658,8 +659,23 @@ function common::install_packaging_tools() { pip install --root-user-action ignore --disable-pip-version-check "uv==${AIRFLOW_UV_VERSION}" fi fi - # make sure that the venv/user in .local exists - mkdir -p "${HOME}/.local/bin" + if [[ ${AIRFLOW_PRE_COMMIT_VERSION=} == "" ]]; then + echo + echo "${COLOR_BLUE}Installing latest pre-commit with pre-commit-uv uv${COLOR_RESET}" + echo + uv tool install pre-commit --with pre-commit-uv --with uv + # make sure that the venv/user in .local exists + mkdir -p "${HOME}/.local/bin" + else + echo + echo "${COLOR_BLUE}Installing predefined versions of pre-commit with pre-commit-uv and uv:${COLOR_RESET}" + echo "${COLOR_BLUE}pre_commit(${AIRFLOW_PRE_COMMIT_VERSION}) uv(${AIRFLOW_UV_VERSION}) pre_commit-uv(${AIRFLOW_PRE_COMMIT_UV_VERSION})${COLOR_RESET}" + echo + uv tool install "pre-commit==${AIRFLOW_PRE_COMMIT_VERSION}" \ + --with "uv==${AIRFLOW_UV_VERSION}" --with "pre-commit-uv==${AIRFLOW_PRE_COMMIT_UV_VERSION}" + # make sure that the venv/user in .local exists + mkdir -p "${HOME}/.local/bin" + fi } function common::import_trusted_gpg() { @@ -890,7 +906,7 @@ function install_airflow() { # Similarly we need _a_ file for task_sdk too mkdir -p ./task_sdk/src/airflow/sdk/ - touch ./task_sdk/src/airflow/sdk/__init__.py + echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT diff --git a/Dockerfile.ci 
b/Dockerfile.ci index 943270aec693..6de026318993 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -547,6 +547,7 @@ function common::show_packaging_tool_version_and_location() { } function common::install_packaging_tools() { + : "${AIRFLOW_USE_UV:?Should be set}" if [[ "${VIRTUAL_ENV=}" != "" ]]; then echo echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}" @@ -599,8 +600,23 @@ function common::install_packaging_tools() { pip install --root-user-action ignore --disable-pip-version-check "uv==${AIRFLOW_UV_VERSION}" fi fi - # make sure that the venv/user in .local exists - mkdir -p "${HOME}/.local/bin" + if [[ ${AIRFLOW_PRE_COMMIT_VERSION=} == "" ]]; then + echo + echo "${COLOR_BLUE}Installing latest pre-commit with pre-commit-uv uv${COLOR_RESET}" + echo + uv tool install pre-commit --with pre-commit-uv --with uv + # make sure that the venv/user in .local exists + mkdir -p "${HOME}/.local/bin" + else + echo + echo "${COLOR_BLUE}Installing predefined versions of pre-commit with pre-commit-uv and uv:${COLOR_RESET}" + echo "${COLOR_BLUE}pre_commit(${AIRFLOW_PRE_COMMIT_VERSION}) uv(${AIRFLOW_UV_VERSION}) pre_commit-uv(${AIRFLOW_PRE_COMMIT_UV_VERSION})${COLOR_RESET}" + echo + uv tool install "pre-commit==${AIRFLOW_PRE_COMMIT_VERSION}" \ + --with "uv==${AIRFLOW_UV_VERSION}" --with "pre-commit-uv==${AIRFLOW_PRE_COMMIT_UV_VERSION}" + # make sure that the venv/user in .local exists + mkdir -p "${HOME}/.local/bin" + fi } function common::import_trusted_gpg() { @@ -660,7 +676,7 @@ function install_airflow() { # Similarly we need _a_ file for task_sdk too mkdir -p ./task_sdk/src/airflow/sdk/ - touch ./task_sdk/src/airflow/sdk/__init__.py + echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT @@ -1068,12 +1084,6 @@ function check_run_tests() { python "${IN_CONTAINER_DIR}/remove_arm_packages.py" fi - if [[ ${TEST_TYPE} == "PlainAsserts" ]]; then - # Plain asserts should be converted to env variable to make sure they are taken into account - # otherwise they will not be effective during test collection when plain assert is breaking collection - export PYTEST_PLAIN_ASSERTS="true" - fi - if [[ ${DATABASE_ISOLATION=} == "true" ]]; then echo "${COLOR_BLUE}Starting internal API server:${COLOR_RESET}" # We need to start the internal API server before running tests @@ -1101,7 +1111,7 @@ function check_run_tests() { fi fi - if [[ ${RUN_SYSTEM_TESTS:="false"} == "true" ]]; then + if [[ ${TEST_GROUP:=""} == "system" ]]; then exec "${IN_CONTAINER_DIR}/run_system_tests.sh" "${@}" else exec "${IN_CONTAINER_DIR}/run_ci_tests.sh" "${@}" @@ -1344,10 +1354,14 @@ RUN bash /scripts/docker/install_packaging_tools.sh; \ # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=24.3.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.1 +ARG AIRFLOW_UV_VERSION=0.5.2 +# TODO(potiuk): automate with upgrade check (possibly) +ARG AIRFLOW_PRE_COMMIT_VERSION="4.0.1" +ARG AIRFLOW_PRE_COMMIT_UV_VERSION="4.1.4" ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ - AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} + AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ + AIRFLOW_PRE_COMMIT_VERSION=${AIRFLOW_PRE_COMMIT_VERSION} # The PATH is needed for PIPX to find the tools installed ENV PATH="/root/.local/bin:${PATH}" diff --git a/airflow/api/client/local_client.py b/airflow/api/client/local_client.py index 
7d828792c426..800afa283021 100644 --- a/airflow/api/client/local_client.py +++ b/airflow/api/client/local_client.py @@ -36,14 +36,14 @@ def __init__(self, auth=None, session: httpx.Client | None = None): self._session.auth = auth def trigger_dag( - self, dag_id, run_id=None, conf=None, execution_date=None, replace_microseconds=True + self, dag_id, run_id=None, conf=None, logical_date=None, replace_microseconds=True ) -> dict | None: dag_run = trigger_dag.trigger_dag( dag_id=dag_id, triggered_by=DagRunTriggeredByType.CLI, run_id=run_id, conf=conf, - execution_date=execution_date, + logical_date=logical_date, replace_microseconds=replace_microseconds, ) if dag_run: diff --git a/airflow/api/common/mark_tasks.py b/airflow/api/common/mark_tasks.py index 957e82e7de49..a170e6901a50 100644 --- a/airflow/api/common/mark_tasks.py +++ b/airflow/api/common/mark_tasks.py @@ -59,19 +59,19 @@ def _create_dagruns( :param dag: The DAG to create runs for. :param infos: List of logical dates and data intervals to evaluate. :param state: The state to set the dag run to - :param run_type: The prefix will be used to construct dag run id: ``{run_id_prefix}__{execution_date}``. - :return: Newly created and existing dag runs for the execution dates supplied. + :param run_type: The prefix will be used to construct dag run id: ``{run_id_prefix}__{logical_date}``. + :return: Newly created and existing dag runs for the logical dates supplied. """ # Find out existing DAG runs that we don't need to create. dag_runs = { run.logical_date: run - for run in DagRun.find(dag_id=dag.dag_id, execution_date=[info.logical_date for info in infos]) + for run in DagRun.find(dag_id=dag.dag_id, logical_date=[info.logical_date for info in infos]) } for info in infos: if info.logical_date not in dag_runs: dag_runs[info.logical_date] = dag.create_dagrun( - execution_date=info.logical_date, + logical_date=info.logical_date, data_interval=info.data_interval, start_date=timezone.utcnow(), external_trigger=False, @@ -87,7 +87,7 @@ def set_state( *, tasks: Collection[Operator | tuple[Operator, int]], run_id: str | None = None, - execution_date: datetime | None = None, + logical_date: datetime | None = None, upstream: bool = False, downstream: bool = False, future: bool = False, @@ -107,11 +107,11 @@ def set_state( :param tasks: the iterable of tasks or (task, map_index) tuples from which to work. ``task.dag`` needs to be set :param run_id: the run_id of the dagrun to start looking from - :param execution_date: the execution date from which to start looking (deprecated) + :param logical_date: the logical date from which to start looking (deprecated) :param upstream: Mark all parents (upstream tasks) :param downstream: Mark all siblings (downstream tasks) of task_id :param future: Mark all future tasks on the interval of the dag up until - last execution date. + last logical date. 
:param past: Retroactively mark all tasks starting from start_date of the DAG :param state: State to which the tasks need to be set :param commit: Commit tasks to be altered to the database @@ -121,11 +121,11 @@ def set_state( if not tasks: return [] - if not exactly_one(execution_date, run_id): - raise ValueError("Exactly one of dag_run_id and execution_date must be set") + if not exactly_one(logical_date, run_id): + raise ValueError("Exactly one of dag_run_id and logical_date must be set") - if execution_date and not timezone.is_localized(execution_date): - raise ValueError(f"Received non-localized date {execution_date}") + if logical_date and not timezone.is_localized(logical_date): + raise ValueError(f"Received non-localized date {logical_date}") task_dags = {task[0].dag if isinstance(task, tuple) else task.dag for task in tasks} if len(task_dags) > 1: @@ -134,8 +134,8 @@ def set_state( if dag is None: raise ValueError("Received tasks with no DAG") - if execution_date: - run_id = dag.get_dagrun(execution_date=execution_date, session=session).run_id + if logical_date: + run_id = dag.get_dagrun(logical_date=logical_date, session=session).run_id if not run_id: raise ValueError("Received tasks with no run_id") @@ -200,26 +200,26 @@ def find_task_relatives(tasks, downstream, upstream): @provide_session -def get_execution_dates( - dag: DAG, execution_date: datetime, future: bool, past: bool, *, session: SASession = NEW_SESSION +def get_logical_dates( + dag: DAG, logical_date: datetime, future: bool, past: bool, *, session: SASession = NEW_SESSION ) -> list[datetime]: - """Return DAG execution dates.""" - latest_execution_date = dag.get_latest_execution_date(session=session) - if latest_execution_date is None: - raise ValueError(f"Received non-localized date {execution_date}") - execution_date = timezone.coerce_datetime(execution_date) + """Return DAG logical dates.""" + latest_logical_date = dag.get_latest_logical_date(session=session) + if latest_logical_date is None: + raise ValueError(f"Received non-localized date {logical_date}") + logical_date = timezone.coerce_datetime(logical_date) # determine date range of dag runs and tasks to consider - end_date = latest_execution_date if future else execution_date + end_date = latest_logical_date if future else logical_date if dag.start_date: start_date = dag.start_date else: - start_date = execution_date - start_date = execution_date if not past else start_date + start_date = logical_date + start_date = logical_date if not past else start_date if not dag.timetable.can_be_scheduled: # If the DAG never schedules, need to look at existing DagRun if the user wants future or # past runs. 
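The hunks above carry the core of the execution_date -> logical_date rename in airflow.api.common.mark_tasks: set_state() and its helpers now take a logical_date keyword, and exactly one of run_id / logical_date must be supplied. A minimal, hypothetical caller sketch of the renamed keyword (the task object and run id below are placeholders, not taken from this patch):

# Hypothetical sketch, not part of the patch: identify the run by run_id
# (or alternatively by logical_date=<timezone-aware datetime>) under the new naming.
from airflow.api.common.mark_tasks import set_state
from airflow.utils.state import TaskInstanceState

changed = set_state(
    tasks=[some_task],  # placeholder: an operator bound to a DAG
    run_id="manual__2024-11-01T00:00:00+00:00",
    state=TaskInstanceState.SUCCESS,
    commit=False,  # dry run: returns the task instances that would be altered
)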
dag_runs = dag.get_dagruns_between(start_date=start_date, end_date=end_date) - dates = sorted({d.execution_date for d in dag_runs}) + dates = sorted({d.logical_date for d in dag_runs}) elif not dag.timetable.periodic: dates = [start_date] else: @@ -235,7 +235,7 @@ def get_run_ids(dag: DAG, run_id: str, future: bool, past: bool, session: SASess last_dagrun = dag.get_last_dagrun(include_externally_triggered=True, session=session) current_dagrun = dag.get_dagrun(run_id=run_id, session=session) first_dagrun = session.scalar( - select(DagRun).filter(DagRun.dag_id == dag.dag_id).order_by(DagRun.execution_date.asc()).limit(1) + select(DagRun).filter(DagRun.dag_id == dag.dag_id).order_by(DagRun.logical_date.asc()).limit(1) ) if last_dagrun is None: @@ -255,7 +255,7 @@ def get_run_ids(dag: DAG, run_id: str, future: bool, past: bool, session: SASess dates = [ info.logical_date for info in dag.iter_dagrun_infos_between(start_date, end_date, align=False) ] - run_ids = [dr.run_id for dr in DagRun.find(dag_id=dag.dag_id, execution_date=dates, session=session)] + run_ids = [dr.run_id for dr in DagRun.find(dag_id=dag.dag_id, logical_date=dates, session=session)] return run_ids @@ -279,7 +279,7 @@ def _set_dag_run_state(dag_id: str, run_id: str, state: DagRunState, session: SA def set_dag_run_state_to_success( *, dag: DAG, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, commit: bool = False, session: SASession = NEW_SESSION, @@ -287,29 +287,29 @@ def set_dag_run_state_to_success( """ Set the dag run's state to success. - Set for a specific execution date and its task instances to success. + Set for a specific logical date and its task instances to success. :param dag: the DAG of which to alter state - :param execution_date: the execution date from which to start looking(deprecated) + :param logical_date: the logical date from which to start looking(deprecated) :param run_id: the run_id to start looking from :param commit: commit DAG and tasks to be altered to the database :param session: database session :return: If commit is true, list of tasks that have been updated, otherwise list of tasks that will be updated - :raises: ValueError if dag or execution_date is invalid + :raises: ValueError if dag or logical_date is invalid """ - if not exactly_one(execution_date, run_id): + if not exactly_one(logical_date, run_id): return [] if not dag: return [] - if execution_date: - if not timezone.is_localized(execution_date): - raise ValueError(f"Received non-localized date {execution_date}") - dag_run = dag.get_dagrun(execution_date=execution_date) + if logical_date: + if not timezone.is_localized(logical_date): + raise ValueError(f"Received non-localized date {logical_date}") + dag_run = dag.get_dagrun(logical_date=logical_date) if not dag_run: - raise ValueError(f"DagRun with execution_date: {execution_date} not found") + raise ValueError(f"DagRun with logical_date: {logical_date} not found") run_id = dag_run.run_id if not run_id: raise ValueError(f"Invalid dag_run_id: {run_id}") @@ -333,7 +333,7 @@ def set_dag_run_state_to_success( def set_dag_run_state_to_failed( *, dag: DAG, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, commit: bool = False, session: SASession = NEW_SESSION, @@ -341,28 +341,28 @@ def set_dag_run_state_to_failed( """ Set the dag run's state to failed. - Set for a specific execution date and its task instances to failed. 
+ Set for a specific logical date and its task instances to failed. :param dag: the DAG of which to alter state - :param execution_date: the execution date from which to start looking(deprecated) + :param logical_date: the logical date from which to start looking(deprecated) :param run_id: the DAG run_id to start looking from :param commit: commit DAG and tasks to be altered to the database :param session: database session :return: If commit is true, list of tasks that have been updated, otherwise list of tasks that will be updated - :raises: AssertionError if dag or execution_date is invalid + :raises: AssertionError if dag or logical_date is invalid """ - if not exactly_one(execution_date, run_id): + if not exactly_one(logical_date, run_id): return [] if not dag: return [] - if execution_date: - if not timezone.is_localized(execution_date): - raise ValueError(f"Received non-localized date {execution_date}") - dag_run = dag.get_dagrun(execution_date=execution_date) + if logical_date: + if not timezone.is_localized(logical_date): + raise ValueError(f"Received non-localized date {logical_date}") + dag_run = dag.get_dagrun(logical_date=logical_date) if not dag_run: - raise ValueError(f"DagRun with execution_date: {execution_date} not found") + raise ValueError(f"DagRun with logical_date: {logical_date} not found") run_id = dag_run.run_id if not run_id: @@ -429,16 +429,16 @@ def __set_dag_run_state_to_running_or_queued( *, new_state: DagRunState, dag: DAG, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, commit: bool = False, session: SASession, ) -> list[TaskInstance]: """ - Set the dag run for a specific execution date to running. + Set the dag run for a specific logical date to running. :param dag: the DAG of which to alter state - :param execution_date: the execution date from which to start looking + :param logical_date: the logical date from which to start looking :param run_id: the id of the DagRun :param commit: commit DAG and tasks to be altered to the database :param session: database session @@ -447,18 +447,18 @@ def __set_dag_run_state_to_running_or_queued( """ res: list[TaskInstance] = [] - if not exactly_one(execution_date, run_id): + if not exactly_one(logical_date, run_id): return res if not dag: return res - if execution_date: - if not timezone.is_localized(execution_date): - raise ValueError(f"Received non-localized date {execution_date}") - dag_run = dag.get_dagrun(execution_date=execution_date) + if logical_date: + if not timezone.is_localized(logical_date): + raise ValueError(f"Received non-localized date {logical_date}") + dag_run = dag.get_dagrun(logical_date=logical_date) if not dag_run: - raise ValueError(f"DagRun with execution_date: {execution_date} not found") + raise ValueError(f"DagRun with logical_date: {logical_date} not found") run_id = dag_run.run_id if not run_id: raise ValueError(f"DagRun with run_id: {run_id} not found") @@ -474,7 +474,7 @@ def __set_dag_run_state_to_running_or_queued( def set_dag_run_state_to_running( *, dag: DAG, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, commit: bool = False, session: SASession = NEW_SESSION, @@ -482,12 +482,12 @@ def set_dag_run_state_to_running( """ Set the dag run's state to running. - Set for a specific execution date and its task instances to running. + Set for a specific logical date and its task instances to running. 
""" return __set_dag_run_state_to_running_or_queued( new_state=DagRunState.RUNNING, dag=dag, - execution_date=execution_date, + logical_date=logical_date, run_id=run_id, commit=commit, session=session, @@ -498,7 +498,7 @@ def set_dag_run_state_to_running( def set_dag_run_state_to_queued( *, dag: DAG, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, commit: bool = False, session: SASession = NEW_SESSION, @@ -506,12 +506,12 @@ def set_dag_run_state_to_queued( """ Set the dag run's state to queued. - Set for a specific execution date and its task instances to queued. + Set for a specific logical date and its task instances to queued. """ return __set_dag_run_state_to_running_or_queued( new_state=DagRunState.QUEUED, dag=dag, - execution_date=execution_date, + logical_date=logical_date, run_id=run_id, commit=commit, session=session, diff --git a/airflow/api/common/trigger_dag.py b/airflow/api/common/trigger_dag.py index 44beae3f1f78..4a94f990191c 100644 --- a/airflow/api/common/trigger_dag.py +++ b/airflow/api/common/trigger_dag.py @@ -44,7 +44,7 @@ def _trigger_dag( triggered_by: DagRunTriggeredByType, run_id: str | None = None, conf: dict | str | None = None, - execution_date: datetime | None = None, + logical_date: datetime | None = None, replace_microseconds: bool = True, ) -> DagRun | None: """ @@ -53,9 +53,9 @@ def _trigger_dag( :param dag_id: DAG ID :param dag_bag: DAG Bag model :param triggered_by: the entity which triggers the dag_run - :param run_id: ID of the dag_run + :param run_id: ID of the run :param conf: configuration - :param execution_date: date of execution + :param logical_date: logical date of the run :param replace_microseconds: whether microseconds should be zeroed :return: list of triggered dags """ @@ -64,31 +64,31 @@ def _trigger_dag( if dag is None or dag_id not in dag_bag.dags: raise DagNotFound(f"Dag id {dag_id} not found") - execution_date = execution_date or timezone.utcnow() + logical_date = logical_date or timezone.utcnow() - if not timezone.is_localized(execution_date): - raise ValueError("The execution_date should be localized") + if not timezone.is_localized(logical_date): + raise ValueError("The logical date should be localized") if replace_microseconds: - execution_date = execution_date.replace(microsecond=0) + logical_date = logical_date.replace(microsecond=0) if dag.default_args and "start_date" in dag.default_args: min_dag_start_date = dag.default_args["start_date"] - if min_dag_start_date and execution_date < min_dag_start_date: + if min_dag_start_date and logical_date < min_dag_start_date: raise ValueError( - f"The execution_date [{execution_date.isoformat()}] should be >= start_date " + f"Logical date [{logical_date.isoformat()}] should be >= start_date " f"[{min_dag_start_date.isoformat()}] from DAG's default_args" ) - logical_date = timezone.coerce_datetime(execution_date) + coerced_logical_date = timezone.coerce_datetime(logical_date) - data_interval = dag.timetable.infer_manual_data_interval(run_after=logical_date) + data_interval = dag.timetable.infer_manual_data_interval(run_after=coerced_logical_date) run_id = run_id or dag.timetable.generate_run_id( - run_type=DagRunType.MANUAL, logical_date=logical_date, data_interval=data_interval + run_type=DagRunType.MANUAL, logical_date=coerced_logical_date, data_interval=data_interval ) - dag_run = DagRun.find_duplicate(dag_id=dag_id, execution_date=execution_date, run_id=run_id) + dag_run = DagRun.find_duplicate(dag_id=dag_id, run_id=run_id, 
logical_date=logical_date) if dag_run: - raise DagRunAlreadyExists(dag_run=dag_run, execution_date=execution_date, run_id=run_id) + raise DagRunAlreadyExists(dag_run, logical_date=logical_date, run_id=run_id) run_conf = None if conf: @@ -96,7 +96,7 @@ def _trigger_dag( dag_version = DagVersion.get_latest_version(dag.dag_id) dag_run = dag.create_dagrun( run_id=run_id, - execution_date=execution_date, + logical_date=logical_date, state=DagRunState.QUEUED, conf=run_conf, external_trigger=True, @@ -116,7 +116,7 @@ def trigger_dag( triggered_by: DagRunTriggeredByType, run_id: str | None = None, conf: dict | str | None = None, - execution_date: datetime | None = None, + logical_date: datetime | None = None, replace_microseconds: bool = True, session: Session = NEW_SESSION, ) -> DagRun | None: @@ -126,7 +126,7 @@ def trigger_dag( :param dag_id: DAG ID :param run_id: ID of the dag_run :param conf: configuration - :param execution_date: date of execution + :param logical_date: date of execution :param replace_microseconds: whether microseconds should be zeroed :param session: Unused. Only added in compatibility with database isolation mode :param triggered_by: the entity which triggers the dag_run @@ -142,7 +142,7 @@ def trigger_dag( dag_bag=dagbag, run_id=run_id, conf=conf, - execution_date=execution_date, + logical_date=logical_date, replace_microseconds=replace_microseconds, triggered_by=triggered_by, ) diff --git a/airflow/api_connexion/endpoints/asset_endpoint.py b/airflow/api_connexion/endpoints/asset_endpoint.py index 883931644d75..ff47db883879 100644 --- a/airflow/api_connexion/endpoints/asset_endpoint.py +++ b/airflow/api_connexion/endpoints/asset_endpoint.py @@ -324,6 +324,7 @@ def delete_asset_queued_events( ) +@mark_fastapi_migration_done @security.requires_access_asset("POST") @provide_session @action_logging diff --git a/airflow/api_connexion/endpoints/dag_run_endpoint.py b/airflow/api_connexion/endpoints/dag_run_endpoint.py index 6a38eb27ff45..b8e7f36d1fd4 100644 --- a/airflow/api_connexion/endpoints/dag_run_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_run_endpoint.py @@ -115,6 +115,7 @@ def get_dag_run( raise BadRequest("DAGRunSchema error", detail=str(e)) +@mark_fastapi_migration_done @security.requires_access_dag("GET", DagAccessEntity.RUN) @security.requires_access_asset("GET") @provide_session @@ -157,11 +158,11 @@ def _fetch_dag_runs( query = query.where(DagRun.start_date >= start_date_gte) if start_date_lte: query = query.where(DagRun.start_date <= start_date_lte) - # filter execution date + # filter logical date if execution_date_gte: - query = query.where(DagRun.execution_date >= execution_date_gte) + query = query.where(DagRun.logical_date >= execution_date_gte) if execution_date_lte: - query = query.where(DagRun.execution_date <= execution_date_lte) + query = query.where(DagRun.logical_date <= execution_date_lte) # filter end date if end_date_gte: query = query.where(DagRun.end_date >= end_date_gte) @@ -174,12 +175,12 @@ def _fetch_dag_runs( query = query.where(DagRun.updated_at <= updated_at_lte) total_entries = get_query_count(query, session=session) - to_replace = {"dag_run_id": "run_id", "execution_date": "logical_date"} + to_replace = {"dag_run_id": "run_id", "logical_date": "logical_date"} allowed_sort_attrs = [ "id", "state", "dag_id", - "execution_date", + "logical_date", "dag_run_id", "start_date", "end_date", @@ -319,13 +320,13 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: except ValidationError as err: raise 
BadRequest(detail=str(err)) - logical_date = pendulum.instance(post_body["execution_date"]) + logical_date = pendulum.instance(post_body["logical_date"]) run_id = post_body["run_id"] dagrun_instance = session.scalar( select(DagRun) .where( DagRun.dag_id == dag_id, - or_(DagRun.run_id == run_id, DagRun.execution_date == logical_date), + or_(DagRun.run_id == run_id, DagRun.logical_date == logical_date), ) .limit(1) ) @@ -346,7 +347,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: dag_run = dag.create_dagrun( run_type=DagRunType.MANUAL, run_id=run_id, - execution_date=logical_date, + logical_date=logical_date, data_interval=data_interval, state=DagRunState.QUEUED, conf=post_body.get("conf"), @@ -363,7 +364,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: except (ValueError, ParamValidationError) as ve: raise BadRequest(detail=str(ve)) - if dagrun_instance.execution_date == logical_date: + if dagrun_instance.logical_date == logical_date: raise AlreadyExists( detail=( f"DAGRun with DAG ID: '{dag_id}' and " @@ -403,6 +404,7 @@ def update_dag_run_state(*, dag_id: str, dag_run_id: str, session: Session = NEW return dagrun_schema.dump(dag_run) +@mark_fastapi_migration_done @security.requires_access_dag("PUT", DagAccessEntity.RUN) @action_logging @provide_session diff --git a/airflow/api_connexion/endpoints/task_endpoint.py b/airflow/api_connexion/endpoints/task_endpoint.py index abc28cfee6fb..3fd14c8cdf89 100644 --- a/airflow/api_connexion/endpoints/task_endpoint.py +++ b/airflow/api_connexion/endpoints/task_endpoint.py @@ -47,6 +47,7 @@ def get_task(*, dag_id: str, task_id: str) -> APIResponse: return task_schema.dump(task) +@mark_fastapi_migration_done @security.requires_access_dag("GET", DagAccessEntity.TASK) def get_tasks(*, dag_id: str, order_by: str = "task_id") -> APIResponse: """Get tasks for DAG.""" diff --git a/airflow/api_connexion/endpoints/task_instance_endpoint.py b/airflow/api_connexion/endpoints/task_instance_endpoint.py index 9f9461910082..83456960753d 100644 --- a/airflow/api_connexion/endpoints/task_instance_endpoint.py +++ b/airflow/api_connexion/endpoints/task_instance_endpoint.py @@ -198,7 +198,7 @@ def get_mapped_task_instances( # Other search criteria base_query = _apply_range_filter( base_query, - key=DR.execution_date, + key=DR.logical_date, value_range=(execution_date_gte, execution_date_lte), ) base_query = _apply_range_filter( @@ -336,7 +336,7 @@ def get_task_instances( base_query = base_query.where(TI.run_id == dag_run_id) base_query = _apply_range_filter( base_query, - key=DR.execution_date, + key=DR.logical_date, value_range=(execution_date_gte, execution_date_lte), ) base_query = _apply_range_filter( @@ -367,6 +367,7 @@ def get_task_instances( ) +@mark_fastapi_migration_done @security.requires_access_dag("GET", DagAccessEntity.TASK_INSTANCE) @provide_session def get_task_instances_batch(session: Session = NEW_SESSION) -> APIResponse: @@ -398,7 +399,7 @@ def get_task_instances_batch(session: Session = NEW_SESSION) -> APIResponse: base_query = _apply_array_filter(base_query, key=TI.task_id, values=data["task_ids"]) base_query = _apply_range_filter( base_query, - key=DR.execution_date, + key=DR.logical_date, value_range=(data["execution_date_gte"], data["execution_date_lte"]), ) base_query = _apply_range_filter( @@ -522,22 +523,18 @@ def post_set_task_instances_state(*, dag_id: str, session: Session = NEW_SESSION error_message = f"Task ID {task_id} not found" raise NotFound(error_message) - 
execution_date = data.get("execution_date") + logical_date = data.get("logical_date") run_id = data.get("dag_run_id") if ( - execution_date + logical_date and ( session.scalars( - select(TI).where( - TI.task_id == task_id, TI.dag_id == dag_id, TI.execution_date == execution_date - ) + select(TI).where(TI.task_id == task_id, TI.dag_id == dag_id, TI.logical_date == logical_date) ).one_or_none() ) is None ): - raise NotFound( - detail=f"Task instance not found for task {task_id!r} on execution_date {execution_date}" - ) + raise NotFound(detail=f"Task instance not found for task {task_id!r} on logical_date {logical_date}") select_stmt = select(TI).where( TI.dag_id == dag_id, TI.task_id == task_id, TI.run_id == run_id, TI.map_index == -1 @@ -550,7 +547,7 @@ def post_set_task_instances_state(*, dag_id: str, session: Session = NEW_SESSION tis = dag.set_task_instance_state( task_id=task_id, run_id=run_id, - execution_date=execution_date, + logical_date=logical_date, state=data["new_state"], upstream=data["include_upstream"], downstream=data["include_downstream"], diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index af6a3055670d..10cee57d95fc 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -3080,7 +3080,7 @@ components: The value of this field can be set only when creating the object. If you try to modify the field of an existing object, the request fails with an BAD_REQUEST error. - If not provided, a value will be generated based on execution_date. + If not provided, a value will be generated based on logical_date. If the specified dag_run_id is in use, the creation request fails with an ALREADY_EXISTS error. @@ -3102,19 +3102,6 @@ components: *New in version 2.2.0* format: date-time - execution_date: - type: string - nullable: true - description: | - The execution date. This is the same as logical_date, kept for backwards compatibility. - If both this field and logical_date are provided but with different values, the request - will fail with an BAD_REQUEST error. - - *Changed in version 2.2.0*: Field becomes nullable. - - *Deprecated since version 2.2.0*: Use 'logical_date' instead. - format: date-time - deprecated: true start_date: type: string format: date-time @@ -3677,7 +3664,7 @@ components: The DagRun ID for this task instance *New in version 2.3.0* - execution_date: + logical_date: type: string format: datetime start_date: @@ -3788,7 +3775,7 @@ components: type: string readOnly: true description: The DAG ID. - execution_date: + logical_date: type: string format: datetime readOnly: true @@ -3860,7 +3847,7 @@ components: timestamp: type: string format: datetime - execution_date: + logical_date: type: string format: datetime map_index: @@ -4734,14 +4721,14 @@ components: description: The task ID. type: string - execution_date: - description: The execution date. Either set this or dag_run_id but not both. + logical_date: + description: The logical date. Either set this or dag_run_id but not both. type: string format: datetime dag_run_id: description: | - The task instance's DAG run ID. Either set this or execution_date but not both. + The task instance's DAG run ID. Either set this or logical_date but not both. 
*New in version 2.3.0* type: string diff --git a/airflow/api_connexion/schemas/asset_schema.py b/airflow/api_connexion/schemas/asset_schema.py index 7f84b799d1a7..e83c4f1b4279 100644 --- a/airflow/api_connexion/schemas/asset_schema.py +++ b/airflow/api_connexion/schemas/asset_schema.py @@ -119,7 +119,7 @@ class Meta: run_id = auto_field(data_key="dag_run_id") dag_id = auto_field(dump_only=True) - execution_date = auto_field(data_key="logical_date", dump_only=True) + logical_date = auto_field(data_key="logical_date", dump_only=True) start_date = auto_field(dump_only=True) end_date = auto_field(dump_only=True) state = auto_field(dump_only=True) diff --git a/airflow/api_connexion/schemas/dag_run_schema.py b/airflow/api_connexion/schemas/dag_run_schema.py index e829dd956a4c..c2560613def7 100644 --- a/airflow/api_connexion/schemas/dag_run_schema.py +++ b/airflow/api_connexion/schemas/dag_run_schema.py @@ -63,7 +63,7 @@ class Meta: run_id = auto_field(data_key="dag_run_id") dag_id = auto_field(dump_only=True) - execution_date = auto_field(data_key="logical_date", validate=validate_istimezone) + logical_date = auto_field(data_key="logical_date", validate=validate_istimezone) start_date = auto_field(dump_only=True) end_date = auto_field(dump_only=True) state = DagStateField(dump_only=True) @@ -78,25 +78,12 @@ class Meta: @pre_load def autogenerate(self, data, **kwargs): - """ - Auto generate run_id and logical_date if they are not provided. - - For compatibility, if `execution_date` is submitted, it is converted - to `logical_date`. - """ + """Auto generate run_id and logical_date if they are not provided.""" logical_date = data.get("logical_date", _MISSING) - execution_date = data.pop("execution_date", _MISSING) - if logical_date is execution_date is _MISSING: # Both missing. + + # Auto-generate logical_date if missing + if logical_date is _MISSING: data["logical_date"] = str(timezone.utcnow()) - elif logical_date is _MISSING: # Only logical_date missing. - data["logical_date"] = execution_date - elif execution_date is _MISSING: # Only execution_date missing. - pass - elif logical_date != execution_date: # Both provided but don't match. 
- raise BadRequest( - "logical_date conflicts with execution_date", - detail=f"{logical_date!r} != {execution_date!r}", - ) if "dag_run_id" not in data: try: @@ -109,9 +96,8 @@ def autogenerate(self, data, **kwargs): @post_dump def autofill(self, data, **kwargs): - """Populate execution_date from logical_date for compatibility.""" + """Ensure that only requested fields are returned if 'fields' context is set.""" ret_data = {} - data["execution_date"] = data["logical_date"] if self.context.get("fields"): ret_fields = self.context.get("fields") for ret_field in ret_fields: diff --git a/airflow/api_connexion/schemas/event_log_schema.py b/airflow/api_connexion/schemas/event_log_schema.py index bf88d1a9ce88..33ecf4ca947a 100644 --- a/airflow/api_connexion/schemas/event_log_schema.py +++ b/airflow/api_connexion/schemas/event_log_schema.py @@ -40,7 +40,7 @@ class Meta: map_index = auto_field(dump_only=True) try_number = auto_field(dump_only=True) event = auto_field(dump_only=True) - execution_date = auto_field(dump_only=True) + logical_date = auto_field(dump_only=True) owner = auto_field(dump_only=True) extra = auto_field(dump_only=True) diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index f0b8285fdfa0..3e864f18652c 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -45,7 +45,7 @@ class Meta: dag_id = auto_field() run_id = auto_field(data_key="dag_run_id") map_index = auto_field() - execution_date = auto_field() + logical_date = auto_field() start_date = auto_field() end_date = auto_field() duration = auto_field() @@ -196,7 +196,7 @@ class SetTaskInstanceStateFormSchema(Schema): dry_run = fields.Boolean(load_default=True) task_id = fields.Str(required=True) - execution_date = fields.DateTime(validate=validate_istimezone) + logical_date = fields.DateTime(validate=validate_istimezone) dag_run_id = fields.Str() include_upstream = fields.Boolean(required=True) include_downstream = fields.Boolean(required=True) @@ -212,8 +212,8 @@ class SetTaskInstanceStateFormSchema(Schema): @validates_schema def validate_form(self, data, **kwargs): """Validate set task instance state form.""" - if not exactly_one(data.get("execution_date"), data.get("dag_run_id")): - raise ValidationError("Exactly one of execution_date or dag_run_id must be provided") + if not exactly_one(data.get("logical_date"), data.get("dag_run_id")): + raise ValidationError("Exactly one of logical_date or dag_run_id must be provided") class SetSingleTaskInstanceStateFormSchema(Schema): @@ -234,7 +234,7 @@ class TaskInstanceReferenceSchema(Schema): task_id = fields.Str() run_id = fields.Str(data_key="dag_run_id") dag_id = fields.Str() - execution_date = fields.DateTime() + logical_date = fields.DateTime() class TaskInstanceReferenceCollection(NamedTuple): diff --git a/airflow/api_connexion/schemas/xcom_schema.py b/airflow/api_connexion/schemas/xcom_schema.py index 625f05bd1459..a56adf7551d7 100644 --- a/airflow/api_connexion/schemas/xcom_schema.py +++ b/airflow/api_connexion/schemas/xcom_schema.py @@ -34,7 +34,7 @@ class Meta: key = auto_field() timestamp = auto_field() - execution_date = auto_field() + logical_date = auto_field() map_index = auto_field() task_id = auto_field() dag_id = auto_field() diff --git a/airflow/api_fastapi/common/parameters.py b/airflow/api_fastapi/common/parameters.py index 41ef99804332..c573996eafd1 100644 --- a/airflow/api_fastapi/common/parameters.py +++ 
b/airflow/api_fastapi/common/parameters.py @@ -66,7 +66,7 @@ def depends(self, *args: Any, **kwargs: Any) -> Self: pass -class _LimitFilter(BaseParam[int]): +class LimitFilter(BaseParam[int]): """Filter on the limit.""" def to_orm(self, select: Select) -> Select: @@ -75,11 +75,11 @@ def to_orm(self, select: Select) -> Select: return select.limit(self.value) - def depends(self, limit: int = 100) -> _LimitFilter: + def depends(self, limit: int = 100) -> LimitFilter: return self.set_value(limit) -class _OffsetFilter(BaseParam[int]): +class OffsetFilter(BaseParam[int]): """Filter on offset.""" def to_orm(self, select: Select) -> Select: @@ -87,7 +87,7 @@ def to_orm(self, select: Select) -> Select: return select return select.offset(self.value) - def depends(self, offset: int = 0) -> _OffsetFilter: + def depends(self, offset: int = 0) -> OffsetFilter: return self.set_value(offset) @@ -115,18 +115,54 @@ def depends(self, only_active: bool = True) -> _OnlyActiveFilter: return self.set_value(only_active) -class _DagIdsFilter(BaseParam[list[str]]): - """Filter on multi-valued dag_ids param for DagRun.""" +class DagIdsFilter(BaseParam[list[str]]): + """Filter on dag ids.""" + + def __init__(self, model: Base, value: list[str] | None = None, skip_none: bool = True) -> None: + super().__init__(value, skip_none) + self.model = model def to_orm(self, select: Select) -> Select: if self.value and self.skip_none: - return select.where(DagRun.dag_id.in_(self.value)) + return select.where(self.model.dag_id.in_(self.value)) return select - def depends(self, dag_ids: list[str] = Query(None)) -> _DagIdsFilter: + def depends(self, dag_ids: list[str] = Query(None)) -> DagIdsFilter: return self.set_value(dag_ids) +class DagRunIdsFilter(BaseParam[list[str]]): + """Filter on dag run ids.""" + + def __init__(self, model: Base, value: list[str] | None = None, skip_none: bool = True) -> None: + super().__init__(value, skip_none) + self.model = model + + def to_orm(self, select: Select) -> Select: + if self.value and self.skip_none: + return select.where(self.model.run_id.in_(self.value)) + return select + + def depends(self, dag_run_ids: list[str] = Query(None)) -> DagRunIdsFilter: + return self.set_value(dag_run_ids) + + +class TaskIdsFilter(BaseParam[list[str]]): + """Filter on task ids.""" + + def __init__(self, model: Base, value: list[str] | None = None, skip_none: bool = True) -> None: + super().__init__(value, skip_none) + self.model = model + + def to_orm(self, select: Select) -> Select: + if self.value and self.skip_none: + return select.where(self.model.task_id.in_(self.value)) + return select + + def depends(self, task_ids: list[str] = Query(None)) -> TaskIdsFilter: + return self.set_value(task_ids) + + class _SearchParam(BaseParam[str]): """Search on attribute.""" @@ -273,7 +309,7 @@ def depends(self, owners: list[str] = Query(default_factory=list)) -> _OwnersFil return self.set_value(owners) -class _TIStateFilter(BaseParam[List[Optional[TaskInstanceState]]]): +class TIStateFilter(BaseParam[List[Optional[TaskInstanceState]]]): """Filter on task instance state.""" def to_orm(self, select: Select) -> Select: @@ -286,12 +322,12 @@ def to_orm(self, select: Select) -> Select: conditions = [TaskInstance.state == state for state in self.value] return select.where(or_(*conditions)) - def depends(self, state: list[str] = Query(default_factory=list)) -> _TIStateFilter: + def depends(self, state: list[str] = Query(default_factory=list)) -> TIStateFilter: states = _convert_ti_states(state) return self.set_value(states) 
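The parameters.py changes promote the previously private filters (_LimitFilter, _OffsetFilter, _DagIdsFilter, _TIStateFilter, ...) to public names and parameterise the id filters with the ORM model they constrain, so they can be reused outside this module. A sketch of how such a model-parameterised filter is wired as a FastAPI dependency, mirroring the QueryDagIdsFilter annotation later in this file; the TaskInstance-based aliases are hypothetical and not added by the patch:

from typing import Annotated

from fastapi import Depends

from airflow.api_fastapi.common.parameters import DagIdsFilter, TaskIdsFilter
from airflow.models.taskinstance import TaskInstance

# Hypothetical aliases constraining TaskInstance rows rather than DagRun rows.
QueryTIDagIdsFilter = Annotated[DagIdsFilter, Depends(DagIdsFilter(TaskInstance).depends)]
QueryTITaskIdsFilter = Annotated[TaskIdsFilter, Depends(TaskIdsFilter(TaskInstance).depends)]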
-class _TIPoolFilter(BaseParam[List[str]]): +class TIPoolFilter(BaseParam[List[str]]): """Filter on task instance pool.""" def to_orm(self, select: Select) -> Select: @@ -304,11 +340,11 @@ def to_orm(self, select: Select) -> Select: conditions = [TaskInstance.pool == pool for pool in self.value] return select.where(or_(*conditions)) - def depends(self, pool: list[str] = Query(default_factory=list)) -> _TIPoolFilter: + def depends(self, pool: list[str] = Query(default_factory=list)) -> TIPoolFilter: return self.set_value(pool) -class _TIQueueFilter(BaseParam[List[str]]): +class TIQueueFilter(BaseParam[List[str]]): """Filter on task instance queue.""" def to_orm(self, select: Select) -> Select: @@ -321,11 +357,11 @@ def to_orm(self, select: Select) -> Select: conditions = [TaskInstance.queue == queue for queue in self.value] return select.where(or_(*conditions)) - def depends(self, queue: list[str] = Query(default_factory=list)) -> _TIQueueFilter: + def depends(self, queue: list[str] = Query(default_factory=list)) -> TIQueueFilter: return self.set_value(queue) -class _TIExecutorFilter(BaseParam[List[str]]): +class TIExecutorFilter(BaseParam[List[str]]): """Filter on task instance executor.""" def to_orm(self, select: Select) -> Select: @@ -338,7 +374,7 @@ def to_orm(self, select: Select) -> Select: conditions = [TaskInstance.executor == executor for executor in self.value] return select.where(or_(*conditions)) - def depends(self, executor: list[str] = Query(default_factory=list)) -> _TIExecutorFilter: + def depends(self, executor: list[str] = Query(default_factory=list)) -> TIExecutorFilter: return self.set_value(executor) @@ -603,8 +639,8 @@ def depends_float( OptionalDateTimeQuery = Annotated[Union[datetime, None], AfterValidator(_safe_parse_datetime_optional)] # DAG -QueryLimit = Annotated[_LimitFilter, Depends(_LimitFilter().depends)] -QueryOffset = Annotated[_OffsetFilter, Depends(_OffsetFilter().depends)] +QueryLimit = Annotated[LimitFilter, Depends(LimitFilter().depends)] +QueryOffset = Annotated[OffsetFilter, Depends(OffsetFilter().depends)] QueryPausedFilter = Annotated[_PausedFilter, Depends(_PausedFilter().depends)] QueryOnlyActiveFilter = Annotated[_OnlyActiveFilter, Depends(_OnlyActiveFilter().depends)] QueryDagIdPatternSearch = Annotated[_DagIdPatternSearch, Depends(_DagIdPatternSearch().depends)] @@ -619,7 +655,7 @@ def depends_float( # DagRun QueryLastDagRunStateFilter = Annotated[_LastDagRunStateFilter, Depends(_LastDagRunStateFilter().depends)] -QueryDagIdsFilter = Annotated[_DagIdsFilter, Depends(_DagIdsFilter().depends)] +QueryDagIdsFilter = Annotated[DagIdsFilter, Depends(DagIdsFilter(DagRun).depends)] # DAGWarning QueryDagIdInDagWarningFilter = Annotated[_DagIdFilter, Depends(_DagIdFilter(DagWarning.dag_id).depends)] @@ -629,10 +665,10 @@ def depends_float( QueryDagTagPatternSearch = Annotated[_DagTagNamePatternSearch, Depends(_DagTagNamePatternSearch().depends)] # TI -QueryTIStateFilter = Annotated[_TIStateFilter, Depends(_TIStateFilter().depends)] -QueryTIPoolFilter = Annotated[_TIPoolFilter, Depends(_TIPoolFilter().depends)] -QueryTIQueueFilter = Annotated[_TIQueueFilter, Depends(_TIQueueFilter().depends)] -QueryTIExecutorFilter = Annotated[_TIExecutorFilter, Depends(_TIExecutorFilter().depends)] +QueryTIStateFilter = Annotated[TIStateFilter, Depends(TIStateFilter().depends)] +QueryTIPoolFilter = Annotated[TIPoolFilter, Depends(TIPoolFilter().depends)] +QueryTIQueueFilter = Annotated[TIQueueFilter, Depends(TIQueueFilter().depends)] +QueryTIExecutorFilter = 
Annotated[TIExecutorFilter, Depends(TIExecutorFilter().depends)] # Assets QueryUriPatternSearch = Annotated[_UriPatternSearch, Depends(_UriPatternSearch().depends)] diff --git a/airflow/api_fastapi/common/types.py b/airflow/api_fastapi/common/types.py index 2dc1be7d4cf0..ab10a21c9700 100644 --- a/airflow/api_fastapi/common/types.py +++ b/airflow/api_fastapi/common/types.py @@ -16,14 +16,11 @@ # under the License. from __future__ import annotations -import inspect from datetime import timedelta from typing import Annotated from pydantic import AfterValidator, AliasGenerator, AwareDatetime, BaseModel, BeforeValidator, ConfigDict -from airflow.models.mappedoperator import MappedOperator -from airflow.serialization.serialized_objects import SerializedBaseOperator from airflow.utils import timezone UtcDateTime = Annotated[AwareDatetime, AfterValidator(lambda d: d.astimezone(timezone.utc))] @@ -59,28 +56,3 @@ class TimeDelta(BaseModel): TimeDeltaWithValidation = Annotated[TimeDelta, BeforeValidator(_validate_timedelta_field)] - - -def get_class_ref(obj) -> dict[str, str | None]: - """Return the class_ref dict for obj.""" - is_mapped_or_serialized = isinstance(obj, (MappedOperator, SerializedBaseOperator)) - - module_path = None - if is_mapped_or_serialized: - module_path = obj._task_module - else: - module_type = inspect.getmodule(obj) - module_path = module_type.__name__ if module_type else None - - class_name = None - if is_mapped_or_serialized: - class_name = obj._task_type - elif obj.__class__ is type: - class_name = obj.__name__ - else: - class_name = type(obj).__name__ - - return { - "module_path": module_path, - "class_name": class_name, - } diff --git a/airflow/api_fastapi/core_api/datamodels/assets.py b/airflow/api_fastapi/core_api/datamodels/assets.py index 9ac4528964e6..bfdbb2d7fc88 100644 --- a/airflow/api_fastapi/core_api/datamodels/assets.py +++ b/airflow/api_fastapi/core_api/datamodels/assets.py @@ -19,11 +19,11 @@ from datetime import datetime -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator class DagScheduleAssetReference(BaseModel): - """Serializable version of the DagScheduleAssetReference ORM SqlAlchemyModel.""" + """DAG schedule reference serializer for assets.""" dag_id: str created_at: datetime @@ -31,7 +31,7 @@ class DagScheduleAssetReference(BaseModel): class TaskOutletAssetReference(BaseModel): - """Serializable version of the TaskOutletAssetReference ORM SqlAlchemyModel.""" + """Task outlet reference serializer for assets.""" dag_id: str task_id: str @@ -40,7 +40,7 @@ class TaskOutletAssetReference(BaseModel): class AssetAliasSchema(BaseModel): - """Serializable version of the AssetAliasSchema ORM SqlAlchemyModel.""" + """Asset alias serializer for assets.""" id: int name: str @@ -73,7 +73,7 @@ class DagRunAssetReference(BaseModel): dag_id: str execution_date: datetime = Field(alias="logical_date") start_date: datetime - end_date: datetime + end_date: datetime | None state: str data_interval_start: datetime data_interval_end: datetime @@ -114,3 +114,20 @@ class QueuedEventCollectionResponse(BaseModel): queued_events: list[QueuedEventResponse] total_entries: int + + +class CreateAssetEventsBody(BaseModel): + """Create asset events request.""" + + uri: str + extra: dict = Field(default_factory=dict) + + @field_validator("extra", mode="after") + def set_from_rest_api(cls, v: dict) -> dict: + v["from_rest_api"] = True + return v + + class Config: + """Pydantic config.""" + + extra = "forbid" diff --git 
a/airflow/api_fastapi/core_api/datamodels/dag_run.py b/airflow/api_fastapi/core_api/datamodels/dag_run.py index 102567f69976..8241885aff2f 100644 --- a/airflow/api_fastapi/core_api/datamodels/dag_run.py +++ b/airflow/api_fastapi/core_api/datamodels/dag_run.py @@ -41,6 +41,12 @@ class DAGRunPatchBody(BaseModel): note: str | None = Field(None, max_length=1000) +class DAGRunClearBody(BaseModel): + """DAG Run serializer for clear endpoint body.""" + + dry_run: bool = True + + class DAGRunResponse(BaseModel): """DAG Run serializer for responses.""" diff --git a/airflow/api_fastapi/core_api/datamodels/event_logs.py b/airflow/api_fastapi/core_api/datamodels/event_logs.py index f70e5bd15834..5b65ec85ba7b 100644 --- a/airflow/api_fastapi/core_api/datamodels/event_logs.py +++ b/airflow/api_fastapi/core_api/datamodels/event_logs.py @@ -33,7 +33,7 @@ class EventLogResponse(BaseModel): map_index: int | None try_number: int | None event: str - execution_date: datetime | None = Field(alias="logical_date") + logical_date: datetime | None owner: str | None extra: str | None diff --git a/airflow/api_fastapi/core_api/datamodels/task_instances.py b/airflow/api_fastapi/core_api/datamodels/task_instances.py index 00910bce1c8f..cd4caf1b6119 100644 --- a/airflow/api_fastapi/core_api/datamodels/task_instances.py +++ b/airflow/api_fastapi/core_api/datamodels/task_instances.py @@ -19,7 +19,15 @@ from datetime import datetime from typing import Annotated -from pydantic import AliasPath, BaseModel, BeforeValidator, ConfigDict, Field +from pydantic import ( + AliasPath, + AwareDatetime, + BaseModel, + BeforeValidator, + ConfigDict, + Field, + NonNegativeInt, +) from airflow.api_fastapi.core_api.datamodels.job import JobResponse from airflow.api_fastapi.core_api.datamodels.trigger import TriggerResponse @@ -36,7 +44,7 @@ class TaskInstanceResponse(BaseModel): dag_id: str run_id: str = Field(alias="dag_run_id") map_index: int - execution_date: datetime = Field(alias="logical_date") + logical_date: datetime start_date: datetime | None end_date: datetime | None duration: float | None @@ -83,3 +91,26 @@ class TaskDependencyCollectionResponse(BaseModel): """Task scheduling dependencies collection serializer for responses.""" dependencies: list[TaskDependencyResponse] + + +class TaskInstancesBatchBody(BaseModel): + """Task Instance body for get batch.""" + + dag_ids: list[str] | None = None + dag_run_ids: list[str] | None = None + task_ids: list[str] | None = None + state: list[TaskInstanceState | None] | None = None + logical_date_gte: AwareDatetime | None = None + logical_date_lte: AwareDatetime | None = None + start_date_gte: AwareDatetime | None = None + start_date_lte: AwareDatetime | None = None + end_date_gte: AwareDatetime | None = None + end_date_lte: AwareDatetime | None = None + duration_gte: float | None = None + duration_lte: float | None = None + pool: list[str] | None = None + queue: list[str] | None = None + executor: list[str] | None = None + page_offset: NonNegativeInt = 0 + page_limit: NonNegativeInt = 100 + order_by: str | None = None diff --git a/airflow/api_fastapi/core_api/datamodels/tasks.py b/airflow/api_fastapi/core_api/datamodels/tasks.py index 7caaf9c02f47..9b962390cc34 100644 --- a/airflow/api_fastapi/core_api/datamodels/tasks.py +++ b/airflow/api_fastapi/core_api/datamodels/tasks.py @@ -17,16 +17,44 @@ from __future__ import annotations +import inspect from collections import abc from datetime import datetime +from typing import Any -from pydantic import BaseModel, computed_field, field_validator 
+from pydantic import BaseModel, computed_field, field_validator, model_validator from airflow.api_fastapi.common.types import TimeDeltaWithValidation -from airflow.serialization.serialized_objects import encode_priority_weight_strategy +from airflow.models.mappedoperator import MappedOperator +from airflow.serialization.serialized_objects import SerializedBaseOperator, encode_priority_weight_strategy from airflow.task.priority_strategy import PriorityWeightStrategy +def _get_class_ref(obj) -> dict[str, str | None]: + """Return the class_ref dict for obj.""" + is_mapped_or_serialized = isinstance(obj, (MappedOperator, SerializedBaseOperator)) + + module_path = None + if is_mapped_or_serialized: + module_path = obj._task_module + else: + module_type = inspect.getmodule(obj) + module_path = module_type.__name__ if module_type else None + + class_name = None + if is_mapped_or_serialized: + class_name = obj._task_type + elif obj.__class__ is type: + class_name = obj.__name__ + else: + class_name = type(obj).__name__ + + return { + "module_path": module_path, + "class_name": class_name, + } + + class TaskResponse(BaseModel): """Task serializer for responses.""" @@ -57,6 +85,14 @@ class TaskResponse(BaseModel): class_ref: dict | None is_mapped: bool | None + @model_validator(mode="before") + @classmethod + def validate_model(cls, task: Any) -> Any: + task.__dict__.update( + {"class_ref": _get_class_ref(task), "is_mapped": isinstance(task, MappedOperator)} + ) + return task + @field_validator("weight_rule", mode="before") @classmethod def validate_weight_rule(cls, wr: str | PriorityWeightStrategy | None) -> str | None: @@ -81,3 +117,10 @@ def get_params(cls, params: abc.MutableMapping | None) -> dict | None: def extra_links(self) -> list[str]: """Extract and return extra_links.""" return getattr(self, "operator_extra_links", []) + + +class TaskCollectionResponse(BaseModel): + """Task collection serializer for responses.""" + + tasks: list[TaskResponse] + total_entries: int diff --git a/airflow/api_fastapi/core_api/datamodels/xcom.py b/airflow/api_fastapi/core_api/datamodels/xcom.py index dadbc51290e0..186b5aad77f0 100644 --- a/airflow/api_fastapi/core_api/datamodels/xcom.py +++ b/airflow/api_fastapi/core_api/datamodels/xcom.py @@ -27,7 +27,7 @@ class XComResponse(BaseModel): key: str timestamp: datetime - execution_date: datetime + logical_date: datetime map_index: int task_id: str dag_id: str diff --git a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml index 8429616f70b9..bdf1b8aef1bd 100644 --- a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml @@ -348,6 +348,50 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/events: + post: + tags: + - Asset + summary: Create Asset Event + description: Create asset events. 
+ operationId: create_asset_event + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAssetEventsBody' + required: true + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AssetEventResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /public/assets/{uri}: get: tags: @@ -1220,6 +1264,117 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/dags/{dag_id}/dagRuns/{dag_run_id}/upstreamAssetEvents: + get: + tags: + - DagRun + summary: Get Upstream Asset Events + description: If dag run is asset-triggered, return the asset events that triggered + it. + operationId: get_upstream_asset_events + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: dag_run_id + in: path + required: true + schema: + type: string + title: Dag Run Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AssetEventCollectionResponse' + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /public/dags/{dag_id}/dagRuns/{dag_run_id}/clear: + post: + tags: + - DagRun + summary: Clear Dag Run + operationId: clear_dag_run + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: dag_run_id + in: path + required: true + schema: + type: string + title: Dag Run Id + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/DAGRunClearBody' + responses: + '200': + description: Successful Response + content: + application/json: + schema: + anyOf: + - $ref: '#/components/schemas/TaskInstanceCollectionResponse' + - $ref: '#/components/schemas/DAGRunResponse' + title: Response Clear Dag Run + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /public/dagSources/{file_token}: get: tags: @@ -2211,31 +2366,6 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /public/monitor/health: - get: - tags: - - Monitor - summary: Get Health - 
operationId: get_health - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/HealthInfoSchema' - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPExceptionResponse' - '403': - description: Forbidden - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPExceptionResponse' /public/plugins/: get: tags: @@ -3226,6 +3356,108 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/list: + post: + tags: + - Task Instance + summary: Get Task Instances Batch + description: Get list of task instances. + operationId: get_task_instances_batch + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/TaskInstancesBatchBody' + required: true + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskInstanceCollectionResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /public/dags/{dag_id}/tasks/: + get: + tags: + - Task + summary: Get Tasks + description: Get tasks for DAG. + operationId: get_tasks + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: order_by + in: query + required: false + schema: + type: string + default: task_id + title: Order By + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskCollectionResponse' + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /public/dags/{dag_id}/tasks/{task_id}: get: tags: @@ -3524,32 +3756,6 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /public/version/: - get: - tags: - - Version - summary: Get Version - description: Get version information. 
- operationId: get_version - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/VersionInfo' - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPExceptionResponse' - '403': - description: Forbidden - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPExceptionResponse' /public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}: get: tags: @@ -3644,6 +3850,33 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/monitor/health: + get: + tags: + - Monitor + summary: Get Health + operationId: get_health + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HealthInfoSchema' + /public/version/: + get: + tags: + - Version + summary: Get Version + description: Get version information. + operationId: get_version + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/VersionInfo' components: schemas: AppBuilderMenuItemResponse: @@ -3706,7 +3939,7 @@ components: - id - name title: AssetAliasSchema - description: Serializable version of the AssetAliasSchema ORM SqlAlchemyModel. + description: Asset alias serializer for assets. AssetCollectionResponse: properties: assets: @@ -4093,6 +4326,20 @@ components: - message title: ConnectionTestResponse description: Connection Test serializer for responses. + CreateAssetEventsBody: + properties: + uri: + type: string + title: Uri + extra: + type: object + title: Extra + additionalProperties: false + type: object + required: + - uri + title: CreateAssetEventsBody + description: Create asset events request. DAGCollectionResponse: properties: dags: @@ -4463,6 +4710,15 @@ components: - file_token title: DAGResponse description: DAG serializer for responses. + DAGRunClearBody: + properties: + dry_run: + type: boolean + title: Dry Run + default: true + type: object + title: DAGRunClearBody + description: DAG Run serializer for clear endpoint body. DAGRunPatchBody: properties: state: @@ -4873,8 +5129,10 @@ components: format: date-time title: Start Date end_date: - type: string - format: date-time + anyOf: + - type: string + format: date-time + - type: 'null' title: End Date state: type: string @@ -4956,7 +5214,7 @@ components: - created_at - updated_at title: DagScheduleAssetReference - description: Serializable version of the DagScheduleAssetReference ORM SqlAlchemyModel. + description: DAG schedule reference serializer for assets. DagStatsCollectionResponse: properties: dags: @@ -5593,6 +5851,22 @@ components: - latest_scheduler_heartbeat title: SchedulerInfoSchema description: Schema for Scheduler info. + TaskCollectionResponse: + properties: + tasks: + items: + $ref: '#/components/schemas/TaskResponse' + type: array + title: Tasks + total_entries: + type: integer + title: Total Entries + type: object + required: + - tasks + - total_entries + title: TaskCollectionResponse + description: Task collection serializer for responses. TaskDependencyCollectionResponse: properties: dependencies: @@ -5868,6 +6142,123 @@ components: - deferred title: TaskInstanceStateCount description: TaskInstance serializer for responses. 
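The TaskInstancesBatchBody schema directly below backs the new POST /public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/list route added earlier in this file. A hedged client-side sketch of what a call could look like; the host, DAG id, run id and the absence of authentication are placeholders, not part of the patch:

import requests

resp = requests.post(
    "http://localhost:8080/public/dags/example_dag/dagRuns/"
    "manual__2024-11-01T00:00:00+00:00/taskInstances/list",
    json={
        "state": ["success", "failed"],
        "page_limit": 50,
        "order_by": "start_date",
    },
    timeout=10,
)
resp.raise_for_status()
print(resp.json()["total_entries"])  # assumes the usual collection response shape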
+ TaskInstancesBatchBody: + properties: + dag_ids: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Dag Ids + dag_run_ids: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Dag Run Ids + task_ids: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Task Ids + state: + anyOf: + - items: + anyOf: + - $ref: '#/components/schemas/TaskInstanceState' + - type: 'null' + type: array + - type: 'null' + title: State + logical_date_gte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Logical Date Gte + logical_date_lte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Logical Date Lte + start_date_gte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start Date Gte + start_date_lte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start Date Lte + end_date_gte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End Date Gte + end_date_lte: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End Date Lte + duration_gte: + anyOf: + - type: number + - type: 'null' + title: Duration Gte + duration_lte: + anyOf: + - type: number + - type: 'null' + title: Duration Lte + pool: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Pool + queue: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Queue + executor: + anyOf: + - items: + type: string + type: array + - type: 'null' + title: Executor + page_offset: + type: integer + minimum: 0.0 + title: Page Offset + default: 0 + page_limit: + type: integer + minimum: 0.0 + title: Page Limit + default: 100 + order_by: + anyOf: + - type: string + - type: 'null' + title: Order By + type: object + title: TaskInstancesBatchBody + description: Task Instance body for get batch. TaskOutletAssetReference: properties: dag_id: @@ -5891,7 +6282,7 @@ components: - created_at - updated_at title: TaskOutletAssetReference - description: Serializable version of the TaskOutletAssetReference ORM SqlAlchemyModel. + description: Task outlet reference serializer for assets. 
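On the same theme, the DAGRunClearBody defined earlier backs the new POST /public/dags/{dag_id}/dagRuns/{dag_run_id}/clear route; the anyOf response documented above suggests a dry run returns the task instances that would be cleared, while dry_run=false returns the updated DAG run. A hypothetical call, with the same placeholder host and identifiers as before:

import requests

resp = requests.post(
    "http://localhost:8080/public/dags/example_dag/dagRuns/"
    "manual__2024-11-01T00:00:00+00:00/clear",
    json={"dry_run": True},  # default; False would actually clear the run
    timeout=10,
)
resp.raise_for_status()
print(resp.json())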
TaskResponse: properties: task_id: @@ -6234,10 +6625,10 @@ components: type: string format: date-time title: Timestamp - execution_date: + logical_date: type: string format: date-time - title: Execution Date + title: Logical Date map_index: type: integer title: Map Index @@ -6253,7 +6644,7 @@ components: required: - key - timestamp - - execution_date + - logical_date - map_index - task_id - dag_id @@ -6269,10 +6660,10 @@ components: type: string format: date-time title: Timestamp - execution_date: + logical_date: type: string format: date-time - title: Execution Date + title: Logical Date map_index: type: integer title: Map Index @@ -6291,7 +6682,7 @@ components: required: - key - timestamp - - execution_date + - logical_date - map_index - task_id - dag_id diff --git a/airflow/api_fastapi/core_api/routes/public/__init__.py b/airflow/api_fastapi/core_api/routes/public/__init__.py index 71de864fa3ee..e85d17ae4ca8 100644 --- a/airflow/api_fastapi/core_api/routes/public/__init__.py +++ b/airflow/api_fastapi/core_api/routes/public/__init__.py @@ -41,28 +41,34 @@ from airflow.api_fastapi.core_api.routes.public.version import version_router from airflow.api_fastapi.core_api.routes.public.xcom import xcom_router -public_router = AirflowRouter( - prefix="/public", +public_router = AirflowRouter(prefix="/public") + +# Router with common attributes for all routes +authenticated_router = AirflowRouter( responses=create_openapi_http_exception_doc([status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN]), ) +authenticated_router.include_router(assets_router) +authenticated_router.include_router(backfills_router) +authenticated_router.include_router(connections_router) +authenticated_router.include_router(dag_run_router) +authenticated_router.include_router(dag_sources_router) +authenticated_router.include_router(dag_stats_router) +authenticated_router.include_router(dag_warning_router) +authenticated_router.include_router(dags_router) +authenticated_router.include_router(event_logs_router) +authenticated_router.include_router(import_error_router) +authenticated_router.include_router(plugins_router) +authenticated_router.include_router(pools_router) +authenticated_router.include_router(providers_router) +authenticated_router.include_router(task_instances_router) +authenticated_router.include_router(tasks_router) +authenticated_router.include_router(variables_router) +authenticated_router.include_router(xcom_router) + +# Include authenticated router in public router +public_router.include_router(authenticated_router) -public_router.include_router(assets_router) -public_router.include_router(backfills_router) -public_router.include_router(connections_router) -public_router.include_router(dag_run_router) -public_router.include_router(dag_sources_router) -public_router.include_router(dag_stats_router) -public_router.include_router(dag_warning_router) -public_router.include_router(dags_router) -public_router.include_router(event_logs_router) -public_router.include_router(import_error_router) +# Following routers are not included in common router, for now we don't expect it to have authentication public_router.include_router(monitor_router) -public_router.include_router(plugins_router) -public_router.include_router(pools_router) -public_router.include_router(providers_router) -public_router.include_router(task_instances_router) -public_router.include_router(tasks_router) -public_router.include_router(variables_router) public_router.include_router(version_router) -public_router.include_router(xcom_router) diff 
--git a/airflow/api_fastapi/core_api/routes/public/assets.py b/airflow/api_fastapi/core_api/routes/public/assets.py index d4597f9994df..0900b0400987 100644 --- a/airflow/api_fastapi/core_api/routes/public/assets.py +++ b/airflow/api_fastapi/core_api/routes/public/assets.py @@ -44,11 +44,15 @@ AssetEventCollectionResponse, AssetEventResponse, AssetResponse, + CreateAssetEventsBody, QueuedEventCollectionResponse, QueuedEventResponse, ) from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc +from airflow.assets import Asset +from airflow.assets.manager import asset_manager from airflow.models.asset import AssetDagRunQueue, AssetEvent, AssetModel +from airflow.utils import timezone assets_router = AirflowRouter(tags=["Asset"]) @@ -159,6 +163,32 @@ def get_asset_events( ) +@assets_router.post( + "/events", + responses=create_openapi_http_exception_doc([404]), +) +def create_asset_event( + body: CreateAssetEventsBody, + session: Annotated[Session, Depends(get_session)], +) -> AssetEventResponse: + """Create asset events.""" + asset = session.scalar(select(AssetModel).where(AssetModel.uri == body.uri).limit(1)) + if not asset: + raise HTTPException(404, f"Asset with uri: `{body.uri}` was not found") + timestamp = timezone.utcnow() + + assets_event = asset_manager.register_asset_change( + asset=Asset(uri=body.uri), + timestamp=timestamp, + extra=body.extra, + session=session, + ) + + if not assets_event: + raise HTTPException(404, f"Asset with uri: `{body.uri}` was not found") + return AssetEventResponse.model_validate(assets_event, from_attributes=True) + + @assets_router.get( "/assets/{uri:path}", responses=create_openapi_http_exception_doc([401, 403, 404]), diff --git a/airflow/api_fastapi/core_api/routes/public/dag_run.py b/airflow/api_fastapi/core_api/routes/public/dag_run.py index 810896806eea..decc7ff2b285 100644 --- a/airflow/api_fastapi/core_api/routes/public/dag_run.py +++ b/airflow/api_fastapi/core_api/routes/public/dag_run.py @@ -30,11 +30,17 @@ ) from airflow.api_fastapi.common.db.common import get_session from airflow.api_fastapi.common.router import AirflowRouter +from airflow.api_fastapi.core_api.datamodels.assets import AssetEventCollectionResponse, AssetEventResponse from airflow.api_fastapi.core_api.datamodels.dag_run import ( + DAGRunClearBody, DAGRunPatchBody, DAGRunPatchStates, DAGRunResponse, ) +from airflow.api_fastapi.core_api.datamodels.task_instances import ( + TaskInstanceCollectionResponse, + TaskInstanceResponse, +) from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.models import DAG, DagRun @@ -142,3 +148,83 @@ def patch_dag_run( dag_run = session.get(DagRun, dag_run.id) return DAGRunResponse.model_validate(dag_run, from_attributes=True) + + +@dag_run_router.get( + "/{dag_run_id}/upstreamAssetEvents", + responses=create_openapi_http_exception_doc( + [ + status.HTTP_404_NOT_FOUND, + ] + ), +) +def get_upstream_asset_events( + dag_id: str, dag_run_id: str, session: Annotated[Session, Depends(get_session)] +) -> AssetEventCollectionResponse: + """If dag run is asset-triggered, return the asset events that triggered it.""" + dag_run: DagRun | None = session.scalar( + select(DagRun).where( + DagRun.dag_id == dag_id, + DagRun.run_id == dag_run_id, + ) + ) + if dag_run is None: + raise HTTPException( + status.HTTP_404_NOT_FOUND, + f"The DagRun with dag_id: `{dag_id}` and run_id: `{dag_run_id}` was not found", + ) + events = dag_run.consumed_asset_events + return 
AssetEventCollectionResponse( + asset_events=[ + AssetEventResponse.model_validate(asset_event, from_attributes=True) for asset_event in events + ], + total_entries=len(events), + ) + + +@dag_run_router.post( + "/{dag_run_id}/clear", responses=create_openapi_http_exception_doc([status.HTTP_404_NOT_FOUND]) +) +def clear_dag_run( + dag_id: str, + dag_run_id: str, + body: DAGRunClearBody, + request: Request, + session: Annotated[Session, Depends(get_session)], +) -> TaskInstanceCollectionResponse | DAGRunResponse: + dag_run = session.scalar(select(DagRun).filter_by(dag_id=dag_id, run_id=dag_run_id)) + if dag_run is None: + raise HTTPException( + 404, f"The DagRun with dag_id: `{dag_id}` and run_id: `{dag_run_id}` was not found" + ) + + dag: DAG = request.app.state.dag_bag.get_dag(dag_id) + start_date = dag_run.logical_date + end_date = dag_run.logical_date + + if body.dry_run: + task_instances = dag.clear( + start_date=start_date, + end_date=end_date, + task_ids=None, + only_failed=False, + dry_run=True, + session=session, + ) + + return TaskInstanceCollectionResponse( + task_instances=[ + TaskInstanceResponse.model_validate(ti, from_attributes=True) for ti in task_instances + ], + total_entries=len(task_instances), + ) + else: + dag.clear( + start_date=dag_run.start_date, + end_date=dag_run.end_date, + task_ids=None, + only_failed=False, + session=session, + ) + dag_run_cleared = session.scalar(select(DagRun).where(DagRun.id == dag_run.id)) + return DAGRunResponse.model_validate(dag_run_cleared, from_attributes=True) diff --git a/airflow/api_fastapi/core_api/routes/public/event_logs.py b/airflow/api_fastapi/core_api/routes/public/event_logs.py index 4047bc5a2749..6405c177a390 100644 --- a/airflow/api_fastapi/core_api/routes/public/event_logs.py +++ b/airflow/api_fastapi/core_api/routes/public/event_logs.py @@ -79,7 +79,7 @@ def get_event_logs( "task_id", "run_id", "event", - "execution_date", # logical_date + "logical_date", "owner", "extra", ], diff --git a/airflow/api_fastapi/core_api/routes/public/task_instances.py b/airflow/api_fastapi/core_api/routes/public/task_instances.py index 56fa57d3ebeb..271f75e69e68 100644 --- a/airflow/api_fastapi/core_api/routes/public/task_instances.py +++ b/airflow/api_fastapi/core_api/routes/public/task_instances.py @@ -25,14 +25,24 @@ from airflow.api_fastapi.common.db.common import get_session, paginated_select from airflow.api_fastapi.common.parameters import ( + DagIdsFilter, + DagRunIdsFilter, + LimitFilter, + OffsetFilter, QueryLimit, QueryOffset, QueryTIExecutorFilter, QueryTIPoolFilter, QueryTIQueueFilter, QueryTIStateFilter, + Range, RangeFilter, SortParam, + TaskIdsFilter, + TIExecutorFilter, + TIPoolFilter, + TIQueueFilter, + TIStateFilter, datetime_range_filter_factory, float_range_filter_factory, ) @@ -41,6 +51,7 @@ TaskDependencyCollectionResponse, TaskInstanceCollectionResponse, TaskInstanceResponse, + TaskInstancesBatchBody, ) from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.exceptions import TaskNotFound @@ -95,9 +106,7 @@ def get_mapped_task_instances( dag_run_id: str, task_id: str, request: Request, - logical_date_range: Annotated[ - RangeFilter, Depends(datetime_range_filter_factory("logical_date", TI, "execution_date")) - ], + logical_date_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("logical_date", TI))], start_date_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("start_date", TI))], end_date_range: Annotated[RangeFilter, 
Depends(datetime_range_filter_factory("end_date", TI))], update_at_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("updated_at", TI))], @@ -160,7 +169,7 @@ def get_mapped_task_instances( session, ) - task_instances = session.scalars(task_instance_select).all() + task_instances = session.scalars(task_instance_select) return TaskInstanceCollectionResponse( task_instances=[ @@ -270,9 +279,7 @@ def get_task_instances( dag_id: str, dag_run_id: str, request: Request, - logical_date: Annotated[ - RangeFilter, Depends(datetime_range_filter_factory("logical_date", TI, "execution_date")) - ], + logical_date: Annotated[RangeFilter, Depends(datetime_range_filter_factory("logical_date", TI))], start_date_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("start_date", TI))], end_date_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("end_date", TI))], update_at_range: Annotated[RangeFilter, Depends(datetime_range_filter_factory("updated_at", TI))], @@ -326,7 +333,85 @@ def get_task_instances( session, ) - task_instances = session.scalars(task_instance_select).all() + task_instances = session.scalars(task_instance_select) + + return TaskInstanceCollectionResponse( + task_instances=[ + TaskInstanceResponse.model_validate(task_instance, from_attributes=True) + for task_instance in task_instances + ], + total_entries=total_entries, + ) + + +@task_instances_router.post( + "/list", + responses=create_openapi_http_exception_doc([status.HTTP_404_NOT_FOUND]), +) +def get_task_instances_batch( + body: TaskInstancesBatchBody, + session: Annotated[Session, Depends(get_session)], +) -> TaskInstanceCollectionResponse: + """Get list of task instances.""" + dag_ids = DagIdsFilter(TI, body.dag_ids) + dag_run_ids = DagRunIdsFilter(TI, body.dag_run_ids) + task_ids = TaskIdsFilter(TI, body.task_ids) + logical_date = RangeFilter( + Range(lower_bound=body.logical_date_gte, upper_bound=body.logical_date_lte), + attribute=TI.logical_date, + ) + start_date = RangeFilter( + Range(lower_bound=body.start_date_gte, upper_bound=body.start_date_lte), + attribute=TI.start_date, + ) + end_date = RangeFilter( + Range(lower_bound=body.end_date_gte, upper_bound=body.end_date_lte), + attribute=TI.end_date, + ) + duration = RangeFilter( + Range(lower_bound=body.duration_gte, upper_bound=body.duration_lte), + attribute=TI.duration, + ) + state = TIStateFilter(body.state) + pool = TIPoolFilter(body.pool) + queue = TIQueueFilter(body.queue) + executor = TIExecutorFilter(body.executor) + + offset = OffsetFilter(body.page_offset) + limit = LimitFilter(body.page_limit) + + order_by = SortParam( + ["id", "state", "duration", "start_date", "end_date", "map_index"], + TI, + ).set_value(body.order_by) + + base_query = select(TI).join(TI.dag_run) + task_instance_select, total_entries = paginated_select( + base_query, + [ + dag_ids, + dag_run_ids, + task_ids, + logical_date, + start_date, + end_date, + duration, + state, + pool, + queue, + executor, + ], + order_by, + offset, + limit, + session, + ) + + task_instance_select = task_instance_select.options( + joinedload(TI.rendered_task_instance_fields), joinedload(TI.task_instance_note) + ) + + task_instances = session.scalars(task_instance_select) return TaskInstanceCollectionResponse( task_instances=[ diff --git a/airflow/api_fastapi/core_api/routes/public/tasks.py b/airflow/api_fastapi/core_api/routes/public/tasks.py index 574d2fc7b782..a8a366cf6df0 100644 --- a/airflow/api_fastapi/core_api/routes/public/tasks.py +++ 
b/airflow/api_fastapi/core_api/routes/public/tasks.py @@ -17,26 +17,52 @@ from __future__ import annotations +from operator import attrgetter + from fastapi import HTTPException, Request, status from airflow.api_fastapi.common.router import AirflowRouter -from airflow.api_fastapi.common.types import get_class_ref -from airflow.api_fastapi.core_api.datamodels.tasks import TaskResponse +from airflow.api_fastapi.core_api.datamodels.tasks import TaskCollectionResponse, TaskResponse from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.exceptions import TaskNotFound from airflow.models import DAG -from airflow.models.mappedoperator import MappedOperator tasks_router = AirflowRouter(tags=["Task"], prefix="/dags/{dag_id}/tasks") +@tasks_router.get( + "/", + responses=create_openapi_http_exception_doc( + [ + status.HTTP_400_BAD_REQUEST, + status.HTTP_404_NOT_FOUND, + ] + ), +) +def get_tasks( + dag_id: str, + request: Request, + order_by: str = "task_id", +) -> TaskCollectionResponse: + """Get tasks for DAG.""" + dag: DAG = request.app.state.dag_bag.get_dag(dag_id) + if not dag: + raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id} was not found") + try: + tasks = sorted(dag.tasks, key=attrgetter(order_by.lstrip("-")), reverse=(order_by[0:1] == "-")) + except AttributeError as err: + raise HTTPException(status.HTTP_400_BAD_REQUEST, str(err)) + return TaskCollectionResponse( + tasks=[TaskResponse.model_validate(task, from_attributes=True) for task in tasks], + total_entries=(len(tasks)), + ) + + @tasks_router.get( "/{task_id}", responses=create_openapi_http_exception_doc( [ status.HTTP_400_BAD_REQUEST, - status.HTTP_401_UNAUTHORIZED, - status.HTTP_403_FORBIDDEN, status.HTTP_404_NOT_FOUND, ] ), @@ -48,9 +74,6 @@ def get_task(dag_id: str, task_id, request: Request) -> TaskResponse: raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id} was not found") try: task = dag.get_task(task_id=task_id) - task.__dict__.update( - {"class_ref": get_class_ref(task), "is_mapped": isinstance(task, MappedOperator)} - ) except TaskNotFound: raise HTTPException(status.HTTP_404_NOT_FOUND, f"Task with id {task_id} was not found") return TaskResponse.model_validate(task, from_attributes=True) diff --git a/airflow/api_fastapi/core_api/routes/ui/assets.py b/airflow/api_fastapi/core_api/routes/ui/assets.py index 1615f97c2bd2..ca2aae1bb6c0 100644 --- a/airflow/api_fastapi/core_api/routes/ui/assets.py +++ b/airflow/api_fastapi/core_api/routes/ui/assets.py @@ -71,8 +71,8 @@ def next_run_assets( and_( AssetEvent.asset_id == AssetModel.id, ( - AssetEvent.timestamp >= latest_run.execution_date - if latest_run and latest_run.execution_date + AssetEvent.timestamp >= latest_run.logical_date + if latest_run and latest_run.logical_date else True ), ), diff --git a/airflow/api_fastapi/core_api/routes/ui/dags.py b/airflow/api_fastapi/core_api/routes/ui/dags.py index da906bbbd3c9..fad736ced379 100644 --- a/airflow/api_fastapi/core_api/routes/ui/dags.py +++ b/airflow/api_fastapi/core_api/routes/ui/dags.py @@ -68,39 +68,39 @@ def recent_dag_runs( recent_runs_subquery = ( select( DagRun.dag_id, - DagRun.execution_date, + DagRun.logical_date, func.rank() .over( partition_by=DagRun.dag_id, - order_by=DagRun.execution_date.desc(), + order_by=DagRun.logical_date.desc(), ) .label("rank"), ) - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .subquery() ) dags_with_recent_dag_runs_select = ( select( DagRun, DagModel, - 
recent_runs_subquery.c.execution_date, + recent_runs_subquery.c.logical_date, ) .join(DagModel, DagModel.dag_id == recent_runs_subquery.c.dag_id) .join( DagRun, and_( DagRun.dag_id == DagModel.dag_id, - DagRun.execution_date == recent_runs_subquery.c.execution_date, + DagRun.logical_date == recent_runs_subquery.c.logical_date, ), ) .where(recent_runs_subquery.c.rank <= dag_runs_limit) .group_by( DagModel.dag_id, - recent_runs_subquery.c.execution_date, - DagRun.execution_date, + recent_runs_subquery.c.logical_date, + DagRun.logical_date, DagRun.id, ) - .order_by(recent_runs_subquery.c.execution_date.desc()) + .order_by(recent_runs_subquery.c.logical_date.desc()) ) dags_with_recent_dag_runs_select_filter, _ = paginated_select( dags_with_recent_dag_runs_select, diff --git a/airflow/api_fastapi/execution_api/datamodels.py b/airflow/api_fastapi/execution_api/datamodels.py index 32115c9ac5a4..ec8be531e103 100644 --- a/airflow/api_fastapi/execution_api/datamodels.py +++ b/airflow/api_fastapi/execution_api/datamodels.py @@ -29,7 +29,7 @@ ) from airflow.api_fastapi.common.types import UtcDateTime -from airflow.utils.state import State, TaskInstanceState as TIState +from airflow.utils.state import IntermediateTIState, TaskInstanceState as TIState, TerminalTIState class TIEnterRunningPayload(BaseModel): @@ -40,7 +40,7 @@ class TIEnterRunningPayload(BaseModel): state: Annotated[ Literal[TIState.RUNNING], # Specify a default in the schema, but not in code, so Pydantic marks it as required. - WithJsonSchema({"enum": [TIState.RUNNING], "default": TIState.RUNNING}), + WithJsonSchema({"type": "string", "enum": [TIState.RUNNING], "default": TIState.RUNNING}), ] hostname: str """Hostname where this task has started""" @@ -55,11 +55,7 @@ class TIEnterRunningPayload(BaseModel): class TITerminalStatePayload(BaseModel): """Schema for updating TaskInstance to a terminal state (e.g., SUCCESS or FAILED).""" - state: Annotated[ - Literal[TIState.SUCCESS, TIState.FAILED, TIState.SKIPPED], - Field(title="TerminalState"), - WithJsonSchema({"enum": list(State.ran_and_finished_states)}), - ] + state: TerminalTIState end_date: UtcDateTime """When the task completed executing""" @@ -68,18 +64,7 @@ class TITerminalStatePayload(BaseModel): class TITargetStatePayload(BaseModel): """Schema for updating TaskInstance to a target state, excluding terminal and running states.""" - state: Annotated[ - TIState, - # For the OpenAPI schema generation, - # make sure we do not include RUNNING as a valid state here - WithJsonSchema( - { - "enum": [ - state for state in TIState if state not in (State.ran_and_finished_states | {State.NONE}) - ] - } - ), - ] + state: IntermediateTIState def ti_state_discriminator(v: dict[str, str] | BaseModel) -> str: @@ -97,7 +82,7 @@ def ti_state_discriminator(v: dict[str, str] | BaseModel) -> str: state = getattr(v, "state", None) if state == TIState.RUNNING: return str(state) - elif state in State.ran_and_finished_states: + elif state in set(TerminalTIState): return "_terminal_" return "_other_" diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index e93d5e25c631..00dd68041cbd 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -144,17 +144,17 @@ def string_lower_type(val): # Shared ARG_DAG_ID = Arg(("dag_id",), help="The id of the dag") ARG_TASK_ID = Arg(("task_id",), help="The id of the task") -ARG_EXECUTION_DATE = Arg(("execution_date",), help="The execution date of the DAG", type=parsedate) -ARG_EXECUTION_DATE_OPTIONAL = Arg( - ("execution_date",), nargs="?", 
help="The execution date of the DAG (optional)", type=parsedate +ARG_LOGICAL_DATE = Arg(("logical_date",), help="The logical date of the DAG", type=parsedate) +ARG_LOGICAL_DATE_OPTIONAL = Arg( + ("logical_date",), nargs="?", help="The logical date of the DAG (optional)", type=parsedate ) -ARG_EXECUTION_DATE_OR_RUN_ID = Arg( - ("execution_date_or_run_id",), help="The execution_date of the DAG or run_id of the DAGRun" +ARG_LOGICAL_DATE_OR_RUN_ID = Arg( + ("logical_date_or_run_id",), help="The logical date of the DAG or run_id of the DAGRun" ) -ARG_EXECUTION_DATE_OR_RUN_ID_OPTIONAL = Arg( - ("execution_date_or_run_id",), +ARG_LOGICAL_DATE_OR_RUN_ID_OPTIONAL = Arg( + ("logical_date_or_run_id",), nargs="?", - help="The execution_date of the DAG or run_id of the DAGRun (optional)", + help="The logical date of the DAG or run_id of the DAGRun (optional)", ) ARG_TASK_REGEX = Arg(("-t", "--task-regex"), help="The regex to filter specific task_ids (optional)") ARG_SUBDIR = Arg( @@ -261,7 +261,7 @@ def string_lower_type(val): ("-n", "--num-executions"), default=1, type=positive_int(allow_zero=False), - help="The number of next execution datetimes to show", + help="The number of next logical date times to show", ) # misc @@ -410,7 +410,7 @@ def string_lower_type(val): # trigger_dag ARG_RUN_ID = Arg(("-r", "--run-id"), help="Helps to identify this run") ARG_CONF = Arg(("-c", "--conf"), help="JSON string that gets pickled into the DagRun's conf attribute") -ARG_EXEC_DATE = Arg(("-e", "--exec-date"), help="The execution date of the DAG", type=parsedate) +ARG_EXEC_DATE = Arg(("-e", "--exec-date"), help="The logical date of the DAG", type=parsedate) ARG_REPLACE_MICRO = Arg( ("--no-replace-microseconds",), help="whether microseconds should be zeroed", @@ -1089,13 +1089,13 @@ class GroupCommand(NamedTuple): name="state", help="Get the status of a dag run", func=lazy_load_command("airflow.cli.commands.dag_command.dag_state"), - args=(ARG_DAG_ID, ARG_EXECUTION_DATE, ARG_SUBDIR, ARG_VERBOSE), + args=(ARG_DAG_ID, ARG_LOGICAL_DATE, ARG_SUBDIR, ARG_VERBOSE), ), ActionCommand( name="next-execution", - help="Get the next execution datetimes of a DAG", + help="Get the next logical datetimes of a DAG", description=( - "Get the next execution datetimes of a DAG. It returns one execution unless the " + "Get the next logical datetimes of a DAG. 
It returns one execution unless the " "num-executions option is given" ), func=lazy_load_command("airflow.cli.commands.dag_command.dag_next_execution"), @@ -1208,7 +1208,7 @@ class GroupCommand(NamedTuple): name="test", help="Execute one single DagRun", description=( - "Execute one single DagRun for a given DAG and execution date.\n" + "Execute one single DagRun for a given DAG and logical date.\n" "\n" "The --imgcat-dagrun option only works in iTerm.\n" "\n" @@ -1221,15 +1221,15 @@ class GroupCommand(NamedTuple): "see: https://www.graphviz.org/doc/info/output.html\n" "\n" "If you want to create a PNG file then you should execute the following command:\n" - "airflow dags test --save-dagrun output.png\n" + "airflow dags test --save-dagrun output.png\n" "\n" "If you want to create a DOT file then you should execute the following command:\n" - "airflow dags test --save-dagrun output.dot\n" + "airflow dags test --save-dagrun output.dot\n" ), func=lazy_load_command("airflow.cli.commands.dag_command.dag_test"), args=( ARG_DAG_ID, - ARG_EXECUTION_DATE_OPTIONAL, + ARG_LOGICAL_DATE_OPTIONAL, ARG_CONF, ARG_SUBDIR, ARG_SHOW_DAGRUN, @@ -1289,7 +1289,7 @@ class GroupCommand(NamedTuple): args=( ARG_DAG_ID, ARG_TASK_ID, - ARG_EXECUTION_DATE_OR_RUN_ID, + ARG_LOGICAL_DATE_OR_RUN_ID, ARG_SUBDIR, ARG_VERBOSE, ARG_MAP_INDEX, @@ -1304,7 +1304,7 @@ class GroupCommand(NamedTuple): "and then run by an executor." ), func=lazy_load_command("airflow.cli.commands.task_command.task_failed_deps"), - args=(ARG_DAG_ID, ARG_TASK_ID, ARG_EXECUTION_DATE_OR_RUN_ID, ARG_SUBDIR, ARG_MAP_INDEX, ARG_VERBOSE), + args=(ARG_DAG_ID, ARG_TASK_ID, ARG_LOGICAL_DATE_OR_RUN_ID, ARG_SUBDIR, ARG_MAP_INDEX, ARG_VERBOSE), ), ActionCommand( name="render", @@ -1313,7 +1313,7 @@ class GroupCommand(NamedTuple): args=( ARG_DAG_ID, ARG_TASK_ID, - ARG_EXECUTION_DATE_OR_RUN_ID, + ARG_LOGICAL_DATE_OR_RUN_ID, ARG_SUBDIR, ARG_VERBOSE, ARG_MAP_INDEX, @@ -1326,7 +1326,7 @@ class GroupCommand(NamedTuple): args=( ARG_DAG_ID, ARG_TASK_ID, - ARG_EXECUTION_DATE_OR_RUN_ID, + ARG_LOGICAL_DATE_OR_RUN_ID, ARG_SUBDIR, ARG_MARK_SUCCESS, ARG_FORCE, @@ -1355,7 +1355,7 @@ class GroupCommand(NamedTuple): args=( ARG_DAG_ID, ARG_TASK_ID, - ARG_EXECUTION_DATE_OR_RUN_ID_OPTIONAL, + ARG_LOGICAL_DATE_OR_RUN_ID_OPTIONAL, ARG_SUBDIR, ARG_DRY_RUN, ARG_TASK_PARAMS, @@ -1369,7 +1369,7 @@ class GroupCommand(NamedTuple): name="states-for-dag-run", help="Get the status of all task instances in a dag run", func=lazy_load_command("airflow.cli.commands.task_command.task_states_for_dag_run"), - args=(ARG_DAG_ID, ARG_EXECUTION_DATE_OR_RUN_ID, ARG_OUTPUT, ARG_VERBOSE), + args=(ARG_DAG_ID, ARG_LOGICAL_DATE_OR_RUN_ID, ARG_OUTPUT, ARG_VERBOSE), ), ) POOLS_COMMANDS = ( @@ -1817,7 +1817,7 @@ class GroupCommand(NamedTuple): help="Generate YAML files for all tasks in DAG. 
Useful for debugging tasks without " "launching into a cluster", func=lazy_load_command("airflow.providers.cncf.kubernetes.cli.kubernetes_command.generate_pod_yaml"), - args=(ARG_DAG_ID, ARG_EXECUTION_DATE, ARG_SUBDIR, ARG_OUTPUT_PATH, ARG_VERBOSE), + args=(ARG_DAG_ID, ARG_LOGICAL_DATE, ARG_SUBDIR, ARG_OUTPUT_PATH, ARG_VERBOSE), ), ) diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index dfff75ee2d6c..0b4d3192d6ed 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -65,7 +65,7 @@ def dag_trigger(args) -> None: dag_id=args.dag_id, run_id=args.run_id, conf=args.conf, - execution_date=args.exec_date, + logical_date=args.exec_date, replace_microseconds=args.replace_microseconds, ) AirflowConsole().print_as( @@ -264,7 +264,7 @@ def dag_state(args, session: Session = NEW_SESSION) -> None: if not dag: raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table") - dr = session.scalar(select(DagRun).filter_by(dag_id=args.dag_id, execution_date=args.execution_date)) + dr = session.scalar(select(DagRun).filter_by(dag_id=args.dag_id, logical_date=args.logical_date)) out = dr.state if dr else None conf_out = "" if out and dr.conf: @@ -276,7 +276,7 @@ def dag_state(args, session: Session = NEW_SESSION) -> None: @providers_configuration_loaded def dag_next_execution(args) -> None: """ - Return the next execution datetime of a DAG at the command line. + Return the next logical datetime of a DAG at the command line. >>> airflow dags next-execution tutorial 2018-08-31 10:38:00 @@ -461,12 +461,12 @@ def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSI dag_id=args.dag_id, state=state, no_backfills=args.no_backfill, - execution_start_date=args.start_date, - execution_end_date=args.end_date, + logical_start_date=args.start_date, + logical_end_date=args.end_date, session=session, ) - dag_runs.sort(key=lambda x: x.execution_date, reverse=True) + dag_runs.sort(key=lambda x: x.logical_date, reverse=True) AirflowConsole().print_as( data=dag_runs, output=args.output, @@ -474,7 +474,7 @@ def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSI "dag_id": dr.dag_id, "run_id": dr.run_id, "state": dr.state, - "execution_date": dr.execution_date.isoformat(), + "logical_date": dr.logical_date.isoformat(), "start_date": dr.start_date.isoformat() if dr.start_date else "", "end_date": dr.end_date.isoformat() if dr.end_date else "", }, @@ -485,14 +485,14 @@ def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSI @providers_configuration_loaded @provide_session def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: - """Execute one single DagRun for a given DAG and execution date.""" + """Execute one single DagRun for a given DAG and logical date.""" run_conf = None if args.conf: try: run_conf = json.loads(args.conf) except ValueError as e: raise SystemExit(f"Configuration {args.conf!r} is not valid JSON. 
Error: {e}") - execution_date = args.execution_date or timezone.utcnow() + logical_date = args.logical_date or timezone.utcnow() use_executor = args.use_executor mark_success_pattern = ( @@ -502,7 +502,7 @@ def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> No with _airflow_parsing_context_manager(dag_id=args.dag_id): dag = dag or get_dag(subdir=args.subdir, dag_id=args.dag_id) dr: DagRun = dag.test( - execution_date=execution_date, + logical_date=logical_date, run_conf=run_conf, use_executor=use_executor, mark_success_pattern=mark_success_pattern, @@ -515,7 +515,7 @@ def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> No tis = session.scalars( select(TaskInstance).where( TaskInstance.dag_id == args.dag_id, - TaskInstance.execution_date == execution_date, + TaskInstance.logical_date == logical_date, ) ).all() diff --git a/airflow/cli/commands/kubernetes_command.py b/airflow/cli/commands/kubernetes_command.py index eab11133c9dd..8f5f7333b7f4 100644 --- a/airflow/cli/commands/kubernetes_command.py +++ b/airflow/cli/commands/kubernetes_command.py @@ -48,10 +48,10 @@ @providers_configuration_loaded def generate_pod_yaml(args): """Generate yaml files for each task in the DAG. Used for testing output of KubernetesExecutor.""" - execution_date = args.execution_date + logical_date = args.logical_date dag = get_dag(subdir=args.subdir, dag_id=args.dag_id) yaml_output_path = args.output_path - dr = DagRun(dag.dag_id, execution_date=execution_date) + dr = DagRun(dag.dag_id, logical_date=logical_date) kube_config = KubeConfig() for task in dag.tasks: ti = TaskInstance(task, None) @@ -62,7 +62,7 @@ def generate_pod_yaml(args): pod_id=create_unique_id(args.dag_id, ti.task_id), try_number=ti.try_number, kube_image=kube_config.kube_image, - date=ti.execution_date, + date=ti.logical_date, args=ti.command_as_list(), pod_override_object=PodGenerator.from_obj(ti.executor_config), scheduler_job_id="worker-config", @@ -71,7 +71,7 @@ def generate_pod_yaml(args): with_mutation_hook=True, ) api_client = ApiClient() - date_string = pod_generator.datetime_to_label_safe_datestring(execution_date) + date_string = pod_generator.datetime_to_label_safe_datestring(logical_date) yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml" os.makedirs(os.path.dirname(yaml_output_path + "/airflow_yaml_output/"), exist_ok=True) with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name, "w") as output: diff --git a/airflow/cli/commands/task_command.py b/airflow/cli/commands/task_command.py index e14c18399555..396186bf14d2 100644 --- a/airflow/cli/commands/task_command.py +++ b/airflow/cli/commands/task_command.py @@ -114,43 +114,43 @@ def _get_dag_run( """ if not exec_date_or_run_id and not create_if_necessary: raise ValueError("Must provide `exec_date_or_run_id` if not `create_if_necessary`.") - execution_date: pendulum.DateTime | None = None + logical_date: pendulum.DateTime | None = None if exec_date_or_run_id: dag_run = DAG.fetch_dagrun(dag_id=dag.dag_id, run_id=exec_date_or_run_id, session=session) if dag_run: return dag_run, False with suppress(ParserError, TypeError): - execution_date = timezone.parse(exec_date_or_run_id) - if execution_date: - dag_run = DAG.fetch_dagrun(dag_id=dag.dag_id, execution_date=execution_date, session=session) + logical_date = timezone.parse(exec_date_or_run_id) + if logical_date: + dag_run = DAG.fetch_dagrun(dag_id=dag.dag_id, logical_date=logical_date, session=session) if dag_run: return dag_run, False elif not 
create_if_necessary: raise DagRunNotFound( - f"DagRun for {dag.dag_id} with run_id or execution_date " + f"DagRun for {dag.dag_id} with run_id or logical_date " f"of {exec_date_or_run_id!r} not found" ) - if execution_date is not None: - dag_run_execution_date = execution_date + if logical_date is not None: + dag_run_logical_date = logical_date else: - dag_run_execution_date = pendulum.instance(timezone.utcnow()) + dag_run_logical_date = pendulum.instance(timezone.utcnow()) if create_if_necessary == "memory": dag_run = DagRun( dag_id=dag.dag_id, run_id=exec_date_or_run_id, - execution_date=dag_run_execution_date, - data_interval=dag.timetable.infer_manual_data_interval(run_after=dag_run_execution_date), + logical_date=dag_run_logical_date, + data_interval=dag.timetable.infer_manual_data_interval(run_after=dag_run_logical_date), triggered_by=DagRunTriggeredByType.CLI, ) return dag_run, True elif create_if_necessary == "db": dag_run = dag.create_dagrun( state=DagRunState.QUEUED, - execution_date=dag_run_execution_date, + logical_date=dag_run_logical_date, run_id=_generate_temporary_run_id(), - data_interval=dag.timetable.infer_manual_data_interval(run_after=dag_run_execution_date), + data_interval=dag.timetable.infer_manual_data_interval(run_after=dag_run_logical_date), session=session, triggered_by=DagRunTriggeredByType.CLI, ) @@ -165,7 +165,7 @@ def _get_ti_db_access( task: Operator, map_index: int, *, - exec_date_or_run_id: str | None = None, + logical_date_or_run_id: str | None = None, pool: str | None = None, create_if_necessary: CreateIfNecessary = False, session: Session = NEW_SESSION, @@ -177,7 +177,7 @@ def _get_ti_db_access( if task.task_id not in dag.task_dict: raise ValueError(f"Provided task {task.task_id} is not in dag '{dag.dag_id}.") - if not exec_date_or_run_id and not create_if_necessary: + if not logical_date_or_run_id and not create_if_necessary: raise ValueError("Must provide `exec_date_or_run_id` if not `create_if_necessary`.") if task.get_needs_expansion(): if map_index < 0: @@ -186,7 +186,7 @@ def _get_ti_db_access( raise RuntimeError("map_index passed to non-mapped task") dag_run, dr_created = _get_dag_run( dag=dag, - exec_date_or_run_id=exec_date_or_run_id, + exec_date_or_run_id=logical_date_or_run_id, create_if_necessary=create_if_necessary, session=session, ) @@ -197,7 +197,7 @@ def _get_ti_db_access( if not create_if_necessary: raise TaskInstanceNotFound( f"TaskInstance for {dag.dag_id}, {task.task_id}, map={map_index} with " - f"run_id or execution_date of {exec_date_or_run_id!r} not found" + f"run_id or logical_date of {logical_date_or_run_id!r} not found" ) # TODO: Validate map_index is in range? 
ti = TaskInstance(task, run_id=dag_run.run_id, map_index=map_index) @@ -214,7 +214,7 @@ def _get_ti( task: Operator, map_index: int, *, - exec_date_or_run_id: str | None = None, + logical_date_or_run_id: str | None = None, pool: str | None = None, create_if_necessary: CreateIfNecessary = False, ): @@ -226,7 +226,7 @@ def _get_ti( dag=dag, task=task, map_index=map_index, - exec_date_or_run_id=exec_date_or_run_id, + logical_date_or_run_id=logical_date_or_run_id, pool=pool, create_if_necessary=create_if_necessary, ) @@ -438,7 +438,7 @@ def task_run(args, dag: DAG | None = None) -> TaskReturnCode | None: else: _dag = dag task = _dag.get_task(task_id=args.task_id) - ti, _ = _get_ti(task, args.map_index, exec_date_or_run_id=args.execution_date_or_run_id, pool=args.pool) + ti, _ = _get_ti(task, args.map_index, logical_date_or_run_id=args.logical_date_or_run_id, pool=args.pool) ti.init_run_context(raw=args.raw) hostname = get_hostname() @@ -487,7 +487,7 @@ def task_failed_deps(args) -> None: """ dag = get_dag(args.subdir, args.dag_id) task = dag.get_task(task_id=args.task_id) - ti, _ = _get_ti(task, args.map_index, exec_date_or_run_id=args.execution_date_or_run_id) + ti, _ = _get_ti(task, args.map_index, logical_date_or_run_id=args.logical_date_or_run_id) # tasks_failed-deps is executed with access to the database. if isinstance(ti, TaskInstancePydantic): raise ValueError("not a TaskInstance") @@ -514,7 +514,7 @@ def task_state(args) -> None: """ dag = get_dag(args.subdir, args.dag_id) task = dag.get_task(task_id=args.task_id) - ti, _ = _get_ti(task, args.map_index, exec_date_or_run_id=args.execution_date_or_run_id) + ti, _ = _get_ti(task, args.map_index, logical_date_or_run_id=args.logical_date_or_run_id) # task_state is executed with access to the database. if isinstance(ti, TaskInstancePydantic): raise ValueError("not a TaskInstance") @@ -572,20 +572,20 @@ def _guess_debugger() -> _SupportedDebugger: def task_states_for_dag_run(args, session: Session = NEW_SESSION) -> None: """Get the status of all task instances in a DagRun.""" dag_run = session.scalar( - select(DagRun).where(DagRun.run_id == args.execution_date_or_run_id, DagRun.dag_id == args.dag_id) + select(DagRun).where(DagRun.run_id == args.logical_date_or_run_id, DagRun.dag_id == args.dag_id) ) if not dag_run: try: - execution_date = timezone.parse(args.execution_date_or_run_id) + logical_date = timezone.parse(args.logical_date_or_run_id) dag_run = session.scalar( - select(DagRun).where(DagRun.execution_date == execution_date, DagRun.dag_id == args.dag_id) + select(DagRun).where(DagRun.logical_date == logical_date, DagRun.dag_id == args.dag_id) ) except (ParserError, TypeError) as err: - raise AirflowException(f"Error parsing the supplied execution_date. Error: {err}") + raise AirflowException(f"Error parsing the supplied logical_date. 
Error: {err}") if dag_run is None: raise DagRunNotFound( - f"DagRun for {args.dag_id} with run_id or execution_date of {args.execution_date_or_run_id!r} " + f"DagRun for {args.dag_id} with run_id or logical_date of {args.logical_date_or_run_id!r} " "not found" ) @@ -594,7 +594,7 @@ def task_states_for_dag_run(args, session: Session = NEW_SESSION) -> None: def format_task_instance(ti: TaskInstance) -> dict[str, str]: data = { "dag_id": ti.dag_id, - "execution_date": dag_run.execution_date.isoformat(), + "logical_date": dag_run.logical_date.isoformat(), "task_id": ti.task_id, "state": ti.state, "start_date": ti.start_date.isoformat() if ti.start_date else "", @@ -643,7 +643,7 @@ def task_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> N task.params.validate() ti, dr_created = _get_ti( - task, args.map_index, exec_date_or_run_id=args.execution_date_or_run_id, create_if_necessary="db" + task, args.map_index, logical_date_or_run_id=args.logical_date_or_run_id, create_if_necessary="db" ) # task_test is executed with access to the database. if isinstance(ti, TaskInstancePydantic): @@ -694,7 +694,7 @@ def task_render(args, dag: DAG | None = None) -> None: dag = get_dag(args.subdir, args.dag_id) task = dag.get_task(task_id=args.task_id) ti, _ = _get_ti( - task, args.map_index, exec_date_or_run_id=args.execution_date_or_run_id, create_if_necessary="memory" + task, args.map_index, logical_date_or_run_id=args.logical_date_or_run_id, create_if_necessary="memory" ) # task_render is executed with access to the database. if isinstance(ti, TaskInstancePydantic): diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index c77f9476b0d2..eba9f7b8c70e 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1064,24 +1064,6 @@ metrics: example: "\"scheduler,executor,dagrun,pool,triggerer,celery\" or \"^scheduler,^executor,heartbeat|timeout\"" default: "" - # TODO: Remove 'timer_unit_consistency' in Airflow 3.0 - timer_unit_consistency: - description: | - Controls the consistency of timer units across all metrics loggers - (e.g., Statsd, Datadog, OpenTelemetry) - for timing and duration-based metrics. When enabled, all timers will publish - metrics in milliseconds for consistency and alignment with Airflow's default - metrics behavior in version 3.0+. - - .. warning:: - - It will be the default behavior from Airflow 3.0. If disabled, timers may publish - in seconds for backwards compatibility, though it is recommended to enable this - setting to ensure metric uniformity and forward-compat with Airflow 3. - version_added: 2.11.0 - type: string - example: ~ - default: "False" statsd_on: description: | Enables sending metrics to StatsD. 
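With timer_unit_consistency removed here, milliseconds become the only timer unit (the removed description above and the metrics changes further down both state the 1000x conversion). A hedged sketch of what that means for code that previously assumed seconds:

    import time

    # The unified behavior publishes timings in milliseconds, mirroring the
    # 1000.0 * (time.perf_counter() - start) conversion used in airflow.metrics.protocols.
    start = time.perf_counter()
    time.sleep(0.01)  # stand-in for the timed work
    duration_ms = 1000.0 * (time.perf_counter() - start)

    # Dashboards or alerts written against the old seconds-based values need a /1000.0.
    duration_seconds = duration_ms / 1000.0
    print(f"{duration_ms:.1f} ms == {duration_seconds:.4f} s")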
diff --git a/airflow/configuration.py b/airflow/configuration.py index 82718325865f..cd218657133a 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -376,7 +376,7 @@ def inversed_deprecated_sections(self): }, "elasticsearch": { "log_id_template": ( - re2.compile("^" + re2.escape("{dag_id}-{task_id}-{execution_date}-{try_number}") + "$"), + re2.compile("^" + re2.escape("{dag_id}-{task_id}-{logical_date}-{try_number}") + "$"), "{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}", "3.0", ) diff --git a/airflow/dag_processing/collection.py b/airflow/dag_processing/collection.py index f608900ee76e..0ca121c56185 100644 --- a/airflow/dag_processing/collection.py +++ b/airflow/dag_processing/collection.py @@ -89,7 +89,7 @@ def _get_latest_runs_stmt(dag_ids: Collection[str]) -> Select: if len(dag_ids) == 1: # Index optimized fast path to avoid more complicated & slower groupby queryplan. (dag_id,) = dag_ids last_automated_runs_subq = ( - select(func.max(DagRun.execution_date).label("max_execution_date")) + select(func.max(DagRun.logical_date).label("max_execution_date")) .where( DagRun.dag_id == dag_id, DagRun.run_type.in_((DagRunType.BACKFILL_JOB, DagRunType.SCHEDULED)), @@ -98,11 +98,11 @@ def _get_latest_runs_stmt(dag_ids: Collection[str]) -> Select: ) query = select(DagRun).where( DagRun.dag_id == dag_id, - DagRun.execution_date == last_automated_runs_subq, + DagRun.logical_date == last_automated_runs_subq, ) else: last_automated_runs_subq = ( - select(DagRun.dag_id, func.max(DagRun.execution_date).label("max_execution_date")) + select(DagRun.dag_id, func.max(DagRun.logical_date).label("max_execution_date")) .where( DagRun.dag_id.in_(dag_ids), DagRun.run_type.in_((DagRunType.BACKFILL_JOB, DagRunType.SCHEDULED)), @@ -112,12 +112,12 @@ def _get_latest_runs_stmt(dag_ids: Collection[str]) -> Select: ) query = select(DagRun).where( DagRun.dag_id == last_automated_runs_subq.c.dag_id, - DagRun.execution_date == last_automated_runs_subq.c.max_execution_date, + DagRun.logical_date == last_automated_runs_subq.c.max_execution_date, ) return query.options( load_only( DagRun.dag_id, - DagRun.execution_date, + DagRun.logical_date, DagRun.data_interval_start, DagRun.data_interval_end, ) diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 394e09245127..219c7aa9776a 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -608,7 +608,7 @@ def _execute_dag_callbacks(cls, dagbag: DagBag, request: DagCallbackRequest, ses dag = dagbag.dags[request.dag_id] callbacks, context = DAG.fetch_callback( dag=dag, - dag_run_id=request.run_id, + run_id=request.run_id, success=not request.is_failure_callback, reason=request.msg, session=session, diff --git a/airflow/example_dags/example_branch_python_dop_operator_3.py b/airflow/example_dags/example_branch_python_dop_operator_3.py index a7031961ab55..4bb940ca36dd 100644 --- a/airflow/example_dags/example_branch_python_dop_operator_3.py +++ b/airflow/example_dags/example_branch_python_dop_operator_3.py @@ -32,15 +32,13 @@ @task.branch() def should_run(**kwargs) -> str: """ - Determine which empty_task should be run based on if the execution date minute is even or odd. + Determine which empty_task should be run based on if the logical date minute is even or odd. 
:param dict kwargs: Context :return: Id of the task to run """ - print( - f"------------- exec dttm = {kwargs['execution_date']} and minute = {kwargs['execution_date'].minute}" - ) - if kwargs["execution_date"].minute % 2 == 0: + print(f"------------- exec dttm = {kwargs['logical_date']} and minute = {kwargs['logical_date'].minute}") + if kwargs["logical_date"].minute % 2 == 0: return "empty_task_1" else: return "empty_task_2" diff --git a/airflow/example_dags/tutorial_objectstorage.py b/airflow/example_dags/tutorial_objectstorage.py index 4660aa3c8e8c..d03ec6ec8003 100644 --- a/airflow/example_dags/tutorial_objectstorage.py +++ b/airflow/example_dags/tutorial_objectstorage.py @@ -72,7 +72,7 @@ def get_air_quality_data(**kwargs) -> ObjectStoragePath: """ import pandas as pd - execution_date = kwargs["logical_date"] + logical_date = kwargs["logical_date"] start_time = kwargs["data_interval_start"] params = { @@ -83,7 +83,7 @@ def get_air_quality_data(**kwargs) -> ObjectStoragePath: "area": "Uusimaa", "param": ",".join(aq_fields.keys()), "starttime": start_time.isoformat(timespec="seconds"), - "endtime": execution_date.isoformat(timespec="seconds"), + "endtime": logical_date.isoformat(timespec="seconds"), "tz": "UTC", } @@ -93,7 +93,7 @@ def get_air_quality_data(**kwargs) -> ObjectStoragePath: # ensure the bucket exists base.mkdir(exist_ok=True) - formatted_date = execution_date.format("YYYYMMDD") + formatted_date = logical_date.format("YYYYMMDD") path = base / f"air_quality_{formatted_date}.parquet" df = pd.DataFrame(response.json()).astype(aq_fields) diff --git a/airflow/exceptions.py b/airflow/exceptions.py index 316fe880b66b..3b07b9a6fda9 100644 --- a/airflow/exceptions.py +++ b/airflow/exceptions.py @@ -230,12 +230,12 @@ class DagRunNotFound(AirflowNotFoundException): class DagRunAlreadyExists(AirflowBadRequest): """Raise when creating a DAG run for DAG which already has DAG run entry.""" - def __init__(self, dag_run: DagRun, execution_date: datetime.datetime, run_id: str) -> None: + def __init__(self, dag_run: DagRun, logical_date: datetime.datetime, run_id: str) -> None: super().__init__( - f"A DAG Run already exists for DAG {dag_run.dag_id} at {execution_date} with run id {run_id}" + f"A DAG Run already exists for DAG {dag_run.dag_id} at {logical_date} with run id {run_id}" ) self.dag_run = dag_run - self.execution_date = execution_date + self.logical_date = logical_date self.run_id = run_id def serialize(self): @@ -249,13 +249,13 @@ def serialize(self): run_id=self.dag_run.run_id, external_trigger=self.dag_run.external_trigger, run_type=self.dag_run.run_type, - execution_date=self.dag_run.execution_date, + logical_date=self.dag_run.logical_date, ) dag_run.id = self.dag_run.id return ( f"{cls.__module__}.{cls.__name__}", (), - {"dag_run": dag_run, "execution_date": self.execution_date, "run_id": self.run_id}, + {"dag_run": dag_run, "logical_date": self.logical_date, "run_id": self.run_id}, ) diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 5369f425c9c4..6c7887c643cb 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -340,7 +340,7 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - .where(not_(DM.is_paused)) .where(TI.state == TaskInstanceState.SCHEDULED) .options(selectinload(TI.dag_model)) - .order_by(-TI.priority_weight, DR.execution_date, TI.map_index) + .order_by(-TI.priority_weight, DR.logical_date, TI.map_index) ) if starved_pools: @@ -735,7 +735,7 @@ def 
process_executor_events( # Report execution for ti_key, (state, _) in event_buffer.items(): - # We create map (dag_id, task_id, execution_date) -> in-memory try_number + # We create map (dag_id, task_id, logical_date) -> in-memory try_number ti_primary_key_to_try_number_map[ti_key.primary] = ti_key.try_number cls.logger().info("Received executor event with state %s for task instance %s", state, ti_key) @@ -871,7 +871,7 @@ def _set_span_attrs__process_executor_events(cls, span, state, ti): "end_date": str(ti.end_date), "duration": ti.duration, "executor_config": str(ti.executor_config), - "execution_date": str(ti.execution_date), + "logical_date": str(ti.logical_date), "hostname": ti.hostname, "log_url": ti.log_url, "operator": str(ti.operator), @@ -1288,15 +1288,15 @@ def _mark_backfills_complete(self, session: Session = NEW_SESSION) -> None: @add_span def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) -> None: """Create a DAG run and update the dag_model to control if/when the next DAGRun should be created.""" - # Bulk Fetch DagRuns with dag_id and execution_date same + # Bulk Fetch DagRuns with dag_id and logical_date same # as DagModel.dag_id and DagModel.next_dagrun # This list is used to verify if the DagRun already exist so that we don't attempt to create # duplicate dag runs existing_dagruns = ( session.execute( - select(DagRun.dag_id, DagRun.execution_date).where( + select(DagRun.dag_id, DagRun.logical_date).where( tuple_in_condition( - (DagRun.dag_id, DagRun.execution_date), + (DagRun.dag_id, DagRun.logical_date), ((dm.dag_id, dm.next_dagrun) for dm in dag_models), ), ) @@ -1337,7 +1337,7 @@ def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) - try: dag.create_dagrun( run_type=DagRunType.SCHEDULED, - execution_date=dag_model.next_dagrun, + logical_date=dag_model.next_dagrun, state=DagRunState.QUEUED, data_interval=data_interval, external_trigger=False, @@ -1372,18 +1372,18 @@ def _create_dag_runs_asset_triggered( session: Session, ) -> None: """For DAGs that are triggered by assets, create dag runs.""" - # Bulk Fetch DagRuns with dag_id and execution_date same + # Bulk Fetch DagRuns with dag_id and logical_date same # as DagModel.dag_id and DagModel.next_dagrun # This list is used to verify if the DagRun already exist so that we don't attempt to create # duplicate dag runs - exec_dates = { + logical_dates = { dag_id: timezone.coerce_datetime(last_time) for dag_id, (_, last_time) in asset_triggered_dag_info.items() } existing_dagruns: set[tuple[str, timezone.DateTime]] = set( session.execute( - select(DagRun.dag_id, DagRun.execution_date).where( - tuple_in_condition((DagRun.dag_id, DagRun.execution_date), exec_dates.items()) + select(DagRun.dag_id, DagRun.logical_date).where( + tuple_in_condition((DagRun.dag_id, DagRun.logical_date), logical_dates.items()) ) ) ) @@ -1411,24 +1411,24 @@ def _create_dag_runs_asset_triggered( # we need to set dag.next_dagrun_info if the Dag Run already exists or if we # create a new one. This is so that in the next Scheduling loop we try to create new runs # instead of falling in a loop of Integrity Error. 
- exec_date = exec_dates[dag.dag_id] - if (dag.dag_id, exec_date) not in existing_dagruns: + logical_date = logical_dates[dag.dag_id] + if (dag.dag_id, logical_date) not in existing_dagruns: previous_dag_run = session.scalar( select(DagRun) .where( DagRun.dag_id == dag.dag_id, - DagRun.execution_date < exec_date, + DagRun.logical_date < logical_date, DagRun.run_type == DagRunType.ASSET_TRIGGERED, ) - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .limit(1) ) asset_event_filters = [ DagScheduleAssetReference.dag_id == dag.dag_id, - AssetEvent.timestamp <= exec_date, + AssetEvent.timestamp <= logical_date, ] if previous_dag_run: - asset_event_filters.append(AssetEvent.timestamp > previous_dag_run.execution_date) + asset_event_filters.append(AssetEvent.timestamp > previous_dag_run.logical_date) asset_events = session.scalars( select(AssetEvent) @@ -1439,10 +1439,10 @@ def _create_dag_runs_asset_triggered( .where(*asset_event_filters) ).all() - data_interval = dag.timetable.data_interval_for_events(exec_date, asset_events) + data_interval = dag.timetable.data_interval_for_events(logical_date, asset_events) run_id = dag.timetable.generate_run_id( run_type=DagRunType.ASSET_TRIGGERED, - logical_date=exec_date, + logical_date=logical_date, data_interval=data_interval, session=session, events=asset_events, @@ -1451,7 +1451,7 @@ def _create_dag_runs_asset_triggered( dag_run = dag.create_dagrun( run_id=run_id, run_type=DagRunType.ASSET_TRIGGERED, - execution_date=exec_date, + logical_date=logical_date, data_interval=data_interval, state=DagRunState.QUEUED, external_trigger=False, @@ -1706,8 +1706,8 @@ def _schedule_dag_run( ) return callback_to_execute - if dag_run.execution_date > timezone.utcnow() and not dag.allow_future_exec_dates: - self.log.error("Execution date is in future: %s", dag_run.execution_date) + if dag_run.logical_date > timezone.utcnow() and not dag.allow_future_exec_dates: + self.log.error("Logical date is in future: %s", dag_run.logical_date) return callback if not self._verify_integrity_if_dag_changed(dag_run=dag_run, session=session): @@ -1721,7 +1721,7 @@ def _schedule_dag_run( if self._should_update_dag_next_dagruns(dag, dag_model, last_dag_run=dag_run, session=session): dag_model.calculate_dagrun_date_fields(dag, dag.get_run_data_interval(dag_run)) # This will do one query per dag run. We "could" build up a complex - # query to update all the TIs across all the execution dates and dag + # query to update all the TIs across all the logical dates and dag # IDs in a single query, but it turns out that can be _very very slow_ # see #11147/commit ee90807ac for more details if span.is_recording(): diff --git a/airflow/metrics/datadog_logger.py b/airflow/metrics/datadog_logger.py index 81926716eb25..a166c6fcb169 100644 --- a/airflow/metrics/datadog_logger.py +++ b/airflow/metrics/datadog_logger.py @@ -19,11 +19,9 @@ import datetime import logging -import warnings from typing import TYPE_CHECKING from airflow.configuration import conf -from airflow.exceptions import RemovedInAirflow3Warning from airflow.metrics.protocols import Timer from airflow.metrics.validators import ( PatternAllowListValidator, @@ -42,14 +40,6 @@ log = logging.getLogger(__name__) -timer_unit_consistency = conf.getboolean("metrics", "timer_unit_consistency") -if not timer_unit_consistency: - warnings.warn( - "Timer and timing metrics publish in seconds were deprecated. It is enabled by default from Airflow 3 onwards. 
Enable timer_unit_consistency to publish all the timer and timing metrics in milliseconds.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - class SafeDogStatsdLogger: """DogStatsd Logger.""" @@ -144,10 +134,7 @@ def timing( tags_list = [] if self.metrics_validator.test(stat): if isinstance(dt, datetime.timedelta): - if timer_unit_consistency: - dt = dt.total_seconds() * 1000.0 - else: - dt = dt.total_seconds() + dt = dt.total_seconds() * 1000.0 return self.dogstatsd.timing(metric=stat, value=dt, tags=tags_list) return None diff --git a/airflow/metrics/otel_logger.py b/airflow/metrics/otel_logger.py index ed123608626f..c3633212cd27 100644 --- a/airflow/metrics/otel_logger.py +++ b/airflow/metrics/otel_logger.py @@ -31,7 +31,6 @@ from opentelemetry.sdk.resources import HOST_NAME, SERVICE_NAME, Resource from airflow.configuration import conf -from airflow.exceptions import RemovedInAirflow3Warning from airflow.metrics.protocols import Timer from airflow.metrics.validators import ( OTEL_NAME_MAX_LENGTH, @@ -73,14 +72,6 @@ # Delimiter is placed between the universal metric prefix and the unique metric name. DEFAULT_METRIC_NAME_DELIMITER = "." -timer_unit_consistency = conf.getboolean("metrics", "timer_unit_consistency") -if not timer_unit_consistency: - warnings.warn( - "Timer and timing metrics publish in seconds were deprecated. It is enabled by default from Airflow 3 onwards. Enable timer_unit_consistency to publish all the timer and timing metrics in milliseconds.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - def full_name(name: str, *, prefix: str = DEFAULT_METRIC_NAME_PREFIX) -> str: """Assembles the prefix, delimiter, and name and returns it as a string.""" @@ -284,10 +275,7 @@ def timing( """OTel does not have a native timer, stored as a Gauge whose value is number of seconds elapsed.""" if self.metrics_validator.test(stat) and name_is_otel_safe(self.prefix, stat): if isinstance(dt, datetime.timedelta): - if timer_unit_consistency: - dt = dt.total_seconds() * 1000.0 - else: - dt = dt.total_seconds() + dt = dt.total_seconds() * 1000.0 self.metrics_map.set_gauge_value(full_name(prefix=self.prefix, name=stat), float(dt), False, tags) def timer( diff --git a/airflow/metrics/protocols.py b/airflow/metrics/protocols.py index 0d12704e87a3..8cfe4d8e7ea3 100644 --- a/airflow/metrics/protocols.py +++ b/airflow/metrics/protocols.py @@ -19,23 +19,12 @@ import datetime import time -import warnings from typing import Union -from airflow.configuration import conf -from airflow.exceptions import RemovedInAirflow3Warning from airflow.typing_compat import Protocol DeltaType = Union[int, float, datetime.timedelta] -timer_unit_consistency = conf.getboolean("metrics", "timer_unit_consistency") -if not timer_unit_consistency: - warnings.warn( - "Timer and timing metrics publish in seconds were deprecated. It is enabled by default from Airflow 3 onwards. Enable timer_unit_consistency to publish all the timer and timing metrics in milliseconds.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - class TimerProtocol(Protocol): """Type protocol for StatsLogger.timer.""" @@ -127,9 +116,6 @@ def start(self) -> Timer: def stop(self, send: bool = True) -> None: """Stop the timer, and optionally send it to stats backend.""" if self._start_time is not None: - if timer_unit_consistency: - self.duration = 1000.0 * (time.perf_counter() - self._start_time) # Convert to milliseconds. 
- else: - self.duration = time.perf_counter() - self._start_time + self.duration = 1000.0 * (time.perf_counter() - self._start_time) # Convert to milliseconds. if send and self.real_timer: self.real_timer.stop() diff --git a/airflow/models/abstractoperator.py b/airflow/models/abstractoperator.py index feafb0b6b637..58c2aec6fdeb 100644 --- a/airflow/models/abstractoperator.py +++ b/airflow/models/abstractoperator.py @@ -38,7 +38,7 @@ from airflow.utils.state import State, TaskInstanceState from airflow.utils.task_group import MappedTaskGroup from airflow.utils.trigger_rule import TriggerRule -from airflow.utils.weight_rule import WeightRule +from airflow.utils.weight_rule import WeightRule, db_safe_priority if TYPE_CHECKING: from collections.abc import Mapping @@ -335,7 +335,7 @@ def priority_weight_total(self) -> int: ) if isinstance(self.weight_rule, _AbsolutePriorityWeightStrategy): - return self.priority_weight + return db_safe_priority(self.priority_weight) elif isinstance(self.weight_rule, _DownstreamPriorityWeightStrategy): upstream = False elif isinstance(self.weight_rule, _UpstreamPriorityWeightStrategy): @@ -344,10 +344,13 @@ def priority_weight_total(self) -> int: upstream = False dag = self.get_dag() if dag is None: - return self.priority_weight - return self.priority_weight + sum( - dag.task_dict[task_id].priority_weight - for task_id in self.get_flat_relative_ids(upstream=upstream) + return db_safe_priority(self.priority_weight) + return db_safe_priority( + self.priority_weight + + sum( + dag.task_dict[task_id].priority_weight + for task_id in self.get_flat_relative_ids(upstream=upstream) + ) ) @cached_property diff --git a/airflow/models/backfill.py b/airflow/models/backfill.py index 11d677542fc0..0e88fa15bb04 100644 --- a/airflow/models/backfill.py +++ b/airflow/models/backfill.py @@ -171,7 +171,7 @@ def _create_backfill_dag_run( dr = session.scalar( with_row_locks( select(DagRun) - .where(DagRun.execution_date == info.logical_date) + .where(DagRun.logical_date == info.logical_date) .order_by(nulls_first(desc(DagRun.start_date), session=session)) .limit(1), session=session, @@ -204,7 +204,7 @@ def _create_backfill_dag_run( dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) dr = dag.create_dagrun( triggered_by=DagRunTriggeredByType.BACKFILL, - execution_date=info.logical_date, + logical_date=info.logical_date, data_interval=info.data_interval, start_date=timezone.utcnow(), state=DagRunState.QUEUED, diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index c1448ef9cc55..520060b8b8f7 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -438,16 +438,16 @@ class derived from this one results in the creation of a task object, :param max_retry_delay: maximum delay interval between retries, can be set as ``timedelta`` or ``float`` seconds, which will be converted into ``timedelta``. :param start_date: The ``start_date`` for the task, determines - the ``execution_date`` for the first task instance. The best practice + the ``logical_date`` for the first task instance. The best practice is to have the start_date rounded to your DAG's schedule. Daily jobs have their start_date some day at 00:00:00, hourly jobs have their start_date at 00:00 of a specific hour. Note that Airflow simply looks at the latest - ``execution_date`` and adds the schedule to determine - the next ``execution_date``. It is also very important + ``logical_date`` and adds the schedule to determine + the next ``logical_date``. 
It is also very important to note that different tasks' dependencies need to line up in time. If task A depends on task B and their - start_date are offset in a way that their execution_date don't line + start_date are offset in a way that their logical_date don't line up, A's dependencies will never be met. If you are looking to delay a task, for example running a daily task at 2AM, look into the ``TimeSensor`` and ``TimeDeltaSensor``. We advise against using @@ -473,6 +473,8 @@ class derived from this one results in the creation of a task object, This allows the executor to trigger higher priority tasks before others when things get backed up. Set priority_weight as a higher number for more important tasks. + As not all database engines support 64-bit integers, values are capped with 32-bit. + Valid range is from -2,147,483,648 to 2,147,483,647. :param weight_rule: weighting method used for the effective total priority weight of the task. Options are: ``{ downstream | upstream | absolute }`` default is ``downstream`` @@ -494,7 +496,8 @@ class derived from this one results in the creation of a task object, Additionally, when set to ``absolute``, there is bonus effect of significantly speeding up the task creation process as for very large DAGs. Options can be set as string or using the constants defined in - the static class ``airflow.utils.WeightRule`` + the static class ``airflow.utils.WeightRule``. + Irrespective of the weight rule, resulting priority values are capped with 32-bit. |experimental| Since 2.9.0, Airflow allows to define custom priority weight strategy, by creating a subclass of @@ -549,7 +552,7 @@ class derived from this one results in the creation of a task object, Resources constructor) to their values. :param run_as_user: unix username to impersonate while running the task :param max_active_tis_per_dag: When set, a task will be able to limit the concurrent - runs across execution_dates. + runs across logical_dates. :param max_active_tis_per_dagrun: When set, a task will be able to limit the concurrent task instances per DAG run. :param executor: Which executor to target when running this task. 
NOT YET SUPPORTED @@ -769,9 +772,9 @@ def clear( qry = select(TaskInstance).where(TaskInstance.dag_id == self.dag_id) if start_date: - qry = qry.where(TaskInstance.execution_date >= start_date) + qry = qry.where(TaskInstance.logical_date >= start_date) if end_date: - qry = qry.where(TaskInstance.execution_date <= end_date) + qry = qry.where(TaskInstance.logical_date <= end_date) tasks = [self.task_id] @@ -811,10 +814,10 @@ def get_task_instances( .where(TaskInstance.task_id == self.task_id) ) if start_date: - query = query.where(DagRun.execution_date >= start_date) + query = query.where(DagRun.logical_date >= start_date) if end_date: - query = query.where(DagRun.execution_date <= end_date) - return session.scalars(query.order_by(DagRun.execution_date)).all() + query = query.where(DagRun.logical_date <= end_date) + return session.scalars(query.order_by(DagRun.logical_date)).all() @provide_session def run( @@ -850,7 +853,7 @@ def run( dag_run = session.scalars( select(DagRun).where( DagRun.dag_id == self.dag_id, - DagRun.execution_date == info.logical_date, + DagRun.logical_date == info.logical_date, ) ).one() ti = TaskInstance(self, run_id=dag_run.run_id) @@ -860,7 +863,7 @@ def run( dag_id=self.dag_id, run_id=DagRun.generate_run_id(DagRunType.MANUAL, info.logical_date), run_type=DagRunType.MANUAL, - execution_date=info.logical_date, + logical_date=info.logical_date, data_interval=info.data_interval, triggered_by=DagRunTriggeredByType.TEST, ) @@ -950,7 +953,7 @@ def xcom_pull( :param dag_id: If provided, only pulls XComs from this DAG. If None (default), the DAG of the calling task is used. :param include_prior_dates: If False, only XComs from the current - execution_date are returned. If True, XComs from previous dates + logical_date are returned. If True, XComs from previous dates are returned as well. """ return context["ti"].xcom_pull( diff --git a/airflow/models/dag.py b/airflow/models/dag.py index e48ec0a9a9c5..a4d1a2b0eda6 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -194,7 +194,7 @@ def get_last_dagrun(dag_id, session, include_externally_triggered=False): query = select(DR).where(DR.dag_id == dag_id) if not include_externally_triggered: query = query.where(DR.external_trigger == expression.false()) - query = query.order_by(DR.execution_date.desc()) + query = query.order_by(DR.logical_date.desc()) return session.scalar(query.limit(1)) @@ -267,7 +267,7 @@ def _create_orm_dagrun( run = DagRun( dag_id=dag_id, run_id=run_id, - execution_date=logical_date, + logical_date=logical_date, start_date=start_date, external_trigger=external_trigger, conf=conf, @@ -538,7 +538,7 @@ def get_run_data_interval(self, run: DagRun | DagRunPydantic) -> DataInterval: return data_interval # Compatibility: runs created before AIP-39 implementation don't have an # explicit data interval. Try to infer from the logical date. - return self.infer_automated_data_interval(run.execution_date) + return self.infer_automated_data_interval(run.logical_date) def infer_automated_data_interval(self, logical_date: datetime) -> DataInterval: """ @@ -581,13 +581,13 @@ def next_dagrun_info( Get information about the next DagRun of this dag after ``date_last_automated_dagrun``. This calculates what time interval the next DagRun should operate on - (its execution date) and when it can be scheduled, according to the + (its logical date) and when it can be scheduled, according to the dag's timetable, start_date, end_date, etc. 
This doesn't check max active run or any other "max_active_tasks" type limits, but only performs calculations based on the various date and interval fields of this dag and its tasks. - :param last_automated_dagrun: The ``max(execution_date)`` of + :param last_automated_dagrun: The ``max(logical_date)`` of existing "automated" DagRuns for this dag (scheduled or backfill, but not manual). :param restricted: If set to *False* (default is *True*), ignore @@ -784,7 +784,7 @@ def get_serialized_fields(cls): @provide_session def fetch_callback( dag: DAG, - dag_run_id: str, + run_id: str, success: bool = True, reason: str | None = None, *, @@ -797,14 +797,14 @@ def fetch_callback( the list of callbacks. :param dag: DAG object - :param dag_run_id: The DAG run ID + :param run_id: The DAG run ID :param success: Flag to specify if failure or success callback should be called :param reason: Completion reason :param session: Database session """ callbacks = dag.on_success_callback if success else dag.on_failure_callback if callbacks: - dagrun = DAG.fetch_dagrun(dag_id=dag.dag_id, run_id=dag_run_id, session=session) + dagrun = DAG.fetch_dagrun(dag_id=dag.dag_id, run_id=run_id, session=session) callbacks = callbacks if isinstance(callbacks, list) else [callbacks] tis = dagrun.get_task_instances(session=session) # tis from a dagrun may not be a part of dag.partial_subset, @@ -840,7 +840,7 @@ def handle_callback(self, dagrun: DagRun, success=True, reason=None, session=NEW :param session: Database session """ callbacks, context = DAG.fetch_callback( - dag=self, dag_run_id=dagrun.run_id, success=success, reason=reason, session=session + dag=self, run_id=dagrun.run_id, success=success, reason=reason, session=session ) or (None, None) DAG.execute_callback(callbacks, context, self.dag_id) @@ -865,15 +865,15 @@ def execute_callback(cls, callbacks: list[Callable] | None, context: Context | N def get_active_runs(self): """ - Return a list of dag run execution dates currently running. + Return a list of dag run logical dates currently running. - :return: List of execution dates + :return: List of logical dates """ runs = DagRun.find(dag_id=self.dag_id, state=DagRunState.RUNNING) active_dates = [] for run in runs: - active_dates.append(run.execution_date) + active_dates.append(run.logical_date) return active_dates @@ -882,24 +882,24 @@ def get_active_runs(self): @provide_session def fetch_dagrun( dag_id: str, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, session: Session = NEW_SESSION, ) -> DagRun | DagRunPydantic: """ - Return the dag run for a given execution date or run_id if it exists, otherwise none. + Return the dag run for a given logical date or run_id if it exists, otherwise none. :param dag_id: The dag_id of the DAG to find. - :param execution_date: The execution date of the DagRun to find. + :param logical_date: The logical date of the DagRun to find. :param run_id: The run_id of the DagRun to find. :param session: :return: The DagRun if found, otherwise None. 
""" - if not (execution_date or run_id): - raise TypeError("You must provide either the execution_date or the run_id") + if not (logical_date or run_id): + raise TypeError("You must provide either the logical_date or the run_id") query = select(DagRun) - if execution_date: - query = query.where(DagRun.dag_id == dag_id, DagRun.execution_date == execution_date) + if logical_date: + query = query.where(DagRun.dag_id == dag_id, DagRun.logical_date == logical_date) if run_id: query = query.where(DagRun.dag_id == dag_id, DagRun.run_id == run_id) return session.scalar(query) @@ -907,38 +907,36 @@ def fetch_dagrun( @provide_session def get_dagrun( self, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, session: Session = NEW_SESSION, ) -> DagRun | DagRunPydantic: - return DAG.fetch_dagrun( - dag_id=self.dag_id, execution_date=execution_date, run_id=run_id, session=session - ) + return DAG.fetch_dagrun(dag_id=self.dag_id, logical_date=logical_date, run_id=run_id, session=session) @provide_session def get_dagruns_between(self, start_date, end_date, session=NEW_SESSION): """ Return the list of dag runs between start_date (inclusive) and end_date (inclusive). - :param start_date: The starting execution date of the DagRun to find. - :param end_date: The ending execution date of the DagRun to find. + :param start_date: The starting logical date of the DagRun to find. + :param end_date: The ending logical date of the DagRun to find. :param session: :return: The list of DagRuns found. """ dagruns = session.scalars( select(DagRun).where( DagRun.dag_id == self.dag_id, - DagRun.execution_date >= start_date, - DagRun.execution_date <= end_date, + DagRun.logical_date >= start_date, + DagRun.logical_date <= end_date, ) ).all() return dagruns @provide_session - def get_latest_execution_date(self, session: Session = NEW_SESSION) -> pendulum.DateTime | None: + def get_latest_logical_date(self, session: Session = NEW_SESSION) -> pendulum.DateTime | None: """Return the latest date for which at least one dag run exists.""" - return session.scalar(select(func.max(DagRun.execution_date)).where(DagRun.dag_id == self.dag_id)) + return session.scalar(select(func.max(DagRun.logical_date)).where(DagRun.dag_id == self.dag_id)) @provide_session def get_task_instances_before( @@ -955,21 +953,21 @@ def get_task_instances_before( corresponding to any DagRunType. It can have less if there are less than ``num`` scheduled DAG runs before ``base_date``. 
""" - execution_dates: list[Any] = session.execute( - select(DagRun.execution_date) + logical_dates: list[Any] = session.execute( + select(DagRun.logical_date) .where( DagRun.dag_id == self.dag_id, - DagRun.execution_date <= base_date, + DagRun.logical_date <= base_date, ) - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .limit(num) ).all() - if not execution_dates: + if not logical_dates: return self.get_task_instances(start_date=base_date, end_date=base_date, session=session) - min_date: datetime | None = execution_dates[-1]._mapping.get( - "execution_date" + min_date: datetime | None = logical_dates[-1]._mapping.get( + "logical_date" ) # getting the last value from the list return self.get_task_instances(start_date=min_date, end_date=base_date, session=session) @@ -997,7 +995,7 @@ def get_task_instances( exclude_task_ids=(), session=session, ) - return session.scalars(cast(Select, query).order_by(DagRun.execution_date)).all() + return session.scalars(cast(Select, query).order_by(DagRun.logical_date)).all() @overload def _get_task_instances( @@ -1074,14 +1072,14 @@ def _get_task_instances( if run_id: tis = tis.where(TaskInstance.run_id == run_id) if start_date: - tis = tis.where(DagRun.execution_date >= start_date) + tis = tis.where(DagRun.logical_date >= start_date) if task_ids is not None: tis = tis.where(TaskInstance.ti_selector_condition(task_ids)) # This allows allow_trigger_in_future config to take affect, rather than mandating exec_date <= UTC if end_date or not self.allow_future_exec_dates: end_date = end_date or timezone.utcnow() - tis = tis.where(DagRun.execution_date <= end_date) + tis = tis.where(DagRun.logical_date <= end_date) if state: if isinstance(state, (str, TaskInstanceState)): @@ -1146,7 +1144,7 @@ def _get_task_instances( .where( TI.dag_id == task.external_dag_id, TI.task_id == task.external_task_id, - DagRun.execution_date == pendulum.parse(task.execution_date), + DagRun.logical_date == pendulum.parse(task.logical_date), ) ) @@ -1219,7 +1217,7 @@ def set_task_instance_state( *, task_id: str, map_indexes: Collection[int] | None = None, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, state: TaskInstanceState, upstream: bool = False, @@ -1235,7 +1233,7 @@ def set_task_instance_state( :param task_id: Task ID of the TaskInstance :param map_indexes: Only set TaskInstance if its map_index matches. If None (default), all mapped TaskInstances of the task are set. 
- :param execution_date: Execution date of the TaskInstance + :param logical_date: Logical date of the TaskInstance :param run_id: The run_id of the TaskInstance :param state: State to set the TaskInstance to :param upstream: Include all upstream tasks of the given task_id @@ -1246,8 +1244,8 @@ def set_task_instance_state( """ from airflow.api.common.mark_tasks import set_state - if not exactly_one(execution_date, run_id): - raise ValueError("Exactly one of execution_date or run_id must be provided") + if not exactly_one(logical_date, run_id): + raise ValueError("Exactly one of logical_date or run_id must be provided") task = self.get_task(task_id) task.dag = self @@ -1260,7 +1258,7 @@ def set_task_instance_state( altered = set_state( tasks=tasks_to_set_state, - execution_date=execution_date, + logical_date=logical_date, run_id=run_id, upstream=upstream, downstream=downstream, @@ -1283,16 +1281,16 @@ def set_task_instance_state( include_upstream=False, ) - if execution_date is None: + if logical_date is None: dag_run = session.scalars( select(DagRun).where(DagRun.run_id == run_id, DagRun.dag_id == self.dag_id) ).one() # Raises an error if not found - resolve_execution_date = dag_run.execution_date + resolve_logical_date = dag_run.logical_date else: - resolve_execution_date = execution_date + resolve_logical_date = logical_date - end_date = resolve_execution_date if not future else None - start_date = resolve_execution_date if not past else None + end_date = resolve_logical_date if not future else None + start_date = resolve_logical_date if not past else None subdag.clear( start_date=start_date, @@ -1310,7 +1308,7 @@ def set_task_group_state( self, *, group_id: str, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_id: str | None = None, state: TaskInstanceState, upstream: bool = False, @@ -1324,7 +1322,7 @@ def set_task_group_state( Set TaskGroup to the given state and clear downstream tasks in failed or upstream_failed state. 
:param group_id: The group_id of the TaskGroup - :param execution_date: Execution date of the TaskInstance + :param logical_date: Logical date of the TaskInstance :param run_id: The run_id of the TaskInstance :param state: State to set the TaskInstance to :param upstream: Include all upstream tasks of the given task_id @@ -1336,22 +1334,22 @@ def set_task_group_state( """ from airflow.api.common.mark_tasks import set_state - if not exactly_one(execution_date, run_id): - raise ValueError("Exactly one of execution_date or run_id must be provided") + if not exactly_one(logical_date, run_id): + raise ValueError("Exactly one of logical_date or run_id must be provided") tasks_to_set_state: list[BaseOperator | tuple[BaseOperator, int]] = [] task_ids: list[str] = [] - if execution_date is None: + if logical_date is None: dag_run = session.scalars( select(DagRun).where(DagRun.run_id == run_id, DagRun.dag_id == self.dag_id) ).one() # Raises an error if not found - resolve_execution_date = dag_run.execution_date + resolve_logical_date = dag_run.logical_date else: - resolve_execution_date = execution_date + resolve_logical_date = logical_date - end_date = resolve_execution_date if not future else None - start_date = resolve_execution_date if not past else None + end_date = resolve_logical_date if not future else None + start_date = resolve_logical_date if not past else None task_group_dict = self.task_group.get_task_group_dict() task_group = task_group_dict.get(group_id) @@ -1361,17 +1359,17 @@ def set_task_group_state( task_ids = [task.task_id for task in task_group.iter_tasks()] dag_runs_query = select(DagRun.id).where(DagRun.dag_id == self.dag_id) if start_date is None and end_date is None: - dag_runs_query = dag_runs_query.where(DagRun.execution_date == start_date) + dag_runs_query = dag_runs_query.where(DagRun.logical_date == start_date) else: if start_date is not None: - dag_runs_query = dag_runs_query.where(DagRun.execution_date >= start_date) + dag_runs_query = dag_runs_query.where(DagRun.logical_date >= start_date) if end_date is not None: - dag_runs_query = dag_runs_query.where(DagRun.execution_date <= end_date) + dag_runs_query = dag_runs_query.where(DagRun.logical_date <= end_date) with lock_rows(dag_runs_query, session): altered = set_state( tasks=tasks_to_set_state, - execution_date=execution_date, + logical_date=logical_date, run_id=run_id, upstream=upstream, downstream=downstream, @@ -1404,6 +1402,40 @@ def set_task_group_state( return altered + @overload + def clear( + self, + *, + dry_run: Literal[True], + task_ids: Collection[str | tuple[str, int]] | None = None, + start_date: datetime | None = None, + end_date: datetime | None = None, + only_failed: bool = False, + only_running: bool = False, + confirm_prompt: bool = False, + dag_run_state: DagRunState = DagRunState.QUEUED, + session: Session = NEW_SESSION, + dag_bag: DagBag | None = None, + exclude_task_ids: frozenset[str] | frozenset[tuple[str, int]] | None = frozenset(), + ) -> list[TaskInstance]: ... 
# pragma: no cover + + @overload + def clear( + self, + *, + task_ids: Collection[str | tuple[str, int]] | None = None, + start_date: datetime | None = None, + end_date: datetime | None = None, + only_failed: bool = False, + only_running: bool = False, + confirm_prompt: bool = False, + dag_run_state: DagRunState = DagRunState.QUEUED, + dry_run: Literal[False] = False, + session: Session = NEW_SESSION, + dag_bag: DagBag | None = None, + exclude_task_ids: frozenset[str] | frozenset[tuple[str, int]] | None = frozenset(), + ) -> int: ... # pragma: no cover + @provide_session def clear( self, @@ -1418,13 +1450,13 @@ def clear( session: Session = NEW_SESSION, dag_bag: DagBag | None = None, exclude_task_ids: frozenset[str] | frozenset[tuple[str, int]] | None = frozenset(), - ) -> int | Iterable[TaskInstance]: + ) -> int | list[TaskInstance]: """ Clear a set of task instances associated with the current dag for a specified date range. :param task_ids: List of task ids or (``task_id``, ``map_index``) tuples to clear - :param start_date: The minimum execution_date to clear - :param end_date: The maximum execution_date to clear + :param start_date: The minimum logical_date to clear + :param end_date: The maximum logical_date to clear :param only_failed: Only clear failed tasks :param only_running: Only clear running tasks. :param confirm_prompt: Ask for confirmation @@ -1550,7 +1582,7 @@ def cli(self): @provide_session def test( self, - execution_date: datetime | None = None, + logical_date: datetime | None = None, run_conf: dict[str, Any] | None = None, conn_file_path: str | None = None, variable_file_path: str | None = None, @@ -1559,9 +1591,9 @@ def test( session: Session = NEW_SESSION, ) -> DagRun: """ - Execute one single DagRun for a given DAG and execution date. + Execute one single DagRun for a given DAG and logical date. 
- :param execution_date: execution date for the DAG run + :param logical_date: logical date for the DAG run :param run_conf: configuration to pass to newly created dagrun :param conn_file_path: file path to a connection file in either yaml or json :param variable_file_path: file path to a variable file in either yaml or json @@ -1599,23 +1631,23 @@ def add_logger_if_needed(ti: TaskInstance): exit_stack.callback(lambda: secrets_backend_list.pop(0)) with exit_stack: - execution_date = execution_date or timezone.utcnow() + logical_date = logical_date or timezone.utcnow() self.validate() - self.log.debug("Clearing existing task instances for execution date %s", execution_date) + self.log.debug("Clearing existing task instances for logical date %s", logical_date) self.clear( - start_date=execution_date, - end_date=execution_date, + start_date=logical_date, + end_date=logical_date, dag_run_state=False, # type: ignore session=session, ) self.log.debug("Getting dagrun for dag %s", self.dag_id) - logical_date = timezone.coerce_datetime(execution_date) + logical_date = timezone.coerce_datetime(logical_date) data_interval = self.timetable.infer_manual_data_interval(run_after=logical_date) dr: DagRun = _get_or_create_dagrun( dag=self, - start_date=execution_date, - execution_date=execution_date, - run_id=DagRun.generate_run_id(DagRunType.MANUAL, execution_date), + start_date=logical_date, + logical_date=logical_date, + run_id=DagRun.generate_run_id(DagRunType.MANUAL, logical_date), session=session, conf=run_conf, triggered_by=DagRunTriggeredByType.TEST, @@ -1699,8 +1731,8 @@ def create_dagrun( self, state: DagRunState, *, - triggered_by: DagRunTriggeredByType, - execution_date: datetime | None = None, + triggered_by: DagRunTriggeredByType | None, + logical_date: datetime | None = None, run_id: str | None = None, start_date: datetime | None = None, external_trigger: bool | None = False, @@ -1721,7 +1753,7 @@ def create_dagrun( :param triggered_by: The entity which triggers the DagRun :param run_id: defines the run id for this dag run :param run_type: type of DagRun - :param execution_date: the execution date of this dag run + :param logical_date: the logical date of this dag run :param start_date: the date this dag run should be evaluated :param external_trigger: whether this dag run is externally triggered :param conf: Dict containing configuration/parameters to pass to the DAG @@ -1731,7 +1763,7 @@ def create_dagrun( :param data_interval: Data interval of the DagRun :param backfill_id: id of the backfill run if one exists """ - logical_date = timezone.coerce_datetime(execution_date) + logical_date = timezone.coerce_datetime(logical_date) if data_interval and not isinstance(data_interval, DataInterval): data_interval = DataInterval(*map(timezone.coerce_datetime, data_interval)) @@ -1761,13 +1793,13 @@ def create_dagrun( f"A {run_type.value} DAG run cannot use ID {run_id!r} since it " f"is reserved for {inferred_run_type.value} runs" ) - elif run_type and logical_date is not None: # Generate run_id from run_type and execution_date. + elif run_type and logical_date is not None: # Generate run_id from run_type and logical_date. 
run_id = self.timetable.generate_run_id( run_type=run_type, logical_date=logical_date, data_interval=data_interval ) else: raise AirflowException( - "Creating DagRun needs either `run_id` or both `run_type` and `execution_date`" + "Creating DagRun needs either `run_id` or both `run_type` and `logical_date`" ) regex = airflow_conf.get("scheduler", "allowed_run_id_pattern")
@@ -2427,7 +2459,7 @@ def _get_or_create_dagrun( dag: DAG, conf: dict[Any, Any] | None, start_date: datetime, - execution_date: datetime, + logical_date: datetime, run_id: str, session: Session, triggered_by: DagRunTriggeredByType,
@@ -2441,7 +2473,7 @@ :param dag: DAG to be used to find run. :param conf: Configuration to pass to newly created run. :param start_date: Start date of new run. - :param execution_date: Logical date for finding an existing run. + :param logical_date: Logical date for finding an existing run. :param run_id: Run ID for the new DAG run. :param triggered_by: the entity which triggers the dag_run
@@ -2449,7 +2481,7 @@ """ log.info("dagrun id: %s", dag.dag_id) dr: DagRun = session.scalar( - select(DagRun).where(DagRun.dag_id == dag.dag_id, DagRun.execution_date == execution_date) + select(DagRun).where(DagRun.dag_id == dag.dag_id, DagRun.logical_date == logical_date) ) if dr: session.delete(dr)
@@ -2457,9 +2489,9 @@ dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) dr = dag.create_dagrun( state=DagRunState.RUNNING, - execution_date=execution_date, + logical_date=logical_date, run_id=run_id, - start_date=start_date or execution_date, + start_date=start_date or logical_date, session=session, conf=conf, data_interval=data_interval,
diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index ccc4832f5fb6..a2327221ad5d 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py
@@ -129,7 +129,7 @@ class DagRun(Base, LoggingMixin): id = Column(Integer, primary_key=True) dag_id = Column(StringID(), nullable=False) queued_at = Column(UtcDateTime) - execution_date = Column("logical_date", UtcDateTime, default=timezone.utcnow, nullable=False) + logical_date = Column(UtcDateTime, default=timezone.utcnow, nullable=False) start_date = Column(UtcDateTime) end_date = Column(UtcDateTime) _state = Column("state", String(50), default=DagRunState.QUEUED)
@@ -228,7 +228,7 @@ def __init__( dag_id: str | None = None, run_id: str | None = None, queued_at: datetime | None | ArgNotSet = NOTSET, - execution_date: datetime | None = None, + logical_date: datetime | None = None, start_date: datetime | None = None, external_trigger: bool | None = None, conf: Any | None = None,
@@ -248,7 +248,7 @@ self.dag_id = dag_id self.run_id = run_id - self.execution_date = execution_date + self.logical_date = logical_date self.start_date = start_date self.external_trigger = external_trigger self.conf = conf or {}
@@ -268,7 +268,7 @@ def __init__( def __repr__(self): return ( - f"<DagRun {self.dag_id} @ {self.execution_date}: " + f"<DagRun {self.dag_id} @ {self.logical_date}: " f"{self.run_id}, state:{self.state}, " f"queued_at: {self.queued_at}. externally triggered: {self.external_trigger}>" )
@@ -287,10 +287,6 @@ def validate_run_id(self, key: str, run_id: str) -> str | None: def stats_tags(self) -> dict[str, str]: return prune_dict({"dag_id": self.dag_id, "run_type": self.run_type}) - @property - def logical_date(self) -> datetime: - return self.execution_date - def get_state(self): return self._state
@@ -437,13 +433,13 @@ def get_running_dag_runs_to_examine(cls, session: Session) -> Query: .order_by( nulls_first(BackfillDagRun.sort_ordinal, session=session), nulls_first(cls.last_scheduling_decision, 
session=session), - cls.execution_date, + cls.logical_date, ) .limit(cls.DEFAULT_DAGRUNS_TO_EXAMINE) ) - if not settings.ALLOW_FUTURE_EXEC_DATES: - query = query.where(DagRun.execution_date <= func.now()) + if not settings.ALLOW_FUTURE_LOGICAL_DATES: + query = query.where(DagRun.logical_date <= func.now()) return session.scalars(with_row_locks(query, of=cls, session=session, skip_locked=True)) @@ -523,13 +519,13 @@ def get_queued_dag_runs_to_set_running(cls, session: Session) -> Query: nulls_first(BackfillDagRun.sort_ordinal, session=session), nulls_first(cls.last_scheduling_decision, session=session), nulls_first(running_drs.c.num_running, session=session), # many running -> lower priority - cls.execution_date, + cls.logical_date, ) .limit(cls.DEFAULT_DAGRUNS_TO_EXAMINE) ) - if not settings.ALLOW_FUTURE_EXEC_DATES: - query = query.where(DagRun.execution_date <= func.now()) + if not settings.ALLOW_FUTURE_LOGICAL_DATES: + query = query.where(DagRun.logical_date <= func.now()) return session.scalars(with_row_locks(query, of=cls, session=session, skip_locked=True)) @@ -539,14 +535,14 @@ def find( cls, dag_id: str | list[str] | None = None, run_id: Iterable[str] | None = None, - execution_date: datetime | Iterable[datetime] | None = None, + logical_date: datetime | Iterable[datetime] | None = None, state: DagRunState | None = None, external_trigger: bool | None = None, no_backfills: bool = False, run_type: DagRunType | None = None, session: Session = NEW_SESSION, - execution_start_date: datetime | None = None, - execution_end_date: datetime | None = None, + logical_start_date: datetime | None = None, + logical_end_date: datetime | None = None, ) -> list[DagRun]: """ Return a set of dag runs for the given search criteria. @@ -554,14 +550,14 @@ def find( :param dag_id: the dag_id or list of dag_id to find dag runs for :param run_id: defines the run id for this dag run :param run_type: type of DagRun - :param execution_date: the execution date + :param logical_date: the logical date :param state: the state of the dag run :param external_trigger: whether this dag run is externally triggered :param no_backfills: return no backfills (True), return all (False). 
Defaults to False :param session: database session - :param execution_start_date: dag run that was executed from this date - :param execution_end_date: dag run that was executed until this date + :param logical_start_date: dag run that was executed from this date + :param logical_end_date: dag run that was executed until this date """ qry = select(cls) dag_ids = [dag_id] if isinstance(dag_id, str) else dag_id @@ -572,16 +568,16 @@ def find( qry = qry.where(cls.run_id.in_(run_id)) elif run_id is not None: qry = qry.where(cls.run_id == run_id) - if is_container(execution_date): - qry = qry.where(cls.execution_date.in_(execution_date)) - elif execution_date is not None: - qry = qry.where(cls.execution_date == execution_date) - if execution_start_date and execution_end_date: - qry = qry.where(cls.execution_date.between(execution_start_date, execution_end_date)) - elif execution_start_date: - qry = qry.where(cls.execution_date >= execution_start_date) - elif execution_end_date: - qry = qry.where(cls.execution_date <= execution_end_date) + if is_container(logical_date): + qry = qry.where(cls.logical_date.in_(logical_date)) + elif logical_date is not None: + qry = qry.where(cls.logical_date == logical_date) + if logical_start_date and logical_end_date: + qry = qry.where(cls.logical_date.between(logical_start_date, logical_end_date)) + elif logical_start_date: + qry = qry.where(cls.logical_date >= logical_start_date) + elif logical_end_date: + qry = qry.where(cls.logical_date <= logical_end_date) if state: qry = qry.where(cls.state == state) if external_trigger is not None: @@ -591,7 +587,7 @@ def find( if no_backfills: qry = qry.where(cls.run_type != DagRunType.BACKFILL_JOB) - return session.scalars(qry.order_by(cls.execution_date)).all() + return session.scalars(qry.order_by(cls.logical_date)).all() @classmethod @provide_session @@ -599,31 +595,29 @@ def find_duplicate( cls, dag_id: str, run_id: str, - execution_date: datetime, + logical_date: datetime, session: Session = NEW_SESSION, ) -> DagRun | None: """ - Return an existing run for the DAG with a specific run_id or execution_date. - - *None* is returned if no such DAG run is found. + Return an existing run for the DAG with a specific run_id or logical date. 
:param dag_id: the dag_id to find duplicates for :param run_id: defines the run id for this dag run - :param execution_date: the execution date + :param logical_date: the logical date :param session: database session """ return session.scalars( select(cls).where( cls.dag_id == dag_id, - or_(cls.run_id == run_id, cls.execution_date == execution_date), + or_(cls.run_id == run_id, cls.logical_date == logical_date), ) ).one_or_none() @staticmethod - def generate_run_id(run_type: DagRunType, execution_date: datetime) -> str: - """Generate Run ID based on Run Type and Execution Date.""" + def generate_run_id(run_type: DagRunType, logical_date: datetime) -> str: + """Generate Run ID based on Run Type and Logical Date.""" # _Ensure_ run_type is a DagRunType, not just a string from user code - return DagRunType(run_type).generate_run_id(execution_date) + return DagRunType(run_type).generate_run_id(logical_date) @staticmethod @internal_api_call @@ -669,7 +663,7 @@ def _check_last_n_dagruns_failed(self, dag_id, max_consecutive_failed_dag_runs, dag_runs = ( session.query(DagRun) .filter(DagRun.dag_id == dag_id) - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .limit(max_consecutive_failed_dag_runs) .all() ) @@ -797,11 +791,11 @@ def get_previous_dagrun( """ filters = [ DagRun.dag_id == dag_run.dag_id, - DagRun.execution_date < dag_run.execution_date, + DagRun.logical_date < dag_run.logical_date, ] if state is not None: filters.append(DagRun.state == state) - return session.scalar(select(DagRun).where(*filters).order_by(DagRun.execution_date.desc()).limit(1)) + return session.scalar(select(DagRun).where(*filters).order_by(DagRun.logical_date.desc()).limit(1)) @staticmethod @internal_api_call @@ -821,10 +815,10 @@ def get_previous_scheduled_dagrun( select(DagRun) .where( DagRun.dag_id == dag_run.dag_id, - DagRun.execution_date < dag_run.execution_date, + DagRun.logical_date < dag_run.logical_date, DagRun.run_type != DagRunType.MANUAL, ) - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .limit(1) ) @@ -994,7 +988,7 @@ def recalculate(self) -> _UnfinishedStates: if self._state == DagRunState.FAILED or self._state == DagRunState.SUCCESS: msg = ( - "DagRun Finished: dag_id=%s, execution_date=%s, run_id=%s, " + "DagRun Finished: dag_id=%s, logical_date=%s, run_id=%s, " "run_start_date=%s, run_end_date=%s, run_duration=%s, " "state=%s, external_trigger=%s, run_type=%s, " "data_interval_start=%s, data_interval_end=%s, dag_version_name=%s" @@ -1003,7 +997,7 @@ def recalculate(self) -> _UnfinishedStates: self.log.info( msg, self.dag_id, - self.execution_date, + self.logical_date, self.run_id, self.start_date, self.end_date, @@ -1039,7 +1033,7 @@ def _trace_dagrun(self, dagv) -> None: attributes = { "category": "DAG runs", "dag_id": self.dag_id, - "execution_date": str(self.execution_date), + "logical_date": str(self.logical_date), "run_id": self.run_id, "queued_at": str(self.queued_at), "run_start_date": str(self.start_date), @@ -1330,8 +1324,8 @@ def verify_integrity(self, *, session: Session = NEW_SESSION) -> None: def task_filter(task: Operator) -> bool: return task.task_id not in task_ids and ( self.run_type == DagRunType.BACKFILL_JOB - or (task.start_date is None or task.start_date <= self.execution_date) - and (task.end_date is None or self.execution_date <= task.end_date) + or (task.start_date is None or task.start_date <= self.logical_date) + and (task.end_date is None or self.logical_date <= task.end_date) ) created_counts: dict[str, int] 
= defaultdict(int) @@ -1607,14 +1601,14 @@ def _revise_map_indexes_if_mapped(self, task: Operator, *, session: Session) -> def get_latest_runs(cls, session: Session = NEW_SESSION) -> list[DagRun]: """Return the latest DagRun for each DAG.""" subquery = ( - select(cls.dag_id, func.max(cls.execution_date).label("execution_date")) + select(cls.dag_id, func.max(cls.logical_date).label("logical_date")) .group_by(cls.dag_id) .subquery() ) return session.scalars( select(cls).join( subquery, - and_(cls.dag_id == subquery.c.dag_id, cls.execution_date == subquery.c.execution_date), + and_(cls.dag_id == subquery.c.dag_id, cls.logical_date == subquery.c.logical_date), ) ).all() diff --git a/airflow/models/log.py b/airflow/models/log.py index e01cc29b45e0..8669d228b250 100644 --- a/airflow/models/log.py +++ b/airflow/models/log.py @@ -41,7 +41,7 @@ class Log(Base): task_id = Column(StringID()) map_index = Column(Integer) event = Column(String(60)) - execution_date = Column(UtcDateTime) + logical_date = Column(UtcDateTime) run_id = Column(StringID()) owner = Column(String(500)) owner_display_name = Column(String(500)) @@ -69,13 +69,13 @@ def __init__( task_owner = None - self.execution_date = None + self.logical_date = None if task_instance: self.dag_id = task_instance.dag_id self.task_id = task_instance.task_id - if execution_date := getattr(task_instance, "execution_date", None): - self.execution_date = execution_date self.run_id = task_instance.run_id + if logical_date := getattr(task_instance, "logical_date", None): + self.logical_date = logical_date self.try_number = task_instance.try_number self.map_index = task_instance.map_index if task := getattr(task_instance, "task", None): @@ -85,8 +85,8 @@ def __init__( self.task_id = kwargs["task_id"] if "dag_id" in kwargs: self.dag_id = kwargs["dag_id"] - if kwargs.get("execution_date"): - self.execution_date = kwargs["execution_date"] + if kwargs.get("logical_date"): + self.logical_date = kwargs["logical_date"] if kwargs.get("run_id"): self.run_id = kwargs["run_id"] if "map_index" in kwargs: diff --git a/airflow/models/renderedtifields.py b/airflow/models/renderedtifields.py index 5b0b2bef52d3..b8ab93cf41a7 100644 --- a/airflow/models/renderedtifields.py +++ b/airflow/models/renderedtifields.py @@ -104,7 +104,7 @@ class RenderedTaskInstanceFields(TaskInstanceDependencies): ) # We don't need a DB level FK here, as we already have that to TI (which has one to DR) but by defining - # the relationship we can more easily find the execution date for these rows + # the relationship we can more easily find the logical date for these rows dag_run = relationship( "DagRun", primaryjoin="""and_( @@ -114,7 +114,7 @@ class RenderedTaskInstanceFields(TaskInstanceDependencies): viewonly=True, ) - execution_date = association_proxy("dag_run", "execution_date") + logical_date = association_proxy("dag_run", "logical_date") def __init__(self, ti: TaskInstance, render_templates=True, rendered_fields=None): self.dag_id = ti.dag_id @@ -254,11 +254,11 @@ def delete_old_records( from airflow.models.dagrun import DagRun tis_to_keep_query = ( - select(cls.dag_id, cls.task_id, cls.run_id, DagRun.execution_date) + select(cls.dag_id, cls.task_id, cls.run_id, DagRun.logical_date) .where(cls.dag_id == dag_id, cls.task_id == task_id) .join(cls.dag_run) .distinct() - .order_by(DagRun.execution_date.desc()) + .order_by(DagRun.logical_date.desc()) .limit(num_to_keep) ) diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 410cdd8773d3..7a2f451727a7 100644 --- 
a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -26,7 +26,6 @@ import operator import os import signal -import warnings from collections import defaultdict from contextlib import nullcontext from datetime import timedelta @@ -85,7 +84,6 @@ AirflowSkipException, AirflowTaskTerminated, AirflowTaskTimeout, - RemovedInAirflow3Warning, TaskDeferralError, TaskDeferred, UnmappableXComLengthPushed, @@ -176,14 +174,6 @@ PAST_DEPENDS_MET = "past_depends_met" -timer_unit_consistency = conf.getboolean("metrics", "timer_unit_consistency") -if not timer_unit_consistency: - warnings.warn( - "Timer and timing metrics publish in seconds were deprecated. It is enabled by default from Airflow 3 onwards. Enable timer_unit_consistency to publish all the timer and timing metrics in milliseconds.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - class TaskReturnCode(Enum): """ @@ -302,11 +292,11 @@ def _run_raw_task( raise ti.defer_task(exception=defer, session=session) ti.log.info( - "Pausing task as DEFERRED. dag_id=%s, task_id=%s, run_id=%s, execution_date=%s, start_date=%s", + "Pausing task as DEFERRED. dag_id=%s, task_id=%s, run_id=%s, logical_date=%s, start_date=%s", ti.dag_id, ti.task_id, ti.run_id, - _date_or_empty(task_instance=ti, attr="execution_date"), + _date_or_empty(task_instance=ti, attr="logical_date"), _date_or_empty(task_instance=ti, attr="start_date"), ) return TaskReturnCode.DEFERRED @@ -587,7 +577,7 @@ def _xcom_pull( If *None* (default), this is inferred from the task(s) being pulled (see below for details). :param include_prior_dates: If False, only XComs from the current - execution_date are returned. If *True*, XComs from previous dates + logical_date are returned. If *True*, XComs from previous dates are returned as well. :param run_id: If provided, only pulls XComs from a DagRun w/a matching run_id. If *None* (default), the run_id of the calling task is used. 
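Note on the hunks above: together with the matching changes in airflow/metrics/datadog_logger.py, otel_logger.py, and protocols.py earlier in this diff, they drop the timer_unit_consistency toggle, so timing metrics are now always published in milliseconds. A minimal standalone sketch of the resulting behavior, using a hypothetical emit_timing helper rather than the real logger classes:

```python
import datetime

def emit_timing(dt: float | datetime.timedelta) -> float:
    # Mirrors the updated timing() methods: timedeltas are always
    # converted to milliseconds; plain numbers pass through unchanged.
    if isinstance(dt, datetime.timedelta):
        return dt.total_seconds() * 1000.0
    return dt

# A 1.5 s queued delay is now reported as 1500.0 (ms), not 1.5 (s).
assert emit_timing(datetime.timedelta(seconds=1.5)) == 1500.0
```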
@@ -831,11 +821,11 @@ def _set_ti_attrs(target, source, include_dag_run=False): target.dag_version_id = source.dag_version_id if include_dag_run: - target.execution_date = source.execution_date + target.logical_date = source.logical_date target.dag_run.id = source.dag_run.id target.dag_run.dag_id = source.dag_run.dag_id target.dag_run.queued_at = source.dag_run.queued_at - target.dag_run.execution_date = source.dag_run.execution_date + target.dag_run.logical_date = source.dag_run.logical_date target.dag_run.start_date = source.dag_run.start_date target.dag_run.end_date = source.dag_run.end_date target.dag_run.state = source.dag_run.state @@ -977,7 +967,7 @@ def _get_template_context( validated_params = process_params(dag, task, dag_run, suppress_exception=ignore_param_exceptions) - logical_date: DateTime = timezone.coerce_datetime(task_instance.execution_date) + logical_date: DateTime = timezone.coerce_datetime(task_instance.logical_date) ds = logical_date.strftime("%Y-%m-%d") ds_nodash = ds.replace("-", "") ts = logical_date.isoformat() @@ -1018,76 +1008,6 @@ def get_prev_end_date_success() -> pendulum.DateTime | None: return None return timezone.coerce_datetime(dagrun.end_date) - @cache - def get_yesterday_ds() -> str: - return (logical_date - timedelta(1)).strftime("%Y-%m-%d") - - def get_yesterday_ds_nodash() -> str: - return get_yesterday_ds().replace("-", "") - - @cache - def get_tomorrow_ds() -> str: - return (logical_date + timedelta(1)).strftime("%Y-%m-%d") - - def get_tomorrow_ds_nodash() -> str: - return get_tomorrow_ds().replace("-", "") - - @cache - def get_next_execution_date() -> pendulum.DateTime | None: - # For manually triggered dagruns that aren't run on a schedule, - # the "next" execution date doesn't make sense, and should be set - # to execution date for consistency with how execution_date is set - # for manually triggered tasks, i.e. triggered_date == execution_date. - if dag_run.external_trigger: - return logical_date - if dag is None: - return None - next_info = dag.next_dagrun_info(data_interval, restricted=False) - if next_info is None: - return None - return timezone.coerce_datetime(next_info.logical_date) - - def get_next_ds() -> str | None: - execution_date = get_next_execution_date() - if execution_date is None: - return None - return execution_date.strftime("%Y-%m-%d") - - def get_next_ds_nodash() -> str | None: - ds = get_next_ds() - if ds is None: - return ds - return ds.replace("-", "") - - @cache - def get_prev_execution_date(): - # For manually triggered dagruns that aren't run on a schedule, - # the "previous" execution date doesn't make sense, and should be set - # to execution date for consistency with how execution_date is set - # for manually triggered tasks, i.e. triggered_date == execution_date. 
- if dag_run.external_trigger: - return logical_date - - # Workaround code copy until deprecated context fields are removed in Airflow 3 - from airflow.timetables.interval import _DataIntervalTimetable - - if not isinstance(dag.timetable, _DataIntervalTimetable): - return None - return dag.timetable._get_prev(timezone.coerce_datetime(logical_date)) - - @cache - def get_prev_ds() -> str | None: - execution_date = get_prev_execution_date() - if execution_date is None: - return None - return execution_date.strftime("%Y-%m-%d") - - def get_prev_ds_nodash() -> str | None: - prev_ds = get_prev_ds() - if prev_ds is None: - return None - return prev_ds.replace("-", "") - def get_triggering_events() -> dict[str, list[AssetEvent | AssetEventPydantic]]: if TYPE_CHECKING: assert session is not None @@ -1124,27 +1044,16 @@ def get_triggering_events() -> dict[str, list[AssetEvent | AssetEventPydantic]]: "outlet_events": OutletEventAccessors(), "ds": ds, "ds_nodash": ds_nodash, - "execution_date": logical_date, "expanded_ti_count": expanded_ti_count, "inlets": task.inlets, "inlet_events": InletEventsAccessors(task.inlets, session=session), "logical_date": logical_date, "macros": macros, "map_index_template": task.map_index_template, - "next_ds": get_next_ds(), - "next_ds_nodash": get_next_ds_nodash(), - "next_execution_date": get_next_execution_date(), "outlets": task.outlets, "params": validated_params, "prev_data_interval_start_success": get_prev_data_interval_start_success(), "prev_data_interval_end_success": get_prev_data_interval_end_success(), - "prev_ds": get_prev_ds(), - "prev_ds_nodash": get_prev_ds_nodash(), - "prev_execution_date": get_prev_execution_date(), - "prev_execution_date_success": task_instance.get_previous_execution_date( - state=DagRunState.SUCCESS, - session=session, - ), "prev_start_date_success": get_prev_start_date_success(), "prev_end_date_success": get_prev_end_date_success(), "run_id": task_instance.run_id, @@ -1153,8 +1062,6 @@ def get_triggering_events() -> dict[str, list[AssetEvent | AssetEventPydantic]]: "task_instance_key_str": f"{task.dag_id}__{task.task_id}__{ds_nodash}", "test_mode": task_instance.test_mode, "ti": task_instance, - "tomorrow_ds": get_tomorrow_ds(), - "tomorrow_ds_nodash": get_tomorrow_ds_nodash(), "triggering_asset_events": lazy_object_proxy.Proxy(get_triggering_events), "ts": ts, "ts_nodash": ts_nodash, @@ -1164,8 +1071,6 @@ def get_triggering_events() -> dict[str, list[AssetEvent | AssetEventPydantic]]: "value": VariableAccessor(deserialize_json=False), }, "conn": ConnectionAccessor(), - "yesterday_ds": get_yesterday_ds(), - "yesterday_ds_nodash": get_yesterday_ds_nodash(), } # Mypy doesn't like turning existing dicts in to a TypeDict -- and we "lie" in the type stub to say it # is one, but in practice it isn't. 
See https://github.com/python/mypy/issues/8890 @@ -1267,12 +1172,11 @@ def _handle_failure( "end_date": str(task_instance.end_date), "duration": task_instance.duration, "executor_config": str(task_instance.executor_config), - "execution_date": str(task_instance.execution_date), + "logical_date": str(task_instance.logical_date), "hostname": task_instance.hostname, "operator": str(task_instance.operator), } ) - if isinstance(task_instance, TaskInstance): span.set_attribute("log_url", task_instance.log_url) @@ -1399,14 +1303,14 @@ def _get_previous_dagrun( return None -def _get_previous_execution_date( +def _get_previous_logical_date( *, task_instance: TaskInstance | TaskInstancePydantic, state: DagRunState | None, session: Session, ) -> pendulum.DateTime | None: """ - Get execution date from property previous_ti_success. + Get logical date from property previous_ti_success. :param task_instance: the task instance :param session: SQLAlchemy ORM Session @@ -1414,9 +1318,9 @@ def _get_previous_execution_date( :meta private: """ - log.debug("previous_execution_date was called") + log.debug("previous_logical_date was called") prev_ti = task_instance.get_previous_ti(state=state, session=session) - return pendulum.instance(prev_ti.execution_date) if prev_ti and prev_ti.execution_date else None + return pendulum.instance(prev_ti.logical_date) if prev_ti and prev_ti.logical_date else None def _get_previous_start_date( @@ -1602,11 +1506,11 @@ def _log_state(*, task_instance: TaskInstance | TaskInstancePydantic, lead_msg: if task_instance.map_index >= 0: params.append(task_instance.map_index) message += "map_index=%d, " - message += "execution_date=%s, start_date=%s, end_date=%s" + message += "logical_date=%s, start_date=%s, end_date=%s" log.info( message, *params, - _date_or_empty(task_instance=task_instance, attr="execution_date"), + _date_or_empty(task_instance=task_instance, attr="logical_date"), _date_or_empty(task_instance=task_instance, attr="start_date"), _date_or_empty(task_instance=task_instance, attr="end_date"), stacklevel=2, @@ -1930,7 +1834,7 @@ class TaskInstance(Base, LoggingMixin): triggerer_job = association_proxy("trigger", "triggerer_job") dag_run = relationship("DagRun", back_populates="task_instances", lazy="joined", innerjoin=True) rendered_task_instance_fields = relationship("RenderedTaskInstanceFields", lazy="noload", uselist=False) - execution_date = association_proxy("dag_run", "execution_date") + logical_date = association_proxy("dag_run", "logical_date") task_instance_note = relationship( "TaskInstanceNote", back_populates="task_instance", @@ -2193,7 +2097,7 @@ def generate_command( def log_url(self) -> str: """Log URL for TaskInstance.""" run_id = quote(self.run_id) - base_date = quote(self.execution_date.strftime("%Y-%m-%dT%H:%M:%S%z")) + base_date = quote(self.logical_date.strftime("%Y-%m-%dT%H:%M:%S%z")) base_url = conf.get_mandatory_value("webserver", "BASE_URL") map_index = f"&map_index={self.map_index}" if self.map_index >= 0 else "" return ( @@ -2442,18 +2346,18 @@ def get_previous_ti( return _get_previous_ti(task_instance=self, state=state, session=session) @provide_session - def get_previous_execution_date( + def get_previous_logical_date( self, state: DagRunState | None = None, session: Session = NEW_SESSION, ) -> pendulum.DateTime | None: """ - Return the execution date from property previous_ti_success. + Return the logical date from property previous_ti_success. :param state: If passed, it only take into account instances of a specific state. 
:param session: SQLAlchemy ORM Session """ - return _get_previous_execution_date(task_instance=self, state=state, session=session) + return _get_previous_logical_date(task_instance=self, state=state, session=session) @provide_session def get_previous_start_date( @@ -2552,7 +2456,7 @@ def next_retry_datetime(self): # deterministic per task instance ti_hash = int( hashlib.sha1( - f"{self.dag_id}#{self.task_id}#{self.execution_date}#{self.try_number}".encode() + f"{self.dag_id}#{self.task_id}#{self.logical_date}#{self.try_number}".encode() ).hexdigest(), 16, ) @@ -2602,7 +2506,7 @@ def get_dagrun(self, session: Session = NEW_SESSION) -> DagRun: if TYPE_CHECKING: assert self.task dr.dag = self.task.dag - # Record it in the instance for next time. This means that `self.execution_date` will work correctly + # Record it in the instance for next time. This means that `self.logical_date` will work correctly set_committed_value(self, "dag_run", dr) return dr @@ -2767,9 +2671,9 @@ def _check_and_change_state_before_execution( settings.engine.dispose() # type: ignore if verbose: if mark_success: - cls.logger().info("Marking success for %s on %s", ti.task, ti.execution_date) + cls.logger().info("Marking success for %s on %s", ti.task, ti.logical_date) else: - cls.logger().info("Executing %s on %s", ti.task, ti.execution_date) + cls.logger().info("Executing %s on %s", ti.task, ti.logical_date) return True @provide_session @@ -2831,10 +2735,7 @@ def emit_state_change_metric(self, new_state: TaskInstanceState) -> None: self.task_id, ) return - if timer_unit_consistency: - timing = timezone.utcnow() - self.queued_dttm - else: - timing = (timezone.utcnow() - self.queued_dttm).total_seconds() + timing = timezone.utcnow() - self.queued_dttm elif new_state == TaskInstanceState.QUEUED: metric_name = "scheduled_duration" if self.start_date is None: @@ -2847,10 +2748,7 @@ def emit_state_change_metric(self, new_state: TaskInstanceState) -> None: self.task_id, ) return - if timer_unit_consistency: - timing = timezone.utcnow() - self.start_date - else: - timing = (timezone.utcnow() - self.start_date).total_seconds() + timing = timezone.utcnow() - self.start_date else: raise NotImplementedError("no metric emission setup for state %s", new_state) @@ -3499,7 +3397,7 @@ def xcom_pull( If *None* (default), this is inferred from the task(s) being pulled (see below for details). :param include_prior_dates: If False, only XComs from the current - execution_date are returned. If *True*, XComs from previous dates + logical_date are returned. If *True*, XComs from previous dates are returned as well. :param run_id: If provided, only pulls XComs from a DagRun w/a matching run_id. If *None* (default), the run_id of the calling task is used. 
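Several models touched by this diff (TaskInstance and RenderedTaskInstanceFields above; TaskReschedule and XCom below) expose logical_date through association_proxy("dag_run", "logical_date") rather than a column of their own. A simplified, self-contained SQLAlchemy sketch of that pattern, with hypothetical table definitions that do not match Airflow's actual schema:

```python
from sqlalchemy import Column, DateTime, ForeignKey, Integer
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class DagRun(Base):
    __tablename__ = "dag_run"
    id = Column(Integer, primary_key=True)
    logical_date = Column(DateTime)  # the renamed column lives only here

class TaskInstance(Base):
    __tablename__ = "task_instance"
    id = Column(Integer, primary_key=True)
    dag_run_id = Column(Integer, ForeignKey("dag_run.id"))
    dag_run = relationship(DagRun)
    # Reads of ti.logical_date follow the relationship to the DagRun row,
    # so callers keep a ti.logical_date attribute without duplicating the column.
    logical_date = association_proxy("dag_run", "logical_date")
```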
diff --git a/airflow/models/taskreschedule.py b/airflow/models/taskreschedule.py index 2fc06b798a67..aa987294e8ee 100644 --- a/airflow/models/taskreschedule.py +++ b/airflow/models/taskreschedule.py @@ -75,7 +75,7 @@ class TaskReschedule(TaskInstanceDependencies): ), ) dag_run = relationship("DagRun") - execution_date = association_proxy("dag_run", "execution_date") + logical_date = association_proxy("dag_run", "logical_date") def __init__( self, diff --git a/airflow/models/xcom.py b/airflow/models/xcom.py index 87c72d5bf7f5..69843db5ab5e 100644 --- a/airflow/models/xcom.py +++ b/airflow/models/xcom.py @@ -111,7 +111,7 @@ class BaseXCom(TaskInstanceDependencies, LoggingMixin): lazy="joined", passive_deletes="all", ) - execution_date = association_proxy("dag_run", "execution_date") + logical_date = association_proxy("dag_run", "logical_date") @reconstructor def init_on_load(self): @@ -374,12 +374,12 @@ def get_many( query = query.filter(BaseXCom.map_index == map_indexes) if include_prior_dates: - dr = session.query(DagRun.execution_date).filter(DagRun.run_id == run_id).subquery() - query = query.filter(BaseXCom.execution_date <= dr.c.execution_date) + dr = session.query(DagRun.logical_date).filter(DagRun.run_id == run_id).subquery() + query = query.filter(BaseXCom.logical_date <= dr.c.logical_date) else: query = query.filter(BaseXCom.run_id == run_id) - query = query.order_by(DagRun.execution_date.desc(), BaseXCom.timestamp.desc()) + query = query.order_by(DagRun.logical_date.desc(), BaseXCom.timestamp.desc()) if limit: return query.limit(limit) return query @@ -424,7 +424,7 @@ def clear( :param session: Database session. If not given, a new session will be created for this function. """ - # Given the historic order of this function (execution_date was first argument) to add a new optional + # Given the historic order of this function (logical_date was first argument) to add a new optional # param we need to add default values for everything :( if dag_id is None: raise TypeError("clear() missing required argument: dag_id") diff --git a/airflow/models/xcom_arg.py b/airflow/models/xcom_arg.py index c28af6acbe5a..4e10fe800547 100644 --- a/airflow/models/xcom_arg.py +++ b/airflow/models/xcom_arg.py @@ -41,7 +41,6 @@ if TYPE_CHECKING: from sqlalchemy.orm import Session - # from airflow.models.dag import DAG from airflow.models.operator import Operator from airflow.sdk import DAG, BaseOperator from airflow.utils.context import Context diff --git a/airflow/operators/trigger_dagrun.py b/airflow/operators/trigger_dagrun.py index bb1eac7c6963..e65a74ef2b9a 100644 --- a/airflow/operators/trigger_dagrun.py +++ b/airflow/operators/trigger_dagrun.py @@ -195,7 +195,7 @@ def execute(self, context: Context): dag_id=self.trigger_dag_id, run_id=run_id, conf=self.conf, - execution_date=parsed_logical_date, + logical_date=parsed_logical_date, replace_microseconds=False, triggered_by=DagRunTriggeredByType.OPERATOR, ) @@ -234,7 +234,7 @@ def execute(self, context: Context): trigger=DagStateTrigger( dag_id=self.trigger_dag_id, states=self.allowed_states + self.failed_states, - execution_dates=[dag_run.logical_date], + logical_dates=[dag_run.logical_date], poll_interval=self.poke_interval, ), method_name="execute_complete", @@ -261,12 +261,12 @@ def execute(self, context: Context): @provide_session def execute_complete(self, context: Context, session: Session, event: tuple[str, dict[str, Any]]): # This logical_date is parsed from the return trigger event - provided_logical_date = event[1]["execution_dates"][0] + 
provided_logical_date = event[1]["logical_dates"][0] try: # Note: here execution fails on database isolation mode. Needs structural changes for AIP-72 dag_run = session.execute( select(DagRun).where( - DagRun.dag_id == self.trigger_dag_id, DagRun.execution_date == provided_logical_date + DagRun.dag_id == self.trigger_dag_id, DagRun.logical_date == provided_logical_date ) ).scalar_one() except NoResultFound: diff --git a/airflow/policies.py b/airflow/policies.py index 49691b393866..e0c48931a286 100644 --- a/airflow/policies.py +++ b/airflow/policies.py @@ -99,7 +99,7 @@ def get_airflow_context_vars(context) -> dict[str, str]: # type: ignore[empty-b This setting allows getting the airflow context vars, which are key value pairs. They are then injected to default airflow context vars, which in the end are available as environment variables when running - tasks dag_id, task_id, execution_date, dag_run_id, try_number are reserved keys. + tasks dag_id, task_id, logical_date, dag_run_id, try_number are reserved keys. :param context: The context for the task_instance of interest. """ diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index 331e17168bab..d5504ab2e9fb 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -70,7 +70,7 @@ def get_link(self, operator: BaseOperator, *, ti_key: TaskInstanceKey) -> str: ) query = { "dag_id": external_dag_id, - "execution_date": ti.execution_date.isoformat(), # type: ignore[union-attr] + "logical_date": ti.logical_date.isoformat(), # type: ignore[union-attr] } return build_airflow_url_with_query(query) @@ -351,7 +351,7 @@ def execute(self, context: Context) -> None: external_dag_id=self.external_dag_id, external_task_group_id=self.external_task_group_id, external_task_ids=self.external_task_ids, - execution_dates=self._get_dttm_filter(context), + logical_dates=self._get_dttm_filter(context), allowed_states=self.allowed_states, poke_interval=self.poll_interval, soft_fail=self.soft_fail, @@ -406,7 +406,7 @@ def get_count(self, dttm_filter, session, states) -> int: """ Get the count of records against dttm filter and states. - :param dttm_filter: date time filter for execution date + :param dttm_filter: date time filter for logical date :param session: airflow session object :param states: task or dag states :return: count of record against the filters @@ -436,7 +436,7 @@ def _handle_execution_date_fn(self, context) -> Any: Handle backward compatibility. This function is to handle backwards compatibility with how this operator was - previously where it only passes the execution date, but also allow for the newer + previously where it only passes the logical date, but also allow for the newer implementation to pass all context variables as keyword arguments, to allow for more sophisticated returns of dates to return. """ @@ -464,14 +464,14 @@ class ExternalTaskMarker(EmptyOperator): :param external_dag_id: The dag_id that contains the dependent task that needs to be cleared. :param external_task_id: The task_id of the dependent task that needs to be cleared. - :param execution_date: The logical date of the dependent task execution that needs to be cleared. + :param logical_date: The logical date of the dependent task execution that needs to be cleared. :param recursion_depth: The maximum level of transitive dependencies allowed. Default is 10. This is mostly used for preventing cyclic dependencies. It is fine to increase this number if necessary. 
However, too many levels of transitive dependencies will make it slower to clear tasks in the web UI. """ - template_fields = ["external_dag_id", "external_task_id", "execution_date"] + template_fields = ["external_dag_id", "external_task_id", "logical_date"] ui_color = "#4db7db" operator_extra_links = [ExternalDagLink()] @@ -483,20 +483,20 @@ def __init__( *, external_dag_id: str, external_task_id: str, - execution_date: str | datetime.datetime | None = "{{ logical_date.isoformat() }}", + logical_date: str | datetime.datetime | None = "{{ logical_date.isoformat() }}", recursion_depth: int = 10, **kwargs, ): super().__init__(**kwargs) self.external_dag_id = external_dag_id self.external_task_id = external_task_id - if isinstance(execution_date, datetime.datetime): - self.execution_date = execution_date.isoformat() - elif isinstance(execution_date, str): - self.execution_date = execution_date + if isinstance(logical_date, datetime.datetime): + self.logical_date = logical_date.isoformat() + elif isinstance(logical_date, str): + self.logical_date = logical_date else: raise TypeError( - f"Expected str or datetime.datetime type for execution_date. Got {type(execution_date)}" + f"Expected str or datetime.datetime type for logical_date. Got {type(logical_date)}" ) if recursion_depth <= 0: diff --git a/airflow/sentry.py b/airflow/sentry.py index d5fbf3c04d21..22261bd99fd2 100644 --- a/airflow/sentry.py +++ b/airflow/sentry.py @@ -62,7 +62,7 @@ def flush(self): class ConfiguredSentry(DummySentry): """Configure Sentry SDK.""" - SCOPE_DAG_RUN_TAGS = frozenset(("data_interval_end", "data_interval_start", "execution_date")) + SCOPE_DAG_RUN_TAGS = frozenset(("data_interval_end", "data_interval_start", "logical_date")) SCOPE_TASK_INSTANCE_TAGS = frozenset(("task_id", "dag_id", "try_number")) SCOPE_CRUMBS = frozenset(("task_id", "state", "operator", "duration")) diff --git a/airflow/serialization/pydantic/dag_run.py b/airflow/serialization/pydantic/dag_run.py index a0175e3749d9..b3b5ab6d70d3 100644 --- a/airflow/serialization/pydantic/dag_run.py +++ b/airflow/serialization/pydantic/dag_run.py @@ -40,7 +40,7 @@ class DagRunPydantic(BaseModelPydantic): id: int dag_id: str queued_at: Optional[datetime] - execution_date: datetime + logical_date: datetime start_date: Optional[datetime] end_date: Optional[datetime] state: str @@ -61,10 +61,6 @@ class DagRunPydantic(BaseModelPydantic): model_config = ConfigDict(from_attributes=True, arbitrary_types_allowed=True) - @property - def logical_date(self) -> datetime: - return self.execution_date - def get_task_instances( self, state: Iterable[TaskInstanceState | None] | None = None, diff --git a/airflow/serialization/pydantic/taskinstance.py b/airflow/serialization/pydantic/taskinstance.py index e8a1cfaf79a7..eea493758d17 100644 --- a/airflow/serialization/pydantic/taskinstance.py +++ b/airflow/serialization/pydantic/taskinstance.py @@ -90,7 +90,7 @@ class TaskInstancePydantic(BaseModelPydantic, LoggingMixin): map_index: int start_date: Optional[datetime] end_date: Optional[datetime] - execution_date: Optional[datetime] + logical_date: Optional[datetime] duration: Optional[float] state: Optional[str] try_number: int @@ -180,7 +180,7 @@ def xcom_pull( :param task_ids: task id or list of task ids, if None, the task_id of the current task is used :param dag_id: dag id, if None, the dag_id of the current task is used :param key: the key to identify the XCom value - :param include_prior_dates: whether to include prior execution dates + :param include_prior_dates: whether 
to include prior logical dates :param session: the sqlalchemy session :param map_indexes: map index or list of map indexes, if None, the map_index of the current task is used @@ -367,20 +367,20 @@ def get_previous_dagrun( return _get_previous_dagrun(task_instance=self, state=state, session=session) - def get_previous_execution_date( + def get_previous_logical_date( self, state: DagRunState | None = None, session: Session | None = None, ) -> pendulum.DateTime | None: """ - Return the execution date from property previous_ti_success. + Return the logical date from property previous_ti_success. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session """ - from airflow.models.taskinstance import _get_previous_execution_date + from airflow.models.taskinstance import _get_previous_logical_date - return _get_previous_execution_date(task_instance=self, state=state, session=session) + return _get_previous_logical_date(task_instance=self, state=state, session=session) def get_previous_start_date( self, @@ -388,7 +388,7 @@ def get_previous_start_date( session: Session | None = None, ) -> pendulum.DateTime | None: """ - Return the execution date from property previous_ti_success. + Return the logical date from property previous_ti_success. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session diff --git a/airflow/settings.py b/airflow/settings.py index a3f99510adba..db8ee4f41117 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -31,6 +31,7 @@ import pluggy from packaging.version import Version from sqlalchemy import create_engine, exc, text +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine from sqlalchemy.orm import scoped_session, sessionmaker from sqlalchemy.pool import NullPool @@ -95,8 +96,17 @@ DONOT_MODIFY_HANDLERS: bool | None = None DAGS_FOLDER: str = os.path.expanduser(conf.get_mandatory_value("core", "DAGS_FOLDER")) +AIO_LIBS_MAPPING = {"sqlite": "aiosqlite", "postgresql": "asyncpg", "mysql": "aiomysql"} +""" +Mapping of sync scheme to async scheme. 
+ +:meta private: +""" + engine: Engine Session: Callable[..., SASession] +async_engine: AsyncEngine +create_async_session: Callable[..., AsyncSession] # The JSON library to use for DAG Serialization and De-Serialization json = json @@ -199,13 +209,25 @@ def load_policy_plugins(pm: pluggy.PluginManager): pm.load_setuptools_entrypoints("airflow.policy") +def _get_async_conn_uri_from_sync(sync_uri): + scheme, rest = sync_uri.split(":", maxsplit=1) + scheme = scheme.split("+", maxsplit=1)[0] + aiolib = AIO_LIBS_MAPPING.get(scheme) + if aiolib: + return f"{scheme}+{aiolib}:{rest}" + else: + return sync_uri + + def configure_vars(): """Configure Global Variables from airflow.cfg.""" global SQL_ALCHEMY_CONN + global SQL_ALCHEMY_CONN_ASYNC global DAGS_FOLDER global PLUGINS_FOLDER global DONOT_MODIFY_HANDLERS SQL_ALCHEMY_CONN = conf.get("database", "SQL_ALCHEMY_CONN") + SQL_ALCHEMY_CONN_ASYNC = _get_async_conn_uri_from_sync(sync_uri=SQL_ALCHEMY_CONN) DAGS_FOLDER = os.path.expanduser(conf.get("core", "DAGS_FOLDER")) @@ -441,6 +463,9 @@ def configure_orm(disable_connection_pool=False, pool_class=None): global Session global engine + global async_engine + global create_async_session + if os.environ.get("_AIRFLOW_SKIP_DB_TESTS") == "true": # Skip DB initialization in unit tests, if DB tests are skipped Session = SkipDBTestsSession @@ -466,7 +491,14 @@ def configure_orm(disable_connection_pool=False, pool_class=None): connect_args["check_same_thread"] = False engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args, future=True) - + async_engine = create_async_engine(SQL_ALCHEMY_CONN_ASYNC, future=True) + create_async_session = sessionmaker( + bind=async_engine, + autocommit=False, + autoflush=False, + class_=AsyncSession, + expire_on_commit=False, + ) mask_secret(engine.url.password) setup_event_handlers(engine) @@ -797,7 +829,7 @@ def is_usage_data_collection_enabled() -> bool: fallback=False, ) -ALLOW_FUTURE_EXEC_DATES = conf.getboolean("scheduler", "allow_trigger_in_future", fallback=False) +ALLOW_FUTURE_LOGICAL_DATES = conf.getboolean("scheduler", "allow_trigger_in_future", fallback=False) USE_JOB_SCHEDULE = conf.getboolean("scheduler", "use_job_schedule", fallback=True) diff --git a/airflow/task/standard_task_runner.py b/airflow/task/standard_task_runner.py index bc846574f024..c446bccac960 100644 --- a/airflow/task/standard_task_runner.py +++ b/airflow/task/standard_task_runner.py @@ -160,7 +160,7 @@ def _start_by_fork(self): self.log.info("Running: %s", self._command) self.log.info("Subtask %s", self._task_instance.task_id) - proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}" + proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.logical_date_or_run_id}" setproctitle(proc_title.format(args)) return_code = 0 try: diff --git a/airflow/ti_deps/dep_context.py b/airflow/ti_deps/dep_context.py index 988cefdfcf94..bc2d476d8018 100644 --- a/airflow/ti_deps/dep_context.py +++ b/airflow/ti_deps/dep_context.py @@ -89,7 +89,7 @@ def ensure_finished_tis(self, dag_run: DagRun, session: Session) -> list[TaskIns Ensure finished_tis is populated if it's currently None, which allows running tasks without dag_run. 
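The settings.py hunk above derives the new async SQLAlchemy engine URL from the sync DSN via `AIO_LIBS_MAPPING`. A minimal restatement of that scheme translation, handy for checking what `SQL_ALCHEMY_CONN_ASYNC` will look like for a given sync connection string (this mirrors the `_get_async_conn_uri_from_sync` helper in the hunk; it is a sketch, not the shipped implementation):

```python
# Mirrors the sync->async URI translation added to airflow/settings.py above.
AIO_LIBS_MAPPING = {"sqlite": "aiosqlite", "postgresql": "asyncpg", "mysql": "aiomysql"}


def async_uri_from_sync(sync_uri: str) -> str:
    scheme, rest = sync_uri.split(":", maxsplit=1)
    scheme = scheme.split("+", maxsplit=1)[0]  # drop any sync driver suffix, e.g. "+psycopg2"
    aiolib = AIO_LIBS_MAPPING.get(scheme)
    return f"{scheme}+{aiolib}:{rest}" if aiolib else sync_uri


# postgresql+psycopg2://... becomes postgresql+asyncpg://...; unmapped schemes pass through unchanged.
print(async_uri_from_sync("postgresql+psycopg2://user:pass@db/airflow"))
print(async_uri_from_sync("mssql+pyodbc://user:pass@db/airflow"))
```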
:param dag_run: The DagRun for which to find finished tasks - :return: A list of all the finished tasks of this DAG and execution_date + :return: A list of all the finished tasks of this DAG and logical_date """ if self.finished_tis is None: finished_tis = dag_run.get_task_instances(state=State.finished, session=session) diff --git a/airflow/ti_deps/deps/exec_date_after_start_date_dep.py b/airflow/ti_deps/deps/exec_date_after_start_date_dep.py index 09e0d8c22961..4832ea2806be 100644 --- a/airflow/ti_deps/deps/exec_date_after_start_date_dep.py +++ b/airflow/ti_deps/deps/exec_date_after_start_date_dep.py @@ -22,25 +22,25 @@ class ExecDateAfterStartDateDep(BaseTIDep): - """Determines whether a task's execution date is after start date.""" + """Determines whether a task's logical date is after start date.""" - NAME = "Execution Date" + NAME = "Logical Date" IGNORABLE = True @provide_session def _get_dep_statuses(self, ti, session, dep_context): - if ti.task.start_date and ti.execution_date < ti.task.start_date: + if ti.task.start_date and ti.logical_date < ti.task.start_date: yield self._failing_status( reason=( - f"The execution date is {ti.execution_date.isoformat()} but this is before " + f"The logical date is {ti.logical_date.isoformat()} but this is before " f"the task's start date {ti.task.start_date.isoformat()}." ) ) - if ti.task.dag and ti.task.dag.start_date and ti.execution_date < ti.task.dag.start_date: + if ti.task.dag and ti.task.dag.start_date and ti.logical_date < ti.task.dag.start_date: yield self._failing_status( reason=( - f"The execution date is {ti.execution_date.isoformat()} but this is " + f"The logical date is {ti.logical_date.isoformat()} but this is " f"before the task's DAG's start date {ti.task.dag.start_date.isoformat()}." ) ) diff --git a/airflow/ti_deps/deps/prev_dagrun_dep.py b/airflow/ti_deps/deps/prev_dagrun_dep.py index 7265727f6d53..c756e6ec1c64 100644 --- a/airflow/ti_deps/deps/prev_dagrun_dep.py +++ b/airflow/ti_deps/deps/prev_dagrun_dep.py @@ -78,7 +78,7 @@ def _has_any_prior_tis(ti: TI, *, session: Session) -> bool: query = exists_query( TI.dag_id == ti.dag_id, TI.task_id == ti.task_id, - TI.execution_date < ti.execution_date, + TI.logical_date < ti.logical_date, session=session, ) return query @@ -169,7 +169,7 @@ def _get_dep_statuses(self, ti: TI, session: Session, dep_context): return # There was a DAG run, but the task wasn't active back then. 
- if catchup and last_dagrun.execution_date < ti.task.start_date: + if catchup and last_dagrun.logical_date < ti.task.start_date: self._push_past_deps_met_xcom_if_needed(ti, dep_context) yield self._passing_status(reason="This task instance was the first task instance for its task.") return diff --git a/airflow/ti_deps/deps/runnable_exec_date_dep.py b/airflow/ti_deps/deps/runnable_exec_date_dep.py index da9d88a2ec06..990264563a2f 100644 --- a/airflow/ti_deps/deps/runnable_exec_date_dep.py +++ b/airflow/ti_deps/deps/runnable_exec_date_dep.py @@ -23,7 +23,7 @@ class RunnableExecDateDep(BaseTIDep): - """Determines whether a task's execution date is valid.""" + """Determines whether a task's logical date is valid.""" NAME = "Execution Date" IGNORABLE = True @@ -34,11 +34,11 @@ def _get_dep_statuses(self, ti, session, dep_context): # don't consider runs that are executed in the future unless # specified by config and schedule is None - logical_date = ti.get_dagrun(session).execution_date + logical_date = ti.get_dagrun(session).logical_date if logical_date > cur_date and not ti.task.dag.allow_future_exec_dates: yield self._failing_status( reason=( - f"Execution date {logical_date.isoformat()} is in the future " + f"Logical date {logical_date.isoformat()} is in the future " f"(the current date is {cur_date.isoformat()})." ) ) @@ -46,7 +46,7 @@ def _get_dep_statuses(self, ti, session, dep_context): if ti.task.end_date and logical_date > ti.task.end_date: yield self._failing_status( reason=( - f"The execution date is {logical_date.isoformat()} but this is " + f"The logical date is {logical_date.isoformat()} but this is " f"after the task's end date {ti.task.end_date.isoformat()}." ) ) @@ -54,7 +54,7 @@ def _get_dep_statuses(self, ti, session, dep_context): if ti.task.dag and ti.task.dag.end_date and logical_date > ti.task.dag.end_date: yield self._failing_status( reason=( - f"The execution date is {logical_date.isoformat()} but this is after " + f"The logical date is {logical_date.isoformat()} but this is after " f"the task's DAG's end date {ti.task.dag.end_date.isoformat()}." ) ) diff --git a/airflow/timetables/base.py b/airflow/timetables/base.py index 64a261202651..f8aa4279ebba 100644 --- a/airflow/timetables/base.py +++ b/airflow/timetables/base.py @@ -174,7 +174,7 @@ class Timetable(Protocol): ``NullTimetable`` sets this to *False*. """ - run_ordering: Sequence[str] = ("data_interval_end", "execution_date") + run_ordering: Sequence[str] = ("data_interval_end", "logical_date") """How runs triggered from this timetable should be ordered in UI. This should be a list of field names on the DAG run object. diff --git a/airflow/timetables/simple.py b/airflow/timetables/simple.py index 5a931b40dd11..3457c52a08aa 100644 --- a/airflow/timetables/simple.py +++ b/airflow/timetables/simple.py @@ -36,7 +36,7 @@ class _TrivialTimetable(Timetable): """Some code reuse for "trivial" timetables that has nothing complex.""" periodic = False - run_ordering: Sequence[str] = ("execution_date",) + run_ordering: Sequence[str] = ("logical_date",) @classmethod def deserialize(cls, data: dict[str, Any]) -> Timetable: diff --git a/airflow/triggers/external_task.py b/airflow/triggers/external_task.py index cd43d59876e9..159a6df90950 100644 --- a/airflow/triggers/external_task.py +++ b/airflow/triggers/external_task.py @@ -41,7 +41,7 @@ class WorkflowTrigger(BaseTrigger): A trigger to monitor tasks, task group and dag execution in Apache Airflow. :param external_dag_id: The ID of the external DAG. 
- :param execution_dates: A list of execution dates for the external DAG. + :param logical_dates: A list of logical dates for the external DAG. :param external_task_ids: A collection of external task IDs to wait for. :param external_task_group_id: The ID of the external task group to wait for. :param failed_states: States considered as failed for external tasks. @@ -54,7 +54,7 @@ class WorkflowTrigger(BaseTrigger): def __init__( self, external_dag_id: str, - execution_dates: list, + logical_dates: list, external_task_ids: typing.Collection[str] | None = None, external_task_group_id: str | None = None, failed_states: typing.Iterable[str] | None = None, @@ -70,7 +70,7 @@ def __init__( self.failed_states = failed_states self.skipped_states = skipped_states self.allowed_states = allowed_states - self.execution_dates = execution_dates + self.logical_dates = logical_dates self.poke_interval = poke_interval self.soft_fail = soft_fail super().__init__(**kwargs) @@ -86,7 +86,7 @@ def serialize(self) -> tuple[str, dict[str, Any]]: "failed_states": self.failed_states, "skipped_states": self.skipped_states, "allowed_states": self.allowed_states, - "execution_dates": self.execution_dates, + "logical_dates": self.logical_dates, "poke_interval": self.poke_interval, "soft_fail": self.soft_fail, }, @@ -109,7 +109,7 @@ async def run(self) -> typing.AsyncIterator[TriggerEvent]: yield TriggerEvent({"status": "skipped"}) return allowed_count = await self._get_count(self.allowed_states) - if allowed_count == len(self.execution_dates): + if allowed_count == len(self.logical_dates): yield TriggerEvent({"status": "success"}) return self.log.info("Sleeping for %s seconds", self.poke_interval) @@ -124,7 +124,7 @@ def _get_count(self, states: typing.Iterable[str] | None) -> int: :return The count of records. """ return _get_count( - dttm_filter=self.execution_dates, + dttm_filter=self.logical_dates, external_task_ids=self.external_task_ids, external_task_group_id=self.external_task_group_id, external_dag_id=self.external_dag_id, @@ -138,7 +138,7 @@ class DagStateTrigger(BaseTrigger): :param dag_id: The dag_id that contains the task you want to wait for :param states: allowed states, default is ``['success']`` - :param execution_dates: The logical date at which DAG run. + :param logical_dates: The logical date at which DAG run. :param poll_interval: The time interval in seconds to check the state. The default value is 5.0 sec. 
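Both triggers in this hunk now accept `logical_dates` instead of `execution_dates`. An illustrative construction of the renamed `DagStateTrigger` (the dag id and date are placeholders, and the import paths are taken from the hunk), showing that the serialized kwargs carry the new key:

```python
# Illustrative use of the renamed trigger; dag_id and the date are placeholders.
from datetime import datetime, timezone

from airflow.triggers.external_task import DagStateTrigger
from airflow.utils.state import DagRunState

trigger = DagStateTrigger(
    dag_id="downstream_dag",
    states=[DagRunState.SUCCESS, DagRunState.FAILED],
    logical_dates=[datetime(2024, 1, 1, tzinfo=timezone.utc)],  # formerly `execution_dates`
    poll_interval=5.0,
)
classpath, kwargs = trigger.serialize()
assert "logical_dates" in kwargs and "execution_dates" not in kwargs
```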
""" @@ -147,13 +147,13 @@ def __init__( self, dag_id: str, states: list[DagRunState], - execution_dates: list[datetime], + logical_dates: list[datetime], poll_interval: float = 5.0, ): super().__init__() self.dag_id = dag_id self.states = states - self.execution_dates = execution_dates + self.logical_dates = logical_dates self.poll_interval = poll_interval def serialize(self) -> tuple[str, dict[str, typing.Any]]: @@ -163,7 +163,7 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: { "dag_id": self.dag_id, "states": self.states, - "execution_dates": self.execution_dates, + "logical_dates": self.logical_dates, "poll_interval": self.poll_interval, }, ) @@ -173,7 +173,7 @@ async def run(self) -> typing.AsyncIterator[TriggerEvent]: while True: # mypy confuses typing here num_dags = await self.count_dags() # type: ignore[call-arg] - if num_dags == len(self.execution_dates): + if num_dags == len(self.logical_dates): yield TriggerEvent(self.serialize()) return await asyncio.sleep(self.poll_interval) @@ -187,7 +187,7 @@ def count_dags(self, *, session: Session = NEW_SESSION) -> int | None: .filter( DagRun.dag_id == self.dag_id, DagRun.state.in_(self.states), - DagRun.execution_date.in_(self.execution_dates), + DagRun.logical_date.in_(self.logical_dates), ) .scalar() ) diff --git a/airflow/ui/openapi-gen/queries/common.ts b/airflow/ui/openapi-gen/queries/common.ts index 761d07f352e3..7b23e33f0ab4 100644 --- a/airflow/ui/openapi-gen/queries/common.ts +++ b/airflow/ui/openapi-gen/queries/common.ts @@ -327,6 +327,28 @@ export const UseDagRunServiceGetDagRunKeyFn = ( }, queryKey?: Array, ) => [useDagRunServiceGetDagRunKey, ...(queryKey ?? [{ dagId, dagRunId }])]; +export type DagRunServiceGetUpstreamAssetEventsDefaultResponse = Awaited< + ReturnType +>; +export type DagRunServiceGetUpstreamAssetEventsQueryResult< + TData = DagRunServiceGetUpstreamAssetEventsDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useDagRunServiceGetUpstreamAssetEventsKey = + "DagRunServiceGetUpstreamAssetEvents"; +export const UseDagRunServiceGetUpstreamAssetEventsKeyFn = ( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: Array, +) => [ + useDagRunServiceGetUpstreamAssetEventsKey, + ...(queryKey ?? [{ dagId, dagRunId }]), +]; export type DagSourceServiceGetDagSourceDefaultResponse = Awaited< ReturnType >; @@ -622,18 +644,6 @@ export const UseImportErrorServiceGetImportErrorsKeyFn = ( useImportErrorServiceGetImportErrorsKey, ...(queryKey ?? [{ limit, offset, orderBy }]), ]; -export type MonitorServiceGetHealthDefaultResponse = Awaited< - ReturnType ->; -export type MonitorServiceGetHealthQueryResult< - TData = MonitorServiceGetHealthDefaultResponse, - TError = unknown, -> = UseQueryResult; -export const useMonitorServiceGetHealthKey = "MonitorServiceGetHealth"; -export const UseMonitorServiceGetHealthKeyFn = (queryKey?: Array) => [ - useMonitorServiceGetHealthKey, - ...(queryKey ?? 
[]), -]; export type PluginServiceGetPluginsDefaultResponse = Awaited< ReturnType >; @@ -965,6 +975,24 @@ export const UseTaskInstanceServiceGetTaskInstancesKeyFn = ( }, ]), ]; +export type TaskServiceGetTasksDefaultResponse = Awaited< + ReturnType +>; +export type TaskServiceGetTasksQueryResult< + TData = TaskServiceGetTasksDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useTaskServiceGetTasksKey = "TaskServiceGetTasks"; +export const UseTaskServiceGetTasksKeyFn = ( + { + dagId, + orderBy, + }: { + dagId: string; + orderBy?: string; + }, + queryKey?: Array, +) => [useTaskServiceGetTasksKey, ...(queryKey ?? [{ dagId, orderBy }])]; export type TaskServiceGetTaskDefaultResponse = Awaited< ReturnType >; @@ -1022,18 +1050,6 @@ export const UseVariableServiceGetVariablesKeyFn = ( useVariableServiceGetVariablesKey, ...(queryKey ?? [{ limit, offset, orderBy }]), ]; -export type VersionServiceGetVersionDefaultResponse = Awaited< - ReturnType ->; -export type VersionServiceGetVersionQueryResult< - TData = VersionServiceGetVersionDefaultResponse, - TError = unknown, -> = UseQueryResult; -export const useVersionServiceGetVersionKey = "VersionServiceGetVersion"; -export const UseVersionServiceGetVersionKeyFn = (queryKey?: Array) => [ - useVersionServiceGetVersionKey, - ...(queryKey ?? []), -]; export type XcomServiceGetXcomEntryDefaultResponse = Awaited< ReturnType >; @@ -1067,6 +1083,33 @@ export const UseXcomServiceGetXcomEntryKeyFn = ( { dagId, dagRunId, deserialize, mapIndex, stringify, taskId, xcomKey }, ]), ]; +export type MonitorServiceGetHealthDefaultResponse = Awaited< + ReturnType +>; +export type MonitorServiceGetHealthQueryResult< + TData = MonitorServiceGetHealthDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useMonitorServiceGetHealthKey = "MonitorServiceGetHealth"; +export const UseMonitorServiceGetHealthKeyFn = (queryKey?: Array) => [ + useMonitorServiceGetHealthKey, + ...(queryKey ?? []), +]; +export type VersionServiceGetVersionDefaultResponse = Awaited< + ReturnType +>; +export type VersionServiceGetVersionQueryResult< + TData = VersionServiceGetVersionDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useVersionServiceGetVersionKey = "VersionServiceGetVersion"; +export const UseVersionServiceGetVersionKeyFn = (queryKey?: Array) => [ + useVersionServiceGetVersionKey, + ...(queryKey ?? 
[]), +]; +export type AssetServiceCreateAssetEventMutationResult = Awaited< + ReturnType +>; export type BackfillServiceCreateBackfillMutationResult = Awaited< ReturnType >; @@ -1076,9 +1119,15 @@ export type ConnectionServicePostConnectionMutationResult = Awaited< export type ConnectionServiceTestConnectionMutationResult = Awaited< ReturnType >; +export type DagRunServiceClearDagRunMutationResult = Awaited< + ReturnType +>; export type PoolServicePostPoolMutationResult = Awaited< ReturnType >; +export type TaskInstanceServiceGetTaskInstancesBatchMutationResult = Awaited< + ReturnType +>; export type VariableServicePostVariableMutationResult = Awaited< ReturnType >; diff --git a/airflow/ui/openapi-gen/queries/prefetch.ts b/airflow/ui/openapi-gen/queries/prefetch.ts index 51998a375065..0c522f36e433 100644 --- a/airflow/ui/openapi-gen/queries/prefetch.ts +++ b/airflow/ui/openapi-gen/queries/prefetch.ts @@ -412,6 +412,32 @@ export const prefetchUseDagRunServiceGetDagRun = ( queryKey: Common.UseDagRunServiceGetDagRunKeyFn({ dagId, dagRunId }), queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }), }); +/** + * Get Upstream Asset Events + * If dag run is asset-triggered, return the asset events that triggered it. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns AssetEventCollectionResponse Successful Response + * @throws ApiError + */ +export const prefetchUseDagRunServiceGetUpstreamAssetEvents = ( + queryClient: QueryClient, + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseDagRunServiceGetUpstreamAssetEventsKeyFn({ + dagId, + dagRunId, + }), + queryFn: () => DagRunService.getUpstreamAssetEvents({ dagId, dagRunId }), + }); /** * Get Dag Source * Get source code using file token. @@ -810,16 +836,6 @@ export const prefetchUseImportErrorServiceGetImportErrors = ( queryFn: () => ImportErrorService.getImportErrors({ limit, offset, orderBy }), }); -/** - * Get Health - * @returns HealthInfoSchema Successful Response - * @throws ApiError - */ -export const prefetchUseMonitorServiceGetHealth = (queryClient: QueryClient) => - queryClient.prefetchQuery({ - queryKey: Common.UseMonitorServiceGetHealthKeyFn(), - queryFn: () => MonitorService.getHealth(), - }); /** * Get Plugins * @param data The data for the request. @@ -1300,6 +1316,29 @@ export const prefetchUseTaskInstanceServiceGetTaskInstances = ( updatedAtLte, }), }); +/** + * Get Tasks + * Get tasks for DAG. + * @param data The data for the request. + * @param data.dagId + * @param data.orderBy + * @returns TaskCollectionResponse Successful Response + * @throws ApiError + */ +export const prefetchUseTaskServiceGetTasks = ( + queryClient: QueryClient, + { + dagId, + orderBy, + }: { + dagId: string; + orderBy?: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseTaskServiceGetTasksKeyFn({ dagId, orderBy }), + queryFn: () => TaskService.getTasks({ dagId, orderBy }), + }); /** * Get Task * Get simplified representation of a task. @@ -1373,17 +1412,6 @@ export const prefetchUseVariableServiceGetVariables = ( }), queryFn: () => VariableService.getVariables({ limit, offset, orderBy }), }); -/** - * Get Version - * Get version information. 
- * @returns VersionInfo Successful Response - * @throws ApiError - */ -export const prefetchUseVersionServiceGetVersion = (queryClient: QueryClient) => - queryClient.prefetchQuery({ - queryKey: Common.UseVersionServiceGetVersionKeyFn(), - queryFn: () => VersionService.getVersion(), - }); /** * Get Xcom Entry * Get an XCom entry. @@ -1439,3 +1467,24 @@ export const prefetchUseXcomServiceGetXcomEntry = ( xcomKey, }), }); +/** + * Get Health + * @returns HealthInfoSchema Successful Response + * @throws ApiError + */ +export const prefetchUseMonitorServiceGetHealth = (queryClient: QueryClient) => + queryClient.prefetchQuery({ + queryKey: Common.UseMonitorServiceGetHealthKeyFn(), + queryFn: () => MonitorService.getHealth(), + }); +/** + * Get Version + * Get version information. + * @returns VersionInfo Successful Response + * @throws ApiError + */ +export const prefetchUseVersionServiceGetVersion = (queryClient: QueryClient) => + queryClient.prefetchQuery({ + queryKey: Common.UseVersionServiceGetVersionKeyFn(), + queryFn: () => VersionService.getVersion(), + }); diff --git a/airflow/ui/openapi-gen/queries/queries.ts b/airflow/ui/openapi-gen/queries/queries.ts index 93df8f104ac2..8ec0ea9234ac 100644 --- a/airflow/ui/openapi-gen/queries/queries.ts +++ b/airflow/ui/openapi-gen/queries/queries.ts @@ -32,12 +32,15 @@ import { import { BackfillPostBody, ConnectionBody, + CreateAssetEventsBody, DAGPatchBody, + DAGRunClearBody, DAGRunPatchBody, DagRunState, DagWarningType, PoolPatchBody, PoolPostBody, + TaskInstancesBatchBody, VariableBody, } from "../requests/types.gen"; import * as Common from "./common"; @@ -517,6 +520,39 @@ export const useDagRunServiceGetDagRun = < queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }) as TData, ...options, }); +/** + * Get Upstream Asset Events + * If dag run is asset-triggered, return the asset events that triggered it. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns AssetEventCollectionResponse Successful Response + * @throws ApiError + */ +export const useDagRunServiceGetUpstreamAssetEvents = < + TData = Common.DagRunServiceGetUpstreamAssetEventsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseDagRunServiceGetUpstreamAssetEventsKeyFn( + { dagId, dagRunId }, + queryKey, + ), + queryFn: () => + DagRunService.getUpstreamAssetEvents({ dagId, dagRunId }) as TData, + ...options, + }); /** * Get Dag Source * Get source code using file token. @@ -992,24 +1028,6 @@ export const useImportErrorServiceGetImportErrors = < ImportErrorService.getImportErrors({ limit, offset, orderBy }) as TData, ...options, }); -/** - * Get Health - * @returns HealthInfoSchema Successful Response - * @throws ApiError - */ -export const useMonitorServiceGetHealth = < - TData = Common.MonitorServiceGetHealthDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useQuery({ - queryKey: Common.UseMonitorServiceGetHealthKeyFn(queryKey), - queryFn: () => MonitorService.getHealth() as TData, - ...options, - }); /** * Get Plugins * @param data The data for the request. @@ -1558,6 +1576,35 @@ export const useTaskInstanceServiceGetTaskInstances = < }) as TData, ...options, }); +/** + * Get Tasks + * Get tasks for DAG. 
+ * @param data The data for the request. + * @param data.dagId + * @param data.orderBy + * @returns TaskCollectionResponse Successful Response + * @throws ApiError + */ +export const useTaskServiceGetTasks = < + TData = Common.TaskServiceGetTasksDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + orderBy, + }: { + dagId: string; + orderBy?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseTaskServiceGetTasksKeyFn({ dagId, orderBy }, queryKey), + queryFn: () => TaskService.getTasks({ dagId, orderBy }) as TData, + ...options, + }); /** * Get Task * Get simplified representation of a task. @@ -1652,25 +1699,6 @@ export const useVariableServiceGetVariables = < VariableService.getVariables({ limit, offset, orderBy }) as TData, ...options, }); -/** - * Get Version - * Get version information. - * @returns VersionInfo Successful Response - * @throws ApiError - */ -export const useVersionServiceGetVersion = < - TData = Common.VersionServiceGetVersionDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useQuery({ - queryKey: Common.UseVersionServiceGetVersionKeyFn(queryKey), - queryFn: () => VersionService.getVersion() as TData, - ...options, - }); /** * Get Xcom Entry * Get an XCom entry. @@ -1727,6 +1755,82 @@ export const useXcomServiceGetXcomEntry = < }) as TData, ...options, }); +/** + * Get Health + * @returns HealthInfoSchema Successful Response + * @throws ApiError + */ +export const useMonitorServiceGetHealth = < + TData = Common.MonitorServiceGetHealthDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseMonitorServiceGetHealthKeyFn(queryKey), + queryFn: () => MonitorService.getHealth() as TData, + ...options, + }); +/** + * Get Version + * Get version information. + * @returns VersionInfo Successful Response + * @throws ApiError + */ +export const useVersionServiceGetVersion = < + TData = Common.VersionServiceGetVersionDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseVersionServiceGetVersionKeyFn(queryKey), + queryFn: () => VersionService.getVersion() as TData, + ...options, + }); +/** + * Create Asset Event + * Create asset events. + * @param data The data for the request. + * @param data.requestBody + * @returns AssetEventResponse Successful Response + * @throws ApiError + */ +export const useAssetServiceCreateAssetEvent = < + TData = Common.AssetServiceCreateAssetEventMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + requestBody: CreateAssetEventsBody; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + requestBody: CreateAssetEventsBody; + }, + TContext + >({ + mutationFn: ({ requestBody }) => + AssetService.createAssetEvent({ + requestBody, + }) as unknown as Promise, + ...options, + }); /** * Create Backfill * @param data The data for the request. @@ -1847,6 +1951,52 @@ export const useConnectionServiceTestConnection = < }) as unknown as Promise, ...options, }); +/** + * Clear Dag Run + * @param data The data for the request. 
+ * @param data.dagId + * @param data.dagRunId + * @param data.requestBody + * @returns unknown Successful Response + * @throws ApiError + */ +export const useDagRunServiceClearDagRun = < + TData = Common.DagRunServiceClearDagRunMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + dagId: string; + dagRunId: string; + requestBody: DAGRunClearBody; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + dagId: string; + dagRunId: string; + requestBody: DAGRunClearBody; + }, + TContext + >({ + mutationFn: ({ dagId, dagRunId, requestBody }) => + DagRunService.clearDagRun({ + dagId, + dagRunId, + requestBody, + }) as unknown as Promise, + ...options, + }); /** * Post Pool * Create a Pool. @@ -1884,6 +2034,45 @@ export const usePoolServicePostPool = < PoolService.postPool({ requestBody }) as unknown as Promise, ...options, }); +/** + * Get Task Instances Batch + * Get list of task instances. + * @param data The data for the request. + * @param data.requestBody + * @returns TaskInstanceCollectionResponse Successful Response + * @throws ApiError + */ +export const useTaskInstanceServiceGetTaskInstancesBatch = < + TData = Common.TaskInstanceServiceGetTaskInstancesBatchMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + requestBody: TaskInstancesBatchBody; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + requestBody: TaskInstancesBatchBody; + }, + TContext + >({ + mutationFn: ({ requestBody }) => + TaskInstanceService.getTaskInstancesBatch({ + requestBody, + }) as unknown as Promise, + ...options, + }); /** * Post Variable * Create a variable. diff --git a/airflow/ui/openapi-gen/queries/suspense.ts b/airflow/ui/openapi-gen/queries/suspense.ts index 7ec5ecc8319c..1b8142228153 100644 --- a/airflow/ui/openapi-gen/queries/suspense.ts +++ b/airflow/ui/openapi-gen/queries/suspense.ts @@ -502,6 +502,39 @@ export const useDagRunServiceGetDagRunSuspense = < queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }) as TData, ...options, }); +/** + * Get Upstream Asset Events + * If dag run is asset-triggered, return the asset events that triggered it. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns AssetEventCollectionResponse Successful Response + * @throws ApiError + */ +export const useDagRunServiceGetUpstreamAssetEventsSuspense = < + TData = Common.DagRunServiceGetUpstreamAssetEventsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseDagRunServiceGetUpstreamAssetEventsKeyFn( + { dagId, dagRunId }, + queryKey, + ), + queryFn: () => + DagRunService.getUpstreamAssetEvents({ dagId, dagRunId }) as TData, + ...options, + }); /** * Get Dag Source * Get source code using file token. 
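For reference, the `clearDagRun` mutation registered above maps to the new `POST /public/dags/{dag_id}/dagRuns/{dag_run_id}/clear` route defined further down in `services.gen.ts`, with `DAGRunClearBody` as its payload. A hypothetical direct call against that route (the base URL, auth handling, and the dag/run ids are assumptions; only the path and body shape come from the generated client):

```python
# Hypothetical sketch of calling the new clear endpoint directly; the base URL and
# identifiers are placeholders, and authentication is assumed to be handled elsewhere.
import requests

BASE_URL = "http://localhost:8080"  # assumed API server address
dag_id, dag_run_id = "example_dag", "manual__2024-01-01T00:00:00+00:00"

resp = requests.post(
    f"{BASE_URL}/public/dags/{dag_id}/dagRuns/{dag_run_id}/clear",
    json={"dry_run": True},  # DAGRunClearBody; dry_run defaults to true
)
resp.raise_for_status()
# Per types.gen.ts, the response is either a TaskInstanceCollectionResponse or a DAGRunResponse.
print(resp.json())
```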
@@ -977,24 +1010,6 @@ export const useImportErrorServiceGetImportErrorsSuspense = < ImportErrorService.getImportErrors({ limit, offset, orderBy }) as TData, ...options, }); -/** - * Get Health - * @returns HealthInfoSchema Successful Response - * @throws ApiError - */ -export const useMonitorServiceGetHealthSuspense = < - TData = Common.MonitorServiceGetHealthDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useSuspenseQuery({ - queryKey: Common.UseMonitorServiceGetHealthKeyFn(queryKey), - queryFn: () => MonitorService.getHealth() as TData, - ...options, - }); /** * Get Plugins * @param data The data for the request. @@ -1543,6 +1558,35 @@ export const useTaskInstanceServiceGetTaskInstancesSuspense = < }) as TData, ...options, }); +/** + * Get Tasks + * Get tasks for DAG. + * @param data The data for the request. + * @param data.dagId + * @param data.orderBy + * @returns TaskCollectionResponse Successful Response + * @throws ApiError + */ +export const useTaskServiceGetTasksSuspense = < + TData = Common.TaskServiceGetTasksDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + orderBy, + }: { + dagId: string; + orderBy?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseTaskServiceGetTasksKeyFn({ dagId, orderBy }, queryKey), + queryFn: () => TaskService.getTasks({ dagId, orderBy }) as TData, + ...options, + }); /** * Get Task * Get simplified representation of a task. @@ -1637,25 +1681,6 @@ export const useVariableServiceGetVariablesSuspense = < VariableService.getVariables({ limit, offset, orderBy }) as TData, ...options, }); -/** - * Get Version - * Get version information. - * @returns VersionInfo Successful Response - * @throws ApiError - */ -export const useVersionServiceGetVersionSuspense = < - TData = Common.VersionServiceGetVersionDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useSuspenseQuery({ - queryKey: Common.UseVersionServiceGetVersionKeyFn(queryKey), - queryFn: () => VersionService.getVersion() as TData, - ...options, - }); /** * Get Xcom Entry * Get an XCom entry. @@ -1712,3 +1737,40 @@ export const useXcomServiceGetXcomEntrySuspense = < }) as TData, ...options, }); +/** + * Get Health + * @returns HealthInfoSchema Successful Response + * @throws ApiError + */ +export const useMonitorServiceGetHealthSuspense = < + TData = Common.MonitorServiceGetHealthDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseMonitorServiceGetHealthKeyFn(queryKey), + queryFn: () => MonitorService.getHealth() as TData, + ...options, + }); +/** + * Get Version + * Get version information. 
+ * @returns VersionInfo Successful Response + * @throws ApiError + */ +export const useVersionServiceGetVersionSuspense = < + TData = Common.VersionServiceGetVersionDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseVersionServiceGetVersionKeyFn(queryKey), + queryFn: () => VersionService.getVersion() as TData, + ...options, + }); diff --git a/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow/ui/openapi-gen/requests/schemas.gen.ts index 6db4ec1e1b7d..1f83e434286b 100644 --- a/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -103,8 +103,7 @@ export const $AssetAliasSchema = { type: "object", required: ["id", "name"], title: "AssetAliasSchema", - description: - "Serializable version of the AssetAliasSchema ORM SqlAlchemyModel.", + description: "Asset alias serializer for assets.", } as const; export const $AssetCollectionResponse = { @@ -695,6 +694,24 @@ export const $ConnectionTestResponse = { description: "Connection Test serializer for responses.", } as const; +export const $CreateAssetEventsBody = { + properties: { + uri: { + type: "string", + title: "Uri", + }, + extra: { + type: "object", + title: "Extra", + }, + }, + additionalProperties: false, + type: "object", + required: ["uri"], + title: "CreateAssetEventsBody", + description: "Create asset events request.", +} as const; + export const $DAGCollectionResponse = { properties: { dags: { @@ -1304,6 +1321,19 @@ export const $DAGResponse = { description: "DAG serializer for responses.", } as const; +export const $DAGRunClearBody = { + properties: { + dry_run: { + type: "boolean", + title: "Dry Run", + default: true, + }, + }, + type: "object", + title: "DAGRunClearBody", + description: "DAG Run serializer for clear endpoint body.", +} as const; + export const $DAGRunPatchBody = { properties: { state: { @@ -1924,8 +1954,15 @@ export const $DagRunAssetReference = { title: "Start Date", }, end_date: { - type: "string", - format: "date-time", + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], title: "End Date", }, state: { @@ -2012,8 +2049,7 @@ export const $DagScheduleAssetReference = { type: "object", required: ["dag_id", "created_at", "updated_at"], title: "DagScheduleAssetReference", - description: - "Serializable version of the DagScheduleAssetReference ORM SqlAlchemyModel.", + description: "DAG schedule reference serializer for assets.", } as const; export const $DagStatsCollectionResponse = { @@ -2926,6 +2962,26 @@ export const $SchedulerInfoSchema = { description: "Schema for Scheduler info.", } as const; +export const $TaskCollectionResponse = { + properties: { + tasks: { + items: { + $ref: "#/components/schemas/TaskResponse", + }, + type: "array", + title: "Tasks", + }, + total_entries: { + type: "integer", + title: "Total Entries", + }, + }, + type: "object", + required: ["tasks", "total_entries"], + title: "TaskCollectionResponse", + description: "Task collection serializer for responses.", +} as const; + export const $TaskDependencyCollectionResponse = { properties: { dependencies: { @@ -3345,6 +3401,236 @@ export const $TaskInstanceStateCount = { description: "TaskInstance serializer for responses.", } as const; +export const $TaskInstancesBatchBody = { + properties: { + dag_ids: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + 
title: "Dag Ids", + }, + dag_run_ids: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Dag Run Ids", + }, + task_ids: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Task Ids", + }, + state: { + anyOf: [ + { + items: { + anyOf: [ + { + $ref: "#/components/schemas/TaskInstanceState", + }, + { + type: "null", + }, + ], + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "State", + }, + logical_date_gte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Logical Date Gte", + }, + logical_date_lte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Logical Date Lte", + }, + start_date_gte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Start Date Gte", + }, + start_date_lte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Start Date Lte", + }, + end_date_gte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "End Date Gte", + }, + end_date_lte: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "End Date Lte", + }, + duration_gte: { + anyOf: [ + { + type: "number", + }, + { + type: "null", + }, + ], + title: "Duration Gte", + }, + duration_lte: { + anyOf: [ + { + type: "number", + }, + { + type: "null", + }, + ], + title: "Duration Lte", + }, + pool: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Pool", + }, + queue: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Queue", + }, + executor: { + anyOf: [ + { + items: { + type: "string", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Executor", + }, + page_offset: { + type: "integer", + minimum: 0, + title: "Page Offset", + default: 0, + }, + page_limit: { + type: "integer", + minimum: 0, + title: "Page Limit", + default: 100, + }, + order_by: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Order By", + }, + }, + type: "object", + title: "TaskInstancesBatchBody", + description: "Task Instance body for get batch.", +} as const; + export const $TaskOutletAssetReference = { properties: { dag_id: { @@ -3369,8 +3655,7 @@ export const $TaskOutletAssetReference = { type: "object", required: ["dag_id", "task_id", "created_at", "updated_at"], title: "TaskOutletAssetReference", - description: - "Serializable version of the TaskOutletAssetReference ORM SqlAlchemyModel.", + description: "Task outlet reference serializer for assets.", } as const; export const $TaskResponse = { @@ -3940,10 +4225,10 @@ export const $XComResponseNative = { format: "date-time", title: "Timestamp", }, - execution_date: { + logical_date: { type: "string", format: "date-time", - title: "Execution Date", + title: "Logical Date", }, map_index: { type: "integer", @@ -3965,7 +4250,7 @@ export const $XComResponseNative = { required: [ "key", "timestamp", - "execution_date", + "logical_date", "map_index", "task_id", "dag_id", @@ -3986,10 +4271,10 @@ export const $XComResponseString = { format: "date-time", title: "Timestamp", }, - execution_date: { + logical_date: { type: "string", format: "date-time", - title: "Execution Date", + title: "Logical Date", }, 
map_index: { type: "integer", @@ -4019,7 +4304,7 @@ export const $XComResponseString = { required: [ "key", "timestamp", - "execution_date", + "logical_date", "map_index", "task_id", "dag_id", diff --git a/airflow/ui/openapi-gen/requests/services.gen.ts b/airflow/ui/openapi-gen/requests/services.gen.ts index 4df1042ea9bd..c39dce38d34d 100644 --- a/airflow/ui/openapi-gen/requests/services.gen.ts +++ b/airflow/ui/openapi-gen/requests/services.gen.ts @@ -9,6 +9,8 @@ import type { GetAssetsResponse, GetAssetEventsData, GetAssetEventsResponse, + CreateAssetEventData, + CreateAssetEventResponse, GetAssetData, GetAssetResponse, GetDagAssetQueuedEventsData, @@ -47,6 +49,10 @@ import type { DeleteDagRunResponse, PatchDagRunData, PatchDagRunResponse, + GetUpstreamAssetEventsData, + GetUpstreamAssetEventsResponse, + ClearDagRunData, + ClearDagRunResponse, GetDagSourceData, GetDagSourceResponse, GetDagStatsData, @@ -75,7 +81,6 @@ import type { GetImportErrorResponse, GetImportErrorsData, GetImportErrorsResponse, - GetHealthResponse, GetPluginsData, GetPluginsResponse, DeletePoolData, @@ -102,6 +107,10 @@ import type { GetMappedTaskInstanceResponse, GetTaskInstancesData, GetTaskInstancesResponse, + GetTaskInstancesBatchData, + GetTaskInstancesBatchResponse, + GetTasksData, + GetTasksResponse, GetTaskData, GetTaskResponse, DeleteVariableData, @@ -114,9 +123,10 @@ import type { GetVariablesResponse, PostVariableData, PostVariableResponse, - GetVersionResponse, GetXcomEntryData, GetXcomEntryResponse, + GetHealthResponse, + GetVersionResponse, } from "./types.gen"; export class AssetService { @@ -216,6 +226,31 @@ export class AssetService { }); } + /** + * Create Asset Event + * Create asset events. + * @param data The data for the request. + * @param data.requestBody + * @returns AssetEventResponse Successful Response + * @throws ApiError + */ + public static createAssetEvent( + data: CreateAssetEventData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "POST", + url: "/public/events", + body: data.requestBody, + mediaType: "application/json", + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + /** * Get Asset * Get an asset. @@ -767,6 +802,64 @@ export class DagRunService { }, }); } + + /** + * Get Upstream Asset Events + * If dag run is asset-triggered, return the asset events that triggered it. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns AssetEventCollectionResponse Successful Response + * @throws ApiError + */ + public static getUpstreamAssetEvents( + data: GetUpstreamAssetEventsData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/dags/{dag_id}/dagRuns/{dag_run_id}/upstreamAssetEvents", + path: { + dag_id: data.dagId, + dag_run_id: data.dagRunId, + }, + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Clear Dag Run + * @param data The data for the request. 
+ * @param data.dagId + * @param data.dagRunId + * @param data.requestBody + * @returns unknown Successful Response + * @throws ApiError + */ + public static clearDagRun( + data: ClearDagRunData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "POST", + url: "/public/dags/{dag_id}/dagRuns/{dag_run_id}/clear", + path: { + dag_id: data.dagId, + dag_run_id: data.dagRunId, + }, + body: data.requestBody, + mediaType: "application/json", + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } } export class DagSourceService { @@ -1237,24 +1330,6 @@ export class ImportErrorService { } } -export class MonitorService { - /** - * Get Health - * @returns HealthInfoSchema Successful Response - * @throws ApiError - */ - public static getHealth(): CancelablePromise { - return __request(OpenAPI, { - method: "GET", - url: "/public/monitor/health", - errors: { - 401: "Unauthorized", - 403: "Forbidden", - }, - }); - } -} - export class PluginService { /** * Get Plugins @@ -1714,9 +1789,65 @@ export class TaskInstanceService { }, }); } + + /** + * Get Task Instances Batch + * Get list of task instances. + * @param data The data for the request. + * @param data.requestBody + * @returns TaskInstanceCollectionResponse Successful Response + * @throws ApiError + */ + public static getTaskInstancesBatch( + data: GetTaskInstancesBatchData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "POST", + url: "/public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/list", + body: data.requestBody, + mediaType: "application/json", + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } } export class TaskService { + /** + * Get Tasks + * Get tasks for DAG. + * @param data The data for the request. + * @param data.dagId + * @param data.orderBy + * @returns TaskCollectionResponse Successful Response + * @throws ApiError + */ + public static getTasks( + data: GetTasksData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/dags/{dag_id}/tasks/", + path: { + dag_id: data.dagId, + }, + query: { + order_by: data.orderBy, + }, + errors: { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + /** * Get Task * Get simplified representation of a task. @@ -1886,25 +2017,6 @@ export class VariableService { } } -export class VersionService { - /** - * Get Version - * Get version information. - * @returns VersionInfo Successful Response - * @throws ApiError - */ - public static getVersion(): CancelablePromise { - return __request(OpenAPI, { - method: "GET", - url: "/public/version/", - errors: { - 401: "Unauthorized", - 403: "Forbidden", - }, - }); - } -} - export class XcomService { /** * Get Xcom Entry @@ -1947,3 +2059,32 @@ export class XcomService { }); } } + +export class MonitorService { + /** + * Get Health + * @returns HealthInfoSchema Successful Response + * @throws ApiError + */ + public static getHealth(): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/monitor/health", + }); + } +} + +export class VersionService { + /** + * Get Version + * Get version information. 
+ * @returns VersionInfo Successful Response + * @throws ApiError + */ + public static getVersion(): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/version/", + }); + } +} diff --git a/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow/ui/openapi-gen/requests/types.gen.ts index 99b6e98d7cec..96d6b812897f 100644 --- a/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow/ui/openapi-gen/requests/types.gen.ts @@ -22,7 +22,7 @@ export type AppBuilderViewResponse = { }; /** - * Serializable version of the AssetAliasSchema ORM SqlAlchemyModel. + * Asset alias serializer for assets. */ export type AssetAliasSchema = { id: number; @@ -174,6 +174,16 @@ export type ConnectionTestResponse = { message: string; }; +/** + * Create asset events request. + */ +export type CreateAssetEventsBody = { + uri: string; + extra?: { + [key: string]: unknown; + }; +}; + /** * DAG Collection serializer for responses. */ @@ -273,6 +283,13 @@ export type DAGResponse = { readonly file_token: string; }; +/** + * DAG Run serializer for clear endpoint body. + */ +export type DAGRunClearBody = { + dry_run?: boolean; +}; + /** * DAG Run Serializer for PATCH requests. */ @@ -419,7 +436,7 @@ export type DagRunAssetReference = { dag_id: string; logical_date: string; start_date: string; - end_date: string; + end_date: string | null; state: string; data_interval_start: string; data_interval_end: string; @@ -457,7 +474,7 @@ export type DagRunType = | "asset_triggered"; /** - * Serializable version of the DagScheduleAssetReference ORM SqlAlchemyModel. + * DAG schedule reference serializer for assets. */ export type DagScheduleAssetReference = { dag_id: string; @@ -727,6 +744,14 @@ export type SchedulerInfoSchema = { latest_scheduler_heartbeat: string | null; }; +/** + * Task collection serializer for responses. + */ +export type TaskCollectionResponse = { + tasks: Array; + total_entries: number; +}; + /** * Task scheduling dependencies collection serializer for responses. */ @@ -826,7 +851,31 @@ export type TaskInstanceStateCount = { }; /** - * Serializable version of the TaskOutletAssetReference ORM SqlAlchemyModel. + * Task Instance body for get batch. + */ +export type TaskInstancesBatchBody = { + dag_ids?: Array | null; + dag_run_ids?: Array | null; + task_ids?: Array | null; + state?: Array | null; + logical_date_gte?: string | null; + logical_date_lte?: string | null; + start_date_gte?: string | null; + start_date_lte?: string | null; + end_date_gte?: string | null; + end_date_lte?: string | null; + duration_gte?: number | null; + duration_lte?: number | null; + pool?: Array | null; + queue?: Array | null; + executor?: Array | null; + page_offset?: number; + page_limit?: number; + order_by?: string | null; +}; + +/** + * Task outlet reference serializer for assets. 
*/ export type TaskOutletAssetReference = { dag_id: string; @@ -950,7 +999,7 @@ export type VersionInfo = { export type XComResponseNative = { key: string; timestamp: string; - execution_date: string; + logical_date: string; map_index: number; task_id: string; dag_id: string; @@ -963,7 +1012,7 @@ export type XComResponseNative = { export type XComResponseString = { key: string; timestamp: string; - execution_date: string; + logical_date: string; map_index: number; task_id: string; dag_id: string; @@ -1001,6 +1050,12 @@ export type GetAssetEventsData = { export type GetAssetEventsResponse = AssetEventCollectionResponse; +export type CreateAssetEventData = { + requestBody: CreateAssetEventsBody; +}; + +export type CreateAssetEventResponse = AssetEventResponse; + export type GetAssetData = { uri: string; }; @@ -1138,6 +1193,23 @@ export type PatchDagRunData = { export type PatchDagRunResponse = DAGRunResponse; +export type GetUpstreamAssetEventsData = { + dagId: string; + dagRunId: string; +}; + +export type GetUpstreamAssetEventsResponse = AssetEventCollectionResponse; + +export type ClearDagRunData = { + dagId: string; + dagRunId: string; + requestBody: DAGRunClearBody; +}; + +export type ClearDagRunResponse = + | TaskInstanceCollectionResponse + | DAGRunResponse; + export type GetDagSourceData = { accept?: string; fileToken: string; @@ -1265,8 +1337,6 @@ export type GetImportErrorsData = { export type GetImportErrorsResponse = ImportErrorCollectionResponse; -export type GetHealthResponse = HealthInfoSchema; - export type GetPluginsData = { limit?: number; offset?: number; @@ -1401,6 +1471,19 @@ export type GetTaskInstancesData = { export type GetTaskInstancesResponse = TaskInstanceCollectionResponse; +export type GetTaskInstancesBatchData = { + requestBody: TaskInstancesBatchBody; +}; + +export type GetTaskInstancesBatchResponse = TaskInstanceCollectionResponse; + +export type GetTasksData = { + dagId: string; + orderBy?: string; +}; + +export type GetTasksResponse = TaskCollectionResponse; + export type GetTaskData = { dagId: string; taskId: unknown; @@ -1442,8 +1525,6 @@ export type PostVariableData = { export type PostVariableResponse = VariableResponse; -export type GetVersionResponse = VersionInfo; - export type GetXcomEntryData = { dagId: string; dagRunId: string; @@ -1456,6 +1537,10 @@ export type GetXcomEntryData = { export type GetXcomEntryResponse = XComResponseNative | XComResponseString; +export type GetHealthResponse = HealthInfoSchema; + +export type GetVersionResponse = VersionInfo; + export type $OpenApiTs = { "/ui/next_run_assets/{dag_id}": { get: { @@ -1528,6 +1613,33 @@ export type $OpenApiTs = { }; }; }; + "/public/events": { + post: { + req: CreateAssetEventData; + res: { + /** + * Successful Response + */ + 200: AssetEventResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; "/public/assets/{uri}": { get: { req: GetAssetData; @@ -2029,6 +2141,60 @@ export type $OpenApiTs = { }; }; }; + "/public/dags/{dag_id}/dagRuns/{dag_run_id}/upstreamAssetEvents": { + get: { + req: GetUpstreamAssetEventsData; + res: { + /** + * Successful Response + */ + 200: AssetEventCollectionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 
422: HTTPValidationError; + }; + }; + }; + "/public/dags/{dag_id}/dagRuns/{dag_run_id}/clear": { + post: { + req: ClearDagRunData; + res: { + /** + * Successful Response + */ + 200: TaskInstanceCollectionResponse | DAGRunResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; "/public/dagSources/{file_token}": { get: { req: GetDagSourceData; @@ -2413,24 +2579,6 @@ export type $OpenApiTs = { }; }; }; - "/public/monitor/health": { - get: { - res: { - /** - * Successful Response - */ - 200: HealthInfoSchema; - /** - * Unauthorized - */ - 401: HTTPExceptionResponse; - /** - * Forbidden - */ - 403: HTTPExceptionResponse; - }; - }; - }; "/public/plugins/": { get: { req: GetPluginsData; @@ -2772,6 +2920,64 @@ export type $OpenApiTs = { }; }; }; + "/public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/list": { + post: { + req: GetTaskInstancesBatchData; + res: { + /** + * Successful Response + */ + 200: TaskInstanceCollectionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/public/dags/{dag_id}/tasks/": { + get: { + req: GetTasksData; + res: { + /** + * Successful Response + */ + 200: TaskCollectionResponse; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; "/public/dags/{dag_id}/tasks/{task_id}": { get: { req: GetTaskData; @@ -2928,24 +3134,6 @@ export type $OpenApiTs = { }; }; }; - "/public/version/": { - get: { - res: { - /** - * Successful Response - */ - 200: VersionInfo; - /** - * Unauthorized - */ - 401: HTTPExceptionResponse; - /** - * Forbidden - */ - 403: HTTPExceptionResponse; - }; - }; - }; "/public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}": { get: { req: GetXcomEntryData; @@ -2977,4 +3165,24 @@ export type $OpenApiTs = { }; }; }; + "/public/monitor/health": { + get: { + res: { + /** + * Successful Response + */ + 200: HealthInfoSchema; + }; + }; + }; + "/public/version/": { + get: { + res: { + /** + * Successful Response + */ + 200: VersionInfo; + }; + }; + }; }; diff --git a/airflow/ui/src/components/DagRunInfo.tsx b/airflow/ui/src/components/DagRunInfo.tsx index 4cc2f7027370..0d30e9c7667c 100644 --- a/airflow/ui/src/components/DagRunInfo.tsx +++ b/airflow/ui/src/components/DagRunInfo.tsx @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -import { VStack, Text, Box, HStack } from "@chakra-ui/react"; +import { VStack, Text, HStack } from "@chakra-ui/react"; import dayjs from "dayjs"; import type { DAGRunResponse } from "openapi/requests/types.gen"; @@ -24,6 +24,8 @@ import Time from "src/components/Time"; import { Tooltip } from "src/components/ui"; import { stateColor } from "src/utils/stateColor"; +import { StateCircle } from "./StateCircle"; + type Props = { readonly dataIntervalEnd?: string | null; readonly dataIntervalStart?: string | null; @@ -81,13 +83,7 @@ const DagRunInfo = ({