From 9008eb9eb44a655fc95c39d0b7a3aa9fc77fbf05 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Fri, 2 Aug 2024 16:38:31 +0100 Subject: [PATCH 01/12] feat(FIR-34986): can use COPY to create table --- .../create_external_table.sql | 98 ++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/dbt/include/firebolt/macros/dbt_external_tables/create_external_table.sql b/dbt/include/firebolt/macros/dbt_external_tables/create_external_table.sql index cccb7da1e..2bae09264 100644 --- a/dbt/include/firebolt/macros/dbt_external_tables/create_external_table.sql +++ b/dbt/include/firebolt/macros/dbt_external_tables/create_external_table.sql @@ -1,4 +1,12 @@ {% macro firebolt__create_external_table(source_node) %} + {% if source_node.external.strategy == 'copy' %} + {{ firebolt__create_with_copy_from(source_node) }} + {% else %} + {{ firebolt__create_with_external_table(source_node) }} + {% endif %} +{% endmacro %} + +{% macro firebolt__create_with_external_table(source_node) %} {%- set external = source_node.external -%} {%- if 'partitions' in external -%} {%- set columns = adapter.make_field_partition_pairs(source_node.columns.values(), @@ -7,7 +15,6 @@ {%- set columns = adapter.make_field_partition_pairs(source_node.columns.values(), []) -%} {%- endif -%} - -- {%- set partitions = external.partitions -%} {%- set credentials = external.credentials -%} {# Leaving out "IF NOT EXISTS" because this should only be called by if no DROP IF is necessary. #} @@ -39,3 +46,92 @@ {%- if external.compression -%} COMPRESSION = {{external.compression}} {%- endif %} TYPE = {{ external.type }} {% endmacro %} + +{% macro firebolt__create_with_copy_from(source_node) %} + {# COPY FROM is only available in Firebolt 2.0. #} + {%- set external = source_node.external -%} + {%- set credentials = external.credentials -%} + {%- set options = external.options -%} + {%- set csv_options = options.csv_options -%} + {%- set error_file_credentials = options.error_file_credentials -%} + + {# There are no partitions, but this formats the columns correctly. #} + {%- if 'partitions' in external -%} + {%- set columns = adapter.make_field_partition_pairs(source_node.columns.values(), + external.partitions) -%} + {%- else -%} + {%- set columns = adapter.make_field_partition_pairs(source_node.columns.values(), + []) -%} + {%- endif -%} + COPY INTO {{source(source_node.source_name, source_node.name)}} + {%- if columns and columns | length > 0 %} + ( + {%- for column in columns -%} + {{ column.name }} + {%- if column.default is not none %} DEFAULT {{ column.default }}{% endif %} + {%- if column.source_column_name is not none %} {{ '$' ~ loop.index0 }}{% endif %} + {{- ',' if not loop.last }} + {%- endfor -%} + ) + {%- endif %} + FROM '{{external.url}}' + {%- if options %} + WITH + {%- if options.object_pattern %} + PATTERN = '{{options.object_pattern}}' + {%- endif %} + {%- if options.type %} + TYPE = {{ options.type }} + {%- endif %} + {%- if options.auto_create is not none %} + AUTO_CREATE = {{ options.auto_create | upper }} + {%- endif %} + {%- if options.allow_column_mismatch is not none %} + ALLOW_COLUMN_MISMATCH = {{ options.allow_column_mismatch | upper }} + {%- endif %} + {%- if options.error_file %} + ERROR_FILE = '{{ options.error_file }}' + {%- endif %} + {%- if error_file_credentials %} + ERROR_FILE_CREDENTIALS = (AWS_KEY_ID = '{{ error_file_credentials.aws_key_id }}' AWS_SECRET_KEY = '{{ error_file_credentials.aws_secret_key }}') + {%- endif %} + {%- if options.max_errors_per_file %} + MAX_ERRORS_PER_FILE = {{ options.max_errors_per_file }} + {%- endif %} + {%- if csv_options %} + {%- if csv_options.header is not none %} + HEADER = {{ csv_options.header | upper }} + {%- endif %} + {%- if csv_options.delimiter %} + DELIMITER = '{{ csv_options.delimiter }}' + {%- endif %} + {%- if csv_options.newline %} + NEWLINE = '{{ csv_options.newline }}' + {%- endif %} + {%- if csv_options.quote %} + QUOTE = {{ csv_options.quote }} + {%- endif %} + {%- if csv_options.escape %} + ESCAPE = '{{ csv_options.escape }}' + {%- endif %} + {%- if csv_options.null_string %} + NULL_STRING = '{{ csv_options.null_string }}' + {%- endif %} + {%- if csv_options.empty_field_as_null is not none %} + EMPTY_FIELD_AS_NULL = {{ csv_options.empty_field_as_null | upper }} + {%- endif %} + {%- if csv_options.skip_blank_lines is not none %} + SKIP_BLANK_LINES = {{ csv_options.skip_blank_lines | upper }} + {%- endif %} + {%- if csv_options.date_format %} + DATE_FORMAT = '{{ csv_options.date_format }}' + {%- endif %} + {%- if csv_options.timestamp_format %} + TIMESTAMP_FORMAT = '{{ csv_options.timestamp_format }}' + {%- endif %} + {%- endif %} + {%- endif %} + {%- if credentials %} + CREDENTIALS = (AWS_KEY_ID = '{{credentials.aws_key_id}}' AWS_SECRET_KEY = '{{credentials.aws_secret_key}}') + {%- endif %} +{% endmacro %} From b9796f3bec0ca97254317f82231fd2bcc0c9ac3b Mon Sep 17 00:00:00 2001 From: ptiurin Date: Fri, 2 Aug 2024 16:52:27 +0100 Subject: [PATCH 02/12] add test --- .../jaffle_shop/run_test_workflow.sh | 3 ++ .../sources_external_tables_copy.yml | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 .github/workflows/jaffle_shop/sources_external_tables_copy.yml diff --git a/.github/workflows/jaffle_shop/run_test_workflow.sh b/.github/workflows/jaffle_shop/run_test_workflow.sh index 65915ae25..335e6a876 100755 --- a/.github/workflows/jaffle_shop/run_test_workflow.sh +++ b/.github/workflows/jaffle_shop/run_test_workflow.sh @@ -10,6 +10,9 @@ if [[ -n "$AWS_ACCESS_ROLE_ARN" ]]; then # Can't test this on FB 1.0 cp ../dbt-firebolt/.github/workflows/jaffle_shop/sources_external_tables_iam.yml models/staging/sources_external_tables.yml dbt run-operation stage_external_sources --vars "ext_full_refresh: true" + # Test COPY INTO + cp ../dbt-firebolt/.github/workflows/jaffle_shop/sources_external_tables_copy.yml models/staging/sources_external_tables.yml + dbt run-operation stage_external_sources --vars "ext_full_refresh: true" fi dbt seed dbt seed --full-refresh diff --git a/.github/workflows/jaffle_shop/sources_external_tables_copy.yml b/.github/workflows/jaffle_shop/sources_external_tables_copy.yml new file mode 100644 index 000000000..92c8a6df4 --- /dev/null +++ b/.github/workflows/jaffle_shop/sources_external_tables_copy.yml @@ -0,0 +1,29 @@ +version: 2 + +sources: + - name: s3 + tables: + - name: raw_customers + external: + strategy: copy + url: "{{ env_var('SECURE_BUCKET_PATH') }}" + credentials: + aws_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + aws_secret_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + options: + object_pattern: '*raw_customers.csv' + type: CSV + auto_create: true + allow_column_mismatch: false + max_errors_per_file: 10 + csv_options: + header: true + delimiter: ',' + quote: DOUBLE_QUOTE + escape: '\' + null_string: '\\N' + empty_field_as_null: true + skip_blank_lines: true + date_format: 'YYYY-MM-DD' + timestamp_format: 'YYYY-MM-DD HH24:MI:SS' + From 9d9c398f8055edd570d2eff22f0b369296dc8bec Mon Sep 17 00:00:00 2001 From: ptiurin Date: Fri, 2 Aug 2024 16:57:57 +0100 Subject: [PATCH 03/12] fix env var --- .github/workflows/jaffle_shop/sources_external_tables_copy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jaffle_shop/sources_external_tables_copy.yml b/.github/workflows/jaffle_shop/sources_external_tables_copy.yml index 92c8a6df4..97dbcd9b8 100644 --- a/.github/workflows/jaffle_shop/sources_external_tables_copy.yml +++ b/.github/workflows/jaffle_shop/sources_external_tables_copy.yml @@ -9,7 +9,7 @@ sources: url: "{{ env_var('SECURE_BUCKET_PATH') }}" credentials: aws_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" - aws_secret_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + aws_secret_key: "{{ env_var('AWS_ACCESS_SECRET_KEY') }}" options: object_pattern: '*raw_customers.csv' type: CSV From 58402254daf6e1f9e3ef7a3a3616544f8c13e77a Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 13 Aug 2024 16:50:13 +0100 Subject: [PATCH 04/12] fix schema --- dbt/include/firebolt/macros/adapters.sql | 6 ++---- dbt/include/firebolt/macros/catalog.sql | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/dbt/include/firebolt/macros/adapters.sql b/dbt/include/firebolt/macros/adapters.sql index fb5b5e984..9a3fc6cd2 100644 --- a/dbt/include/firebolt/macros/adapters.sql +++ b/dbt/include/firebolt/macros/adapters.sql @@ -171,12 +171,10 @@ SELECT table_catalog AS "database", table_name AS "name", - table_schema AS "schema", + '{{ relation.schema }}' AS "schema", CASE - WHEN table_type = 'BASE TABLE' THEN 'table' - WHEN table_type = 'DIMENSION' THEN 'table' - WHEN table_type = 'FACT' THEN 'table' WHEN table_type = 'VIEW' THEN 'view' + ELSE 'table' END AS "type" FROM information_schema.tables diff --git a/dbt/include/firebolt/macros/catalog.sql b/dbt/include/firebolt/macros/catalog.sql index a212e95c6..b09f1020c 100644 --- a/dbt/include/firebolt/macros/catalog.sql +++ b/dbt/include/firebolt/macros/catalog.sql @@ -11,10 +11,8 @@ the columns (for instance, `is_nullable` is missing) but more could be added lat cols.column_name as column_name, cols.data_type AS column_type, CASE - WHEN table_type = 'BASE TABLE' THEN 'TABLE' - WHEN table_type = 'DIMENSION' THEN 'TABLE' - WHEN table_type = 'FACT' THEN 'TABLE' WHEN table_type = 'VIEW' THEN 'VIEW' + ELSE 'TABLE' END AS relation_type, cols.ordinal_position as column_index FROM From 5763b0eb03458f4500fc196a9f62aeebe0e5dd2f Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 13 Aug 2024 17:49:52 +0100 Subject: [PATCH 05/12] test profiles fix --- .github/workflows/jaffle_shop/profiles.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jaffle_shop/profiles.yml b/.github/workflows/jaffle_shop/profiles.yml index 38ea5f7e6..2690f5ec2 100644 --- a/.github/workflows/jaffle_shop/profiles.yml +++ b/.github/workflows/jaffle_shop/profiles.yml @@ -3,7 +3,7 @@ seeds: jaffle_shop: target: app outputs: - app: &app + app: type: firebolt api_endpoint: "{{ env_var('API_ENDPOINT') }}" account_name: "{{ env_var('ACCOUNT_NAME') }}" From f67b53c4f461678e143ea7db63d39aaa54fd8498 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 13 Aug 2024 17:57:06 +0100 Subject: [PATCH 06/12] install fix --- .github/workflows/jaffle-shop-v2.yml | 11 +++++------ .github/workflows/jaffle_shop/run_test_workflow.sh | 2 +- setup.cfg | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/jaffle-shop-v2.yml b/.github/workflows/jaffle-shop-v2.yml index 2738a66d8..839dd36eb 100644 --- a/.github/workflows/jaffle-shop-v2.yml +++ b/.github/workflows/jaffle-shop-v2.yml @@ -23,14 +23,14 @@ jobs: path: jaffle-shop - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.8 - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install "dbt-firebolt/.[dev]" + cd dbt-firebolt + python -m pip install dbt-core -e . - name: Setup database and engine id: setup @@ -41,7 +41,6 @@ jobs: account: ${{ vars.FIREBOLT_ACCOUNT }} api-endpoint: "api.staging.firebolt.io" - - name: Run Jaffle Shop test workflow env: USER_NAME: ${{ secrets.FIREBOLT_CLIENT_ID_STG_NEW_IDN }} @@ -56,5 +55,5 @@ jobs: AWS_ACCESS_ROLE_ARN: ${{ secrets.AWS_ACCESS_ROLE_ARN }} DBT_PROFILES_DIR: "../dbt-firebolt/.github/workflows/jaffle_shop" working-directory: jaffle-shop - run: - ../dbt-firebolt/.github/workflows/jaffle_shop/run_test_workflow.sh + run: | + source ../dbt-firebolt/.github/workflows/jaffle_shop/run_test_workflow.sh diff --git a/.github/workflows/jaffle_shop/run_test_workflow.sh b/.github/workflows/jaffle_shop/run_test_workflow.sh index 335e6a876..65a305368 100755 --- a/.github/workflows/jaffle_shop/run_test_workflow.sh +++ b/.github/workflows/jaffle_shop/run_test_workflow.sh @@ -1,6 +1,6 @@ set -xe -dbt debug dbt clean +dbt debug dbt deps dbt compile dbt run-operation stage_external_sources diff --git a/setup.cfg b/setup.cfg index d7e106246..5ae829402 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,7 +22,7 @@ project_urls = [options] packages = find_namespace: install_requires = - dbt-core~=1.6,<1.8 + dbt-core~=1.6,<1.7 firebolt-sdk>=1.1.0 pydantic>=0.23 python_requires = >=3.8 From eb0336eeef33d0847bee171fcd7d7d5bbd87be23 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 16:58:36 +0100 Subject: [PATCH 07/12] remove source --- .github/workflows/jaffle-shop-v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jaffle-shop-v2.yml b/.github/workflows/jaffle-shop-v2.yml index 839dd36eb..1acbdca7d 100644 --- a/.github/workflows/jaffle-shop-v2.yml +++ b/.github/workflows/jaffle-shop-v2.yml @@ -56,4 +56,4 @@ jobs: DBT_PROFILES_DIR: "../dbt-firebolt/.github/workflows/jaffle_shop" working-directory: jaffle-shop run: | - source ../dbt-firebolt/.github/workflows/jaffle_shop/run_test_workflow.sh + ../dbt-firebolt/.github/workflows/jaffle_shop/run_test_workflow.sh From c56b5cf8d3886c9cbde101a50a2d21169804324f Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 17:02:17 +0100 Subject: [PATCH 08/12] revert not relevant changes --- .github/workflows/jaffle_shop/profiles.yml | 2 +- .github/workflows/jaffle_shop/run_test_workflow.sh | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/jaffle_shop/profiles.yml b/.github/workflows/jaffle_shop/profiles.yml index 2690f5ec2..38ea5f7e6 100644 --- a/.github/workflows/jaffle_shop/profiles.yml +++ b/.github/workflows/jaffle_shop/profiles.yml @@ -3,7 +3,7 @@ seeds: jaffle_shop: target: app outputs: - app: + app: &app type: firebolt api_endpoint: "{{ env_var('API_ENDPOINT') }}" account_name: "{{ env_var('ACCOUNT_NAME') }}" diff --git a/.github/workflows/jaffle_shop/run_test_workflow.sh b/.github/workflows/jaffle_shop/run_test_workflow.sh index 65a305368..335e6a876 100755 --- a/.github/workflows/jaffle_shop/run_test_workflow.sh +++ b/.github/workflows/jaffle_shop/run_test_workflow.sh @@ -1,6 +1,6 @@ set -xe -dbt clean dbt debug +dbt clean dbt deps dbt compile dbt run-operation stage_external_sources diff --git a/setup.cfg b/setup.cfg index 5ae829402..d7e106246 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,7 +22,7 @@ project_urls = [options] packages = find_namespace: install_requires = - dbt-core~=1.6,<1.7 + dbt-core~=1.6,<1.8 firebolt-sdk>=1.1.0 pydantic>=0.23 python_requires = >=3.8 From cadddb148e32c436d572e0130a4e213db79588f3 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 17:17:32 +0100 Subject: [PATCH 09/12] standardise --- .github/workflows/jaffle-shop-v1.yml | 3 ++- .github/workflows/jaffle-shop-v2.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/jaffle-shop-v1.yml b/.github/workflows/jaffle-shop-v1.yml index 45022b604..117b923da 100644 --- a/.github/workflows/jaffle-shop-v1.yml +++ b/.github/workflows/jaffle-shop-v1.yml @@ -30,7 +30,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install "dbt-firebolt/.[dev]" + cd dbt-firebolt + python -m pip install dbt-core -e . - name: Setup database and engine id: setup diff --git a/.github/workflows/jaffle-shop-v2.yml b/.github/workflows/jaffle-shop-v2.yml index 1acbdca7d..18d6497e1 100644 --- a/.github/workflows/jaffle-shop-v2.yml +++ b/.github/workflows/jaffle-shop-v2.yml @@ -29,6 +29,7 @@ jobs: - name: Install dependencies run: | + python -m pip install --upgrade pip cd dbt-firebolt python -m pip install dbt-core -e . From 188ddbb3d5f25cd1598eab4394756870935fddbd Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 17:17:56 +0100 Subject: [PATCH 10/12] v5 --- .github/workflows/jaffle-shop-v1.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jaffle-shop-v1.yml b/.github/workflows/jaffle-shop-v1.yml index 117b923da..b13e54474 100644 --- a/.github/workflows/jaffle-shop-v1.yml +++ b/.github/workflows/jaffle-shop-v1.yml @@ -23,7 +23,7 @@ jobs: path: jaffle-shop - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.8 From 361011e81577e14d9b4f3304afc8f55a0fa5a763 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 17:25:47 +0100 Subject: [PATCH 11/12] changie --- .changes/unreleased/Added-20240814-172459.yaml | 3 +++ .changes/unreleased/Fixed-20240814-172534.yaml | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 .changes/unreleased/Added-20240814-172459.yaml create mode 100644 .changes/unreleased/Fixed-20240814-172534.yaml diff --git a/.changes/unreleased/Added-20240814-172459.yaml b/.changes/unreleased/Added-20240814-172459.yaml new file mode 100644 index 000000000..ef69df3f4 --- /dev/null +++ b/.changes/unreleased/Added-20240814-172459.yaml @@ -0,0 +1,3 @@ +kind: Added +body: Added a way to use COPY FROM command as an alternative to EXTERNAL TABLE. +time: 2024-08-14T17:24:59.573298+01:00 diff --git a/.changes/unreleased/Fixed-20240814-172534.yaml b/.changes/unreleased/Fixed-20240814-172534.yaml new file mode 100644 index 000000000..738f6afa6 --- /dev/null +++ b/.changes/unreleased/Fixed-20240814-172534.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: Fixed seed full refresh resolution. +time: 2024-08-14T17:25:34.986273+01:00 From f470604c3533b09d712fb3a5cb5548d3b533e334 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 14 Aug 2024 17:33:43 +0100 Subject: [PATCH 12/12] -e in integration tests --- .github/workflows/integration-tests-v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-tests-v2.yml b/.github/workflows/integration-tests-v2.yml index 943d4e046..ebfc23cff 100644 --- a/.github/workflows/integration-tests-v2.yml +++ b/.github/workflows/integration-tests-v2.yml @@ -24,7 +24,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install ".[dev]" --no-cache-dir + python -m pip install -e ".[dev]" --no-cache-dir - name: Setup database and engine id: setup