From 8a7d4ff65bfe07c27883a4d6ed9381d1fd2d2756 Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Tue, 5 Sep 2023 13:18:57 +0200 Subject: [PATCH 1/8] feat: add support for S3 Extra arguments in Seed file upload to s3 --- README.md | 1 + dbt/adapters/athena/connections.py | 2 ++ dbt/adapters/athena/impl.py | 3 ++- dbt/include/athena/macros/materializations/seeds/helpers.sql | 2 ++ dbt/include/athena/profile_template.yml | 3 +++ 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 15b5a6c9..1fc955aa 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ A dbt profile can be configured to run against AWS Athena using the following co | aws_profile_name | Profile to use from your AWS shared credentials file. | Optional | `my-profile` | | work_group | Identifier of Athena workgroup | Optional | `my-custom-workgroup` | | num_retries | Number of times to retry a failing query | Optional | `3` | +| extra_args | Dictionary containing boto3 ExtraArgs when uploading to S3 | Optional | `{"ACL": "bucket-owner-full-control"}`| **Example profiles.yml entry:** ```yaml diff --git a/dbt/adapters/athena/connections.py b/dbt/adapters/athena/connections.py index 89363d92..82db0ec0 100644 --- a/dbt/adapters/athena/connections.py +++ b/dbt/adapters/athena/connections.py @@ -59,6 +59,7 @@ class AthenaCredentials(Credentials): num_retries: Optional[int] = 5 s3_data_dir: Optional[str] = None s3_data_naming: Optional[str] = "schema_table_unique" + extra_args: Optional[Dict[str, Any]] = None @property def type(self) -> str: @@ -83,6 +84,7 @@ def _connection_keys(self) -> Tuple[str, ...]: "s3_data_dir", "s3_data_naming", "debug_query_state", + "extra_args" ) diff --git a/dbt/adapters/athena/impl.py b/dbt/adapters/athena/impl.py index 48b56dab..dcd1f8cd 100755 --- a/dbt/adapters/athena/impl.py +++ b/dbt/adapters/athena/impl.py @@ -297,6 +297,7 @@ def upload_seed_to_s3( s3_data_dir: Optional[str] = None, s3_data_naming: Optional[str] = None, external_location: Optional[str] = None, + extra_args: Optional[Dict[str, Any]] = None ) -> str: conn = self.connections.get_thread_connection() client = conn.handle @@ -315,7 +316,7 @@ def upload_seed_to_s3( # This ensures cross-platform support, tempfile.NamedTemporaryFile does not tmpfile = os.path.join(tempfile.gettempdir(), os.urandom(24).hex()) table.to_csv(tmpfile, quoting=csv.QUOTE_NONNUMERIC) - s3_client.upload_file(tmpfile, bucket, object_name) + s3_client.upload_file(tmpfile, bucket, object_name, ExtraArgs=extra_args) os.remove(tmpfile) return str(s3_location) diff --git a/dbt/include/athena/macros/materializations/seeds/helpers.sql b/dbt/include/athena/macros/materializations/seeds/helpers.sql index 8664568a..2beb895e 100644 --- a/dbt/include/athena/macros/materializations/seeds/helpers.sql +++ b/dbt/include/athena/macros/materializations/seeds/helpers.sql @@ -96,6 +96,7 @@ {%- set s3_data_dir = config.get('s3_data_dir', default=target.s3_data_dir) -%} {%- set s3_data_naming = config.get('s3_data_naming', target.s3_data_naming) -%} {%- set external_location = config.get('external_location', default=none) -%} + {%- set extra_args = config.get('extra_args', default=target.extra_args) -%} {%- set tmp_relation = api.Relation.create( identifier=identifier + "__dbt_tmp", @@ -110,6 +111,7 @@ s3_data_dir, s3_data_naming, external_location, + extra_args=extra_args ) -%} -- create target relation diff --git a/dbt/include/athena/profile_template.yml b/dbt/include/athena/profile_template.yml index 87be52ed..9ae0ab58 100644 --- a/dbt/include/athena/profile_template.yml +++ b/dbt/include/athena/profile_template.yml @@ -17,6 +17,9 @@ prompts: hint: Specify the database (Data catalog) to build models into (lowercase only) default: awsdatacatalog + extra_args: + hint: Specify any extra arguments to use in the S3 Upload, e.g. ACL, SSEKMSKeyId + threads: hint: '1 or more' type: 'int' From 17026f968c10dbeeb2e5425bc20e315bb74f4f15 Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Mon, 11 Sep 2023 08:22:04 +0200 Subject: [PATCH 2/8] feat: add support for S3 Extra arguments in Seed file upload to s3 - change parameter name --- README.md | 4 +++- dbt/adapters/athena/connections.py | 4 ++-- dbt/adapters/athena/impl.py | 4 ++-- dbt/include/athena/macros/materializations/seeds/helpers.sql | 4 ++-- dbt/include/athena/profile_template.yml | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1fc955aa..89b5fdd5 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ A dbt profile can be configured to run against AWS Athena using the following co | aws_profile_name | Profile to use from your AWS shared credentials file. | Optional | `my-profile` | | work_group | Identifier of Athena workgroup | Optional | `my-custom-workgroup` | | num_retries | Number of times to retry a failing query | Optional | `3` | -| extra_args | Dictionary containing boto3 ExtraArgs when uploading to S3 | Optional | `{"ACL": "bucket-owner-full-control"}`| +| seed_s3_upload_args | Dictionary containing boto3 ExtraArgs when uploading to S3 | Optional | `{"ACL": "bucket-owner-full-control"}`| **Example profiles.yml entry:** ```yaml @@ -105,6 +105,8 @@ athena: database: awsdatacatalog aws_profile_name: my-profile work_group: my-workgroup + seed_s3_upload_args: + ACL: bucket-owner-full-control ``` _Additional information_ diff --git a/dbt/adapters/athena/connections.py b/dbt/adapters/athena/connections.py index 82db0ec0..87682de0 100644 --- a/dbt/adapters/athena/connections.py +++ b/dbt/adapters/athena/connections.py @@ -59,7 +59,7 @@ class AthenaCredentials(Credentials): num_retries: Optional[int] = 5 s3_data_dir: Optional[str] = None s3_data_naming: Optional[str] = "schema_table_unique" - extra_args: Optional[Dict[str, Any]] = None + seed_s3_upload_args: Optional[Dict[str, Any]] = None @property def type(self) -> str: @@ -84,7 +84,7 @@ def _connection_keys(self) -> Tuple[str, ...]: "s3_data_dir", "s3_data_naming", "debug_query_state", - "extra_args" + "seed_s3_upload_args", ) diff --git a/dbt/adapters/athena/impl.py b/dbt/adapters/athena/impl.py index dcd1f8cd..1103002d 100755 --- a/dbt/adapters/athena/impl.py +++ b/dbt/adapters/athena/impl.py @@ -297,7 +297,7 @@ def upload_seed_to_s3( s3_data_dir: Optional[str] = None, s3_data_naming: Optional[str] = None, external_location: Optional[str] = None, - extra_args: Optional[Dict[str, Any]] = None + seed_s3_upload_args: Optional[Dict[str, Any]] = None, ) -> str: conn = self.connections.get_thread_connection() client = conn.handle @@ -316,7 +316,7 @@ def upload_seed_to_s3( # This ensures cross-platform support, tempfile.NamedTemporaryFile does not tmpfile = os.path.join(tempfile.gettempdir(), os.urandom(24).hex()) table.to_csv(tmpfile, quoting=csv.QUOTE_NONNUMERIC) - s3_client.upload_file(tmpfile, bucket, object_name, ExtraArgs=extra_args) + s3_client.upload_file(tmpfile, bucket, object_name, ExtraArgs=seed_s3_upload_args) os.remove(tmpfile) return str(s3_location) diff --git a/dbt/include/athena/macros/materializations/seeds/helpers.sql b/dbt/include/athena/macros/materializations/seeds/helpers.sql index 2beb895e..c256bf59 100644 --- a/dbt/include/athena/macros/materializations/seeds/helpers.sql +++ b/dbt/include/athena/macros/materializations/seeds/helpers.sql @@ -96,7 +96,7 @@ {%- set s3_data_dir = config.get('s3_data_dir', default=target.s3_data_dir) -%} {%- set s3_data_naming = config.get('s3_data_naming', target.s3_data_naming) -%} {%- set external_location = config.get('external_location', default=none) -%} - {%- set extra_args = config.get('extra_args', default=target.extra_args) -%} + {%- set seed_s3_upload_args = config.get('seed_s3_upload_args', default=target.seed_s3_upload_args) -%} {%- set tmp_relation = api.Relation.create( identifier=identifier + "__dbt_tmp", @@ -111,7 +111,7 @@ s3_data_dir, s3_data_naming, external_location, - extra_args=extra_args + extra_args=seed_s3_upload_args ) -%} -- create target relation diff --git a/dbt/include/athena/profile_template.yml b/dbt/include/athena/profile_template.yml index 9ae0ab58..72d94710 100644 --- a/dbt/include/athena/profile_template.yml +++ b/dbt/include/athena/profile_template.yml @@ -17,7 +17,7 @@ prompts: hint: Specify the database (Data catalog) to build models into (lowercase only) default: awsdatacatalog - extra_args: + seed_s3_upload_args: hint: Specify any extra arguments to use in the S3 Upload, e.g. ACL, SSEKMSKeyId threads: From e1aa1019447639f39ec9c54063a9631cf80d09da Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Mon, 11 Sep 2023 08:22:57 +0200 Subject: [PATCH 3/8] feat: add support for S3 Extra arguments in Seed file upload to s3 - add boto3-stubs to dev-requirements --- dev-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index 1070e5fc..03b3b907 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -12,3 +12,4 @@ pytest-cov~=4.1 pytest-dotenv~=0.5 pytest-xdist~=3.3 pyupgrade~=3.10 +boto3-stubs[s3]~=1.28 \ No newline at end of file From 73d09780c31186f11cab8a8842f282326faa5da8 Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Mon, 11 Sep 2023 08:23:45 +0200 Subject: [PATCH 4/8] feat: add support for S3 Extra arguments in Seed file upload to s3 - format README markdown table --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 89b5fdd5..47bfd54c 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ A dbt profile can be configured to run against AWS Athena using the following co | aws_profile_name | Profile to use from your AWS shared credentials file. | Optional | `my-profile` | | work_group | Identifier of Athena workgroup | Optional | `my-custom-workgroup` | | num_retries | Number of times to retry a failing query | Optional | `3` | -| seed_s3_upload_args | Dictionary containing boto3 ExtraArgs when uploading to S3 | Optional | `{"ACL": "bucket-owner-full-control"}`| +| seed_s3_upload_args | Dictionary containing boto3 ExtraArgs when uploading to S3 | Optional | `{"ACL": "bucket-owner-full-control"}` | **Example profiles.yml entry:** ```yaml From 735f34445c1dae47b8a6eb7082ead43136d9d243 Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Mon, 11 Sep 2023 08:28:37 +0200 Subject: [PATCH 5/8] feat: add support for S3 Extra arguments in Seed file upload to s3 - change parameter name --- dbt/include/athena/macros/materializations/seeds/helpers.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/athena/macros/materializations/seeds/helpers.sql b/dbt/include/athena/macros/materializations/seeds/helpers.sql index c256bf59..93e374bc 100644 --- a/dbt/include/athena/macros/materializations/seeds/helpers.sql +++ b/dbt/include/athena/macros/materializations/seeds/helpers.sql @@ -111,7 +111,7 @@ s3_data_dir, s3_data_naming, external_location, - extra_args=seed_s3_upload_args + seed_s3_upload_args=seed_s3_upload_args ) -%} -- create target relation From 15dbbdb5652bf4382ef4bc5556450bf8264d6948 Mon Sep 17 00:00:00 2001 From: Julien Kervizic Date: Mon, 11 Sep 2023 09:34:30 +0200 Subject: [PATCH 6/8] feat: add support for S3 Extra arguments in Seed file upload to s3 - dev requirements --- dev-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 03b3b907..d451b571 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -12,4 +12,5 @@ pytest-cov~=4.1 pytest-dotenv~=0.5 pytest-xdist~=3.3 pyupgrade~=3.10 -boto3-stubs[s3]~=1.28 \ No newline at end of file +boto3-stubs[s3]~=1.28 + From ce03885655259d67eed14fac527d99548d86ba70 Mon Sep 17 00:00:00 2001 From: Serhii Dimchenko <39801237+svdimchenko@users.noreply.github.com> Date: Mon, 11 Sep 2023 09:37:33 +0200 Subject: [PATCH 7/8] Update dev-requirements.txt --- dev-requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index f631cbd4..716d29c8 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -13,4 +13,3 @@ pytest-dotenv~=0.5 pytest-xdist~=3.3 pyupgrade~=3.10 boto3-stubs[s3]~=1.28 - From d1d10fd821b083ab9db69c78af123bcca9359592 Mon Sep 17 00:00:00 2001 From: Serhii Dimchenko <39801237+svdimchenko@users.noreply.github.com> Date: Mon, 11 Sep 2023 09:41:13 +0200 Subject: [PATCH 8/8] Update dev-requirements.txt --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 03a9bcbf..e992c3fc 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,6 @@ autoflake~=1.7 black~=23.9 +boto3-stubs[s3]~=1.28 dbt-tests-adapter~=1.6.2 flake8~=6.1 Flake8-pyproject~=1.2 @@ -12,4 +13,3 @@ pytest-cov~=4.1 pytest-dotenv~=0.5 pytest-xdist~=3.3 pyupgrade~=3.10 -boto3-stubs[s3]~=1.28