From f1caac7cf66add2a881eadebb2b9ae0f52965f80 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Thu, 26 Sep 2024 14:24:44 +0300 Subject: [PATCH 1/8] feat: upload document_metadata in batches if var is set --- models/root/document_metadata.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index d3718e3a..e58a546c 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -19,6 +19,8 @@ SELECT saved_timestamp, doc->>'type' as doc_type from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table -{% if is_incremental() %} -WHERE source_table.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} +{% if var('start_timestamp') is not none and var('end_timestamp' is not none)%} + WHERE source_table.saved_timestamp >= {{ var('start_timestamp') }} AND source_table.saved_timestamp <= {{ var('end_timestamp') }} +{% elif is_incremental() %} + WHERE source_table.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} {% endif %} From 7e304f63cacec3078058c4529319d2283f3f1e48 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Fri, 27 Sep 2024 13:50:27 +0300 Subject: [PATCH 2/8] refactor: try an alternative batching technique --- models/root/document_metadata.sql | 34 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index e58a546c..4a7b2d23 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -13,14 +13,28 @@ ) }} -SELECT - _id as uuid, - _deleted, - saved_timestamp, - doc->>'type' as doc_type -from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table -{% if var('start_timestamp') is not none and var('end_timestamp' is not none)%} - WHERE source_table.saved_timestamp >= {{ var('start_timestamp') }} AND source_table.saved_timestamp <= {{ var('end_timestamp') 
}} -{% elif is_incremental() %} - WHERE source_table.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} +WITH source_table AS ( + SELECT + _id as uuid, + _deleted, + saved_timestamp, + doc->>'type' as doc_type + from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} +) + +{% if var('start_timestamp') is not none and var('batch_size' is not none) %} + WITH batched_data AS ( + SELECT * + FROM source_table + WHERE saved_timestamp >= {{ var('start_timestamp') }} + ORDER BY saved_timestamp + LIMIT {{ var('batch_size') }} + ) +{% else %} + + SELECT * + from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + {% if is_incremental() %} + WHERE source_table.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} + {% endif %} {% endif %} From 63377caf7448413bdec98ccdfeedd83453b371ee Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Fri, 27 Sep 2024 18:11:12 +0300 Subject: [PATCH 3/8] chore: update batch query --- models/root/document_metadata.sql | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index 4a7b2d23..53d9267e 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -19,10 +19,10 @@ WITH source_table AS ( _deleted, saved_timestamp, doc->>'type' as doc_type - from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} + FROM {{ source('couchdb', env_var('POSTGRES_TABLE')) }} ) -{% if var('start_timestamp') is not none and var('batch_size' is not none) %} +{% if {{ var("start_timestamp") }} is not none and {{ var("batch_size") }} is not none) %} WITH batched_data AS ( SELECT * FROM source_table @@ -32,8 +32,12 @@ WITH source_table AS ( ) {% else %} - SELECT * - from {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + SELECT + _id as uuid, + _deleted, + saved_timestamp, + doc->>'type' as doc_type + FROM {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table {% if 
is_incremental() %} WHERE source_table.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} {% endif %} From a23f55c6be39965fa664b78b63650f890ba6fb67 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Fri, 27 Sep 2024 18:12:39 +0300 Subject: [PATCH 4/8] fix: typo --- models/root/document_metadata.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index 53d9267e..af7506b2 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -22,7 +22,7 @@ WITH source_table AS ( FROM {{ source('couchdb', env_var('POSTGRES_TABLE')) }} ) -{% if {{ var("start_timestamp") }} is not none and {{ var("batch_size") }} is not none) %} +{% if {{ var("start_timestamp") }} is not none and {{ var("batch_size") }} is not none %} WITH batched_data AS ( SELECT * FROM source_table From 5ea363ee573a97c2b436062a24cebfe57034bc00 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Sun, 29 Sep 2024 14:46:05 +0300 Subject: [PATCH 5/8] refactor: try a different way of using vars --- models/root/document_metadata.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index af7506b2..7a51bf82 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -22,7 +22,7 @@ WITH source_table AS ( FROM {{ source('couchdb', env_var('POSTGRES_TABLE')) }} ) -{% if {{ var("start_timestamp") }} is not none and {{ var("batch_size") }} is not none %} +{% if var("start_timestamp") is not none and var("batch_size") is not none %} WITH batched_data AS ( SELECT * FROM source_table From e2a8408b3582e28f00fced27864493445391e57b Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Sun, 29 Sep 2024 15:11:29 +0300 Subject: [PATCH 6/8] fix: CTE naming --- models/root/document_metadata.sql | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git 
a/models/root/document_metadata.sql b/models/root/document_metadata.sql index 7a51bf82..58f61681 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -13,7 +13,7 @@ ) }} -WITH source_table AS ( +WITH source_table_CTE AS ( SELECT _id as uuid, _deleted, @@ -23,13 +23,11 @@ WITH source_table AS ( ) {% if var("start_timestamp") is not none and var("batch_size") is not none %} - WITH batched_data AS ( - SELECT * - FROM source_table - WHERE saved_timestamp >= {{ var('start_timestamp') }} - ORDER BY saved_timestamp - LIMIT {{ var('batch_size') }} - ) + SELECT * + FROM source_table_CTE + WHERE saved_timestamp >= {{ var('start_timestamp') }} + ORDER BY saved_timestamp + LIMIT {{ var('batch_size') }} {% else %} SELECT From 5d10c4fcbf426005c6a4acaad7ac6022a4abcb8f Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Sun, 29 Sep 2024 15:25:47 +0300 Subject: [PATCH 7/8] fix: update how variable is used --- models/root/document_metadata.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/root/document_metadata.sql b/models/root/document_metadata.sql index 58f61681..4c7f0b47 100644 --- a/models/root/document_metadata.sql +++ b/models/root/document_metadata.sql @@ -25,7 +25,7 @@ WITH source_table_CTE AS ( {% if var("start_timestamp") is not none and var("batch_size") is not none %} SELECT * FROM source_table_CTE - WHERE saved_timestamp >= {{ var('start_timestamp') }} + WHERE saved_timestamp >= '{{ var("start_timestamp") }}' ORDER BY saved_timestamp LIMIT {{ var('batch_size') }} {% else %} From 1d8e368d72bb7b0395e220602e2f3ed4c4fe8f97 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Mon, 7 Oct 2024 11:42:56 +0300 Subject: [PATCH 8/8] chore: add variables to dbt_project file --- dbt_project.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbt_project.yml b/dbt_project.yml index b2ecd57d..81487611 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -7,3 +7,7 @@ profile: 'default' on-run-end: - "{{ 
log_dbt_results(results) }}" + +vars: + start_timestamp: null + batch_size: null