From 46ff35bc87a349f64d97f2b4b66c40445afb5701 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Mon, 17 Feb 2025 21:05:00 -0800 Subject: [PATCH 01/15] beginning to add new company object --- integration_tests/dbt_project.yml | 1 + .../get_hubspot_deal_company_columns.sql | 11 +++ .../int_rag_hubspot__company_document.sql | 73 +++++++++++++++++++ .../hubspot_staging/src_rag_hubspot.yml | 13 ++++ .../stg_rag_hubspot__deal_company.sql | 46 ++++++++++++ 5 files changed, 144 insertions(+) create mode 100644 macros/staging/hubspot/get_hubspot_deal_company_columns.sql create mode 100644 models/intermediate/hubspot/int_rag_hubspot__company_document.sql create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index fef72d6..5ffc2cd 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -21,6 +21,7 @@ vars: rag_hubspot_engagement_company_identifier: "hubspot_engagement_company" rag_hubspot_engagement_contact_identifier: "hubspot_engagement_contact" rag_hubspot_engagement_deal_identifier: "hubspot_engagement_deal" + rag_hubspot_engagement_deal_company: "hubspot_deal_company" rag_hubspot_company_identifier: "hubspot_company" rag_hubspot_contact_identifier: "hubspot_contact" rag_hubspot_owner_identifier: "hubspot_owner" diff --git a/macros/staging/hubspot/get_hubspot_deal_company_columns.sql b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql new file mode 100644 index 0000000..aa8199c --- /dev/null +++ b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql @@ -0,0 +1,11 @@ +{% macro get_hubspot_deal_company_columns() %} + +{% set columns = [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "deal_id", "datatype": dbt.type_int()}, + {"name": "company_id", "datatype": dbt.type_int()} +] %} + +{{ return(columns) }} + +{% endmacro %} \ No newline at end of file diff --git 
a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql new file mode 100644 index 0000000..5ea18e2 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -0,0 +1,73 @@ +WITH owners AS ( + SELECT + *, + COALESCE( + email, + 'UNKNOWN' + ) AS safe_email, + COALESCE( + first_name, + '' + ) AS safe_first_name, + COALESCE( + last_name, + '' + ) AS safe_last_name + FROM + {{ ref('stg_rag_hubspot__owner') }} +), +deals AS ( + SELECT + *, + COALESCE({{ cast('property_closedate', dbt.type_string()) }}, 'not closed yet') AS safe_close_date + FROM + {{ ref('stg_rag_hubspot__deal') }} +), +company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +), +deal_company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__deal_company') }} +), +deal_descriptions AS ( + SELECT + DISTINCT deal.deal_id, + {{ dbt.concat([ "'- {'", "'deal_name: '", "deals.property_dealname", "', '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "', '", "'deal_owner_email: '", "owners.safe_email", "', '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + deal.property_closedate + FROM + deals + JOIN owners + ON owners.owner_id = deal.owner_id +), +company_with_deal_description AS ( + SELECT + id, + {{ dbt.listagg( + measure = "dd.deal_description", + delimiter_text = "'\\n'", + order_by_clause = "order by dd.property_closedate" + ) }} AS deal_descriptions + FROM + company + JOIN deal_company dc + ON dc.company_id = company.id + JOIN deal_descriptions dd + ON dd.deal_id = dc.deal_id + GROUP BY + 1 +) +SELECT + cdd.deal_descriptions, + company.* +FROM + company + JOIN company_with_deal_description cdd + ON cdd.id = company.id +WHERE + NOT company._fivetran_deleted diff --git a/models/staging/hubspot_staging/src_rag_hubspot.yml b/models/staging/hubspot_staging/src_rag_hubspot.yml index 72a42bf..9d00152 100644 --- 
a/models/staging/hubspot_staging/src_rag_hubspot.yml +++ b/models/staging/hubspot_staging/src_rag_hubspot.yml @@ -269,3 +269,16 @@ sources: description: The type of owner. - name: updated_at description: Timestamp representing when the owner was last updated. + + - name: deal_company + identifier: "{{ var('rag_hubspot_deal_company_identifier', 'deal_company')}}" + description: Each record represents a 'link' between a deal and a company. + config: + enabled: "{{ var('rag_hubspot_sales_enabled', true) and var('rag_hubspot_company_enabled', true) and var('rag_hubspot_deal_enabled', true) }}" + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: deal_id + description: The ID of the related deal. + - name: company_id + description: The ID of the related company. \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql new file mode 100644 index 0000000..4ec431e --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql @@ -0,0 +1,46 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='deal_company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_deal_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','deal_company')), + staging_columns=get_hubspot_deal_company_columns() + ) + }} + + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +), + +final as ( + 
select + deal_id, + company_id, + source_relation + from fields +) + +select * +from final From 0ae873d681559ea6d371e1d231019db382fa2e95 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 09:26:12 -0800 Subject: [PATCH 02/15] added working company document --- dbt_project.yml | 2 +- .../hubspot/get_hubspot_company_columns.sql | 3 +- .../int_rag_hubspot__company_document.sql | 37 ++++---- .../stg_rag_hubspot__company.sql | 88 ++++++------------- .../stg_rag_hubspot__company_fields.sql | 30 +++++++ .../stg_rag_hubspot__owner.sql | 2 +- 6 files changed, 84 insertions(+), 78 deletions(-) create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql diff --git a/dbt_project.yml b/dbt_project.yml index 9d16e0f..7509c0f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -35,7 +35,7 @@ models: unified_rag: +schema: unified_rag intermediate: - +materialized: ephemeral + +materialized: view unstructured: +materialized: view staging: diff --git a/macros/staging/hubspot/get_hubspot_company_columns.sql b/macros/staging/hubspot/get_hubspot_company_columns.sql index 9966be6..211e32e 100644 --- a/macros/staging/hubspot/get_hubspot_company_columns.sql +++ b/macros/staging/hubspot/get_hubspot_company_columns.sql @@ -13,7 +13,8 @@ {"name": "property_city", "datatype": dbt.type_string(), "alias": "city"}, {"name": "property_state", "datatype": dbt.type_string(), "alias": "state"}, {"name": "property_country", "datatype": dbt.type_string(), "alias": "country"}, - {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"} + {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"}, + {"name": "portal_id", "datatype": dbt.type_int()} ] %} {{ return(columns) }} diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 5ea18e2..fc01132 100644 --- 
a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -2,7 +2,7 @@ WITH owners AS ( SELECT *, COALESCE( - email, + owner_email, 'UNKNOWN' ) AS safe_email, COALESCE( @@ -19,7 +19,7 @@ WITH owners AS ( deals AS ( SELECT *, - COALESCE({{ cast('property_closedate', dbt.type_string()) }}, 'not closed yet') AS safe_close_date + COALESCE({{ cast('closed_date', dbt.type_string()) }}, 'not closed yet') AS safe_close_date FROM {{ ref('stg_rag_hubspot__deal') }} ), @@ -37,37 +37,42 @@ deal_company AS ( ), deal_descriptions AS ( SELECT - DISTINCT deal.deal_id, - {{ dbt.concat([ "'- {'", "'deal_name: '", "deals.property_dealname", "', '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "', '", "'deal_owner_email: '", "owners.safe_email", "', '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, - deal.property_closedate + DISTINCT deals.deal_id, + deals.source_relation, + {{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + deals.closed_date FROM deals JOIN owners - ON owners.owner_id = deal.owner_id + ON owners.owner_id = deals.owner_id + AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT - id, + company.company_id AS company_id, + company.source_relation AS source_relation, {{ dbt.listagg( measure = "dd.deal_description", delimiter_text = "'\\n'", - order_by_clause = "order by dd.property_closedate" + order_by_clause = "order by dd.closed_date" ) }} AS deal_descriptions FROM company - JOIN deal_company dc - ON dc.company_id = company.id - JOIN deal_descriptions dd + LEFT JOIN deal_company dc + ON dc.company_id = company.company_id + AND 
dc.source_relation = company.source_relation + LEFT JOIN deal_descriptions dd ON dd.deal_id = dc.deal_id + AND dc.source_relation = dd.source_relation GROUP BY - 1 + 1, + 2 ) SELECT - cdd.deal_descriptions, + cdd.deal_descriptions AS deals, company.* FROM company JOIN company_with_deal_description cdd - ON cdd.id = company.id -WHERE - NOT company._fivetran_deleted + ON cdd.company_id = company.company_id + AND cdd.source_relation = company.source_relation diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql index a1b3970..ca37f04 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql @@ -1,59 +1,29 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} - -with base as ( - - {{ - fivetran_utils.union_data( - table_identifier='company', - database_variable='rag_hubspot_database', - schema_variable='rag_hubspot_schema', - default_database=target.database, - default_schema='rag_hubspot', - default_variable='hubspot_company', - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases' - ) - }} -), - -fields as ( - - select - {{ - fivetran_utils.fill_staging_columns( - source_columns=adapter.get_columns_in_relation(source('rag_hubspot','company')), - staging_columns=get_hubspot_company_columns() - ) - }} - - {{ fivetran_utils.source_relation( - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases') - }} - from base -), - -final as ( - - select - company_id, - source_relation, - is_company_deleted, - cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, - company_name, - description, - created_date, - industry, - street_address, - street_address_2, - city, - state, - country, - company_annual_revenue - - from fields - -) - -select * -from final \ No newline at end of file +{{ 
config(enabled = var('rag__using_hubspot', True)) }} + +WITH FINAL AS ( + + SELECT + {{ dbt_utils.star( + from = ref('stg_rag_hubspot__company_fields'), + except = ['id', '_fivetran_synced', 'is_deleted', 'property_name', 'property_description', 'property_createdate', 'property_industry', 'property_address', 'property_address_2', 'property_city', 'property_state', 'property_country', 'property_annualrevenue' ] + ) }}, + id AS company_id, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced, + is_deleted AS is_company_deleted, + property_name AS company_name, + property_description AS description, + property_createdate AS created_date, + property_industry AS industry, + property_address AS street_address, + property_address_2 AS street_address_2, + property_city AS city, + property_state AS state, + property_country AS country, + property_annualrevenue AS company_annual_revenue + FROM + {{ ref('stg_rag_hubspot__company_fields') }} +) +SELECT + * +FROM + FINAL diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql new file mode 100644 index 0000000..a2632cc --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql @@ -0,0 +1,30 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + * + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +) + +select * from fields \ No newline at end of file diff 
--git a/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql b/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql index fa06618..62a5175 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql @@ -21,7 +21,7 @@ fields as ( select {{ fivetran_utils.fill_staging_columns( - source_columns=adapter.get_columns_in_relation(source('rag_hubspot','contact')), + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','owner')), staging_columns=get_hubspot_owner_columns() ) }} From cd24a991c12ac14a63dd8e67d83ffeeb4933a730 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 10:02:18 -0800 Subject: [PATCH 03/15] remove unnecessary changes --- dbt_project.yml | 2 +- macros/staging/hubspot/get_hubspot_company_columns.sql | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 7509c0f..9d16e0f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -35,7 +35,7 @@ models: unified_rag: +schema: unified_rag intermediate: - +materialized: view + +materialized: ephemeral unstructured: +materialized: view staging: diff --git a/macros/staging/hubspot/get_hubspot_company_columns.sql b/macros/staging/hubspot/get_hubspot_company_columns.sql index 211e32e..9966be6 100644 --- a/macros/staging/hubspot/get_hubspot_company_columns.sql +++ b/macros/staging/hubspot/get_hubspot_company_columns.sql @@ -13,8 +13,7 @@ {"name": "property_city", "datatype": dbt.type_string(), "alias": "city"}, {"name": "property_state", "datatype": dbt.type_string(), "alias": "state"}, {"name": "property_country", "datatype": dbt.type_string(), "alias": "country"}, - {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"}, - {"name": "portal_id", "datatype": dbt.type_int()} + {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"} ] %} {{ return(columns) }} From 
3686c1d7f0030d24b7adb451e2cd094667bdc4d2 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 11:46:15 -0800 Subject: [PATCH 04/15] make it a left join for Deal --- models/unstructured/rag_hubspot__document.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index ece1210..c7349ed 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -26,10 +26,10 @@ final as ( {{ dbt.concat([ "deal_document.comment_markdown", "'\\n\\n## COMMENTS\\n\\n'", - "grouped.comments_group_markdown"]) }} + "coalesce(grouped.comments_group_markdown, '')"]) }} as chunk from deal_document - join grouped + left join grouped on grouped.deal_id = deal_document.deal_id and grouped.source_relation = deal_document.source_relation ) From 17e782bed721821cf2d8366e5ac0cc2f2aed3099 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Wed, 19 Feb 2025 11:32:32 -0800 Subject: [PATCH 05/15] fix bug and add owners to Deal --- .../hubspot/int_rag_hubspot__deal_document.sql | 17 ++++++++++++++--- models/unstructured/rag_hubspot__document.sql | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index e9927aa..4c3445f 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -41,6 +41,11 @@ engagement_deals as ( from {{ ref('stg_rag_hubspot__engagement_deal') }} ), +owners AS ( + select * + from {{ ref('stg_rag_hubspot__owner') }} +), + engagement_detail_prep as ( select @@ -52,7 +57,8 @@ engagement_detail_prep as ( {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], 
dbt.type_string()) }} as created_by, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, - {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on + {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, + {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details from deals left join engagement_deals on deals.deal_id = engagement_deals.deal_id @@ -72,6 +78,9 @@ engagement_detail_prep as ( left join companies on engagement_companies.company_id = companies.company_id and engagement_companies.source_relation = companies.source_relation + left join owners + on deals.owner_id = owners.owner_id + and deals.source_relation = owners.source_relation ), engagement_details as ( @@ -84,7 +93,8 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct engagement_type", delimiter="', '") }} as engagement_type, {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, - {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name + {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, + {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 ), @@ -101,7 +111,8 @@ engagement_markdown as ( "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", "'Company Name: '", "company_name", "'\\n'", - "'Engagement Type: '", "engagement_type", "'\\n'" + "'Engagement Type: '", "engagement_type", "'\\n'", + 
"'Deal Owner: '", "owner_details", "'\\n'" ]) }} as {{ dbt.type_string() }}) as comment_markdown from engagement_details ), diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index c7349ed..dd23856 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -21,7 +21,7 @@ final as ( 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, - grouped.chunk_index, + coalesce(grouped.chunk_index, 0) as chunk_index, grouped.chunk_tokens as chunk_tokens_approximate, {{ dbt.concat([ "deal_document.comment_markdown", From ad595234aecbe86c8539d13f9543f5396fdac81c Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Sat, 22 Feb 2025 13:21:51 -0800 Subject: [PATCH 06/15] enable on flag --- .../intermediate/hubspot/int_rag_hubspot__company_document.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index fc01132..ea28e0a 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -1,3 +1,5 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + WITH owners AS ( SELECT *, From 886e9a1533da837a476bdebdd8885316af916432 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Wed, 5 Mar 2025 14:13:59 -0800 Subject: [PATCH 07/15] add company ids to deal documents --- models/intermediate/hubspot/int_rag_hubspot__deal_document.sql | 3 +++ models/unstructured/rag_hubspot__document.sql | 1 + 2 files changed, 4 insertions(+) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index 4c3445f..a26ad97 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ 
b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -56,6 +56,7 @@ engagement_detail_prep as ( deals.source_relation, {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by, + {{ unified_rag.coalesce_cast(["companies.company_id", "'UNKNOWN'"], dbt.type_string()) }} as company_id, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details @@ -94,6 +95,7 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, + {{ fivetran_utils.string_agg(field_to_agg="distinct company_id", delimiter="', '") }} as company_ids, {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 @@ -106,6 +108,7 @@ engagement_markdown as ( title, source_relation, url_reference, + company_ids, cast( {{ dbt.concat([ "'Deal Name : '", "title", "'\\n\\n'", "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index dd23856..af3e8a2 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -18,6 +18,7 @@ final as ( cast(deal_document.deal_id as {{ dbt.type_string() }}) 
as document_id, coalesce(deal_document.title, grouped.title) as title, deal_document.url_reference, + deal_document.company_ids, 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, From 4cbff831c044a80676767e862f00278eaeff5af2 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:11:45 -0800 Subject: [PATCH 08/15] more iteration --- macros/utility/create_json.sql | 26 +++++++++++++++++++ .../int_rag_hubspot__company_document.sql | 15 ++++++----- 2 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 macros/utility/create_json.sql diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql new file mode 100644 index 0000000..b4e9cff --- /dev/null +++ b/macros/utility/create_json.sql @@ -0,0 +1,26 @@ +{% macro create_json(columns) %} + {%- if not execute -%} + {%- set json_function = { + 'bigquery': 'TO_JSON_STRING', + 'snowflake': 'OBJECT_CONSTRUCT', + 'redshift': 'json_build_object', + 'databricks': 'to_json' + }[target.type] -%} + {%- set json_expression = json_function + '(' -%} + {%- for column in columns -%} + {%- set json_expression = json_expression + "'" + column + "', " + column -%} + {%- if not loop.last -%} + {%- set json_expression = json_expression + ', ' -%} + {%- endif -%} + {%- endfor -%} + {%- set json_expression = json_expression + ')' -%} + + {%- if target.type == 'snowflake' -%} + CAST({{ json_expression }} AS STRING) + {%- elif target.type == 'redshift' -%} + {{ json_expression }}::VARCHAR + {%- else -%} + {{ json_expression }} + {%- endif -%} + {%- endif -%} +{% endmacro %} diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index ea28e0a..95e6be0 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -39,15 +39,16 @@ deal_company AS ( ), deal_descriptions AS ( SELECT - 
DISTINCT deals.deal_id, - deals.source_relation, - {{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, - deals.closed_date + DISTINCT deal_id, + source_relation, + safe_close_date AS closed_date, + --{{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + {{ create_json(['deal_id', 'title', 'safe_close_date']) }} AS deal_description FROM deals - JOIN owners - ON owners.owner_id = deals.owner_id - AND owners.source_relation = deals.source_relation + --JOIN owners + --ON owners.owner_id = deals.owner_id + --AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT From 6514a72c12a360136112fc52096cdd167c64ce05 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:19:08 -0800 Subject: [PATCH 09/15] fix json macro bug --- macros/utility/create_json.sql | 42 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql index b4e9cff..6801b04 100644 --- a/macros/utility/create_json.sql +++ b/macros/utility/create_json.sql @@ -1,26 +1,24 @@ {% macro create_json(columns) %} - {%- if not execute -%} - {%- set json_function = { - 'bigquery': 'TO_JSON_STRING', - 'snowflake': 'OBJECT_CONSTRUCT', - 'redshift': 'json_build_object', - 'databricks': 'to_json' - }[target.type] -%} - {%- set json_expression = json_function + '(' -%} - {%- for column in columns -%} - {%- set json_expression = json_expression + "'" + column + "', " + column -%} - {%- if not 
loop.last -%} - {%- set json_expression = json_expression + ', ' -%} - {%- endif -%} - {%- endfor -%} - {%- set json_expression = json_expression + ')' -%} - - {%- if target.type == 'snowflake' -%} - CAST({{ json_expression }} AS STRING) - {%- elif target.type == 'redshift' -%} - {{ json_expression }}::VARCHAR - {%- else -%} - {{ json_expression }} + {%- set json_function = { + 'bigquery': 'TO_JSON_STRING', + 'snowflake': 'OBJECT_CONSTRUCT', + 'redshift': 'json_build_object', + 'databricks': 'to_json' + }[target.type] -%} + {%- set json_expression = json_function + '(' -%} + {%- for column in columns -%} + {%- set json_expression = json_expression + "'" + column + "', " + column -%} + {%- if not loop.last -%} + {%- set json_expression = json_expression + ', ' -%} {%- endif -%} + {%- endfor -%} + {%- set json_expression = json_expression + ')' -%} + + {%- if target.type == 'snowflake' -%} + CAST({{ json_expression }} AS STRING) + {%- elif target.type == 'redshift' -%} + {{ json_expression }}::VARCHAR + {%- else -%} + {{ json_expression }} {%- endif -%} {% endmacro %} From ab87a84c16831fcc74ab7c744a7e7afcd7dc6ede Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:39:00 -0800 Subject: [PATCH 10/15] concat into json list --- .../hubspot/int_rag_hubspot__company_document.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 95e6be0..45495b4 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -54,11 +54,11 @@ company_with_deal_description AS ( SELECT company.company_id AS company_id, company.source_relation AS source_relation, - {{ dbt.listagg( - measure = "dd.deal_description", - delimiter_text = "'\\n'", - order_by_clause = "order by dd.closed_date" - ) }} AS deal_descriptions + {{ 
dbt.concat([ + "'['", + dbt.listagg("dd.deal_description", "','", "order by dd.closed_date"), + "']'" + ]) }} AS deal_descriptions FROM company LEFT JOIN deal_company dc From 121163de82d70fe1b08e9d40ce9f22cd4d811a6d Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:50:12 -0800 Subject: [PATCH 11/15] try to fix json macro --- macros/utility/create_json.sql | 62 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql index 6801b04..b76218c 100644 --- a/macros/utility/create_json.sql +++ b/macros/utility/create_json.sql @@ -1,24 +1,38 @@ -{% macro create_json(columns) %} - {%- set json_function = { - 'bigquery': 'TO_JSON_STRING', - 'snowflake': 'OBJECT_CONSTRUCT', - 'redshift': 'json_build_object', - 'databricks': 'to_json' - }[target.type] -%} - {%- set json_expression = json_function + '(' -%} - {%- for column in columns -%} - {%- set json_expression = json_expression + "'" + column + "', " + column -%} - {%- if not loop.last -%} - {%- set json_expression = json_expression + ', ' -%} - {%- endif -%} - {%- endfor -%} - {%- set json_expression = json_expression + ')' -%} - - {%- if target.type == 'snowflake' -%} - CAST({{ json_expression }} AS STRING) - {%- elif target.type == 'redshift' -%} - {{ json_expression }}::VARCHAR - {%- else -%} - {{ json_expression }} - {%- endif -%} -{% endmacro %} +{% macro create_json(columns) -%} + {% if target.type == 'bigquery' -%} + TO_JSON_STRING( + STRUCT( + {%- for column in columns -%} + {{ column }} AS {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% elif target.type == 'snowflake' -%} + CAST( + OBJECT_CONSTRUCT( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + AS STRING + ) + {% elif target.type == 'redshift' -%} + json_build_object( + {%- for column in columns -%} + '{{ column }}', {{ column 
}} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + )::VARCHAR + {% elif target.type == 'databricks' -%} + to_json( + named_struct( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% endif -%} +{% endmacro -%} From 4128792724f0cdbedddc0b7d5212449d2b1cf79f Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:53:52 -0800 Subject: [PATCH 12/15] remove comment --- .../hubspot/int_rag_hubspot__company_document.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 45495b4..3bf68ea 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -42,13 +42,9 @@ deal_descriptions AS ( DISTINCT deal_id, source_relation, safe_close_date AS closed_date, - --{{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, {{ create_json(['deal_id', 'title', 'safe_close_date']) }} AS deal_description FROM deals - --JOIN owners - --ON owners.owner_id = deals.owner_id - --AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT From 919f7a1d42eef9438c77e66880489e71f9ad91f7 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 14:34:07 -0800 Subject: [PATCH 13/15] add companies to deals --- .../int_rag_hubspot__deal_document.sql | 28 +++++++++++++++---- models/unstructured/rag_hubspot__document.sql | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql 
b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
index a26ad97..15f7bd9 100644
--- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
+++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
@@ -13,11 +13,19 @@ contacts as (
 ),
 
 companies as (
-
-    select *
+    select
+        *,
+        {{ create_json(['company_id']) }} AS company_desc
     from {{ ref('stg_rag_hubspot__company') }}
 ),
 
+deal_company AS (
+    SELECT
+        *
+    FROM
+        {{ ref('stg_rag_hubspot__deal_company') }}
+),
+
 engagements as (
     select *
     from {{ ref('stg_rag_hubspot__engagement') }}
@@ -56,7 +64,6 @@ engagement_detail_prep as (
         deals.source_relation,
         {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name,
         {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by,
-        {{ unified_rag.coalesce_cast(["companies.company_id", "'UNKNOWN'"], dbt.type_string()) }} as company_id,
         {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name,
         {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on,
         {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details
@@ -95,7 +102,6 @@ engagement_details as (
         {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name,
         {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by,
         {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name,
-        {{ fivetran_utils.string_agg(field_to_agg="distinct company_id", delimiter="', '") }} as company_ids,
         {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details
     from engagement_detail_prep
     group by 1,2,3,4,5
@@ -108,7 +114,11 @@ engagement_markdown as (
         title,
         source_relation,
         url_reference,
-        company_ids,
+        {{ dbt.concat([
+            "'['",
+            dbt.listagg("cc.company_desc", "','"),
+            "']'"
+        ]) }} AS companies,
         cast( {{ dbt.concat([
             "'Deal Name : '", "title", "'\\n\\n'",
             "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'",
@@ -117,7 +127,13 @@ engagement_markdown as (
             "'Engagement Type: '", "engagement_type", "'\\n'",
             "'Deal Owner: '", "owner_details", "'\\n'"
         ]) }} as {{ dbt.type_string() }}) as comment_markdown
-    from engagement_details
+    from engagement_details ed
+    left join deal_company dc
+        on dc.deal_id = ed.deal_id
+        and dc.source_relation = ed.source_relation
+    left join companies cc
+        on dc.company_id = cc.company_id
+        and dc.source_relation = cc.source_relation
 ),
 
 engagement_tokens as (
diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql
index af3e8a2..d246927 100644
--- a/models/unstructured/rag_hubspot__document.sql
+++ b/models/unstructured/rag_hubspot__document.sql
@@ -18,7 +18,7 @@ final as (
         cast(deal_document.deal_id as {{ dbt.type_string() }}) as document_id,
         coalesce(deal_document.title, grouped.title) as title,
         deal_document.url_reference,
-        deal_document.company_ids,
+        deal_document.companies,
         'hubspot' as platform,
         deal_document.source_relation,
         grouped.most_recent_chunk_update,

From 999f9c0da9f7245ba451ef05d45737f4302f5bce Mon Sep 17 00:00:00 2001
From: levonkorganyan
Date: Thu, 6 Mar 2025 14:45:26 -0800
Subject: [PATCH 14/15] add company name

---
 models/intermediate/hubspot/int_rag_hubspot__deal_document.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
index 15f7bd9..6ed89b0 100644
--- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
+++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
@@ -15,7 +15,7 @@ contacts as (
 companies as (
     select
         *,
-        {{ create_json(['company_id']) }} AS company_desc
+        {{ create_json(['company_id', 'company_name']) }} AS company_desc
     from {{ ref('stg_rag_hubspot__company') }}
 ),
 

From 400f24daf467c802608cdc66c663eef96f19f33a Mon Sep 17 00:00:00 2001
From: levonkorganyan
Date: Thu, 6 Mar 2025 14:48:19 -0800
Subject: [PATCH 15/15] fix query

Qualify the deal-level columns with the `ed` alias, move the aggregated
company list after comment_markdown, and group by the five deal-level
columns. The aggregate is wrapped in coalesce(..., '') so that a deal with
no linked company (the joins are LEFT JOINs) yields `[]` rather than a NULL
`companies` value on warehouses whose string aggregate returns NULL when it
has no non-NULL input.

---
 .../int_rag_hubspot__deal_document.sql | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
index 6ed89b0..dccbe08 100644
--- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
+++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql
@@ -110,23 +110,23 @@ engagement_details as (
 engagement_markdown as (
 
     select
-        deal_id,
-        title,
-        source_relation,
-        url_reference,
-        {{ dbt.concat([
-            "'['",
-            dbt.listagg("cc.company_desc", "','"),
-            "']'"
-        ]) }} AS companies,
+        ed.deal_id,
+        ed.title,
+        ed.source_relation,
+        ed.url_reference,
         cast( {{ dbt.concat([
             "'Deal Name : '", "title", "'\\n\\n'",
             "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'",
             "'Created On : '", "created_on", "'\\n'",
-            "'Company Name: '", "company_name", "'\\n'",
+            "'Company Name: '", "ed.company_name", "'\\n'",
             "'Engagement Type: '", "engagement_type", "'\\n'",
             "'Deal Owner: '", "owner_details", "'\\n'"
-        ]) }} as {{ dbt.type_string() }}) as comment_markdown
+        ]) }} as {{ dbt.type_string() }}) as comment_markdown,
+        {{ dbt.concat([
+            "'['",
+            "coalesce(" ~ dbt.listagg("cc.company_desc", "','") ~ ", '')",
+            "']'"
+        ]) }} AS companies
     from engagement_details ed
     left join deal_company dc
         on dc.deal_id = ed.deal_id
@@ -134,6 +134,7 @@ engagement_markdown as (
     left join companies cc
         on dc.company_id = cc.company_id
         and dc.source_relation = cc.source_relation
+    group by 1,2,3,4,5
 ),
 
 engagement_tokens as (