Add dim_dbt__models and int_dbt__model_executions incremental models

What this patch does:

* README.md: the "Models included" list now names `dim_dbt__models` and
  `fct_dbt__model_executions` (replacing `fct_dbt_model_executions`), and
  `fct_dbt_run_results` moves to the end of the list.
* models/incremental/dim_dbt__models.sql (new): incremental model with
  unique_key `manifest_model_id`, reading from `stg_dbt__models`. On an
  incremental run it keeps only rows whose `artifact_generated_at` is
  strictly greater than the maximum already present in the target table.
* models/incremental/fct_dbt__model_executions.sql: the `models` CTE now
  reads `dim_dbt__models` instead of `stg_dbt__models`, and the
  `model_executions` CTE reads `int_dbt__model_executions` instead of
  `stg_dbt__model_executions`.
* models/incremental/int_dbt__model_executions.sql (new): incremental model
  with unique_key `model_execution_id`, reading from
  `stg_dbt__model_executions`, using the same `artifact_generated_at` filter.
* models/schemas.yml: documents `dim_dbt__models` and adds `unique` and
  `not_null` tests on `manifest_model_id`.

NOTE(review): `model_schema`, `package_name` and `model_materialization` are
added to schemas.yml without a description, unlike their sibling columns.
NOTE(review): int_dbt__model_executions.sql is added without a trailing
newline, unlike dim_dbt__models.sql.
NOTE(review): the line breaks below are restored to match the hunk headers;
the leading indentation inside hunk lines is reconstructed and should be
confirmed with `git apply --check` before use.

diff --git a/README.md b/README.md
index 77ecab6a..ef108fec 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,11 @@ This package builds a mart of tables from dbt artifacts loaded into a table. It
 
 Models included:
 
-- `fct_dbt_model_executions`
-- `fct_dbt_run_results`
+- `dim_dbt__models`
+- `fct_dbt__model_executions`
 - `fct_dbt__latest_full_model_executions`
 - `fct_dbt__critical_path`
+- `fct_dbt_run_results`
 
 The critical path model determines the slowest route through your DAG, which provides you with the information needed to make a targeted effort to reducing `dbt run` times. For example:
 
diff --git a/models/incremental/dim_dbt__models.sql b/models/incremental/dim_dbt__models.sql
new file mode 100644
index 00000000..a38fbd9d
--- /dev/null
+++ b/models/incremental/dim_dbt__models.sql
@@ -0,0 +1,39 @@
+{{ config( materialized='incremental', unique_key='manifest_model_id' ) }}
+
+with dbt_models as (
+
+    select * from {{ ref('stg_dbt__models') }}
+
+),
+
+dbt_models_incremental as (
+
+    select *
+    from dbt_models
+
+    {% if is_incremental() %}
+    -- this filter will only be applied on an incremental run
+    where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
+    {% endif %}
+
+),
+
+fields as (
+
+    select
+        manifest_model_id,
+        command_invocation_id,
+        artifact_generated_at,
+        node_id,
+        name,
+        model_schema,
+        depends_on_nodes,
+        package_name,
+        model_path,
+        checksum,
+        model_materialization
+    from dbt_models_incremental
+
+)
+
+select * from fields
diff --git a/models/incremental/fct_dbt__model_executions.sql b/models/incremental/fct_dbt__model_executions.sql
index 1efaf0f8..6edeb8f5 100644
--- a/models/incremental/fct_dbt__model_executions.sql
+++ b/models/incremental/fct_dbt__model_executions.sql
@@ -3,14 +3,14 @@
 with models as (
 
     select *
-    from {{ ref('stg_dbt__models') }}
+    from {{ ref('dim_dbt__models') }}
 
 ),
 
 model_executions as (
 
     select *
-    from {{ ref('stg_dbt__model_executions') }}
+    from {{ ref('int_dbt__model_executions') }}
 
 ),
 
diff --git a/models/incremental/int_dbt__model_executions.sql b/models/incremental/int_dbt__model_executions.sql
new file mode 100644
index 00000000..7d9f8be8
--- /dev/null
+++ b/models/incremental/int_dbt__model_executions.sql
@@ -0,0 +1,40 @@
+{{ config( materialized='incremental', unique_key='model_execution_id' ) }}
+
+with model_executions as (
+
+    select *
+    from {{ ref('stg_dbt__model_executions') }}
+
+),
+
+model_executions_incremental as (
+
+    select *
+    from model_executions
+
+    {% if is_incremental() %}
+    -- this filter will only be applied on an incremental run
+    where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
+    {% endif %}
+
+),
+
+fields as (
+
+    select
+        model_execution_id,
+        command_invocation_id,
+        artifact_generated_at,
+        was_full_refresh,
+        node_id,
+        thread_id,
+        status,
+        compile_started_at,
+        query_completed_at,
+        total_node_runtime,
+        rows_affected
+    from model_executions_incremental
+
+)
+
+select * from fields
\ No newline at end of file
diff --git a/models/schemas.yml b/models/schemas.yml
index 50e3dfab..4153d35d 100644
--- a/models/schemas.yml
+++ b/models/schemas.yml
@@ -103,3 +103,29 @@ models:
         description: Was the run executed with a --full-refresh flag?
       - name: env_*
         description: Columns for the environment variables set when the command was executed.
+
+  - name: dim_dbt__models
+    description: All dbt model metadata from every manifest.json.
+    columns:
+      - name: manifest_model_id
+        description: Primary key generated from the command_invocation_id and checksum.
+        tests:
+          - unique
+          - not_null
+      - name: command_invocation_id
+        description: The id of the command which resulted in the source artifact's generation.
+      - name: artifact_generated_at
+        description: Timestamp of when the source artifact was generated.
+      - name: node_id
+        description: Unique id for the node, in the form of model.[package_name].[model_name]
+      - name: name
+        description: The model name.
+      - name: model_schema
+      - name: depends_on_nodes
+        description: List of node ids the model depends on.
+      - name: package_name
+      - name: model_path
+        description: Filepath of the model.
+      - name: checksum
+        description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
+      - name: model_materialization