diff --git a/docs/guides/table_migration.md b/docs/guides/table_migration.md index cb57abd35..4072649bc 100644 --- a/docs/guides/table_migration.md +++ b/docs/guides/table_migration.md @@ -129,7 +129,7 @@ Consider an existing table named `my_schema.existing_table`. Migrating this tabl b. Specify the start of the first time interval SQLMesh should track in the `MODEL` DDL `start` key (example uses "2024-01-01") - c. Create the model in the SQLMesh project without backfilling any data by running `sqlmesh plan [environment name] --skip-backfill --start 2024-01-01`, replacing "[environment name]" with an environment name other than `prod` and using the same start date from the `MODEL` DDL in step 3b. + c. Create the model in the SQLMesh project without backfilling any data by running `sqlmesh plan [environment name] --empty-backfill --start 2024-01-01`, replacing "[environment name]" with an environment name other than `prod` and using the same start date from the `MODEL` DDL in step 3b. 4. Determine the name of the model's snapshot physical table by running `sqlmesh table_name my_schema.existing_table`. For example, it might return `sqlmesh__my_schema.existing_table_123456`. 5. Rename the original table `my_schema.existing_table_temp` to `sqlmesh__my_schema.existing_table_123456` diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 334c44c80..10b7fa704 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -295,6 +295,10 @@ Options: matching models in the target environment. --skip-backfill, --dry-run Skip the backfill step and only create a virtual update for the plan. + --empty-backfill Produce empty backfill. Like --skip-backfill + no models will be backfilled, unlike --skip- + backfill missing intervals will be recorded + as if they were backfilled. --forward-only Create a plan for forward-only changes. --allow-destructive-model TEXT Allow destructive forward-only changes to models whose names match the expression. diff --git a/sqlmesh/cli/main.py b/sqlmesh/cli/main.py index 5ea0b6415..60dfd31ec 100644 --- a/sqlmesh/cli/main.py +++ b/sqlmesh/cli/main.py @@ -333,6 +333,11 @@ def diff(ctx: click.Context, environment: t.Optional[str] = None) -> None: is_flag=True, help="Skip the backfill step and only create a virtual update for the plan.", ) +@click.option( + "--empty-backfill", + is_flag=True, + help="Produce empty backfill. Like --skip-backfill no models will be backfilled, unlike --skip-backfill missing intervals will be recorded as if they were backfilled.", +) @click.option( "--forward-only", is_flag=True, diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index b7a21bb49..5f73c9df5 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -982,6 +982,7 @@ def plan( restate_models: t.Optional[t.Iterable[str]] = None, no_gaps: bool = False, skip_backfill: bool = False, + empty_backfill: bool = False, forward_only: t.Optional[bool] = None, allow_destructive_models: t.Optional[t.Collection[str]] = None, no_prompts: t.Optional[bool] = None, @@ -1019,6 +1020,7 @@ def plan( part of the target environment have no data gaps when compared against previous snapshots for same models. skip_backfill: Whether to skip the backfill step. Default: False. + empty_backfill: Like skip_backfill, but also records processed intervals. forward_only: Whether the purpose of the plan is to make forward only changes. allow_destructive_models: Models whose forward-only changes are allowed to be destructive. no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that @@ -1051,6 +1053,7 @@ def plan( restate_models=restate_models, no_gaps=no_gaps, skip_backfill=skip_backfill, + empty_backfill=empty_backfill, forward_only=forward_only, allow_destructive_models=allow_destructive_models, no_auto_categorization=no_auto_categorization, @@ -1086,6 +1089,7 @@ def plan_builder( restate_models: t.Optional[t.Iterable[str]] = None, no_gaps: bool = False, skip_backfill: bool = False, + empty_backfill: bool = False, forward_only: t.Optional[bool] = None, allow_destructive_models: t.Optional[t.Collection[str]] = None, no_auto_categorization: t.Optional[bool] = None, @@ -1117,6 +1121,7 @@ def plan_builder( part of the target environment have no data gaps when compared against previous snapshots for same models. skip_backfill: Whether to skip the backfill step. Default: False. + empty_backfill: Like skip_backfill, but also records processed intervals. forward_only: Whether the purpose of the plan is to make forward only changes. allow_destructive_models: Models whose forward-only changes are allowed to be destructive. no_auto_categorization: Indicates whether to disable automatic categorization of model @@ -1250,6 +1255,7 @@ def plan_builder( backfill_models=backfill_models, no_gaps=no_gaps, skip_backfill=skip_backfill, + empty_backfill=empty_backfill, is_dev=is_dev, forward_only=( forward_only if forward_only is not None else self.config.plan.forward_only diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index 13eae9e54..db530b4dd 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -50,6 +50,7 @@ class PlanBuilder: part of the target environment have no data gaps when compared against previous snapshots for same nodes. skip_backfill: Whether to skip the backfill step. + empty_backfill: Like skip_backfill, but also records processed intervals. is_dev: Whether this plan is for development purposes. forward_only: Whether the purpose of the plan is to make forward only changes. allow_destructive_models: A list of fully qualified model names whose forward-only changes are allowed to be destructive. @@ -83,6 +84,7 @@ def __init__( backfill_models: t.Optional[t.Iterable[str]] = None, no_gaps: bool = False, skip_backfill: bool = False, + empty_backfill: bool = False, is_dev: bool = False, forward_only: bool = False, allow_destructive_models: t.Optional[t.Iterable[str]] = None, @@ -104,6 +106,7 @@ def __init__( self._context_diff = context_diff self._no_gaps = no_gaps self._skip_backfill = skip_backfill + self._empty_backfill = empty_backfill self._is_dev = is_dev self._forward_only = forward_only self._allow_destructive_models = set( @@ -251,6 +254,7 @@ def build(self) -> Plan: provided_end=self._end, is_dev=self._is_dev, skip_backfill=self._skip_backfill, + empty_backfill=self._empty_backfill, no_gaps=self._no_gaps, forward_only=self._forward_only, allow_destructive_models=t.cast(t.Set, self._allow_destructive_models), diff --git a/sqlmesh/core/plan/definition.py b/sqlmesh/core/plan/definition.py index 59c9814d7..e68885218 100644 --- a/sqlmesh/core/plan/definition.py +++ b/sqlmesh/core/plan/definition.py @@ -37,6 +37,7 @@ class Plan(PydanticModel, frozen=True): is_dev: bool skip_backfill: bool + empty_backfill: bool no_gaps: bool forward_only: bool allow_destructive_models: t.Set[str] @@ -82,7 +83,11 @@ def previous_plan_id(self) -> t.Optional[str]: @property def requires_backfill(self) -> bool: - return not self.skip_backfill and (bool(self.restatements) or bool(self.missing_intervals)) + return ( + not self.skip_backfill + and not self.empty_backfill + and (bool(self.restatements) or bool(self.missing_intervals)) + ) @property def has_changes(self) -> bool: @@ -225,6 +230,7 @@ def to_evaluatable(self) -> EvaluatablePlan: environment=self.environment, no_gaps=self.no_gaps, skip_backfill=self.skip_backfill, + empty_backfill=self.empty_backfill, restatements={s.name: i for s, i in self.restatements.items()}, is_dev=self.is_dev, allow_destructive_models=self.allow_destructive_models, @@ -256,6 +262,7 @@ class EvaluatablePlan(PydanticModel): environment: Environment no_gaps: bool skip_backfill: bool + empty_backfill: bool restatements: t.Dict[str, Interval] is_dev: bool allow_destructive_models: t.Set[str] diff --git a/sqlmesh/core/plan/evaluator.py b/sqlmesh/core/plan/evaluator.py index 89520e9fb..d2aa5ef08 100644 --- a/sqlmesh/core/plan/evaluator.py +++ b/sqlmesh/core/plan/evaluator.py @@ -160,6 +160,23 @@ def _backfill( plan: The plan to source snapshots from. selected_snapshots: The snapshots to backfill. """ + if plan.empty_backfill: + intervals_to_add = [] + for snapshot in snapshots_by_name.values(): + intervals = [snapshot.inclusive_exclusive(plan.start, plan.end, strict=False)] + is_deployable = deployability_index.is_deployable(snapshot) + intervals_to_add.append( + SnapshotIntervals( + name=snapshot.name, + identifier=snapshot.identifier, + version=snapshot.version, + intervals=intervals if is_deployable else [], + dev_intervals=intervals if not is_deployable else [], + ) + ) + self.state_sync.add_snapshots_intervals(intervals_to_add) + return + if not plan.requires_backfill or not selected_snapshots: return diff --git a/sqlmesh/magics.py b/sqlmesh/magics.py index e91d72e6f..5696725ea 100644 --- a/sqlmesh/magics.py +++ b/sqlmesh/magics.py @@ -337,6 +337,11 @@ def test(self, context: Context, line: str, test_def_raw: t.Optional[str] = None action="store_true", help="Skip the backfill step and only create a virtual update for the plan.", ) + @argument( + "--empty-backfill", + action="store_true", + help="Produce empty backfill. Like --skip-backfill no models will be backfilled, unlike --skip-backfill missing intervals will be recorded as if they were backfilled.", + ) @argument( "--forward-only", action="store_true", @@ -418,6 +423,7 @@ def plan(self, context: Context, line: str) -> None: backfill_models=args.backfill_model, no_gaps=args.no_gaps, skip_backfill=args.skip_backfill, + empty_backfill=args.empty_backfill, forward_only=args.forward_only, no_prompts=args.no_prompts, auto_apply=args.auto_apply, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index 386bfa1b8..d1000264b 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -1698,6 +1698,29 @@ def test_create_environment_no_changes_with_selector(init_and_plan_context: t.Ca assert {o.name for o in schema_objects} == {"top_waiters"} +@freeze_time("2023-01-08 15:00:00") +def test_empty_bacfkill(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + plan = context.plan("prod", no_prompts=True, skip_tests=True, empty_backfill=True) + assert plan.missing_intervals + assert plan.empty_backfill + assert not plan.requires_backfill + + context.apply(plan) + + for model in context.models.values(): + if model.is_seed or model.kind.is_symbolic: + continue + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM {model.name}")[0] + assert row_num == 0 + + plan = context.plan("prod", no_prompts=True, skip_tests=True) + assert not plan.requires_backfill + assert not plan.has_changes + assert not plan.missing_intervals + + @pytest.mark.parametrize( "context_fixture", ["sushi_context", "sushi_dbt_context", "sushi_test_dbt_context", "sushi_no_default_catalog"], diff --git a/tests/core/test_plan.py b/tests/core/test_plan.py index 767e20270..f4e4b900a 100644 --- a/tests/core/test_plan.py +++ b/tests/core/test_plan.py @@ -695,6 +695,7 @@ def test_missing_intervals_lookback(make_snapshot, mocker: MockerFixture): execution_time="2022-01-05 12:00", is_dev=True, skip_backfill=False, + empty_backfill=False, no_gaps=False, forward_only=False, allow_destructive_models=set(), diff --git a/tests/schedulers/airflow/test_client.py b/tests/schedulers/airflow/test_client.py index 2f14da062..426b8cb79 100644 --- a/tests/schedulers/airflow/test_client.py +++ b/tests/schedulers/airflow/test_client.py @@ -65,6 +65,7 @@ def test_apply_plan(mocker: MockerFixture, snapshot: Snapshot): environment=environment, no_gaps=False, skip_backfill=False, + empty_backfill=False, restatements={snapshot.name: (to_timestamp("2024-01-01"), to_timestamp("2024-01-02"))}, is_dev=False, allow_destructive_models=set(), @@ -176,6 +177,7 @@ def test_apply_plan(mocker: MockerFixture, snapshot: Snapshot): }, "no_gaps": False, "skip_backfill": False, + "empty_backfill": False, "is_dev": False, "forward_only": False, "allow_destructive_models": [], diff --git a/tests/schedulers/airflow/test_integration.py b/tests/schedulers/airflow/test_integration.py index 77c7c72f1..4e1c9f67a 100644 --- a/tests/schedulers/airflow/test_integration.py +++ b/tests/schedulers/airflow/test_integration.py @@ -104,6 +104,7 @@ def _create_evaluatable_plan( environment=environment, no_gaps=False, skip_backfill=False, + empty_backfill=False, restatements={}, is_dev=is_dev, allow_destructive_models=set(), diff --git a/tests/schedulers/airflow/test_plan.py b/tests/schedulers/airflow/test_plan.py index e5489ba80..37e497aea 100644 --- a/tests/schedulers/airflow/test_plan.py +++ b/tests/schedulers/airflow/test_plan.py @@ -112,6 +112,7 @@ def test_create_plan_dag_spec( environment=new_environment, no_gaps=True, skip_backfill=False, + empty_backfill=False, restatements={}, is_dev=False, forward_only=True, @@ -249,6 +250,7 @@ def test_restatement( environment=new_environment, no_gaps=True, skip_backfill=False, + empty_backfill=False, restatements={ the_snapshot.name: ( to_timestamp("2022-01-02"), @@ -374,6 +376,7 @@ def test_select_models_for_backfill(mocker: MockerFixture, random_name, make_sna environment=new_environment, no_gaps=True, skip_backfill=False, + empty_backfill=False, restatements={}, is_dev=False, forward_only=True, @@ -458,6 +461,7 @@ def test_create_plan_dag_spec_duplicated_snapshot( environment=new_environment, no_gaps=False, skip_backfill=False, + empty_backfill=False, restatements={}, is_dev=False, forward_only=False, @@ -519,6 +523,7 @@ def test_create_plan_dag_spec_unbounded_end( environment=new_environment, no_gaps=True, skip_backfill=False, + empty_backfill=False, restatements={}, is_dev=False, forward_only=False,