From 19be124e4d128276803e6ce108591654c9625da1 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Wed, 22 May 2024 15:08:16 +0200 Subject: [PATCH] Reintroduce dummy intermediate variables in implementation of TruncatedRV Partially reverts 9d4a3d7d371eda91be99c9bffc519d680d2cf290 and 3888d53575dfc2bf8527e4d69a7621645df5cbf6 The logprob derivation(s) in the icdf implementation of `Truncated` can duplicate nodes and cause spurious input variables to be marked as missing. We replace these by dummies so the graph above is hidden, and variables cannot be accidentally cloned/modified during logprob inference. --- pymc/distributions/truncated.py | 54 +++++++++++++++------------ tests/distributions/test_truncated.py | 14 +++++++ 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 45f77574d6..1ef4a1da32 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -106,28 +106,34 @@ def rv_op(cls, dist, lower, upper, max_n_steps, *, size=None): ] graph_inputs = [*rv_inputs, lower, upper] - rv = dist.owner.op.make_node(*rv_inputs).default_output() + # Variables with `_` suffix identify dummy inputs for the OpFromGraph + graph_inputs_ = [ + inp.type() if not isinstance(inp.type, RandomType) else inp for inp in graph_inputs + ] + *rv_inputs_, lower_, upper_ = graph_inputs_ + + rv_ = dist.owner.op.make_node(*rv_inputs_).default_output() # Try to use inverted cdf sampling # truncated_rv = icdf(rv, draw(uniform(cdf(lower), cdf(upper)))) try: - logcdf_lower, logcdf_upper = cls._create_logcdf_exprs(rv, rv, lower, upper) + logcdf_lower_, logcdf_upper_ = TruncatedRV._create_logcdf_exprs( + rv_, rv_, lower_, upper_ + ) # We use the first RNG from the base RV, so we don't have to introduce a new one # This is not problematic because the RNG won't be used in the RV logcdf graph - uniform_rng = next(inp for inp in rv_inputs if isinstance(inp.type, RandomType)) - uniform_next_rng, uniform = pt.random.uniform( - pt.exp(logcdf_lower), - pt.exp(logcdf_upper), - rng=uniform_rng, - size=rv.shape, + uniform_rng_ = next(inp_ for inp_ in rv_inputs_ if isinstance(inp_.type, RandomType)) + uniform_next_rng_, uniform_ = pt.random.uniform( + pt.exp(logcdf_lower_), + pt.exp(logcdf_upper_), + rng=uniform_rng_, + size=rv_.shape, ).owner.outputs - # So icdf does not see the random graph of uniform - uniform_type = uniform.type() - truncated_rv = graph_replace(icdf(rv, uniform_type), {uniform_type: uniform}) + truncated_rv_ = icdf(rv_, uniform_, warn_rvs=False) return TruncatedRV( base_rv_op=dist.owner.op, - inputs=graph_inputs, - outputs=[truncated_rv, uniform_next_rng], + inputs=graph_inputs_, + outputs=[truncated_rv_, uniform_next_rng_], ndim_supp=0, max_n_steps=max_n_steps, )(*graph_inputs) @@ -154,25 +160,25 @@ def loop_fn(truncated_rv, reject_draws, lower, upper, *rv_inputs): return ( (truncated_rv, reject_draws), - collect_default_updates(new_truncated_rv, inputs=rv_inputs), + collect_default_updates(new_truncated_rv), until(~pt.any(reject_draws)), ) - (truncated_rv, reject_draws_), updates = scan( + (truncated_rv_, reject_draws_), updates = scan( loop_fn, outputs_info=[ - pt.zeros_like(rv), - pt.ones_like(rv, dtype=bool), + pt.zeros_like(rv_), + pt.ones_like(rv_, dtype=bool), ], - non_sequences=[lower, upper, *rv_inputs], + non_sequences=[lower_, upper_, *rv_inputs_], n_steps=max_n_steps, strict=True, ) - truncated_rv = truncated_rv[-1] - convergence = ~pt.any(reject_draws_[-1]) - truncated_rv = TruncationCheck(f"Truncation did not converge in {max_n_steps} steps")( - truncated_rv, convergence + truncated_rv_ = truncated_rv_[-1] + convergence_ = ~pt.any(reject_draws_[-1]) + truncated_rv_ = TruncationCheck(f"Truncation did not converge in {max_n_steps} steps")( + truncated_rv_, convergence_ ) # Sort updates of each RNG so that they show in the same order as the input RNGs @@ -184,8 +190,8 @@ def sort_updates(update): return TruncatedRV( base_rv_op=dist.owner.op, - inputs=graph_inputs, - outputs=[truncated_rv, *next_rngs], + inputs=graph_inputs_, + outputs=[truncated_rv_, *next_rngs], ndim_supp=0, max_n_steps=max_n_steps, )(*graph_inputs) diff --git a/tests/distributions/test_truncated.py b/tests/distributions/test_truncated.py index 4f748f0fbc..50ae98147f 100644 --- a/tests/distributions/test_truncated.py +++ b/tests/distributions/test_truncated.py @@ -585,3 +585,17 @@ def test_truncated_identity_input(dist_op): rv_out = Truncated.dist(dist=dist_op(mu_identity, 5), lower=0, upper=1) assert np.ptp(draw(rv_out, draws=500)) < 1 + + +@pytest.mark.parametrize("rv_op", [icdf_normal, rejection_normal]) +def test_truncated_custom_dist_indexed_argument(rv_op): + # Regression test for https://github.com/pymc-devs/pymc/issues/7312 + + def dist(scale, size): + return pt.exp(rv_op(scale=scale, size=size)) + + scale = Exponential.dist(scale=[1, 2, 3]) + latent = CustomDist.dist(scale[[0, 0, 1, 1, 2, 2]], dist=dist) + rv_out = Truncated.dist(latent, upper=7) + + assert np.ptp(draw(rv_out, draws=100)) < 7