From aaf6676c2a784872b0aaf20782e2d0fbe7a61e65 Mon Sep 17 00:00:00 2001
From: Tomas Capretto
Date: Mon, 15 Apr 2024 11:59:44 -0300
Subject: [PATCH] Fix more tests after the switch to parameter-named components

Model components are now keyed by the name of the parameter they model
(e.g. "mu", "sigma", "p") instead of by the response name. Tests reach the
parent component through model.components[model.family.likelihood.parent]
rather than model.response_component, auxiliary parameters drop the
response-name prefix ("Y_sigma" -> "sigma"), and Model.predict() takes
kind="params" / kind="response" instead of kind="mean" / kind="pps".
ResponseInfo in bambi.interpret is now always built with keyword arguments
and targets the likelihood's parent parameter. Leftover debug prints are
removed from bambi.interpret.effects.
---
 .pylintrc                        |   3 +-
 bambi/backend/terms.py           |   3 +--
 bambi/interpret/effects.py       |  29 +++---
 tests/test_model_construction.py |  70 +++++++------
 tests/test_models.py             | 174 +++++++++++++++----------------
 tests/test_priors.py             |  51 +++++----
 6 files changed, 169 insertions(+), 161 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index c5bde5e22..d6ed29e91 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -384,8 +384,7 @@ missing-member-max-choices=1
 
 # List of note tags to take in consideration, separated by a comma.
 notes=FIXME,
-      XXX,
-      TODO
+      XXX
 
 [SPELLING]
 
diff --git a/bambi/backend/terms.py b/bambi/backend/terms.py
index 976803906..fc3f44759 100644
--- a/bambi/backend/terms.py
+++ b/bambi/backend/terms.py
@@ -187,8 +187,7 @@ def build(self, spec):
             dist = dist(label, dims=dims, **self.term.prior.args)[np.newaxis, :]
         else:
             dist = dist(label, **self.term.prior.args)
-        # TODO: check if this brings the desired result
-        # Multiply it by vector of ones so it then has the proper length
+        # Multiply it by a vector of ones so it then has the proper length
         dist = dist * np.ones((spec.response_component.term.data.shape[0],))
 
         return dist
diff --git a/bambi/interpret/effects.py b/bambi/interpret/effects.py
index 971157688..359ad0c57 100644
--- a/bambi/interpret/effects.py
+++ b/bambi/interpret/effects.py
@@ -67,8 +67,6 @@ def __post_init__(self):
         if self.target is None:
             self.name_target = self.name
         else:
-            # TODO: verify this is the correct update
-            # self.name_target = f"{self.name}_{self.target}"
            self.name_target = self.target
 
         self.name_obs = "__obs__"
@@ -281,7 +279,5 @@ def get_estimate(
 
         if self.variable.values.ndim == 1:
             self.variable.values = np.array(self.variable.values).reshape(-1, 1)
-        
-        print("name_target", self.response.name_target)
-        print("name_obs", self.response.name_obs)
+
 
@@ -530,7 +526,8 @@ def predictions(
         target_ = model.family.likelihood.parent
     else:
         target_ = target
-    response = ResponseInfo(response_name, target_)
+
+    response = ResponseInfo(name=response_name, target=target_)
     response_transform = transforms.get(response_name, identity)
 
     if pps:
@@ -693,16 +690,13 @@ def comparisons(
     transforms = transforms if transforms is not None else {}
     response_name = get_aliased_name(model.response_component.term)
 
-    # TODO: see if this is appropriate
+    # FIXME: changed target="mean" to target=model.family.likelihood.parent; double-check
    response = ResponseInfo(
-        response_name,
-        target=model.family.likelihood.parent,
-        lower_bound=lower_bound,
-        upper_bound=upper_bound
+        name=response_name,
+        target=model.family.likelihood.parent,
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
     )
-    # response = ResponseInfo(
-    #     response_name, target="mean", lower_bound=lower_bound, upper_bound=upper_bound
-    # )
     response_transform = transforms.get(response_name, identity)
 
     comparisons_data = create_differences_data(
@@ -864,7 +858,12 @@ def slopes(
     transforms = transforms if transforms is not None else {}
     response_name = get_aliased_name(model.response_component.term)
 
-    response = ResponseInfo(response_name, "mean", lower_bound, upper_bound)
+    response = ResponseInfo(
+        name=response_name,
+        target=model.family.likelihood.parent,
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+    )
     response_transform = transforms.get(response_name, identity)
 
     slopes_data = create_differences_data(conditional_info, wrt_info, effect_type)
diff --git a/tests/test_model_construction.py b/tests/test_model_construction.py
index a79d1a5c1..23212e258 100644
--- a/tests/test_model_construction.py
+++ b/tests/test_model_construction.py
@@ -90,22 +90,23 @@ def test_distribute_group_specific_effect_over(diabetes_data):
     # Treatment encoding because of the intercept
     levels = sorted(list(diabetes_data["age_grp"].unique()))[1:]
     levels = [str(level) for level in levels]
-
-    assert "C(age_grp)|BMI" in model.response_component.terms
-    assert "1|BMI" in model.response_component.terms
-    assert model.response_component.terms["C(age_grp)|BMI"].coords["C(age_grp)__expr_dim"] == levels
+    parent_component = model.components[model.family.likelihood.parent]
+    assert "C(age_grp)|BMI" in parent_component.terms
+    assert "1|BMI" in parent_component.terms
+    assert parent_component.terms["C(age_grp)|BMI"].coords["C(age_grp)__expr_dim"] == levels
 
     # This is equal to the sub-matrix of Z that corresponds to this term.
     # 442 is the number of observations. 163 the number of groups.
     # 2 is the number of levels of the categorical variable 'C(age_grp)' after removing
     # the reference level. Then the number of columns is 326 = 163 * 2.
-    assert model.response_component.terms["C(age_grp)|BMI"].data.shape == (442, 326)
+    assert parent_component.terms["C(age_grp)|BMI"].data.shape == (442, 326)
 
     # Without intercept. Reference level is not removed.
model = bmb.Model("BP ~ (0 + C(age_grp)|BMI)", diabetes_data) - assert "C(age_grp)|BMI" in model.response_component.terms - assert not "1|BMI" in model.response_component.terms - assert model.response_component.terms["C(age_grp)|BMI"].data.shape == (442, 489) + parent_component = model.components[model.family.likelihood.parent] + assert "C(age_grp)|BMI" in parent_component.terms + assert not "1|BMI" in parent_component.terms + assert parent_component.terms["C(age_grp)|BMI"].data.shape == (442, 489) def test_model_init_bad_data(): @@ -129,12 +130,13 @@ def test_model_categorical_argument(): } ) model = bmb.Model("y ~ 0 + x", data, categorical="x") - assert model.response_component.terms["x"].categorical + assert model.components[model.family.likelihood.parent].terms["x"].categorical model = bmb.Model("y ~ 0 + x*z", data, categorical=["x", "z"]) - assert model.response_component.terms["x"].categorical - assert model.response_component.terms["z"].categorical - assert model.response_component.terms["x:z"].categorical + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["x"].categorical + assert parent_component.terms["z"].categorical + assert parent_component.terms["x:z"].categorical def test_model_no_response(): @@ -144,15 +146,17 @@ def test_model_no_response(): def test_model_term_names_property(diabetes_data): model = bmb.Model("BMI ~ age_grp + BP + S1", diabetes_data) - assert model.response_component.intercept_term.name == "Intercept" - assert set(model.response_component.common_terms) == {"age_grp", "BP", "S1"} + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.intercept_term.name == "Intercept" + assert set(parent_component.common_terms) == {"age_grp", "BP", "S1"} def test_model_term_names_property_interaction(crossed_data): crossed_data["fourcats"] = sum([[x] * 10 for x in ["a", "b", "c", "d"]], list()) * 3 model = bmb.Model("Y ~ threecats*fourcats", crossed_data) - assert model.response_component.intercept_term.name == "Intercept" - assert set(model.response_component.common_terms) == { + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.intercept_term.name == "Intercept" + assert set(parent_component.common_terms) == { "threecats", "fourcats", "threecats:fourcats", @@ -163,7 +167,7 @@ def test_model_terms_levels_interaction(crossed_data): crossed_data["fourcats"] = sum([[x] * 10 for x in ["a", "b", "c", "d"]], list()) * 3 model = bmb.Model("Y ~ threecats*fourcats", crossed_data) - assert model.response_component.terms["threecats:fourcats"].levels == [ + assert model.components[model.family.likelihood.parent].terms["threecats:fourcats"].levels == [ "b, b", "b, c", "b, d", @@ -185,11 +189,12 @@ def test_model_terms_levels(): } ) model = bmb.Model("y ~ x + z + time + (time|subject)", data) - assert model.response_component.terms["z"].levels == ["Group 2", "Group 3"] - assert model.response_component.terms["1|subject"].groups == [ + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["z"].levels == ["Group 2", "Group 3"] + assert parent_component.terms["1|subject"].groups == [ f"Subject {x}" for x in range(1, 6) ] - assert model.response_component.terms["time|subject"].groups == [ + assert parent_component.terms["time|subject"].groups == [ f"Subject {x}" for x in range(1, 6) ] @@ -207,14 +212,15 @@ def test_model_term_classes(): model = bmb.Model("y ~ x*g + (x|s)", data) - assert 
isinstance(model.response_component.terms["x"], CommonTerm) - assert isinstance(model.response_component.terms["g"], CommonTerm) - assert isinstance(model.response_component.terms["x:g"], CommonTerm) - assert isinstance(model.response_component.terms["1|s"], GroupSpecificTerm) - assert isinstance(model.response_component.terms["x|s"], GroupSpecificTerm) + parent_component = model.components[model.family.likelihood.parent] + assert isinstance(parent_component.terms["x"], CommonTerm) + assert isinstance(parent_component.terms["g"], CommonTerm) + assert isinstance(parent_component.terms["x:g"], CommonTerm) + assert isinstance(parent_component.terms["1|s"], GroupSpecificTerm) + assert isinstance(parent_component.terms["x|s"], GroupSpecificTerm) # Also check 'categorical' attribute is right - assert model.response_component.terms["g"].categorical + assert parent_component.terms["g"].categorical def test_one_shot_formula_fit(diabetes_data): @@ -247,7 +253,7 @@ def test_categorical_term(): "1|g2_sigma", "g1|g2_sigma[b]", "x2|g2_sigma", - "y_sigma", + "sigma", "1|g2[x]", "1|g2[y]", "1|g2[z]", @@ -422,7 +428,7 @@ def test_1d_group_specific(): # The difference is that we do .squeeze() on it after creation. model = bmb.Model("y ~ (x|g)", data) model.build() - assert model.backend.components["y"].output.shape.eval() == (40,) + assert model.backend.components["mu"].output.shape.eval() == (40,) def test_data_is_copied(): @@ -446,13 +452,13 @@ def test_response_is_censored(): } ) dm = bmb.Model("censored(x, status) ~ 1", df) - assert dm.response_component.response_term.is_censored is True + assert dm.response_component.term.is_censored is True def test_response_is_truncated(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5]}) dm = bmb.Model("truncated(x, 5.5) ~ 1", df) - assert dm.response_component.response_term.is_truncated is True + assert dm.response_component.term.is_truncated is True def test_custom_likelihood_function(): @@ -468,7 +474,7 @@ def CustomGaussian(*args, **kwargs): family = bmb.Family("custom_gaussian", likelihood, "identity") model = bmb.Model("y ~ x", df, family=family, priors={"sigma": sigma_prior}) _ = model.fit(tune=100, draws=100) - assert model.backend.model.observed_RVs[0].str_repr() == "y ~ Normal(f(Intercept, x), y_sigma)" + assert model.backend.model.observed_RVs[0].str_repr() == "y ~ Normal(mu, sigma)" def test_extra_namespace(): @@ -477,7 +483,7 @@ def test_extra_namespace(): extra_namespace = {"levels": data["veh_body"].unique()} formula = "numclaims ~ 0 + C(veh_body, levels=levels)" model = bmb.Model(formula, data, family="poisson", link="log", extra_namespace=extra_namespace) - term = model.response_component.terms["C(veh_body, levels=levels)"] + term = model.components[model.family.likelihood.parent].terms["C(veh_body, levels=levels)"] assert (np.asarray(term.levels) == data["veh_body"].unique()).all() diff --git a/tests/test_models.py b/tests/test_models.py index 3bdcf4a39..c7dd2e964 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -169,7 +169,7 @@ def predict_oos(self, model, idata, data=None): # Reuse the original data if data is None: data = model.data.head() - return model.predict(idata, kind="pps", data=data, inplace=False) + return model.predict(idata, kind="response", data=data, inplace=False) class TestGaussian(FitPredictParent): @@ -197,7 +197,7 @@ def test_2_factors_saturated(self, crossed_data): "threecats", "fourcats", "threecats:fourcats", - "Y_sigma", + "sigma", } assert list(idata.posterior["threecats_dim"].values) == ["b", "c"] assert 
list(idata.posterior["fourcats_dim"].values) == ["b", "c", "d"] @@ -218,7 +218,7 @@ def test_2_factors_no_intercept(self, crossed_data): "threecats", "fourcats", "threecats:fourcats", - "Y_sigma", + "sigma", } assert list(idata.posterior["threecats_dim"].values) == ["a", "b", "c"] assert list(idata.posterior["fourcats_dim"].values) == ["b", "c", "d"] @@ -235,7 +235,7 @@ def test_2_factors_no_intercept(self, crossed_data): def test_2_factors_cell_means(self, crossed_data): model = bmb.Model("Y ~ 0 + threecats:fourcats", crossed_data) idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"threecats:fourcats", "Y_sigma"} + assert set(idata.posterior.data_vars) == {"threecats:fourcats", "sigma"} assert list(idata.posterior["threecats:fourcats_dim"].values) == [ "a, a", "a, b", @@ -255,7 +255,7 @@ def test_2_factors_cell_means(self, crossed_data): def test_cell_means_with_covariate(self, crossed_data): model = bmb.Model("Y ~ 0 + threecats + continuous", crossed_data) idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"threecats", "continuous", "Y_sigma"} + assert set(idata.posterior.data_vars) == {"threecats", "continuous", "sigma"} assert list(idata.posterior["threecats_dim"].values) == ["a", "b", "c"] self.predict_oos(model, idata) @@ -291,11 +291,11 @@ def test_many_common_many_group_specific(self, crossed_data): # Check that the group specific effects design matrices have the same shape X0 = pd.concat( - [pd.DataFrame(t.data) for t in model0.response_component.group_specific_terms.values()], + [pd.DataFrame(t.data) for t in model0.components["mu"].group_specific_terms.values()], axis=1, ) X1 = pd.concat( - [pd.DataFrame(t.data) for t in model1.response_component.group_specific_terms.values()], + [pd.DataFrame(t.data) for t in model1.components["mu"].group_specific_terms.values()], axis=1, ) assert X0.shape == X1.shape @@ -310,14 +310,14 @@ def test_many_common_many_group_specific(self, crossed_data): # even if term names / level names / order of columns is different X0_list = [] X1_list = [] - for term in model0.response_component.common_terms.values(): + for term in model0.components["mu"].common_terms.values(): if term.levels is not None: for level_idx in range(len(term.levels)): X0_list.append(tuple(term.data[:, level_idx])) else: X0_list.append(tuple(term.data)) - for term in model1.response_component.common_terms.values(): + for term in model1.components["mu"].common_terms.values(): if term.levels is not None: for level_idx in range(len(term.levels)): X1_list.append(tuple(term.data[:, level_idx])) @@ -329,12 +329,12 @@ def test_many_common_many_group_specific(self, crossed_data): # check that models have same priors for common effects priors0 = { x.name: x.prior.args - for x in model0.response_component.terms.values() + for x in model0.components["mu"].terms.values() if not isinstance(x, GroupSpecificTerm) } priors1 = { x.name: x.prior.args - for x in model1.response_component.terms.values() + for x in model1.components["mu"].terms.values() if not isinstance(x, GroupSpecificTerm) } @@ -353,11 +353,11 @@ def dicts_close(a, b): # check that fit and add models have same priors for group specific effects priors0 = { x.name: x.prior.args["sigma"].args - for x in model0.response_component.group_specific_terms.values() + for x in model0.components["mu"].group_specific_terms.values() } priors1 = { x.name: x.prior.args["sigma"].args - for x in model1.response_component.group_specific_terms.values() + for x in model1.components["mu"].group_specific_terms.values() } 
        # check dictionary keys
@@ -407,7 +407,7 @@ def test_cell_means_with_many_group_specific_effects(self, crossed_data):
                 pd.DataFrame(t.data)
                 if not isinstance(t.data, dict)
                 else pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
-                for t in model0.response_component.group_specific_terms.values()
+                for t in model0.components["mu"].group_specific_terms.values()
             ],
             axis=1,
         )
@@ -416,7 +416,7 @@ def test_cell_means_with_many_group_specific_effects(self, crossed_data):
                 pd.DataFrame(t.data)
                 if not isinstance(t.data, dict)
                 else pd.concat([pd.DataFrame(t.data[x]) for x in t.data.keys()], axis=1)
-                for t in model0.response_component.group_specific_terms.values()
+                for t in model1.components["mu"].group_specific_terms.values()
             ],
             axis=1,
         )
@@ -433,14 +433,14 @@ def test_cell_means_with_many_group_specific_effects(self, crossed_data):
         X0 = set(
             [
                 tuple(t.data[:, lev])
-                for t in model0.response_component.common_terms.values()
+                for t in model0.components["mu"].common_terms.values()
                 for lev in range(len(t.levels))
             ]
         )
         X1 = set(
             [
                 tuple(t.data[:, lev])
-                for t in model1.response_component.common_terms.values()
+                for t in model1.components["mu"].common_terms.values()
                 for lev in range(len(t.levels))
             ]
         )
@@ -449,12 +449,12 @@ def test_cell_means_with_many_group_specific_effects(self, crossed_data):
         # check that fit and add models have same priors for common effects
         priors0 = {
             x.name: x.prior.args
-            for x in model0.response_component.terms.values()
+            for x in model0.components["mu"].terms.values()
             if not isinstance(x, GroupSpecificTerm)
         }
         priors1 = {
             x.name: x.prior.args
-            for x in model1.response_component.terms.values()
+            for x in model1.components["mu"].terms.values()
             if not isinstance(x, GroupSpecificTerm)
         }
         assert set(priors0) == set(priors1)
@@ -462,12 +462,12 @@ def test_cell_means_with_many_group_specific_effects(self, crossed_data):
         # check that fit and add models have same priors for group specific effects
         priors0 = {
             x.name: x.prior.args["sigma"].args
-            for x in model0.response_component.terms.values()
+            for x in model0.components["mu"].terms.values()
             if isinstance(x, GroupSpecificTerm)
         }
         priors1 = {
             x.name: x.prior.args["sigma"].args
-            for x in model1.response_component.terms.values()
+            for x in model1.components["mu"].terms.values()
             if isinstance(x, GroupSpecificTerm)
         }
         assert set(priors0) == set(priors1)
@@ -480,7 +480,7 @@ def test_group_specific_categorical_interaction(self, crossed_data):
         assert set(idata.posterior.data_vars) == {
             "Intercept",
             "continuous",
-            "Y_sigma",
+            "sigma",
             "1|site_sigma",
             "threecats:fourcats|site_sigma",
             "1|site",
@@ -513,14 +513,15 @@ def test_group_specific_categorical_interaction(self, crossed_data):
     def test_fit_include_mean(self, crossed_data):
         draws = 100
         model = bmb.Model("Y ~ continuous * threecats", crossed_data)
+        # FIXME: what do we do with 'include_mean'? Maybe deprecate it?
idata = model.fit(tune=draws, draws=draws, include_mean=True) - assert idata.posterior["Y_mean"].shape[1:] == (draws, 120) + assert idata.posterior["mu"].shape[1:] == (draws, 120) # Compare with the mean obtained with `model.predict()` - mean = idata.posterior["Y_mean"].stack(sample=("chain", "draw")).values.mean(1) + mean = idata.posterior["mu"].stack(sample=("chain", "draw")).values.mean(1) model.predict(idata) - predicted_mean = idata.posterior["Y_mean"].stack(sample=("chain", "draw")).values.mean(1) + predicted_mean = idata.posterior["mu"].stack(sample=("chain", "draw")).values.mean(1) assert np.array_equal(mean, predicted_mean) @@ -571,19 +572,19 @@ def test_group_specific_splines(self): class TestBernoulli(FitPredictParent): def assert_posterior_predictive_range(self, model, idata): - y_name = model.response_component.response_term.name + y_name = model.response_component.term.name y_posterior_predictive = idata.posterior_predictive[y_name].to_numpy() assert set(np.unique(y_posterior_predictive)) == {0, 1} def assert_mean_range(self, model, idata): - y_mean_name = model.response_component.response_term.name + "_mean" + y_mean_name = "p" y_mean_posterior = idata.posterior[y_mean_name].to_numpy() assert ((0 < y_mean_posterior) & (y_mean_posterior < 1)).all() def test_bernoulli_empty_index(self, data_n100): model = bmb.Model("b1 ~ 1 + y1", data_n100, family="bernoulli") idata = self.fit(model) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") self.assert_mean_range(model, idata) self.assert_posterior_predictive_range(model, idata) @@ -595,7 +596,7 @@ def test_bernoulli_empty_index(self, data_n100): def test_bernoulli_good_numeric(self, data_n100): model = bmb.Model("b1 ~ y1", data_n100, family="bernoulli") idata = self.fit(model) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") self.assert_mean_range(model, idata) self.assert_posterior_predictive_range(model, idata) @@ -619,14 +620,14 @@ def test_categorical_group_specific(self, data_n100): class TestBinomial(FitPredictParent): def assert_mean_range(self, model, idata): - y_mean_name = model.response_component.response_term.name + "_mean" + y_mean_name = "p" y_mean_posterior = idata.posterior[y_mean_name].to_numpy() assert ((0 < y_mean_posterior) & (y_mean_posterior < 1)).all() def test_binomial_regression(self, beetle_data): model = bmb.Model("prop(y, n) ~ x", beetle_data, family="binomial") idata = self.fit(model) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") self.assert_mean_range(model, idata) y_reshaped = beetle_data["n"].to_numpy()[None, None, :] @@ -647,7 +648,7 @@ def test_binomial_regression_constant(self, beetle_data): # Uses a constant instead of variable in data frame model = bmb.Model("p(y, 62) ~ x", beetle_data, family="binomial") idata = self.fit(model) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") self.assert_mean_range(model, idata) assert (idata.posterior_predictive["p(y, 62)"].to_numpy() <= 62).all() assert (0 <= idata.posterior_predictive["p(y, 62)"].to_numpy()).all() @@ -665,7 +666,7 @@ def test_binomial_regression_constant(self, beetle_data): class TestPoisson(FitPredictParent): def assert_mean_range(self, model, idata): - y_mean_name = model.response_component.response_term.name + "_mean" + y_mean_name = "mu" y_mean_posterior = idata.posterior[y_mean_name].to_numpy() assert (y_mean_posterior > 0).all() @@ -683,21 +684,21 @@ def test_poisson_regression(self, crossed_data): self.assert_mean_range(model1, 
idata1) # check that term names agree - assert set(model0.response_component.terms) == set(model1.response_component.terms) + assert set(model0.components["mu"].terms) == set(model1.components["mu"].terms) # check that common effect design matrices are the same, # even if term names / level names / order of columns is different X0_list = [] X1_list = [] - for term in model0.response_component.common_terms.values(): + for term in model0.components["mu"].common_terms.values(): if term.levels is not None: for level_idx in range(len(term.levels)): X0_list.append(tuple(term.data[:, level_idx])) else: X0_list.append(tuple(term.data)) - for term in model1.response_component.common_terms.values(): + for term in model1.components["mu"].common_terms.values(): if term.levels is not None: for level_idx in range(len(term.levels)): X1_list.append(tuple(term.data[:, level_idx])) @@ -709,12 +710,12 @@ def test_poisson_regression(self, crossed_data): # check that models have same priors for common effects priors0 = { x.name: x.prior.args - for x in model0.response_component.terms.values() + for x in model0.components["mu"].terms.values() if not isinstance(x, GroupSpecificTerm) } priors1 = { x.name: x.prior.args - for x in model1.response_component.terms.values() + for x in model1.components["mu"].terms.values() if not isinstance(x, GroupSpecificTerm) } # check dictionary keys @@ -749,10 +750,10 @@ def dicts_close(a, b): # Now test posterior predictive # Fit again to make sure we fix the number of chainS idata = model1.fit(tune=50, draws=50, chains=2) - pps = model1.predict(idata, kind="pps", inplace=False) + pps = model1.predict(idata, kind="response", inplace=False) assert pps.posterior_predictive["count"].shape == (2, 50, 120) - pps = model1.predict(idata, kind="pps", inplace=True) + pps = model1.predict(idata, kind="response", inplace=True) assert pps is None assert idata.posterior_predictive["count"].shape == (2, 50, 120) @@ -764,16 +765,16 @@ def test_predict_negativebinomial(self, data_n100): model = bmb.Model("n1 ~ y1", data_n100, family="negativebinomial") idata = self.fit(model) - model.predict(idata, kind="mean") - assert (0 < idata.posterior["n1_mean"]).all() + model.predict(idata, kind="params") # FIXME + assert (0 < idata.posterior["mu"]).all() - model.predict(idata, kind="pps") + model.predict(idata, kind="response") assert (np.equal(np.mod(idata.posterior_predictive["n1"].values, 1), 0)).all() - model.predict(idata, kind="mean", data=data_n100.iloc[:20, :]) - assert (0 < idata.posterior["n1_mean"]).all() + model.predict(idata, kind="params", data=data_n100.iloc[:20, :]) + assert (0 < idata.posterior["mu"]).all() - model.predict(idata, kind="pps", data=data_n100.iloc[:20, :]) + model.predict(idata, kind="response", data=data_n100.iloc[:20, :]) assert (np.equal(np.mod(idata.posterior_predictive["n1"].values, 1), 0)).all() @@ -781,11 +782,11 @@ class TestLaplace(FitPredictParent): def test_laplace_regression(self, data_n100): model = bmb.Model("y1 ~ y2", data_n100, family="laplace") idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"Intercept", "y2", "y1_b"} - assert (idata.posterior["y1_b"] > 0).all().item() + assert set(idata.posterior.data_vars) == {"Intercept", "y2", "b"} + assert (idata.posterior["b"] > 0).all().item() idata = self.predict_oos(model, idata) - assert "y1_mean" in idata.posterior + assert "mu" in idata.posterior class TestGamma(FitPredictParent): @@ -794,7 +795,7 @@ def test_gamma_regression(self, data_n100): data_n100["o"] = np.exp(data_n100["y1"]) model = 
bmb.Model("o ~ y2 + y3 + n1 + cat4", data_n100, family="gamma", link="log") idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"Intercept", "y2", "y3", "n1", "cat4", "o_alpha"} + assert set(idata.posterior.data_vars) == {"Intercept", "y2", "y3", "n1", "cat4", "alpha"} idata = self.predict_oos(model, idata) assert (idata.posterior_predictive["o"] > 0).all().item() @@ -807,7 +808,7 @@ def test_gamma_regression_categoric(self, data_n100): data_n100["o"] = np.exp(data_n100["y1"]) model = bmb.Model("o ~ 0 + cat2:cat4", data_n100, family="gamma", link="log") idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"cat2:cat4", "o_alpha"} + assert set(idata.posterior.data_vars) == {"cat2:cat4", "alpha"} idata = self.predict_oos(model, idata) assert (idata.posterior_predictive["o"] > 0).all().item() @@ -820,18 +821,18 @@ def test_beta_regression(self, gasoline_data): idata = self.fit(model, target_accept=0.9, random_seed=1234) # To Do: Could be adjusted but this is what we had before - model.predict(idata, kind="mean") - model.predict(idata, kind="pps") + model.predict(idata, kind="params") + model.predict(idata, kind="response") - assert (0 < idata.posterior["yield_mean"]).all() & (idata.posterior["yield_mean"] < 1).all() + assert (0 < idata.posterior["mu"]).all() & (idata.posterior["mu"] < 1).all() assert (0 < idata.posterior_predictive["yield"]).all() & ( idata.posterior_predictive["yield"] < 1 ).all() - model.predict(idata, kind="mean", data=gasoline_data.iloc[:20, :]) - model.predict(idata, kind="pps", data=gasoline_data.iloc[:20, :]) + model.predict(idata, kind="params", data=gasoline_data.iloc[:20, :]) + model.predict(idata, kind="response", data=gasoline_data.iloc[:20, :]) - assert (0 < idata.posterior["yield_mean"]).all() & (idata.posterior["yield_mean"] < 1).all() + assert (0 < idata.posterior["mu"]).all() & (idata.posterior["mu"] < 1).all() assert (0 < idata.posterior_predictive["yield"]).all() & ( idata.posterior_predictive["yield"] < 1 ).all() @@ -841,7 +842,7 @@ class TestStudentT(FitPredictParent): def test_t_regression(self, data_n100): model = bmb.Model("y1 ~ y2", data_n100, family="t") idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"Intercept", "y2", "y1_nu", "y1_sigma"} + assert set(idata.posterior.data_vars) == {"Intercept", "y2", "nu", "sigma"} self.predict_oos(model, idata) @@ -851,7 +852,7 @@ def test_vonmises_regression(self): data = pd.DataFrame({"y": rng.vonmises(0, 1, size=100), "x": rng.normal(size=100)}) model = bmb.Model("y ~ x", data, family="vonmises") idata = self.fit(model) - assert set(idata.posterior.data_vars) == {"Intercept", "x", "y_kappa"} + assert set(idata.posterior.data_vars) == {"Intercept", "x", "kappa"} idata = self.predict_oos(model, idata) assert (idata.posterior_predictive["y"].min() >= -np.pi).item() and ( idata.posterior_predictive["y"].max() <= np.pi @@ -874,26 +875,26 @@ def test_quantile_regression(self): bmb_model1.predict(idata1) assert np.all( - idata0.posterior["y_mean"].mean(("chain", "draw")) - > idata1.posterior["y_mean"].mean(("chain", "draw")) + idata0.posterior["mu"].mean(("chain", "draw")) + > idata1.posterior["mu"].mean(("chain", "draw")) ) class TestCategorical(FitPredictParent): # assert pps.shape[-1] == inhaler.shape[0] def assert_mean_sum(self, model, idata): - y_mean_name = model.response_component.response_term.name + "_mean" - y_dim = model.response_component.response_term.name + "_dim" + y_mean_name = "p" + y_dim = model.response_component.term.name + "_dim" y_mean_posterior = 
idata.posterior[y_mean_name] assert np.allclose(y_mean_posterior.sum(y_dim).to_numpy(), 1) def assert_mean_range(self, model, idata): - y_mean_name = model.response_component.response_term.name + "_mean" + y_mean_name = "p" y_mean_posterior = idata.posterior[y_mean_name].to_numpy() assert ((0 < y_mean_posterior) & (y_mean_posterior < 1)).all() def assert_posterior_predictive_range(self, model, idata, n): - y_name = model.response_component.response_term.name + y_name = model.response_component.term.name y_posterior_predictive = idata.posterior_predictive[y_name].to_numpy() assert set(np.unique(y_posterior_predictive)).issubset(set(range(n))) @@ -907,10 +908,10 @@ def test_basic(self, inhaler_data): assert list(idata.posterior.coords["rating_reduced_dim"].values) == ["2", "3", "4"] idata = self.predict_oos(model, idata) - assert list(idata.posterior["rating_mean"].coords) == [ + assert list(idata.posterior["p"].coords) == [ "chain", "draw", - "rating_obs", + "__obs__", "rating_dim", ] assert list(idata.posterior.coords["rating_dim"].values) == ["1", "2", "3", "4"] @@ -941,10 +942,10 @@ def test_varying_intercept(self, inhaler_data): assert list(idata.posterior.coords["rating_reduced_dim"].values) == ["2", "3", "4"] idata = self.predict_oos(model, idata) - assert set(idata.posterior["rating_mean"].coords) == { + assert set(idata.posterior["p"].coords) == { "chain", "draw", - "rating_obs", + "__obs__", "rating_dim", } assert list(idata.posterior.coords["rating_dim"].values) == ["1", "2", "3", "4"] @@ -1053,7 +1054,7 @@ def test_ordinal_families(self, inhaler_data, family, link): idata = self.fit(model, random_seed=1234) idata = self.predict_oos(model, idata) - assert np.allclose(idata.posterior["rating_mean"].sum("rating_dim").to_numpy(), 1) + assert np.allclose(idata.posterior["p"].sum("rating_dim").to_numpy(), 1) assert set(np.unique(idata.posterior_predictive["rating"])).issubset({0, 1, 2, 3}) def test_cumulative_family_priors(self, inhaler_data): @@ -1176,7 +1177,7 @@ def test_constrained_response(self, truncated_data): class TestMultinomial(FitPredictParent): def assert_posterior_predictive(self, model, idata): - y_name = model.response_component.response_term.name + y_name = model.response_component.term.name y_posterior_predictive = idata.posterior_predictive[y_name].to_numpy() assert (y_posterior_predictive.sum(-1).var((0, 1)) == 0).all() @@ -1232,7 +1233,7 @@ def test_group_specific_effects(self): class TestDirichletMultinomial(FitPredictParent): def assert_posterior_predictive(self, model, idata): - y_name = model.response_component.response_term.name + y_name = model.response_component.term.name y_posterior_predictive = idata.posterior_predictive[y_name].to_numpy() assert (y_posterior_predictive.sum(-1).var((0, 1)) == 0).all() @@ -1269,16 +1270,16 @@ def test_wald_family(data_n100): model = bmb.Model("y ~ y2", data_n100, family="wald", link="log", priors=priors) idata = model.fit(tune=DRAWS, draws=DRAWS, random_seed=1234) - model.predict(idata, kind="mean") - model.predict(idata, kind="pps") + model.predict(idata, kind="params") + model.predict(idata, kind="response") - assert (0 < idata.posterior["y_mean"]).all() + assert (0 < idata.posterior["mu"]).all() assert (0 < idata.posterior_predictive["y"]).all() - model.predict(idata, kind="mean", data=data_n100.iloc[:20, :]) - model.predict(idata, kind="pps", data=data_n100.iloc[:20, :]) + model.predict(idata, kind="params", data=data_n100.iloc[:20, :]) + model.predict(idata, kind="response", data=data_n100.iloc[:20, :]) - assert (0 < 
idata.posterior["y_mean"]).all() + assert (0 < idata.posterior["mu"]).all() assert (0 < idata.posterior_predictive["y"]).all() @@ -1298,17 +1299,14 @@ def test_predict_include_group_specific(): idata_1 = model.predict(idata, data=data, inplace=False, include_group_specific=True) idata_2 = model.predict(idata, data=data, inplace=False, include_group_specific=False) - assert not np.isclose( - idata_1.posterior["y_mean"].values, - idata_2.posterior["y_mean"].values, - ).all() + assert not np.isclose(idata_1.posterior["p"].values, idata_2.posterior["p"].values).all() # Since it's an intercept-only model, predictions are the same for all observations if # we drop group-specific terms. - assert (idata_2.posterior["y_mean"] == idata_2.posterior["y_mean"][:, :, 0]).all() + assert (idata_2.posterior["p"] == idata_2.posterior["p"][:, :, 0]).all() # When we include group-specific terms, these predictions are different - assert not (idata_1.posterior["y_mean"] == idata_1.posterior["y_mean"][:, :, 0]).all() + assert not (idata_1.posterior["p"] == idata_1.posterior["p"][:, :, 0]).all() def test_predict_offset(): @@ -1317,7 +1315,7 @@ def test_predict_offset(): model = bmb.Model("numclaims ~ offset(np.log(exposure))", data, family="poisson", link="log") idata = model.fit(tune=DRAWS, draws=DRAWS, random_seed=1234) model.predict(idata) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") # More complex case rng = np.random.default_rng(121195) @@ -1332,7 +1330,7 @@ def test_predict_offset(): model = bmb.Model("y ~ offset(np.log(time)) + x + (1 | group)", data, family="poisson") idata = model.fit(tune=DRAWS, draws=DRAWS, target_accept=0.9, random_seed=1234) model.predict(idata) - model.predict(idata, kind="pps") + model.predict(idata, kind="response") def test_predict_new_groups_fail(sleepstudy): @@ -1390,5 +1388,5 @@ def test_weighted(): data = pd.DataFrame({"w": weights, "y": y}) model = bmb.Model("weighted(y, w) ~ 1", data, family="exponential") idata = model.fit(tune=TUNE, draws=DRAWS) - model.predict(idata, kind="pps") - model.predict(idata, kind="pps", data=data) \ No newline at end of file + model.predict(idata, kind="response") + model.predict(idata, kind="response", data=data) \ No newline at end of file diff --git a/tests/test_priors.py b/tests/test_priors.py index b9933b47e..a079e5a76 100644 --- a/tests/test_priors.py +++ b/tests/test_priors.py @@ -101,9 +101,10 @@ def test_auto_scale(diabetes_data): # By default, should scale everything except custom bmb.Prior() objects priors = {"BP": bmb.Prior("Cauchy", alpha=1, beta=17.5)} model = bmb.Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors) - p1 = model.response_component.terms["S1"].prior - p2 = model.response_component.terms["S2"].prior - p3 = model.response_component.terms["BP"].prior + parent_component = model.components[model.family.likelihood.parent] + p1 = parent_component.terms["S1"].prior + p2 = parent_component.terms["S2"].prior + p3 = parent_component.terms["BP"].prior assert p1.name == p2.name == "Normal" assert 0 < p1.args["sigma"] < 1 assert p2.args["sigma"] > p1.args["sigma"] @@ -113,8 +114,9 @@ def test_auto_scale(diabetes_data): # With auto_scale off, custom priors are considered. 
priors = {"BP": bmb.Prior("Cauchy", alpha=1, beta=17.5)} model = bmb.Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors, auto_scale=False) - p2_off = model.response_component.terms["S2"].prior - p3_off = model.response_component.terms["BP"].prior + parent_component = model.components[model.family.likelihood.parent] + p2_off = parent_component.terms["S2"].prior + p3_off = parent_component.terms["BP"].prior assert p2_off.name == "Flat" assert "sigma" not in p2_off.args assert p3_off.name == "Cauchy" @@ -199,23 +201,24 @@ def test_set_priors(): # Common model.set_priors(common=prior) - assert model.response_component.terms["x"].prior == prior + assert model.components[model.family.likelihood.parent].terms["x"].prior == prior # Group-specific with pytest.raises(ValueError, match="must have hyperpriors"): model.set_priors(group_specific=prior) model.set_priors(group_specific=gp_prior) - assert model.response_component.terms["1|g"].prior == gp_prior + assert model.components[model.family.likelihood.parent].terms["1|g"].prior == gp_prior # By name model = bmb.Model("y ~ x + (1|g)", data) model.set_priors(priors={"Intercept": prior}) model.set_priors(priors={"x": prior}) model.set_priors(priors={"1|g": gp_prior}) - assert model.response_component.terms["Intercept"].prior == prior - assert model.response_component.terms["x"].prior == prior - assert model.response_component.terms["1|g"].prior == gp_prior + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["Intercept"].prior == prior + assert parent_component.terms["x"].prior == prior + assert parent_component.terms["1|g"].prior == gp_prior def test_set_prior_unexisting_term(): @@ -295,27 +298,31 @@ def test_prior_shape(): ) model = bmb.Model("score ~ 0 + q", data) - assert model.response_component.terms["q"].prior.args["mu"].shape == (5,) - assert model.response_component.terms["q"].prior.args["sigma"].shape == (5,) + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["q"].prior.args["mu"].shape == (5,) + assert parent_component.terms["q"].prior.args["sigma"].shape == (5,) model = bmb.Model("score ~ q", data) - assert model.response_component.terms["q"].prior.args["mu"].shape == (4,) - assert model.response_component.terms["q"].prior.args["sigma"].shape == (4,) + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["q"].prior.args["mu"].shape == (4,) + assert parent_component.terms["q"].prior.args["sigma"].shape == (4,) model = bmb.Model("score ~ 0 + q:s", data) - assert model.response_component.terms["q:s"].prior.args["mu"].shape == (15,) - assert model.response_component.terms["q:s"].prior.args["sigma"].shape == (15,) + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["q:s"].prior.args["mu"].shape == (15,) + assert parent_component.terms["q:s"].prior.args["sigma"].shape == (15,) # "s" is automatically added to ensure full rank matrix model = bmb.Model("score ~ q:s", data) - assert model.response_component.terms["Intercept"].prior.args["mu"].shape == () - assert model.response_component.terms["Intercept"].prior.args["sigma"].shape == () + parent_component = model.components[model.family.likelihood.parent] + assert parent_component.terms["Intercept"].prior.args["mu"].shape == () + assert parent_component.terms["Intercept"].prior.args["sigma"].shape == () - assert model.response_component.terms["s"].prior.args["mu"].shape == (2,) - assert 
model.response_component.terms["s"].prior.args["sigma"].shape == (2,) + assert parent_component.terms["s"].prior.args["mu"].shape == (2,) + assert parent_component.terms["s"].prior.args["sigma"].shape == (2,) - assert model.response_component.terms["q:s"].prior.args["mu"].shape == (12,) - assert model.response_component.terms["q:s"].prior.args["sigma"].shape == (12,) + assert parent_component.terms["q:s"].prior.args["mu"].shape == (12,) + assert parent_component.terms["q:s"].prior.args["sigma"].shape == (12,) def test_set_priors_but_intercept():