diff --git a/docs/source/api/data.rst b/docs/source/api/data.rst
index 09d3f42fdb..fe70e2d7f1 100644
--- a/docs/source/api/data.rst
+++ b/docs/source/api/data.rst
@@ -10,5 +10,4 @@ Data
     MutableData
     get_data
     Data
-    GeneratorAdapter
     Minibatch
diff --git a/pymc/data.py b/pymc/data.py
index 9373eb5775..c21ac3001f 100644
--- a/pymc/data.py
+++ b/pymc/data.py
@@ -33,18 +33,16 @@
 from pytensor.scalar import Cast
 from pytensor.tensor.elemwise import Elemwise
 from pytensor.tensor.random.basic import IntegersRV
-from pytensor.tensor.type import TensorType
 from pytensor.tensor.variable import TensorConstant, TensorVariable
 
 import pymc as pm
 
-from pymc.pytensorf import GeneratorOp, convert_data, smarttypeX
+from pymc.pytensorf import convert_data
 from pymc.vartypes import isgenerator
 
 __all__ = [
     "ConstantData",
     "Data",
-    "GeneratorAdapter",
     "Minibatch",
     "MutableData",
     "get_data",
@@ -86,51 +84,6 @@ def clone(self):
         return cp
 
 
-class GeneratorAdapter:
-    """Class that helps infer data type of generator.
-
-    It looks at the first item, preserving the order of the resulting generator.
-    """
-
-    def make_variable(self, gop, name=None):
-        var = GenTensorVariable(gop, self.tensortype, name)
-        var.tag.test_value = self.test_value
-        return var
-
-    def __init__(self, generator):
-        if not pm.vartypes.isgenerator(generator):
-            raise TypeError("Object should be generator like")
-        self.test_value = smarttypeX(copy(next(generator)))
-        # make pickling potentially possible
-        self._yielded_test_value = False
-        self.gen = generator
-        self.tensortype = TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim))
-
-    # python3 generator
-    def __next__(self):
-        """Next value in the generator."""
-        if not self._yielded_test_value:
-            self._yielded_test_value = True
-            return self.test_value
-        else:
-            return smarttypeX(copy(next(self.gen)))
-
-    # python2 generator
-    next = __next__
-
-    def __iter__(self):
-        """Return an iterator."""
-        return self
-
-    def __eq__(self, other):
-        """Return true if both objects are actually the same."""
-        return id(self) == id(other)
-
-    def __hash__(self):
-        """Return a hash of the object."""
-        return hash(id(self))
-
-
 class MinibatchIndexRV(IntegersRV):
     _print_name = ("minibatch_index", r"\operatorname{minibatch\_index}")
 
@@ -170,8 +123,6 @@ def is_valid_observed(v) -> bool:
             isinstance(v.owner.op, MinibatchOp)
             and all(is_valid_observed(inp) for inp in v.owner.inputs)
         )
-        # Or Generator
-        or isinstance(v.owner.op, GeneratorOp)
     )
 
 
diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py
index eda2064821..cf5700b95e 100644
--- a/pymc/pytensorf.py
+++ b/pymc/pytensorf.py
@@ -36,7 +36,6 @@
     walk,
 )
 from pytensor.graph.fg import FunctionGraph, Output
-from pytensor.graph.op import Op
 from pytensor.scalar.basic import Cast
 from pytensor.scan.op import Scan
 from pytensor.tensor.basic import _as_tensor_variable
@@ -63,10 +62,8 @@
     "compile_pymc",
     "cont_inputs",
     "convert_data",
-    "convert_generator_data",
     "convert_observed_data",
     "floatX",
-    "generator",
     "gradient",
     "hessian",
     "hessian_diag",
@@ -81,20 +78,10 @@
 def convert_observed_data(data) -> np.ndarray | Variable:
     """Convert user provided dataset to accepted formats."""
     if isgenerator(data):
-        return convert_generator_data(data)
+        raise TypeError("Data passed to `observed` cannot be a generator.")
     return convert_data(data)
 
 
-def convert_generator_data(data) -> TensorVariable:
-    warnings.warn(
-        "Generator data is deprecated and we intend to remove it."
-        " If you disagree and need this, please get in touch via https://github.com/pymc-devs/pymc/issues.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-    return generator(data)
-
-
 def convert_data(data) -> np.ndarray | Variable:
     ret: np.ndarray | Variable
     if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
@@ -625,98 +612,6 @@ def __call__(self, input):
         return pytensor.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False)
 
 
-class GeneratorOp(Op):
-    """
-    Generator Op is designed for storing python generators inside pytensor graph.
-
-    __call__ creates TensorVariable
-    It has 2 new methods
-    - var.set_gen(gen): sets new generator
-    - var.set_default(value): sets new default value (None erases default value)
-
-    If generator is exhausted, variable will produce default value if it is not None,
-    else raises `StopIteration` exception that can be caught on runtime.
-
-    Parameters
-    ----------
-    gen: generator that implements __next__ (py3) or next (py2) method
-        and yields np.arrays with same types
-    default: np.array with the same type as generator produces
-    """
-
-    __props__ = ("generator",)
-
-    def __init__(self, gen, default=None):
-        warnings.warn(
-            "generator data is deprecated and will be removed in a future release", FutureWarning
-        )
-        from pymc.data import GeneratorAdapter
-
-        super().__init__()
-        if not isinstance(gen, GeneratorAdapter):
-            gen = GeneratorAdapter(gen)
-        self.generator = gen
-        self.set_default(default)
-
-    def make_node(self, *inputs):
-        gen_var = self.generator.make_variable(self)
-        return Apply(self, [], [gen_var])
-
-    def perform(self, node, inputs, output_storage, params=None):
-        if self.default is not None:
-            output_storage[0][0] = next(self.generator, self.default)
-        else:
-            output_storage[0][0] = next(self.generator)
-
-    def do_constant_folding(self, fgraph, node):
-        return False
-
-    __call__ = pytensor.config.change_flags(compute_test_value="off")(Op.__call__)
-
-    def set_gen(self, gen):
-        from pymc.data import GeneratorAdapter
-
-        if not isinstance(gen, GeneratorAdapter):
-            gen = GeneratorAdapter(gen)
-        if not gen.tensortype == self.generator.tensortype:
-            raise ValueError("New generator should yield the same type")
-        self.generator = gen
-
-    def set_default(self, value):
-        if value is None:
-            self.default = None
-        else:
-            value = np.asarray(value, self.generator.tensortype.dtype)
-            t1 = (False,) * value.ndim
-            t2 = self.generator.tensortype.broadcastable
-            if not t1 == t2:
-                raise ValueError("Default value should have the same type as generator")
-            self.default = value
-
-
-def generator(gen, default=None):
-    """
-    Create a generator variable with possibility to set default value and new generator.
-
-    If generator is exhausted variable will produce default value if it is not None,
-    else raises `StopIteration` exception that can be caught on runtime.
-
-    Parameters
-    ----------
-    gen: generator that implements __next__ (py3) or next (py2) method
-        and yields np.arrays with same types
-    default: np.array with the same type as generator produces
-
-    Returns
-    -------
-    TensorVariable
-        It has 2 new methods
-        - var.set_gen(gen): sets new generator
-        - var.set_default(value): sets new default value (None erases default value)
-    """
-    return GeneratorOp(gen, default)()
-
-
 def ix_(*args):
     """
     PyTensor np.ix_ analog.
diff --git a/tests/test_data.py b/tests/test_data.py
index 0906ab8434..154737b637 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -13,11 +13,9 @@
 # limitations under the License.
 
 import io
-import itertools as it
 
 from os import path
 
-import cloudpickle
 import numpy as np
 import pytensor
 import pytensor.tensor as pt
@@ -29,7 +27,7 @@
 import pymc as pm
 
 from pymc.data import MinibatchOp
-from pymc.pytensorf import GeneratorOp, floatX
+from pymc.pytensorf import floatX
 
 
 class TestData:
@@ -495,97 +493,6 @@ def integers_ndim(ndim):
         i += 1
 
 
-@pytest.mark.usefixtures("strict_float32")
-class TestGenerator:
-    def test_basic(self):
-        generator = pm.GeneratorAdapter(integers())
-        gop = GeneratorOp(generator)()
-        assert gop.tag.test_value == np.float32(0)
-        f = pytensor.function([], gop)
-        assert f() == np.float32(0)
-        assert f() == np.float32(1)
-        for _ in range(2, 100):
-            f()
-        assert f() == np.float32(100)
-
-    def test_ndim(self):
-        for ndim in range(10):
-            res = list(it.islice(integers_ndim(ndim), 0, 2))
-            generator = pm.GeneratorAdapter(integers_ndim(ndim))
-            gop = GeneratorOp(generator)()
-            f = pytensor.function([], gop)
-            assert ndim == res[0].ndim
-            np.testing.assert_equal(f(), res[0])
-            np.testing.assert_equal(f(), res[1])
-
-    def test_cloning_available(self):
-        gop = pm.generator(integers())
-        res = gop**2
-        shared = pytensor.shared(pm.floatX(10))
-        res1 = pytensor.clone_replace(res, {gop: shared})
-        f = pytensor.function([], res1)
-        assert f() == np.float32(100)
-
-    def test_default_value(self):
-        def gen():
-            for i in range(2):
-                yield pm.floatX(np.ones((10, 10)) * i)
-
-        gop = pm.generator(gen(), np.ones((10, 10)) * 10)
-        f = pytensor.function([], gop)
-        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
-        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
-        np.testing.assert_equal(np.ones((10, 10)) * 10, f())
-        with pytest.raises(ValueError):
-            gop.set_default(1)
-
-    def test_set_gen_and_exc(self):
-        def gen():
-            for i in range(2):
-                yield pm.floatX(np.ones((10, 10)) * i)
-
-        gop = pm.generator(gen())
-        f = pytensor.function([], gop)
-        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
-        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
-        with pytest.raises(StopIteration):
-            f()
-        gop.set_gen(gen())
-        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
-        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
-
-    def test_pickling(self, datagen):
-        gen = pm.generator(datagen)
-        cloudpickle.loads(cloudpickle.dumps(gen))
-        bad_gen = pm.generator(integers())
-        with pytest.raises(TypeError):
-            cloudpickle.dumps(bad_gen)
-
-    def test_gen_cloning_with_shape_change(self, datagen):
-        gen = pm.generator(datagen)
-        gen_r = pt.random.normal(size=gen.shape).T
-        X = gen.dot(gen_r)
-        res, _ = pytensor.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
-        assert res.eval().shape == (50,)
-        shared = pytensor.shared(datagen.data.astype(gen.dtype))
-        res2 = pytensor.clone_replace(res, {gen: shared**2})
-        assert res2.eval().shape == (1000,)
-
-
-def gen1():
-    i = 0
-    while True:
-        yield np.ones((10, 100)) * i
-        i += 1
-
-
-def gen2():
-    i = 0
-    while True:
-        yield np.ones((20, 100)) * i
-        i += 1
-
-
 @pytest.mark.usefixtures("strict_float32")
 class TestMinibatch:
     data = np.random.rand(30, 10)
diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py
index c434f1a9c7..b8c82886b9 100644
--- a/tests/test_pytensorf.py
+++ b/tests/test_pytensorf.py
@@ -27,7 +27,6 @@
 from pytensor.graph.basic import Variable, equal_computations
 from pytensor.tensor.random.basic import normal, uniform
 from pytensor.tensor.subtensor import AdvancedIncSubtensor
-from pytensor.tensor.variable import TensorVariable
 
 import pymc as pm
 
@@ -37,19 +36,16 @@
 from pymc.exceptions import NotConstantValueError
 from pymc.logprob.utils import ParameterValueError
 from pymc.pytensorf import (
-    GeneratorOp,
     collect_default_updates,
     compile,
     constant_fold,
     convert_data,
-    convert_generator_data,
     extract_obs_data,
     hessian,
     hessian_diag,
     replace_rng_nodes,
     replace_vars_in_graphs,
     reseed_rngs,
-    smarttypeX,
     walk_model,
 )
 from pymc.vartypes import int_types
@@ -265,32 +261,6 @@ def test_convert_data(input_dtype):
     assert pytensor_output.dtype == intX
 
 
-@pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"])
-def test_convert_generator_data(input_dtype):
-    # Create a generator object producing NumPy arrays with the intended dtype.
-    # This is required to infer the correct dtype.
-    square_generator = (np.array([i**2], dtype=input_dtype) for i in range(100))
-
-    # Output is NOT wrapped with `pm.floatX`/`intX`,
-    # but produced from calling a special Op.
-    with pytest.warns(DeprecationWarning, match="get in touch"):
-        result = convert_generator_data(square_generator)
-    apply = result.owner
-    op = apply.op
-    # Make sure the returned object is a PyTensor TensorVariable
-    assert isinstance(result, TensorVariable)
-    assert isinstance(op, GeneratorOp), f"It's a {type(apply)}"
-    # There are no inputs - because it generates...
-    assert apply.inputs == []
-
-    # Evaluation results should have the correct* dtype!
-    # (*intX/floatX will be enforced!)
-    evaled = result.eval()
-    expected_dtype = smarttypeX(np.array(1, dtype=input_dtype)).dtype
-    assert result.type.dtype == expected_dtype
-    assert evaled.dtype == np.dtype(expected_dtype)
-
-
 def test_pandas_to_array_pandas_index():
     data = pd.Index([1, 2, 3])
     result = convert_data(data)
diff --git a/tests/variational/test_minibatch_rv.py b/tests/variational/test_minibatch_rv.py
index 84d118c581..33229e0bb7 100644
--- a/tests/variational/test_minibatch_rv.py
+++ b/tests/variational/test_minibatch_rv.py
@@ -22,9 +22,7 @@
 from pymc import Normal, draw
 
 from pymc.data import Minibatch
-from pymc.testing import select_by_precision
 from pymc.variational.minibatch_rv import create_minibatch_rv
-from tests.test_data import gen1, gen2
 
 
 class TestMinibatchRandomVariable:
@@ -42,50 +40,6 @@ def test_density_scaling(self):
         p2 = pytensor.function([], model2.logp())
         assert p1() * 2 == p2()
 
-    def test_density_scaling_with_generator(self):
-        # We have different size generators
-
-        def true_dens():
-            g = gen1()
-            for i, point in enumerate(g):
-                yield st.norm.logpdf(point).sum() * 10
-
-        t = true_dens()
-        # We have same size models
-        with pm.Model() as model1:
-            pm.Normal("n", observed=gen1(), total_size=100)
-            p1 = pytensor.function([], model1.logp())
-
-        with pm.Model() as model2:
-            gen_var = pm.generator(gen2())
-            pm.Normal("n", observed=gen_var, total_size=100)
-            p2 = pytensor.function([], model2.logp())
-
-        for i in range(10):
-            _1, _2, _t = p1(), p2(), next(t)
-            decimals = select_by_precision(float64=7, float32=1)
-            np.testing.assert_almost_equal(_1, _t, decimal=decimals)  # Value O(-50,000)
-            np.testing.assert_almost_equal(_1, _2)
-        # Done
-
-    def test_gradient_with_scaling(self):
-        with pm.Model() as model1:
-            genvar = pm.generator(gen1())
-            m = pm.Normal("m")
-            pm.Normal("n", observed=genvar, total_size=1000)
-            grad1 = model1.compile_fn(model1.dlogp(vars=m), point_fn=False)
-        with pm.Model() as model2:
-            m = pm.Normal("m")
-            shavar = pytensor.shared(np.ones((1000, 100)))
-            pm.Normal("n", observed=shavar)
-            grad2 = model2.compile_fn(model2.dlogp(vars=m), point_fn=False)
-
-        for i in range(10):
-            shavar.set_value(np.ones((100, 100)) * i)
-            g1 = grad1(1)
-            g2 = grad2(1)
-            np.testing.assert_almost_equal(g1, g2)
-
     def test_multidim_scaling(self):
         with pm.Model() as model0:
             pm.Normal("n", observed=[[1, 1], [1, 1]], total_size=[])
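
Reviewer note, not part of the patch itself: after this change, passing a generator to `observed` raises `TypeError` instead of emitting a `DeprecationWarning`. A minimal migration sketch for downstream code that used `pm.generator(...)` to stream batches, assuming the data fits in memory once materialized — the dataset, shapes, and `batch_size` below are hypothetical stand-ins:

```python
import numpy as np
import pymc as pm

# Hypothetical dataset: materialize up front what the old generator
# used to yield batch by batch.
data = np.random.rand(1000, 10)

with pm.Model():
    # Before (removed by this patch):
    #   pm.Normal("n", observed=pm.generator(gen()), total_size=1000)
    # After: pm.Minibatch (retained by this patch) draws a random subset
    # of rows at each evaluation, and total_size rescales the logp to the
    # full dataset, as before.
    mb = pm.Minibatch(data, batch_size=100)
    pm.Normal("n", mu=0, sigma=1, observed=mb, total_size=1000)
```

Truly out-of-core streaming has no direct replacement here; that is the use case the removed `DeprecationWarning` asked users to report via https://github.com/pymc-devs/pymc/issues.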