Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove deprecated generator data #7664

Merged
merged 6 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/source/api/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ Data
MutableData
get_data
Data
GeneratorAdapter
Minibatch
51 changes: 1 addition & 50 deletions pymc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,16 @@
from pytensor.scalar import Cast
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.random.basic import IntegersRV
from pytensor.tensor.type import TensorType
from pytensor.tensor.variable import TensorConstant, TensorVariable

import pymc as pm

from pymc.pytensorf import GeneratorOp, convert_data, smarttypeX
from pymc.pytensorf import convert_data
from pymc.vartypes import isgenerator

__all__ = [
"ConstantData",
"Data",
"GeneratorAdapter",
"Minibatch",
"MutableData",
"get_data",
Expand Down Expand Up @@ -86,51 +84,6 @@ def clone(self):
return cp


class GeneratorAdapter:
    """Class that helps infer data type of generator.

    Eagerly consumes the first item to determine dtype/ndim, then replays it so
    the order of the resulting generator is preserved.
    """

    def make_variable(self, gop, name=None):
        # Wrap the op in a tensor variable carrying the inferred type and
        # the eagerly-consumed first item as its test value.
        variable = GenTensorVariable(gop, self.tensortype, name)
        variable.tag.test_value = self.test_value
        return variable

    def __init__(self, generator):
        if not pm.vartypes.isgenerator(generator):
            raise TypeError("Object should be generator like")
        # Pull one item up front so dtype and ndim can be inferred.
        self.test_value = smarttypeX(copy(next(generator)))
        # make pickling potentially possible
        self._yielded_test_value = False
        self.gen = generator
        self.tensortype = TensorType(self.test_value.dtype, (False,) * self.test_value.ndim)

    # python3 generator
    def __next__(self):
        """Next value in the generator."""
        if self._yielded_test_value:
            return smarttypeX(copy(next(self.gen)))
        # First request replays the item consumed during type inference.
        self._yielded_test_value = True
        return self.test_value

    # python2 generator
    next = __next__

    def __iter__(self):
        """Return an iterator."""
        return self

    def __eq__(self, other):
        """Return true if both objects are actually the same."""
        return self is other

    def __hash__(self):
        """Return a hash of the object."""
        return hash(id(self))


class MinibatchIndexRV(IntegersRV):
    # Integer random variable used for minibatch index draws; only customizes
    # the name shown in graph printouts.
    _print_name = ("minibatch_index", r"\operatorname{minibatch\_index}")

Expand Down Expand Up @@ -170,8 +123,6 @@ def is_valid_observed(v) -> bool:
isinstance(v.owner.op, MinibatchOp)
and all(is_valid_observed(inp) for inp in v.owner.inputs)
)
# Or Generator
or isinstance(v.owner.op, GeneratorOp)
)


Expand Down
107 changes: 1 addition & 106 deletions pymc/pytensorf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
walk,
)
from pytensor.graph.fg import FunctionGraph, Output
from pytensor.graph.op import Op
from pytensor.scalar.basic import Cast
from pytensor.scan.op import Scan
from pytensor.tensor.basic import _as_tensor_variable
Expand All @@ -63,10 +62,8 @@
"compile_pymc",
"cont_inputs",
"convert_data",
"convert_generator_data",
"convert_observed_data",
"floatX",
"generator",
"gradient",
"hessian",
"hessian_diag",
Expand All @@ -81,20 +78,10 @@
def convert_observed_data(data) -> np.ndarray | Variable:
    """Convert user provided dataset to accepted formats.

    Parameters
    ----------
    data
        Any input accepted by ``convert_data``. Generators are rejected,
        since generator-backed observed data is no longer supported.

    Returns
    -------
    np.ndarray | Variable
        The converted data.

    Raises
    ------
    TypeError
        If ``data`` is a generator.
    """
    # Fail fast on generators instead of the (removed) deprecated wrapping.
    if isgenerator(data):
        raise TypeError("Data passed to `observed` cannot be a generator.")
    return convert_data(data)


def convert_generator_data(data) -> TensorVariable:
    """Wrap generator data in a symbolic generator variable (deprecated)."""
    message = (
        "Generator data is deprecated and we intend to remove it."
        " If you disagree and need this, please get in touch via https://github.com/pymc-devs/pymc/issues."
    )
    # stacklevel=2 points the warning at the caller, not this helper.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return generator(data)


def convert_data(data) -> np.ndarray | Variable:
ret: np.ndarray | Variable
if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
Expand Down Expand Up @@ -625,98 +612,6 @@
return pytensor.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False)


class GeneratorOp(Op):
    """
    Generator Op is designed for storing python generators inside pytensor graph.

    __call__ creates TensorVariable
    It has 2 new methods
    - var.set_gen(gen): sets new generator
    - var.set_default(value): sets new default value (None erases default value)

    If generator is exhausted, variable will produce default value if it is not None,
    else raises `StopIteration` exception that can be caught on runtime.

    Parameters
    ----------
    gen: generator that implements __next__ (py3) or next (py2) method
        and yields np.arrays with same types
    default: np.array with the same type as generator produces
    """

    # Op identity/equality is determined by the wrapped generator adapter.
    __props__ = ("generator",)

    def __init__(self, gen, default=None):
        warnings.warn(
            "generator data is deprecated and will be removed in a future release", FutureWarning
        )
        # NOTE(review): imported locally, presumably to avoid a circular
        # import between pymc.pytensorf and pymc.data — confirm.
        from pymc.data import GeneratorAdapter

        super().__init__()
        # Accept raw generators by wrapping them in the type-inferring adapter.
        if not isinstance(gen, GeneratorAdapter):
            gen = GeneratorAdapter(gen)
        self.generator = gen
        self.set_default(default)

    def make_node(self, *inputs):
        # The op takes no symbolic inputs; the output's type comes from the adapter.
        gen_var = self.generator.make_variable(self)
        return Apply(self, [], [gen_var])

    def perform(self, node, inputs, output_storage, params=None):
        # Each evaluation advances the generator; with a default set, exhaustion
        # yields the default instead of raising StopIteration.
        if self.default is not None:
            output_storage[0][0] = next(self.generator, self.default)
        else:
            output_storage[0][0] = next(self.generator)

    def do_constant_folding(self, fgraph, node):
        # Never constant-fold: every evaluation must pull a fresh item.
        return False

    # Build variables with test-value computation disabled — NOTE(review):
    # presumably because computing a test value would consume generator items;
    # confirm against pytensor semantics.
    __call__ = pytensor.config.change_flags(compute_test_value="off")(Op.__call__)

    def set_gen(self, gen):
        """Replace the underlying generator; the tensor type must match."""
        from pymc.data import GeneratorAdapter

        if not isinstance(gen, GeneratorAdapter):
            gen = GeneratorAdapter(gen)
        if not gen.tensortype == self.generator.tensortype:
            raise ValueError("New generator should yield the same type")
        self.generator = gen

    def set_default(self, value):
        """Set the value produced after exhaustion (None clears the default)."""
        if value is None:
            self.default = None
        else:
            # Coerce to the generator's dtype, then require identical
            # broadcastable pattern (same ndim) as the generator's type.
            value = np.asarray(value, self.generator.tensortype.dtype)
            t1 = (False,) * value.ndim
            t2 = self.generator.tensortype.broadcastable
            if not t1 == t2:
                raise ValueError("Default value should have the same type as generator")
            self.default = value


def generator(gen, default=None):
    """
    Create a generator variable with possibility to set default value and new generator.

    If generator is exhausted variable will produce default value if it is not None,
    else raises `StopIteration` exception that can be caught on runtime.

    Parameters
    ----------
    gen: generator that implements __next__ (py3) or next (py2) method
        and yields np.arrays with same types
    default: np.array with the same type as generator produces

    Returns
    -------
    TensorVariable
        It has 2 new methods
        - var.set_gen(gen): sets new generator
        - var.set_default(value): sets new default value (None erases default value)
    """
    # Build the op, then apply it with no inputs to get the output variable.
    op = GeneratorOp(gen, default)
    return op()


def ix_(*args):
"""
PyTensor np.ix_ analog.
Expand Down
95 changes: 1 addition & 94 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
# limitations under the License.

import io
import itertools as it

from os import path

import cloudpickle
import numpy as np
import pytensor
import pytensor.tensor as pt
Expand All @@ -29,7 +27,7 @@
import pymc as pm

from pymc.data import MinibatchOp
from pymc.pytensorf import GeneratorOp, floatX
from pymc.pytensorf import floatX


class TestData:
Expand Down Expand Up @@ -495,97 +493,6 @@ def integers_ndim(ndim):
i += 1


@pytest.mark.usefixtures("strict_float32")
class TestGenerator:
    # Tests for the deprecated generator-backed data machinery
    # (GeneratorAdapter / GeneratorOp / pm.generator).

    def test_basic(self):
        # Each evaluation of the compiled function advances the generator,
        # yielding consecutive integers as float32.
        generator = pm.GeneratorAdapter(integers())
        gop = GeneratorOp(generator)()
        assert gop.tag.test_value == np.float32(0)
        f = pytensor.function([], gop)
        assert f() == np.float32(0)
        assert f() == np.float32(1)
        for _ in range(2, 100):
            f()
        assert f() == np.float32(100)

    def test_ndim(self):
        # Arrays of every rank 0..9 should round-trip through the op unchanged.
        for ndim in range(10):
            res = list(it.islice(integers_ndim(ndim), 0, 2))
            generator = pm.GeneratorAdapter(integers_ndim(ndim))
            gop = GeneratorOp(generator)()
            f = pytensor.function([], gop)
            assert ndim == res[0].ndim
            np.testing.assert_equal(f(), res[0])
            np.testing.assert_equal(f(), res[1])

    def test_cloning_available(self):
        # A generator variable can be swapped for a shared variable via
        # clone_replace; downstream expressions then use the replacement.
        gop = pm.generator(integers())
        res = gop**2
        shared = pytensor.shared(pm.floatX(10))
        res1 = pytensor.clone_replace(res, {gop: shared})
        f = pytensor.function([], res1)
        assert f() == np.float32(100)

    def test_default_value(self):
        def gen():
            for i in range(2):
                yield pm.floatX(np.ones((10, 10)) * i)

        # After the 2-item generator is exhausted, evaluations fall back to
        # the configured default; a scalar default of wrong shape is rejected.
        gop = pm.generator(gen(), np.ones((10, 10)) * 10)
        f = pytensor.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        np.testing.assert_equal(np.ones((10, 10)) * 10, f())
        with pytest.raises(ValueError):
            gop.set_default(1)

    def test_set_gen_and_exc(self):
        def gen():
            for i in range(2):
                yield pm.floatX(np.ones((10, 10)) * i)

        # Without a default, exhaustion surfaces StopIteration at runtime;
        # set_gen installs a fresh generator and evaluation resumes.
        gop = pm.generator(gen())
        f = pytensor.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        with pytest.raises(StopIteration):
            f()
        gop.set_gen(gen())
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())

    def test_pickling(self, datagen):
        # A generator built from the datagen fixture pickles; one built from
        # a raw python generator does not (TypeError from cloudpickle).
        gen = pm.generator(datagen)
        cloudpickle.loads(cloudpickle.dumps(gen))
        bad_gen = pm.generator(integers())
        with pytest.raises(TypeError):
            cloudpickle.dumps(bad_gen)

    def test_gen_cloning_with_shape_change(self, datagen):
        # Replacing the generator with a differently-shaped shared variable
        # changes the shape of downstream scan results accordingly.
        gen = pm.generator(datagen)
        gen_r = pt.random.normal(size=gen.shape).T
        X = gen.dot(gen_r)
        res, _ = pytensor.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
        assert res.eval().shape == (50,)
        shared = pytensor.shared(datagen.data.astype(gen.dtype))
        res2 = pytensor.clone_replace(res, {gen: shared**2})
        assert res2.eval().shape == (1000,)


def gen1():
    """Yield an endless stream of (10, 100) arrays: all 0s, then all 1s, all 2s, ..."""
    counter = 0
    while True:
        yield np.ones((10, 100)) * counter
        counter += 1


def gen2():
    """Yield an endless stream of (20, 100) arrays: all 0s, then all 1s, all 2s, ..."""
    counter = 0
    while True:
        yield np.ones((20, 100)) * counter
        counter += 1


@pytest.mark.usefixtures("strict_float32")
class TestMinibatch:
data = np.random.rand(30, 10)
Expand Down
Loading