Skip to content

Commit

Permalink
Remove deprecated generator data (#7664)
Browse files Browse the repository at this point in the history
  • Loading branch information
wd60622 authored Feb 10, 2025
1 parent 358b825 commit 112af3e
Show file tree
Hide file tree
Showing 6 changed files with 3 additions and 327 deletions.
1 change: 0 additions & 1 deletion docs/source/api/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ Data
MutableData
get_data
Data
GeneratorAdapter
Minibatch
51 changes: 1 addition & 50 deletions pymc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,16 @@
from pytensor.scalar import Cast
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.random.basic import IntegersRV
from pytensor.tensor.type import TensorType
from pytensor.tensor.variable import TensorConstant, TensorVariable

import pymc as pm

from pymc.pytensorf import GeneratorOp, convert_data, smarttypeX
from pymc.pytensorf import convert_data
from pymc.vartypes import isgenerator

__all__ = [
"ConstantData",
"Data",
"GeneratorAdapter",
"Minibatch",
"MutableData",
"get_data",
Expand Down Expand Up @@ -86,51 +84,6 @@ def clone(self):
return cp


class GeneratorAdapter:
    """Class that helps infer data type of generator.
    It looks at the first item, preserving the order of the resulting generator.
    """

    def __init__(self, generator):
        # Peek at the first item to learn dtype/ndim; it is replayed on the
        # first __next__ call so iteration order is preserved.
        if not pm.vartypes.isgenerator(generator):
            raise TypeError("Object should be generator like")
        first_item = next(generator)
        self.test_value = smarttypeX(copy(first_item))
        # make pickling potentially possible
        self._yielded_test_value = False
        self.gen = generator
        broadcastable = (False,) * self.test_value.ndim
        self.tensortype = TensorType(self.test_value.dtype, broadcastable)

    def make_variable(self, gop, name=None):
        """Wrap *gop* in a ``GenTensorVariable`` carrying the inferred type."""
        variable = GenTensorVariable(gop, self.tensortype, name)
        variable.tag.test_value = self.test_value
        return variable

    # python3 generator
    def __next__(self):
        """Next value in the generator."""
        if self._yielded_test_value:
            return smarttypeX(copy(next(self.gen)))
        # First draw: replay the value consumed during __init__.
        self._yielded_test_value = True
        return self.test_value

    # python2 generator
    next = __next__

    def __iter__(self):
        """Return an iterator."""
        return self

    def __eq__(self, other):
        """Return true if both objects are actually the same."""
        return self is other

    def __hash__(self):
        """Return a hash of the object."""
        return hash(id(self))


class MinibatchIndexRV(IntegersRV):
    # Pretty-print name used when this RV is rendered in graphs / LaTeX.
    _print_name = ("minibatch_index", r"\operatorname{minibatch\_index}")

Expand Down Expand Up @@ -170,8 +123,6 @@ def is_valid_observed(v) -> bool:
isinstance(v.owner.op, MinibatchOp)
and all(is_valid_observed(inp) for inp in v.owner.inputs)
)
# Or Generator
or isinstance(v.owner.op, GeneratorOp)
)


Expand Down
107 changes: 1 addition & 106 deletions pymc/pytensorf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
walk,
)
from pytensor.graph.fg import FunctionGraph, Output
from pytensor.graph.op import Op
from pytensor.scalar.basic import Cast
from pytensor.scan.op import Scan
from pytensor.tensor.basic import _as_tensor_variable
Expand All @@ -63,10 +62,8 @@
"compile_pymc",
"cont_inputs",
"convert_data",
"convert_generator_data",
"convert_observed_data",
"floatX",
"generator",
"gradient",
"hessian",
"hessian_diag",
Expand All @@ -81,20 +78,10 @@
def convert_observed_data(data) -> np.ndarray | Variable:
    """Convert user provided dataset to accepted formats.

    Parameters
    ----------
    data
        User-supplied observed data. Generators are no longer supported.

    Returns
    -------
    np.ndarray | Variable
        The data converted via ``convert_data``.

    Raises
    ------
    TypeError
        If *data* is a generator; generator-backed data was removed, so we
        fail loudly instead of silently wrapping it (the dead
        ``return convert_generator_data(data)`` before the raise was
        unreachable leftover from the deprecated path and is removed).
    """
    if isgenerator(data):
        raise TypeError("Data passed to `observed` cannot be a generator.")
    return convert_data(data)


def convert_generator_data(data) -> TensorVariable:
    """Wrap generator data in a generator variable, warning that the feature is deprecated."""
    message = (
        "Generator data is deprecated and we intend to remove it."
        " If you disagree and need this, please get in touch via https://github.com/pymc-devs/pymc/issues."
    )
    # stacklevel=2 points the warning at the caller, not this shim.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return generator(data)


def convert_data(data) -> np.ndarray | Variable:
ret: np.ndarray | Variable
if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
Expand Down Expand Up @@ -625,98 +612,6 @@ def __call__(self, input):
return pytensor.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False)


class GeneratorOp(Op):
    """
    Generator Op is designed for storing python generators inside pytensor graph.

    __call__ creates TensorVariable
    It has 2 new methods
    - var.set_gen(gen): sets new generator
    - var.set_default(value): sets new default value (None erases default value)

    If generator is exhausted, variable will produce default value if it is not None,
    else raises `StopIteration` exception that can be caught on runtime.

    Parameters
    ----------
    gen: generator that implements __next__ (py3) or next (py2) method
        and yields np.arrays with same types
    default: np.array with the same type as generator produces
    """

    # The adapter is the Op's only prop, so two Ops compare equal only when
    # they wrap the same GeneratorAdapter instance.
    __props__ = ("generator",)

    def __init__(self, gen, default=None):
        warnings.warn(
            "generator data is deprecated and will be removed in a future release", FutureWarning
        )
        # Local import to avoid a circular dependency between
        # pymc.pytensorf and pymc.data.
        from pymc.data import GeneratorAdapter

        super().__init__()
        if not isinstance(gen, GeneratorAdapter):
            gen = GeneratorAdapter(gen)
        self.generator = gen
        self.set_default(default)

    def make_node(self, *inputs):
        # The Op takes no graph inputs; the generator is state carried on
        # the Op itself, so the Apply node has an empty input list.
        gen_var = self.generator.make_variable(self)
        return Apply(self, [], [gen_var])

    def perform(self, node, inputs, output_storage, params=None):
        # Draw the next value; fall back to the default (if any) once the
        # generator is exhausted, otherwise StopIteration propagates to the
        # caller at runtime.
        if self.default is not None:
            output_storage[0][0] = next(self.generator, self.default)
        else:
            output_storage[0][0] = next(self.generator)

    def do_constant_folding(self, fgraph, node):
        # Never fold: folding would freeze a single generator draw into the
        # graph as a constant.
        return False

    # Build the output with test values disabled — computing a test value
    # would consume an item from the generator as a side effect.
    __call__ = pytensor.config.change_flags(compute_test_value="off")(Op.__call__)

    def set_gen(self, gen):
        """Replace the wrapped generator; it must yield the same tensor type."""
        from pymc.data import GeneratorAdapter

        if not isinstance(gen, GeneratorAdapter):
            gen = GeneratorAdapter(gen)
        if not gen.tensortype == self.generator.tensortype:
            raise ValueError("New generator should yield the same type")
        self.generator = gen

    def set_default(self, value):
        """Set the value emitted after exhaustion (None erases the default)."""
        if value is None:
            self.default = None
        else:
            # Coerce to the generator's dtype, then require matching rank /
            # broadcast pattern so the default is type-compatible.
            value = np.asarray(value, self.generator.tensortype.dtype)
            t1 = (False,) * value.ndim
            t2 = self.generator.tensortype.broadcastable
            if not t1 == t2:
                raise ValueError("Default value should have the same type as generator")
            self.default = value


def generator(gen, default=None):
    """
    Create a generator variable with possibility to set default value and new generator.

    If generator is exhausted variable will produce default value if it is not None,
    else raises `StopIteration` exception that can be caught on runtime.

    Parameters
    ----------
    gen: generator that implements __next__ (py3) or next (py2) method
        and yields np.arrays with same types
    default: np.array with the same type as generator produces

    Returns
    -------
    TensorVariable
        It has 2 new methods
        - var.set_gen(gen): sets new generator
        - var.set_default(value): sets new default value (None erases default value)
    """
    # Build the Op first, then apply it to produce the output variable.
    op = GeneratorOp(gen, default)
    return op()


def ix_(*args):
"""
PyTensor np.ix_ analog.
Expand Down
95 changes: 1 addition & 94 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
# limitations under the License.

import io
import itertools as it

from os import path

import cloudpickle
import numpy as np
import pytensor
import pytensor.tensor as pt
Expand All @@ -29,7 +27,7 @@
import pymc as pm

from pymc.data import MinibatchOp
from pymc.pytensorf import GeneratorOp, floatX
from pymc.pytensorf import floatX


class TestData:
Expand Down Expand Up @@ -495,97 +493,6 @@ def integers_ndim(ndim):
i += 1


@pytest.mark.usefixtures("strict_float32")
class TestGenerator:
    """Tests for the (deprecated) generator-backed data machinery."""

    def test_basic(self):
        # The adapter's test value is the generator's first item, and the
        # compiled function replays the stream from item 0.
        generator = pm.GeneratorAdapter(integers())
        gop = GeneratorOp(generator)()
        assert gop.tag.test_value == np.float32(0)
        f = pytensor.function([], gop)
        assert f() == np.float32(0)
        assert f() == np.float32(1)
        for _ in range(2, 100):
            f()
        assert f() == np.float32(100)

    def test_ndim(self):
        # Arrays of every rank 0..9 should round-trip through the op.
        for ndim in range(10):
            res = list(it.islice(integers_ndim(ndim), 0, 2))
            generator = pm.GeneratorAdapter(integers_ndim(ndim))
            gop = GeneratorOp(generator)()
            f = pytensor.function([], gop)
            assert ndim == res[0].ndim
            np.testing.assert_equal(f(), res[0])
            np.testing.assert_equal(f(), res[1])

    def test_cloning_available(self):
        # A generator variable can be substituted with a shared variable
        # via clone_replace.
        gop = pm.generator(integers())
        res = gop**2
        shared = pytensor.shared(pm.floatX(10))
        res1 = pytensor.clone_replace(res, {gop: shared})
        f = pytensor.function([], res1)
        assert f() == np.float32(100)

    def test_default_value(self):
        def gen():
            for i in range(2):
                yield pm.floatX(np.ones((10, 10)) * i)

        # After exhaustion the op emits the default; a default with the
        # wrong shape must be rejected by set_default.
        gop = pm.generator(gen(), np.ones((10, 10)) * 10)
        f = pytensor.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        np.testing.assert_equal(np.ones((10, 10)) * 10, f())
        with pytest.raises(ValueError):
            gop.set_default(1)

    def test_set_gen_and_exc(self):
        def gen():
            for i in range(2):
                yield pm.floatX(np.ones((10, 10)) * i)

        # Without a default, exhaustion raises StopIteration; set_gen
        # restarts the stream with a fresh generator.
        gop = pm.generator(gen())
        f = pytensor.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        with pytest.raises(StopIteration):
            f()
        gop.set_gen(gen())
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())

    def test_pickling(self, datagen):
        # NOTE(review): the datagen fixture appears to be cloudpickle-able
        # while a live infinite generator is not — confirm against conftest.
        gen = pm.generator(datagen)
        cloudpickle.loads(cloudpickle.dumps(gen))
        bad_gen = pm.generator(integers())
        with pytest.raises(TypeError):
            cloudpickle.dumps(bad_gen)

    def test_gen_cloning_with_shape_change(self, datagen):
        # Cloning with a replacement of different shape should propagate
        # the new shape through scan.
        gen = pm.generator(datagen)
        gen_r = pt.random.normal(size=gen.shape).T
        X = gen.dot(gen_r)
        res, _ = pytensor.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
        assert res.eval().shape == (50,)
        shared = pytensor.shared(datagen.data.astype(gen.dtype))
        res2 = pytensor.clone_replace(res, {gen: shared**2})
        assert res2.eval().shape == (1000,)


def gen1():
    """Yield (10, 100) arrays of a running counter: all 0s, then all 1s, ..."""
    counter = 0
    while True:
        yield np.ones((10, 100)) * counter
        counter += 1


def gen2():
    """Yield (20, 100) arrays of a running counter: all 0s, then all 1s, ..."""
    counter = 0
    while True:
        yield np.ones((20, 100)) * counter
        counter += 1


@pytest.mark.usefixtures("strict_float32")
class TestMinibatch:
data = np.random.rand(30, 10)
Expand Down
Loading

0 comments on commit 112af3e

Please sign in to comment.