From 635bf2e94a21436fd75095b9ef57de735ad50907 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Wed, 11 Nov 2020 17:55:16 -0500 Subject: [PATCH] Add custom iterator class for BFS successors return (#185) * Add custom iterator class for BFS successors return This commit changes the return type of the bfs_successors function to be a custom class BFSSuccessors. This new return class implements both the sequence protocol and iterator protocol. This means that aside from explicit type checking it should be backwards compatible with the list that was previously returned. It can be used with either index-based access or iteration. This should be more efficient for large graphs because instead of doing the copy and type conversion and iterating over the entire nested Vec of results it instead does it per access (either via __getitem__ or __next__). It does add a small amount of overhead for smaller graphs but it is minimal since the function returns in microseconds in such cases so a 10-20% overhead is not a big deal. It's worth noting while this defers the type conversion, it does not defer execution like most Python iterators normally do. When bfs_successors is called it will still always fully traverse the graph. However, in practice the bottleneck for the bfs_successors function wasn't actually the graph traversal, but instead the type conversion. Related to #71 * Only implement sequence protocol Using the sequence protocol we can still get an implicit iterator by just casting it on the Python side. This will still get us the lazy type conversion but simplify the API and also make the behavior more consistent. At the same time to ensure we're handling negative indices correctly a test method is added to verify that a negative index access to the sequence works as expected. 
* Update src/lib.rs Co-authored-by: Kevin Krsulich --- docs/source/api.rst | 8 ++++ src/iterators.rs | 63 +++++++++++++++++++++++++++++ src/lib.rs | 28 +++++++++---- tests/test_pred_succ.py | 87 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 175 insertions(+), 11 deletions(-) create mode 100644 src/iterators.rs diff --git a/docs/source/api.rst b/docs/source/api.rst index 85c3d90c1..5c96e26c4 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -95,3 +95,11 @@ Exceptions retworkx.NoSuitableNeighbors retworkx.NoPathFound retworkx.NullGraph + +Return Iterator Types +--------------------- + +.. autosummary:: + :toctree: stubs + + retworkx.BFSSuccessors \ No newline at end of file diff --git a/src/iterators.rs b/src/iterators.rs new file mode 100644 index 000000000..7cde889e1 --- /dev/null +++ b/src/iterators.rs @@ -0,0 +1,63 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +use std::convert::TryInto; + +use pyo3::class::PySequenceProtocol; +use pyo3::exceptions::PyIndexError; +use pyo3::prelude::*; + +/// A custom class for the return from :func:`retworkx.bfs_successors` +/// +/// This class is a container class for the results of the +/// :func:`retworkx.bfs_successors` function. It implements the Python +/// sequence protocol. So you can treat the return as read-only +/// sequence/list that is integer indexed. If you want to use it as an +/// iterator you can by wrapping it in an ``iter()`` that will yield the +/// results in order. 
+/// +/// For example:: +/// +/// import retworkx +/// +/// graph = retworkx.generators.directed_path_graph(5) +/// bfs_succ = retworkx.bfs_successors(graph, 0) +/// # Index based access +/// third_element = bfs_succ[2] +/// # Use as iterator +/// bfs_iter = iter(bfs_succ) +/// first_element = next(bfs_iter) +/// second_element = next(bfs_iter) +/// +#[pyclass(module = "retworkx")] +pub struct BFSSuccessors { + pub bfs_successors: Vec<(PyObject, Vec)>, + pub index: usize, +} + +#[pyproto] +impl PySequenceProtocol for BFSSuccessors { + fn __len__(&self) -> PyResult { + Ok(self.bfs_successors.len()) + } + + fn __getitem__( + &'p self, + idx: isize, + ) -> PyResult<(PyObject, Vec)> { + if idx >= self.bfs_successors.len().try_into().unwrap() { + Err(PyIndexError::new_err(format!("Invalid index, {}", idx))) + } else { + Ok(self.bfs_successors[idx as usize].clone()) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index d5a1f4a57..397de11d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ mod dijkstra; mod dot_utils; mod generators; mod graph; +mod iterators; mod k_shortest_path; use std::cmp::{Ordering, Reverse}; @@ -429,31 +430,41 @@ fn graph_dfs_edges( /// :param PyDiGraph graph: The DAG to get the bfs_successors from /// :param int node: The index of the dag node to get the bfs successors for /// -/// :returns: A list of nodes's data and their children in bfs order -/// :rtype: list +/// :returns: A list of nodes's data and their children in bfs order. The +/// BFSSuccessors class that is returned is a custom container class that +/// implements the sequence protocol. This can be used as a python list +/// with index based access. 
+/// :rtype: BFSSuccessors #[pyfunction] #[text_signature = "(graph, node, /)"] fn bfs_successors( py: Python, graph: &digraph::PyDiGraph, node: usize, -) -> PyResult { +) -> PyResult { let index = NodeIndex::new(node); let mut bfs = Bfs::new(graph, index); - let mut out_list: Vec<(&PyObject, Vec<&PyObject>)> = Vec::new(); + let mut out_list: Vec<(PyObject, Vec)> = Vec::new(); while let Some(nx) = bfs.next(graph) { let children = graph .graph .neighbors_directed(nx, petgraph::Direction::Outgoing); - let mut succesors: Vec<&PyObject> = Vec::new(); + let mut succesors: Vec = Vec::new(); for succ in children { - succesors.push(graph.graph.node_weight(succ).unwrap()); + succesors + .push(graph.graph.node_weight(succ).unwrap().clone_ref(py)); } if !succesors.is_empty() { - out_list.push((graph.graph.node_weight(nx).unwrap(), succesors)); + out_list.push(( + graph.graph.node_weight(nx).unwrap().clone_ref(py), + succesors, + )); } } - Ok(PyList::new(py, out_list).into()) + Ok(iterators::BFSSuccessors { + bfs_successors: out_list, + index: 0, + }) } /// Return the ancestors of a node in a graph. 
@@ -2507,6 +2518,7 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(graph_k_shortest_path_lengths))?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_wrapped(wrap_pymodule!(generators))?; Ok(()) } diff --git a/tests/test_pred_succ.py b/tests/test_pred_succ.py index 4148793e4..96f7ee64d 100644 --- a/tests/test_pred_succ.py +++ b/tests/test_pred_succ.py @@ -85,7 +85,7 @@ def test_single_successor(self): node_c = dag.add_child(node_b, 'c', {'a': 2}) dag.add_child(node_c, 'd', {'a': 1}) res = retworkx.bfs_successors(dag, node_b) - self.assertEqual([('b', ['c']), ('c', ['d'])], res) + self.assertEqual([('b', ['c']), ('c', ['d'])], list(res)) def test_many_children(self): dag = retworkx.PyDAG() @@ -96,7 +96,7 @@ def test_many_children(self): self.assertEqual([('a', [{'numeral': 9}, {'numeral': 8}, {'numeral': 7}, {'numeral': 6}, {'numeral': 5}, {'numeral': 4}, {'numeral': 3}, {'numeral': 2}, - {'numeral': 1}, {'numeral': 0}])], res) + {'numeral': 1}, {'numeral': 0}])], list(res)) def test_bfs_succesors(self): dag = retworkx.PyDAG() @@ -124,4 +124,85 @@ def test_bfs_succesors(self): } self.assertEqual(expected, res) self.assertEqual([(7, [8]), (8, [9]), (9, [10])], - retworkx.bfs_successors(dag, node_h)) + list(retworkx.bfs_successors(dag, node_h))) + + def test_bfs_successors_sequence(self): + dag = retworkx.PyDAG() + node_a = dag.add_node(0) + node_b = dag.add_child(node_a, 1, {}) + node_c = dag.add_child(node_b, 2, {}) + node_d = dag.add_child(node_c, 3, {}) + node_e = dag.add_child(node_d, 4, {}) + node_f = dag.add_child(node_e, 5, {}) + dag.add_child(node_f, 6, {}) + node_h = dag.add_child(node_c, 7, {}) + node_i = dag.add_child(node_h, 8, {}) + node_j = dag.add_child(node_i, 9, {}) + dag.add_child(node_j, 10, {}) + res = retworkx.bfs_successors(dag, node_b) + expected = [ + (1, [2]), + (2, [7, 3]), + (7, [8]), + (3, [4]), + (8, [9]), + (4, [5]), + (9, [10]), + (5, [6]) + ] + for index, expected_value in 
enumerate(expected): + self.assertEqual((res[index][0], res[index][1]), + expected_value) + + def test_bfs_successors_sequence_invalid_index(self): + dag = retworkx.PyDAG() + node_a = dag.add_node(0) + node_b = dag.add_child(node_a, 1, {}) + node_c = dag.add_child(node_b, 2, {}) + node_d = dag.add_child(node_c, 3, {}) + node_e = dag.add_child(node_d, 4, {}) + node_f = dag.add_child(node_e, 5, {}) + dag.add_child(node_f, 6, {}) + node_h = dag.add_child(node_c, 7, {}) + node_i = dag.add_child(node_h, 8, {}) + node_j = dag.add_child(node_i, 9, {}) + dag.add_child(node_j, 10, {}) + res = retworkx.bfs_successors(dag, node_b) + with self.assertRaises(IndexError): + res[8] + + def test_bfs_successors_sequence_negative_index(self): + dag = retworkx.PyDAG() + node_a = dag.add_node(0) + node_b = dag.add_child(node_a, 1, {}) + node_c = dag.add_child(node_b, 2, {}) + node_d = dag.add_child(node_c, 3, {}) + node_e = dag.add_child(node_d, 4, {}) + node_f = dag.add_child(node_e, 5, {}) + dag.add_child(node_f, 6, {}) + node_h = dag.add_child(node_c, 7, {}) + node_i = dag.add_child(node_h, 8, {}) + node_j = dag.add_child(node_i, 9, {}) + dag.add_child(node_j, 10, {}) + res = retworkx.bfs_successors(dag, node_b) + self.assertEqual((5, [6]), res[-1]) + self.assertEqual((4, [5]), res[-3]) + + def test_bfs_successors_sequence_stop_iterator(self): + dag = retworkx.PyDAG() + node_a = dag.add_node(0) + node_b = dag.add_child(node_a, 1, {}) + node_c = dag.add_child(node_b, 2, {}) + node_d = dag.add_child(node_c, 3, {}) + node_e = dag.add_child(node_d, 4, {}) + node_f = dag.add_child(node_e, 5, {}) + dag.add_child(node_f, 6, {}) + node_h = dag.add_child(node_c, 7, {}) + node_i = dag.add_child(node_h, 8, {}) + node_j = dag.add_child(node_i, 9, {}) + dag.add_child(node_j, 10, {}) + res = iter(retworkx.bfs_successors(dag, node_b)) + for _ in range(8): + next(res) + with self.assertRaises(StopIteration): + next(res)