From 92205659300ec02cc5c454a4a546238f7b5da93b Mon Sep 17 00:00:00 2001
From: Kevin Meinhardt
Date: Fri, 15 Nov 2024 16:23:06 +0100
Subject: [PATCH] More efficient diff function removing quadratic iterable (#22859)

* More efficient diff function removing quadratic iterable

* fix format
---
 src/olympia/blocklist/mlbf.py            |  6 ++++--
 src/olympia/blocklist/tests/test_mlbf.py | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py
index 43eede4a427e..015db06ef19a 100644
--- a/src/olympia/blocklist/mlbf.py
+++ b/src/olympia/blocklist/mlbf.py
@@ -22,9 +22,11 @@
 def ordered_diff_lists(
     previous: List[str], current: List[str]
 ) -> Tuple[List[str], List[str], int]:
+    current_set = set(current)
+    previous_set = set(previous)
     # Use lists instead of sets to maintain order
-    extras = [x for x in current if x not in previous]
-    deletes = [x for x in previous if x not in current]
+    extras = [x for x in current if x not in previous_set]
+    deletes = [x for x in previous if x not in current_set]
     changed_count = len(extras) + len(deletes)
     return extras, deletes, changed_count
 
diff --git a/src/olympia/blocklist/tests/test_mlbf.py b/src/olympia/blocklist/tests/test_mlbf.py
index d087b9b011a3..19df2eb0e3cb 100644
--- a/src/olympia/blocklist/tests/test_mlbf.py
+++ b/src/olympia/blocklist/tests/test_mlbf.py
@@ -19,6 +19,7 @@
     MLBFDataBaseLoader,
     MLBFDataType,
     MLBFStorageLoader,
+    ordered_diff_lists,
 )
 
 
@@ -45,6 +46,27 @@ def _block_version(self, block, version, block_type=BlockType.BLOCKED):
         )
 
 
+class TestOrderedDiffLists(TestCase):
+    def test_return_added(self):
+        assert ordered_diff_lists(['a', 'b'], ['a', 'b', 'c']) == (['c'], [], 1)
+
+    def test_return_removed(self):
+        assert ordered_diff_lists(['a', 'b', 'c'], ['a', 'b']) == ([], ['c'], 1)
+
+    def test_return_added_and_removed(self):
+        assert ordered_diff_lists(['a', 'b', 'c'], ['b', 'c', 'd']) == (['d'], ['a'], 2)
+
+    def test_large_diff(self):
+        size = 2_000_000
+        even_items = [i for i in range(size) if i % 2 == 0]
+        odd_items = [i for i in range(size) if i % 2 == 1]
+        assert ordered_diff_lists(even_items, odd_items) == (
+            odd_items,
+            even_items,
+            size,
+        )
+
+
 class TestBaseMLBFLoader(_MLBFBase):
     class TestStaticLoader(BaseMLBFLoader):
         @cached_property