Skip to content

Commit

Permalink
Add many functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Ezibenroc committed Jan 20, 2024
1 parent 142e8f5 commit a1b3a70
Show file tree
Hide file tree
Showing 2 changed files with 335 additions and 18 deletions.
350 changes: 333 additions & 17 deletions pyroaring/abstract_bitmap.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -770,9 +770,9 @@ cdef class AbstractBitMap64:
"""
cdef croaring.roaring64_bitmap_t* _c_bitmap

def __cinit__(self, values=None, copy_on_write=False, optimize=True, no_init=False):
def __cinit__(self, values=None, optimize=True, no_init=False):
if no_init:
assert values is None and not copy_on_write
assert values is None
return
cdef vector[uint64_t] buff_vect
cdef uint64_t[:] buff
Expand Down Expand Up @@ -806,25 +806,18 @@ cdef class AbstractBitMap64:
if size > 0:
buff_vect = values
croaring.roaring64_bitmap_add_many(self._c_bitmap, size, &buff_vect[0])
# TODO:
#if not isinstance(values, AbstractBitMap64):
# croaring.roaring_bitmap_set_copy_on_write(self._c_bitmap, copy_on_write)
# self._h_val = 0
if optimize:
self.run_optimize()
# TODO: self.shrink_to_fit()

def __init__(self, values=None, copy_on_write=False, optimize=True):
def __init__(self, values=None, optimize=True):
"""
Construct an AbstractBitMap64 object, either empty or from an iterable.
The field copy_on_write has no effect (yet).
>>> BitMap()
BitMap([])
>>> BitMap([1, 123456789, 27])
BitMap([1, 27, 123456789])
>>> BitMap([1, 123456789, 27], copy_on_write=True)
>>> BitMap64()
BitMap64([])
>>> BitMap64([1, 123456789, 27])
BitMap([1, 27, 123456789])
"""

Expand All @@ -846,10 +839,6 @@ cdef class AbstractBitMap64:
if self._c_bitmap is not NULL:
croaring.roaring64_bitmap_free(self._c_bitmap)

def _check_compatibility(self, AbstractBitMap64 other):
if self.copy_on_write != other.copy_on_write:
raise ValueError('Cannot have interactions between bitmaps with and without copy_on_write.\n')

def __contains__(self, uint64_t value):
    """Return True if ``value`` is stored in the bitmap (``value in bitmap``)."""
    cdef croaring.roaring64_bitmap_t *bitmap = self._c_bitmap
    return croaring.roaring64_bitmap_contains(bitmap, value)

Expand Down Expand Up @@ -970,3 +959,330 @@ cdef class AbstractBitMap64:
2
"""
return croaring.roaring64_bitmap_rank(self._c_bitmap, value)

def next_set_bit(self, uint64_t value):
    """
    Return the smallest element of the bitmap that is larger or equal to ``value``.

    Raises ValueError when the bitmap holds no such element.

    >>> BitMap64([1, 2, 4]).next_set_bit(1)
    1
    >>> BitMap64([1, 2, 4]).next_set_bit(3)
    4
    >>> BitMap64([1, 2, 4]).next_set_bit(5)
    Traceback (most recent call last):
    ValueError: No value larger or equal to specified value.
    """
    try:
        # The first item produced by the iterator is the answer.
        found = next(self.iter_equal_or_larger(value))
    except StopIteration:
        raise ValueError('No value larger or equal to specified value.')
    return found

cdef int64_t _shift_index(self, int64_t index) except -1:
    """
    Normalise a possibly negative index into the range [0, len(self)).

    Raises IndexError when ``index`` falls outside [-len(self), len(self)).
    """
    cdef int64_t size = len(self)
    if not (-size <= index < size):
        raise IndexError('Index out of bound')
    # Negative indices count from the end, as for Python sequences.
    return index + size if index < 0 else index

cdef uint64_t _get_elt(self, int64_t index) except? 0:
    """
    Return the element found at position ``index`` (negative indices allowed).

    Raises IndexError (from _shift_index) when the index is out of bound and
    ValueError when the underlying select call reports a failure.
    """
    cdef uint64_t elt
    cdef uint64_t rank = self._shift_index(index)
    if croaring.roaring64_bitmap_select(self._c_bitmap, rank, &elt):
        return elt
    raise ValueError('Invalid rank')

cdef _get_slice(self, sl):
    """
    Return a new bitmap holding the elements selected by the slice ``sl``.

    Two strategies are used: a step of +1 or -1 is answered by intersecting
    the bitmap with the value range spanned by the two boundary elements,
    any other step is delegated to _generic_get_slice.
    """
    start, stop, step = sl.indices(len(self))
    ascending = step > 0
    if (ascending and start >= stop) or (not ascending and start <= stop):
        # The slice selects nothing.
        return self.__class__()
    positions = range(start, stop, step)
    assert len(positions) > 0
    if step == 1 or step == -1:
        sign = 1 if ascending else -1
        first_elt = self._get_elt(start)
        last_elt = self._get_elt(stop - sign)
        # Every element between the two boundaries is selected.
        return self & self.__class__(range(first_elt, last_elt + sign, step))
    # _generic_get_slice walks forward only: express the selected
    # positions as an increasing (start, stop, step) triple.
    if ascending:
        low, high = positions[0], positions[-1] + 1
    else:
        low, high = positions[-1], positions[0] + 1
    return self._generic_get_slice(low, high, abs(step))

cdef _generic_get_slice(self, uint64_t start, uint64_t stop, uint64_t step):
    """
    Return a new bitmap made of the elements at positions
    start, start+step, start+2*step, ... that are below position stop.

    Assume that start, stop and step > 0 and that the result will not be empty.

    The elements are read by chunks of max_count values starting from the
    element at position start; i_buff is the offset, inside the current
    chunk, of the next element to keep and i_glob is its position in the
    bitmap.
    """
    cdef croaring.roaring64_bitmap_t *result
    cdef croaring.roaring64_iterator_t *iterator
    cdef uint64_t count, max_count=256
    cdef uint64_t *buff
    cdef uint64_t i_loc=0, i_glob=start, i_buff=0
    # Resolve the first element before acquiring any C resource, so that
    # nothing is leaked if the lookup raises.
    first_elt = self._get_elt(start)
    buff = <uint64_t*>malloc(max_count * sizeof(uint64_t))
    if buff is NULL:
        raise MemoryError()
    result = croaring.roaring64_bitmap_create()
    iterator = croaring.roaring64_iterator_create(self._c_bitmap)
    valid = croaring.roaring64_iterator_move_equalorlarger(iterator, first_elt)
    assert valid
    while True:
        count = croaring.roaring64_iterator_read(iterator, buff, max_count)
        # Compact the selected elements at the beginning of the buffer
        # (i_loc <= i_buff, so no unread value is overwritten).
        while i_buff < max_count and i_glob < stop:
            buff[i_loc] = buff[i_buff]
            i_loc += 1
            i_buff += step
            i_glob += step
        if i_loc > 0:
            croaring.roaring64_bitmap_add_many(result, i_loc, buff)
        if count != max_count or i_glob >= stop:
            break
        i_loc = 0
        # Here i_buff >= max_count. Subtract one chunk only (and not a
        # modulo): when step > max_count the next element may be several
        # chunks away, in which case the following chunks are read and
        # skipped until i_buff falls inside the current one.
        i_buff -= max_count
    croaring.roaring64_iterator_free(iterator)
    free(buff)
    return self.from_ptr(result)

def __getitem__(self, value):
    """
    Return the element at the given index, or a new bitmap for a slice.

    Raises TypeError when ``value`` is neither an integer nor a slice.
    """
    if isinstance(value, int):
        return self._get_elt(value)
    elif isinstance(value, slice):
        return self._get_slice(value)
    else:
        # The exception must be raised, not returned as a regular value.
        raise TypeError('Indices must be integers or slices, not %s' % type(value))

def to_array(self):
    """
    Return an array.array containing the elements of the bitmap, in increasing order.
    It is equivalent to array.array('Q', self), but more efficient.
    >>> BitMap64([3, 12]).to_array()
    array('Q', [3, 12])
    """
    cdef uint64_t size = len(self)
    cdef uint64_t nb_read
    cdef array.array result
    cdef uint64_t[:] buff
    cdef croaring.roaring64_iterator_t *iterator
    if size == 0:
        return array.array('Q', [])
    result = array.array('Q')
    array.resize(result, size)
    buff = result
    iterator = croaring.roaring64_iterator_create(self._c_bitmap)
    # The read fills the buffer: it must not live inside an assert
    # statement, which is skipped when assertions are disabled.
    nb_read = croaring.roaring64_iterator_read(iterator, &buff[0], size)
    # Release the iterator before checking, so it is not leaked on failure.
    croaring.roaring64_iterator_free(iterator)
    assert nb_read == size
    return result


def copy(self):
    """
    Return a new bitmap of the same class, built from this one.

    >>> bm = BitMap64([3, 12])
    >>> bm2 = bm.copy()
    >>> bm == bm2
    True
    >>> bm.add(1)
    >>> bm == bm2
    False
    """
    bitmap_class = self.__class__
    return bitmap_class(self)

def isdisjoint(self, other):
    """
    Return True if two sets have a null intersection.

    >>> BitMap64([1, 2]).isdisjoint(BitMap64([3, 4]))
    True
    >>> BitMap64([1, 2, 3]).isdisjoint(BitMap64([3, 4]))
    False
    """
    # Only the existence of a common element matters, there is no need to
    # count all of them.
    return not self.intersect(other)

def issubset(self, other):
    """
    Report whether another set contains this set.

    >>> BitMap64([1, 2]).issubset(BitMap64([1, 2, 3, 4]))
    True
    >>> BitMap64([1, 2]).issubset(BitMap64([3, 4]))
    False
    """
    return self <= other

def issuperset(self, other):
    """
    Report whether this set contains another set.

    >>> BitMap64([1, 2, 3, 4]).issuperset(BitMap64([1, 2]))
    True
    >>> BitMap64([1, 2]).issuperset(BitMap64([3, 4]))
    False
    """
    return self >= other

def difference(*bitmaps):
    """
    Return the difference of two or more sets as a new set.
    (i.e. all elements that are in this set but not the others.)

    The result has the class of the first bitmap.

    >>> BitMap64.difference(BitMap64([1, 2, 3]), BitMap64([2, 20]), BitMap64([3, 30]))
    BitMap64([1])
    """
    size = len(bitmaps)
    cdef AbstractBitMap64 result, bm
    if size <= 1:
        return bitmaps[0].copy()
    elif size == 2:
        return bitmaps[0] - bitmaps[1]
    else:
        # Accumulate in place in a single temporary bitmap instead of
        # creating one intermediate bitmap per operand.
        result = BitMap64(bitmaps[0])
        for bm in bitmaps[1:]:
            result -= bm
        return bitmaps[0].__class__(result)


def symmetric_difference(self, other):
    """
    Return a new set with the elements that belong to exactly one of the two sets.

    >>> BitMap64([1, 2, 3]).symmetric_difference(BitMap64([2, 3, 4]))
    BitMap64([1, 4])
    """
    exclusive = self.__xor__(other)
    return exclusive

def union(*bitmaps):
    """
    Return a new bitmap holding every element found in at least one of the given bitmaps.

    The result has the class of the first bitmap.

    >>> BitMap64.union(BitMap64([3, 12]), BitMap64([5]), BitMap64([0, 10, 12]))
    BitMap64([0, 3, 5, 10, 12])
    """
    cdef AbstractBitMap64 result, bm
    first = bitmaps[0]
    if len(bitmaps) <= 1:
        return first.copy()
    if len(bitmaps) == 2:
        return first | bitmaps[1]
    # Three operands or more: accumulate in place in one temporary bitmap.
    result = BitMap64(first)
    for bm in bitmaps[1:]:
        result |= bm
    return first.__class__(result)

def intersection(*bitmaps):
    """
    Return a new bitmap holding the elements common to all the given bitmaps.

    The result has the class of the first bitmap.

    >>> BitMap64.intersection(BitMap64(range(0, 15)), BitMap64(range(5, 20)), BitMap64(range(10, 25)))
    BitMap64([10, 11, 12, 13, 14])
    """
    cdef AbstractBitMap64 result, bm
    first = bitmaps[0]
    if len(bitmaps) <= 1:
        return first.copy()
    if len(bitmaps) == 2:
        return first & bitmaps[1]
    # Three operands or more: accumulate in place in one temporary bitmap.
    result = BitMap64(first)
    for bm in bitmaps[1:]:
        result &= bm
    return first.__class__(result)

cdef binary_op(self, AbstractBitMap64 other, (croaring.roaring64_bitmap_t*)func(const croaring.roaring64_bitmap_t*, const croaring.roaring64_bitmap_t*) noexcept) noexcept:
    """Apply the C binary operation ``func`` to both bitmaps and wrap the resulting pointer with from_ptr."""
    return self.from_ptr(func(self._c_bitmap, other._c_bitmap))

def __or__(self, other):
    """Return ``self | other`` as a new bitmap; ``other`` must be an AbstractBitMap64."""
    cdef AbstractBitMap64 lhs = <AbstractBitMap64>self
    cdef AbstractBitMap64 rhs = <AbstractBitMap64?>other
    return lhs.binary_op(rhs, croaring.roaring64_bitmap_or)

def __and__(self, other):
    """Return ``self & other`` as a new bitmap; ``other`` must be an AbstractBitMap64."""
    cdef AbstractBitMap64 lhs = <AbstractBitMap64>self
    cdef AbstractBitMap64 rhs = <AbstractBitMap64?>other
    return lhs.binary_op(rhs, croaring.roaring64_bitmap_and)

def __xor__(self, other):
    """Return ``self ^ other`` as a new bitmap; ``other`` must be an AbstractBitMap64."""
    cdef AbstractBitMap64 lhs = <AbstractBitMap64>self
    cdef AbstractBitMap64 rhs = <AbstractBitMap64?>other
    return lhs.binary_op(rhs, croaring.roaring64_bitmap_xor)

def __sub__(self, other):
    """Return ``self - other`` as a new bitmap; ``other`` must be an AbstractBitMap64."""
    cdef AbstractBitMap64 lhs = <AbstractBitMap64>self
    cdef AbstractBitMap64 rhs = <AbstractBitMap64?>other
    return lhs.binary_op(rhs, croaring.roaring64_bitmap_andnot)

def union_cardinality(self, AbstractBitMap64 other):
    """
    Count the elements that belong to at least one of the two bitmaps.
    Same result as len(self | other), but faster.

    >>> BitMap64([3, 12]).union_cardinality(BitMap64([3, 5, 8]))
    4
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_or_cardinality(lhs, rhs)

def intersection_cardinality(self, AbstractBitMap64 other):
    """
    Count the elements that belong to both bitmaps.
    Same result as len(self & other), but faster.

    >>> BitMap64([3, 12]).intersection_cardinality(BitMap64([3, 5, 8]))
    1
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_and_cardinality(lhs, rhs)

def difference_cardinality(self, AbstractBitMap64 other):
    """
    Count the elements that belong to this bitmap but not to the other one.
    Same result as len(self - other), but faster.

    >>> BitMap64([3, 12]).difference_cardinality(BitMap64([3, 5, 8]))
    1
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_andnot_cardinality(lhs, rhs)

def symmetric_difference_cardinality(self, AbstractBitMap64 other):
    """
    Count the elements that belong to exactly one of the two bitmaps.
    Same result as len(self ^ other), but faster.

    >>> BitMap64([3, 12]).symmetric_difference_cardinality(BitMap64([3, 5, 8]))
    3
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_xor_cardinality(lhs, rhs)

def intersect(self, AbstractBitMap64 other):
    """
    Tell whether the two bitmaps share at least one element.
    Same result as len(self & other) > 0, but faster.

    >>> BitMap64([3, 12]).intersect(BitMap64([3, 18]))
    True
    >>> BitMap64([3, 12]).intersect(BitMap64([5, 18]))
    False
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_intersect(lhs, rhs)

def jaccard_index(self, AbstractBitMap64 other):
    """
    Return the Jaccard index of the two bitmaps.
    Same result as len(self&other)/len(self|other), but faster.
    See https://en.wikipedia.org/wiki/Jaccard_index

    >>> BitMap64([3, 10, 12]).jaccard_index(BitMap64([3, 18]))
    0.25
    """
    cdef croaring.roaring64_bitmap_t *lhs = self._c_bitmap
    cdef croaring.roaring64_bitmap_t *rhs = other._c_bitmap
    return croaring.roaring64_bitmap_jaccard_index(lhs, rhs)
3 changes: 2 additions & 1 deletion pyroaring/croaring.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,5 @@ cdef extern from "roaring.h":
bool roaring64_iterator_has_value(const roaring64_iterator_t *it)
bool roaring64_iterator_advance(roaring64_iterator_t *it)
uint64_t roaring64_iterator_value(const roaring64_iterator_t *it)
bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val)
bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val)
uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t count)

0 comments on commit a1b3a70

Please sign in to comment.