Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
7b87224
Introducing Zapx V17
CascadingRadium Nov 11, 2025
647ea3f
remove debug print statements
CascadingRadium Nov 11, 2025
264f04e
Apply suggestions from code review
CascadingRadium Nov 11, 2025
df2401a
small error msg fix
CascadingRadium Nov 11, 2025
782a36c
fix footer
CascadingRadium Nov 11, 2025
1173c95
small fix in zap.md
CascadingRadium Nov 11, 2025
4780ed1
bug fixes and new features.
CascadingRadium Sep 13, 2025
af6c696
Update nested_cache.go
CascadingRadium Sep 13, 2025
3747c03
Update nested_cache.go
CascadingRadium Sep 13, 2025
e659767
Update merge.go
CascadingRadium Sep 13, 2025
aab8129
simplify
CascadingRadium Sep 13, 2025
a994bbd
Update nested_cache.go
CascadingRadium Sep 13, 2025
78d68b4
Update nested_cache.go
CascadingRadium Sep 13, 2025
9e51f9c
Update segment.go
CascadingRadium Sep 13, 2025
af9f3a4
Update segment.go
CascadingRadium Sep 13, 2025
460e568
Update segment.go
CascadingRadium Sep 13, 2025
d459929
Update segment.go
CascadingRadium Sep 13, 2025
45051aa
Update segment.go
CascadingRadium Sep 13, 2025
43d0c99
comments
CascadingRadium Sep 13, 2025
3fdd593
fix comments
CascadingRadium Sep 13, 2025
6c7a1bc
fix count API part1
CascadingRadium Oct 14, 2025
1070303
performance improvement
CascadingRadium Oct 14, 2025
38ec1ef
gomod changes
CascadingRadium Oct 16, 2025
2057857
cache improvement
CascadingRadium Oct 20, 2025
85aa9c9
code review
CascadingRadium Oct 23, 2025
c98b0c5
add zapx.md
CascadingRadium Nov 13, 2025
71b1151
add cmd line tooling
CascadingRadium Nov 13, 2025
5b46683
Apply suggestions from code review
CascadingRadium Nov 13, 2025
763ba00
reset to 16 for test
CascadingRadium Nov 17, 2025
3a14cca
revert to 17
CascadingRadium Nov 19, 2025
933cdfe
fix cmd line tool
CascadingRadium Nov 19, 2025
a091db2
Apply suggestions from code review
CascadingRadium Nov 19, 2025
c20a8b3
temp change to 16
CascadingRadium Nov 19, 2025
21873f9
perf optimization
CascadingRadium Nov 26, 2025
064c9a0
perf opt 3
CascadingRadium Nov 27, 2025
77bf6e8
nestedCache v2
CascadingRadium Nov 27, 2025
9c9f86b
cache optimization
CascadingRadium Nov 27, 2025
9032692
use clear instead
CascadingRadium Nov 28, 2025
43825ef
use uvarint
CascadingRadium Nov 28, 2025
3d8762b
reuse edgeList
CascadingRadium Nov 28, 2025
0e5ae09
add guard
CascadingRadium Nov 28, 2025
43688ea
Apply suggestions from code review
CascadingRadium Nov 28, 2025
0984d43
fix md
CascadingRadium Nov 30, 2025
ddf2d5a
Update github actions workflow (#350)
CascadingRadium Dec 10, 2025
b619c64
Moved vectorIndexWrapper implementations into a struct
Likith101 Dec 10, 2025
c73d9fc
Apply suggestion from @Copilot
abhinavdangeti Dec 10, 2025
5425f7d
fix tests
CascadingRadium Dec 10, 2025
dbd7e53
fix tests
CascadingRadium Dec 10, 2025
683ac3f
fix comments
CascadingRadium Dec 10, 2025
52630c6
rebase
CascadingRadium Dec 10, 2025
297d353
fix typos
CascadingRadium Dec 10, 2025
4bbbd0d
resolve comments
CascadingRadium Dec 10, 2025
d0e645a
add explicit l2 case
CascadingRadium Dec 10, 2025
466697a
rename var
CascadingRadium Dec 10, 2025
aa3db82
Addressing reviews
Likith101 Dec 11, 2025
df85930
Merge branch 'vecWrapper' into knnDup
CascadingRadium Dec 11, 2025
d04b0bd
change var name
CascadingRadium Dec 11, 2025
6c506d8
Merge branch 'knnDup' into merge
CascadingRadium Dec 11, 2025
7f8ab8d
Merge branch 'merge' into nestedFields
CascadingRadium Dec 11, 2025
c1fa881
knn support
CascadingRadium Dec 11, 2025
72ccec0
fox reuse
CascadingRadium Dec 11, 2025
dd4db74
Minor improvements
Likith101 Dec 11, 2025
f86bbbf
Merge branch 'vecWrapper' into knnDup
CascadingRadium Dec 11, 2025
4c50856
resolve comments
CascadingRadium Dec 11, 2025
23bb1e5
Merge branch 'knnDup' into merge
CascadingRadium Dec 11, 2025
f6d6ad1
Merge branch 'merge' into nestedFields
CascadingRadium Dec 11, 2025
2ffff1e
Minor bug fix
Likith101 Dec 11, 2025
8cb4bbd
Merge branch 'vecWrapper' into knnDup
CascadingRadium Dec 11, 2025
b4d5ebe
Merge branch 'knnDup' into merge
CascadingRadium Dec 11, 2025
ae9ef74
Merge branch 'merge' into nestedFields
CascadingRadium Dec 11, 2025
7ca55d8
bug fix
CascadingRadium Dec 12, 2025
aa6c793
Merge branch 'unstable-v17' into nestedFields
CascadingRadium Jan 12, 2026
8816f8a
go mod tidy
CascadingRadium Jan 12, 2026
8fe0b4e
fix edge cases
CascadingRadium Jan 12, 2026
e087714
typos
CascadingRadium Jan 12, 2026
dd49bf0
Merge remote-tracking branch 'origin/unstable-v17' into nestedFields
abhinavdangeti Jan 12, 2026
220effd
code review
CascadingRadium Jan 13, 2026
b7bc092
add unit tests
CascadingRadium Jan 13, 2026
542af27
add more tests
CascadingRadium Jan 13, 2026
e7d7643
optimization
CascadingRadium Jan 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ on:
push:
branches:
- master
- v16.x
- v15.x
- v14.x
- v13.x
Expand Down
28 changes: 0 additions & 28 deletions .golangci.yml

This file was deleted.

7 changes: 7 additions & 0 deletions build.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkMode uint32, numDocs uint64
invIndexCache: newInvertedIndexCache(),
vecIndexCache: newVectorIndexCache(),
synIndexCache: newSynonymIndexCache(),
nstIndexCache: newNestedIndexCache(),
// the following fields get populated by loadFields
fieldsMap: make(map[string]uint16),
fieldsOptions: make(map[string]index.FieldIndexingOptions),
Expand All @@ -189,5 +190,11 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkMode uint32, numDocs uint64
return nil, err
}

// initialize any of the caches if needed
err = sb.nstIndexCache.initialize(sb.numDocs, sb.getEdgeListOffset(), sb.mem)
if err != nil {
return nil, err
}

return sb, nil
}
80 changes: 80 additions & 0 deletions cmd/zap/cmd/edge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"encoding/binary"
"fmt"

"github.com/spf13/cobra"
)

// edgeListCmd implements the "edgeList" sub-command: it decodes the nested
// document edge list stored in the segment and prints every
// child -> parent document number pair.
//
// Layout as decoded below: a uvarint edge count, followed by that many
// (child, parent) pairs, each value encoded as a uvarint.
var edgeListCmd = &cobra.Command{
	Use:   "edgeList",
	Short: "prints the edge list for nested documents",
	Long:  `The edgeList command will print the edge list for nested documents in the segment.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		edgeListAddr, err := segment.EdgeListAddr()
		if err != nil {
			return fmt.Errorf("error getting edge list: %w", err)
		}
		// an address of zero means the segment carries no edge list section
		if edgeListAddr == 0 {
			fmt.Println("no edge list present")
			return nil
		}
		data := segment.Data()
		// pos stores the current read position
		pos := edgeListAddr

		// readUvarint decodes a single uvarint at pos and advances pos past it.
		// It slices to the end of data rather than to pos+MaxVarintLen64, so a
		// value sitting close to the end of the file (or a bogus address)
		// yields an error instead of a slice-bounds panic. binary.Uvarint
		// itself never consumes more than MaxVarintLen64 bytes.
		readUvarint := func(what string) (uint64, error) {
			if pos >= uint64(len(data)) {
				return 0, fmt.Errorf("error reading %s in nested edge list", what)
			}
			val, read := binary.Uvarint(data[pos:])
			if read <= 0 {
				return 0, fmt.Errorf("error reading %s in nested edge list", what)
			}
			pos += uint64(read)
			return val, nil
		}

		// read number of nested documents which is also the number of edges
		numEdges, err := readUvarint("number of edges")
		if err != nil {
			return err
		}
		// if no edges or no nested documents, return
		if numEdges == 0 {
			fmt.Println("no nested documents present")
			return nil
		}
		// every edge needs at least two bytes (one per uvarint), so a count
		// larger than that cannot be valid; reject it before using it as an
		// allocation size
		if remaining := uint64(len(data)) - pos; numEdges > remaining/2 {
			return fmt.Errorf("invalid number of edges %d in nested edge list", numEdges)
		}

		// edges are kept in on-disk order so that the output is deterministic
		type edge struct {
			child, parent uint64
		}
		edges := make([]edge, 0, numEdges)
		for i := uint64(0); i < numEdges; i++ {
			child, err := readUvarint("child doc id")
			if err != nil {
				return err
			}
			parent, err := readUvarint("parent doc id")
			if err != nil {
				return err
			}
			edges = append(edges, edge{child: child, parent: parent})
		}

		// print number of edges / nested documents
		fmt.Printf("number of edges / nested documents: %d\n", len(edges))
		fmt.Printf("child document number -> parent document number\n")
		for _, e := range edges {
			fmt.Printf("%d -> %d\n", e.child, e.parent)
		}
		return nil
	},
}

// init attaches the edgeList sub-command to the root command at package
// initialization time.
func init() {
	RootCmd.AddCommand(edgeListCmd)
}
5 changes: 5 additions & 0 deletions faiss_vector_posting.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,11 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, except *roaring.Bitmap
if rv.vecIndex != nil {
rv.vecIndexSize = rv.vecIndex.Size()
}

// get the number of nested documents in this segment, if any
// to determine if the wrapper needs to handle nested documents
rv.nestedMode = sb.countNested() > 0

return rv, nil
}

Expand Down
14 changes: 14 additions & 0 deletions faiss_vector_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

Expand Down
25 changes: 24 additions & 1 deletion faiss_vector_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ type vectorIndexWrapper struct {
fieldID uint16
vecIndexSize uint64

// nestedMode indicates if the vector index is operating in nested document mode.
// if so we have a reusable ancestry slice to help with docID lookups
nestedMode bool
ancestry []index.AncestorID

sb *SegmentBase
}

Expand Down Expand Up @@ -528,9 +533,27 @@ func (v *vectorIndexWrapper) searchClustersFromIVFIndex(eligibleCentroidIDs []in

// getDocIDForVectorID maps a vectorID back to its docID; it is used by the
// deduplication logic of kNN searches.
//
// When the wrapper is in nested mode the returned docID is the root docID
// rather than the nested docID, obtained by consulting the segment's
// ancestry information. This ensures that kNN searches return unique root
// documents when nested documents are involved.
//
// The boolean result reports whether the vectorID maps to any document.
func (v *vectorIndexWrapper) getDocIDForVectorID(vecID int64) (uint32, bool) {
	docID, exists := v.mapping.docForVec(uint32(vecID))
	if !v.nestedMode || !exists {
		// either not in nested mode, or docID does not exist
		// for the vectorID, so just return the docID as is
		return docID, exists
	}
	// in nested mode and docID exists, so we must get the root docID;
	// reuse the wrapper's ancestry slice to avoid allocations
	v.ancestry = v.sb.Ancestors(uint64(docID), v.ancestry[:0])
	if len(v.ancestry) == 0 {
		// should not happen, but just in case, return the docID as is
		return docID, exists
	}
	// return the root docID, which is the last element in the ancestry slice;
	// in case the docID is a root doc, the ancestry slice would have
	// just one element, which is the docID itself
	return uint32(v.ancestry[len(v.ancestry)-1]), true
}

// ------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ go 1.24

require (
github.com/RoaringBitmap/roaring/v2 v2.4.5
github.com/blevesearch/bleve_index_api v1.2.12-0.20260109154621-f19a6d6af728
github.com/blevesearch/bleve_index_api v1.3.0
github.com/blevesearch/go-faiss v1.0.27
github.com/blevesearch/mmap-go v1.0.4
github.com/blevesearch/scorch_segment_api/v2 v2.3.14-0.20260109154938-b56b54c737df
github.com/blevesearch/scorch_segment_api/v2 v2.4.0
github.com/blevesearch/vellum v1.1.0
github.com/golang/snappy v0.0.4
github.com/spf13/cobra v1.7.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/blevesearch/bleve_index_api v1.2.12-0.20260109154621-f19a6d6af728 h1:qFnvr+SqVOCbhMl5sVynhuwVkv1yrc7Vhrn8lVdw1nU=
github.com/blevesearch/bleve_index_api v1.2.12-0.20260109154621-f19a6d6af728/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko=
github.com/blevesearch/bleve_index_api v1.3.0 h1:DsMpWVjFNlBw9/6pyWf59XoqcAkhHj3H0UWiQsavb6E=
github.com/blevesearch/bleve_index_api v1.3.0/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko=
github.com/blevesearch/go-faiss v1.0.27 h1:7cBImYDDQ82WJd5RUZ1ie6zXztCsC73W94ZzwOjkatk=
github.com/blevesearch/go-faiss v1.0.27/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.3.14-0.20260109154938-b56b54c737df h1:gBuVkzZLUpGJGnCBRgY0ruZVjppD7WaQLeHZei7QQnU=
github.com/blevesearch/scorch_segment_api/v2 v2.3.14-0.20260109154938-b56b54c737df/go.mod h1:f8fXitmMpzgNziIMqUlpTrfPxVVDN8at9k7POEohvJU=
github.com/blevesearch/scorch_segment_api/v2 v2.4.0 h1:OtipwURRzZv6UFmHQnbEqOY90eotINQ2TtSSpWfYuWU=
github.com/blevesearch/scorch_segment_api/v2 v2.4.0/go.mod h1:JalWE/eyEgISwhqtKXoaHMKf5t+F4kXiYrgg0ds3ylw=
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
Expand Down
1 change: 0 additions & 1 deletion inverted_text_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ type invertedIndexCache struct {
cache map[uint16]*invertedCacheEntry
}

// Clear clears the synonym cache which would mean that the termID to term map would no longer be available.
func (sc *invertedIndexCache) Clear() {
sc.m.Lock()
sc.cache = nil
Expand Down
54 changes: 54 additions & 0 deletions merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,60 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
}
}

// calculate new edge list if applicable
var newEdgeList map[uint64]uint64

for segI, segment := range segments {
// abort early if the close channel has been signalled in the meantime
if isClosed(closeCh) {
return 0, nil, seg.ErrClosed
}
// get the edgeList for this segment
edgeList := segment.EdgeList()
// if no edgeList, nothing to do
if edgeList == nil {
continue
}
newSegDocNums := rv[segI]
edgeList.Iterate(func(oldChild uint64, oldParent uint64) bool {
newParent := newSegDocNums[oldParent]
newChild := newSegDocNums[oldChild]
if newParent != docDropped &&
newChild != docDropped {
if newEdgeList == nil {
newEdgeList = make(map[uint64]uint64)
}
newEdgeList[newChild] = newParent
}
return true
})
}

// write out the new edge list
// first write out the number of entries
// which is also the number of valid subDocs
// in the merged segment
buf := make([]byte, binary.MaxVarintLen64)
n := binary.PutUvarint(buf, uint64(len(newEdgeList)))
_, err := w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
// write the child -> parent edge list
// child and parent are both flattened doc ids
for child, parent := range newEdgeList {
n = binary.PutUvarint(buf, child)
_, err = w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
n = binary.PutUvarint(buf, parent)
_, err = w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
}

return storedIndexOffset, rv, nil
}

Expand Down
Loading
Loading