Skip to content

Commit 6c962d1

Browse files
first draft
1 parent 98577e1 commit 6c962d1

File tree

9 files changed

+349
-4
lines changed

9 files changed

+349
-4
lines changed

document/field_nested.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) 2025 Couchbase, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package document
16+
17+
import (
18+
"reflect"
19+
20+
"github.com/blevesearch/bleve/v2/size"
21+
index "github.com/blevesearch/bleve_index_api"
22+
)
23+
24+
// reflectStaticSizeNestedField holds the size in bytes of a NestedField value
// as reported by reflect; it is filled in once by this file's init function.
var reflectStaticSizeNestedField int
25+
26+
func init() {
27+
var f NestedField
28+
reflectStaticSizeNestedField = int(reflect.TypeOf(f).Size())
29+
}
30+
31+
// DefaultNestedIndexingOptions is the indexing options value that
// NewNestedField assigns to every nested field it creates.
const DefaultNestedIndexingOptions = index.IndexField
32+
33+
type NestedField struct {
34+
name string
35+
options index.FieldIndexingOptions
36+
numPlainTextBytes uint64
37+
38+
nestedDocuments []index.Document
39+
40+
docAnalyzer index.DocumentAnalyzer
41+
}
42+
43+
func (s *NestedField) Size() int {
44+
return reflectStaticSizeNestedField + size.SizeOfPtr +
45+
len(s.name)
46+
}
47+
48+
func (s *NestedField) Name() string {
49+
return s.name
50+
}
51+
52+
func (s *NestedField) ArrayPositions() []uint64 {
53+
return nil
54+
}
55+
56+
func (s *NestedField) Options() index.FieldIndexingOptions {
57+
return s.options
58+
}
59+
60+
func (s *NestedField) NumPlainTextBytes() uint64 {
61+
return s.numPlainTextBytes
62+
}
63+
64+
func (s *NestedField) AnalyzedLength() int {
65+
return 0
66+
}
67+
68+
func (s *NestedField) EncodedFieldType() byte {
69+
return 'e'
70+
}
71+
72+
func (s *NestedField) AnalyzedTokenFrequencies() index.TokenFrequencies {
73+
return nil
74+
}
75+
76+
func (s *NestedField) Analyze() {
77+
for _, doc := range s.nestedDocuments {
78+
s.docAnalyzer.Analyze(doc)
79+
}
80+
}
81+
82+
func (s *NestedField) Value() []byte {
83+
return nil
84+
}
85+
86+
func NewNestedField(name string, nestedDocuments []index.Document, docAnalyzer index.DocumentAnalyzer) *NestedField {
87+
return &NestedField{
88+
name: name,
89+
options: DefaultNestedIndexingOptions,
90+
nestedDocuments: nestedDocuments,
91+
docAnalyzer: docAnalyzer,
92+
}
93+
}

index/scorch/snapshot_index.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
"github.com/RoaringBitmap/roaring/v2"
3030
"github.com/blevesearch/bleve/v2/document"
31+
"github.com/blevesearch/bleve/v2/search"
3132
index "github.com/blevesearch/bleve_index_api"
3233
segment "github.com/blevesearch/scorch_segment_api/v2"
3334
"github.com/blevesearch/vellum"
@@ -621,6 +622,12 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field
621622
rv.includeTermVectors = includeTermVectors
622623
rv.currPosting = nil
623624
rv.currID = rv.currID[:0]
625+
rv.nestInfo = nil
626+
if ctx != nil {
627+
if nInfo, ok := ctx.Value(search.NestedInfoCallbackKey).(*search.NestedInfo); ok {
628+
rv.nestInfo = nInfo
629+
}
630+
}
624631

625632
if rv.dicts == nil {
626633
rv.dicts = make([]segment.TermDictionary, len(is.segment))
@@ -634,7 +641,13 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field
634641
segBytesRead := s.segment.BytesRead()
635642
rv.incrementBytesRead(segBytesRead)
636643
}
637-
dict, err := s.segment.Dictionary(field)
644+
var dict segment.TermDictionary
645+
var err error
646+
if nestedSegment, ok := s.segment.(segment.NestedSegment); ok && rv.nestInfo != nil {
647+
dict, err = nestedSegment.NestedDictionary(field, rv.nestInfo.Path, rv.nestInfo.ArrayPosition)
648+
} else {
649+
dict, err = s.segment.Dictionary(field)
650+
}
638651
if err != nil {
639652
return nil, err
640653
}

index/scorch/snapshot_index_tfr.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type IndexSnapshotTermFieldReader struct {
5151
bytesRead uint64
5252
ctx context.Context
5353
unadorned bool
54+
nestInfo *search.NestedInfo
5455
}
5556

5657
func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {

mapping/document.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
type DocumentMapping struct {
4343
Enabled bool `json:"enabled"`
4444
Dynamic bool `json:"dynamic"`
45+
Nested bool `json:"nested,omitempty"`
4546
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
4647
Fields []*FieldMapping `json:"fields,omitempty"`
4748
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
@@ -316,6 +317,11 @@ func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
316317
if err != nil {
317318
return err
318319
}
320+
case "nested":
321+
err := util.UnmarshalJSON(v, &dm.Nested)
322+
if err != nil {
323+
return err
324+
}
319325
case "default_analyzer":
320326
err := util.UnmarshalJSON(v, &dm.DefaultAnalyzer)
321327
if err != nil {

mapping/index.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,17 @@ func (im *IndexMappingImpl) Validate() error {
196196
if err != nil {
197197
return err
198198
}
199-
for _, docMapping := range im.TypeMapping {
199+
if im.DefaultMapping.Nested {
200+
return fmt.Errorf("default mapping cannot be nested")
201+
}
202+
for typ, docMapping := range im.TypeMapping {
200203
err = docMapping.Validate(im.cache, "", fieldAliasCtx)
201204
if err != nil {
202205
return err
203206
}
207+
if docMapping.Nested {
208+
return fmt.Errorf("document mapping for type '%s' cannot be nested", typ)
209+
}
204210
}
205211

206212
if _, ok := index.SupportedScoringModels[im.ScoringModel]; !ok && im.ScoringModel != "" {

search/query/nested.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright (c) 2025 Couchbase, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package query
16+
17+
import (
18+
"context"
19+
"encoding/json"
20+
"fmt"
21+
22+
"github.com/blevesearch/bleve/v2/mapping"
23+
"github.com/blevesearch/bleve/v2/search"
24+
"github.com/blevesearch/bleve/v2/search/searcher"
25+
"github.com/blevesearch/bleve/v2/util"
26+
index "github.com/blevesearch/bleve_index_api"
27+
)
28+
29+
type NestedQuery struct {
30+
Path string `json:"path"`
31+
InnerQuery Query `json:"query"`
32+
}
33+
34+
func NewNestedQuery(path string, innerQuery Query) *NestedQuery {
35+
return &NestedQuery{
36+
Path: path,
37+
InnerQuery: innerQuery,
38+
}
39+
}
40+
41+
func (q *NestedQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
42+
nr, ok := i.(index.NestedReader)
43+
if !ok {
44+
return nil, fmt.Errorf("nested searcher requires an index reader that supports nested documents")
45+
}
46+
childCount := nr.ChildCount(q.Path)
47+
if childCount == 0 {
48+
return nil, fmt.Errorf("nested searcher: path %q has no child documents", q.Path)
49+
}
50+
innerSearchers := make([]search.Searcher, 0, childCount)
51+
for arrayPos := range childCount {
52+
nctx := context.WithValue(ctx, search.NestedInfoCallbackKey, &search.NestedInfo{
53+
Path: q.Path,
54+
ArrayPosition: arrayPos,
55+
})
56+
innerSearcher, err := q.InnerQuery.Searcher(nctx, i, m, options)
57+
if err != nil {
58+
return nil, fmt.Errorf("nested searcher: failed to create inner searcher at pos %d: %w", arrayPos, err)
59+
}
60+
innerSearchers = append(innerSearchers, innerSearcher)
61+
}
62+
return searcher.NewDisjunctionSearcher(ctx, i, innerSearchers, 0, options)
63+
}
64+
65+
func (q *NestedQuery) Validate() error {
66+
if q.Path == "" {
67+
return fmt.Errorf("nested query must have a path")
68+
}
69+
if q.InnerQuery == nil {
70+
return fmt.Errorf("nested query must have a query")
71+
}
72+
if vq, ok := q.InnerQuery.(ValidatableQuery); ok {
73+
if err := vq.Validate(); err != nil {
74+
return fmt.Errorf("nested query must have a valid query: %v", err)
75+
}
76+
}
77+
return nil
78+
}
79+
80+
func (q *NestedQuery) UnmarshalJSON(data []byte) error {
81+
tmp := struct {
82+
Path string `json:"path"`
83+
Query json.RawMessage `json:"query"`
84+
}{}
85+
err := util.UnmarshalJSON(data, &tmp)
86+
if err != nil {
87+
return err
88+
}
89+
if tmp.Path == "" {
90+
return fmt.Errorf("nested query must have a path")
91+
}
92+
if tmp.Query == nil {
93+
return fmt.Errorf("nested query must have a query")
94+
}
95+
q.Path = tmp.Path
96+
q.InnerQuery, err = ParseQuery(tmp.Query)
97+
if err != nil || q.InnerQuery == nil {
98+
return fmt.Errorf("nested query must have a valid query: %v", err)
99+
}
100+
return nil
101+
}

search/query/query.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,6 @@ func ParseQuery(input []byte) (Query, error) {
353353
}
354354
return &rv, nil
355355
}
356-
357356
_, hasGeo := tmp["geometry"]
358357
if hasGeo {
359358
var rv GeoShapeQuery
@@ -363,7 +362,6 @@ func ParseQuery(input []byte) (Query, error) {
363362
}
364363
return &rv, nil
365364
}
366-
367365
_, hasCIDR := tmp["cidr"]
368366
if hasCIDR {
369367
var rv IPRangeQuery
@@ -373,6 +371,15 @@ func ParseQuery(input []byte) (Query, error) {
373371
}
374372
return &rv, nil
375373
}
374+
_, hasNested := tmp["nested"]
375+
if hasNested {
376+
var rv NestedQuery
377+
err := util.UnmarshalJSON(input, &rv)
378+
if err != nil {
379+
return nil, err
380+
}
381+
return &rv, nil
382+
}
376383

377384
return nil, fmt.Errorf("unknown query type")
378385
}

search/util.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ const (
152152
// BM25StatsKey is used to store and transport the BM25 Data
153153
// to the actual search phase which would use it to perform the search.
154154
BM25StatsKey ContextKey = "_bm25_stats_key"
155+
156+
NestedInfoCallbackKey ContextKey = "_nested_info_callback_key"
155157
)
156158

157159
func RecordSearchCost(ctx context.Context,
@@ -233,3 +235,8 @@ type BM25Stats struct {
233235
DocCount float64 `json:"doc_count"`
234236
FieldCardinality map[string]int `json:"field_cardinality"`
235237
}
238+
239+
// NestedInfo names one nested element by the path of the nested field and the
// element's position within it.
type NestedInfo struct {
	// Path is the path of the nested field.
	Path string `json:"path"`

	// ArrayPosition is the position of the element under Path.
	ArrayPosition int `json:"array_position"`
}

0 commit comments

Comments
 (0)