
Commit d9fc1b7

Merge branch 'master' of https://github.com/blevesearch/bleve into fix-scorch-on-rpi
2 parents: 99bcef2 + 4a7e6fb

File tree

19 files changed: +452, -41 lines changed


.travis.yml

Lines changed: 3 additions & 3 deletions
@@ -16,10 +16,10 @@ script:
   - go test -race -v $(go list ./... | grep -v vendor/)
   - go vet $(go list ./... | grep -v vendor/)
   - go test ./test -v -indexType scorch
-  - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.11 ]]; then
-      errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
-    else
+  - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
       echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
+    else
+      errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
     fi
   - docs/project-code-coverage.sh
   - docs/build_children.sh

analysis/token/reverse/reverse.go

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package reverse

import (
	"unicode"
	"unicode/utf8"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

// Name is the name used to register ReverseFilter in the bleve registry
const Name = "reverse"

type ReverseFilter struct {
}

func NewReverseFilter() *ReverseFilter {
	return &ReverseFilter{}
}

func (f *ReverseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		token.Term = reverse(token.Term)
	}
	return input
}

func ReverseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewReverseFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(Name, ReverseFilterConstructor)
}

// reverse(..) will generate a reversed version of the provided
// unicode array and return it back to its caller.
func reverse(s []byte) []byte {
	cursorIn := 0
	inputRunes := []rune(string(s))
	cursorOut := len(s)
	output := make([]byte, len(s))
	for i := 0; i < len(inputRunes); {
		wid := utf8.RuneLen(inputRunes[i])
		i++
		for i < len(inputRunes) {
			r := inputRunes[i]
			if unicode.Is(unicode.Mn, r) || unicode.Is(unicode.Me, r) || unicode.Is(unicode.Mc, r) {
				wid += utf8.RuneLen(r)
				i++
			} else {
				break
			}
		}
		copy(output[cursorOut-wid:cursorOut], s[cursorIn:cursorIn+wid])
		cursorIn += wid
		cursorOut -= wid
	}

	return output
}
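The inner loop in reverse() keeps Unicode combining marks (categories Mn, Me and Mc) attached to the rune they modify, so a decomposed accent does not end up on the wrong letter once the term is reversed. A minimal sketch of that behavior using only the filter added in this commit (the sample program and string are illustrative, not part of the change):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/reverse"
)

func main() {
	// "é" spelled in decomposed form: 'e' followed by U+0301 (combining acute)
	ts := analysis.TokenStream{
		&analysis.Token{Term: []byte("cafe\u0301s")},
	}
	out := reverse.NewReverseFilter().Filter(ts)
	// the accent travels with its base 'e'; a naive rune-by-rune reversal
	// would instead attach it to the trailing 's'
	fmt.Printf("%s\n", out[0].Term)
}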
analysis/token/reverse/reverse_test.go

Lines changed: 184 additions & 0 deletions

@@ -0,0 +1,184 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package reverse

import (
	"bytes"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestReverseFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{},
		&analysis.Token{
			Term: []byte("one"),
		},
		&analysis.Token{
			Term: []byte("TWo"),
		},
		&analysis.Token{
			Term: []byte("thRee"),
		},
		&analysis.Token{
			Term: []byte("four's"),
		},
		&analysis.Token{
			Term: []byte("what's this in reverse"),
		},
		&analysis.Token{
			Term: []byte("œ∑´®†"),
		},
		&analysis.Token{
			Term: []byte("İȺȾCAT÷≥≤µ123"),
		},
		&analysis.Token{
			Term: []byte("!@#$%^&*()"),
		},
		&analysis.Token{
			Term: []byte("cafés"),
		},
		&analysis.Token{
			Term: []byte("¿Dónde estás?"),
		},
		&analysis.Token{
			Term: []byte("Me gustaría una cerveza."),
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{},
		&analysis.Token{
			Term: []byte("eno"),
		},
		&analysis.Token{
			Term: []byte("oWT"),
		},
		&analysis.Token{
			Term: []byte("eeRht"),
		},
		&analysis.Token{
			Term: []byte("s'ruof"),
		},
		&analysis.Token{
			Term: []byte("esrever ni siht s'tahw"),
		},
		&analysis.Token{
			Term: []byte("†®´∑œ"),
		},
		&analysis.Token{
			Term: []byte("321µ≤≥÷TACȾȺİ"),
		},
		&analysis.Token{
			Term: []byte(")(*&^%$#@!"),
		},
		&analysis.Token{
			Term: []byte("séfac"),
		},
		&analysis.Token{
			Term: []byte("?sátse ednóD¿"),
		},
		&analysis.Token{
			Term: []byte(".azevrec anu aíratsug eM"),
		},
	}

	filter := NewReverseFilter()
	outputTokenStream := filter.Filter(inputTokenStream)
	for i := 0; i < len(expectedTokenStream); i++ {
		if !bytes.Equal(outputTokenStream[i].Term, expectedTokenStream[i].Term) {
			t.Errorf("[%d] expected %s got %s",
				i+1, expectedTokenStream[i].Term, outputTokenStream[i].Term)
		}
	}
}

func BenchmarkReverseFilter(b *testing.B) {
	input := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("A"),
		},
		&analysis.Token{
			Term: []byte("boiling"),
		},
		&analysis.Token{
			Term: []byte("liquid"),
		},
		&analysis.Token{
			Term: []byte("expanding"),
		},
		&analysis.Token{
			Term: []byte("vapor"),
		},
		&analysis.Token{
			Term: []byte("explosion"),
		},
		&analysis.Token{
			Term: []byte("caused"),
		},
		&analysis.Token{
			Term: []byte("by"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("rupture"),
		},
		&analysis.Token{
			Term: []byte("of"),
		},
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("vessel"),
		},
		&analysis.Token{
			Term: []byte("containing"),
		},
		&analysis.Token{
			Term: []byte("pressurized"),
		},
		&analysis.Token{
			Term: []byte("liquid"),
		},
		&analysis.Token{
			Term: []byte("above"),
		},
		&analysis.Token{
			Term: []byte("its"),
		},
		&analysis.Token{
			Term: []byte("boiling"),
		},
		&analysis.Token{
			Term: []byte("point"),
		},
		&analysis.Token{
			Term: []byte("İȺȾCAT"),
		},
		&analysis.Token{
			Term: []byte("Me gustaría una cerveza."),
		},
	}
	filter := NewReverseFilter()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		filter.Filter(input)
	}
}

config/config.go

Lines changed: 3 additions & 0 deletions
@@ -45,17 +45,20 @@ import (

 	// token filters
 	_ "github.com/blevesearch/bleve/analysis/token/apostrophe"
+	_ "github.com/blevesearch/bleve/analysis/token/camelcase"
 	_ "github.com/blevesearch/bleve/analysis/token/compound"
 	_ "github.com/blevesearch/bleve/analysis/token/edgengram"
 	_ "github.com/blevesearch/bleve/analysis/token/elision"
 	_ "github.com/blevesearch/bleve/analysis/token/keyword"
 	_ "github.com/blevesearch/bleve/analysis/token/length"
 	_ "github.com/blevesearch/bleve/analysis/token/lowercase"
 	_ "github.com/blevesearch/bleve/analysis/token/ngram"
+	_ "github.com/blevesearch/bleve/analysis/token/reverse"
 	_ "github.com/blevesearch/bleve/analysis/token/shingle"
 	_ "github.com/blevesearch/bleve/analysis/token/stop"
 	_ "github.com/blevesearch/bleve/analysis/token/truncate"
 	_ "github.com/blevesearch/bleve/analysis/token/unicodenorm"
+	_ "github.com/blevesearch/bleve/analysis/token/unique"

 	// tokenizers
 	_ "github.com/blevesearch/bleve/analysis/tokenizer/exception"

index/index.go

Lines changed: 8 additions & 0 deletions
@@ -121,6 +121,10 @@ type IndexReaderOnly interface {
 	FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
 }

+type IndexReaderContains interface {
+	FieldDictContains(field string) (FieldDictContains, error)
+}
+
 // FieldTerms contains the terms used by a document, keyed by field
 type FieldTerms map[string][]string

@@ -230,6 +234,10 @@ type FieldDict interface {
 	Close() error
 }

+type FieldDictContains interface {
+	Contains(key []byte) (bool, error)
+}
+
 // DocIDReader is the interface exposing enumeration of documents identifiers.
 // Close the reader to release associated resources.
 type DocIDReader interface {
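Both additions are optional capabilities, so callers discover them with a type assertion on the reader. A small sketch of that pattern (checkTerm, the field name, and the fallback are placeholders for illustration, not APIs from this commit):

package example

import "github.com/blevesearch/bleve/index"

// checkTerm reports whether term appears in field's dictionary, using the
// new Contains capability when the underlying reader provides it.
func checkTerm(reader index.IndexReader, field string, term []byte) (bool, error) {
	if irc, ok := reader.(index.IndexReaderContains); ok {
		dict, err := irc.FieldDictContains(field)
		if err != nil {
			return false, err
		}
		return dict.Contains(term)
	}
	// the reader lacks FieldDictContains; a caller would fall back to
	// iterating a FieldDict here (omitted in this sketch)
	return false, nil
}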

index/scorch/persister.go

Lines changed: 17 additions & 0 deletions
@@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
 	var persistWatchers []*epochWatcher
 	var lastPersistedEpoch, lastMergedEpoch uint64
 	var ew *epochWatcher
+
+	var unpersistedCallbacks []index.BatchCallback
+
 	po, err := s.parsePersisterOptions()
 	if err != nil {
 		s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))

@@ -149,11 +152,25 @@ OUTER:
 				_ = ourSnapshot.DecRef()
 				break OUTER
 			}
+
+			// save this current snapshot's persistedCallbacks, to invoke during
+			// the retry attempt
+			unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
+
 			s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
 			_ = ourSnapshot.DecRef()
 			atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
 			continue OUTER
 		}
+
+		if unpersistedCallbacks != nil {
+			// in the event of this being a retry attempt for persisting a snapshot
+			// that had earlier failed, prepend the persistedCallbacks associated
+			// with earlier segment(s) to the latest persistedCallbacks
+			ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
+			unpersistedCallbacks = nil
+		}
+
 		for i := range ourPersistedCallbacks {
 			ourPersistedCallbacks[i](err)
 		}
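The effect of this change is that callbacks attached to a snapshot whose persist attempt fails are no longer dropped on the failing pass; they are carried forward and fired together with the callbacks of the next attempt that completes. A stripped-down illustration of that bookkeeping, independent of scorch (the batchCallback type and the driver function are invented for this sketch):

package main

import "fmt"

type batchCallback func(error)

// fireWithRetry mimics the persister loop: callbacks from failed attempts
// are accumulated and prepended to those of the next completed attempt.
func fireWithRetry(attempts []error, callbacks [][]batchCallback) {
	var unpersisted []batchCallback
	for i, err := range attempts {
		current := callbacks[i]
		if err != nil {
			// remember this attempt's callbacks for the retry
			unpersisted = append(unpersisted, current...)
			continue
		}
		if unpersisted != nil {
			current = append(unpersisted, current...)
			unpersisted = nil
		}
		for _, cb := range current {
			cb(err)
		}
	}
}

func main() {
	cb := func(name string) batchCallback {
		return func(err error) { fmt.Println(name, "notified, err =", err) }
	}
	// batch-1's callback only fires on the second, successful attempt,
	// together with batch-2's callback
	fireWithRetry(
		[]error{fmt.Errorf("simulated persist failure"), nil},
		[][]batchCallback{{cb("batch-1")}, {cb("batch-2")}},
	)
}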

index/scorch/segment/empty.go

Lines changed: 8 additions & 0 deletions
@@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
 	return &EmptyDictionaryIterator{}
 }

+func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
+	return false, nil
+}
+
 type EmptyDictionaryIterator struct{}

 func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
 	return nil, nil
 }

+func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
+	return false, nil
+}
+
 func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
 	return nil, nil
 }

index/scorch/segment/regexp.go

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
 		s = s.Sub[0]
 	}

-	if s.Op == syntax.OpLiteral {
+	if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
 		return string(s.Rune)
 	}

index/scorch/segment/regexp_test.go

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ func TestLiteralPrefix(t *testing.T) {
 		{`^hello`, ""},
 		{`^`, ""},
 		{`$`, ""},
+		{`(?i)mArTy`, ""},
 	}

 	for i, test := range tests {
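The new condition in LiteralPrefix, exercised by the `(?i)mArTy` case above, exists because a case-insensitive literal still parses to a syntax.OpLiteral node but carries the FoldCase flag, so its runes describe only one of several casings and cannot be used as an exact literal prefix. A quick standalone check of that parser behavior (this reflects my reading of Go's regexp/syntax and is not part of the commit):

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	re, err := syntax.Parse(`(?i)mArTy`, syntax.Perl)
	if err != nil {
		panic(err)
	}
	// expected: "true true", an OpLiteral node with FoldCase set, which the
	// updated LiteralPrefix now rejects by returning ""
	fmt.Println(re.Op == syntax.OpLiteral, re.Flags&syntax.FoldCase != 0)
}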
