Skip to content

Commit

Permalink
Improve search results (#63)
Browse files Browse the repository at this point in the history
* Generate wildcard question and add clean index mapping
* JSON field support
* Do not add keyword mappings to _all field
* Do not add fields to index that are not searchable
* Add motion change recos to search
* Add mediafile directory to additional fields
  • Loading branch information
bastianjoel authored Nov 17, 2023
1 parent 27657ec commit ee0b502
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 37 deletions.
21 changes: 14 additions & 7 deletions pkg/meta/collections.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,24 @@ type Collection struct {

// CollectionRelation describes a related collection
type CollectionRelation struct {
Type string `json:"type"`
Collection *string `json:"collection,omitempty"`
Fields map[string]*CollectionRelation `json:"fields"`
Type string `json:"type" yaml:"type"`
Collection *string `json:"collection,omitempty" yaml:"collection,omitempty"`
Fields map[string]*CollectionRelation `json:"fields" yaml:"fields"`
}

// CollectionSearchableConfig contains per field config of a collection.
// It is read from the "searchable_config" mapping of a collection
// description, where it is keyed by field name. Both settings are optional;
// a nil pointer means the setting was not given.
type CollectionSearchableConfig struct {
// Type, when set, replaces the Type of the matching member.
Type *string `yaml:"type,omitempty"`
// Analyzer, when set, is copied to the matching member and selects the
// index field mapping by name (e.g. "html" or "simple") instead of
// deriving it from the member's type.
Analyzer *string `yaml:"analyzer,omitempty"`
}

// CollectionDescription is the collection format for search filters
type CollectionDescription struct {
Searchable []string `yaml:"searchable"`
Additional []string `yaml:"additional"`
Contains []string `yaml:"contains,omitempty"`
Relations map[string]*CollectionRelation `yaml:"relations,omitempty"`
Searchable []string `yaml:"searchable"`
SearchableConfig map[string]*CollectionSearchableConfig `yaml:"searchable_config,omitempty"`
Additional []string `yaml:"additional"`
Contains []string `yaml:"contains,omitempty"`
Relations map[string]*CollectionRelation `yaml:"relations,omitempty"`
}

// Collections is part of the meta model.
Expand Down
35 changes: 25 additions & 10 deletions pkg/meta/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import (

// Filter is part of the meta model.
type Filter struct {
Name string
Items []string
Additional []string
Contains map[string]struct{}
Relations map[string]*CollectionRelation
Name string
Items []string
ItemsConfig map[string]*CollectionSearchableConfig
Additional []string
Contains map[string]struct{}
Relations map[string]*CollectionRelation
}

// FilterKey is part of the meta model.
Expand Down Expand Up @@ -65,11 +66,12 @@ func (fs *Filters) UnmarshalYAML(value *yaml.Node) error {
}

*fs = append(*fs, Filter{
Name: s.Name,
Items: fsm[s].Searchable,
Additional: fsm[s].Additional,
Relations: relations,
Contains: contains,
Name: s.Name,
Items: fsm[s].Searchable,
ItemsConfig: fsm[s].SearchableConfig,
Additional: fsm[s].Additional,
Relations: relations,
Contains: contains,
})
}
return nil
Expand Down Expand Up @@ -101,11 +103,16 @@ func (fs Filters) Retain(verbose bool) func(string, string, *Member) bool {
keep := map[key]struct{}{}
additional := map[key]struct{}{}
relations := map[key]*CollectionRelation{}
config := map[key]*CollectionSearchableConfig{}
for _, m := range fs {
for _, f := range m.Items {
keep[key{rel: m.Name, field: f}] = struct{}{}
}

for f, data := range m.ItemsConfig {
config[key{rel: m.Name, field: f}] = data
}

for _, f := range m.Additional {
additional[key{rel: m.Name, field: f}] = struct{}{}
}
Expand All @@ -119,6 +126,14 @@ func (fs Filters) Retain(verbose bool) func(string, string, *Member) bool {
m.Relation = relations[key{rel: rk, field: fk}]
}

// Apply the per-field searchable config, if any. A missing key and an entry
// whose YAML value is null both yield a nil pointer here, so a single nil
// check covers both and avoids dereferencing a nil config.
if c := config[key{rel: rk, field: fk}]; c != nil {
	// An explicit type overrides the type taken from the model.
	if c.Type != nil {
		m.Type = *c.Type
	}

	// Analyzer stays nil when none is configured.
	m.Analyzer = c.Analyzer
}

if _, ok := additional[key{rel: rk, field: fk}]; ok {
m.Searchable = false
return true
Expand Down
1 change: 1 addition & 0 deletions pkg/meta/member.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type Member struct {
RestrictionMode string `yaml:"restriction_mode"`
Required bool `yaml:"required"`
Searchable bool `yaml:"-"`
Analyzer *string `yaml:"-"`
Relation *CollectionRelation `yaml:"-"`
Order int32 `yaml:"-"`
}
Expand Down
89 changes: 69 additions & 20 deletions pkg/search/textindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
package search

import (
"bytes"
"fmt"
"html"
"os"
"strconv"
"strings"
"time"

log "github.com/sirupsen/logrus"
Expand All @@ -19,6 +21,7 @@ import (
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
bleveHtml "github.com/blevesearch/bleve/v2/analysis/char/html"
"github.com/blevesearch/bleve/v2/analysis/lang/de"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
Expand Down Expand Up @@ -143,38 +146,57 @@ func (bt bleveType) BleveType() string {
}

func buildIndexMapping(collections meta.Collections) mapping.IndexMapping {

numberFieldMapping := bleve.NewNumericFieldMapping()

numberedRelationFieldMapping := bleve.NewNumericFieldMapping()
numberedRelationFieldMapping.IncludeInAll = false

textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = de.AnalyzerName

htmlFieldMapping := bleve.NewTextFieldMapping()
htmlFieldMapping.Analyzer = deHTML

keywordFieldMapping := bleve.NewTextFieldMapping()
keywordFieldMapping.Analyzer = keyword.Name
collectionInfoFieldMapping := bleve.NewTextFieldMapping()
collectionInfoFieldMapping.Analyzer = keyword.Name
collectionInfoFieldMapping.IncludeInAll = false

simpleFieldMapping := bleve.NewTextFieldMapping()
simpleFieldMapping.Analyzer = simple.Name

indexMapping := mapping.NewIndexMapping()
indexMapping.TypeField = "_bleve_type"

for name, col := range collections {
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("_bleve_type", keywordFieldMapping)
docMapping.AddFieldMappingsAt("_bleve_type", collectionInfoFieldMapping)
for fname, cf := range col.Fields {
if cf.Searchable {
switch cf.Type {
case "HTMLStrict", "HTMLPermissive":
docMapping.AddFieldMappingsAt(fname, htmlFieldMapping)
case "string", "text":
docMapping.AddFieldMappingsAt(fname, textFieldMapping)
case "generic-relation":
docMapping.AddFieldMappingsAt(fname, keywordFieldMapping)
case "relation", "number":
docMapping.AddFieldMappingsAt(fname, numberFieldMapping)
case "number[]":
docMapping.AddFieldMappingsAt(fname, numberFieldMapping)
default:
log.Errorf("unsupport type %q on field %s\n", cf.Type, fname)
// Without an analyzer override the field mapping is derived from the field type.
if cf.Analyzer == nil {
switch cf.Type {
case "HTMLStrict", "HTMLPermissive":
docMapping.AddFieldMappingsAt(fname, htmlFieldMapping)
case "string", "text":
docMapping.AddFieldMappingsAt(fname, textFieldMapping)
// Text fields additionally get a "_<field>_original" mapping that uses
// the simple analyzer instead of the German one.
docMapping.AddFieldMappingsAt("_"+fname+"_original", simpleFieldMapping)
case "generic-relation":
// Keyword-analyzed and excluded from the _all field.
docMapping.AddFieldMappingsAt(fname, collectionInfoFieldMapping)
case "relation", "relation-list":
// Numeric mapping that is excluded from the _all field.
docMapping.AddFieldMappingsAt(fname, numberedRelationFieldMapping)
case "number", "number[]":
docMapping.AddFieldMappingsAt(fname, numberFieldMapping)
default:
log.Errorf("unsupport type %q on field %s\n", cf.Type, fname)
}
} else {
// An explicitly configured analyzer takes precedence over the field type.
switch *cf.Analyzer {
case "html":
docMapping.AddFieldMappingsAt(fname, htmlFieldMapping)
case "simple":
docMapping.AddFieldMappingsAt(fname, simpleFieldMapping)
default:
// Unknown analyzer names are only logged; no mapping is added for the field.
log.Errorf("unsupported analyzer %q on field %s\n", *cf.Analyzer, fname)
}
}
}
}
Expand All @@ -187,9 +209,19 @@ func buildIndexMapping(collections meta.Collections) mapping.IndexMapping {
}

func (bt bleveType) fill(fields map[string]*meta.Member, data []byte) {
for fname := range fields {
for fname, field := range fields {
if !field.Searchable {
continue
}

switch fields[fname].Type {
case "HTMLStrict", "HTMLPermissive", "string", "text", "generic-relation":
case "string", "text":
if v, err := jsonparser.GetString(data, fname); err == nil {
bt[fname] = v
bt["_"+fname+"_original"] = v
continue
}
case "HTMLStrict", "HTMLPermissive", "generic-relation":
if v, err := jsonparser.GetString(data, fname); err == nil {
bt[fname] = v
continue
Expand All @@ -207,6 +239,13 @@ func (bt bleveType) fill(fields map[string]*meta.Member, data []byte) {
}
}, fname)
continue
case "json-int-string-map":
bt[fname] = []string{}
jsonparser.ObjectEach(data, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
bt[fname] = append(bt[fname].([]string), string(value))
return nil
}, fname)
continue
default:
if v, _, _, err := jsonparser.Get(data, fname); err == nil {
bt[fname] = v
Expand Down Expand Up @@ -350,8 +389,18 @@ func (ti *TextIndex) Search(question string, collections []string, meetingID int
log.Debugf("searching for %q took %v\n", question, time.Since(start))
}()

// Build a wildcard variant of the question: every term that does not already
// start or end with '*' is lowercased and wrapped as "*term*".
//
// strings.Fields never yields empty tokens, so an empty question or one with
// leading, trailing or repeated whitespace is handled without indexing into
// an empty string.
var wildcardQuestion bytes.Buffer
for _, w := range strings.Fields(question) {
	if strings.HasPrefix(w, "*") || strings.HasSuffix(w, "*") {
		// The user already supplied a wildcard for this term; leave it to
		// the plain query.
		continue
	}
	wildcardQuestion.WriteString("*" + strings.ToLower(w) + "* ")
}
wildcardQuery := bleve.NewQueryStringQuery(wildcardQuestion.String())

var q query.Query
matchQuery := bleve.NewQueryStringQuery(question)
matchQueryOriginal := bleve.NewQueryStringQuery(question)
matchQueryOriginal.SetBoost(5)
matchQuery := bleve.NewDisjunctionQuery(matchQueryOriginal, wildcardQuery)

if meetingID > 0 {
fmid := float64(meetingID)
Expand Down
40 changes: 40 additions & 0 deletions search.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ mediafile:
- owner_id
additional:
- id
- is_directory
relations:
owner_id:
type: generic-relation
Expand Down Expand Up @@ -84,6 +85,11 @@ motion:
- text
- reason
- meeting_id
- amendment_paragraphs
searchable_config:
amendment_paragraphs:
type: json-int-string-map
analyzer: html
additional:
- id
- sequential_number
Expand Down Expand Up @@ -121,6 +127,33 @@ motion:
pronoun: null
username: null
gender: null
motion_change_recommendation:
contains:
- motion
searchable:
- other_description
- text
- meeting_id
additional:
- id
- motion_id
relations:
motion_id:
type: relation
collection: motion
fields:
id: null
title: null
number: null
sequential_number: null
meeting_id: null
meeting_id:
type: relation
collection: meeting
fields:
id: null
name: null
motions_show_sequential_number: null
poll:
contains:
- topic
Expand Down Expand Up @@ -219,6 +252,13 @@ user:
- organization_management_level
- meeting_ids
- owner_id
searchable_config:
first_name:
analyzer: simple
last_name:
analyzer: simple
email:
analyzer: simple
additional:
- id
relations:
Expand Down

0 comments on commit ee0b502

Please sign in to comment.