Skip to content

Commit 70604db

Browse files
authored
Merge pull request guardian#4401 from bbc/t2118-fuzzy-search
T2118 fuzzy search
2 parents ace0144 + 8e3e80f commit 70604db

File tree

4 files changed

+51
-4
lines changed

4 files changed

+51
-4
lines changed

dev/script/generate-config/service-config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ function getMediaApiConfig(config) {
142142
|es6.replicas=${config.es6.replicas}
143143
|quota.store.key="rcs-quota.json"
144144
|security.cors.allowedOrigins="${getCorsAllowedOriginString(config)}"
145+
|search.fuzziness={
146+
| enabled=true
147+
|}
145148
|metrics.request.enabled=false
146149
|syndication.review.useRuntimeFieldsFix=true
147150
|`;

media-api/app/lib/MediaApiConfig.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ class MediaApiConfig(resources: GridConfigResources) extends CommonConfigWithEla
3131
val cloudFrontPrivateKeyBucketKey: Option[String] = stringOpt("cloudfront.private-key.key")
3232
val cloudFrontKeyPairId: Option[String] = stringOpt("cloudfront.keypair.id")
3333

34+
// Switch for fuzzy matching of free-text word searches, read from `search.fuzziness.enabled`.
// NOTE(review): what happens when the key is absent depends on the `boolean` helper,
// which is not visible in this hunk — confirm it defaults to false rather than throwing.
val fuzzySearchEnabled: Boolean = boolean("search.fuzziness.enabled")
35+
// Value passed to the query's `prefixLength`; 1 is used when `intOpt` yields no value.
val fuzzySearchPrefixLength: Int = intOpt("search.fuzziness.prefixLength").getOrElse(1)
36+
// Fuzziness setting handed to the query as a string. Resolution order:
//   1. any value that parses as an Int is passed through unchanged;
//   2. any value containing the substring "AUTO:" is passed through unchanged;
//   3. everything else (including an absent key) becomes plain "AUTO".
// NOTE(review): case 1 accepts every integer (negative or large values too) and case 2
// uses `contains`, so a malformed value such as "xAUTO:" is not rejected here.
// Presumably Elasticsearch only accepts 0-2 or AUTO[:low,high] — confirm whether
// stricter validation is wanted at config load time.
val fuzzySearchEditDistance: String = stringOpt("search.fuzziness.editDistance") match {
37+
case Some(editDistance) if editDistance.toIntOption.isDefined => editDistance
38+
case Some(editDistance) if editDistance.contains("AUTO:") => editDistance //<- for non-default AUTO word boundaries
39+
case _ => "AUTO"
40+
}
41+
// Value passed to the query's `maxExpansions`; 50 is used when `intOpt` yields no value.
val fuzzyMaxExpansions: Int = intOpt("search.fuzziness.maxExpansions").getOrElse(50)
42+
3443
val rootUri: String = services.apiBaseUri
3544
val kahunaUri: String = services.kahunaBaseUri
3645
val cropperUri: String = services.cropperBaseUri

media-api/app/lib/elasticsearch/QueryBuilder.scala

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,21 @@ class QueryBuilder(matchFields: Seq[String], overQuotaAgencies: () => List[Agenc
2626
private def multiMatchPhraseQuery(value: String, fields: Seq[String]): MultiMatchQuery =
2727
ElasticDsl.multiMatchQuery(value).fields(fields).matchType(MultiMatchQueryBuilderType.PHRASE)
2828

29+
/**
  * Builds the multi-match query used for free-text `Words` searches.
  *
  * The query always targets `fields` with the AND operator. When
  * `config.fuzzySearchEnabled` is true it uses the BEST_FIELDS match type and applies
  * the configured fuzziness, max expansions and prefix length; otherwise it uses
  * CROSS_FIELDS with no fuzziness settings.
  *
  * @param value  the search text
  * @param fields the fields to match against
  * @return the configured multi-match query
  */
private def multiMatchWordQuery(value: String, fields: Seq[String]): MultiMatchQuery = {
30+
// Parts common to both branches: text, target fields and AND operator.
val multiMatchQuery = ElasticDsl.multiMatchQuery(value).fields(fields).operator(Operator.AND)
31+
32+
if (config.fuzzySearchEnabled) {
33+
// NOTE(review): presumably BEST_FIELDS is used here because Elasticsearch does not
// support fuzziness with cross_fields — confirm against the multi_match docs.
// With operator AND this may change matching semantics compared with the
// non-fuzzy branch (terms matched per field rather than across fields) — verify.
multiMatchQuery.matchType(MultiMatchQueryBuilderType.BEST_FIELDS)
34+
.fuzziness(config.fuzzySearchEditDistance)
35+
.maxExpansions(config.fuzzyMaxExpansions)
36+
.prefixLength(config.fuzzySearchPrefixLength)
37+
} else {
38+
// Non-fuzzy path: same match type the inline query used before this helper existed.
multiMatchQuery.matchType(MultiMatchQueryBuilderType.CROSS_FIELDS)
39+
}
40+
}
41+
2942
private def makeMultiQuery(value: Value, fields: Seq[String]): MultiMatchQuery = value match {
30-
case Words(value) => ElasticDsl.multiMatchQuery(value).fields(fields).
31-
operator(Operator.AND).
32-
matchType(MultiMatchQueryBuilderType.CROSS_FIELDS)
43+
case Words(value) => multiMatchWordQuery(value, fields)
3344
case Phrase(string) => multiMatchPhraseQuery(string, fields)
3445
// That's OK, we only do date queries on a single field at a time
3546
case e => throw InvalidQuery(s"Cannot do multiQuery on $e")

media-api/test/lib/elasticsearch/QueryBuilderTest.scala

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ class QueryBuilderTest extends AnyFunSpec with Matchers with ConditionFixtures w
2323

2424
val matchFields: Seq[String] = Seq("afield", "anothermatchfield")
2525

26+
private val commonConfigurations = USED_CONFIGS_IN_TEST ++ MOCK_CONFIG_KEYS.map(_ -> NOT_USED_IN_TEST).toMap
27+
2628
private val mediaApiConfig = new MediaApiConfig(GridConfigResources(
27-
Configuration.from(USED_CONFIGS_IN_TEST ++ MOCK_CONFIG_KEYS.map(_ -> NOT_USED_IN_TEST).toMap),
29+
Configuration.from(commonConfigurations),
2830
null,
2931
new ApplicationLifecycle {
3032
override def addStopHook(hook: () => Future[_]): Unit = {}
@@ -132,6 +134,28 @@ class QueryBuilderTest extends AnyFunSpec with Matchers with ConditionFixtures w
132134
multiMatchClause.`type` shouldBe Some(MultiMatchQueryBuilderType.CROSS_FIELDS)
133135
}
134136

137+
// Checks the query built for an any-field words condition when fuzzy search is enabled.
// NOTE(review): the description mentions "analyzers", but no assertion below inspects
// analyzers; maxExpansions and prefixLength are not asserted either.
it("any field words queries should be applied to all of the match fields with best fields type and fuzziness, operator and analyzers set") {
138+
// Separate config instance: the shared test configuration plus the fuzzy-search flag.
val mediaApiConfigWithFuzzySearch = new MediaApiConfig(GridConfigResources(
139+
Configuration.from(commonConfigurations ++ Map("search.fuzziness.enabled" -> true)),
140+
null,
141+
// No-op lifecycle stub: stop hooks are ignored and stop() completes immediately.
new ApplicationLifecycle {
142+
override def addStopHook(hook: () => Future[_]): Unit = {}
143+
override def stop(): Future[_] = Future.successful(())
144+
}
145+
))
146+
val queryBuilder = new QueryBuilder(matchFields, () => Nil, mediaApiConfigWithFuzzySearch)
147+
val query = queryBuilder.makeQuery(List(anyFieldWordsCondition)).asInstanceOf[BoolQuery]
148+
149+
query.must.size shouldBe 1
150+
val multiMatchClause = query.must.head.asInstanceOf[MultiMatchQuery]
151+
multiMatchClause.text shouldBe "cats dogs"
152+
multiMatchClause.fields.map(_.field) shouldBe matchFields
153+
multiMatchClause.operator shouldBe Some(Operator.AND)
154+
// Fuzzy branch is expected to switch the match type to BEST_FIELDS.
multiMatchClause.`type` shouldBe Some(MultiMatchQueryBuilderType.BEST_FIELDS)
155+
// NOTE(review): this `defined` check is implied by the `Some("AUTO")` assertion that follows.
multiMatchClause.fuzziness shouldBe defined
156+
// Presumably `search.fuzziness.editDistance` is not set in commonConfigurations,
// so the "AUTO" fallback applies — confirm against the fixture definitions.
multiMatchClause.fuzziness shouldBe Some("AUTO")
157+
}
158+
135159
it("multiple field queries should query against the requested fields only") {
136160
val query = queryBuilder.makeQuery(List(multipleFieldWordsCondition)).asInstanceOf[BoolQuery]
137161

0 commit comments

Comments
 (0)