Skip to content

Commit 907e948

Browse files
authored
Merge pull request #27 from ScoopInstaller/tokenize-dots-split
Split on dots that have no surrounding whitespace so that a package named XXX.YYY is properly returned when searching for YY
2 parents 9037a57 + 3796cf4 commit 907e948

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
- name: Stop SonarScanner and send analysis
5959
run: |
6060
export JAVA_HOME=$JAVA_HOME_17_X64 # Force JAVA_HOME environment variable (Version 11 or 17 is required by SonarScanner)
61-
dotnet sonarscanner end /d:sonar.login="${{ secrets.SONAR_TOKEN }}"
61+
dotnet sonarscanner end /d:sonar.token="${{ secrets.SONAR_TOKEN }}"
6262
6363
keepalive:
6464
name: Keepalive Workflow

src/ScoopSearch.Indexer/Indexer/AzureSearchIndex.cs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ internal class AzureSearchIndex : ISearchIndex
2424

2525
private const string EdgeNGramTokenFilter = "EdgeNGramTokenFilter";
2626

27+
private const string DotReplacementCharFilter = "DotReplacementCharFilter";
28+
2729
private static readonly string[] CorsAllowedHosts = { "*" };
2830

2931
private readonly SearchIndexClient _client;
@@ -39,11 +41,12 @@ public async Task CreateIndexIfRequiredAsync(CancellationToken cancellationToken
3941
{
4042
var index = new SearchIndex(_indexName);
4143
index.Fields = BuildFields();
42-
index.Analyzers.Add(BuildAnalyzer(StandardAnalyzer, LexicalTokenizerName.Standard, TokenFilterName.Lowercase));
43-
index.Analyzers.Add(BuildAnalyzer(PrefixAnalyzer, LexicalTokenizerName.Standard, TokenFilterName.Lowercase, EdgeNGramTokenFilter));
44-
index.Analyzers.Add(BuildAnalyzer(SuffixAnalyzer, LexicalTokenizerName.Standard, TokenFilterName.Lowercase, TokenFilterName.Reverse, EdgeNGramTokenFilter));
45-
index.Analyzers.Add(BuildAnalyzer(ReverseAnalyzer, LexicalTokenizerName.Standard, TokenFilterName.Lowercase, TokenFilterName.Reverse));
46-
index.Analyzers.Add(BuildAnalyzer(UrlAnalyzer, LexicalTokenizerName.UaxUrlEmail, TokenFilterName.Lowercase));
44+
index.Analyzers.Add(BuildAnalyzer(StandardAnalyzer, LexicalTokenizerName.Standard, null, TokenFilterName.Lowercase));
45+
index.Analyzers.Add(BuildAnalyzer(PrefixAnalyzer, LexicalTokenizerName.Standard, DotReplacementCharFilter, TokenFilterName.Lowercase, EdgeNGramTokenFilter));
46+
index.Analyzers.Add(BuildAnalyzer(SuffixAnalyzer, LexicalTokenizerName.Standard, DotReplacementCharFilter, TokenFilterName.Lowercase, TokenFilterName.Reverse, EdgeNGramTokenFilter));
47+
index.Analyzers.Add(BuildAnalyzer(ReverseAnalyzer, LexicalTokenizerName.Standard, DotReplacementCharFilter, TokenFilterName.Lowercase, TokenFilterName.Reverse));
48+
index.Analyzers.Add(BuildAnalyzer(UrlAnalyzer, LexicalTokenizerName.UaxUrlEmail, null, TokenFilterName.Lowercase));
49+
index.CharFilters.Add(BuildCharFilter());
4750
index.TokenFilters.Add(BuildTokenFilter());
4851
index.ScoringProfiles.Add(BuildScoringProfile());
4952
index.DefaultScoringProfile = ScoringProfile;
@@ -57,14 +60,25 @@ private IList<SearchField> BuildFields()
5760
return new FieldBuilder().Build(typeof(ManifestInfo));
5861
}
5962

60-
private CustomAnalyzer BuildAnalyzer(string name, LexicalTokenizerName tokenizer, params TokenFilterName[] filters)
63+
private CustomAnalyzer BuildAnalyzer(string name, LexicalTokenizerName tokenizer, string? charFilter, params TokenFilterName[] filters)
6164
{
6265
var analyzer = new CustomAnalyzer(name, tokenizer);
66+
67+
if (charFilter != null)
68+
{
69+
analyzer.CharFilters.Add(charFilter);
70+
}
71+
6372
filters.ForEach(_ => analyzer.TokenFilters.Add(_));
6473

6574
return analyzer;
6675
}
6776

77+
private CharFilter BuildCharFilter()
78+
{
79+
return new PatternReplaceCharFilter(DotReplacementCharFilter, "\\.", " ");
80+
}
81+
6882
private TokenFilter BuildTokenFilter()
6983
{
7084
return new EdgeNGramTokenFilter(EdgeNGramTokenFilter)

0 commit comments

Comments
 (0)