diff --git a/src/df/core/df_core.h b/src/df/core/df_core.h
index 2a6e3e57d..403d3d853 100644
--- a/src/df/core/df_core.h
+++ b/src/df/core/df_core.h
@@ -88,6 +88,13 @@ struct DF_FuzzyMatchRangeList
   U64 count;
 };
 
+typedef struct DF_ScoredFuzzyMatchRangeList DF_ScoredFuzzyMatchRangeList;
+struct DF_ScoredFuzzyMatchRangeList
+{
+  DF_FuzzyMatchRangeList list;
+  S32 score;
+};
+
 ////////////////////////////////
 //~ rjf: Control Context Types
 
diff --git a/src/df/gfx/df_gfx.c b/src/df/gfx/df_gfx.c
index 0878b83ea..aa98ef723 100644
--- a/src/df/gfx/df_gfx.c
+++ b/src/df/gfx/df_gfx.c
@@ -102,6 +102,90 @@ df_fuzzy_match_find(Arena *arena, String8List needles, String8 haystack)
   return result;
 }
 
+// Greedy, case-insensitive fuzzy match of `needles` (joined into one string)
+// against `haystack`: every needle character must be found, in order, each
+// search starting right after the previous match. Scoring is a simplified
+// take on https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/;
+// higher scores are better matches. On success, `list` holds the matched
+// haystack ranges (nodes allocated on `arena`, adjacent matched characters
+// merged into a single range). An empty needle or a failed match returns a
+// zeroed result (list.count == 0, score == 0).
+internal DF_ScoredFuzzyMatchRangeList
+df_scored_fuzzy_match_find(Arena *arena, String8List needles, String8 haystack)
+{
+  // Scoring weights. These are signed locals instead of #defines so that they
+  // do not leak into the rest of the translation unit, and so that penalties
+  // are never computed through unsigned (U64) wraparound.
+  S32 score_per_unmatched = -1;
+  S32 score_per_unmatched_leading = -3;
+  S32 score_per_consecutive = 5;
+  U64 max_leading_penalized = 3; // only the first 3 skipped characters get the heavier leading penalty
+
+  DF_ScoredFuzzyMatchRangeList result = {0};
+  // `arena` is named as a conflict so the scratch arena is never the output arena.
+  Temp scratch = scratch_begin(&arena, 1);
+  String8 needle = str8_list_join(scratch.arena, &needles, 0);
+
+  // Pass 1: find every needle character, recording positions in scratch
+  // memory only, so that a failed match leaves nothing behind on `arena`.
+  B32 matched = (needle.size != 0);
+  U64 *match_pos = 0;
+  if(matched)
+  {
+    match_pos = push_array(scratch.arena, U64, needle.size);
+    U64 search_pos = 0;
+    for(U64 idx = 0; idx < needle.size; idx += 1)
+    {
+      String8 needle_char = str8(needle.str + idx, 1);
+      U64 find_pos = str8_find_needle(haystack, search_pos, needle_char, StringMatchFlag_CaseInsensitive);
+      if(find_pos >= haystack.size)
+      {
+        matched = 0;
+        break;
+      }
+      match_pos[idx] = find_pos;
+      search_pos = find_pos + 1;
+    }
+  }
+
+  // Pass 2: the whole needle matched, so score it and build the range list.
+  if(matched)
+  {
+    // Leading penalty: heavier for the first few skipped characters, then the
+    // ordinary unmatched penalty for any further ones.
+    U64 leading = match_pos[0];
+    U64 leading_heavy = Min(leading, max_leading_penalized);
+    S32 score = (S32)leading_heavy*score_per_unmatched_leading + (S32)(leading - leading_heavy)*score_per_unmatched;
+    for(U64 idx = 0; idx < needle.size; idx += 1)
+    {
+      U64 pos = match_pos[idx];
+      U64 gap_start = (idx > 0) ? match_pos[idx-1] + 1 : pos;
+      if(idx > 0 && pos == gap_start)
+      {
+        // Adjacent to the previous match: bonus, and extend the last range.
+        score += score_per_consecutive;
+        result.list.last->range.max = pos + 1;
+      }
+      else
+      {
+        // Penalize the characters skipped since the previous match (none for
+        // the first character; its gap is the leading penalty above).
+        score += (S32)(pos - gap_start)*score_per_unmatched;
+        DF_FuzzyMatchRangeNode *n = push_array(arena, DF_FuzzyMatchRangeNode, 1);
+        n->range = r1u64(pos, pos + 1);
+        SLLQueuePush(result.list.first, result.list.last, n);
+        result.list.count += 1;
+      }
+    }
+    // Trailing penalty for everything after the last matched character.
+    score += (S32)(haystack.size - (match_pos[needle.size-1] + 1))*score_per_unmatched;
+    result.score = score;
+  }
+
+  scratch_end(scratch);
+  return result;
+}
+
 ////////////////////////////////
 //~ rjf: View Type Functions
 
diff --git a/src/df/gfx/df_gfx.h b/src/df/gfx/df_gfx.h
index 7d8039cf8..60060f0ed 100644
--- a/src/df/gfx/df_gfx.h
+++ b/src/df/gfx/df_gfx.h
@@ -805,6 +805,7 @@ global DF_DragDropPayload df_g_drag_drop_payload = {0};
 
 internal DF_PathQuery df_path_query_from_string(String8 string);
 internal DF_FuzzyMatchRangeList df_fuzzy_match_find(Arena *arena, String8List needles, String8 haystack);
+internal DF_ScoredFuzzyMatchRangeList df_scored_fuzzy_match_find(Arena *arena, String8List needles, String8 haystack);
 
 ////////////////////////////////
 //~ rjf: View Type Functions
diff --git a/src/df/gfx/df_views.c b/src/df/gfx/df_views.c
index dd48d56d2..f3218e0bd 100644
--- a/src/df/gfx/df_views.c
+++ b/src/df/gfx/df_views.c
@@ -26,16 +26,15 @@ df_qsort_compare_file_info__default(DF_FileInfo *a, DF_FileInfo *b)
 internal int
 df_qsort_compare_file_info__default_filtered(DF_FileInfo *a, DF_FileInfo *b)
 {
-  int result = 0;
-  if(a->filename.size < b->filename.size)
+  if (a->match_ranges.score > b->match_ranges.score)
   {
-    result = -1;
+    return -1;
   }
-  else if(a->filename.size > b->filename.size)
+  if (a->match_ranges.score < b->match_ranges.score)
   {
-    result = +1;
+    return 1;
   }
-  return result;
+  return 0;
 }
 
 internal int
@@ -2119,8 +2118,8 @@ DF_VIEW_UI_FUNCTION_DEF(FileSystem)
       OS_FileIter *it = os_file_iter_begin(scratch.arena, path_query.path, 0);
       for(OS_FileInfo info = {0}; os_file_iter_next(scratch.arena, it, &info);)
       {
-        DF_FuzzyMatchRangeList match_ranges = df_fuzzy_match_find(fs->cached_files_arena, search_needles, info.name);
-        B32 fits_search = (search_needles.node_count == 0 || match_ranges.count == search_needles.node_count);
+        DF_ScoredFuzzyMatchRangeList match_ranges = df_scored_fuzzy_match_find(fs->cached_files_arena, search_needles, info.name);
+        B32 fits_search = (search_needles.node_count == 0 || match_ranges.list.count != 0);
         B32 fits_dir_only = !!(info.props.flags & FilePropertyFlag_IsFolder) || !dir_selection;
         if(fits_search && fits_dir_only)
         {
@@ -2419,7 +2418,7 @@ DF_VIEW_UI_FUNCTION_DEF(FileSystem)
           UI_PrefWidth(ui_pct(1, 0))
           {
             UI_Box *box = ui_build_box_from_stringf(UI_BoxFlag_DrawText, "%S##%p", file->filename, view);
-            df_box_equip_fuzzy_match_range_list_vis(box, file->match_ranges);
+            df_box_equip_fuzzy_match_range_list_vis(box, file->match_ranges.list);
           }
         }
 
diff --git a/src/df/gfx/df_views.h b/src/df/gfx/df_views.h
index 1517afe77..d8cbe2b20 100644
--- a/src/df/gfx/df_views.h
+++ b/src/df/gfx/df_views.h
@@ -22,7 +22,7 @@ struct DF_FileInfo
 {
   String8 filename;
   FileProperties props;
-  DF_FuzzyMatchRangeList match_ranges;
+  DF_ScoredFuzzyMatchRangeList match_ranges;
 };
 
 typedef struct DF_FileInfoNode DF_FileInfoNode;