Skip to content

Commit

Permalink
Merge pull request #44 from atom-ide-community/performance
Browse files Browse the repository at this point in the history
  • Loading branch information
aminya authored Nov 15, 2020
2 parents a2bb051 + 513e9d3 commit a039343
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 106 deletions.
13 changes: 9 additions & 4 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
'configurations': {
# Release Settings
'Release': {
'defines': [ 'NAPI_DISABLE_CPP_EXCEPTIONS' ],
"cflags": [ "-fno-exceptions", "-O3" ],
"cflags_cc": [ "-fno-exceptions", "-O3", "-std=c++2a" ],
'defines': [ 'NAPI_DISABLE_CPP_EXCEPTIONS', 'NDEBUG' ],
"cflags": [ "-fno-exceptions", "-Ofast" ],
"cflags_cc": [ "-fno-exceptions", "-Ofast", "-std=c++2a" ],
"xcode_settings": {
'GCC_OPTIMIZATION_LEVEL': '3', # stop gyp from defaulting to -Os
"CLANG_CXX_LIBRARY": "libc++",
Expand All @@ -28,6 +28,11 @@
"AdditionalOptions": [
# C++ standard
"/std:c++latest",
"/O2", # optimizations
"/Ob3", # aggressive inline
"/Oi", # intrinsic functions
"/Ot", # favor speed
"/DNDEBUG" # turn off asserts
],
'EnableFunctionLevelLinking': 'true',
'EnableIntrinsicFunctions': 'true',
Expand All @@ -41,7 +46,7 @@
},
# Debug Settings
'Debug': {
'defines': [ 'DEBUG', 'NAPI_CPP_EXCEPTIONS', 'ENABLE_DEBUG' ],
'defines': [ 'DEBUG', 'NAPI_CPP_EXCEPTIONS' ],
'cflags': [ '-g', '-O0' ],
"cflags_cc": [
'-fexceptions', # enable exceptions
Expand Down
6 changes: 4 additions & 2 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <cstring>
#include <iostream>
#include <utility>
#include <cassert>

#include <napi.h>

Expand All @@ -17,6 +18,7 @@ using namespace std;
constexpr size_t kMaxThreads = 16;

#ifdef ENABLE_DEBUG
// TODO does not work anymore because we added explicit to constructors
// Safe string class that logs error when index is accessed outside the string.
class SafeString : public std::string {
public:
Expand Down Expand Up @@ -75,9 +77,9 @@ struct Options {
struct AcronymResult {
Score score;
float pos;
int count;
size_t count;

explicit AcronymResult(Score s, float p, int c) noexcept : score(s), pos(p), count(c) {}
explicit AcronymResult(Score s, float p, size_t c) noexcept : score(s), pos(p), count(c) {}
};

extern Element ToLower(const Element &s);
Expand Down
45 changes: 25 additions & 20 deletions src/filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ void filter_internal(const std::vector<CandidateString> &candidates,
const Options &options,
size_t max_results,
CandidateScorePriorityQueue &results) {
const auto scoreProvider = options.usePathScoring ? path_scorer_score : scorer_score;
for (size_t i = 0, len = candidates.size(); i < len; i++) {
const auto &candidate = candidates[i];
if (candidate.empty()) {
continue;
}
const auto scoreProvider = options.usePathScoring ? path_scorer_score : scorer_score;
auto score = scoreProvider(candidate, query, options);
if (score > 0) {
results.emplace(score, start_index + i);
Expand All @@ -40,20 +40,14 @@ void filter_internal(const std::vector<CandidateString> &candidates,
}
}

// Thin wrapper around filter_internal: forwards all six arguments unchanged
// and returns nothing. It adds no logic of its own.
void thread_worker_filter(const std::vector<CandidateString> &candidates,
size_t start_index,
const Element &query,
const Options &options,
size_t max_results,
CandidateScorePriorityQueue &results) {
filter_internal(candidates, start_index, query, options, max_results, results);
}

std::vector<CandidateIndex> sort_priority_queue(CandidateScorePriorityQueue &&candidates) {
vector<CandidateScore> sorted;
std::vector<CandidateIndex> ret;
sorted.reserve(candidates.size());
ret.reserve(candidates.size());

const auto initial_candidates_size = candidates.size();
sorted.reserve(initial_candidates_size);
ret.reserve(initial_candidates_size);

while (!candidates.empty()) {
sorted.emplace_back(candidates.top());
candidates.pop();
Expand All @@ -66,6 +60,10 @@ std::vector<CandidateIndex> sort_priority_queue(CandidateScorePriorityQueue &&ca
}

std::vector<CandidateIndex> filter(const vector<std::vector<CandidateString>> &candidates, const Element &query, const Options &options) {
const auto candidates_size = candidates.size();

assert(1 <= candidates_size);// TODO handled outside

CandidateScorePriorityQueue top_k;
auto max_results = options.max_results;
if (max_results == 0u) {
Expand All @@ -74,17 +72,24 @@ std::vector<CandidateIndex> filter(const vector<std::vector<CandidateString>> &c

// Split the dataset and pass down to multiple threads.
vector<thread> threads;
vector<CandidateScorePriorityQueue> results(candidates.size());
threads.reserve(candidates.size());

auto results = vector<CandidateScorePriorityQueue>(candidates.size());

size_t start_index = 0;
for (size_t i = 1, len = candidates.size(); i < len; i++) {
start_index += candidates[i - 1].size();
threads.emplace_back(thread_worker_filter, ref(candidates[i]), start_index, ref(query), ref(options), max_results, ref(results[i]));
for (size_t i = 1; i < candidates_size; i++) {
assert(1 < i && i < candidates.size() && i < results.size());
start_index += candidates[i - 1].size();//inbounds
threads.emplace_back(filter_internal, ref(candidates[i]), start_index, ref(query), ref(options), max_results, ref(results[i]));// inbounds
}

assert(threads.size() == candidates.size() && results.size() == candidates.size());

// Do the work for first thread.
filter_internal(candidates[0], 0, query, options, max_results, top_k);
// Wait for threads to complete and merge the restuls.
for (size_t i = 1, len = candidates.size(); i < len; i++) {
threads[i - 1].join();
filter_internal(candidates[0], 0, query, options, max_results, top_k);//inbounds (candidate_size >= 1)
// Wait for threads to complete and merge the results.
for (size_t i = 1; i < candidates_size; i++) {
threads[i - 1].join();//inbounds
while (!results[i].empty()) {
top_k.emplace(results[i].top());
results[i].pop();
Expand Down
38 changes: 19 additions & 19 deletions src/matcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ std::vector<size_t> computeMatch(const CandidateString &subject, const Candidate
const auto &query_lw = preparedQuery.query_lw;

// TODO making these two auto breaks the code. There are a lot of narrowing conversions in this file
const int m = subject.size();
const int n = query.size();
const int subject_size = subject.size();
const int query_size = query.size();

// this is like the consecutive bonus, but for camelCase / snake_case initials
const auto acro = scoreAcronyms(subject, subject_lw, query, query_lw);
const auto acro_score = acro.score;

// Init
vector<Score> score_row(n, 0);
vector<Score> csc_row(n, 0);
vector<Score> score_row(query_size, 0);
vector<Score> csc_row(query_size, 0);

// Directions constants
enum class Direction {
Expand All @@ -40,18 +40,18 @@ std::vector<size_t> computeMatch(const CandidateString &subject, const Candidate
};

// Traceback matrix
std::vector<Direction> trace(m * n, Direction::STOP);
std::vector<Direction> trace(subject_size * query_size, Direction::STOP);
auto pos = -1;

auto i = -1;
while (++i < m) {//foreach char si of subject
while (++i < subject_size) {//foreach char si of subject
Score score = 0;
Score score_up = 0;
Score csc_diag = 0;
const auto si_lw = subject_lw[i];

auto j = -1;//0..n-1
while (++j < n) {//foreach char qj of query
while (++j < query_size) {//foreach char qj of query
// reset score
Score csc_score = 0;
Score align = 0;
Expand Down Expand Up @@ -100,17 +100,17 @@ std::vector<size_t> computeMatch(const CandidateString &subject, const Candidate
// Go back in the trace matrix
// and collect matches (diagonals)

i = m - 1;
auto j = n - 1;
pos = i * n + j;
i = subject_size - 1;
auto j = query_size - 1;
pos = i * query_size + j;
auto backtrack = true;
std::vector<size_t> matches;

while (backtrack && i >= 0 && j >= 0) {
switch (trace[pos]) {
case Direction::UP:
i--;
pos -= n;
pos -= query_size;
break;
case Direction::LEFT:
j--;
Expand All @@ -120,7 +120,7 @@ std::vector<size_t> computeMatch(const CandidateString &subject, const Candidate
matches.emplace_back(i + offset);
j--;
i--;
pos -= n + 1;
pos -= query_size + 1;
break;
default:
backtrack = false;
Expand Down Expand Up @@ -172,13 +172,13 @@ std::vector<size_t> basenameMatch(const CandidateString &subject, const Candidat
// (Assume sequences are sorted, matches are sorted by construction.)
//
std::vector<size_t> mergeMatches(const std::vector<size_t> &a, const std::vector<size_t> &b) {
const auto m = a.size();
const auto n = b.size();
const auto a_size = a.size();
const auto b_size = b.size();

if (n == 0) {
if (b_size == 0) {
return a;
}
if (m == 0) {
if (a_size == 0) {
return b;
}

Expand All @@ -187,18 +187,18 @@ std::vector<size_t> mergeMatches(const std::vector<size_t> &a, const std::vector
auto bj = b[j];
std::vector<size_t> out;

while (++i < m) {
while (++i < a_size) {
auto ai = a[i];

while (bj <= ai && ++j < n) {
while (bj <= ai && ++j < b_size) {
if (bj < ai) {
out.emplace_back(bj);
}
bj = b[j];
}
out.emplace_back(ai);
}
while (j < n) {
while (j < b_size) {
out.emplace_back(b[j++]);
}
return out;
Expand Down
23 changes: 13 additions & 10 deletions src/path_scorer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Score scorePath(const CandidateString &subject, const CandidateString &subject_l
// {preparedQuery, useExtensionBonus, pathSeparator} = options

// Skip trailing slashes
auto end = subject.size() - 1;
int end = subject.size() - 1;
while (subject[end] == options.pathSeparator) {
end--;
}
Expand Down Expand Up @@ -110,17 +110,19 @@ int countDir(const CandidateString &path, const size_t end, const char pathSepar

//skip slash at the start so `foo/bar` and `/foo/bar` have the same depth.
while ((i < end) && (path[i] == pathSeparator)) {//inbounds
// assert(i>=0); fuzz: if end==0, it does not enter while and i==0
assert(0 <= i);// fuzz: if end==0, it does not enter while and i==0
++i;
}
assert(0 <= i);

while (++i < end) {
// assert(i>=0); fuzz: if end==0, it does not enter while and i==0
assert(0 <= i && i < path.size());// fuzz: if end==0, it does not enter while and i==0
if (path[i] == pathSeparator) {//inbounds
count++;//record first slash, but then skip consecutive ones
while ((++i < end) && (path[i] == pathSeparator)) {}
}
}
assert(0 <= i && i < path.size());

return count;
}
Expand All @@ -146,31 +148,32 @@ Score getExtensionScore(const CandidateString &candidate, const CandidateString

// Check that (a) extension exist, (b) it is after the start of the basename
int pos = candidate.rfind('.', endPos);
// assert(pos >= 0u);
assert(pos >= 0u);
if (pos <= startPos) {
return 0;// (note that startPos >= -1)
}

int n = ext.size();
int ext_size = ext.size();
auto m = endPos - pos;

// ext_size holds the smaller of the two extension lengths, m the larger.
if (m < n) {
n = m;
if (m < ext_size) {
ext_size = m;
m = ext.size();
}

//place cursor after dot & count number of matching characters in extension
pos++;
// assert(pos >= 1u);
assert(pos >= 1u);
auto matched = 0;
while (matched < n) {
// assert(matched >=0); // fuzz: if n==0, does not enter while and matched==0
while (matched < ext_size) {
assert(matched >= 0);// fuzz: if n==0, does not enter while and matched==0
if (candidate[pos + matched] != ext[matched]) {// TODO candidate upper bound
break;
}
++matched;
}
assert(matched >= 0);

// if nothing found, try deeper for multiple extensions, with some penalty for depth
if (matched == 0u && maxDepth > 0) {
Expand Down
3 changes: 2 additions & 1 deletion src/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ std::set<char> getCharCodes(const Element &str) {

// create map
while (i < len) {
// assert(i>=0); // fuzz: if len==0, does not enter while and i==0
assert(0 <= i && i < str.size());// fuzz: if len==0, does not enter while and i==0
charCodes.insert(str[i]);//inbounds
++i;
}
assert(0 <= i && i < str.size());
return charCodes;
}
Loading

0 comments on commit a039343

Please sign in to comment.