-
Notifications
You must be signed in to change notification settings - Fork 25
/
profile_queries.cpp
125 lines (102 loc) · 3.96 KB
/
profile_queries.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <iostream>
#include <thread>
#include <succinct/mapper.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include "index_types.hpp"
#include "wand_data.hpp"
#include "queries.hpp"
#include "util.hpp"
/// Runs every query in `queries` against `index` using a pool of worker
/// threads.
///
/// Each worker makes its own copy of `query_op` and processes the queries
/// whose position is congruent to its thread id modulo the number of workers.
/// The value returned by the query operator is discarded. A progress line is
/// logged, under a mutex, whenever a worker reaches a query position that is a
/// multiple of 10000.
///
/// @param index     index the query operator is invoked on
/// @param query_op  query operator; copied once per worker thread
/// @param queries   queries to execute
template <typename QueryOperator, typename IndexType>
void op_profile(IndexType const& index,
                QueryOperator const& query_op,
                std::vector<ds2i::term_id_vec> const& queries)
{
    using namespace ds2i;

    // hardware_concurrency() is only a hint and may return 0 when the value
    // is not computable. With zero workers no query would ever run, so fall
    // back to a single worker in that case.
    size_t n_threads = std::thread::hardware_concurrency();
    if (n_threads == 0) {
        n_threads = 1;
    }

    std::vector<std::thread> threads(n_threads);
    std::mutex io_mutex; // serializes the progress messages

    for (size_t tid = 0; tid < n_threads; ++tid) {
        threads[tid] = std::thread([&, tid]() {
            auto query_op_copy = query_op; // copy one query_op per thread
            // Strided partition: worker `tid` handles tid, tid + n_threads, ...
            for (size_t i = tid; i < queries.size(); i += n_threads) {
                if (i % 10000 == 0) {
                    std::lock_guard<std::mutex> lock(io_mutex);
                    logger() << i << " queries processed" << std::endl;
                }

                query_op_copy(index, queries[i]);
            }
        });
    }

    // All workers capture locals by reference, so they must finish before
    // this function returns.
    for (auto& thread: threads) thread.join();
}
/// Type-level map from an index type to the type instantiated by profile().
/// The primary template is the identity: `type` is `IndexType` itself.
template <typename IndexType>
struct add_profiling {
    using type = IndexType;
};
/// Partial specialization for block_freq_index instantiated with `false` as
/// its second template argument: `type` is the same index with that argument
/// set to `true`.
/// NOTE(review): the second argument presumably switches profiling on --
/// confirm against the block_freq_index declaration.
template <typename Block>
struct add_profiling<ds2i::block_freq_index<Block, false>> {
    using type = ds2i::block_freq_index<Block, true>;
};
/// Maps an index from disk, runs the requested query operators over `queries`
/// via op_profile(), and finally dumps the block profiler to std::cout.
///
/// The index object has type add_profiling<IndexType>::type. When
/// `wand_data_filename` is non-null the WAND data is mapped as well (with the
/// warmup flag); without it only the "and" operator is available and every
/// other requested operator is reported as unsupported.
///
/// @param index_filename      path of the index file to memory-map
/// @param wand_data_filename  path of the WAND data file, or nullptr
/// @param queries             queries to run for each operator
/// @param type                index type name; only used in a log message
/// @param query_type          ':'-separated list of operator names
///                            ("and", "ranked_and", "wand", "maxscore")
template <typename IndexType>
void profile(const char* index_filename,
             const char* wand_data_filename,
             std::vector<ds2i::term_id_vec> const& queries,
             std::string const& type,
             std::string const& query_type)
{
    using namespace ds2i;
    using profiled_index_type = typename add_profiling<IndexType>::type;

    // The index is declared before the mapped file backing it, so the mapping
    // is released first on scope exit.
    profiled_index_type index;
    logger() << "Loading index from " << index_filename << std::endl;
    boost::iostreams::mapped_file_source index_source(index_filename);
    succinct::mapper::map(index, index_source);

    wand_data<> wdata;
    boost::iostreams::mapped_file_source wand_source;
    bool const have_wand_data = (wand_data_filename != nullptr);
    if (have_wand_data) {
        wand_source.open(wand_data_filename);
        succinct::mapper::map(wdata, wand_source,
                              succinct::mapper::map_flags::warmup);
    }

    logger() << "Performing " << type << " queries" << std::endl;

    std::vector<std::string> requested_ops;
    boost::algorithm::split(requested_ops, query_type, boost::is_any_of(":"));

    for (std::string const& op_name: requested_ops) {
        logger() << "Query type: " << op_name << std::endl;

        // Boolean AND does not need the WAND data.
        if (op_name == "and") {
            op_profile(index, and_query<false>(), queries);
            continue;
        }

        // The ranked operators are only available when WAND data was given.
        // NOTE(review): the literal 10 is presumably the number of results
        // requested -- confirm against the query operator constructors.
        if (have_wand_data) {
            if (op_name == "ranked_and") {
                op_profile(index, ranked_and_query(wdata, 10), queries);
                continue;
            }
            if (op_name == "wand") {
                op_profile(index, wand_query(wdata, 10), queries);
                continue;
            }
            if (op_name == "maxscore") {
                op_profile(index, maxscore_query(wdata, 10), queries);
                continue;
            }
        }

        logger() << "Unsupported query type: " << op_name << std::endl;
    }

    block_profiler::dump(std::cout);
}
/// Entry point.
///
/// Arguments: <index type> <query type[:query type...]> <index filename>
/// [wand data filename]. Queries are collected with read_query() until it
/// returns false, then profile<T_index>() is invoked for the index type T
/// from DS2I_INDEX_TYPES whose name equals the first argument.
///
/// @return 1 when fewer than three arguments are supplied, 0 otherwise
int main(int argc, const char** argv)
{
    using namespace ds2i;

    // Three positional arguments are mandatory. Without this check argv[1]
    // could be a null pointer, and building a std::string from it is
    // undefined behaviour.
    if (argc < 4) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "profile_queries")
                  << " <index type> <query type[:query type...]>"
                  << " <index filename> [wand data filename]" << std::endl;
        return 1;
    }

    std::string type = argv[1];
    const char* query_type = argv[2];
    const char* index_filename = argv[3];
    const char* wand_data_filename = nullptr;
    if (argc > 4) {
        wand_data_filename = argv[4];
    }

    std::vector<term_id_vec> queries;
    term_id_vec q;
    while (read_query(q)) queries.push_back(q);

    // The preprocessor expands LOOP_BODY once per entry of DS2I_INDEX_TYPES,
    // producing an `else if (type == "<name>")` branch that instantiates
    // profile<> for <name>_index. The leading `if (false)` only exists so
    // every generated branch can start with `} else if`.
    if (false) {
#define LOOP_BODY(R, DATA, T)                                  \
        } else if (type == BOOST_PP_STRINGIZE(T)) {            \
            profile<BOOST_PP_CAT(T, _index)>                   \
                (index_filename, wand_data_filename, queries,  \
                 type, query_type);                            \
            /**/

        BOOST_PP_SEQ_FOR_EACH(LOOP_BODY, _, DS2I_INDEX_TYPES);
#undef LOOP_BODY
    } else {
        logger() << "ERROR: Unknown type " << type << std::endl;
    }
}