-
Notifications
You must be signed in to change notification settings - Fork 25
/
binary_freq_collection.hpp
103 lines (83 loc) · 2.58 KB
/
binary_freq_collection.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#pragma once
#include <stdint.h>

#include <cstddef>
#include <iterator>
#include <stdexcept>
#include <string>

#include "binary_collection.hpp"
namespace ds2i {
/// Paired view over two binary collections built from a common basename:
/// `<basename>.docs` and `<basename>.freqs`.
///
/// The first sequence of the docs collection must be a singleton; its only
/// value is exposed through num_docs(). Iteration skips that header: the
/// docs sequence at position i + 1 is paired with the freqs sequence at
/// position i.
class binary_freq_collection {
public:
    /// Builds the two underlying collections from `<basename>.docs` and
    /// `<basename>.freqs` and reads the document count from the docs header.
    ///
    /// @param basename  common prefix of the two file names (must not be null).
    /// @throws std::invalid_argument if the docs collection has no sequences
    ///         or if its first sequence does not contain exactly one value.
    binary_freq_collection(const char* basename)
        : m_docs((std::string(basename) + ".docs").c_str())
        , m_freqs((std::string(basename) + ".freqs").c_str())
    {
        // Never dereference begin() of an empty collection.
        if (m_docs.begin() == m_docs.end()) {
            throw std::invalid_argument("Docs collection is empty: missing number of documents");
        }

        auto firstseq = *m_docs.begin();
        if (firstseq.size() != 1) {
            throw std::invalid_argument("First sequence should only contain number of documents");
        }
        m_num_docs = *firstseq.begin();
    }

    class iterator;

    /// Iterator to the first (docs, freqs) pair; the docs header sequence
    /// is skipped.
    iterator begin() const
    {
        auto docs_it = m_docs.begin();
        return iterator(++docs_it, m_freqs.begin());
    }

    /// Past-the-end iterator.
    iterator end() const
    {
        return iterator(m_docs.end(), m_freqs.end());
    }

    /// Value stored in the singleton header sequence of the docs collection.
    uint64_t num_docs() const
    {
        return m_num_docs;
    }

    /// One element of the iteration: a docs sequence and the freqs sequence
    /// paired with it.
    struct sequence {
        binary_collection::sequence docs;
        binary_collection::sequence freqs;
    };

    /// Forward iterator over (docs, freqs) pairs.
    ///
    /// The iterator traits are spelled out as member aliases rather than
    /// inherited from std::iterator, which is deprecated since C++17.
    class iterator {
    public:
        using iterator_category = std::forward_iterator_tag;
        using value_type = sequence;
        using difference_type = std::ptrdiff_t;
        using pointer = value_type const*;
        using reference = value_type const&;

        iterator()
        {}

        /// Returns the cached pair for the current position.
        value_type const& operator*() const
        {
            return m_cur_seq;
        }

        value_type const* operator->() const
        {
            return &m_cur_seq;
        }

        /// Advances both underlying iterators and caches the sequences
        /// they now point to.
        ///
        /// NOTE(review): when this step lands on end(), the underlying end
        /// iterators are dereferenced; this relies on
        /// binary_collection::iterator tolerating that -- confirm.
        iterator& operator++()
        {
            m_cur_seq.docs = *++m_docs_it;
            m_cur_seq.freqs = *++m_freqs_it;
            return *this;
        }

        /// Post-increment, required of a forward iterator: advances and
        /// returns a copy of the previous position.
        iterator operator++(int)
        {
            iterator previous(*this);
            ++*this;
            return previous;
        }

        /// Only the docs iterators are compared; the freqs iterator is
        /// always advanced together with the docs iterator.
        bool operator==(iterator const& other) const
        {
            return m_docs_it == other.m_docs_it;
        }

        bool operator!=(iterator const& other) const
        {
            return !(*this == other);
        }

    private:
        friend class binary_freq_collection;

        /// Used by binary_freq_collection::begin() and end().
        ///
        /// NOTE(review): both iterators are dereferenced here even when
        /// they are end iterators -- see the note on operator++.
        iterator(binary_collection::iterator docs_it,
                 binary_collection::iterator freqs_it)
            : m_docs_it(docs_it)
            , m_freqs_it(freqs_it)
        {
            m_cur_seq.docs = *m_docs_it;
            m_cur_seq.freqs = *m_freqs_it;
        }

        binary_collection::iterator m_docs_it;
        binary_collection::iterator m_freqs_it;
        sequence m_cur_seq;  // copy of the pair at the current position
    };

private:
    binary_collection m_docs;
    binary_collection m_freqs;
    uint64_t m_num_docs = 0;
};
}