Skip to content

Commit 9e01204

Browse files
committed
Cleaning up and improving textmsg reading code
A lot more efficient than before, but still nowhere near as good as it can be.
1 parent b8b46af commit 9e01204

File tree

12 files changed

+1734
-1024
lines changed

12 files changed

+1734
-1024
lines changed

files/endianio.h

Lines changed: 317 additions & 326 deletions
Large diffs are not rendered by default.

files/msgfile.cc

Lines changed: 146 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,14 @@
2727
#include "databuf.h"
2828
#include "ios_state.hpp"
2929

30+
#include <algorithm>
31+
#include <charconv>
3032
#include <cstdlib>
3133
#include <iostream>
34+
#include <limits>
3235
#include <sstream>
3336
#include <string>
37+
#include <string_view>
3438
#include <vector>
3539

3640
using std::cerr;
@@ -42,6 +46,17 @@ using std::string;
4246
using std::stringstream;
4347
using std::vector;
4448

49+
Text_msg_file_reader::Text_msg_file_reader() : global_first(0) {}
50+
51+
Text_msg_file_reader::Text_msg_file_reader(IDataSource& in) : global_first(0) {
52+
in.read(contents, in.getAvail());
53+
if (!parse_contents()) {
54+
cerr << "Error parsing text message file" << endl;
55+
global_section.clear();
56+
items.clear();
57+
}
58+
}
59+
4560
/*
4661
* Read in text, where each line is of the form "nnn:sssss", where nnn is
4762
* to be the Flex entry #, and anything after the ':' is the string to
@@ -54,143 +69,170 @@ using std::vector;
5469
* %%section shapes
5570
* ....
5671
* %%endsection
57-
* Output: # of first message (i.e., lowest-numbered msg), or -1 if
58-
* error.
72+
* Output: true if successful, false if not.
5973
*/
6074

61-
int Read_text_msg_file(
62-
IDataSource* in, vector<string>& strings, const char* section) {
63-
strings.resize(0); // Initialize.
64-
strings.reserve(1000);
65-
int linenum = 0;
66-
#define NONEFOUND 0xffffffff
67-
unsigned long first = NONEFOUND; // Index of first one found.
68-
long next_index = 0; // For auto-indexing of lines
69-
static const string sectionStart("%%section");
70-
static const string sectionEnd("%%endsection");
71-
while (!in->eof()) {
75+
bool Text_msg_file_reader::parse_contents() {
76+
constexpr static const auto NONEFOUND = std::numeric_limits<uint32>::max();
77+
constexpr static const std::string_view sectionStart("%%section");
78+
constexpr static const std::string_view sectionEnd("%%endsection");
79+
80+
Section_data* current_section = &global_section;
81+
uint32* current_first = &global_first;
82+
*current_first = NONEFOUND;
83+
84+
current_section->reserve(1000);
85+
86+
int linenum = 0;
87+
uint32 next_index = 0; // For auto-indexing of lines
88+
89+
enum class State : uint8 {
90+
None,
91+
InSection
92+
};
93+
std::string_view data(contents);
94+
State state = State::None;
95+
while (!data.empty()) {
7296
++linenum;
73-
std::string line;
74-
in->readline(line);
97+
const auto lineEnd = data.find_first_of("\r\n");
98+
auto line = data.substr(0, lineEnd);
99+
// Skip data up to the start of the next line.
100+
if (lineEnd == std::string_view::npos) {
101+
data.remove_prefix(data.size());
102+
} else {
103+
data.remove_prefix(line.size());
104+
}
105+
const auto nextLine = data.find_first_not_of("\r\n");
106+
if (nextLine != std::string_view::npos) {
107+
data.remove_prefix(nextLine);
108+
} else {
109+
data.remove_prefix(data.size());
110+
}
111+
// Ignore leading whitespace.
112+
auto nonWs = line.find_first_not_of(" \t\b");
113+
line.remove_prefix(nonWs);
75114
if (line.empty()) {
76115
continue; // Empty line.
77116
}
78117

79-
if (section) {
80-
if (line.compare(0, sectionStart.length(), sectionStart)) {
81-
continue;
118+
if (line.compare(0, sectionStart.length(), sectionStart) == 0) {
119+
if (state == State::InSection) {
120+
cerr << "Line " << linenum
121+
<< " has a section starting inside another section"
122+
<< endl;
82123
}
83-
const string sectionName(line.substr(
84-
line.find_first_not_of(" \t\b", sectionStart.length())));
85-
if (sectionName == section) {
86-
// Found the section.
87-
section = nullptr;
88-
continue;
124+
const auto namePos
125+
= line.find_first_not_of(" \t\b", sectionStart.length());
126+
line.remove_prefix(namePos);
127+
auto sectionName(line);
128+
if (sectionName.empty()) {
129+
cerr << "Line " << linenum << " has an empty section name"
130+
<< endl;
131+
return false;
132+
}
133+
{
134+
auto [iter, inserted] = items.try_emplace(sectionName);
135+
if (!inserted) {
136+
cerr << "Line " << linenum
137+
<< " has a duplicate section name: " << sectionName
138+
<< endl;
139+
return false;
140+
}
141+
current_section = &iter->second;
142+
current_section->reserve(1000);
143+
}
144+
{
145+
auto [iter, inserted]
146+
= firsts.try_emplace(sectionName, NONEFOUND);
147+
if (!inserted) {
148+
cerr << "Line " << linenum
149+
<< " has a duplicate section name: " << sectionName
150+
<< endl;
151+
return false;
152+
}
153+
current_first = &iter->second;
89154
}
90-
cerr << "Line #" << linenum
91-
<< " has the wrong section name: " << sectionName
92-
<< " != " << section << endl;
93-
return -1;
155+
state = State::InSection;
156+
continue;
94157
}
95-
if (!line.compare(0, sectionEnd.length(), sectionEnd)) {
96-
break;
158+
159+
if (line.compare(0, sectionEnd.length(), sectionEnd) == 0) {
160+
if (state != State::InSection) {
161+
cerr << "Line " << linenum
162+
<< " has an endsection without a section" << endl;
163+
}
164+
// Reset to sane defaults.
165+
state = State::None;
166+
current_section = &global_section;
167+
current_first = &global_first;
168+
continue;
97169
}
98170

99-
unsigned long index;
100-
string lineVal;
171+
uint32 index;
172+
std::string_view lineVal;
101173
if (line[0] == ':') {
102174
// Auto-index lines missing an index.
103175
index = next_index++;
104176
lineVal = line.substr(1);
105177
} else if (line[0] == '#') {
106178
continue;
107179
} else {
108-
char* endptr = &line[0];
109-
const char* ptr = endptr;
110180
// Get line# in decimal, hex, or oct.
111-
index = strtol(ptr, &endptr, 0);
112-
if (endptr == ptr) { // No #?
113-
cerr << "Line " << linenum << " doesn't start with a number"
114-
<< endl;
115-
return -1;
116-
}
117-
if (*endptr != ':') {
181+
auto colon = line.find(':');
182+
if (colon == std::string_view::npos) {
118183
cerr << "Missing ':' in line " << linenum << ". Ignoring line"
119184
<< endl;
120185
continue;
121186
}
122-
lineVal = line.substr(endptr - ptr + 1);
123-
}
124-
if (index >= strings.size()) {
125-
strings.resize(index + 1);
187+
int base = 10;
188+
if (line.size() > 2 && line[0] == '0'
189+
&& (line[1] == 'x' || line[1] == 'X')) {
190+
base = 16;
191+
colon -= 2;
192+
line.remove_prefix(2);
193+
} else if (line[0] == '0') {
194+
base = 8;
195+
}
196+
const auto* start = line.data();
197+
const auto* end = std::next(start, colon);
198+
auto [p, ec] = std::from_chars(start, end, index, base);
199+
if (ec != std::errc() || p != end) {
200+
cerr << "Line " << linenum << " doesn't start with a number"
201+
<< endl;
202+
return false;
203+
}
204+
lineVal = line.substr(colon + 1);
126205
}
127-
strings[index] = std::move(lineVal);
128-
if (index < first) {
129-
first = index;
206+
if (index >= current_section->size()) {
207+
current_section->resize(index + 1);
130208
}
209+
(*current_section)[index] = lineVal;
210+
*current_first = std::min(index, *current_first);
131211
}
132-
return first == NONEFOUND ? -1 : static_cast<int>(first);
212+
return true;
133213
}
134214

135-
/*
136-
* Searches for the start of section in a text msg file.
137-
* Returns true if section is found. The data source will
138-
* be just before the section start.
139-
*/
140-
141-
bool Search_text_msg_section(IDataSource* in, const char* section) {
142-
static const string sectionStart("%%section");
143-
while (!in->eof()) {
144-
std::string line;
145-
const size_t pos = in->getPos();
146-
in->readline(line);
147-
if (line.empty()) {
148-
continue; // Empty line.
149-
}
150-
151-
if (line.compare(0, sectionStart.length(), sectionStart)) {
152-
continue;
153-
}
154-
const string sectionName(line.substr(
155-
line.find_first_not_of(" \t\b", sectionStart.length())));
156-
if (sectionName == section) {
157-
// Found the section.
158-
// Seek to just before it.
159-
in->seek(pos);
160-
return true;
161-
}
215+
[[nodiscard]] std::optional<int> Text_msg_file_reader::get_version() const {
216+
constexpr static const std::string_view versionstr("version");
217+
int firstMsg;
218+
const auto* data = get_section(versionstr, firstMsg);
219+
if (data == nullptr) {
220+
cerr << "No version number in text message file" << endl;
221+
return std::nullopt;
162222
}
163-
in->clear_error();
164-
in->seek(0);
165-
// Section was not found.
166-
return false;
167-
}
168-
169-
int Read_text_msg_file_sections(
170-
IDataSource* in,
171-
vector<vector<string>>& strings, // Strings returned here
172-
const char* sections[], // Section names
173-
int numsections) {
174-
strings.resize(numsections);
175-
int version = 1;
176-
177-
vector<string> versioninfo;
178-
// Read version.
179-
const char* versionstr = "version";
180-
if (Search_text_msg_section(in, versionstr)
181-
&& Read_text_msg_file(in, versioninfo, versionstr) != -1) {
182-
version = static_cast<int>(strtol(versioninfo[0].c_str(), nullptr, 0));
223+
if (data->size() != 1) {
224+
cerr << "Invalid version number in text message file" << endl;
225+
return std::nullopt;
183226
}
184227

185-
for (int i = 0; i < numsections; i++) {
186-
in->clear_error();
187-
in->seek(0);
188-
if (!Search_text_msg_section(in, sections[i])) {
189-
continue;
190-
}
191-
Read_text_msg_file(in, strings[i], sections[i]);
228+
int version;
229+
auto versionStr = (*data)[0];
230+
const auto* start = versionStr.data();
231+
const auto* end = std::next(start, versionStr.size());
232+
if (std::from_chars(start, end, version).ec != std::errc()) {
233+
cerr << "Invalid version number in text message file" << endl;
234+
return std::nullopt;
192235
}
193-
194236
return version;
195237
}
196238

0 commit comments

Comments
 (0)