Skip to content

Commit 3bd2227

Browse files
committed
Fix off-by-one at EOF which put an extra space on the surface form (sf) of the final dot
We were getting ^. /.<sent>$ instead of ^./.<sent>$ because we now use the value returned by readAnalysis even when it is 0, and it is 0 both at NUL bytes and at EOF. Fixed by adding a NUL byte to input_buffer on EOF, so the rest of the code treats EOF the same way as a NUL byte. Also adds a test.
1 parent ae8cb34 commit 3bd2227

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

lttoolbox/fst_processor.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ FSTProcessor::readAnalysis(FILE *input)
340340
int altval = 0;
341341
if(feof(input))
342342
{
343+
input_buffer.add(0); // so it's treated like the NUL byte
343344
return 0;
344345
}
345346

@@ -1629,7 +1630,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
16291630

16301631
if(current_state.size() != 0)
16311632
{
1632-
if(val)
1633+
if(val != 0)
16331634
{
16341635
alphabet.getSymbol(sf, val);
16351636
}

tests/data/space-eof-incond.dix

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<dictionary>
3+
<alphabet/>
4+
<sdefs>
5+
<sdef n="adj"/>
6+
<sdef n="sent"/>
7+
</sdefs>
8+
<pardefs>
9+
<pardef n="somepardef">
10+
<e> <p><l>foo</l><r>bar</r></p></e>
11+
</pardef>
12+
</pardefs>
13+
14+
<section id="final" type="inconditional">
15+
<e> <i>.<b/>y</i> <p><l></l><r><s n="adj"/></r></p></e>
16+
<e> <i>.xx</i> <p><l></l><r><s n="adj"/></r></p></e>
17+
<e> <i>.</i> <p><l></l><r><s n="sent"/></r></p></e>
18+
</section>
19+
20+
</dictionary>

tests/lt_proc/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,13 @@ class PostgenerationWordboundBlankNoRuleMatchTest(ProcTest):
212212
expectedOutputs = [ "[[t:span:HIIiRQ]]Complacer[[/]] [[t:span01:HIIiRQ]]le[[/]] [[t:span02:HIIiRQ]]ayuda[[/]] [[11t:span:HIIiRQ; t:a:_IOHRg]]mejora[[/]] [[22t:span:HIIiRQ; t:a:_IOHRg]]la[[/]] [[33t:span:HIIiRQ; t:a:_IOHRg]]prenda[[/]]"]
213213

214214

215+
class SpaceAtEOF(ProcTest):
216+
procdix = "data/space-eof-incond.dix"
217+
inputs = ['. ']
218+
expectedOutputs = ['^./.<sent>$ ']
219+
procflags = [] # type: List[str]
220+
flushing = False
221+
215222

216223
# These fail on some systems:
217224
#from null_flush_invalid_stream_format import *

0 commit comments

Comments
 (0)