diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index a370474e..846ffd1b 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -383,7 +383,7 @@ FSTProcessor::readTMAnalysis(InputFile& input) return val; } -int +int32_t FSTProcessor::readPostgeneration(InputFile& input, UFILE *output) { if(!input_buffer.isEmpty()) @@ -903,7 +903,7 @@ FSTProcessor::lastBlank(UString const &str) } void -FSTProcessor::printSpace(UChar const val, UFILE *output) +FSTProcessor::printSpace(UChar32 const val, UFILE *output) { if(blankqueue.size() > 0) { @@ -1803,7 +1803,7 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output) int last = 0; set empty_escaped_chars; - while(UChar val = readPostgeneration(input, output)) + while(UChar32 val = readPostgeneration(input, output)) { if(val == '~') { @@ -2027,7 +2027,7 @@ FSTProcessor::intergeneration(InputFile& input, UFILE *output) while (true) { - UChar val = readPostgeneration(input, output); + UChar32 val = readPostgeneration(input, output); if (val == '~') { @@ -2165,7 +2165,7 @@ FSTProcessor::transliteration(InputFile& input, UFILE *output) UString sf; int last = 0; - while(UChar val = readPostgeneration(input, output)) + while(UChar32 val = readPostgeneration(input, output)) { if(u_ispunct(val) || u_isspace(val)) { diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h index 04cc68a0..7cde42e1 100644 --- a/lttoolbox/fst_processor.h +++ b/lttoolbox/fst_processor.h @@ -452,7 +452,7 @@ class FSTProcessor * @param val the space character to use if no blank queue * @param output stream where the word is written */ - void printSpace(UChar const val, UFILE *output); + void printSpace(UChar32 const val, UFILE *output); void skipUntil(InputFile& input, UFILE *output, UChar32 const character); static UString removeTags(UString const &str); diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py index 7de96a52..22cf1794 100644 --- a/tests/lt_proc/__init__.py +++ 
b/tests/lt_proc/__init__.py @@ -254,5 +254,12 @@ class AlphabeticMultibyteTest(ProcTest): expectedOutputs = ["^𝜊/*𝜊$"] +class AlphabeticMultibyteTestPost(ProcTest): + procdix = "data/minimal-mono.dix" + inputs = ["𝜊"] # code point >65535, needs four bytes in UTF-8 (two UTF-16 code units), isAlphabetic + procflags = ['-z', '-p'] + expectedOutputs = ["𝜊"] + + # These fail on some systems: #from null_flush_invalid_stream_format import *