Skip to content

Commit

Permalink
Handle more cases
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jan 20, 2024
1 parent a0f54bc commit 9ba3cb5
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 14 deletions.
20 changes: 20 additions & 0 deletions src/test/parseCantoneseReadings.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,26 @@ const testCases = [
{ text: '。', reading: '' },
],
},
{
text: '嗰個男仔喺我手臂上搣咗一下。',
reading: 'go2 go3 naam4 zai2 hai2 ngo5 sau2 bei3 soeng6 mit1 zo2 jat1 haa5',
expected: [
{ text: '嗰', reading: 'go2' },
{ text: '個', reading: 'go3' },
{ text: '男', reading: 'naam4' },
{ text: '仔', reading: 'zai2' },
{ text: '喺', reading: 'hai2' },
{ text: '我', reading: 'ngo5' },
{ text: '手', reading: 'sau2' },
{ text: '臂', reading: 'bei3' },
{ text: '上', reading: 'soeng6' },
{ text: '搣', reading: 'mit1' },
{ text: '咗', reading: 'zo2' },
{ text: '一', reading: 'jat1' },
{ text: '下', reading: 'haa5' },
{ text: '。', reading: '' },
],
},
];

for (const { text, reading, expected } of testCases) {
Expand Down
25 changes: 11 additions & 14 deletions src/util/textHandling/parseCantoneseReadings.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ import {
* reading: "nei5 get1 m4 get1 dou2 ngo5 gong2 me1?"
* =>
* [{text: "你", reading: "nei5"}, {text: "get", reading: "get1"}, ...]
* @param {string} text
* @param {string} rawText
* @param {string} readings
* @returns {TextReadingPair[]}
*/
function parseCantoneseReadings(text, readings) {
function parseCantoneseReadings(rawText, readings) {
/**
* @type {TextReadingPair[]}
*/
const resultArray = [];

const textArray = splitString(text, punctuations);
const textArray = splitString(rawText, punctuations);
const readingsArray = splitString(readings, punctuations);

let readingIndex = 0;
Expand All @@ -43,28 +43,25 @@ function parseCantoneseReadings(text, readings) {
resultArray.push({ text, reading });
textIndex++;
readingIndex++;
} else if (isTextPunctuation && isReadingJyuutping) {
} else if (
(isTextPunctuation && isReadingJyuutping) ||
(!!text && reading === undefined)
) {
// Send empty string to reading
resultArray.push({ text, reading: '' });
textIndex++;
} else {
throw new Error(
`Unexpected text "${text}" and reading "${reading}" at index ${i}`
`Unexpected text "${text}" and reading "${reading}" at index ${i} in ${rawText}: ${readings}`
);
}
}
// Check if remaining text in either array
if (textIndex !== textArray.length) {
// Check if remaining readings exist
if (readingIndex < readingsArray.length) {
throw new Error(
`Unexpected text "${textArray[textIndex]}" at index ${textIndex}`
`Unexpected reading "${readingsArray[readingIndex]}" at index ${readingIndex} in ${rawText}: ${readings}`
);
}
if (readingIndex !== readingsArray.length) {
throw new Error(
`Unexpected reading "${readingsArray[readingIndex]}" at index ${readingIndex}`
);
}

return resultArray;
}

Expand Down
1 change: 1 addition & 0 deletions src/util/textHandling/textUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const punctuations = [
'、',
',',
',',
'⋯',
];

/**
Expand Down

0 comments on commit 9ba3cb5

Please sign in to comment.