Skip to content

Commit

Permalink
Fixed error rendering Burmese text
Browse files Browse the repository at this point in the history
Iterate over grapheme clusters instead of words, treating word boundaries as line-breaking opportunities. This eliminates the possibility of wrapping a line in the middle of a grapheme cluster, which is valid in some writing systems such as Thai; in exchange, it mitigates the risk of an invalid section index in Burmese, where the word segmenter considers some modifiers to be “words”.
  • Loading branch information
1ec5 committed Sep 7, 2024
1 parent d5597ac commit b3b7359
Showing 1 changed file with 16 additions and 18 deletions.
34 changes: 16 additions & 18 deletions src/symbol/shaping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -532,28 +532,26 @@ export function determineLineBreaks(
const potentialLineBreaks = [];
const targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth, glyphMap, imagePositions, layoutTextSize);

const graphemes = splitByGraphemeCluster(logicalInput.text);
const words = wordSegmenter.segment(logicalInput.text);
let currentX = 0;
let graphemeIndex = 0;
for (const {index: wordIndex, segment: word} of wordSegmenter.segment(logicalInput.text)) {
const graphemes = splitByGraphemeCluster(word);
for (const {segment: grapheme} of graphemes) {
const section = logicalInput.getSection(graphemeIndex);
if (grapheme.trim()) {
currentX += getGlyphAdvance(grapheme, section, glyphMap, imagePositions, spacing, layoutTextSize);
}
graphemeIndex++;
for (const [i, grapheme] of graphemes.entries()) {
// Check whether the grapheme cluster immediately follows a word boundary.
const prevWord = words.containing(grapheme.index - 1);
const word = words.containing(grapheme.index);
if (prevWord && prevWord.index !== word.index) {
// Score the line breaking opportunity based on the characters immediately before and after the word boundary.
const prevCodePoint = logicalInput.text.codePointAt(grapheme.index - 1);
const firstCodePoint = grapheme.segment.codePointAt(0);
const penalty = calculatePenalty(prevCodePoint, firstCodePoint);
const lineBreak = evaluateBreak(i, currentX, targetWidth, potentialLineBreaks, penalty, false);
potentialLineBreaks.push(lineBreak);
}

const nextWordIndex = wordIndex + word.length;
const lastCodePoint = graphemes.at(-1).segment.codePointAt(0);
const nextWordCodePoint = logicalInput.text.codePointAt(nextWordIndex);
if (!nextWordCodePoint) {
continue;
const section = logicalInput.getSection(i);
if (grapheme.segment.trim()) {
currentX += getGlyphAdvance(grapheme.segment, section, glyphMap, imagePositions, spacing, layoutTextSize);
}

const penalty = calculatePenalty(lastCodePoint, nextWordCodePoint);
const lineBreak = evaluateBreak(graphemeIndex, currentX, targetWidth, potentialLineBreaks, penalty, false);
potentialLineBreaks.push(lineBreak);
}

return leastBadBreaks(
Expand Down

0 comments on commit b3b7359

Please sign in to comment.