Skip to content

Commit

Permalink
fix(unicode): escape glyph strings
Browse files Browse the repository at this point in the history
  • Loading branch information
JanDeDobbeleer committed Oct 12, 2023
1 parent 0640a15 commit 6be7fc2
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
12 changes: 12 additions & 0 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,18 @@ func TestEncoder(t *testing.T) {
nil,
},

// Unicode
{
`v: "\ue0b6"` + "\n",
map[string]string{"v": "\ue0b6"},
nil,
},
{
`v: "\ue0b6 test"` + "\n",
map[string]string{"v": "\ue0b6 test"},
nil,
},

// time value
{
"v: 0001-01-01T00:00:00Z\n",
Expand Down
40 changes: 40 additions & 0 deletions token/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package token
import (
"fmt"
"strings"
"unicode"
)

// Character type for character
Expand Down Expand Up @@ -635,6 +636,9 @@ func IsNeedQuoted(value string) bool {
return true
}
for i, c := range value {
if shouldQuoteRune(c) {
return true
}
switch c {
case '#', '\\':
return true
Expand All @@ -647,6 +651,42 @@ func IsNeedQuoted(value string) bool {
return false
}

// shouldQuoteRune reports whether the rune r forces the string that contains
// it to be emitted as a quoted scalar.
//
// A rune does NOT require quoting when any of the following holds:
//   - it is below U+1000 (note: this is only the low part of the Basic
//     Multilingual Plane, not the whole plane, so that glyphs such as the
//     Private Use Area code point U+E0B6 are still quoted);
//   - it is a letter in any script, according to unicode.IsLetter;
//   - it falls inside one of the emoji/symbol blocks listed below.
//
// Every other rune requires quoting. All block bounds are inclusive so that
// the first and last code point of each block (for example U+1F600 or the
// U+FE0F variation selector) are treated like the rest of the block.
func shouldQuoteRune(r rune) bool {
	switch {
	case r < 0x1000: // low code points: Latin, Greek, Cyrillic, Arabic, ...
		return false
	case unicode.IsLetter(r): // letters in any language
		return false
	case r >= 0x2600 && r <= 0x26FF, // Miscellaneous Symbols
		r >= 0x2700 && r <= 0x27BF,   // Dingbats
		r >= 0xFE00 && r <= 0xFE0F,   // Variation Selectors
		r >= 0x1F1E6 && r <= 0x1F1FF, // Regional indicators (flags)
		r >= 0x1F300 && r <= 0x1F5FF, // Miscellaneous Symbols and Pictographs
		r >= 0x1F600 && r <= 0x1F64F, // Emoticons
		r >= 0x1F680 && r <= 0x1F6FF, // Transport and Map Symbols
		r >= 0x1F900 && r <= 0x1F9FF: // Supplemental Symbols and Pictographs
		return false
	}
	return true
}

// LiteralBlockHeader detect literal block scalar header
func LiteralBlockHeader(value string) string {
lbc := DetectLineBreakCharacter(value)
Expand Down

0 comments on commit 6be7fc2

Please sign in to comment.