diff --git a/decode_test.go b/decode_test.go index cabfd33c..23229ef5 100644 --- a/decode_test.go +++ b/decode_test.go @@ -437,6 +437,18 @@ func TestDecoder(t *testing.T) { `"1": "a\x2Fb\u002Fc\U0000002Fd"`, map[interface{}]interface{}{"1": `a/b/c/d`}, }, + { + `"\ud83e\udd23"`, + "🤣", + }, + { + `"\uD83D\uDE00\uD83D\uDE01"`, + "😀😁", + }, + { + `"\uD83D\uDE00a\uD83D\uDE01"`, + "😀a😁", + }, { "'1': \"2\\n3\"", map[interface{}]interface{}{"1": "2\n3"}, diff --git a/scanner/scanner.go b/scanner/scanner.go index b0eac48d..865cfb84 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -391,6 +391,34 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) { return } codeNum := hexRunesToInt(src[idx+2 : idx+6]) + + // Handle surrogate pairs + if codeNum >= 0xD800 && codeNum <= 0xDBFF { + high := codeNum + + if idx+11 >= size { + // TODO: need to return error + //err = xerrors.New("not enough characters for surrogate pair") + return + } + + if src[idx+6] != '\\' || src[idx+7] != 'u' { + // TODO: need to return error + //err = xerrors.New("expected escape code after high surrogate") + return + } + + low := hexRunesToInt(src[idx+8 : idx+12]) + if low < 0xDC00 || low > 0xDFFF { + // TODO: need to return error + //err = xerrors.New("expected low surrogate after high surrogate") + return + } + + codeNum = ((high - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000 + idx += 6 + } + value = append(value, rune(codeNum)) idx += 5 continue