goccy · goccy · Nov 30, 2024 · Nov 29, 2024 · Nov 29, 2024 · Nov 29, 2024
diff --git a/decode_test.go b/decode_test.go
@@ -810,7 +810,7 @@ func TestDecoder(t *testing.T) {
 		},
 		{
 			"a: \"\\0\"\n",
-			map[string]string{"a": "\\0"},
+			map[string]string{"a": "\x00"},
 		},
 		{
 			"b: 2\na: 1\nd: 4\nc: 3\nsub:\n  e: 5\n",

diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
@@ -1347,7 +1347,7 @@ t4: 2098-01-09T10:40:47Z
 					Type:          token.DoubleQuoteType,
 					CharacterType: token.CharacterTypeIndicator,
 					Indicator:     token.QuotedScalarIndicator,
-					Value:         "\\0",
+					Value:         "\x00",
 					Origin:        " \"\\0\"",
 				},
 			},

diff --git a/parser/parser.go b/parser/parser.go
@@ -670,6 +670,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
 		return anchor, nil
 	}
 
+	if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName {
+		// key: <value is not defined>
+		// &anchor
+		//
+		//  key: <value is not defined>
+		// &anchor
+		return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken())
+	}
+
 	if tk.Column() < keyCol {
 		// in this case,
 		// ----

diff --git a/parser/token.go b/parser/token.go
@@ -573,6 +573,16 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) {
 					},
 				}), nil
 			}
+			if tokens[i].Line() == tokens[i+1].Line() {
+				switch tokens[i+1].GroupType() {
+				case TokenGroupMapKey, TokenGroupMapKeyValue:
+					return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken())
+				}
+				switch tokens[i+1].Type() {
+				case token.SequenceEntryType:
+					return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken())
+				}
+			}
 			tks, err := createDocumentTokens(tokens[i+1:])
 			if err != nil {
 				return nil, err

diff --git a/scanner/scanner.go b/scanner/scanner.go
@@ -255,15 +255,19 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
 			isFirstLineChar = true
 			isNewLine = true
 			s.progressLine(ctx)
+			if idx+1 < size {
+				if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil {
+					return nil, err
+				}
+			}
 			continue
 		} else if isFirstLineChar && (c == ' ' || c == '\t') {
 			continue
 		} else if c != '\'' {
 			value = append(value, c)
 			isFirstLineChar = false
 			continue
-		}
-		if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
+		} else if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
 			// '' handle as ' character
 			value = append(value, c)
 			ctx.addOriginBuf(c)
@@ -339,6 +343,11 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
 			isFirstLineChar = true
 			isNewLine = true
 			s.progressLine(ctx)
+			if idx+1 < size {
+				if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil {
+					return nil, err
+				}
+			}
 			continue
 		} else if isFirstLineChar && (c == ' ' || c == '\t') {
 			continue
@@ -445,10 +454,20 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
 				progress = 1
 				ctx.addOriginBuf(nextChar)
 				value = append(value, nextChar)
+			case '0':
+				progress = 1
+				ctx.addOriginBuf(nextChar)
+				value = append(value, '\x00')
 			case ' ':
 				// skip escape character.
 			default:
-				value = append(value, c)
+				s.progressColumn(ctx, 1)
+				return nil, ErrInvalidToken(
+					token.Invalid(
+						fmt.Sprintf("found unknown escape character %q", nextChar),
+						string(ctx.obuf), s.pos(),
+					),
+				)
 			}
 			idx += progress
 			s.progressColumn(ctx, progress)
@@ -495,6 +514,26 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
 	)
 }
 
+func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error {
+	if len(src) < 3 { // too short to hold a "---" or "..." marker
+		return nil
+	}
+	var marker string
+	if len(src) == 3 { // exactly a marker's length: compare it as-is
+		marker = string(src)
+	} else { // take the marker plus the one rune that has to terminate it
+		marker = strings.TrimRightFunc(string(src[:4]), func(r rune) bool {
+			return r == ' ' || r == '\t' || r == '\n' || r == '\r'
+		})
+	}
+	if marker == "---" || marker == "..." { // callers pass the text that follows a line break inside a quoted scalar
+		return ErrInvalidToken(
+			token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()),
+		)
+	}
+	return nil
+}
+
 func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) {
 	if ctx.existsBuffer() {
 		return false, nil
@@ -719,7 +758,7 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {
 
 	if ctx.isEOS() {
 		s.addBufferedTokenIfExists(ctx)
-	} else if s.isAnchor || s.isAlias {
+	} else if s.isAnchor || s.isAlias || s.isDirective {
 		s.addBufferedTokenIfExists(ctx)
 	}
 	if ctx.existsBuffer() && s.isFirstCharAtLine {
@@ -883,6 +922,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool {
 		return false
 	}
 
+	s.addBufferedTokenIfExists(ctx)
 	ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
 	s.progressColumn(ctx, 3)
 	ctx.clear()

diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go
@@ -14,12 +14,12 @@ import (
 )
 
 var failureTestNames = []string{
-	"anchors-on-empty-scalars",
-	"aliases-in-flow-objects",
-	"aliases-in-explicit-block-mapping",
+	"anchors-on-empty-scalars",          // no json.
+	"aliases-in-flow-objects",           // no json.
+	"aliases-in-explicit-block-mapping", // no json.
 	"aliases-in-implicit-block-mapping",
 	"bare-document-after-document-end-marker",
-	"block-mapping-with-missing-keys",
+	"block-mapping-with-missing-keys", // no json.
 	"block-mapping-with-missing-values",
 	"block-mapping-with-multiline-scalars",
 	"block-scalar-with-more-spaces-than-first-content-line",
@@ -36,26 +36,21 @@ var failureTestNames = []string{
 	"escaped-slash-in-double-quotes",
 	"explicit-key-and-value-seperated-by-comment", //nolint: misspell
 	"extra-words-on-yaml-directive",
-	"empty-implicit-key-in-single-pair-flow-sequences",
-	"empty-keys-in-block-and-flow-mapping",
-	"empty-lines-at-end-of-document",
-	"flow-mapping-separate-values",
+	"empty-implicit-key-in-single-pair-flow-sequences", // no json.
+	"empty-keys-in-block-and-flow-mapping",             // no json.
+	"empty-lines-at-end-of-document",                   // no json.
+	"flow-mapping-separate-values",                     // no json.
 	"flow-sequence-in-flow-mapping",
 	"flow-collections-over-many-lines/01",
 	"flow-mapping-colon-on-line-after-key/02",
 	"flow-mapping-edge-cases",
 	"flow-sequence-with-invalid-comma-at-the-beginning",
 	"folded-block-scalar",
 	"folded-block-scalar-1-3",
-	"implicit-flow-mapping-key-on-one-line",
-	"invalid-anchor-in-zero-indented-sequence",
+	"implicit-flow-mapping-key-on-one-line", // no json.
 	"invalid-comment-after-comma",
 	"invalid-comment-after-end-of-flow-sequence",
-	"invalid-document-end-marker-in-single-quoted-string",
-	"invalid-document-start-marker-in-doublequoted-tring",
-	"invalid-escape-in-double-quoted-string",
 	"invalid-tag",
-	"key-with-anchor-after-missing-explicit-mapping-value",
 	"leading-tabs-in-double-quoted/02",
 	"leading-tabs-in-double-quoted/05",
 	"legal-tab-after-indentation",
@@ -65,34 +60,31 @@ var failureTestNames = []string{
 	"literal-modifers/02",
 	"literal-modifers/03",
 	"literal-scalars",
-	"mapping-key-and-flow-sequence-item-anchors",
-	"mapping-starting-at-line",
-	"mapping-with-anchor-on-document-start-line",
+	"mapping-key-and-flow-sequence-item-anchors", // no json.
 	"multiline-double-quoted-implicit-keys",
 	"multiline-plain-flow-mapping-key",
 	"multiline-plain-value-with-tabs-on-empty-lines",
 	"multiline-scalar-at-top-level",
 	"multiline-scalar-at-top-level-1-3",
 	"multiline-single-quoted-implicit-keys",
 	"multiline-unidented-double-quoted-block-key",
-	"nested-implicit-complex-keys",
+	"nested-implicit-complex-keys", // no json.
 	"node-anchor-not-indented",
 	"plain-dashes-in-flow-sequence",
 	"plain-url-in-flow-mapping",
-	"question-mark-edge-cases/00",
-	"question-mark-edge-cases/01",
+	"question-mark-edge-cases/00", // no json.
+	"question-mark-edge-cases/01", // no json.
 	"scalar-doc-with-in-content/01",
 	"scalar-value-with-two-anchors",
-	"single-character-streams/01",
-	"single-pair-implicit-entries",
-	"spec-example-2-11-mapping-between-sequences",
-	"spec-example-6-12-separation-spaces",
-	"spec-example-7-16-flow-mapping-entries",
-	"spec-example-7-3-completely-empty-flow-nodes",
-	"spec-example-8-18-implicit-block-mapping-entries",
-	"spec-example-8-19-compact-block-mappings",
+	"single-character-streams/01",                      // no json.
+	"single-pair-implicit-entries",                     // no json.
+	"spec-example-2-11-mapping-between-sequences",      // no json.
+	"spec-example-6-12-separation-spaces",              // no json.
+	"spec-example-7-16-flow-mapping-entries",           // no json.
+	"spec-example-7-3-completely-empty-flow-nodes",     // no json.
+	"spec-example-8-18-implicit-block-mapping-entries", // no json.
+	"spec-example-8-19-compact-block-mappings",         // no json.
 	"spec-example-6-19-secondary-tag-handle",
-	"spec-example-6-21-local-tag-prefix",
 	"spec-example-6-24-verbatim-tags",
 	"spec-example-6-28-non-specific-tags",
 	"spec-example-6-4-line-prefixes",
@@ -112,7 +104,7 @@ var failureTestNames = []string{
 	"spec-example-9-5-directives-documents",
 	"spec-example-9-6-stream",
 	"spec-example-9-6-stream-1-3",
-	"syntax-character-edge-cases/00",
+	"syntax-character-edge-cases/00", // no json.
 	"tab-at-beginning-of-line-followed-by-a-flow-mapping",
 	"tab-indented-top-flow",
 	"tabs-in-various-contexts/003",
@@ -126,14 +118,14 @@ var failureTestNames = []string{
 	"tabs-that-look-like-indentation/08",
 	"tag-shorthand-used-in-documents-but-only-defined-in-the-first",
 	"tags-for-block-objects",
-	"tags-on-empty-scalars",
+	"tags-on-empty-scalars",                            // no json.
 	"trailing-line-of-spaces/01",                       // last '\n' character is needed ?
-	"various-combinations-of-explicit-block-mappings",  // no json
-	"various-trailing-comments",                        // no json
-	"various-trailing-comments-1-3",                    // no json
+	"various-combinations-of-explicit-block-mappings",  // no json.
+	"various-trailing-comments",                        // no json.
+	"various-trailing-comments-1-3",                    // no json.
 	"wrong-indented-flow-sequence",                     // error ?
 	"wrong-indented-multiline-quoted-scalar",           // error ?
-	"zero-indented-sequences-in-explicit-mapping-keys", // no json
+	"zero-indented-sequences-in-explicit-mapping-keys", // no json.
 }
 
 var failureTestNameMap map[string]struct{}