Skip to content

Commit

Permalink
Fix parsing of document and string (#513)
Browse files Browse the repository at this point in the history
* fix invalid test case
* fix parsing of document
* fix validation
  • Loading branch information
goccy authored Nov 9, 2024
1 parent e1bab38 commit 6b0c68e
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 39 deletions.
3 changes: 0 additions & 3 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -922,9 +922,6 @@ func TestEncodeWithNestedYAML(t *testing.T) {
value: map[string]interface{}{"v": "# comment\n"},
expectDifferent: true,
},
{
value: map[string]interface{}{"v": "\n"},
},
}

for _, test := range tests {
Expand Down
210 changes: 210 additions & 0 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1877,6 +1877,216 @@ a: !!binary |
},
{
YAML: `
a:
b
c
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b\nc",
Origin: "\n b\n\n c",
},
},
},
{
YAML: `
a:
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b\nc d",
Origin: "\n b\n\n\n c\n d\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: |
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.LiteralType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: "|",
Origin: " |\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b \n\n \nc\nd \n",
Origin: " b \n\n \n c\n d \n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: >
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.FoldedType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: ">",
Origin: " >\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b \n\n \nc d \n",
Origin: " b \n\n \n c\n d \n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: >
Text`,
Tokens: token.Tokens{
Expand Down
6 changes: 5 additions & 1 deletion parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,11 @@ func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) {
ntk = p.nextNotCommentToken()
antk = p.afterNextNotCommentToken()
}
if tk := p.nextNotCommentToken(); tk != nil && tk.Position.Line > node.Start.Position.Line && tk.Position.Column > node.Start.Position.Column {
validationTk := node.Start
if len(node.Values) != 0 {
validationTk = node.Values[len(node.Values)-1].Key.GetToken()
}
if tk := p.nextNotCommentToken(); tk != nil && tk.Position.Line > validationTk.Position.Line && tk.Position.Column > validationTk.Position.Column {
// a: b
// c <= this token is invalid.
return nil, errors.ErrSyntax("value is not allowed in this context", tk)
Expand Down
47 changes: 47 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,20 @@ i: 'j'
"e": "f"
g: "h"
i: 'j'
`,
},
{
`
a:
- |2
b
c: d
`,
`
a:
- |2
b
c: d
`,
},
}
Expand Down Expand Up @@ -1165,6 +1179,39 @@ b: - 2
^
`,
},
{
`
a:
- |
b
c: d
`,
`
[5:5] value is not allowed in this context
2 | a:
3 | - |
4 | b
> 5 | c: d
^
`,
},
{
`
a:
- |
b
c:
d: e
`,
`
[5:5] value is not allowed in this context
2 | a:
3 | - |
4 | b
> 5 | c:
^
6 | d: e`,
},
}
for _, test := range tests {
t.Run(test.source, func(t *testing.T) {
Expand Down
33 changes: 19 additions & 14 deletions scanner/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,33 +145,32 @@ func (c *Context) addDocumentIndent(column int) {

// If the first line of the document has already been evaluated, the number is treated as the threshold, since the `docFirstLineIndentColumn` is a positive number.
if c.docFirstLineIndentColumn <= column {
// In the folded state, new-line-char is normally treated as space,
// but if the number of indents is different from the number of indents in the first line,
// new-line-char is used as is instead of space.
// Therefore, it is necessary to replace the space already added to buf.
// `c.docFoldedNewLine` is a variable that is set to true for every newline.
if c.isFolded && c.docFoldedNewLine {
c.buf[len(c.buf)-1] = '\n'
if (c.isFolded || c.isRawFolded) && c.docFoldedNewLine {
c.docFoldedNewLine = false
}
// Since addBuf ignores space characters, append the space to the buffer directly.
c.buf = append(c.buf, ' ')
}
}

func (c *Context) addDocumentNewLineInFolded(column int) {
if !c.isFolded {
// updateDocumentNewLineInFolded treats the new-line-char as a space when, in a Folded or RawFolded context,
// the content on the current line starts at the same column as the previous line.
func (c *Context) updateDocumentNewLineInFolded(column int) {
if c.isLiteral {
return
}

// Folded or RawFolded.

if !c.docFoldedNewLine {
return
}
if c.docFirstLineIndentColumn == c.docLineIndentColumn &&
c.docLineIndentColumn == c.docPrevLineIndentColumn {
// use space as a new line delimiter.
return
if c.docLineIndentColumn == c.docPrevLineIndentColumn {
if c.buf[len(c.buf)-1] == '\n' {
c.buf[len(c.buf)-1] = ' '
}
}
c.buf[len(c.buf)-1] = '\n'
c.docFoldedNewLine = false
}

Expand Down Expand Up @@ -298,12 +297,18 @@ func (c *Context) bufferedSrc() []rune {

// If the text ends with a space character, remove all of them.
src = []rune(strings.TrimRight(string(src), " "))
if string(src) == "\n" {
// If the content consists only of a newline,
// it can be considered as the document ending without any specified value,
// so it is treated as an empty string.
src = []rune{}
}
}
return src
}

func (c *Context) hasTrimAllEndNewlineOpt() bool {
return strings.HasPrefix(c.docOpt, "-") || strings.HasSuffix(c.docOpt, "-")
return strings.HasPrefix(c.docOpt, "-") || strings.HasSuffix(c.docOpt, "-") || c.isRawFolded
}

func (c *Context) bufferedToken(pos *token.Position) *token.Token {
Expand Down
Loading

0 comments on commit 6b0c68e

Please sign in to comment.