From 7c09e1306401b6393c827a63d2f467fe85567b12 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 12 Nov 2024 16:40:31 +0900 Subject: [PATCH] fix token offset --- lexer/lexer_test.go | 29 +++++++++++++++++++++++++++++ scanner/scanner.go | 5 ++++- token/token.go | 4 ++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index b57fff41..b7392d20 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -3186,3 +3186,32 @@ a: |invalid`, }) } } + +func TestTokenOffset(t *testing.T) { + t.Run("crlf", func(t *testing.T) { + content := "project:\r\n version: 1.2.3\r\n" + tokens := lexer.Tokenize(content) + if len(tokens) != 5 { + t.Fatalf("invalid token num. got %d", len(tokens)) + } + if tokens[4].Value != "1.2.3" { + t.Fatalf("unexpected value. got %q", tokens[4].Value) + } + if tokens[4].Position.Offset != 22 { + t.Fatalf("unexpected offset. got %d", tokens[4].Position.Offset) + } + }) + t.Run("lf", func(t *testing.T) { + content := "project:\n version: 1.2.3\n" + tokens := lexer.Tokenize(content) + if len(tokens) != 5 { + t.Fatalf("invalid token num. got %d", len(tokens)) + } + if tokens[4].Value != "1.2.3" { + t.Fatalf("unexpected value. got %q", tokens[4].Value) + } + if tokens[4].Position.Offset != 21 { + t.Fatalf("unexpected offset. got %d", tokens[4].Position.Offset) + } + }) +} diff --git a/scanner/scanner.go b/scanner/scanner.go index 8739bf58..913c8784 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -606,8 +606,10 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error { func (s *Scanner) scanNewLine(ctx *Context, c rune) { if len(ctx.buf) > 0 && s.savedPos == nil { + bufLen := len(ctx.bufferedSrc()) s.savedPos = s.pos() - s.savedPos.Column -= len(ctx.bufferedSrc()) + s.savedPos.Column -= bufLen + s.savedPos.Offset -= bufLen } // if the following case, origin buffer has unnecessary two spaces. @@ -631,6 +633,7 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) { if c == '\r' && ctx.nextChar() == '\n' { ctx.addOriginBuf('\r') s.progress(ctx, 1) + s.offset++ c = '\n' } diff --git a/token/token.go b/token/token.go index 2b3d99d6..8eaf9743 100644 --- a/token/token.go +++ b/token/token.go @@ -765,8 +765,8 @@ func (t *Token) Clone() *Token { // Dump outputs token information to stdout for debugging. func (t *Token) Dump() { fmt.Printf( - "[TYPE]:%q [CHARTYPE]:%q [INDICATOR]:%q [VALUE]:%q [ORG]:%q [POS(line:column:level)]: %d:%d:%d\n", - t.Type, t.CharacterType, t.Indicator, t.Value, t.Origin, t.Position.Line, t.Position.Column, t.Position.IndentLevel, + "[TYPE]:%q [CHARTYPE]:%q [INDICATOR]:%q [VALUE]:%q [ORG]:%q [POS(line:column:level:offset)]: %d:%d:%d:%d\n", + t.Type, t.CharacterType, t.Indicator, t.Value, t.Origin, t.Position.Line, t.Position.Column, t.Position.IndentLevel, t.Position.Offset, ) }