Skip to content

Commit

Permalink
Merge pull request #289 from KisaragiEffective/perf/do-not-allocate-s…
Browse files Browse the repository at this point in the history
…rc-in-lexer
  • Loading branch information
KisaragiEffective authored Oct 19, 2023
2 parents ed34d0e + 2f26496 commit fc1c401
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 15 deletions.
2 changes: 2 additions & 0 deletions docs/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

## 改行

改行コードはLF (`\n`) または CRLF (`\r\n`) をサポートします。どちらもトークン化の際は単なる改行として取り扱われます。

現状余計な改行を入れることはできません。これは実装の都合によります。将来的には、この制限事項は解除される予定です。

ただし、`block`式の`block`キーワードの直後だけは例外です。
Expand Down
19 changes: 8 additions & 11 deletions package/origlang-compiler/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,26 @@ impl<T> AssociateWithPos for T {
}

#[derive(Debug)]
pub struct Lexer {
pub struct Lexer<'src> {
source_bytes_nth: Cell<Utf8CharBoundaryStartByte>,
source: String,
source: &'src str,
line: Cell<NonZeroUsize>,
column: Cell<NonZeroUsize>,
}

impl Lexer {
impl<'src> Lexer<'src> {
#[must_use = "Lexer do nothing unless calling parsing function"]
pub fn create(source: &str) -> Self {
let src: Cow<'_, str> = if cfg!(windows) {
source.replace("\r\n", "\n").into()
} else {
Cow::Borrowed(source)
};

pub fn create(source: &'src str) -> Self {
Self {
source_bytes_nth: Cell::new(Utf8CharBoundaryStartByte::new(0)),
source: src.to_string(),
source,
line: Cell::new(NonZeroUsize::new(1).unwrap()),
column: Cell::new(NonZeroUsize::new(1).unwrap()),
}
}
}

impl Lexer<'_> {
fn drain_space(&self) {
trace!("drain_space: start vvvvvvvvvvvvvvvvvvv");
while !self.reached_end() {
Expand Down Expand Up @@ -105,6 +101,7 @@ impl Lexer {
} else {
None
}
.or_else(|| self.try_and_eat_str("\r\n").expect("huh?").map(|_| Token::NewLine))
.or_else(|| self.try_and_eat_str("\n").expect("huh?").map(|_| Token::NewLine))
.or_else(||
fold!(
Expand Down
19 changes: 19 additions & 0 deletions package/origlang-compiler/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ fn parse_string_literal_mixed_4_3() {

use std::num::NonZeroUsize;
use origlang_source_span::{Pointed, SourcePosition};
use crate::chars::boundary::Utf8CharBoundaryStartByte;

#[test]
fn token_location() {
Expand Down Expand Up @@ -198,3 +199,21 @@ fn digit_regression() {
let lexer = Lexer::create(EMPTY);
assert_eq!(lexer.next().data, Token::EndOfFile);
}

/// A lone CRLF sequence must lex as exactly one newline token,
/// immediately followed by end-of-file.
#[test]
fn crlf_positive() {
    let lexer = Lexer::create("\r\n");

    let first = lexer.next().data;
    assert_eq!(first, Token::NewLine);

    let second = lexer.next().data;
    assert_eq!(second, Token::EndOfFile);
}

/// A bare carriage return (with no following line feed) is not a newline:
/// the lexer must report it as an unexpected character at byte offset 0.
#[test]
fn crlf_negative() {
    let lexer = Lexer::create("\r");

    // Lex first, then build the expectation, mirroring the evaluation order of a direct assert.
    let actual = lexer.next().data;
    let expected = Token::UnexpectedChar {
        index: Utf8CharBoundaryStartByte::new(0),
        char: '\r',
    };

    assert_eq!(actual, expected);
}
10 changes: 6 additions & 4 deletions package/origlang-compiler/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,18 +128,20 @@ impl TokenKind {
}
}

pub struct Parser {
lexer: Lexer,
pub struct Parser<'src> {
lexer: Lexer<'src>,
}

impl Parser {
impl<'src> Parser<'src> {
#[must_use = "Parser do nothing unless calling parsing function"]
pub fn create(source: &str) -> Self {
pub fn create(source: &'src str) -> Self {
Self {
lexer: Lexer::create(source)
}
}
}

impl Parser<'_> {
/// プログラムが文の列とみなしてパースを試みる。
/// 事前条件: プログラム全体が任意個の文として分解できる
/// # Errors
Expand Down

0 comments on commit fc1c401

Please sign in to comment.