Skip to content

Commit

Permalink
Fix for #174, more parser checks and more parser tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tirix committed Jan 5, 2025
1 parent 5268bcf commit 82c9582
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 6 deletions.
14 changes: 14 additions & 0 deletions metamath-rs/src/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ pub enum Diagnostic {
MissingLabel,
MissingMarkupDef([bool; 3], Span),
MissingProof(Span),
MissingSpaceAfterCommandToken(Span),
MMReservedLabel(Span),
NestedComment(Span, Span),
NotActiveSymbol(TokenIndex),
Expand Down Expand Up @@ -183,6 +184,7 @@ pub enum Diagnostic {
UnclosedBeforeInclude(StatementIndex),
UnclosedCommandComment(Span),
UnclosedCommandString(Span),
UnclosedCommand(Span),
UnclosedComment(Span),
UnclosedHtml(u32, u32),
UnclosedInclude,
Expand Down Expand Up @@ -734,6 +736,12 @@ impl Diagnostic {
stmt,
*math_end,
)]),
MissingSpaceAfterCommandToken(span) => ("Missing space after command token".into(), vec![(
Level::Error,
"A quoted command token must be followed by a space or a semicolon (;)".into(),
stmt,
*span,
)]),
MMReservedLabel(span) => ("Reserved label".into(), vec![(
Level::Warning,
"Labels beginning with 'mm' are reserved for Metamath file names".into(),
Expand Down Expand Up @@ -1017,6 +1025,12 @@ impl Diagnostic {
stmt,
*span,
)]),
UnclosedCommand(span) => ("Unclosed command".into(), vec![(
Level::Error,
"A command must be closed with a semicolon (;)".into(),
stmt,
*span,
)]),
UnclosedComment(comment) => ("Unclosed comment".into(), vec![(
Level::Error,
"Comment requires closing $) before end of file".into(),
Expand Down
18 changes: 14 additions & 4 deletions metamath-rs/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -801,8 +801,8 @@ fn get_heading_name(buffer: &[u8], pos: FilePos) -> TokenPtr<'_> {
}

/// Extract the parser commands out of a $t or $j special comment
fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diagnostic> {
let mut iter = CommandIter {
pub(crate) fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diagnostic> {
let mut iter: CommandIter<'_> = CommandIter {
buffer,
index: pos as usize,
};
Expand All @@ -815,7 +815,7 @@ fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diag
Ok(iter)
}

struct CommandIter<'a> {
pub(crate) struct CommandIter<'a> {
buffer: &'a [u8],
index: usize,
}
Expand Down Expand Up @@ -904,6 +904,12 @@ impl Iterator for CommandIter<'_> {
}
}
}
if self.has_more()
&& !matches!(self.next_char(), b' ' | b'\t' | b'\n' | b'\r' | b';')
{
let cspan = Span::new(token_start, self.index);
return Some(Err(Diagnostic::MissingSpaceAfterCommandToken(cspan)));
}
CommandToken::String(Span::new(token_start, self.index - 1))
} else {
let token_start = self.index;
Expand All @@ -920,7 +926,11 @@ impl Iterator for CommandIter<'_> {
if let Err(diag) = self.skip_white_spaces() {
return Some(Err(diag));
}
if self.has_more() && self.next_char() == b';' {
if !self.has_more() {
let cspan = Span::new(command_start, self.index);
return Some(Err(Diagnostic::UnclosedCommand(cspan)));
}
if self.next_char() == b';' {
// Stop if unquoted semicolon $)
self.index += 1;
return Some(Ok((Span::new(command_start, self.index), command)));
Expand Down
182 changes: 181 additions & 1 deletion metamath-rs/src/parser_tests.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::database::Database;
use crate::diag::Diagnostic;
use crate::parser::commands;
use crate::segment::{Comparer, SegmentOrder};
use crate::statement::{Span, StatementAddress, TokenRef, NO_STATEMENT};
use crate::StatementType;
use crate::{as_str, StatementType};
use std::cmp::Ordering;

#[test]
Expand Down Expand Up @@ -150,3 +151,182 @@ parse_test!(
b"$c X Y\x7F $.",
[(0, Diagnostic::BadCharacter(6, 0x7F))]
);

parse_test!(
test_j_valid,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[]
);

parse_test!(
test_j_missing_closing_quote,
b"$( $j usage 'exel $)",
[(
0,
Diagnostic::UnclosedCommandString(Span { start: 13, end: 20 })
)]
);

parse_test!(
test_j_unclosed_comment,
b"$( $j usage /* $)",
[(
0,
Diagnostic::UnclosedCommandComment(Span { start: 12, end: 17 })
)]
);

parse_test!(
test_j_missing_whitespace,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13''ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[]
);

macro_rules! parse_command_test {
($name:ident, $text:expr, $commands:expr) => {
#[test]
fn $name() {
let commands = commands($text, b'j', 0);
let parsed = commands.and_then(|iter| {
Ok(iter
.map(|command| {
command.and_then(|(span, tokens)| {
Ok((
span,
tokens
.into_iter()
.map(|token| as_str(&token.value($text)).to_owned())
.collect::<Vec<_>>(),
))
})
})
.collect::<Vec<_>>())
});
let expected = Ok($commands
.into_iter()
.map(|item| {
item.and_then(|(start, end, tokens)| {
Ok((
Span::new(start, end),
tokens
.into_iter()
.map(|token| token.to_string())
.collect::<Vec<_>>(),
))
})
})
.collect::<Vec<_>>());
assert_eq!(parsed, expected);
}
};
}

parse_command_test!(
test_command_valid,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[Ok((
6,
129,
[
"usage", "exel", "avoids", "ax-5", "ax-7", "ax-8", "ax-9", "ax-10", "ax-11", "ax-12",
"ax-13", "ax-ext", "ax-sep", "ax-nul", "ax-pow"
]
))]
);

parse_command_test!(
test_command_comment,
b"$( $j usage /* comment, may contain ; * / and ' - ignored */ 'exel' avoids 'ax-5'; $)",
[Ok((6, 82, ["usage", "exel", "avoids", "ax-5"]))]
);

parse_command_test!(
test_command_stop_at_end_tag,
b"$( $j usage 'exel' avoids 'ax-5'; $) Nothing is parsed here",
[Ok((6, 33, ["usage", "exel", "avoids", "ax-5"]))]
);

type NoCommandType = [Result<(usize, usize, [&'static str; 0]), Diagnostic>; 0];
const NO_COMMANDS: NoCommandType = [];
parse_command_test!(test_command_empty_list, b"$( $j $)", NO_COMMANDS);

parse_command_test!(
test_command_more_commands,
b"$( $j
syntax 'wff';
syntax '|-' as 'wff';
unambiguous 'klr 5';
$)",
[
Ok((18, 31, vec!["syntax", "wff"])),
Ok((44, 65, vec!["syntax", "|-", "as", "wff"])),
Ok((78, 98, vec!["unambiguous", "klr 5"]))
]
);

parse_command_test!(
test_command_missing_semicolon,
b"$( $j usage 'exel' avoids 'ax-5' $)",
[Err::<(usize, usize, Vec<&str>), Diagnostic>(
Diagnostic::UnclosedCommand(Span::new(6, 35))
)]
);

parse_command_test!(
test_command_double_quote_escape,
b"$( $j abc \"de\"\"f\" 'ghi'; $)",
[Ok((6, 24, ["abc", "de\"f", "ghi"]))]
);

parse_command_test!(
test_command_unclosed_comment,
b"$( $j usage /* unclosed comment",
[Err::<(usize, usize, Vec<&str>), Diagnostic>(
Diagnostic::UnclosedCommandComment(Span::new(12, 31))
)]
);

parse_command_test!(
test_command_missing_whitespace,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13''ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[Ok((
6,
128,
[
"usage",
"exel",
"avoids",
"ax-5",
"ax-7",
"ax-8",
"ax-9",
"ax-10",
"ax-11",
"ax-12",
"ax-13'ax-ext",
"ax-sep",
"ax-nul",
"ax-pow"
]
))]
);

parse_command_test!(
test_command_missing_closing_quote,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[
Err::<(usize, usize, Vec<&str>), Diagnostic>(Diagnostic::MissingSpaceAfterCommandToken(
Span::new(55, 62)
)),
Ok((
62,
128,
vec!["ax-11'", "ax-12", "ax-13", "ax-ext", "ax-sep", "ax-nul", "ax-pow"]
))
]
);
2 changes: 1 addition & 1 deletion metamath-rs/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ impl CommandToken {
let buf = span.as_ref(buf);
if buf.contains(&quote) {
let mut out = vec![];
unescape(span.as_ref(buf), &mut out, |c| quote == c);
unescape(buf, &mut out, |c| quote == c);
out.into()
} else {
Cow::Borrowed(buf)
Expand Down

0 comments on commit 82c9582

Please sign in to comment.