Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for #174, more parser checks and more parser tests #175

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions metamath-rs/src/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ pub enum Diagnostic {
MissingLabel,
MissingMarkupDef([bool; 3], Span),
MissingProof(Span),
MissingSpaceAfterCommandToken(Span),
MMReservedLabel(Span),
NestedComment(Span, Span),
NotActiveSymbol(TokenIndex),
Expand Down Expand Up @@ -183,6 +184,7 @@ pub enum Diagnostic {
UnclosedBeforeInclude(StatementIndex),
UnclosedCommandComment(Span),
UnclosedCommandString(Span),
UnclosedCommand(Span),
UnclosedComment(Span),
UnclosedHtml(u32, u32),
UnclosedInclude,
Expand Down Expand Up @@ -734,6 +736,12 @@ impl Diagnostic {
stmt,
*math_end,
)]),
MissingSpaceAfterCommandToken(span) => ("Missing space after command token".into(), vec![(
Level::Error,
"A quoted command token must be followed by a space or a semicolon (;)".into(),
stmt,
*span,
)]),
MMReservedLabel(span) => ("Reserved label".into(), vec![(
Level::Warning,
"Labels beginning with 'mm' are reserved for Metamath file names".into(),
Expand Down Expand Up @@ -1017,6 +1025,12 @@ impl Diagnostic {
stmt,
*span,
)]),
UnclosedCommand(span) => ("Unclosed command".into(), vec![(
Level::Error,
"A command must be closed with a semicolon (;)".into(),
stmt,
*span,
)]),
UnclosedComment(comment) => ("Unclosed comment".into(), vec![(
Level::Error,
"Comment requires closing $) before end of file".into(),
Expand Down
18 changes: 14 additions & 4 deletions metamath-rs/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -801,8 +801,8 @@ fn get_heading_name(buffer: &[u8], pos: FilePos) -> TokenPtr<'_> {
}

/// Extract the parser commands out of a $t or $j special comment
fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diagnostic> {
let mut iter = CommandIter {
pub(crate) fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diagnostic> {
let mut iter: CommandIter<'_> = CommandIter {
buffer,
index: pos as usize,
};
Expand All @@ -815,7 +815,7 @@ fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diag
Ok(iter)
}

struct CommandIter<'a> {
pub(crate) struct CommandIter<'a> {
buffer: &'a [u8],
index: usize,
}
Expand Down Expand Up @@ -904,6 +904,12 @@ impl Iterator for CommandIter<'_> {
}
}
}
if self.has_more()
&& !matches!(self.next_char(), b' ' | b'\t' | b'\n' | b'\r' | b';')
{
let cspan = Span::new(token_start, self.index);
return Some(Err(Diagnostic::MissingSpaceAfterCommandToken(cspan)));
}
CommandToken::String(Span::new(token_start, self.index - 1))
} else {
let token_start = self.index;
Expand All @@ -920,7 +926,11 @@ impl Iterator for CommandIter<'_> {
if let Err(diag) = self.skip_white_spaces() {
return Some(Err(diag));
}
if self.has_more() && self.next_char() == b';' {
if !self.has_more() {
let cspan = Span::new(command_start, self.index);
return Some(Err(Diagnostic::UnclosedCommand(cspan)));
}
if self.next_char() == b';' {
// Stop if unquoted semicolon $)
self.index += 1;
return Some(Ok((Span::new(command_start, self.index), command)));
Expand Down
182 changes: 181 additions & 1 deletion metamath-rs/src/parser_tests.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::database::Database;
use crate::diag::Diagnostic;
use crate::parser::commands;
use crate::segment::{Comparer, SegmentOrder};
use crate::statement::{Span, StatementAddress, TokenRef, NO_STATEMENT};
use crate::StatementType;
use crate::{as_str, StatementType};
use std::cmp::Ordering;

#[test]
Expand Down Expand Up @@ -150,3 +151,182 @@ parse_test!(
b"$c X Y\x7F $.",
[(0, Diagnostic::BadCharacter(6, 0x7F))]
);

parse_test!(
test_j_valid,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[]
);

parse_test!(
test_j_missing_closing_quote,
b"$( $j usage 'exel $)",
[(
0,
Diagnostic::UnclosedCommandString(Span { start: 13, end: 20 })
)]
);

parse_test!(
test_j_unclosed_comment,
b"$( $j usage /* $)",
[(
0,
Diagnostic::UnclosedCommandComment(Span { start: 12, end: 17 })
)]
);

parse_test!(
test_j_missing_whitespace,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13''ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[]
);

macro_rules! parse_command_test {
($name:ident, $text:expr, $commands:expr) => {
#[test]
fn $name() {
let commands = commands($text, b'j', 0);
let parsed = commands.and_then(|iter| {
Ok(iter
.map(|command| {
command.and_then(|(span, tokens)| {
Ok((
span,
tokens
.into_iter()
.map(|token| as_str(&token.value($text)).to_owned())
.collect::<Vec<_>>(),
))
})
})
.collect::<Vec<_>>())
});
let expected = Ok($commands
.into_iter()
.map(|item| {
item.and_then(|(start, end, tokens)| {
Ok((
Span::new(start, end),
tokens
.into_iter()
.map(|token| token.to_string())
.collect::<Vec<_>>(),
))
})
})
.collect::<Vec<_>>());
assert_eq!(parsed, expected);
}
};
}

parse_command_test!(
test_command_valid,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[Ok((
6,
129,
[
"usage", "exel", "avoids", "ax-5", "ax-7", "ax-8", "ax-9", "ax-10", "ax-11", "ax-12",
"ax-13", "ax-ext", "ax-sep", "ax-nul", "ax-pow"
]
))]
);

parse_command_test!(
test_command_comment,
b"$( $j usage /* comment, may contain ; * / and ' - ignored */ 'exel' avoids 'ax-5'; $)",
[Ok((6, 82, ["usage", "exel", "avoids", "ax-5"]))]
);

parse_command_test!(
test_command_stop_at_end_tag,
b"$( $j usage 'exel' avoids 'ax-5'; $) Nothing is parsed here",
[Ok((6, 33, ["usage", "exel", "avoids", "ax-5"]))]
);

type NoCommandType = [Result<(usize, usize, [&'static str; 0]), Diagnostic>; 0];
const NO_COMMANDS: NoCommandType = [];
parse_command_test!(test_command_empty_list, b"$( $j $)", NO_COMMANDS);

parse_command_test!(
test_command_more_commands,
b"$( $j
syntax 'wff';
syntax '|-' as 'wff';
unambiguous 'klr 5';
$)",
[
Ok((18, 31, vec!["syntax", "wff"])),
Ok((44, 65, vec!["syntax", "|-", "as", "wff"])),
Ok((78, 98, vec!["unambiguous", "klr 5"]))
]
);

parse_command_test!(
test_command_missing_semicolon,
b"$( $j usage 'exel' avoids 'ax-5' $)",
[Err::<(usize, usize, Vec<&str>), Diagnostic>(
Diagnostic::UnclosedCommand(Span::new(6, 35))
)]
);

parse_command_test!(
test_command_double_quote_escape,
b"$( $j abc \"de\"\"f\" 'ghi'; $)",
[Ok((6, 24, ["abc", "de\"f", "ghi"]))]
);

parse_command_test!(
test_command_unclosed_comment,
b"$( $j usage /* unclosed comment",
[Err::<(usize, usize, Vec<&str>), Diagnostic>(
Diagnostic::UnclosedCommandComment(Span::new(12, 31))
)]
);

parse_command_test!(
test_command_missing_whitespace,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10' 'ax-11'
'ax-12' 'ax-13''ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[Ok((
6,
128,
[
"usage",
"exel",
"avoids",
"ax-5",
"ax-7",
"ax-8",
"ax-9",
"ax-10",
"ax-11",
"ax-12",
"ax-13'ax-ext",
"ax-sep",
"ax-nul",
"ax-pow"
]
))]
);

parse_command_test!(
test_command_missing_closing_quote,
b"$( $j usage 'exel' avoids 'ax-5' 'ax-7' 'ax-8' 'ax-9' 'ax-10 'ax-11'
'ax-12' 'ax-13' 'ax-ext' 'ax-sep' 'ax-nul' 'ax-pow'; $)",
[
Err::<(usize, usize, Vec<&str>), Diagnostic>(Diagnostic::MissingSpaceAfterCommandToken(
Span::new(55, 62)
)),
Ok((
62,
128,
vec!["ax-11'", "ax-12", "ax-13", "ax-ext", "ax-sep", "ax-nul", "ax-pow"]
))
]
);
2 changes: 1 addition & 1 deletion metamath-rs/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ impl CommandToken {
let buf = span.as_ref(buf);
if buf.contains(&quote) {
let mut out = vec![];
unescape(span.as_ref(buf), &mut out, |c| quote == c);
unescape(buf, &mut out, |c| quote == c);
out.into()
} else {
Cow::Borrowed(buf)
Expand Down
Loading