From 3910b6051793c3f755aae65c446f2bd4f9042d9c Mon Sep 17 00:00:00 2001 From: Alex Pozhylenkov Date: Tue, 26 Nov 2024 12:34:46 +0200 Subject: [PATCH] feat(rust/cbork): Add a CDDL preprocessing step (#80) * move parsers into the separate module * add validate_cddl pub function * add processor module * rename `rule` to `expr` and some simple expression processing functions * wip * wip * refactor Ast * wip * remove unused deps * move CDDLTestParser to the tests::common mod * refactor character_sets tests * cleanup tests/identifiers.rs * refactor tests/rules.rs * refactor tests/type_declaration * rename rule_TEST to expr_TEST --- rust/cbork-cddl-parser/Cargo.toml | 3 +- .../src/grammar/cddl_test.pest | 4 +- .../src/grammar/rfc_8610.pest | 4 +- rust/cbork-cddl-parser/src/lib.rs | 161 ++-------------- rust/cbork-cddl-parser/src/parser.rs | 88 +++++++++ rust/cbork-cddl-parser/src/preprocessor.rs | 44 +++++ .../cbork-cddl-parser/tests/byte_sequences.rs | 4 +- rust/cbork-cddl-parser/tests/cddl.rs | 6 +- .../cbork-cddl-parser/tests/character_sets.rs | 177 ++++++------------ rust/cbork-cddl-parser/tests/comments.rs | 4 +- rust/cbork-cddl-parser/tests/common/mod.rs | 21 ++- .../cbork-cddl-parser/tests/group_elements.rs | 4 +- rust/cbork-cddl-parser/tests/identifiers.rs | 52 ++--- .../cbork-cddl-parser/tests/literal_values.rs | 4 +- rust/cbork-cddl-parser/tests/rules.rs | 80 ++++---- .../cbork-cddl-parser/tests/text_sequences.rs | 4 +- .../tests/type_declarations.rs | 105 ++++++----- 17 files changed, 370 insertions(+), 395 deletions(-) create mode 100644 rust/cbork-cddl-parser/src/parser.rs create mode 100644 rust/cbork-cddl-parser/src/preprocessor.rs diff --git a/rust/cbork-cddl-parser/Cargo.toml b/rust/cbork-cddl-parser/Cargo.toml index b2c7e307a..312bab140 100644 --- a/rust/cbork-cddl-parser/Cargo.toml +++ b/rust/cbork-cddl-parser/Cargo.toml @@ -15,7 +15,6 @@ repository.workspace = true workspace = true [dependencies] -derive_more = {version = "1.0.0", features = ["from","display"] } pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] } pest_derive = { version = "2.7.13", features = ["grammar-extras"] } -thiserror = "1.0.64" +anyhow = "1.0.89" diff --git a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest index 4eb04bda5..68d35a052 100644 --- a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest +++ b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest @@ -7,8 +7,8 @@ // cspell: words assigng genericparm genericarg rangeop ctlop // cspell: words grpchoice grpent memberkey bareword optcom -/// Test Expression for the `rule` Rule. -rule_TEST = ${ SOI ~ rule ~ EOI } +/// Test Expression for the `expr` Rule. +expr_TEST = ${ SOI ~ expr ~ EOI } /// Test Expression for the `typename` Rule. typename_TEST = ${ SOI ~ typename ~ EOI } diff --git a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest index 4a609ca51..f33f4a9c4 100644 --- a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest +++ b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest @@ -7,13 +7,13 @@ cddl = ${ SOI - ~ S ~ (rule ~ S)+ + ~ S ~ (expr ~ S)+ ~ EOI } // ----------------------------------------------------------------------------- // Rules -rule = ${ +expr = ${ (typename ~ genericparm? ~ S ~ assignt ~ S ~ type) | (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent) } diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index af8b4ab29..8d8d163c2 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -1,160 +1,27 @@ //! A parser for CDDL, utilized for parsing in accordance with RFC 8610. -#![allow(missing_docs)] // TODO(apskhem): Temporary, to bo removed in a subsequent PR +mod parser; +mod preprocessor; -use derive_more::{Display, From}; -pub use pest::Parser; -use pest::{error::Error, iterators::Pairs}; - -pub mod rfc_8610 { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - pub struct RFC8610Parser; -} - -pub mod rfc_9165 { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - pub struct RFC8610Parser; -} - -pub mod cddl { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - #[grammar = "grammar/cddl_modules.pest"] - pub struct RFC8610Parser; -} - -pub mod cddl_test { - pub use pest::Parser; - - // Parser with DEBUG rules. These rules are only used in tests. - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - #[grammar = "grammar/cddl_modules.pest"] - #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. - pub struct CDDLTestParser; -} - -/// Represents different parser extensions for handling CDDL specifications. +/// Represents different grammar extensions for handling CDDL specifications. pub enum Extension { - /// RFC8610 ONLY limited parser. - RFC8610Parser, - /// RFC8610 and RFC9165 limited parser. - RFC9165Parser, - /// RFC8610, RFC9165, and CDDL modules. - CDDLParser, -} - -// CDDL Standard Postlude - read from an external file -pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); - -/// Abstract Syntax Tree (AST) representing parsed CDDL syntax. -// TODO: this is temporary. need to add more pragmatic nodes -#[derive(Debug)] -pub enum AST<'a> { - /// Represents the AST for RFC 8610 CDDL rules. - RFC8610(Pairs<'a, rfc_8610::Rule>), - /// Represents the AST for RFC 9165 CDDL rules. - RFC9165(Pairs<'a, rfc_9165::Rule>), - /// Represents the AST for CDDL Modules rules. - CDDL(Pairs<'a, cddl::Rule>), + /// RFC8610 ONLY limited grammar. + RFC8610, + /// RFC8610 and RFC9165 limited grammar. + RFC9165, + /// RFC8610, RFC9165, and CDDL grammar. + CDDL, } -/// Represents different types of errors related to different types of extension. -#[derive(Display, Debug)] -pub enum CDDLErrorType { - /// An error related to RFC 8610 extension. - RFC8610(Error), - /// An error related to RFC 9165 extension. - RFC9165(Error), - /// An error related to CDDL modules extension. - CDDL(Error), -} - -/// Represents an error that may occur during CDDL parsing. -#[derive(thiserror::Error, Debug, From)] -#[error("{0}")] -pub struct CDDLError(CDDLErrorType); - -/// Parses and checks semantically a CDDL input string. -/// -/// # Arguments -/// -/// * `input` - A string containing the CDDL input to be parsed. -/// -/// # Returns -/// -/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing -/// a boxed `CDDLError` indicating the parsing error. +/// Verifies semantically a CDDL input string. /// /// # Errors /// /// This function may return an error in the following cases: /// /// - If there is an issue with parsing the CDDL input. -/// -/// # Examples -/// -/// ```rs -/// use cbork_cddl_parser::{parse_cddl, Extension}; -/// use std:fs; -/// -/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap(); -/// let result = parse_cddl(&mut input, &Extension::CDDLParser); -/// assert!(result.is_ok()); -/// ``` -pub fn parse_cddl<'a>( - input: &'a mut String, extension: &Extension, -) -> Result, Box> { - input.push_str("\n\n"); - input.push_str(POSTLUDE); - - let result = match extension { - Extension::RFC8610Parser => { - rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input) - .map(AST::RFC8610) - .map_err(CDDLErrorType::RFC8610) - }, - Extension::RFC9165Parser => { - rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input) - .map(AST::RFC9165) - .map_err(CDDLErrorType::RFC9165) - }, - Extension::CDDLParser => { - cddl::RFC8610Parser::parse(cddl::Rule::cddl, input) - .map(AST::CDDL) - .map_err(CDDLErrorType::CDDL) - }, - }; - - result.map_err(|e| Box::new(CDDLError::from(e))) -} - -#[cfg(test)] -mod tests { - use crate::*; - - #[test] - fn it_works() { - let mut input = String::new(); - let result = parse_cddl(&mut input, &Extension::CDDLParser); - - match result { - Ok(c) => println!("{c:?}"), - Err(e) => { - println!("{e:?}"); - println!("{e}"); - }, - } - } +pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { + let ast = parser::parse_cddl(input, extension)?; + let _ast = preprocessor::process_ast(ast)?; + Ok(()) } diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs new file mode 100644 index 000000000..ae4b16ed2 --- /dev/null +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -0,0 +1,88 @@ +//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). +//! Utilized for parsing in accordance with RFC-8610, RFC-9165. + +use pest::{iterators::Pair, Parser}; + +use crate::Extension; + +/// RFC-8610 parser. +#[allow(missing_docs)] +pub(crate) mod rfc_8610 { + /// A Pest parser for RFC-8610. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + pub(crate) struct Parser; +} + +/// RFC-9165 parser. +#[allow(missing_docs)] +pub(crate) mod rfc_9165 { + /// A Pest parser for RFC-9165. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + #[grammar = "grammar/rfc_9165.pest"] + pub(crate) struct Parser; +} + +/// Full CDDL syntax parser. +#[allow(missing_docs)] +pub(crate) mod cddl { + /// A Pest parser for a full CDDL syntax. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + #[grammar = "grammar/rfc_9165.pest"] + #[grammar = "grammar/cddl_modules.pest"] + pub(crate) struct Parser; +} + +/// CDDL Standard Postlude - read from an external file +const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); + +/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax. +#[derive(Debug)] +pub(crate) enum Ast<'a> { + /// Represents the AST for RFC-8610 CDDL rules. + Rfc8610(Vec>), + /// Represents the AST for RFC-9165 CDDL rules. + Rfc9165(Vec>), + /// Represents the AST for CDDL Modules rules. + Cddl(Vec>), +} + +/// Parses and checks semantically a CDDL input string. +/// +/// # Arguments +/// +/// * `input` - A string containing the CDDL input to be parsed. +/// +/// # Returns +/// +/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing +/// a boxed `CDDLError` indicating the parsing error. +/// +/// # Errors +/// +/// This function may return an error in the following cases: +/// +/// - If there is an issue with parsing the CDDL input. +pub(crate) fn parse_cddl<'a>( + input: &'a mut String, extension: &Extension, +) -> anyhow::Result> { + input.push_str("\n\n"); + input.push_str(POSTLUDE); + + let ast = match extension { + Extension::RFC8610 => { + rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input) + .map(|p| Ast::Rfc8610(p.collect()))? + }, + Extension::RFC9165 => { + rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input) + .map(|p| Ast::Rfc9165(p.collect()))? + }, + Extension::CDDL => { + cddl::Parser::parse(cddl::Rule::cddl, input).map(|p| Ast::Cddl(p.collect()))? + }, + }; + Ok(ast) +} diff --git a/rust/cbork-cddl-parser/src/preprocessor.rs b/rust/cbork-cddl-parser/src/preprocessor.rs new file mode 100644 index 000000000..b20cc849d --- /dev/null +++ b/rust/cbork-cddl-parser/src/preprocessor.rs @@ -0,0 +1,44 @@ +//! A CDDL AST preprocessor. +//! +//! - Validates the root rule of the AST to be a `cddl` rule. +//! - Filters out all rules that are not `expr` rules. +//! - (TODO) Resolve #include and #import directives, by just adding the imported rules +//! into the final expression list + +use anyhow::{anyhow, ensure}; +use pest::{iterators::Pair, RuleType}; + +use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; + +/// Processes the AST. +pub(crate) fn process_ast(ast: Ast) -> anyhow::Result { + match ast { + Ast::Rfc8610(ast) => { + process_root_and_filter(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr) + .map(Ast::Rfc8610) + }, + Ast::Rfc9165(ast) => { + process_root_and_filter(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr) + .map(Ast::Rfc9165) + }, + Ast::Cddl(ast) => { + process_root_and_filter(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl) + }, + } +} + +/// Process the root rule of the AST and filter out all non `expected_rule` rules. +fn process_root_and_filter( + ast: Vec>, root_rule: R, expected_rule: R, +) -> anyhow::Result>> { + let mut ast_iter = ast.into_iter(); + let ast_root = ast_iter.next().ok_or(anyhow!("Empty AST."))?; + ensure!( + ast_root.as_rule() == root_rule && ast_iter.next().is_none(), + "AST must have only one root rule, which must be a `{root_rule:?}` rule." + ); + Ok(ast_root + .into_inner() + .filter(|pair| pair.as_rule() == expected_rule) + .collect()) +} diff --git a/rust/cbork-cddl-parser/tests/byte_sequences.rs b/rust/cbork-cddl-parser/tests/byte_sequences.rs index 6f5ea0215..9686e1f24 100644 --- a/rust/cbork-cddl-parser/tests/byte_sequences.rs +++ b/rust/cbork-cddl-parser/tests/byte_sequences.rs @@ -1,9 +1,7 @@ // cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm -use cbork_cddl_parser::cddl_test::Rule; - mod common; -use common::byte_sequences::*; +use common::{byte_sequences::*, Rule}; #[test] /// Test if the `HEX_PAIR` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/cddl.rs b/rust/cbork-cddl-parser/tests/cddl.rs index 9adc9384d..0c7f63ca0 100644 --- a/rust/cbork-cddl-parser/tests/cddl.rs +++ b/rust/cbork-cddl-parser/tests/cddl.rs @@ -1,6 +1,6 @@ use std::{ffi::OsStr, fs, io::Result}; -use cbork_cddl_parser::{parse_cddl, Extension}; +use cbork_cddl_parser::{validate_cddl, Extension}; #[test] /// # Panics @@ -32,7 +32,7 @@ fn parse_cddl_files() { for file_path in valid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) { + if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) { err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1)); } } @@ -41,7 +41,7 @@ fn parse_cddl_files() { for file_path in invalid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - let result = parse_cddl(&mut content, &Extension::CDDLParser); + let result = validate_cddl(&mut content, &Extension::CDDL); assert!(result.is_err(), "{:?} is expected to fail", &file_path); } diff --git a/rust/cbork-cddl-parser/tests/character_sets.rs b/rust/cbork-cddl-parser/tests/character_sets.rs index 46a3e2424..3815cc55f 100644 --- a/rust/cbork-cddl-parser/tests/character_sets.rs +++ b/rust/cbork-cddl-parser/tests/character_sets.rs @@ -1,150 +1,95 @@ // cspell: words PCHAR pchar BCHAR bchar SESC sesc SCHAR schar fffd fffe -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; +mod common; +use common::Rule; -#[test] /// Test if the `WHITESPACE` rule passes properly. +#[test] fn check_whitespace() { - let whitespace = vec![" ", "\t", "\r", "\n", "\r\n"]; - - let not_whitespace = "not"; - - for ws in whitespace { - let parse = CDDLTestParser::parse(Rule::WHITESPACE, ws); - assert!(parse.is_ok()); - } - - let parse = CDDLTestParser::parse(Rule::WHITESPACE, not_whitespace); - assert!(parse.is_err()); + common::check_tests_rule(Rule::WHITESPACE, &[" ", "\t", "\r", "\n", "\r\n"], &["not"]); } -#[test] /// Test if the `PCHAR` rule passes properly. +#[test] fn check_pchar() { - for x in ('\u{0}'..='\u{ff}').map(char::from) { - let test = format!("{x}"); - let parse = CDDLTestParser::parse(Rule::PCHAR, &test); - if x < ' ' || x == '\u{7f}' { - assert!(parse.is_err()); - } else { - assert!(parse.is_ok()); - } - } - - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); + let passes = ('\u{0}'..='\u{ff}') + .filter(|x| x >= &' ' && x != &'\u{7f}') + .map(String::from) + .collect::>(); + let fails = ('\u{0}'..='\u{ff}') + .filter(|x| x < &' ' || x == &'\u{7f}') + .map(String::from) + .collect::>(); + common::check_tests_rule(Rule::PCHAR, &passes, &fails); } -#[test] /// Test if the `BCHAR` rule passes properly. +#[test] fn check_bchar() { - for x in ('\u{0}'..='\u{ff}').map(char::from) { - let test = format!("{x}"); - let parse = CDDLTestParser::parse(Rule::BCHAR, &test); - if !matches!(x, '\n' | '\r') && x < ' ' || matches!(x, '\t' | '\'' | '\\' | '\u{7f}') { - assert!(parse.is_err()); - } else { - assert!(parse.is_ok()); - } - } - - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); + let passes = ('\u{0}'..='\u{ff}') + .filter(|x| { + (x >= &' ' && !matches!(x, '\t' | '\'' | '\\' | '\u{7f}')) || matches!(x, '\n' | '\r') + }) + .map(String::from) + .collect::>(); + + let fails = ('\u{0}'..='\u{ff}') + .filter(|x| { + x < &' ' && !matches!(x, '\n' | '\r') || matches!(x, '\t' | '\'' | '\\' | '\u{7f}') + }) + .map(String::from) + .collect::>(); + + common::check_tests_rule(Rule::BCHAR, &passes, &fails); } -#[test] /// Test if the `SESC` rule passes properly. +#[test] fn check_sesc() { - for x in (' '..='\u{ff}').map(char::from) { - let test = format!("\\{x}"); - let parse = CDDLTestParser::parse(Rule::SESC, &test); - if x == '\u{7f}' { - assert!(parse.is_err()); - } else { - assert!(parse.is_ok()); - } - } - - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); + let passes = (' '..='\u{ff}') + .filter(|x| x != &'\u{7f}') + .map(|x| format!("\\{x}")) + .collect::>(); + common::check_tests_rule(Rule::SESC, &passes, &["\u{7f}"]); } -#[test] /// Test if the `ASCII_VISIBLE` rule passes properly. +#[test] fn check_ascii_visible() { - for x in (b' '..=b'~').map(char::from) { - let test = x.to_string(); - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, &test); - assert!(parse.is_ok()); - } - - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); - - let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\u{80}"); - assert!(parse.is_err()); + let passes = (' '..='~').map(String::from).collect::>(); + common::check_tests_rule(Rule::ASCII_VISIBLE, &passes, &["\r", "\u{80}"]); } -#[test] /// Test if the `SCHAR_ASCII_VISIBLE` rule passes properly. +#[test] fn check_schar_ascii_visible() { - let invalids = "\"\\"; - for x in (b' '..=b'~').map(char::from) { - let test = x.to_string(); - let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, &test); - if invalids.contains(x) { - assert!(parse.is_err()); - } else { - assert!(parse.is_ok()); - } - } - - let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); - - let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, "\u{80}"); - assert!(parse.is_err()); + let passes = (' '..='~') + .filter(|c| c != &'"' && c != &'\\') + .map(String::from) + .collect::>(); + common::check_tests_rule(Rule::SCHAR_ASCII_VISIBLE, &passes, &[ + "\"", "\\", "\r", "\u{80}", + ]); } -#[test] /// Test if the `BCHAR_ASCII_VISIBLE` rule passes properly. +#[test] fn check_bchar_ascii_visible() { - let invalids = "'\\"; - for x in (b' '..=b'~').map(char::from) { - let test = x.to_string(); - let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, &test); - if invalids.contains(x) { - assert!(parse.is_err()); - } else { - assert!(parse.is_ok()); - } - } - - let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, "\r"); - assert!(parse.is_err()); - - let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, "\u{80}"); - assert!(parse.is_err()); + let passes = (' '..='~') + .filter(|c| c != &'\'' && c != &'\\') + .map(String::from) + .collect::>(); + common::check_tests_rule(Rule::BCHAR_ASCII_VISIBLE, &passes, &[ + "'", "\\", "\r", "\u{80}", + ]); } -#[test] /// Test if the `UNICODE_CHAR` rule passes properly. +#[test] fn check_unicode() { - let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\r"); - assert!(parse.is_err()); - - let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{80}"); - assert!(parse.is_ok()); - - let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{10fffd}"); - assert!(parse.is_ok()); - - let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{7ffff}"); - assert!(parse.is_ok()); - - let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{10fffe}"); - assert!(parse.is_err()); + common::check_tests_rule( + Rule::UNICODE_CHAR, + &["\u{80}", "\u{10fffd}", "\u{7ffff}"], + &["\r", "\u{10fffe}"], + ); } diff --git a/rust/cbork-cddl-parser/tests/comments.rs b/rust/cbork-cddl-parser/tests/comments.rs index 435ab3633..99403aa20 100644 --- a/rust/cbork-cddl-parser/tests/comments.rs +++ b/rust/cbork-cddl-parser/tests/comments.rs @@ -1,7 +1,5 @@ -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::comments::*; +use common::{comments::*, Rule}; #[test] /// Test if the `COMMENT` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/common/mod.rs b/rust/cbork-cddl-parser/tests/common/mod.rs index 107a95527..768c24691 100644 --- a/rust/cbork-cddl-parser/tests/common/mod.rs +++ b/rust/cbork-cddl-parser/tests/common/mod.rs @@ -1,7 +1,4 @@ -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; +use pest::Parser; pub(crate) mod byte_sequences; pub(crate) mod comments; @@ -12,15 +9,25 @@ pub(crate) mod rules; pub(crate) mod text_sequences; pub(crate) mod type_declarations; +/// A Pest test parser for a full CDDL syntax. +#[derive(pest_derive::Parser)] +#[grammar = "grammar/rfc_8610.pest"] +#[grammar = "grammar/rfc_9165.pest"] +#[grammar = "grammar/cddl_modules.pest"] +#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. +pub struct CDDLTestParser; + /// # Panics -pub(crate) fn check_tests_rule(rule_type: Rule, passes: &[&str], fails: &[&str]) { +pub(crate) fn check_tests_rule( + rule_type: Rule, passes: &[impl AsRef], fails: &[impl AsRef], +) { for test in passes { - let parse = CDDLTestParser::parse(rule_type, test); + let parse = CDDLTestParser::parse(rule_type, test.as_ref()); assert!(parse.is_ok()); } for test in fails { - let parse = CDDLTestParser::parse(rule_type, test); + let parse = CDDLTestParser::parse(rule_type, test.as_ref()); assert!(parse.is_err()); } } diff --git a/rust/cbork-cddl-parser/tests/group_elements.rs b/rust/cbork-cddl-parser/tests/group_elements.rs index e735ad1a2..1e52424ca 100644 --- a/rust/cbork-cddl-parser/tests/group_elements.rs +++ b/rust/cbork-cddl-parser/tests/group_elements.rs @@ -1,10 +1,8 @@ // cspell: words OPTCOM MEMBERKEY bareword tstr GRPENT GRPCHOICE // cspell: words optcom memberkey grpent grpchoice -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::{group_elements::*, identifiers::*}; +use common::{group_elements::*, identifiers::*, Rule}; #[test] /// Test if the `occur` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/identifiers.rs b/rust/cbork-cddl-parser/tests/identifiers.rs index 63a0c80f0..7bbb1597b 100644 --- a/rust/cbork-cddl-parser/tests/identifiers.rs +++ b/rust/cbork-cddl-parser/tests/identifiers.rs @@ -1,36 +1,42 @@ // cspell: words aname groupsocket typesocket groupsocket -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::identifiers::*; +use common::{identifiers::*, Rule}; -#[test] /// Check if the name components pass properly. -fn check_name_characters() { - for x in ('\u{0}'..='\u{ff}').map(char::from) { - let test = format!("{x}"); - let parse_start = CDDLTestParser::parse(Rule::NAME_START, &test); - let parse_end = CDDLTestParser::parse(Rule::NAME_END, &test); +#[test] +fn check_name_start_characters() { + let passes = ('\u{0}'..='\u{ff}') + .filter(|x| x.is_ascii_alphabetic() || matches!(x, '@' | '_' | '$')) + .map(String::from) + .collect::>(); + let fails = ('\u{0}'..='\u{ff}') + .filter(|x| !x.is_ascii_alphabetic() && !matches!(x, '@' | '_' | '$')) + .map(String::from) + .collect::>(); - if x.is_ascii_alphabetic() || matches!(x, '@' | '_' | '$') { - assert!(parse_start.is_ok()); - assert!(parse_end.is_ok()); - } else if x.is_ascii_digit() { - assert!(parse_start.is_err()); - assert!(parse_end.is_ok()); - } else { - assert!(parse_start.is_err()); - assert!(parse_end.is_err()); - } - } + common::check_tests_rule(Rule::NAME_START, &passes, &fails); } +/// Check if the name components pass properly. #[test] +fn check_name_end_characters() { + let passes = ('\u{0}'..='\u{ff}') + .filter(|x| x.is_ascii_alphabetic() || x.is_ascii_digit() || matches!(x, '@' | '_' | '$')) + .map(String::from) + .collect::>(); + let fails = ('\u{0}'..='\u{ff}') + .filter(|x| { + !x.is_ascii_alphabetic() && !x.is_ascii_digit() && !matches!(x, '@' | '_' | '$') + }) + .map(String::from) + .collect::>(); + + common::check_tests_rule(Rule::NAME_END, &passes, &fails); +} + /// Test if the `id` rule passes properly. +#[test] fn check_id() { common::check_tests_rule(Rule::id_TEST, ID_PASSES, ID_FAILS); } diff --git a/rust/cbork-cddl-parser/tests/literal_values.rs b/rust/cbork-cddl-parser/tests/literal_values.rs index 727dc7fd3..21ad645ed 100644 --- a/rust/cbork-cddl-parser/tests/literal_values.rs +++ b/rust/cbork-cddl-parser/tests/literal_values.rs @@ -2,10 +2,8 @@ use std::ops::Deref; -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::{byte_sequences::*, literal_values::*, text_sequences::*}; +use common::{byte_sequences::*, literal_values::*, text_sequences::*, Rule}; #[test] /// Test if the `uint` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/rules.rs b/rust/cbork-cddl-parser/tests/rules.rs index b33b9239e..eee42a14c 100644 --- a/rust/cbork-cddl-parser/tests/rules.rs +++ b/rust/cbork-cddl-parser/tests/rules.rs @@ -1,24 +1,19 @@ // cspell: words GENERICARG bigfloat ASSIGNG GROUPNAME tstr genericarg GENERICARG // cspell: words assigng assignt ASSIGNT GENERICPARM genericparm -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::{rules::*, type_declarations::*}; +use common::{rules::*, type_declarations::*, Rule}; -#[test] /// Test if the `genericarg` rule passes properly. /// This uses a special rule in the Grammar to test `genericarg` exhaustively. +#[test] fn check_genericarg() { common::check_tests_rule(Rule::genericarg_TEST, GENERICARG_PASSES, GENERICARG_FAILS); } -#[test] /// Test if the `genericparm` rule passes properly. /// This uses a special rule in the Grammar to test `genericparm` exhaustively. +#[test] fn check_genericparm() { common::check_tests_rule( Rule::genericparm_TEST, @@ -27,61 +22,80 @@ fn check_genericparm() { ); } -#[test] /// Test if the `assigng` rule passes properly. /// This uses a special rule in the Grammar to test `assigng` exhaustively. +#[test] fn check_assigng() { common::check_tests_rule(Rule::assigng_TEST, ASSIGNG_PASSES, ASSIGNG_FAILS); } -#[test] /// Test if the `assignt` rule passes properly. /// This uses a special rule in the Grammar to test `assignt` exhaustively. +#[test] fn check_assignt() { common::check_tests_rule(Rule::assignt_TEST, ASSIGNT_PASSES, ASSIGNT_FAILS); } -#[test] /// Test if the `typename` rule passes properly. /// This uses a special rule in the Grammar to test `typename` exhaustively. +#[test] fn check_typename() { common::check_tests_rule(Rule::typename_TEST, TYPENAME_PASSES, TYPENAME_FAILS); } -#[test] /// Test if the `groupname` rule passes properly. /// This uses a special rule in the Grammar to test `groupname` exhaustively. +#[test] fn check_groupname() { common::check_tests_rule(Rule::groupname_TEST, GROUPNAME_PASSES, GROUPNAME_FAILS); } -#[test] /// Test if the `rule` rule passes properly for type variant. +#[test] fn check_rule_type_composition() { - for (i, test_i) in [TYPENAME_PASSES, TYPENAME_FAILS] + let typename_iter = [TYPENAME_PASSES, TYPENAME_FAILS] .into_iter() .flatten() - .enumerate() - { - for (j, test_j) in [ASSIGNT_PASSES].into_iter().flatten().enumerate() { - for (k, test_k) in [TYPE_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() { - let input = [test_i.to_owned(), test_j.to_owned(), test_k.to_owned()].join(" "); - let parse = CDDLTestParser::parse(Rule::rule_TEST, &input); - if (0..TYPENAME_PASSES.len()).contains(&i) - && (0..ASSIGNT_PASSES.len()).contains(&j) - && (0..TYPE_PASSES.len()).contains(&k) - { - assert!(parse.is_ok()); - } else { - assert!(parse.is_err()); - } - } - } - } + .enumerate(); + + let assign_iter = ASSIGNT_PASSES.iter(); + let type_iter = [ + TYPE_PASSES, + TYPE1_PASSES, + TYPE2_PASSES, + TYPE_FAILS, + TYPE1_FAILS, + TYPE2_FAILS, + ] + .into_iter() + .flatten() + .enumerate(); + + let rules_iter = typename_iter.zip(assign_iter).zip(type_iter).map( + |(((i, typename), assign), (k, r#type))| { + let is_passes = i < TYPENAME_PASSES.len() + && k < TYPE_PASSES.len() + TYPE1_PASSES.len() + TYPE2_PASSES.len(); + let input = [typename.to_owned(), assign.to_owned(), r#type.to_owned()].join(" "); + (input, is_passes) + }, + ); + + let passes = rules_iter + .clone() + .filter(|(_, is_passes)| *is_passes) + .map(|(input, _)| input) + .collect::>(); + + let fails = rules_iter + .filter(|(_, is_passes)| !*is_passes) + .map(|(input, _)| input) + .collect::>(); + + common::check_tests_rule(Rule::expr_TEST, &passes, &fails); } -#[test] /// Test if the `rule` rule passes properly for group variant. +#[test] fn check_rule_group() { - common::check_tests_rule(Rule::rule_TEST, RULE_GROUP_PASSES, RULE_GROUP_FAILS); + common::check_tests_rule(Rule::expr_TEST, RULE_GROUP_PASSES, RULE_GROUP_FAILS); } diff --git a/rust/cbork-cddl-parser/tests/text_sequences.rs b/rust/cbork-cddl-parser/tests/text_sequences.rs index 4c5270489..ac1d61178 100644 --- a/rust/cbork-cddl-parser/tests/text_sequences.rs +++ b/rust/cbork-cddl-parser/tests/text_sequences.rs @@ -1,7 +1,5 @@ -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::text_sequences::*; +use common::{text_sequences::*, Rule}; #[test] /// Test if the `S` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/type_declarations.rs b/rust/cbork-cddl-parser/tests/type_declarations.rs index 14f024e14..b62617d51 100644 --- a/rust/cbork-cddl-parser/tests/type_declarations.rs +++ b/rust/cbork-cddl-parser/tests/type_declarations.rs @@ -1,89 +1,104 @@ // cspell: words CTLOP aname groupsocket typesocket RANGEOP tstr ctlop // cspell: words rangeop RANGEOP -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::type_declarations::*; +use common::{type_declarations::*, Rule}; -#[test] /// Test if the `ctlop` rule passes properly. /// This uses a special rule in the Grammar to test `ctlop` exhaustively. +#[test] fn check_ctlop() { common::check_tests_rule(Rule::ctlop_TEST, CTLOP_PASSES, CTLOP_FAILS); } -#[test] /// Test if the `rangeop` rule passes properly. /// This uses a special rule in the Grammar to test `rangeop` exhaustively. +#[test] fn check_rangeop() { common::check_tests_rule(Rule::rangeop_TEST, RANGEOP_PASSES, RANGEOP_FAILS); } -#[test] /// Test if the `type2` rule passes properly. /// This uses a special rule in the Grammar to test `type2` exhaustively. +#[test] fn check_type2() { common::check_tests_rule(Rule::type2_TEST, TYPE2_PASSES, TYPE2_FAILS); } -#[test] /// Test if the `type1` rule passes properly. /// This uses a special rule in the Grammar to test `type1` exhaustively. +#[test] fn check_type1() { common::check_tests_rule(Rule::type1_TEST, TYPE1_PASSES, TYPE1_FAILS); } -#[test] /// Test if the `type1` rule passes properly based on composition of type2 test cases. +#[test] fn check_type1_composition() { - let j_len = CTLOP_PASSES.len() + RANGEOP_PASSES.len(); - for (i, test_i) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() { - for (j, test_j) in [CTLOP_PASSES, RANGEOP_PASSES] - .into_iter() - .flatten() - .enumerate() - { - for (k, test_k) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() { - let input = [test_i.to_owned(), test_j.to_owned(), test_k.to_owned()].join(" "); - let parse = CDDLTestParser::parse(Rule::type1_TEST, &input); - if (0..TYPE2_PASSES.len()).contains(&i) - && (0..j_len).contains(&j) - && (0..TYPE2_PASSES.len()).contains(&k) - { - assert!(parse.is_ok()); - } else { - assert!(parse.is_err()); - } - } - } - } + let separator_iter = [CTLOP_PASSES, RANGEOP_PASSES].into_iter().flatten(); + + let type_iter = [TYPE2_PASSES, TYPE_FAILS, TYPE1_FAILS, TYPE2_FAILS] + .into_iter() + .flatten() + .enumerate(); + + let rules_iter = type_iter.clone().zip(separator_iter).zip(type_iter).map( + |(((i, type_1), separator), (j, type_2))| { + let is_passed = i < TYPE2_PASSES.len() && j < TYPE2_PASSES.len(); + let input = [type_1.to_owned(), separator.to_owned(), type_2.to_owned()].join(" "); + (input, is_passed) + }, + ); + + let passes = rules_iter + .clone() + .filter(|(_, is_passes)| *is_passes) + .map(|(input, _)| input) + .collect::>(); + + let fails = rules_iter + .filter(|(_, is_passes)| !*is_passes) + .map(|(input, _)| input) + .collect::>(); + + common::check_tests_rule(Rule::type1_TEST, &passes, &fails); } -#[test] /// Test if the `type` rule passes properly. /// This uses a special rule in the Grammar to test `type` exhaustively. +#[test] fn check_type() { common::check_tests_rule(Rule::type_TEST, TYPE_PASSES, TYPE_FAILS); } -#[test] /// Test if the `type` rule passes properly based on composition of type2 test cases. +#[test] fn check_type_composition() { // type2 composition testing - for (i, test_i) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() { - for (j, test_j) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() { + let type_iter = [TYPE2_PASSES, TYPE_FAILS, TYPE1_FAILS, TYPE2_FAILS] + .into_iter() + .flatten() + .enumerate(); + + let rules_iter = type_iter + .clone() + .zip(type_iter) + .map(|((i, test_i), (j, test_j))| { + let is_passed = i < TYPE2_PASSES.len() && j < TYPE2_PASSES.len(); let input = [test_i.to_owned(), "/", test_j.to_owned()].join(" "); - let parse = CDDLTestParser::parse(Rule::type_TEST, &input); - - if (0..TYPE2_PASSES.len()).contains(&i) && (0..TYPE2_PASSES.len()).contains(&j) { - assert!(parse.is_ok()); - } else { - assert!(parse.is_err()); - } - } - } + (input, is_passed) + }); + + let passes = rules_iter + .clone() + .filter(|(_, is_passes)| *is_passes) + .map(|(input, _)| input) + .collect::>(); + + let fails = rules_iter + .filter(|(_, is_passes)| !*is_passes) + .map(|(input, _)| input) + .collect::>(); + + common::check_tests_rule(Rule::type_TEST, &passes, &fails); }