From 3910b6051793c3f755aae65c446f2bd4f9042d9c Mon Sep 17 00:00:00 2001
From: Alex Pozhylenkov <leshiy12345678@gmail.com>
Date: Tue, 26 Nov 2024 12:34:46 +0200
Subject: [PATCH] feat(rust/cbork): Add a CDDL preprocessing step (#80)

* move parsers into the separate module

* add validate_cddl pub function

* add processor module

* rename `rule` to `expr` and some simple expression processing functions

* wip

* wip

* refactor Ast

* wip

* remove unused deps

* move CDDLTestParser to the tests::common mod

* refactor character_sets tests

* cleanup tests/identifiers.rs

* refactor tests/rules.rs

* refactor tests/type_declaration

* rename rule_TEST to expr_TEST
---
 rust/cbork-cddl-parser/Cargo.toml             |   3 +-
 .../src/grammar/cddl_test.pest                |   4 +-
 .../src/grammar/rfc_8610.pest                 |   4 +-
 rust/cbork-cddl-parser/src/lib.rs             | 161 ++--------------
 rust/cbork-cddl-parser/src/parser.rs          |  88 +++++++++
 rust/cbork-cddl-parser/src/preprocessor.rs    |  44 +++++
 .../cbork-cddl-parser/tests/byte_sequences.rs |   4 +-
 rust/cbork-cddl-parser/tests/cddl.rs          |   6 +-
 .../cbork-cddl-parser/tests/character_sets.rs | 177 ++++++------------
 rust/cbork-cddl-parser/tests/comments.rs      |   4 +-
 rust/cbork-cddl-parser/tests/common/mod.rs    |  21 ++-
 .../cbork-cddl-parser/tests/group_elements.rs |   4 +-
 rust/cbork-cddl-parser/tests/identifiers.rs   |  52 ++---
 .../cbork-cddl-parser/tests/literal_values.rs |   4 +-
 rust/cbork-cddl-parser/tests/rules.rs         |  80 ++++----
 .../cbork-cddl-parser/tests/text_sequences.rs |   4 +-
 .../tests/type_declarations.rs                | 105 ++++++-----
 17 files changed, 370 insertions(+), 395 deletions(-)
 create mode 100644 rust/cbork-cddl-parser/src/parser.rs
 create mode 100644 rust/cbork-cddl-parser/src/preprocessor.rs

diff --git a/rust/cbork-cddl-parser/Cargo.toml b/rust/cbork-cddl-parser/Cargo.toml
index b2c7e307a..312bab140 100644
--- a/rust/cbork-cddl-parser/Cargo.toml
+++ b/rust/cbork-cddl-parser/Cargo.toml
@@ -15,7 +15,6 @@ repository.workspace = true
 workspace = true
 
 [dependencies]
-derive_more = {version = "1.0.0", features = ["from","display"] }
 pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] }
 pest_derive = { version = "2.7.13", features = ["grammar-extras"] }
-thiserror = "1.0.64"
+anyhow = "1.0.89"
diff --git a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest
index 4eb04bda5..68d35a052 100644
--- a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest
+++ b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest
@@ -7,8 +7,8 @@
 // cspell: words assigng genericparm genericarg rangeop ctlop
 // cspell: words grpchoice grpent memberkey bareword optcom
 
-/// Test Expression for the `rule` Rule.
-rule_TEST  = ${ SOI ~ rule ~ EOI }
+/// Test Expression for the `expr` Rule.
+expr_TEST  = ${ SOI ~ expr ~ EOI }
 
 /// Test Expression for the `typename` Rule.
 typename_TEST  = ${ SOI ~ typename ~ EOI }
diff --git a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest
index 4a609ca51..f33f4a9c4 100644
--- a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest
+++ b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest
@@ -7,13 +7,13 @@
 
 cddl = ${
     SOI
-    ~ S ~ (rule ~ S)+
+    ~ S ~ (expr ~ S)+
     ~ EOI
 }
 
 // -----------------------------------------------------------------------------
 // Rules
-rule = ${
+expr = ${
     (typename ~ genericparm? ~ S ~ assignt ~ S ~ type)
     | (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent)
 }
diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs
index af8b4ab29..8d8d163c2 100644
--- a/rust/cbork-cddl-parser/src/lib.rs
+++ b/rust/cbork-cddl-parser/src/lib.rs
@@ -1,160 +1,27 @@
 //! A parser for CDDL, utilized for parsing in accordance with RFC 8610.
 
-#![allow(missing_docs)] // TODO(apskhem): Temporary, to bo removed in a subsequent PR
+mod parser;
+mod preprocessor;
 
-use derive_more::{Display, From};
-pub use pest::Parser;
-use pest::{error::Error, iterators::Pairs};
-
-pub mod rfc_8610 {
-    pub use pest::Parser;
-
-    #[derive(pest_derive::Parser)]
-    #[grammar = "grammar/rfc_8610.pest"]
-    pub struct RFC8610Parser;
-}
-
-pub mod rfc_9165 {
-    pub use pest::Parser;
-
-    #[derive(pest_derive::Parser)]
-    #[grammar = "grammar/rfc_8610.pest"]
-    #[grammar = "grammar/rfc_9165.pest"]
-    pub struct RFC8610Parser;
-}
-
-pub mod cddl {
-    pub use pest::Parser;
-
-    #[derive(pest_derive::Parser)]
-    #[grammar = "grammar/rfc_8610.pest"]
-    #[grammar = "grammar/rfc_9165.pest"]
-    #[grammar = "grammar/cddl_modules.pest"]
-    pub struct RFC8610Parser;
-}
-
-pub mod cddl_test {
-    pub use pest::Parser;
-
-    // Parser with DEBUG rules. These rules are only used in tests.
-    #[derive(pest_derive::Parser)]
-    #[grammar = "grammar/rfc_8610.pest"]
-    #[grammar = "grammar/rfc_9165.pest"]
-    #[grammar = "grammar/cddl_modules.pest"]
-    #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests.
-    pub struct CDDLTestParser;
-}
-
-/// Represents different parser extensions for handling CDDL specifications.
+/// Represents different grammar extensions for handling CDDL specifications.
 pub enum Extension {
-    /// RFC8610 ONLY limited parser.
-    RFC8610Parser,
-    /// RFC8610 and RFC9165 limited parser.
-    RFC9165Parser,
-    /// RFC8610, RFC9165, and CDDL modules.
-    CDDLParser,
-}
-
-// CDDL Standard Postlude - read from an external file
-pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl");
-
-/// Abstract Syntax Tree (AST) representing parsed CDDL syntax.
-// TODO: this is temporary. need to add more pragmatic nodes
-#[derive(Debug)]
-pub enum AST<'a> {
-    /// Represents the AST for RFC 8610 CDDL rules.
-    RFC8610(Pairs<'a, rfc_8610::Rule>),
-    /// Represents the AST for RFC 9165 CDDL rules.
-    RFC9165(Pairs<'a, rfc_9165::Rule>),
-    /// Represents the AST for CDDL Modules rules.
-    CDDL(Pairs<'a, cddl::Rule>),
+    /// RFC8610 ONLY limited grammar.
+    RFC8610,
+    /// RFC8610 and RFC9165 limited grammar.
+    RFC9165,
+    /// RFC8610, RFC9165, and CDDL grammar.
+    CDDL,
 }
 
-/// Represents different types of errors related to different types of extension.
-#[derive(Display, Debug)]
-pub enum CDDLErrorType {
-    /// An error related to RFC 8610 extension.
-    RFC8610(Error<rfc_8610::Rule>),
-    /// An error related to RFC 9165 extension.
-    RFC9165(Error<rfc_9165::Rule>),
-    /// An error related to CDDL modules extension.
-    CDDL(Error<cddl::Rule>),
-}
-
-/// Represents an error that may occur during CDDL parsing.
-#[derive(thiserror::Error, Debug, From)]
-#[error("{0}")]
-pub struct CDDLError(CDDLErrorType);
-
-/// Parses and checks semantically a CDDL input string.
-///
-/// # Arguments
-///
-/// * `input` - A string containing the CDDL input to be parsed.
-///
-/// # Returns
-///
-/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing
-/// a boxed `CDDLError` indicating the parsing error.
+/// Validates a CDDL input string; the standard postlude is appended to `input` in place.
 ///
 /// # Errors
 ///
 /// This function may return an error in the following cases:
 ///
 /// - If there is an issue with parsing the CDDL input.
-///
-/// # Examples
-///
-/// ```rs
-/// use cbork_cddl_parser::{parse_cddl, Extension};
-/// use std:fs;
-///
-/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap();
-/// let result = parse_cddl(&mut input, &Extension::CDDLParser);
-/// assert!(result.is_ok());
-/// ```
-pub fn parse_cddl<'a>(
-    input: &'a mut String, extension: &Extension,
-) -> Result<AST<'a>, Box<CDDLError>> {
-    input.push_str("\n\n");
-    input.push_str(POSTLUDE);
-
-    let result = match extension {
-        Extension::RFC8610Parser => {
-            rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input)
-                .map(AST::RFC8610)
-                .map_err(CDDLErrorType::RFC8610)
-        },
-        Extension::RFC9165Parser => {
-            rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input)
-                .map(AST::RFC9165)
-                .map_err(CDDLErrorType::RFC9165)
-        },
-        Extension::CDDLParser => {
-            cddl::RFC8610Parser::parse(cddl::Rule::cddl, input)
-                .map(AST::CDDL)
-                .map_err(CDDLErrorType::CDDL)
-        },
-    };
-
-    result.map_err(|e| Box::new(CDDLError::from(e)))
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::*;
-
-    #[test]
-    fn it_works() {
-        let mut input = String::new();
-        let result = parse_cddl(&mut input, &Extension::CDDLParser);
-
-        match result {
-            Ok(c) => println!("{c:?}"),
-            Err(e) => {
-                println!("{e:?}");
-                println!("{e}");
-            },
-        }
-    }
+pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> {
+    let ast = parser::parse_cddl(input, extension)?;
+    let _ast = preprocessor::process_ast(ast)?;
+    Ok(())
 }
diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs
new file mode 100644
index 000000000..ae4b16ed2
--- /dev/null
+++ b/rust/cbork-cddl-parser/src/parser.rs
@@ -0,0 +1,88 @@
+//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest).
+//! Utilized for parsing in accordance with RFC-8610, RFC-9165.
+
+use pest::{iterators::Pair, Parser};
+
+use crate::Extension;
+
+/// RFC-8610 parser.
+#[allow(missing_docs)]
+pub(crate) mod rfc_8610 {
+    /// A Pest parser for RFC-8610.
+    #[derive(pest_derive::Parser)]
+    #[grammar = "grammar/rfc_8610.pest"]
+    pub(crate) struct Parser;
+}
+
+/// RFC-9165 parser.
+#[allow(missing_docs)]
+pub(crate) mod rfc_9165 {
+    /// A Pest parser for RFC-9165.
+    #[derive(pest_derive::Parser)]
+    #[grammar = "grammar/rfc_8610.pest"]
+    #[grammar = "grammar/rfc_9165.pest"]
+    pub(crate) struct Parser;
+}
+
+/// Full CDDL syntax parser.
+#[allow(missing_docs)]
+pub(crate) mod cddl {
+    /// A Pest parser for a full CDDL syntax.
+    #[derive(pest_derive::Parser)]
+    #[grammar = "grammar/rfc_8610.pest"]
+    #[grammar = "grammar/rfc_9165.pest"]
+    #[grammar = "grammar/cddl_modules.pest"]
+    pub(crate) struct Parser;
+}
+
+/// CDDL Standard Postlude - read from an external file
+const POSTLUDE: &str = include_str!("grammar/postlude.cddl");
+
+/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax.
+#[derive(Debug)]
+pub(crate) enum Ast<'a> {
+    /// Represents the AST for RFC-8610 CDDL rules.
+    Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>),
+    /// Represents the AST for RFC-9165 CDDL rules.
+    Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>),
+    /// Represents the AST for CDDL Modules rules.
+    Cddl(Vec<Pair<'a, cddl::Rule>>),
+}
+
+/// Parses a CDDL input string with the grammar selected by `extension`.
+///
+/// # Arguments
+///
+/// * `input` - The CDDL text; two newlines and the standard postlude are appended to it.
+///
+/// # Returns
+///
+/// Returns `Ok(Ast)` holding the collected top-level pairs if parsing is successful,
+/// otherwise returns an `Err` wrapping the pest parsing error.
+///
+/// # Errors
+///
+/// This function may return an error in the following cases:
+///
+/// - If there is an issue with parsing the CDDL input.
+pub(crate) fn parse_cddl<'a>(
+    input: &'a mut String, extension: &Extension,
+) -> anyhow::Result<Ast<'a>> {
+    input.push_str("\n\n");
+    input.push_str(POSTLUDE);
+
+    let ast = match extension {
+        Extension::RFC8610 => {
+            rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input)
+                .map(|p| Ast::Rfc8610(p.collect()))?
+        },
+        Extension::RFC9165 => {
+            rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input)
+                .map(|p| Ast::Rfc9165(p.collect()))?
+        },
+        Extension::CDDL => {
+            cddl::Parser::parse(cddl::Rule::cddl, input).map(|p| Ast::Cddl(p.collect()))?
+        },
+    };
+    Ok(ast)
+}
diff --git a/rust/cbork-cddl-parser/src/preprocessor.rs b/rust/cbork-cddl-parser/src/preprocessor.rs
new file mode 100644
index 000000000..b20cc849d
--- /dev/null
+++ b/rust/cbork-cddl-parser/src/preprocessor.rs
@@ -0,0 +1,44 @@
+//! A CDDL AST preprocessor.
+//!
+//! - Validates the root rule of the AST to be a `cddl` rule.
+//! - Filters out all rules that are not `expr` rules.
+//! - (TODO) Resolve #include and #import directives, by just adding the imported rules
+//!   into the final expression list
+
+use anyhow::{anyhow, ensure};
+use pest::{iterators::Pair, RuleType};
+
+use crate::parser::{cddl, rfc_8610, rfc_9165, Ast};
+
+/// Validates the AST root and keeps only its `expr` pairs, for whichever grammar produced it.
+pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> {
+    match ast {
+        Ast::Rfc8610(ast) => {
+            process_root_and_filter(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)
+                .map(Ast::Rfc8610)
+        },
+        Ast::Rfc9165(ast) => {
+            process_root_and_filter(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)
+                .map(Ast::Rfc9165)
+        },
+        Ast::Cddl(ast) => {
+            process_root_and_filter(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl)
+        },
+    }
+}
+
+/// Checks that `ast` is exactly one `root_rule` pair and returns its `expected_rule` children.
+fn process_root_and_filter<R: RuleType>(
+    ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R,
+) -> anyhow::Result<Vec<Pair<'_, R>>> {
+    let mut ast_iter = ast.into_iter();
+    let ast_root = ast_iter.next().ok_or_else(|| anyhow!("Empty AST."))?;
+    ensure!(
+        ast_root.as_rule() == root_rule && ast_iter.next().is_none(),
+        "AST must have only one root rule, which must be a `{root_rule:?}` rule."
+    );
+    Ok(ast_root
+        .into_inner()
+        .filter(|pair| pair.as_rule() == expected_rule)
+        .collect())
+}
diff --git a/rust/cbork-cddl-parser/tests/byte_sequences.rs b/rust/cbork-cddl-parser/tests/byte_sequences.rs
index 6f5ea0215..9686e1f24 100644
--- a/rust/cbork-cddl-parser/tests/byte_sequences.rs
+++ b/rust/cbork-cddl-parser/tests/byte_sequences.rs
@@ -1,9 +1,7 @@
 // cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm
 
-use cbork_cddl_parser::cddl_test::Rule;
-
 mod common;
-use common::byte_sequences::*;
+use common::{byte_sequences::*, Rule};
 
 #[test]
 /// Test if the `HEX_PAIR` rule passes properly.
diff --git a/rust/cbork-cddl-parser/tests/cddl.rs b/rust/cbork-cddl-parser/tests/cddl.rs
index 9adc9384d..0c7f63ca0 100644
--- a/rust/cbork-cddl-parser/tests/cddl.rs
+++ b/rust/cbork-cddl-parser/tests/cddl.rs
@@ -1,6 +1,6 @@
 use std::{ffi::OsStr, fs, io::Result};
 
-use cbork_cddl_parser::{parse_cddl, Extension};
+use cbork_cddl_parser::{validate_cddl, Extension};
 
 #[test]
 /// # Panics
@@ -32,7 +32,7 @@ fn parse_cddl_files() {
     for file_path in valid_file_paths {
         let mut content = fs::read_to_string(file_path).unwrap();
 
-        if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) {
+        if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) {
             err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1));
         }
     }
@@ -41,7 +41,7 @@ fn parse_cddl_files() {
     for file_path in invalid_file_paths {
         let mut content = fs::read_to_string(file_path).unwrap();
 
-        let result = parse_cddl(&mut content, &Extension::CDDLParser);
+        let result = validate_cddl(&mut content, &Extension::CDDL);
 
         assert!(result.is_err(), "{:?} is expected to fail", &file_path);
     }
diff --git a/rust/cbork-cddl-parser/tests/character_sets.rs b/rust/cbork-cddl-parser/tests/character_sets.rs
index 46a3e2424..3815cc55f 100644
--- a/rust/cbork-cddl-parser/tests/character_sets.rs
+++ b/rust/cbork-cddl-parser/tests/character_sets.rs
@@ -1,150 +1,95 @@
 // cspell: words PCHAR pchar BCHAR bchar SESC sesc SCHAR schar fffd fffe
 
-use cbork_cddl_parser::{
-    self,
-    cddl_test::{CDDLTestParser, Parser, Rule},
-};
+mod common;
+use common::Rule;
 
-#[test]
 /// Test if the `WHITESPACE` rule passes properly.
+#[test]
 fn check_whitespace() {
-    let whitespace = vec![" ", "\t", "\r", "\n", "\r\n"];
-
-    let not_whitespace = "not";
-
-    for ws in whitespace {
-        let parse = CDDLTestParser::parse(Rule::WHITESPACE, ws);
-        assert!(parse.is_ok());
-    }
-
-    let parse = CDDLTestParser::parse(Rule::WHITESPACE, not_whitespace);
-    assert!(parse.is_err());
+    common::check_tests_rule(Rule::WHITESPACE, &[" ", "\t", "\r", "\n", "\r\n"], &["not"]);
 }
 
-#[test]
 /// Test if the `PCHAR` rule passes properly.
+#[test]
 fn check_pchar() {
-    for x in ('\u{0}'..='\u{ff}').map(char::from) {
-        let test = format!("{x}");
-        let parse = CDDLTestParser::parse(Rule::PCHAR, &test);
-        if x < ' ' || x == '\u{7f}' {
-            assert!(parse.is_err());
-        } else {
-            assert!(parse.is_ok());
-        }
-    }
-
-    let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
+    let passes = ('\u{0}'..='\u{ff}')
+        .filter(|x| x >= &' ' && x != &'\u{7f}')
+        .map(String::from)
+        .collect::<Vec<_>>();
+    let fails = ('\u{0}'..='\u{ff}')
+        .filter(|x| x < &' ' || x == &'\u{7f}')
+        .map(String::from)
+        .collect::<Vec<_>>();
+    common::check_tests_rule(Rule::PCHAR, &passes, &fails);
 }
 
-#[test]
 /// Test if the `BCHAR` rule passes properly.
+#[test]
 fn check_bchar() {
-    for x in ('\u{0}'..='\u{ff}').map(char::from) {
-        let test = format!("{x}");
-        let parse = CDDLTestParser::parse(Rule::BCHAR, &test);
-        if !matches!(x, '\n' | '\r') && x < ' ' || matches!(x, '\t' | '\'' | '\\' | '\u{7f}') {
-            assert!(parse.is_err());
-        } else {
-            assert!(parse.is_ok());
-        }
-    }
-
-    let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
+    let passes = ('\u{0}'..='\u{ff}')
+        .filter(|x| {
+            (x >= &' ' && !matches!(x, '\t' | '\'' | '\\' | '\u{7f}')) || matches!(x, '\n' | '\r')
+        })
+        .map(String::from)
+        .collect::<Vec<_>>();
+
+    let fails = ('\u{0}'..='\u{ff}')
+        .filter(|x| {
+            x < &' ' && !matches!(x, '\n' | '\r') || matches!(x, '\t' | '\'' | '\\' | '\u{7f}')
+        })
+        .map(String::from)
+        .collect::<Vec<_>>();
+
+    common::check_tests_rule(Rule::BCHAR, &passes, &fails);
 }
 
-#[test]
 /// Test if the `SESC` rule passes properly.
+#[test]
 fn check_sesc() {
-    for x in (' '..='\u{ff}').map(char::from) {
-        let test = format!("\\{x}");
-        let parse = CDDLTestParser::parse(Rule::SESC, &test);
-        if x == '\u{7f}' {
-            assert!(parse.is_err());
-        } else {
-            assert!(parse.is_ok());
-        }
-    }
-
-    let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
+    let passes = (' '..='\u{ff}')
+        .filter(|x| x != &'\u{7f}')
+        .map(|x| format!("\\{x}"))
+        .collect::<Vec<_>>();
+    common::check_tests_rule(Rule::SESC, &passes, &["\\\u{7f}"]);
 }
 
-#[test]
 /// Test if the `ASCII_VISIBLE` rule passes properly.
+#[test]
 fn check_ascii_visible() {
-    for x in (b' '..=b'~').map(char::from) {
-        let test = x.to_string();
-        let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, &test);
-        assert!(parse.is_ok());
-    }
-
-    let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
-
-    let parse = CDDLTestParser::parse(Rule::ASCII_VISIBLE, "\u{80}");
-    assert!(parse.is_err());
+    let passes = (' '..='~').map(String::from).collect::<Vec<_>>();
+    common::check_tests_rule(Rule::ASCII_VISIBLE, &passes, &["\r", "\u{80}"]);
 }
 
-#[test]
 /// Test if the `SCHAR_ASCII_VISIBLE` rule passes properly.
+#[test]
 fn check_schar_ascii_visible() {
-    let invalids = "\"\\";
-    for x in (b' '..=b'~').map(char::from) {
-        let test = x.to_string();
-        let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, &test);
-        if invalids.contains(x) {
-            assert!(parse.is_err());
-        } else {
-            assert!(parse.is_ok());
-        }
-    }
-
-    let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
-
-    let parse = CDDLTestParser::parse(Rule::SCHAR_ASCII_VISIBLE, "\u{80}");
-    assert!(parse.is_err());
+    let passes = (' '..='~')
+        .filter(|c| c != &'"' && c != &'\\')
+        .map(String::from)
+        .collect::<Vec<_>>();
+    common::check_tests_rule(Rule::SCHAR_ASCII_VISIBLE, &passes, &[
+        "\"", "\\", "\r", "\u{80}",
+    ]);
 }
 
-#[test]
 /// Test if the `BCHAR_ASCII_VISIBLE` rule passes properly.
+#[test]
 fn check_bchar_ascii_visible() {
-    let invalids = "'\\";
-    for x in (b' '..=b'~').map(char::from) {
-        let test = x.to_string();
-        let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, &test);
-        if invalids.contains(x) {
-            assert!(parse.is_err());
-        } else {
-            assert!(parse.is_ok());
-        }
-    }
-
-    let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, "\r");
-    assert!(parse.is_err());
-
-    let parse = CDDLTestParser::parse(Rule::BCHAR_ASCII_VISIBLE, "\u{80}");
-    assert!(parse.is_err());
+    let passes = (' '..='~')
+        .filter(|c| c != &'\'' && c != &'\\')
+        .map(String::from)
+        .collect::<Vec<_>>();
+    common::check_tests_rule(Rule::BCHAR_ASCII_VISIBLE, &passes, &[
+        "'", "\\", "\r", "\u{80}",
+    ]);
 }
 
-#[test]
 /// Test if the `UNICODE_CHAR` rule passes properly.
+#[test]
 fn check_unicode() {
-    let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\r");
-    assert!(parse.is_err());
-
-    let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{80}");
-    assert!(parse.is_ok());
-
-    let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{10fffd}");
-    assert!(parse.is_ok());
-
-    let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{7ffff}");
-    assert!(parse.is_ok());
-
-    let parse = CDDLTestParser::parse(Rule::UNICODE_CHAR, "\u{10fffe}");
-    assert!(parse.is_err());
+    common::check_tests_rule(
+        Rule::UNICODE_CHAR,
+        &["\u{80}", "\u{10fffd}", "\u{7ffff}"],
+        &["\r", "\u{10fffe}"],
+    );
 }
diff --git a/rust/cbork-cddl-parser/tests/comments.rs b/rust/cbork-cddl-parser/tests/comments.rs
index 435ab3633..99403aa20 100644
--- a/rust/cbork-cddl-parser/tests/comments.rs
+++ b/rust/cbork-cddl-parser/tests/comments.rs
@@ -1,7 +1,5 @@
-use cbork_cddl_parser::{self, cddl_test::Rule};
-
 mod common;
-use common::comments::*;
+use common::{comments::*, Rule};
 
 #[test]
 /// Test if the `COMMENT` rule passes properly.
diff --git a/rust/cbork-cddl-parser/tests/common/mod.rs b/rust/cbork-cddl-parser/tests/common/mod.rs
index 107a95527..768c24691 100644
--- a/rust/cbork-cddl-parser/tests/common/mod.rs
+++ b/rust/cbork-cddl-parser/tests/common/mod.rs
@@ -1,7 +1,4 @@
-use cbork_cddl_parser::{
-    self,
-    cddl_test::{CDDLTestParser, Parser, Rule},
-};
+use pest::Parser;
 
 pub(crate) mod byte_sequences;
 pub(crate) mod comments;
@@ -12,15 +9,25 @@ pub(crate) mod rules;
 pub(crate) mod text_sequences;
 pub(crate) mod type_declarations;
 
+/// A Pest test parser for a full CDDL syntax.
+#[derive(pest_derive::Parser)]
+#[grammar = "grammar/rfc_8610.pest"]
+#[grammar = "grammar/rfc_9165.pest"]
+#[grammar = "grammar/cddl_modules.pest"]
+#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests.
+pub struct CDDLTestParser;
+
 /// # Panics
-pub(crate) fn check_tests_rule(rule_type: Rule, passes: &[&str], fails: &[&str]) {
+pub(crate) fn check_tests_rule(
+    rule_type: Rule, passes: &[impl AsRef<str>], fails: &[impl AsRef<str>],
+) {
     for test in passes {
-        let parse = CDDLTestParser::parse(rule_type, test);
+        let parse = CDDLTestParser::parse(rule_type, test.as_ref());
         assert!(parse.is_ok());
     }
 
     for test in fails {
-        let parse = CDDLTestParser::parse(rule_type, test);
+        let parse = CDDLTestParser::parse(rule_type, test.as_ref());
         assert!(parse.is_err());
     }
 }
diff --git a/rust/cbork-cddl-parser/tests/group_elements.rs b/rust/cbork-cddl-parser/tests/group_elements.rs
index e735ad1a2..1e52424ca 100644
--- a/rust/cbork-cddl-parser/tests/group_elements.rs
+++ b/rust/cbork-cddl-parser/tests/group_elements.rs
@@ -1,10 +1,8 @@
 // cspell: words OPTCOM MEMBERKEY bareword tstr GRPENT GRPCHOICE
 // cspell: words optcom memberkey grpent grpchoice
 
-use cbork_cddl_parser::{self, cddl_test::Rule};
-
 mod common;
-use common::{group_elements::*, identifiers::*};
+use common::{group_elements::*, identifiers::*, Rule};
 
 #[test]
 /// Test if the `occur` rule passes properly.
diff --git a/rust/cbork-cddl-parser/tests/identifiers.rs b/rust/cbork-cddl-parser/tests/identifiers.rs
index 63a0c80f0..7bbb1597b 100644
--- a/rust/cbork-cddl-parser/tests/identifiers.rs
+++ b/rust/cbork-cddl-parser/tests/identifiers.rs
@@ -1,36 +1,42 @@
 // cspell: words aname groupsocket typesocket groupsocket
 
-use cbork_cddl_parser::{
-    self,
-    cddl_test::{CDDLTestParser, Parser, Rule},
-};
-
 mod common;
-use common::identifiers::*;
+use common::{identifiers::*, Rule};
 
-#[test]
 /// Check if the name components pass properly.
-fn check_name_characters() {
-    for x in ('\u{0}'..='\u{ff}').map(char::from) {
-        let test = format!("{x}");
-        let parse_start = CDDLTestParser::parse(Rule::NAME_START, &test);
-        let parse_end = CDDLTestParser::parse(Rule::NAME_END, &test);
+#[test]
+fn check_name_start_characters() {
+    let passes = ('\u{0}'..='\u{ff}')
+        .filter(|x| x.is_ascii_alphabetic() || matches!(x, '@' | '_' | '$'))
+        .map(String::from)
+        .collect::<Vec<_>>();
+    let fails = ('\u{0}'..='\u{ff}')
+        .filter(|x| !x.is_ascii_alphabetic() && !matches!(x, '@' | '_' | '$'))
+        .map(String::from)
+        .collect::<Vec<_>>();
 
-        if x.is_ascii_alphabetic() || matches!(x, '@' | '_' | '$') {
-            assert!(parse_start.is_ok());
-            assert!(parse_end.is_ok());
-        } else if x.is_ascii_digit() {
-            assert!(parse_start.is_err());
-            assert!(parse_end.is_ok());
-        } else {
-            assert!(parse_start.is_err());
-            assert!(parse_end.is_err());
-        }
-    }
+    common::check_tests_rule(Rule::NAME_START, &passes, &fails);
 }
 
+/// Check if the `NAME_END` characters pass properly.
 #[test]
+fn check_name_end_characters() {
+    let passes = ('\u{0}'..='\u{ff}')
+        .filter(|x| x.is_ascii_alphabetic() || x.is_ascii_digit() || matches!(x, '@' | '_' | '$'))
+        .map(String::from)
+        .collect::<Vec<_>>();
+    let fails = ('\u{0}'..='\u{ff}')
+        .filter(|x| {
+            !x.is_ascii_alphabetic() && !x.is_ascii_digit() && !matches!(x, '@' | '_' | '$')
+        })
+        .map(String::from)
+        .collect::<Vec<_>>();
+
+    common::check_tests_rule(Rule::NAME_END, &passes, &fails);
+}
+
 /// Test if the `id` rule passes properly.
+#[test]
 fn check_id() {
     common::check_tests_rule(Rule::id_TEST, ID_PASSES, ID_FAILS);
 }
diff --git a/rust/cbork-cddl-parser/tests/literal_values.rs b/rust/cbork-cddl-parser/tests/literal_values.rs
index 727dc7fd3..21ad645ed 100644
--- a/rust/cbork-cddl-parser/tests/literal_values.rs
+++ b/rust/cbork-cddl-parser/tests/literal_values.rs
@@ -2,10 +2,8 @@
 
 use std::ops::Deref;
 
-use cbork_cddl_parser::{self, cddl_test::Rule};
-
 mod common;
-use common::{byte_sequences::*, literal_values::*, text_sequences::*};
+use common::{byte_sequences::*, literal_values::*, text_sequences::*, Rule};
 
 #[test]
 /// Test if the `uint` rule passes properly.
diff --git a/rust/cbork-cddl-parser/tests/rules.rs b/rust/cbork-cddl-parser/tests/rules.rs
index b33b9239e..eee42a14c 100644
--- a/rust/cbork-cddl-parser/tests/rules.rs
+++ b/rust/cbork-cddl-parser/tests/rules.rs
@@ -1,24 +1,19 @@
 // cspell: words GENERICARG bigfloat ASSIGNG GROUPNAME tstr genericarg GENERICARG
 // cspell: words assigng assignt ASSIGNT GENERICPARM genericparm
 
-use cbork_cddl_parser::{
-    self,
-    cddl_test::{CDDLTestParser, Parser, Rule},
-};
-
 mod common;
-use common::{rules::*, type_declarations::*};
+use common::{rules::*, type_declarations::*, Rule};
 
-#[test]
 /// Test if the `genericarg` rule passes properly.
 /// This uses a special rule in the Grammar to test `genericarg` exhaustively.
+#[test]
 fn check_genericarg() {
     common::check_tests_rule(Rule::genericarg_TEST, GENERICARG_PASSES, GENERICARG_FAILS);
 }
 
-#[test]
 /// Test if the `genericparm` rule passes properly.
 /// This uses a special rule in the Grammar to test `genericparm` exhaustively.
+#[test]
 fn check_genericparm() {
     common::check_tests_rule(
         Rule::genericparm_TEST,
@@ -27,61 +22,80 @@ fn check_genericparm() {
     );
 }
 
-#[test]
 /// Test if the `assigng` rule passes properly.
 /// This uses a special rule in the Grammar to test `assigng` exhaustively.
+#[test]
 fn check_assigng() {
     common::check_tests_rule(Rule::assigng_TEST, ASSIGNG_PASSES, ASSIGNG_FAILS);
 }
 
-#[test]
 /// Test if the `assignt` rule passes properly.
 /// This uses a special rule in the Grammar to test `assignt` exhaustively.
+#[test]
 fn check_assignt() {
     common::check_tests_rule(Rule::assignt_TEST, ASSIGNT_PASSES, ASSIGNT_FAILS);
 }
 
-#[test]
 /// Test if the `typename` rule passes properly.
 /// This uses a special rule in the Grammar to test `typename` exhaustively.
+#[test]
 fn check_typename() {
     common::check_tests_rule(Rule::typename_TEST, TYPENAME_PASSES, TYPENAME_FAILS);
 }
 
-#[test]
 /// Test if the `groupname` rule passes properly.
 /// This uses a special rule in the Grammar to test `groupname` exhaustively.
+#[test]
 fn check_groupname() {
     common::check_tests_rule(Rule::groupname_TEST, GROUPNAME_PASSES, GROUPNAME_FAILS);
 }
 
-#[test]
 /// Test if the `rule` rule passes properly for type variant.
+#[test]
 fn check_rule_type_composition() {
-    for (i, test_i) in [TYPENAME_PASSES, TYPENAME_FAILS]
+    let typename_iter = [TYPENAME_PASSES, TYPENAME_FAILS]
         .into_iter()
         .flatten()
-        .enumerate()
-    {
-        for (j, test_j) in [ASSIGNT_PASSES].into_iter().flatten().enumerate() {
-            for (k, test_k) in [TYPE_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() {
-                let input = [test_i.to_owned(), test_j.to_owned(), test_k.to_owned()].join(" ");
-                let parse = CDDLTestParser::parse(Rule::rule_TEST, &input);
-                if (0..TYPENAME_PASSES.len()).contains(&i)
-                    && (0..ASSIGNT_PASSES.len()).contains(&j)
-                    && (0..TYPE_PASSES.len()).contains(&k)
-                {
-                    assert!(parse.is_ok());
-                } else {
-                    assert!(parse.is_err());
-                }
-            }
-        }
-    }
+        .enumerate();
+
+    let assign_iter = ASSIGNT_PASSES.iter();
+    let type_iter = [
+        TYPE_PASSES,
+        TYPE1_PASSES,
+        TYPE2_PASSES,
+        TYPE_FAILS,
+        TYPE1_FAILS,
+        TYPE2_FAILS,
+    ]
+    .into_iter()
+    .flatten()
+    .enumerate();
+    // NOTE(review): `zip` pairs positionally, unlike the old nested loops (all combos) — confirm.
+    let rules_iter = typename_iter.zip(assign_iter).zip(type_iter).map(
+        |(((i, typename), assign), (k, r#type))| {
+            let is_passes = i < TYPENAME_PASSES.len()
+                && k < TYPE_PASSES.len() + TYPE1_PASSES.len() + TYPE2_PASSES.len();
+            let input = [typename.to_owned(), assign.to_owned(), r#type.to_owned()].join(" ");
+            (input, is_passes)
+        },
+    );
+
+    let passes = rules_iter
+        .clone()
+        .filter(|(_, is_passes)| *is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    let fails = rules_iter
+        .filter(|(_, is_passes)| !*is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    common::check_tests_rule(Rule::expr_TEST, &passes, &fails);
 }
 
-#[test]
 /// Test if the `rule` rule passes properly for group variant.
+#[test]
 fn check_rule_group() {
-    common::check_tests_rule(Rule::rule_TEST, RULE_GROUP_PASSES, RULE_GROUP_FAILS);
+    common::check_tests_rule(Rule::expr_TEST, RULE_GROUP_PASSES, RULE_GROUP_FAILS);
 }
diff --git a/rust/cbork-cddl-parser/tests/text_sequences.rs b/rust/cbork-cddl-parser/tests/text_sequences.rs
index 4c5270489..ac1d61178 100644
--- a/rust/cbork-cddl-parser/tests/text_sequences.rs
+++ b/rust/cbork-cddl-parser/tests/text_sequences.rs
@@ -1,7 +1,5 @@
-use cbork_cddl_parser::{self, cddl_test::Rule};
-
 mod common;
-use common::text_sequences::*;
+use common::{text_sequences::*, Rule};
 
 #[test]
 /// Test if the `S` rule passes properly.
diff --git a/rust/cbork-cddl-parser/tests/type_declarations.rs b/rust/cbork-cddl-parser/tests/type_declarations.rs
index 14f024e14..b62617d51 100644
--- a/rust/cbork-cddl-parser/tests/type_declarations.rs
+++ b/rust/cbork-cddl-parser/tests/type_declarations.rs
@@ -1,89 +1,104 @@
 // cspell: words CTLOP aname groupsocket typesocket RANGEOP tstr ctlop
 // cspell: words rangeop RANGEOP
 
-use cbork_cddl_parser::{
-    self,
-    cddl_test::{CDDLTestParser, Parser, Rule},
-};
-
 mod common;
-use common::type_declarations::*;
+use common::{type_declarations::*, Rule};
 
-#[test]
 /// Test if the `ctlop` rule passes properly.
 /// This uses a special rule in the Grammar to test `ctlop` exhaustively.
+#[test] // CTLOP_PASSES is also reused as a separator source in check_type1_composition
 fn check_ctlop() {
     common::check_tests_rule(Rule::ctlop_TEST, CTLOP_PASSES, CTLOP_FAILS);
 }
 
-#[test]
 /// Test if the `rangeop` rule passes properly.
 /// This uses a special rule in the Grammar to test `rangeop` exhaustively.
+#[test] // RANGEOP_PASSES is also reused as a separator source in check_type1_composition
 fn check_rangeop() {
     common::check_tests_rule(Rule::rangeop_TEST, RANGEOP_PASSES, RANGEOP_FAILS);
 }
 
-#[test]
 /// Test if the `type2` rule passes properly.
 /// This uses a special rule in the Grammar to test `type2` exhaustively.
+#[test] // TYPE2_PASSES doubles as the valid-operand set of the composition tests in this file
 fn check_type2() {
     common::check_tests_rule(Rule::type2_TEST, TYPE2_PASSES, TYPE2_FAILS);
 }
 
-#[test]
 /// Test if the `type1` rule passes properly.
 /// This uses a special rule in the Grammar to test `type1` exhaustively.
+#[test] // TYPE1_FAILS is also fed into the composition tests in this file as an invalid-operand source
 fn check_type1() {
     common::check_tests_rule(Rule::type1_TEST, TYPE1_PASSES, TYPE1_FAILS);
 }
 
-#[test]
 /// Test if the `type1` rule passes properly based on composition of type2 test cases.
+#[test]
 fn check_type1_composition() {
-    let j_len = CTLOP_PASSES.len() + RANGEOP_PASSES.len();
-    for (i, test_i) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() {
-        for (j, test_j) in [CTLOP_PASSES, RANGEOP_PASSES]
-            .into_iter()
-            .flatten()
-            .enumerate()
-        {
-            for (k, test_k) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() {
-                let input = [test_i.to_owned(), test_j.to_owned(), test_k.to_owned()].join(" ");
-                let parse = CDDLTestParser::parse(Rule::type1_TEST, &input);
-                if (0..TYPE2_PASSES.len()).contains(&i)
-                    && (0..j_len).contains(&j)
-                    && (0..TYPE2_PASSES.len()).contains(&k)
-                {
-                    assert!(parse.is_ok());
-                } else {
-                    assert!(parse.is_err());
-                }
-            }
-        }
-    }
+    let separator_iter = [CTLOP_PASSES, RANGEOP_PASSES].into_iter().flatten(); // only passing operators are used as separators
+
+    let type_iter = [TYPE2_PASSES, TYPE_FAILS, TYPE1_FAILS, TYPE2_FAILS] // passing operands come first, so an index below TYPE2_PASSES.len() marks a valid operand
+        .into_iter()
+        .flatten()
+        .enumerate();
+
+    let rules_iter = type_iter.clone().zip(separator_iter).zip(type_iter).map( // NOTE(review): zip walks the three sources in lockstep (so i == j, and it stops at the shortest one), whereas the removed loops tried every combination - confirm the reduced coverage is intended
+        |(((i, type_1), separator), (j, type_2))| {
+            let is_passed = i < TYPE2_PASSES.len() && j < TYPE2_PASSES.len(); // expected to parse only when both operands are passing type2 cases
+            let input = [type_1.to_owned(), separator.to_owned(), type_2.to_owned()].join(" ");
+            (input, is_passed)
+        },
+    );
+
+    let passes = rules_iter // inputs flagged as expected-to-parse
+        .clone() // rules_iter is reused below for the failing half
+        .filter(|(_, is_passes)| *is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    let fails = rules_iter // the remaining inputs, expected to be rejected
+        .filter(|(_, is_passes)| !*is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    common::check_tests_rule(Rule::type1_TEST, &passes, &fails); // both lists go to the shared helper; its exact checks live in tests/common (not shown here)
 }
 
-#[test]
 /// Test if the `type` rule passes properly.
 /// This uses a special rule in the Grammar to test `type` exhaustively.
+#[test] // TYPE_FAILS is also fed into the composition tests in this file as an invalid-operand source
 fn check_type() {
     common::check_tests_rule(Rule::type_TEST, TYPE_PASSES, TYPE_FAILS);
 }
 
-#[test]
 /// Test if the `type` rule passes properly based on composition of type2 test cases.
+#[test]
 fn check_type_composition() {
     // type2 composition testing
-    for (i, test_i) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() {
-        for (j, test_j) in [TYPE2_PASSES, TYPE_FAILS].into_iter().flatten().enumerate() {
+    let type_iter = [TYPE2_PASSES, TYPE_FAILS, TYPE1_FAILS, TYPE2_FAILS] // passing type2 cases first, then the *_FAILS fixtures; the index tells them apart
+        .into_iter()
+        .flatten()
+        .enumerate();
+
+    let rules_iter = type_iter
+        .clone()
+        .zip(type_iter) // NOTE(review): zipping the list with its own clone pairs each case only with itself (i == j); the removed nested loops covered every (i, j) pair - confirm intended
+        .map(|((i, test_i), (j, test_j))| {
+            let is_passed = i < TYPE2_PASSES.len() && j < TYPE2_PASSES.len(); // expected to parse only when both alternatives are passing type2 cases
             let input = [test_i.to_owned(), "/", test_j.to_owned()].join(" ");
-            let parse = CDDLTestParser::parse(Rule::type_TEST, &input);
-
-            if (0..TYPE2_PASSES.len()).contains(&i) && (0..TYPE2_PASSES.len()).contains(&j) {
-                assert!(parse.is_ok());
-            } else {
-                assert!(parse.is_err());
-            }
-        }
-    }
+            (input, is_passed)
+        });
+
+    let passes = rules_iter // inputs flagged as expected-to-parse
+        .clone() // rules_iter is reused below for the failing half
+        .filter(|(_, is_passes)| *is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    let fails = rules_iter // the remaining inputs, expected to be rejected
+        .filter(|(_, is_passes)| !*is_passes)
+        .map(|(input, _)| input)
+        .collect::<Vec<_>>();
+
+    common::check_tests_rule(Rule::type_TEST, &passes, &fails); // both lists go to the shared helper; its exact checks live in tests/common (not shown here)
 }