Skip to content

Commit

Permalink
feat(rust/cbork): Add a CDDL preprocessing step (#80)
Browse files Browse the repository at this point in the history
* move parsers into the separate module

* add validate_cddl pub function

* add processor module

* rename `rule` to `expr` and some simple expression processing functions

* wip

* wip

* refactor Ast

* wip

* remove unused deps

* move CDDLTestParser to the tests::common mod

* refactor character_sets tests

* cleanup tests/identifiers.rs

* refactor tests/rules.rs

* refactor tests/type_declaration

* rename rule_TEST to expr_TEST
  • Loading branch information
Mr-Leshiy authored Nov 26, 2024
1 parent d4dc27a commit 3910b60
Show file tree
Hide file tree
Showing 17 changed files with 370 additions and 395 deletions.
3 changes: 1 addition & 2 deletions rust/cbork-cddl-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ repository.workspace = true
workspace = true

[dependencies]
derive_more = {version = "1.0.0", features = ["from","display"] }
pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] }
pest_derive = { version = "2.7.13", features = ["grammar-extras"] }
thiserror = "1.0.64"
anyhow = "1.0.89"
4 changes: 2 additions & 2 deletions rust/cbork-cddl-parser/src/grammar/cddl_test.pest
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
// cspell: words assigng genericparm genericarg rangeop ctlop
// cspell: words grpchoice grpent memberkey bareword optcom

/// Test Expression for the `rule` Rule.
rule_TEST = ${ SOI ~ rule ~ EOI }
/// Test Expression for the `expr` Rule.
expr_TEST = ${ SOI ~ expr ~ EOI }

/// Test Expression for the `typename` Rule.
typename_TEST = ${ SOI ~ typename ~ EOI }
Expand Down
4 changes: 2 additions & 2 deletions rust/cbork-cddl-parser/src/grammar/rfc_8610.pest
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

cddl = ${
SOI
~ S ~ (rule ~ S)+
~ S ~ (expr ~ S)+
~ EOI
}

// -----------------------------------------------------------------------------
// Rules
rule = ${
expr = ${
(typename ~ genericparm? ~ S ~ assignt ~ S ~ type)
| (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent)
}
Expand Down
161 changes: 14 additions & 147 deletions rust/cbork-cddl-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,160 +1,27 @@
//! A parser for CDDL, utilized for parsing in accordance with RFC 8610.
#![allow(missing_docs)] // TODO(apskhem): Temporary, to be removed in a subsequent PR
mod parser;
mod preprocessor;

use derive_more::{Display, From};
pub use pest::Parser;
use pest::{error::Error, iterators::Pairs};

pub mod rfc_8610 {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
pub struct RFC8610Parser;
}

pub mod rfc_9165 {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
pub struct RFC8610Parser;
}

pub mod cddl {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
pub struct RFC8610Parser;
}

pub mod cddl_test {
pub use pest::Parser;

// Parser with DEBUG rules. These rules are only used in tests.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests.
pub struct CDDLTestParser;
}

/// Represents different parser extensions for handling CDDL specifications.
/// Represents different grammar extensions for handling CDDL specifications.
pub enum Extension {
/// RFC8610 ONLY limited parser.
RFC8610Parser,
/// RFC8610 and RFC9165 limited parser.
RFC9165Parser,
/// RFC8610, RFC9165, and CDDL modules.
CDDLParser,
}

// CDDL Standard Postlude - read from an external file
pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl");

/// Abstract Syntax Tree (AST) representing parsed CDDL syntax.
// TODO: this is temporary. need to add more pragmatic nodes
#[derive(Debug)]
pub enum AST<'a> {
/// Represents the AST for RFC 8610 CDDL rules.
RFC8610(Pairs<'a, rfc_8610::Rule>),
/// Represents the AST for RFC 9165 CDDL rules.
RFC9165(Pairs<'a, rfc_9165::Rule>),
/// Represents the AST for CDDL Modules rules.
CDDL(Pairs<'a, cddl::Rule>),
/// RFC8610 ONLY limited grammar.
RFC8610,
/// RFC8610 and RFC9165 limited grammar.
RFC9165,
/// RFC8610, RFC9165, and CDDL grammar.
CDDL,
}

/// Represents different types of errors related to different types of extension.
#[derive(Display, Debug)]
pub enum CDDLErrorType {
/// An error related to RFC 8610 extension.
RFC8610(Error<rfc_8610::Rule>),
/// An error related to RFC 9165 extension.
RFC9165(Error<rfc_9165::Rule>),
/// An error related to CDDL modules extension.
CDDL(Error<cddl::Rule>),
}

/// Represents an error that may occur during CDDL parsing.
#[derive(thiserror::Error, Debug, From)]
#[error("{0}")]
pub struct CDDLError(CDDLErrorType);

/// Parses and checks semantically a CDDL input string.
///
/// # Arguments
///
/// * `input` - A string containing the CDDL input to be parsed.
///
/// # Returns
///
/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing
/// a boxed `CDDLError` indicating the parsing error.
/// Semantically verifies a CDDL input string.
///
/// # Errors
///
/// This function may return an error in the following cases:
///
/// - If there is an issue with parsing the CDDL input.
///
/// # Examples
///
/// ```rs
/// use cbork_cddl_parser::{parse_cddl, Extension};
/// use std::fs;
///
/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap();
/// let result = parse_cddl(&mut input, &Extension::CDDLParser);
/// assert!(result.is_ok());
/// ```
pub fn parse_cddl<'a>(
input: &'a mut String, extension: &Extension,
) -> Result<AST<'a>, Box<CDDLError>> {
input.push_str("\n\n");
input.push_str(POSTLUDE);

let result = match extension {
Extension::RFC8610Parser => {
rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input)
.map(AST::RFC8610)
.map_err(CDDLErrorType::RFC8610)
},
Extension::RFC9165Parser => {
rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input)
.map(AST::RFC9165)
.map_err(CDDLErrorType::RFC9165)
},
Extension::CDDLParser => {
cddl::RFC8610Parser::parse(cddl::Rule::cddl, input)
.map(AST::CDDL)
.map_err(CDDLErrorType::CDDL)
},
};

result.map_err(|e| Box::new(CDDLError::from(e)))
}

#[cfg(test)]
mod tests {
use crate::*;

#[test]
fn it_works() {
let mut input = String::new();
let result = parse_cddl(&mut input, &Extension::CDDLParser);

match result {
Ok(c) => println!("{c:?}"),
Err(e) => {
println!("{e:?}");
println!("{e}");
},
}
}
/// Validates a CDDL input string against the grammar selected by `extension`.
///
/// The input is first parsed and the resulting AST is then run through the
/// preprocessor; the processed AST itself is discarded, only success or failure is
/// reported.
///
/// Note that `input` is taken mutably because the parsing step appends the CDDL
/// postlude to it.
///
/// # Errors
///
/// Returns an error if either the parsing step or the preprocessing step fails.
pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> {
    parser::parse_cddl(input, extension)
        .and_then(preprocessor::process_ast)
        .map(|_processed| ())
}
88 changes: 88 additions & 0 deletions rust/cbork-cddl-parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest).
//! Utilized for parsing in accordance with RFC-8610, RFC-9165.
use pest::{iterators::Pair, Parser};

use crate::Extension;

/// RFC-8610 parser.
///
/// Wraps the derived parser in its own module so that the `Rule` type referenced as
/// `rfc_8610::Rule` elsewhere in this file is namespaced per grammar.
// NOTE(review): presumably `missing_docs` is allowed because the derive-generated
// items carry no documentation - confirm.
#[allow(missing_docs)]
pub(crate) mod rfc_8610 {
/// A Pest parser for RFC-8610, generated from `grammar/rfc_8610.pest`.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
pub(crate) struct Parser;
}

/// RFC-9165 parser.
///
/// Wraps the derived parser in its own module so that the `Rule` type referenced as
/// `rfc_9165::Rule` elsewhere in this file is namespaced per grammar.
// NOTE(review): presumably `missing_docs` is allowed because the derive-generated
// items carry no documentation - confirm.
#[allow(missing_docs)]
pub(crate) mod rfc_9165 {
/// A Pest parser for RFC-9165, generated from the RFC-8610 grammar file combined
/// with the RFC-9165 grammar file.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
pub(crate) struct Parser;
}

/// Full CDDL syntax parser.
///
/// Wraps the derived parser in its own module so that the `Rule` type referenced as
/// `cddl::Rule` elsewhere in this file is namespaced per grammar.
// NOTE(review): presumably `missing_docs` is allowed because the derive-generated
// items carry no documentation - confirm.
#[allow(missing_docs)]
pub(crate) mod cddl {
/// A Pest parser for a full CDDL syntax, generated from the RFC-8610, RFC-9165 and
/// CDDL modules grammar files combined.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
pub(crate) struct Parser;
}

/// CDDL Standard Postlude - read from an external file (`grammar/postlude.cddl`)
/// and embedded into the binary at compile time.
///
/// `parse_cddl` appends it to every input before parsing.
const POSTLUDE: &str = include_str!("grammar/postlude.cddl");

/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax.
///
/// There is one variant per supported grammar, because each derived parser has its
/// own `Rule` type. Every variant holds the pairs collected from that parser's
/// output; the pairs borrow from the parsed input for the lifetime `'a`.
#[derive(Debug)]
pub(crate) enum Ast<'a> {
/// Represents the AST for RFC-8610 CDDL rules.
Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>),
/// Represents the AST for RFC-9165 CDDL rules.
Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>),
/// Represents the AST for CDDL Modules rules.
Cddl(Vec<Pair<'a, cddl::Rule>>),
}

/// Parses a CDDL input string into a PEST [`Ast`] using the grammar selected by
/// `extension`.
///
/// Before parsing, two newlines followed by the standard CDDL postlude are appended
/// to `input`. The caller's string is therefore modified in place, and the returned
/// [`Ast`] borrows from it.
///
/// # Arguments
///
/// * `input` - A string containing the CDDL input to be parsed. The postlude is
///   appended to it.
/// * `extension` - Selects which grammar (and therefore which parser) is used.
///
/// # Returns
///
/// Returns the pairs produced by parsing the `cddl` rule of the selected grammar,
/// wrapped in the matching [`Ast`] variant.
///
/// # Errors
///
/// This function may return an error in the following cases:
///
/// - If there is an issue with parsing the CDDL input (with the postlude appended)
///   against the selected grammar.
pub(crate) fn parse_cddl<'a>(
    input: &'a mut String, extension: &Extension,
) -> anyhow::Result<Ast<'a>> {
    // The postlude is parsed together with the user input, separated by a blank line.
    input.push_str("\n\n");
    input.push_str(POSTLUDE);

    // Each grammar has its own parser and `Rule` type, so the arms cannot be merged.
    let ast = match extension {
        Extension::RFC8610 => {
            let pairs = rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input)?;
            Ast::Rfc8610(pairs.collect())
        },
        Extension::RFC9165 => {
            let pairs = rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input)?;
            Ast::Rfc9165(pairs.collect())
        },
        Extension::CDDL => {
            let pairs = cddl::Parser::parse(cddl::Rule::cddl, input)?;
            Ast::Cddl(pairs.collect())
        },
    };
    Ok(ast)
}
44 changes: 44 additions & 0 deletions rust/cbork-cddl-parser/src/preprocessor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//! A CDDL AST preprocessor.
//!
//! - Validates the root rule of the AST to be a `cddl` rule.
//! - Filters out all rules that are not `expr` rules.
//! - (TODO) Resolve #include and #import directives, by just adding the imported rules
//! into the final expression list
use anyhow::{anyhow, ensure};
use pest::{iterators::Pair, RuleType};

use crate::parser::{cddl, rfc_8610, rfc_9165, Ast};

/// Runs the preprocessing step over a parsed AST.
///
/// For whichever grammar variant the AST belongs to, the root is checked to be that
/// grammar's `cddl` rule and only its `expr` pairs are kept. The result is wrapped
/// back into the same variant.
///
/// # Errors
///
/// Returns an error if the root validation performed by `process_root_and_filter`
/// fails.
pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> {
    let processed = match ast {
        Ast::Rfc8610(pairs) => {
            let exprs =
                process_root_and_filter(pairs, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?;
            Ast::Rfc8610(exprs)
        },
        Ast::Rfc9165(pairs) => {
            let exprs =
                process_root_and_filter(pairs, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?;
            Ast::Rfc9165(exprs)
        },
        Ast::Cddl(pairs) => {
            let exprs = process_root_and_filter(pairs, cddl::Rule::cddl, cddl::Rule::expr)?;
            Ast::Cddl(exprs)
        },
    };
    Ok(processed)
}

/// Process the root rule of the AST and filter out all non `expected_rule` rules.
///
/// The AST must consist of exactly one top-level pair whose rule equals `root_rule`.
/// The inner pairs of that root are then returned, keeping only those whose rule
/// equals `expected_rule`.
///
/// # Errors
///
/// Returns an error if:
///
/// - the AST is empty;
/// - the first pair is not a `root_rule` pair, or the AST contains more than one
///   top-level pair.
fn process_root_and_filter<R: RuleType>(
    ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R,
) -> anyhow::Result<Vec<Pair<'_, R>>> {
    let mut ast_iter = ast.into_iter();
    // Build the error lazily so that nothing is constructed on the success path.
    let ast_root = ast_iter.next().ok_or_else(|| anyhow!("Empty AST."))?;
    ensure!(
        ast_root.as_rule() == root_rule && ast_iter.next().is_none(),
        "AST must have only one root rule, which must be a `{root_rule:?}` rule."
    );
    Ok(ast_root
        .into_inner()
        .filter(|pair| pair.as_rule() == expected_rule)
        .collect())
}
4 changes: 1 addition & 3 deletions rust/cbork-cddl-parser/tests/byte_sequences.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm

use cbork_cddl_parser::cddl_test::Rule;

mod common;
use common::byte_sequences::*;
use common::{byte_sequences::*, Rule};

#[test]
/// Test if the `HEX_PAIR` rule passes properly.
Expand Down
6 changes: 3 additions & 3 deletions rust/cbork-cddl-parser/tests/cddl.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{ffi::OsStr, fs, io::Result};

use cbork_cddl_parser::{parse_cddl, Extension};
use cbork_cddl_parser::{validate_cddl, Extension};

#[test]
/// # Panics
Expand Down Expand Up @@ -32,7 +32,7 @@ fn parse_cddl_files() {
for file_path in valid_file_paths {
let mut content = fs::read_to_string(file_path).unwrap();

if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) {
if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) {
err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1));
}
}
Expand All @@ -41,7 +41,7 @@ fn parse_cddl_files() {
for file_path in invalid_file_paths {
let mut content = fs::read_to_string(file_path).unwrap();

let result = parse_cddl(&mut content, &Extension::CDDLParser);
let result = validate_cddl(&mut content, &Extension::CDDL);

assert!(result.is_err(), "{:?} is expected to fail", &file_path);
}
Expand Down
Loading

0 comments on commit 3910b60

Please sign in to comment.