-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rust/cbork): Add a CDDL preprocessing step (#80)
* move parsers into the separate module * add validate_cddl pub function * add processor module * rename `rule` to `expr` and some simple expression processing functions * wip * wip * refactor Ast * wip * remove unused deps * move CDDLTestParser to the tests::common mod * refactor character_sets tests * cleanup tests/identifiers.rs * refactor tests/rules.rs * refactor tests/type_declaration * rename rule_TEST to expr_TEST
- Loading branch information
Showing
17 changed files
with
370 additions
and
395 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,160 +1,27 @@ | ||
//! A parser for CDDL, utilized for parsing in accordance with RFC 8610. | ||
#![allow(missing_docs)] // TODO(apskhem): Temporary, to be removed in a subsequent PR | ||
mod parser; | ||
mod preprocessor; | ||
|
||
use derive_more::{Display, From}; | ||
pub use pest::Parser; | ||
use pest::{error::Error, iterators::Pairs}; | ||
|
||
pub mod rfc_8610 { | ||
pub use pest::Parser; | ||
|
||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
pub struct RFC8610Parser; | ||
} | ||
|
||
pub mod rfc_9165 { | ||
pub use pest::Parser; | ||
|
||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
#[grammar = "grammar/rfc_9165.pest"] | ||
pub struct RFC8610Parser; | ||
} | ||
|
||
pub mod cddl { | ||
pub use pest::Parser; | ||
|
||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
#[grammar = "grammar/rfc_9165.pest"] | ||
#[grammar = "grammar/cddl_modules.pest"] | ||
pub struct RFC8610Parser; | ||
} | ||
|
||
pub mod cddl_test { | ||
pub use pest::Parser; | ||
|
||
// Parser with DEBUG rules. These rules are only used in tests. | ||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
#[grammar = "grammar/rfc_9165.pest"] | ||
#[grammar = "grammar/cddl_modules.pest"] | ||
#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. | ||
pub struct CDDLTestParser; | ||
} | ||
|
||
/// Represents different parser extensions for handling CDDL specifications. | ||
/// Represents different grammar extensions for handling CDDL specifications. | ||
pub enum Extension { | ||
/// RFC8610 ONLY limited parser. | ||
RFC8610Parser, | ||
/// RFC8610 and RFC9165 limited parser. | ||
RFC9165Parser, | ||
/// RFC8610, RFC9165, and CDDL modules. | ||
CDDLParser, | ||
} | ||
|
||
// CDDL Standard Postlude - read from an external file | ||
pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); | ||
|
||
/// Abstract Syntax Tree (AST) representing parsed CDDL syntax. | ||
// TODO: this is temporary. need to add more pragmatic nodes | ||
#[derive(Debug)] | ||
pub enum AST<'a> { | ||
/// Represents the AST for RFC 8610 CDDL rules. | ||
RFC8610(Pairs<'a, rfc_8610::Rule>), | ||
/// Represents the AST for RFC 9165 CDDL rules. | ||
RFC9165(Pairs<'a, rfc_9165::Rule>), | ||
/// Represents the AST for CDDL Modules rules. | ||
CDDL(Pairs<'a, cddl::Rule>), | ||
/// RFC8610 ONLY limited grammar. | ||
RFC8610, | ||
/// RFC8610 and RFC9165 limited grammar. | ||
RFC9165, | ||
/// RFC8610, RFC9165, and CDDL grammar. | ||
CDDL, | ||
} | ||
|
||
/// Represents different types of errors related to different types of extension. | ||
#[derive(Display, Debug)] | ||
pub enum CDDLErrorType { | ||
/// An error related to RFC 8610 extension. | ||
RFC8610(Error<rfc_8610::Rule>), | ||
/// An error related to RFC 9165 extension. | ||
RFC9165(Error<rfc_9165::Rule>), | ||
/// An error related to CDDL modules extension. | ||
CDDL(Error<cddl::Rule>), | ||
} | ||
|
||
/// Represents an error that may occur during CDDL parsing. | ||
#[derive(thiserror::Error, Debug, From)] | ||
#[error("{0}")] | ||
pub struct CDDLError(CDDLErrorType); | ||
|
||
/// Parses and checks semantically a CDDL input string. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `input` - A string containing the CDDL input to be parsed. | ||
/// | ||
/// # Returns | ||
/// | ||
/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing | ||
/// a boxed `CDDLError` indicating the parsing error. | ||
/// Verifies semantically a CDDL input string. | ||
/// | ||
/// # Errors | ||
/// | ||
/// This function may return an error in the following cases: | ||
/// | ||
/// - If there is an issue with parsing the CDDL input. | ||
/// | ||
/// # Examples | ||
/// | ||
/// ```rs | ||
/// use cbork_cddl_parser::{parse_cddl, Extension}; | ||
/// use std::fs; | ||
/// | ||
/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap(); | ||
/// let result = parse_cddl(&mut input, &Extension::CDDLParser); | ||
/// assert!(result.is_ok()); | ||
/// ``` | ||
pub fn parse_cddl<'a>( | ||
input: &'a mut String, extension: &Extension, | ||
) -> Result<AST<'a>, Box<CDDLError>> { | ||
input.push_str("\n\n"); | ||
input.push_str(POSTLUDE); | ||
|
||
let result = match extension { | ||
Extension::RFC8610Parser => { | ||
rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input) | ||
.map(AST::RFC8610) | ||
.map_err(CDDLErrorType::RFC8610) | ||
}, | ||
Extension::RFC9165Parser => { | ||
rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input) | ||
.map(AST::RFC9165) | ||
.map_err(CDDLErrorType::RFC9165) | ||
}, | ||
Extension::CDDLParser => { | ||
cddl::RFC8610Parser::parse(cddl::Rule::cddl, input) | ||
.map(AST::CDDL) | ||
.map_err(CDDLErrorType::CDDL) | ||
}, | ||
}; | ||
|
||
result.map_err(|e| Box::new(CDDLError::from(e))) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use crate::*; | ||
|
||
#[test] | ||
fn it_works() { | ||
let mut input = String::new(); | ||
let result = parse_cddl(&mut input, &Extension::CDDLParser); | ||
|
||
match result { | ||
Ok(c) => println!("{c:?}"), | ||
Err(e) => { | ||
println!("{e:?}"); | ||
println!("{e}"); | ||
}, | ||
} | ||
} | ||
pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { | ||
// Parse `input` with the grammar selected by `extension`; any parse error is returned. | ||
let ast = parser::parse_cddl(input, extension)?; | ||
// Run the preprocessing step. The processed AST is not used yet, only its success matters. | ||
let _ast = preprocessor::process_ast(ast)?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). | ||
//! Utilized for parsing in accordance with RFC-8610, RFC-9165. | ||
use pest::{iterators::Pair, Parser}; | ||
|
||
use crate::Extension; | ||
|
||
/// RFC-8610 parser. | ||
/// | ||
/// Kept in its own module so that its `Parser` struct and the `rfc_8610::Rule` type used | ||
/// elsewhere in this file do not clash with the other grammar modules. | ||
// NOTE(review): `missing_docs` is presumably allowed for the derive-generated items — confirm. | ||
#[allow(missing_docs)] | ||
pub(crate) mod rfc_8610 { | ||
/// A Pest parser for RFC-8610, built from `grammar/rfc_8610.pest`. | ||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
pub(crate) struct Parser; | ||
} | ||
|
||
/// RFC-9165 parser. | ||
/// | ||
/// Kept in its own module so that its `Parser` struct and the `rfc_9165::Rule` type used | ||
/// elsewhere in this file do not clash with the other grammar modules. | ||
// NOTE(review): `missing_docs` is presumably allowed for the derive-generated items — confirm. | ||
#[allow(missing_docs)] | ||
pub(crate) mod rfc_9165 { | ||
/// A Pest parser for RFC-9165, built from `grammar/rfc_8610.pest` plus | ||
/// `grammar/rfc_9165.pest`. | ||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
#[grammar = "grammar/rfc_9165.pest"] | ||
pub(crate) struct Parser; | ||
} | ||
|
||
/// Full CDDL syntax parser. | ||
/// | ||
/// Kept in its own module so that its `Parser` struct and the `cddl::Rule` type used | ||
/// elsewhere in this file do not clash with the other grammar modules. | ||
// NOTE(review): `missing_docs` is presumably allowed for the derive-generated items — confirm. | ||
#[allow(missing_docs)] | ||
pub(crate) mod cddl { | ||
/// A Pest parser for a full CDDL syntax, built from `grammar/rfc_8610.pest`, | ||
/// `grammar/rfc_9165.pest` and `grammar/cddl_modules.pest`. | ||
#[derive(pest_derive::Parser)] | ||
#[grammar = "grammar/rfc_8610.pest"] | ||
#[grammar = "grammar/rfc_9165.pest"] | ||
#[grammar = "grammar/cddl_modules.pest"] | ||
pub(crate) struct Parser; | ||
} | ||
|
||
/// CDDL Standard Postlude, embedded at compile time from `grammar/postlude.cddl`. | ||
/// | ||
/// `parse_cddl` appends this text to its input before parsing. | ||
const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); | ||
|
||
/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax. | ||
/// | ||
/// There is one variant per supported grammar; each holds the pairs collected from that | ||
/// grammar's parser. The `'a` lifetime is the borrow of the parsed input string. | ||
#[derive(Debug)] | ||
pub(crate) enum Ast<'a> { | ||
/// Represents the AST for RFC-8610 CDDL rules. | ||
Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>), | ||
/// Represents the AST for RFC-9165 CDDL rules. | ||
Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>), | ||
/// Represents the AST for CDDL Modules rules. | ||
Cddl(Vec<Pair<'a, cddl::Rule>>), | ||
} | ||
|
||
/// Parses a CDDL input string into a PEST [`Ast`]. | ||
/// | ||
/// Two newlines and the standard postlude (`POSTLUDE`) are appended to `input` before | ||
/// parsing, so the caller's string is modified by this call. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `input` - A string containing the CDDL input to be parsed. | ||
/// * `extension` - Selects which grammar (and therefore which `Ast` variant) is used. | ||
/// | ||
/// # Returns | ||
/// | ||
/// Returns `Ok(Ast)` holding the collected top-level pairs if parsing is successful, | ||
/// otherwise returns an `Err` wrapping the parser error. | ||
/// | ||
/// # Errors | ||
/// | ||
/// This function may return an error in the following cases: | ||
/// | ||
/// - If there is an issue with parsing the CDDL input. | ||
pub(crate) fn parse_cddl<'a>( | ||
input: &'a mut String, extension: &Extension, | ||
) -> anyhow::Result<Ast<'a>> { | ||
// Append the postlude so it is parsed together with the caller's input. | ||
input.push_str("\n\n"); | ||
input.push_str(POSTLUDE); | ||
|
||
// Each arm parses from the grammar's `cddl` rule and collects the resulting pairs. | ||
let ast = match extension { | ||
Extension::RFC8610 => { | ||
rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input) | ||
.map(|p| Ast::Rfc8610(p.collect()))? | ||
}, | ||
Extension::RFC9165 => { | ||
rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input) | ||
.map(|p| Ast::Rfc9165(p.collect()))? | ||
}, | ||
Extension::CDDL => { | ||
cddl::Parser::parse(cddl::Rule::cddl, input).map(|p| Ast::Cddl(p.collect()))? | ||
}, | ||
}; | ||
Ok(ast) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
//! A CDDL AST preprocessor. | ||
//! | ||
//! - Validates the root rule of the AST to be a `cddl` rule. | ||
//! - Filters out all rules that are not `expr` rules. | ||
//! - (TODO) Resolve #include and #import directives, by just adding the imported rules | ||
//! into the final expression list | ||
use anyhow::{anyhow, ensure}; | ||
use pest::{iterators::Pair, RuleType}; | ||
|
||
use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; | ||
|
||
/// Processes the AST. | ||
pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> { | ||
match ast { | ||
Ast::Rfc8610(ast) => { | ||
process_root_and_filter(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr) | ||
.map(Ast::Rfc8610) | ||
}, | ||
Ast::Rfc9165(ast) => { | ||
process_root_and_filter(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr) | ||
.map(Ast::Rfc9165) | ||
}, | ||
Ast::Cddl(ast) => { | ||
process_root_and_filter(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl) | ||
}, | ||
} | ||
} | ||
|
||
/// Validates the root of the AST and keeps only its `expected_rule` children. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `ast` - Top-level pairs produced by the parser. | ||
/// * `root_rule` - The rule the single root pair must match. | ||
/// * `expected_rule` - The rule of the root's inner pairs to keep; all others are dropped. | ||
/// | ||
/// # Errors | ||
/// | ||
/// - If `ast` is empty. | ||
/// - If `ast` has more than one top-level pair, or the first one is not a `root_rule` pair. | ||
fn process_root_and_filter<R: RuleType>( | ||
    ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R, | ||
) -> anyhow::Result<Vec<Pair<'_, R>>> { | ||
    let mut ast_iter = ast.into_iter(); | ||
    // Build the error lazily: `ok_or` would construct it even when a root pair exists. | ||
    let ast_root = ast_iter.next().ok_or_else(|| anyhow!("Empty AST."))?; | ||
    ensure!( | ||
        ast_root.as_rule() == root_rule && ast_iter.next().is_none(), | ||
        "AST must have only one root rule, which must be a `{root_rule:?}` rule." | ||
    ); | ||
    Ok(ast_root | ||
        .into_inner() | ||
        .filter(|pair| pair.as_rule() == expected_rule) | ||
        .collect()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.