From 973f760051d2e31a3beb3d10226cd3fcbe215a21 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 24 Aug 2024 09:16:15 +0300 Subject: [PATCH 01/73] init mdast to markdown --- src/lib.rs | 6 ++ src/to_markdown.rs | 228 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 src/to_markdown.rs diff --git a/src/lib.rs b/src/lib.rs index f1266e81..5ee47932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,6 +42,7 @@ mod resolve; mod state; mod subtokenize; mod to_html; +mod to_markdown; mod to_mdast; mod tokenizer; mod util; @@ -50,6 +51,7 @@ pub mod mdast; // To do: externalize? pub mod message; // To do: externalize. pub mod unist; // To do: externalize. +use mdast::Node; #[doc(hidden)] pub use util::identifier::{id_cont, id_start}; @@ -159,3 +161,7 @@ pub fn to_mdast(value: &str, options: &ParseOptions) -> Result String { + to_markdown::serialize(tree) +} diff --git a/src/to_markdown.rs b/src/to_markdown.rs new file mode 100644 index 00000000..790cf04d --- /dev/null +++ b/src/to_markdown.rs @@ -0,0 +1,228 @@ +use crate::mdast::{Node, Paragraph, Root}; +use alloc::{string::String, vec::Vec}; + +#[allow(dead_code)] +pub enum ConstructName { + /// Whole autolink. + /// Example: + /// > `` and `` + Autolink, + + /// Whole block quote. + /// Example: + /// > `> a` + /// > `b` + Blockquote, + + /// Whole code (indented). + /// Example: + /// > ` console.log(1)` + CodeIndented, + + /// Whole code (fenced). + /// Example: + /// > ` ```js` + /// > `console.log(1)` + /// > ` ```` + CodeFenced, + + /// Code (fenced) language, when fenced with grave accents. + /// Example: + /// > ` ```js` + CodeFencedLangGraveAccent, + + /// Code (fenced) language, when fenced with tildes. + /// Example: + /// > ` ~~~js` + CodeFencedLangTilde, + + /// Code (fenced) meta string, when fenced with grave accents. + /// Example: + /// > ` ```js eval` + CodeFencedMetaGraveAccent, + + /// Code (fenced) meta string, when fenced with tildes. 
+ /// Example: + /// > ` ~~~js eval` + CodeFencedMetaTilde, + + /// Whole definition. + /// Example: + /// > `[a]: b "c"` + Definition, + + /// Destination (literal) (occurs in definition, image, link). + /// Example: + /// > `[a]: "c"` + /// > `a ![b]( "d") e` + DestinationLiteral, + + /// Destination (raw) (occurs in definition, image, link). + /// Example: + /// > `[a]: b "c"` + /// > `a ![b](c "d") e` + DestinationRaw, + + /// Emphasis. + /// Example: + /// > `*a*` + Emphasis, + + /// Whole heading (atx). + /// Example: + /// > `# alpha` + HeadingAtx, + + /// Whole heading (setext). + /// Example: + /// > `alpha` + /// > `=====` + HeadingSetext, + + /// Whole image. + /// Example: + /// > `![a](b)` + Image, + + /// Whole image reference. + /// Example: + /// > `![a]` + ImageReference, + + /// Label (occurs in definitions, image reference, image, link reference, link). + /// Example: + /// > `[a]: b "c"` + /// > `a [b] c` + /// > `a ![b][c] d` + /// > `a [b](c) d` + Label, + + /// Whole link. + /// Example: + /// > `[a](b)` + Link, + + /// Whole link reference. + /// Example: + /// > `[a]` + LinkReference, + + /// List. + /// Example: + /// > `* a` + /// > `1. b` + List, + + /// List item. + /// Example: + /// > `* a` + /// > `1. b` + ListItem, + + /// Paragraph. + /// Example: + /// > `a b` + /// > `c.` + Paragraph, + + /// Phrasing (occurs in headings, paragraphs, etc). + /// Example: + /// > `a` + Phrasing, + + /// Reference (occurs in image, link). + /// Example: + /// > `[a][]` + Reference, + + /// Strong. + /// Example: + /// > `**a**` + Strong, + + /// Title using single quotes (occurs in definition, image, link). + /// Example: + /// > `[a](b 'c')` + TitleApostrophe, + + /// Title using double quotes (occurs in definition, image, link). 
+ /// Example: + /// > `[a](b "c")` + TitleQuote, +} + +pub trait PhrasingParent { + fn children(&self) -> &Vec; +} + +pub trait FlowParent { + fn children(&self) -> &Vec; +} + +impl FlowParent for Root { + fn children(&self) -> &Vec { + &self.children + } +} + +impl PhrasingParent for Paragraph { + fn children(&self) -> &Vec { + &self.children + } +} + +struct State { + stack: Vec, +} + +impl State { + pub fn new() -> Self { + State { stack: Vec::new() } + } + + fn enter(&mut self, name: ConstructName) { + self.stack.push(name); + } + + fn exit(&mut self) { + self.stack.pop(); + } + + pub fn handle(mut self, node: Node) -> String { + match node { + Node::Root(root) => self.handle_root(root), + Node::Paragraph(paragarph) => self.handle_paragraph(paragarph), + _ => panic!("Not handled yet"), + } + } + + fn handle_root(&mut self, node: Root) -> String { + self.container_flow(node) + } + + fn handle_paragraph(&mut self, node: Paragraph) -> String { + self.enter(ConstructName::Paragraph); + + self.enter(ConstructName::Phrasing); + let value = self.container_phrasing(node); + // exit phrasing + self.exit(); + // exit paragarph + self.exit(); + value + } + + fn container_phrasing(&self, _parent: T) -> String { + String::new() + } + + fn container_flow(&self, _parent: T) -> String { + String::new() + } +} + +pub fn serialize(tree: Node) -> String { + let state = State::new(); + let result = state.handle(tree); + result +} From 691bb8732458a05f53c7aac852e0e1008aed7f5b Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 24 Aug 2024 16:30:16 +0300 Subject: [PATCH 02/73] Handle texts in a simple way --- src/to_markdown.rs | 67 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 790cf04d..13a4d1b0 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -1,4 +1,4 @@ -use crate::mdast::{Node, Paragraph, Root}; +use crate::mdast::{Node, Paragraph, Root, Text}; use 
alloc::{string::String, vec::Vec}; #[allow(dead_code)] @@ -152,22 +152,22 @@ pub enum ConstructName { } pub trait PhrasingParent { - fn children(&self) -> &Vec; + fn children(self) -> Vec; } pub trait FlowParent { - fn children(&self) -> &Vec; + fn children(self) -> Vec; } impl FlowParent for Root { - fn children(&self) -> &Vec { - &self.children + fn children(self) -> Vec { + self.children } } impl PhrasingParent for Paragraph { - fn children(&self) -> &Vec { - &self.children + fn children(self) -> Vec { + self.children } } @@ -188,10 +188,11 @@ impl State { self.stack.pop(); } - pub fn handle(mut self, node: Node) -> String { + pub fn handle(&mut self, node: Node) -> String { match node { Node::Root(root) => self.handle_root(root), - Node::Paragraph(paragarph) => self.handle_paragraph(paragarph), + Node::Paragraph(paragraph) => self.handle_paragraph(paragraph), + Node::Text(text) => self.handle_text(text), _ => panic!("Not handled yet"), } } @@ -212,17 +213,57 @@ impl State { value } - fn container_phrasing(&self, _parent: T) -> String { - String::new() + fn container_phrasing(&mut self, parent: T) -> String { + let mut results = Vec::new(); + + for (_, child) in parent.children().into_iter().enumerate() { + results.push(self.handle(child)); + } + + results.into_iter().collect() } - fn container_flow(&self, _parent: T) -> String { + fn container_flow(&mut self, _parent: T) -> String { String::new() } + + fn handle_text(&self, text: Text) -> String { + self.safe(text.value) + } + + fn safe(&self, value: String) -> String { + value + } } pub fn serialize(tree: Node) -> String { - let state = State::new(); + let mut state = State::new(); let result = state.handle(tree); result } + +#[cfg(test)] +mod init_tests { + use super::*; + use alloc::{string::String, vec}; + + use crate::mdast::{Node, Paragraph, Text}; + + #[test] + fn it_works_for_simple_text() { + let text_a = Node::Text(Text { + value: String::from("a"), + position: None, + }); + let text_b = 
Node::Text(Text { + value: String::from("b"), + position: None, + }); + let paragraph = Node::Paragraph(Paragraph { + children: vec![text_a, text_b], + position: None, + }); + let actual = serialize(paragraph); + assert_eq!(actual, String::from("ab")); + } +} From b2a2ed3ec78866d400dfe491332f22093f261c73 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 27 Aug 2024 12:24:42 +0300 Subject: [PATCH 03/73] Add container phrasing --- src/to_markdown.rs | 104 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 20 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 13a4d1b0..7d35c585 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -1,4 +1,4 @@ -use crate::mdast::{Node, Paragraph, Root, Text}; +use crate::mdast::{Node, Paragraph, Root, Strong, Text}; use alloc::{string::String, vec::Vec}; #[allow(dead_code)] @@ -151,6 +151,17 @@ pub enum ConstructName { TitleQuote, } +pub trait PeekNode { + // @todo make it take a reference to the state options + fn handle_peek(&self) -> String; +} + +impl PeekNode for Strong { + fn handle_peek(&self) -> String { + "*".into() + } +} + pub trait PhrasingParent { fn children(self) -> Vec; } @@ -173,11 +184,26 @@ impl PhrasingParent for Paragraph { struct State { stack: Vec, + index_stack: Vec, +} + +struct Info<'a> { + before: &'a str, + after: &'a str, +} + +impl<'a> Info<'a> { + pub fn new(before: &'a str, after: &'a str) -> Self { + Info { before, after } + } } impl State { pub fn new() -> Self { - State { stack: Vec::new() } + State { + stack: Vec::new(), + index_stack: Vec::new(), + } } fn enter(&mut self, name: ConstructName) { @@ -188,24 +214,24 @@ impl State { self.stack.pop(); } - pub fn handle(&mut self, node: Node) -> String { + pub fn handle(&mut self, node: Node, info: Info) -> String { match node { - Node::Root(root) => self.handle_root(root), - Node::Paragraph(paragraph) => self.handle_paragraph(paragraph), - Node::Text(text) => self.handle_text(text), + 
Node::Root(root) => self.handle_root(root, info), + Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), + Node::Text(text) => self.handle_text(text, info), _ => panic!("Not handled yet"), } } - fn handle_root(&mut self, node: Root) -> String { - self.container_flow(node) + fn handle_root(&mut self, node: Root, info: Info) -> String { + self.container_flow(node, info) } - fn handle_paragraph(&mut self, node: Paragraph) -> String { + fn handle_paragraph(&mut self, node: Paragraph, info: Info) -> String { self.enter(ConstructName::Paragraph); self.enter(ConstructName::Phrasing); - let value = self.container_phrasing(node); + let value = self.container_phrasing(node, info); // exit phrasing self.exit(); // exit paragarph @@ -213,22 +239,60 @@ impl State { value } - fn container_phrasing(&mut self, parent: T) -> String { - let mut results = Vec::new(); + fn handle_text(&self, text: Text, _info: Info) -> String { + self.safe(text.value) + } - for (_, child) in parent.children().into_iter().enumerate() { - results.push(self.handle(child)); + fn container_phrasing(&mut self, parent: T, info: Info) -> String { + let mut results: Vec = Vec::new(); + + let mut children_iter = parent.children().into_iter().peekable(); + let mut index = 0; + // SAFETY : -1 is used to mark the absense of children, we make sure to never use this as + // an index before checking the presense of a child. 
+ self.index_stack.push(-1); + + let index_stack_size = self.index_stack.len(); + while let Some(child) = children_iter.next() { + self.index_stack[index_stack_size - 1] = index; + + let mut after: String = "".into(); + if let Some(child_node) = children_iter.peek() { + after = match self.determine_first_char(child_node) { + Some(after_char) => after_char, + None => self + .handle(child_node.clone(), Info::new("", "")) + .chars() + .next() + .map(|c| c.into()) + .unwrap_or_default(), + }; + } + + if let Some(result) = results.last() { + results.push(self.handle( + child, + Info::new(&result[result.len() - 1..], after.as_ref()), + )); + } else { + results.push(self.handle(child, Info::new(info.before, after.as_ref()))); + } + + index += 1; } - + self.index_stack.pop(); results.into_iter().collect() } - fn container_flow(&mut self, _parent: T) -> String { - String::new() + fn determine_first_char(&self, node: &Node) -> Option { + match node { + Node::Strong(strong) => Some(strong.handle_peek()), + _ => None, + } } - fn handle_text(&self, text: Text) -> String { - self.safe(text.value) + fn container_flow(&mut self, _parent: T, _info: Info) -> String { + String::new() } fn safe(&self, value: String) -> String { @@ -238,7 +302,7 @@ impl State { pub fn serialize(tree: Node) -> String { let mut state = State::new(); - let result = state.handle(tree); + let result = state.handle(tree, Info::new("\n".into(), "\n".into())); result } From 25ee1b66ab5d12831743d3feb826517682c3e189 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 27 Aug 2024 12:25:57 +0300 Subject: [PATCH 04/73] Fix minor typo --- src/to_markdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 7d35c585..aa4b73b4 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -249,7 +249,7 @@ impl State { let mut children_iter = parent.children().into_iter().peekable(); let mut index = 0; // SAFETY : -1 is used to mark the absense of children, we make 
sure to never use this as - // an index before checking the presense of a child. + // an index before checking the presence of a child. self.index_stack.push(-1); let index_stack_size = self.index_stack.len(); From 4ff2d02f814ddff594fac86c330c7d01dfb1f004 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 27 Aug 2024 12:51:10 +0300 Subject: [PATCH 05/73] Change comment --- src/to_markdown.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index aa4b73b4..3d979a2d 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -248,8 +248,8 @@ impl State { let mut children_iter = parent.children().into_iter().peekable(); let mut index = 0; - // SAFETY : -1 is used to mark the absense of children, we make sure to never use this as - // an index before checking the presence of a child. + // SAFETY : -1 is used to mark the absense of children. + // We don't use this to index into any child. self.index_stack.push(-1); let index_stack_size = self.index_stack.len(); From 06e89f57decdadc30b964c1727fbf92ec84e6181 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 27 Aug 2024 15:07:46 +0300 Subject: [PATCH 06/73] Update comment --- src/to_markdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 3d979a2d..83be78b4 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -249,7 +249,7 @@ impl State { let mut children_iter = parent.children().into_iter().peekable(); let mut index = 0; // SAFETY : -1 is used to mark the absense of children. - // We don't use this to index into any child. + // We don't use index_stack values to index into any child. 
self.index_stack.push(-1); let index_stack_size = self.index_stack.len(); From 78e6529fe7ed2cf910cfb225adaff252f6223391 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 28 Aug 2024 11:39:57 +0300 Subject: [PATCH 07/73] Add support for flow containers and default joins --- Cargo.toml | 1 + src/to_markdown.rs | 294 +++++++++++++-------------- src/util/format_code_as_indented.rs | 17 ++ src/util/format_heading_as_setext.rs | 50 +++++ src/util/mod.rs | 2 + 5 files changed, 215 insertions(+), 149 deletions(-) create mode 100644 src/util/format_code_as_indented.rs create mode 100644 src/util/format_heading_as_setext.rs diff --git a/Cargo.toml b/Cargo.toml index 8a53c928..d119a431 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ log = ["dep:log"] log = { version = "0.4", optional = true } unicode-id = { version = "0.3", features = ["no_std"] } serde = { version = "1", features = ["derive"], optional = true } +regex = { version = "1.7.3" } [dev-dependencies] env_logger = "0.11" diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 83be78b4..fee86ec0 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -1,153 +1,40 @@ -use crate::mdast::{Node, Paragraph, Root, Strong, Text}; +use crate::{ + mdast::{List, Node, Paragraph, Root, Strong, Text}, + util::{ + format_code_as_indented::format_code_as_indented, + format_heading_as_setext::format_heading_as_settext, + }, +}; use alloc::{string::String, vec::Vec}; #[allow(dead_code)] pub enum ConstructName { - /// Whole autolink. - /// Example: - /// > `` and `` Autolink, - - /// Whole block quote. - /// Example: - /// > `> a` - /// > `b` Blockquote, - - /// Whole code (indented). - /// Example: - /// > ` console.log(1)` CodeIndented, - - /// Whole code (fenced). - /// Example: - /// > ` ```js` - /// > `console.log(1)` - /// > ` ```` CodeFenced, - - /// Code (fenced) language, when fenced with grave accents. 
- /// Example: - /// > ` ```js` CodeFencedLangGraveAccent, - - /// Code (fenced) language, when fenced with tildes. - /// Example: - /// > ` ~~~js` CodeFencedLangTilde, - - /// Code (fenced) meta string, when fenced with grave accents. - /// Example: - /// > ` ```js eval` CodeFencedMetaGraveAccent, - - /// Code (fenced) meta string, when fenced with tildes. - /// Example: - /// > ` ~~~js eval` CodeFencedMetaTilde, - - /// Whole definition. - /// Example: - /// > `[a]: b "c"` Definition, - - /// Destination (literal) (occurs in definition, image, link). - /// Example: - /// > `[a]: "c"` - /// > `a ![b]( "d") e` DestinationLiteral, - - /// Destination (raw) (occurs in definition, image, link). - /// Example: - /// > `[a]: b "c"` - /// > `a ![b](c "d") e` DestinationRaw, - - /// Emphasis. - /// Example: - /// > `*a*` Emphasis, - - /// Whole heading (atx). - /// Example: - /// > `# alpha` HeadingAtx, - - /// Whole heading (setext). - /// Example: - /// > `alpha` - /// > `=====` HeadingSetext, - - /// Whole image. - /// Example: - /// > `![a](b)` Image, - - /// Whole image reference. - /// Example: - /// > `![a]` ImageReference, - - /// Label (occurs in definitions, image reference, image, link reference, link). - /// Example: - /// > `[a]: b "c"` - /// > `a [b] c` - /// > `a ![b][c] d` - /// > `a [b](c) d` Label, - - /// Whole link. - /// Example: - /// > `[a](b)` Link, - - /// Whole link reference. - /// Example: - /// > `[a]` LinkReference, - - /// List. - /// Example: - /// > `* a` - /// > `1. b` List, - - /// List item. - /// Example: - /// > `* a` - /// > `1. b` ListItem, - - /// Paragraph. - /// Example: - /// > `a b` - /// > `c.` Paragraph, - - /// Phrasing (occurs in headings, paragraphs, etc). - /// Example: - /// > `a` Phrasing, - - /// Reference (occurs in image, link). - /// Example: - /// > `[a][]` Reference, - - /// Strong. - /// Example: - /// > `**a**` Strong, - - /// Title using single quotes (occurs in definition, image, link). 
- /// Example: - /// > `[a](b 'c')` TitleApostrophe, - - /// Title using double quotes (occurs in definition, image, link). - /// Example: - /// > `[a](b "c")` TitleQuote, } @@ -163,33 +50,58 @@ impl PeekNode for Strong { } pub trait PhrasingParent { - fn children(self) -> Vec; + fn children(&self) -> &Vec; } pub trait FlowParent { - fn children(self) -> Vec; + fn children(&self) -> &Vec; + + // `parent` has a `spread` field. + fn spread(&self) -> Option { + None + } +} + +impl FlowParent for List { + fn children(&self) -> &Vec { + &self.children + } + + fn spread(&self) -> Option { + Some(self.spread) + } } impl FlowParent for Root { - fn children(self) -> Vec { - self.children + fn children(&self) -> &Vec { + &self.children } } impl PhrasingParent for Paragraph { - fn children(self) -> Vec { - self.children + fn children(&self) -> &Vec { + &self.children } } -struct State { - stack: Vec, - index_stack: Vec, +pub enum Join { + Number(usize), + Bool(bool), } -struct Info<'a> { - before: &'a str, - after: &'a str, +#[allow(dead_code)] +pub struct State { + pub stack: Vec, + // SAFETY : -1 is used to mark the absense of children. + // We don't use index_stack values to index into any child. 
+ pub index_stack: Vec, + pub bullet_last_used: Option, +} + +#[allow(dead_code)] +pub struct Info<'a> { + pub before: &'a str, + pub after: &'a str, } impl<'a> Info<'a> { @@ -203,6 +115,7 @@ impl State { State { stack: Vec::new(), index_stack: Vec::new(), + bullet_last_used: None, } } @@ -214,7 +127,7 @@ impl State { self.stack.pop(); } - pub fn handle(&mut self, node: Node, info: Info) -> String { + pub fn handle(&mut self, node: &Node, info: Info) -> String { match node { Node::Root(root) => self.handle_root(root, info), Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), @@ -223,11 +136,11 @@ impl State { } } - fn handle_root(&mut self, node: Root, info: Info) -> String { + fn handle_root(&mut self, node: &Root, info: Info) -> String { self.container_flow(node, info) } - fn handle_paragraph(&mut self, node: Paragraph, info: Info) -> String { + fn handle_paragraph(&mut self, node: &Paragraph, info: Info) -> String { self.enter(ConstructName::Paragraph); self.enter(ConstructName::Phrasing); @@ -239,29 +152,28 @@ impl State { value } - fn handle_text(&self, text: Text, _info: Info) -> String { - self.safe(text.value) + fn handle_text(&self, text: &Text, _info: Info) -> String { + self.safe(text.value.clone()) } - fn container_phrasing(&mut self, parent: T, info: Info) -> String { + fn container_phrasing(&mut self, parent: &T, info: Info) -> String { let mut results: Vec = Vec::new(); - let mut children_iter = parent.children().into_iter().peekable(); let mut index = 0; - // SAFETY : -1 is used to mark the absense of children. - // We don't use index_stack values to index into any child. 
+ self.index_stack.push(-1); - let index_stack_size = self.index_stack.len(); while let Some(child) = children_iter.next() { - self.index_stack[index_stack_size - 1] = index; + if let Some(top) = self.index_stack.last_mut() { + *top = index; + } let mut after: String = "".into(); - if let Some(child_node) = children_iter.peek() { - after = match self.determine_first_char(child_node) { + if let Some(child) = children_iter.peek() { + after = match self.determine_first_char(child) { Some(after_char) => after_char, None => self - .handle(child_node.clone(), Info::new("", "")) + .handle(child, Info::new("", "")) .chars() .next() .map(|c| c.into()) @@ -291,18 +203,102 @@ impl State { } } - fn container_flow(&mut self, _parent: T, _info: Info) -> String { - String::new() + fn container_flow(&mut self, parent: &T, _info: Info) -> String { + let mut results: Vec = Vec::new(); + + let mut children_iter = parent.children().into_iter().peekable(); + let mut index: usize = 0; + + self.index_stack.push(-1); + + while let Some(child) = children_iter.next() { + if let Some(top) = self.index_stack.last_mut() { + *top = index as i64; + } + + if matches!(child, Node::List(_)) { + self.bullet_last_used = None; + } + + results.push(self.handle(child, Info::new("\n", "\n"))); + + if let Some(next_child) = children_iter.peek() { + results.push(self.between(&child, next_child, parent)); + } + + index += 1; + } + + results.into_iter().collect() } fn safe(&self, value: String) -> String { value } + + fn between(&self, left: &Node, right: &Node, parent: &T) -> String { + match self.join_default(left, right, parent) { + Some(Join::Number(num)) => { + if num == 1 { + "\n\n".into() + } else { + "\n".repeat(1 + num) + } + } + Some(Join::Bool(bool)) => { + if bool { + "\n\n".into() + } else { + "\n\n\n\n".into() + } + } + None => "\n\n".into(), + } + } + + fn join_default(&self, left: &Node, right: &Node, parent: &T) -> Option { + if format_code_as_indented(right, self) + && (matches!(left, 
Node::List(_)) || format_code_as_indented(left, self)) + { + return Some(Join::Bool(false)); + } + + if let Some(spread) = parent.spread() { + if matches!(left, Node::Paragraph(_)) && Self::matches((left, right)) + || matches!(right, Node::Definition(_)) + || format_heading_as_settext(right, self) + { + return None; + } + + if spread { + return Some(Join::Number(1)); + } else { + return Some(Join::Number(0)); + } + } + + Some(Join::Bool(true)) + } + + fn matches(nodes: (&Node, &Node)) -> bool { + matches!( + nodes, + (Node::Root(_), Node::Root(_)) + | (Node::BlockQuote(_), Node::BlockQuote(_)) + | (Node::FootnoteDefinition(_), Node::FootnoteDefinition(_)) + | (Node::Heading(_), Node::Heading(_)) + | (Node::List(_), Node::List(_)) + | (Node::ListItem(_), Node::ListItem(_)) + | (Node::Paragraph(_), Node::Paragraph(_)) + | (Node::Table(_), Node::Table(_)) + ) + } } pub fn serialize(tree: Node) -> String { let mut state = State::new(); - let result = state.handle(tree, Info::new("\n".into(), "\n".into())); + let result = state.handle(&tree, Info::new("\n".into(), "\n".into())); result } diff --git a/src/util/format_code_as_indented.rs b/src/util/format_code_as_indented.rs new file mode 100644 index 00000000..100e6557 --- /dev/null +++ b/src/util/format_code_as_indented.rs @@ -0,0 +1,17 @@ +use regex::Regex; + +use crate::{mdast::Node, to_markdown::State}; + +pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { + match node { + Node::Code(code) => { + let white_space = Regex::new(r"[^ \r\n]").unwrap(); + let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); + !code.value.is_empty() + && code.lang.is_none() + && white_space.is_match(&code.value) + && !blank.is_match(&code.value) + } + _ => false, + } +} diff --git a/src/util/format_heading_as_setext.rs b/src/util/format_heading_as_setext.rs new file mode 100644 index 00000000..120836ac --- /dev/null +++ b/src/util/format_heading_as_setext.rs @@ -0,0 +1,50 @@ +use 
alloc::string::ToString; +use regex::Regex; + +use crate::{mdast::Node, to_markdown::State}; + +pub fn format_heading_as_settext(node: &Node, state: &State) -> bool { + let line_berak = Regex::new(r"\r?\n|\r").unwrap(); + match node { + Node::Heading(heading) => { + let mut literal_with_break = false; + for child in heading.children.iter() { + if include_literal_with_break(child, state, &line_berak) { + literal_with_break = true; + break; + } + } + + heading.depth == 0 + || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break + } + _ => false, + } +} + +fn include_literal_with_break(node: &Node, state: &State, regex: &Regex) -> bool { + match node { + Node::Break(_) => true, + Node::MdxjsEsm(x) => regex.is_match(&x.value), + Node::Toml(x) => regex.is_match(&x.value), + Node::Yaml(x) => regex.is_match(&x.value), + Node::InlineCode(x) => regex.is_match(&x.value), + Node::InlineMath(x) => regex.is_match(&x.value), + Node::MdxTextExpression(x) => regex.is_match(&x.value), + Node::Html(x) => regex.is_match(&x.value), + Node::Text(x) => regex.is_match(&x.value), + Node::Code(x) => regex.is_match(&x.value), + Node::Math(x) => regex.is_match(&x.value), + Node::MdxFlowExpression(x) => regex.is_match(&x.value), + _ => { + if let Some(children) = node.children() { + for child in children.into_iter() { + if include_literal_with_break(child, state, regex) { + return true; + } + } + } + return false; + } + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index cb9a40b0..75538823 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -5,6 +5,8 @@ pub mod character_reference; pub mod constant; pub mod edit_map; pub mod encode; +pub mod format_code_as_indented; +pub mod format_heading_as_setext; pub mod gfm_tagfilter; pub mod identifier; pub mod infer; From 9e5e9bed33b2801eab470ec9ec4df6f54d95f553 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 28 Aug 2024 12:13:00 +0300 Subject: [PATCH 08/73] Add parents macro --- src/to_markdown.rs | 25 
++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index fee86ec0..27299356 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -39,7 +39,7 @@ pub enum ConstructName { } pub trait PeekNode { - // @todo make it take a reference to the state options + // TODO make it take a reference to the state options fn handle_peek(&self) -> String; } @@ -72,18 +72,29 @@ impl FlowParent for List { } } -impl FlowParent for Root { - fn children(&self) -> &Vec { - &self.children +macro_rules! impl_PhrasingParent { + (for $($t:ty),+) => { + $(impl PhrasingParent for $t { + fn children(&self) -> &Vec { + &self.children + } + })* } } -impl PhrasingParent for Paragraph { - fn children(&self) -> &Vec { - &self.children +macro_rules! impl_FlowParent { + (for $($t:ty),+) => { + $(impl FlowParent for $t { + fn children(&self) -> &Vec { + &self.children + } + })* } } +impl_PhrasingParent!(for Paragraph); +impl_FlowParent!(for Root); + pub enum Join { Number(usize), Bool(bool), From 21486d15eda26508c25798f025e35ac1f690fe74 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 28 Aug 2024 12:41:04 +0300 Subject: [PATCH 09/73] rename join default --- src/to_markdown.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 27299356..57a7006b 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -167,6 +167,10 @@ impl State { self.safe(text.value.clone()) } + fn safe(&self, value: String) -> String { + value + } + fn container_phrasing(&mut self, parent: &T, info: Info) -> String { let mut results: Vec = Vec::new(); let mut children_iter = parent.children().into_iter().peekable(); @@ -243,12 +247,8 @@ impl State { results.into_iter().collect() } - fn safe(&self, value: String) -> String { - value - } - fn between(&self, left: &Node, right: &Node, parent: &T) -> String { - match self.join_default(left, right, parent) { + match 
self.join_defaults(left, right, parent) { Some(Join::Number(num)) => { if num == 1 { "\n\n".into() @@ -267,7 +267,7 @@ impl State { } } - fn join_default(&self, left: &Node, right: &Node, parent: &T) -> Option { + fn join_defaults(&self, left: &Node, right: &Node, parent: &T) -> Option { if format_code_as_indented(right, self) && (matches!(left, Node::List(_)) || format_code_as_indented(left, self)) { From 66885de687a634cf60575907c0f5bf5a9c81e005 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 29 Aug 2024 09:26:05 +0300 Subject: [PATCH 10/73] Add unsafe --- src/to_markdown.rs | 354 ++++++++++++++++++++++++++- src/util/format_heading_as_setext.rs | 2 +- 2 files changed, 352 insertions(+), 4 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 57a7006b..cb87953e 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -5,9 +5,14 @@ use crate::{ format_heading_as_setext::format_heading_as_settext, }, }; -use alloc::{string::String, vec::Vec}; +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; #[allow(dead_code)] +#[derive(Clone)] pub enum ConstructName { Autolink, Blockquote, @@ -107,6 +112,348 @@ pub struct State { // We don't use index_stack values to index into any child. 
pub index_stack: Vec, pub bullet_last_used: Option, + pub r#unsafe: Vec, +} + +#[allow(dead_code)] +pub struct Unsafe { + pub character: char, + pub in_construct: Option, + pub not_in_construct: Option, + pub before: Option, + pub after: Option, + pub at_break: Option, +} + +#[allow(dead_code)] +pub enum Construct { + List(Vec), + Single(ConstructName), +} + +impl Unsafe { + pub fn new( + character: char, + before: Option, + after: Option, + in_construct: Option, + not_in_construct: Option, + at_break: Option, + ) -> Unsafe { + Unsafe { + character, + in_construct, + not_in_construct, + before, + after, + at_break, + } + } + + pub fn get_default_unsafe() -> Vec { + let full_phrasing_spans = vec![ + ConstructName::Autolink, + ConstructName::DestinationLiteral, + ConstructName::DestinationRaw, + ConstructName::Reference, + ConstructName::TitleQuote, + ConstructName::TitleApostrophe, + ]; + + vec![ + Self::new( + '\t', + None, + r"[\\r\\n]".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + '\t', + r"[\\r\\n]".to_string().into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + '\t', + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ]) + .into(), + None, + None, + ), + Self::new( + '\r', + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ConstructName::CodeFencedMetaGraveAccent, + ConstructName::CodeFencedMetaTilde, + ConstructName::DestinationLiteral, + ConstructName::HeadingAtx, + ]) + .into(), + None, + None, + ), + Self::new( + '\n', + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ConstructName::CodeFencedMetaGraveAccent, + ConstructName::CodeFencedMetaTilde, + ConstructName::DestinationLiteral, + ConstructName::HeadingAtx, + ]) + .into(), + 
None, + None, + ), + Self::new( + ' ', + None, + r"[\\r\\n]".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + ' ', + r"[\\r\\n]".to_string().into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + ' ', + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ]) + .into(), + None, + None, + ), + Self::new( + '!', + None, + r"\\[".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + '"', + None, + None, + Construct::Single(ConstructName::TitleQuote).into(), + None, + None, + ), + Self::new('#', None, None, None, None, Some(true)), + Self::new( + '&', + None, + "[#A-Za-z]".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + '\'', + None, + None, + Construct::Single(ConstructName::TitleApostrophe).into(), + None, + None, + ), + Self::new( + '(', + None, + None, + Construct::Single(ConstructName::DestinationRaw).into(), + None, + None, + ), + Self::new( + '(', + r"\\]".to_string().into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + ')', + r"\\d+".to_string().into(), + None, + None, + None, + Some(true), + ), + Self::new( + ')', + None, + None, + Construct::Single(ConstructName::DestinationRaw).into(), + None, + None, + ), + Self::new( + '*', + None, + r"(?:[ \t\r\n*])".to_string().into(), + None, + None, + Some(true), + ), + Self::new( + '*', + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + '+', + None, + r"(?:[ \t\r\n])".to_string().into(), + None, + None, + Some(true), + ), + Self::new( + '-', + None, + r"(?:[ 
\t\r\n-])".to_string().into(), + None, + None, + Some(true), + ), + Self::new( + '.', + r"\\d+".to_string().into(), + r"(?:[ \t\r\n]|$)".to_string().into(), + None, + None, + Some(true), + ), + Self::new( + '<', + None, + r"[!/?A-Za-z]".to_string().into(), + None, + None, + Some(true), + ), + Self::new( + '<', + None, + r"[!/?A-Za-z]".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + '<', + None, + None, + Construct::Single(ConstructName::DestinationLiteral).into(), + None, + None, + ), + Self::new('=', None, None, None, None, Some(true)), + Self::new('>', None, None, None, None, Some(true)), + Self::new( + '>', + None, + None, + Construct::Single(ConstructName::DestinationLiteral).into(), + None, + Some(true), + ), + Self::new('[', None, None, None, None, Some(true)), + Self::new( + '[', + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + '[', + None, + None, + Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), + None, + None, + ), + Self::new( + '\\', + None, + r"[\\r\\n]".to_string().into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + ']', + None, + None, + Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), + None, + None, + ), + Self::new('_', None, None, None, None, Some(true)), + Self::new( + '_', + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new('`', None, None, None, None, Some(true)), + Self::new( + '`', + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedMetaGraveAccent, + ]) + .into(), + None, + None, + ), + Self::new( + '`', + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + 
Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new('~', None, None, None, None, Some(true)), + ] + } } #[allow(dead_code)] @@ -127,6 +474,7 @@ impl State { stack: Vec::new(), index_stack: Vec::new(), bullet_last_used: None, + r#unsafe: Unsafe::get_default_unsafe(), } } @@ -167,8 +515,8 @@ impl State { self.safe(text.value.clone()) } - fn safe(&self, value: String) -> String { - value + fn safe(&self, input: String) -> String { + input } fn container_phrasing(&mut self, parent: &T, info: Info) -> String { diff --git a/src/util/format_heading_as_setext.rs b/src/util/format_heading_as_setext.rs index 120836ac..6aa0e5d8 100644 --- a/src/util/format_heading_as_setext.rs +++ b/src/util/format_heading_as_setext.rs @@ -38,7 +38,7 @@ fn include_literal_with_break(node: &Node, state: &State, regex: &Regex) -> bool Node::MdxFlowExpression(x) => regex.is_match(&x.value), _ => { if let Some(children) = node.children() { - for child in children.into_iter() { + for child in children.iter() { if include_literal_with_break(child, state, regex) { return true; } From 0733560596a1cddd9c0a612ea8afce6f6f04e15d Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 29 Aug 2024 09:35:01 +0300 Subject: [PATCH 11/73] Fix typo --- src/util/format_heading_as_setext.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/format_heading_as_setext.rs b/src/util/format_heading_as_setext.rs index 6aa0e5d8..fe1f8106 100644 --- a/src/util/format_heading_as_setext.rs +++ b/src/util/format_heading_as_setext.rs @@ -4,12 +4,12 @@ use regex::Regex; use crate::{mdast::Node, to_markdown::State}; pub fn format_heading_as_settext(node: &Node, state: &State) -> bool { - let line_berak = Regex::new(r"\r?\n|\r").unwrap(); + let line_break = Regex::new(r"\r?\n|\r").unwrap(); match node { Node::Heading(heading) => { let mut literal_with_break = false; for child in heading.children.iter() { - if include_literal_with_break(child, state, &line_berak) { + if 
include_literal_with_break(child, state, &line_break) { literal_with_break = true; break; } From 41d01f18c5664a6be02f40737ebc703dade066d4 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 30 Aug 2024 17:28:07 +0300 Subject: [PATCH 12/73] Make it safe --- src/to_markdown.rs | 474 +++++++++++++++++++++++++++++++++------------ 1 file changed, 348 insertions(+), 126 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index cb87953e..40108c98 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -6,13 +6,16 @@ use crate::{ }, }; use alloc::{ + collections::BTreeMap, + format, string::{String, ToString}, vec, vec::Vec, }; +use regex::Regex; #[allow(dead_code)] -#[derive(Clone)] +#[derive(Clone, PartialEq)] pub enum ConstructName { Autolink, Blockquote, @@ -61,7 +64,8 @@ pub trait PhrasingParent { pub trait FlowParent { fn children(&self) -> &Vec; - // `parent` has a `spread` field. + /// One or more of its children are separated with a blank line from its + /// siblings (when `true`), or not (when `false`). fn spread(&self) -> Option { None } @@ -106,40 +110,42 @@ pub enum Join { } #[allow(dead_code)] -pub struct State { +pub struct State<'a> { pub stack: Vec, // SAFETY : -1 is used to mark the absense of children. // We don't use index_stack values to index into any child. pub index_stack: Vec, pub bullet_last_used: Option, - pub r#unsafe: Vec, + pub r#unsafe: Vec>, } #[allow(dead_code)] -pub struct Unsafe { - pub character: char, +pub struct Unsafe<'a> { + pub character: &'a str, pub in_construct: Option, pub not_in_construct: Option, - pub before: Option, - pub after: Option, + pub before: Option<&'a str>, + pub after: Option<&'a str>, pub at_break: Option, + compiled: Option, } #[allow(dead_code)] +// This could use a better name. 
pub enum Construct { List(Vec), Single(ConstructName), } -impl Unsafe { +impl<'a> Unsafe<'a> { pub fn new( - character: char, - before: Option, - after: Option, + character: &'a str, + before: Option<&'a str>, + after: Option<&'a str>, in_construct: Option, not_in_construct: Option, at_break: Option, - ) -> Unsafe { + ) -> Self { Unsafe { character, in_construct, @@ -147,10 +153,11 @@ impl Unsafe { before, after, at_break, + compiled: None, } } - pub fn get_default_unsafe() -> Vec { + pub fn get_default_unsafe() -> Vec { let full_phrasing_spans = vec![ ConstructName::Autolink, ConstructName::DestinationLiteral, @@ -162,23 +169,23 @@ impl Unsafe { vec![ Self::new( - '\t', + "\t", None, - r"[\\r\\n]".to_string().into(), + r"[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - '\t', - r"[\\r\\n]".to_string().into(), + "\t", + r"[\\r\\n]".into(), None, Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - '\t', + "\t", None, None, Construct::List(vec![ @@ -190,7 +197,7 @@ impl Unsafe { None, ), Self::new( - '\r', + "\r", None, None, Construct::List(vec![ @@ -206,7 +213,7 @@ impl Unsafe { None, ), Self::new( - '\n', + "\n", None, None, Construct::List(vec![ @@ -222,23 +229,23 @@ impl Unsafe { None, ), Self::new( - ' ', + " ", None, - r"[\\r\\n]".to_string().into(), + r"[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - ' ', - r"[\\r\\n]".to_string().into(), + " ", + r"[\\r\\n]".into(), None, Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - ' ', + " ", None, None, Construct::List(vec![ @@ -250,32 +257,32 @@ impl Unsafe { None, ), Self::new( - '!', + "!", None, - r"\\[".to_string().into(), + r"\[".into(), Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), Self::new( - '"', + "\"", None, None, Construct::Single(ConstructName::TitleQuote).into(), None, None, ), - 
Self::new('#', None, None, None, None, Some(true)), + Self::new("#", None, None, None, None, Some(true)), Self::new( - '&', + "&", None, - "[#A-Za-z]".to_string().into(), + r"[#A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - '\'', + "'", None, None, Construct::Single(ConstructName::TitleApostrophe).into(), @@ -283,7 +290,7 @@ impl Unsafe { None, ), Self::new( - '(', + "(", None, None, Construct::Single(ConstructName::DestinationRaw).into(), @@ -291,106 +298,71 @@ impl Unsafe { None, ), Self::new( - '(', - r"\\]".to_string().into(), + "(", + r"\]".into(), None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), + Self::new(")", r"\d+".into(), None, None, None, Some(true)), Self::new( - ')', - r"\\d+".to_string().into(), - None, - None, - None, - Some(true), - ), - Self::new( - ')', + ")", None, None, Construct::Single(ConstructName::DestinationRaw).into(), None, None, ), + Self::new("*", None, r"(?:[ \t\r\n*])".into(), None, None, Some(true)), Self::new( - '*', - None, - r"(?:[ \t\r\n*])".to_string().into(), - None, - None, - Some(true), - ), - Self::new( - '*', + "*", None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), + Self::new("+", None, r"(?:[ \t\r\n])".into(), None, None, Some(true)), + Self::new("-", None, r"(?:[ \t\r\n-])".into(), None, None, Some(true)), Self::new( - '+', - None, - r"(?:[ \t\r\n])".to_string().into(), + ".", + r"\d+".into(), + "(?:[ \t\r\n]|$)".into(), None, None, Some(true), ), + Self::new("<", None, r"[!/?A-Za-z]".into(), None, None, Some(true)), Self::new( - '-', - None, - r"(?:[ \t\r\n-])".to_string().into(), + "<", None, - None, - Some(true), - ), - Self::new( - '.', - r"\\d+".to_string().into(), - r"(?:[ \t\r\n]|$)".to_string().into(), - None, - None, - Some(true), - ), - Self::new( - '<', - None, - r"[!/?A-Za-z]".to_string().into(), - None, - None, - 
Some(true), - ), - Self::new( - '<', - None, - r"[!/?A-Za-z]".to_string().into(), + "[!/?A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), Self::new( - '<', + "<", None, None, Construct::Single(ConstructName::DestinationLiteral).into(), None, None, ), - Self::new('=', None, None, None, None, Some(true)), - Self::new('>', None, None, None, None, Some(true)), + Self::new("=", None, None, None, None, Some(true)), + Self::new(">", None, None, None, None, Some(true)), Self::new( - '>', + ">", None, None, Construct::Single(ConstructName::DestinationLiteral).into(), None, Some(true), ), - Self::new('[', None, None, None, None, Some(true)), + Self::new("[", None, None, None, None, Some(true)), Self::new( - '[', + "[", None, None, Construct::Single(ConstructName::Phrasing).into(), @@ -398,7 +370,7 @@ impl Unsafe { None, ), Self::new( - '[', + "[", None, None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), @@ -406,33 +378,33 @@ impl Unsafe { None, ), Self::new( - '\\', + r"\", None, - r"[\\r\\n]".to_string().into(), + "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( - ']', + "]", None, None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), None, None, ), - Self::new('_', None, None, None, None, Some(true)), + Self::new("_", None, None, None, None, Some(true)), Self::new( - '_', + "_", None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new('`', None, None, None, None, Some(true)), + Self::new("`", None, None, None, None, Some(true)), Self::new( - '`', + "`", None, None, Construct::List(vec![ @@ -444,16 +416,24 @@ impl Unsafe { None, ), Self::new( - '`', + "`", None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new('~', None, None, 
None, None, Some(true)), + Self::new("~", None, None, None, None, Some(true)), ] } + + pub fn compiled(&self) -> bool { + self.compiled.is_some() + } + + pub fn set_compiled(&mut self, regex_pattern: Regex) { + self.compiled = Some(regex_pattern); + } } #[allow(dead_code)] @@ -468,7 +448,33 @@ impl<'a> Info<'a> { } } -impl State { +#[allow(dead_code)] +pub struct SafeConfig<'a> { + pub before: &'a str, + pub after: &'a str, + pub encode: Option>, +} + +impl<'a> SafeConfig<'a> { + pub fn new( + before: Option<&'a str>, + after: Option<&'a str>, + encode: Option>, + ) -> Self { + SafeConfig { + before: before.unwrap_or(""), + after: after.unwrap_or(""), + encode, + } + } +} + +struct EscapeInfos { + before: bool, + after: bool, +} + +impl<'a> State<'a> { pub fn new() -> Self { State { stack: Vec::new(), @@ -511,16 +517,160 @@ impl State { value } - fn handle_text(&self, text: &Text, _info: Info) -> String { - self.safe(text.value.clone()) + fn handle_text(&mut self, text: &Text, info: Info) -> String { + self.safe( + text.value.clone(), + &SafeConfig::new(Some(info.before), Some(info.after), None), + ) } - fn safe(&self, input: String) -> String { - input + fn safe(&mut self, input: String, config: &SafeConfig) -> String { + let value = format!("{}{}{}", config.before, input, config.after); + let mut positions: Vec = Vec::new(); + let mut result: String = String::new(); + let mut infos: BTreeMap = BTreeMap::new(); + let mut unsafe_iter = self.r#unsafe.iter_mut(); + + while let Some(pattern) = unsafe_iter.next() { + if !pattern_in_scope(&self.stack, pattern) { + continue; + } + + Self::compile_pattern(pattern); + + if let Some(regex) = &pattern.compiled { + for m in regex.captures_iter(&value) { + let full_match = m.get(0).unwrap(); + let captured_group_len = if let Some(captured_group) = m.get(1) { + captured_group.len() + } else { + 0 + }; + + let before = pattern.before.is_some() || pattern.at_break.unwrap_or(false); + let after = pattern.after.is_some(); + let 
position = full_match.start() + if before { captured_group_len } else { 0 }; + + if positions.contains(&position) { + if let Some(entry) = infos.get_mut(&position) { + if entry.before && !before { + entry.before = false; + } + + if entry.after && !after { + entry.after = false; + } + } + } else { + positions.push(position); + infos.insert(position, EscapeInfos { before, after }); + } + } + } + } + + positions.sort_unstable(); + + let mut start = config.before.len(); + let end = value.len() - config.after.len(); + + for (index, position) in positions.iter().enumerate() { + if *position < start || *position >= end { + continue; + } + + // If this character is supposed to be escaped because it has a condition on + // the next character, and the next character is definitly being escaped, + // then skip this escape. + // SAFETY This will never panic because we're checking the correct bounds, and we + // gurantee to have the positions as key in the infos map before reaching this + // execution. 
+ if index + 1 < positions.len() + && position + 1 < end + && positions[index + 1] == position + 1 + && infos[position].after + && !infos[&(position + 1)].before + && !infos[&(position + 1)].after + || index > 0 + && positions[index - 1] == position - 1 + && infos[position].before + && !infos[&(position - 1)].before + && !infos[&(position - 1)].after + { + continue; + } + + if start != *position { + result.push_str(&escape_backslashes(&value[start..*position], r"\")); + } + + start = *position; + + let char_match = Regex::new(r"[!-/:-@\[-{-~]").unwrap(); + if let Some(char_at_pos) = char_match.find_at(&value, *position).iter().next() { + match &config.encode { + Some(encode) => { + if encode.contains(&char_at_pos.as_str()) { + result.push_str(r"\"); + } + } + None => result.push_str(r"\"), + } + } else if let Some(character) = value.chars().nth(*position) { + let code = u32::from(character); + let hex_string = format!("{:X}", code); + result.push_str(&format!("&#x{};", hex_string)); + start += 1; + } + } + + result.push_str(&escape_backslashes(&value[start..end], config.after)); + + result + } + + fn compile_pattern(pattern: &mut Unsafe) { + if !pattern.compiled() { + let before = if pattern.at_break.unwrap_or(false) { + r"[\\r\\n][\\t ]*" + } else { + "" + }; + + let before = format!( + "{}{}", + before, + pattern + .before + .map_or(String::new(), |before| format!("(?:{})", before)) + ); + + let before = if !before.is_empty() { + format!("({})", before) + } else { + String::new() + }; + + let after = pattern + .after + .map_or(String::new(), |after| format!("(?:{})", after)); + + let special_char = if Regex::new(r"[\|\{}\()\[\]\\\^\$\+\*\?\.\-]") + .unwrap() + .is_match(pattern.character) + { + r"\" + } else { + "" + }; + + let regex = format!("{}{}{}{}", before, special_char, pattern.character, after); + pattern.set_compiled(Regex::new(®ex).unwrap()); + } } fn container_phrasing(&mut self, parent: &T, info: Info) -> String { - let mut results: Vec = Vec::new(); 
+ let mut results: String = String::new(); let mut children_iter = parent.children().into_iter().peekable(); let mut index = 0; @@ -531,32 +681,35 @@ impl State { *top = index; } - let mut after: String = "".into(); - if let Some(child) = children_iter.peek() { - after = match self.determine_first_char(child) { + let after = if let Some(child) = children_iter.peek() { + match self.determine_first_char(child) { Some(after_char) => after_char, None => self .handle(child, Info::new("", "")) .chars() - .next() - .map(|c| c.into()) - .unwrap_or_default(), - }; - } + .nth(0) + .unwrap_or_default() + .to_string(), + } + } else { + String::from(info.after) + }; - if let Some(result) = results.last() { - results.push(self.handle( + if !results.is_empty() { + results.push_str(&self.handle( child, - Info::new(&result[result.len() - 1..], after.as_ref()), + Info::new(&results[results.len() - 1..], after.as_ref()), )); } else { - results.push(self.handle(child, Info::new(info.before, after.as_ref()))); + results.push_str(&self.handle(child, Info::new(info.before, after.as_ref()))); } index += 1; } + self.index_stack.pop(); - results.into_iter().collect() + + results } fn determine_first_char(&self, node: &Node) -> Option { @@ -567,32 +720,30 @@ impl State { } fn container_flow(&mut self, parent: &T, _info: Info) -> String { - let mut results: Vec = Vec::new(); - + let mut results: String = String::new(); let mut children_iter = parent.children().into_iter().peekable(); - let mut index: usize = 0; + let mut index = 0; self.index_stack.push(-1); while let Some(child) = children_iter.next() { if let Some(top) = self.index_stack.last_mut() { - *top = index as i64; + *top = index; } if matches!(child, Node::List(_)) { self.bullet_last_used = None; } - results.push(self.handle(child, Info::new("\n", "\n"))); + results.push_str(&self.handle(child, Info::new("\n", "\n"))); if let Some(next_child) = children_iter.peek() { - results.push(self.between(&child, next_child, parent)); + 
results.push_str(&self.between(&child, next_child, parent)); } index += 1; } - - results.into_iter().collect() + results } fn between(&self, left: &Node, right: &Node, parent: &T) -> String { @@ -655,6 +806,63 @@ impl State { } } +fn escape_backslashes(value: &str, after: &str) -> String { + let expression = Regex::new(r"\\[!-/:-@\[-`{-~]").unwrap(); + let mut results: String = String::new(); + let whole = format!("{}{}", value, after); + + let positions: Vec = expression.find_iter(&whole).map(|m| m.start()).collect(); + let mut start = 0; + + for position in positions.iter() { + if start != *position { + results.push_str(&value[start..*position]); + } + + results.push_str(r"\"); + + start = *position; + } + + results.push_str(&value[start..]); + + results +} + +fn pattern_in_scope(stack: &Vec, pattern: &Unsafe) -> bool { + list_in_scope(stack, &pattern.in_construct, true) + && !list_in_scope(stack, &pattern.not_in_construct, false) +} + +// This could use a better name +fn list_in_scope(stack: &Vec, list: &Option, none: bool) -> bool { + let Some(list) = list else { + return none; + }; + match list { + Construct::Single(construct_name) => { + if stack.contains(&construct_name) { + return true; + } + + return false; + } + Construct::List(constructs_names) => { + if constructs_names.len() == 0 { + return none; + } + + for construct_name in constructs_names.iter() { + if stack.contains(construct_name) { + return true; + } + } + + return false; + } + } +} + pub fn serialize(tree: Node) -> String { let mut state = State::new(); let result = state.handle(&tree, Info::new("\n".into(), "\n".into())); @@ -685,4 +893,18 @@ mod init_tests { let actual = serialize(paragraph); assert_eq!(actual, String::from("ab")); } + + #[test] + fn it_escape() { + let text_a = Node::Text(Text { + value: String::from("![](a.jpg)"), + position: None, + }); + let paragraph = Node::Paragraph(Paragraph { + children: vec![text_a], + position: None, + }); + let actual = serialize(paragraph); + 
assert_eq!(actual, "!\\[]\\(a.jpg)"); + } } From dd44b06216451d2acba29c59fcb8a6b2fa6a2ffd Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 31 Aug 2024 09:44:11 +0300 Subject: [PATCH 13/73] Fix clippy issues --- src/lib.rs | 2 +- src/to_markdown.rs | 89 ++++++++++++++-------------- src/util/format_heading_as_setext.rs | 15 ++--- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5ee47932..a4493207 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -162,6 +162,6 @@ pub fn to_mdast(value: &str, options: &ParseOptions) -> Result String { +pub fn to_markdown(tree: &Node) -> String { to_markdown::serialize(tree) } diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 40108c98..f5615ac6 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -66,7 +66,7 @@ pub trait FlowParent { /// One or more of its children are separated with a blank line from its /// siblings (when `true`), or not (when `false`). - fn spread(&self) -> Option { + fn spreadable(&self) -> Option { None } } @@ -76,7 +76,7 @@ impl FlowParent for List { &self.children } - fn spread(&self) -> Option { + fn spreadable(&self) -> Option { Some(self.spread) } } @@ -492,7 +492,7 @@ impl<'a> State<'a> { self.stack.pop(); } - pub fn handle(&mut self, node: &Node, info: Info) -> String { + pub fn handle(&mut self, node: &Node, info: &Info) -> String { match node { Node::Root(root) => self.handle_root(root, info), Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), @@ -501,11 +501,11 @@ impl<'a> State<'a> { } } - fn handle_root(&mut self, node: &Root, info: Info) -> String { + fn handle_root(&mut self, node: &Root, info: &Info) -> String { self.container_flow(node, info) } - fn handle_paragraph(&mut self, node: &Paragraph, info: Info) -> String { + fn handle_paragraph(&mut self, node: &Paragraph, info: &Info) -> String { self.enter(ConstructName::Paragraph); self.enter(ConstructName::Phrasing); @@ -517,27 +517,24 @@ impl<'a> State<'a> { value } - fn 
handle_text(&mut self, text: &Text, info: Info) -> String { + fn handle_text(&mut self, text: &Text, info: &Info) -> String { self.safe( - text.value.clone(), + &text.value, &SafeConfig::new(Some(info.before), Some(info.after), None), ) } - fn safe(&mut self, input: String, config: &SafeConfig) -> String { + fn safe(&mut self, input: &String, config: &SafeConfig) -> String { let value = format!("{}{}{}", config.before, input, config.after); let mut positions: Vec = Vec::new(); let mut result: String = String::new(); let mut infos: BTreeMap = BTreeMap::new(); - let mut unsafe_iter = self.r#unsafe.iter_mut(); - while let Some(pattern) = unsafe_iter.next() { + for pattern in &mut self.r#unsafe { if !pattern_in_scope(&self.stack, pattern) { continue; } - Self::compile_pattern(pattern); - if let Some(regex) = &pattern.compiled { for m in regex.captures_iter(&value) { let full_match = m.get(0).unwrap(); @@ -611,10 +608,10 @@ impl<'a> State<'a> { match &config.encode { Some(encode) => { if encode.contains(&char_at_pos.as_str()) { - result.push_str(r"\"); + result.push('\\'); } } - None => result.push_str(r"\"), + None => result.push('\\'), } } else if let Some(character) = value.chars().nth(*position) { let code = u32::from(character); @@ -645,10 +642,10 @@ impl<'a> State<'a> { .map_or(String::new(), |before| format!("(?:{})", before)) ); - let before = if !before.is_empty() { - format!("({})", before) - } else { + let before = if before.is_empty() { String::new() + } else { + format!("({})", before) }; let after = pattern @@ -669,9 +666,9 @@ impl<'a> State<'a> { } } - fn container_phrasing(&mut self, parent: &T, info: Info) -> String { + fn container_phrasing(&mut self, parent: &T, info: &Info) -> String { let mut results: String = String::new(); - let mut children_iter = parent.children().into_iter().peekable(); + let mut children_iter = parent.children().iter().peekable(); let mut index = 0; self.index_stack.push(-1); @@ -682,10 +679,10 @@ impl<'a> State<'a> { } let 
after = if let Some(child) = children_iter.peek() { - match self.determine_first_char(child) { + match Self::determine_first_char(child) { Some(after_char) => after_char, None => self - .handle(child, Info::new("", "")) + .handle(child, &Info::new("", "")) .chars() .nth(0) .unwrap_or_default() @@ -695,13 +692,13 @@ impl<'a> State<'a> { String::from(info.after) }; - if !results.is_empty() { + if results.is_empty() { + results.push_str(&self.handle(child, &Info::new(info.before, after.as_ref()))); + } else { results.push_str(&self.handle( child, - Info::new(&results[results.len() - 1..], after.as_ref()), + &Info::new(&results[results.len() - 1..], after.as_ref()), )); - } else { - results.push_str(&self.handle(child, Info::new(info.before, after.as_ref()))); } index += 1; @@ -712,16 +709,16 @@ impl<'a> State<'a> { results } - fn determine_first_char(&self, node: &Node) -> Option { + fn determine_first_char(node: &Node) -> Option { match node { Node::Strong(strong) => Some(strong.handle_peek()), _ => None, } } - fn container_flow(&mut self, parent: &T, _info: Info) -> String { + fn container_flow(&mut self, parent: &T, _info: &Info) -> String { let mut results: String = String::new(); - let mut children_iter = parent.children().into_iter().peekable(); + let mut children_iter = parent.children().iter().peekable(); let mut index = 0; self.index_stack.push(-1); @@ -735,10 +732,10 @@ impl<'a> State<'a> { self.bullet_last_used = None; } - results.push_str(&self.handle(child, Info::new("\n", "\n"))); + results.push_str(&self.handle(child, &Info::new("\n", "\n"))); if let Some(next_child) = children_iter.peek() { - results.push_str(&self.between(&child, next_child, parent)); + results.push_str(&self.between(child, next_child, parent)); } index += 1; @@ -773,7 +770,7 @@ impl<'a> State<'a> { return Some(Join::Bool(false)); } - if let Some(spread) = parent.spread() { + if let Some(spread) = parent.spreadable() { if matches!(left, Node::Paragraph(_)) && Self::matches((left, 
right)) || matches!(right, Node::Definition(_)) || format_heading_as_settext(right, self) @@ -783,9 +780,9 @@ impl<'a> State<'a> { if spread { return Some(Join::Number(1)); - } else { - return Some(Join::Number(0)); } + + return Some(Join::Number(0)); } Some(Join::Bool(true)) @@ -814,12 +811,12 @@ fn escape_backslashes(value: &str, after: &str) -> String { let positions: Vec = expression.find_iter(&whole).map(|m| m.start()).collect(); let mut start = 0; - for position in positions.iter() { + for position in &positions { if start != *position { results.push_str(&value[start..*position]); } - results.push_str(r"\"); + results.push('\\'); start = *position; } @@ -829,43 +826,43 @@ fn escape_backslashes(value: &str, after: &str) -> String { results } -fn pattern_in_scope(stack: &Vec, pattern: &Unsafe) -> bool { +fn pattern_in_scope(stack: &[ConstructName], pattern: &Unsafe) -> bool { list_in_scope(stack, &pattern.in_construct, true) && !list_in_scope(stack, &pattern.not_in_construct, false) } // This could use a better name -fn list_in_scope(stack: &Vec, list: &Option, none: bool) -> bool { +fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) -> bool { let Some(list) = list else { return none; }; match list { Construct::Single(construct_name) => { - if stack.contains(&construct_name) { + if stack.contains(construct_name) { return true; } - return false; + false } Construct::List(constructs_names) => { - if constructs_names.len() == 0 { + if constructs_names.is_empty() { return none; } - for construct_name in constructs_names.iter() { + for construct_name in constructs_names { if stack.contains(construct_name) { return true; } } - return false; + false } } } -pub fn serialize(tree: Node) -> String { +pub fn serialize(tree: &Node) -> String { let mut state = State::new(); - let result = state.handle(&tree, Info::new("\n".into(), "\n".into())); + let result = state.handle(tree, &Info::new("\n", "\n")); result } @@ -890,7 +887,7 @@ mod init_tests { 
children: vec![text_a, text_b], position: None, }); - let actual = serialize(paragraph); + let actual = serialize(¶graph); assert_eq!(actual, String::from("ab")); } @@ -904,7 +901,7 @@ mod init_tests { children: vec![text_a], position: None, }); - let actual = serialize(paragraph); + let actual = serialize(¶graph); assert_eq!(actual, "!\\[]\\(a.jpg)"); } } diff --git a/src/util/format_heading_as_setext.rs b/src/util/format_heading_as_setext.rs index fe1f8106..9eb7a9dd 100644 --- a/src/util/format_heading_as_setext.rs +++ b/src/util/format_heading_as_setext.rs @@ -3,13 +3,13 @@ use regex::Regex; use crate::{mdast::Node, to_markdown::State}; -pub fn format_heading_as_settext(node: &Node, state: &State) -> bool { +pub fn format_heading_as_settext(node: &Node, _state: &State) -> bool { let line_break = Regex::new(r"\r?\n|\r").unwrap(); match node { Node::Heading(heading) => { let mut literal_with_break = false; - for child in heading.children.iter() { - if include_literal_with_break(child, state, &line_break) { + for child in &heading.children { + if include_literal_with_break(child, &line_break) { literal_with_break = true; break; } @@ -22,7 +22,7 @@ pub fn format_heading_as_settext(node: &Node, state: &State) -> bool { } } -fn include_literal_with_break(node: &Node, state: &State, regex: &Regex) -> bool { +fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { match node { Node::Break(_) => true, Node::MdxjsEsm(x) => regex.is_match(&x.value), @@ -38,13 +38,14 @@ fn include_literal_with_break(node: &Node, state: &State, regex: &Regex) -> bool Node::MdxFlowExpression(x) => regex.is_match(&x.value), _ => { if let Some(children) = node.children() { - for child in children.iter() { - if include_literal_with_break(child, state, regex) { + for child in children { + if include_literal_with_break(child, regex) { return true; } } } - return false; + + false } } } From dae7d26e0bc84ca4e34ce7c1606d7cdfcb4ff0cf Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 1 Sep 
2024 11:18:27 +0300 Subject: [PATCH 14/73] Reduce some of the allocations --- src/to_markdown.rs | 138 +++++++++++++++++++++++---- src/util/format_code_as_indented.rs | 17 ---- src/util/format_heading_as_setext.rs | 51 ---------- src/util/mod.rs | 2 - 4 files changed, 120 insertions(+), 88 deletions(-) delete mode 100644 src/util/format_code_as_indented.rs delete mode 100644 src/util/format_heading_as_setext.rs diff --git a/src/to_markdown.rs b/src/to_markdown.rs index f5615ac6..06cd0139 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -1,10 +1,4 @@ -use crate::{ - mdast::{List, Node, Paragraph, Root, Strong, Text}, - util::{ - format_code_as_indented::format_code_as_indented, - format_heading_as_setext::format_heading_as_settext, - }, -}; +use crate::mdast::{List, Node, Paragraph, Root, Strong, Text}; use alloc::{ collections::BTreeMap, format, @@ -101,7 +95,7 @@ macro_rules! impl_FlowParent { } } -impl_PhrasingParent!(for Paragraph); +impl_PhrasingParent!(for Paragraph, Strong); impl_FlowParent!(for Root); pub enum Join { @@ -112,7 +106,7 @@ pub enum Join { #[allow(dead_code)] pub struct State<'a> { pub stack: Vec, - // SAFETY : -1 is used to mark the absense of children. + // We use i64 for index_stack because -1 is used to mark the absense of children. // We don't use index_stack values to index into any child. 
pub index_stack: Vec, pub bullet_last_used: Option, @@ -497,6 +491,7 @@ impl<'a> State<'a> { Node::Root(root) => self.handle_root(root, info), Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), Node::Text(text) => self.handle_text(text, info), + Node::Strong(strong) => self.handle_strong(strong, info), _ => panic!("Not handled yet"), } } @@ -524,6 +519,20 @@ impl<'a> State<'a> { ) } + fn handle_strong(&mut self, node: &Strong, info: &Info) -> String { + let marker = check_strong(self); + + self.enter(ConstructName::Strong); + + let mut value = format!("{}{}", marker, marker); + value.push_str(&self.container_phrasing(node, info)); + value.push_str(&format!("{}{}", marker, marker)); + + self.exit(); + + value + } + fn safe(&mut self, input: &String, config: &SafeConfig) -> String { let value = format!("{}{}{}", config.before, input, config.after); let mut positions: Vec = Vec::new(); @@ -559,8 +568,8 @@ impl<'a> State<'a> { } } } else { - positions.push(position); infos.insert(position, EscapeInfos { before, after }); + positions.push(position); } } } @@ -579,7 +588,7 @@ impl<'a> State<'a> { // If this character is supposed to be escaped because it has a condition on // the next character, and the next character is definitly being escaped, // then skip this escape. - // SAFETY This will never panic because we're checking the correct bounds, and we + // This will never panic because we're checking the correct bounds, and we // gurantee to have the positions as key in the infos map before reaching this // execution. 
if index + 1 < positions.len() @@ -735,31 +744,40 @@ impl<'a> State<'a> { results.push_str(&self.handle(child, &Info::new("\n", "\n"))); if let Some(next_child) = children_iter.peek() { - results.push_str(&self.between(child, next_child, parent)); + self.set_between(child, next_child, parent, &mut results); } index += 1; } + + self.index_stack.pop(); + results } - fn between(&self, left: &Node, right: &Node, parent: &T) -> String { + fn set_between( + &self, + left: &Node, + right: &Node, + parent: &T, + results: &mut String, + ) { match self.join_defaults(left, right, parent) { Some(Join::Number(num)) => { if num == 1 { - "\n\n".into() + results.push_str("\n\n"); } else { - "\n".repeat(1 + num) + results.push_str("\n".repeat(1 + num).as_ref()); } } Some(Join::Bool(bool)) => { if bool { - "\n\n".into() + results.push_str("\n\n"); } else { - "\n\n\n\n".into() + results.push_str("\n\n\n\n"); } } - None => "\n\n".into(), + None => results.push_str("\n\n"), } } @@ -803,6 +821,10 @@ impl<'a> State<'a> { } } +fn check_strong(_state: &State) -> String { + "*".into() +} + fn escape_backslashes(value: &str, after: &str) -> String { let expression = Regex::new(r"\\[!-/:-@\[-`{-~]").unwrap(); let mut results: String = String::new(); @@ -860,6 +882,67 @@ fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) } } +pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { + match node { + Node::Code(code) => { + let white_space = Regex::new(r"[^ \r\n]").unwrap(); + let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); + !code.value.is_empty() + && code.lang.is_none() + && white_space.is_match(&code.value) + && !blank.is_match(&code.value) + } + _ => false, + } +} + +pub fn format_heading_as_settext(node: &Node, _state: &State) -> bool { + let line_break = Regex::new(r"\r?\n|\r").unwrap(); + match node { + Node::Heading(heading) => { + let mut literal_with_break = false; + for child in &heading.children { + if 
include_literal_with_break(child, &line_break) { + literal_with_break = true; + break; + } + } + + heading.depth == 0 + || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break + } + _ => false, + } +} + +fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { + match node { + Node::Break(_) => true, + Node::MdxjsEsm(x) => regex.is_match(&x.value), + Node::Toml(x) => regex.is_match(&x.value), + Node::Yaml(x) => regex.is_match(&x.value), + Node::InlineCode(x) => regex.is_match(&x.value), + Node::InlineMath(x) => regex.is_match(&x.value), + Node::MdxTextExpression(x) => regex.is_match(&x.value), + Node::Html(x) => regex.is_match(&x.value), + Node::Text(x) => regex.is_match(&x.value), + Node::Code(x) => regex.is_match(&x.value), + Node::Math(x) => regex.is_match(&x.value), + Node::MdxFlowExpression(x) => regex.is_match(&x.value), + _ => { + if let Some(children) = node.children() { + for child in children { + if include_literal_with_break(child, regex) { + return true; + } + } + } + + false + } + } +} + pub fn serialize(tree: &Node) -> String { let mut state = State::new(); let result = state.handle(tree, &Info::new("\n", "\n")); @@ -904,4 +987,23 @@ mod init_tests { let actual = serialize(¶graph); assert_eq!(actual, "!\\[]\\(a.jpg)"); } + + #[test] + fn it_will_strong() { + let text_a = Node::Text(Text { + value: String::from("a"), + position: None, + }); + + let text_b = Node::Text(Text { + value: String::from("b"), + position: None, + }); + let strong = Node::Strong(Strong { + children: vec![text_a, text_b], + position: None, + }); + let actual = serialize(&strong); + assert_eq!(actual, "**ab**"); + } } diff --git a/src/util/format_code_as_indented.rs b/src/util/format_code_as_indented.rs deleted file mode 100644 index 100e6557..00000000 --- a/src/util/format_code_as_indented.rs +++ /dev/null @@ -1,17 +0,0 @@ -use regex::Regex; - -use crate::{mdast::Node, to_markdown::State}; - -pub fn format_code_as_indented(node: &Node, _state: 
&State) -> bool { - match node { - Node::Code(code) => { - let white_space = Regex::new(r"[^ \r\n]").unwrap(); - let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); - !code.value.is_empty() - && code.lang.is_none() - && white_space.is_match(&code.value) - && !blank.is_match(&code.value) - } - _ => false, - } -} diff --git a/src/util/format_heading_as_setext.rs b/src/util/format_heading_as_setext.rs deleted file mode 100644 index 9eb7a9dd..00000000 --- a/src/util/format_heading_as_setext.rs +++ /dev/null @@ -1,51 +0,0 @@ -use alloc::string::ToString; -use regex::Regex; - -use crate::{mdast::Node, to_markdown::State}; - -pub fn format_heading_as_settext(node: &Node, _state: &State) -> bool { - let line_break = Regex::new(r"\r?\n|\r").unwrap(); - match node { - Node::Heading(heading) => { - let mut literal_with_break = false; - for child in &heading.children { - if include_literal_with_break(child, &line_break) { - literal_with_break = true; - break; - } - } - - heading.depth == 0 - || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break - } - _ => false, - } -} - -fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { - match node { - Node::Break(_) => true, - Node::MdxjsEsm(x) => regex.is_match(&x.value), - Node::Toml(x) => regex.is_match(&x.value), - Node::Yaml(x) => regex.is_match(&x.value), - Node::InlineCode(x) => regex.is_match(&x.value), - Node::InlineMath(x) => regex.is_match(&x.value), - Node::MdxTextExpression(x) => regex.is_match(&x.value), - Node::Html(x) => regex.is_match(&x.value), - Node::Text(x) => regex.is_match(&x.value), - Node::Code(x) => regex.is_match(&x.value), - Node::Math(x) => regex.is_match(&x.value), - Node::MdxFlowExpression(x) => regex.is_match(&x.value), - _ => { - if let Some(children) = node.children() { - for child in children { - if include_literal_with_break(child, regex) { - return true; - } - } - } - - false - } - } -} diff --git a/src/util/mod.rs b/src/util/mod.rs index 
75538823..cb9a40b0 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -5,8 +5,6 @@ pub mod character_reference; pub mod constant; pub mod edit_map; pub mod encode; -pub mod format_code_as_indented; -pub mod format_heading_as_setext; pub mod gfm_tagfilter; pub mod identifier; pub mod infer; From b598a26f619cadaa689d83def653e8773930e1f6 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 1 Sep 2024 11:40:20 +0300 Subject: [PATCH 15/73] Make things less pub for now --- src/to_markdown.rs | 74 ++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/src/to_markdown.rs b/src/to_markdown.rs index 06cd0139..3b6bf59e 100644 --- a/src/to_markdown.rs +++ b/src/to_markdown.rs @@ -10,7 +10,7 @@ use regex::Regex; #[allow(dead_code)] #[derive(Clone, PartialEq)] -pub enum ConstructName { +enum ConstructName { Autolink, Blockquote, CodeIndented, @@ -40,7 +40,7 @@ pub enum ConstructName { TitleQuote, } -pub trait PeekNode { +trait PeekNode { // TODO make it take a reference to the state options fn handle_peek(&self) -> String; } @@ -51,11 +51,11 @@ impl PeekNode for Strong { } } -pub trait PhrasingParent { +trait PhrasingParent { fn children(&self) -> &Vec; } -pub trait FlowParent { +trait FlowParent { fn children(&self) -> &Vec; /// One or more of its children are separated with a blank line from its @@ -98,41 +98,41 @@ macro_rules! impl_FlowParent { impl_PhrasingParent!(for Paragraph, Strong); impl_FlowParent!(for Root); -pub enum Join { +enum Join { Number(usize), Bool(bool), } #[allow(dead_code)] -pub struct State<'a> { - pub stack: Vec, +struct State<'a> { + stack: Vec, // We use i64 for index_stack because -1 is used to mark the absense of children. // We don't use index_stack values to index into any child. 
- pub index_stack: Vec, - pub bullet_last_used: Option, - pub r#unsafe: Vec>, + index_stack: Vec, + bullet_last_used: Option, + r#unsafe: Vec>, } #[allow(dead_code)] -pub struct Unsafe<'a> { - pub character: &'a str, - pub in_construct: Option, - pub not_in_construct: Option, - pub before: Option<&'a str>, - pub after: Option<&'a str>, - pub at_break: Option, +struct Unsafe<'a> { + character: &'a str, + in_construct: Option, + not_in_construct: Option, + before: Option<&'a str>, + after: Option<&'a str>, + at_break: Option, compiled: Option, } #[allow(dead_code)] // This could use a better name. -pub enum Construct { +enum Construct { List(Vec), Single(ConstructName), } impl<'a> Unsafe<'a> { - pub fn new( + fn new( character: &'a str, before: Option<&'a str>, after: Option<&'a str>, @@ -151,7 +151,7 @@ impl<'a> Unsafe<'a> { } } - pub fn get_default_unsafe() -> Vec { + fn get_default_unsafe() -> Vec { let full_phrasing_spans = vec![ ConstructName::Autolink, ConstructName::DestinationLiteral, @@ -421,40 +421,36 @@ impl<'a> Unsafe<'a> { ] } - pub fn compiled(&self) -> bool { + fn compiled(&self) -> bool { self.compiled.is_some() } - pub fn set_compiled(&mut self, regex_pattern: Regex) { + fn set_compiled(&mut self, regex_pattern: Regex) { self.compiled = Some(regex_pattern); } } #[allow(dead_code)] -pub struct Info<'a> { - pub before: &'a str, - pub after: &'a str, +struct Info<'a> { + before: &'a str, + after: &'a str, } impl<'a> Info<'a> { - pub fn new(before: &'a str, after: &'a str) -> Self { + fn new(before: &'a str, after: &'a str) -> Self { Info { before, after } } } #[allow(dead_code)] -pub struct SafeConfig<'a> { - pub before: &'a str, - pub after: &'a str, - pub encode: Option>, +struct SafeConfig<'a> { + before: &'a str, + after: &'a str, + encode: Option>, } impl<'a> SafeConfig<'a> { - pub fn new( - before: Option<&'a str>, - after: Option<&'a str>, - encode: Option>, - ) -> Self { + fn new(before: Option<&'a str>, after: Option<&'a str>, encode: Option>) 
-> Self { SafeConfig { before: before.unwrap_or(""), after: after.unwrap_or(""), @@ -469,7 +465,7 @@ struct EscapeInfos { } impl<'a> State<'a> { - pub fn new() -> Self { + fn new() -> Self { State { stack: Vec::new(), index_stack: Vec::new(), @@ -486,7 +482,7 @@ impl<'a> State<'a> { self.stack.pop(); } - pub fn handle(&mut self, node: &Node, info: &Info) -> String { + fn handle(&mut self, node: &Node, info: &Info) -> String { match node { Node::Root(root) => self.handle_root(root, info), Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), @@ -882,7 +878,7 @@ fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) } } -pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { +fn format_code_as_indented(node: &Node, _state: &State) -> bool { match node { Node::Code(code) => { let white_space = Regex::new(r"[^ \r\n]").unwrap(); @@ -896,7 +892,7 @@ pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { } } -pub fn format_heading_as_settext(node: &Node, _state: &State) -> bool { +fn format_heading_as_settext(node: &Node, _state: &State) -> bool { let line_break = Regex::new(r"\r?\n|\r").unwrap(); match node { Node::Heading(heading) => { From e9991fd447c9a608bc55a4d74c608ec4283f1ebf Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 3 Sep 2024 10:46:57 +0300 Subject: [PATCH 16/73] Move mdast to md into a workspace --- Cargo.toml | 3 +- mdast_util_to_markdown/Cargo.toml | 9 +++ mdast_util_to_markdown/src/configure.rs | 49 ++++++++++++++ mdast_util_to_markdown/src/lib.rs | 12 ++++ .../src}/to_markdown.rs | 64 ++++++++++--------- src/lib.rs | 6 -- 6 files changed, 105 insertions(+), 38 deletions(-) create mode 100644 mdast_util_to_markdown/Cargo.toml create mode 100644 mdast_util_to_markdown/src/configure.rs create mode 100644 mdast_util_to_markdown/src/lib.rs rename {src => mdast_util_to_markdown/src}/to_markdown.rs (95%) diff --git a/Cargo.toml b/Cargo.toml index d119a431..c8165337 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -27,7 +27,6 @@ log = ["dep:log"] log = { version = "0.4", optional = true } unicode-id = { version = "0.3", features = ["no_std"] } serde = { version = "1", features = ["derive"], optional = true } -regex = { version = "1.7.3" } [dev-dependencies] env_logger = "0.11" @@ -41,4 +40,4 @@ swc_core = { version = "0.100", features = [ ] } [workspace] -members = ["generate"] +members = ["generate", "mdast_util_to_markdown"] diff --git a/mdast_util_to_markdown/Cargo.toml b/mdast_util_to_markdown/Cargo.toml new file mode 100644 index 00000000..d0712cb2 --- /dev/null +++ b/mdast_util_to_markdown/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "mdast_util_to_markdown" +version = "0.0.0" +edition = "2018" +license = "MIT" + +[dependencies] +markdown = {path = "../"} +regex = { version = "1.7.3" } diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs new file mode 100644 index 00000000..fb9e5976 --- /dev/null +++ b/mdast_util_to_markdown/src/configure.rs @@ -0,0 +1,49 @@ +#[allow(dead_code)] +pub struct Options { + bullet: char, + bullet_other: char, + bullet_orderd: char, + emphasis: char, + fences: char, + list_item_indent: IndentOptions, + quote: char, + rule: char, + strong: char, + increment_list_marker: bool, + close_atx: bool, + resource_link: bool, + rule_spaces: bool, + set_text: bool, + tight_definitions: bool, + rule_repetition: u32, +} + +#[allow(dead_code)] +pub enum IndentOptions { + Mixed, + One, + Tab, +} + +impl Default for Options { + fn default() -> Self { + Self { + bullet: '*', + bullet_other: '-', + bullet_orderd: '.', + emphasis: '*', + fences: '`', + increment_list_marker: false, + rule_repetition: 3, + list_item_indent: IndentOptions::One, + quote: '"', + rule: '*', + strong: '*', + close_atx: false, + rule_spaces: false, + resource_link: false, + set_text: false, + tight_definitions: false, + } + } +} diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs new file mode 
100644 index 00000000..1128b0a0 --- /dev/null +++ b/mdast_util_to_markdown/src/lib.rs @@ -0,0 +1,12 @@ +#![no_std] + +use alloc::string::String; +use markdown::mdast::Node; + +extern crate alloc; +mod configure; +mod to_markdown; + +pub fn to_markdown(tree: &Node) -> String { + to_markdown::serialize(tree) +} diff --git a/src/to_markdown.rs b/mdast_util_to_markdown/src/to_markdown.rs similarity index 95% rename from src/to_markdown.rs rename to mdast_util_to_markdown/src/to_markdown.rs index 3b6bf59e..31a57923 100644 --- a/src/to_markdown.rs +++ b/mdast_util_to_markdown/src/to_markdown.rs @@ -1,4 +1,3 @@ -use crate::mdast::{List, Node, Paragraph, Root, Strong, Text}; use alloc::{ collections::BTreeMap, format, @@ -6,6 +5,7 @@ use alloc::{ vec, vec::Vec, }; +use markdown::mdast::{List, Node, Paragraph, Root, Strong, Text}; use regex::Regex; #[allow(dead_code)] @@ -520,9 +520,14 @@ impl<'a> State<'a> { self.enter(ConstructName::Strong); - let mut value = format!("{}{}", marker, marker); - value.push_str(&self.container_phrasing(node, info)); - value.push_str(&format!("{}{}", marker, marker)); + let mut value = format!( + "{}{}{}", + marker, + marker, + self.container_phrasing(node, info) + ); + value.push(marker); + value.push(marker); self.exit(); @@ -817,8 +822,8 @@ impl<'a> State<'a> { } } -fn check_strong(_state: &State) -> String { - "*".into() +fn check_strong(_state: &State) -> char { + '*' } fn escape_backslashes(value: &str, after: &str) -> String { @@ -879,36 +884,35 @@ fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) } fn format_code_as_indented(node: &Node, _state: &State) -> bool { - match node { - Node::Code(code) => { - let white_space = Regex::new(r"[^ \r\n]").unwrap(); - let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); - !code.value.is_empty() - && code.lang.is_none() - && white_space.is_match(&code.value) - && !blank.is_match(&code.value) - } - _ => false, + if let Node::Code(code) = node { + let 
white_space = Regex::new(r"[^ \r\n]").unwrap(); + let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); + + return !code.value.is_empty() + && code.lang.is_none() + && white_space.is_match(&code.value) + && !blank.is_match(&code.value); } + + false } fn format_heading_as_settext(node: &Node, _state: &State) -> bool { - let line_break = Regex::new(r"\r?\n|\r").unwrap(); - match node { - Node::Heading(heading) => { - let mut literal_with_break = false; - for child in &heading.children { - if include_literal_with_break(child, &line_break) { - literal_with_break = true; - break; - } + if let Node::Heading(heading) = node { + let line_break = Regex::new(r"\r?\n|\r").unwrap(); + let mut literal_with_break = false; + for child in &heading.children { + if include_literal_with_break(child, &line_break) { + literal_with_break = true; + break; } - - heading.depth == 0 - || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break } - _ => false, + + return heading.depth == 0 + || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break; } + + false } fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { @@ -950,7 +954,7 @@ mod init_tests { use super::*; use alloc::{string::String, vec}; - use crate::mdast::{Node, Paragraph, Text}; + use markdown::mdast::{Node, Paragraph, Text}; #[test] fn it_works_for_simple_text() { diff --git a/src/lib.rs b/src/lib.rs index a4493207..f1266e81 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,7 +42,6 @@ mod resolve; mod state; mod subtokenize; mod to_html; -mod to_markdown; mod to_mdast; mod tokenizer; mod util; @@ -51,7 +50,6 @@ pub mod mdast; // To do: externalize? pub mod message; // To do: externalize. pub mod unist; // To do: externalize. 
-use mdast::Node; #[doc(hidden)] pub use util::identifier::{id_cont, id_start}; @@ -161,7 +159,3 @@ pub fn to_mdast(value: &str, options: &ParseOptions) -> Result String { - to_markdown::serialize(tree) -} From 558b3168f3e063e17a199395fd355ef9826d80cc Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 3 Sep 2024 16:05:28 +0300 Subject: [PATCH 17/73] A few refactors for parent nodes abstraction --- mdast_util_to_markdown/src/lib.rs | 34 ++ mdast_util_to_markdown/src/parents.rs | 32 ++ mdast_util_to_markdown/src/to_markdown.rs | 421 +--------------------- mdast_util_to_markdown/src/unsafe.rs | 319 ++++++++++++++++ 4 files changed, 398 insertions(+), 408 deletions(-) create mode 100644 mdast_util_to_markdown/src/parents.rs create mode 100644 mdast_util_to_markdown/src/unsafe.rs diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index 1128b0a0..de94277c 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -5,7 +5,41 @@ use markdown::mdast::Node; extern crate alloc; mod configure; +pub mod parents; mod to_markdown; +pub mod r#unsafe; + +#[allow(dead_code)] +#[derive(Clone, PartialEq)] +pub enum ConstructName { + Autolink, + Blockquote, + CodeIndented, + CodeFenced, + CodeFencedLangGraveAccent, + CodeFencedLangTilde, + CodeFencedMetaGraveAccent, + CodeFencedMetaTilde, + Definition, + DestinationLiteral, + DestinationRaw, + Emphasis, + HeadingAtx, + HeadingSetext, + Image, + ImageReference, + Label, + Link, + LinkReference, + List, + ListItem, + Paragraph, + Phrasing, + Reference, + Strong, + TitleApostrophe, + TitleQuote, +} pub fn to_markdown(tree: &Node) -> String { to_markdown::serialize(tree) diff --git a/mdast_util_to_markdown/src/parents.rs b/mdast_util_to_markdown/src/parents.rs new file mode 100644 index 00000000..7973c62f --- /dev/null +++ b/mdast_util_to_markdown/src/parents.rs @@ -0,0 +1,32 @@ +use alloc::vec::Vec; +use markdown::mdast::{List, Node, Paragraph, Root, Strong}; + +pub trait Parent 
{ + fn children(&self) -> &Vec; + + fn spreadable(&self) -> Option { + None + } +} + +impl Parent for List { + fn children(&self) -> &Vec { + &self.children + } + + fn spreadable(&self) -> Option { + Some(self.spread) + } +} + +macro_rules! impl_Parent { + (for $($t:ty),+) => { + $(impl Parent for $t { + fn children(&self) -> &Vec { + &self.children + } + })* + } +} + +impl_Parent!(for Root, Paragraph, Strong); diff --git a/mdast_util_to_markdown/src/to_markdown.rs b/mdast_util_to_markdown/src/to_markdown.rs index 31a57923..3cb0e885 100644 --- a/mdast_util_to_markdown/src/to_markdown.rs +++ b/mdast_util_to_markdown/src/to_markdown.rs @@ -1,44 +1,17 @@ +use crate::{ + parents::Parent, + r#unsafe::{Construct, Unsafe}, +}; use alloc::{ collections::BTreeMap, format, string::{String, ToString}, - vec, vec::Vec, }; -use markdown::mdast::{List, Node, Paragraph, Root, Strong, Text}; +use markdown::mdast::{Node, Paragraph, Root, Strong, Text}; use regex::Regex; -#[allow(dead_code)] -#[derive(Clone, PartialEq)] -enum ConstructName { - Autolink, - Blockquote, - CodeIndented, - CodeFenced, - CodeFencedLangGraveAccent, - CodeFencedLangTilde, - CodeFencedMetaGraveAccent, - CodeFencedMetaTilde, - Definition, - DestinationLiteral, - DestinationRaw, - Emphasis, - HeadingAtx, - HeadingSetext, - Image, - ImageReference, - Label, - Link, - LinkReference, - List, - ListItem, - Paragraph, - Phrasing, - Reference, - Strong, - TitleApostrophe, - TitleQuote, -} +use crate::ConstructName; trait PeekNode { // TODO make it take a reference to the state options @@ -51,53 +24,6 @@ impl PeekNode for Strong { } } -trait PhrasingParent { - fn children(&self) -> &Vec; -} - -trait FlowParent { - fn children(&self) -> &Vec; - - /// One or more of its children are separated with a blank line from its - /// siblings (when `true`), or not (when `false`). 
- fn spreadable(&self) -> Option { - None - } -} - -impl FlowParent for List { - fn children(&self) -> &Vec { - &self.children - } - - fn spreadable(&self) -> Option { - Some(self.spread) - } -} - -macro_rules! impl_PhrasingParent { - (for $($t:ty),+) => { - $(impl PhrasingParent for $t { - fn children(&self) -> &Vec { - &self.children - } - })* - } -} - -macro_rules! impl_FlowParent { - (for $($t:ty),+) => { - $(impl FlowParent for $t { - fn children(&self) -> &Vec { - &self.children - } - })* - } -} - -impl_PhrasingParent!(for Paragraph, Strong); -impl_FlowParent!(for Root); - enum Join { Number(usize), Bool(bool), @@ -113,323 +39,6 @@ struct State<'a> { r#unsafe: Vec>, } -#[allow(dead_code)] -struct Unsafe<'a> { - character: &'a str, - in_construct: Option, - not_in_construct: Option, - before: Option<&'a str>, - after: Option<&'a str>, - at_break: Option, - compiled: Option, -} - -#[allow(dead_code)] -// This could use a better name. -enum Construct { - List(Vec), - Single(ConstructName), -} - -impl<'a> Unsafe<'a> { - fn new( - character: &'a str, - before: Option<&'a str>, - after: Option<&'a str>, - in_construct: Option, - not_in_construct: Option, - at_break: Option, - ) -> Self { - Unsafe { - character, - in_construct, - not_in_construct, - before, - after, - at_break, - compiled: None, - } - } - - fn get_default_unsafe() -> Vec { - let full_phrasing_spans = vec![ - ConstructName::Autolink, - ConstructName::DestinationLiteral, - ConstructName::DestinationRaw, - ConstructName::Reference, - ConstructName::TitleQuote, - ConstructName::TitleApostrophe, - ]; - - vec![ - Self::new( - "\t", - None, - r"[\\r\\n]".into(), - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - "\t", - r"[\\r\\n]".into(), - None, - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - "\t", - None, - None, - Construct::List(vec![ - ConstructName::CodeFencedLangGraveAccent, - ConstructName::CodeFencedLangTilde, - ]) - 
.into(), - None, - None, - ), - Self::new( - "\r", - None, - None, - Construct::List(vec![ - ConstructName::CodeFencedLangGraveAccent, - ConstructName::CodeFencedLangTilde, - ConstructName::CodeFencedMetaGraveAccent, - ConstructName::CodeFencedMetaTilde, - ConstructName::DestinationLiteral, - ConstructName::HeadingAtx, - ]) - .into(), - None, - None, - ), - Self::new( - "\n", - None, - None, - Construct::List(vec![ - ConstructName::CodeFencedLangGraveAccent, - ConstructName::CodeFencedLangTilde, - ConstructName::CodeFencedMetaGraveAccent, - ConstructName::CodeFencedMetaTilde, - ConstructName::DestinationLiteral, - ConstructName::HeadingAtx, - ]) - .into(), - None, - None, - ), - Self::new( - " ", - None, - r"[\\r\\n]".into(), - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - " ", - r"[\\r\\n]".into(), - None, - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - " ", - None, - None, - Construct::List(vec![ - ConstructName::CodeFencedLangGraveAccent, - ConstructName::CodeFencedLangTilde, - ]) - .into(), - None, - None, - ), - Self::new( - "!", - None, - r"\[".into(), - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new( - "\"", - None, - None, - Construct::Single(ConstructName::TitleQuote).into(), - None, - None, - ), - Self::new("#", None, None, None, None, Some(true)), - Self::new( - "&", - None, - r"[#A-Za-z]".into(), - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - "'", - None, - None, - Construct::Single(ConstructName::TitleApostrophe).into(), - None, - None, - ), - Self::new( - "(", - None, - None, - Construct::Single(ConstructName::DestinationRaw).into(), - None, - None, - ), - Self::new( - "(", - r"\]".into(), - None, - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new(")", r"\d+".into(), 
None, None, None, Some(true)), - Self::new( - ")", - None, - None, - Construct::Single(ConstructName::DestinationRaw).into(), - None, - None, - ), - Self::new("*", None, r"(?:[ \t\r\n*])".into(), None, None, Some(true)), - Self::new( - "*", - None, - None, - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new("+", None, r"(?:[ \t\r\n])".into(), None, None, Some(true)), - Self::new("-", None, r"(?:[ \t\r\n-])".into(), None, None, Some(true)), - Self::new( - ".", - r"\d+".into(), - "(?:[ \t\r\n]|$)".into(), - None, - None, - Some(true), - ), - Self::new("<", None, r"[!/?A-Za-z]".into(), None, None, Some(true)), - Self::new( - "<", - None, - "[!/?A-Za-z]".into(), - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new( - "<", - None, - None, - Construct::Single(ConstructName::DestinationLiteral).into(), - None, - None, - ), - Self::new("=", None, None, None, None, Some(true)), - Self::new(">", None, None, None, None, Some(true)), - Self::new( - ">", - None, - None, - Construct::Single(ConstructName::DestinationLiteral).into(), - None, - Some(true), - ), - Self::new("[", None, None, None, None, Some(true)), - Self::new( - "[", - None, - None, - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new( - "[", - None, - None, - Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), - None, - None, - ), - Self::new( - r"\", - None, - "[\\r\\n]".into(), - Construct::Single(ConstructName::Phrasing).into(), - None, - None, - ), - Self::new( - "]", - None, - None, - Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), - None, - None, - ), - Self::new("_", None, None, None, None, Some(true)), - Self::new( - "_", - None, - None, - Construct::Single(ConstructName::Phrasing).into(), - 
Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new("`", None, None, None, None, Some(true)), - Self::new( - "`", - None, - None, - Construct::List(vec![ - ConstructName::CodeFencedLangGraveAccent, - ConstructName::CodeFencedMetaGraveAccent, - ]) - .into(), - None, - None, - ), - Self::new( - "`", - None, - None, - Construct::Single(ConstructName::Phrasing).into(), - Construct::List(full_phrasing_spans.clone()).into(), - None, - ), - Self::new("~", None, None, None, None, Some(true)), - ] - } - - fn compiled(&self) -> bool { - self.compiled.is_some() - } - - fn set_compiled(&mut self, regex_pattern: Regex) { - self.compiled = Some(regex_pattern); - } -} - #[allow(dead_code)] struct Info<'a> { before: &'a str, @@ -544,7 +153,9 @@ impl<'a> State<'a> { if !pattern_in_scope(&self.stack, pattern) { continue; } + Self::compile_pattern(pattern); + if let Some(regex) = &pattern.compiled { for m in regex.captures_iter(&value) { let full_match = m.get(0).unwrap(); @@ -637,7 +248,7 @@ impl<'a> State<'a> { } fn compile_pattern(pattern: &mut Unsafe) { - if !pattern.compiled() { + if !pattern.is_compiled() { let before = if pattern.at_break.unwrap_or(false) { r"[\\r\\n][\\t ]*" } else { @@ -676,7 +287,7 @@ impl<'a> State<'a> { } } - fn container_phrasing(&mut self, parent: &T, info: &Info) -> String { + fn container_phrasing(&mut self, parent: &T, info: &Info) -> String { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; @@ -726,7 +337,7 @@ impl<'a> State<'a> { } } - fn container_flow(&mut self, parent: &T, _info: &Info) -> String { + fn container_flow(&mut self, parent: &T, _info: &Info) -> String { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; @@ -756,13 +367,7 @@ impl<'a> State<'a> { results } - fn set_between( - &self, - left: &Node, - right: &Node, - parent: &T, - results: &mut String, - ) { + fn 
set_between(&self, left: &Node, right: &Node, parent: &T, results: &mut String) { match self.join_defaults(left, right, parent) { Some(Join::Number(num)) => { if num == 1 { @@ -782,7 +387,7 @@ impl<'a> State<'a> { } } - fn join_defaults(&self, left: &Node, right: &Node, parent: &T) -> Option { + fn join_defaults(&self, left: &Node, right: &Node, parent: &T) -> Option { if format_code_as_indented(right, self) && (matches!(left, Node::List(_)) || format_code_as_indented(left, self)) { diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs new file mode 100644 index 00000000..f0130bb7 --- /dev/null +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -0,0 +1,319 @@ +use alloc::{vec, vec::Vec}; +use regex::Regex; + +use crate::ConstructName; + +pub struct Unsafe<'a> { + pub character: &'a str, + pub in_construct: Option, + pub not_in_construct: Option, + pub before: Option<&'a str>, + pub after: Option<&'a str>, + pub at_break: Option, + pub(crate) compiled: Option, +} + +// This could use a better name. 
+pub enum Construct { + List(Vec), + Single(ConstructName), +} + +impl<'a> Unsafe<'a> { + fn new( + character: &'a str, + before: Option<&'a str>, + after: Option<&'a str>, + in_construct: Option, + not_in_construct: Option, + at_break: Option, + ) -> Self { + Unsafe { + character, + in_construct, + not_in_construct, + before, + after, + at_break, + compiled: None, + } + } + + pub fn get_default_unsafe() -> Vec { + let full_phrasing_spans = vec![ + ConstructName::Autolink, + ConstructName::DestinationLiteral, + ConstructName::DestinationRaw, + ConstructName::Reference, + ConstructName::TitleQuote, + ConstructName::TitleApostrophe, + ]; + + vec![ + Self::new( + "\t", + None, + r"[\\r\\n]".into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + "\t", + r"[\\r\\n]".into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + "\t", + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ]) + .into(), + None, + None, + ), + Self::new( + "\r", + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ConstructName::CodeFencedMetaGraveAccent, + ConstructName::CodeFencedMetaTilde, + ConstructName::DestinationLiteral, + ConstructName::HeadingAtx, + ]) + .into(), + None, + None, + ), + Self::new( + "\n", + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ConstructName::CodeFencedMetaGraveAccent, + ConstructName::CodeFencedMetaTilde, + ConstructName::DestinationLiteral, + ConstructName::HeadingAtx, + ]) + .into(), + None, + None, + ), + Self::new( + " ", + None, + r"[\\r\\n]".into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + " ", + r"[\\r\\n]".into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + 
Self::new( + " ", + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedLangTilde, + ]) + .into(), + None, + None, + ), + Self::new( + "!", + None, + r"\[".into(), + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + "\"", + None, + None, + Construct::Single(ConstructName::TitleQuote).into(), + None, + None, + ), + Self::new("#", None, None, None, None, Some(true)), + Self::new( + "&", + None, + r"[#A-Za-z]".into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + "'", + None, + None, + Construct::Single(ConstructName::TitleApostrophe).into(), + None, + None, + ), + Self::new( + "(", + None, + None, + Construct::Single(ConstructName::DestinationRaw).into(), + None, + None, + ), + Self::new( + "(", + r"\]".into(), + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new(")", r"\d+".into(), None, None, None, Some(true)), + Self::new( + ")", + None, + None, + Construct::Single(ConstructName::DestinationRaw).into(), + None, + None, + ), + Self::new("*", None, r"(?:[ \t\r\n*])".into(), None, None, Some(true)), + Self::new( + "*", + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new("+", None, r"(?:[ \t\r\n])".into(), None, None, Some(true)), + Self::new("-", None, r"(?:[ \t\r\n-])".into(), None, None, Some(true)), + Self::new( + ".", + r"\d+".into(), + "(?:[ \t\r\n]|$)".into(), + None, + None, + Some(true), + ), + Self::new("<", None, r"[!/?A-Za-z]".into(), None, None, Some(true)), + Self::new( + "<", + None, + "[!/?A-Za-z]".into(), + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + "<", + None, + None, + 
Construct::Single(ConstructName::DestinationLiteral).into(), + None, + None, + ), + Self::new("=", None, None, None, None, Some(true)), + Self::new(">", None, None, None, None, Some(true)), + Self::new( + ">", + None, + None, + Construct::Single(ConstructName::DestinationLiteral).into(), + None, + Some(true), + ), + Self::new("[", None, None, None, None, Some(true)), + Self::new( + "[", + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new( + "[", + None, + None, + Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), + None, + None, + ), + Self::new( + r"\", + None, + "[\\r\\n]".into(), + Construct::Single(ConstructName::Phrasing).into(), + None, + None, + ), + Self::new( + "]", + None, + None, + Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), + None, + None, + ), + Self::new("_", None, None, None, None, Some(true)), + Self::new( + "_", + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new("`", None, None, None, None, Some(true)), + Self::new( + "`", + None, + None, + Construct::List(vec![ + ConstructName::CodeFencedLangGraveAccent, + ConstructName::CodeFencedMetaGraveAccent, + ]) + .into(), + None, + None, + ), + Self::new( + "`", + None, + None, + Construct::Single(ConstructName::Phrasing).into(), + Construct::List(full_phrasing_spans.clone()).into(), + None, + ), + Self::new("~", None, None, None, None, Some(true)), + ] + } + + pub(crate) fn is_compiled(&self) -> bool { + self.compiled.is_some() + } + + pub(crate) fn set_compiled(&mut self, regex_pattern: Regex) { + self.compiled = Some(regex_pattern); + } +} From 973de9395ef071876dec7c8798e94b989fd2f9c1 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 3 Sep 2024 22:16:53 +0300 Subject: [PATCH 18/73] Refactor --- mdast_util_to_markdown/Cargo.toml | 2 +- 
mdast_util_to_markdown/src/configure.rs | 32 +- mdast_util_to_markdown/src/construct_name.rs | 31 ++ mdast_util_to_markdown/src/handle/mod.rs | 11 + .../src/handle/paragraph.rs | 25 ++ mdast_util_to_markdown/src/handle/strong.rs | 37 ++ mdast_util_to_markdown/src/handle/text.rs | 20 ++ mdast_util_to_markdown/src/lib.rs | 106 ++++-- .../src/{to_markdown.rs => state.rs} | 335 +++--------------- mdast_util_to_markdown/src/unsafe.rs | 9 +- .../src/util/check_strong.rs | 7 + .../src/util/format_code_as_indented.rs | 18 + .../src/util/format_heading_as_setext.rs | 51 +++ mdast_util_to_markdown/src/util/mod.rs | 5 + .../src/util/pattern_in_scope.rs | 37 ++ mdast_util_to_markdown/src/util/safe.rs | 50 +++ 16 files changed, 426 insertions(+), 350 deletions(-) create mode 100644 mdast_util_to_markdown/src/construct_name.rs create mode 100644 mdast_util_to_markdown/src/handle/mod.rs create mode 100644 mdast_util_to_markdown/src/handle/paragraph.rs create mode 100644 mdast_util_to_markdown/src/handle/strong.rs create mode 100644 mdast_util_to_markdown/src/handle/text.rs rename mdast_util_to_markdown/src/{to_markdown.rs => state.rs} (54%) create mode 100644 mdast_util_to_markdown/src/util/check_strong.rs create mode 100644 mdast_util_to_markdown/src/util/format_code_as_indented.rs create mode 100644 mdast_util_to_markdown/src/util/format_heading_as_setext.rs create mode 100644 mdast_util_to_markdown/src/util/mod.rs create mode 100644 mdast_util_to_markdown/src/util/pattern_in_scope.rs create mode 100644 mdast_util_to_markdown/src/util/safe.rs diff --git a/mdast_util_to_markdown/Cargo.toml b/mdast_util_to_markdown/Cargo.toml index d0712cb2..89d0b425 100644 --- a/mdast_util_to_markdown/Cargo.toml +++ b/mdast_util_to_markdown/Cargo.toml @@ -5,5 +5,5 @@ edition = "2018" license = "MIT" [dependencies] -markdown = {path = "../"} +markdown = { path = "../" } regex = { version = "1.7.3" } diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs 
index fb9e5976..cd1fc456 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -1,21 +1,21 @@ #[allow(dead_code)] pub struct Options { - bullet: char, - bullet_other: char, - bullet_orderd: char, - emphasis: char, - fences: char, - list_item_indent: IndentOptions, - quote: char, - rule: char, - strong: char, - increment_list_marker: bool, - close_atx: bool, - resource_link: bool, - rule_spaces: bool, - set_text: bool, - tight_definitions: bool, - rule_repetition: u32, + pub bullet: char, + pub bullet_other: char, + pub bullet_orderd: char, + pub emphasis: char, + pub fences: char, + pub list_item_indent: IndentOptions, + pub quote: char, + pub rule: char, + pub strong: char, + pub increment_list_marker: bool, + pub close_atx: bool, + pub resource_link: bool, + pub rule_spaces: bool, + pub set_text: bool, + pub tight_definitions: bool, + pub rule_repetition: u32, } #[allow(dead_code)] diff --git a/mdast_util_to_markdown/src/construct_name.rs b/mdast_util_to_markdown/src/construct_name.rs new file mode 100644 index 00000000..52511866 --- /dev/null +++ b/mdast_util_to_markdown/src/construct_name.rs @@ -0,0 +1,31 @@ +#[derive(Clone, PartialEq)] +#[allow(dead_code)] +pub enum ConstructName { + Autolink, + Blockquote, + CodeIndented, + CodeFenced, + CodeFencedLangGraveAccent, + CodeFencedLangTilde, + CodeFencedMetaGraveAccent, + CodeFencedMetaTilde, + Definition, + DestinationLiteral, + DestinationRaw, + Emphasis, + HeadingAtx, + HeadingSetext, + Image, + ImageReference, + Label, + Link, + LinkReference, + List, + ListItem, + Paragraph, + Phrasing, + Reference, + Strong, + TitleApostrophe, + TitleQuote, +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs new file mode 100644 index 00000000..5834e001 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -0,0 +1,11 @@ +use crate::{state::Info, State}; +use alloc::string::String; + +mod paragraph; +pub mod strong; +mod 
text; + +pub trait Handle { + type Error; + fn handle(&self, state: &mut State, info: &Info) -> Result; +} diff --git a/mdast_util_to_markdown/src/handle/paragraph.rs b/mdast_util_to_markdown/src/handle/paragraph.rs new file mode 100644 index 00000000..792950b9 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/paragraph.rs @@ -0,0 +1,25 @@ +use alloc::string::String; +use markdown::mdast::Paragraph; + +use crate::{ + construct_name::ConstructName, + state::{Info, State}, +}; + +use super::Handle; + +impl Handle for Paragraph { + type Error = String; + + fn handle(&self, state: &mut State, info: &Info) -> Result { + state.enter(ConstructName::Paragraph); + + state.enter(ConstructName::Phrasing); + let value = state.container_phrasing(self, info)?; + // exit phrasing + state.exit(); + // exit paragarph + state.exit(); + Ok(value) + } +} diff --git a/mdast_util_to_markdown/src/handle/strong.rs b/mdast_util_to_markdown/src/handle/strong.rs new file mode 100644 index 00000000..e2580c18 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/strong.rs @@ -0,0 +1,37 @@ +use alloc::{format, string::String}; +use markdown::mdast::Strong; + +use crate::{ + construct_name::ConstructName, + state::{Info, State}, + util::check_strong::check_strong, +}; + +use super::Handle; + +impl Handle for Strong { + type Error = String; + + fn handle(&self, state: &mut State, info: &Info) -> Result { + let marker = check_strong(state)?; + + state.enter(ConstructName::Strong); + + let mut value = format!( + "{}{}{}", + marker, + marker, + state.container_phrasing(self, info)? 
+ ); + value.push(marker); + value.push(marker); + + state.exit(); + + Ok(value) + } +} + +pub fn peek_strong(_state: &State) -> String { + "*".into() +} diff --git a/mdast_util_to_markdown/src/handle/text.rs b/mdast_util_to_markdown/src/handle/text.rs new file mode 100644 index 00000000..1b7c41b4 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/text.rs @@ -0,0 +1,20 @@ +use alloc::string::String; +use markdown::mdast::Text; + +use crate::{ + state::{Info, State}, + util::safe::SafeConfig, +}; + +use super::Handle; + +impl Handle for Text { + type Error = String; + + fn handle(&self, state: &mut State, info: &Info) -> Result { + Ok(state.safe( + &self.value, + &SafeConfig::new(Some(info.before), Some(info.after), None), + )) + } +} diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index de94277c..8d4be12f 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -1,46 +1,80 @@ #![no_std] use alloc::string::String; +pub use configure::Options; use markdown::mdast::Node; +use state::{Info, State}; extern crate alloc; mod configure; -pub mod parents; -mod to_markdown; -pub mod r#unsafe; - -#[allow(dead_code)] -#[derive(Clone, PartialEq)] -pub enum ConstructName { - Autolink, - Blockquote, - CodeIndented, - CodeFenced, - CodeFencedLangGraveAccent, - CodeFencedLangTilde, - CodeFencedMetaGraveAccent, - CodeFencedMetaTilde, - Definition, - DestinationLiteral, - DestinationRaw, - Emphasis, - HeadingAtx, - HeadingSetext, - Image, - ImageReference, - Label, - Link, - LinkReference, - List, - ListItem, - Paragraph, - Phrasing, - Reference, - Strong, - TitleApostrophe, - TitleQuote, +mod construct_name; +mod handle; +mod parents; +mod state; +mod r#unsafe; +mod util; + +pub fn to_markdown(tree: &Node, _options: &Options) -> Result { + let mut state = State::new(); + let result = state.handle(tree, &Info::new("\n", "\n"))?; + Ok(result) } -pub fn to_markdown(tree: &Node) -> String { - 
to_markdown::serialize(tree) +#[cfg(test)] +mod init_tests { + use super::*; + use alloc::{string::String, vec}; + + use markdown::mdast::{Node, Paragraph, Strong, Text}; + + #[test] + fn it_works_for_simple_text() { + let text_a = Node::Text(Text { + value: String::from("a"), + position: None, + }); + let text_b = Node::Text(Text { + value: String::from("b"), + position: None, + }); + let paragraph = Node::Paragraph(Paragraph { + children: vec![text_a, text_b], + position: None, + }); + let actual = to_markdown(¶graph, &Default::default()).unwrap(); + assert_eq!(actual, String::from("ab")); + } + + #[test] + fn it_escape() { + let text_a = Node::Text(Text { + value: String::from("![](a.jpg)"), + position: None, + }); + let paragraph = Node::Paragraph(Paragraph { + children: vec![text_a], + position: None, + }); + let actual = to_markdown(¶graph, &Default::default()).unwrap(); + assert_eq!(actual, "!\\[]\\(a.jpg)"); + } + + #[test] + fn it_will_strong() { + let text_a = Node::Text(Text { + value: String::from("a"), + position: None, + }); + + let text_b = Node::Text(Text { + value: String::from("b"), + position: None, + }); + let strong = Node::Strong(Strong { + children: vec![text_a, text_b], + position: None, + }); + let actual = to_markdown(&strong, &Default::default()).unwrap(); + assert_eq!(actual, "**ab**"); + } } diff --git a/mdast_util_to_markdown/src/to_markdown.rs b/mdast_util_to_markdown/src/state.rs similarity index 54% rename from mdast_util_to_markdown/src/to_markdown.rs rename to mdast_util_to_markdown/src/state.rs index 3cb0e885..1a9c3ecf 100644 --- a/mdast_util_to_markdown/src/to_markdown.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,6 +1,15 @@ +use crate::construct_name::ConstructName; +use crate::handle::strong::peek_strong; +use crate::handle::Handle; use crate::{ parents::Parent, - r#unsafe::{Construct, Unsafe}, + r#unsafe::Unsafe, + util::{ + format_code_as_indented::format_code_as_indented, + 
format_heading_as_setext::format_heading_as_setext, + pattern_in_scope::pattern_in_scope, + safe::{escape_backslashes, EscapeInfos, SafeConfig}, + }, }; use alloc::{ collections::BTreeMap, @@ -8,29 +17,16 @@ use alloc::{ string::{String, ToString}, vec::Vec, }; -use markdown::mdast::{Node, Paragraph, Root, Strong, Text}; +use markdown::mdast::Node; use regex::Regex; -use crate::ConstructName; - -trait PeekNode { - // TODO make it take a reference to the state options - fn handle_peek(&self) -> String; -} - -impl PeekNode for Strong { - fn handle_peek(&self) -> String { - "*".into() - } -} - enum Join { Number(usize), Bool(bool), } #[allow(dead_code)] -struct State<'a> { +pub struct State<'a> { stack: Vec, // We use i64 for index_stack because -1 is used to mark the absense of children. // We don't use index_stack values to index into any child. @@ -39,42 +35,20 @@ struct State<'a> { r#unsafe: Vec>, } -#[allow(dead_code)] -struct Info<'a> { - before: &'a str, - after: &'a str, +pub struct Info<'a> { + pub before: &'a str, + pub after: &'a str, } impl<'a> Info<'a> { - fn new(before: &'a str, after: &'a str) -> Self { + pub fn new(before: &'a str, after: &'a str) -> Self { Info { before, after } } } #[allow(dead_code)] -struct SafeConfig<'a> { - before: &'a str, - after: &'a str, - encode: Option>, -} - -impl<'a> SafeConfig<'a> { - fn new(before: Option<&'a str>, after: Option<&'a str>, encode: Option>) -> Self { - SafeConfig { - before: before.unwrap_or(""), - after: after.unwrap_or(""), - encode, - } - } -} - -struct EscapeInfos { - before: bool, - after: bool, -} - impl<'a> State<'a> { - fn new() -> Self { + pub fn new() -> Self { State { stack: Vec::new(), index_stack: Vec::new(), @@ -83,67 +57,28 @@ impl<'a> State<'a> { } } - fn enter(&mut self, name: ConstructName) { + pub fn enter(&mut self, name: ConstructName) { self.stack.push(name); } - fn exit(&mut self) { + pub fn exit(&mut self) { self.stack.pop(); } - fn handle(&mut self, node: &Node, info: &Info) -> 
String { + pub fn handle(&mut self, node: &Node, info: &Info) -> Result { match node { - Node::Root(root) => self.handle_root(root, info), - Node::Paragraph(paragraph) => self.handle_paragraph(paragraph, info), - Node::Text(text) => self.handle_text(text, info), - Node::Strong(strong) => self.handle_strong(strong, info), + Node::Paragraph(paragraph) => paragraph.handle(self, info), + Node::Text(text) => text.handle(self, info), + Node::Strong(strong) => strong.handle(self, info), _ => panic!("Not handled yet"), } } - fn handle_root(&mut self, node: &Root, info: &Info) -> String { - self.container_flow(node, info) - } - - fn handle_paragraph(&mut self, node: &Paragraph, info: &Info) -> String { - self.enter(ConstructName::Paragraph); - - self.enter(ConstructName::Phrasing); - let value = self.container_phrasing(node, info); - // exit phrasing - self.exit(); - // exit paragarph - self.exit(); - value - } - - fn handle_text(&mut self, text: &Text, info: &Info) -> String { - self.safe( - &text.value, - &SafeConfig::new(Some(info.before), Some(info.after), None), - ) - } - - fn handle_strong(&mut self, node: &Strong, info: &Info) -> String { - let marker = check_strong(self); - - self.enter(ConstructName::Strong); + //fn handle_root(&mut self, node: &Root, info: &Info) -> String { + // self.container_flow(node, info) + //} - let mut value = format!( - "{}{}{}", - marker, - marker, - self.container_phrasing(node, info) - ); - value.push(marker); - value.push(marker); - - self.exit(); - - value - } - - fn safe(&mut self, input: &String, config: &SafeConfig) -> String { + pub fn safe(&mut self, input: &String, config: &SafeConfig) -> String { let value = format!("{}{}{}", config.before, input, config.after); let mut positions: Vec = Vec::new(); let mut result: String = String::new(); @@ -248,7 +183,7 @@ impl<'a> State<'a> { } fn compile_pattern(pattern: &mut Unsafe) { - if !pattern.is_compiled() { + if pattern.compiled.is_none() { let before = if 
pattern.at_break.unwrap_or(false) { r"[\\r\\n][\\t ]*" } else { @@ -287,7 +222,11 @@ impl<'a> State<'a> { } } - fn container_phrasing(&mut self, parent: &T, info: &Info) -> String { + pub fn container_phrasing( + &mut self, + parent: &T, + info: &Info, + ) -> Result { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; @@ -300,10 +239,10 @@ impl<'a> State<'a> { } let after = if let Some(child) = children_iter.peek() { - match Self::determine_first_char(child) { + match self.determine_first_char(child) { Some(after_char) => after_char, None => self - .handle(child, &Info::new("", "")) + .handle(child, &Info::new("", ""))? .chars() .nth(0) .unwrap_or_default() @@ -314,12 +253,12 @@ impl<'a> State<'a> { }; if results.is_empty() { - results.push_str(&self.handle(child, &Info::new(info.before, after.as_ref()))); + results.push_str(&self.handle(child, &Info::new(info.before, after.as_ref()))?); } else { results.push_str(&self.handle( child, &Info::new(&results[results.len() - 1..], after.as_ref()), - )); + )?); } index += 1; @@ -327,17 +266,17 @@ impl<'a> State<'a> { self.index_stack.pop(); - results + Ok(results) } - fn determine_first_char(node: &Node) -> Option { + fn determine_first_char(&self, node: &Node) -> Option { match node { - Node::Strong(strong) => Some(strong.handle_peek()), + Node::Strong(_) => Some(peek_strong(self)), _ => None, } } - fn container_flow(&mut self, parent: &T, _info: &Info) -> String { + fn container_flow(&mut self, parent: &T, _info: &Info) -> Result { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; @@ -353,7 +292,7 @@ impl<'a> State<'a> { self.bullet_last_used = None; } - results.push_str(&self.handle(child, &Info::new("\n", "\n"))); + results.push_str(&self.handle(child, &Info::new("\n", "\n"))?); if let Some(next_child) = children_iter.peek() { self.set_between(child, next_child, parent, &mut 
results); @@ -364,7 +303,7 @@ impl<'a> State<'a> { self.index_stack.pop(); - results + Ok(results) } fn set_between(&self, left: &Node, right: &Node, parent: &T, results: &mut String) { @@ -397,7 +336,7 @@ impl<'a> State<'a> { if let Some(spread) = parent.spreadable() { if matches!(left, Node::Paragraph(_)) && Self::matches((left, right)) || matches!(right, Node::Definition(_)) - || format_heading_as_settext(right, self) + || format_heading_as_setext(right, self) { return None; } @@ -426,189 +365,3 @@ impl<'a> State<'a> { ) } } - -fn check_strong(_state: &State) -> char { - '*' -} - -fn escape_backslashes(value: &str, after: &str) -> String { - let expression = Regex::new(r"\\[!-/:-@\[-`{-~]").unwrap(); - let mut results: String = String::new(); - let whole = format!("{}{}", value, after); - - let positions: Vec = expression.find_iter(&whole).map(|m| m.start()).collect(); - let mut start = 0; - - for position in &positions { - if start != *position { - results.push_str(&value[start..*position]); - } - - results.push('\\'); - - start = *position; - } - - results.push_str(&value[start..]); - - results -} - -fn pattern_in_scope(stack: &[ConstructName], pattern: &Unsafe) -> bool { - list_in_scope(stack, &pattern.in_construct, true) - && !list_in_scope(stack, &pattern.not_in_construct, false) -} - -// This could use a better name -fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) -> bool { - let Some(list) = list else { - return none; - }; - match list { - Construct::Single(construct_name) => { - if stack.contains(construct_name) { - return true; - } - - false - } - Construct::List(constructs_names) => { - if constructs_names.is_empty() { - return none; - } - - for construct_name in constructs_names { - if stack.contains(construct_name) { - return true; - } - } - - false - } - } -} - -fn format_code_as_indented(node: &Node, _state: &State) -> bool { - if let Node::Code(code) = node { - let white_space = Regex::new(r"[^ \r\n]").unwrap(); - let blank = 
Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); - - return !code.value.is_empty() - && code.lang.is_none() - && white_space.is_match(&code.value) - && !blank.is_match(&code.value); - } - - false -} - -fn format_heading_as_settext(node: &Node, _state: &State) -> bool { - if let Node::Heading(heading) = node { - let line_break = Regex::new(r"\r?\n|\r").unwrap(); - let mut literal_with_break = false; - for child in &heading.children { - if include_literal_with_break(child, &line_break) { - literal_with_break = true; - break; - } - } - - return heading.depth == 0 - || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break; - } - - false -} - -fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { - match node { - Node::Break(_) => true, - Node::MdxjsEsm(x) => regex.is_match(&x.value), - Node::Toml(x) => regex.is_match(&x.value), - Node::Yaml(x) => regex.is_match(&x.value), - Node::InlineCode(x) => regex.is_match(&x.value), - Node::InlineMath(x) => regex.is_match(&x.value), - Node::MdxTextExpression(x) => regex.is_match(&x.value), - Node::Html(x) => regex.is_match(&x.value), - Node::Text(x) => regex.is_match(&x.value), - Node::Code(x) => regex.is_match(&x.value), - Node::Math(x) => regex.is_match(&x.value), - Node::MdxFlowExpression(x) => regex.is_match(&x.value), - _ => { - if let Some(children) = node.children() { - for child in children { - if include_literal_with_break(child, regex) { - return true; - } - } - } - - false - } - } -} - -pub fn serialize(tree: &Node) -> String { - let mut state = State::new(); - let result = state.handle(tree, &Info::new("\n", "\n")); - result -} - -#[cfg(test)] -mod init_tests { - use super::*; - use alloc::{string::String, vec}; - - use markdown::mdast::{Node, Paragraph, Text}; - - #[test] - fn it_works_for_simple_text() { - let text_a = Node::Text(Text { - value: String::from("a"), - position: None, - }); - let text_b = Node::Text(Text { - value: String::from("b"), - position: None, - 
}); - let paragraph = Node::Paragraph(Paragraph { - children: vec![text_a, text_b], - position: None, - }); - let actual = serialize(¶graph); - assert_eq!(actual, String::from("ab")); - } - - #[test] - fn it_escape() { - let text_a = Node::Text(Text { - value: String::from("![](a.jpg)"), - position: None, - }); - let paragraph = Node::Paragraph(Paragraph { - children: vec![text_a], - position: None, - }); - let actual = serialize(¶graph); - assert_eq!(actual, "!\\[]\\(a.jpg)"); - } - - #[test] - fn it_will_strong() { - let text_a = Node::Text(Text { - value: String::from("a"), - position: None, - }); - - let text_b = Node::Text(Text { - value: String::from("b"), - position: None, - }); - let strong = Node::Strong(Strong { - children: vec![text_a, text_b], - position: None, - }); - let actual = serialize(&strong); - assert_eq!(actual, "**ab**"); - } -} diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index f0130bb7..e750a3f0 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -1,8 +1,9 @@ use alloc::{vec, vec::Vec}; use regex::Regex; -use crate::ConstructName; +use crate::construct_name::ConstructName; +#[derive(Default)] pub struct Unsafe<'a> { pub character: &'a str, pub in_construct: Option, @@ -20,7 +21,7 @@ pub enum Construct { } impl<'a> Unsafe<'a> { - fn new( + pub fn new( character: &'a str, before: Option<&'a str>, after: Option<&'a str>, @@ -309,10 +310,6 @@ impl<'a> Unsafe<'a> { ] } - pub(crate) fn is_compiled(&self) -> bool { - self.compiled.is_some() - } - pub(crate) fn set_compiled(&mut self, regex_pattern: Regex) { self.compiled = Some(regex_pattern); } diff --git a/mdast_util_to_markdown/src/util/check_strong.rs b/mdast_util_to_markdown/src/util/check_strong.rs new file mode 100644 index 00000000..67eddb0e --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_strong.rs @@ -0,0 +1,7 @@ +use alloc::string::String; + +use crate::state::State; + +pub fn 
check_strong(_state: &State) -> Result { + Ok('*') +} diff --git a/mdast_util_to_markdown/src/util/format_code_as_indented.rs b/mdast_util_to_markdown/src/util/format_code_as_indented.rs new file mode 100644 index 00000000..9167052f --- /dev/null +++ b/mdast_util_to_markdown/src/util/format_code_as_indented.rs @@ -0,0 +1,18 @@ +use markdown::mdast::Node; +use regex::Regex; + +use crate::state::State; + +pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { + if let Node::Code(code) = node { + let white_space = Regex::new(r"[^ \r\n]").unwrap(); + let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); + + return !code.value.is_empty() + && code.lang.is_none() + && white_space.is_match(&code.value) + && !blank.is_match(&code.value); + } + + false +} diff --git a/mdast_util_to_markdown/src/util/format_heading_as_setext.rs b/mdast_util_to_markdown/src/util/format_heading_as_setext.rs new file mode 100644 index 00000000..ce7431a9 --- /dev/null +++ b/mdast_util_to_markdown/src/util/format_heading_as_setext.rs @@ -0,0 +1,51 @@ +use alloc::string::ToString; +use markdown::mdast::Node; +use regex::Regex; + +use crate::state::State; + +pub fn format_heading_as_setext(node: &Node, _state: &State) -> bool { + if let Node::Heading(heading) = node { + let line_break = Regex::new(r"\r?\n|\r").unwrap(); + let mut literal_with_break = false; + for child in &heading.children { + if include_literal_with_break(child, &line_break) { + literal_with_break = true; + break; + } + } + + return heading.depth == 0 + || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break; + } + + false +} + +fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { + match node { + Node::Break(_) => true, + Node::MdxjsEsm(x) => regex.is_match(&x.value), + Node::Toml(x) => regex.is_match(&x.value), + Node::Yaml(x) => regex.is_match(&x.value), + Node::InlineCode(x) => regex.is_match(&x.value), + Node::InlineMath(x) => regex.is_match(&x.value), + 
Node::MdxTextExpression(x) => regex.is_match(&x.value), + Node::Html(x) => regex.is_match(&x.value), + Node::Text(x) => regex.is_match(&x.value), + Node::Code(x) => regex.is_match(&x.value), + Node::Math(x) => regex.is_match(&x.value), + Node::MdxFlowExpression(x) => regex.is_match(&x.value), + _ => { + if let Some(children) = node.children() { + for child in children { + if include_literal_with_break(child, regex) { + return true; + } + } + } + + false + } + } +} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs new file mode 100644 index 00000000..6d369fa6 --- /dev/null +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -0,0 +1,5 @@ +pub mod check_strong; +pub mod format_code_as_indented; +pub mod format_heading_as_setext; +pub mod pattern_in_scope; +pub mod safe; diff --git a/mdast_util_to_markdown/src/util/pattern_in_scope.rs b/mdast_util_to_markdown/src/util/pattern_in_scope.rs new file mode 100644 index 00000000..8debfa01 --- /dev/null +++ b/mdast_util_to_markdown/src/util/pattern_in_scope.rs @@ -0,0 +1,37 @@ +use crate::{ + construct_name::ConstructName, + r#unsafe::{Construct, Unsafe}, +}; + +pub fn pattern_in_scope(stack: &[ConstructName], pattern: &Unsafe) -> bool { + list_in_scope(stack, &pattern.in_construct, true) + && !list_in_scope(stack, &pattern.not_in_construct, false) +} + +fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) -> bool { + let Some(list) = list else { + return none; + }; + match list { + Construct::Single(construct_name) => { + if stack.contains(construct_name) { + return true; + } + + false + } + Construct::List(constructs_names) => { + if constructs_names.is_empty() { + return none; + } + + for construct_name in constructs_names { + if stack.contains(construct_name) { + return true; + } + } + + false + } + } +} diff --git a/mdast_util_to_markdown/src/util/safe.rs b/mdast_util_to_markdown/src/util/safe.rs new file mode 100644 index 00000000..6c5d106a --- /dev/null +++ 
b/mdast_util_to_markdown/src/util/safe.rs @@ -0,0 +1,50 @@ +use alloc::{format, string::String, vec::Vec}; +use regex::Regex; + +pub struct SafeConfig<'a> { + pub before: &'a str, + pub after: &'a str, + pub encode: Option>, +} + +impl<'a> SafeConfig<'a> { + pub(crate) fn new( + before: Option<&'a str>, + after: Option<&'a str>, + encode: Option>, + ) -> Self { + SafeConfig { + before: before.unwrap_or(""), + after: after.unwrap_or(""), + encode, + } + } +} + +pub struct EscapeInfos { + pub before: bool, + pub after: bool, +} + +pub fn escape_backslashes(value: &str, after: &str) -> String { + let expression = Regex::new(r"\\[!-/:-@\[-`{-~]").unwrap(); + let mut results: String = String::new(); + let whole = format!("{}{}", value, after); + + let positions: Vec = expression.find_iter(&whole).map(|m| m.start()).collect(); + let mut start = 0; + + for position in &positions { + if start != *position { + results.push_str(&value[start..*position]); + } + + results.push('\\'); + + start = *position; + } + + results.push_str(&value[start..]); + + results +} From 98243d389c4386b76be11d2790767b31e2fe8113 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 4 Sep 2024 18:49:41 +0300 Subject: [PATCH 19/73] Add support with tests for strong, paragraph and text --- Cargo.toml | 11 +- mdast_util_to_markdown/Cargo.toml | 3 + mdast_util_to_markdown/src/handle/mod.rs | 5 +- .../src/handle/paragraph.rs | 6 +- mdast_util_to_markdown/src/handle/strong.rs | 11 +- mdast_util_to_markdown/src/handle/text.rs | 6 +- mdast_util_to_markdown/src/lib.rs | 73 +++--------- mdast_util_to_markdown/src/message.rs | 24 ++++ mdast_util_to_markdown/src/state.rs | 99 ++++++++-------- mdast_util_to_markdown/src/unsafe.rs | 29 ++--- .../src/util/check_strong.rs | 19 ++- .../src/util/pattern_in_scope.rs | 8 +- mdast_util_to_markdown/src/util/safe.rs | 4 +- mdast_util_to_markdown/tests/paragraph.rs | 108 ++++++++++++++++++ mdast_util_to_markdown/tests/strong.rs | 66 +++++++++++ 
mdast_util_to_markdown/tests/text.rs | 27 +++++ 16 files changed, 342 insertions(+), 157 deletions(-) create mode 100644 mdast_util_to_markdown/src/message.rs create mode 100644 mdast_util_to_markdown/tests/paragraph.rs create mode 100644 mdast_util_to_markdown/tests/strong.rs create mode 100644 mdast_util_to_markdown/tests/text.rs diff --git a/Cargo.toml b/Cargo.toml index c8165337..b6a0c4cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,12 @@ keywords = ["commonmark", "markdown", "parse", "render", "tokenize"] categories = ["compilers", "encoding", "parser-implementations", "parsing", "text-processing"] include = ["src/", "license"] +[workspace] +members = ["generate", "mdast_util_to_markdown"] + +[workspace.dependencies] +pretty_assertions = "1" + [[bench]] name = "bench" path = "benches/bench.rs" @@ -31,13 +37,10 @@ serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] env_logger = "0.11" criterion = "0.5" -pretty_assertions = "1" +pretty_assertions = { workspace = true } swc_core = { version = "0.100", features = [ "ecma_ast", "ecma_visit", "ecma_parser", "common", ] } - -[workspace] -members = ["generate", "mdast_util_to_markdown"] diff --git a/mdast_util_to_markdown/Cargo.toml b/mdast_util_to_markdown/Cargo.toml index 89d0b425..faf3bccb 100644 --- a/mdast_util_to_markdown/Cargo.toml +++ b/mdast_util_to_markdown/Cargo.toml @@ -7,3 +7,6 @@ license = "MIT" [dependencies] markdown = { path = "../" } regex = { version = "1.7.3" } + +[dev-dependencies] +pretty_assertions = { workspace = true } diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 5834e001..6027e0c0 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,4 +1,4 @@ -use crate::{state::Info, State}; +use crate::{message::Message, state::Info, State}; use alloc::string::String; mod paragraph; @@ -6,6 +6,5 @@ pub mod strong; mod text; pub trait Handle { - type Error; 
- fn handle(&self, state: &mut State, info: &Info) -> Result; + fn handle(&self, state: &mut State, info: &Info) -> Result; } diff --git a/mdast_util_to_markdown/src/handle/paragraph.rs b/mdast_util_to_markdown/src/handle/paragraph.rs index 792950b9..5b00bec6 100644 --- a/mdast_util_to_markdown/src/handle/paragraph.rs +++ b/mdast_util_to_markdown/src/handle/paragraph.rs @@ -1,17 +1,15 @@ -use alloc::string::String; use markdown::mdast::Paragraph; use crate::{ construct_name::ConstructName, + message::Message, state::{Info, State}, }; use super::Handle; impl Handle for Paragraph { - type Error = String; - - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle(&self, state: &mut State, info: &Info) -> Result { state.enter(ConstructName::Paragraph); state.enter(ConstructName::Phrasing); diff --git a/mdast_util_to_markdown/src/handle/strong.rs b/mdast_util_to_markdown/src/handle/strong.rs index e2580c18..9995bd6d 100644 --- a/mdast_util_to_markdown/src/handle/strong.rs +++ b/mdast_util_to_markdown/src/handle/strong.rs @@ -1,8 +1,9 @@ -use alloc::{format, string::String}; +use alloc::format; use markdown::mdast::Strong; use crate::{ construct_name::ConstructName, + message::Message, state::{Info, State}, util::check_strong::check_strong, }; @@ -10,9 +11,7 @@ use crate::{ use super::Handle; impl Handle for Strong { - type Error = String; - - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle(&self, state: &mut State, info: &Info) -> Result { let marker = check_strong(state)?; state.enter(ConstructName::Strong); @@ -32,6 +31,6 @@ impl Handle for Strong { } } -pub fn peek_strong(_state: &State) -> String { - "*".into() +pub fn peek_strong(_state: &State) -> char { + '*' } diff --git a/mdast_util_to_markdown/src/handle/text.rs b/mdast_util_to_markdown/src/handle/text.rs index 1b7c41b4..cada9306 100644 --- a/mdast_util_to_markdown/src/handle/text.rs +++ b/mdast_util_to_markdown/src/handle/text.rs @@ -1,7 +1,7 @@ -use 
alloc::string::String; use markdown::mdast::Text; use crate::{ + message::Message, state::{Info, State}, util::safe::SafeConfig, }; @@ -9,9 +9,7 @@ use crate::{ use super::Handle; impl Handle for Text { - type Error = String; - - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle(&self, state: &mut State, info: &Info) -> Result { Ok(state.safe( &self.value, &SafeConfig::new(Some(info.before), Some(info.after), None), diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index 8d4be12f..522dccb8 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -3,78 +3,31 @@ use alloc::string::String; pub use configure::Options; use markdown::mdast::Node; +use message::Message; use state::{Info, State}; extern crate alloc; mod configure; mod construct_name; mod handle; +pub mod message; mod parents; mod state; mod r#unsafe; mod util; -pub fn to_markdown(tree: &Node, _options: &Options) -> Result { - let mut state = State::new(); - let result = state.handle(tree, &Info::new("\n", "\n"))?; - Ok(result) +pub fn to_markdown(tree: &Node) -> Result { + to_markdown_with_options(tree, &Options::default()) } -#[cfg(test)] -mod init_tests { - use super::*; - use alloc::{string::String, vec}; - - use markdown::mdast::{Node, Paragraph, Strong, Text}; - - #[test] - fn it_works_for_simple_text() { - let text_a = Node::Text(Text { - value: String::from("a"), - position: None, - }); - let text_b = Node::Text(Text { - value: String::from("b"), - position: None, - }); - let paragraph = Node::Paragraph(Paragraph { - children: vec![text_a, text_b], - position: None, - }); - let actual = to_markdown(¶graph, &Default::default()).unwrap(); - assert_eq!(actual, String::from("ab")); - } - - #[test] - fn it_escape() { - let text_a = Node::Text(Text { - value: String::from("![](a.jpg)"), - position: None, - }); - let paragraph = Node::Paragraph(Paragraph { - children: vec![text_a], - position: None, - }); - let actual 
= to_markdown(¶graph, &Default::default()).unwrap(); - assert_eq!(actual, "!\\[]\\(a.jpg)"); - } - - #[test] - fn it_will_strong() { - let text_a = Node::Text(Text { - value: String::from("a"), - position: None, - }); - - let text_b = Node::Text(Text { - value: String::from("b"), - position: None, - }); - let strong = Node::Strong(Strong { - children: vec![text_a, text_b], - position: None, - }); - let actual = to_markdown(&strong, &Default::default()).unwrap(); - assert_eq!(actual, "**ab**"); +pub fn to_markdown_with_options(tree: &Node, options: &Options) -> Result { + let mut state = State::new(options); + let mut result = state.handle(tree, &Info::new("\n", "\n"))?; + if !result.is_empty() { + let last_char = result.chars().last().unwrap(); + if last_char != '\n' && last_char != '\r' { + result.push('\n'); + } } + Ok(result) } diff --git a/mdast_util_to_markdown/src/message.rs b/mdast_util_to_markdown/src/message.rs new file mode 100644 index 00000000..c3f39447 --- /dev/null +++ b/mdast_util_to_markdown/src/message.rs @@ -0,0 +1,24 @@ +use core::{error::Error, fmt::Display}; + +use alloc::string::{String, ToString}; + +#[derive(Debug, PartialEq)] +pub struct Message { + pub reason: String, +} + +impl Error for Message {} + +impl Display for Message { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.reason) + } +} + +impl From<&str> for Message { + fn from(value: &str) -> Self { + Message { + reason: value.to_string(), + } + } +} diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 1a9c3ecf..348a708f 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,6 +1,8 @@ use crate::construct_name::ConstructName; use crate::handle::strong::peek_strong; use crate::handle::Handle; +use crate::message::Message; +use crate::Options; use crate::{ parents::Parent, r#unsafe::Unsafe, @@ -11,12 +13,7 @@ use crate::{ safe::{escape_backslashes, 
EscapeInfos, SafeConfig}, }, }; -use alloc::{ - collections::BTreeMap, - format, - string::{String, ToString}, - vec::Vec, -}; +use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; use regex::Regex; @@ -33,6 +30,7 @@ pub struct State<'a> { index_stack: Vec, bullet_last_used: Option, r#unsafe: Vec>, + pub options: &'a Options, } pub struct Info<'a> { @@ -48,12 +46,13 @@ impl<'a> Info<'a> { #[allow(dead_code)] impl<'a> State<'a> { - pub fn new() -> Self { + pub fn new(options: &'a Options) -> Self { State { stack: Vec::new(), index_stack: Vec::new(), bullet_last_used: None, r#unsafe: Unsafe::get_default_unsafe(), + options, } } @@ -65,19 +64,17 @@ impl<'a> State<'a> { self.stack.pop(); } - pub fn handle(&mut self, node: &Node, info: &Info) -> Result { + pub fn handle(&mut self, node: &Node, info: &Info) -> Result { match node { Node::Paragraph(paragraph) => paragraph.handle(self, info), Node::Text(text) => text.handle(self, info), Node::Strong(strong) => strong.handle(self, info), - _ => panic!("Not handled yet"), + _ => Err(Message { + reason: "Cannot handle node".into(), + }), } } - //fn handle_root(&mut self, node: &Root, info: &Info) -> String { - // self.container_flow(node, info) - //} - pub fn safe(&mut self, input: &String, config: &SafeConfig) -> String { let value = format!("{}{}{}", config.before, input, config.after); let mut positions: Vec = Vec::new(); @@ -159,22 +156,19 @@ impl<'a> State<'a> { start = *position; - let char_match = Regex::new(r"[!-/:-@\[-{-~]").unwrap(); - if let Some(char_at_pos) = char_match.find_at(&value, *position).iter().next() { - match &config.encode { - Some(encode) => { - if encode.contains(&char_at_pos.as_str()) { - result.push('\\'); - } - } - None => result.push('\\'), + let char_at_pos = value.chars().nth(*position); + match char_at_pos { + Some('!'..='/') | Some(':'..='@') | Some('['..='`') | Some('{'..='~') => { + Self::encode(config, char_at_pos, &mut result) } - } else if let 
Some(character) = value.chars().nth(*position) { - let code = u32::from(character); - let hex_string = format!("{:X}", code); - result.push_str(&format!("&#x{};", hex_string)); - start += 1; - } + Some(character) => { + let code = u32::from(character); + let hex_string = format!("{:X}", code); + result.push_str(&format!("&#x{};", hex_string)); + start += 1; + } + _ => (), + }; } result.push_str(&escape_backslashes(&value[start..end], config.after)); @@ -182,10 +176,21 @@ impl<'a> State<'a> { result } + fn encode(config: &SafeConfig, char_at_pos: Option, result: &mut String) { + match &config.encode { + Some(encode) => { + if encode.contains(&char_at_pos.unwrap()) { + result.push('\\'); + } + } + None => result.push('\\'), + } + } + fn compile_pattern(pattern: &mut Unsafe) { if pattern.compiled.is_none() { let before = if pattern.at_break.unwrap_or(false) { - r"[\\r\\n][\\t ]*" + "[\\r\\n][\\t ]*" } else { "" }; @@ -212,7 +217,7 @@ impl<'a> State<'a> { .unwrap() .is_match(pattern.character) { - r"\" + "\\" } else { "" }; @@ -226,7 +231,7 @@ impl<'a> State<'a> { &mut self, parent: &T, info: &Info, - ) -> Result { + ) -> Result { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; @@ -238,29 +243,25 @@ impl<'a> State<'a> { *top = index; } - let after = if let Some(child) = children_iter.peek() { - match self.determine_first_char(child) { - Some(after_char) => after_char, - None => self - .handle(child, &Info::new("", ""))? + let mut new_info = Info::new(info.before, info.after); + let mut buffer = [0u8; 4]; + if let Some(child) = children_iter.peek() { + if let Some(first_char) = self.determine_first_char(child) { + new_info.after = first_char.encode_utf8(&mut buffer); + } else { + self.handle(child, &Info::new("", ""))? 
.chars() .nth(0) .unwrap_or_default() - .to_string(), + .encode_utf8(&mut buffer); } - } else { - String::from(info.after) - }; + } - if results.is_empty() { - results.push_str(&self.handle(child, &Info::new(info.before, after.as_ref()))?); - } else { - results.push_str(&self.handle( - child, - &Info::new(&results[results.len() - 1..], after.as_ref()), - )?); + if !results.is_empty() { + new_info.before = &results[results.len() - 1..]; } + results.push_str(&self.handle(child, &new_info)?); index += 1; } @@ -269,14 +270,14 @@ impl<'a> State<'a> { Ok(results) } - fn determine_first_char(&self, node: &Node) -> Option { + fn determine_first_char(&self, node: &Node) -> Option { match node { Node::Strong(_) => Some(peek_strong(self)), _ => None, } } - fn container_flow(&mut self, parent: &T, _info: &Info) -> Result { + fn container_flow(&mut self, parent: &T, _info: &Info) -> Result { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index e750a3f0..6e1ef2e5 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -5,6 +5,7 @@ use crate::construct_name::ConstructName; #[derive(Default)] pub struct Unsafe<'a> { + // TODO this could be a char pub character: &'a str, pub in_construct: Option, pub not_in_construct: Option, @@ -54,14 +55,14 @@ impl<'a> Unsafe<'a> { Self::new( "\t", None, - r"[\\r\\n]".into(), + "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( "\t", - r"[\\r\\n]".into(), + "[\\r\\n]".into(), None, Construct::Single(ConstructName::Phrasing).into(), None, @@ -114,14 +115,14 @@ impl<'a> Unsafe<'a> { Self::new( " ", None, - r"[\\r\\n]".into(), + "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, ), Self::new( " ", - r"[\\r\\n]".into(), + "[\\r\\n]".into(), None, 
Construct::Single(ConstructName::Phrasing).into(), None, @@ -142,7 +143,7 @@ impl<'a> Unsafe<'a> { Self::new( "!", None, - r"\[".into(), + "\\[".into(), Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, @@ -159,7 +160,7 @@ impl<'a> Unsafe<'a> { Self::new( "&", None, - r"[#A-Za-z]".into(), + "[#A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), None, None, @@ -182,13 +183,13 @@ impl<'a> Unsafe<'a> { ), Self::new( "(", - r"\]".into(), + "\\]".into(), None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new(")", r"\d+".into(), None, None, None, Some(true)), + Self::new(")", "\\d+".into(), None, None, None, Some(true)), Self::new( ")", None, @@ -197,7 +198,7 @@ impl<'a> Unsafe<'a> { None, None, ), - Self::new("*", None, r"(?:[ \t\r\n*])".into(), None, None, Some(true)), + Self::new("*", None, "(?:[ \t\r\n*])".into(), None, None, Some(true)), Self::new( "*", None, @@ -206,17 +207,17 @@ impl<'a> Unsafe<'a> { Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new("+", None, r"(?:[ \t\r\n])".into(), None, None, Some(true)), - Self::new("-", None, r"(?:[ \t\r\n-])".into(), None, None, Some(true)), + Self::new("+", None, "(?:[ \t\r\n])".into(), None, None, Some(true)), + Self::new("-", None, "(?:[ \t\r\n-])".into(), None, None, Some(true)), Self::new( ".", - r"\d+".into(), + "\\d+".into(), "(?:[ \t\r\n]|$)".into(), None, None, Some(true), ), - Self::new("<", None, r"[!/?A-Za-z]".into(), None, None, Some(true)), + Self::new("<", None, "[!/?A-Za-z]".into(), None, None, Some(true)), Self::new( "<", None, @@ -261,7 +262,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - r"\", + "\\", None, "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), diff --git a/mdast_util_to_markdown/src/util/check_strong.rs b/mdast_util_to_markdown/src/util/check_strong.rs index 67eddb0e..622ad94e 100644 --- 
a/mdast_util_to_markdown/src/util/check_strong.rs +++ b/mdast_util_to_markdown/src/util/check_strong.rs @@ -1,7 +1,18 @@ -use alloc::string::String; +use alloc::format; -use crate::state::State; +use crate::{message::Message, state::State}; -pub fn check_strong(_state: &State) -> Result { - Ok('*') +pub fn check_strong(state: &State) -> Result { + let marker = state.options.strong; + + if marker != '*' && marker != '_' { + return Err(Message { + reason: format!( + "Cannot serialize strong with `{}` for `options.strong`, expected `*`, or `_`", + marker + ), + }); + } + + Ok(marker) } diff --git a/mdast_util_to_markdown/src/util/pattern_in_scope.rs b/mdast_util_to_markdown/src/util/pattern_in_scope.rs index 8debfa01..0480b339 100644 --- a/mdast_util_to_markdown/src/util/pattern_in_scope.rs +++ b/mdast_util_to_markdown/src/util/pattern_in_scope.rs @@ -13,13 +13,7 @@ fn list_in_scope(stack: &[ConstructName], list: &Option, none: bool) return none; }; match list { - Construct::Single(construct_name) => { - if stack.contains(construct_name) { - return true; - } - - false - } + Construct::Single(construct_name) => stack.contains(construct_name), Construct::List(constructs_names) => { if constructs_names.is_empty() { return none; diff --git a/mdast_util_to_markdown/src/util/safe.rs b/mdast_util_to_markdown/src/util/safe.rs index 6c5d106a..9a3eaf7d 100644 --- a/mdast_util_to_markdown/src/util/safe.rs +++ b/mdast_util_to_markdown/src/util/safe.rs @@ -4,14 +4,14 @@ use regex::Regex; pub struct SafeConfig<'a> { pub before: &'a str, pub after: &'a str, - pub encode: Option>, + pub encode: Option>, } impl<'a> SafeConfig<'a> { pub(crate) fn new( before: Option<&'a str>, after: Option<&'a str>, - encode: Option>, + encode: Option>, ) -> Self { SafeConfig { before: before.unwrap_or(""), diff --git a/mdast_util_to_markdown/tests/paragraph.rs b/mdast_util_to_markdown/tests/paragraph.rs new file mode 100644 index 00000000..d0a7265e --- /dev/null +++ 
b/mdast_util_to_markdown/tests/paragraph.rs @@ -0,0 +1,108 @@ +use markdown::mdast::{Node, Paragraph, Text}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn paragraph() { + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![], + position: None + })) + .unwrap(), + "", + "should support an empty paragraph" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + })) + .unwrap(), + "a\nb\n", + "should support a paragraph" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from(" a"), + position: None + })], + position: None + })) + .unwrap(), + " a\n", + "should encode spaces at the start of paragraphs" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a "), + position: None + })], + position: None + })) + .unwrap(), + "a \n", + "should encode spaces at the end of paragraphs" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("\t\ta"), + position: None + })], + position: None + })) + .unwrap(), + " \ta\n", + "should encode tabs at the start of paragraphs" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\t\t"), + position: None + })], + position: None + })) + .unwrap(), + "a\t \n", + "should encode tabs at the end of paragraphs" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a \n b"), + position: None + })], + position: None + })) + .unwrap(), + "a \n b\n", + "should encode spaces around line endings in paragraphs" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\t\t\n\t\tb"), + position: None + })], + position: None + })) + 
.unwrap(), + "a\t \n \tb\n", + "should encode spaces around line endings in paragraphs" + ); +} diff --git a/mdast_util_to_markdown/tests/strong.rs b/mdast_util_to_markdown/tests/strong.rs new file mode 100644 index 00000000..77d094a5 --- /dev/null +++ b/mdast_util_to_markdown/tests/strong.rs @@ -0,0 +1,66 @@ +use markdown::mdast::{Node, Strong, Text}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn strong() { + assert_eq!( + to(&Node::Strong(Strong { + children: Vec::new(), + position: None + })) + .unwrap(), + "****\n", + "should support an empty strong" + ); + + assert_eq!( + to_md_with_opts( + &Node::Strong(Strong { + children: Vec::new(), + position: None + }), + &Options { + strong: '?', + ..Default::default() + } + ), + Err("Cannot serialize strong with `?` for `options.strong`, expected `*`, or `_`".into()), + "should throw on when given an incorrect `strong`" + ); + + assert_eq!( + to(&Node::Strong(Strong { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None, + })], + position: None + })) + .unwrap(), + "**a**\n", + "should support a strong w/ children" + ); + + assert_eq!( + to_md_with_opts( + &Node::Strong(Strong { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None, + })], + position: None + }), + &Options { + strong: '_', + ..Default::default() + } + ) + .unwrap(), + "__a__\n", + "should support a strong w/ children" + ); +} diff --git a/mdast_util_to_markdown/tests/text.rs b/mdast_util_to_markdown/tests/text.rs new file mode 100644 index 00000000..58270ed8 --- /dev/null +++ b/mdast_util_to_markdown/tests/text.rs @@ -0,0 +1,27 @@ +use markdown::mdast::{Node, Text}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn text() { + assert_eq!( + to(&Node::Text(Text { + value: String::new(), + 
position: None, + })) + .unwrap(), + "", + "should support an empty text" + ); + + assert_eq!( + to(&Node::Text(Text { + value: String::from("a\nb"), + position: None, + })) + .unwrap(), + "a\nb\n", + "should support text" + ); +} From 8e2055a0827ff8b976e8c111cef555979c8ea586 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 4 Sep 2024 19:21:59 +0300 Subject: [PATCH 20/73] Add support for emphasis --- mdast_util_to_markdown/src/handle/emphasis.rs | 30 ++++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/handle/strong.rs | 4 +- mdast_util_to_markdown/src/lib.rs | 2 +- mdast_util_to_markdown/src/parents.rs | 4 +- mdast_util_to_markdown/src/state.rs | 8 ++- .../src/util/check_emphasis.rs | 18 +++++ mdast_util_to_markdown/src/util/mod.rs | 1 + mdast_util_to_markdown/tests/emphasis.rs | 70 +++++++++++++++++++ mdast_util_to_markdown/tests/strong.rs | 2 +- 10 files changed, 131 insertions(+), 9 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/emphasis.rs create mode 100644 mdast_util_to_markdown/src/util/check_emphasis.rs create mode 100644 mdast_util_to_markdown/tests/emphasis.rs diff --git a/mdast_util_to_markdown/src/handle/emphasis.rs b/mdast_util_to_markdown/src/handle/emphasis.rs new file mode 100644 index 00000000..febc40c4 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/emphasis.rs @@ -0,0 +1,30 @@ +use alloc::format; +use markdown::mdast::Emphasis; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::check_emphasis::check_emphasis, +}; + +use super::Handle; + +impl Handle for Emphasis { + fn handle(&self, state: &mut State, info: &Info) -> Result { + let marker = check_emphasis(state)?; + + state.enter(ConstructName::Emphasis); + + let mut value = format!("{}{}", marker, state.container_phrasing(self, info)?); + value.push(marker); + + state.exit(); + + Ok(value) + } +} + +pub fn peek_emphasis(state: &State) -> char { + state.options.emphasis +} diff --git 
a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 6027e0c0..d274478e 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,6 +1,7 @@ use crate::{message::Message, state::Info, State}; use alloc::string::String; +pub mod emphasis; mod paragraph; pub mod strong; mod text; diff --git a/mdast_util_to_markdown/src/handle/strong.rs b/mdast_util_to_markdown/src/handle/strong.rs index 9995bd6d..32a99b7b 100644 --- a/mdast_util_to_markdown/src/handle/strong.rs +++ b/mdast_util_to_markdown/src/handle/strong.rs @@ -31,6 +31,6 @@ impl Handle for Strong { } } -pub fn peek_strong(_state: &State) -> char { - '*' +pub fn peek_strong(state: &State) -> char { + state.options.strong } diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index 522dccb8..daefb10b 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -10,7 +10,7 @@ extern crate alloc; mod configure; mod construct_name; mod handle; -pub mod message; +mod message; mod parents; mod state; mod r#unsafe; diff --git a/mdast_util_to_markdown/src/parents.rs b/mdast_util_to_markdown/src/parents.rs index 7973c62f..9edf7458 100644 --- a/mdast_util_to_markdown/src/parents.rs +++ b/mdast_util_to_markdown/src/parents.rs @@ -1,5 +1,5 @@ use alloc::vec::Vec; -use markdown::mdast::{List, Node, Paragraph, Root, Strong}; +use markdown::mdast::{Emphasis, List, Node, Paragraph, Root, Strong}; pub trait Parent { fn children(&self) -> &Vec; @@ -29,4 +29,4 @@ macro_rules! 
impl_Parent { } } -impl_Parent!(for Root, Paragraph, Strong); +impl_Parent!(for Root, Paragraph, Strong, Emphasis); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 348a708f..971540d5 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,4 +1,5 @@ use crate::construct_name::ConstructName; +use crate::handle::emphasis::peek_emphasis; use crate::handle::strong::peek_strong; use crate::handle::Handle; use crate::message::Message; @@ -69,9 +70,9 @@ impl<'a> State<'a> { Node::Paragraph(paragraph) => paragraph.handle(self, info), Node::Text(text) => text.handle(self, info), Node::Strong(strong) => strong.handle(self, info), - _ => Err(Message { - reason: "Cannot handle node".into(), - }), + Node::Strong(strong) => strong.handle(self, info), + Node::Emphasis(emphasis) => emphasis.handle(self, info), + _ => Err("Cannot handle node".into()), } } @@ -273,6 +274,7 @@ impl<'a> State<'a> { fn determine_first_char(&self, node: &Node) -> Option { match node { Node::Strong(_) => Some(peek_strong(self)), + Node::Emphasis(_) => Some(peek_emphasis(self)), _ => None, } } diff --git a/mdast_util_to_markdown/src/util/check_emphasis.rs b/mdast_util_to_markdown/src/util/check_emphasis.rs new file mode 100644 index 00000000..c8f7856e --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_emphasis.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_emphasis(state: &State) -> Result { + let marker = state.options.emphasis; + + if marker != '*' && marker != '_' { + return Err(Message { + reason: format!( + "Cannot serialize emphasis with `{}` for `options.emphasis`, expected `*`, or `_`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index 6d369fa6..bd5b4b14 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -1,3 +1,4 
@@ +pub mod check_emphasis; pub mod check_strong; pub mod format_code_as_indented; pub mod format_heading_as_setext; diff --git a/mdast_util_to_markdown/tests/emphasis.rs b/mdast_util_to_markdown/tests/emphasis.rs new file mode 100644 index 00000000..be39cffc --- /dev/null +++ b/mdast_util_to_markdown/tests/emphasis.rs @@ -0,0 +1,70 @@ +use markdown::mdast::Emphasis; +use markdown::mdast::{Node, Text}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn emphasis() { + assert_eq!( + to(&Node::Emphasis(Emphasis { + children: Vec::new(), + position: None + })) + .unwrap(), + "**\n", + "should support an empty emphasis" + ); + + assert_eq!( + to_md_with_opts( + &Node::Emphasis(Emphasis { + children: Vec::new(), + position: None + }), + &Options { + emphasis: '?', + ..Default::default() + } + ), + Err( + "Cannot serialize emphasis with `?` for `options.emphasis`, expected `*`, or `_`" + .into() + ), + "should throw on when given an incorrect `emphasis`" + ); + + assert_eq!( + to(&Node::Emphasis(Emphasis { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None, + })], + position: None + })) + .unwrap(), + "*a*\n", + "should support an emphasis w/ children" + ); + + assert_eq!( + to_md_with_opts( + &Node::Emphasis(Emphasis { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None, + })], + position: None + }), + &Options { + emphasis: '_', + ..Default::default() + } + ) + .unwrap(), + "_a_\n", + "should support an emphasis w/ underscores when `emphasis: \"_\"`" + ); +} diff --git a/mdast_util_to_markdown/tests/strong.rs b/mdast_util_to_markdown/tests/strong.rs index 77d094a5..ce75d926 100644 --- a/mdast_util_to_markdown/tests/strong.rs +++ b/mdast_util_to_markdown/tests/strong.rs @@ -61,6 +61,6 @@ fn strong() { ) .unwrap(), "__a__\n", - "should support a strong w/ 
children" + "should support a strong w/ underscores when `emphasis: \"_\"`" ); } From 0d939d1693ed746956f67a1e062db1646fe6a71b Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 09:22:31 +0300 Subject: [PATCH 21/73] Fix minor bug --- mdast_util_to_markdown/src/state.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 971540d5..696a0831 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -70,7 +70,6 @@ impl<'a> State<'a> { Node::Paragraph(paragraph) => paragraph.handle(self, info), Node::Text(text) => text.handle(self, info), Node::Strong(strong) => strong.handle(self, info), - Node::Strong(strong) => strong.handle(self, info), Node::Emphasis(emphasis) => emphasis.handle(self, info), _ => Err("Cannot handle node".into()), } @@ -250,7 +249,8 @@ impl<'a> State<'a> { if let Some(first_char) = self.determine_first_char(child) { new_info.after = first_char.encode_utf8(&mut buffer); } else { - self.handle(child, &Info::new("", ""))? + new_info.after = self + .handle(child, &Info::new("", ""))? 
.chars() .nth(0) .unwrap_or_default() From 3072d023038d8ac87e76a41e930429cf616a4e4d Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 14:25:31 +0300 Subject: [PATCH 22/73] Add support for heading and break --- mdast_util_to_markdown/src/configure.rs | 4 +- mdast_util_to_markdown/src/handle/break.rs | 28 + mdast_util_to_markdown/src/handle/heading.rs | 74 +++ mdast_util_to_markdown/src/handle/mod.rs | 2 + mdast_util_to_markdown/src/parents.rs | 4 +- mdast_util_to_markdown/src/state.rs | 18 +- mdast_util_to_markdown/src/unsafe.rs | 8 + .../src/util/format_heading_as_setext.rs | 35 +- mdast_util_to_markdown/tests/break.rs | 67 +++ mdast_util_to_markdown/tests/heading.rs | 506 ++++++++++++++++++ 10 files changed, 719 insertions(+), 27 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/break.rs create mode 100644 mdast_util_to_markdown/src/handle/heading.rs create mode 100644 mdast_util_to_markdown/tests/break.rs create mode 100644 mdast_util_to_markdown/tests/heading.rs diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index cd1fc456..d5b328c5 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -13,7 +13,7 @@ pub struct Options { pub close_atx: bool, pub resource_link: bool, pub rule_spaces: bool, - pub set_text: bool, + pub setext: bool, pub tight_definitions: bool, pub rule_repetition: u32, } @@ -42,7 +42,7 @@ impl Default for Options { close_atx: false, rule_spaces: false, resource_link: false, - set_text: false, + setext: false, tight_definitions: false, } } diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs new file mode 100644 index 00000000..7dbece02 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -0,0 +1,28 @@ +use alloc::string::ToString; +use markdown::mdast::Break; +use regex::Regex; + +use crate::{ + message::Message, + state::{Info, State}, + 
util::pattern_in_scope::pattern_in_scope, +}; + +use super::Handle; + +impl Handle for Break { + fn handle(&self, state: &mut State, info: &Info) -> Result { + for pattern in state.r#unsafe.iter() { + if pattern.character == "\n" && pattern_in_scope(&state.stack, pattern) { + let regex = Regex::new(r"[ \t]").unwrap(); + if regex.is_match(info.before) { + return Ok("".to_string()); + } + + return Ok(" ".to_string()); + } + } + + Ok("\\\n".to_string()) + } +} diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs new file mode 100644 index 00000000..f873523d --- /dev/null +++ b/mdast_util_to_markdown/src/handle/heading.rs @@ -0,0 +1,74 @@ +use alloc::format; +use markdown::mdast::Heading; +use regex::Regex; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::format_heading_as_setext::format_heading_as_setext, +}; + +use super::Handle; + +impl Handle for Heading { + fn handle(&self, state: &mut State, _info: &Info) -> Result { + let rank = self.depth.clamp(1, 6); + + if format_heading_as_setext(self, state) { + state.enter(ConstructName::HeadingSetext); + state.enter(ConstructName::Phrasing); + let value = state.container_phrasing(self, &Info::new("\n", "\n"))?; + + state.exit(); + state.exit(); + + let underline_char = if rank == 1 { "=" } else { "-" }; + let last_line_rank = value + .rfind('\n') + .unwrap_or(0) + .max(value.rfind('\r').unwrap_or(0)); + + let last_line_rank = if last_line_rank > 0 { + last_line_rank + 1 + } else { + 0 + }; + + let setext_underline = underline_char.repeat(value.len() - last_line_rank); + let value = format!("{}\n{}", value, setext_underline); + + return Ok(value); + } + + let sequence = "#".repeat(rank as usize); + state.enter(ConstructName::HeadingAtx); + state.enter(ConstructName::Phrasing); + + let mut value = state.container_phrasing(self, &Info::new("# ", "\n"))?; + + let tab_or_space_regex = Regex::new(r"^[\t ]").unwrap(); + if 
tab_or_space_regex.is_match(&value) { + if let Some(first_char) = value.chars().nth(0) { + let hex_code = u32::from(first_char); + value = format!("&#x{:X};{}", hex_code, &value[1..]) + } + } + + if value.is_empty() { + value.push_str(&sequence); + } else { + value = format!("{} {}", &sequence, value); + } + + if state.options.close_atx { + value.push(' '); + value.push_str(&sequence); + } + + state.exit(); + state.exit(); + + Ok(value) + } +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index d274478e..2f38d20c 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,7 +1,9 @@ use crate::{message::Message, state::Info, State}; use alloc::string::String; +mod r#break; pub mod emphasis; +mod heading; mod paragraph; pub mod strong; mod text; diff --git a/mdast_util_to_markdown/src/parents.rs b/mdast_util_to_markdown/src/parents.rs index 9edf7458..4223109e 100644 --- a/mdast_util_to_markdown/src/parents.rs +++ b/mdast_util_to_markdown/src/parents.rs @@ -1,5 +1,5 @@ use alloc::vec::Vec; -use markdown::mdast::{Emphasis, List, Node, Paragraph, Root, Strong}; +use markdown::mdast::{Emphasis, Heading, List, Node, Paragraph, Root, Strong}; pub trait Parent { fn children(&self) -> &Vec; @@ -29,4 +29,4 @@ macro_rules! impl_Parent { } } -impl_Parent!(for Root, Paragraph, Strong, Emphasis); +impl_Parent!(for Root, Paragraph, Strong, Emphasis, Heading); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 696a0831..3364fc4d 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -25,12 +25,12 @@ enum Join { #[allow(dead_code)] pub struct State<'a> { - stack: Vec, + pub stack: Vec, // We use i64 for index_stack because -1 is used to mark the absense of children. // We don't use index_stack values to index into any child. 
index_stack: Vec, bullet_last_used: Option, - r#unsafe: Vec>, + pub r#unsafe: Vec>, pub options: &'a Options, } @@ -71,6 +71,8 @@ impl<'a> State<'a> { Node::Text(text) => text.handle(self, info), Node::Strong(strong) => strong.handle(self, info), Node::Emphasis(emphasis) => emphasis.handle(self, info), + Node::Heading(heading) => heading.handle(self, info), + Node::Break(r#break) => r#break.handle(self, info), _ => Err("Cannot handle node".into()), } } @@ -162,9 +164,8 @@ impl<'a> State<'a> { Self::encode(config, char_at_pos, &mut result) } Some(character) => { - let code = u32::from(character); - let hex_string = format!("{:X}", code); - result.push_str(&format!("&#x{};", hex_string)); + let hex_code = u32::from(character); + result.push_str(&format!("&#x{:X};", hex_code)); start += 1; } _ => (), @@ -339,11 +340,16 @@ impl<'a> State<'a> { if let Some(spread) = parent.spreadable() { if matches!(left, Node::Paragraph(_)) && Self::matches((left, right)) || matches!(right, Node::Definition(_)) - || format_heading_as_setext(right, self) { return None; } + if let Node::Heading(heading) = right { + if format_heading_as_setext(heading, self) { + return None; + } + } + if spread { return Some(Join::Number(1)); } diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index 6e1ef2e5..a35885f5 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -157,6 +157,14 @@ impl<'a> Unsafe<'a> { None, ), Self::new("#", None, None, None, None, Some(true)), + Self::new( + "#", + None, + "(?:[\r\n]|$)".into(), + Construct::Single(ConstructName::HeadingAtx).into(), + None, + None, + ), Self::new( "&", None, diff --git a/mdast_util_to_markdown/src/util/format_heading_as_setext.rs b/mdast_util_to_markdown/src/util/format_heading_as_setext.rs index ce7431a9..9fa55696 100644 --- a/mdast_util_to_markdown/src/util/format_heading_as_setext.rs +++ b/mdast_util_to_markdown/src/util/format_heading_as_setext.rs @@ -1,28 +1,25 
@@ -use alloc::string::ToString; -use markdown::mdast::Node; +use alloc::string::{String, ToString}; +use markdown::mdast::{Heading, Node}; use regex::Regex; use crate::state::State; -pub fn format_heading_as_setext(node: &Node, _state: &State) -> bool { - if let Node::Heading(heading) = node { - let line_break = Regex::new(r"\r?\n|\r").unwrap(); - let mut literal_with_break = false; - for child in &heading.children { - if include_literal_with_break(child, &line_break) { - literal_with_break = true; - break; - } +pub fn format_heading_as_setext(heading: &Heading, state: &State) -> bool { + let line_break = Regex::new(r"\r?\n|\r").unwrap(); + let mut literal_with_line_break = false; + for child in &heading.children { + if include_literal_with_line_break(child, &line_break) { + literal_with_line_break = true; + break; } - - return heading.depth == 0 - || heading.depth < 3 && !node.to_string().is_empty() && literal_with_break; } - false + heading.depth < 3 + && !to_string(&heading.children).is_empty() + && (state.options.setext || literal_with_line_break) } -fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { +fn include_literal_with_line_break(node: &Node, regex: &Regex) -> bool { match node { Node::Break(_) => true, Node::MdxjsEsm(x) => regex.is_match(&x.value), @@ -39,7 +36,7 @@ fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { _ => { if let Some(children) = node.children() { for child in children { - if include_literal_with_break(child, regex) { + if include_literal_with_line_break(child, regex) { return true; } } @@ -49,3 +46,7 @@ fn include_literal_with_break(node: &Node, regex: &Regex) -> bool { } } } + +fn to_string(children: &[Node]) -> String { + children.iter().map(ToString::to_string).collect() +} diff --git a/mdast_util_to_markdown/tests/break.rs b/mdast_util_to_markdown/tests/break.rs new file mode 100644 index 00000000..bda63b51 --- /dev/null +++ b/mdast_util_to_markdown/tests/break.rs @@ -0,0 +1,67 @@ +use 
markdown::mdast::{Break, Heading, Node, Text}; +use mdast_util_to_markdown::to_markdown as to; +use pretty_assertions::assert_eq; + +#[test] +fn r#break() { + assert_eq!( + to(&Node::Break(Break { position: None })).unwrap(), + "\\\n", + "should support a break" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + Node::Break(Break { position: None }), + Node::Text(Text { + value: String::from("b"), + position: None + }), + ], + position: None, + depth: 3 + })) + .unwrap(), + "### a b\n", + "should serialize breaks in heading (atx) as a space" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![ + Node::Text(Text { + value: String::from("a "), + position: None + }), + Node::Break(Break { position: None }), + Node::Text(Text { + value: String::from("b"), + position: None + }), + ], + position: None, + depth: 3 + })) + .unwrap(), + "### a b\n", + "should serialize breaks in heading (atx) as a space" + ); + + //assert_eq!( + // to_md_with_opts( + // &from("a \nb\n=\n", &Default::default()).unwrap(), + // &Options { + // setext: true, + // ..Default::default() + // } + // ) + // .unwrap(), + // "a\\\nb\n=\n", + // "should support a break" + //); +} diff --git a/mdast_util_to_markdown/tests/heading.rs b/mdast_util_to_markdown/tests/heading.rs new file mode 100644 index 00000000..a1dea1ce --- /dev/null +++ b/mdast_util_to_markdown/tests/heading.rs @@ -0,0 +1,506 @@ +use markdown::mdast::Break; +use markdown::mdast::{Heading, Node, Text}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn heading() { + assert_eq!( + to(&Node::Heading(Heading { + children: vec![], + position: None, + depth: 1 + })) + .unwrap(), + "#\n", + "should serialize a heading w/ rank 1" + ); + + assert_eq!( + to(&Node::Heading(Heading { + 
children: vec![], + position: None, + depth: 6 + })) + .unwrap(), + "######\n", + "should serialize a heading w/ rank 6" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![], + position: None, + depth: 7 + })) + .unwrap(), + "######\n", + "should serialize a heading w/ rank 7 as 6" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![], + position: None, + depth: 0 + })) + .unwrap(), + "#\n", + "should serialize a heading w/ rank 0 as 1" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a\n", + "should serialize a heading w/ content" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + depth: 1 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "a\n=\n", + "should serialize a heading w/ rank 1 as setext when `setext: true`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + depth: 2 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "a\n-\n", + "should serialize a heading w/ rank 2 as setext when `setext: true`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + depth: 3 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "### a\n", + "should serialize a heading w/ rank 3 as atx when `setext: true`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("aa\rb"), + position: None + })], + position: None, + depth: 2 + }), + &Options { + setext: true, + ..Default::default() + } + ) 
+ .unwrap(), + "aa\rb\n-\n", + "should serialize a setext underline as long as the last line (1)" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a\r\nbbb"), + position: None + })], + position: None, + depth: 1 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "a\r\nbbb\n===\n", + "should serialize a setext underline as long as the last line (2)" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![], + position: None, + depth: 1 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "#\n", + "should serialize an empty heading w/ rank 1 as atx when `setext: true`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![], + position: None, + depth: 2 + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "##\n", + "should serialize an empty heading w/ rank 1 as atx when `setext: true`" + ); + + //assert_eq!( + // to(&Node::Heading(Heading { + // children: vec![], + // position: None, + // depth: 1 + // }),) + // .unwrap(), + // "`\n`\n=\n", + // "should serialize an heading w/ rank 1 and code w/ a line ending as setext" + //); + + //assert_eq!( + // to(&Node::Heading(Heading { + // children: vec![], + // position: None, + // depth: 1 + // }),) + // .unwrap(), + // "\n==\n", + // "should serialize an heading w/ rank 1 and html w/ a line ending as setext" + //); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "a\nb\n=\n", + "should serialize an heading w/ rank 1 and text w/ a line ending as setext" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + Node::Break(Break { position: None }), + Node::Text(Text { + value: 
String::from("b"), + position: None + }), + ], + position: None, + depth: 1 + })) + .unwrap(), + "a\\\nb\n=\n", + "should serialize an heading w/ rank 1 and a break as setext" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![], + position: None, + depth: 1 + }), + &Options { + close_atx: true, + ..Default::default() + } + ) + .unwrap(), + "# #\n", + "should serialize a heading with a closing sequence when `closeAtx` (empty)" + ); + + assert_eq!( + to_md_with_opts( + &Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + depth: 3 + }), + &Options { + close_atx: true, + ..Default::default() + } + ) + .unwrap(), + "### a ###\n", + "should serialize a with a closing sequence when `closeAtx` (content)" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("# a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## # a\n", + "should not escape a `#` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("1) a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## 1) a\n", + "should not escape a `1)` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("+ a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## + a\n", + "should not escape a `+` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("- a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## - a\n", + "should not escape a `-` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: 
String::from("= a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## = a\n", + "should not escape a `=` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("> a"), + position: None + })], + position: None, + depth: 2 + })) + .unwrap(), + "## > a\n", + "should not escape a `>` at the start of phrasing in a heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a #"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a \\#\n", + "should escape a `#` at the end of a heading (1)" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a ##"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a #\\#\n", + "should escape a `#` at the end of a heading (2)" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a # b"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a # b\n", + "should not escape a `#` in a heading (2)" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from(" a"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a\n", + "should encode a space at the start of an atx heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("\t\ta"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# \ta\n", + "should encode a tab at the start of an atx heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a "), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a \n", + "should encode a space at the end of an atx heading" + ); + + assert_eq!( + 
to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a\t\t"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "# a\t \n", + "should encode a tab at the end of an atx heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a \n b"), + position: None + })], + position: None, + depth: 1 + })) + .unwrap(), + "a \n b\n=======\n", + "should encode spaces around a line ending in a setext heading" + ); + + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a \n b"), + position: None + })], + position: None, + depth: 3 + })) + .unwrap(), + "### a b\n", + "should not need to encode spaces around a line ending in an atx heading (because the line ending is encoded)" + ); +} From 464f32756ba40bbebdf9dc4074385f33883555bc Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 15:35:41 +0300 Subject: [PATCH 23/73] Refactor compile pattern --- mdast_util_to_markdown/src/handle/break.rs | 2 +- mdast_util_to_markdown/src/state.rs | 79 ++++++++++++-------- mdast_util_to_markdown/src/unsafe.rs | 85 +++++++++++----------- 3 files changed, 91 insertions(+), 75 deletions(-) diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs index 7dbece02..fe3aff3b 100644 --- a/mdast_util_to_markdown/src/handle/break.rs +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -13,7 +13,7 @@ use super::Handle; impl Handle for Break { fn handle(&self, state: &mut State, info: &Info) -> Result { for pattern in state.r#unsafe.iter() { - if pattern.character == "\n" && pattern_in_scope(&state.stack, pattern) { + if pattern.character == '\n' && pattern_in_scope(&state.stack, pattern) { let regex = Regex::new(r"[ \t]").unwrap(); if regex.is_match(info.before) { return Ok("".to_string()); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 3364fc4d..31e854fe 
100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -190,41 +190,58 @@ impl<'a> State<'a> { fn compile_pattern(pattern: &mut Unsafe) { if pattern.compiled.is_none() { - let before = if pattern.at_break.unwrap_or(false) { - "[\\r\\n][\\t ]*" - } else { - "" - }; + let mut pattern_to_compile = String::new(); - let before = format!( - "{}{}", - before, - pattern - .before - .map_or(String::new(), |before| format!("(?:{})", before)) - ); - - let before = if before.is_empty() { - String::new() - } else { - format!("({})", before) - }; + let at_break = pattern.at_break.unwrap_or(false); - let after = pattern - .after - .map_or(String::new(), |after| format!("(?:{})", after)); + if let Some(pattern_before) = pattern.before { + pattern_to_compile.push('('); - let special_char = if Regex::new(r"[\|\{}\()\[\]\\\^\$\+\*\?\.\-]") - .unwrap() - .is_match(pattern.character) - { - "\\" - } else { - "" - }; + if at_break { + pattern_to_compile.push_str("[\\r\\n][\\t ]*"); + } + + pattern_to_compile.push_str("(?:"); + pattern_to_compile.push_str(pattern_before); + pattern_to_compile.push(')'); + pattern_to_compile.push(')'); + } + + if pattern_to_compile.is_empty() && at_break { + pattern_to_compile.push('('); + pattern_to_compile.push_str("[\\r\\n][\\t ]*"); + pattern_to_compile.push(')'); + } + + if matches!( + pattern.character, + '|' | '\\' + | '{' + | '}' + | '(' + | ')' + | '[' + | ']' + | '^' + | '$' + | '+' + | '*' + | '?' + | '.' 
+ | '-' + ) { + pattern_to_compile.push('\\'); + } + + pattern_to_compile.push(pattern.character); + + if let Some(pattern_after) = pattern.after { + pattern_to_compile.push_str("(?:"); + pattern_to_compile.push_str(pattern_after); + pattern_to_compile.push(')'); + } - let regex = format!("{}{}{}{}", before, special_char, pattern.character, after); - pattern.set_compiled(Regex::new(®ex).unwrap()); + pattern.set_compiled(Regex::new(&pattern_to_compile).unwrap()); } } diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index a35885f5..e4375a44 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -5,8 +5,7 @@ use crate::construct_name::ConstructName; #[derive(Default)] pub struct Unsafe<'a> { - // TODO this could be a char - pub character: &'a str, + pub character: char, pub in_construct: Option, pub not_in_construct: Option, pub before: Option<&'a str>, @@ -23,7 +22,7 @@ pub enum Construct { impl<'a> Unsafe<'a> { pub fn new( - character: &'a str, + character: char, before: Option<&'a str>, after: Option<&'a str>, in_construct: Option, @@ -53,7 +52,7 @@ impl<'a> Unsafe<'a> { vec![ Self::new( - "\t", + '\t', None, "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -61,7 +60,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\t", + '\t', "[\\r\\n]".into(), None, Construct::Single(ConstructName::Phrasing).into(), @@ -69,7 +68,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\t", + '\t', None, None, Construct::List(vec![ @@ -81,7 +80,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\r", + '\r', None, None, Construct::List(vec![ @@ -97,7 +96,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\n", + '\n', None, None, Construct::List(vec![ @@ -113,7 +112,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - " ", + ' ', None, "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -121,7 +120,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - " ", + ' ', 
"[\\r\\n]".into(), None, Construct::Single(ConstructName::Phrasing).into(), @@ -129,7 +128,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - " ", + ' ', None, None, Construct::List(vec![ @@ -141,7 +140,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "!", + '!', None, "\\[".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -149,16 +148,16 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\"", + '\"', None, None, Construct::Single(ConstructName::TitleQuote).into(), None, None, ), - Self::new("#", None, None, None, None, Some(true)), + Self::new('#', None, None, None, None, Some(true)), Self::new( - "#", + '#', None, "(?:[\r\n]|$)".into(), Construct::Single(ConstructName::HeadingAtx).into(), @@ -166,7 +165,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "&", + '&', None, "[#A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -174,7 +173,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "'", + '\'', None, None, Construct::Single(ConstructName::TitleApostrophe).into(), @@ -182,7 +181,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "(", + '(', None, None, Construct::Single(ConstructName::DestinationRaw).into(), @@ -190,44 +189,44 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "(", + '(', "\\]".into(), None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new(")", "\\d+".into(), None, None, None, Some(true)), + Self::new(')', "\\d+".into(), None, None, None, Some(true)), Self::new( - ")", + ')', None, None, Construct::Single(ConstructName::DestinationRaw).into(), None, None, ), - Self::new("*", None, "(?:[ \t\r\n*])".into(), None, None, Some(true)), + Self::new('*', None, "(?:[ \t\r\n*])".into(), None, None, Some(true)), Self::new( - "*", + '*', None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new("+", None, "(?:[ \t\r\n])".into(), None, None, Some(true)), - Self::new("-", None, "(?:[ 
\t\r\n-])".into(), None, None, Some(true)), + Self::new('+', None, "(?:[ \t\r\n])".into(), None, None, Some(true)), + Self::new('-', None, "(?:[ \t\r\n-])".into(), None, None, Some(true)), Self::new( - ".", + '.', "\\d+".into(), "(?:[ \t\r\n]|$)".into(), None, None, Some(true), ), - Self::new("<", None, "[!/?A-Za-z]".into(), None, None, Some(true)), + Self::new('<', None, "[!/?A-Za-z]".into(), None, None, Some(true)), Self::new( - "<", + '<', None, "[!/?A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -235,26 +234,26 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "<", + '<', None, None, Construct::Single(ConstructName::DestinationLiteral).into(), None, None, ), - Self::new("=", None, None, None, None, Some(true)), - Self::new(">", None, None, None, None, Some(true)), + Self::new('=', None, None, None, None, Some(true)), + Self::new('>', None, None, None, None, Some(true)), Self::new( - ">", + '>', None, None, Construct::Single(ConstructName::DestinationLiteral).into(), None, Some(true), ), - Self::new("[", None, None, None, None, Some(true)), + Self::new('[', None, None, None, None, Some(true)), Self::new( - "[", + '[', None, None, Construct::Single(ConstructName::Phrasing).into(), @@ -262,7 +261,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "[", + '[', None, None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), @@ -270,7 +269,7 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "\\", + '\\', None, "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), @@ -278,25 +277,25 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "]", + ']', None, None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), None, None, ), - Self::new("_", None, None, None, None, Some(true)), + Self::new('_', None, None, None, None, Some(true)), Self::new( - "_", + '_', None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - 
Self::new("`", None, None, None, None, Some(true)), + Self::new('`', None, None, None, None, Some(true)), Self::new( - "`", + '`', None, None, Construct::List(vec![ @@ -308,14 +307,14 @@ impl<'a> Unsafe<'a> { None, ), Self::new( - "`", + '`', None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), None, ), - Self::new("~", None, None, None, None, Some(true)), + Self::new('~', None, None, None, None, Some(true)), ] } From e94d898b9acfcd2a7a27bc07ee34b38f63d7040e Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 15:41:38 +0300 Subject: [PATCH 24/73] More refactor for compile pattern --- mdast_util_to_markdown/src/state.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 31e854fe..4f11164c 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -196,18 +196,14 @@ impl<'a> State<'a> { if let Some(pattern_before) = pattern.before { pattern_to_compile.push('('); - if at_break { pattern_to_compile.push_str("[\\r\\n][\\t ]*"); } - pattern_to_compile.push_str("(?:"); pattern_to_compile.push_str(pattern_before); pattern_to_compile.push(')'); pattern_to_compile.push(')'); - } - - if pattern_to_compile.is_empty() && at_break { + } else if at_break { pattern_to_compile.push('('); pattern_to_compile.push_str("[\\r\\n][\\t ]*"); pattern_to_compile.push(')'); From 3d1a896a3c04b17a625394e43d8084be57fc4c9e Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 16:55:09 +0300 Subject: [PATCH 25/73] Make inde_stack usize --- mdast_util_to_markdown/src/handle/heading.rs | 5 +- mdast_util_to_markdown/src/state.rs | 56 ++++++++++---------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs index f873523d..e63e2569 100644 --- a/mdast_util_to_markdown/src/handle/heading.rs +++ 
b/mdast_util_to_markdown/src/handle/heading.rs @@ -18,7 +18,7 @@ impl Handle for Heading { if format_heading_as_setext(self, state) { state.enter(ConstructName::HeadingSetext); state.enter(ConstructName::Phrasing); - let value = state.container_phrasing(self, &Info::new("\n", "\n"))?; + let mut value = state.container_phrasing(self, &Info::new("\n", "\n"))?; state.exit(); state.exit(); @@ -36,7 +36,8 @@ impl Handle for Heading { }; let setext_underline = underline_char.repeat(value.len() - last_line_rank); - let value = format!("{}\n{}", value, setext_underline); + value.push('\n'); + value.push_str(&setext_underline); return Ok(value); } diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 4f11164c..941e7520 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -26,9 +26,7 @@ enum Join { #[allow(dead_code)] pub struct State<'a> { pub stack: Vec, - // We use i64 for index_stack because -1 is used to mark the absense of children. - // We don't use index_stack values to index into any child. 
- index_stack: Vec, + index_stack: Vec, bullet_last_used: Option, pub r#unsafe: Vec>, pub options: &'a Options, @@ -93,11 +91,10 @@ impl<'a> State<'a> { if let Some(regex) = &pattern.compiled { for m in regex.captures_iter(&value) { let full_match = m.get(0).unwrap(); - let captured_group_len = if let Some(captured_group) = m.get(1) { - captured_group.len() - } else { - 0 - }; + let captured_group_len = m + .get(1) + .map(|captured_group| captured_group.len()) + .unwrap_or(0); let before = pattern.before.is_some() || pattern.at_break.unwrap_or(false); let after = pattern.after.is_some(); @@ -161,7 +158,13 @@ impl<'a> State<'a> { let char_at_pos = value.chars().nth(*position); match char_at_pos { Some('!'..='/') | Some(':'..='@') | Some('['..='`') | Some('{'..='~') => { - Self::encode(config, char_at_pos, &mut result) + if let Some(encode) = &config.encode { + if encode.contains(&char_at_pos.unwrap()) { + result.push('\\'); + } + } else { + result.push('\\'); + } } Some(character) => { let hex_code = u32::from(character); @@ -177,17 +180,6 @@ impl<'a> State<'a> { result } - fn encode(config: &SafeConfig, char_at_pos: Option, result: &mut String) { - match &config.encode { - Some(encode) => { - if encode.contains(&char_at_pos.unwrap()) { - result.push('\\'); - } - } - None => result.push('\\'), - } - } - fn compile_pattern(pattern: &mut Unsafe) { if pattern.compiled.is_none() { let mut pattern_to_compile = String::new(); @@ -250,17 +242,21 @@ impl<'a> State<'a> { let mut children_iter = parent.children().iter().peekable(); let mut index = 0; - self.index_stack.push(-1); + if !parent.children().is_empty() { + self.index_stack.push(0); + } while let Some(child) = children_iter.next() { - if let Some(top) = self.index_stack.last_mut() { - *top = index; + if index > 0 { + if let Some(top) = self.index_stack.last_mut() { + *top = index; + } } let mut new_info = Info::new(info.before, info.after); let mut buffer = [0u8; 4]; if let Some(child) = children_iter.peek() { - if 
let Some(first_char) = self.determine_first_char(child) { + if let Some(first_char) = self.peek_node(child) { new_info.after = first_char.encode_utf8(&mut buffer); } else { new_info.after = self @@ -285,7 +281,7 @@ impl<'a> State<'a> { Ok(results) } - fn determine_first_char(&self, node: &Node) -> Option { + fn peek_node(&self, node: &Node) -> Option { match node { Node::Strong(_) => Some(peek_strong(self)), Node::Emphasis(_) => Some(peek_emphasis(self)), @@ -298,11 +294,15 @@ impl<'a> State<'a> { let mut children_iter = parent.children().iter().peekable(); let mut index = 0; - self.index_stack.push(-1); + if !parent.children().is_empty() { + self.index_stack.push(0); + } while let Some(child) = children_iter.next() { - if let Some(top) = self.index_stack.last_mut() { - *top = index; + if index > 0 { + if let Some(top) = self.index_stack.last_mut() { + *top = index; + } } if matches!(child, Node::List(_)) { From 16a3cb0a82a39210dd19885d232ad43223afb3be Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 18:23:00 +0300 Subject: [PATCH 26/73] Add support for html --- mdast_util_to_markdown/src/handle/html.rs | 18 ++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 18 +++- mdast_util_to_markdown/tests/heading.rs | 25 +++--- mdast_util_to_markdown/tests/html.rs | 103 ++++++++++++++++++++++ 5 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/html.rs create mode 100644 mdast_util_to_markdown/tests/html.rs diff --git a/mdast_util_to_markdown/src/handle/html.rs b/mdast_util_to_markdown/src/handle/html.rs new file mode 100644 index 00000000..352dbf49 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/html.rs @@ -0,0 +1,18 @@ +use markdown::mdast::Html; + +use crate::{ + message::Message, + state::{Info, State}, +}; + +use super::Handle; + +impl Handle for Html { + fn handle(&self, _state: &mut State, _info: &Info) -> Result { + Ok(self.value.clone()) + } +} + +pub fn 
peek_html() -> char { + '<' +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 2f38d20c..8a124d8b 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -4,6 +4,7 @@ use alloc::string::String; mod r#break; pub mod emphasis; mod heading; +pub mod html; mod paragraph; pub mod strong; mod text; diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 941e7520..dc739a3a 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,5 +1,6 @@ use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; +use crate::handle::html::peek_html; use crate::handle::strong::peek_strong; use crate::handle::Handle; use crate::message::Message; @@ -71,6 +72,7 @@ impl<'a> State<'a> { Node::Emphasis(emphasis) => emphasis.handle(self, info), Node::Heading(heading) => heading.handle(self, info), Node::Break(r#break) => r#break.handle(self, info), + Node::Html(html) => html.handle(self, info), _ => Err("Cannot handle node".into()), } } @@ -105,7 +107,6 @@ impl<'a> State<'a> { if entry.before && !before { entry.before = false; } - if entry.after && !after { entry.after = false; } @@ -269,7 +270,19 @@ impl<'a> State<'a> { } if !results.is_empty() { - new_info.before = &results[results.len() - 1..]; + if info.before == "\r" || info.before == "\n" && matches!(child, Node::Html(_)) { + if let Some(last_poped_char) = results.pop() { + if last_poped_char == '\n' { + if results.ends_with('\r') { + results.pop(); + } + } + } + results.push(' '); + new_info.before = " "; + } else { + new_info.before = &results[results.len() - 1..]; + } } results.push_str(&self.handle(child, &new_info)?); @@ -285,6 +298,7 @@ impl<'a> State<'a> { match node { Node::Strong(_) => Some(peek_strong(self)), Node::Emphasis(_) => Some(peek_emphasis(self)), + Node::Html(_) => Some(peek_html()), _ => None, } } diff --git 
a/mdast_util_to_markdown/tests/heading.rs b/mdast_util_to_markdown/tests/heading.rs index a1dea1ce..4b128890 100644 --- a/mdast_util_to_markdown/tests/heading.rs +++ b/mdast_util_to_markdown/tests/heading.rs @@ -1,4 +1,4 @@ -use markdown::mdast::Break; +use markdown::mdast::{Break, Html}; use markdown::mdast::{Heading, Node, Text}; use mdast_util_to_markdown::to_markdown as to; use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; @@ -211,16 +211,19 @@ fn heading() { // "should serialize an heading w/ rank 1 and code w/ a line ending as setext" //); - //assert_eq!( - // to(&Node::Heading(Heading { - // children: vec![], - // position: None, - // depth: 1 - // }),) - // .unwrap(), - // "\n==\n", - // "should serialize an heading w/ rank 1 and html w/ a line ending as setext" - //); + assert_eq!( + to(&Node::Heading(Heading { + children: vec![Node::Html(Html { + value: "".to_string(), + position: None + })], + position: None, + depth: 1 + }),) + .unwrap(), + "\n==\n", + "should serialize an heading w/ rank 1 and html w/ a line ending as setext" + ); assert_eq!( to(&Node::Heading(Heading { diff --git a/mdast_util_to_markdown/tests/html.rs b/mdast_util_to_markdown/tests/html.rs new file mode 100644 index 00000000..24967ea4 --- /dev/null +++ b/mdast_util_to_markdown/tests/html.rs @@ -0,0 +1,103 @@ +use markdown::mdast::{Html, Node, Paragraph, Text}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn html() { + assert_eq!( + to(&Node::Html(Html { + value: String::new(), + position: None + })) + .unwrap(), + "", + "should support an empty html" + ); + + assert_eq!( + to(&Node::Html(Html { + value: String::from("a\nb"), + position: None + })) + .unwrap(), + "a\nb\n", + "should support html" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: "a\n".to_string(), + position: None + }), + Node::Html(Html { + value: "
".to_string(), + position: None + }) + ], + position: None + })) + .unwrap(), + "a
\n", + "should prevent html (text) from becoming html (flow) (1)" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: "a\r".to_string(), + position: None + }), + Node::Html(Html { + value: "
".to_string(), + position: None + }) + ], + position: None + })) + .unwrap(), + "a
\n", + "should prevent html (text) from becoming html (flow) (2)" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: "a\r\n".to_string(), + position: None + }), + Node::Html(Html { + value: "
".to_string(), + position: None + }) + ], + position: None + })) + .unwrap(), + "a
\n", + "should prevent html (text) from becoming html (flow) (3)" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![ + Node::Html(Html { + value: "".to_string(), + position: None + }), + Node::Text(Text { + value: "a".to_string(), + position: None + }) + ], + position: None + })) + .unwrap(), + "a\n", + "should serialize html (text)" + ); +} From b9752b7d41da3e6ad1b9a9330a9261d76baf4783 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 5 Sep 2024 18:37:49 +0300 Subject: [PATCH 27/73] Fix minor bug in html --- mdast_util_to_markdown/src/state.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index dc739a3a..59c3b6e8 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -271,11 +271,10 @@ impl<'a> State<'a> { if !results.is_empty() { if info.before == "\r" || info.before == "\n" && matches!(child, Node::Html(_)) { - if let Some(last_poped_char) = results.pop() { - if last_poped_char == '\n' { - if results.ends_with('\r') { - results.pop(); - } + if results.ends_with('\n') || results.ends_with('\r') { + results.pop(); + if results.ends_with('\r') { + results.pop(); } } results.push(' '); From a8a2351fa776eb8bc27f78a2f8d3aa73127738e2 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 6 Sep 2024 09:15:53 +0300 Subject: [PATCH 28/73] Add valid expected unwraps --- mdast_util_to_markdown/src/state.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 59c3b6e8..0685bcc9 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -92,7 +92,7 @@ impl<'a> State<'a> { if let Some(regex) = &pattern.compiled { for m in regex.captures_iter(&value) { - let full_match = m.get(0).unwrap(); + let full_match = m.get(0).expect("Guaranteed to have a match"); let 
captured_group_len = m .get(1) .map(|captured_group| captured_group.len()) @@ -230,7 +230,9 @@ impl<'a> State<'a> { pattern_to_compile.push(')'); } - pattern.set_compiled(Regex::new(&pattern_to_compile).unwrap()); + pattern.set_compiled( + Regex::new(&pattern_to_compile).expect("A valid unsafe regex pattern"), + ); } } @@ -249,9 +251,11 @@ impl<'a> State<'a> { while let Some(child) = children_iter.next() { if index > 0 { - if let Some(top) = self.index_stack.last_mut() { - *top = index; - } + let top = self + .index_stack + .last_mut() + .expect("The stack is populated with at least one child position"); + *top = index; } let mut new_info = Info::new(info.before, info.after); @@ -271,6 +275,8 @@ impl<'a> State<'a> { if !results.is_empty() { if info.before == "\r" || info.before == "\n" && matches!(child, Node::Html(_)) { + // TODO Remove this check here it might not be needed since we're + // checking for the before info. if results.ends_with('\n') || results.ends_with('\r') { results.pop(); if results.ends_with('\r') { @@ -313,9 +319,11 @@ impl<'a> State<'a> { while let Some(child) = children_iter.next() { if index > 0 { - if let Some(top) = self.index_stack.last_mut() { - *top = index; - } + let top = self + .index_stack + .last_mut() + .expect("The stack is populated with at least one child position"); + *top = index; } if matches!(child, Node::List(_)) { From c1e3758b030f0a3416d6d5c50c8592b4d27d179b Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 6 Sep 2024 09:19:58 +0300 Subject: [PATCH 29/73] Fix typos --- mdast_util_to_markdown/src/state.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 0685bcc9..1af88100 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -132,9 +132,9 @@ impl<'a> State<'a> { // If this character is supposed to be escaped because it has a condition on // the next character, and the next character 
is definitly being escaped, // then skip this escape. - // This will never panic because we're checking the correct bounds, and we - // gurantee to have the positions as key in the infos map before reaching this - // execution. + // This will never panic because the bounds are properly checked, and we + // guarantee that the positions are already keys in the `infos` map before this + // point in execution. if index + 1 < positions.len() && position + 1 < end && positions[index + 1] == position + 1 From 4bf37ce3c756e8f0c2ab161bb20d18f62eacb1d2 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 6 Sep 2024 10:12:50 +0300 Subject: [PATCH 30/73] Add support for thematic break --- mdast_util_to_markdown/src/handle/mod.rs | 1 + .../src/handle/thematic_break.rs | 26 ++++++ mdast_util_to_markdown/src/state.rs | 1 + mdast_util_to_markdown/src/util/check_rule.rs | 18 ++++ .../src/util/check_rule_repetition.rs | 18 ++++ mdast_util_to_markdown/src/util/mod.rs | 2 + .../tests/thematic_break.rs | 91 +++++++++++++++++++ 7 files changed, 157 insertions(+) create mode 100644 mdast_util_to_markdown/src/handle/thematic_break.rs create mode 100644 mdast_util_to_markdown/src/util/check_rule.rs create mode 100644 mdast_util_to_markdown/src/util/check_rule_repetition.rs create mode 100644 mdast_util_to_markdown/tests/thematic_break.rs diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 8a124d8b..25f1b5d9 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -8,6 +8,7 @@ pub mod html; mod paragraph; pub mod strong; mod text; +mod thematic_break; pub trait Handle { fn handle(&self, state: &mut State, info: &Info) -> Result; diff --git a/mdast_util_to_markdown/src/handle/thematic_break.rs b/mdast_util_to_markdown/src/handle/thematic_break.rs new file mode 100644 index 00000000..39d77667 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/thematic_break.rs @@ -0,0 +1,26 @@ +use alloc::format; 
+use markdown::mdast::ThematicBreak; + +use crate::{ + message::Message, + state::{Info, State}, + util::{check_rule::check_rule, check_rule_repetition::check_rule_repetition}, +}; + +use super::Handle; + +impl Handle for ThematicBreak { + fn handle(&self, state: &mut State, _info: &Info) -> Result { + let marker = check_rule(state)?; + let space = if state.options.rule_spaces { " " } else { "" }; + let mut value = + format!("{}{}", marker, space).repeat(check_rule_repetition(state)? as usize); + + if state.options.rule_spaces { + value.pop(); // remove the last space + Ok(value) + } else { + Ok(value) + } + } +} diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 1af88100..40b514bb 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -73,6 +73,7 @@ impl<'a> State<'a> { Node::Heading(heading) => heading.handle(self, info), Node::Break(r#break) => r#break.handle(self, info), Node::Html(html) => html.handle(self, info), + Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info), _ => Err("Cannot handle node".into()), } } diff --git a/mdast_util_to_markdown/src/util/check_rule.rs b/mdast_util_to_markdown/src/util/check_rule.rs new file mode 100644 index 00000000..59e3a667 --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_rule.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_rule(state: &State) -> Result { + let marker = state.options.rule; + + if marker != '*' && marker != '-' && marker != '_' { + return Err(Message { + reason: format!( + "Cannot serialize rules with `{}` for `options.rule`, expected `*`, `-`, or `_`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/check_rule_repetition.rs b/mdast_util_to_markdown/src/util/check_rule_repetition.rs new file mode 100644 index 00000000..15a0d158 --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_rule_repetition.rs @@ 
-0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_rule_repetition(state: &State) -> Result { + let repetition = state.options.rule_repetition; + + if repetition < 3 { + return Err(Message { + reason: format!( + "Cannot serialize rules with repetition `{}` for `options.rule_repetition`, expected `3` or more", + repetition + ), + }); + } + + Ok(repetition) +} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index bd5b4b14..3d60ca49 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -1,4 +1,6 @@ pub mod check_emphasis; +pub mod check_rule; +pub mod check_rule_repetition; pub mod check_strong; pub mod format_code_as_indented; pub mod format_heading_as_setext; diff --git a/mdast_util_to_markdown/tests/thematic_break.rs b/mdast_util_to_markdown/tests/thematic_break.rs new file mode 100644 index 00000000..9b6cacee --- /dev/null +++ b/mdast_util_to_markdown/tests/thematic_break.rs @@ -0,0 +1,91 @@ +use markdown::mdast::{Node, ThematicBreak}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn thematic_break() { + assert_eq!( + to(&Node::ThematicBreak(ThematicBreak { position: None })).unwrap(), + "***\n", + "should support a thematic break" + ); + + assert_eq!( + to_md_with_opts( + &Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule: '-', + ..Default::default() + } + ) + .unwrap(), + "---\n", + "should support a thematic break w/ dashes when `rule: \"-\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule: '_', + ..Default::default() + } + ) + .unwrap(), + "___\n", + "should support a thematic break w/ underscores when `rule: \"_\"`" + ); + + assert_eq!( + to_md_with_opts( + 
&Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule: '.', + ..Default::default() + } + ), + Err("Cannot serialize rules with `.` for `options.rule`, expected `*`, `-`, or `_`".into()), + "should throw on when given an incorrect `rule`" + ); + + assert_eq!( + to_md_with_opts( + &Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule_repetition: 5, + ..Default::default() + } + ) + .unwrap(), + "*****\n", + "should support a thematic break w/ more repetitions w/ `rule_repetition`" + ); + + assert_eq!( + to_md_with_opts( + &Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule_repetition: 2, + ..Default::default() + } + ), + Err("Cannot serialize rules with repetition `2` for `options.rule_repetition`, expected `3` or more".into()), + "should throw on when given an incorrect `ruleRepetition`" + ); + + assert_eq!( + to_md_with_opts( + &Node::ThematicBreak(ThematicBreak { position: None }), + &Options { + rule_spaces: true, + ..Default::default() + } + ) + .unwrap(), + "* * *\n", + "should support a thematic break w/ spaces w/ `rule_spaces`" + ); +} From 67aa7ee248fefdb9b5067b614084fe83b92fd8a4 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 7 Sep 2024 11:27:33 +0300 Subject: [PATCH 31/73] Add support for code --- mdast_util_to_markdown/src/configure.rs | 6 +- mdast_util_to_markdown/src/handle/code.rs | 87 +++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 33 +- .../src/util/check_fence.rs | 18 + .../src/util/format_code_as_indented.rs | 21 +- .../src/util/indent_lines.rs | 19 + .../src/util/longest_char_streak.rs | 44 +++ mdast_util_to_markdown/src/util/mod.rs | 3 + mdast_util_to_markdown/src/util/safe.rs | 4 +- mdast_util_to_markdown/tests/code.rs | 327 ++++++++++++++++++ 11 files changed, 539 insertions(+), 24 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/code.rs create mode 100644 mdast_util_to_markdown/src/util/check_fence.rs create mode 
100644 mdast_util_to_markdown/src/util/indent_lines.rs create mode 100644 mdast_util_to_markdown/src/util/longest_char_streak.rs create mode 100644 mdast_util_to_markdown/tests/code.rs diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index d5b328c5..6fa62807 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -4,7 +4,8 @@ pub struct Options { pub bullet_other: char, pub bullet_orderd: char, pub emphasis: char, - pub fences: char, + pub fence: char, + pub fences: bool, pub list_item_indent: IndentOptions, pub quote: char, pub rule: char, @@ -32,7 +33,8 @@ impl Default for Options { bullet_other: '-', bullet_orderd: '.', emphasis: '*', - fences: '`', + fence: '`', + fences: true, increment_list_marker: false, rule_repetition: 3, list_item_indent: IndentOptions::One, diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs new file mode 100644 index 00000000..bbca12f0 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -0,0 +1,87 @@ +use alloc::{ + format, + string::{String, ToString}, +}; +use markdown::mdast::Code; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{ + check_fence::check_fence, format_code_as_indented::format_code_as_indented, + indent_lines::indent_lines, longest_char_streak::longest_char_streak, safe::SafeConfig, + }, +}; + +use super::Handle; + +impl Handle for Code { + fn handle(&self, state: &mut State, _info: &Info) -> Result { + let marker = check_fence(state)?; + + if format_code_as_indented(self, state) { + state.enter(ConstructName::CodeIndented); + let value = indent_lines(&self.value, map); + state.exit(); + return Ok(value); + } + + let sequence = marker + .to_string() + .repeat((longest_char_streak(&self.value, marker) + 1).max(3)); + + state.enter(ConstructName::CodeFenced); + let mut value = sequence.clone(); + + if let Some(lang) = 
&self.lang { + let code_fenced_lang_construct = if marker == '`' { + ConstructName::CodeFencedLangGraveAccent + } else { + ConstructName::CodeFencedLangTilde + }; + state.enter(code_fenced_lang_construct); + + value + .push_str(&state.safe(lang, &SafeConfig::new(Some(&value), " ".into(), Some('`')))); + + state.exit(); + + if let Some(meta) = &self.meta { + let code_fenced_meta_construct = if marker == '`' { + ConstructName::CodeFencedMetaGraveAccent + } else { + ConstructName::CodeFencedMetaTilde + }; + + state.enter(code_fenced_meta_construct); + value.push(' '); + + value.push_str( + &state.safe(meta, &SafeConfig::new(Some(&value), "\n".into(), Some('`'))), + ); + + state.exit(); + } + } + + value.push('\n'); + + if !self.value.is_empty() { + value.push_str(&self.value); + value.push('\n'); + } + + value.push_str(&sequence); + + Ok(value) + } +} + +fn map(value: &str, _line: usize, blank: bool) -> String { + if blank { + String::new() + } else { + format!(" {}", value) + } +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 25f1b5d9..f3a6e693 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -2,6 +2,7 @@ use crate::{message::Message, state::Info, State}; use alloc::string::String; mod r#break; +mod code; pub mod emphasis; mod heading; pub mod html; diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 40b514bb..d4897e72 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -74,6 +74,7 @@ impl<'a> State<'a> { Node::Break(r#break) => r#break.handle(self, info), Node::Html(html) => html.handle(self, info), Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info), + Node::Code(code) => code.handle(self, info), _ => Err("Cannot handle node".into()), } } @@ -161,17 +162,22 @@ impl<'a> State<'a> { match char_at_pos { Some('!'..='/') | Some(':'..='@') | Some('['..='`') | 
Some('{'..='~') => { if let Some(encode) = &config.encode { - if encode.contains(&char_at_pos.unwrap()) { + let character = char_at_pos.expect("To be a valid char"); + if *encode != character { result.push('\\'); + } else { + let encoded_char = Self::encode_char(character); + result.push_str(&encoded_char); + start += character.len_utf8(); } } else { result.push('\\'); } } Some(character) => { - let hex_code = u32::from(character); - result.push_str(&format!("&#x{:X};", hex_code)); - start += 1; + let encoded_char = Self::encode_char(character); + result.push_str(&encoded_char); + start += character.len_utf8(); } _ => (), }; @@ -182,6 +188,11 @@ impl<'a> State<'a> { result } + fn encode_char(character: char) -> String { + let hex_code = u32::from(character); + format!("&#x{:X};", hex_code) + } + fn compile_pattern(pattern: &mut Unsafe) { if pattern.compiled.is_none() { let mut pattern_to_compile = String::new(); @@ -366,10 +377,16 @@ impl<'a> State<'a> { } fn join_defaults(&self, left: &Node, right: &Node, parent: &T) -> Option { - if format_code_as_indented(right, self) - && (matches!(left, Node::List(_)) || format_code_as_indented(left, self)) - { - return Some(Join::Bool(false)); + if let Node::Code(code) = right { + if format_code_as_indented(code, self) && matches!(left, Node::List(_)) { + return Some(Join::Bool(false)); + } + + if let Node::Code(code) = left { + if format_code_as_indented(code, self) { + return Some(Join::Bool(false)); + } + } } if let Some(spread) = parent.spreadable() { diff --git a/mdast_util_to_markdown/src/util/check_fence.rs b/mdast_util_to_markdown/src/util/check_fence.rs new file mode 100644 index 00000000..f7d03c11 --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_fence.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_fence(state: &mut State) -> Result { + let marker = state.options.fence; + + if marker != '`' && marker != '~' { + return Err(Message { + reason: format!( + 
"Cannot serialize code with `{}` for `options.fence`, expected `` ` `` or `~`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/format_code_as_indented.rs b/mdast_util_to_markdown/src/util/format_code_as_indented.rs index 9167052f..acacf883 100644 --- a/mdast_util_to_markdown/src/util/format_code_as_indented.rs +++ b/mdast_util_to_markdown/src/util/format_code_as_indented.rs @@ -1,18 +1,15 @@ -use markdown::mdast::Node; +use markdown::mdast::Code; use regex::Regex; use crate::state::State; -pub fn format_code_as_indented(node: &Node, _state: &State) -> bool { - if let Node::Code(code) = node { - let white_space = Regex::new(r"[^ \r\n]").unwrap(); - let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); +pub fn format_code_as_indented(code: &Code, state: &State) -> bool { + let white_space = Regex::new(r"[^ \r\n]").unwrap(); + let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); - return !code.value.is_empty() - && code.lang.is_none() - && white_space.is_match(&code.value) - && !blank.is_match(&code.value); - } - - false + !state.options.fences + && !code.value.is_empty() + && code.lang.is_none() + && white_space.is_match(&code.value) + && !blank.is_match(&code.value) } diff --git a/mdast_util_to_markdown/src/util/indent_lines.rs b/mdast_util_to_markdown/src/util/indent_lines.rs new file mode 100644 index 00000000..374c493e --- /dev/null +++ b/mdast_util_to_markdown/src/util/indent_lines.rs @@ -0,0 +1,19 @@ +use alloc::string::String; +use regex::Regex; + +pub fn indent_lines(value: &str, map: fn(&str, usize, bool) -> String) -> String { + let mut result = String::new(); + let mut start = 0; + let mut line = 0; + let eol = Regex::new(r"\r?\n|\r").unwrap(); + for m in eol.captures_iter(value) { + let full_match = m.get(0).unwrap(); + let value_slice = &value[start..full_match.start()]; + result.push_str(&map(value_slice, line, value_slice.is_empty())); + 
result.push_str(full_match.as_str()); + start = full_match.start() + full_match.len(); + line += 1; + } + result.push_str(&map(&value[start..], line, value.is_empty())); + result +} diff --git a/mdast_util_to_markdown/src/util/longest_char_streak.rs b/mdast_util_to_markdown/src/util/longest_char_streak.rs new file mode 100644 index 00000000..7e30edb5 --- /dev/null +++ b/mdast_util_to_markdown/src/util/longest_char_streak.rs @@ -0,0 +1,44 @@ +pub fn longest_char_streak(haystack: &str, needle: char) -> usize { + let mut max = 0; + let mut chars = haystack.chars(); + + while let Some(char) = chars.next() { + if char == needle { + let mut count = 1; + for char in chars.by_ref() { + if char == needle { + count += 1; + } else { + break; + } + } + max = count.max(max); + } + } + + max +} + +#[cfg(test)] +mod code_handler_tests { + use super::*; + + #[test] + fn longest_streak_tests() { + assert_eq!(longest_char_streak("", 'f'), 0); + assert_eq!(longest_char_streak("foo", 'o'), 2); + assert_eq!(longest_char_streak("fo foo fo", 'o'), 2); + assert_eq!(longest_char_streak("fo foo foo", 'o'), 2); + + assert_eq!(longest_char_streak("fo fooo fo", 'o'), 3); + assert_eq!(longest_char_streak("fo fooo foo", 'o'), 3); + assert_eq!(longest_char_streak("ooo", 'o'), 3); + assert_eq!(longest_char_streak("fo fooo fooooo", 'o'), 5); + + assert_eq!(longest_char_streak("fo fooooo fooo", 'o'), 5); + assert_eq!(longest_char_streak("fo fooooo fooooo", 'o'), 5); + + assert_eq!(longest_char_streak("'`'", '`'), 1); + assert_eq!(longest_char_streak("'`'", '`'), 1); + } +} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index 3d60ca49..923abe03 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -1,8 +1,11 @@ pub mod check_emphasis; +pub mod check_fence; pub mod check_rule; pub mod check_rule_repetition; pub mod check_strong; pub mod format_code_as_indented; pub mod format_heading_as_setext; +pub mod 
indent_lines; +pub mod longest_char_streak; pub mod pattern_in_scope; pub mod safe; diff --git a/mdast_util_to_markdown/src/util/safe.rs b/mdast_util_to_markdown/src/util/safe.rs index 9a3eaf7d..03a85a88 100644 --- a/mdast_util_to_markdown/src/util/safe.rs +++ b/mdast_util_to_markdown/src/util/safe.rs @@ -4,14 +4,14 @@ use regex::Regex; pub struct SafeConfig<'a> { pub before: &'a str, pub after: &'a str, - pub encode: Option>, + pub encode: Option, } impl<'a> SafeConfig<'a> { pub(crate) fn new( before: Option<&'a str>, after: Option<&'a str>, - encode: Option>, + encode: Option, ) -> Self { SafeConfig { before: before.unwrap_or(""), diff --git a/mdast_util_to_markdown/tests/code.rs b/mdast_util_to_markdown/tests/code.rs new file mode 100644 index 00000000..4fbfa9b1 --- /dev/null +++ b/mdast_util_to_markdown/tests/code.rs @@ -0,0 +1,327 @@ +use markdown::mdast::{Code, Node}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn text() { + assert_eq!( + to_md_with_opts( + &Node::Code(Code { + value: String::from("a"), + position: None, + lang: None, + meta: None + }), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + " a\n", + "should support code w/ a value (indent)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("a"), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "```\na\n```\n", + "should support code w/ a value (fences)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a".to_string()), + meta: None + })) + .unwrap(), + "```a\n```\n", + "should support code w/ a lang" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: None, + meta: Some("a".to_string()) + })) + .unwrap(), + "```\n```\n", + "should support (ignore) code w/ only a meta" + ); + + 
assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a".to_string()), + meta: Some("b".to_string()) + })) + .unwrap(), + "```a b\n```\n", + "should support code w/ lang and meta" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a b".to_string()), + meta: None + })) + .unwrap(), + "```a b\n```\n", + "should encode a space in `lang`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a\nb".to_string()), + meta: None + })) + .unwrap(), + "```a b\n```\n", + "should encode a line ending in `lang`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a`b".to_string()), + meta: None + })) + .unwrap(), + "```a`b\n```\n", + "should encode a grave accent in `lang`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a\\-b".to_string()), + meta: None + })) + .unwrap(), + "```a\\\\-b\n```\n", + "should escape a backslash in `lang`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("x".to_string()), + meta: Some("a b".to_string()) + })) + .unwrap(), + "```x a b\n```\n", + "should not encode a space in `meta`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("x".to_string()), + meta: Some("a\nb".to_string()) + })) + .unwrap(), + "```x a b\n```\n", + "should encode a line ending in `meta`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("x".to_string()), + meta: Some("a`b".to_string()) + })) + .unwrap(), + "```x a`b\n```\n", + "should encode a grave accent in `meta`" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::new(), + position: None, + lang: Some("x".to_string()), + meta: Some("a\\-b".to_string()) + })) + .unwrap(), + "```x a\\\\-b\n```\n", + "should escape a backslash in 
`meta`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Code(Code { + value: String::new(), + position: None, + lang: None, + meta: None + }), + &Options { + fence: '~', + ..Default::default() + } + ) + .unwrap(), + "~~~\n~~~\n", + "should support fenced code w/ tildes when `fence: \"~\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Code(Code { + value: String::new(), + position: None, + lang: Some("a`b".to_string()), + meta: None + }), + &Options { + fence: '~', + ..Default::default() + } + ) + .unwrap(), + "~~~a`b\n~~~\n", + "should not encode a grave accent when using tildes for fences" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("```\nasd\n```"), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "````\n```\nasd\n```\n````\n", + "should use more grave accents for fences if there are streaks of grave accents in the value (fences)" + ); + + assert_eq!( + to_md_with_opts( + &Node::Code(Code { + value: String::from("~~~\nasd\n~~~"), + position: None, + lang: None, + meta: None + }), + &Options { + fence: '~', + ..Default::default() + } + ) + .unwrap(), + "~~~~\n~~~\nasd\n~~~\n~~~~\n", + "should use more tildes for fences if there are streaks of tildes in the value (fences)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("b"), + position: None, + lang: Some("a".to_string()), + meta: None + })) + .unwrap(), + "```a\nb\n```\n", + "should use a fence if there is an info" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from(" "), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "```\n \n```\n", + "should use a fence if there is only whitespace" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("\na"), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "```\n\na\n```\n", + "should use a fence if there first line is blank (void)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from(" \na"), + position: None, + lang: None, + meta: 
None + })) + .unwrap(), + "```\n \na\n```\n", + "should use a fence if there first line is blank (filled)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("a\n"), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "```\na\n\n```\n", + "should use a fence if there last line is blank (void)" + ); + + assert_eq!( + to(&Node::Code(Code { + value: String::from("a\n "), + position: None, + lang: None, + meta: None + })) + .unwrap(), + "```\na\n \n```\n", + "should use a fence if there last line is blank (filled)" + ); + + assert_eq!( + to_md_with_opts( + &Node::Code(Code { + value: String::from(" a\n\n b"), + position: None, + lang: None, + meta: None + }), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + " a\n\n b\n", + "should use an indent if the value is indented" + ); +} From b5427775fb34e8ac7319441adaa2901babe6da15 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 7 Sep 2024 14:30:10 +0300 Subject: [PATCH 32/73] Add support for blockquote without full tests support --- .../src/handle/blockquote.rs | 28 ++++++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/parents.rs | 4 +- mdast_util_to_markdown/src/state.rs | 3 +- mdast_util_to_markdown/tests/blockquote.rs | 56 +++++++++++++++++++ 5 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/blockquote.rs create mode 100644 mdast_util_to_markdown/tests/blockquote.rs diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs new file mode 100644 index 00000000..270206d0 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -0,0 +1,28 @@ +use alloc::string::String; +use markdown::mdast::BlockQuote; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::indent_lines::indent_lines, +}; + +use super::Handle; + +impl Handle for BlockQuote { + fn handle(&self, state: &mut State, 
_info: &Info) -> Result { + state.enter(ConstructName::Blockquote); + let value = indent_lines(&state.container_flow(self)?, map); + Ok(value) + } +} + +fn map(value: &str, _line: usize, blank: bool) -> String { + let mut result = String::from(">"); + if !blank { + result.push(' '); + } + result.push_str(value); + result +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index f3a6e693..77034da3 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,6 +1,7 @@ use crate::{message::Message, state::Info, State}; use alloc::string::String; +mod blockquote; mod r#break; mod code; pub mod emphasis; diff --git a/mdast_util_to_markdown/src/parents.rs b/mdast_util_to_markdown/src/parents.rs index 4223109e..a01536a4 100644 --- a/mdast_util_to_markdown/src/parents.rs +++ b/mdast_util_to_markdown/src/parents.rs @@ -1,5 +1,5 @@ use alloc::vec::Vec; -use markdown::mdast::{Emphasis, Heading, List, Node, Paragraph, Root, Strong}; +use markdown::mdast::{BlockQuote, Emphasis, Heading, List, Node, Paragraph, Root, Strong}; pub trait Parent { fn children(&self) -> &Vec; @@ -29,4 +29,4 @@ macro_rules! 
impl_Parent { } } -impl_Parent!(for Root, Paragraph, Strong, Emphasis, Heading); +impl_Parent!(for Root, Paragraph, Strong, Emphasis, Heading, BlockQuote); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index d4897e72..d55709a8 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -75,6 +75,7 @@ impl<'a> State<'a> { Node::Html(html) => html.handle(self, info), Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info), Node::Code(code) => code.handle(self, info), + Node::BlockQuote(block_quote) => block_quote.handle(self, info), _ => Err("Cannot handle node".into()), } } @@ -320,7 +321,7 @@ impl<'a> State<'a> { } } - fn container_flow(&mut self, parent: &T, _info: &Info) -> Result { + pub fn container_flow(&mut self, parent: &T) -> Result { let mut results: String = String::new(); let mut children_iter = parent.children().iter().peekable(); let mut index = 0; diff --git a/mdast_util_to_markdown/tests/blockquote.rs b/mdast_util_to_markdown/tests/blockquote.rs new file mode 100644 index 00000000..4dc021b4 --- /dev/null +++ b/mdast_util_to_markdown/tests/blockquote.rs @@ -0,0 +1,56 @@ +use markdown::mdast::{BlockQuote, Node, Paragraph, Text, ThematicBreak}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn block_quote() { + assert_eq!( + to(&Node::BlockQuote(BlockQuote { + children: vec![], + position: None, + })) + .unwrap(), + ">\n", + "should support a block quote" + ); + + assert_eq!( + to(&Node::BlockQuote(BlockQuote { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + })) + .unwrap(), + "> a\n", + "should support a block quote w/ a child" + ); + + assert_eq!( + to(&Node::BlockQuote(BlockQuote { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + 
Node::ThematicBreak(ThematicBreak { position: None }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }), + ], + position: None, + })) + .unwrap(), + "> a\n>\n> ***\n>\n> b\n", + "should support a block quote" + ); +} From 4969d1ad7b8d9946ea73eff70a08b9a4ce2e59ae Mon Sep 17 00:00:00 2001 From: Bnchi Date: Mon, 9 Sep 2024 18:43:19 +0300 Subject: [PATCH 33/73] Add support for list and list item --- mdast_util_to_markdown/src/configure.rs | 7 +- .../src/handle/blockquote.rs | 20 +- mdast_util_to_markdown/src/handle/break.rs | 10 +- mdast_util_to_markdown/src/handle/code.rs | 10 +- mdast_util_to_markdown/src/handle/emphasis.rs | 12 +- mdast_util_to_markdown/src/handle/heading.rs | 14 +- mdast_util_to_markdown/src/handle/html.rs | 10 +- mdast_util_to_markdown/src/handle/list.rs | 91 +++ .../src/handle/list_item.rs | 92 +++ mdast_util_to_markdown/src/handle/mod.rs | 11 +- .../src/handle/paragraph.rs | 12 +- mdast_util_to_markdown/src/handle/strong.rs | 12 +- mdast_util_to_markdown/src/handle/text.rs | 10 +- .../src/handle/thematic_break.rs | 10 +- mdast_util_to_markdown/src/lib.rs | 6 +- mdast_util_to_markdown/src/parents.rs | 32 - mdast_util_to_markdown/src/state.rs | 113 +-- .../src/util/check_bullet.rs | 18 + .../src/util/check_bullet_ordered.rs | 18 + .../src/util/check_bullet_other.rs | 30 + .../src/util/indent_lines.rs | 2 +- mdast_util_to_markdown/src/util/mod.rs | 3 + mdast_util_to_markdown/tests/list.rs | 704 ++++++++++++++++++ 23 files changed, 1128 insertions(+), 119 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/list.rs create mode 100644 mdast_util_to_markdown/src/handle/list_item.rs delete mode 100644 mdast_util_to_markdown/src/parents.rs create mode 100644 mdast_util_to_markdown/src/util/check_bullet.rs create mode 100644 mdast_util_to_markdown/src/util/check_bullet_ordered.rs create mode 100644 mdast_util_to_markdown/src/util/check_bullet_other.rs 
create mode 100644 mdast_util_to_markdown/tests/list.rs diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index 6fa62807..0960ea84 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -2,7 +2,7 @@ pub struct Options { pub bullet: char, pub bullet_other: char, - pub bullet_orderd: char, + pub bullet_ordered: char, pub emphasis: char, pub fence: char, pub fences: bool, @@ -20,6 +20,7 @@ pub struct Options { } #[allow(dead_code)] +#[derive(Copy, Clone)] pub enum IndentOptions { Mixed, One, @@ -31,11 +32,11 @@ impl Default for Options { Self { bullet: '*', bullet_other: '-', - bullet_orderd: '.', + bullet_ordered: '.', emphasis: '*', fence: '`', fences: true, - increment_list_marker: false, + increment_list_marker: true, rule_repetition: 3, list_item_indent: IndentOptions::One, quote: '"', diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 270206d0..806e58c6 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -1,5 +1,5 @@ use alloc::string::String; -use markdown::mdast::BlockQuote; +use markdown::mdast::{BlockQuote, Node}; use crate::{ construct_name::ConstructName, @@ -11,17 +11,27 @@ use crate::{ use super::Handle; impl Handle for BlockQuote { - fn handle(&self, state: &mut State, _info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { state.enter(ConstructName::Blockquote); - let value = indent_lines(&state.container_flow(self)?, map); + let value = indent_lines(&state.container_flow(node)?, map); Ok(value) } } fn map(value: &str, _line: usize, blank: bool) -> String { - let mut result = String::from(">"); + let marker = ">"; + let total_allocation = marker.len() + value.len() + 1; + let mut result = String::with_capacity(total_allocation); + 
result.push_str(marker); if !blank { - result.push(' '); + let blank_str = " "; + result.push_str(blank_str); } result.push_str(value); result diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs index fe3aff3b..f989e3b9 100644 --- a/mdast_util_to_markdown/src/handle/break.rs +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -1,5 +1,5 @@ use alloc::string::ToString; -use markdown::mdast::Break; +use markdown::mdast::{Break, Node}; use regex::Regex; use crate::{ @@ -11,7 +11,13 @@ use crate::{ use super::Handle; impl Handle for Break { - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { for pattern in state.r#unsafe.iter() { if pattern.character == '\n' && pattern_in_scope(&state.stack, pattern) { let regex = Regex::new(r"[ \t]").unwrap(); diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs index bbca12f0..9e0d45a0 100644 --- a/mdast_util_to_markdown/src/handle/code.rs +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -2,7 +2,7 @@ use alloc::{ format, string::{String, ToString}, }; -use markdown::mdast::Code; +use markdown::mdast::{Code, Node}; use crate::{ construct_name::ConstructName, @@ -17,7 +17,13 @@ use crate::{ use super::Handle; impl Handle for Code { - fn handle(&self, state: &mut State, _info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { let marker = check_fence(state)?; if format_code_as_indented(self, state) { diff --git a/mdast_util_to_markdown/src/handle/emphasis.rs b/mdast_util_to_markdown/src/handle/emphasis.rs index febc40c4..e0864785 100644 --- a/mdast_util_to_markdown/src/handle/emphasis.rs +++ b/mdast_util_to_markdown/src/handle/emphasis.rs @@ -1,5 +1,5 @@ use alloc::format; -use markdown::mdast::Emphasis; +use 
markdown::mdast::{Emphasis, Node}; use crate::{ construct_name::ConstructName, @@ -11,12 +11,18 @@ use crate::{ use super::Handle; impl Handle for Emphasis { - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { let marker = check_emphasis(state)?; state.enter(ConstructName::Emphasis); - let mut value = format!("{}{}", marker, state.container_phrasing(self, info)?); + let mut value = format!("{}{}", marker, state.container_phrasing(node, info)?); value.push(marker); state.exit(); diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs index e63e2569..f385796a 100644 --- a/mdast_util_to_markdown/src/handle/heading.rs +++ b/mdast_util_to_markdown/src/handle/heading.rs @@ -1,5 +1,5 @@ use alloc::format; -use markdown::mdast::Heading; +use markdown::mdast::{Heading, Node}; use regex::Regex; use crate::{ @@ -12,13 +12,19 @@ use crate::{ use super::Handle; impl Handle for Heading { - fn handle(&self, state: &mut State, _info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { let rank = self.depth.clamp(1, 6); if format_heading_as_setext(self, state) { state.enter(ConstructName::HeadingSetext); state.enter(ConstructName::Phrasing); - let mut value = state.container_phrasing(self, &Info::new("\n", "\n"))?; + let mut value = state.container_phrasing(node, &Info::new("\n", "\n"))?; state.exit(); state.exit(); @@ -46,7 +52,7 @@ impl Handle for Heading { state.enter(ConstructName::HeadingAtx); state.enter(ConstructName::Phrasing); - let mut value = state.container_phrasing(self, &Info::new("# ", "\n"))?; + let mut value = state.container_phrasing(node, &Info::new("# ", "\n"))?; let tab_or_space_regex = Regex::new(r"^[\t ]").unwrap(); if tab_or_space_regex.is_match(&value) { diff --git 
a/mdast_util_to_markdown/src/handle/html.rs b/mdast_util_to_markdown/src/handle/html.rs index 352dbf49..32ed6bd9 100644 --- a/mdast_util_to_markdown/src/handle/html.rs +++ b/mdast_util_to_markdown/src/handle/html.rs @@ -1,4 +1,4 @@ -use markdown::mdast::Html; +use markdown::mdast::{Html, Node}; use crate::{ message::Message, @@ -8,7 +8,13 @@ use crate::{ use super::Handle; impl Handle for Html { - fn handle(&self, _state: &mut State, _info: &Info) -> Result { + fn handle( + &self, + _state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { Ok(self.value.clone()) } } diff --git a/mdast_util_to_markdown/src/handle/list.rs b/mdast_util_to_markdown/src/handle/list.rs new file mode 100644 index 00000000..e58b201f --- /dev/null +++ b/mdast_util_to_markdown/src/handle/list.rs @@ -0,0 +1,91 @@ +use markdown::mdast::{List, Node}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{ + check_bullet::check_bullet, check_bullet_ordered::check_bullet_ordered, + check_bullet_other::check_bullet_other, check_rule::check_rule, + }, +}; + +use super::Handle; + +impl Handle for List { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { + state.enter(ConstructName::List); + let bullet_current = state.bullet_current; + + let mut bullet = if self.ordered { + check_bullet_ordered(state)? + } else { + check_bullet(state)? + }; + + let bullet_other = if self.ordered { + if bullet == '.' { + ')' + } else { + '.' + } + } else { + check_bullet_other(state)? 
+ }; + + let mut use_different_marker = false; + if let Some(bullet_last_used) = state.bullet_last_used { + use_different_marker = bullet == bullet_last_used; + } + + if !self.ordered && !self.children.is_empty() { + let is_valid_bullet = bullet == '*' || bullet == '-'; + let first_child_has_no_children = self.children[0].children().is_none(); + let is_within_bounds = state.stack.len() >= 4 && state.index_stack.len() >= 3; + if is_valid_bullet + && first_child_has_no_children + && is_within_bounds + && state.stack[state.stack.len() - 1] == ConstructName::List + && state.stack[state.stack.len() - 2] == ConstructName::ListItem + && state.stack[state.stack.len() - 3] == ConstructName::List + && state.stack[state.stack.len() - 4] == ConstructName::ListItem + && state.index_stack[state.index_stack.len() - 1] == 0 + && state.index_stack[state.index_stack.len() - 2] == 0 + && state.index_stack[state.index_stack.len() - 3] == 0 + { + use_different_marker = true; + } + } + + if check_rule(state)? 
== bullet { + for child in self.children.iter() { + if let Some(child_children) = child.children() { + if !child_children.is_empty() + && matches!(child, Node::ListItem(_)) + && matches!(child_children[0], Node::ThematicBreak(_)) + { + use_different_marker = true; + break; + } + } + } + } + + if use_different_marker { + bullet = bullet_other; + } + + state.bullet_current = Some(bullet); + let value = state.container_flow(node)?; + state.bullet_last_used = Some(bullet); + state.bullet_current = bullet_current; + state.exit(); + Ok(value) + } +} diff --git a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs new file mode 100644 index 00000000..1f3fff15 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -0,0 +1,92 @@ +use alloc::{ + format, + string::{String, ToString}, +}; +use markdown::mdast::{ListItem, Node}; + +use crate::{ + configure::IndentOptions, + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{check_bullet::check_bullet, indent_lines::indent_lines}, +}; + +use super::Handle; + +impl Handle for ListItem { + fn handle( + &self, + state: &mut State, + _info: &Info, + parent: Option<&Node>, + node: &Node, + ) -> Result { + let list_item_indent = state.options.list_item_indent; + let mut bullet = state + .bullet_current + .unwrap_or(check_bullet(state)?) + .to_string(); + + // This is equal to bullet.len() + 1, since we know bullet is always one byte long we can + // safely assign 2 to size. 
+ let mut size = 2; + if let Some(Node::List(list)) = parent { + if list.ordered { + let bullet_number = if let Some(start) = list.start { + start as usize + } else { + 1 + }; + + if state.options.increment_list_marker { + if let Some(position_node) = list.children.iter().position(|x| *x == *node) { + bullet = format!("{}{}", bullet_number + position_node, bullet); + } + } else { + bullet = format!("{}{}", bullet_number, bullet); + } + } + + size = bullet.len() + 1; + + if matches!(list_item_indent, IndentOptions::Tab) + || (matches!(list_item_indent, IndentOptions::Mixed) && list.spread || self.spread) + { + size = compute_size(size); + } + } + + state.enter(ConstructName::ListItem); + + let value = indent_lines(&state.container_flow(node)?, |line, index, blank| { + if index > 0 { + if blank { + String::new() + } else { + let blank = " ".repeat(size); + let mut result = String::with_capacity(blank.len() + line.len()); + result.push_str(&blank); + result.push_str(line); + result + } + } else if blank { + bullet.clone() + } else { + // size - bullet.len() will never panic because size > bullet.len() always. 
+ let blank = " ".repeat(size - bullet.len()); + let mut result = String::with_capacity(blank.len() + line.len() + bullet.len()); + result.push_str(&bullet); + result.push_str(&blank); + result.push_str(line); + result + } + }); + + Ok(value) + } +} + +fn compute_size(a: usize) -> usize { + ((a + 4 - 1) / 4) * 4 +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 77034da3..6c53053f 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,5 +1,6 @@ use crate::{message::Message, state::Info, State}; use alloc::string::String; +use markdown::mdast::Node; mod blockquote; mod r#break; @@ -7,11 +8,19 @@ mod code; pub mod emphasis; mod heading; pub mod html; +mod list; +mod list_item; mod paragraph; pub mod strong; mod text; mod thematic_break; pub trait Handle { - fn handle(&self, state: &mut State, info: &Info) -> Result; + fn handle( + &self, + state: &mut State, + info: &Info, + parent: Option<&Node>, + _node: &Node, + ) -> Result; } diff --git a/mdast_util_to_markdown/src/handle/paragraph.rs b/mdast_util_to_markdown/src/handle/paragraph.rs index 5b00bec6..1ada03b9 100644 --- a/mdast_util_to_markdown/src/handle/paragraph.rs +++ b/mdast_util_to_markdown/src/handle/paragraph.rs @@ -1,4 +1,4 @@ -use markdown::mdast::Paragraph; +use markdown::mdast::{Node, Paragraph}; use crate::{ construct_name::ConstructName, @@ -9,11 +9,17 @@ use crate::{ use super::Handle; impl Handle for Paragraph { - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { state.enter(ConstructName::Paragraph); state.enter(ConstructName::Phrasing); - let value = state.container_phrasing(self, info)?; + let value = state.container_phrasing(node, info)?; // exit phrasing state.exit(); // exit paragarph diff --git a/mdast_util_to_markdown/src/handle/strong.rs 
b/mdast_util_to_markdown/src/handle/strong.rs index 32a99b7b..928f343b 100644 --- a/mdast_util_to_markdown/src/handle/strong.rs +++ b/mdast_util_to_markdown/src/handle/strong.rs @@ -1,5 +1,5 @@ use alloc::format; -use markdown::mdast::Strong; +use markdown::mdast::{Node, Strong}; use crate::{ construct_name::ConstructName, @@ -11,7 +11,13 @@ use crate::{ use super::Handle; impl Handle for Strong { - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { let marker = check_strong(state)?; state.enter(ConstructName::Strong); @@ -20,7 +26,7 @@ impl Handle for Strong { "{}{}{}", marker, marker, - state.container_phrasing(self, info)? + state.container_phrasing(node, info)? ); value.push(marker); value.push(marker); diff --git a/mdast_util_to_markdown/src/handle/text.rs b/mdast_util_to_markdown/src/handle/text.rs index cada9306..78331ffe 100644 --- a/mdast_util_to_markdown/src/handle/text.rs +++ b/mdast_util_to_markdown/src/handle/text.rs @@ -1,4 +1,4 @@ -use markdown::mdast::Text; +use markdown::mdast::{Node, Text}; use crate::{ message::Message, @@ -9,7 +9,13 @@ use crate::{ use super::Handle; impl Handle for Text { - fn handle(&self, state: &mut State, info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { Ok(state.safe( &self.value, &SafeConfig::new(Some(info.before), Some(info.after), None), diff --git a/mdast_util_to_markdown/src/handle/thematic_break.rs b/mdast_util_to_markdown/src/handle/thematic_break.rs index 39d77667..23f98fda 100644 --- a/mdast_util_to_markdown/src/handle/thematic_break.rs +++ b/mdast_util_to_markdown/src/handle/thematic_break.rs @@ -1,5 +1,5 @@ use alloc::format; -use markdown::mdast::ThematicBreak; +use markdown::mdast::{Node, ThematicBreak}; use crate::{ message::Message, @@ -10,7 +10,13 @@ use crate::{ use super::Handle; impl Handle 
for ThematicBreak { - fn handle(&self, state: &mut State, _info: &Info) -> Result { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { let marker = check_rule(state)?; let space = if state.options.rule_spaces { " " } else { "" }; let mut value = diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index daefb10b..0f56c3e9 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -1,7 +1,7 @@ #![no_std] use alloc::string::String; -pub use configure::Options; +pub use configure::{IndentOptions, Options}; use markdown::mdast::Node; use message::Message; use state::{Info, State}; @@ -11,7 +11,6 @@ mod configure; mod construct_name; mod handle; mod message; -mod parents; mod state; mod r#unsafe; mod util; @@ -22,12 +21,13 @@ pub fn to_markdown(tree: &Node) -> Result { pub fn to_markdown_with_options(tree: &Node, options: &Options) -> Result { let mut state = State::new(options); - let mut result = state.handle(tree, &Info::new("\n", "\n"))?; + let mut result = state.handle(tree, &Info::new("\n", "\n"), None)?; if !result.is_empty() { let last_char = result.chars().last().unwrap(); if last_char != '\n' && last_char != '\r' { result.push('\n'); } } + Ok(result) } diff --git a/mdast_util_to_markdown/src/parents.rs b/mdast_util_to_markdown/src/parents.rs deleted file mode 100644 index a01536a4..00000000 --- a/mdast_util_to_markdown/src/parents.rs +++ /dev/null @@ -1,32 +0,0 @@ -use alloc::vec::Vec; -use markdown::mdast::{BlockQuote, Emphasis, Heading, List, Node, Paragraph, Root, Strong}; - -pub trait Parent { - fn children(&self) -> &Vec; - - fn spreadable(&self) -> Option { - None - } -} - -impl Parent for List { - fn children(&self) -> &Vec { - &self.children - } - - fn spreadable(&self) -> Option { - Some(self.spread) - } -} - -macro_rules! 
impl_Parent { - (for $($t:ty),+) => { - $(impl Parent for $t { - fn children(&self) -> &Vec { - &self.children - } - })* - } -} - -impl_Parent!(for Root, Paragraph, Strong, Emphasis, Heading, BlockQuote); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index d55709a8..c97dc778 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -6,7 +6,6 @@ use crate::handle::Handle; use crate::message::Message; use crate::Options; use crate::{ - parents::Parent, r#unsafe::Unsafe, util::{ format_code_as_indented::format_code_as_indented, @@ -19,16 +18,20 @@ use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; use regex::Regex; +#[allow(dead_code)] +#[derive(Debug)] enum Join { + True, + False, Number(usize), - Bool(bool), } #[allow(dead_code)] pub struct State<'a> { pub stack: Vec, - index_stack: Vec, - bullet_last_used: Option, + pub index_stack: Vec, + pub bullet_last_used: Option, + pub bullet_current: Option, pub r#unsafe: Vec>, pub options: &'a Options, } @@ -51,6 +54,7 @@ impl<'a> State<'a> { stack: Vec::new(), index_stack: Vec::new(), bullet_last_used: None, + bullet_current: None, r#unsafe: Unsafe::get_default_unsafe(), options, } @@ -64,18 +68,25 @@ impl<'a> State<'a> { self.stack.pop(); } - pub fn handle(&mut self, node: &Node, info: &Info) -> Result { + pub fn handle( + &mut self, + node: &Node, + info: &Info, + parent: Option<&Node>, + ) -> Result { match node { - Node::Paragraph(paragraph) => paragraph.handle(self, info), - Node::Text(text) => text.handle(self, info), - Node::Strong(strong) => strong.handle(self, info), - Node::Emphasis(emphasis) => emphasis.handle(self, info), - Node::Heading(heading) => heading.handle(self, info), - Node::Break(r#break) => r#break.handle(self, info), - Node::Html(html) => html.handle(self, info), - Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info), - Node::Code(code) => code.handle(self, 
info), - Node::BlockQuote(block_quote) => block_quote.handle(self, info), + Node::Paragraph(paragraph) => paragraph.handle(self, info, parent, node), + Node::Text(text) => text.handle(self, info, parent, node), + Node::Strong(strong) => strong.handle(self, info, parent, node), + Node::Emphasis(emphasis) => emphasis.handle(self, info, parent, node), + Node::Heading(heading) => heading.handle(self, info, parent, node), + Node::Break(r#break) => r#break.handle(self, info, parent, node), + Node::Html(html) => html.handle(self, info, parent, node), + Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info, parent, node), + Node::Code(code) => code.handle(self, info, parent, node), + Node::BlockQuote(block_quote) => block_quote.handle(self, info, parent, node), + Node::List(list) => list.handle(self, info, parent, node), + Node::ListItem(list_item) => list_item.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } @@ -249,16 +260,14 @@ impl<'a> State<'a> { } } - pub fn container_phrasing( - &mut self, - parent: &T, - info: &Info, - ) -> Result { + pub fn container_phrasing(&mut self, parent: &Node, info: &Info) -> Result { + let children = parent.children().expect("To be a parent."); + let mut results: String = String::new(); - let mut children_iter = parent.children().iter().peekable(); let mut index = 0; + let mut children_iter = children.iter().peekable(); - if !parent.children().is_empty() { + if !children.is_empty() { self.index_stack.push(0); } @@ -278,7 +287,7 @@ impl<'a> State<'a> { new_info.after = first_char.encode_utf8(&mut buffer); } else { new_info.after = self - .handle(child, &Info::new("", ""))? + .handle(child, &Info::new("", ""), Some(parent))? 
.chars() .nth(0) .unwrap_or_default() @@ -303,7 +312,7 @@ impl<'a> State<'a> { } } - results.push_str(&self.handle(child, &new_info)?); + results.push_str(&self.handle(child, &new_info, Some(parent))?); index += 1; } @@ -321,12 +330,14 @@ impl<'a> State<'a> { } } - pub fn container_flow(&mut self, parent: &T) -> Result { + pub fn container_flow(&mut self, parent: &Node) -> Result { + let children = parent.children().expect("To be a parent."); + let mut results: String = String::new(); - let mut children_iter = parent.children().iter().peekable(); + let mut children_iter = children.iter().peekable(); let mut index = 0; - if !parent.children().is_empty() { + if !children.is_empty() { self.index_stack.push(0); } @@ -343,7 +354,7 @@ impl<'a> State<'a> { self.bullet_last_used = None; } - results.push_str(&self.handle(child, &Info::new("\n", "\n"))?); + results.push_str(&self.handle(child, &Info::new("\n", "\n"), Some(parent))?); if let Some(next_child) = children_iter.peek() { self.set_between(child, next_child, parent, &mut results); @@ -357,60 +368,60 @@ impl<'a> State<'a> { Ok(results) } - fn set_between(&self, left: &Node, right: &Node, parent: &T, results: &mut String) { + fn set_between(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { match self.join_defaults(left, right, parent) { - Some(Join::Number(num)) => { - if num == 1 { - results.push_str("\n\n"); - } else { - results.push_str("\n".repeat(1 + num).as_ref()); - } + Join::Number(n) => { + results.push_str("\n".repeat(1 + n).as_ref()); } - Some(Join::Bool(bool)) => { - if bool { - results.push_str("\n\n"); - } else { - results.push_str("\n\n\n\n"); - } + Join::False => { + results.push_str("\n\n\n\n"); } - None => results.push_str("\n\n"), + Join::True => results.push_str("\n\n"), } } - fn join_defaults(&self, left: &Node, right: &Node, parent: &T) -> Option { + fn join_defaults(&self, left: &Node, right: &Node, parent: &Node) -> Join { if let Node::Code(code) = right { if 
format_code_as_indented(code, self) && matches!(left, Node::List(_)) { - return Some(Join::Bool(false)); + return Join::False; } if let Node::Code(code) = left { if format_code_as_indented(code, self) { - return Some(Join::Bool(false)); + return Join::False; } } } - if let Some(spread) = parent.spreadable() { + if matches!(parent, Node::List(_) | Node::ListItem(_)) { if matches!(left, Node::Paragraph(_)) && Self::matches((left, right)) || matches!(right, Node::Definition(_)) { - return None; + return Join::True; } if let Node::Heading(heading) = right { if format_heading_as_setext(heading, self) { - return None; + return Join::True; } } + let spread = if let Node::List(list) = parent { + list.spread + } else if let Node::ListItem(list_item) = parent { + list_item.spread + } else { + false + }; + if spread { - return Some(Join::Number(1)); + return Join::Number(1); } - return Some(Join::Number(0)); + return Join::Number(0); } - Some(Join::Bool(true)) + Join::True } fn matches(nodes: (&Node, &Node)) -> bool { diff --git a/mdast_util_to_markdown/src/util/check_bullet.rs b/mdast_util_to_markdown/src/util/check_bullet.rs new file mode 100644 index 00000000..a2379f2f --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_bullet.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_bullet(state: &mut State) -> Result { + let marker = state.options.bullet; + + if marker != '*' && marker != '+' && marker != '-' { + return Err(Message { + reason: format!( + "Cannot serialize items with `' {} '` for `options.bullet`, expected `*`, `+`, or `-`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs new file mode 100644 index 00000000..271bce8e --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn 
check_bullet_ordered(state: &mut State) -> Result { + let marker = state.options.bullet_ordered; + + if marker != '.' && marker != ')' { + return Err(Message { + reason: format!( + "Cannot serialize items with `' {} '` for `options.bulletOrdered`, expected `.` or `)`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/check_bullet_other.rs b/mdast_util_to_markdown/src/util/check_bullet_other.rs new file mode 100644 index 00000000..06409422 --- /dev/null +++ b/mdast_util_to_markdown/src/util/check_bullet_other.rs @@ -0,0 +1,30 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +use super::check_bullet::check_bullet; + +pub fn check_bullet_other(state: &mut State) -> Result { + let bullet = check_bullet(state)?; + let bullet_other = state.options.bullet_other; + + if bullet_other != '*' && bullet_other != '+' && bullet_other != '-' { + return Err(Message { + reason: format!( + "Cannot serialize items with `' {} '` for `options.bullet_other`, expected `*`, `+`, or `-`", + bullet_other + ), + }); + } + + if bullet_other == bullet { + return Err(Message { + reason: format!( + "Expected `bullet` (`' {} '`) and `bullet_other` (`' {} '`) to be different", + bullet, bullet_other + ), + }); + } + + Ok(bullet_other) +} diff --git a/mdast_util_to_markdown/src/util/indent_lines.rs b/mdast_util_to_markdown/src/util/indent_lines.rs index 374c493e..144fed62 100644 --- a/mdast_util_to_markdown/src/util/indent_lines.rs +++ b/mdast_util_to_markdown/src/util/indent_lines.rs @@ -1,7 +1,7 @@ use alloc::string::String; use regex::Regex; -pub fn indent_lines(value: &str, map: fn(&str, usize, bool) -> String) -> String { +pub fn indent_lines(value: &str, map: impl Fn(&str, usize, bool) -> String) -> String { let mut result = String::new(); let mut start = 0; let mut line = 0; diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index 923abe03..f11ea15b 100644 --- 
a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -1,3 +1,6 @@ +pub mod check_bullet; +pub mod check_bullet_ordered; +pub mod check_bullet_other; pub mod check_emphasis; pub mod check_fence; pub mod check_rule; diff --git a/mdast_util_to_markdown/tests/list.rs b/mdast_util_to_markdown/tests/list.rs new file mode 100644 index 00000000..6a9952ef --- /dev/null +++ b/mdast_util_to_markdown/tests/list.rs @@ -0,0 +1,704 @@ +use markdown::mdast::{List, ListItem, Node, Paragraph, Text, ThematicBreak}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::IndentOptions; +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn list() { + assert_eq!( + to(&Node::List(List { + children: vec![], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "", + "should support an empty list" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![Node::ListItem(ListItem { + children: Vec::new(), + position: None, + spread: false, + checked: None + })], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "*\n", + "should support a list w/ an item" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }) + ], + 
position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "- a\n- ***\n- b\n", + "should support a list w/ items" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "- a\n- ***\n", + "should not use blank lines between items for lists w/ `spread: false`" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }) + ], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "- a\n\n b\n- ***\n", + "should support a list w/ `spread: false`, w/ a spread item" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![Node::ListItem(ListItem { + children: Vec::new(), + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + })) + .unwrap(), + "1.\n", + "should support a list w/ `ordered` and an empty item" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: 
vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }) + ], + position: None, + ordered: true, + start: None, + spread: false + })) + .unwrap(), + "1. a\n2. ***\n3. b\n", + "should support a list w/ `ordered`" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }) + ], + position: None, + ordered: true, + start: None, + spread: false + })) + .unwrap(), + "1. a\n2. ***\n3. 
b\n", + "should support a list w/ `ordered` and `spread: false`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }) + ], + position: None, + ordered: true, + start: None, + spread: false + }), + &Options { + increment_list_marker: false, + ..Default::default() + } + ) + .unwrap(), + "1. a\n1. ***\n1. b\n", + "should support a list w/ `ordered` when `increment_list_marker: false`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }) + ], + position: None, + ordered: true, + start: Some(0), + spread: false + }), + &Options { + list_item_indent: IndentOptions::One, + ..Default::default() + } + ) + .unwrap(), + "0. a\n1. 
***\n", + "should support a list w/ `ordered` and `start`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: false, + start: None, + spread: false + }), + &Options { + list_item_indent: IndentOptions::Mixed, + ..Default::default() + } + ) + .unwrap(), + "* a\n b\n* c\n d\n", + "should support a correct prefix and indent `list_item_indent: IndentOptions::Mixed` and a tight list" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: false, + start: None, + spread:true + }), + &Options { + list_item_indent: IndentOptions::Mixed, + ..Default::default() + } + ) + .unwrap(), + "* a\n b\n\n* c\n d\n", + "should support a correct prefix and indent `list_item_indent: IndentOptions::Mixed` and a tight list" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + 
value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(9), + spread: false + }), + &Options { + list_item_indent: IndentOptions::One, + ..Default::default() + } + ) + .unwrap(), + "9. a\n b\n10. c\n d\n", + "should support a correct prefix and indent for items 9 and 10 when `list_item_indent: IndentOptions::One`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(99), + spread: false + }), + &Options { + list_item_indent: IndentOptions::One, + ..Default::default() + } + ) + .unwrap(), + "99. a\n b\n100. 
c\n d\n", + "should support a correct prefix and indent for items 90 and 100 when `list_item_indent: IndentOptions::One`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(999), + spread: false + }), + &Options { + list_item_indent: IndentOptions::One, + ..Default::default() + } + ) + .unwrap(), + "999. a\n b\n1000. c\n d\n", + "should support a correct prefix and indent for items 999 and 1000 when `list_item_indent: IndentOptions::One`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(9), + spread: false + }), + &Options { + list_item_indent: IndentOptions::Tab, + ..Default::default() + } + ) + .unwrap(), + "9. a\n b\n10. 
c\n d\n", + "should support a correct prefix and indent for items 9 and 10 when `list_item_indent: IndentOptions::Tab`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(99), + spread: false + }), + &Options { + list_item_indent: IndentOptions::Tab, + ..Default::default() + } + ) + .unwrap(), + "99. a\n b\n100. c\n d\n", + "should support a correct prefix and indent for items 99 and 100 when `list_item_indent: IndentOptions::Tab`" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: true, + start: Some(999), + spread: false + }), + &Options { + list_item_indent: IndentOptions::Tab, + ..Default::default() + } + ) + .unwrap(), + "999. a\n b\n1000. 
c\n d\n", + "should support a correct prefix and indent for items 999 and 1000 when `list_item_indent: IndentOptions::Tab`" + ); +} From 04fb9e0bd8b872178f6b70b56ded16d3cb3b0eaa Mon Sep 17 00:00:00 2001 From: Bnchi Date: Thu, 12 Sep 2024 14:55:45 +0300 Subject: [PATCH 34/73] Add support for image --- mdast_util_to_markdown/src/handle/image.rs | 88 +++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 3 + mdast_util_to_markdown/src/unsafe.rs | 2 +- .../src/util/check_quote.rs | 18 ++ mdast_util_to_markdown/src/util/mod.rs | 1 + mdast_util_to_markdown/tests/image.rs | 237 ++++++++++++++++++ 7 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 mdast_util_to_markdown/src/handle/image.rs create mode 100644 mdast_util_to_markdown/src/util/check_quote.rs create mode 100644 mdast_util_to_markdown/tests/image.rs diff --git a/mdast_util_to_markdown/src/handle/image.rs b/mdast_util_to_markdown/src/handle/image.rs new file mode 100644 index 00000000..f3f812d3 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/image.rs @@ -0,0 +1,88 @@ +use alloc::string::String; +use markdown::mdast::{Image, Node}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{check_quote::check_quote, safe::SafeConfig}, +}; + +use super::Handle; + +impl Handle for Image { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { + let quote = check_quote(state)?; + state.enter(ConstructName::Image); + state.enter(ConstructName::Label); + + let mut value = String::new(); + + value.push_str("!["); + + value.push_str(&state.safe( + &self.alt, + &SafeConfig::new(Some(value.as_str()), Some("]"), None), + )); + + value.push_str("]("); + state.exit(); + + if self.url.is_empty() && self.title.is_some() + || contain_control_char_or_whitespace(&self.url) + { + state.enter(ConstructName::DestinationLiteral); + value.push('<'); + 
value.push_str(&state.safe(&self.url, &SafeConfig::new(Some(&value), Some(">"), None))); + value.push('>'); + } else { + state.enter(ConstructName::DestinationRaw); + let after = if self.title.is_some() { " " } else { ")" }; + value.push_str( + &state.safe(&self.url, &SafeConfig::new(Some(&value), Some(after), None)), + ); + } + + state.exit(); + + if let Some(title) = &self.title { + let title_construct_name = if quote == '"' { + ConstructName::TitleQuote + } else { + ConstructName::TitleApostrophe + }; + + state.enter(title_construct_name); + value.push(' '); + value.push(quote); + + let mut before_buffer = [0u8; 4]; + let before = quote.encode_utf8(&mut before_buffer); + value.push_str( + &state.safe(title, &SafeConfig::new(Some(&self.url), Some(before), None)), + ); + + value.push(quote); + state.exit(); + } + + value.push(')'); + state.exit(); + + Ok(value) + } +} + +fn contain_control_char_or_whitespace(value: &str) -> bool { + value.chars().any(|c| c.is_whitespace() || c.is_control()) +} + +pub fn peek_image() -> char { + '!' 
+} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 6c53053f..3d5df688 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -8,6 +8,7 @@ mod code; pub mod emphasis; mod heading; pub mod html; +pub mod image; mod list; mod list_item; mod paragraph; diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index c97dc778..2bb5c708 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,6 +1,7 @@ use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; +use crate::handle::image::peek_image; use crate::handle::strong::peek_strong; use crate::handle::Handle; use crate::message::Message; @@ -87,6 +88,7 @@ impl<'a> State<'a> { Node::BlockQuote(block_quote) => block_quote.handle(self, info, parent, node), Node::List(list) => list.handle(self, info, parent, node), Node::ListItem(list_item) => list_item.handle(self, info, parent, node), + Node::Image(image) => image.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } @@ -326,6 +328,7 @@ impl<'a> State<'a> { Node::Strong(_) => Some(peek_strong(self)), Node::Emphasis(_) => Some(peek_emphasis(self)), Node::Html(_) => Some(peek_html()), + Node::Image(_) => Some(peek_image()), _ => None, } } diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index e4375a44..411655ef 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -249,7 +249,7 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::DestinationLiteral).into(), None, - Some(true), + None, ), Self::new('[', None, None, None, None, Some(true)), Self::new( diff --git a/mdast_util_to_markdown/src/util/check_quote.rs b/mdast_util_to_markdown/src/util/check_quote.rs new file mode 100644 index 00000000..ed3bd04d --- /dev/null +++ 
b/mdast_util_to_markdown/src/util/check_quote.rs @@ -0,0 +1,18 @@ +use alloc::format; + +use crate::{message::Message, state::State}; + +pub fn check_quote(state: &State) -> Result { + let marker = state.options.quote; + + if marker != '"' && marker != '\'' { + return Err(Message { + reason: format!( + "Cannot serialize title with `' {} '` for `options.quote`, expected `\"`, or `'`", + marker + ), + }); + } + + Ok(marker) +} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index f11ea15b..49c67e51 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -3,6 +3,7 @@ pub mod check_bullet_ordered; pub mod check_bullet_other; pub mod check_emphasis; pub mod check_fence; +pub mod check_quote; pub mod check_rule; pub mod check_rule_repetition; pub mod check_strong; diff --git a/mdast_util_to_markdown/tests/image.rs b/mdast_util_to_markdown/tests/image.rs new file mode 100644 index 00000000..45308da2 --- /dev/null +++ b/mdast_util_to_markdown/tests/image.rs @@ -0,0 +1,237 @@ +use markdown::mdast::{Image, Node}; +use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn image() { + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: None + })) + .unwrap(), + "![]()\n", + "should support an image" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::from("a"), + url: String::new(), + title: None + })) + .unwrap(), + "![a]()\n", + "should support `alt`" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("a"), + title: None + })) + .unwrap(), + "![](a)\n", + "should support a url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: 
Some(String::from("a")) + })) + .unwrap(), + "![](<> \"a\")\n", + "should support a title" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("a"), + title: Some(String::from("b")) + })) + .unwrap(), + "![](a \"b\")\n", + "should support a url and title" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b c"), + title: None + })) + .unwrap(), + "![]()\n", + "should support an image w/ enclosed url w/ whitespace in url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b )\n", + "should escape an opening angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b >c"), + title: None + })) + .unwrap(), + "![](c>)\n", + "should escape a closing angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b \\+c"), + title: None + })) + .unwrap(), + "![]()\n", + "should escape a backslash in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b\nc"), + title: None + })) + .unwrap(), + "![]()\n", + "should encode a line ending in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b(c"), + title: None + })) + .unwrap(), + "![](b\\(c)\n", + "should escape an opening paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b)c"), + title: None + })) + .unwrap(), + "![](b\\)c)\n", + "should escape a closing paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("b\\+c"), + title: None + })) + .unwrap(), + 
"![](b\\\\+c)\n", + "should escape a backslash in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::from("\x0C"), + title: None + })) + .unwrap(), + "![](<\x0C>)\n", + "should support control characters in images" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: Some(String::from("b\"c")) + })) + .unwrap(), + "![](<> \"b\\\"c\")\n", + "should escape a double quote in `title`" + ); + + assert_eq!( + to(&Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: Some(String::from("b\\.c")) + })) + .unwrap(), + "![](<> \"b\\\\.c\")\n", + "should escape a backslash in `title`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: Some(String::from("b")) + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "![](<> 'b')\n", + "should support an image w/ title when `quote: \"\'\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Image(Image { + position: None, + alt: String::new(), + url: String::new(), + title: Some(String::from("'")) + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "![](<> '\\'')\n", + "should escape a quote in `title` in a title when `quote: \"\'\"`" + ); +} From 7ff2abbb7575e4a2506e321b955adcd1cf9fa401 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 14 Sep 2024 08:27:39 +0300 Subject: [PATCH 35/73] Add support for link --- mdast_util_to_markdown/src/handle/code.rs | 7 +- mdast_util_to_markdown/src/handle/image.rs | 15 +- mdast_util_to_markdown/src/handle/link.rs | 97 +++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/handle/text.rs | 5 +- mdast_util_to_markdown/src/state.rs | 3 + .../src/util/format_link.rs | 26 ++ .../src/util/format_link_as_auto_link.rs | 36 ++ .../src/util/longest_char_streak.rs | 2 +- 
mdast_util_to_markdown/src/util/mod.rs | 1 + mdast_util_to_markdown/src/util/safe.rs | 10 +- mdast_util_to_markdown/tests/link.rs | 393 ++++++++++++++++++ 12 files changed, 568 insertions(+), 28 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/link.rs create mode 100644 mdast_util_to_markdown/src/util/format_link.rs create mode 100644 mdast_util_to_markdown/src/util/format_link_as_auto_link.rs create mode 100644 mdast_util_to_markdown/tests/link.rs diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs index 9e0d45a0..45d6aa09 100644 --- a/mdast_util_to_markdown/src/handle/code.rs +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -48,8 +48,7 @@ impl Handle for Code { }; state.enter(code_fenced_lang_construct); - value - .push_str(&state.safe(lang, &SafeConfig::new(Some(&value), " ".into(), Some('`')))); + value.push_str(&state.safe(lang, &SafeConfig::new(&value, " ", Some('`')))); state.exit(); @@ -63,9 +62,7 @@ impl Handle for Code { state.enter(code_fenced_meta_construct); value.push(' '); - value.push_str( - &state.safe(meta, &SafeConfig::new(Some(&value), "\n".into(), Some('`'))), - ); + value.push_str(&state.safe(meta, &SafeConfig::new(&value, "\n", Some('`')))); state.exit(); } diff --git a/mdast_util_to_markdown/src/handle/image.rs b/mdast_util_to_markdown/src/handle/image.rs index f3f812d3..0a1fc4e7 100644 --- a/mdast_util_to_markdown/src/handle/image.rs +++ b/mdast_util_to_markdown/src/handle/image.rs @@ -26,10 +26,7 @@ impl Handle for Image { value.push_str("!["); - value.push_str(&state.safe( - &self.alt, - &SafeConfig::new(Some(value.as_str()), Some("]"), None), - )); + value.push_str(&state.safe(&self.alt, &SafeConfig::new(value.as_str(), "]", None))); value.push_str("]("); state.exit(); @@ -39,14 +36,12 @@ impl Handle for Image { { state.enter(ConstructName::DestinationLiteral); value.push('<'); - value.push_str(&state.safe(&self.url, &SafeConfig::new(Some(&value), Some(">"), None))); + 
value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, ">", None))); value.push('>'); } else { state.enter(ConstructName::DestinationRaw); let after = if self.title.is_some() { " " } else { ")" }; - value.push_str( - &state.safe(&self.url, &SafeConfig::new(Some(&value), Some(after), None)), - ); + value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, after, None))); } state.exit(); @@ -64,9 +59,7 @@ impl Handle for Image { let mut before_buffer = [0u8; 4]; let before = quote.encode_utf8(&mut before_buffer); - value.push_str( - &state.safe(title, &SafeConfig::new(Some(&self.url), Some(before), None)), - ); + value.push_str(&state.safe(title, &SafeConfig::new(&self.url, before, None))); value.push(quote); state.exit(); diff --git a/mdast_util_to_markdown/src/handle/link.rs b/mdast_util_to_markdown/src/handle/link.rs new file mode 100644 index 00000000..81afe716 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/link.rs @@ -0,0 +1,97 @@ +use core::mem; + +use alloc::string::String; +use markdown::mdast::{Link, Node}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{ + check_quote::check_quote, format_link_as_auto_link::format_link_as_auto_link, + safe::SafeConfig, + }, +}; + +use super::Handle; + +impl Handle for Link { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { + let quote = check_quote(state)?; + + if format_link_as_auto_link(self, node, state) { + let old_stack = mem::take(&mut state.stack); + state.enter(ConstructName::Autolink); + let mut value = String::from("<"); + value.push_str(&state.container_phrasing(node, &Info::new(&value, ">"))?); + value.push('>'); + state.exit(); + state.stack = old_stack; + return Ok(value); + } + + state.enter(ConstructName::Link); + state.enter(ConstructName::Label); + let mut value = String::from("["); + value.push_str(&state.container_phrasing(node, &Info::new(&value, "]("))?); + 
value.push_str("]("); + state.exit(); + + if self.url.is_empty() && self.title.is_some() + || contain_control_char_or_whitespace(&self.url) + { + state.enter(ConstructName::DestinationLiteral); + value.push('<'); + value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, ">", None))); + value.push('>'); + } else { + state.enter(ConstructName::DestinationRaw); + let after = if self.title.is_some() { " " } else { ")" }; + value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, after, None))) + } + + state.exit(); + + if let Some(title) = &self.title { + let title_construct_name = if quote == '"' { + ConstructName::TitleQuote + } else { + ConstructName::TitleApostrophe + }; + + state.enter(title_construct_name); + value.push(' '); + value.push(quote); + + let mut before_buffer = [0u8; 4]; + let before = quote.encode_utf8(&mut before_buffer); + value.push_str(&state.safe(title, &SafeConfig::new(&self.url, before, None))); + + value.push(quote); + state.exit(); + } + + value.push(')'); + state.exit(); + + Ok(value) + } +} + +fn contain_control_char_or_whitespace(value: &str) -> bool { + value.chars().any(|c| c.is_whitespace() || c.is_control()) +} + +pub fn peek_link(link: &Link, node: &Node, state: &State) -> char { + if format_link_as_auto_link(link, node, state) { + '>' + } else { + '[' + } +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 3d5df688..affe5ce5 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -9,6 +9,7 @@ pub mod emphasis; mod heading; pub mod html; pub mod image; +pub mod link; mod list; mod list_item; mod paragraph; diff --git a/mdast_util_to_markdown/src/handle/text.rs b/mdast_util_to_markdown/src/handle/text.rs index 78331ffe..8decb542 100644 --- a/mdast_util_to_markdown/src/handle/text.rs +++ b/mdast_util_to_markdown/src/handle/text.rs @@ -16,9 +16,6 @@ impl Handle for Text { _parent: Option<&Node>, _node: &Node, ) -> Result { 
- Ok(state.safe( - &self.value, - &SafeConfig::new(Some(info.before), Some(info.after), None), - )) + Ok(state.safe(&self.value, &SafeConfig::new(info.before, info.after, None))) } } diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 2bb5c708..b4a63d64 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -2,6 +2,7 @@ use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; use crate::handle::image::peek_image; +use crate::handle::link::peek_link; use crate::handle::strong::peek_strong; use crate::handle::Handle; use crate::message::Message; @@ -89,6 +90,7 @@ impl<'a> State<'a> { Node::List(list) => list.handle(self, info, parent, node), Node::ListItem(list_item) => list_item.handle(self, info, parent, node), Node::Image(image) => image.handle(self, info, parent, node), + Node::Link(link) => link.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } @@ -329,6 +331,7 @@ impl<'a> State<'a> { Node::Emphasis(_) => Some(peek_emphasis(self)), Node::Html(_) => Some(peek_html()), Node::Image(_) => Some(peek_image()), + Node::Link(link) => Some(peek_link(link, node, self)), _ => None, } } diff --git a/mdast_util_to_markdown/src/util/format_link.rs b/mdast_util_to_markdown/src/util/format_link.rs new file mode 100644 index 00000000..264febf9 --- /dev/null +++ b/mdast_util_to_markdown/src/util/format_link.rs @@ -0,0 +1,26 @@ + +fn format_link_as_auto_link(link: &Link, node: &Node, state: &State) -> bool { + let raw = node.to_string(); + + if let Some(children) = node.children() { + if children.len() != 1 { + return false; + } + + let mail_to = format!("mailto:{}", raw); + let start_with_protocol = RegexBuilder::new("^[a-z][a-z+.-]+:") + .case_insensitive(true) + .build() + .unwrap(); + + return !state.options.resource_link + && !link.url.is_empty() + && link.title.is_none() + && matches!(children[0], 
Node::Text(_)) + && (raw == link.url || mail_to == link.url) + && start_with_protocol.is_match(&link.url) + && is_valid_url(&link.url); + } + + false +} diff --git a/mdast_util_to_markdown/src/util/format_link_as_auto_link.rs b/mdast_util_to_markdown/src/util/format_link_as_auto_link.rs new file mode 100644 index 00000000..7def88f9 --- /dev/null +++ b/mdast_util_to_markdown/src/util/format_link_as_auto_link.rs @@ -0,0 +1,36 @@ +use alloc::{format, string::ToString}; +use markdown::mdast::{Link, Node}; +use regex::RegexBuilder; + +use crate::state::State; + +pub fn format_link_as_auto_link(link: &Link, node: &Node, state: &State) -> bool { + let raw = node.to_string(); + + if let Some(children) = node.children() { + if children.len() != 1 { + return false; + } + + let mail_to = format!("mailto:{}", raw); + let start_with_protocol = RegexBuilder::new("^[a-z][a-z+.-]+:") + .case_insensitive(true) + .build() + .unwrap(); + + return !state.options.resource_link + && !link.url.is_empty() + && link.title.is_none() + && matches!(children[0], Node::Text(_)) + && (raw == link.url || mail_to == link.url) + && start_with_protocol.is_match(&link.url) + && is_valid_url(&link.url); + } + + false +} + +fn is_valid_url(url: &str) -> bool { + !url.chars() + .any(|c| c.is_whitespace() || c.is_control() || c == '>' || c == '<') +} diff --git a/mdast_util_to_markdown/src/util/longest_char_streak.rs b/mdast_util_to_markdown/src/util/longest_char_streak.rs index 7e30edb5..b5097d55 100644 --- a/mdast_util_to_markdown/src/util/longest_char_streak.rs +++ b/mdast_util_to_markdown/src/util/longest_char_streak.rs @@ -20,7 +20,7 @@ pub fn longest_char_streak(haystack: &str, needle: char) -> usize { } #[cfg(test)] -mod code_handler_tests { +mod longest_char_streak { use super::*; #[test] diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index 49c67e51..c002aee4 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ 
b/mdast_util_to_markdown/src/util/mod.rs @@ -9,6 +9,7 @@ pub mod check_rule_repetition; pub mod check_strong; pub mod format_code_as_indented; pub mod format_heading_as_setext; +pub mod format_link_as_auto_link; pub mod indent_lines; pub mod longest_char_streak; pub mod pattern_in_scope; diff --git a/mdast_util_to_markdown/src/util/safe.rs b/mdast_util_to_markdown/src/util/safe.rs index 03a85a88..4fa33bea 100644 --- a/mdast_util_to_markdown/src/util/safe.rs +++ b/mdast_util_to_markdown/src/util/safe.rs @@ -8,14 +8,10 @@ pub struct SafeConfig<'a> { } impl<'a> SafeConfig<'a> { - pub(crate) fn new( - before: Option<&'a str>, - after: Option<&'a str>, - encode: Option, - ) -> Self { + pub(crate) fn new(before: &'a str, after: &'a str, encode: Option) -> Self { SafeConfig { - before: before.unwrap_or(""), - after: after.unwrap_or(""), + before, + after, encode, } } diff --git a/mdast_util_to_markdown/tests/link.rs b/mdast_util_to_markdown/tests/link.rs new file mode 100644 index 00000000..dfb9cd7c --- /dev/null +++ b/mdast_util_to_markdown/tests/link.rs @@ -0,0 +1,393 @@ +use markdown::mdast::{Link, Node, Text}; +use mdast_util_to_markdown::to_markdown as to; + +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn text() { + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::new(), + title: None + })) + .unwrap(), + "[]()\n", + "should support a link" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + url: String::new(), + title: None + })) + .unwrap(), + "[a]()\n", + "should support children" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("a"), + title: None + })) + .unwrap(), + "[](a)\n", + "should support a url" + ); + + assert_eq!( + to(&Node::Link(Link { + 
children: Vec::new(), + position: None, + url: String::new(), + title: Some(String::from("a")) + })) + .unwrap(), + "[](<> \"a\")\n", + "should support a title" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("a"), + title: Some(String::from("b")) + })) + .unwrap(), + "[](a \"b\")\n", + "should support a url and title" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b c"), + title: None + })) + .unwrap(), + "[]()\n", + "should support a link w/ enclosed url w/ whitespace in url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b )\n", + "should escape an opening angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b >c"), + title: None + })) + .unwrap(), + "[](c>)\n", + "should escape a closing angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b \\+c"), + title: None + })) + .unwrap(), + "[]()\n", + "should escape a backslash in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b\nc"), + title: None + })) + .unwrap(), + "[]()\n", + "should encode a line ending in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b(c"), + title: None + })) + .unwrap(), + "[](b\\(c)\n", + "should escape an opening paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("b)c"), + title: None + })) + .unwrap(), + "[](b\\)c)\n", + "should escape a closing paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + 
url: String::from("b\\.c"), + title: None + })) + .unwrap(), + "[](b\\\\.c)\n", + "should escape a backslash in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::from("\x0C"), + title: None + })) + .unwrap(), + "[](<\x0C>)\n", + "should support control characters in links" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: String::new(), + title: Some(String::from("b\\-c")) + })) + .unwrap(), + "[](<> \"b\\\\-c\")\n", + "should escape a backslash in `title`" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("tel:123"), + position: None + })], + position: None, + url: String::from("tel:123"), + title: None + })) + .unwrap(), + "\n", + "should use an autolink for nodes w/ a value similar to the url and a protocol" + ); + + assert_eq!( + to_md_with_opts( + &Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("tel:123"), + position: None + })], + position: None, + url: String::from("tel:123"), + title: None + }), + &Options { + resource_link: true, + ..Default::default() + } + ) + .unwrap(), + "[tel:123](tel:123)\n", + "should use a resource link (`resourceLink: true`)" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + url: String::from("a"), + title: None + }),) + .unwrap(), + "[a](a)\n", + "should use a normal link for nodes w/ a value similar to the url w/o a protocol" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("tel:123"), + position: None + })], + position: None, + url: String::from("tel:123"), + title: None + }),) + .unwrap(), + "\n", + "should use an autolink for nodes w/ a value similar to the url and a protocol" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: 
String::from("tel:123"), + position: None + })], + position: None, + url: String::from("tel:123"), + title: Some(String::from("a")) + }),) + .unwrap(), + "[tel:123](tel:123 \"a\")\n", + "should use a normal link for nodes w/ a value similar to the url w/ a title" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("a@b.c"), + position: None + })], + position: None, + url: String::from("mailto:a@b.c"), + title: None + }),) + .unwrap(), + "\n", + "should use an autolink for nodes w/ a value similar to the url and a protocol (email)" + ); + + assert_eq!( + to(&Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("a.b-c_d@a.b"), + position: None + })], + position: None, + url: String::from("mailto:a.b-c_d@a.b"), + title: None + }),) + .unwrap(), + "\n", + "should not escape in autolinks" + ); + + assert_eq!( + to_md_with_opts( + &Node::Link(Link { + children: Vec::new(), + position: None, + url: String::new(), + title: Some("b".to_string()) + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "[](<> 'b')\n", + "should support a link w/ title when `quote: \"\'\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Link(Link { + children: Vec::new(), + position: None, + url: String::new(), + title: Some("'".to_string()) + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "[](<> '\\'')\n", + "should escape a quote in `title` in a title when `quote: \"\'\"`'" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: "a b![c](d*e_f[g_h`i".to_string(), + title: None + })) + .unwrap(), + "[]()\n", + "should not escape unneeded characters in a `DestinationLiteral`" + ); + + assert_eq!( + to(&Node::Link(Link { + children: Vec::new(), + position: None, + url: "a![b](c*d_e[f_g`h Date: Sat, 14 Sep 2024 08:49:52 +0300 Subject: [PATCH 36/73] Depend on Regex less in some trivial code --- 
mdast_util_to_markdown/src/handle/break.rs | 6 ++--- mdast_util_to_markdown/src/handle/heading.rs | 6 ++--- .../src/util/format_link.rs | 26 ------------------- 3 files changed, 5 insertions(+), 33 deletions(-) delete mode 100644 mdast_util_to_markdown/src/util/format_link.rs diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs index f989e3b9..5fcc956f 100644 --- a/mdast_util_to_markdown/src/handle/break.rs +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -1,6 +1,5 @@ use alloc::string::ToString; use markdown::mdast::{Break, Node}; -use regex::Regex; use crate::{ message::Message, @@ -20,8 +19,9 @@ impl Handle for Break { ) -> Result { for pattern in state.r#unsafe.iter() { if pattern.character == '\n' && pattern_in_scope(&state.stack, pattern) { - let regex = Regex::new(r"[ \t]").unwrap(); - if regex.is_match(info.before) { + let is_whitespace_or_tab = + info.before.chars().any(|c| c.is_whitespace() || c == '\t'); + if is_whitespace_or_tab { return Ok("".to_string()); } diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs index f385796a..cb64cf9b 100644 --- a/mdast_util_to_markdown/src/handle/heading.rs +++ b/mdast_util_to_markdown/src/handle/heading.rs @@ -1,6 +1,5 @@ use alloc::format; use markdown::mdast::{Heading, Node}; -use regex::Regex; use crate::{ construct_name::ConstructName, @@ -54,9 +53,8 @@ impl Handle for Heading { let mut value = state.container_phrasing(node, &Info::new("# ", "\n"))?; - let tab_or_space_regex = Regex::new(r"^[\t ]").unwrap(); - if tab_or_space_regex.is_match(&value) { - if let Some(first_char) = value.chars().nth(0) { + if let Some(first_char) = value.chars().nth(0) { + if first_char.is_whitespace() || first_char == '\t' { let hex_code = u32::from(first_char); value = format!("&#x{:X};{}", hex_code, &value[1..]) } diff --git a/mdast_util_to_markdown/src/util/format_link.rs b/mdast_util_to_markdown/src/util/format_link.rs deleted 
file mode 100644 index 264febf9..00000000 --- a/mdast_util_to_markdown/src/util/format_link.rs +++ /dev/null @@ -1,26 +0,0 @@ - -fn format_link_as_auto_link(link: &Link, node: &Node, state: &State) -> bool { - let raw = node.to_string(); - - if let Some(children) = node.children() { - if children.len() != 1 { - return false; - } - - let mail_to = format!("mailto:{}", raw); - let start_with_protocol = RegexBuilder::new("^[a-z][a-z+.-]+:") - .case_insensitive(true) - .build() - .unwrap(); - - return !state.options.resource_link - && !link.url.is_empty() - && link.title.is_none() - && matches!(children[0], Node::Text(_)) - && (raw == link.url || mail_to == link.url) - && start_with_protocol.is_match(&link.url) - && is_valid_url(&link.url); - } - - false -} From 66f5024d74aa352bbfe24ebfa7794aa2661cf817 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 14 Sep 2024 09:10:12 +0300 Subject: [PATCH 37/73] Few updates --- mdast_util_to_markdown/src/handle/break.rs | 3 +-- mdast_util_to_markdown/src/handle/heading.rs | 2 +- mdast_util_to_markdown/src/handle/mod.rs | 2 +- mdast_util_to_markdown/src/util/format_code_as_indented.rs | 5 ++--- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs index 5fcc956f..07b81739 100644 --- a/mdast_util_to_markdown/src/handle/break.rs +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -19,8 +19,7 @@ impl Handle for Break { ) -> Result { for pattern in state.r#unsafe.iter() { if pattern.character == '\n' && pattern_in_scope(&state.stack, pattern) { - let is_whitespace_or_tab = - info.before.chars().any(|c| c.is_whitespace() || c == '\t'); + let is_whitespace_or_tab = info.before.chars().any(|c| c == ' ' || c == '\t'); if is_whitespace_or_tab { return Ok("".to_string()); } diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs index cb64cf9b..6e547482 100644 --- 
a/mdast_util_to_markdown/src/handle/heading.rs +++ b/mdast_util_to_markdown/src/handle/heading.rs @@ -54,7 +54,7 @@ impl Handle for Heading { let mut value = state.container_phrasing(node, &Info::new("# ", "\n"))?; if let Some(first_char) = value.chars().nth(0) { - if first_char.is_whitespace() || first_char == '\t' { + if first_char == ' ' || first_char == '\t' { let hex_code = u32::from(first_char); value = format!("&#x{:X};{}", hex_code, &value[1..]) } diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index affe5ce5..67f18344 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -23,6 +23,6 @@ pub trait Handle { state: &mut State, info: &Info, parent: Option<&Node>, - _node: &Node, + node: &Node, ) -> Result; } diff --git a/mdast_util_to_markdown/src/util/format_code_as_indented.rs b/mdast_util_to_markdown/src/util/format_code_as_indented.rs index acacf883..68e318e1 100644 --- a/mdast_util_to_markdown/src/util/format_code_as_indented.rs +++ b/mdast_util_to_markdown/src/util/format_code_as_indented.rs @@ -4,12 +4,11 @@ use regex::Regex; use crate::state::State; pub fn format_code_as_indented(code: &Code, state: &State) -> bool { - let white_space = Regex::new(r"[^ \r\n]").unwrap(); + let non_whitespace = code.value.chars().any(|c| !c.is_whitespace()); let blank = Regex::new(r"^[\t ]*(?:[\r\n]|$)|(?:^|[\r\n])[\t ]*$").unwrap(); - !state.options.fences && !code.value.is_empty() && code.lang.is_none() - && white_space.is_match(&code.value) + && non_whitespace && !blank.is_match(&code.value) } From 9b03c74cef3d8fad5c31b1593e196964de6a12aa Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 14 Sep 2024 14:54:47 +0300 Subject: [PATCH 38/73] Add support for inline code --- .../src/handle/inline_code.rs | 72 +++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 13 +- mdast_util_to_markdown/src/unsafe.rs | 84 ++++---- 
mdast_util_to_markdown/tests/inline_code.rs | 187 ++++++++++++++++++ 5 files changed, 309 insertions(+), 48 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/inline_code.rs create mode 100644 mdast_util_to_markdown/tests/inline_code.rs diff --git a/mdast_util_to_markdown/src/handle/inline_code.rs b/mdast_util_to_markdown/src/handle/inline_code.rs new file mode 100644 index 00000000..e8a2d78e --- /dev/null +++ b/mdast_util_to_markdown/src/handle/inline_code.rs @@ -0,0 +1,72 @@ +use alloc::{format, string::String}; +use markdown::mdast::{InlineCode, Node}; +use regex::Regex; + +use crate::{ + message::Message, + state::{Info, State}, +}; + +use super::Handle; + +impl Handle for InlineCode { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { + let mut value = self.value.clone(); + let mut sequence = String::from('`'); + let mut grave_accent_match = Regex::new(&format!(r"(^|[^`]){}([^`]|$)", sequence)).unwrap(); + while grave_accent_match.is_match(&value) { + sequence.push('`'); + grave_accent_match = Regex::new(&format!(r"(^|[^`]){}([^`]|$)", sequence)).unwrap(); + } + + let no_whitespaces = !value.chars().all(char::is_whitespace); + let starts_with_whitespace = value.starts_with(char::is_whitespace); + let ends_with_whitespace = value.ends_with(char::is_whitespace); + let starts_with_tick = value.starts_with('`'); + let ends_with_tick = value.ends_with('`'); + if no_whitespaces + && ((starts_with_whitespace && ends_with_whitespace) + || starts_with_tick + || ends_with_tick) + { + value = format!("{}{}{}", ' ', value, ' '); + } + + for pattern in &mut state.r#unsafe { + if !pattern.at_break { + continue; + } + + State::compile_pattern(pattern); + + if let Some(regex) = &pattern.compiled { + while let Some(m) = regex.find(&value) { + let position = m.start(); + + let position = if position > 0 + && &value[position..m.len()] == "\n" + && &value[position - 1..position] == "\r" + { + 
position - 1 + } else { + position + }; + + value.replace_range(position..m.start() + 1, " "); + } + } + } + + Ok(format!("{}{}{}", sequence, value, sequence)) + } +} + +pub fn peek_inline_code() -> char { + '`' +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 67f18344..6eb45723 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -9,6 +9,7 @@ pub mod emphasis; mod heading; pub mod html; pub mod image; +pub mod inline_code; pub mod link; mod list; mod list_item; diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index b4a63d64..9fe0aaf4 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -2,6 +2,7 @@ use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; use crate::handle::image::peek_image; +use crate::handle::inline_code::peek_inline_code; use crate::handle::link::peek_link; use crate::handle::strong::peek_strong; use crate::handle::Handle; @@ -91,6 +92,7 @@ impl<'a> State<'a> { Node::ListItem(list_item) => list_item.handle(self, info, parent, node), Node::Image(image) => image.handle(self, info, parent, node), Node::Link(link) => link.handle(self, info, parent, node), + Node::InlineCode(inline_code) => inline_code.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } @@ -116,7 +118,7 @@ impl<'a> State<'a> { .map(|captured_group| captured_group.len()) .unwrap_or(0); - let before = pattern.before.is_some() || pattern.at_break.unwrap_or(false); + let before = pattern.before.is_some() || pattern.at_break; let after = pattern.after.is_some(); let position = full_match.start() + if before { captured_group_len } else { 0 }; @@ -209,22 +211,20 @@ impl<'a> State<'a> { format!("&#x{:X};", hex_code) } - fn compile_pattern(pattern: &mut Unsafe) { + pub fn compile_pattern(pattern: &mut Unsafe) { if 
pattern.compiled.is_none() { let mut pattern_to_compile = String::new(); - let at_break = pattern.at_break.unwrap_or(false); - if let Some(pattern_before) = pattern.before { pattern_to_compile.push('('); - if at_break { + if pattern.at_break { pattern_to_compile.push_str("[\\r\\n][\\t ]*"); } pattern_to_compile.push_str("(?:"); pattern_to_compile.push_str(pattern_before); pattern_to_compile.push(')'); pattern_to_compile.push(')'); - } else if at_break { + } else if pattern.at_break { pattern_to_compile.push('('); pattern_to_compile.push_str("[\\r\\n][\\t ]*"); pattern_to_compile.push(')'); @@ -332,6 +332,7 @@ impl<'a> State<'a> { Node::Html(_) => Some(peek_html()), Node::Image(_) => Some(peek_image()), Node::Link(link) => Some(peek_link(link, node, self)), + Node::InlineCode(_) => Some(peek_inline_code()), _ => None, } } diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index 411655ef..08c76133 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -10,7 +10,7 @@ pub struct Unsafe<'a> { pub not_in_construct: Option, pub before: Option<&'a str>, pub after: Option<&'a str>, - pub at_break: Option, + pub at_break: bool, pub(crate) compiled: Option, } @@ -27,7 +27,7 @@ impl<'a> Unsafe<'a> { after: Option<&'a str>, in_construct: Option, not_in_construct: Option, - at_break: Option, + at_break: bool, ) -> Self { Unsafe { character, @@ -57,7 +57,7 @@ impl<'a> Unsafe<'a> { "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( '\t', @@ -65,7 +65,7 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( '\t', @@ -77,7 +77,7 @@ impl<'a> Unsafe<'a> { ]) .into(), None, - None, + false, ), Self::new( '\r', @@ -93,7 +93,7 @@ impl<'a> Unsafe<'a> { ]) .into(), None, - None, + false, ), Self::new( '\n', @@ -109,7 +109,7 @@ impl<'a> Unsafe<'a> { ]) .into(), None, - None, + false, ), 
Self::new( ' ', @@ -117,7 +117,7 @@ impl<'a> Unsafe<'a> { "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( ' ', @@ -125,7 +125,7 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( ' ', @@ -137,7 +137,7 @@ impl<'a> Unsafe<'a> { ]) .into(), None, - None, + false, ), Self::new( '!', @@ -145,7 +145,7 @@ impl<'a> Unsafe<'a> { "\\[".into(), Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), Self::new( '\"', @@ -153,16 +153,16 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::TitleQuote).into(), None, - None, + false, ), - Self::new('#', None, None, None, None, Some(true)), + Self::new('#', None, None, None, None, true), Self::new( '#', None, "(?:[\r\n]|$)".into(), Construct::Single(ConstructName::HeadingAtx).into(), None, - None, + false, ), Self::new( '&', @@ -170,7 +170,7 @@ impl<'a> Unsafe<'a> { "[#A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( '\'', @@ -178,7 +178,7 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::TitleApostrophe).into(), None, - None, + false, ), Self::new( '(', @@ -186,7 +186,7 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::DestinationRaw).into(), None, - None, + false, ), Self::new( '(', @@ -194,44 +194,44 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), - Self::new(')', "\\d+".into(), None, None, None, Some(true)), + Self::new(')', "\\d+".into(), None, None, None, true), Self::new( ')', None, None, Construct::Single(ConstructName::DestinationRaw).into(), None, - None, + false, ), - Self::new('*', None, "(?:[ \t\r\n*])".into(), None, None, Some(true)), + Self::new('*', None, "(?:[ \t\r\n*])".into(), None, None, true), Self::new( '*', None, None, 
Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), - Self::new('+', None, "(?:[ \t\r\n])".into(), None, None, Some(true)), - Self::new('-', None, "(?:[ \t\r\n-])".into(), None, None, Some(true)), + Self::new('+', None, "(?:[ \t\r\n])".into(), None, None, true), + Self::new('-', None, "(?:[ \t\r\n-])".into(), None, None, true), Self::new( '.', "\\d+".into(), "(?:[ \t\r\n]|$)".into(), None, None, - Some(true), + true, ), - Self::new('<', None, "[!/?A-Za-z]".into(), None, None, Some(true)), + Self::new('<', None, "[!/?A-Za-z]".into(), None, None, true), Self::new( '<', None, "[!/?A-Za-z]".into(), Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), Self::new( '<', @@ -239,26 +239,26 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::DestinationLiteral).into(), None, - None, + false, ), - Self::new('=', None, None, None, None, Some(true)), - Self::new('>', None, None, None, None, Some(true)), + Self::new('=', None, None, None, None, true), + Self::new('>', None, None, None, None, true), Self::new( '>', None, None, Construct::Single(ConstructName::DestinationLiteral).into(), None, - None, + false, ), - Self::new('[', None, None, None, None, Some(true)), + Self::new('[', None, None, None, None, true), Self::new( '[', None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), Self::new( '[', @@ -266,7 +266,7 @@ impl<'a> Unsafe<'a> { None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), None, - None, + false, ), Self::new( '\\', @@ -274,7 +274,7 @@ impl<'a> Unsafe<'a> { "[\\r\\n]".into(), Construct::Single(ConstructName::Phrasing).into(), None, - None, + false, ), Self::new( ']', @@ -282,18 +282,18 @@ impl<'a> Unsafe<'a> { None, Construct::List(vec![ConstructName::Label, ConstructName::Reference]).into(), None, - 
None, + false, ), - Self::new('_', None, None, None, None, Some(true)), + Self::new('_', None, None, None, None, true), Self::new( '_', None, None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), - Self::new('`', None, None, None, None, Some(true)), + Self::new('`', None, None, None, None, true), Self::new( '`', None, @@ -304,7 +304,7 @@ impl<'a> Unsafe<'a> { ]) .into(), None, - None, + false, ), Self::new( '`', @@ -312,9 +312,9 @@ impl<'a> Unsafe<'a> { None, Construct::Single(ConstructName::Phrasing).into(), Construct::List(full_phrasing_spans.clone()).into(), - None, + false, ), - Self::new('~', None, None, None, None, Some(true)), + Self::new('~', None, None, None, None, true), ] } diff --git a/mdast_util_to_markdown/tests/inline_code.rs b/mdast_util_to_markdown/tests/inline_code.rs new file mode 100644 index 00000000..62bf2fec --- /dev/null +++ b/mdast_util_to_markdown/tests/inline_code.rs @@ -0,0 +1,187 @@ +use markdown::mdast::{InlineCode, Node}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn text() { + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::new(), + position: None + })) + .unwrap(), + "``\n", + "should support an empty code text" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a"), + position: None + })) + .unwrap(), + "`a`\n", + "should support a code text" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from(" "), + position: None + })) + .unwrap(), + "` `\n", + "should support a space" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("\n"), + position: None + })) + .unwrap(), + "`\n`\n", + "should support an eol" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from(" "), + position: None + })) + .unwrap(), + "` `\n", + "should support several spaces" + ); + + assert_eq!( + 
to(&Node::InlineCode(InlineCode { + value: String::from("a`b"), + position: None + })) + .unwrap(), + "``a`b``\n", + "should use a fence of two grave accents if the value contains one" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a``b"), + position: None + })) + .unwrap(), + "`a``b`\n", + "should use a fence of one grave accent if the value contains two" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a``b`c"), + position: None + })) + .unwrap(), + "```a``b`c```\n", + "should use a fence of three grave accents if the value contains two and one" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("`a"), + position: None + })) + .unwrap(), + "`` `a ``\n", + "should pad w/ a space if the value starts w/ a grave accent" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a`"), + position: None + })) + .unwrap(), + "`` a` ``\n", + "should pad w/ a space if the value ends w/ a grave accent" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from(" a "), + position: None + })) + .unwrap(), + "` a `\n", + "should pad w/ a space if the value starts and ends w/ a space" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from(" a"), + position: None + })) + .unwrap(), + "` a`\n", + "should not pad w/ spaces if the value ends w/ a non-space" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a "), + position: None + })) + .unwrap(), + "`a `\n", + "should not pad w/ spaces if the value starts w/ a non-space" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a\n- b"), + position: None + })) + .unwrap(), + "`a - b`\n", + "should prevent breaking out of code (-)" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a\n#"), + position: None + })) + .unwrap(), + "`a #`\n", + "should prevent breaking out of code (#)" + ); + + 
assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a\n1. "), + position: None + })) + .unwrap(), + "`a 1. `\n", + "should prevent breaking out of code (\\d\\.)" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a\r- b"), + position: None + })) + .unwrap(), + "`a - b`\n", + "should prevent breaking out of code (cr)" + ); + + assert_eq!( + to(&Node::InlineCode(InlineCode { + value: String::from("a\r\n- b"), + position: None + })) + .unwrap(), + "`a - b`\n", + "should prevent breaking out of code (crlf)" + ); +} From c43f209e4e15636e91ef72ce109e3d3cac7249fd Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 15 Sep 2024 09:34:48 +0300 Subject: [PATCH 39/73] Add support for root --- mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/handle/root.rs | 41 +++++++++++++++++++++++ mdast_util_to_markdown/src/state.rs | 1 + mdast_util_to_markdown/tests/break.rs | 28 +++++++++------- mdast_util_to_markdown/tests/core.rs | 33 ++++++++++++++++++ 5 files changed, 91 insertions(+), 13 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/root.rs create mode 100644 mdast_util_to_markdown/tests/core.rs diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 6eb45723..4353a8a9 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -14,6 +14,7 @@ pub mod link; mod list; mod list_item; mod paragraph; +mod root; pub mod strong; mod text; mod thematic_break; diff --git a/mdast_util_to_markdown/src/handle/root.rs b/mdast_util_to_markdown/src/handle/root.rs new file mode 100644 index 00000000..c3ce928c --- /dev/null +++ b/mdast_util_to_markdown/src/handle/root.rs @@ -0,0 +1,41 @@ +use alloc::string::String; +use markdown::mdast::{Node, Root}; + +use crate::{ + message::Message, + state::{Info, State}, +}; + +use super::Handle; + +impl Handle for Root { + fn handle( + &self, + state: &mut State, + info: &Info, + 
_parent: Option<&Node>, + node: &Node, + ) -> Result { + let has_phrasing = self.children.iter().any(phrasing); + if has_phrasing { + state.container_phrasing(node, info) + } else { + state.container_flow(node) + } + } +} + +fn phrasing(child: &Node) -> bool { + matches!( + *child, + Node::Break(_) + | Node::Emphasis(_) + | Node::Image(_) + | Node::ImageReference(_) + | Node::InlineCode(_) + | Node::Link(_) + | Node::LinkReference(_) + | Node::Strong(_) + | Node::Text(_) + ) +} diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 9fe0aaf4..79e4d032 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -78,6 +78,7 @@ impl<'a> State<'a> { parent: Option<&Node>, ) -> Result { match node { + Node::Root(root) => root.handle(self, info, parent, node), Node::Paragraph(paragraph) => paragraph.handle(self, info, parent, node), Node::Text(text) => text.handle(self, info, parent, node), Node::Strong(strong) => strong.handle(self, info, parent, node), diff --git a/mdast_util_to_markdown/tests/break.rs b/mdast_util_to_markdown/tests/break.rs index bda63b51..661f8b81 100644 --- a/mdast_util_to_markdown/tests/break.rs +++ b/mdast_util_to_markdown/tests/break.rs @@ -1,5 +1,7 @@ use markdown::mdast::{Break, Heading, Node, Text}; -use mdast_util_to_markdown::to_markdown as to; +use markdown::to_mdast as from; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use mdast_util_to_markdown::{to_markdown as to, Options}; use pretty_assertions::assert_eq; #[test] @@ -52,16 +54,16 @@ fn r#break() { "should serialize breaks in heading (atx) as a space" ); - //assert_eq!( - // to_md_with_opts( - // &from("a \nb\n=\n", &Default::default()).unwrap(), - // &Options { - // setext: true, - // ..Default::default() - // } - // ) - // .unwrap(), - // "a\\\nb\n=\n", - // "should support a break" - //); + assert_eq!( + to_md_with_opts( + &from("a \nb\n=\n", &Default::default()).unwrap(), + &Options 
{ + setext: true, + ..Default::default() + } + ) + .unwrap(), + "a\\\nb\n=\n", + "should support a break" + ); } diff --git a/mdast_util_to_markdown/tests/core.rs b/mdast_util_to_markdown/tests/core.rs new file mode 100644 index 00000000..6ea07d9b --- /dev/null +++ b/mdast_util_to_markdown/tests/core.rs @@ -0,0 +1,33 @@ +use markdown::mdast::{Node, Paragraph, Root, Text, ThematicBreak}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn core() { + assert_eq!( + to(&Node::Root(Root { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }), + ], + position: None + })) + .unwrap(), + "a\n\n***\n\nb\n", + "should support root" + ); +} From f3efe18524b7b0b9a01d059aafdf178ee71a0d39 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 15 Sep 2024 09:36:50 +0300 Subject: [PATCH 40/73] Update break comment --- mdast_util_to_markdown/tests/break.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdast_util_to_markdown/tests/break.rs b/mdast_util_to_markdown/tests/break.rs index 661f8b81..521a08c2 100644 --- a/mdast_util_to_markdown/tests/break.rs +++ b/mdast_util_to_markdown/tests/break.rs @@ -4,7 +4,7 @@ use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; use mdast_util_to_markdown::{to_markdown as to, Options}; use pretty_assertions::assert_eq; -#[test] +#2[test] fn r#break() { assert_eq!( to(&Node::Break(Break { position: None })).unwrap(), @@ -64,6 +64,6 @@ fn r#break() { ) .unwrap(), "a\\\nb\n=\n", - "should support a break" + "should serialize breaks in heading (setext)" ); } From 8d34b35cf6226be91b6a654604f18ff0ec552cca Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 10:53:18 
+0300 Subject: [PATCH 41/73] Add support for definition --- mdast_util_to_markdown/src/association_id.rs | 17 + .../src/handle/definition.rs | 77 ++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/lib.rs | 1 + mdast_util_to_markdown/src/state.rs | 49 ++- mdast_util_to_markdown/tests/break.rs | 2 +- mdast_util_to_markdown/tests/definition.rs | 345 ++++++++++++++++++ 7 files changed, 489 insertions(+), 3 deletions(-) create mode 100644 mdast_util_to_markdown/src/association_id.rs create mode 100644 mdast_util_to_markdown/src/handle/definition.rs create mode 100644 mdast_util_to_markdown/tests/definition.rs diff --git a/mdast_util_to_markdown/src/association_id.rs b/mdast_util_to_markdown/src/association_id.rs new file mode 100644 index 00000000..ef82a442 --- /dev/null +++ b/mdast_util_to_markdown/src/association_id.rs @@ -0,0 +1,17 @@ +use alloc::string::String; +use markdown::mdast::Definition; + +pub trait AssociationId { + fn identifier(&self) -> &String; + fn label(&self) -> &Option; +} + +impl AssociationId for Definition { + fn identifier(&self) -> &String { + &self.identifier + } + + fn label(&self) -> &Option { + &self.label + } +} diff --git a/mdast_util_to_markdown/src/handle/definition.rs b/mdast_util_to_markdown/src/handle/definition.rs new file mode 100644 index 00000000..a553448a --- /dev/null +++ b/mdast_util_to_markdown/src/handle/definition.rs @@ -0,0 +1,77 @@ +use alloc::string::String; +use markdown::mdast::{Definition, Node}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::{check_quote::check_quote, safe::SafeConfig}, +}; + +use super::Handle; + +impl Handle for Definition { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { + let quote = check_quote(state)?; + + state.enter(ConstructName::Definition); + state.enter(ConstructName::Label); + + let mut value = String::from('['); + + 
value.push_str(&state.safe( + &state.association(self), + &SafeConfig::new(&value, "]", None), + )); + + value.push_str("]: "); + + state.exit(); + + if self.url.is_empty() || contain_control_char_or_whitespace(&self.url) { + state.enter(ConstructName::DestinationLiteral); + value.push('<'); + value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, ">", None))); + value.push('>'); + } else { + state.enter(ConstructName::DestinationRaw); + let after = if self.title.is_some() { " " } else { ")" }; + value.push_str(&state.safe(&self.url, &SafeConfig::new(&value, after, None))); + } + + state.exit(); + + if let Some(title) = &self.title { + let title_construct_name = if quote == '"' { + ConstructName::TitleQuote + } else { + ConstructName::TitleApostrophe + }; + + state.enter(title_construct_name); + value.push(' '); + value.push(quote); + + let mut before_buffer = [0u8; 4]; + let before = quote.encode_utf8(&mut before_buffer); + value.push_str(&state.safe(title, &SafeConfig::new(&self.url, before, None))); + + value.push(quote); + state.exit(); + } + + state.exit(); + + Ok(value) + } +} + +fn contain_control_char_or_whitespace(value: &str) -> bool { + value.chars().any(|c| c.is_whitespace() || c.is_control()) +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 4353a8a9..4774d006 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -5,6 +5,7 @@ use markdown::mdast::Node; mod blockquote; mod r#break; mod code; +mod definition; pub mod emphasis; mod heading; pub mod html; diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index 0f56c3e9..a0697363 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -7,6 +7,7 @@ use message::Message; use state::{Info, State}; extern crate alloc; +mod association_id; mod configure; mod construct_name; mod handle; diff --git a/mdast_util_to_markdown/src/state.rs 
b/mdast_util_to_markdown/src/state.rs index 79e4d032..540e5ebb 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,3 +1,4 @@ +use crate::association_id::AssociationId; use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; @@ -17,9 +18,11 @@ use crate::{ safe::{escape_backslashes, EscapeInfos, SafeConfig}, }, }; +use alloc::string::ToString; use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; -use regex::Regex; +use markdown::util::character_reference::{decode_named, decode_numeric}; +use regex::{Captures, Regex, RegexBuilder}; #[allow(dead_code)] #[derive(Debug)] @@ -94,11 +97,12 @@ impl<'a> State<'a> { Node::Image(image) => image.handle(self, info, parent, node), Node::Link(link) => link.handle(self, info, parent, node), Node::InlineCode(inline_code) => inline_code.handle(self, info, parent, node), + Node::Definition(definition) => definition.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } - pub fn safe(&mut self, input: &String, config: &SafeConfig) -> String { + pub fn safe(&mut self, input: &str, config: &SafeConfig) -> String { let value = format!("{}{}{}", config.before, input, config.after); let mut positions: Vec = Vec::new(); let mut result: String = String::new(); @@ -445,4 +449,45 @@ impl<'a> State<'a> { | (Node::Table(_), Node::Table(_)) ) } + + pub fn association(&self, node: &impl AssociationId) -> String { + if node.label().is_some() || node.identifier().is_empty() { + return node.label().clone().unwrap_or_default(); + } + + let character_escape_or_reference = + RegexBuilder::new(r"\\([!-/:-@\[-`{-~])|&(#(?:\d{1,7}|x[\da-f]{1,6})|[\da-z]{1,31});") + .case_insensitive(true) + .build() + .unwrap(); + + character_escape_or_reference + .replace_all(node.identifier(), Self::decode) + .into_owned() + } + + fn decode(caps: &Captures) -> String { + if let Some(first_cap) = 
caps.get(1) { + return String::from(first_cap.as_str()); + } + + if let Some(head) = &caps[2].chars().nth(0) { + if *head == '#' { + let radix = match caps[2].chars().nth(1) { + Some('x') | Some('X') => 16, + _ => 10, + }; + + let capture = &caps[2]; + let numeric_encoded = if radix == 16 { + &capture[2..] + } else { + &capture[1..] + }; + return decode_numeric(numeric_encoded, radix); + } + } + + decode_named(&caps[2], true).unwrap_or(caps[0].to_string()) + } } diff --git a/mdast_util_to_markdown/tests/break.rs b/mdast_util_to_markdown/tests/break.rs index 521a08c2..228a5897 100644 --- a/mdast_util_to_markdown/tests/break.rs +++ b/mdast_util_to_markdown/tests/break.rs @@ -4,7 +4,7 @@ use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; use mdast_util_to_markdown::{to_markdown as to, Options}; use pretty_assertions::assert_eq; -#2[test] +#[test] fn r#break() { assert_eq!( to(&Node::Break(Break { position: None })).unwrap(), diff --git a/mdast_util_to_markdown/tests/definition.rs b/mdast_util_to_markdown/tests/definition.rs new file mode 100644 index 00000000..f526376a --- /dev/null +++ b/mdast_util_to_markdown/tests/definition.rs @@ -0,0 +1,345 @@ +use markdown::mdast::{Definition, Node}; +use mdast_util_to_markdown::{to_markdown as to, Options}; + +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use pretty_assertions::assert_eq; + +#[test] +fn defintion() { + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::new(), + position: None, + label: None + })) + .unwrap(), + "[]: <>\n", + "should support a definition w/o label" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::new(), + position: None, + label: Some(String::from("a")) + })) + .unwrap(), + "[a]: <>\n", + "should support a definition w/ label" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + 
identifier: String::new(), + position: None, + label: Some(String::from("\\")) + })) + .unwrap(), + "[\\\\]: <>\n", + "should escape a backslash in `label`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::new(), + position: None, + label: Some(String::from("[")) + })) + .unwrap(), + "[\\[]: <>\n", + "should escape an opening bracket in `label`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::new(), + position: None, + label: Some(String::from("]")) + })) + .unwrap(), + "[\\]]: <>\n", + "should escape a closing bracket in `label`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: <>\n", + "should support a definition w/ identifier" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::from(r"\\"), + position: None, + label: None + })) + .unwrap(), + "[\\\\]: <>\n", + "should escape a backslash in `identifier`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::from("["), + position: None, + label: None + })) + .unwrap(), + "[\\[]: <>\n", + "should escape an opening bracket in `identifier`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: None, + identifier: String::from("]"), + position: None, + label: None + })) + .unwrap(), + "[\\]]: <>\n", + "should escape a closing bracket in `identifier`" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: b\n", + "should support a definition w/ url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b c"), + title: None, + identifier: 
String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: \n", + "should support a definition w/ enclosed url w/ whitespace in url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b \n", + "should escape an opening angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b >c"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: c>\n", + "should escape a closing angle bracket in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b \\.c"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: \n", + "should escape a backslash in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b\nc"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: \n", + "should encode a line ending in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("\x0C"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: <\x0C>\n", + "should encode a line ending in `url` in an enclosed url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b(c"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: b\\(c\n", + "should escape an opening paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b)c"), + title: None, + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: b\\)c\n", + "should escape a closing paren in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b\\?c"), + title: None, + identifier: 
String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: b\\\\?c\n", + "should escape a backslash in `url` in a raw url" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: String::from("b").into(), + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: <> \"b\"\n", + "should support a definition w/ title" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::from("b"), + title: String::from("c").into(), + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: b \"c\"\n", + "should support a definition w/ url & title" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: String::from("\"").into(), + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: <> \"\\\"\"\n", + "should escape a quote in `title` in a title" + ); + + assert_eq!( + to(&Node::Definition(Definition { + url: String::new(), + title: String::from("\\").into(), + identifier: String::from("a"), + position: None, + label: None + })) + .unwrap(), + "[a]: <> \"\\\\\"\n", + "should escape a backslash in `title` in a title" + ); + + assert_eq!( + to_md_with_opts( + &Node::Definition(Definition { + url: String::new(), + title: String::from("b").into(), + identifier: String::from("a"), + position: None, + label: None + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "[a]: <> 'b'\n", + "should support a definition w/ title when `quote: \"\'\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Definition(Definition { + url: String::new(), + title: String::from("'").into(), + identifier: String::from("a"), + position: None, + label: None + }), + &Options { + quote: '\'', + ..Default::default() + } + ) + .unwrap(), + "[a]: <> '\\''\n", + "should escape a quote in `title` in a title when `quote: \"\'\"`" + ); +} From e062d85828e4a3a1be712303d3071f621acaf1c8 Mon Sep 17 
00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 11:43:54 +0300 Subject: [PATCH 42/73] make markdown utils pub --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f1266e81..8f2baa19 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,7 +44,7 @@ mod subtokenize; mod to_html; mod to_mdast; mod tokenizer; -mod util; +pub mod util; pub mod mdast; // To do: externalize? pub mod message; // To do: externalize. From 2b22136a1d44263e1ac5db3e7192a790f0a1ef64 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 11:50:03 +0300 Subject: [PATCH 43/73] Change util visibility and make decode_named and decode_numeric pub --- mdast_util_to_markdown/src/state.rs | 5 ++--- src/lib.rs | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 540e5ebb..96738bc2 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -21,7 +21,6 @@ use crate::{ use alloc::string::ToString; use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; -use markdown::util::character_reference::{decode_named, decode_numeric}; use regex::{Captures, Regex, RegexBuilder}; #[allow(dead_code)] @@ -484,10 +483,10 @@ impl<'a> State<'a> { } else { &capture[1..] }; - return decode_numeric(numeric_encoded, radix); + return markdown::decode_numeric(numeric_encoded, radix); } } - decode_named(&caps[2], true).unwrap_or(caps[0].to_string()) + markdown::decode_named(&caps[2], true).unwrap_or(caps[0].to_string()) } } diff --git a/src/lib.rs b/src/lib.rs index 8f2baa19..ac98c814 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,12 +44,15 @@ mod subtokenize; mod to_html; mod to_mdast; mod tokenizer; -pub mod util; +mod util; pub mod mdast; // To do: externalize? pub mod message; // To do: externalize. pub mod unist; // To do: externalize. 
+#[doc(hidden)] +pub use util::character_reference::{decode_named, decode_numeric}; + #[doc(hidden)] pub use util::identifier::{id_cont, id_start}; From ff30eb1af0c23163869171fc75db0701b9a4a235 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 12:01:12 +0300 Subject: [PATCH 44/73] Refactor association --- mdast_util_to_markdown/src/association_id.rs | 4 ++-- mdast_util_to_markdown/src/state.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mdast_util_to_markdown/src/association_id.rs b/mdast_util_to_markdown/src/association_id.rs index ef82a442..777583b4 100644 --- a/mdast_util_to_markdown/src/association_id.rs +++ b/mdast_util_to_markdown/src/association_id.rs @@ -1,12 +1,12 @@ use alloc::string::String; use markdown::mdast::Definition; -pub trait AssociationId { +pub trait Association { fn identifier(&self) -> &String; fn label(&self) -> &Option; } -impl AssociationId for Definition { +impl Association for Definition { fn identifier(&self) -> &String { &self.identifier } diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 96738bc2..787f16a0 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,4 +1,4 @@ -use crate::association_id::AssociationId; +use crate::association_id::Association; use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; @@ -449,7 +449,7 @@ impl<'a> State<'a> { ) } - pub fn association(&self, node: &impl AssociationId) -> String { + pub fn association(&self, node: &impl Association) -> String { if node.label().is_some() || node.identifier().is_empty() { return node.label().clone().unwrap_or_default(); } From 9c5e8087d81d4fa9b5f41c501b3eedbb0700ecbf Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 12:05:41 +0300 Subject: [PATCH 45/73] Update the name of the association mod --- .../src/{association_id.rs => association.rs} | 0 
mdast_util_to_markdown/src/lib.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename mdast_util_to_markdown/src/{association_id.rs => association.rs} (100%) diff --git a/mdast_util_to_markdown/src/association_id.rs b/mdast_util_to_markdown/src/association.rs similarity index 100% rename from mdast_util_to_markdown/src/association_id.rs rename to mdast_util_to_markdown/src/association.rs diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index a0697363..c11e5205 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -7,7 +7,7 @@ use message::Message; use state::{Info, State}; extern crate alloc; -mod association_id; +mod association; mod configure; mod construct_name; mod handle; From 58ff3b88a9327bd24d346e08c5c3158f4cd437b2 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 14:38:29 +0300 Subject: [PATCH 46/73] Add support for image reference --- mdast_util_to_markdown/src/association.rs | 12 +- .../src/handle/image_reference.rs | 62 +++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 7 +- .../tests/image_reference.rs | 166 ++++++++++++++++++ 5 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/image_reference.rs create mode 100644 mdast_util_to_markdown/tests/image_reference.rs diff --git a/mdast_util_to_markdown/src/association.rs b/mdast_util_to_markdown/src/association.rs index 777583b4..32577a79 100644 --- a/mdast_util_to_markdown/src/association.rs +++ b/mdast_util_to_markdown/src/association.rs @@ -1,5 +1,5 @@ use alloc::string::String; -use markdown::mdast::Definition; +use markdown::mdast::{Definition, ImageReference}; pub trait Association { fn identifier(&self) -> &String; @@ -15,3 +15,13 @@ impl Association for Definition { &self.label } } + +impl Association for ImageReference { + fn identifier(&self) -> &String { + &self.identifier + } + + fn label(&self) -> &Option { + 
&self.label + } +} diff --git a/mdast_util_to_markdown/src/handle/image_reference.rs b/mdast_util_to_markdown/src/handle/image_reference.rs new file mode 100644 index 00000000..d3fd3e96 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/image_reference.rs @@ -0,0 +1,62 @@ +use core::mem; + +use alloc::string::String; +use markdown::mdast::{ImageReference, Node, ReferenceKind}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::safe::SafeConfig, +}; + +use super::Handle; + +impl Handle for ImageReference { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + _node: &Node, + ) -> Result { + state.enter(ConstructName::ImageReference); + state.enter(ConstructName::Label); + + let mut value = String::from("!["); + let alt = state.safe(&self.alt, &SafeConfig::new(&value, "]", None)); + + value.push_str(&alt); + value.push_str("]["); + + state.exit(); + + let old_stack = mem::take(&mut state.stack); + state.enter(ConstructName::Reference); + + let reference = state.safe( + &state.association(self), + &SafeConfig::new(&value, "]", None), + ); + + state.exit(); + state.stack = old_stack; + state.exit(); + + if matches!(self.reference_kind, ReferenceKind::Full) || alt.is_empty() || alt != reference + { + value.push_str(&reference); + value.push(']'); + } else if matches!(self.reference_kind, ReferenceKind::Shortcut) { + value.pop(); + } else { + value.push(']'); + } + + Ok(value) + } +} + +pub fn peek_image_reference() -> char { + '!' 
+} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 4774d006..051909e3 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -10,6 +10,7 @@ pub mod emphasis; mod heading; pub mod html; pub mod image; +pub mod image_reference; pub mod inline_code; pub mod link; mod list; diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 787f16a0..c372a112 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -1,8 +1,9 @@ -use crate::association_id::Association; +use crate::association::Association; use crate::construct_name::ConstructName; use crate::handle::emphasis::peek_emphasis; use crate::handle::html::peek_html; use crate::handle::image::peek_image; +use crate::handle::image_reference::peek_image_reference; use crate::handle::inline_code::peek_inline_code; use crate::handle::link::peek_link; use crate::handle::strong::peek_strong; @@ -97,6 +98,9 @@ impl<'a> State<'a> { Node::Link(link) => link.handle(self, info, parent, node), Node::InlineCode(inline_code) => inline_code.handle(self, info, parent, node), Node::Definition(definition) => definition.handle(self, info, parent, node), + Node::ImageReference(image_reference) => { + image_reference.handle(self, info, parent, node) + } _ => Err("Cannot handle node".into()), } } @@ -337,6 +341,7 @@ impl<'a> State<'a> { Node::Image(_) => Some(peek_image()), Node::Link(link) => Some(peek_link(link, node, self)), Node::InlineCode(_) => Some(peek_inline_code()), + Node::ImageReference(_) => Some(peek_image_reference()), _ => None, } } diff --git a/mdast_util_to_markdown/tests/image_reference.rs b/mdast_util_to_markdown/tests/image_reference.rs new file mode 100644 index 00000000..fba31bfa --- /dev/null +++ b/mdast_util_to_markdown/tests/image_reference.rs @@ -0,0 +1,166 @@ +use markdown::mdast::{ImageReference, Node, Paragraph, ReferenceKind}; +use 
mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn image_reference() { + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::new(), + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: None + })) + .unwrap(), + "![][]\n", + "should support a link reference (nonsensical)" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::from("a"), + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: None + })) + .unwrap(), + "![a][]\n", + "should support `alt`" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::new(), + reference_kind: ReferenceKind::Full, + identifier: String::from("a"), + label: None + })) + .unwrap(), + "![][a]\n", + "should support an `identifier` (nonsensical)" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::new(), + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: String::from("a").into() + })) + .unwrap(), + "![][a]\n", + "should support a `label` (nonsensical)" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::from("A"), + reference_kind: ReferenceKind::Shortcut, + identifier: String::from("A"), + label: None + })) + .unwrap(), + "![A]\n", + "should support `reference_kind: \"ReferenceKind::Shortcut\"`" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::from("A"), + reference_kind: ReferenceKind::Collapsed, + identifier: String::from("A"), + label: None + })) + .unwrap(), + "![A][]\n", + "should support `reference_kind: \"ReferenceKind::Collapsed\"`" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::from("A"), + reference_kind: ReferenceKind::Full, + identifier: String::from("A"), + label: None + })) + .unwrap(), + 
"![A][A]\n", + "should support `reference_kind: \"ReferenceKind::Full\"`" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + alt: String::from("&"), + label: String::from("&").into(), + reference_kind: ReferenceKind::Full, + identifier: String::from("&"), + })) + .unwrap(), + "![&][&]\n", + "should prefer label over identifier" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + label: None, + alt: String::from("&"), + reference_kind: ReferenceKind::Full, + identifier: String::from("&"), + })) + .unwrap(), + "![&][&]\n", + "should decode `identifier` if w/o `label`" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::ImageReference(ImageReference { + position: None, + label: None, + alt: String::from("&a;"), + reference_kind: ReferenceKind::Full, + identifier: String::from("&b;"), + })], + position: None + })) + .unwrap(), + "![\\&a;][&b;]\n", + "should support incorrect character references" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + label: None, + alt: String::from("+"), + reference_kind: ReferenceKind::Full, + identifier: String::from("\\+"), + })) + .unwrap(), + "![+][+]\n", + "should unescape `identifier` if w/o `label`" + ); + + assert_eq!( + to(&Node::ImageReference(ImageReference { + position: None, + label: None, + alt: String::from("a"), + reference_kind: ReferenceKind::Collapsed, + identifier: String::from("b"), + })) + .unwrap(), + "![a][b]\n", + "should use a full reference if w/o `ReferenceKind` and the label does not match the reference" + ); +} From 250a6cd181a93d2156cfae5769e243665538f280 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Tue, 17 Sep 2024 23:11:45 +0300 Subject: [PATCH 47/73] Update blockquote --- mdast_util_to_markdown/src/handle/blockquote.rs | 4 ++-- mdast_util_to_markdown/src/state.rs | 4 ++-- mdast_util_to_markdown/tests/blockquote.rs | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff 
--git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 806e58c6..80a575b8 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -1,5 +1,5 @@ use alloc::string::String; -use markdown::mdast::{BlockQuote, Node}; +use markdown::mdast::{Blockquote, Node}; use crate::{ construct_name::ConstructName, @@ -10,7 +10,7 @@ use crate::{ use super::Handle; -impl Handle for BlockQuote { +impl Handle for Blockquote { fn handle( &self, state: &mut State, diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index c372a112..b8101c07 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -91,7 +91,7 @@ impl<'a> State<'a> { Node::Html(html) => html.handle(self, info, parent, node), Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info, parent, node), Node::Code(code) => code.handle(self, info, parent, node), - Node::BlockQuote(block_quote) => block_quote.handle(self, info, parent, node), + Node::Blockquote(block_quote) => block_quote.handle(self, info, parent, node), Node::List(list) => list.handle(self, info, parent, node), Node::ListItem(list_item) => list_item.handle(self, info, parent, node), Node::Image(image) => image.handle(self, info, parent, node), @@ -444,7 +444,7 @@ impl<'a> State<'a> { matches!( nodes, (Node::Root(_), Node::Root(_)) - | (Node::BlockQuote(_), Node::BlockQuote(_)) + | (Node::Blockquote(_), Node::Blockquote(_)) | (Node::FootnoteDefinition(_), Node::FootnoteDefinition(_)) | (Node::Heading(_), Node::Heading(_)) | (Node::List(_), Node::List(_)) diff --git a/mdast_util_to_markdown/tests/blockquote.rs b/mdast_util_to_markdown/tests/blockquote.rs index 4dc021b4..ab27a5b2 100644 --- a/mdast_util_to_markdown/tests/blockquote.rs +++ b/mdast_util_to_markdown/tests/blockquote.rs @@ -1,4 +1,4 @@ -use markdown::mdast::{BlockQuote, Node, Paragraph, Text, 
ThematicBreak}; +use markdown::mdast::{Blockquote, Node, Paragraph, Text, ThematicBreak}; use mdast_util_to_markdown::to_markdown as to; use pretty_assertions::assert_eq; @@ -6,7 +6,7 @@ use pretty_assertions::assert_eq; #[test] fn block_quote() { assert_eq!( - to(&Node::BlockQuote(BlockQuote { + to(&Node::Blockquote(Blockquote { children: vec![], position: None, })) @@ -16,7 +16,7 @@ fn block_quote() { ); assert_eq!( - to(&Node::BlockQuote(BlockQuote { + to(&Node::Blockquote(Blockquote { children: vec![Node::Text(Text { value: String::from("a"), position: None @@ -29,7 +29,7 @@ fn block_quote() { ); assert_eq!( - to(&Node::BlockQuote(BlockQuote { + to(&Node::Blockquote(Blockquote { children: vec![ Node::Paragraph(Paragraph { children: vec![Node::Text(Text { From 04bdc7165d646e22ce6315ac85ba76ceda8c663b Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 18 Sep 2024 15:54:07 +0300 Subject: [PATCH 48/73] Add support for link reference --- mdast_util_to_markdown/src/association.rs | 12 +- .../src/handle/link_reference.rs | 64 ++++++ mdast_util_to_markdown/src/handle/mod.rs | 1 + mdast_util_to_markdown/src/state.rs | 3 + .../tests/link_reference.rs | 206 ++++++++++++++++++ 5 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 mdast_util_to_markdown/src/handle/link_reference.rs create mode 100644 mdast_util_to_markdown/tests/link_reference.rs diff --git a/mdast_util_to_markdown/src/association.rs b/mdast_util_to_markdown/src/association.rs index 32577a79..a5e9d16f 100644 --- a/mdast_util_to_markdown/src/association.rs +++ b/mdast_util_to_markdown/src/association.rs @@ -1,5 +1,5 @@ use alloc::string::String; -use markdown::mdast::{Definition, ImageReference}; +use markdown::mdast::{Definition, ImageReference, LinkReference}; pub trait Association { fn identifier(&self) -> &String; @@ -25,3 +25,13 @@ impl Association for ImageReference { &self.label } } + +impl Association for LinkReference { + fn identifier(&self) -> &String { + &self.identifier + } + + fn 
label(&self) -> &Option { + &self.label + } +} diff --git a/mdast_util_to_markdown/src/handle/link_reference.rs b/mdast_util_to_markdown/src/handle/link_reference.rs new file mode 100644 index 00000000..e63beb53 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/link_reference.rs @@ -0,0 +1,64 @@ +use core::mem; + +use alloc::string::String; +use markdown::mdast::{LinkReference, Node, ReferenceKind}; + +use crate::{ + construct_name::ConstructName, + message::Message, + state::{Info, State}, + util::safe::SafeConfig, +}; + +use super::Handle; + +impl Handle for LinkReference { + fn handle( + &self, + state: &mut State, + _info: &Info, + _parent: Option<&Node>, + node: &Node, + ) -> Result { + state.enter(ConstructName::LinkReference); + state.enter(ConstructName::Label); + + let mut value = String::from("["); + let text = state.container_phrasing(node, &Info::new(&value, "]"))?; + + value.push_str(&text); + value.push_str("]["); + + state.exit(); + + let old_stack = mem::take(&mut state.stack); + state.enter(ConstructName::Reference); + + let reference = state.safe( + &state.association(self), + &SafeConfig::new(&value, "]", None), + ); + + state.exit(); + state.stack = old_stack; + state.exit(); + + if matches!(self.reference_kind, ReferenceKind::Full) + || text.is_empty() + || text != reference + { + value.push_str(&reference); + value.push(']'); + } else if matches!(self.reference_kind, ReferenceKind::Shortcut) { + value.pop(); + } else { + value.push(']'); + } + + Ok(value) + } +} + +pub fn peek_link_reference() -> char { + '[' +} diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index 051909e3..debca631 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -13,6 +13,7 @@ pub mod image; pub mod image_reference; pub mod inline_code; pub mod link; +pub mod link_reference; mod list; mod list_item; mod paragraph; diff --git a/mdast_util_to_markdown/src/state.rs 
b/mdast_util_to_markdown/src/state.rs index b8101c07..3ba3a3d6 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -6,6 +6,7 @@ use crate::handle::image::peek_image; use crate::handle::image_reference::peek_image_reference; use crate::handle::inline_code::peek_inline_code; use crate::handle::link::peek_link; +use crate::handle::link_reference::peek_link_reference; use crate::handle::strong::peek_strong; use crate::handle::Handle; use crate::message::Message; @@ -101,6 +102,7 @@ impl<'a> State<'a> { Node::ImageReference(image_reference) => { image_reference.handle(self, info, parent, node) } + Node::LinkReference(link_reference) => link_reference.handle(self, info, parent, node), _ => Err("Cannot handle node".into()), } } @@ -342,6 +344,7 @@ impl<'a> State<'a> { Node::Link(link) => Some(peek_link(link, node, self)), Node::InlineCode(_) => Some(peek_inline_code()), Node::ImageReference(_) => Some(peek_image_reference()), + Node::LinkReference(_) => Some(peek_link_reference()), _ => None, } } diff --git a/mdast_util_to_markdown/tests/link_reference.rs b/mdast_util_to_markdown/tests/link_reference.rs new file mode 100644 index 00000000..de8f9e81 --- /dev/null +++ b/mdast_util_to_markdown/tests/link_reference.rs @@ -0,0 +1,206 @@ +use markdown::mdast::{LinkReference, Node, Paragraph, ReferenceKind, Text}; +use mdast_util_to_markdown::to_markdown as to; + +use pretty_assertions::assert_eq; + +#[test] +fn link_reference() { + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: Vec::new(), + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: None + })) + .unwrap(), + "[][]\n", + "should support a link reference (nonsensical" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: None + })) 
+ .unwrap(), + "[a][]\n", + "should support `children`" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: Vec::new(), + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::from("a"), + label: None + })) + .unwrap(), + "[][a]\n", + "should support an `identifier` (nonsensical)" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: Vec::new(), + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::new(), + label: Some(String::from("a")), + })) + .unwrap(), + "[][a]\n", + "should support a `label` (nonsensical)" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("A"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Shortcut, + identifier: String::from("A"), + label: None + })) + .unwrap(), + "[A]\n", + "should support `reference_type: ReferenceKind::Shortcut`" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("A"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Collapsed, + identifier: String::from("A"), + label: Some("A".into()) + })) + .unwrap(), + "[A][]\n", + "should support `reference_type: ReferenceKind::Collapsed`" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("A"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::from("A"), + label: Some("A".into()) + })) + .unwrap(), + "[A][A]\n", + "should support `reference_type: ReferenceKind::Full`" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("&"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::from("&"), + label: Some("&".into()) + })) + .unwrap(), + "[&][&]\n", + "should prefer 
label over identifier" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("&"), + position: None + })], + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::from("&"), + label: None + })) + .unwrap(), + "[&][&]\n", + "should decode `identifier` if w/o `label`" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::LinkReference(LinkReference { + position: None, + label: None, + children: vec![Node::Text(Text { + value: String::from("&a;"), + position: None + })], + reference_kind: ReferenceKind::Full, + identifier: String::from("&b;"), + })], + position: None + })) + .unwrap(), + "[\\&a;][&b;]\n", + "should support incorrect character references" + ); + + assert_eq!( + to(&Node::LinkReference(LinkReference { + children: vec![], + position: None, + reference_kind: ReferenceKind::Full, + identifier: String::from("a![b](c*d_e[f_g`h Date: Thu, 19 Sep 2024 16:55:38 +0300 Subject: [PATCH 49/73] Fix typo --- mdast_util_to_markdown/src/util/check_bullet_ordered.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs index 271bce8e..4057f9c7 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs @@ -8,7 +8,7 @@ pub fn check_bullet_ordered(state: &mut State) -> Result { if marker != '.' 
&& marker != ')' { return Err(Message { reason: format!( - "Cannot serialize items with `' {} '` for `options.bulletOrdered`, expected `.` or `)`", + "Cannot serialize items with `' {} '` for `options.bullet_ordered`, expected `.` or `)`", marker ), }); From 3eef67f361daaafa2b277847eb3f8822da4f53af Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 20 Sep 2024 13:09:36 +0300 Subject: [PATCH 50/73] Refactor --- mdast_util_to_markdown/Cargo.toml | 2 +- mdast_util_to_markdown/src/configure.rs | 2 -- mdast_util_to_markdown/src/construct_name.rs | 1 - .../src/handle/blockquote.rs | 10 +++--- mdast_util_to_markdown/src/handle/code.rs | 8 ++--- .../src/handle/list_item.rs | 5 +-- mdast_util_to_markdown/src/state.rs | 32 ++++++++++++------- .../src/util/indent_lines.rs | 19 ----------- mdast_util_to_markdown/src/util/mod.rs | 1 - 9 files changed, 34 insertions(+), 46 deletions(-) delete mode 100644 mdast_util_to_markdown/src/util/indent_lines.rs diff --git a/mdast_util_to_markdown/Cargo.toml b/mdast_util_to_markdown/Cargo.toml index faf3bccb..26846865 100644 --- a/mdast_util_to_markdown/Cargo.toml +++ b/mdast_util_to_markdown/Cargo.toml @@ -6,7 +6,7 @@ license = "MIT" [dependencies] markdown = { path = "../" } -regex = { version = "1.7.3" } +regex = { version = "1" } [dev-dependencies] pretty_assertions = { workspace = true } diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index 0960ea84..c5a19e00 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -1,4 +1,3 @@ -#[allow(dead_code)] pub struct Options { pub bullet: char, pub bullet_other: char, @@ -19,7 +18,6 @@ pub struct Options { pub rule_repetition: u32, } -#[allow(dead_code)] #[derive(Copy, Clone)] pub enum IndentOptions { Mixed, diff --git a/mdast_util_to_markdown/src/construct_name.rs b/mdast_util_to_markdown/src/construct_name.rs index 52511866..bac2da9f 100644 --- a/mdast_util_to_markdown/src/construct_name.rs +++ 
b/mdast_util_to_markdown/src/construct_name.rs @@ -1,5 +1,4 @@ #[derive(Clone, PartialEq)] -#[allow(dead_code)] pub enum ConstructName { Autolink, Blockquote, diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 80a575b8..253ca60c 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -5,7 +5,6 @@ use crate::{ construct_name::ConstructName, message::Message, state::{Info, State}, - util::indent_lines::indent_lines, }; use super::Handle; @@ -19,20 +18,21 @@ impl Handle for Blockquote { node: &Node, ) -> Result { state.enter(ConstructName::Blockquote); - let value = indent_lines(&state.container_flow(node)?, map); + let value = state.container_flow(node)?; + let value = state.indent_lines(&value, map); Ok(value) } } -fn map(value: &str, _line: usize, blank: bool) -> String { +fn map(line: &str, _index: usize, blank: bool) -> String { let marker = ">"; - let total_allocation = marker.len() + value.len() + 1; + let total_allocation = marker.len() + line.len() + 1; let mut result = String::with_capacity(total_allocation); result.push_str(marker); if !blank { let blank_str = " "; result.push_str(blank_str); } - result.push_str(value); + result.push_str(line); result } diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs index 45d6aa09..842d602b 100644 --- a/mdast_util_to_markdown/src/handle/code.rs +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -10,7 +10,7 @@ use crate::{ state::{Info, State}, util::{ check_fence::check_fence, format_code_as_indented::format_code_as_indented, - indent_lines::indent_lines, longest_char_streak::longest_char_streak, safe::SafeConfig, + longest_char_streak::longest_char_streak, safe::SafeConfig, }, }; @@ -28,7 +28,7 @@ impl Handle for Code { if format_code_as_indented(self, state) { state.enter(ConstructName::CodeIndented); - let value = indent_lines(&self.value, 
map); + let value = state.indent_lines(&self.value, map); state.exit(); return Ok(value); } @@ -81,10 +81,10 @@ impl Handle for Code { } } -fn map(value: &str, _line: usize, blank: bool) -> String { +fn map(line: &str, _index: usize, blank: bool) -> String { if blank { String::new() } else { - format!(" {}", value) + format!(" {}", line) } } diff --git a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs index 1f3fff15..c9f0a468 100644 --- a/mdast_util_to_markdown/src/handle/list_item.rs +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -9,7 +9,7 @@ use crate::{ construct_name::ConstructName, message::Message, state::{Info, State}, - util::{check_bullet::check_bullet, indent_lines::indent_lines}, + util::check_bullet::check_bullet, }; use super::Handle; @@ -59,7 +59,8 @@ impl Handle for ListItem { state.enter(ConstructName::ListItem); - let value = indent_lines(&state.container_flow(node)?, |line, index, blank| { + let value = state.container_flow(node)?; + let value = state.indent_lines(&value, |line, index, blank| { if index > 0 { if blank { String::new() diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 3ba3a3d6..eb6e39f4 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -25,7 +25,6 @@ use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; use regex::{Captures, Regex, RegexBuilder}; -#[allow(dead_code)] #[derive(Debug)] enum Join { True, @@ -33,7 +32,6 @@ enum Join { Number(usize), } -#[allow(dead_code)] pub struct State<'a> { pub stack: Vec, pub index_stack: Vec, @@ -54,7 +52,6 @@ impl<'a> Info<'a> { } } -#[allow(dead_code)] impl<'a> State<'a> { pub fn new(options: &'a Options) -> Self { State { @@ -127,7 +124,6 @@ impl<'a> State<'a> { .get(1) .map(|captured_group| captured_group.len()) .unwrap_or(0); - let before = pattern.before.is_some() || pattern.at_break; let after = 
pattern.after.is_some(); let position = full_match.start() + if before { captured_group_len } else { 0 }; @@ -153,7 +149,6 @@ impl<'a> State<'a> { let mut start = config.before.len(); let end = value.len() - config.after.len(); - for (index, position) in positions.iter().enumerate() { if *position < start || *position >= end { continue; @@ -183,7 +178,6 @@ impl<'a> State<'a> { if start != *position { result.push_str(&escape_backslashes(&value[start..*position], r"\")); } - start = *position; let char_at_pos = value.chars().nth(*position); @@ -275,8 +269,9 @@ impl<'a> State<'a> { } pub fn container_phrasing(&mut self, parent: &Node, info: &Info) -> Result { - let children = parent.children().expect("To be a parent."); - + let children = parent + .children() + .expect("The node to be a phrasing parent."); let mut results: String = String::new(); let mut index = 0; let mut children_iter = children.iter().peekable(); @@ -350,8 +345,7 @@ impl<'a> State<'a> { } pub fn container_flow(&mut self, parent: &Node) -> Result { - let children = parent.children().expect("To be a parent."); - + let children = parent.children().expect("The node to be a flow parent."); let mut results: String = String::new(); let mut children_iter = children.iter().peekable(); let mut index = 0; @@ -457,6 +451,23 @@ impl<'a> State<'a> { ) } + pub fn indent_lines(&self, value: &str, map: impl Fn(&str, usize, bool) -> String) -> String { + let mut result = String::new(); + let mut start = 0; + let mut line = 0; + let eol = Regex::new(r"\r?\n|\r").unwrap(); + for m in eol.captures_iter(value) { + let full_match = m.get(0).unwrap(); + let value_slice = &value[start..full_match.start()]; + result.push_str(&map(value_slice, line, value_slice.is_empty())); + result.push_str(full_match.as_str()); + start = full_match.start() + full_match.len(); + line += 1; + } + result.push_str(&map(&value[start..], line, value.is_empty())); + result + } + pub fn association(&self, node: &impl Association) -> String { if 
node.label().is_some() || node.identifier().is_empty() { return node.label().clone().unwrap_or_default(); @@ -484,7 +495,6 @@ impl<'a> State<'a> { Some('x') | Some('X') => 16, _ => 10, }; - let capture = &caps[2]; let numeric_encoded = if radix == 16 { &capture[2..] diff --git a/mdast_util_to_markdown/src/util/indent_lines.rs b/mdast_util_to_markdown/src/util/indent_lines.rs deleted file mode 100644 index 144fed62..00000000 --- a/mdast_util_to_markdown/src/util/indent_lines.rs +++ /dev/null @@ -1,19 +0,0 @@ -use alloc::string::String; -use regex::Regex; - -pub fn indent_lines(value: &str, map: impl Fn(&str, usize, bool) -> String) -> String { - let mut result = String::new(); - let mut start = 0; - let mut line = 0; - let eol = Regex::new(r"\r?\n|\r").unwrap(); - for m in eol.captures_iter(value) { - let full_match = m.get(0).unwrap(); - let value_slice = &value[start..full_match.start()]; - result.push_str(&map(value_slice, line, value_slice.is_empty())); - result.push_str(full_match.as_str()); - start = full_match.start() + full_match.len(); - line += 1; - } - result.push_str(&map(&value[start..], line, value.is_empty())); - result -} diff --git a/mdast_util_to_markdown/src/util/mod.rs b/mdast_util_to_markdown/src/util/mod.rs index c002aee4..7863418e 100644 --- a/mdast_util_to_markdown/src/util/mod.rs +++ b/mdast_util_to_markdown/src/util/mod.rs @@ -10,7 +10,6 @@ pub mod check_strong; pub mod format_code_as_indented; pub mod format_heading_as_setext; pub mod format_link_as_auto_link; -pub mod indent_lines; pub mod longest_char_streak; pub mod pattern_in_scope; pub mod safe; From ddde5b4e1880e3fbac0f1701b4dac831ae2fcb4d Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 20 Sep 2024 18:31:39 +0300 Subject: [PATCH 51/73] Add support for tight defs and few refactors --- .../src/handle/blockquote.rs | 6 +- mdast_util_to_markdown/src/state.rs | 79 +++++++++++++------ mdast_util_to_markdown/tests/core.rs | 50 +++++++++++- 3 files changed, 107 insertions(+), 28 deletions(-) 
diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 253ca60c..46ebad82 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -25,13 +25,11 @@ impl Handle for Blockquote { } fn map(line: &str, _index: usize, blank: bool) -> String { + let mut result = String::with_capacity(2 + line.len()); let marker = ">"; - let total_allocation = marker.len() + line.len() + 1; - let mut result = String::with_capacity(total_allocation); result.push_str(marker); if !blank { - let blank_str = " "; - result.push_str(blank_str); + result.push_str(" "); } result.push_str(line); result diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index eb6e39f4..18d529da 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -27,9 +27,9 @@ use regex::{Captures, Regex, RegexBuilder}; #[derive(Debug)] enum Join { - True, - False, - Number(usize), + Break, + HTMLComment, + Lines(usize), } pub struct State<'a> { @@ -370,7 +370,7 @@ impl<'a> State<'a> { results.push_str(&self.handle(child, &Info::new("\n", "\n"), Some(parent))?); if let Some(next_child) = children_iter.peek() { - self.set_between(child, next_child, parent, &mut results); + self.join(child, next_child, parent, &mut results); } index += 1; @@ -381,41 +381,75 @@ impl<'a> State<'a> { Ok(results) } - fn set_between(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { - match self.join_defaults(left, right, parent) { - Join::Number(n) => { + fn join(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { + let joins: [Join; 2] = [ + self.join_defaults(left, right, parent), + self.tight_definition(left, right), + ]; + + let mut index = 0; + if self.options.tight_definitions { + index += 1; + } + + loop { + if let Join::Break = joins[index] { + results.push_str("\n\n"); + return; + } + + if let 
Join::Lines(n) = joins[index] { + if n == 1 { + results.push_str("\n\n"); + return; + } results.push_str("\n".repeat(1 + n).as_ref()); + return; } - Join::False => { + + if let Join::HTMLComment = joins[index] { results.push_str("\n\n\n\n"); + return; } - Join::True => results.push_str("\n\n"), + + if index == 0 { + break; + } + + index -= 1; + } + } + + fn tight_definition(&self, left: &Node, right: &Node) -> Join { + if matches!(left, Node::Definition(_)) && Self::matches((left, right)) { + return Join::Lines(0); } + Join::Break } fn join_defaults(&self, left: &Node, right: &Node, parent: &Node) -> Join { if let Node::Code(code) = right { if format_code_as_indented(code, self) && matches!(left, Node::List(_)) { - return Join::False; + return Join::HTMLComment; } if let Node::Code(code) = left { if format_code_as_indented(code, self) { - return Join::False; + return Join::HTMLComment; } } } if matches!(parent, Node::List(_) | Node::ListItem(_)) { - if matches!(left, Node::Paragraph(_)) && Self::matches((left, right)) - || matches!(right, Node::Definition(_)) - { - return Join::True; - } + if matches!(left, Node::Paragraph(_)) { + if Self::matches((left, right)) || matches!(right, Node::Definition(_)) { + return Join::Break; + } - if let Node::Heading(heading) = right { - if format_heading_as_setext(heading, self) { - return Join::True; + if let Node::Heading(heading) = right { + if format_heading_as_setext(heading, self) { + return Join::Break; + } } } @@ -428,13 +462,13 @@ impl<'a> State<'a> { }; if spread { - return Join::Number(1); + return Join::Lines(1); } - return Join::Number(0); + return Join::Lines(0); } - Join::True + Join::Break } fn matches(nodes: (&Node, &Node)) -> bool { @@ -442,6 +476,7 @@ impl<'a> State<'a> { nodes, (Node::Root(_), Node::Root(_)) | (Node::Blockquote(_), Node::Blockquote(_)) + | (Node::Definition(_), Node::Definition(_)) | (Node::FootnoteDefinition(_), Node::FootnoteDefinition(_)) | (Node::Heading(_), Node::Heading(_)) | 
(Node::List(_), Node::List(_)) diff --git a/mdast_util_to_markdown/tests/core.rs b/mdast_util_to_markdown/tests/core.rs index 6ea07d9b..15ba7125 100644 --- a/mdast_util_to_markdown/tests/core.rs +++ b/mdast_util_to_markdown/tests/core.rs @@ -1,10 +1,56 @@ +use markdown::mdast::Definition; use markdown::mdast::{Node, Paragraph, Root, Text, ThematicBreak}; -use mdast_util_to_markdown::to_markdown as to; - +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use mdast_util_to_markdown::{to_markdown as to, Options}; use pretty_assertions::assert_eq; #[test] fn core() { + assert_eq!( + to_md_with_opts( + &Node::Root(Root { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Definition(Definition { + position: None, + url: String::new(), + title: None, + identifier: String::from("b"), + label: None + }), + Node::Definition(Definition { + position: None, + url: String::new(), + title: None, + identifier: String::from("c"), + label: None + }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("d"), + position: None + })], + position: None + }), + ], + position: None + }), + &Options { + tight_definitions: true, + ..Default::default() + } + ) + .unwrap(), + "a\n\n[b]: <>\n[c]: <>\n\nd\n", + "should support tight adjacent definitions when `tight_definitions: true`" + ); + assert_eq!( to(&Node::Root(Root { children: vec![ From 51bb56aa70b3642d1ab750ef40b430f2d23398d2 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 20 Sep 2024 18:59:33 +0300 Subject: [PATCH 52/73] Refactor join --- mdast_util_to_markdown/src/state.rs | 46 +++++++++++------------------ 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 18d529da..a2592b42 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ 
-382,41 +382,31 @@ impl<'a> State<'a> { } fn join(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { - let joins: [Join; 2] = [ - self.join_defaults(left, right, parent), - self.tight_definition(left, right), - ]; - - let mut index = 0; if self.options.tight_definitions { - index += 1; + Self::set_between(&self.tight_definition(left, right), results) + } else { + Self::set_between(&self.join_defaults(left, right, parent), results) } + } - loop { - if let Join::Break = joins[index] { - results.push_str("\n\n"); - return; - } - - if let Join::Lines(n) = joins[index] { - if n == 1 { - results.push_str("\n\n"); - return; - } - results.push_str("\n".repeat(1 + n).as_ref()); - return; - } + fn set_between(join: &Join, results: &mut String) -> () { + if let Join::Break = join { + results.push_str("\n\n"); + return; + } - if let Join::HTMLComment = joins[index] { - results.push_str("\n\n\n\n"); + if let Join::Lines(n) = join { + if *n == 1 { + results.push_str("\n\n"); return; } + results.push_str("\n".repeat(1 + n).as_ref()); + return; + } - if index == 0 { - break; - } - - index -= 1; + if let Join::HTMLComment = join { + results.push_str("\n\n\n\n"); + return; } } From 8ff33fd1b24493c3b90b86aa6507b384cee5ed70 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 22 Sep 2024 22:34:24 +0300 Subject: [PATCH 53/73] Refactor message --- .../src/handle/blockquote.rs | 6 +++-- mdast_util_to_markdown/src/handle/break.rs | 6 +++-- mdast_util_to_markdown/src/handle/code.rs | 6 +++-- .../src/handle/definition.rs | 6 +++-- mdast_util_to_markdown/src/handle/emphasis.rs | 6 +++-- mdast_util_to_markdown/src/handle/heading.rs | 6 +++-- mdast_util_to_markdown/src/handle/html.rs | 8 +++---- mdast_util_to_markdown/src/handle/image.rs | 6 +++-- .../src/handle/image_reference.rs | 6 +++-- .../src/handle/inline_code.rs | 10 ++++---- mdast_util_to_markdown/src/handle/link.rs | 6 +++-- .../src/handle/link_reference.rs | 6 +++-- mdast_util_to_markdown/src/handle/list.rs | 6 
+++-- .../src/handle/list_item.rs | 6 +++-- mdast_util_to_markdown/src/handle/mod.rs | 4 ++-- .../src/handle/paragraph.rs | 6 +++-- mdast_util_to_markdown/src/handle/root.rs | 8 +++---- mdast_util_to_markdown/src/handle/strong.rs | 6 +++-- mdast_util_to_markdown/src/handle/text.rs | 6 +++-- .../src/handle/thematic_break.rs | 6 +++-- mdast_util_to_markdown/src/lib.rs | 4 +--- mdast_util_to_markdown/src/message.rs | 24 ------------------- mdast_util_to_markdown/src/state.rs | 14 +++++++---- .../src/util/check_bullet.rs | 8 +++++-- .../src/util/check_bullet_ordered.rs | 8 +++++-- .../src/util/check_bullet_other.rs | 11 +++++++-- .../src/util/check_emphasis.rs | 8 +++++-- .../src/util/check_fence.rs | 8 +++++-- .../src/util/check_quote.rs | 8 +++++-- mdast_util_to_markdown/src/util/check_rule.rs | 8 +++++-- .../src/util/check_rule_repetition.rs | 8 +++++-- .../src/util/check_strong.rs | 8 +++++-- mdast_util_to_markdown/tests/emphasis.rs | 18 -------------- mdast_util_to_markdown/tests/strong.rs | 15 ------------ .../tests/thematic_break.rs | 24 ------------------- 35 files changed, 147 insertions(+), 153 deletions(-) delete mode 100644 mdast_util_to_markdown/src/message.rs diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 46ebad82..68f4386f 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -1,9 +1,11 @@ use alloc::string::String; -use markdown::mdast::{Blockquote, Node}; +use markdown::{ + mdast::{Blockquote, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, }; diff --git a/mdast_util_to_markdown/src/handle/break.rs b/mdast_util_to_markdown/src/handle/break.rs index 07b81739..95cef5a0 100644 --- a/mdast_util_to_markdown/src/handle/break.rs +++ b/mdast_util_to_markdown/src/handle/break.rs @@ -1,8 +1,10 @@ use alloc::string::ToString; -use markdown::mdast::{Break, Node}; 
+use markdown::{ + mdast::{Break, Node}, + message::Message, +}; use crate::{ - message::Message, state::{Info, State}, util::pattern_in_scope::pattern_in_scope, }; diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs index 842d602b..d4d74e7a 100644 --- a/mdast_util_to_markdown/src/handle/code.rs +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -2,11 +2,13 @@ use alloc::{ format, string::{String, ToString}, }; -use markdown::mdast::{Code, Node}; +use markdown::{ + mdast::{Code, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::{ check_fence::check_fence, format_code_as_indented::format_code_as_indented, diff --git a/mdast_util_to_markdown/src/handle/definition.rs b/mdast_util_to_markdown/src/handle/definition.rs index a553448a..17d7c974 100644 --- a/mdast_util_to_markdown/src/handle/definition.rs +++ b/mdast_util_to_markdown/src/handle/definition.rs @@ -1,9 +1,11 @@ use alloc::string::String; -use markdown::mdast::{Definition, Node}; +use markdown::{ + mdast::{Definition, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::{check_quote::check_quote, safe::SafeConfig}, }; diff --git a/mdast_util_to_markdown/src/handle/emphasis.rs b/mdast_util_to_markdown/src/handle/emphasis.rs index e0864785..56c0184e 100644 --- a/mdast_util_to_markdown/src/handle/emphasis.rs +++ b/mdast_util_to_markdown/src/handle/emphasis.rs @@ -1,9 +1,11 @@ use alloc::format; -use markdown::mdast::{Emphasis, Node}; +use markdown::{ + mdast::{Emphasis, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::check_emphasis::check_emphasis, }; diff --git a/mdast_util_to_markdown/src/handle/heading.rs b/mdast_util_to_markdown/src/handle/heading.rs index 6e547482..e78553c6 100644 --- a/mdast_util_to_markdown/src/handle/heading.rs +++ 
b/mdast_util_to_markdown/src/handle/heading.rs @@ -1,9 +1,11 @@ use alloc::format; -use markdown::mdast::{Heading, Node}; +use markdown::{ + mdast::{Heading, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::format_heading_as_setext::format_heading_as_setext, }; diff --git a/mdast_util_to_markdown/src/handle/html.rs b/mdast_util_to_markdown/src/handle/html.rs index 32ed6bd9..35dceee4 100644 --- a/mdast_util_to_markdown/src/handle/html.rs +++ b/mdast_util_to_markdown/src/handle/html.rs @@ -1,10 +1,10 @@ -use markdown::mdast::{Html, Node}; - -use crate::{ +use markdown::{ + mdast::{Html, Node}, message::Message, - state::{Info, State}, }; +use crate::state::{Info, State}; + use super::Handle; impl Handle for Html { diff --git a/mdast_util_to_markdown/src/handle/image.rs b/mdast_util_to_markdown/src/handle/image.rs index 0a1fc4e7..c7a4874e 100644 --- a/mdast_util_to_markdown/src/handle/image.rs +++ b/mdast_util_to_markdown/src/handle/image.rs @@ -1,9 +1,11 @@ use alloc::string::String; -use markdown::mdast::{Image, Node}; +use markdown::{ + mdast::{Image, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::{check_quote::check_quote, safe::SafeConfig}, }; diff --git a/mdast_util_to_markdown/src/handle/image_reference.rs b/mdast_util_to_markdown/src/handle/image_reference.rs index d3fd3e96..6de5d0c4 100644 --- a/mdast_util_to_markdown/src/handle/image_reference.rs +++ b/mdast_util_to_markdown/src/handle/image_reference.rs @@ -1,11 +1,13 @@ use core::mem; use alloc::string::String; -use markdown::mdast::{ImageReference, Node, ReferenceKind}; +use markdown::{ + mdast::{ImageReference, Node, ReferenceKind}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::safe::SafeConfig, }; diff --git a/mdast_util_to_markdown/src/handle/inline_code.rs 
b/mdast_util_to_markdown/src/handle/inline_code.rs index e8a2d78e..3631e1a0 100644 --- a/mdast_util_to_markdown/src/handle/inline_code.rs +++ b/mdast_util_to_markdown/src/handle/inline_code.rs @@ -1,11 +1,11 @@ use alloc::{format, string::String}; -use markdown::mdast::{InlineCode, Node}; -use regex::Regex; - -use crate::{ +use markdown::{ + mdast::{InlineCode, Node}, message::Message, - state::{Info, State}, }; +use regex::Regex; + +use crate::state::{Info, State}; use super::Handle; diff --git a/mdast_util_to_markdown/src/handle/link.rs b/mdast_util_to_markdown/src/handle/link.rs index 81afe716..bd576b95 100644 --- a/mdast_util_to_markdown/src/handle/link.rs +++ b/mdast_util_to_markdown/src/handle/link.rs @@ -1,11 +1,13 @@ use core::mem; use alloc::string::String; -use markdown::mdast::{Link, Node}; +use markdown::{ + mdast::{Link, Node}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::{ check_quote::check_quote, format_link_as_auto_link::format_link_as_auto_link, diff --git a/mdast_util_to_markdown/src/handle/link_reference.rs b/mdast_util_to_markdown/src/handle/link_reference.rs index e63beb53..becc53bf 100644 --- a/mdast_util_to_markdown/src/handle/link_reference.rs +++ b/mdast_util_to_markdown/src/handle/link_reference.rs @@ -1,11 +1,13 @@ use core::mem; use alloc::string::String; -use markdown::mdast::{LinkReference, Node, ReferenceKind}; +use markdown::{ + mdast::{LinkReference, Node, ReferenceKind}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::safe::SafeConfig, }; diff --git a/mdast_util_to_markdown/src/handle/list.rs b/mdast_util_to_markdown/src/handle/list.rs index e58b201f..baab1a42 100644 --- a/mdast_util_to_markdown/src/handle/list.rs +++ b/mdast_util_to_markdown/src/handle/list.rs @@ -1,8 +1,10 @@ -use markdown::mdast::{List, Node}; +use markdown::{ + mdast::{List, Node}, + message::Message, +}; use crate::{ 
construct_name::ConstructName, - message::Message, state::{Info, State}, util::{ check_bullet::check_bullet, check_bullet_ordered::check_bullet_ordered, diff --git a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs index c9f0a468..0b00d1e3 100644 --- a/mdast_util_to_markdown/src/handle/list_item.rs +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -2,12 +2,14 @@ use alloc::{ format, string::{String, ToString}, }; -use markdown::mdast::{ListItem, Node}; +use markdown::{ + mdast::{ListItem, Node}, + message::Message, +}; use crate::{ configure::IndentOptions, construct_name::ConstructName, - message::Message, state::{Info, State}, util::check_bullet::check_bullet, }; diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs index debca631..74708e77 100644 --- a/mdast_util_to_markdown/src/handle/mod.rs +++ b/mdast_util_to_markdown/src/handle/mod.rs @@ -1,6 +1,6 @@ -use crate::{message::Message, state::Info, State}; +use crate::{state::Info, State}; use alloc::string::String; -use markdown::mdast::Node; +use markdown::{mdast::Node, message::Message}; mod blockquote; mod r#break; diff --git a/mdast_util_to_markdown/src/handle/paragraph.rs b/mdast_util_to_markdown/src/handle/paragraph.rs index 1ada03b9..47a09669 100644 --- a/mdast_util_to_markdown/src/handle/paragraph.rs +++ b/mdast_util_to_markdown/src/handle/paragraph.rs @@ -1,8 +1,10 @@ -use markdown::mdast::{Node, Paragraph}; +use markdown::{ + mdast::{Node, Paragraph}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, }; diff --git a/mdast_util_to_markdown/src/handle/root.rs b/mdast_util_to_markdown/src/handle/root.rs index c3ce928c..4addfaf2 100644 --- a/mdast_util_to_markdown/src/handle/root.rs +++ b/mdast_util_to_markdown/src/handle/root.rs @@ -1,11 +1,11 @@ use alloc::string::String; -use markdown::mdast::{Node, Root}; - -use crate::{ +use markdown::{ + 
mdast::{Node, Root}, message::Message, - state::{Info, State}, }; +use crate::state::{Info, State}; + use super::Handle; impl Handle for Root { diff --git a/mdast_util_to_markdown/src/handle/strong.rs b/mdast_util_to_markdown/src/handle/strong.rs index 928f343b..c765715c 100644 --- a/mdast_util_to_markdown/src/handle/strong.rs +++ b/mdast_util_to_markdown/src/handle/strong.rs @@ -1,9 +1,11 @@ use alloc::format; -use markdown::mdast::{Node, Strong}; +use markdown::{ + mdast::{Node, Strong}, + message::Message, +}; use crate::{ construct_name::ConstructName, - message::Message, state::{Info, State}, util::check_strong::check_strong, }; diff --git a/mdast_util_to_markdown/src/handle/text.rs b/mdast_util_to_markdown/src/handle/text.rs index 8decb542..61b04fba 100644 --- a/mdast_util_to_markdown/src/handle/text.rs +++ b/mdast_util_to_markdown/src/handle/text.rs @@ -1,7 +1,9 @@ -use markdown::mdast::{Node, Text}; +use markdown::{ + mdast::{Node, Text}, + message::Message, +}; use crate::{ - message::Message, state::{Info, State}, util::safe::SafeConfig, }; diff --git a/mdast_util_to_markdown/src/handle/thematic_break.rs b/mdast_util_to_markdown/src/handle/thematic_break.rs index 23f98fda..a0f1c4e4 100644 --- a/mdast_util_to_markdown/src/handle/thematic_break.rs +++ b/mdast_util_to_markdown/src/handle/thematic_break.rs @@ -1,8 +1,10 @@ use alloc::format; -use markdown::mdast::{Node, ThematicBreak}; +use markdown::{ + mdast::{Node, ThematicBreak}, + message::Message, +}; use crate::{ - message::Message, state::{Info, State}, util::{check_rule::check_rule, check_rule_repetition::check_rule_repetition}, }; diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index c11e5205..d7ce4501 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -2,8 +2,7 @@ use alloc::string::String; pub use configure::{IndentOptions, Options}; -use markdown::mdast::Node; -use message::Message; +use markdown::{mdast::Node, 
message::Message}; use state::{Info, State}; extern crate alloc; @@ -11,7 +10,6 @@ mod association; mod configure; mod construct_name; mod handle; -mod message; mod state; mod r#unsafe; mod util; diff --git a/mdast_util_to_markdown/src/message.rs b/mdast_util_to_markdown/src/message.rs deleted file mode 100644 index c3f39447..00000000 --- a/mdast_util_to_markdown/src/message.rs +++ /dev/null @@ -1,24 +0,0 @@ -use core::{error::Error, fmt::Display}; - -use alloc::string::{String, ToString}; - -#[derive(Debug, PartialEq)] -pub struct Message { - pub reason: String, -} - -impl Error for Message {} - -impl Display for Message { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{}", self.reason) - } -} - -impl From<&str> for Message { - fn from(value: &str) -> Self { - Message { - reason: value.to_string(), - } - } -} diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index a2592b42..d84e67c9 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -9,7 +9,6 @@ use crate::handle::link::peek_link; use crate::handle::link_reference::peek_link_reference; use crate::handle::strong::peek_strong; use crate::handle::Handle; -use crate::message::Message; use crate::Options; use crate::{ r#unsafe::Unsafe, @@ -20,9 +19,11 @@ use crate::{ safe::{escape_backslashes, EscapeInfos, SafeConfig}, }, }; +use alloc::boxed::Box; use alloc::string::ToString; use alloc::{collections::BTreeMap, format, string::String, vec::Vec}; use markdown::mdast::Node; +use markdown::message::Message; use regex::{Captures, Regex, RegexBuilder}; #[derive(Debug)] @@ -100,7 +101,12 @@ impl<'a> State<'a> { image_reference.handle(self, info, parent, node) } Node::LinkReference(link_reference) => link_reference.handle(self, info, parent, node), - _ => Err("Cannot handle node".into()), + _ => Err(Message { + reason: format!("Can't handle node",), + rule_id: Box::new("unexpected-marker".into()), + source: 
Box::new("mdast-util-to_markdown".into()), + place: None, + }), } } @@ -370,7 +376,7 @@ impl<'a> State<'a> { results.push_str(&self.handle(child, &Info::new("\n", "\n"), Some(parent))?); if let Some(next_child) = children_iter.peek() { - self.join(child, next_child, parent, &mut results); + self.betweenn(child, next_child, parent, &mut results); } index += 1; @@ -381,7 +387,7 @@ impl<'a> State<'a> { Ok(results) } - fn join(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { + fn betweenn(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { if self.options.tight_definitions { Self::set_between(&self.tight_definition(left, right), results) } else { diff --git a/mdast_util_to_markdown/src/util/check_bullet.rs b/mdast_util_to_markdown/src/util/check_bullet.rs index a2379f2f..726949b6 100644 --- a/mdast_util_to_markdown/src/util/check_bullet.rs +++ b/mdast_util_to_markdown/src/util/check_bullet.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_bullet(state: &mut State) -> Result { let marker = state.options.bullet; @@ -11,6 +12,9 @@ pub fn check_bullet(state: &mut State) -> Result { "Cannot serialize items with `' {} '` for `options.bullet`, expected `*`, `+`, or `-`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs index 4057f9c7..4b26e009 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_bullet_ordered(state: &mut 
State) -> Result { let marker = state.options.bullet_ordered; @@ -11,6 +12,9 @@ pub fn check_bullet_ordered(state: &mut State) -> Result { "Cannot serialize items with `' {} '` for `options.bullet_ordered`, expected `.` or `)`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_bullet_other.rs b/mdast_util_to_markdown/src/util/check_bullet_other.rs index 06409422..3421361b 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_other.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_other.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; use super::check_bullet::check_bullet; @@ -14,6 +15,9 @@ pub fn check_bullet_other(state: &mut State) -> Result { "Cannot serialize items with `' {} '` for `options.bullet_other`, expected `*`, `+`, or `-`", bullet_other ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } @@ -23,6 +27,9 @@ pub fn check_bullet_other(state: &mut State) -> Result { "Expected `bullet` (`' {} '`) and `bullet_other` (`' {} '`) to be different", bullet, bullet_other ), + rule_id: Box::new("bullet-match-bullet_other".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_emphasis.rs b/mdast_util_to_markdown/src/util/check_emphasis.rs index c8f7856e..3bc05aa5 100644 --- a/mdast_util_to_markdown/src/util/check_emphasis.rs +++ b/mdast_util_to_markdown/src/util/check_emphasis.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_emphasis(state: &State) -> Result { let marker = state.options.emphasis; @@ -11,6 
+12,9 @@ pub fn check_emphasis(state: &State) -> Result { "Cannot serialize emphasis with `{}` for `options.emphasis`, expected `*`, or `_`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_fence.rs b/mdast_util_to_markdown/src/util/check_fence.rs index f7d03c11..1fe2df0e 100644 --- a/mdast_util_to_markdown/src/util/check_fence.rs +++ b/mdast_util_to_markdown/src/util/check_fence.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_fence(state: &mut State) -> Result { let marker = state.options.fence; @@ -11,6 +12,9 @@ pub fn check_fence(state: &mut State) -> Result { "Cannot serialize code with `{}` for `options.fence`, expected `` ` `` or `~`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_quote.rs b/mdast_util_to_markdown/src/util/check_quote.rs index ed3bd04d..0b88dd8c 100644 --- a/mdast_util_to_markdown/src/util/check_quote.rs +++ b/mdast_util_to_markdown/src/util/check_quote.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_quote(state: &State) -> Result { let marker = state.options.quote; @@ -11,6 +12,9 @@ pub fn check_quote(state: &State) -> Result { "Cannot serialize title with `' {} '` for `options.quote`, expected `\"`, or `'`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_rule.rs b/mdast_util_to_markdown/src/util/check_rule.rs index 59e3a667..dfc77a48 100644 --- 
a/mdast_util_to_markdown/src/util/check_rule.rs +++ b/mdast_util_to_markdown/src/util/check_rule.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_rule(state: &State) -> Result { let marker = state.options.rule; @@ -11,6 +12,9 @@ pub fn check_rule(state: &State) -> Result { "Cannot serialize rules with `{}` for `options.rule`, expected `*`, `-`, or `_`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_rule_repetition.rs b/mdast_util_to_markdown/src/util/check_rule_repetition.rs index 15a0d158..ccb0fe9a 100644 --- a/mdast_util_to_markdown/src/util/check_rule_repetition.rs +++ b/mdast_util_to_markdown/src/util/check_rule_repetition.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_rule_repetition(state: &State) -> Result { let repetition = state.options.rule_repetition; @@ -11,6 +12,9 @@ pub fn check_rule_repetition(state: &State) -> Result { "Cannot serialize rules with repetition `{}` for `options.rule_repetition`, expected `3` or more", repetition ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_strong.rs b/mdast_util_to_markdown/src/util/check_strong.rs index 622ad94e..28a50296 100644 --- a/mdast_util_to_markdown/src/util/check_strong.rs +++ b/mdast_util_to_markdown/src/util/check_strong.rs @@ -1,6 +1,7 @@ -use alloc::format; +use alloc::{boxed::Box, format}; +use markdown::message::Message; -use crate::{message::Message, state::State}; +use crate::state::State; pub fn check_strong(state: &State) -> Result { let marker = 
state.options.strong; @@ -11,6 +12,9 @@ pub fn check_strong(state: &State) -> Result { "Cannot serialize strong with `{}` for `options.strong`, expected `*`, or `_`", marker ), + rule_id: Box::new("unexpected-marker".into()), + source: Box::new("mdast-util-to_markdown".into()), + place: None, }); } diff --git a/mdast_util_to_markdown/tests/emphasis.rs b/mdast_util_to_markdown/tests/emphasis.rs index be39cffc..dcc18344 100644 --- a/mdast_util_to_markdown/tests/emphasis.rs +++ b/mdast_util_to_markdown/tests/emphasis.rs @@ -18,24 +18,6 @@ fn emphasis() { "should support an empty emphasis" ); - assert_eq!( - to_md_with_opts( - &Node::Emphasis(Emphasis { - children: Vec::new(), - position: None - }), - &Options { - emphasis: '?', - ..Default::default() - } - ), - Err( - "Cannot serialize emphasis with `?` for `options.emphasis`, expected `*`, or `_`" - .into() - ), - "should throw on when given an incorrect `emphasis`" - ); - assert_eq!( to(&Node::Emphasis(Emphasis { children: vec![Node::Text(Text { diff --git a/mdast_util_to_markdown/tests/strong.rs b/mdast_util_to_markdown/tests/strong.rs index ce75d926..f4e8491d 100644 --- a/mdast_util_to_markdown/tests/strong.rs +++ b/mdast_util_to_markdown/tests/strong.rs @@ -17,21 +17,6 @@ fn strong() { "should support an empty strong" ); - assert_eq!( - to_md_with_opts( - &Node::Strong(Strong { - children: Vec::new(), - position: None - }), - &Options { - strong: '?', - ..Default::default() - } - ), - Err("Cannot serialize strong with `?` for `options.strong`, expected `*`, or `_`".into()), - "should throw on when given an incorrect `strong`" - ); - assert_eq!( to(&Node::Strong(Strong { children: vec![Node::Text(Text { diff --git a/mdast_util_to_markdown/tests/thematic_break.rs b/mdast_util_to_markdown/tests/thematic_break.rs index 9b6cacee..b1e4df4d 100644 --- a/mdast_util_to_markdown/tests/thematic_break.rs +++ b/mdast_util_to_markdown/tests/thematic_break.rs @@ -39,18 +39,6 @@ fn thematic_break() { "should support a thematic 
break w/ underscores when `rule: \"_\"`" ); - assert_eq!( - to_md_with_opts( - &Node::ThematicBreak(ThematicBreak { position: None }), - &Options { - rule: '.', - ..Default::default() - } - ), - Err("Cannot serialize rules with `.` for `options.rule`, expected `*`, `-`, or `_`".into()), - "should throw on when given an incorrect `rule`" - ); - assert_eq!( to_md_with_opts( &Node::ThematicBreak(ThematicBreak { position: None }), @@ -64,18 +52,6 @@ fn thematic_break() { "should support a thematic break w/ more repetitions w/ `rule_repetition`" ); - assert_eq!( - to_md_with_opts( - &Node::ThematicBreak(ThematicBreak { position: None }), - &Options { - rule_repetition: 2, - ..Default::default() - } - ), - Err("Cannot serialize rules with repetition `2` for `options.rule_repetition`, expected `3` or more".into()), - "should throw on when given an incorrect `ruleRepetition`" - ); - assert_eq!( to_md_with_opts( &Node::ThematicBreak(ThematicBreak { position: None }), From 216ffd674dd048b1cc97fb7bd56496d16ea19578 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sun, 22 Sep 2024 22:38:04 +0300 Subject: [PATCH 54/73] Update the message source --- mdast_util_to_markdown/src/state.rs | 4 ++-- mdast_util_to_markdown/src/util/check_bullet.rs | 2 +- mdast_util_to_markdown/src/util/check_bullet_ordered.rs | 2 +- mdast_util_to_markdown/src/util/check_bullet_other.rs | 2 +- mdast_util_to_markdown/src/util/check_emphasis.rs | 2 +- mdast_util_to_markdown/src/util/check_fence.rs | 2 +- mdast_util_to_markdown/src/util/check_quote.rs | 2 +- mdast_util_to_markdown/src/util/check_rule.rs | 2 +- mdast_util_to_markdown/src/util/check_rule_repetition.rs | 2 +- mdast_util_to_markdown/src/util/check_strong.rs | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index d84e67c9..1b375489 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -103,8 +103,8 @@ impl<'a> 
State<'a> { Node::LinkReference(link_reference) => link_reference.handle(self, info, parent, node), _ => Err(Message { reason: format!("Can't handle node",), - rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + rule_id: Box::new("unexpected-node".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }), } diff --git a/mdast_util_to_markdown/src/util/check_bullet.rs b/mdast_util_to_markdown/src/util/check_bullet.rs index 726949b6..3c7fc623 100644 --- a/mdast_util_to_markdown/src/util/check_bullet.rs +++ b/mdast_util_to_markdown/src/util/check_bullet.rs @@ -13,7 +13,7 @@ pub fn check_bullet(state: &mut State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs index 4b26e009..1d5bd460 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_ordered.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_ordered.rs @@ -13,7 +13,7 @@ pub fn check_bullet_ordered(state: &mut State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_bullet_other.rs b/mdast_util_to_markdown/src/util/check_bullet_other.rs index 3421361b..18b3c41e 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_other.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_other.rs @@ -16,7 +16,7 @@ pub fn check_bullet_other(state: &mut State) -> Result { bullet_other ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git 
a/mdast_util_to_markdown/src/util/check_emphasis.rs b/mdast_util_to_markdown/src/util/check_emphasis.rs index 3bc05aa5..e395f2dc 100644 --- a/mdast_util_to_markdown/src/util/check_emphasis.rs +++ b/mdast_util_to_markdown/src/util/check_emphasis.rs @@ -13,7 +13,7 @@ pub fn check_emphasis(state: &State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_fence.rs b/mdast_util_to_markdown/src/util/check_fence.rs index 1fe2df0e..fc11c9e1 100644 --- a/mdast_util_to_markdown/src/util/check_fence.rs +++ b/mdast_util_to_markdown/src/util/check_fence.rs @@ -13,7 +13,7 @@ pub fn check_fence(state: &mut State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_quote.rs b/mdast_util_to_markdown/src/util/check_quote.rs index 0b88dd8c..ac2c96bb 100644 --- a/mdast_util_to_markdown/src/util/check_quote.rs +++ b/mdast_util_to_markdown/src/util/check_quote.rs @@ -13,7 +13,7 @@ pub fn check_quote(state: &State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_rule.rs b/mdast_util_to_markdown/src/util/check_rule.rs index dfc77a48..beba0efe 100644 --- a/mdast_util_to_markdown/src/util/check_rule.rs +++ b/mdast_util_to_markdown/src/util/check_rule.rs @@ -13,7 +13,7 @@ pub fn check_rule(state: &State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git 
a/mdast_util_to_markdown/src/util/check_rule_repetition.rs b/mdast_util_to_markdown/src/util/check_rule_repetition.rs index ccb0fe9a..ea3614b7 100644 --- a/mdast_util_to_markdown/src/util/check_rule_repetition.rs +++ b/mdast_util_to_markdown/src/util/check_rule_repetition.rs @@ -13,7 +13,7 @@ pub fn check_rule_repetition(state: &State) -> Result { repetition ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } diff --git a/mdast_util_to_markdown/src/util/check_strong.rs b/mdast_util_to_markdown/src/util/check_strong.rs index 28a50296..eea2dd4b 100644 --- a/mdast_util_to_markdown/src/util/check_strong.rs +++ b/mdast_util_to_markdown/src/util/check_strong.rs @@ -13,7 +13,7 @@ pub fn check_strong(state: &State) -> Result { marker ), rule_id: Box::new("unexpected-marker".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } From 088fd7a043a222d4062996fbcdc1f4d5b5e61c4c Mon Sep 17 00:00:00 2001 From: Bnchi Date: Mon, 23 Sep 2024 00:49:06 +0300 Subject: [PATCH 55/73] Add more blockquote tests --- mdast_util_to_markdown/tests/blockquote.rs | 206 ++++++++++++++++++++- 1 file changed, 204 insertions(+), 2 deletions(-) diff --git a/mdast_util_to_markdown/tests/blockquote.rs b/mdast_util_to_markdown/tests/blockquote.rs index ab27a5b2..3990520d 100644 --- a/mdast_util_to_markdown/tests/blockquote.rs +++ b/mdast_util_to_markdown/tests/blockquote.rs @@ -1,6 +1,12 @@ -use markdown::mdast::{Blockquote, Node, Paragraph, Text, ThematicBreak}; +use markdown::mdast::Definition; +use markdown::mdast::{ + Blockquote, Break, Code, Heading, InlineCode, Node, Paragraph, Text, ThematicBreak, +}; + use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use mdast_util_to_markdown::Options; use pretty_assertions::assert_eq; 
#[test] @@ -51,6 +57,202 @@ fn block_quote() { })) .unwrap(), "> a\n>\n> ***\n>\n> b\n", - "should support a block quote" + "should support a block quote w/ children" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + }),], + position: None, + })) + .unwrap(), + "> a\n> b\n", + "should support text w/ a line ending in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + Node::Text(Text { + value: String::from("b"), + position: None + }) + ], + position: None + }),], + position: None, + })) + .unwrap(), + "> ab\n", + "should support adjacent texts in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + }), + Node::Blockquote(Blockquote { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::InlineCode(InlineCode { + value: String::from("b\nc"), + position: None + }), + Node::Text(Text { + value: String::from("\nd"), + position: None + }), + ], + position: None + }), + Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a b"), + position: None + })], + position: None, + depth: 1 + }) + ], + position: None + }), + ], + position: None, + })) + .unwrap(), + "> a\n> b\n>\n> > a\n> > `b\n> > c`\n> > d\n> >\n> > # a b\n", + "should support a block quote in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + 
Node::Break(Break { position: None }), + Node::Text(Text { + value: String::from("b"), + position: None + }) + ], + position: None + }),], + position: None, + })) + .unwrap(), + "> a\\\n> b\n", + "should support a break in a block quote" + ); + + assert_eq!( + to_md_with_opts( + &Node::Blockquote(Blockquote { + children: vec![Node::Code(Code { + value: String::from("a\nb\n\nc"), + position: None, + lang: None, + meta: None + })], + position: None, + }), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + "> a\n> b\n>\n> c\n", + "should support code (flow, indented) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Code(Code { + value: String::from("c\nd\n\ne"), + position: None, + lang: String::from("a\nb").into(), + meta: None + })], + position: None, + })) + .unwrap(), + "> ```a b\n> c\n> d\n>\n> e\n> ```\n", + "should support code (flow, fenced) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::InlineCode(InlineCode { + value: String::from("b\nc"), + position: None + }), + Node::Text(Text { + value: String::from("\nd"), + position: None + }) + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> `b\n> c`\n> d\n", + "should support code (text) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![ + Node::Definition(Definition { + position: None, + title: Some("e\nf".into()), + url: "c\nd".into(), + identifier: "a\nb".into(), + label: None + }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + }) + ], + position: None, + })) + .unwrap(), + "> [a\n> b]: \"e\n> f\"\n>\n> a\n> b\n", + "should support a definition in a block quote" ); } From bc509ea0c7a98c9b01fbd0f7d8bfbf6dbbeae46c 
Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 25 Sep 2024 08:37:12 +0300 Subject: [PATCH 56/73] Complete blockquote tests --- mdast_util_to_markdown/src/state.rs | 6 +- mdast_util_to_markdown/tests/blockquote.rs | 331 ++++++++++++++++++++- mdast_util_to_markdown/tests/roundtrip.rs | 16 + 3 files changed, 348 insertions(+), 5 deletions(-) create mode 100644 mdast_util_to_markdown/tests/roundtrip.rs diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 1b375489..12bb3dbd 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -102,7 +102,7 @@ impl<'a> State<'a> { } Node::LinkReference(link_reference) => link_reference.handle(self, info, parent, node), _ => Err(Message { - reason: format!("Can't handle node",), + reason: format!("Can't handle node"), rule_id: Box::new("unexpected-node".into()), source: Box::new("mdast-util-to-markdown".into()), place: None, @@ -376,7 +376,7 @@ impl<'a> State<'a> { results.push_str(&self.handle(child, &Info::new("\n", "\n"), Some(parent))?); if let Some(next_child) = children_iter.peek() { - self.betweenn(child, next_child, parent, &mut results); + self.between(child, next_child, parent, &mut results); } index += 1; @@ -387,7 +387,7 @@ impl<'a> State<'a> { Ok(results) } - fn betweenn(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { + fn between(&self, left: &Node, right: &Node, parent: &Node, results: &mut String) { if self.options.tight_definitions { Self::set_between(&self.tight_definition(left, right), results) } else { diff --git a/mdast_util_to_markdown/tests/blockquote.rs b/mdast_util_to_markdown/tests/blockquote.rs index 3990520d..1de821e4 100644 --- a/mdast_util_to_markdown/tests/blockquote.rs +++ b/mdast_util_to_markdown/tests/blockquote.rs @@ -1,7 +1,8 @@ -use markdown::mdast::Definition; use markdown::mdast::{ - Blockquote, Break, Code, Heading, InlineCode, Node, Paragraph, Text, ThematicBreak, + Blockquote, Break, Code, 
Heading, Html, Image, ImageReference, InlineCode, Link, LinkReference, + List, ListItem, Node, Paragraph, ReferenceKind, Strong, Text, ThematicBreak, }; +use markdown::mdast::{Definition, Emphasis}; use mdast_util_to_markdown::to_markdown as to; use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; @@ -255,4 +256,330 @@ fn block_quote() { "> [a\n> b]: \"e\n> f\"\n>\n> a\n> b\n", "should support a definition in a block quote" ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::Emphasis(Emphasis { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + }),], + position: None + }), + Node::Text(Text { + value: String::from("\nd"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> *c\n> d*\n> d\n", + "should support an emphasis in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None, + depth: 3 + })], + position: None, + })) + .unwrap(), + "> ### a b\n", + "should support a heading (atx) in a block quote" + ); + + assert_eq!( + to_md_with_opts( + &Node::Blockquote(Blockquote { + children: vec![Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + }),], + position: None, + depth: 1 + })], + position: None, + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "> a\n> b\n> =\n", + "should support a heading (setext) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Html(Html { + value: String::from(""), + position: None + })], + position: None, + })) + .unwrap(), + "> hidden>\n", + "should support html (flow) in a block quote" + ); + + 
assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + Node::Html(Html { + value: String::from(""), + position: None + }), + Node::Text(Text { + value: String::from("\nb"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a hidden>\n> b\n", + "should support html (text) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::Image(Image { + position: None, + alt: String::from("d\ne"), + url: String::from("b\nc"), + title: Some(String::from("f\ng")) + }), + Node::Text(Text { + value: String::from("\nh"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> ![d\n> e]( \"f\n> g\")\n> h\n", + "should support an image (resource) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::ImageReference(ImageReference { + position: None, + alt: String::from("b\nc"), + label: Some(String::from("d\ne")), + reference_kind: ReferenceKind::Collapsed, + identifier: String::from("f"), + }), + Node::Text(Text { + value: String::from("\ng"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> ![b\n> c][d\n> e]\n> g\n", + "should support an image (reference) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::Link(Link { + children: vec![Node::Text(Text { + value: String::from("d\ne"), + position: None + })], + position: None, + url: 
String::from("b\nc"), + title: Some(String::from("f\ng")) + }), + Node::Text(Text { + value: String::from("\nh"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> [d\n> e]( \"f\n> g\")\n> h\n", + "should support a link (resource) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::LinkReference(LinkReference { + children: vec![Node::Text(Text { + value: String::from("b\nc"), + position: None + }),], + position: None, + reference_kind: ReferenceKind::Collapsed, + identifier: String::from("f"), + label: Some(String::from("d\ne")) + }), + Node::Text(Text { + value: String::from("\ng"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> [b\n> c][d\n> e]\n> g\n", + "should support a link (reference) in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a\nb"), + position: None + })], + position: None + }), + Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("e\nf"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + }), + ], + position: None, + ordered: false, + start: None, + spread: false + }) + ], + position: None, + })) + 
.unwrap(), + "> a\n> b\n>\n> - c\n> d\n> - ***\n> - e\n> f\n", + "should support a list in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a\n"), + position: None + }), + Node::Strong(Strong { + children: vec![Node::Text(Text { + value: String::from("c\nd"), + position: None + })], + position: None + }), + Node::Text(Text { + value: String::from("\nd"), + position: None + }), + ], + position: None + })], + position: None, + })) + .unwrap(), + "> a\n> **c\n> d**\n> d\n", + "should support a strong in a block quote" + ); + + assert_eq!( + to(&Node::Blockquote(Blockquote { + children: vec![ + Node::ThematicBreak(ThematicBreak { position: None }), + Node::ThematicBreak(ThematicBreak { position: None }) + ], + position: None, + })) + .unwrap(), + "> ***\n>\n> ***\n", + "should support a thematic break in a block quote" + ); } diff --git a/mdast_util_to_markdown/tests/roundtrip.rs b/mdast_util_to_markdown/tests/roundtrip.rs new file mode 100644 index 00000000..ae8d89ba --- /dev/null +++ b/mdast_util_to_markdown/tests/roundtrip.rs @@ -0,0 +1,16 @@ +use mdast_util_to_markdown::to_markdown as to; + +use markdown::to_mdast as from; +use pretty_assertions::assert_eq; + +#[test] +fn round_trip() { + let doc: String = vec![ + "> * Lorem ipsum dolor sit amet", + ">", + "> * consectetur adipisicing elit", + "", + ] + .join("\n"); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); +} From 9bfa391ca9027913278685ee89af2741e10dccb8 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 25 Sep 2024 09:49:36 +0300 Subject: [PATCH 57/73] Add tests for list item --- .../src/handle/list_item.rs | 11 +- mdast_util_to_markdown/src/state.rs | 6 +- mdast_util_to_markdown/tests/list_item.rs | 183 ++++++++++++++++++ 3 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 mdast_util_to_markdown/tests/list_item.rs diff --git 
a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs index 0b00d1e3..20463562 100644 --- a/mdast_util_to_markdown/src/handle/list_item.rs +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -51,10 +51,14 @@ impl Handle for ListItem { } size = bullet.len() + 1; + } + + if matches!(list_item_indent, IndentOptions::Tab) || self.spread { + size = compute_size(size); + } - if matches!(list_item_indent, IndentOptions::Tab) - || (matches!(list_item_indent, IndentOptions::Mixed) && list.spread || self.spread) - { + if let Some(Node::List(list)) = parent { + if matches!(list_item_indent, IndentOptions::Mixed) && list.spread { size = compute_size(size); } } @@ -85,6 +89,7 @@ impl Handle for ListItem { result } }); + state.exit(); Ok(value) } diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 12bb3dbd..56ecdb1c 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -438,7 +438,11 @@ impl<'a> State<'a> { if matches!(parent, Node::List(_) | Node::ListItem(_)) { if matches!(left, Node::Paragraph(_)) { - if Self::matches((left, right)) || matches!(right, Node::Definition(_)) { + if Self::matches((left, right)) { + return Join::Break; + } + + if matches!(right, Node::Definition(_)) { return Join::Break; } diff --git a/mdast_util_to_markdown/tests/list_item.rs b/mdast_util_to_markdown/tests/list_item.rs new file mode 100644 index 00000000..d18043ef --- /dev/null +++ b/mdast_util_to_markdown/tests/list_item.rs @@ -0,0 +1,183 @@ +use markdown::mdast::{ListItem, Node, Text}; +use markdown::mdast::{Paragraph, ThematicBreak}; +use mdast_util_to_markdown::{to_markdown as to, IndentOptions}; + +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; +use mdast_util_to_markdown::Options; +use pretty_assertions::assert_eq; + +#[test] +fn list_item() { + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![], + position: None, + spread: 
false, + checked: None + })) + .unwrap(), + "*\n", + "should support a list item" + ); + + assert_eq!( + to_md_with_opts( + &Node::ListItem(ListItem { + children: Vec::new(), + position: None, + spread: false, + checked: None + }), + &Options { + bullet: '+', + ..Default::default() + } + ) + .unwrap(), + "+\n", + "should serialize an item w/ a plus as bullet when `bullet: \" + \"`" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n", + "should support a list item w/ a child" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }), + ], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n ***\n b\n", + "should support a list item w/ children" + ); + + assert_eq!( + to_md_with_opts( + &Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None }) + ], + position: None, + spread: false, + checked: None + }), + &Options { + list_item_indent: IndentOptions::One, + ..Default::default() + } + ) + .unwrap(), + "* a\n ***\n", + "should use one space after the bullet for `listItemIndent: \"one\"`" + ); + + assert_eq!( + to_md_with_opts( + &Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + 
position: None + }),], + position: None, + spread: false, + checked: None + }), + &Options { + list_item_indent: IndentOptions::Mixed, + ..Default::default() + } + ) + .unwrap(), + "* a\n", + "should use one space after the bullet for `listItemIndent: \"mixed\"`, when the item is not spread" + ); + + assert_eq!( + to_md_with_opts( + &Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + spread: true, + checked: None + }), + &Options { + list_item_indent: IndentOptions::Mixed, + ..Default::default() + } + ) + .unwrap(), + "* a\n\n ***\n", + "should use a tab stop of spaces after the bullet for `listItemIndent: \"mixed\"`, when the item is spread" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None }), + ], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n ***\n", + "should not use blank lines between child blocks for items w/ `spread: false`" + ); +} From a288baa9cc494086459acaa49598e8c024e57d1c Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 25 Sep 2024 10:03:11 +0300 Subject: [PATCH 58/73] Minor refactor --- mdast_util_to_markdown/src/handle/blockquote.rs | 1 + mdast_util_to_markdown/src/handle/code.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 68f4386f..11b5a559 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -22,6 +22,7 @@ impl Handle for Blockquote { state.enter(ConstructName::Blockquote); let value = state.container_flow(node)?; let value = 
state.indent_lines(&value, map); + state.exit(); Ok(value) } } diff --git a/mdast_util_to_markdown/src/handle/code.rs b/mdast_util_to_markdown/src/handle/code.rs index d4d74e7a..4bb9cafe 100644 --- a/mdast_util_to_markdown/src/handle/code.rs +++ b/mdast_util_to_markdown/src/handle/code.rs @@ -78,6 +78,7 @@ impl Handle for Code { } value.push_str(&sequence); + state.exit(); Ok(value) } From 3312e3889a44559cbb7fb64b81abe389981f4fa1 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 25 Sep 2024 10:11:42 +0300 Subject: [PATCH 59/73] Update CI jobs --- .github/workflows/main.yml | 2 ++ mdast_util_to_markdown/src/handle/blockquote.rs | 2 +- mdast_util_to_markdown/src/state.rs | 14 ++++---------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e2a80e17..f7a8d914 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,6 +13,8 @@ jobs: components: rustfmt, clippy - run: cargo fmt --check && cargo clippy --examples --tests --benches --all-features - run: cargo test --all-features + - run: cargo clippy -p mdast_util_to_markdown + - run: cargo test -p mdast_util_to_markdown coverage: runs-on: ubuntu-latest steps: diff --git a/mdast_util_to_markdown/src/handle/blockquote.rs b/mdast_util_to_markdown/src/handle/blockquote.rs index 11b5a559..1aa9603b 100644 --- a/mdast_util_to_markdown/src/handle/blockquote.rs +++ b/mdast_util_to_markdown/src/handle/blockquote.rs @@ -32,7 +32,7 @@ fn map(line: &str, _index: usize, blank: bool) -> String { let marker = ">"; result.push_str(marker); if !blank { - result.push_str(" "); + result.push(' '); } result.push_str(line); result diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 56ecdb1c..a6d48b5a 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -102,7 +102,7 @@ impl<'a> State<'a> { } Node::LinkReference(link_reference) => link_reference.handle(self, info, 
parent, node), _ => Err(Message { - reason: format!("Can't handle node"), + reason: String::from("Can't handle node"), rule_id: Box::new("unexpected-node".into()), source: Box::new("mdast-util-to-markdown".into()), place: None, @@ -395,24 +395,18 @@ impl<'a> State<'a> { } } - fn set_between(join: &Join, results: &mut String) -> () { + fn set_between(join: &Join, results: &mut String) { if let Join::Break = join { results.push_str("\n\n"); - return; - } - - if let Join::Lines(n) = join { + } else if let Join::Lines(n) = join { if *n == 1 { results.push_str("\n\n"); return; } results.push_str("\n".repeat(1 + n).as_ref()); return; - } - - if let Join::HTMLComment = join { + } else if let Join::HTMLComment = join { results.push_str("\n\n\n\n"); - return; } } From 2262c6ef83557f6e16df62c1616315e7b17fc065 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 2 Oct 2024 12:45:10 +0300 Subject: [PATCH 60/73] Complete the tests for list_item --- mdast_util_to_markdown/src/construct_name.rs | 2 +- mdast_util_to_markdown/src/handle/list.rs | 32 ++- mdast_util_to_markdown/src/state.rs | 20 +- .../src/util/check_bullet_other.rs | 6 +- mdast_util_to_markdown/tests/list_item.rs | 263 +++++++++++++++++- 5 files changed, 300 insertions(+), 23 deletions(-) diff --git a/mdast_util_to_markdown/src/construct_name.rs b/mdast_util_to_markdown/src/construct_name.rs index bac2da9f..94277758 100644 --- a/mdast_util_to_markdown/src/construct_name.rs +++ b/mdast_util_to_markdown/src/construct_name.rs @@ -1,4 +1,4 @@ -#[derive(Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ConstructName { Autolink, Blockquote, diff --git a/mdast_util_to_markdown/src/handle/list.rs b/mdast_util_to_markdown/src/handle/list.rs index baab1a42..465e09da 100644 --- a/mdast_util_to_markdown/src/handle/list.rs +++ b/mdast_util_to_markdown/src/handle/list.rs @@ -46,13 +46,19 @@ impl Handle for List { use_different_marker = bullet == bullet_last_used; } - if !self.ordered && !self.children.is_empty() { + if 
!self.ordered { let is_valid_bullet = bullet == '*' || bullet == '-'; - let first_child_has_no_children = self.children[0].children().is_none(); let is_within_bounds = state.stack.len() >= 4 && state.index_stack.len() >= 3; + + let first_list_item_has_no_children = !self.children.is_empty() + && self.children[0] + .children() + .map(|inner| inner.is_empty()) + .unwrap(); + if is_valid_bullet - && first_child_has_no_children && is_within_bounds + && first_list_item_has_no_children && state.stack[state.stack.len() - 1] == ConstructName::List && state.stack[state.stack.len() - 2] == ConstructName::ListItem && state.stack[state.stack.len() - 3] == ConstructName::List @@ -63,17 +69,17 @@ impl Handle for List { { use_different_marker = true; } - } - if check_rule(state)? == bullet { - for child in self.children.iter() { - if let Some(child_children) = child.children() { - if !child_children.is_empty() - && matches!(child, Node::ListItem(_)) - && matches!(child_children[0], Node::ThematicBreak(_)) - { - use_different_marker = true; - break; + if check_rule(state)? 
== bullet { + for child in self.children.iter() { + if let Some(child_children) = child.children() { + if !child_children.is_empty() + && matches!(child, Node::ListItem(_)) + && matches!(child_children[0], Node::ThematicBreak(_)) + { + use_different_marker = true; + break; + } } } } diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index a6d48b5a..3d192df8 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -278,13 +278,16 @@ impl<'a> State<'a> { let children = parent .children() .expect("The node to be a phrasing parent."); + + if children.is_empty() { + return Ok(String::new()); + } + let mut results: String = String::new(); let mut index = 0; let mut children_iter = children.iter().peekable(); - if !children.is_empty() { - self.index_stack.push(0); - } + self.index_stack.push(0); while let Some(child) = children_iter.next() { if index > 0 { @@ -352,13 +355,16 @@ impl<'a> State<'a> { pub fn container_flow(&mut self, parent: &Node) -> Result { let children = parent.children().expect("The node to be a flow parent."); + + if children.is_empty() { + return Ok(String::new()); + } + let mut results: String = String::new(); let mut children_iter = children.iter().peekable(); let mut index = 0; - if !children.is_empty() { - self.index_stack.push(0); - } + self.index_stack.push(0); while let Some(child) = children_iter.next() { if index > 0 { @@ -369,7 +375,7 @@ impl<'a> State<'a> { *top = index; } - if matches!(child, Node::List(_)) { + if !matches!(child, Node::List(_)) { self.bullet_last_used = None; } diff --git a/mdast_util_to_markdown/src/util/check_bullet_other.rs b/mdast_util_to_markdown/src/util/check_bullet_other.rs index 18b3c41e..3b1ee793 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_other.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_other.rs @@ -7,7 +7,11 @@ use super::check_bullet::check_bullet; pub fn check_bullet_other(state: &mut State) -> Result { let 
bullet = check_bullet(state)?; - let bullet_other = state.options.bullet_other; + let mut bullet_other = state.options.bullet_other; + + if bullet != '*' { + bullet_other = '*'; + } if bullet_other != '*' && bullet_other != '+' && bullet_other != '-' { return Err(Message { diff --git a/mdast_util_to_markdown/tests/list_item.rs b/mdast_util_to_markdown/tests/list_item.rs index d18043ef..2ccd850e 100644 --- a/mdast_util_to_markdown/tests/list_item.rs +++ b/mdast_util_to_markdown/tests/list_item.rs @@ -1,4 +1,4 @@ -use markdown::mdast::{ListItem, Node, Text}; +use markdown::mdast::{List, ListItem, Node, Root, Text}; use markdown::mdast::{Paragraph, ThematicBreak}; use mdast_util_to_markdown::{to_markdown as to, IndentOptions}; @@ -180,4 +180,265 @@ fn list_item() { "* a\n ***\n", "should not use blank lines between child blocks for items w/ `spread: false`" ); + + assert_eq!( + to_md_with_opts( + &create_list(create_list(create_list::>(None))), + &Options { + bullet_other: '+', + ..Default::default() + } + ) + .unwrap(), + "* * +\n", + "should support `bullet_other`" + ); + + assert_eq!( + to_md_with_opts( + &create_list(create_list(create_list::>(None))), + &Options { + bullet: '-', + ..Default::default() + } + ) + .unwrap(), + "- - *\n", + "should default to an `bullet_other` different from `bullet` (1)" + ); + + assert_eq!( + to_md_with_opts( + &create_list(create_list(create_list::>(None))), + &Options { + bullet: '*', + ..Default::default() + } + ) + .unwrap(), + "* * -\n", + "should default to an `bullet_other` different from `bullet` (2)" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }),], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![Node::ThematicBreak(ThematicBreak { position: None })], + position: None, + 
spread: false, + checked: None + }) + ], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "- a\n- ***\n", + "should use a different bullet than a thematic rule marker, if the first child of a list item is a thematic break (2)" + ); + + assert_eq!( + to(&create_list(create_list::>(None))).unwrap(), + "* *\n", + "should *not* use a different bullet for an empty list item in two lists" + ); + + assert_eq!( + to(&create_list(create_list(create_list::>(None)))).unwrap(), + "* * -\n", + "should use a different bullet for an empty list item in three lists (1)" + ); + + assert_eq!( + to(&Node::List(List { + children: vec![ + Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + }), + Node::ListItem(ListItem { + children: vec![create_list(create_list::>(None))], + position: None, + spread: false, + checked: None + }) + ], + position: None, + ordered: false, + start: None, + spread: false + })) + .unwrap(), + "*\n* * -\n", + "should use a different bullet for an empty list item in three lists (2)" + ); + + assert_eq!( + to_md_with_opts( + &create_list(create_list(create_list::>(None))), + &Options { + bullet: '+', + ..Default::default() + } + ) + .unwrap(), + "+ + +\n", + "should not use a different bullet for an empty list item in three lists if `bullet` isn’t a thematic rule marker" + ); + + assert_eq!( + to(&create_list(create_list(create_list(create_list::< + Option, + >(None))))) + .unwrap(), + "* * * -\n", + "should use a different bullet for an empty list item in four lists" + ); + + assert_eq!( + to(&create_list(create_list(create_list(create_list( + create_list::>(None) + ))))) + .unwrap(), + "* * * * -\n", + "should use a different bullet for an empty list item in five lists" + ); + + assert_eq!( + to(&create_list(create_list(vec![ + create_list(Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + })), 
+ create_list::>(None) + ]))) + .unwrap(), + "* * * a\n -\n", + "should not use a different bullet for an empty list item at non-head in two lists" + ); + + assert_eq!( + to_md_with_opts( + &Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + }), + &Options { + bullet_ordered: ')', + ..Default::default() + } + ) + .unwrap(), + "1)\n", + "should support `bullet_ordered`" + ); + + assert_eq!( + to_md_with_opts( + &Node::Root(Root { + children: vec![ + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + }), + ], + position: None + }), + &Options { + bullet_ordered: ')', + ..Default::default() + } + ) + .unwrap(), + "1)\n\n1.\n", + "should use a different bullet for adjacent ordered lists" + ); +} + +trait IntoVecNode { + fn into_vec(self) -> Vec; +} + +impl IntoVecNode for Node { + fn into_vec(self) -> Vec { + vec![self] + } +} + +impl IntoVecNode for Option { + fn into_vec(self) -> Vec { + self.map(|n| vec![n]).unwrap_or_default() + } +} + +impl IntoVecNode for Vec { + fn into_vec(self) -> Vec { + self + } +} + +fn create_list(d: T) -> Node +where + T: IntoVecNode, +{ + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: d.into_vec(), + position: None, + spread: false, + checked: None, + })], + position: None, + ordered: false, + start: None, + spread: false, + }) } From 0785d368fb02ed6624f89184f5af160da89d46c6 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 2 Oct 2024 12:47:08 +0300 Subject: [PATCH 61/73] Change comment --- 
mdast_util_to_markdown/src/handle/list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdast_util_to_markdown/src/handle/list.rs b/mdast_util_to_markdown/src/handle/list.rs index 465e09da..ffe420c8 100644 --- a/mdast_util_to_markdown/src/handle/list.rs +++ b/mdast_util_to_markdown/src/handle/list.rs @@ -54,7 +54,7 @@ impl Handle for List { && self.children[0] .children() .map(|inner| inner.is_empty()) - .unwrap(); + .expect("There's at least one list item."); if is_valid_bullet && is_within_bounds From 65751f801e05ff23e9c5f9832000d08296bf39bc Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 2 Oct 2024 20:28:24 +0300 Subject: [PATCH 62/73] Add tests for core --- mdast_util_to_markdown/src/lib.rs | 1 - mdast_util_to_markdown/src/state.rs | 1 - mdast_util_to_markdown/tests/core.rs | 328 ++++++++++++++++++++++++++- 3 files changed, 323 insertions(+), 7 deletions(-) diff --git a/mdast_util_to_markdown/src/lib.rs b/mdast_util_to_markdown/src/lib.rs index d7ce4501..7be88a46 100644 --- a/mdast_util_to_markdown/src/lib.rs +++ b/mdast_util_to_markdown/src/lib.rs @@ -27,6 +27,5 @@ pub fn to_markdown_with_options(tree: &Node, options: &Options) -> Result State<'a> { return; } results.push_str("\n".repeat(1 + n).as_ref()); - return; } else if let Join::HTMLComment = join { results.push_str("\n\n\n\n"); } diff --git a/mdast_util_to_markdown/tests/core.rs b/mdast_util_to_markdown/tests/core.rs index 15ba7125..528d3966 100644 --- a/mdast_util_to_markdown/tests/core.rs +++ b/mdast_util_to_markdown/tests/core.rs @@ -1,4 +1,4 @@ -use markdown::mdast::Definition; +use markdown::mdast::{Break, Code, Definition, Heading, List, ListItem}; use markdown::mdast::{Node, Paragraph, Root, Text, ThematicBreak}; use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; use mdast_util_to_markdown::{to_markdown as to, Options}; @@ -6,6 +6,91 @@ use pretty_assertions::assert_eq; #[test] fn core() { + assert_eq!( + to(&Node::Root(Root { + children: vec![ + 
Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::ThematicBreak(ThematicBreak { position: None }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }), + ], + position: None + })) + .unwrap(), + "a\n\n***\n\nb\n", + "should support root" + ); + + assert_eq!( + to(&Node::Root(Root { + children: vec![ + Node::Text(Text { + value: String::from("a"), + position: None + }), + Node::Break(Break { position: None }), + Node::Text(Text { + value: String::from("b"), + position: None + }), + ], + position: None + })) + .unwrap(), + "a\\\nb\n", + "should not use blank lines between nodes when given phrasing" + ); + + assert_eq!( + to(&Node::Root(Root { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Definition(Definition { + position: None, + url: String::new(), + title: None, + identifier: String::from("b"), + label: None + }), + Node::Definition(Definition { + position: None, + url: String::new(), + title: None, + identifier: String::from("c"), + label: None + }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("d"), + position: None + })], + position: None + }), + ], + position: None + })) + .unwrap(), + "a\n\n[b]: <>\n\n[c]: <>\n\nd\n", + "should support adjacent definitions" + ); + assert_eq!( to_md_with_opts( &Node::Root(Root { @@ -61,10 +146,57 @@ fn core() { })], position: None }), - Node::ThematicBreak(ThematicBreak { position: None }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: false, + start: None, + spread: false + }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + 
position: None, + spread: false, + checked: None + })], + position: None, + ordered: false, + start: None, + spread: false + }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: true, + start: None, + spread: false + }), Node::Paragraph(Paragraph { children: vec![Node::Text(Text { - value: String::from("b"), + value: String::from("d"), position: None })], position: None @@ -73,7 +205,193 @@ fn core() { position: None })) .unwrap(), - "a\n\n***\n\nb\n", - "should support root" + "a\n\n*\n\n-\n\n1.\n\n1)\n\nd\n", + "should use a different marker for adjacent lists" + ); + + assert_eq!( + to_md_with_opts( + &Node::Root(Root { + children: vec![ + Node::Code(Code { + value: String::from("a"), + position: None, + lang: None, + meta: None + }), + Node::List(List { + children: vec![Node::ListItem(ListItem { + children: vec![], + position: None, + spread: false, + checked: None + })], + position: None, + ordered: false, + start: None, + spread: false + }), + Node::Code(Code { + value: String::from("b"), + position: None, + lang: None, + meta: None + }), + ], + position: None + }), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + " a\n\n*\n\n\n\n b\n", + "should inject HTML comments between lists and an indented code" + ); + + assert_eq!( + to_md_with_opts( + &Node::Root(Root { + children: vec![ + Node::Code(Code { + value: String::from("a"), + position: None, + lang: None, + meta: None + }), + Node::Code(Code { + value: String::from("b"), + position: None, + lang: None, + meta: None + }), + ], + position: None + }), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + " a\n\n\n\n b\n", + "should 
inject HTML comments between adjacent indented code" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None + }), + ], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n\n b\n", + "should not honour `spread: false` for two paragraphs" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Definition(Definition { + position: None, + url: String::from("d"), + title: None, + identifier: String::from("b"), + label: Some(String::from("c")) + }), + ], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n\n [c]: d\n", + "should not honour `spread: false` for a paragraph and a definition" + ); + + assert_eq!( + to(&Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None, + depth: 1 + }) + ], + position: None, + spread: false, + checked: None + })) + .unwrap(), + "* a\n # b\n", + "should honour `spread: false` for a paragraph and a heading" + ); + + assert_eq!( + to_md_with_opts( + &Node::ListItem(ListItem { + children: vec![ + Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a"), + position: None + })], + position: None + }), + Node::Heading(Heading { + children: vec![Node::Text(Text { + value: String::from("b"), + position: None + })], + position: None, + depth: 1 + }) + ], + position: 
None, + spread: false, + checked: None + }), + &Options { + setext: true, + ..Default::default() + } + ) + .unwrap(), + "* a\n\n b\n =\n", + "should not honour `spread: false` for a paragraph and a setext heading" ); } From cfc6963e19376a6c361d2169985798d53627e74f Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 2 Oct 2024 20:34:25 +0300 Subject: [PATCH 63/73] fix few test assertion message --- mdast_util_to_markdown/tests/list_item.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdast_util_to_markdown/tests/list_item.rs b/mdast_util_to_markdown/tests/list_item.rs index 2ccd850e..ce9ce09a 100644 --- a/mdast_util_to_markdown/tests/list_item.rs +++ b/mdast_util_to_markdown/tests/list_item.rs @@ -108,7 +108,7 @@ fn list_item() { ) .unwrap(), "* a\n ***\n", - "should use one space after the bullet for `listItemIndent: \"one\"`" + "should use one space after the bullet for `list_item_indent: \"IndentOptions::One\"`" ); assert_eq!( @@ -132,7 +132,7 @@ fn list_item() { ) .unwrap(), "* a\n", - "should use one space after the bullet for `listItemIndent: \"mixed\"`, when the item is not spread" + "should use one space after the bullet for `list_item_indent: \"IndentOptions::Mixed\"`, when the item is not spread" ); assert_eq!( @@ -157,7 +157,7 @@ fn list_item() { ) .unwrap(), "* a\n\n ***\n", - "should use a tab stop of spaces after the bullet for `listItemIndent: \"mixed\"`, when the item is spread" + "should use a tab stop of spaces after the bullet for `list_item_indent: \"IndentOptions::Mixed\"`, when the item is spread" ); assert_eq!( From aced8d4f4e2403cb91f92e6f30ed61d14b4a18e4 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Wed, 2 Oct 2024 20:38:26 +0300 Subject: [PATCH 64/73] Fix minor typo in bullet other check --- mdast_util_to_markdown/src/util/check_bullet_other.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdast_util_to_markdown/src/util/check_bullet_other.rs b/mdast_util_to_markdown/src/util/check_bullet_other.rs 
index 3b1ee793..0388d0d0 100644 --- a/mdast_util_to_markdown/src/util/check_bullet_other.rs +++ b/mdast_util_to_markdown/src/util/check_bullet_other.rs @@ -32,7 +32,7 @@ pub fn check_bullet_other(state: &mut State) -> Result { bullet, bullet_other ), rule_id: Box::new("bullet-match-bullet_other".into()), - source: Box::new("mdast-util-to_markdown".into()), + source: Box::new("mdast-util-to-markdown".into()), place: None, }); } From ff12c7b654bc2ff423829bc40e8add985bc6fe0e Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 4 Oct 2024 13:26:42 +0300 Subject: [PATCH 65/73] Add roundtrip tests --- .../src/handle/list_item.rs | 21 +- mdast_util_to_markdown/src/state.rs | 2 +- mdast_util_to_markdown/tests/roundtrip.rs | 413 +++++++++++++++++- 3 files changed, 422 insertions(+), 14 deletions(-) diff --git a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs index 20463562..68f02b8d 100644 --- a/mdast_util_to_markdown/src/handle/list_item.rs +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -48,17 +48,19 @@ impl Handle for ListItem { } else { bullet = format!("{}{}", bullet_number, bullet); } - } - size = bullet.len() + 1; + size = bullet.len() + 1; + } } - if matches!(list_item_indent, IndentOptions::Tab) || self.spread { + if matches!(list_item_indent, IndentOptions::Tab) { size = compute_size(size); - } - - if let Some(Node::List(list)) = parent { - if matches!(list_item_indent, IndentOptions::Mixed) && list.spread { + } else if matches!(list_item_indent, IndentOptions::Mixed) { + if let Some(Node::List(list)) = parent { + if list.spread || self.spread { + size = compute_size(size); + } + } else if self.spread { size = compute_size(size); } } @@ -78,7 +80,10 @@ impl Handle for ListItem { result } } else if blank { - bullet.clone() + let mut result = String::with_capacity(bullet.len() + line.len()); + result.push_str(&bullet); + result.push_str(line); + result } else { // size - bullet.len() will never panic because size > 
bullet.len() always. let blank = " ".repeat(size - bullet.len()); diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 803ce896..4a08d729 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -102,7 +102,7 @@ impl<'a> State<'a> { } Node::LinkReference(link_reference) => link_reference.handle(self, info, parent, node), _ => Err(Message { - reason: String::from("Can't handle node"), + reason: format!("Unexpected node type {:?}", node), rule_id: Box::new("unexpected-node".into()), source: Box::new("mdast-util-to-markdown".into()), place: None, diff --git a/mdast_util_to_markdown/tests/roundtrip.rs b/mdast_util_to_markdown/tests/roundtrip.rs index ae8d89ba..4cf7a60b 100644 --- a/mdast_util_to_markdown/tests/roundtrip.rs +++ b/mdast_util_to_markdown/tests/roundtrip.rs @@ -1,16 +1,419 @@ -use mdast_util_to_markdown::to_markdown as to; +use mdast_util_to_markdown::{to_markdown as to, Options}; -use markdown::to_mdast as from; +use markdown::{mdast::Node, to_mdast as from}; use pretty_assertions::assert_eq; +use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; + #[test] fn round_trip() { - let doc: String = vec![ + let doc: String = document(vec![ "> * Lorem ipsum dolor sit amet", ">", "> * consectetur adipisicing elit", "", - ] - .join("\n"); + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "* Lorem ipsum dolor sit amet", + "", + " 1. consectetur adipisicing elit", + "", + " 2. sed do eiusmod tempor incididunt", + "", + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "* 1. Lorem ipsum dolor sit amet", + "", + " 2. 
consectetur adipisicing elit", + "", + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "* hello", + " * world", + " how", + "", + " are", + " you", + "", + " * today", + "* hi", + "", + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = "An autolink: .\n".to_string(); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "A [primary][toString], [secondary][constructor], and [tertiary][__proto__] link.", + "", + "[toString]: http://primary.com", + "", + "[__proto__]: http://tertiary.com", + "", + "[constructor]: http://secondary.com", + "", + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "* foo", + "", + "*", + "", + "* bar", + "", + "* baz", + "", + "*", + "", + "* qux quux", + "", + ]); + + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = "* a\n\n\n\n* b\n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = document(vec![ + "

Header 3

", + "", + "
", + "

This is a blockquote.

", + " ", + "

This is the second paragraph in the blockquote.

", + " ", + "

This is an H2 in a blockquote

", + "
", + "", + ]); + + assert_eq!( + to_md_with_opts( + &from(&doc, &Default::default()).unwrap(), + &Options { + fences: false, + ..Default::default() + } + ) + .unwrap(), + doc + ); + + let doc: String = "> a\n\n> b\n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = "[**https://unifiedjs.com/**](https://unifiedjs.com/)\n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let step1 = "\\ \\\\ \\\\\\ \\\\\\\\"; + let step2 = "\\ \\ \\\\\\ \\\\\\\\\n"; + assert_eq!( + to(&from(&step1, &Default::default()).unwrap()).unwrap(), + step2 + ); + assert_eq!( + to(&from(&step2, &Default::default()).unwrap()).unwrap(), + step2 + ); + + let doc = "\\\\\\*a\n"; + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc = "\\\\*a\\\\\\*"; + assert_eq!( + remove_pos(&mut from(doc, &Default::default()).unwrap()), + remove_pos( + &mut from( + &to(&from(&doc, &Default::default()).unwrap()).unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let doc = "```\n \n```\n"; assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc = "* * -\n"; + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc = "- ***\n"; + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc = "- ***\n"; + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let mut tree = from("* a\n- b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + ..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("* ---\n- - +\n+ b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + 
..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("- - +\n* ---\n+ b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + ..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("- - +\n- -", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + ..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("* - +\n *\n -\n +", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + ..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("- +\n- *\n -\n +", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos( + &mut from( + &to_md_with_opts( + &tree, + &Options { + bullet: '*', + bullet_other: '-', + ..Default::default() + } + ) + .unwrap(), + &Default::default() + ) + .unwrap() + ) + ); + + let mut tree = from("1. a\n1) b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. ---\n1) 1. 1)\n1. b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. 1. 1)\n1) ---\n1. b", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. 1. 1)\n1. 
1.", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. 1) 1.\n 1.\n 1)\n 1.", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. 1) 1.\n 1) 1.\n 1)\n 1.", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let mut tree = from("1. 1)\n1. 1.\n 1)\n 1.", &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut tree), + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()) + ); + + let doc: String = " \n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = " \n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = " a \n \tb\t \n".to_string(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = "Separate paragraphs: + +a * is this emphasis? * + +a ** is this emphasis? ** + +a *** is this emphasis? *** + +a *\\* is this emphasis? *\\* + +a \\** is this emphasis? \\** + +a **\\* is this emphasis? **\\* + +a *\\** is this emphasis? *\\** + +One paragraph: + +a * is this emphasis? * +a ** is this emphasis? ** +a *** is this emphasis? *** +a *\\* is this emphasis? *\\* +a \\** is this emphasis? \\** +a **\\* is this emphasis? **\\* +a *\\** is this emphasis? *\\**" + .to_string(); + let mut tree = from(&doc, &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()), + remove_pos(&mut tree), + ); + + let doc: String = "Separate paragraphs: + +a _ is this emphasis? _ + +a __ is this emphasis? __ + +a ___ is this emphasis? ___ + +a _\\_ is this emphasis? 
_\\_ + +a \\__ is this emphasis? \\__ + +a __\\_ is this emphasis? __\\_ + +a _\\__ is this emphasis? _\\__ + +One paragraph: + +a _ is this emphasis? _ +a __ is this emphasis? __ +a ___ is this emphasis? ___ +a _\\_ is this emphasis? _\\_ +a \\__ is this emphasis? \\__ +a __\\_ is this emphasis? __\\_ +a _\\__ is this emphasis? _\\__" + .to_string(); + let mut tree = from(&doc, &Default::default()).unwrap(); + assert_eq!( + remove_pos(&mut from(&to(&tree).unwrap(), &Default::default()).unwrap()), + remove_pos(&mut tree), + ); + + let doc: String = to(&from("(____", &Default::default()).unwrap()).unwrap(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); + + let doc: String = to(&from( + "Once activated, a service worker ______, then transitions to idle…", + &Default::default(), + ) + .unwrap()) + .unwrap(); + assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); +} + +fn remove_pos(node: &mut Node) { + node.position_set(None); + if let Some(children) = node.children_mut() { + for child in children { + child.position_set(None); + remove_pos(child); + } + } +} + +fn document(doc: Vec<&str>) -> String { + doc.join("\n") } From 77861445f2cdf5b6bd9feb7e0174b4a250f54ac4 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 4 Oct 2024 13:28:49 +0300 Subject: [PATCH 66/73] Update remove_pos --- mdast_util_to_markdown/tests/roundtrip.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/mdast_util_to_markdown/tests/roundtrip.rs b/mdast_util_to_markdown/tests/roundtrip.rs index 4cf7a60b..b1a5172e 100644 --- a/mdast_util_to_markdown/tests/roundtrip.rs +++ b/mdast_util_to_markdown/tests/roundtrip.rs @@ -408,7 +408,6 @@ fn remove_pos(node: &mut Node) { node.position_set(None); if let Some(children) = node.children_mut() { for child in children { - child.position_set(None); remove_pos(child); } } From 76f182f11a0e830d70270280c52b8195c7358bd4 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 4 Oct 2024 13:32:44 +0300 Subject: [PATCH 67/73] 
Remove duplicate test --- mdast_util_to_markdown/tests/roundtrip.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/mdast_util_to_markdown/tests/roundtrip.rs b/mdast_util_to_markdown/tests/roundtrip.rs index b1a5172e..e78165a7 100644 --- a/mdast_util_to_markdown/tests/roundtrip.rs +++ b/mdast_util_to_markdown/tests/roundtrip.rs @@ -154,9 +154,6 @@ fn round_trip() { let doc = "- ***\n"; assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); - let doc = "- ***\n"; - assert_eq!(to(&from(&doc, &Default::default()).unwrap()).unwrap(), doc); - let mut tree = from("* a\n- b", &Default::default()).unwrap(); assert_eq!( remove_pos(&mut tree), From 487225a072d3206e120e6d7f8c3e5c7d7291e13f Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 4 Oct 2024 14:58:50 +0300 Subject: [PATCH 68/73] Minor refactor for list_item --- .../src/handle/list_item.rs | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/mdast_util_to_markdown/src/handle/list_item.rs b/mdast_util_to_markdown/src/handle/list_item.rs index 68f02b8d..9bbb3d1c 100644 --- a/mdast_util_to_markdown/src/handle/list_item.rs +++ b/mdast_util_to_markdown/src/handle/list_item.rs @@ -30,9 +30,6 @@ impl Handle for ListItem { .unwrap_or(check_bullet(state)?) .to_string(); - // This is equal to bullet.len() + 1, since we know bullet is always one byte long we can - // safely assign 2 to size. 
- let mut size = 2; if let Some(Node::List(list)) = parent { if list.ordered { let bullet_number = if let Some(start) = list.start { @@ -48,21 +45,25 @@ impl Handle for ListItem { } else { bullet = format!("{}{}", bullet_number, bullet); } - - size = bullet.len() + 1; } } - if matches!(list_item_indent, IndentOptions::Tab) { - size = compute_size(size); - } else if matches!(list_item_indent, IndentOptions::Mixed) { - if let Some(Node::List(list)) = parent { - if list.spread || self.spread { - size = compute_size(size); + let mut size = bullet.len() + 1; + + let should_compute_size = match list_item_indent { + IndentOptions::Tab => true, + IndentOptions::Mixed => { + if let Some(Node::List(list)) = parent { + list.spread || self.spread + } else { + self.spread } - } else if self.spread { - size = compute_size(size); } + _ => false, + }; + + if should_compute_size { + size = compute_size(size); } state.enter(ConstructName::ListItem); @@ -71,7 +72,7 @@ impl Handle for ListItem { let value = state.indent_lines(&value, |line, index, blank| { if index > 0 { if blank { - String::new() + String::from(line) } else { let blank = " ".repeat(size); let mut result = String::with_capacity(blank.len() + line.len()); From cb4c81e43ff67a8b21e3647493b93772a0d33caf Mon Sep 17 00:00:00 2001 From: Bnchi Date: Fri, 4 Oct 2024 15:07:33 +0300 Subject: [PATCH 69/73] Fix typo --- mdast_util_to_markdown/tests/roundtrip.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdast_util_to_markdown/tests/roundtrip.rs b/mdast_util_to_markdown/tests/roundtrip.rs index e78165a7..1bc85a50 100644 --- a/mdast_util_to_markdown/tests/roundtrip.rs +++ b/mdast_util_to_markdown/tests/roundtrip.rs @@ -6,7 +6,7 @@ use pretty_assertions::assert_eq; use mdast_util_to_markdown::to_markdown_with_options as to_md_with_opts; #[test] -fn round_trip() { +fn roundtrip() { let doc: String = document(vec![ "> * Lorem ipsum dolor sit amet", ">", From 5faeb16286ceaafad2f5abed61246a7d7aac7949 Mon Sep 17 
00:00:00 2001 From: Bnchi Date: Sat, 5 Oct 2024 09:44:14 +0300 Subject: [PATCH 70/73] Add comments --- mdast_util_to_markdown/src/configure.rs | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index c5a19e00..83fa4e21 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -1,27 +1,56 @@ pub struct Options { + /// Marker to use for bullets of items in unordered lists ('*', '+', or '-', default: '*'). pub bullet: char, + // Marker to use in certain cases where the primary bullet doesn’t work + // ('*', '+', or '-', default: '-' when bullet is '*', '*' otherwise). pub bullet_other: char, + /// Marker to use for bullets of items in ordered lists ('.' or ')', default: '.'). pub bullet_ordered: char, + /// Marker to use for emphasis ('*' or '_', default: '*'). pub emphasis: char, + // Marker to use for fenced code ('`' or '~', default: '`'). pub fence: char, + /// Whether to use fenced code always (boolean, default: true). + /// The default is to use fenced code if there is a language defined, + /// if the code is empty, or if it starts or ends in blank lines. pub fences: bool, + // How to indent the content of list items (default: 'IndentOptions::One'). pub list_item_indent: IndentOptions, + /// Marker to use for titles ('"' or "'", default: '"'). pub quote: char, + /// Marker to use for thematic breaks ('*', '-', or '_', default: '*'). pub rule: char, + // Marker to use for strong ('*' or '_', default: '*'). pub strong: char, + // Whether to increment the counter of ordered lists items (bool, default: true). pub increment_list_marker: bool, + /// Whether to add the same number of number signs (#) at the end of an ATX heading as the + /// opening sequence (bool, default: false). pub close_atx: bool, + /// Whether to always use resource links (boolean, default: false). 
The default is to use + /// autolinks () when possible and resource links ([text](url)) otherwise. pub resource_link: bool, + /// Whether to add spaces between markers in thematic breaks (bool, default: false). pub rule_spaces: bool, + /// Whether to use setext headings when possible (bool, default: false). + /// The default is to always use ATX headings (# heading) instead of + /// setext headings (heading\n=======). + /// Setext headings cannot be used for empty headings or headings with a rank of three or more. pub setext: bool, + /// Whether to join definitions without a blank line (bool, default: false). pub tight_definitions: bool, + // Number of markers to use for thematic breaks (u32, default: 3, min: 3). pub rule_repetition: u32, } #[derive(Copy, Clone)] pub enum IndentOptions { + // Depends on the item and its parent list uses 'One' if the item and list are tight + // and 'Tab' otherwise. Mixed, + // The size of the bullet plus one space. One, + /// Tab stop. Tab, } From c83247d4417f35e607cc56621ff2e617756926c3 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 5 Oct 2024 10:04:38 +0300 Subject: [PATCH 71/73] Add comments for construct names --- mdast_util_to_markdown/src/construct_name.rs | 228 ++++++++++++++++++- 1 file changed, 227 insertions(+), 1 deletion(-) diff --git a/mdast_util_to_markdown/src/construct_name.rs b/mdast_util_to_markdown/src/construct_name.rs index 94277758..ac1b5dd5 100644 --- a/mdast_util_to_markdown/src/construct_name.rs +++ b/mdast_util_to_markdown/src/construct_name.rs @@ -1,30 +1,256 @@ -#[derive(Debug, Clone, PartialEq)] +#[derive(Clone, PartialEq)] pub enum ConstructName { + /// + //// Whole autolink. + //// + //// ```markdown + //// > | and + //// ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^ + //// ``` Autolink, + /// + /// Whole block quote. + /// + /// ```markdown + /// > | > a + /// ^^^ + /// > | b + /// ^ + /// ``` Blockquote, + /// + /// Whole code (indented). 
+ /// + /// ```markdown + /// ␠␠␠␠console.log(1) + /// ^^^^^^^^^^^^^^^^^^ + /// ``` CodeIndented, + /// + /// Whole code (fenced). + /// + /// ````markdown + /// > | ```js + /// ^^^^^ + /// > | console.log(1) + /// ^^^^^^^^^^^^^^ + /// > | ``` + /// ^^^ + /// ```` CodeFenced, + /// + /// Code (fenced) language, when fenced with grave accents. + /// + /// ````markdown + /// > | ```js + /// ^^ + /// | console.log(1) + /// | ``` + /// ```` CodeFencedLangGraveAccent, + /// + /// Code (fenced) language, when fenced with tildes. + /// + /// ````markdown + /// > | ~~~js + /// ^^ + /// | console.log(1) + /// | ~~~ + /// ```` CodeFencedLangTilde, + /// + /// Code (fenced) meta string, when fenced with grave accents. + /// + /// ````markdown + /// > | ```js eval + /// ^^^^ + /// | console.log(1) + /// | ``` + /// ```` CodeFencedMetaGraveAccent, + /// + /// Code (fenced) meta string, when fenced with tildes. + /// + /// ````markdown + /// > | ~~~js eval + /// ^^^^ + /// | console.log(1) + /// | ~~~ + /// ```` CodeFencedMetaTilde, + /// + /// Whole definition. + /// + /// ```markdown + /// > | [a]: b "c" + /// ^^^^^^^^^^ + /// ``` Definition, + /// + /// Destination (literal) (occurs in definition, image, link). + /// + /// ```markdown + /// > | [a]: "c" + /// ^^^ + /// > | a ![b]( "d") e + /// ^^^ + /// ``` DestinationLiteral, + /// + /// Destination (raw) (occurs in definition, image, link). + /// + /// ```markdown + /// > | [a]: b "c" + /// ^ + /// > | a ![b](c "d") e + /// ^ + /// ``` DestinationRaw, + /// + /// Emphasis. + /// + /// ```markdown + /// > | *a* + /// ^^^ + /// ``` Emphasis, + /// + /// Whole heading (atx). + /// + /// ```markdown + /// > | # alpha + /// ^^^^^^^ + /// ``` HeadingAtx, + /// + /// Whole heading (setext). + /// + /// ```markdown + /// > | alpha + /// ^^^^^ + /// > | ===== + /// ^^^^^ + /// ``` HeadingSetext, + /// + /// Whole image. 
+ /// + /// ```markdown + /// > | ![a](b) + /// ^^^^^^^ + /// > | ![c] + /// ^^^^ + /// ``` Image, + /// + /// Whole image reference. + /// + /// ```markdown + /// > | ![a] + /// ^^^^ + /// ``` ImageReference, + /// + /// Label (occurs in definitions, image reference, image, link reference, + /// link). + /// + /// ```markdown + /// > | [a]: b "c" + /// ^^^ + /// > | a [b] c + /// ^^^ + /// > | a ![b][c] d + /// ^^^^ + /// > | a [b](c) d + /// ^^^ + /// ``` Label, + /// + /// Whole link. + /// + /// ```markdown + /// > | [a](b) + /// ^^^^^^ + /// > | [c] + /// ^^^ + /// ``` Link, + /// + /// Whole link reference. + /// + /// ```markdown + /// > | [a] + /// ^^^ + /// ``` LinkReference, + /// + /// List. + /// + /// ```markdown + /// > | * a + /// ^^^ + /// > | 1. b + /// ^^^^ + /// ``` List, + /// + /// List item. + /// + /// ```markdown + /// > | * a + /// ^^^ + /// > | 1. b + /// ^^^^ + /// ``` ListItem, + /// + /// Paragraph. + /// + /// ```markdown + /// > | a b + /// ^^^ + /// > | c. + /// ^^ + /// ``` Paragraph, + /// + /// Phrasing (occurs in headings, paragraphs, etc). + /// + /// ```markdown + /// > | a + /// ^ + /// ``` Phrasing, + /// + /// Reference (occurs in image, link). + /// + /// ```markdown + /// > | [a][] + /// ^^ + /// ``` Reference, + /// + /// Strong. + /// + /// ```markdown + /// > | **a** + /// ^^^^^ + /// ``` Strong, + /// + /// Title using single quotes (occurs in definition, image, link). + /// + /// ```markdown + /// > | [a](b 'c') + /// ^^^ + /// ``` TitleApostrophe, + /// + /// Title using double quotes (occurs in definition, image, link). 
+ /// + /// ```markdown + /// > | [a](b "c") + /// ^^^ + /// ``` TitleQuote, } From 82d8d906a294165e6ed97ce6593c84b52b8051f8 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 5 Oct 2024 10:07:19 +0300 Subject: [PATCH 72/73] Update comments --- mdast_util_to_markdown/src/configure.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index 83fa4e21..6e5ea594 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -10,7 +10,7 @@ pub struct Options { pub emphasis: char, // Marker to use for fenced code ('`' or '~', default: '`'). pub fence: char, - /// Whether to use fenced code always (boolean, default: true). + /// Whether to use fenced code always (bool, default: true). /// The default is to use fenced code if there is a language defined, /// if the code is empty, or if it starts or ends in blank lines. pub fences: bool, @@ -27,7 +27,7 @@ pub struct Options { /// Whether to add the same number of number signs (#) at the end of an ATX heading as the /// opening sequence (bool, default: false). pub close_atx: bool, - /// Whether to always use resource links (boolean, default: false). The default is to use + /// Whether to always use resource links (bool, default: false). The default is to use /// autolinks () when possible and resource links ([text](url)) otherwise. pub resource_link: bool, /// Whether to add spaces between markers in thematic breaks (bool, default: false). 
From c73a56b9d89d64af744660512aab010b29116c93 Mon Sep 17 00:00:00 2001 From: Bnchi Date: Sat, 5 Oct 2024 10:10:01 +0300 Subject: [PATCH 73/73] Update comments wrapping --- mdast_util_to_markdown/src/configure.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index 6e5ea594..3419d35e 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -10,9 +10,8 @@ pub struct Options { pub emphasis: char, // Marker to use for fenced code ('`' or '~', default: '`'). pub fence: char, - /// Whether to use fenced code always (bool, default: true). - /// The default is to use fenced code if there is a language defined, - /// if the code is empty, or if it starts or ends in blank lines. + /// Whether to use fenced code always (bool, default: true). The default is to use fenced code + /// if there is a language defined, if the code is empty, or if it starts or ends in blank lines. pub fences: bool, // How to indent the content of list items (default: 'IndentOptions::One'). pub list_item_indent: IndentOptions, @@ -27,15 +26,14 @@ pub struct Options { /// Whether to add the same number of number signs (#) at the end of an ATX heading as the /// opening sequence (bool, default: false). pub close_atx: bool, - /// Whether to always use resource links (bool, default: false). The default is to use - /// autolinks (<https://example.com>) when possible and resource links ([text](url)) otherwise. + /// Whether to always use resource links (bool, default: false). The default is to use autolinks + /// (<https://example.com>) when possible and resource links ([text](url)) otherwise. pub resource_link: bool, /// Whether to add spaces between markers in thematic breaks (bool, default: false). pub rule_spaces: bool, - /// Whether to use setext headings when possible (bool, default: false).
- /// The default is to always use ATX headings (# heading) instead of - /// setext headings (heading\n=======). - /// Setext headings cannot be used for empty headings or headings with a rank of three or more. + /// Whether to use setext headings when possible (bool, default: false). The default is to always + /// use ATX headings (# heading) instead of setext headings (heading\n=======). Setext headings + /// cannot be used for empty headings or headings with a rank of three or more. pub setext: bool, /// Whether to join definitions without a blank line (bool, default: false). pub tight_definitions: bool, @@ -45,8 +43,8 @@ pub struct Options { #[derive(Copy, Clone)] pub enum IndentOptions { - // Depends on the item and its parent list uses 'One' if the item and list are tight - // and 'Tab' otherwise. + // Depends on the item and its parent list uses 'One' if the item and list are tight and 'Tab' + // otherwise. Mixed, // The size of the bullet plus one space. One,