Skip to content

Commit a0dd1a4

Browse files
authored
Upgrade dependencies and add line number test
Just as I was hoping, upgrading dependencies fixed #301.
1 parent bb7188a commit a0dd1a4

File tree

9 files changed

+433
-317
lines changed

9 files changed

+433
-317
lines changed

Cargo.lock

Lines changed: 345 additions & 249 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ edition = "2024"
88

99
[dependencies]
1010
tokio = { version = "1", features = ["full"] }
11-
html5ever = "0.26.0"
12-
markup5ever_rcdom = "0.2.0"
11+
html5ever = "0.35.0"
12+
markup5ever_rcdom = "0.35.0"
1313
regex = "1"
14-
delegate = "0.12.0"
15-
url = "2.5.4"
14+
delegate = "0.13.4"
15+
url = "2"
1616

1717
[dev-dependencies]
1818
tempfile = "3"

src/annotate_attributes.rs

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::io;
55
use std::rc::Rc;
66

77
use html5ever::tendril::StrTendril;
8-
use html5ever::{LocalName, QualName, local_name, namespace_url, ns};
8+
use html5ever::{LocalName, QualName, local_name, ns};
99
use markup5ever_rcdom::{Handle, NodeData};
1010

1111
use crate::dom_utils::{self, NodeHandleExt};
@@ -142,11 +142,11 @@ impl Processor {
142142
let mut variant_comment = None;
143143
let mut variant_str = None;
144144
for node in description.iter() {
145-
if let NodeData::Comment { contents } = &node.data {
146-
if contents.trim().starts_with("or:") {
147-
variant_comment = Some(node);
148-
variant_str = Some(StrTendril::from(contents.trim()[3..].trim_start()));
149-
}
145+
if let NodeData::Comment { contents } = &node.data
146+
&& contents.trim().starts_with("or:")
147+
{
148+
variant_comment = Some(node);
149+
variant_str = Some(StrTendril::from(contents.trim()[3..].trim_start()));
150150
}
151151
}
152152

@@ -276,14 +276,14 @@ impl Processor {
276276
let mut dd_children = dd.children.borrow_mut();
277277
if has_special_semantics {
278278
// Replace the trailing period with a separating colon.
279-
if let Some(last) = dd_children.last_mut() {
280-
if let NodeData::Text { contents } = &last.data {
281-
let mut text = contents.borrow_mut();
282-
*text = StrTendril::from(
283-
text.trim_end_matches(|c: char| c.is_ascii_whitespace() || c == '.'),
284-
);
285-
text.push_slice(": ");
286-
}
279+
if let Some(last) = dd_children.last_mut()
280+
&& let NodeData::Text { contents } = &last.data
281+
{
282+
let mut text = contents.borrow_mut();
283+
*text = StrTendril::from(
284+
text.trim_end_matches(|c: char| c.is_ascii_whitespace() || c == '.'),
285+
);
286+
text.push_slice(": ");
287287
}
288288
} else {
289289
// Insert an em dash.

src/dom_utils.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::cell::RefCell;
22
use std::rc::Rc;
33

44
use html5ever::tendril::StrTendril;
5-
use html5ever::{Attribute, LocalName, QualName, local_name, namespace_url, ns};
5+
use html5ever::{Attribute, LocalName, QualName, local_name, ns};
66
use markup5ever_rcdom::{Handle, Node, NodeData};
77

88
/// Extensions to the DOM interface to make manipulation more ergonomic.
@@ -137,10 +137,9 @@ pub fn scan_dom<F: FnMut(&Handle)>(handle: &Handle, f: &mut F) {
137137
template_contents: ref tc,
138138
..
139139
} = handle.data
140+
&& let Some(ref tc_handle) = *tc.borrow()
140141
{
141-
if let Some(ref tc_handle) = *tc.borrow() {
142-
scan_dom(tc_handle, f);
143-
}
142+
scan_dom(tc_handle, f);
144143
}
145144
}
146145

src/interface_index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::collections::BTreeMap;
55
use std::io;
66

77
use html5ever::tendril::StrTendril;
8-
use html5ever::{QualName, local_name, namespace_url, ns};
8+
use html5ever::{QualName, local_name, ns};
99
use markup5ever_rcdom::Handle;
1010

1111
use crate::dom_utils::NodeHandleExt;

src/parser.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub async fn parse_fragment_async<R: AsyncRead + Unpin>(
5050
RcDomWithLineNumbers::default(),
5151
create_error_opts(),
5252
context.clone(),
53+
false,
5354
None,
5455
);
5556

@@ -143,6 +144,25 @@ pub(crate) mod tests {
143144
Ok(())
144145
}
145146

147+
// See https://github.com/whatwg/html-build/issues/301
148+
#[tokio::test]
149+
async fn test_document_error_line_number_pre() -> io::Result<()> {
150+
let result = parse_document_async(
151+
r##"<!DOCTYPE html>
152+
<pre>h1&gt;
153+
</pre>
154+
<p>Test 2</span>"##
155+
.as_bytes(),
156+
)
157+
.await;
158+
159+
let error = result.unwrap_err();
160+
assert_eq!(error.kind(), io::ErrorKind::InvalidData);
161+
assert!(error.to_string().contains("Line 4: "));
162+
163+
Ok(())
164+
}
165+
146166
#[tokio::test]
147167
async fn test_document_error_exact() -> io::Result<()> {
148168
let result = parse_document_async("<!DOCTYPE html>&asdf;".as_bytes()).await;

src/rcdom_with_line_numbers.rs

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@ use html5ever::interface::TreeSink;
55
use html5ever::{
66
Attribute, ExpandedName, QualName,
77
tendril::StrTendril,
8-
tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode},
8+
tree_builder::{ElementFlags, NodeOrText, QuirksMode},
99
};
1010
use markup5ever_rcdom::{Handle, RcDom};
1111
use std::borrow::Cow;
12+
use std::cell::Cell;
1213
use std::io;
1314

1415
pub struct RcDomWithLineNumbers {
1516
dom: RcDom,
16-
current_line: u64,
17+
current_line: Cell<u64>,
1718
}
1819

1920
impl RcDomWithLineNumbers {
@@ -23,10 +24,11 @@ impl RcDomWithLineNumbers {
2324
}
2425

2526
pub fn create_error_from_parse_errors(&self) -> io::Result<()> {
26-
if !self.dom.errors.is_empty() {
27+
if !self.dom.errors.borrow().is_empty() {
2728
let error_messages = self
2829
.dom
2930
.errors
31+
.borrow()
3032
.iter()
3133
.map(|e| e.to_string())
3234
.collect::<Vec<String>>()
@@ -45,7 +47,7 @@ impl Default for RcDomWithLineNumbers {
4547
fn default() -> Self {
4648
Self {
4749
dom: RcDom::default(),
48-
current_line: 1,
50+
current_line: Cell::new(1),
4951
}
5052
}
5153
}
@@ -54,15 +56,17 @@ impl TreeSink for RcDomWithLineNumbers {
5456
type Output = RcDomWithLineNumbers;
5557
type Handle = <RcDom as TreeSink>::Handle;
5658

59+
type ElemName<'a> = <RcDom as TreeSink>::ElemName<'a>;
60+
5761
// Override the parse_error method to add line numbers to the error messages.
58-
fn parse_error(&mut self, msg: Cow<'static, str>) {
59-
let msg_with_line = format!("Line {}: {}", self.current_line, msg);
62+
fn parse_error(&self, msg: Cow<'static, str>) {
63+
let msg_with_line = format!("Line {}: {}", self.current_line.get(), msg);
6064
self.dom.parse_error(Cow::Owned(msg_with_line));
6165
}
6266

6367
// Override to track the current line number.
64-
fn set_current_line(&mut self, line: u64) {
65-
self.current_line = line;
68+
fn set_current_line(&self, line: u64) {
69+
self.current_line.set(line);
6670
}
6771

6872
// Override to return RcDomWithLineNumbers instead of RcDom.
@@ -73,69 +77,67 @@ impl TreeSink for RcDomWithLineNumbers {
7377
// Delegate all other methods to RcDom.
7478
delegate! {
7579
to self.dom {
76-
fn get_document(&mut self) -> Self::Handle;
80+
fn get_document(&self) -> Self::Handle;
7781

7882
fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> ExpandedName<'a>;
7983

8084
fn create_element(
81-
&mut self,
85+
&self,
8286
name: QualName,
8387
attrs: Vec<Attribute>,
8488
flags: ElementFlags,
8589
) -> Self::Handle;
8690

87-
fn create_comment(&mut self, text: StrTendril) -> Self::Handle;
91+
fn create_comment(&self, text: StrTendril) -> Self::Handle;
8892

89-
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle;
93+
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Self::Handle;
9094

91-
fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>);
95+
fn append(&self, parent: &Self::Handle, child: NodeOrText<Self::Handle>);
9296

9397
fn append_based_on_parent_node(
94-
&mut self,
98+
&self,
9599
element: &Self::Handle,
96100
prev_element: &Self::Handle,
97101
child: NodeOrText<Self::Handle>,
98102
);
99103

100104
fn append_doctype_to_document(
101-
&mut self,
105+
&self,
102106
name: StrTendril,
103107
public_id: StrTendril,
104108
system_id: StrTendril,
105109
);
106110

107-
fn mark_script_already_started(&mut self, node: &Self::Handle);
111+
fn mark_script_already_started(&self, node: &Self::Handle);
108112

109-
fn pop(&mut self, node: &Self::Handle);
113+
fn pop(&self, node: &Self::Handle);
110114

111-
fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle;
115+
fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle;
112116

113117
fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool;
114118

115-
fn set_quirks_mode(&mut self, mode: QuirksMode);
119+
fn set_quirks_mode(&self, mode: QuirksMode);
116120

117121
fn append_before_sibling(
118-
&mut self,
122+
&self,
119123
sibling: &Self::Handle,
120124
new_node: NodeOrText<Self::Handle>,
121125
);
122126

123-
fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>);
127+
fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec<Attribute>);
124128

125129
fn associate_with_form(
126-
&mut self,
130+
&self,
127131
target: &Self::Handle,
128132
form: &Self::Handle,
129133
nodes: (&Self::Handle, Option<&Self::Handle>),
130134
);
131135

132-
fn remove_from_parent(&mut self, target: &Self::Handle);
136+
fn remove_from_parent(&self, target: &Self::Handle);
133137

134-
fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle);
138+
fn reparent_children(&self, node: &Self::Handle, new_parent: &Self::Handle);
135139

136140
fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool;
137-
138-
fn complete_script(&mut self, node: &Self::Handle) -> NextParserState;
139141
}
140142
}
141143
}

src/self_link.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Inserts `<a class="self-link" href="#ID">` links for elements with `id` attributes and certain classes.
22
33
use html5ever::tendril::StrTendril;
4-
use html5ever::{QualName, local_name, namespace_url, ns};
4+
use html5ever::{QualName, local_name, ns};
55
use markup5ever_rcdom::Handle;
66
use url::Url;
77

src/tag_omission.rs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::collections::HashMap;
77
use std::io;
88

99
use html5ever::tendril::StrTendril;
10-
use html5ever::{LocalName, QualName, local_name, namespace_url, ns};
10+
use html5ever::{LocalName, QualName, local_name, ns};
1111
use markup5ever_rcdom::{Handle, NodeData};
1212
use regex::Regex;
1313

@@ -67,19 +67,17 @@ impl Processor {
6767
// If we encounter the Void elements section, look for the next dt.
6868
if node.is_html_element(&local_name!("dfn"))
6969
&& node.text_content().trim() == "Void elements"
70-
{
71-
if let Some(dt) = node
70+
&& let Some(dt) = node
7271
.parent_node()
7372
.filter(|n| n.is_html_element(&local_name!("dt")))
74-
{
75-
for dd in dom_utils::dt_descriptions(&dt) {
76-
dom_utils::scan_dom(&dd, &mut |n| {
77-
if n.is_html_element(&local_name!("code")) {
78-
let info = self.elements.entry(n.text_content()).or_default();
79-
info.is_void_element = true;
80-
}
81-
});
82-
}
73+
{
74+
for dd in dom_utils::dt_descriptions(&dt) {
75+
dom_utils::scan_dom(&dd, &mut |n| {
76+
if n.is_html_element(&local_name!("code")) {
77+
let info = self.elements.entry(n.text_content()).or_default();
78+
info.is_void_element = true;
79+
}
80+
});
8381
}
8482
}
8583

@@ -91,12 +89,13 @@ impl Processor {
9189
}
9290

9391
// If we see a <dl class="element">, record that.
94-
if node.is_html_element(&local_name!("dl")) && node.has_class("element") {
95-
if let Some(elem) = std::mem::take(&mut self.most_recent_element_dfn) {
96-
let info = self.elements.entry(elem).or_default();
97-
if info.dl.is_none() {
98-
info.dl = Some(node.clone());
99-
}
92+
if node.is_html_element(&local_name!("dl"))
93+
&& node.has_class("element")
94+
&& let Some(elem) = std::mem::take(&mut self.most_recent_element_dfn)
95+
{
96+
let info = self.elements.entry(elem).or_default();
97+
if info.dl.is_none() {
98+
info.dl = Some(node.clone());
10099
}
101100
}
102101
}

0 commit comments

Comments
 (0)