Skip to content

Commit

Permalink
Merge pull request #412 from Mingun/soundness
Browse files Browse the repository at this point in the history
Rename reading methods, fix some encoding errors in error-processing paths and tests
  • Loading branch information
Mingun authored Jul 9, 2022
2 parents 0febc2b + ae458cb commit 2eecf00
Show file tree
Hide file tree
Showing 21 changed files with 370 additions and 218 deletions.
16 changes: 15 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
returns `ResolveResult::Unknown` if prefix was not registered in namespace buffer
- [#393]: Fix breaking processing after encounter an attribute with a reserved name (started with "xmlns")
- [#363]: Do not generate empty `Event::Text` events
- [#412]: Fix using incorrect encoding if `read_to_end` family of methods or `read_text`
method not found a corresponding end tag and reader has non-UTF-8 encoding

### Misc Changes

Expand Down Expand Up @@ -96,17 +98,28 @@

- [#403]: Remove deprecated `quick_xml::de::from_bytes` and `Deserializer::from_borrowing_reader`

- [#412]: Rename methods of `Reader`:
|Old Name |New Name
|-------------------------|---------------------------------------------------
|`read_event` |`read_event_into`
|`read_to_end` |`read_to_end_into`
|`read_text` |`read_text_into`
|`read_event_unbuffered` |`read_event`
|`read_to_end_unbuffered` |`read_to_end`
- [#412]: Change `read_to_end*` and `read_text_into` to accept `QName` instead of `AsRef<[u8]>`

### New Tests

- [#9]: Added tests for incorrect nested tags in input
- [#387]: Added a bunch of tests for sequences deserialization
- [#393]: Added more tests for namespace resolver
- [#393]: Added tests for reserved names (started with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events generated for corresponding inputs
- [#363]: Add tests for `Reader::read_event_impl` to ensure that proper events generated for corresponding inputs
- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
[#118]: https://github.com/tafia/quick-xml/issues/118
[#180]: https://github.com/tafia/quick-xml/issues/180
[#191]: https://github.com/tafia/quick-xml/issues/191
[#324]: https://github.com/tafia/quick-xml/issues/324
Expand All @@ -117,6 +130,7 @@
[#395]: https://github.com/tafia/quick-xml/pull/395
[#403]: https://github.com/tafia/quick-xml/pull/403
[#407]: https://github.com/tafia/quick-xml/pull/407
[#412]: https://github.com/tafia/quick-xml/pull/412

## 0.23.0 -- 2022-05-08

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ let mut buf = Vec::new();
loop {
// NOTE: this is the generic case when we don't know about the input BufRead.
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event_unbuffered()`
match reader.read_event(&mut buf) {
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
b"tag1" => println!("attributes values: {:?}",
Expand Down Expand Up @@ -77,7 +77,7 @@ reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {

// crates a new element ... alternatively we could reuse `e` by calling
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
fn parse_document(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
loop {
match r.read_event_unbuffered()? {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.unescaped_value()?);
Expand Down
22 changes: 11 additions & 11 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fn read_event(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand All @@ -57,7 +57,7 @@ fn read_event(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand Down Expand Up @@ -137,7 +137,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
Expand Down Expand Up @@ -175,7 +175,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
Expand Down Expand Up @@ -215,7 +215,7 @@ fn one_event(c: &mut Criterion) {
let mut r = Reader::from_reader(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -235,7 +235,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -255,7 +255,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -275,7 +275,7 @@ fn one_event(c: &mut Criterion) {
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
Expand All @@ -298,7 +298,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
Expand All @@ -321,7 +321,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
Expand All @@ -344,7 +344,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
Expand Down
2 changes: 1 addition & 1 deletion compare/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn low_level_comparison(c: &mut Criterion) {
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
match r.read_event_into(&mut buf) {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
Expand Down
4 changes: 2 additions & 2 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut reader = Reader::from_file("tests/documents/document.xml")?;
let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf)? {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => match element.name().as_ref() {
b"w:tbl" => {
count += 1;
Expand All @@ -33,7 +33,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut row_index = 0;
loop {
skip_buf.clear();
match reader.read_event(&mut skip_buf)? {
match reader.read_event_into(&mut skip_buf)? {
Event::Start(element) => match element.name().as_ref() {
b"w:tr" => {
stats.rows.push(vec![]);
Expand Down
5 changes: 3 additions & 2 deletions examples/read_texts.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
fn main() {
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::Reader;

let xml = "<tag1>text1</tag1><tag1>text2</tag1>\
Expand All @@ -12,11 +13,11 @@ fn main() {
let mut buf = Vec::new();

loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
.read_text(b"tag2", &mut Vec::new())
.read_text_into(QName(b"tag2"), &mut Vec::new())
.expect("Cannot decode text value"),
);
println!("{:?}", txt);
Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fuzz_target!(|data: &[u8]| {
let mut reader = Reader::from_reader(cursor);
let mut buf = vec![];
loop {
match reader.read_event(&mut buf) {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))=> {
if e.unescaped().is_err() {
break;
Expand Down
8 changes: 4 additions & 4 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ pub struct IoReader<R: BufRead> {
impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
fn next(&mut self) -> Result<DeEvent<'static>, DeError> {
let event = loop {
let e = self.reader.read_event(&mut self.buf)?;
let e = self.reader.read_event_into(&mut self.buf)?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
Expand All @@ -971,7 +971,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
}

fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end(name, &mut self.buf) {
match self.reader.read_to_end_into(name, &mut self.buf) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
}
Expand All @@ -993,7 +993,7 @@ pub struct SliceReader<'de> {
impl<'de> XmlRead<'de> for SliceReader<'de> {
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
loop {
let e = self.reader.read_event_unbuffered()?;
let e = self.reader.read_event()?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
Expand All @@ -1011,7 +1011,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
}

fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end_unbuffered(name) {
match self.reader.read_to_end(name) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
}
Expand Down
8 changes: 4 additions & 4 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::reader::{is_whitespace, Reader};
use crate::utils::{write_byte_string, write_cow_string, Bytes};
use std::fmt::{self, Debug, Display, Formatter};
use std::iter::FusedIterator;
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range};
use std::{borrow::Cow, collections::HashMap, ops::Range};

/// A struct representing a key/value XML attribute.
///
Expand Down Expand Up @@ -81,7 +81,7 @@ impl<'a> Attribute<'a> {
///
/// [`unescaped_value()`]: #method.unescaped_value
/// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode
pub fn unescape_and_decode_value<B: BufRead>(&self, reader: &Reader<B>) -> XmlResult<String> {
pub fn unescape_and_decode_value<B>(&self, reader: &Reader<B>) -> XmlResult<String> {
self.do_unescape_and_decode_value(reader, None)
}

Expand All @@ -99,7 +99,7 @@ impl<'a> Attribute<'a> {
/// # Pre-condition
///
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
pub fn unescape_and_decode_value_with_custom_entities<B: BufRead>(
pub fn unescape_and_decode_value_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
Expand All @@ -108,7 +108,7 @@ impl<'a> Attribute<'a> {
}

/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
fn do_unescape_and_decode_value<B: BufRead>(
fn do_unescape_and_decode_value<B>(
&self,
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
Expand Down
24 changes: 12 additions & 12 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
//!
//! # Reading
//! When reading a XML stream, the events are emitted by
//! [`Reader::read_event`]. You must listen
//! [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
Expand All @@ -29,10 +29,8 @@
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
//! [`Reader`]: ../reader/struct.Reader.html
//! [`Writer`]: ../writer/struct.Writer.html
//! [`Event`]: enum.Event.html
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event

pub mod attributes;

Expand All @@ -41,7 +39,6 @@ use encoding_rs::Encoding;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::io::BufRead;
use std::ops::Deref;
use std::str::from_utf8;

Expand Down Expand Up @@ -757,7 +754,7 @@ impl<'a> BytesText<'a> {
/// it might be wiser to manually use
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode<B>(&self, reader: &Reader<B>) -> Result<String> {
self.do_unescape_and_decode_with_custom_entities(reader, None)
}

Expand All @@ -771,15 +768,15 @@ impl<'a> BytesText<'a> {
/// # Pre-condition
///
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
pub fn unescape_and_decode_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
) -> Result<String> {
self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
}

fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
fn do_unescape_and_decode_with_custom_entities<B>(
&self,
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
Expand Down Expand Up @@ -928,7 +925,7 @@ impl<'a> Deref for BytesCData<'a> {

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Event emitted by [`Reader::read_event`].
/// Event emitted by [`Reader::read_event_into`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Event<'a> {
/// Text that appeared before the first opening tag or an [XML declaration].
Expand Down Expand Up @@ -956,7 +953,7 @@ pub enum Event<'a> {
/// let mut reader = Reader::from_bytes(xml);
/// let mut events_processed = 0;
/// loop {
/// match reader.read_event_unbuffered() {
/// match reader.read_event() {
/// Ok(Event::StartText(e)) => {
/// assert_eq!(events_processed, 0);
/// // Content contains BOM
Expand Down Expand Up @@ -1066,7 +1063,10 @@ mod test {
let mut buf = Vec::new();
let mut parsed_local_names = Vec::new();
loop {
match rdr.read_event(&mut buf).expect("unable to read xml event") {
match rdr
.read_event_into(&mut buf)
.expect("unable to read xml event")
{
Event::Start(ref e) => parsed_local_names.push(
from_utf8(e.local_name().as_ref())
.expect("unable to build str from local_name")
Expand Down
Loading

0 comments on commit 2eecf00

Please sign in to comment.