Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow attributes in the Event::End and fix .error_position() #780

Merged
merged 10 commits into from
Jul 8, 2024
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@

- [#781]: Fix conditions to start CDATA section. Only uppercase `<![CDATA[` can start it.
Previously any case was allowed.
- [#780]: Fixed incorrect `.error_position()` when encountering a syntax error in an open or self-closed tag.

### Misc Changes

- [#780]: `reader::Parser`, `reader::ElementParser` and `reader::PiParser` moved to the new module `parser`.
- [#776]: Allow attributes in the end tag for compatibility with the Adobe Flash XML parser.

[#776]: https://github.com/tafia/quick-xml/issues/776
[#780]: https://github.com/tafia/quick-xml/pull/780
[#781]: https://github.com/tafia/quick-xml/pull/781


Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ loop {
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
// exits the loop when reaching end of file
Ok(Event::Eof) => break,

Expand Down Expand Up @@ -98,7 +98,7 @@ loop {
Ok(Event::Eof) => break,
// we can either move or borrow the event to write, depending on your use-case
Ok(e) => assert!(writer.write_event(e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
}
}

Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
);
}
Ok(Event::Eof) => break,
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (),
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() -> Result<(), quick_xml::Error> {
count += 1;
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() {
println!("{:?}", txt);
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod errors;
pub mod escape;
pub mod events;
pub mod name;
pub mod parser;
pub mod reader;
#[cfg(feature = "serialize")]
pub mod se;
Expand Down
4 changes: 2 additions & 2 deletions src/reader/element.rs → src/parser/element.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains a parser for an XML element.

use crate::errors::SyntaxError;
use crate::reader::Parser;
use crate::parser::Parser;

/// A parser that searches for a `>` symbol in the slice outside of quoted regions.
///
Expand All @@ -25,7 +25,7 @@ use crate::reader::Parser;
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::reader::{ElementParser, Parser};
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
Expand Down
29 changes: 29 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//! Contains low-level parsers of different XML pieces.

use crate::errors::SyntaxError;

mod element;
mod pi;

pub use element::ElementParser;
pub use pi::PiParser;

/// Decouples reading data from a data source from parsing the XML structure in it.
/// An implementor holds the state that must be preserved between successive
/// chunks of bytes obtained from the reader.
///
/// This trait is implemented for every parser that processes a piece of the XML grammar.
pub trait Parser {
/// Processes a new chunk of data and tries to determine the end of the parsed thing.
///
/// Returns the position of the end of the thing in `bytes` if the search
/// succeeded, and `None` otherwise.
///
/// # Parameters
/// - `bytes`: a slice in which to find the end of the thing.
///   Should contain text in an ASCII-compatible encoding.
fn feed(&mut self, bytes: &[u8]) -> Option<usize>;

/// Returns the parse error produced by this parser when the end of input is
/// reached without finding the end of the parsed thing.
///
/// This is an associated function (it takes no `self`), so the error does
/// not depend on the state of a particular parser instance.
fn eof_error() -> SyntaxError;
}
4 changes: 2 additions & 2 deletions src/reader/pi.rs → src/parser/pi.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains a parser for an XML processing instruction.

use crate::errors::SyntaxError;
use crate::reader::Parser;
use crate::parser::Parser;

/// A parser that searches for a `?>` sequence in the slice.
///
Expand All @@ -19,7 +19,7 @@ use crate::reader::Parser;
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::reader::{Parser, PiParser};
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
Expand Down
7 changes: 3 additions & 4 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ use tokio::io::{self, AsyncBufRead, AsyncBufReadExt};
use crate::errors::{Error, Result, SyntaxError};
use crate::events::Event;
use crate::name::{QName, ResolveResult};
use crate::parser::{ElementParser, Parser, PiParser};
use crate::reader::buffered_reader::impl_buffered_source;
use crate::reader::{
BangType, ElementParser, NsReader, ParseState, Parser, PiParser, ReadTextResult, Reader, Span,
};
use crate::reader::{BangType, NsReader, ParseState, ReadTextResult, Reader, Span};
use crate::utils::is_whitespace;

/// A struct for read XML asynchronously from an [`AsyncBufRead`].
Expand Down Expand Up @@ -59,7 +58,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// match reader.read_event_into_async(&mut buf).await {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
/// }
Expand Down
53 changes: 3 additions & 50 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use std::path::Path;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::reader::{BangType, Parser, ReadTextResult, Reader, Span, XmlSource};
use crate::parser::Parser;
use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
use crate::utils::is_whitespace;

macro_rules! impl_buffered_source {
Expand Down Expand Up @@ -100,54 +101,6 @@ macro_rules! impl_buffered_source {
ReadTextResult::UpToEof(&buf[start..])
}

/// Reads bytes from the underlying reader into `buf` until `byte` is found
/// or `fill_buf` returns an empty buffer.
///
/// The delimiter itself is consumed from the reader but is not stored in `buf`.
///
/// # Parameters
/// - `byte`: the delimiter to search for; must be ASCII (checked with `debug_assert!`)
/// - `buf`: destination buffer; new data is appended after its existing content
/// - `position`: incremented by the number of bytes consumed from the reader
///   during this call (including the delimiter), also when an I/O error is returned
///
/// # Returns
/// The part of `buf` appended by this call, paired with `true` if the delimiter
/// was found or `false` if the input ended first.
///
/// # Errors
/// Returns any I/O error reported by `fill_buf`, except
/// `io::ErrorKind::Interrupted`, which is retried.
#[inline]
$($async)? fn read_bytes_until $(<$lf>)? (
&mut self,
byte: u8,
buf: &'b mut Vec<u8>,
position: &mut u64,
) -> io::Result<(&'b [u8], bool)> {
// search byte must be within the ascii range
debug_assert!(byte.is_ascii());

// Number of bytes consumed from the reader so far during this call
let mut read = 0;
// Offset in `buf` where this call's data begins; earlier content is not returned
let start = buf.len();
loop {
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
// An empty buffer means there is no more input
Ok(n) if n.is_empty() => break,
Ok(n) => n,
// Interrupted reads are retried
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => {
// Account for what was already consumed before reporting the error
*position += read;
return Err(e);
}
};

match memchr::memchr(byte, available) {
Some(i) => {
// Store everything before the delimiter, but not the delimiter itself
buf.extend_from_slice(&available[..i]);

// `+ 1` also consumes the delimiter from the reader
let used = i + 1;
self $(.$reader)? .consume(used);
read += used as u64;

*position += read;
return Ok((&buf[start..], true));
}
None => {
// Delimiter is not in this chunk: take all of it and keep reading
buf.extend_from_slice(available);

let used = available.len();
self $(.$reader)? .consume(used);
read += used as u64;
}
}
}

// Input ended before the delimiter was found
*position += read;
Ok((&buf[start..], false))
}

#[inline]
$($async)? fn read_with<$($lf,)? P: Parser>(
&mut self,
Expand Down Expand Up @@ -327,7 +280,7 @@ impl<R: BufRead> Reader<R> {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
/// }
Expand Down
Loading