Skip to content

Commit

Permalink
Remove quick_xml::encoding::Decoder and Reader::decoder()
Browse files Browse the repository at this point in the history
As quick-xml will pre-decode everything, it is unnecessary.
  • Loading branch information
dralley committed Jul 26, 2023
1 parent 07c3a92 commit daf407e
Show file tree
Hide file tree
Showing 12 changed files with 34 additions and 111 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ async-tokio = ["tokio"]
## let mut buf = Vec::new();
## let mut unsupported = false;
## loop {
## if !reader.decoder().encoding().is_ascii_compatible() {
## if !reader.encoding().is_ascii_compatible() {
## unsupported = true;
## break;
## }
Expand Down
3 changes: 3 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@

### Misc Changes

- [#441]: `quick_xml::encoding::Decoder` and `Reader::decoder()` were removed as they are no
longer necessary (`Reader` already decodes everything for you). `Reader::encoding()` is
provided so that the current encoding remains accessible, as it was before.
- [#481]: Removed the uses of `const fn` added in version 0.24 in favor of a lower minimum
supported Rust version (1.46.0). Minimum supported Rust version is now verified in the CI.
- [#489]: Reduced the size of the package uploaded into the crates.io by excluding
Expand Down
2 changes: 1 addition & 1 deletion src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1931,7 +1931,7 @@ pub use crate::errors::serialize::DeError;
pub use resolver::{EntityResolver, NoEntityResolver};

use crate::{
encoding::{Decoder, Utf8BytesReader},
encoding::Utf8BytesReader,
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
Expand Down
68 changes: 0 additions & 68 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,74 +75,6 @@ impl<R: io::Read> io::BufRead for Utf8BytesReader<R> {
}
}

/// Decoder of byte slices into strings.
///
/// If feature `encoding` is enabled, the encoding is taken from the `"encoding"`
/// key of the XML declaration. UTF-8 is assumed if the XML has no `<?xml ?>`
/// declaration, if the encoding key is not defined, or if it names an unknown
/// encoding.
///
/// The library supports any UTF-8 compatible encoding that is supported by the
/// `encoding_rs` crate. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
///
/// If feature `encoding` is disabled, the decoder is always a UTF-8 decoder:
/// any XML declarations are ignored.
///
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
/// Encoding used by [`decode`](Self::decode). The field exists only when the
/// `encoding` feature is enabled; without it the struct has no fields.
#[cfg(feature = "encoding")]
pub(crate) encoding: &'static Encoding,
}

impl Decoder {
/// Creates a decoder for UTF-8 input.
///
/// With the `encoding` feature enabled the stored encoding is `UTF_8`;
/// without it there is no field to initialize.
pub(crate) fn utf8() -> Self {
Decoder {
#[cfg(feature = "encoding")]
encoding: UTF_8,
}
}

/// Creates a decoder whose encoding is `UTF_16LE`.
///
/// Compiled only for test builds with both the `encoding` and `serialize`
/// features enabled.
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) fn utf16() -> Self {
Decoder { encoding: UTF_16LE }
}
}

impl Decoder {
/// Returns the `Reader`'s encoding.
///
/// This encoding will be used by [`decode`].
///
/// Available only when the `encoding` feature is enabled.
///
/// [`decode`]: Self::decode
#[cfg(feature = "encoding")]
pub fn encoding(&self) -> &'static Encoding {
self.encoding
}

/// Decodes `bytes` into a string.
///
/// ## Without `encoding` feature
///
/// Decodes a UTF-8 slice regardless of XML declaration and ignoring BOM
/// if it is present in the `bytes`. The result borrows from `bytes`.
///
/// ## With `encoding` feature
///
/// Decodes the specified bytes using the encoding declared in the XML, if it
/// was declared there, or UTF-8 otherwise, and ignoring BOM if it is present
/// in the `bytes`.
///
/// ----
/// Returns an error in case of malformed sequences in the `bytes`.
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
// Exactly one of the two `decoded` bindings below is compiled, depending
// on whether the `encoding` feature is enabled.

// No `encoding` feature: validate the bytes as UTF-8 and borrow them.
#[cfg(not(feature = "encoding"))]
let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));

// `encoding` feature: delegate to the free `decode` function with the
// encoding stored in this decoder.
#[cfg(feature = "encoding")]
let decoded = decode(bytes, self.encoding);

decoded
}
}

/// Decodes the provided bytes using the specified encoding.
///
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ pub mod utils;
pub mod writer;

// reexports
pub use crate::encoding::Decoder;
#[cfg(feature = "serialize")]
pub use crate::errors::serialize::DeError;
pub use crate::errors::{Error, Result};
Expand Down
10 changes: 5 additions & 5 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,9 @@ mod test {
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
let mut buf = Vec::new();

assert_eq!(reader.decoder().encoding(), UTF_8);
assert_eq!(reader.encoding(), UTF_8);
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);
assert_eq!(reader.encoding(), WINDOWS_1251);

assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}
Expand All @@ -460,12 +460,12 @@ mod test {
);
let mut buf = Vec::new();

assert_eq!(reader.decoder().encoding(), UTF_8);
assert_eq!(reader.encoding(), UTF_8);
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);
assert_eq!(reader.encoding(), UTF_16LE);

reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);
assert_eq!(reader.encoding(), UTF_16LE);

assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}
Expand Down
12 changes: 6 additions & 6 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::ops::Range;
#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_8};

use crate::encoding::{Decoder, Utf8BytesReader};
use crate::encoding::Utf8BytesReader;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::reader::parser::Parser;
Expand Down Expand Up @@ -350,8 +350,7 @@ macro_rules! read_to_end {
depth -= 1;
}
Ok(Event::Eof) => {
let name = $self.decoder().decode($end.as_ref().as_bytes());
return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
return Err(Error::UnexpectedEof(format!("</{:?}>", $end.as_ref())));
}
_ => (),
}
Expand Down Expand Up @@ -598,16 +597,17 @@ impl<R> Reader<R> {
}
}

/// Get the decoder, used to decode bytes, read by this reader, to the strings.
/// Get the encoding this reader is currently using to decode strings.
///
/// The encoding in use may change after the XML declaration has been parsed.
/// If no encoding is specified in the declaration, it defaults to UTF-8.
///
/// This method is only available when the `encoding` feature is enabled;
/// without that feature the encoding is fixed to UTF-8.
#[cfg(feature = "encoding")]
#[inline]
pub fn decoder(&self) -> Decoder {
self.parser.decoder()
pub fn encoding(&self) -> &'static Encoding {
self.parser.encoding.encoding()
}
}

Expand Down
5 changes: 1 addition & 4 deletions src/reader/ns_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,13 +775,11 @@ impl<'i> NsReader<&'i [u8]> {
/// it reads, and if, for example, it contains a CDATA section, an attempt to
/// unescape its content will spoil the data.
///
/// Any text will be decoded using the XML current [`decoder()`].
///
/// In effect, this method performs the following code:
///
/// ```ignore
/// let span = reader.read_to_end(end)?;
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
/// ```
///
/// # Examples
Expand Down Expand Up @@ -828,7 +826,6 @@ impl<'i> NsReader<&'i [u8]> {
/// ```
///
/// [`Start`]: Event::Start
/// [`decoder()`]: Reader::decoder()
#[inline]
pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
self.reader.read_text(end)
Expand Down
25 changes: 10 additions & 15 deletions src/reader/parser.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#[cfg(feature = "encoding")]
use encoding_rs::UTF_8;

use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
#[cfg(feature = "encoding")]
Expand Down Expand Up @@ -55,6 +54,16 @@ pub(super) struct Parser {

#[cfg(feature = "encoding")]
/// Reference to the encoding used to read the XML document.
///
/// If feature `encoding` is enabled, this encoding is taken from the `"encoding"`
/// key of the XML declaration. UTF-8 is assumed if the XML has no `<?xml ?>`
/// declaration, if the encoding key is not defined, or if it names an unknown
/// encoding.
///
/// The library supports any UTF-8 compatible encoding that is supported by the
/// `encoding_rs` crate. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
///
/// If feature `encoding` is disabled, this field does not exist and the input
/// is always treated as UTF-8: any XML declarations are ignored.
///
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
pub encoding: EncodingRef,
}

Expand Down Expand Up @@ -249,20 +258,6 @@ impl Parser {
.split_off(self.opened_starts.pop().unwrap());
Ok(Event::End(BytesEnd::new(name)))
}

/// Get the decoder, used to decode bytes, read by this reader, to the strings.
///
/// If `encoding` feature is enabled, the used encoding may change after
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
///
/// If `encoding` feature is enabled and no encoding is specified in declaration,
/// defaults to UTF-8.
pub fn decoder(&self) -> Decoder {
Decoder {
#[cfg(feature = "encoding")]
encoding: self.encoding.encoding(),
}
}
}

impl Default for Parser {
Expand Down
11 changes: 4 additions & 7 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,11 @@ impl<'a> Reader<&'a [u8]> {
/// it reads, and if, for example, it contains a CDATA section, an attempt to
/// unescape its content will spoil the data.
///
/// Any text will be decoded using the XML current [`decoder()`].
///
/// In effect, this method performs the following code:
///
/// ```ignore
/// let span = reader.read_to_end(end)?;
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
/// ```
///
/// # Examples
Expand Down Expand Up @@ -206,13 +204,12 @@ impl<'a> Reader<&'a [u8]> {
/// ```
///
/// [`Start`]: Event::Start
/// [`decoder()`]: Self::decoder()
pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
// self.reader will be changed, so store original reference
let buffer = self.reader;
let span = self.read_to_end(end)?;

self.decoder().decode(&buffer[0..span.len()])
Ok(Cow::Borrowed(std::str::from_utf8(&buffer[0..span.len()])?))
}
}

Expand Down Expand Up @@ -364,9 +361,9 @@ mod test {
fn str_always_has_utf8() {
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");

assert_eq!(reader.decoder().encoding(), UTF_8);
assert_eq!(reader.encoding(), UTF_8);
reader.read_event().unwrap();
assert_eq!(reader.decoder().encoding(), UTF_8);
assert_eq!(reader.encoding(), UTF_8);

assert_eq!(reader.read_event().unwrap(), Event::Eof);
}
Expand Down
4 changes: 2 additions & 2 deletions tests/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ mod detect {
let mut r = Reader::from_reader(
include_bytes!(concat!("documents/encoding/", $file, ".xml")).as_ref(),
);
assert_eq!(r.decoder().encoding(), UTF_8);
assert_eq!(r.encoding(), UTF_8);

let mut buf = Vec::new();
loop {
match dbg!(r.read_event_into(&mut buf).unwrap()) {
Event::Eof => break,
_ => {}
}
assert_eq!(r.decoder().encoding(), $enc);
assert_eq!(r.encoding(), $enc);
buf.clear();
$($break)?
}
Expand Down
2 changes: 1 addition & 1 deletion tests/xmlrs_reader_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
loop {
let line = match reader.read_resolved_event_into(&mut Vec::new()) {
Ok((_, Event::Decl(e))) => {
// Declaration could change decoder
// Declaration could change encoding
let version = e.version().unwrap();
let encoding = e.encoding().unwrap().unwrap();
format!("StartDocument({}, {})", version, encoding)
Expand Down

0 comments on commit daf407e

Please sign in to comment.