From f786c6277008fa2c0f954fe03362116c62239fc9 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 13:35:45 +0200 Subject: [PATCH 1/7] TarFormatString: improve debug output --- src/tar_format_types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index e58b8a5..074fbd0 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -77,7 +77,7 @@ impl Debug for TarFormatString { let sub_array = &self.bytes[0..self.len()]; write!( f, - "{},{} of {},{}", + "str='{}',byte_usage={}/{},is_nul_terminated={}", from_utf8(sub_array).unwrap(), self.len(), N, From fff7dcd6664152ca46af08c99e0d5086eb83ab62 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 13:55:00 +0200 Subject: [PATCH 2/7] TarFormatString: fix memory issues + return result 1: memory bug: length of bytes was not properly calculated 2: as_str() now returns a result --- src/archive.rs | 4 +- src/header.rs | 10 ++--- src/tar_format_types.rs | 93 ++++++++++++++++++----------------------- 3 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index f809c2e..7dcc6d4 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -273,7 +273,7 @@ impl<'a> Iterator for ArchiveIterator<'a> { let mut filename: TarFormatString<256> = TarFormatString::::new([0; POSIX_1003_MAX_FILENAME_LEN]); - if hdr.magic.as_str() == "ustar" && hdr.version.as_str() == "00" && !hdr.prefix.is_empty() { + if hdr.magic.as_str().unwrap() == "ustar" && hdr.version.as_str().unwrap() == "00" && !hdr.prefix.is_empty() { filename.append(&hdr.prefix); filename.append(&TarFormatString::<1>::new([b'/'])); } @@ -411,7 +411,7 @@ mod tests { /// Test that the entry's contents match the expected content. fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) { - assert_eq!(entry.filename().as_str(), filename); + assert_eq!(entry.filename().as_str(), Ok(filename)); assert_eq!(entry.size(), size); assert_eq!(entry.data().len(), size); } diff --git a/src/header.rs b/src/header.rs index 2983ea2..94b1419 100644 --- a/src/header.rs +++ b/src/header.rs @@ -273,7 +273,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")); assert_eq!( @@ -281,7 +281,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")); assert_eq!( @@ -289,7 +289,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); /* UNSUPPORTED YET. Uses extensions.. let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar")); @@ -307,7 +307,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")); // ARegType: legacy @@ -316,7 +316,7 @@ mod tests { TypeFlag::AREGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); } #[test] diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 074fbd0..6ffe875 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -3,9 +3,12 @@ use core::fmt::{Debug, Formatter}; use core::num::ParseIntError; use core::ptr::copy_nonoverlapping; -use core::str::from_utf8; +use core::str::{from_utf8, Utf8Error}; use num_traits::Num; +/// Base type for strings embedded in a Tar header. The length depends on the +/// context. The returned string +/// /// An optionally null terminated string. The contents are either: /// 1. A fully populated string with no null termination or /// 2. A partially populated string where the unused bytes are zero. @@ -34,36 +37,30 @@ impl TarFormatString { self.bytes[0] == 0 } - // True if the string is NULL terminated - pub const fn is_nul_terminated(&self) -> bool { - self.bytes[N - 1] == 0 - } - - /// Returns the length of the string (ignoring NULL bytes). - pub fn len(&self) -> usize { - if self.is_nul_terminated() { - memchr::memchr(0, &self.bytes).unwrap() - } else { - N - } + /// Returns the length of the bytes. This is either the full capacity `N` + /// or the data until the first NULL byte. + pub fn size(&self) -> usize { + memchr::memchr(0, &self.bytes).unwrap_or(N) } - /// Returns a str ref without NULL bytes. Panics if the string is not valid UTF-8. - pub fn as_str(&self) -> &str { - from_utf8(&self.bytes[0..self.len()]).expect("byte array is not UTF-8") + /// Returns a str ref without terminating or intermediate NULL bytes. The + /// string is truncated at the first NULL byte, in case not the full length + /// was used. + pub fn as_str(&self) -> Result<&str, Utf8Error> { + from_utf8(&self.bytes[0..self.size()]) } /// Append to end of string. Panics if there is not enough capacity. pub fn append(&mut self, other: &TarFormatString) { - let resulting_length = self.len() + other.len(); + let resulting_length = self.size() + other.size(); if resulting_length > N { panic!("Result to long for capacity {}", N); } unsafe { - let dst = self.bytes.as_mut_ptr().add(self.len()); + let dst = self.bytes.as_mut_ptr().add(self.size()); let src = other.bytes.as_ptr(); - copy_nonoverlapping(src, dst, other.len()); + copy_nonoverlapping(src, dst, other.size()); } if resulting_length < N { @@ -74,14 +71,13 @@ impl TarFormatString { impl Debug for TarFormatString { fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { - let sub_array = &self.bytes[0..self.len()]; + let sub_array = &self.bytes[0..self.size()]; write!( f, - "str='{}',byte_usage={}/{},is_nul_terminated={}", + "str='{}',byte_usage={}/{}", from_utf8(sub_array).unwrap(), - self.len(), - N, - self.is_nul_terminated() + self.size(), + N ) } } @@ -107,7 +103,7 @@ impl TarFormatNumber { T: num_traits::Num, { memchr::memchr2(32, 0, &self.0.bytes).map_or_else( - || T::from_str_radix(self.0.as_str(), R), + || T::from_str_radix(self.0.as_str().expect("Should be valid Tar archive"), R), |idx| { T::from_str_radix( from_utf8(&self.0.bytes[..idx]).expect("byte array is not UTF-8"), @@ -120,7 +116,7 @@ impl TarFormatNumber { impl Debug for TarFormatNumber { fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { - let sub_array = &self.0.bytes[0..self.0.len()]; + let sub_array = &self.0.bytes[0..self.0.size()]; match self.as_number::() { Err(msg) => write!(f, "{} [{}]", msg, from_utf8(sub_array).unwrap()), Ok(val) => write!(f, "{} [{}]", val, from_utf8(sub_array).unwrap()), @@ -168,9 +164,8 @@ mod tests { let empty = TarFormatString::new([0]); assert_eq!(size_of_val(&empty), 1); assert!(empty.is_empty()); - assert_eq!(empty.len(), 0); - assert!(empty.is_nul_terminated()); - assert_eq!(empty.as_str(), ""); + assert_eq!(empty.size(), 0); + assert_eq!(empty.as_str(), Ok("")); } #[test] @@ -178,9 +173,8 @@ mod tests { let s = TarFormatString::new([65]); assert_eq!(size_of_val(&s), 1); assert!(!s.is_empty()); - assert_eq!(s.len(), 1); - assert!(!s.is_nul_terminated()); - assert_eq!(s.as_str(), "A"); + assert_eq!(s.size(), 1); + assert_eq!(s.as_str(), Ok("A")); } #[test] @@ -188,9 +182,8 @@ mod tests { let s = TarFormatString::new([65, 0]); assert_eq!(size_of_val(&s), 2); assert!(!s.is_empty()); - assert_eq!(s.len(), 1); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), "A"); + assert_eq!(s.size(), 1); + assert_eq!(s.as_str(), Ok("A")); } #[test] @@ -203,49 +196,43 @@ mod tests { // Then the result is no change assert_eq!(size_of_val(&s), 20); assert!(s.is_empty()); - assert_eq!(s.len(), 0); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), ""); + assert_eq!(s.size(), 0); + assert_eq!(s.as_str(), Ok("")); // When adding ABC s.append(&TarFormatString::new([65, 66, 67])); // Then the string contains the additional 3 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); - assert_eq!(s.len(), 3); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), "ABC"); + assert_eq!(s.size(), 3); + assert_eq!(s.as_str(), Ok("ABC")); s.append(&TarFormatString::new([68, 69, 70])); // Then the string contains the additional 3 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); - assert_eq!(s.len(), 6); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), "ABCDEF"); + assert_eq!(s.size(), 6); + assert_eq!(s.as_str(), Ok("ABCDEF")); s.append(&TarFormatString::new([b'A'; 12])); // Then the string contains the additional 12 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); - assert_eq!(s.len(), 18); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAA"); + assert_eq!(s.size(), 18); + assert_eq!(s.as_str(), Ok("ABCDEFAAAAAAAAAAAA")); s.append(&TarFormatString::new([b'A'; 1])); // Then the string contains the additional 1 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); - assert_eq!(s.len(), 19); - assert!(s.is_nul_terminated()); - assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAA"); + assert_eq!(s.size(), 19); + assert_eq!(s.as_str(), Ok("ABCDEFAAAAAAAAAAAAA")); s.append(&TarFormatString::new([b'Z'; 1])); // Then the string contains the additional 1 char, is full and not null terminated assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); - assert_eq!(s.len(), 20); - assert!(!s.is_nul_terminated()); - assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAAZ"); + assert_eq!(s.size(), 20); + assert_eq!(s.as_str(), Ok("ABCDEFAAAAAAAAAAAAAZ")); } } From 9c1cd95f6f52894673f5a148597feef47e5bd492 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 14:29:24 +0200 Subject: [PATCH 3/7] TypeFlag: add is_regular_file() --- src/header.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/header.rs b/src/header.rs index 94b1419..78ce63c 100644 --- a/src/header.rs +++ b/src/header.rs @@ -177,6 +177,14 @@ pub enum TypeFlag { XGLTYPE = b'g', } +impl TypeFlag { + /// Whether we have a regular file. + pub fn is_regular_file(self) -> bool { + // Equivalent. See spec. + self == Self::AREGTYPE || self == Self::REGTYPE + } +} + bitflags::bitflags! { /// UNIX file permissions in octal format. #[repr(transparent)] From 54d7c4851a5e16bad1ca38c90885be47edc962d7 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 15:17:50 +0200 Subject: [PATCH 4/7] ArchiveIterator: split into ArchiveHeaderIterator and ArchiveEntryIterator --- src/archive.rs | 201 ++++++++++++++++++++++++---------------- src/header.rs | 25 +++-- src/tar_format_types.rs | 17 +++- 3 files changed, 152 insertions(+), 91 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 7dcc6d4..b9691bf 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -26,12 +26,12 @@ SOFTWARE. use crate::header::PosixHeader; use crate::tar_format_types::TarFormatString; -use crate::{TypeFlag, BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN}; +use crate::{BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN}; #[cfg(feature = "alloc")] use alloc::boxed::Box; use core::fmt::{Debug, Display, Formatter}; use core::str::Utf8Error; -use log::warn; +use log::{error, warn}; /// Describes an entry in an archive. /// Currently only supports files but no directories. @@ -124,9 +124,9 @@ impl TarArchive { /// Iterates over all entries of the Tar archive. /// Returns items of type [`ArchiveEntry`]. - /// See also [`ArchiveIterator`]. - pub fn entries(&self) -> ArchiveIterator { - ArchiveIterator::new(self.data.as_ref()) + /// See also [`ArchiveEntryIterator`]. + pub fn entries(&self) -> ArchiveEntryIterator { + ArchiveEntryIterator::new(self.data.as_ref()) } } @@ -164,116 +164,133 @@ impl<'a> TarArchiveRef<'a> { /// Iterates over all entries of the Tar archive. /// Returns items of type [`ArchiveEntry`]. - /// See also [`ArchiveIterator`]. - pub const fn entries(&self) -> ArchiveIterator { - ArchiveIterator::new(self.data) + /// See also [`ArchiveEntryIterator`]. + pub fn entries(&self) -> ArchiveEntryIterator { + ArchiveEntryIterator::new(self.data) } } -/// Iterator over the files of the archive. Each iteration starts -/// at the next Tar header entry. +/// Iterates over the headers of the Tar archive. #[derive(Debug)] -pub struct ArchiveIterator<'a> { +pub struct ArchiveHeaderIterator<'a> { archive_data: &'a [u8], block_index: usize, } -impl<'a> ArchiveIterator<'a> { - pub const fn new(archive: &'a [u8]) -> Self { +impl<'a> ArchiveHeaderIterator<'a> { + pub fn new(archive: &'a [u8]) -> Self { + assert!(!archive.is_empty()); + assert_eq!(archive.len() % BLOCKSIZE, 0); Self { archive_data: archive, block_index: 0, } } - /// Returns a reference to the next Header. - fn next_hdr(&self, block_index: usize) -> &'a PosixHeader { + /// Parse the memory at the given block as [`PosixHeader`]. + fn block_as_header(&self, block_index: usize) -> &'a PosixHeader { let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE]; unsafe { (hdr_ptr as *const u8).cast::().as_ref() }.unwrap() } } -impl<'a> Iterator for ArchiveIterator<'a> { - type Item = ArchiveEntry<'a>; +type BlockIndex = usize; + +impl<'a> Iterator for ArchiveHeaderIterator<'a> { + type Item = (BlockIndex, &'a PosixHeader); + /// Returns the next header. Internally, it updates the necessary data + /// structures to not read the same header multiple times. + /// + /// This returns `None` if either no further headers are found or if a + /// header can't be parsed. fn next(&mut self) -> Option { - if self.block_index * BLOCKSIZE >= self.archive_data.len() { - warn!("Reached end of Tar archive data without finding zero/end blocks!"); - return None; - } + // TODO better check for two end zero blocks here? + assert!(self.block_index < self.archive_data.len() / BLOCKSIZE); - let mut hdr = self.next_hdr(self.block_index); - - loop { - // check if we found end of archive - if hdr.is_zero_block() { - let next_hdr = self.next_hdr(self.block_index + 1); - if next_hdr.is_zero_block() { - // gracefully terminated Archive - log::debug!("End of Tar archive with two zero blocks!"); - } else { - log::warn!( - "Zero block found at end of Tar archive, but only one instead of two!" - ); - } - // end of archive - return None; - } + let hdr = self.block_as_header(self.block_index); + let block_index = self.block_index; - // Ignore directory entries, i.e. yield only regular files. Works as - // filenames in tarballs are fully specified, e.g. dirA/dirB/file1 - if hdr.typeflag != TypeFlag::DIRTYPE { - break; - } + // Start at next block on next iteration. + self.block_index += 1; + log::info!("{:#?}, {:#?}", hdr.name, hdr.typeflag); - // in next iteration: start at next Archive entry header - // +1 for current hdr block itself + all data blocks - let data_block_count: usize = hdr.payload_block_count().unwrap(); - self.block_index += data_block_count + 1; - hdr = self.next_hdr(self.block_index); + let block_count = hdr + .payload_block_count() + .inspect_err(|e| { + log::error!("Unparsable size ({e:?}) in header {hdr:#?}"); + }) + .ok()?; + + if !hdr.is_zero_block() { + self.block_index += block_count; } - if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE { - log::warn!( - "Found entry of type={:?}, but only files are supported", + Some((block_index, hdr)) + } +} + +impl<'a> ExactSizeIterator for ArchiveEntryIterator<'a> {} + +/// Iterator over the files of the archive. +#[derive(Debug)] +pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>); + +impl<'a> ArchiveEntryIterator<'a> { + pub fn new(archive: &'a [u8]) -> Self { + Self(ArchiveHeaderIterator::new(archive)) + } + + fn next_hdr(&mut self) -> Option<(BlockIndex, &'a PosixHeader)> { + self.0.next() + } +} + +impl<'a> Iterator for ArchiveEntryIterator<'a> { + type Item = ArchiveEntry<'a>; + + fn next(&mut self) -> Option { + let (mut block_index, mut hdr) = self.next_hdr()?; + + // Ignore directory entries, i.e. yield only regular files. Works as + // filenames in tarballs are fully specified, e.g. dirA/dirB/file1 + while !hdr.typeflag.is_regular_file() { + warn!( + "Skipping entry of type {:?} (not supported yet)", hdr.typeflag ); - return None; - } - if hdr.name.is_empty() { - warn!("Found empty file name",); + // Update properties. + (block_index, hdr) = self.next_hdr()?; } - let hdr_size = hdr.size.as_number::(); - if let Err(e) = hdr_size { - warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}"); - return None; + // check if we found end of archive (two zero blocks) + if hdr.is_zero_block() { + if self.next_hdr()?.1.is_zero_block() { + // found end + return None; + } else { + panic!("Never expected to have a situation where self.next_hdr() returns a zero block and the next one is not a zero block, as we should never point to an 'end zero block of a regular file'"); + } } - let hdr_size = hdr_size.unwrap(); - - // Fetch data of file from next block(s). - // .unwrap() is fine as we checked that hdr.size().val() is valid - // above - let data_block_count = hdr.payload_block_count().unwrap(); - - // +1: skip hdr block itself and start at data! - // i_begin is the byte begin index of this file in the array of the whole archive - let i_begin = (self.block_index + 1) * BLOCKSIZE; - // i_end is the exclusive byte end index of the data of the current file - let i_end = i_begin + data_block_count * BLOCKSIZE; - let file_block_bytes = &self.archive_data[i_begin..i_end]; - // Each block is 512 bytes long, but the file size is not necessarily a - // multiple of 512. - let file_bytes = &file_block_bytes[0..hdr_size]; - - // in next iteration: start at next Archive entry header - // +1 for current hdr block itself + all data blocks - self.block_index += data_block_count + 1; + + let payload_size: usize = hdr + .size + .as_number() + .inspect_err(|e| error!("Can't parse the file size from the header. {e:#?}")) + .ok()?; + + let idx_first_data_block = block_index + 1; + let idx_begin = idx_first_data_block * BLOCKSIZE; + let idx_end_exclusive = idx_begin + payload_size; + let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive]; let mut filename: TarFormatString<256> = TarFormatString::::new([0; POSIX_1003_MAX_FILENAME_LEN]); - if hdr.magic.as_str().unwrap() == "ustar" && hdr.version.as_str().unwrap() == "00" && !hdr.prefix.is_empty() { + if hdr.magic.as_str().unwrap() == "ustar" + && hdr.version.as_str().unwrap() == "00" + && !hdr.prefix.is_empty() + { filename.append(&hdr.prefix); filename.append(&TarFormatString::<1>::new([b'/'])); } @@ -302,6 +319,30 @@ mod tests { }; } + #[test] + fn test_header_iterator() { + let archive = include_bytes!("../tests/gnu_tar_default.tar"); + let iter = ArchiveHeaderIterator::new(archive); + let names = iter + .map(|(_i, hdr)| hdr.name.as_str().unwrap()) + .collect::>(); + assert_eq!( + names.as_slice(), + &[ + "bye_world_513b.txt", + "hello_world_513b.txt", + "hello_world.txt", + ] + ) + + /*for hdr in iter { + dbg!(hdr); + }*/ + + // TODO make PartialEq + //assert_eq!(ArchiveHeaderIterator::new(archive).collect::>().as_slice(), &[]); + } + #[test] fn test_archive_list() { let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap(); diff --git a/src/header.rs b/src/header.rs index 78ce63c..efb0dc7 100644 --- a/src/header.rs +++ b/src/header.rs @@ -72,6 +72,7 @@ impl Debug for Mode { /// This is also mostly compatible with the "Ustar"-header and the "GNU format". /// Because this library only needs to fetch data and filename, we don't need /// further checks. +// TODO make PartialEq? #[derive(Debug, Copy, Clone)] #[repr(C, packed)] pub struct PosixHeader { @@ -107,14 +108,12 @@ impl PosixHeader { /// content. Returns an error, if the file size can't be parsed from the /// header. pub fn payload_block_count(&self) -> Result { - let div = self.size.as_number::()? / BLOCKSIZE; - let modulo = self.size.as_number::()? % BLOCKSIZE; - let block_count = if modulo > 0 { div + 1 } else { div }; - Ok(block_count) + let parsed_size = self.size.as_number::()?; + Ok(parsed_size.div_ceil(BLOCKSIZE)) } - /// A Tar archive is terminated, if a end-of-archive entry, which consists of two 512 blocks - /// of zero bytes, is found. + /// A Tar archive is terminated, if an end-of-archive entry, which consists + /// of two 512 blocks of zero bytes, is found. pub fn is_zero_block(&self) -> bool { let ptr = self as *const Self as *const u8; let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) }; @@ -223,17 +222,25 @@ mod tests { use crate::BLOCKSIZE; use std::mem::size_of; - /// Casts the bytes to a reference to a PosixhHeader. - fn bytes_to_archive(bytes: &[u8]) -> &PosixHeader { - unsafe { (bytes.as_ptr() as *const PosixHeader).as_ref() }.unwrap() + /// Returns the PosixHeader at the beginning of the Tar archive. + fn bytes_to_archive(tar_archive_data: &[u8]) -> &PosixHeader { + unsafe { (tar_archive_data.as_ptr() as *const PosixHeader).as_ref() }.unwrap() } #[test] fn test_display_header() { let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); + assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); println!("{:#?}'", archive); } + #[test] + fn test_payload_block_count() { + // first file is "bye_world_513b.txt" => we expect two data blocks + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); + assert_eq!(archive.payload_block_count(), Ok(2)); + } + #[test] fn test_show_tar_header_magics() { let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 6ffe875..7630b67 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -14,7 +14,7 @@ use num_traits::Num; /// 2. A partially populated string where the unused bytes are zero. /// /// The content is likely to be UTF-8/ASCII, but that is not verified by this -/// type. +/// type. The #[derive(Copy, Clone)] #[repr(C)] pub struct TarFormatString { @@ -82,7 +82,7 @@ impl Debug for TarFormatString { } } -/// A number. Trailing spaces in the string are ignored. +/// A number with a specified base. Trailing spaces in the string are ignored. #[derive(Copy, Clone)] #[repr(C)] pub struct TarFormatNumber(TarFormatString); @@ -112,6 +112,11 @@ impl TarFormatNumber { }, ) } + + /// Returns the raw string describing this type. + pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { + self.0.as_str() + } } impl Debug for TarFormatNumber { @@ -143,6 +148,10 @@ impl TarFormatDecimal { { self.0.as_number::() } + + pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { + self.0.as_raw_str() + } } impl TarFormatOctal { @@ -152,6 +161,10 @@ impl TarFormatOctal { { self.0.as_number::() } + + pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { + self.0.as_raw_str() + } } mod tests { From 34f0e56da712d961072cf2274278d82903e5d21d Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 15:20:09 +0200 Subject: [PATCH 5/7] miri: fix memory issue --- src/archive.rs | 10 ++++++++-- src/lib.rs | 4 ---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index b9691bf..f57f771 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -189,8 +189,14 @@ impl<'a> ArchiveHeaderIterator<'a> { /// Parse the memory at the given block as [`PosixHeader`]. fn block_as_header(&self, block_index: usize) -> &'a PosixHeader { - let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE]; - unsafe { (hdr_ptr as *const u8).cast::().as_ref() }.unwrap() + unsafe { + self.archive_data + .as_ptr() + .add(block_index * BLOCKSIZE) + .cast::() + .as_ref() + .unwrap() + } } } diff --git a/src/lib.rs b/src/lib.rs index 691fe64..f25fef6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,10 +47,6 @@ SOFTWARE. //! ```rust //! use tar_no_std::TarArchiveRef; //! -//! // log: not mandatory -//! std::env::set_var("RUST_LOG", "trace"); -//! env_logger::init(); -//! //! // also works in no_std environment (except the println!, of course) //! let archive = include_bytes!("../tests/gnu_tar_default.tar"); //! let archive = TarArchiveRef::new(archive).unwrap(); From 0e6f5a9f08ffdf550e35b2433864bfb8d5cab5f8 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 15:24:52 +0200 Subject: [PATCH 6/7] ci: run miri --- .github/workflows/rust.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4be95ef..77c3c4a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,6 +36,29 @@ jobs: - name: Run tests run: cargo test --verbose --features alloc + miri: + runs-on: "${{ matrix.runs-on }}" + needs: + # Logical dependency and wait for cache to be present + - build + strategy: + matrix: + runs-on: + - ubuntu-latest + rust: + - nightly + steps: + - uses: actions/checkout@v2 + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: "${{ matrix.rust }}" + - uses: Swatinem/rust-cache@v2 + with: + key: "${{ matrix.runs-on }}-${{ matrix.rust }}" + - run: rustup component add miri + - run: cargo miri test --tests + style_checks: runs-on: ubuntu-latest strategy: From a0e58fc047c9c3e1a0a80d951cc6d09e201e5c41 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 15:26:45 +0200 Subject: [PATCH 7/7] PosixHeader: derive PartialEq and Eq --- src/header.rs | 5 ++--- src/tar_format_types.rs | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/header.rs b/src/header.rs index efb0dc7..696fd42 100644 --- a/src/header.rs +++ b/src/header.rs @@ -41,7 +41,7 @@ pub enum ModeError { } /// Wrapper around the UNIX file permissions given in octal ASCII. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] #[repr(transparent)] pub struct Mode(TarFormatOctal<8>); @@ -72,8 +72,7 @@ impl Debug for Mode { /// This is also mostly compatible with the "Ustar"-header and the "GNU format". /// Because this library only needs to fetch data and filename, we don't need /// further checks. -// TODO make PartialEq? -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(C, packed)] pub struct PosixHeader { pub name: TarFormatString, diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 7630b67..53d51c2 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -15,7 +15,7 @@ use num_traits::Num; /// /// The content is likely to be UTF-8/ASCII, but that is not verified by this /// type. The -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] #[repr(C)] pub struct TarFormatString { bytes: [u8; N], @@ -83,17 +83,17 @@ impl Debug for TarFormatString { } /// A number with a specified base. Trailing spaces in the string are ignored. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] #[repr(C)] pub struct TarFormatNumber(TarFormatString); /// An octal number. Trailing spaces in the string are ignored. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] #[repr(C)] pub struct TarFormatOctal(TarFormatNumber); /// A decimal number. Trailing spaces in the string are ignored. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] #[repr(C)] pub struct TarFormatDecimal(TarFormatNumber);