From 14097e009b191b23cd00d712b5ffb616c60145a1 Mon Sep 17 00:00:00 2001 From: Andy Schneider Date: Fri, 3 May 2024 09:27:56 +0100 Subject: [PATCH] Add 256 filename support (prefix + name) and add support for space terminated numbers and non-null terminated names (#10) * Add 256 filename support (prefix + name) and add support for space terminated numbers. * Format check passes. Add missing file. * Act on clippy issues found * fx nit and idiomatic conditional compile * Clippy is wrong as from_utf8 is not yet stable for const fn usage. * Move long or deep filenames for tar file creation into a tar file. Normally these aren't needed as the .tar files are used in the source. The tests themselves note what files they depend on. * Upgrade MSVR as there are dependencies in the tree where SemVer is upgrading them past the Rust 1.60 compiler. Example from an indirectly dependent crate on Linux: error: package `rustix v0.38.11` cannot be built because it requires rustc 1.63 or newer, while the currently active rustc version is 1.60.0 --- .github/workflows/rust.yml | 4 +- Cargo.toml | 15 +- README.md | 5 +- examples/alloc_feature.rs | 2 +- src/archive.rs | 162 +++++++++--- src/header.rs | 138 ++-------- src/lib.rs | 8 +- src/tar_format_types.rs | 250 ++++++++++++++++++ tests/README.md | 2 + tests/gnu_tar_default_with_dir.tar | Bin 0 -> 10240 bytes tests/gnu_tar_ustar_deep.tar | Bin 0 -> 10240 bytes tests/gnu_tar_ustar_long.tar | Bin 0 -> 10240 bytes tests/mac_tar_ustar_with_dir.tar | Bin 0 -> 5632 bytes ...ontent_subset_for_recreating_tar_files.tar | Bin 0 -> 15360 bytes 14 files changed, 429 insertions(+), 157 deletions(-) create mode 100644 src/tar_format_types.rs create mode 100644 tests/README.md create mode 100644 tests/gnu_tar_default_with_dir.tar create mode 100644 tests/gnu_tar_ustar_deep.tar create mode 100644 tests/gnu_tar_ustar_long.tar create mode 100644 tests/mac_tar_ustar_with_dir.tar create mode 100644 tests/tar_content_subset_for_recreating_tar_files.tar diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index dbe04a2..6d83c74 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,7 @@ jobs: rust: - stable - nightly - - 1.60.0 # MSVR + - 1.63.0 # MSVR steps: - uses: actions/checkout@v2 # Important preparation step: override the latest default Rust version in GitHub CI @@ -41,7 +41,7 @@ jobs: strategy: matrix: rust: - - 1.60.0 + - 1.63.0 steps: - uses: actions/checkout@v2 # Important preparation step: override the latest default Rust version in GitHub CI diff --git a/Cargo.toml b/Cargo.toml index 6dcf169..8507748 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,9 +3,9 @@ name = "tar-no-std" description = """ Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. The crate is simple and only supports reading of "basic" archives, therefore no extensions, such -as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte. -The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat -collections of files. +as GNU Longname. The maximum supported file name length is 256 characters excluding the NULL-byte +(using the tar name/prefix longname implementation).The maximum supported file size is 8GiB. +Directories are supported, but only regular fields are yielded in iteration. """ version = "0.2.0" edition = "2021" @@ -24,10 +24,15 @@ resolver = "2" default = [] alloc = [] +[[example]] +name = "alloc_feature" +required-features = ["alloc"] + [dependencies] -arrayvec = { version = "0.7", default-features = false } bitflags = "2.0" log = { version = "0.4", default-features = false } +memchr = { version = "2.6.3", default-features = false } +num-traits = { version = "0.2.16", default-features = false } [dev-dependencies] -env_logger = "0.10" +env_logger = "0.10" \ No newline at end of file diff --git a/README.md b/README.md index 7a27cfd..79d1b99 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,8 @@ environment and need full feature support, I recommend the use of { - filename: ArrayString, + filename: TarFormatString, data: &'a [u8], size: usize, } #[allow(unused)] impl<'a> ArchiveEntry<'a> { - const fn new(filename: ArrayString, data: &'a [u8]) -> Self { + const fn new(filename: TarFormatString, data: &'a [u8]) -> Self { ArchiveEntry { filename, data, @@ -53,7 +53,7 @@ impl<'a> ArchiveEntry<'a> { /// Filename of the entry with a maximum of 100 characters (including the /// terminating NULL-byte). - pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> { + pub const fn filename(&self) -> TarFormatString<{ POSIX_1003_MAX_FILENAME_LEN }> { self.filename } @@ -63,6 +63,7 @@ impl<'a> ArchiveEntry<'a> { } /// Data of the file as string slice, if data is valid UTF-8. + #[allow(clippy::missing_const_for_fn)] pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> { core::str::from_utf8(self.data) } @@ -192,19 +193,35 @@ impl<'a> Iterator for ArchiveIterator<'a> { return None; } - let hdr = self.next_hdr(self.block_index); + let mut hdr = self.next_hdr(self.block_index); + + loop { + // check if we found end of archive + if hdr.is_zero_block() { + let next_hdr = self.next_hdr(self.block_index + 1); + if next_hdr.is_zero_block() { + // gracefully terminated Archive + log::debug!("End of Tar archive with two zero blocks!"); + } else { + log::warn!( + "Zero block found at end of Tar archive, but only one instead of two!" + ); + } + // end of archive + return None; + } - // check if we found end of archive - if hdr.is_zero_block() { - let next_hdr = self.next_hdr(self.block_index + 1); - if next_hdr.is_zero_block() { - // gracefully terminated Archive - log::debug!("End of Tar archive with two zero blocks!"); - } else { - log::warn!("Zero block found at end of Tar archive, but only one instead of two!"); + // Ignore directory entries, i.e. yield only regular files. Works as + // filenames in tarballs are fully specified, e.g. dirA/dirB/file1 + if hdr.typeflag != TypeFlag::DIRTYPE { + break; } - // end of archive - return None; + + // in next iteration: start at next Archive entry header + // +1 for current hdr block itself + all data blocks + let data_block_count: usize = hdr.payload_block_count().unwrap(); + self.block_index += data_block_count + 1; + hdr = self.next_hdr(self.block_index); } if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE { @@ -219,7 +236,7 @@ impl<'a> Iterator for ArchiveIterator<'a> { warn!("Found empty file name",); } - let hdr_size = hdr.size.val(); + let hdr_size = hdr.size.as_number::(); if let Err(e) = hdr_size { warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}"); return None; @@ -245,10 +262,13 @@ impl<'a> Iterator for ArchiveIterator<'a> { // +1 for current hdr block itself + all data blocks self.block_index += data_block_count + 1; - let filename = ArrayString::from_str(hdr.name.as_string().as_str()); - // .unwrap is fine as the capacity is MUST be ok. - let filename = filename.unwrap(); - + let mut filename: TarFormatString<256> = + TarFormatString::::new([0; POSIX_1003_MAX_FILENAME_LEN]); + if hdr.magic.as_str() == "ustar" && hdr.version.as_str() == "00" && !hdr.prefix.is_empty() { + filename.append(&hdr.prefix); + filename.append(&TarFormatString::<1>::new([b'/'])); + } + filename.append(&hdr.name); Some(ArchiveEntry::new(filename, file_bytes)) } } @@ -264,7 +284,6 @@ mod tests { let entries = archive.entries().collect::>(); println!("{:#?}", entries); } - /// Tests to read the entries from existing archives in various Tar flavors. #[test] fn test_archive_entries() { @@ -299,6 +318,54 @@ mod tests { assert_archive_content(&entries); } + /// Tests to read the entries from an existing tarball with a directory in it + #[test] + fn test_archive_with_long_dir_entries() { + // tarball created with: + // $ cd tests; gtar --format=ustar -cf gnu_tar_ustar_long.tar 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_long.tar")); + let entries = archive.entries().collect::>(); + + assert_eq!(entries.len(), 2); + // Maximum length of a directory and name when the directory itself is tar'd + assert_entry_content(&entries[0], "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7); + // Maximum length of a directory and name when only the file is tar'd. + assert_entry_content(&entries[1], "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7); + } + + #[test] + fn test_archive_with_deep_dir_entries() { + // tarball created with: + // $ cd tests; gtar --format=ustar -cf gnu_tar_ustar_deep.tar 0123456789 + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_deep.tar")); + let entries = archive.entries().collect::>(); + + assert_eq!(entries.len(), 1); + assert_entry_content(&entries[0], "0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/empty", 0); + } + + #[test] + fn test_archive_with_dir_entries() { + // tarball created with: + // $ gtar -cf tests/gnu_tar_default_with_dir.tar --exclude '*.tar' --exclude '012345678*' tests + { + let archive = + TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar")); + let entries = archive.entries().collect::>(); + + assert_archive_with_dir_content(&entries); + } + + // tarball created with: + // $(osx) tar -cf tests/mac_tar_ustar_with_dir.tar --format=ustar --exclude '*.tar' --exclude '012345678*' tests + { + let archive = TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar")); + let entries = archive.entries().collect::>(); + + assert_archive_with_dir_content(&entries); + } + } + /// Like [`test_archive_entries`] but with additional `alloc` functionality. #[cfg(feature = "alloc")] #[test] @@ -314,15 +381,20 @@ mod tests { assert_eq!(data, archive.into()); } + /// Test that the entry's contents match the expected content. + fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) { + assert_eq!(entry.filename().as_str(), filename); + assert_eq!(entry.size(), size); + assert_eq!(entry.data().len(), size); + } + /// Tests that the parsed archive matches the expected order. The tarballs /// the tests directory were created once by me with files in the order /// specified in this test. fn assert_archive_content(entries: &[ArchiveEntry]) { assert_eq!(entries.len(), 3); - assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt"); - assert_eq!(entries[0].size(), 513); - assert_eq!(entries[0].data().len(), 513); + assert_entry_content(&entries[0], "bye_world_513b.txt", 513); assert_eq!( entries[0].data_as_str().expect("Should be valid UTF-8"), // .replace: Ensure that the test also works on Windows @@ -331,22 +403,48 @@ mod tests { // Test that an entry that needs two 512 byte data blocks is read // properly. - assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt"); - assert_eq!(entries[1].size(), 513); - assert_eq!(entries[1].data().len(), 513); + assert_entry_content(&entries[1], "hello_world_513b.txt", 513); assert_eq!( entries[1].data_as_str().expect("Should be valid UTF-8"), // .replace: Ensure that the test also works on Windows include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n") ); - assert_eq!(entries[2].filename().as_str(), "hello_world.txt"); - assert_eq!(entries[2].size(), 12); - assert_eq!(entries[2].data().len(), 12); + assert_entry_content(&entries[2], "hello_world.txt", 12); assert_eq!( entries[2].data_as_str().expect("Should be valid UTF-8"), "Hello World\n", "file content must match" ); } + + /// Tests that the parsed archive matches the expected order and the filename includes + /// the directory name. The tarballs the tests directory were created once by me with files + /// in the order specified in this test. + fn assert_archive_with_dir_content(entries: &[ArchiveEntry]) { + assert_eq!(entries.len(), 3); + + assert_entry_content(&entries[0], "tests/hello_world.txt", 12); + assert_eq!( + entries[0].data_as_str().expect("Should be valid UTF-8"), + "Hello World\n", + "file content must match" + ); + + // Test that an entry that needs two 512 byte data blocks is read + // properly. + assert_entry_content(&entries[1], "tests/bye_world_513b.txt", 513); + assert_eq!( + entries[1].data_as_str().expect("Should be valid UTF-8"), + // .replace: Ensure that the test also works on Windows + include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n") + ); + + assert_entry_content(&entries[2], "tests/hello_world_513b.txt", 513); + assert_eq!( + entries[2].data_as_str().expect("Should be valid UTF-8"), + // .replace: Ensure that the test also works on Windows + include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n") + ); + } } diff --git a/src/header.rs b/src/header.rs index aadb6a6..2983ea2 100644 --- a/src/header.rs +++ b/src/header.rs @@ -30,31 +30,10 @@ SOFTWARE. #![allow(non_upper_case_globals)] -use crate::{BLOCKSIZE, FILENAME_MAX_LEN}; -use arrayvec::ArrayString; +use crate::{TarFormatDecimal, TarFormatOctal, TarFormatString, BLOCKSIZE, NAME_LEN, PREFIX_LEN}; use core::fmt::{Debug, Formatter}; use core::num::ParseIntError; -/// The file size is encoded as octal ASCII number inside a Tar header. -#[derive(Copy, Clone)] -#[repr(transparent)] -pub struct Size(StaticCString<12>); - -impl Size { - /// Returns the octal ASCII number as actual size in bytes. - pub fn val(&self) -> Result { - usize::from_str_radix(self.0.as_string().as_str(), 8) - } -} - -impl Debug for Size { - fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { - let mut debug = f.debug_tuple("Size"); - debug.field(&self.val()); - debug.finish() - } -} - #[derive(Debug)] pub enum ModeError { ParseInt(ParseIntError), @@ -64,14 +43,12 @@ pub enum ModeError { /// Wrapper around the UNIX file permissions given in octal ASCII. #[derive(Copy, Clone)] #[repr(transparent)] -pub struct Mode(StaticCString<8>); +pub struct Mode(TarFormatOctal<8>); impl Mode { /// Parses the [`ModeFlags`] from the mode string. pub fn to_flags(self) -> Result { - let octal_number_str = self.0.as_string(); - let bits = - u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?; + let bits = self.0.as_number::().map_err(ModeError::ParseInt)?; ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode) } } @@ -84,60 +61,6 @@ impl Debug for Mode { } } -/// A C-String that is stored in a static array. There is always a terminating -/// NULL-byte. -/// -/// The content is likely to be UTF-8/ASCII, but that is not verified by this -/// type. -#[derive(Copy, Clone)] -#[repr(transparent)] -pub struct StaticCString([u8; N]); - -#[allow(unused)] -impl StaticCString { - /// Constructor. - const fn new(bytes: [u8; N]) -> Self { - Self(bytes) - } - - /// Returns the length of the string without NULL-byte. - pub fn len(&self) -> usize { - // not as efficient as it could be but negligible - self.as_string().len() - } - - /// Returns if the string without NULL-byte is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns a string that includes all characters until the first null. - pub fn as_string(&self) -> ArrayString { - let mut string = ArrayString::new(); - self.0 - .clone() - .iter() - .copied() - // Take all chars until the terminating null. - .take_while(|byte| *byte != 0) - .for_each(|byte| string.push(byte as char)); - string - } -} - -impl Debug for StaticCString { - fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { - let mut debug = f.debug_tuple("Name"); - let str = self.as_string(); - if str.is_empty() { - debug.field(&""); - } else { - debug.field(&str); - } - debug.finish() - } -} - /// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990. /// "New" (version?) GNU Tar versions use this archive format by default. /// (). @@ -152,33 +75,29 @@ impl Debug for StaticCString { #[derive(Debug, Copy, Clone)] #[repr(C, packed)] pub struct PosixHeader { - /// Name. There is always a null byte, therefore - /// the max len is 99. - pub name: StaticCString<{ FILENAME_MAX_LEN }>, + pub name: TarFormatString, pub mode: Mode, - pub uid: [u8; 8], - pub gid: [u8; 8], + pub uid: TarFormatOctal<8>, + pub gid: TarFormatOctal<8>, // confusing; size is stored as ASCII string - pub size: Size, - pub mtime: [u8; 12], - pub cksum: [u8; 8], + pub size: TarFormatOctal<12>, + pub mtime: TarFormatDecimal<12>, + pub cksum: TarFormatOctal<8>, pub typeflag: TypeFlag, /// Name. There is always a null byte, therefore /// the max len is 99. - pub linkname: StaticCString<{ FILENAME_MAX_LEN }>, - pub magic: StaticCString<6>, - pub version: StaticCString<2>, + pub linkname: TarFormatString, + pub magic: TarFormatString<6>, + pub version: TarFormatString<2>, /// Username. There is always a null byte, therefore /// the max len is N-1. - pub uname: StaticCString<32>, + pub uname: TarFormatString<32>, /// Groupname. There is always a null byte, therefore /// the max len is N-1. - pub gname: StaticCString<32>, - pub dev_major: [u8; 8], - pub dev_minor: [u8; 8], - /// There is always a null byte, therefore - /// the max len is N-1. - pub prefix: StaticCString<155>, + pub gname: TarFormatString<32>, + pub dev_major: TarFormatOctal<8>, + pub dev_minor: TarFormatOctal<8>, + pub prefix: TarFormatString, // padding => to BLOCKSIZE bytes pub _pad: [u8; 12], } @@ -188,8 +107,8 @@ impl PosixHeader { /// content. Returns an error, if the file size can't be parsed from the /// header. pub fn payload_block_count(&self) -> Result { - let div = self.size.val()? / BLOCKSIZE; - let modulo = self.size.val()? % BLOCKSIZE; + let div = self.size.as_number::()? / BLOCKSIZE; + let modulo = self.size.as_number::()? % BLOCKSIZE; let block_count = if modulo > 0 { div + 1 } else { div }; Ok(block_count) } @@ -292,7 +211,7 @@ bitflags::bitflags! { #[cfg(test)] mod tests { - use crate::header::{PosixHeader, StaticCString, TypeFlag}; + use crate::header::{PosixHeader, TypeFlag}; use crate::BLOCKSIZE; use std::mem::size_of; @@ -354,7 +273,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")); assert_eq!( @@ -362,7 +281,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")); assert_eq!( @@ -370,7 +289,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); /* UNSUPPORTED YET. Uses extensions.. let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar")); @@ -388,7 +307,7 @@ mod tests { TypeFlag::REGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")); // ARegType: legacy @@ -397,18 +316,11 @@ mod tests { TypeFlag::AREGTYPE, "the first entry is a regular file!" ); - assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + assert_eq!(archive.name.as_str(), "bye_world_513b.txt"); } #[test] fn test_size() { assert_eq!(BLOCKSIZE, size_of::()); } - - #[test] - fn test_static_str() { - let str = StaticCString::new(*b"0000633\0"); - assert_eq!(str.len(), 7); - assert_eq!(str.as_string().as_str(), "0000633"); - } } diff --git a/src/lib.rs b/src/lib.rs index fbaefbd..c7f8caa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,10 +90,16 @@ extern crate alloc; /// Each Archive Entry (either Header or Data Block) is a block of 512 bytes. const BLOCKSIZE: usize = 512; /// Maximum filename length of the base Tar format including the terminating NULL-byte. -const FILENAME_MAX_LEN: usize = 100; +const NAME_LEN: usize = 100; +/// Maximum long filename length of the base Tar format including the prefix +const POSIX_1003_MAX_FILENAME_LEN: usize = 256; +/// Maximum length of the prefix in Posix tar format +const PREFIX_LEN: usize = 155; mod archive; mod header; +mod tar_format_types; pub use archive::*; pub use header::*; +pub use tar_format_types::*; diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs new file mode 100644 index 0000000..8b7ed4a --- /dev/null +++ b/src/tar_format_types.rs @@ -0,0 +1,250 @@ +#![allow(unused_imports)] + +use core::fmt::{Debug, Formatter}; +use core::num::ParseIntError; +use core::ptr::copy_nonoverlapping; +use core::str::from_utf8; +use num_traits::Num; + +/// An optionally null terminated string. The contents are either: +/// 1. A fully populated string with no null termination or +/// 2. A partially populated string where the unused bytes are zero. +/// +/// The content is likely to be UTF-8/ASCII, but that is not verified by this +/// type. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct TarFormatString { + bytes: [u8; N], +} + +/// A Tar format string is a fixed length byte array containing UTF-8 bytes. +/// This string will be null terminated if it doesn't fill the entire array. +impl TarFormatString { + /// Constructor. + pub fn new(bytes: [u8; N]) -> Self { + if N == 0 { + panic!("Array cannot be zero length"); + } + Self { bytes } + } + + /// True if the is string empty (ignoring NULL bytes). + pub const fn is_empty(&self) -> bool { + self.bytes[0] == 0 + } + + // True if the string is NULL terminated + pub const fn is_nul_terminated(&self) -> bool { + self.bytes[N - 1] == 0 + } + + /// Returns the length of the string (ignoring NULL bytes). + pub fn len(&self) -> usize { + if self.is_nul_terminated() { + memchr::memchr(0, &self.bytes).unwrap() + } else { + N + } + } + + /// Returns a str ref without NULL bytes. Panics if the string is not valid UTF-8. + pub fn as_str(&self) -> &str { + from_utf8(&self.bytes[0..self.len()]).expect("byte array is not UTF-8") + } + + /// Append to end of string. Panics if there is not enough capacity. + pub fn append(&mut self, other: &TarFormatString) { + let resulting_length = self.len() + other.len(); + if resulting_length > N { + panic!("Result to long for capacity {}", N); + } + + unsafe { + let dst = self.bytes.as_mut_ptr().add(self.len()); + let src = other.bytes.as_ptr(); + copy_nonoverlapping(src, dst, other.len()); + } + + if resulting_length < N { + self.bytes[resulting_length] = 0; + } + } +} + +impl Debug for TarFormatString { + fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { + let sub_array = &self.bytes[0..self.len()]; + write!( + f, + "{},{} of {},{}", + from_utf8(sub_array).unwrap(), + self.len(), + N, + self.is_nul_terminated() + ) + } +} + +/// A number. Trailing spaces in the string are ignored. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct TarFormatNumber(TarFormatString); + +/// An octal number. Trailing spaces in the string are ignored. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct TarFormatOctal(TarFormatNumber); + +/// A decimal number. Trailing spaces in the string are ignored. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct TarFormatDecimal(TarFormatNumber); + +impl TarFormatNumber { + pub fn as_number(&self) -> core::result::Result + where + T: num_traits::Num, + { + memchr::memchr2(32, 0, &self.0.bytes).map_or_else( + || T::from_str_radix(self.0.as_str(), R), + |idx| { + T::from_str_radix( + from_utf8(&self.0.bytes[..idx]).expect("byte array is not UTF-8"), + 8, + ) + }, + ) + } +} + +impl Debug for TarFormatNumber { + fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { + let sub_array = &self.0.bytes[0..self.0.len()]; + match self.as_number::() { + Err(msg) => write!(f, "{} [{}]", msg, from_utf8(sub_array).unwrap()), + Ok(val) => write!(f, "{} [{}]", val, from_utf8(sub_array).unwrap()), + } + } +} + +impl Debug for TarFormatOctal { + fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { + self.0.fmt(f) + } +} + +impl Debug for TarFormatDecimal { + fn fmt(&self, f: &mut Formatter) -> core::fmt::Result { + self.0.fmt(f) + } +} + +impl TarFormatDecimal { + pub fn as_number(&self) -> core::result::Result + where + T: num_traits::Num, + { + self.0.as_number::() + } +} + +impl TarFormatOctal { + pub fn as_number(&self) -> core::result::Result + where + T: num_traits::Num, + { + self.0.as_number::() + } +} + +mod tests { + use super::TarFormatString; + + use core::mem::size_of_val; + + #[test] + fn test_empty_string() { + let empty = TarFormatString::new([0]); + assert_eq!(size_of_val(&empty), 1); + assert!(empty.is_empty()); + assert_eq!(empty.len(), 0); + assert!(empty.is_nul_terminated()); + assert_eq!(empty.as_str(), ""); + } + + #[test] + fn test_one_byte_string() { + let s = TarFormatString::new([65]); + assert_eq!(size_of_val(&s), 1); + assert!(!s.is_empty()); + assert_eq!(s.len(), 1); + assert!(!s.is_nul_terminated()); + assert_eq!(s.as_str(), "A"); + } + + #[test] + fn test_two_byte_string_nul_terminated() { + let s = TarFormatString::new([65, 0]); + assert_eq!(size_of_val(&s), 2); + assert!(!s.is_empty()); + assert_eq!(s.len(), 1); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), "A"); + } + + #[test] + fn test_append() { + let mut s = TarFormatString::new([0; 20]); + + // When adding a zero terminated string with one byte of zero + s.append(&TarFormatString::new([0])); + // Then the result is no change + assert_eq!(size_of_val(&s), 20); + assert!(s.is_empty()); + assert_eq!(s.len(), 0); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), ""); + + // When adding ABC + s.append(&TarFormatString::new([65, 66, 67])); + // Then the string contains the additional 3 chars + assert_eq!(size_of_val(&s), 20); + assert!(!s.is_empty()); + assert_eq!(s.len(), 3); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), "ABC"); + + s.append(&TarFormatString::new([68, 69, 70])); + // Then the string contains the additional 3 chars + assert_eq!(size_of_val(&s), 20); + assert!(!s.is_empty()); + assert_eq!(s.len(), 6); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), "ABCDEF"); + + s.append(&TarFormatString::new([b'A'; 12])); + // Then the string contains the additional 12 chars + assert_eq!(size_of_val(&s), 20); + assert!(!s.is_empty()); + assert_eq!(s.len(), 18); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAA"); + + s.append(&TarFormatString::new([b'A'; 1])); + // Then the string contains the additional 1 chars + assert_eq!(size_of_val(&s), 20); + assert!(!s.is_empty()); + assert_eq!(s.len(), 19); + assert!(s.is_nul_terminated()); + assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAA"); + + s.append(&TarFormatString::new([b'Z'; 1])); + // Then the string contains the additional 1 char, is full and not null terminated + assert_eq!(size_of_val(&s), 20); + assert!(!s.is_empty()); + assert_eq!(s.len(), 20); + assert!(!s.is_nul_terminated()); + assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAAZ"); + } +} diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..1b1971d --- /dev/null +++ b/tests/README.md @@ -0,0 +1,2 @@ +Test tar files and original contents. Some content is within a tar file to avoid Windows filename length restrictions. + diff --git a/tests/gnu_tar_default_with_dir.tar b/tests/gnu_tar_default_with_dir.tar new file mode 100644 index 0000000000000000000000000000000000000000..41eb152b906ff6413c2a387f3c6753455a284958 GIT binary patch literal 10240 zcmeH}O%B2!6ogrO3X<-X*FNA1-oT}?vBpKCiN$DdFD)i6Y%nn`@n_hO@(9pxm}yQArh}q_btx~>Dq}U3nCq31D@pI!gCn?3|ux(@e`Dwdd zY{GZeT(AGWs_WzWay-@Bxx2cNU!@b+aM0Th=}#Q=FRayyy#7pNgj6G+V{o}Y*MHe5 zFIS!Nvq3%>{AmKg|7BYR_pdb-<=7GM!vCba=llGB_`k>r{!el7vUr=t%Ngo%Kp)Qs zp`XHKpS#D?|K9{(fH(5rutxr;xHa+kLcaexALM@umthYO009sH0T2KI5C8!X009sH V0T2KI5C8!X009sH0T4(_;0D7->puVh literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_ustar_deep.tar b/tests/gnu_tar_ustar_deep.tar new file mode 100644 index 0000000000000000000000000000000000000000..fdf89f3d3f6155013aac14ae767754845583dd9b GIT binary patch literal 10240 zcmeI0QEtN^42C&MZ%_jUw&PUV`nGB6x`*9Jow>pX{; zUS9WR)Sx_42xfl9iTD& zuLqp}t%57&l}T5G-%%pezm#y8tzp_2tH1x9%U>D%{u>5eBo<7Cx38(Me@`|% zIe#JSAhQc3v9~VyN=dfz_nz|qzTE$FY$<_%u{9@vt!7E}ZN4?fUcJy!89io&NvIKTgNMj@p;pK66z6Ux92tD0aIDh=c-A Y017|>C;$bZ02F`%Pyh-*f&Z?+AGzM00RR91 literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_ustar_long.tar b/tests/gnu_tar_ustar_long.tar new file mode 100644 index 0000000000000000000000000000000000000000..a6a69adc7542ddd855ed3d828b0df5be9897daa3 GIT binary patch literal 10240 zcmeIz$qIuo5P;#F`xL%GC!1-H-IpimRVW^N=yK@WH|jwPMJO0S_~(*n!X$j-plD;A z&%6~s>6*zZIy~EG?hwu8^;I&qnj6s^J{>j>@ZPX!nE_KX?NJWkb z3Z(1$HuK;8@4N|~{!jH&?tZ59e^-M!0tg_000IagfB*srAb+PFF7llen!HC$IO)xVgllSsoG<9$K zV-ht0KubvyH>z<2Cj7UZ@p?P^;5-kaiPTPv>B5Zcd{HBF#k zI)^)?4HBr_RLXe%FLm2?`J=nF)xkW?!W~*Xloq%FCrK;EVn#Sj{{Cf78DL>;I_#_*X9rM;cCN zm=6QAd3@0H<7;foJAC~AMR);z_kTs1ME_so-h}N7{eAEFp#QI-GTZ|ZKm-th?IG|2 Dmh<)v literal 0 HcmV?d00001 diff --git a/tests/tar_content_subset_for_recreating_tar_files.tar b/tests/tar_content_subset_for_recreating_tar_files.tar new file mode 100644 index 0000000000000000000000000000000000000000..729773d5801bbc258c67d5efdb9b3774e90de168 GIT binary patch literal 15360 zcmeHO%W~T=4D{Jwp)ZI@03@YD4t=LR_1LSXaV9;alS$o~^zTb6&IoqoGEhiYcA$$P zh%fCD)PjdvF9J1R>Q$Qnd%Dq{g0%qmxEK)l`G zzgF#C99sMH@(=g#+OgQx)vlABZ|-A#lkuq7l}Vcs>qq{XnRmz$4<02=>kU^A3R(km&jXFOv5!5Yx`--Bjew}f6vSw2cR|6r}gKZmfGE#Y)z z*)H!F<^Ob6~7pXVjL zZZfvFm_uFNtbW`q&#iBeY6>--*#_&zH@bAaXclCosr!oPX?u%g{6~GT6aNTK_kUl+ z>vJoYQ+<8Qa{cEW7}bCAqr9i#l)=a6hU!0##_O{uq5lHytp5~02UY!dh}n2F`tSQb z{fDM%x39UV4WW*o5Cf`mOiZF8511__sQNEK!GFEnum8PX%WHM{|3CNtE$-z1N&KsHr~bjdcM83HbB9g1;oZ#AME<-P*%RF%X?9KstDwb_2IV& dn*VM7Vl$C7fzL(F|2P_ID*}puBJg?;_zMJzEF=H` literal 0 HcmV?d00001