From 14097e009b191b23cd00d712b5ffb616c60145a1 Mon Sep 17 00:00:00 2001
From: Andy Schneider <andy@schnoberts.com>
Date: Fri, 3 May 2024 09:27:56 +0100
Subject: [PATCH] Add 256 filename support (prefix + name) and add support for
 space terminated numbers and non-null terminated names (#10)

* Add 256 filename support (prefix + name) and add support for space terminated numbers.

* Format check passes. Add missing file.

* Act on clippy issues found

* Fix nit and idiomatic conditional compile

* Clippy is wrong as from_utf8 is not yet stable for const fn usage.

* Move long or deep filenames for tar file creation into a tar file. Normally these aren't needed as the .tar files are used in the source. The tests themselves note what files they depend on.

* Upgrade MSRV as there are dependencies in the tree whose SemVer-compatible upgrades now
require a newer compiler than Rust 1.60. Example from an indirectly dependent crate on Linux:

error: package `rustix v0.38.11` cannot be built because it requires rustc 1.63 or newer, while the currently active rustc version is 1.60.0
---
 .github/workflows/rust.yml                    |   4 +-
 Cargo.toml                                    |  15 +-
 README.md                                     |   5 +-
 examples/alloc_feature.rs                     |   2 +-
 src/archive.rs                                | 162 +++++++++---
 src/header.rs                                 | 138 ++--------
 src/lib.rs                                    |   8 +-
 src/tar_format_types.rs                       | 250 ++++++++++++++++++
 tests/README.md                               |   2 +
 tests/gnu_tar_default_with_dir.tar            | Bin 0 -> 10240 bytes
 tests/gnu_tar_ustar_deep.tar                  | Bin 0 -> 10240 bytes
 tests/gnu_tar_ustar_long.tar                  | Bin 0 -> 10240 bytes
 tests/mac_tar_ustar_with_dir.tar              | Bin 0 -> 5632 bytes
 ...ontent_subset_for_recreating_tar_files.tar | Bin 0 -> 15360 bytes
 14 files changed, 429 insertions(+), 157 deletions(-)
 create mode 100644 src/tar_format_types.rs
 create mode 100644 tests/README.md
 create mode 100644 tests/gnu_tar_default_with_dir.tar
 create mode 100644 tests/gnu_tar_ustar_deep.tar
 create mode 100644 tests/gnu_tar_ustar_long.tar
 create mode 100644 tests/mac_tar_ustar_with_dir.tar
 create mode 100644 tests/tar_content_subset_for_recreating_tar_files.tar

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index dbe04a2..6d83c74 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -16,7 +16,7 @@ jobs:
         rust:
           - stable
           - nightly
-          - 1.60.0 # MSVR
+          - 1.63.0 # MSRV
     steps:
       - uses: actions/checkout@v2
       # Important preparation step: override the latest default Rust version in GitHub CI
@@ -41,7 +41,7 @@ jobs:
     strategy:
       matrix:
         rust:
-          - 1.60.0
+          - 1.63.0
     steps:
       - uses: actions/checkout@v2
       # Important preparation step: override the latest default Rust version in GitHub CI
diff --git a/Cargo.toml b/Cargo.toml
index 6dcf169..8507748 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,9 +3,9 @@ name = "tar-no-std"
 description = """
 Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
 The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
-as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
-The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
-collections of files.
+as GNU Longname. The maximum supported file name length is 256 characters excluding the NULL-byte
+(using the tar name/prefix longname implementation). The maximum supported file size is 8GiB.
+Directories are supported, but only regular files are yielded in iteration.
 """
 version = "0.2.0"
 edition = "2021"
@@ -24,10 +24,15 @@ resolver = "2"
 default = []
 alloc = []
 
+[[example]]
+name = "alloc_feature"
+required-features = ["alloc"]
+
 [dependencies]
-arrayvec = { version = "0.7", default-features = false }
 bitflags = "2.0"
 log = { version = "0.4", default-features = false }
+memchr = { version = "2.6.3", default-features = false }
+num-traits =  { version = "0.2.16", default-features = false }
 
 [dev-dependencies]
-env_logger = "0.10"
+env_logger = "0.10"
\ No newline at end of file
diff --git a/README.md b/README.md
index 7a27cfd..79d1b99 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,8 @@ environment and need full feature support, I recommend the use of <https://crate
 
 ## Limitations
 The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
-as *GNU Longname*. The maximum supported file name length is 100 characters including the NULL-byte.
-The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
-collections of files.
+as GNU Longname. The maximum supported file name length is 256 characters excluding the NULL-byte (using the tar name/prefix longname implementation). The maximum supported file size is 8GiB. Directories are supported, but only regular files are yielded in iteration.
+
 
 ## Use Case
 
diff --git a/examples/alloc_feature.rs b/examples/alloc_feature.rs
index 93492e2..7ac7f75 100644
--- a/examples/alloc_feature.rs
+++ b/examples/alloc_feature.rs
@@ -21,9 +21,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */
+
 use tar_no_std::TarArchive;
 
-/// This example needs the `alloc` feature.
 fn main() {
     // log: not mandatory
     std::env::set_var("RUST_LOG", "trace");
diff --git a/src/archive.rs b/src/archive.rs
index 9f34b44..ae7a889 100644
--- a/src/archive.rs
+++ b/src/archive.rs
@@ -25,25 +25,25 @@ SOFTWARE.
 //! also exports `TarArchive`, which owns data on the heap.
 
 use crate::header::PosixHeader;
-use crate::{TypeFlag, BLOCKSIZE, FILENAME_MAX_LEN};
+use crate::tar_format_types::TarFormatString;
+use crate::{TypeFlag, BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
 #[cfg(feature = "alloc")]
 use alloc::boxed::Box;
-use arrayvec::ArrayString;
 use core::fmt::{Debug, Formatter};
-use core::str::{FromStr, Utf8Error};
+use core::str::Utf8Error;
 use log::warn;
 
 /// Describes an entry in an archive.
 /// Currently only supports files but no directories.
 pub struct ArchiveEntry<'a> {
-    filename: ArrayString<FILENAME_MAX_LEN>,
+    filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
     data: &'a [u8],
     size: usize,
 }
 
 #[allow(unused)]
 impl<'a> ArchiveEntry<'a> {
-    const fn new(filename: ArrayString<FILENAME_MAX_LEN>, data: &'a [u8]) -> Self {
+    const fn new(filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>, data: &'a [u8]) -> Self {
         ArchiveEntry {
             filename,
             data,
@@ -53,7 +53,7 @@ impl<'a> ArchiveEntry<'a> {
 
     /// Filename of the entry with a maximum of 100 characters (including the
     /// terminating NULL-byte).
-    pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> {
+    pub const fn filename(&self) -> TarFormatString<{ POSIX_1003_MAX_FILENAME_LEN }> {
         self.filename
     }
 
@@ -63,6 +63,7 @@ impl<'a> ArchiveEntry<'a> {
     }
 
     /// Data of the file as string slice, if data is valid UTF-8.
+    #[allow(clippy::missing_const_for_fn)]
     pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
         core::str::from_utf8(self.data)
     }
@@ -192,19 +193,35 @@ impl<'a> Iterator for ArchiveIterator<'a> {
             return None;
         }
 
-        let hdr = self.next_hdr(self.block_index);
+        let mut hdr = self.next_hdr(self.block_index);
+
+        loop {
+            // check if we found end of archive
+            if hdr.is_zero_block() {
+                let next_hdr = self.next_hdr(self.block_index + 1);
+                if next_hdr.is_zero_block() {
+                    // gracefully terminated Archive
+                    log::debug!("End of Tar archive with two zero blocks!");
+                } else {
+                    log::warn!(
+                        "Zero block found at end of Tar archive, but only one instead of two!"
+                    );
+                }
+                // end of archive
+                return None;
+            }
 
-        // check if we found end of archive
-        if hdr.is_zero_block() {
-            let next_hdr = self.next_hdr(self.block_index + 1);
-            if next_hdr.is_zero_block() {
-                // gracefully terminated Archive
-                log::debug!("End of Tar archive with two zero blocks!");
-            } else {
-                log::warn!("Zero block found at end of Tar archive, but only one instead of two!");
+            // Ignore directory entries, i.e. yield only regular files. Works as
+            // filenames in tarballs are fully specified, e.g. dirA/dirB/file1
+            if hdr.typeflag != TypeFlag::DIRTYPE {
+                break;
             }
-            // end of archive
-            return None;
+
+            // in next iteration: start at next Archive entry header
+            // +1 for current hdr block itself + all data blocks
+            let data_block_count: usize = hdr.payload_block_count().unwrap();
+            self.block_index += data_block_count + 1;
+            hdr = self.next_hdr(self.block_index);
         }
 
         if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE {
@@ -219,7 +236,7 @@ impl<'a> Iterator for ArchiveIterator<'a> {
             warn!("Found empty file name",);
         }
 
-        let hdr_size = hdr.size.val();
+        let hdr_size = hdr.size.as_number::<usize>();
         if let Err(e) = hdr_size {
             warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
             return None;
@@ -245,10 +262,13 @@ impl<'a> Iterator for ArchiveIterator<'a> {
         // +1 for current hdr block itself + all data blocks
         self.block_index += data_block_count + 1;
 
-        let filename = ArrayString::from_str(hdr.name.as_string().as_str());
-        // .unwrap is fine as the capacity is MUST be ok.
-        let filename = filename.unwrap();
-
+        let mut filename: TarFormatString<256> =
+            TarFormatString::<POSIX_1003_MAX_FILENAME_LEN>::new([0; POSIX_1003_MAX_FILENAME_LEN]);
+        if hdr.magic.as_str() == "ustar" && hdr.version.as_str() == "00" && !hdr.prefix.is_empty() {
+            filename.append(&hdr.prefix);
+            filename.append(&TarFormatString::<1>::new([b'/']));
+        }
+        filename.append(&hdr.name);
         Some(ArchiveEntry::new(filename, file_bytes))
     }
 }
@@ -264,7 +284,6 @@ mod tests {
         let entries = archive.entries().collect::<Vec<_>>();
         println!("{:#?}", entries);
     }
-
     /// Tests to read the entries from existing archives in various Tar flavors.
     #[test]
     fn test_archive_entries() {
@@ -299,6 +318,54 @@ mod tests {
         assert_archive_content(&entries);
     }
 
+    /// Tests to read the entries from an existing tarball with a directory in it
+    #[test]
+    fn test_archive_with_long_dir_entries() {
+        // tarball created with:
+        //     $ cd tests; gtar --format=ustar -cf gnu_tar_ustar_long.tar 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_long.tar"));
+        let entries = archive.entries().collect::<Vec<_>>();
+
+        assert_eq!(entries.len(), 2);
+        // Maximum length of a directory and name when the directory itself is tar'd
+        assert_entry_content(&entries[0], "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7);
+        // Maximum length of a directory and name when only the file is tar'd.
+        assert_entry_content(&entries[1], "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7);
+    }
+
+    #[test]
+    fn test_archive_with_deep_dir_entries() {
+        // tarball created with:
+        //     $ cd tests; gtar --format=ustar -cf gnu_tar_ustar_deep.tar 0123456789
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_deep.tar"));
+        let entries = archive.entries().collect::<Vec<_>>();
+
+        assert_eq!(entries.len(), 1);
+        assert_entry_content(&entries[0], "0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/empty", 0);
+    }
+
+    #[test]
+    fn test_archive_with_dir_entries() {
+        // tarball created with:
+        //     $ gtar -cf tests/gnu_tar_default_with_dir.tar --exclude '*.tar' --exclude '012345678*' tests
+        {
+            let archive =
+                TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar"));
+            let entries = archive.entries().collect::<Vec<_>>();
+
+            assert_archive_with_dir_content(&entries);
+        }
+
+        // tarball created with:
+        //     $(osx) tar -cf tests/mac_tar_ustar_with_dir.tar --format=ustar --exclude '*.tar' --exclude '012345678*' tests
+        {
+            let archive = TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar"));
+            let entries = archive.entries().collect::<Vec<_>>();
+
+            assert_archive_with_dir_content(&entries);
+        }
+    }
+
     /// Like [`test_archive_entries`] but with additional `alloc` functionality.
     #[cfg(feature = "alloc")]
     #[test]
@@ -314,15 +381,20 @@ mod tests {
         assert_eq!(data, archive.into());
     }
 
+    /// Test that the entry's contents match the expected content.
+    fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) {
+        assert_eq!(entry.filename().as_str(), filename);
+        assert_eq!(entry.size(), size);
+        assert_eq!(entry.data().len(), size);
+    }
+
     /// Tests that the parsed archive matches the expected order. The tarballs
     /// the tests directory were created once by me with files in the order
     /// specified in this test.
     fn assert_archive_content(entries: &[ArchiveEntry]) {
         assert_eq!(entries.len(), 3);
 
-        assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
-        assert_eq!(entries[0].size(), 513);
-        assert_eq!(entries[0].data().len(), 513);
+        assert_entry_content(&entries[0], "bye_world_513b.txt", 513);
         assert_eq!(
             entries[0].data_as_str().expect("Should be valid UTF-8"),
             // .replace: Ensure that the test also works on Windows
@@ -331,22 +403,48 @@ mod tests {
 
         // Test that an entry that needs two 512 byte data blocks is read
         // properly.
-        assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
-        assert_eq!(entries[1].size(), 513);
-        assert_eq!(entries[1].data().len(), 513);
+        assert_entry_content(&entries[1], "hello_world_513b.txt", 513);
         assert_eq!(
             entries[1].data_as_str().expect("Should be valid UTF-8"),
             // .replace: Ensure that the test also works on Windows
             include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
         );
 
-        assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
-        assert_eq!(entries[2].size(), 12);
-        assert_eq!(entries[2].data().len(), 12);
+        assert_entry_content(&entries[2], "hello_world.txt", 12);
         assert_eq!(
             entries[2].data_as_str().expect("Should be valid UTF-8"),
             "Hello World\n",
             "file content must match"
         );
     }
+
+    /// Tests that the parsed archive matches the expected order and the filename includes
+    /// the directory name. The tarballs the tests directory were created once by me with files
+    /// in the order specified in this test.
+    fn assert_archive_with_dir_content(entries: &[ArchiveEntry]) {
+        assert_eq!(entries.len(), 3);
+
+        assert_entry_content(&entries[0], "tests/hello_world.txt", 12);
+        assert_eq!(
+            entries[0].data_as_str().expect("Should be valid UTF-8"),
+            "Hello World\n",
+            "file content must match"
+        );
+
+        // Test that an entry that needs two 512 byte data blocks is read
+        // properly.
+        assert_entry_content(&entries[1], "tests/bye_world_513b.txt", 513);
+        assert_eq!(
+            entries[1].data_as_str().expect("Should be valid UTF-8"),
+            // .replace: Ensure that the test also works on Windows
+            include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
+        );
+
+        assert_entry_content(&entries[2], "tests/hello_world_513b.txt", 513);
+        assert_eq!(
+            entries[2].data_as_str().expect("Should be valid UTF-8"),
+            // .replace: Ensure that the test also works on Windows
+            include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
+        );
+    }
 }
diff --git a/src/header.rs b/src/header.rs
index aadb6a6..2983ea2 100644
--- a/src/header.rs
+++ b/src/header.rs
@@ -30,31 +30,10 @@ SOFTWARE.
 
 #![allow(non_upper_case_globals)]
 
-use crate::{BLOCKSIZE, FILENAME_MAX_LEN};
-use arrayvec::ArrayString;
+use crate::{TarFormatDecimal, TarFormatOctal, TarFormatString, BLOCKSIZE, NAME_LEN, PREFIX_LEN};
 use core::fmt::{Debug, Formatter};
 use core::num::ParseIntError;
 
-/// The file size is encoded as octal ASCII number inside a Tar header.
-#[derive(Copy, Clone)]
-#[repr(transparent)]
-pub struct Size(StaticCString<12>);
-
-impl Size {
-    /// Returns the octal ASCII number as actual size in bytes.
-    pub fn val(&self) -> Result<usize, ParseIntError> {
-        usize::from_str_radix(self.0.as_string().as_str(), 8)
-    }
-}
-
-impl Debug for Size {
-    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
-        let mut debug = f.debug_tuple("Size");
-        debug.field(&self.val());
-        debug.finish()
-    }
-}
-
 #[derive(Debug)]
 pub enum ModeError {
     ParseInt(ParseIntError),
@@ -64,14 +43,12 @@ pub enum ModeError {
 /// Wrapper around the UNIX file permissions given in octal ASCII.
 #[derive(Copy, Clone)]
 #[repr(transparent)]
-pub struct Mode(StaticCString<8>);
+pub struct Mode(TarFormatOctal<8>);
 
 impl Mode {
     /// Parses the [`ModeFlags`] from the mode string.
     pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
-        let octal_number_str = self.0.as_string();
-        let bits =
-            u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?;
+        let bits = self.0.as_number::<u64>().map_err(ModeError::ParseInt)?;
         ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
     }
 }
@@ -84,60 +61,6 @@ impl Debug for Mode {
     }
 }
 
-/// A C-String that is stored in a static array. There is always a terminating
-/// NULL-byte.
-///
-/// The content is likely to be UTF-8/ASCII, but that is not verified by this
-/// type.
-#[derive(Copy, Clone)]
-#[repr(transparent)]
-pub struct StaticCString<const N: usize>([u8; N]);
-
-#[allow(unused)]
-impl<const N: usize> StaticCString<N> {
-    /// Constructor.
-    const fn new(bytes: [u8; N]) -> Self {
-        Self(bytes)
-    }
-
-    /// Returns the length of the string without NULL-byte.
-    pub fn len(&self) -> usize {
-        // not as efficient as it could be but negligible
-        self.as_string().len()
-    }
-
-    /// Returns if the string without NULL-byte is empty.
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Returns a string that includes all characters until the first null.
-    pub fn as_string(&self) -> ArrayString<N> {
-        let mut string = ArrayString::new();
-        self.0
-            .clone()
-            .iter()
-            .copied()
-            // Take all chars until the terminating null.
-            .take_while(|byte| *byte != 0)
-            .for_each(|byte| string.push(byte as char));
-        string
-    }
-}
-
-impl<const N: usize> Debug for StaticCString<N> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
-        let mut debug = f.debug_tuple("Name");
-        let str = self.as_string();
-        if str.is_empty() {
-            debug.field(&"<empty>");
-        } else {
-            debug.field(&str);
-        }
-        debug.finish()
-    }
-}
-
 /// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990.
 /// "New" (version?) GNU Tar versions use this archive format by default.
 /// (<https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>).
@@ -152,33 +75,29 @@ impl<const N: usize> Debug for StaticCString<N> {
 #[derive(Debug, Copy, Clone)]
 #[repr(C, packed)]
 pub struct PosixHeader {
-    /// Name. There is always a null byte, therefore
-    /// the max len is 99.
-    pub name: StaticCString<{ FILENAME_MAX_LEN }>,
+    pub name: TarFormatString<NAME_LEN>,
     pub mode: Mode,
-    pub uid: [u8; 8],
-    pub gid: [u8; 8],
+    pub uid: TarFormatOctal<8>,
+    pub gid: TarFormatOctal<8>,
     // confusing; size is stored as ASCII string
-    pub size: Size,
-    pub mtime: [u8; 12],
-    pub cksum: [u8; 8],
+    pub size: TarFormatOctal<12>,
+    pub mtime: TarFormatDecimal<12>,
+    pub cksum: TarFormatOctal<8>,
     pub typeflag: TypeFlag,
     /// Name. There is always a null byte, therefore
     /// the max len is 99.
-    pub linkname: StaticCString<{ FILENAME_MAX_LEN }>,
-    pub magic: StaticCString<6>,
-    pub version: StaticCString<2>,
+    pub linkname: TarFormatString<NAME_LEN>,
+    pub magic: TarFormatString<6>,
+    pub version: TarFormatString<2>,
     /// Username. There is always a null byte, therefore
     /// the max len is N-1.
-    pub uname: StaticCString<32>,
+    pub uname: TarFormatString<32>,
     /// Groupname. There is always a null byte, therefore
     /// the max len is N-1.
-    pub gname: StaticCString<32>,
-    pub dev_major: [u8; 8],
-    pub dev_minor: [u8; 8],
-    /// There is always a null byte, therefore
-    /// the max len is N-1.
-    pub prefix: StaticCString<155>,
+    pub gname: TarFormatString<32>,
+    pub dev_major: TarFormatOctal<8>,
+    pub dev_minor: TarFormatOctal<8>,
+    pub prefix: TarFormatString<PREFIX_LEN>,
     // padding => to BLOCKSIZE bytes
     pub _pad: [u8; 12],
 }
@@ -188,8 +107,8 @@ impl PosixHeader {
     /// content. Returns an error, if the file size can't be parsed from the
     /// header.
     pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
-        let div = self.size.val()? / BLOCKSIZE;
-        let modulo = self.size.val()? % BLOCKSIZE;
+        let div = self.size.as_number::<usize>()? / BLOCKSIZE;
+        let modulo = self.size.as_number::<usize>()? % BLOCKSIZE;
         let block_count = if modulo > 0 { div + 1 } else { div };
         Ok(block_count)
     }
@@ -292,7 +211,7 @@ bitflags::bitflags! {
 
 #[cfg(test)]
 mod tests {
-    use crate::header::{PosixHeader, StaticCString, TypeFlag};
+    use crate::header::{PosixHeader, TypeFlag};
     use crate::BLOCKSIZE;
     use std::mem::size_of;
 
@@ -354,7 +273,7 @@ mod tests {
             TypeFlag::REGTYPE,
             "the first entry is a regular file!"
         );
-        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt");
+        assert_eq!(archive.name.as_str(), "bye_world_513b.txt");
 
         let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar"));
         assert_eq!(
@@ -362,7 +281,7 @@ mod tests {
             TypeFlag::REGTYPE,
             "the first entry is a regular file!"
         );
-        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt");
+        assert_eq!(archive.name.as_str(), "bye_world_513b.txt");
 
         let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
         assert_eq!(
@@ -370,7 +289,7 @@ mod tests {
             TypeFlag::REGTYPE,
             "the first entry is a regular file!"
         );
-        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt");
+        assert_eq!(archive.name.as_str(), "bye_world_513b.txt");
 
         /* UNSUPPORTED YET. Uses extensions..
         let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar"));
@@ -388,7 +307,7 @@ mod tests {
             TypeFlag::REGTYPE,
             "the first entry is a regular file!"
         );
-        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt");
+        assert_eq!(archive.name.as_str(), "bye_world_513b.txt");
 
         let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar"));
         // ARegType: legacy
@@ -397,18 +316,11 @@ mod tests {
             TypeFlag::AREGTYPE,
             "the first entry is a regular file!"
         );
-        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt");
+        assert_eq!(archive.name.as_str(), "bye_world_513b.txt");
     }
 
     #[test]
     fn test_size() {
         assert_eq!(BLOCKSIZE, size_of::<PosixHeader>());
     }
-
-    #[test]
-    fn test_static_str() {
-        let str = StaticCString::new(*b"0000633\0");
-        assert_eq!(str.len(), 7);
-        assert_eq!(str.as_string().as_str(), "0000633");
-    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index fbaefbd..c7f8caa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -90,10 +90,16 @@ extern crate alloc;
 /// Each Archive Entry (either Header or Data Block) is a block of 512 bytes.
 const BLOCKSIZE: usize = 512;
 /// Maximum filename length of the base Tar format including the terminating NULL-byte.
-const FILENAME_MAX_LEN: usize = 100;
+const NAME_LEN: usize = 100;
+/// Maximum long filename length of the base Tar format including the prefix
+const POSIX_1003_MAX_FILENAME_LEN: usize = 256;
+/// Maximum length of the prefix in Posix tar format
+const PREFIX_LEN: usize = 155;
 
 mod archive;
 mod header;
+mod tar_format_types;
 
 pub use archive::*;
 pub use header::*;
+pub use tar_format_types::*;
diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs
new file mode 100644
index 0000000..8b7ed4a
--- /dev/null
+++ b/src/tar_format_types.rs
@@ -0,0 +1,250 @@
+#![allow(unused_imports)]
+
+use core::fmt::{Debug, Formatter};
+use core::num::ParseIntError;
+use core::ptr::copy_nonoverlapping;
+use core::str::from_utf8;
+use num_traits::Num;
+
+/// An optionally null terminated string. The contents are either:
+/// 1. A fully populated string with no null termination or
+/// 2. A partially populated string where the unused bytes are zero.
+///
+/// The content is likely to be UTF-8/ASCII, but that is not verified by this
+/// type.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub struct TarFormatString<const N: usize> {
+    bytes: [u8; N],
+}
+
+/// A Tar format string is a fixed length byte array containing UTF-8 bytes.
+/// This string will be null terminated if it doesn't fill the entire array.
+impl<const N: usize> TarFormatString<N> {
+    /// Constructor. Panics if `N` is zero.
+    pub fn new(bytes: [u8; N]) -> Self {
+        if N == 0 {
+            panic!("Array cannot be zero length");
+        }
+        Self { bytes }
+    }
+
+    /// True if the string is empty (ignoring NULL bytes).
+    pub const fn is_empty(&self) -> bool {
+        self.bytes[0] == 0
+    }
+
+    /// True if the string is NULL terminated, i.e. the last byte of the array is zero.
+    pub const fn is_nul_terminated(&self) -> bool {
+        self.bytes[N - 1] == 0
+    }
+
+    /// Returns the length of the string (ignoring NULL bytes).
+    pub fn len(&self) -> usize {
+        // The string ends at the first NUL byte, or spans the whole array if
+        // there is none. Looking for the first NUL (rather than only testing
+        // the last byte) also cuts fields that carry stray bytes after their
+        // terminator, and keeps `len() == 0` in step with `is_empty()`.
+        memchr::memchr(0, &self.bytes).unwrap_or(N)
+    }
+
+    /// Returns a str ref without NULL bytes. Panics if the string is not valid UTF-8.
+    pub fn as_str(&self) -> &str {
+        from_utf8(&self.bytes[0..self.len()]).expect("byte array is not UTF-8")
+    }
+
+    /// Append to end of string. Panics if there is not enough capacity.
+    pub fn append<const S: usize>(&mut self, other: &TarFormatString<S>) {
+        let start = self.len();
+        let other_len = other.len();
+        let resulting_length = start + other_len;
+        if resulting_length > N {
+            panic!("Result too long for capacity {}", N);
+        }
+
+        // Bounds are guaranteed by the capacity check above, so a safe slice
+        // copy does the job without raw pointers.
+        self.bytes[start..resulting_length].copy_from_slice(&other.bytes[..other_len]);
+
+        if resulting_length < N {
+            self.bytes[resulting_length] = 0;
+        }
+    }
+}
+
+/// Formats the string as `<text>,<len> of <N>,<is_nul_terminated>`.
+///
+/// The content is not verified to be UTF-8 by this type, so formatting must
+/// not panic on invalid bytes; in that case a placeholder is printed instead
+/// of the text.
+impl<const N: usize> Debug for TarFormatString<N> {
+    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
+        // Measure once and reuse the value for both the slice and the output.
+        let len = self.len();
+        let text = from_utf8(&self.bytes[0..len]).unwrap_or("<invalid UTF-8>");
+        write!(f, "{},{} of {},{}", text, len, N, self.is_nul_terminated())
+    }
+}
+
+/// A number. Trailing spaces in the string are ignored.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub struct TarFormatNumber<const N: usize, const R: u32>(TarFormatString<N>);
+
+/// An octal number. Trailing spaces in the string are ignored.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub struct TarFormatOctal<const N: usize>(TarFormatNumber<N, 8>);
+
+/// A decimal number. Trailing spaces in the string are ignored.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub struct TarFormatDecimal<const N: usize>(TarFormatNumber<N, 10>);
+
+impl<const N: usize, const R: u32> TarFormatNumber<N, R> {
+    /// Parses the field as a number in radix `R`.
+    ///
+    /// The digits end at the first space or NUL byte; a field without either
+    /// is parsed in full. Panics if the digits are not valid UTF-8.
+    pub fn as_number<T>(&self) -> core::result::Result<T, T::FromStrRadixErr>
+    where
+        T: num_traits::Num,
+    {
+        let end = memchr::memchr2(b' ', 0, &self.0.bytes).unwrap_or(N);
+        let digits = from_utf8(&self.0.bytes[..end]).expect("byte array is not UTF-8");
+        // Always use the type's radix; a hard-coded 8 here would make decimal
+        // fields parse as octal.
+        T::from_str_radix(digits, R)
+    }
+}
+
+impl<const N: usize, const R: u32> Debug for TarFormatNumber<N, R> {
+    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
+        let sub_array = &self.0.bytes[0..self.0.len()];
+        match self.as_number::<u64>() {
+            Err(msg) => write!(f, "{} [{}]", msg, from_utf8(sub_array).unwrap()),
+            Ok(val) => write!(f, "{} [{}]", val, from_utf8(sub_array).unwrap()),
+        }
+    }
+}
+
+impl<const N: usize> Debug for TarFormatOctal<N> {
+    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl<const N: usize> Debug for TarFormatDecimal<N> {
+    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl<const N: usize> TarFormatDecimal<N> {
+    pub fn as_number<T>(&self) -> core::result::Result<T, T::FromStrRadixErr>
+    where
+        T: num_traits::Num,
+    {
+        self.0.as_number::<T>()
+    }
+}
+
+impl<const N: usize> TarFormatOctal<N> {
+    pub fn as_number<T>(&self) -> core::result::Result<T, T::FromStrRadixErr>
+    where
+        T: num_traits::Num,
+    {
+        self.0.as_number::<T>()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TarFormatString;
+    use core::mem::size_of_val;
+
+    #[test]
+    fn test_empty_string() {
+        let empty = TarFormatString::new([0]);
+        assert_eq!(size_of_val(&empty), 1);
+        assert!(empty.is_empty());
+        assert_eq!(empty.len(), 0);
+        assert!(empty.is_nul_terminated());
+        assert_eq!(empty.as_str(), "");
+    }
+
+    #[test]
+    fn test_one_byte_string() {
+        let s = TarFormatString::new([65]);
+        assert_eq!(size_of_val(&s), 1);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 1);
+        assert!(!s.is_nul_terminated());
+        assert_eq!(s.as_str(), "A");
+    }
+
+    #[test]
+    fn test_two_byte_string_nul_terminated() {
+        let s = TarFormatString::new([65, 0]);
+        assert_eq!(size_of_val(&s), 2);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 1);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "A");
+    }
+
+    #[test]
+    fn test_append() {
+        let mut s = TarFormatString::new([0; 20]);
+
+        // When adding a zero terminated string with one byte of zero
+        s.append(&TarFormatString::new([0]));
+        // Then the result is no change
+        assert_eq!(size_of_val(&s), 20);
+        assert!(s.is_empty());
+        assert_eq!(s.len(), 0);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "");
+
+        // When adding ABC
+        s.append(&TarFormatString::new([65, 66, 67]));
+        // Then the string contains the additional 3 chars
+        assert_eq!(size_of_val(&s), 20);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 3);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "ABC");
+
+        s.append(&TarFormatString::new([68, 69, 70]));
+        // Then the string contains the additional 3 chars
+        assert_eq!(size_of_val(&s), 20);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 6);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "ABCDEF");
+
+        s.append(&TarFormatString::new([b'A'; 12]));
+        // Then the string contains the additional 12 chars
+        assert_eq!(size_of_val(&s), 20);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 18);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAA");
+
+        s.append(&TarFormatString::new([b'A'; 1]));
+        // Then the string contains the additional 1 char
+        assert_eq!(size_of_val(&s), 20);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 19);
+        assert!(s.is_nul_terminated());
+        assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAA");
+
+        s.append(&TarFormatString::new([b'Z'; 1]));
+        // Then the string contains the additional 1 char, is full and not null terminated
+        assert_eq!(size_of_val(&s), 20);
+        assert!(!s.is_empty());
+        assert_eq!(s.len(), 20);
+        assert!(!s.is_nul_terminated());
+        assert_eq!(s.as_str(), "ABCDEFAAAAAAAAAAAAAZ");
+    }
+}
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..1b1971d
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,2 @@
+Test tar files and the original contents used to create them. Some of that content is itself stored in a tar file to avoid Windows filename length restrictions.
+
diff --git a/tests/gnu_tar_default_with_dir.tar b/tests/gnu_tar_default_with_dir.tar
new file mode 100644
index 0000000000000000000000000000000000000000..41eb152b906ff6413c2a387f3c6753455a284958
GIT binary patch
literal 10240
zcmeH}O%B2!6ogrO3X<-X*FNA1-oT}?vBpKCiN$DdFD)i6Y%nn`@n_hO@(9pxm}y<r
zxMm?HZOS*Rwdi{jdX;nk+>QArh}q_btx~>Dq}U3nCq31D@pI!gCn?3|ux(@e`Dwdd
zY{GZeT(AGWs_WzWay-@Bxx2cNU!@b+aM0Th=}#Q=FRayyy#7pNgj6G+V{o}Y*MHe5
zFIS!Nvq3%>{AmKg|7BYR_pdb-<=7GM!vCba=llGB_`k>r{!el7vUr=t%Ngo%Kp)Qs
zp`XHKpS#D?|K9{(fH(5rutxr;xHa+kLcaexALM@umthYO009sH0T2KI5C8!X009sH
V0T2KI5C8!X009sH0T4(_;0D7->puVh

literal 0
HcmV?d00001

diff --git a/tests/gnu_tar_ustar_deep.tar b/tests/gnu_tar_ustar_deep.tar
new file mode 100644
index 0000000000000000000000000000000000000000..fdf89f3d3f6155013aac14ae767754845583dd9b
GIT binary patch
literal 10240
zcmeI0QEtN^42C&MZ%_jUw&PUV`nGB6x`*9<B#|1cq9sP4s?*O)9LpH`e>Jow>pX{;
zUS9WR)Sx_42xfl9<w=g`_sN%svCPTENaUj-Yl+G1!l(_&yg%RP!zpj&@cH&D{nvI5
zJ07o{&;L#<e_{6hToYg&e`h^|e_smaYyx`OEKgH;_58i_sDG8lt-)pJWpDEu>iTD&
zuLqp}t%57&l}T5<iWSUq@2l?rzz+4V(zrFaO1c_QUs-?t$6!(abzwV1RPky8ZtO?T
zKShiBzoBMdw+62!R_;>G-%%pezm#y8tzp_2tH1x9%U>D%{u>5eBo<7Cx38(Me@`|%
zIe#JSAhQc3v9~VyN=dfz_nz|qzTE$FY$<_%u{9@vt!7E}ZN4?f<NLoe|45mCunGJH
z8ilzH`z7lA<Li94?|q4EH>UcJy!89io&NvIKTgNMj@p;pK66z6Ux92tD0aIDh=c-A
Y017|>C;$bZ02F`%Pyh-*f&Z?+AGzM00RR91

literal 0
HcmV?d00001

diff --git a/tests/gnu_tar_ustar_long.tar b/tests/gnu_tar_ustar_long.tar
new file mode 100644
index 0000000000000000000000000000000000000000..a6a69adc7542ddd855ed3d828b0df5be9897daa3
GIT binary patch
literal 10240
zcmeIz$qIuo5P;#F`xL%GC!1-H-IpimRVW^N=yK@WH|jwPMJO0S_~(*n!X$j-plD;A
z&%<IVsb+JD5xiIRm^Y&sS3g(pVkqrguvWB~xNc0mi^{wjAzqJXagSFiZr69;adBH&
z_HCHv+G@So?)Hb{>6~s>6*zZIy~EG?hwu8^;I&qnj6s^J{>j>@ZPX!nE_KX?NJWkb
z3Z(1$HuK;8@4N|~{!jH&?tZ59e^-M!0tg_000IagfB*srAb<b@2q1s}0tg_000Iag
PfB*srAb<b@(-imsccrs{

literal 0
HcmV?d00001

diff --git a/tests/mac_tar_ustar_with_dir.tar b/tests/mac_tar_ustar_with_dir.tar
new file mode 100644
index 0000000000000000000000000000000000000000..e06ee6d1beed9dc47a69de502bebd6ee1e482a00
GIT binary patch
literal 5632
zcmeHJOA3Q95OwV-0`A+%{|dc9m!hb2Ar)#$>+PFF7llen!HC$IO)xVgllSsoG<9$K
zV-ht0KubvyH>z<2Cj7UZ@<FH&nsdpt)&wZyf+j>p?P^;5-kaiPTPv>B5Zcd{HBF#k
zI)^)?4HBr_RLXe%FLm2?`J=nF)xkW?!W~*Xlo<J*=bsACKjT_Upq78hwMqyq+{f&4
zf6o8OF;C8p`E(W^oZK}6qyEdG9@U>q%FCrK;EVn#Sj{{Cf78DL>;I_#_*X9rM;cCN
zm=6QAd3@0H<7;foJAC~AMR);z_kTs1ME_so-h}N7{eAEFp#QI-GTZ|ZKm-th?IG|2
Dmh<)v

literal 0
HcmV?d00001

diff --git a/tests/tar_content_subset_for_recreating_tar_files.tar b/tests/tar_content_subset_for_recreating_tar_files.tar
new file mode 100644
index 0000000000000000000000000000000000000000..729773d5801bbc258c67d5efdb9b3774e90de168
GIT binary patch
literal 15360
zcmeHO%W~T=4D{Jwp)ZI@03@YD4t=LR_1LSXaV9;alS$o~^zTb6&IoqoGEhiYcA$$P
zh%fCD)Pj<Y1tjM~-7IFM4r5Gku9~fX{ll*T>dvF9J1R>Q$Qnd%Dq{g0%qmxEK)l`G
zzgF#C99sMH@(=g#+OgQx)vlABZ|-A#lkuq7l}Vcs>qq{XnRmz$4<Y}E<dy%tEDX2C
zNFO$btBJgF{)tHAUxe}4!o#Ewdq?*nj(_J+<9{sNqI$zjC%v*h#Qb}Y8vi1U#}-a9
zoh*><S-$><02=>kU^A3R(km&jXFOv5!5Yx`--Bjew}f6vSw2cR|6r}gKZmfGE#Y)z
z*)H!F<^O<SHU5in&Kn-dsifKn{s|x)dH&xc#NnQt-4An7${fMJL%@^ap8-2L`e&s`
zXYG#Q-;+I^_-Fhxb0SlL@s!nE-rTMp_tT^P^YpOa|GfQw2;`&wGl#&a)c(H!%|qPg
zzCQY47PA&|{jdA~`}oT3fAjX;`wt&KU3~sh&PvCB*na;<CLo<m{1*$X9CP7Ycg%A%
zrEMG`0$<zvi)DMcykpWN?}VKt^&cA8j(;GFQUBi`0?$&<|3i#DLm$@r>b6~7pXVjL
zZZfvFm_uFNtbW`q&#iBeY6>--*#_&zH@bAaXclCosr!oPX?u%g{6~GT6aNTK_kUl+
z>vJoYQ+<8Qa{cEW7}bCAqr9i#l)=a6hU!0##_O{uq5lHytp5~02UY!dh}n2F`tSQb
z{fDM%x39UV4WW*o5Cf`mOiZF8511__sQNEK!GFEnum8PX%WHM{|3CNtE$-z1<ilG3
zA9em;KCnhBK|bN)>N&KsHr~bjdcM83HbB9g1;oZ#AME<-P*%RF%X?9KstDwb_2IV&
dn*VM7Vl$C7fzL(F|2P_ID*}puBJg?;_zMJzEF=H`

literal 0
HcmV?d00001