Skip to content

Commit

Permalink
Merge pull request #16 from phip1611/dev2
Browse files Browse the repository at this point in the history
various code improvements + run miri
  • Loading branch information
phip1611 authored May 3, 2024
2 parents 6044c50 + a0e58fc commit 46e33cc
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 162 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,29 @@ jobs:
- name: Run tests
run: cargo test --verbose --features alloc

miri:
runs-on: "${{ matrix.runs-on }}"
needs:
# Logical dependency and wait for cache to be present
- build
strategy:
matrix:
runs-on:
- ubuntu-latest
rust:
- nightly
steps:
- uses: actions/checkout@v2
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: "${{ matrix.rust }}"
- uses: Swatinem/rust-cache@v2
with:
key: "${{ matrix.runs-on }}-${{ matrix.rust }}"
- run: rustup component add miri
- run: cargo miri test --tests

style_checks:
runs-on: ubuntu-latest
strategy:
Expand Down
213 changes: 130 additions & 83 deletions src/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ SOFTWARE.
use crate::header::PosixHeader;
use crate::tar_format_types::TarFormatString;
use crate::{TypeFlag, BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
use crate::{BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::fmt::{Debug, Display, Formatter};
use core::str::Utf8Error;
use log::warn;
use log::{error, warn};

/// Describes an entry in an archive.
/// Currently only supports files but no directories.
Expand Down Expand Up @@ -124,9 +124,9 @@ impl TarArchive {

/// Iterates over all entries of the Tar archive.
/// Returns items of type [`ArchiveEntry`].
/// See also [`ArchiveIterator`].
pub fn entries(&self) -> ArchiveIterator {
ArchiveIterator::new(self.data.as_ref())
/// See also [`ArchiveEntryIterator`].
pub fn entries(&self) -> ArchiveEntryIterator {
ArchiveEntryIterator::new(self.data.as_ref())
}
}

Expand Down Expand Up @@ -164,116 +164,139 @@ impl<'a> TarArchiveRef<'a> {

/// Iterates over all entries of the Tar archive.
/// Returns items of type [`ArchiveEntry`].
/// See also [`ArchiveIterator`].
pub const fn entries(&self) -> ArchiveIterator {
ArchiveIterator::new(self.data)
/// See also [`ArchiveEntryIterator`].
pub fn entries(&self) -> ArchiveEntryIterator {
ArchiveEntryIterator::new(self.data)
}
}

/// Iterator over the files of the archive. Each iteration starts
/// at the next Tar header entry.
/// Iterates over the headers of the Tar archive.
#[derive(Debug)]
pub struct ArchiveIterator<'a> {
pub struct ArchiveHeaderIterator<'a> {
archive_data: &'a [u8],
block_index: usize,
}

impl<'a> ArchiveIterator<'a> {
pub const fn new(archive: &'a [u8]) -> Self {
impl<'a> ArchiveHeaderIterator<'a> {
pub fn new(archive: &'a [u8]) -> Self {
assert!(!archive.is_empty());
assert_eq!(archive.len() % BLOCKSIZE, 0);
Self {
archive_data: archive,
block_index: 0,
}
}

/// Returns a reference to the next Header.
fn next_hdr(&self, block_index: usize) -> &'a PosixHeader {
let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE];
unsafe { (hdr_ptr as *const u8).cast::<PosixHeader>().as_ref() }.unwrap()
/// Parse the memory at the given block as [`PosixHeader`].
fn block_as_header(&self, block_index: usize) -> &'a PosixHeader {
unsafe {
self.archive_data
.as_ptr()
.add(block_index * BLOCKSIZE)
.cast::<PosixHeader>()
.as_ref()
.unwrap()
}
}
}

impl<'a> Iterator for ArchiveIterator<'a> {
type Item = ArchiveEntry<'a>;
type BlockIndex = usize;

impl<'a> Iterator for ArchiveHeaderIterator<'a> {
type Item = (BlockIndex, &'a PosixHeader);

/// Returns the next header. Internally, it updates the necessary data
/// structures to not read the same header multiple times.
///
/// This returns `None` if either no further headers are found or if a
/// header can't be parsed.
fn next(&mut self) -> Option<Self::Item> {
if self.block_index * BLOCKSIZE >= self.archive_data.len() {
warn!("Reached end of Tar archive data without finding zero/end blocks!");
return None;
}
// TODO better check for two end zero blocks here?
assert!(self.block_index < self.archive_data.len() / BLOCKSIZE);

let mut hdr = self.next_hdr(self.block_index);

loop {
// check if we found end of archive
if hdr.is_zero_block() {
let next_hdr = self.next_hdr(self.block_index + 1);
if next_hdr.is_zero_block() {
// gracefully terminated Archive
log::debug!("End of Tar archive with two zero blocks!");
} else {
log::warn!(
"Zero block found at end of Tar archive, but only one instead of two!"
);
}
// end of archive
return None;
}
let hdr = self.block_as_header(self.block_index);
let block_index = self.block_index;

// Ignore directory entries, i.e. yield only regular files. Works as
// filenames in tarballs are fully specified, e.g. dirA/dirB/file1
if hdr.typeflag != TypeFlag::DIRTYPE {
break;
}
// Start at next block on next iteration.
self.block_index += 1;
log::info!("{:#?}, {:#?}", hdr.name, hdr.typeflag);

// in next iteration: start at next Archive entry header
// +1 for current hdr block itself + all data blocks
let data_block_count: usize = hdr.payload_block_count().unwrap();
self.block_index += data_block_count + 1;
hdr = self.next_hdr(self.block_index);
let block_count = hdr
.payload_block_count()
.inspect_err(|e| {
log::error!("Unparsable size ({e:?}) in header {hdr:#?}");
})
.ok()?;

if !hdr.is_zero_block() {
self.block_index += block_count;
}

if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE {
log::warn!(
"Found entry of type={:?}, but only files are supported",
Some((block_index, hdr))
}
}

impl<'a> ExactSizeIterator for ArchiveEntryIterator<'a> {}

/// Iterator over the files of the archive.
#[derive(Debug)]
pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>);

impl<'a> ArchiveEntryIterator<'a> {
pub fn new(archive: &'a [u8]) -> Self {
Self(ArchiveHeaderIterator::new(archive))
}

fn next_hdr(&mut self) -> Option<(BlockIndex, &'a PosixHeader)> {
self.0.next()
}
}

impl<'a> Iterator for ArchiveEntryIterator<'a> {
type Item = ArchiveEntry<'a>;

fn next(&mut self) -> Option<Self::Item> {
let (mut block_index, mut hdr) = self.next_hdr()?;

// Ignore directory entries, i.e. yield only regular files. Works as
// filenames in tarballs are fully specified, e.g. dirA/dirB/file1
while !hdr.typeflag.is_regular_file() {
warn!(
"Skipping entry of type {:?} (not supported yet)",
hdr.typeflag
);
return None;
}

if hdr.name.is_empty() {
warn!("Found empty file name",);
// Update properties.
(block_index, hdr) = self.next_hdr()?;
}

let hdr_size = hdr.size.as_number::<usize>();
if let Err(e) = hdr_size {
warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
return None;
// check if we found end of archive (two zero blocks)
if hdr.is_zero_block() {
if self.next_hdr()?.1.is_zero_block() {
// found end
return None;
} else {
panic!("Never expected to have a situation where self.next_hdr() returns a zero block and the next one is not a zero block, as we should never point to an 'end zero block of a regular file'");
}
}
let hdr_size = hdr_size.unwrap();

// Fetch data of file from next block(s).
// .unwrap() is fine as we checked that hdr.size().val() is valid
// above
let data_block_count = hdr.payload_block_count().unwrap();

// +1: skip hdr block itself and start at data!
// i_begin is the byte begin index of this file in the array of the whole archive
let i_begin = (self.block_index + 1) * BLOCKSIZE;
// i_end is the exclusive byte end index of the data of the current file
let i_end = i_begin + data_block_count * BLOCKSIZE;
let file_block_bytes = &self.archive_data[i_begin..i_end];
// Each block is 512 bytes long, but the file size is not necessarily a
// multiple of 512.
let file_bytes = &file_block_bytes[0..hdr_size];

// in next iteration: start at next Archive entry header
// +1 for current hdr block itself + all data blocks
self.block_index += data_block_count + 1;

let payload_size: usize = hdr
.size
.as_number()
.inspect_err(|e| error!("Can't parse the file size from the header. {e:#?}"))
.ok()?;

let idx_first_data_block = block_index + 1;
let idx_begin = idx_first_data_block * BLOCKSIZE;
let idx_end_exclusive = idx_begin + payload_size;
let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive];

let mut filename: TarFormatString<256> =
TarFormatString::<POSIX_1003_MAX_FILENAME_LEN>::new([0; POSIX_1003_MAX_FILENAME_LEN]);
if hdr.magic.as_str() == "ustar" && hdr.version.as_str() == "00" && !hdr.prefix.is_empty() {
if hdr.magic.as_str().unwrap() == "ustar"
&& hdr.version.as_str().unwrap() == "00"
&& !hdr.prefix.is_empty()
{
filename.append(&hdr.prefix);
filename.append(&TarFormatString::<1>::new([b'/']));
}
Expand Down Expand Up @@ -302,6 +325,30 @@ mod tests {
};
}

#[test]
fn test_header_iterator() {
let archive = include_bytes!("../tests/gnu_tar_default.tar");
let iter = ArchiveHeaderIterator::new(archive);
let names = iter
.map(|(_i, hdr)| hdr.name.as_str().unwrap())
.collect::<Vec<_>>();
assert_eq!(
names.as_slice(),
&[
"bye_world_513b.txt",
"hello_world_513b.txt",
"hello_world.txt",
]
)

/*for hdr in iter {
dbg!(hdr);
}*/

// TODO make PartialEq
//assert_eq!(ArchiveHeaderIterator::new(archive).collect::<Vec<_>>().as_slice(), &[]);
}

#[test]
fn test_archive_list() {
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap();
Expand Down Expand Up @@ -411,7 +458,7 @@ mod tests {

/// Test that the entry's contents match the expected content.
fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) {
assert_eq!(entry.filename().as_str(), filename);
assert_eq!(entry.filename().as_str(), Ok(filename));
assert_eq!(entry.size(), size);
assert_eq!(entry.data().len(), size);
}
Expand Down
Loading

0 comments on commit 46e33cc

Please sign in to comment.