From 1e84b7d8fb26f37f5b2c44a3a0d9d79dbcf8c4f1 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 15:35:58 +0200 Subject: [PATCH 01/18] remove unnecessary log --- src/archive.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index f57f771..d8b4da6 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -212,14 +212,14 @@ impl<'a> Iterator for ArchiveHeaderIterator<'a> { /// header can't be parsed. fn next(&mut self) -> Option { // TODO better check for two end zero blocks here? - assert!(self.block_index < self.archive_data.len() / BLOCKSIZE); + let total_block_count = self.archive_data.len() / BLOCKSIZE; + assert!(self.block_index < total_block_count); let hdr = self.block_as_header(self.block_index); let block_index = self.block_index; // Start at next block on next iteration. self.block_index += 1; - log::info!("{:#?}, {:#?}", hdr.name, hdr.typeflag); let block_count = hdr .payload_block_count() @@ -355,6 +355,7 @@ mod tests { let entries = archive.entries().collect::>(); println!("{:#?}", entries); } + /// Tests to read the entries from existing archives in various Tar flavors. #[test] fn test_archive_entries() { From e5ac1ab687696d8d319daf54769265aa491975e0 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 16:21:42 +0200 Subject: [PATCH 02/18] replace magic constants --- src/tar_format_types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 53d51c2..1b24260 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -102,7 +102,7 @@ impl TarFormatNumber { where T: num_traits::Num, { - memchr::memchr2(32, 0, &self.0.bytes).map_or_else( + memchr::memchr2(b' ', b'\0', &self.0.bytes).map_or_else( || T::from_str_radix(self.0.as_str().expect("Should be valid Tar archive"), R), |idx| { T::from_str_radix( From 0962d518d00fc322c06f18384de7ec7a607c287d Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 16:36:16 +0200 Subject: [PATCH 03/18] examples: use coloring always --- examples/alloc_feature.rs | 1 + examples/minimal.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/alloc_feature.rs b/examples/alloc_feature.rs index a631c3e..599e649 100644 --- a/examples/alloc_feature.rs +++ b/examples/alloc_feature.rs @@ -27,6 +27,7 @@ use tar_no_std::TarArchive; fn main() { // log: not mandatory std::env::set_var("RUST_LOG", "trace"); + std::env::set_var("RUST_LOG_STYLE", "always"); env_logger::init(); // also works in no_std environment (except the println!, of course) diff --git a/examples/minimal.rs b/examples/minimal.rs index e0140d5..a8ced6b 100644 --- a/examples/minimal.rs +++ b/examples/minimal.rs @@ -26,6 +26,7 @@ use tar_no_std::TarArchiveRef; fn main() { // log: not mandatory std::env::set_var("RUST_LOG", "trace"); + std::env::set_var("RUST_LOG_STYLE", "always"); env_logger::init(); // also works in no_std environment (except the println!, of course) From f9dd04c11f317cc9ea7967a35b73b01e8af90fdf Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 16:47:16 +0200 Subject: [PATCH 04/18] ArchiveHeaderIterator: more resilient --- src/archive.rs | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index d8b4da6..129486e 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -174,7 +174,7 @@ impl<'a> TarArchiveRef<'a> { #[derive(Debug)] pub struct ArchiveHeaderIterator<'a> { archive_data: &'a [u8], - block_index: usize, + next_hdr_block_index: usize, } impl<'a> ArchiveHeaderIterator<'a> { @@ -183,7 +183,7 @@ impl<'a> ArchiveHeaderIterator<'a> { assert_eq!(archive.len() % BLOCKSIZE, 0); Self { archive_data: archive, - block_index: 0, + next_hdr_block_index: 0, } } @@ -213,23 +213,26 @@ impl<'a> Iterator for ArchiveHeaderIterator<'a> { fn next(&mut self) -> Option { // TODO better check for two end zero blocks here? let total_block_count = self.archive_data.len() / BLOCKSIZE; - assert!(self.block_index < total_block_count); + assert!(self.next_hdr_block_index < total_block_count); - let hdr = self.block_as_header(self.block_index); - let block_index = self.block_index; + let hdr = self.block_as_header(self.next_hdr_block_index); + let block_index = self.next_hdr_block_index; // Start at next block on next iteration. - self.block_index += 1; - - let block_count = hdr - .payload_block_count() - .inspect_err(|e| { - log::error!("Unparsable size ({e:?}) in header {hdr:#?}"); - }) - .ok()?; - - if !hdr.is_zero_block() { - self.block_index += block_count; + self.next_hdr_block_index += 1; + + // We only update the block index for types that have a payload. + // In directory entries, for example, the size field has other + // semantics. See spec. + if hdr.typeflag.is_regular_file() { + let payload_block_count = hdr + .payload_block_count() + .inspect_err(|e| { + log::error!("Unparsable size ({e:?}) in header {hdr:#?}"); + }) + .ok()?; + + self.next_hdr_block_index += payload_block_count; } Some((block_index, hdr)) From 68135cfa4f2f5fd5d4f90541a9f94733f51ab095 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 16:55:36 +0200 Subject: [PATCH 05/18] ArchiveHeaderIterator: more robust --- src/archive.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 129486e..e6f390a 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -211,9 +211,11 @@ impl<'a> Iterator for ArchiveHeaderIterator<'a> { /// This returns `None` if either no further headers are found or if a /// header can't be parsed. fn next(&mut self) -> Option { - // TODO better check for two end zero blocks here? let total_block_count = self.archive_data.len() / BLOCKSIZE; - assert!(self.next_hdr_block_index < total_block_count); + if self.next_hdr_block_index >= total_block_count { + warn!("Invalid block index. Probably the Tar is corrupt: an header had an invalid payload size"); + return None; + } let hdr = self.block_as_header(self.next_hdr_block_index); let block_index = self.next_hdr_block_index; From 65c60fc3909d33ac000d8d306b7c0fd089f55ffb Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 16:56:30 +0200 Subject: [PATCH 06/18] ArchiveEntryIterator: more robust --- src/archive.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/archive.rs b/src/archive.rs index e6f390a..4e0c609 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -294,6 +294,13 @@ impl<'a> Iterator for ArchiveEntryIterator<'a> { let idx_first_data_block = block_index + 1; let idx_begin = idx_first_data_block * BLOCKSIZE; let idx_end_exclusive = idx_begin + payload_size; + + let max_data_end_index_exclusive = self.0.archive_data.len() - 2 * BLOCKSIZE; + if idx_end_exclusive > max_data_end_index_exclusive { + warn!("Invalid Tar. The size of the payload ({payload_size}) is larger than what is valid"); + return None; + } + let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive]; let mut filename: TarFormatString<256> = From 8088c1db80abbfdac35c959e4569f9cdab633629 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 17:03:14 +0200 Subject: [PATCH 07/18] TarFormatNumber: don't panic any longer It would be nicer to properly propagate the error to the outside. But with the current code structure, I couldn't find a nice way. Good enough, for now. --- src/tar_format_types.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 1b24260..fd34cc4 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -103,10 +103,10 @@ impl TarFormatNumber { T: num_traits::Num, { memchr::memchr2(b' ', b'\0', &self.0.bytes).map_or_else( - || T::from_str_radix(self.0.as_str().expect("Should be valid Tar archive"), R), + || T::from_str_radix(self.0.as_str().unwrap_or("0"), R), |idx| { T::from_str_radix( - from_utf8(&self.0.bytes[..idx]).expect("byte array is not UTF-8"), + from_utf8(&self.0.bytes[..idx]).unwrap_or("0"), 8, ) }, From 3d381bce276246c43dc8d2777222b9a99409b0cd Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 17:05:29 +0200 Subject: [PATCH 08/18] ArchiveEntryIterator: code improvement, less panics --- src/archive.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 4e0c609..5c5c95a 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -305,12 +305,19 @@ impl<'a> Iterator for ArchiveEntryIterator<'a> { let mut filename: TarFormatString<256> = TarFormatString::::new([0; POSIX_1003_MAX_FILENAME_LEN]); - if hdr.magic.as_str().unwrap() == "ustar" - && hdr.version.as_str().unwrap() == "00" - && !hdr.prefix.is_empty() - { - filename.append(&hdr.prefix); - filename.append(&TarFormatString::<1>::new([b'/'])); + + // POXIS_1003 long filename check + // https://docs.scinet.utoronto.ca/index.php/(POSIX_1003.1_USTAR) + match ( + hdr.magic.as_str(), + hdr.version.as_str(), + hdr.prefix.is_empty(), + ) { + (Ok("ustar"), Ok("00"), false) => { + filename.append(&hdr.prefix); + filename.append(&TarFormatString::<1>::new([b'/'])); + } + _ => (), } filename.append(&hdr.name); Some(ArchiveEntry::new(filename, file_bytes)) From 698a1e08c14cf263ed6d4923569ff4689173904e Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 17:16:21 +0200 Subject: [PATCH 09/18] tests: add weird_fuzzing_tarballs.tar --- src/archive.rs | 30 ++++++++++++++++++++++++++++++ src/tar_format_types.rs | 4 ++-- tests/weird_fuzzing_tarballs.tar | Bin 0 -> 153600 bytes 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tests/weird_fuzzing_tarballs.tar diff --git a/src/archive.rs b/src/archive.rs index 5c5c95a..6431bb7 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -375,6 +375,36 @@ mod tests { println!("{:#?}", entries); } + /// Tests various weird (= invalid, corrupt) tarballs that are bundled + /// within this file. The tarball(s) originate from a fuzzing process from a + /// GitHub contributor [0]. + /// + /// The test succeeds if no panics occur. + /// + /// [0] https://github.com/phip1611/tar-no-std/issues/12#issuecomment-2092632090 + #[test] + fn test_weird_fuzzing_tarballs() { + /*std::env::set_var("RUST_LOG", "trace"); + std::env::set_var("RUST_LOG_STYLE", "always"); + env_logger::init();*/ + + let main_tarball = + TarArchiveRef::new(include_bytes!("../tests/weird_fuzzing_tarballs.tar")).unwrap(); + + let mut all_entries = vec![]; + for tarball in main_tarball.entries() { + let tarball = TarArchiveRef::new(tarball.data()).unwrap(); + for entry in tarball.entries() { + all_entries.push(entry.filename()); + } + } + + // Test succeeds if this works without a panic. + for entry in all_entries { + eprintln!("\"{entry:?}\","); + } + } + /// Tests to read the entries from existing archives in various Tar flavors. #[test] fn test_archive_entries() { diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index fd34cc4..5e1030a 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -74,8 +74,8 @@ impl Debug for TarFormatString { let sub_array = &self.bytes[0..self.size()]; write!( f, - "str='{}',byte_usage={}/{}", - from_utf8(sub_array).unwrap(), + "str='{:?}',byte_usage={}/{}", + from_utf8(sub_array), self.size(), N ) diff --git a/tests/weird_fuzzing_tarballs.tar b/tests/weird_fuzzing_tarballs.tar new file mode 100644 index 0000000000000000000000000000000000000000..1d5b9aafc0417f802557bc2425ff07b693444c96 GIT binary patch literal 153600 zcmeHQ&2Hs5cD~Le2&RK%0%SH>#w`IdunQ`x_0g=SCe3{2 zM16-x;zu4Hy?guk#dc5}x6Y40-46Jz^RwsEob1-c@#~{)p4>Y6^62^bb`??oE`uA@Z$L7&8^du^X*`yJcM)}1S9$h)1>$f!Z;RxizB6fqcG-S5;D$WDrYRA zYU38M%4=#DzCFJ2L8zxD{lKM^O+4jOsB)ts0FJACMOMr=}ggdGVF%delu`zEo8hX4%f}WN-iA&fRr9s`>S~t)IS3Mw}IktWw^6?DcRWmvHtJWDQ~#F z6)gt>KmZ5;0b2-2{vYY^|I{1*_v-(L_5anZ|4*I&>3{hD+cIcFIdYWlE@eFahyTCU z_}@^~wbjA@Yhk%DneMD!N;bBC@P94)*IDbY^DMeCof|g_1b_e#00NB&Nd6x;^8dyD z|1|OD|DQ0d|2HblVuA1VfB64fRGtMWYLUP@lJ z|6-K@$a0B*d16Js?neDmvhnyI{J%2(ZyMLz=rUOUw}IktWk@S^ zmkKTozyC3G{{O$!A9}o4ZF2p^v*Wu(DjsfcMaz34@O97gaXJtH0vm*YG} zhO|;u>gwup)P0`N*#5!)wd`L$3CvQ%?X76}>JYf1V|7=@mF*$`RocaK4W<1+r%jUo zhmHI{o5%kPgPf<;^?zsn|KR^R>g!#{7yqwqs2CVdk5L%MX%q!vz>-nG7|kb)#yYVj zYUP?_zD4{$>+AmqSpT=R{->H)GwJ3{b;19ekhvN8D%M~2;ptOPJmPsAQ;z*sd_(tP@tp96`|7~;`AN>E1Z8Q`zoFJfU!+q*$*#A{!{ulgT%l^$){pH>P zCmjhwfdCKy0-KD$Uv1vaN&X)-^Z$kae;6i9{6EuxqpZ{NuR1E)bRbD?DtY5fl*M{Pqo;%2nibf9} zfxjI59jx6Ad}G}MqT4P0Nw?E*)~XSZ{6B8w|9QAr|7THFUH?xtZlDwYXW;)D(DtqZ z{yzZzPs6#(-kPo#pgV@|Q_pz(kM(~QJ6M=(y++M)eXReRwFtF+BA~MC$E6sas(Nkv z1QhoFH9gv>cC*bYv;F@G_`lSWyOf842;%>_Q(te%#kl@*REgK$XRKd$u44Qzis)Dx z|NG?sx(L!V(aV>*m8qTw!!l@1V^#3~Mrb+~{BY^K8GT#IU6()W?A3OjuyCXOKj8mb z^M76E4%Vgb4ZIz?k%I<)n`k}|00JvTV0lPku`!bW^Je~^F4q6!B&x3eJL3NlZ5P1) ze~Wr<66JxW44?lKNF!7J*97>Q<=Y^W3H+bBgqyE^c4ht2cVM`^Rj1`w|DjV0b$|d600P59;P+Oy zd`SMEHu3*BkpBNXSc?D8Gyp8#|AzbjIrzT@vi+-gpee)W{{m)H{?}hMUL>eT?siIz z$N!%B|2Jn{_y1ph-KZoFhShj&g97k>1F$x*svQegOEI?pP2m3??7!+!7D~ z0zd!=7$P9~f7Z3{$}WyQ#*@S?ZCyol@iRzi0k` z*qQ$;RMge5{wY;N?viVGBBi*xb`ki$HdGr~(T;_yr5M}4NB;la1x@U(ll_xjcjfif zr$e)pjqJ!nxT)6fmkr@2&_*Bt1U3Z$$^Y|a{xA3cr%_z>|98g!vHtI+i*=I5p`M-f-Sy2B7{NDzO!EHt zEJ79Ue^dYe6#M^u^iB`Mln(r#f&aTy{x_#twQ+se&_NUkiA9-_=x|D;yC|KBzq4LUIO z|7;MmZrOdt^~!UX%>T>x(<>yAG5;@L*?HR4_|JL6?OdSisQ(}C>hBXU9{+>?ga6y< z-jLFQKv(;}dZQ8+`=p|29w@ zt_(XCu9jkK|Ni+uDR6lEf7Gwt=F;KzR?| zOyJ4s_N&9sPoCd8J38HV75@+X-%sCPJ^#PDy1EQTVH~GX6odhz_)y^gew0sFQIFj1 zlp2r!vHp+s|LYcT_1o)tE_1Ga?^61j$hKqQYAMF{@1Oq%LyZ5?%KcLc!c5{x|r)WBxC#{wCW4CvvHuYNkyh!kAXe<>-;ZKTZgPcQu#`nQ37n~aju>yvE)G|J8OAF&AbuYqj; zD)maQ&+U3ObN$B=eG~-G|E_==oL8^u)?9-tjEb+kn7#j#n8E&yFz#BpRnx5%+OKSG z|2zp{{}R|MnY5j1Rj)~fhQ^ueKjkzJjOTv?o;I^h)N4|=+(u*lvmgy%|78fBrA>6& zv99_1pE2q(uzzPtF9|N`w3KT%X>R}I{zYH?(TOP3yQQw8zLbD*bZi>7CG_mXZFsSoKRn;q?ps>*~MM zK@9bC^!oJTE5mb4s|)?BSA7<%ei1A5Pm90YXVfa?|1#P3jII-i-M4q+rCI|=vTi@blR~l{680+X#El;ojPlrx&6ccGt0|ktdf>w(u%&M zxYh6fhW}@~((8Lg_pY#Z`iK8#KR#yI72y9#1G_W$e;RfhR0je;00;m9AOHk_01y~F z0@D98TlW8q_%!fm$dY;F&nU|%mm+^YKl$wB?D*G*7sn@Wiuj+Q0tqn_L^0mPj9{21 z#cx30dnjfIDKDjeOYuJ=IsyKl3P`OLQ4HoRN#i_?D3xZhprK*Fb51^Z6p}SaQp(L@ z+M1iBBAHUuX8KeIONuk{JZBk?=-iyb_{ABgGvYYQ5}s3Doaa%RM?VhfU~#-9epQhG zRp0;Ea&>(A%hRI^YTC<_mlvvQoShtf@?2G6m}Qg6!T#>ZWce|9wce|AN+Y9Li9 zm9nWPz}9>xXtiePuP;ttd~#U&yDcdUf#nHHgmGj%&yqL|14=|9lTwV4*-@b5AdflE zXbUWzQCM9hj4k7tu&k+Je)jRbKer$WB8G^-{$y|e zVCTV8vhbb#orB#+kDol+-yy@@zxUqHpO8`S?(IJP#V8{700;nq8U!T&&#&?SsQCUz9L=Ty;%vSFH0CT_-T8buZM~oF?~#V*jUgt=SW(-nnU`^m<#x^FWvXUq9P&j$-^@xss_TsIEEns8?xU z$3LDH$2%R*#%XJgKou|e9*+U}rd6kk_wd!%?O)_k z&BHsoKeysi{ChBf+WDPYR=i<;-)J#3EICY@o& z!qrmh?0>=kqhOi;^I%H+S-{!s%bzrd9HwIZ|JC8=C(nz8e^ub8nSzSz8m<3_QG)M( zdga7v#W${G-(I7ttg+QMD7_^?z#}K_Q4- z2sy=F%Y+XS7qNcrk{qn7<@)y5)@yREu3d1Us14gG2Kqvt0Soryq+KXQr_D_GsGl7n=@cSh{ zR_biQnA*e=Hn1;%&F*8inPDt@-g>7>`Y$Ezwwa(Y)PEF-FMti|3ve5yf4g15T>rGG z1@>>(2pgo8G~JqP5Yd`F*Z;2e^FK)<*ncaoHY&6#y}p9crRVw=zk2U~+Q|a@H^R7U z<$Cp+rEq3ijU)tc4mTnr^Mz&>my`)7CZy`)@$gZg#6qJ60&S(p>*63}OFe z2%V)h)%!2x$Wb4BvOTxp{YLzsMPlLy{Lh)zrC?3{3rC)IP1g(Spnux7JhU&s(?gDq z`J@*8E@s^&*lU=Q&Fre)RtX*SFE+^!-2Q7}S-%cY+f2J%p@aS@q`~Ice?6LO(iL3k zzZ_YZo{O}kSv(r`_&-I&@w@Mry6EAX7hkD#(zfrfc>LEs|LgT%%2i#~b(z$^vy|}r zHJkrunE!XJ_qlxOo#!sqZvSa@{VxvEDE7JT@6`UC8P;5feSYEh+XubjpZnqAmv@g2 zUmTs4-bZ#j?5K787X_?p{}k^kqVfNiKH#*9bWupB`Sk`iWhom$v;Z zdmSg0O+h-Px8Gse`1Qr{>!Y896gQFL&rpEUcf(u+Gs_XOpjR(&ulC!m_D`m>Wd9L) zpbeb=_tJ{LliiK_q-68)A4NCx>i0R!@t@AKv+ikI?p+xFmC?9)>9rXDy&Eq*|6>sr Y9^*d`-Eltt^KdC#0tf(sHAUe60rl(K`Tzg` literal 0 HcmV?d00001 From ead744f1c6780ad9c150f6db823a6bafef718662 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 17:29:05 +0200 Subject: [PATCH 10/18] split test_archive_with_dir_entries into two test cases This makes debugging easier --- src/archive.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 6431bb7..469328e 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -468,27 +468,25 @@ mod tests { } #[test] - fn test_archive_with_dir_entries() { + fn test_default_archive_with_dir_entries() { // tarball created with: // $ gtar -cf tests/gnu_tar_default_with_dir.tar --exclude '*.tar' --exclude '012345678*' tests - { - let archive = - TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar")) - .unwrap(); - let entries = archive.entries().collect::>(); + let archive = + TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar")).unwrap(); + let entries = archive.entries().collect::>(); - assert_archive_with_dir_content(&entries); - } + assert_archive_with_dir_content(&entries); + } + #[test] + fn test_ustar_archive_with_dir_entries() { // tarball created with: // $(osx) tar -cf tests/mac_tar_ustar_with_dir.tar --format=ustar --exclude '*.tar' --exclude '012345678*' tests - { - let archive = - TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar")).unwrap(); - let entries = archive.entries().collect::>(); + let archive = + TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar")).unwrap(); + let entries = archive.entries().collect::>(); - assert_archive_with_dir_content(&entries); - } + assert_archive_with_dir_content(&entries); } /// Like [`test_archive_entries`] but with additional `alloc` functionality. From 1d06956afe248adc887782726a9fd0baa549ed64 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 17:31:17 +0200 Subject: [PATCH 11/18] test_header_iterator: remove outdated comment --- src/archive.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 469328e..1339803 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -351,6 +351,7 @@ mod tests { let names = iter .map(|(_i, hdr)| hdr.name.as_str().unwrap()) .collect::>(); + assert_eq!( names.as_slice(), &[ @@ -359,13 +360,6 @@ mod tests { "hello_world.txt", ] ) - - /*for hdr in iter { - dbg!(hdr); - }*/ - - // TODO make PartialEq - //assert_eq!(ArchiveHeaderIterator::new(archive).collect::>().as_slice(), &[]); } #[test] From 51b68c6bf12f0d26da9bc356c6cc8a802314df3a Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 18:32:33 +0200 Subject: [PATCH 12/18] tar_format_types: better doc, better code, better tests The main motivation here is to cope with ustar-style Tar archives that put a space into the string describing certain numbers. A tar archive that fails without the new `as_str_until_first_space()` helper --- src/tar_format_types.rs | 88 ++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index 5e1030a..f45f894 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -7,14 +7,12 @@ use core::str::{from_utf8, Utf8Error}; use num_traits::Num; /// Base type for strings embedded in a Tar header. The length depends on the -/// context. The returned string +/// context. The returned string is likely to be UTF-8/ASCII, which is verified +/// by getters, such as [`TarFormatString::as_str`]. /// /// An optionally null terminated string. The contents are either: /// 1. A fully populated string with no null termination or /// 2. A partially populated string where the unused bytes are zero. -/// -/// The content is likely to be UTF-8/ASCII, but that is not verified by this -/// type. The #[derive(Copy, Clone, PartialEq, Eq)] #[repr(C)] pub struct TarFormatString { @@ -37,8 +35,8 @@ impl TarFormatString { self.bytes[0] == 0 } - /// Returns the length of the bytes. This is either the full capacity `N` - /// or the data until the first NULL byte. + /// Returns the length of the payload in bytes. This is either the full + /// capacity `N` or the data until the first NULL byte. pub fn size(&self) -> usize { memchr::memchr(0, &self.bytes).unwrap_or(N) } @@ -50,6 +48,17 @@ impl TarFormatString { from_utf8(&self.bytes[0..self.size()]) } + /// Wrapper around [`Self::as_str`] that stops as soon as the first space + /// is found. This is necessary to properly parse certain Tar-style encoded + /// numbers. Some ustar implementations pad spaces which prevents the proper + /// parsing as number. + pub fn as_str_until_first_space(&self) -> Result<&str, Utf8Error> { + from_utf8(&self.bytes[0..self.size()]).map(|str| { + let end_index_exclusive = str.find(' ').unwrap_or(str.len()); + &str[0..end_index_exclusive] + }) + } + /// Append to end of string. Panics if there is not enough capacity. pub fn append(&mut self, other: &TarFormatString) { let resulting_length = self.size() + other.size(); @@ -98,24 +107,22 @@ pub struct TarFormatOctal(TarFormatNumber); pub struct TarFormatDecimal(TarFormatNumber); impl TarFormatNumber { + #[cfg(test)] + fn new(bytes: [u8; N]) -> Self { + Self(TarFormatString:: { bytes }) + } + pub fn as_number(&self) -> core::result::Result where T: num_traits::Num, { - memchr::memchr2(b' ', b'\0', &self.0.bytes).map_or_else( - || T::from_str_radix(self.0.as_str().unwrap_or("0"), R), - |idx| { - T::from_str_radix( - from_utf8(&self.0.bytes[..idx]).unwrap_or("0"), - 8, - ) - }, - ) + let str = self.0.as_str_until_first_space().unwrap_or("0"); + T::from_str_radix(str, R) } - /// Returns the raw string describing this type. - pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { - self.0.as_str() + /// Returns the underlying [`TarFormatString`]. + pub fn as_inner(&self) -> &TarFormatString { + &self.0 } } @@ -149,8 +156,9 @@ impl TarFormatDecimal { self.0.as_number::() } - pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { - self.0.as_raw_str() + /// Returns the underlying [`TarFormatString`]. + pub fn as_inner(&self) -> &TarFormatString { + self.0.as_inner() } } @@ -162,12 +170,14 @@ impl TarFormatOctal { self.0.as_number::() } - pub fn as_raw_str(&self) -> core::result::Result<&str, Utf8Error> { - self.0.as_raw_str() + /// Returns the underlying [`TarFormatString`]. + pub fn as_inner(&self) -> &TarFormatString { + self.0.as_inner() } } -mod tests { +#[cfg(test)] +mod tar_format_string_tests { use super::TarFormatString; use core::mem::size_of_val; @@ -183,7 +193,7 @@ mod tests { #[test] fn test_one_byte_string() { - let s = TarFormatString::new([65]); + let s = TarFormatString::new([b'A']); assert_eq!(size_of_val(&s), 1); assert!(!s.is_empty()); assert_eq!(s.size(), 1); @@ -192,13 +202,23 @@ mod tests { #[test] fn test_two_byte_string_nul_terminated() { - let s = TarFormatString::new([65, 0]); - assert_eq!(size_of_val(&s), 2); + let s = TarFormatString::new([b'A', 0, b'B']); + assert_eq!(size_of_val(&s), 3); assert!(!s.is_empty()); assert_eq!(s.size(), 1); assert_eq!(s.as_str(), Ok("A")); } + #[test] + fn test_str_until_first_space() { + let s = TarFormatString::new([b'A', b'B', b' ', b'X', 0]); + assert_eq!(size_of_val(&s), 5); + assert!(!s.is_empty()); + assert_eq!(s.size(), 4); + assert_eq!(s.as_str(), Ok("AB X")); + assert_eq!(s.as_str_until_first_space(), Ok("AB")); + } + #[test] #[allow(clippy::cognitive_complexity)] fn test_append() { @@ -213,14 +233,14 @@ mod tests { assert_eq!(s.as_str(), Ok("")); // When adding ABC - s.append(&TarFormatString::new([65, 66, 67])); + s.append(&TarFormatString::new([b'A', b'B', b'C'])); // Then the string contains the additional 3 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); assert_eq!(s.size(), 3); assert_eq!(s.as_str(), Ok("ABC")); - s.append(&TarFormatString::new([68, 69, 70])); + s.append(&TarFormatString::new([b'D', b'E', b'F'])); // Then the string contains the additional 3 chars assert_eq!(size_of_val(&s), 20); assert!(!s.is_empty()); @@ -249,3 +269,15 @@ mod tests { assert_eq!(s.as_str(), Ok("ABCDEFAAAAAAAAAAAAAZ")); } } + +#[cfg(test)] +mod tar_format_number_tests { + use crate::{TarFormatDecimal, TarFormatNumber, TarFormatString}; + + #[test] + fn test_as_number_with_space_in_string() { + let str = [b'0', b'1', b'0', b' ', 0]; + let str = TarFormatNumber::<5, 10>::new(str); + assert_eq!(str.as_number::(), Ok(10)); + } +} From de553f5cc775a08a6998fb7b3fe67d35e37491aa Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:16:23 +0200 Subject: [PATCH 13/18] doc: various improvements and cleanups --- README.md | 65 +++++++++++++++++++++--------------- src/archive.rs | 17 +++++----- src/header.rs | 1 + src/lib.rs | 90 +++++++++++++++++++++++++++++++++++--------------- 4 files changed, 112 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index eb75e5f..753222a 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,35 @@ # `tar-no-std` - Parse Tar Archives (Tarballs) -_Due to historical reasons, there are several formats of tar archives. All of them are based on the same principles, -but have some subtle differences that often make them incompatible with each other._ [0] +_Due to historical reasons, there are several formats of Tar archives. All of +them are based on the same principles, but have some subtle differences that +often make them incompatible with each other._ [(reference)](https://www.gnu.org/software/tar/manual/html_section/Formats.html) -Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. If you have a standard -environment and need full feature support, I recommend the use of instead. +Library to read Tar archives in `no_std` environments with zero allocations. If +you have a standard environment and need full feature support, I recommend the +use of instead. ## Limitations -The crate is simple and only supports reading of "basic" archives, therefore no extensions, such -as GNU Longname. The maximum supported file name length is 256 characters excluding the NULL-byte (using the tar name/prefix longname implementation). The maximum supported file size is 8GiB. Directories are supported, but only regular fields are yielded in iteration. +This crate is simple and focuses on reading files and their content from a Tar +archive. Historic basic Tar and ustar [formats](https://www.gnu.org/software/tar/manual/html_section/Formats.html) +are supported. Other formats may work, but likely without all supported +features. GNU Extensions such as sparse files, incremental archives, and long +filename extension are not supported. + +The maximum supported file name length is 256 characters excluding the +NULL-byte (using the Tar name/prefix longname implementation of ustar). The +maximum supported file size is 8GiB. Directories are supported, but only regular +fields are yielded in iteration. The path is reflected in their file name. ## Use Case -This library is useful, if you write a kernel or a similar low-level application, which needs -"a bunch of files" from an archive ("init ramdisk"). The Tar file could for example come -as a Multiboot2 boot module provided by the bootloader. +This library is useful, if you write a kernel or a similar low-level +application, which needs "a bunch of files" from an archive (like an +"init ramdisk"). The Tar file could for example come as a Multiboot2 boot module +provided by the bootloader. -This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**. -GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet. -[This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible -archive formats and their limitations. +## Example -## Example (without `alloc`-feature) ```rust use tar_no_std::TarArchiveRef; @@ -33,27 +40,31 @@ fn main() { // also works in no_std environment (except the println!, of course) let archive = include_bytes!("../tests/gnu_tar_default.tar"); - let archive = TarArchiveRef::new(archive); + let archive = TarArchiveRef::new(archive).unwrap(); // Vec needs an allocator of course, but the library itself doesn't need one let entries = archive.entries().collect::>(); println!("{:#?}", entries); - println!("content of last file:"); - println!("{:#?}", entries[2].data_as_str().expect("Should be valid UTF-8")); + println!("content of first file:"); + println!( + "{:#?}", + entries[0].data_as_str().expect("Should be valid UTF-8") + ); } ``` -## Alloc Feature -This crate allows the usage of the additional Cargo build time feature `alloc`. When this is used, -the crate also provides the type `TarArchive`, which owns the data on the heap. +## Cargo Feature + +This crate allows the usage of the additional Cargo build time feature `alloc`. +When this is active, the crate also provides the type `TarArchive`, which owns +the data on the heap. The `unstable` feature provides additional convenience +only available on the nightly channel. ## Compression (`tar.gz`) -If your tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress the bytes first -(e.g. by a *gzip* library). Afterwards, this crate can read the Tar archive format from the uncompressed -bytes. -## MSRV -The MSRV is 1.76.0 stable. +If your Tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress +the bytes first (e.g. by a *gzip* library). Afterwards, this crate can read the +Tar archive format from the uncompressed bytes. +## MSRV -## References -[0]\: https://www.gnu.org/software/tar/manual/html_section/Formats.html +The MSRV is 1.76.0 stable. diff --git a/src/archive.rs b/src/archive.rs index 1339803..379b56b 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -101,8 +101,7 @@ impl Display for CorruptDataError { impl core::error::Error for CorruptDataError {} /// Type that owns bytes on the heap, that represents a Tar archive. -/// Unlike [`TarArchiveRef`], this type is useful, if you need to own the -/// data as long as you need the archive, but no longer. +/// Unlike [`TarArchiveRef`], this type takes ownership of the data. /// /// This is only available with the `alloc` feature of this crate. #[cfg(feature = "alloc")] @@ -144,8 +143,8 @@ impl From for Box<[u8]> { } } -/// Wrapper type around bytes, which represents a Tar archive. -/// Unlike [`TarArchive`], this uses only a reference to the data. +/// Wrapper type around bytes, which represents a Tar archive. To iterate the +/// entries, use [`TarArchiveRef::entries`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct TarArchiveRef<'a> { data: &'a [u8], @@ -162,9 +161,7 @@ impl<'a> TarArchiveRef<'a> { .ok_or(CorruptDataError) } - /// Iterates over all entries of the Tar archive. - /// Returns items of type [`ArchiveEntry`]. - /// See also [`ArchiveEntryIterator`]. + /// Creates an [`ArchiveEntryIterator`]. pub fn entries(&self) -> ArchiveEntryIterator { ArchiveEntryIterator::new(self.data) } @@ -244,11 +241,15 @@ impl<'a> Iterator for ArchiveHeaderIterator<'a> { impl<'a> ExactSizeIterator for ArchiveEntryIterator<'a> {} /// Iterator over the files of the archive. +/// +/// Only regular files are supported, but not directories, links, or other +/// special types ([`crate::TypeFlag`]). The full path to files is reflected +/// in their file name. #[derive(Debug)] pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>); impl<'a> ArchiveEntryIterator<'a> { - pub fn new(archive: &'a [u8]) -> Self { + fn new(archive: &'a [u8]) -> Self { Self(ArchiveHeaderIterator::new(archive)) } diff --git a/src/header.rs b/src/header.rs index 696fd42..ae931a6 100644 --- a/src/header.rs +++ b/src/header.rs @@ -34,6 +34,7 @@ use crate::{TarFormatDecimal, TarFormatOctal, TarFormatString, BLOCKSIZE, NAME_L use core::fmt::{Debug, Formatter}; use core::num::ParseIntError; +/// Errors that may happen when parsing the [`ModeFlags`]. #[derive(Debug)] pub enum ModeError { ParseInt(ParseIntError), diff --git a/src/lib.rs b/src/lib.rs index f25fef6..eae679d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,42 +21,80 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero -//! allocations. If you have a standard environment and need full feature -//! support, I recommend the use of instead. +//! # `tar-no-std` - Parse Tar Archives (Tarballs) //! -//! The crate is simple and only supports reading of "basic" archives, therefore -//! no extensions, such as GNU Longname. The maximum supported file name length -//! is 100 characters including the NULL-byte. The maximum supported file size -//! is 8 GiB. Also, directories are not supported yet but only flat collections -//! of files. +//! _Due to historical reasons, there are several formats of Tar archives. All of +//! them are based on the same principles, but have some subtle differences that +//! often make them incompatible with each other._ [(reference)](https://www.gnu.org/software/tar/manual/html_section/Formats.html) +//! +//! Library to read Tar archives in `no_std` environments with zero allocations. If +//! you have a standard environment and need full feature support, I recommend the +//! use of instead. +//! +//! ## TL;DR +//! +//! Look at the [`TarArchiveRef`] type. +//! +//! ## Limitations +//! +//! This crate is simple and focuses on reading files and their content from a Tar +//! archive. Historic basic Tar and ustar [formats](https://www.gnu.org/software/tar/manual/html_section/Formats.html) +//! are supported. Other formats may work, but likely without all supported +//! features. GNU Extensions such as sparse files, incremental archives, and +//! long filename extension are not supported. +//! +//! The maximum supported file name length is 256 characters excluding the +//! NULL-byte (using the Tar name/prefix longname implementation of ustar). The +//! maximum supported file size is 8GiB. Directories are supported, but only regular +//! fields are yielded in iteration. The path is reflected in their file name. +//! +//! ## Use Case //! //! This library is useful, if you write a kernel or a similar low-level -//! application, which needs "a bunch of files" from an archive ("init ram -//! disk"). The Tar file could for example come as a Multiboot2 boot module +//! application, which needs "a bunch of files" from an archive (like an +//! "init ramdisk"). The Tar file could for example come as a Multiboot2 boot module //! provided by the bootloader. //! -//! This crate focuses on extracting files from uncompressed Tar archives -//! created with default options by **GNU Tar**. GNU Extensions such as sparse -//! files, incremental archives, and long filename extension are not supported -//! yet. [gnu.org](https://www.gnu.org/software/tar/manual/html_section/Formats.html) -//! provides a good overview over possible archive formats and their -//! limitations. +//! ## Example //! -//! # Example //! ```rust //! use tar_no_std::TarArchiveRef; //! -//! // also works in no_std environment (except the println!, of course) -//! let archive = include_bytes!("../tests/gnu_tar_default.tar"); -//! let archive = TarArchiveRef::new(archive).unwrap(); -//! // Vec needs an allocator of course, but the library itself doesn't need one -//! let entries = archive.entries().collect::>(); -//! println!("{:#?}", entries); -//! println!("content of last file:"); -//! let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) }; -//! println!("{:#?}", last_file_content); +//! fn main() { +//! // log: not mandatory +//! std::env::set_var("RUST_LOG", "trace"); +//! env_logger::init(); +//! +//! // also works in no_std environment (except the println!, of course) +//! let archive = include_bytes!("../tests/gnu_tar_default.tar"); +//! let archive = TarArchiveRef::new(archive).unwrap(); +//! // Vec needs an allocator of course, but the library itself doesn't need one +//! let entries = archive.entries().collect::>(); +//! println!("{:#?}", entries); +//! println!("content of first file:"); +//! println!( +//! "{:#?}", +//! entries[0].data_as_str().expect("Should be valid UTF-8") +//! ); +//! } //! ``` +//! +//! ## Cargo Feature +//! +//! This crate allows the usage of the additional Cargo build time feature `alloc`. +//! When this is active, the crate also provides the type `TarArchive`, which owns +//! the data on the heap. The `unstable` feature provides additional convenience +//! only available on the nightly channel. +//! +//! ## Compression (`tar.gz`) +//! +//! If your Tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress +//! the bytes first (e.g. by a *gzip* library). Afterwards, this crate can read the +//! Tar archive format from the uncompressed bytes. +//! +//! ## MSRV +//! +//! The MSRV is 1.76.0 stable. #![cfg_attr(feature = "unstable", feature(error_in_core))] #![cfg_attr(not(test), no_std)] From 0c13ef8f11fa4041a0954541886ff017f3eaac3d Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:18:52 +0200 Subject: [PATCH 14/18] doc: changelog updated --- CHANGELOG.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0443f3..4b51808 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,27 @@ # v0.3.0 (2024-05-03) + - MSRV is now 1.76 stable - added support for more Tar archives - - 256 character long filename support (prefix + name) - - add support for space terminated numbers - - non-null terminated names - - iterate over directories: read regular files from directories - - more info: + - 256 character long filename support (prefix + name) + - add support for space terminated numbers + - non-null terminated names + - iterate over directories: read regular files from directories + - more info: - `TarArchive[Ref]::new` now returns a result - added `unstable` feature with enhanced functionality for `nightly` compilers - - error types implement `core::error::Error` + - error types implement `core::error::Error` +- various bug fixes and code improvements +- better error reporting / less panics + +Special thanks to the following external contributors or helpers: + +- https://github.com/thenhnn: provide me with a bunch of Tar archives coming + from a fuzzer +- https://github.com/schnoberts1 implemented 256 character long filenames (ustar + Tar format) # v0.2.0 (2023-04-11) + - MSRV is 1.60.0 - bitflags bump: 1.x -> 2.x - few internal code improvements (less possible panics) From 7b207c1111082e8ac16e84ae19faafebb5470287 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:26:55 +0200 Subject: [PATCH 15/18] clippy: fixes --- README.md | 7 +------ src/archive.rs | 12 +++++------- src/lib.rs | 27 ++++++++++----------------- src/tar_format_types.rs | 8 ++++---- 4 files changed, 20 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 753222a..fd6af30 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ provided by the bootloader. use tar_no_std::TarArchiveRef; fn main() { - // log: not mandatory + // init a logger (optional) std::env::set_var("RUST_LOG", "trace"); env_logger::init(); @@ -44,11 +44,6 @@ fn main() { // Vec needs an allocator of course, but the library itself doesn't need one let entries = archive.entries().collect::>(); println!("{:#?}", entries); - println!("content of first file:"); - println!( - "{:#?}", - entries[0].data_as_str().expect("Should be valid UTF-8") - ); } ``` diff --git a/src/archive.rs b/src/archive.rs index 379b56b..9d91615 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -309,16 +309,14 @@ impl<'a> Iterator for ArchiveEntryIterator<'a> { // POXIS_1003 long filename check // https://docs.scinet.utoronto.ca/index.php/(POSIX_1003.1_USTAR) - match ( + if ( hdr.magic.as_str(), hdr.version.as_str(), hdr.prefix.is_empty(), - ) { - (Ok("ustar"), Ok("00"), false) => { - filename.append(&hdr.prefix); - filename.append(&TarFormatString::<1>::new([b'/'])); - } - _ => (), + ) == (Ok("ustar"), Ok("00"), false) + { + filename.append(&hdr.prefix); + filename.append(&TarFormatString::<1>::new([b'/'])); } filename.append(&hdr.name); Some(ArchiveEntry::new(filename, file_bytes)) diff --git a/src/lib.rs b/src/lib.rs index eae679d..54f6612 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,23 +60,16 @@ SOFTWARE. //! ```rust //! use tar_no_std::TarArchiveRef; //! -//! fn main() { -//! // log: not mandatory -//! std::env::set_var("RUST_LOG", "trace"); -//! env_logger::init(); -//! -//! // also works in no_std environment (except the println!, of course) -//! let archive = include_bytes!("../tests/gnu_tar_default.tar"); -//! let archive = TarArchiveRef::new(archive).unwrap(); -//! // Vec needs an allocator of course, but the library itself doesn't need one -//! let entries = archive.entries().collect::>(); -//! println!("{:#?}", entries); -//! println!("content of first file:"); -//! println!( -//! "{:#?}", -//! entries[0].data_as_str().expect("Should be valid UTF-8") -//! ); -//! } +//! // init a logger (optional) +//! std::env::set_var("RUST_LOG", "trace"); +//! env_logger::init(); +//! +//! // also works in no_std environment (except the println!, of course) +//! let archive = include_bytes!("../tests/gnu_tar_default.tar"); +//! let archive = TarArchiveRef::new(archive).unwrap(); +//! // Vec needs an allocator of course, but the library itself doesn't need one +//! let entries = archive.entries().collect::>(); +//! println!("{:#?}", entries); //! ``` //! //! ## Cargo Feature diff --git a/src/tar_format_types.rs b/src/tar_format_types.rs index f45f894..7b0ccbf 100644 --- a/src/tar_format_types.rs +++ b/src/tar_format_types.rs @@ -108,7 +108,7 @@ pub struct TarFormatDecimal(TarFormatNumber); impl TarFormatNumber { #[cfg(test)] - fn new(bytes: [u8; N]) -> Self { + const fn new(bytes: [u8; N]) -> Self { Self(TarFormatString:: { bytes }) } @@ -121,7 +121,7 @@ impl TarFormatNumber { } /// Returns the underlying [`TarFormatString`]. - pub fn as_inner(&self) -> &TarFormatString { + pub const fn as_inner(&self) -> &TarFormatString { &self.0 } } @@ -157,7 +157,7 @@ impl TarFormatDecimal { } /// Returns the underlying [`TarFormatString`]. - pub fn as_inner(&self) -> &TarFormatString { + pub const fn as_inner(&self) -> &TarFormatString { self.0.as_inner() } } @@ -171,7 +171,7 @@ impl TarFormatOctal { } /// Returns the underlying [`TarFormatString`]. - pub fn as_inner(&self) -> &TarFormatString { + pub const fn as_inner(&self) -> &TarFormatString { self.0.as_inner() } } From 852599c6a2afb06afd722f344410683b7c9f1ab9 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:51:06 +0200 Subject: [PATCH 16/18] miri: fix error when typeflag is invalid The correct solution here is to check typeflag being valid during runtime --- src/archive.rs | 40 +++++++++++++++++++-------- src/header.rs | 74 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 91 insertions(+), 23 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 9d91615..1a19d21 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -223,15 +223,16 @@ impl<'a> Iterator for ArchiveHeaderIterator<'a> { // We only update the block index for types that have a payload. // In directory entries, for example, the size field has other // semantics. See spec. - if hdr.typeflag.is_regular_file() { - let payload_block_count = hdr - .payload_block_count() - .inspect_err(|e| { - log::error!("Unparsable size ({e:?}) in header {hdr:#?}"); - }) - .ok()?; - - self.next_hdr_block_index += payload_block_count; + if let Ok(typeflag) = hdr.typeflag.try_to_type_flag() { + if typeflag.is_regular_file() { + let payload_block_count = hdr + .payload_block_count() + .inspect_err(|e| { + log::error!("Unparsable size ({e:?}) in header {hdr:#?}"); + }) + .ok()?; + self.next_hdr_block_index += payload_block_count; + } } Some((block_index, hdr)) @@ -266,7 +267,13 @@ impl<'a> Iterator for ArchiveEntryIterator<'a> { // Ignore directory entries, i.e. yield only regular files. Works as // filenames in tarballs are fully specified, e.g. dirA/dirB/file1 - while !hdr.typeflag.is_regular_file() { + while !hdr + .typeflag + .try_to_type_flag() + .inspect_err(|e| error!("Invalid TypeFlag: {e:?}")) + .ok()? + .is_regular_file() + { warn!( "Skipping entry of type {:?} (not supported yet)", hdr.typeflag @@ -361,8 +368,19 @@ mod tests { ) } + /// The test here is that no panics occur. + #[test] + fn test_print_archive_headers() { + let data = include_bytes!("../tests/gnu_tar_default.tar"); + + let iter = ArchiveHeaderIterator::new(data); + let entries = iter.map(|(_, hdr)| hdr).collect::>(); + println!("{:#?}", entries); + } + + /// The test here is that no panics occur. #[test] - fn test_archive_list() { + fn test_print_archive_list() { let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap(); let entries = archive.entries().collect::>(); println!("{:#?}", entries); diff --git a/src/header.rs b/src/header.rs index ae931a6..21b39eb 100644 --- a/src/header.rs +++ b/src/header.rs @@ -31,7 +31,7 @@ SOFTWARE. #![allow(non_upper_case_globals)] use crate::{TarFormatDecimal, TarFormatOctal, TarFormatString, BLOCKSIZE, NAME_LEN, PREFIX_LEN}; -use core::fmt::{Debug, Formatter}; +use core::fmt::{Debug, Display, Formatter}; use core::num::ParseIntError; /// Errors that may happen when parsing the [`ModeFlags`]. @@ -84,7 +84,7 @@ pub struct PosixHeader { pub size: TarFormatOctal<12>, pub mtime: TarFormatDecimal<12>, pub cksum: TarFormatOctal<8>, - pub typeflag: TypeFlag, + pub typeflag: TypeFlagRaw, /// Name. There is always a null byte, therefore /// the max len is 99. pub linkname: TarFormatString, @@ -121,6 +121,35 @@ impl PosixHeader { } } +#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Eq)] +pub struct InvalidTypeFlagError(u8); + +impl Display for InvalidTypeFlagError { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.write_fmt(format_args!("{:x} is not a valid TypeFlag", self.0)) + } +} + +#[cfg(feature = "unstable")] +impl core::error::Error for InvalidTypeFlagError {} + +#[derive(Copy, Clone, PartialOrd, PartialEq, Eq)] +pub struct TypeFlagRaw(u8); + +impl TypeFlagRaw { + /// Tries to parse the underlying value as [`TypeFlag`]. This fails if the + /// Tar file is corrupt and the type is invalid. + pub fn try_to_type_flag(self) -> Result { + TypeFlag::try_from(self) + } +} + +impl Debug for TypeFlagRaw { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + Debug::fmt(&self.try_to_type_flag(), f) + } +} + /// Describes the kind of payload, that follows after a /// [`PosixHeader`]. The properties of this payload are /// described inside the header. @@ -184,6 +213,27 @@ impl TypeFlag { } } +impl TryFrom for TypeFlag { + type Error = InvalidTypeFlagError; + + fn try_from(value: TypeFlagRaw) -> Result { + match value.0 { + b'0' => Ok(Self::REGTYPE), + b'\0' => Ok(Self::AREGTYPE), + b'1' => Ok(Self::LINK), + b'2' => Ok(Self::SYMTYPE), + b'3' => Ok(Self::CHRTYPE), + b'4' => Ok(Self::BLKTYPE), + b'5' => Ok(Self::DIRTYPE), + b'6' => Ok(Self::FIFOTYPE), + b'7' => Ok(Self::CONTTYPE), + b'x' => Ok(Self::XHDTYPE), + b'g' => Ok(Self::XGLTYPE), + e => Err(InvalidTypeFlagError(e)), + } + } +} + bitflags::bitflags! { /// UNIX file permissions in octal format. #[repr(transparent)] @@ -284,24 +334,24 @@ mod tests { fn test_parse_tar_header_filename() { let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); assert_eq!( - archive.typeflag, - TypeFlag::REGTYPE, + archive.typeflag.try_to_type_flag(), + Ok(TypeFlag::REGTYPE), "the first entry is a regular file!" ); assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")); assert_eq!( - archive.typeflag, - TypeFlag::REGTYPE, + archive.typeflag.try_to_type_flag(), + Ok(TypeFlag::REGTYPE), "the first entry is a regular file!" ); assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")); assert_eq!( - archive.typeflag, - TypeFlag::REGTYPE, + archive.typeflag.try_to_type_flag(), + Ok(TypeFlag::REGTYPE), "the first entry is a regular file!" ); assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); @@ -318,8 +368,8 @@ mod tests { let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_ustar.tar")); assert_eq!( - archive.typeflag, - TypeFlag::REGTYPE, + archive.typeflag.try_to_type_flag(), + Ok(TypeFlag::REGTYPE), "the first entry is a regular file!" ); assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); @@ -327,8 +377,8 @@ mod tests { let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")); // ARegType: legacy assert_eq!( - archive.typeflag, - TypeFlag::AREGTYPE, + archive.typeflag.try_to_type_flag(), + Ok(TypeFlag::AREGTYPE), "the first entry is a regular file!" ); assert_eq!(archive.name.as_str(), Ok("bye_world_513b.txt")); From bdc5184feff61a919e6741eafbe1e77246f3cba4 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:51:15 +0200 Subject: [PATCH 17/18] mode: improved Debug print --- src/header.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/header.rs b/src/header.rs index 21b39eb..ef7340c 100644 --- a/src/header.rs +++ b/src/header.rs @@ -56,9 +56,7 @@ impl Mode { impl Debug for Mode { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { - let mut debug = f.debug_tuple("Mode"); - debug.field(&self.to_flags()); - debug.finish() + Debug::fmt(&self.to_flags(), f) } } From 78e6f4c5048595fd6b2175a6cdd32f03b20ae7fc Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 3 May 2024 19:52:15 +0200 Subject: [PATCH 18/18] header: improve file structure --- src/header.rs | 118 +++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/src/header.rs b/src/header.rs index ef7340c..9d2d5de 100644 --- a/src/header.rs +++ b/src/header.rs @@ -60,65 +60,6 @@ impl Debug for Mode { } } -/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990. -/// "New" (version?) GNU Tar versions use this archive format by default. -/// (). -/// -/// Each file is started by such a header, that describes the size and -/// the file name. After that, the file content stands in chunks of 512 bytes. -/// The number of bytes can be derived from the file size. -/// -/// This is also mostly compatible with the "Ustar"-header and the "GNU format". -/// Because this library only needs to fetch data and filename, we don't need -/// further checks. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -#[repr(C, packed)] -pub struct PosixHeader { - pub name: TarFormatString, - pub mode: Mode, - pub uid: TarFormatOctal<8>, - pub gid: TarFormatOctal<8>, - // confusing; size is stored as ASCII string - pub size: TarFormatOctal<12>, - pub mtime: TarFormatDecimal<12>, - pub cksum: TarFormatOctal<8>, - pub typeflag: TypeFlagRaw, - /// Name. There is always a null byte, therefore - /// the max len is 99. - pub linkname: TarFormatString, - pub magic: TarFormatString<6>, - pub version: TarFormatString<2>, - /// Username. There is always a null byte, therefore - /// the max len is N-1. - pub uname: TarFormatString<32>, - /// Groupname. There is always a null byte, therefore - /// the max len is N-1. - pub gname: TarFormatString<32>, - pub dev_major: TarFormatOctal<8>, - pub dev_minor: TarFormatOctal<8>, - pub prefix: TarFormatString, - // padding => to BLOCKSIZE bytes - pub _pad: [u8; 12], -} - -impl PosixHeader { - /// Returns the number of blocks that are required to read the whole file - /// content. Returns an error, if the file size can't be parsed from the - /// header. - pub fn payload_block_count(&self) -> Result { - let parsed_size = self.size.as_number::()?; - Ok(parsed_size.div_ceil(BLOCKSIZE)) - } - - /// A Tar archive is terminated, if an end-of-archive entry, which consists - /// of two 512 blocks of zero bytes, is found. - pub fn is_zero_block(&self) -> bool { - let ptr = self as *const Self as *const u8; - let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) }; - self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE - } -} - #[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Eq)] pub struct InvalidTypeFlagError(u8); @@ -264,6 +205,65 @@ bitflags::bitflags! { } } +/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990. +/// "New" (version?) GNU Tar versions use this archive format by default. +/// (). +/// +/// Each file is started by such a header, that describes the size and +/// the file name. After that, the file content stands in chunks of 512 bytes. +/// The number of bytes can be derived from the file size. +/// +/// This is also mostly compatible with the "Ustar"-header and the "GNU format". +/// Because this library only needs to fetch data and filename, we don't need +/// further checks. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[repr(C, packed)] +pub struct PosixHeader { + pub name: TarFormatString, + pub mode: Mode, + pub uid: TarFormatOctal<8>, + pub gid: TarFormatOctal<8>, + // confusing; size is stored as ASCII string + pub size: TarFormatOctal<12>, + pub mtime: TarFormatDecimal<12>, + pub cksum: TarFormatOctal<8>, + pub typeflag: TypeFlagRaw, + /// Name. There is always a null byte, therefore + /// the max len is 99. + pub linkname: TarFormatString, + pub magic: TarFormatString<6>, + pub version: TarFormatString<2>, + /// Username. There is always a null byte, therefore + /// the max len is N-1. + pub uname: TarFormatString<32>, + /// Groupname. There is always a null byte, therefore + /// the max len is N-1. + pub gname: TarFormatString<32>, + pub dev_major: TarFormatOctal<8>, + pub dev_minor: TarFormatOctal<8>, + pub prefix: TarFormatString, + // padding => to BLOCKSIZE bytes + pub _pad: [u8; 12], +} + +impl PosixHeader { + /// Returns the number of blocks that are required to read the whole file + /// content. Returns an error, if the file size can't be parsed from the + /// header. + pub fn payload_block_count(&self) -> Result { + let parsed_size = self.size.as_number::()?; + Ok(parsed_size.div_ceil(BLOCKSIZE)) + } + + /// A Tar archive is terminated, if an end-of-archive entry, which consists + /// of two 512 blocks of zero bytes, is found. + pub fn is_zero_block(&self) -> bool { + let ptr = self as *const Self as *const u8; + let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) }; + self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE + } +} + #[cfg(test)] mod tests { use crate::header::{PosixHeader, TypeFlag};