From 05b9b13a059ef00c83b08cf70e28536c47be48a5 Mon Sep 17 00:00:00 2001 From: ncihnegn Date: Fri, 12 Aug 2022 02:25:37 -0700 Subject: [PATCH] Add support for PAX Format, Version 1.0 --- src/archive.rs | 88 +++++++++++++++++++++++++++++++++++++------------- src/entry.rs | 38 ++++++++++++++++++++++ src/header.rs | 18 ++++++++++- src/lib.rs | 1 + 4 files changed, 121 insertions(+), 24 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index e875124a..9afb1417 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -9,9 +9,10 @@ use std::path::Path; use crate::entry::{EntryFields, EntryIo}; use crate::error::TarError; +use crate::header::{SparseEntry, BLOCK_SIZE}; use crate::other; use crate::pax::pax_extensions_size; -use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; +use crate::{Entry, GnuExtSparseHeader, Header}; /// A top-level representation of an archive file. /// @@ -260,6 +261,7 @@ impl<'a> EntriesFields<'a> { fn next_entry_raw( &mut self, pax_size: Option, + pax_extensions: Option>, ) -> io::Result>> { let mut header = Header::new_old(); let mut header_pos = self.next; @@ -277,14 +279,14 @@ impl<'a> EntriesFields<'a> { // Otherwise, check if we are ignoring zeros and continue, or break as if this is the // end of the archive. if !header.as_bytes().iter().all(|i| *i == 0) { - self.next += 512; + self.next += BLOCK_SIZE as u64; break; } if !self.archive.inner.ignore_zeros { return Ok(None); } - self.next += 512; + self.next += BLOCK_SIZE as u64; header_pos = self.next; } @@ -314,7 +316,7 @@ impl<'a> EntriesFields<'a> { header: header, long_pathname: None, long_linkname: None, - pax_extensions: None, + pax_extensions: pax_extensions, unpack_xattrs: self.archive.inner.unpack_xattrs, preserve_permissions: self.archive.inner.preserve_permissions, preserve_mtime: self.archive.inner.preserve_mtime, @@ -325,11 +327,11 @@ impl<'a> EntriesFields<'a> { // Store where the next entry is, rounding up by 512 bytes (the size of // a header); let size = size - .checked_add(511) + .checked_add(BLOCK_SIZE as u64 - 1) .ok_or_else(|| other("size overflow"))?; self.next = self .next - .checked_add(size & !(512 - 1)) + .checked_add(size & !(BLOCK_SIZE as u64 - 1)) .ok_or_else(|| other("size overflow"))?; Ok(Some(ret.into_entry())) @@ -337,7 +339,7 @@ impl<'a> EntriesFields<'a> { fn next_entry(&mut self) -> io::Result>> { if self.raw { - return self.next_entry_raw(None); + return self.next_entry_raw(None, None); } let mut gnu_longname = None; @@ -347,7 +349,7 @@ impl<'a> EntriesFields<'a> { let mut processed = 0; loop { processed += 1; - let entry = match self.next_entry_raw(pax_size)? { + let entry = match self.next_entry_raw(pax_size, pax_extensions.clone())? { Some(entry) => entry, None if processed > 1 => { return Err(other( @@ -394,26 +396,63 @@ impl<'a> EntriesFields<'a> { if let Some(pax_extensions_ref) = &pax_extensions { pax_size = pax_extensions_size(pax_extensions_ref); } + // Not an entry + // Keep pax_extensions for the next ustar header + processed -= 1; continue; } let mut fields = EntryFields::from(entry); + if is_recognized_header && fields.is_pax_sparse() { + gnu_longname = fields.pax_sparse_name(); + } fields.long_pathname = gnu_longname; fields.long_linkname = gnu_longlink; fields.pax_extensions = pax_extensions; + pax_extensions = None; // Reset pax_extensions after use self.parse_sparse_header(&mut fields)?; return Ok(Some(fields.into_entry())); } } fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { - if !entry.header.entry_type().is_gnu_sparse() { + if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() { return Ok(()); } - let gnu = match entry.header.as_gnu() { - Some(gnu) => gnu, - None => return Err(other("sparse entry type listed but not GNU header")), - }; + let mut sparse_map = Vec::::new(); + let mut real_size = 0; + if entry.is_pax_sparse() { + real_size = entry.pax_sparse_realsize()?; + let mut num_bytes_read = 0; + let mut reader = io::BufReader::with_capacity(BLOCK_SIZE, &self.archive.inner); + let mut read_decimal_line = || -> io::Result { + let mut str = String::new(); + num_bytes_read += reader.read_line(&mut str)?; + str.strip_suffix("\n") + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| other("Can't read a line")) + }; + + let num_entries = read_decimal_line()?; + for _ in 0..num_entries { + let offset = read_decimal_line()?; + let size = read_decimal_line()?; + sparse_map.push(SparseEntry { offset, size }); + } + let rem = BLOCK_SIZE - (num_bytes_read % BLOCK_SIZE); + entry.size -= (num_bytes_read + rem) as u64; + } else if entry.header.entry_type().is_gnu_sparse() { + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Err(other("sparse entry type listed but not GNU header")), + }; + real_size = gnu.real_size()?; + for block in gnu.sparse.iter() { + let offset = block.offset()?; + let size = block.length()?; + sparse_map.push(SparseEntry { offset, size }); + } + } // Sparse files are represented internally as a list of blocks that are // read. Blocks are either a bunch of 0's or they're data from the @@ -442,13 +481,13 @@ impl<'a> EntriesFields<'a> { let data = &mut entry.data; let reader = &self.archive.inner; let size = entry.size; - let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + let mut add_block = |block: &SparseEntry| -> io::Result<_> { if block.is_empty() { return Ok(()); } - let off = block.offset()?; - let len = block.length()?; - if len != 0 && (size - remaining) % 512 != 0 { + let off = block.offset; + let len = block.size; + if len != 0 && (size - remaining) % BLOCK_SIZE as u64 != 0 { return Err(other( "previous block in sparse file was not \ aligned to 512-byte boundary", @@ -474,10 +513,10 @@ impl<'a> EntriesFields<'a> { data.push(EntryIo::Data(reader.take(len))); Ok(()) }; - for block in gnu.sparse.iter() { - add_block(block)? + for block in sparse_map { + add_block(&block)? } - if gnu.is_extended() { + if let Some(gnu) = entry.header.as_gnu() && gnu.is_extended() { let mut ext = GnuExtSparseHeader::new(); ext.isextended[0] = 1; while ext.is_extended() { @@ -485,14 +524,17 @@ impl<'a> EntriesFields<'a> { return Err(other("failed to read extension")); } - self.next += 512; + self.next += BLOCK_SIZE as u64; for block in ext.sparse.iter() { - add_block(block)?; + add_block(&SparseEntry { + offset: block.offset()?, + size: block.length()?, + })?; } } } } - if cur != gnu.real_size()? { + if cur != real_size { return Err(other( "mismatch in sparse file chunks and \ size in header", diff --git a/src/entry.rs b/src/entry.rs index cce39d45..3dcfc1ef 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -285,6 +285,44 @@ impl<'a> EntryFields<'a> { self.read_to_end(&mut v).map(|_| v) } + pub fn is_pax_sparse(&mut self) -> bool { + if let Some(ref pax) = self.pax_extensions { + return PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"GNU.sparse.major") + .map(|f| f.value_bytes()) + .is_some(); + } + false + } + + pub fn pax_sparse_name(&mut self) -> Option> { + if let Some(ref pax) = self.pax_extensions { + return PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"GNU.sparse.name") + .map(|f| f.value_bytes().to_vec()); + } + None + } + + pub fn pax_sparse_realsize(&mut self) -> io::Result { + if let Some(ref pax) = self.pax_extensions { + let pax = PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"GNU.sparse.realsize") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + let str = + std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?; + return str + .parse::() + .map_err(|_| other("failed to parse the real size")); + } + } + Err(other("PAX extension GNU.sparse.realsize not found")) + } + fn path(&self) -> io::Result> { bytes2path(self.path_bytes()) } diff --git a/src/header.rs b/src/header.rs index 7e507fc7..cda307d1 100644 --- a/src/header.rs +++ b/src/header.rs @@ -16,11 +16,13 @@ use std::str; use crate::other; use crate::EntryType; +pub const BLOCK_SIZE: usize = 512; + /// Representation of the header of an entry in an archive #[repr(C)] #[allow(missing_docs)] pub struct Header { - bytes: [u8; 512], + bytes: [u8; BLOCK_SIZE], } /// Declares the information that should be included when filling a Header @@ -110,6 +112,13 @@ pub struct GnuHeader { pub pad: [u8; 17], } +/// Description of a spare entry. +#[derive(Debug)] +pub struct SparseEntry { + pub offset: u64, + pub size: u64, +} + /// Description of the header of a spare entry. /// /// Specifies the offset/number of bytes of a chunk of data in octal. @@ -1309,6 +1318,13 @@ impl<'a> fmt::Debug for DebugSparseHeaders<'a> { } } +impl SparseEntry { + /// Returns true if block is empty + pub fn is_empty(&self) -> bool { + self.size == 0 + } +} + impl GnuSparseHeader { /// Returns true if block is empty pub fn is_empty(&self) -> bool { diff --git a/src/lib.rs b/src/lib.rs index 52251cd2..8866dbce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,7 @@ #![doc(html_root_url = "https://docs.rs/tar/0.4")] #![deny(missing_docs)] #![cfg_attr(test, deny(warnings))] +#![feature(let_chains)] use std::io::{Error, ErrorKind};