Skip to content

Commit

Permalink
Add support for PAX Format, Version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ncihnegn committed Aug 12, 2022
1 parent f4f439c commit 05b9b13
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 24 deletions.
88 changes: 65 additions & 23 deletions src/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::header::{SparseEntry, BLOCK_SIZE};
use crate::other;
use crate::pax::pax_extensions_size;
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
use crate::{Entry, GnuExtSparseHeader, Header};

/// A top-level representation of an archive file.
///
Expand Down Expand Up @@ -260,6 +261,7 @@ impl<'a> EntriesFields<'a> {
fn next_entry_raw(
&mut self,
pax_size: Option<u64>,
pax_extensions: Option<Vec<u8>>,
) -> io::Result<Option<Entry<'a, io::Empty>>> {
let mut header = Header::new_old();
let mut header_pos = self.next;
Expand All @@ -277,14 +279,14 @@ impl<'a> EntriesFields<'a> {
// Otherwise, check if we are ignoring zeros and continue, or break as if this is the
// end of the archive.
if !header.as_bytes().iter().all(|i| *i == 0) {
self.next += 512;
self.next += BLOCK_SIZE as u64;
break;
}

if !self.archive.inner.ignore_zeros {
return Ok(None);
}
self.next += 512;
self.next += BLOCK_SIZE as u64;
header_pos = self.next;
}

Expand Down Expand Up @@ -314,7 +316,7 @@ impl<'a> EntriesFields<'a> {
header: header,
long_pathname: None,
long_linkname: None,
pax_extensions: None,
pax_extensions: pax_extensions,
unpack_xattrs: self.archive.inner.unpack_xattrs,
preserve_permissions: self.archive.inner.preserve_permissions,
preserve_mtime: self.archive.inner.preserve_mtime,
Expand All @@ -325,19 +327,19 @@ impl<'a> EntriesFields<'a> {
// Store where the next entry is, rounding up by 512 bytes (the size of
// a header);
let size = size
.checked_add(511)
.checked_add(BLOCK_SIZE as u64 - 1)
.ok_or_else(|| other("size overflow"))?;
self.next = self
.next
.checked_add(size & !(512 - 1))
.checked_add(size & !(BLOCK_SIZE as u64 - 1))
.ok_or_else(|| other("size overflow"))?;

Ok(Some(ret.into_entry()))
}

fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
if self.raw {
return self.next_entry_raw(None);
return self.next_entry_raw(None, None);
}

let mut gnu_longname = None;
Expand All @@ -347,7 +349,7 @@ impl<'a> EntriesFields<'a> {
let mut processed = 0;
loop {
processed += 1;
let entry = match self.next_entry_raw(pax_size)? {
let entry = match self.next_entry_raw(pax_size, pax_extensions.clone())? {
Some(entry) => entry,
None if processed > 1 => {
return Err(other(
Expand Down Expand Up @@ -394,26 +396,63 @@ impl<'a> EntriesFields<'a> {
if let Some(pax_extensions_ref) = &pax_extensions {
pax_size = pax_extensions_size(pax_extensions_ref);
}
// Not an entry
// Keep pax_extensions for the next ustar header
processed -= 1;
continue;
}

let mut fields = EntryFields::from(entry);
if is_recognized_header && fields.is_pax_sparse() {
gnu_longname = fields.pax_sparse_name();
}
fields.long_pathname = gnu_longname;
fields.long_linkname = gnu_longlink;
fields.pax_extensions = pax_extensions;
pax_extensions = None; // Reset pax_extensions after use
self.parse_sparse_header(&mut fields)?;
return Ok(Some(fields.into_entry()));
}
}

fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
if !entry.header.entry_type().is_gnu_sparse() {
if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() {
return Ok(());
}
let gnu = match entry.header.as_gnu() {
Some(gnu) => gnu,
None => return Err(other("sparse entry type listed but not GNU header")),
};
let mut sparse_map = Vec::<SparseEntry>::new();
let mut real_size = 0;
if entry.is_pax_sparse() {
real_size = entry.pax_sparse_realsize()?;
let mut num_bytes_read = 0;
let mut reader = io::BufReader::with_capacity(BLOCK_SIZE, &self.archive.inner);
let mut read_decimal_line = || -> io::Result<u64> {
let mut str = String::new();
num_bytes_read += reader.read_line(&mut str)?;
str.strip_suffix("\n")
.and_then(|s| s.parse::<u64>().ok())
.ok_or_else(|| other("Can't read a line"))
};

let num_entries = read_decimal_line()?;
for _ in 0..num_entries {
let offset = read_decimal_line()?;
let size = read_decimal_line()?;
sparse_map.push(SparseEntry { offset, size });
}
let rem = BLOCK_SIZE - (num_bytes_read % BLOCK_SIZE);
entry.size -= (num_bytes_read + rem) as u64;
} else if entry.header.entry_type().is_gnu_sparse() {
let gnu = match entry.header.as_gnu() {
Some(gnu) => gnu,
None => return Err(other("sparse entry type listed but not GNU header")),
};
real_size = gnu.real_size()?;
for block in gnu.sparse.iter() {
let offset = block.offset()?;
let size = block.length()?;
sparse_map.push(SparseEntry { offset, size });
}
}

// Sparse files are represented internally as a list of blocks that are
// read. Blocks are either a bunch of 0's or they're data from the
Expand Down Expand Up @@ -442,13 +481,13 @@ impl<'a> EntriesFields<'a> {
let data = &mut entry.data;
let reader = &self.archive.inner;
let size = entry.size;
let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
let mut add_block = |block: &SparseEntry| -> io::Result<_> {
if block.is_empty() {
return Ok(());
}
let off = block.offset()?;
let len = block.length()?;
if len != 0 && (size - remaining) % 512 != 0 {
let off = block.offset;
let len = block.size;
if len != 0 && (size - remaining) % BLOCK_SIZE as u64 != 0 {
return Err(other(
"previous block in sparse file was not \
aligned to 512-byte boundary",
Expand All @@ -474,25 +513,28 @@ impl<'a> EntriesFields<'a> {
data.push(EntryIo::Data(reader.take(len)));
Ok(())
};
for block in gnu.sparse.iter() {
add_block(block)?
for block in sparse_map {
add_block(&block)?
}
if gnu.is_extended() {
if let Some(gnu) = entry.header.as_gnu() && gnu.is_extended() {
let mut ext = GnuExtSparseHeader::new();
ext.isextended[0] = 1;
while ext.is_extended() {
if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
return Err(other("failed to read extension"));
}

self.next += 512;
self.next += BLOCK_SIZE as u64;
for block in ext.sparse.iter() {
add_block(block)?;
add_block(&SparseEntry {
offset: block.offset()?,
size: block.length()?,
})?;
}
}
}
}
if cur != gnu.real_size()? {
if cur != real_size {
return Err(other(
"mismatch in sparse file chunks and \
size in header",
Expand Down
38 changes: 38 additions & 0 deletions src/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,44 @@ impl<'a> EntryFields<'a> {
self.read_to_end(&mut v).map(|_| v)
}

/// Returns `true` when the PAX extensions attached to this entry contain a
/// `GNU.sparse.major` record.
///
/// Records that fail to parse are skipped rather than reported, and an
/// entry with no PAX extensions at all is never considered PAX-sparse.
/// Only the presence of the key is checked; its value is not inspected.
pub fn is_pax_sparse(&self) -> bool {
    match self.pax_extensions {
        Some(ref pax) => PaxExtensions::new(pax)
            .filter_map(|record| record.ok())
            .any(|field| field.key_bytes() == b"GNU.sparse.major"),
        None => false,
    }
}

/// Returns an owned copy of the value of the `GNU.sparse.name` PAX record.
///
/// Yields `None` when the entry has no PAX extensions or when no such
/// record is found. Records that fail to parse are skipped.
pub fn pax_sparse_name(&self) -> Option<Vec<u8>> {
    let pax = self.pax_extensions.as_ref()?;
    PaxExtensions::new(pax)
        .filter_map(|record| record.ok())
        .find(|field| field.key_bytes() == b"GNU.sparse.name")
        .map(|field| field.value_bytes().to_vec())
}

pub fn pax_sparse_realsize(&mut self) -> io::Result<u64> {
if let Some(ref pax) = self.pax_extensions {
let pax = PaxExtensions::new(pax)
.filter_map(|f| f.ok())
.find(|f| f.key_bytes() == b"GNU.sparse.realsize")
.map(|f| f.value_bytes());
if let Some(field) = pax {
let str =
std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?;
return str
.parse::<u64>()
.map_err(|_| other("failed to parse the real size"));
}
}
Err(other("PAX extension GNU.sparse.realsize not found"))
}

fn path(&self) -> io::Result<Cow<Path>> {
bytes2path(self.path_bytes())
}
Expand Down
18 changes: 17 additions & 1 deletion src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ use std::str;
use crate::other;
use crate::EntryType;

/// Size in bytes of one archive block: the length of a header's byte array
/// and the unit that entry positions are rounded up to.
pub const BLOCK_SIZE: usize = 512;

/// Representation of the header of an entry in an archive
#[repr(C)]
#[allow(missing_docs)]
pub struct Header {
bytes: [u8; 512],
bytes: [u8; BLOCK_SIZE],
}

/// Declares the information that should be included when filling a Header
Expand Down Expand Up @@ -110,6 +112,13 @@ pub struct GnuHeader {
pub pad: [u8; 17],
}

/// Description of a sparse entry.
///
/// One chunk of data in a sparse file, with both fields held as plain
/// integers rather than in an on-disk header encoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SparseEntry {
    /// Offset of the chunk of data.
    pub offset: u64,
    /// Number of bytes in the chunk of data.
    pub size: u64,
}

/// Description of the header of a sparse entry.
///
/// Specifies the offset/number of bytes of a chunk of data in octal.
Expand Down Expand Up @@ -1309,6 +1318,13 @@ impl<'a> fmt::Debug for DebugSparseHeaders<'a> {
}
}

impl SparseEntry {
    /// Reports whether this chunk carries no data, i.e. its `size` is zero.
    ///
    /// The `offset` field is not consulted.
    pub fn is_empty(&self) -> bool {
        matches!(self.size, 0)
    }
}

impl GnuSparseHeader {
/// Returns true if block is empty
pub fn is_empty(&self) -> bool {
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#![doc(html_root_url = "https://docs.rs/tar/0.4")]
#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![feature(let_chains)]

use std::io::{Error, ErrorKind};

Expand Down

0 comments on commit 05b9b13

Please sign in to comment.