diff --git a/src/read/elf/section.rs b/src/read/elf/section.rs index 6e0e615e..f175d84d 100644 --- a/src/read/elf/section.rs +++ b/src/read/elf/section.rs @@ -5,8 +5,9 @@ use crate::elf; use crate::endian::{self, Endianness, U32Bytes}; use crate::pod::{self, Pod}; use crate::read::{ - self, CompressedData, CompressedFileRange, CompressionFormat, Error, ObjectSection, ReadError, - ReadRef, RelocationMap, SectionFlags, SectionIndex, SectionKind, StringTable, + self, gnu_compression, CompressedData, CompressedFileRange, CompressionFormat, Error, + ObjectSection, ReadError, ReadRef, RelocationMap, SectionFlags, SectionIndex, SectionKind, + StringTable, }; use super::{ @@ -508,46 +509,19 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSection<'data, 'file, } } - /// Try GNU-style "ZLIB" header decompression. + // Try GNU-style "ZLIB" header decompression. fn maybe_compressed_gnu(&self) -> read::Result<Option<CompressedFileRange>> { - let name = match self.name() { - Ok(name) => name, - // I think it's ok to ignore this error? - Err(_) => return Ok(None), - }; - if !name.starts_with(".zdebug_") { + if !self + .name() + .map_or(false, |name| name.starts_with(".zdebug_")) + { return Ok(None); } let (section_offset, section_size) = self - .section - .file_range(self.file.endian) + .file_range() .read_error("Invalid ELF GNU compressed section type")?; - let mut offset = section_offset; - let data = self.file.data; - // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally - // huge allocations. This also reduces the chance of accidentally matching on a - // .debug_str that happens to start with "ZLIB". - if data - .read_bytes(&mut offset, 8) - .read_error("ELF GNU compressed section is too short")? - != b"ZLIB\0\0\0\0" - { - return Err(Error("Invalid ELF GNU compressed section header")); - } - let uncompressed_size = data - .read::<U32Bytes<_>>(&mut offset) - .read_error("ELF GNU compressed section is too short")? 
- .get(endian::BigEndian) - .into(); - let compressed_size = section_size - .checked_sub(offset - section_offset) - .read_error("ELF GNU compressed section is too short")?; - Ok(Some(CompressedFileRange { - format: CompressionFormat::Zlib, - offset, - compressed_size, - uncompressed_size, - })) + gnu_compression::compressed_file_range(self.file.data, section_offset, section_size) + .map(Some) } } diff --git a/src/read/gnu_compression.rs b/src/read/gnu_compression.rs new file mode 100644 index 00000000..7ef7d91e --- /dev/null +++ b/src/read/gnu_compression.rs @@ -0,0 +1,36 @@ +use crate::read::{self, Error, ReadError as _}; +use crate::{endian, CompressedFileRange, CompressionFormat, ReadRef, U32Bytes}; + +// Attempt to parse the CompressedFileRange for a section using the GNU-style +// inline compression header format. This is used by the Go compiler in Mach-O files +// as well as by the GNU linker in some ELF files. +pub(super) fn compressed_file_range<'data, R: ReadRef<'data>>( + file_data: R, + section_offset: u64, + section_size: u64, +) -> read::Result<CompressedFileRange> { + let mut offset = section_offset; + // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally + // huge allocations. This also reduces the chance of accidentally matching on a + // .debug_str that happens to start with "ZLIB". + let header = file_data + .read_bytes(&mut offset, 8) + .read_error("GNU compressed section is too short")?; + if header != b"ZLIB\0\0\0\0" { + return Err(Error("Invalid GNU compressed section header")); + } + let uncompressed_size = file_data + .read::<U32Bytes<_>>(&mut offset) + .read_error("GNU compressed section is too short")? 
+ .get(endian::BigEndian) + .into(); + let compressed_size = section_size + .checked_sub(offset - section_offset) + .read_error("GNU compressed section is too short")?; + Ok(CompressedFileRange { + format: CompressionFormat::Zlib, + offset, + compressed_size, + uncompressed_size, + }) +} diff --git a/src/read/macho/file.rs b/src/read/macho/file.rs index 9c62f71c..d3721cd1 100644 --- a/src/read/macho/file.rs +++ b/src/read/macho/file.rs @@ -287,32 +287,32 @@ where &'file self, section_name: &[u8], ) -> Option<MachOSection<'data, 'file, Mach, R>> { - // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg - // ".debug_info" to "__debug_info", and limit to 16 bytes total. - let system_name = if section_name.starts_with(b".") { - if section_name.len() > 15 { - Some(&section_name[1..15]) - } else { - Some(&section_name[1..]) - } - } else { - None - }; - let cmp_section_name = |section: &MachOSection<'data, 'file, Mach, R>| { - section - .name_bytes() - .map(|name| { - section_name == name - || system_name - .filter(|system_name| { - name.starts_with(b"__") && name[2..] == **system_name - }) - .is_some() - }) - .unwrap_or(false) + // Translate the section_name by stripping the query_prefix to construct + // a function that matches names starting with name_prefix, taking into + // consideration the maximum section name length. + let make_prefix_matcher = |query_prefix: &'static [u8], name_prefix: &'static [u8]| { + const MAX_SECTION_NAME_LEN: usize = 16; + let suffix = section_name.strip_prefix(query_prefix).map(|suffix| { + let max_len = MAX_SECTION_NAME_LEN - name_prefix.len(); + &suffix[..suffix.len().min(max_len)] + }); + move |name: &[u8]| suffix.is_some() && name.strip_prefix(name_prefix) == suffix }; - - self.sections().find(cmp_section_name) + // Matches "__text" when searching for ".text" and "__debug_str_offs" + // when searching for ".debug_str_offsets", as is common in + // macOS/Mach-O. 
+ let matches_underscores_prefix = make_prefix_matcher(b".", b"__"); + // Matches "__zdebug_info" when searching for ".debug_info" and + // "__zdebug_str_off" when searching for ".debug_str_offsets", as is + // used by Go when using GNU-style compression. + let matches_zdebug_prefix = make_prefix_matcher(b".debug_", b"__zdebug_"); + self.sections().find(|section| { + section.name_bytes().map_or(false, |name| { + name == section_name + || matches_underscores_prefix(name) + || matches_zdebug_prefix(name) + }) + }) } fn section_by_index(&self, index: SectionIndex) -> Result> { diff --git a/src/read/macho/section.rs b/src/read/macho/section.rs index 87bcee39..d27a9a9f 100644 --- a/src/read/macho/section.rs +++ b/src/read/macho/section.rs @@ -5,8 +5,8 @@ use crate::endian::{self, Endianness}; use crate::macho; use crate::pod::Pod; use crate::read::{ - self, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, RelocationMap, - Result, SectionFlags, SectionIndex, SectionKind, + self, gnu_compression, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, + RelocationMap, Result, SectionFlags, SectionIndex, SectionKind, }; use super::{MachHeader, MachOFile, MachORelocationIterator}; @@ -102,6 +102,21 @@ where .data(self.file.endian, self.internal.data) .read_error("Invalid Mach-O section size or offset") } + + // Try GNU-style "ZLIB" header decompression. 
+ fn maybe_compressed_gnu(&self) -> Result<Option<CompressedFileRange>> { + if !self + .name() + .map_or(false, |name| name.starts_with("__zdebug_")) + { + return Ok(None); + } + let (section_offset, section_size) = self + .file_range() + .read_error("Invalid ELF GNU compressed section type")?; + gnu_compression::compressed_file_range(self.internal.data, section_offset, section_size) + .map(Some) + } } impl<'data, 'file, Mach, R> read::private::Sealed for MachOSection<'data, 'file, Mach, R> @@ -162,14 +177,16 @@ where )) } - #[inline] fn compressed_file_range(&self) -> Result<CompressedFileRange> { - Ok(CompressedFileRange::none(self.file_range())) + Ok(if let Some(data) = self.maybe_compressed_gnu()? { + data + } else { + CompressedFileRange::none(self.file_range()) + }) } - #[inline] - fn compressed_data(&self) -> Result<CompressedData<'data>> { - self.data().map(CompressedData::none) + fn compressed_data(&self) -> read::Result<CompressedData<'data>> { + self.compressed_file_range()?.data(self.file.data) } #[inline] diff --git a/src/read/mod.rs b/src/read/mod.rs index 7b1ae838..50bcd7b2 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -61,6 +61,9 @@ pub use read_cache::*; mod util; pub use util::*; +#[cfg(any(feature = "elf", feature = "macho"))] +mod gnu_compression; + #[cfg(any( feature = "coff", feature = "elf", diff --git a/src/read/traits.rs b/src/read/traits.rs index bbf39a24..cbc93fc7 100644 --- a/src/read/traits.rs +++ b/src/read/traits.rs @@ -121,16 +121,18 @@ pub trait Object<'data>: read::private::Sealed { /// Get the section named `section_name`, if such a section exists. /// - /// If `section_name` starts with a '.' then it is treated as a system section name, - /// and is compared using the conventions specific to the object file format. This - /// includes: - /// - if ".debug_str_offsets" is requested for a Mach-O object file, then the actual - /// section name that is searched for is "__debug_str_offs". + /// If `section_name` starts with a '.' 
then it is treated as a system + /// section name, and is compared using the conventions specific to the + /// object file format. This includes: + /// - if ".debug_str_offsets" is requested for a Mach-O object file, then + /// the actual section name that is searched for is "__debug_str_offs". /// - if ".debug_info" is requested for an ELF object file, then - /// ".zdebug_info" may be returned (and similarly for other debug sections). + /// ".zdebug_info" may be returned (and similarly for other debug + /// sections). Similarly, if ".debug_info" is requested for a Mach-O + /// object file, then "__zdebug_info" may be returned. /// - /// For some object files, multiple segments may contain sections with the same - /// name. In this case, the first matching section will be used. + /// For some object files, multiple segments may contain sections with the + /// same name. In this case, the first matching section will be used. /// /// This method skips over sections with invalid names. fn section_by_name(&self, section_name: &str) -> Option<Self::Section<'_>> { diff --git a/testfiles b/testfiles index 14d80667..cebc8967 160000 --- a/testfiles +++ b/testfiles @@ -1 +1 @@ -Subproject commit 14d806678c02be9b6571d9af402eea0da1bde802 +Subproject commit cebc89674360f005d415bae42ca66fa4bbfe40a5 diff --git a/tests/read/macho.rs b/tests/read/macho.rs new file mode 100644 index 00000000..59f314be --- /dev/null +++ b/tests/read/macho.rs @@ -0,0 +1,49 @@ +#[cfg(feature = "std")] +use object::{Object, ObjectSection as _}; + +// Test that we can read compressed sections in Mach-O files as produced +// by the Go compiler. +#[cfg(feature = "std")] +#[test] +fn test_go_macho() { + let macho_testfiles = std::path::Path::new("testfiles/macho"); + + // Section names we expect to find, whether they should be + // compressed, and the actual name of the section in the file. 
+ const EXPECTED: &[(&str, bool, &str)] = &[ + (".debug_abbrev", true, "__zdebug_abbrev"), + (".debug_gdb_scripts", false, "__debug_gdb_scri"), + (".debug_ranges", true, "__zdebug_ranges"), + ("__data", false, "__data"), + ]; + + for file in &["go-aarch64", "go-x86_64"] { + let path = macho_testfiles.join(file); + let file = std::fs::File::open(path).unwrap(); + let reader = object::read::ReadCache::new(file); + let object = object::read::File::parse(&reader).unwrap(); + for &(name, compressed, actual_name) in EXPECTED { + let section = object.section_by_name(name).unwrap(); + assert_eq!(section.name(), Ok(actual_name)); + let compressed_file_range = section.compressed_file_range().unwrap(); + let size = section.size(); + if compressed { + assert_eq!( + compressed_file_range.format, + object::CompressionFormat::Zlib + ); + assert_eq!(compressed_file_range.compressed_size, size - 12); + assert!( + compressed_file_range.uncompressed_size > compressed_file_range.compressed_size, + "decompressed size is greater than compressed size" + ); + } else { + assert_eq!( + compressed_file_range.format, + object::CompressionFormat::None + ); + assert_eq!(compressed_file_range.compressed_size, size); + } + } + } +} diff --git a/tests/read/mod.rs b/tests/read/mod.rs index ef402104..48e005ee 100644 --- a/tests/read/mod.rs +++ b/tests/read/mod.rs @@ -2,3 +2,4 @@ mod coff; mod elf; +mod macho;