From 742ea87d41856c34a53c163970f5d5ff8db73005 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Thu, 15 Aug 2024 15:31:01 +0900 Subject: [PATCH] Merge .ARM.exidx entries This change should make our output for ARM32 a little bit smaller. --- src/arch-arm32.cc | 88 +++++++++++++++++++++++++++++++------------- src/main.cc | 4 ++ src/mold.h | 18 +++++++++ src/output-chunks.cc | 4 +- src/passes.cc | 3 -- 5 files changed, 87 insertions(+), 30 deletions(-) diff --git a/src/arch-arm32.cc b/src/arch-arm32.cc index 5be1ef3b38..a2202e0ec0 100644 --- a/src/arch-arm32.cc +++ b/src/arch-arm32.cc @@ -710,6 +710,45 @@ u64 get_eflags(Context &ctx) { return EF_ARM_EABI_VER5; } +void create_arm_exidx_section(Context &ctx) { + for (i64 i = 0; i < ctx.chunks.size(); i++) { + if (ctx.chunks[i]->shdr.sh_type == SHT_ARM_EXIDX) { + auto *sec = new Arm32ExidxSection(ctx, *ctx.chunks[i]->to_osec()); + ctx.extra.exidx = sec; + ctx.chunks[i] = sec; + ctx.chunk_pool.emplace_back(sec); + break; + } + } +} + +Arm32ExidxSection::Arm32ExidxSection(Context &ctx, + OutputSection &osec) + : output_section(osec) { + this->name = ".ARM.exidx"; + this->shdr.sh_type = SHT_ARM_EXIDX; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 4; + this->shdr.sh_size = get_contents(ctx).size(); + this->sect_order = osec.sect_order; + + for (InputSection *isec : osec.members) + isec->is_alive = false; +} + +void Arm32ExidxSection::update_shdr(Context &ctx) { + // .ARM.exidx's sh_link should be set to the .text section index. + // Runtime doesn't care about it, but the binutils's strip command does. 
+ if (Chunk *chunk = find_chunk(ctx, ".text")) + this->shdr.sh_link = chunk->shndx; +} + +void Arm32ExidxSection::copy_buf(Context &ctx) { + std::vector contents = get_contents(ctx); + assert(this->shdr.sh_size == contents.size()); + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + // ARM executables use an .ARM.exidx section to look up an exception // handling record for the current instruction pointer. The table needs // to be sorted by their addresses. @@ -718,17 +757,12 @@ u64 get_eflags(Context &ctx) { // I don't know why only ARM uses the different mechanism, but it's // likely that it's due to some historical reason. // -// This function sorts .ARM.exidx records. -void fixup_arm_exidx_section(Context &ctx) { - Timer t(ctx, "fixup_arm_exidx_section"); - - Chunk *chunk = find_chunk(ctx, SHT_ARM_EXIDX); - if (!chunk) - return; +// This function returns contents of .ARM.exidx. +std::vector Arm32ExidxSection::get_contents(Context &ctx) { + std::vector buf(output_section.shdr.sh_size); - OutputSection *osec = chunk->to_osec(); - if (!osec) - return; + output_section.shdr.sh_addr = this->shdr.sh_addr; + output_section.write_to(ctx, buf.data()); // .ARM.exidx records consists of a signed 31-bit relative address // and a 32-bit value. The relative address indicates the start @@ -742,24 +776,24 @@ void fixup_arm_exidx_section(Context &ctx) { // // CANTUNWIND is value 1. The most significant bit is set in (2) but // not in (3). So we can distinguished them just by looking at a value. - const u32 EXIDX_CANTUNWIND = 1; + const u32 CANTUNWIND = 1; struct Entry { ul32 addr; ul32 val; }; - if (osec->shdr.sh_size % sizeof(Entry)) + if (buf.size() % sizeof(Entry)) Fatal(ctx) << "invalid .ARM.exidx section size"; - Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset); - i64 num_entries = osec->shdr.sh_size / sizeof(Entry); + Entry *ent = (Entry *)buf.data(); + i64 num_entries = buf.size() / sizeof(Entry); // Entry's addresses are relative to themselves. 
In order to sort - // records by addresses, we first translate them so that the addresses + // records by address, we first translate them so that the addresses // are relative to the beginning of the section. auto is_relative = [](u32 val) { - return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000); + return val != CANTUNWIND && !(val & 0x8000'0000); }; tbb::parallel_for((i64)0, num_entries, [&](i64 i) { @@ -769,10 +803,21 @@ void fixup_arm_exidx_section(Context &ctx) { ent[i].val = 0x7fff'ffff & (ent[i].val + offset); }); - tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { + std::sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { return a.addr < b.addr; }); + // Remove duplicate adjacent entries. That is, if two adjacent functions + // have the same compact unwind info or are both CANTUNWIND, we can + // merge them into a single range. + auto it = std::unique(ent, ent + num_entries, + [](const Entry &a, const Entry &b) { + return a.val == b.val; + }); + + num_entries = it - ent; + buf.resize(num_entries * sizeof(Entry)); + // Make addresses relative to themselves. tbb::parallel_for((i64)0, num_entries, [&](i64 i) { i64 offset = sizeof(Entry) * i; @@ -781,14 +826,7 @@ void fixup_arm_exidx_section(Context &ctx) { ent[i].val = 0x7fff'ffff & (ent[i].val - offset); }); - // .ARM.exidx's sh_link should be set to the .text section index. - // Runtime doesn't care about it, but the binutils's strip command does. - if (ctx.shdr) { - if (Chunk *text = find_chunk(ctx, ".text")) { - osec->shdr.sh_link = text->shndx; - ctx.shdr->copy_buf(ctx); - } - } + return buf; } } // namespace mold diff --git a/src/main.cc b/src/main.cc index 4b0f2e5a7a..502a5d6d6b 100644 --- a/src/main.cc +++ b/src/main.cc @@ -579,6 +579,10 @@ int mold_main(int argc, char **argv) { if (ctx.arg.pack_dyn_relocs_relr) construct_relr(ctx); + // Convert an .ARM.exidx to a synthetic section. 
+ if constexpr (is_arm32) + create_arm_exidx_section(ctx); + // Reserve a space for dynamic symbol strings in .dynstr and sort // .dynsym contents if necessary. Beyond this point, no symbol will // be added to .dynsym. diff --git a/src/mold.h b/src/mold.h index b6ab7808ea..5447e48026 100644 --- a/src/mold.h +++ b/src/mold.h @@ -1528,7 +1528,20 @@ void rewrite_endbr(Context &ctx); // arch-arm32.cc // +class Arm32ExidxSection : public Chunk { +public: + Arm32ExidxSection(Context &ctx, OutputSection &osec); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + +private: + std::vector get_contents(Context &ctx); + + OutputSection &output_section; +}; + template <> u64 get_eflags(Context &ctx); +void create_arm_exidx_section(Context &ctx); void fixup_arm_exidx_section(Context &ctx); // @@ -1705,6 +1718,11 @@ struct SectionOrder { template struct ContextExtras {}; +template <> +struct ContextExtras { + Arm32ExidxSection *exidx = nullptr; +}; + template struct ContextExtras { RiscvAttributesSection *riscv_attributes = nullptr; diff --git a/src/output-chunks.cc b/src/output-chunks.cc index b3f2645986..68fe942615 100644 --- a/src/output-chunks.cc +++ b/src/output-chunks.cc @@ -295,8 +295,8 @@ static std::vector> create_phdr(Context &ctx) { // Create a PT_ARM_EDXIDX if constexpr (is_arm32) - if (Chunk *chunk = find_chunk(ctx, SHT_ARM_EXIDX)) - define(PT_ARM_EXIDX, PF_R, chunk); + if (ctx.extra.exidx) + define(PT_ARM_EXIDX, PF_R, ctx.extra.exidx); // Create a PT_RISCV_ATTRIBUTES if constexpr (is_riscv) diff --git a/src/passes.cc b/src/passes.cc index 7b9c1270a3..4ffaf1186f 100644 --- a/src/passes.cc +++ b/src/passes.cc @@ -1662,9 +1662,6 @@ void copy_chunks(Context &ctx) { // undefined errors. report_undef_errors(ctx); - if constexpr (is_arm32) - fixup_arm_exidx_section(ctx); - // Zero-clear paddings between chunks auto zero = [&](Chunk *chunk, i64 next_start) { i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;