Skip to content

Commit

Permalink
Merge .ARM.exidx entries
Browse files Browse the repository at this point in the history
This change should make our output for ARM32 a little bit smaller.
  • Loading branch information
rui314 committed Aug 15, 2024
1 parent fa54696 commit 742ea87
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 30 deletions.
88 changes: 63 additions & 25 deletions src/arch-arm32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,45 @@ u64 get_eflags(Context<E> &ctx) {
return EF_ARM_EABI_VER5;
}

// Replaces the first SHT_ARM_EXIDX output section found in ctx.chunks with
// a synthetic Arm32ExidxSection that wraps it, and records the new section
// in ctx.extra.exidx. Does nothing if no such output section exists.
void create_arm_exidx_section(Context<E> &ctx) {
  for (i64 i = 0; i < ctx.chunks.size(); i++) {
    if (ctx.chunks[i]->shdr.sh_type != SHT_ARM_EXIDX)
      continue;

    // to_osec() can return null; skip such chunks rather than
    // dereferencing a null pointer.
    OutputSection<E> *osec = ctx.chunks[i]->to_osec();
    if (!osec)
      continue;

    auto *sec = new Arm32ExidxSection(ctx, *osec);
    ctx.extra.exidx = sec;
    ctx.chunks[i] = sec;
    // NOTE(review): chunk_pool presumably owns synthetic chunks -- confirm.
    ctx.chunk_pool.emplace_back(sec);
    break;
  }
}

// Builds the synthetic .ARM.exidx chunk on top of `osec`. The section size
// is taken from get_contents(), and the new chunk inherits osec's
// sect_order. Finally, every member of `osec` is marked as not alive.
Arm32ExidxSection::Arm32ExidxSection(Context<ARM32> &ctx,
                                     OutputSection<ARM32> &osec)
  : output_section(osec) {
  this->name = ".ARM.exidx";

  auto &hdr = this->shdr;
  hdr.sh_type = SHT_ARM_EXIDX;
  hdr.sh_flags = SHF_ALLOC;
  hdr.sh_addralign = 4;

  // Compute the contents before the members are marked dead below.
  hdr.sh_size = get_contents(ctx).size();
  this->sect_order = osec.sect_order;

  for (InputSection<E> *member : osec.members)
    member->is_alive = false;
}

// Points sh_link of .ARM.exidx at the .text section, if there is one.
void Arm32ExidxSection::update_shdr(Context<E> &ctx) {
  Chunk<E> *text = find_chunk(ctx, ".text");
  if (!text)
    return;

  // The runtime ignores sh_link for .ARM.exidx, but the strip command
  // from binutils relies on it being the .text section index.
  this->shdr.sh_link = text->shndx;
}

// Writes the contents returned by get_contents() into the output buffer at
// this section's file offset.
void Arm32ExidxSection::copy_buf(Context<E> &ctx) {
  std::vector<u8> contents = get_contents(ctx);

  // The size recorded in the section header must match what is about to be
  // written. This has to be a comparison: an assignment inside assert()
  // would overwrite sh_size in debug builds and verify nothing.
  assert(this->shdr.sh_size == contents.size());
  write_vector(ctx.buf + this->shdr.sh_offset, contents);
}

// ARM executables use an .ARM.exidx section to look up an exception
// handling record for the current instruction pointer. The table needs
// to be sorted by their addresses.
Expand All @@ -718,17 +757,12 @@ u64 get_eflags(Context<E> &ctx) {
// I don't know why only ARM uses the different mechanism, but it's
// likely that it's due to some historical reason.
//
// This function sorts .ARM.exidx records.
void fixup_arm_exidx_section(Context<E> &ctx) {
Timer t(ctx, "fixup_arm_exidx_section");

Chunk<E> *chunk = find_chunk(ctx, SHT_ARM_EXIDX);
if (!chunk)
return;
// This function returns contents of .ARM.exidx.
std::vector<u8> Arm32ExidxSection::get_contents(Context<E> &ctx) {
std::vector<u8> buf(output_section.shdr.sh_size);

OutputSection<E> *osec = chunk->to_osec();
if (!osec)
return;
output_section.shdr.sh_addr = this->shdr.sh_addr;
output_section.write_to(ctx, buf.data());

// .ARM.exidx records consist of a signed 31-bit relative address
// and a 32-bit value. The relative address indicates the start
Expand All @@ -742,24 +776,24 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
//
// CANTUNWIND is value 1. The most significant bit is set in (2) but
// not in (3). So we can distinguish them just by looking at a value.
const u32 EXIDX_CANTUNWIND = 1;
const u32 CANTUNWIND = 1;

struct Entry {
ul32 addr;
ul32 val;
};

if (osec->shdr.sh_size % sizeof(Entry))
if (buf.size() % sizeof(Entry))
Fatal(ctx) << "invalid .ARM.exidx section size";

Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset);
i64 num_entries = osec->shdr.sh_size / sizeof(Entry);
Entry *ent = (Entry *)buf.data();
i64 num_entries = buf.size() / sizeof(Entry);

// Entry's addresses are relative to themselves. In order to sort
// records by addresses, we first translate them so that the addresses
// records by address, we first translate them so that the addresses
// are relative to the beginning of the section.
auto is_relative = [](u32 val) {
return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000);
return val != CANTUNWIND && !(val & 0x8000'0000);
};

tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
Expand All @@ -769,10 +803,21 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
ent[i].val = 0x7fff'ffff & (ent[i].val + offset);
});

tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) {
std::sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) {
return a.addr < b.addr;
});

// Remove duplicate adjacent entries. That is, if two adjacent functions
// have the same compact unwind info or are both CANTUNWIND, we can
// merge them into a single range.
auto it = std::unique(ent, ent + num_entries,
[](const Entry &a, const Entry &b) {
return a.val == b.val;
});

num_entries = it - ent;
buf.resize(num_entries * sizeof(Entry));

// Make addresses relative to themselves.
tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
i64 offset = sizeof(Entry) * i;
Expand All @@ -781,14 +826,7 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
ent[i].val = 0x7fff'ffff & (ent[i].val - offset);
});

// .ARM.exidx's sh_link should be set to the .text section index.
// Runtime doesn't care about it, but the binutils's strip command does.
if (ctx.shdr) {
if (Chunk<E> *text = find_chunk(ctx, ".text")) {
osec->shdr.sh_link = text->shndx;
ctx.shdr->copy_buf(ctx);
}
}
return buf;
}

} // namespace mold
4 changes: 4 additions & 0 deletions src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,10 @@ int mold_main(int argc, char **argv) {
if (ctx.arg.pack_dyn_relocs_relr)
construct_relr(ctx);

// Convert an .ARM.exidx to a synthetic section.
if constexpr (is_arm32<E>)
create_arm_exidx_section(ctx);

// Reserve a space for dynamic symbol strings in .dynstr and sort
// .dynsym contents if necessary. Beyond this point, no symbol will
// be added to .dynsym.
Expand Down
18 changes: 18 additions & 0 deletions src/mold.h
Original file line number Diff line number Diff line change
Expand Up @@ -1528,7 +1528,20 @@ void rewrite_endbr(Context<X86_64> &ctx);
// arch-arm32.cc
//

// Synthetic chunk that takes the place of the regular .ARM.exidx output
// section. Its bytes are produced by get_contents() from the wrapped
// output section.
class Arm32ExidxSection : public Chunk<ARM32> {
public:
// Wraps `osec`, the original .ARM.exidx output section.
Arm32ExidxSection(Context<ARM32> &ctx, OutputSection<ARM32> &osec);
// Sets sh_link to the section index of .text when that section exists.
void update_shdr(Context<ARM32> &ctx) override;
// Writes the result of get_contents() to the output buffer.
void copy_buf(Context<ARM32> &ctx) override;

private:
// Returns the contents of .ARM.exidx as a byte vector.
std::vector<u8> get_contents(Context<ARM32> &ctx);

// The output section this chunk was created from.
OutputSection<ARM32> &output_section;
};

template <> u64 get_eflags(Context<ARM32> &ctx);
void create_arm_exidx_section(Context<ARM32> &ctx);
// NOTE(review): no definition of fixup_arm_exidx_section is visible after
// this change (its body appears to have become
// Arm32ExidxSection::get_contents, and its call in passes.cc was removed).
// Confirm, then drop this declaration.
void fixup_arm_exidx_section(Context<ARM32> &ctx);

//
Expand Down Expand Up @@ -1705,6 +1718,11 @@ struct SectionOrder {
template <typename E>
struct ContextExtras {};

// ARM32-specific extra context state.
template <>
struct ContextExtras<ARM32> {
// The synthetic .ARM.exidx section, assigned by create_arm_exidx_section();
// stays null when no SHT_ARM_EXIDX chunk was converted.
Arm32ExidxSection *exidx = nullptr;
};

template <is_riscv E>
struct ContextExtras<E> {
RiscvAttributesSection<E> *riscv_attributes = nullptr;
Expand Down
4 changes: 2 additions & 2 deletions src/output-chunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {

// Create a PT_ARM_EXIDX
if constexpr (is_arm32<E>)
if (Chunk<E> *chunk = find_chunk(ctx, SHT_ARM_EXIDX))
define(PT_ARM_EXIDX, PF_R, chunk);
if (ctx.extra.exidx)
define(PT_ARM_EXIDX, PF_R, ctx.extra.exidx);

// Create a PT_RISCV_ATTRIBUTES
if constexpr (is_riscv<E>)
Expand Down
3 changes: 0 additions & 3 deletions src/passes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1662,9 +1662,6 @@ void copy_chunks(Context<E> &ctx) {
// undefined errors.
report_undef_errors(ctx);

if constexpr (is_arm32<E>)
fixup_arm_exidx_section(ctx);

// Zero-clear paddings between chunks
auto zero = [&](Chunk<E> *chunk, i64 next_start) {
i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
Expand Down

0 comments on commit 742ea87

Please sign in to comment.