From 7be1b66791e2a21b8c8f516f8d622ab3617292c0 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Fri, 20 Oct 2023 09:13:11 +0900 Subject: [PATCH] Fix range extension thunks Fixes https://github.com/rui314/mold/issues/1132 --- elf/thunks.cc | 51 +++++++++++------------ test/elf/arm_range-extension-thunk2.sh | 57 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 26 deletions(-) create mode 100755 test/elf/arm_range-extension-thunk2.sh diff --git a/elf/thunks.cc b/elf/thunks.cc index 77207acd4f..5ee6680f71 100644 --- a/elf/thunks.cc +++ b/elf/thunks.cc @@ -170,17 +170,17 @@ static void scan_rels(Context &ctx, InputSection &isec, template <> void OutputSection::create_range_extension_thunks(Context &ctx) { + using Thunk = RangeExtensionThunk; + std::span *> m = members; if (m.empty()) return; - m[0]->offset = 0; - // Initialize input sections with a dummy offset so that we can // distinguish sections that have got an address with the one who // haven't. - tbb::parallel_for((i64)1, (i64)m.size(), [&](i64 i) { - m[i]->offset = -1; + tbb::parallel_for_each(m, [](InputSection *isec) { + isec->offset = -1; }); // We create thunks from the beginning of the section to the end. @@ -190,7 +190,7 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { // Input sections between B and C are in the current batch. // // A is the input section with the smallest address than can reach - // anywhere from the current batch. + // from the current batch. // // D is the input section with the largest address such that the thunk // is reachable from the current batch if it's inserted right before D. @@ -198,9 +198,9 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { // ................................ ............ 
// A B C D // ^ We insert a thunk for the current batch just before D - // <---> The current batch, which is smaller than batch_size - // <--------> Smaller than max_distance - // <--------> Smaller than max_distance + // <---> The current batch, which is smaller than BATCH_SIZE + // <--------> Smaller than MAX_DISTANCE + // <--------> Smaller than MAX_DISTANCE // <-------------> Reachable from the current batch i64 a = 0; i64 b = 0; @@ -212,41 +212,40 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { i64 t = 0; while (b < m.size()) { + // Move D forward as far as we can jump from B to a thunk at D. + auto d_thunk_end = [&] { + u64 d_end = align_to(offset, 1 << m[d]->p2align) + m[d]->sh_size; + return align_to(d_end, Thunk::alignment) + max_thunk_size; + }; + + while (d < m.size() && + (b == d || d_thunk_end() <= m[b]->offset + max_distance())) { + offset = align_to(offset, 1 << m[d]->p2align); + m[d]->offset = offset; + offset += m[d]->sh_size; + d++; + } + // Move C forward so that C is apart from B by BATCH_SIZE. We want // to make sure that there's at least one section between B and C // to ensure progress. c = b + 1; - while (c < m.size() && - m[c]->offset + m[c]->sh_size < m[b]->offset + batch_size) + while (c < d && m[c]->offset + m[c]->sh_size < m[b]->offset + batch_size) c++; - // Move D foward as far as we can jump from B to anywhere in a thunk at D. - d = c; - while (d < m.size() && - align_to(offset, 1 << m[d]->p2align) + m[d]->sh_size + max_thunk_size < - m[b]->offset + max_distance()) - d++; - // Move A forward so that A is reachable from C. i64 c_offset = (c == m.size()) ? offset : m[c]->offset; while (a < b && a < m.size() && m[a]->offset + max_distance() < c_offset) a++; - // Assign offsets to all sections before D. - for (i64 i = b; i < d; i++) { - offset = align_to(offset, 1 << m[i]->p2align); - m[i]->offset = offset; - offset += m[i]->sh_size; - } - // Erase references to out-of-range thunks. 
while (t < thunks.size() && thunks[t]->offset < m[a]->offset) reset_thunk(*thunks[t++]); // Create a new thunk and place it at D. - offset = align_to(offset, RangeExtensionThunk::alignment); + offset = align_to(offset, Thunk::alignment); i64 thunk_idx = thunks.size(); - RangeExtensionThunk *thunk = new RangeExtensionThunk(*this, offset); + Thunk *thunk = new Thunk(*this, offset); thunks.emplace_back(thunk); // Scan relocations between B and C to collect symbols that need diff --git a/test/elf/arm_range-extension-thunk2.sh b/test/elf/arm_range-extension-thunk2.sh new file mode 100755 index 0000000000..8d8634132e --- /dev/null +++ b/test/elf/arm_range-extension-thunk2.sh @@ -0,0 +1,57 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +void f0(); +void f1(); +void f2(); +void f3(); +void f4(); +void f5(); +void f6(); +void f7(); +void f8(); +void f9(); +void f10(); +void f11(); +void f12(); +void f13(); +void f14(); +void f15(); +void f16(); +void f17(); +void f18(); +void f19(); + +__attribute__((aligned(1024*1024))) void f0(int x) { printf("0 "); if (!x) f9(); } +__attribute__((aligned(1024*1024))) void f1(int x) { printf("1 "); f8(x); } +__attribute__((aligned(1024*1024))) void f2(int x) { printf("2 "); f7(x); } +__attribute__((aligned(1024*1024))) void f3(int x) { printf("3 "); f6(x); } +__attribute__((aligned(1024*1024))) void f4(int x) { printf("4 "); f5(x); } +__attribute__((aligned(1024*1024))) void f5(int x) { printf("5 "); f10(x); } +__attribute__((aligned(1024*1024))) void f6(int x) { printf("6 "); f4(x); } +__attribute__((aligned(1024*1024))) void f7(int x) { printf("7 "); f3(x); } +__attribute__((aligned(1024*1024))) void f8(int x) { printf("8 "); f2(x); } +__attribute__((aligned(1024*1024))) void f9(int x) { printf("9 "); f1(x); } + +__attribute__((aligned(8*1024*1024))) void f10(int x) { printf("10 "); f19(x); } +__attribute__((aligned(8*1024*1024))) void f11(int x) { printf("11 "); f18(x); } +__attribute__((aligned(8*1024*1024))) void f12(int x) { 
printf("12 "); f17(x); } +__attribute__((aligned(8*1024*1024))) void f13(int x) { printf("13 "); f16(x); } +__attribute__((aligned(8*1024*1024))) void f14(int x) { printf("14 "); f15(x); } +__attribute__((aligned(8*1024*1024))) void f15(int x) { printf("15 "); f0(x + 1); } +__attribute__((aligned(8*1024*1024))) void f16(int x) { printf("16 "); f14(x); } +__attribute__((aligned(8*1024*1024))) void f17(int x) { printf("17 "); f13(x); } +__attribute__((aligned(8*1024*1024))) void f18(int x) { printf("18 "); f12(x); } +__attribute__((aligned(8*1024*1024))) void f19(int x) { printf("19 "); f11(x); } + +int main() { + f0(0); + printf("\n"); +} +EOF + +$CC -B. -o $t/exe $t/a.o +$QEMU $t/exe | grep -Eq '^0 9 1 8 2 7 3 6 4 5 10 19 11 18 12 17 13 16 14 15 0 $'