From 47c092aac0dc64d3addfc4819212ecd5a93a8c1f Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 31 Jul 2024 19:17:13 +0900 Subject: [PATCH] Remove range extension support from LoongArch It looks like emitting PCADDU18I + JIRL instead of just BL for function calls is the way to go, so we don't want to bother to maintain range extension thunks for the psABI. Since no linker other than ours supports range extension thunks for LoongArch, removing the support should be OK. --- elf/arch-loongarch.cc | 35 ++++--------------------------- elf/elf.h | 4 ---- elf/mold.h | 10 ++------- elf/thunks.cc | 7 ++----- test/elf/range-extension-thunk.sh | 5 +++++ 5 files changed, 13 insertions(+), 48 deletions(-) diff --git a/elf/arch-loongarch.cc b/elf/arch-loongarch.cc index 0ef232b133..b121d44a75 100644 --- a/elf/arch-loongarch.cc +++ b/elf/arch-loongarch.cc @@ -13,7 +13,7 @@ // Speaking of the ISA, all instructions are 4 byte long and aligned to 4 // byte boundaries in LoongArch. It has 32 general-purpose registers. // Among these, $t0 - $t8 (aliases for $r12 - $r20) are temporary -// registers that we can use in our PLT and range extension thunks. +// registers that we can use in our PLT. // // Just like RISC-V, LoongArch supports section-shrinking relaxations. 
// That is, it allows linkers to rewrite certain instruction sequences to @@ -320,13 +320,10 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { check_branch(S + A - P, -(1 << 22), 1 << 22); write_d5k16(loc, (S + A - P) >> 2); break; - case R_LARCH_B26: { - i64 val = S + A - P; - if (val < -(1 << 27) || (1 << 27) <= val) - val = get_thunk_addr(i) + A - P; - write_d10k16(loc, val >> 2); + case R_LARCH_B26: + check_branch(S + A - P, -(1 << 27), 1 << 27); + write_d10k16(loc, (S + A - P) >> 2); break; - } case R_LARCH_ABS_LO12: write_k12(loc, S + A); break; @@ -826,30 +823,6 @@ void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { isec.sh_size -= delta; } -template <> -void Thunk::copy_buf(Context &ctx) { - constexpr ul32 insn[] = { - 0x1e00'000c, // pcaddu18i $t0, 0 - 0x4c00'0180, // jirl $zero, $t0, 0 - }; - - static_assert(E::thunk_size == sizeof(insn)); - - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - u64 P = output_section.shdr.sh_addr + offset; - - for (Symbol *sym : symbols) { - u64 S = sym->get_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_j20(buf, (S - P + 0x20000) >> 18); - write_k16(buf + 4, (S - P) >> 2); - - buf += sizeof(insn); - P += sizeof(insn); - } -} - } // namespace mold::elf #endif diff --git a/elf/elf.h b/elf/elf.h index f1530fb93c..09afc195f4 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -2271,8 +2271,6 @@ struct LOONGARCH64 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; @@ -2297,8 +2295,6 @@ struct LOONGARCH32 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 
filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; diff --git a/elf/mold.h b/elf/mold.h index 8bebc52daa..36bc321cee 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -232,22 +232,16 @@ struct FdeRecord { template struct InputSectionExtras {}; -template requires (needs_thunk && !is_loongarch) +template struct InputSectionExtras { std::vector thunk_refs; }; -template +template requires is_riscv || is_loongarch struct InputSectionExtras { std::vector r_deltas; }; -template -struct InputSectionExtras { - std::vector thunk_refs; - std::vector r_deltas; -}; - // InputSection represents a section in an input object file. template class __attribute__((aligned(4))) InputSection { diff --git a/elf/thunks.cc b/elf/thunks.cc index 0601ba80b8..a195a5ed2e 100644 --- a/elf/thunks.cc +++ b/elf/thunks.cc @@ -20,8 +20,7 @@ // we don't need to try too hard to reduce thunk size to the absolute // minimum. -#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 || \ - MOLD_LOONGARCH64 || MOLD_LOONGARCH32 +#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 #include "mold.h" @@ -39,9 +38,7 @@ static consteval i64 max_distance() { // and therefore the least two bits are always zero. So the branch // operand is effectively 28 bits long. That means the branch range is // [-2^27, 2^27) or PC ± 128 MiB. - // - // LoongArch's BR instruction also takes a 26 bit immediate. - if (is_arm64 || is_loongarch) + if (is_arm64) return 1 << 27; // ARM32's Thumb branch has 24 bits immediate, and the instructions are diff --git a/test/elf/range-extension-thunk.sh b/test/elf/range-extension-thunk.sh index 6cc979561a..68bc20df57 100755 --- a/test/elf/range-extension-thunk.sh +++ b/test/elf/range-extension-thunk.sh @@ -9,6 +9,11 @@ # It looks like SPARC's runtime can't handle PLT if it's too far from GOT. 
[ $MACHINE = sparc64 ] && skip +# Current LoongArch compilers emit BL for function calls, but I believe +# they'll emit PCADDU18I + JIRL (which is addressable PC ± 128 GiB) in the +# future. +[[ $MACHINE = loongarch* ]] && skip + # qemu aborts with the "Unknown exception 0x5" error, although this # test passes on a real POWER10 machine. on_qemu && [ "$CPU" = power10 ] && skip