diff --git a/elf/arch-loongarch.cc b/elf/arch-loongarch.cc index 0ef232b133..b121d44a75 100644 --- a/elf/arch-loongarch.cc +++ b/elf/arch-loongarch.cc @@ -13,7 +13,7 @@ // Speaking of the ISA, all instructions are 4 byte long and aligned to 4 // byte boundaries in LoongArch. It has 32 general-purpose registers. // Among these, $t0 - $t8 (aliases for $r12 - $r20) are temporary -// registers that we can use in our PLT and range extension thunks. +// registers that we can use in our PLT. // // Just like RISC-V, LoongArch supports section-shrinking relaxations. // That is, it allows linkers to rewrite certain instruction sequences to @@ -320,13 +320,10 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { check_branch(S + A - P, -(1 << 22), 1 << 22); write_d5k16(loc, (S + A - P) >> 2); break; - case R_LARCH_B26: { - i64 val = S + A - P; - if (val < -(1 << 27) || (1 << 27) <= val) - val = get_thunk_addr(i) + A - P; - write_d10k16(loc, val >> 2); + case R_LARCH_B26: + check_branch(S + A - P, -(1 << 27), 1 << 27); + write_d10k16(loc, (S + A - P) >> 2); break; - } case R_LARCH_ABS_LO12: write_k12(loc, S + A); break; @@ -826,30 +823,6 @@ void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { isec.sh_size -= delta; } -template <> -void Thunk::copy_buf(Context &ctx) { - constexpr ul32 insn[] = { - 0x1e00'000c, // pcaddu18i $t0, 0 - 0x4c00'0180, // jirl $zero, $t0, 0 - }; - - static_assert(E::thunk_size == sizeof(insn)); - - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - u64 P = output_section.shdr.sh_addr + offset; - - for (Symbol *sym : symbols) { - u64 S = sym->get_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_j20(buf, (S - P + 0x20000) >> 18); - write_k16(buf + 4, (S - P) >> 2); - - buf += sizeof(insn); - P += sizeof(insn); - } -} - } // namespace mold::elf #endif diff --git a/elf/elf.h b/elf/elf.h index f1530fb93c..09afc195f4 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -2271,8 +2271,6 @@ struct LOONGARCH64 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; @@ -2297,8 +2295,6 @@ struct LOONGARCH32 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; diff --git a/elf/mold.h b/elf/mold.h index 8bebc52daa..36bc321cee 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -232,22 +232,16 @@ struct FdeRecord { template struct InputSectionExtras {}; -template requires (needs_thunk && !is_loongarch) +template struct InputSectionExtras { std::vector thunk_refs; }; -template +template requires is_riscv || is_loongarch struct InputSectionExtras { std::vector r_deltas; }; -template -struct InputSectionExtras { - std::vector thunk_refs; - std::vector r_deltas; -}; - // InputSection represents a section in an input object file. template class __attribute__((aligned(4))) InputSection { diff --git a/elf/thunks.cc b/elf/thunks.cc index 0601ba80b8..a195a5ed2e 100644 --- a/elf/thunks.cc +++ b/elf/thunks.cc @@ -20,8 +20,7 @@ // we don't need to try too hard to reduce thunk size to the absolute // minimum. -#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 || \ - MOLD_LOONGARCH64 || MOLD_LOONGARCH32 +#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 #include "mold.h" @@ -39,9 +38,7 @@ static consteval i64 max_distance() { // and therefore the least two bits are always zero. So the branch // operand is effectively 28 bits long. That means the branch range is // [-2^27, 2^27) or PC ± 128 MiB. - // - // LoongArch's BR instruction also takes a 26 bit immediate. - if (is_arm64 || is_loongarch) + if (is_arm64) return 1 << 27; // ARM32's Thumb branch has 24 bits immediate, and the instructions are diff --git a/test/elf/range-extension-thunk.sh b/test/elf/range-extension-thunk.sh index 6cc979561a..68bc20df57 100755 --- a/test/elf/range-extension-thunk.sh +++ b/test/elf/range-extension-thunk.sh @@ -9,6 +9,11 @@ # It looks like SPARC's runtime can't handle PLT if it's too far from GOT. [ $MACHINE = sparc64 ] && skip +# Current LoongArch compilers emit BL for function calls, but I believe +# they'll emit PCADDU18I + JIRL (which is addressable PC ± 128 GiB) in the +# future. +[[ $MACHINE = loongarch* ]] && skip + # qemu aborts with the "Unknown exception 0x5" error, although this # test passes on a real POWER10 machine. on_qemu && [ "$CPU" = power10 ] && skip