diff --git a/elf/arch-loongarch.cc b/elf/arch-loongarch.cc index 8e59d660f9..466eaf91eb 100644 --- a/elf/arch-loongarch.cc +++ b/elf/arch-loongarch.cc @@ -115,11 +115,11 @@ static void write_d10k16(u8 *loc, u32 val) { } static u32 get_rd(u32 insn) { - return insn & 0x1f; + return bits(insn, 4, 0); } static u32 get_rj(u32 insn) { - return (insn >> 5) & 0x1f; + return bits(insn, 9, 5); } static void set_rj(u8 *loc, u32 rj) { @@ -128,6 +128,30 @@ static void set_rj(u8 *loc, u32 rj) { *(ul32 *)loc |= rj << 5; } +// Returns true if isec's i'th relocation refers to the following +// relaxable instruction pair. +// +// pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 +// ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 +static bool is_relaxable_got_load(Context &ctx, InputSection &isec, i64 i) { + std::span> rels = isec.get_rels(ctx); + Symbol &sym = *isec.file.symbols[rels[i].r_sym]; + + if (ctx.arg.relax && + sym.is_pcrel_linktime_const(ctx) && + i + 3 < rels.size() && + rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + rels[i + 3].r_type == R_LARCH_RELAX) { + u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset); + u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4); + bool is_ld_d = (insn2 & 0xffc0'0000) == 0x28c0'0000; + return get_rd(insn1) == get_rd(insn2) && get_rd(insn2) == get_rj(insn2) && + is_ld_d; + } + return false; +} + template <> void write_plt_header(Context &ctx, u8 *buf) { constexpr ul32 insn_64[] = { @@ -371,44 +395,36 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { write_k12(loc, GOT + G + A); break; case R_LARCH_GOT_PC_HI20: - switch (removed_bytes) { - // pcalau12i/ld.d has been relaxed to pcaddi, the first insn has been removed. 
- case 4: - // loc stores 'ld.d', rewrite ld.d with pcaddi - *(ul32 *)(loc) = 0x1800'0000 | get_rd(*(ul32 *)loc); - write_j20(loc, (S + A - P) >> 2); - i += 3; - break; - case 0: - if (ctx.arg.relax && - sym.is_pcrel_linktime_const(ctx) && - i + 3 < rels.size() && - rels[i + 1].r_type == R_LARCH_RELAX && - rels[i + 3].r_type == R_LARCH_RELAX && - rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 && - rels[i + 2].r_offset == rel.r_offset + 4) { - u32 insn1 = *(ul32 *)(contents.data() + rel.r_offset); - u32 insn2 = *(ul32 *)(contents.data() + rels[i + 2].r_offset); - u32 rd = get_rd(insn1); - - if (rd == get_rd(insn2) && rd == get_rj(insn2)) { - // relax pcalau12i/ld.d to pcalau12i/addi.d - // reloc the pcalau12i as R_LARCH_PLACA_HI20 - write_j20(loc, hi20(S + A, P)); - - // rewrite the ld.d insn with addi.d insn - *(ul32 *)(loc + 4) = 0x02c00000 | rd | (rd << 5); - write_k12(loc + 4, S + rels[i + 2].r_addend); - i += 3; - break; - } + if (removed_bytes == 0) { + // If the PC-relative symbol address is known at link-time, we can + // rewrite the following GOT load + // + // pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 + // ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 + // + // with the following address materialization + // + // pcalau12i $t0, 0 + // addi.d $t0, $t0, 0 + if (is_relaxable_got_load(ctx, *this, i)) { + i64 dist = compute_distance(ctx, sym, *this, rel); + if (-(1LL << 31) <= dist && dist < (1LL << 31)) { + u32 rd = get_rd(*(ul32 *)loc); + *(ul32 *)(loc + 4) = 0x02c0'0000 | (rd << 5) | rd; // addi.d + + write_j20(loc, hi20(S + A, P)); + write_k12(loc + 4, S + A); + i += 3; + break; } - - // relax not applied. 
- write_j20(loc, hi20(GOT + G + A, P)); - break; - default: - unreachable(); + } + write_j20(loc, hi20(GOT + G + A, P)); + } else { + // Rewrite pcalau12i + ld.d with pcaddi + assert(removed_bytes == 4); + *(ul32 *)(loc) = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi + write_j20(loc, (S + A - P) >> 2); + i += 3; } break; case R_LARCH_GOT64_PC_LO20: @@ -860,34 +876,20 @@ void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { delta += 4; break; case R_LARCH_GOT_PC_HI20: - // The following two instructions are used to load a - // symbol value from the GOT + // The following two instructions are used to load a symbol address + // from the GOT. // // pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 // ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 // - // If the symbol is defined in the file current relocation belongs to, - // we can relax them to the following instructions and avoid memory load. + // If the PC-relative symbol address is known at link-time, we can + // relax them to the following instruction. 
// - // pcalau12i $t0, 0 - // addi.d $t0, $t0, 0 - if (ctx.arg.relax && - sym.is_pcrel_linktime_const(ctx) && - i + 3 < rels.size() && - rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 && - rels[i + 2].r_offset == rels[i].r_offset + 4 && - rels[i + 3].r_type == R_LARCH_RELAX) { - u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset); - u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4); - - // relax pcalau12i/ld.d to pcalau12i/addi.d - if (get_rd(insn1) != get_rd(insn2) || get_rd(insn2) != get_rj(insn2)) - continue; - - i64 dist = compute_distance(ctx, sym, isec, r); - // the second phase: relax pcalau12i/addi.d to pcaddi - if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21)) - delta += 4; + // pcaddi $t0, <offset> + if (is_relaxable_got_load(ctx, isec, i)) { + i64 dist = compute_distance(ctx, sym, isec, r); + if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21)) + delta += 4; } break; } diff --git a/test/elf/loongarch64_relax-got-load.sh b/test/elf/loongarch64_relax-got-load.sh new file mode 100755 index 0000000000..279fa8b5fd --- /dev/null +++ b/test/elf/loongarch64_relax-got-load.sh @@ -0,0 +1,33 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int get_foo(); +int main() { printf("%d\n", get_foo()); } +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -pie -Wl,--no-relax +$QEMU $t/exe1 | grep -q '^3$' +$OBJDUMP -d $t/exe1 | grep -A2 ':' | grep -Fqw pcalau12i +$OBJDUMP -d $t/exe1 | grep -A2 ':' | grep -Fqw ld.d + +$CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax +$QEMU $t/exe2 | grep -q '^3$' +$OBJDUMP -d $t/exe2 | grep -A1 ':' | grep -Fqw pcaddi + +$CC -B. 
-o $t/exe3 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax \ + -Wl,-Ttext=0x1000000,-Tdata=0x2000000 + +$QEMU $t/exe3 | grep -q '^3$' +$OBJDUMP -d $t/exe3 | grep -A2 ':' | grep -Fqw pcalau12i +$OBJDUMP -d $t/exe3 | grep -A2 ':' | grep -Fqw addi.d diff --git a/test/elf/loongarch64_relax-got.sh b/test/elf/loongarch64_relax-got.sh deleted file mode 100755 index 4e4df47741..0000000000 --- a/test/elf/loongarch64_relax-got.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -cat <<'EOF' | $CC -o $t/a.o -c -xassembler - -.globl get_sym -get_sym: - la.global $a0, sym - ld.w $a0, $a0, 0 - ret -EOF - -cat < - -int get_sym(); - -int main() { - printf("%x\n", get_sym()); -} -EOF - -$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -Wl,--no-relax -$QEMU $t/exe1 | grep -Eq '^beef$' - -$CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o -$QEMU $t/exe2 | grep -Eq '^beef$' - -$OBJDUMP -d $t/exe2 | grep -A2 ':' | grep -Eq $'pcaddi'