Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Aug 5, 2024
1 parent 121f917 commit 1aa966c
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 97 deletions.
126 changes: 64 additions & 62 deletions elf/arch-loongarch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ static void write_d10k16(u8 *loc, u32 val) {
}

static u32 get_rd(u32 insn) {
return insn & 0x1f;
return bits(insn, 4, 0);
}

static u32 get_rj(u32 insn) {
return (insn >> 5) & 0x1f;
return bits(insn, 9, 5);
}

static void set_rj(u8 *loc, u32 rj) {
Expand All @@ -128,6 +128,30 @@ static void set_rj(u8 *loc, u32 rj) {
*(ul32 *)loc |= rj << 5;
}

// Returns true if isec's i'th relocation refers to the following
// relaxable instruction pair.
//
//   pcalau12i $t0, 0         # R_LARCH_GOT_PC_HI20
//   ld.d      $t0, $t0, 0    # R_LARCH_GOT_PC_LO12
//
// `i` must be the index of the R_LARCH_GOT_PC_HI20 relocation in
// isec's relocation list. The pair is considered relaxable only if
// all of the following hold:
//
//  - relaxation is enabled (ctx.arg.relax),
//  - the symbol's PC-relative address is a link-time constant,
//  - both relocations are immediately followed by an R_LARCH_RELAX
//    marker (rels[i + 1] for the pcalau12i, rels[i + 3] for the ld.d),
//  - the R_LARCH_GOT_PC_LO12 relocation applies to the instruction
//    right after the pcalau12i, and
//  - the second instruction is an ld.d whose destination and base
//    registers are both the register written by the pcalau12i.
static bool is_relaxable_got_load(Context<E> &ctx, InputSection<E> &isec, i64 i) {
  std::span<const ElfRel<E>> rels = isec.get_rels(ctx);
  Symbol<E> &sym = *isec.file.symbols[rels[i].r_sym];

  if (ctx.arg.relax &&
      sym.is_pcrel_linktime_const(ctx) &&
      i + 3 < rels.size() &&
      // The pcalau12i itself must be marked as relaxable, not just the
      // ld.d that follows it.
      rels[i + 1].r_type == R_LARCH_RELAX &&
      rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 &&
      rels[i + 2].r_offset == rels[i].r_offset + 4 &&
      rels[i + 3].r_type == R_LARCH_RELAX) {
    u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset);
    u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4);

    // The upper 10 bits of the instruction word identify ld.d.
    bool is_ld_d = (insn2 & 0xffc0'0000) == 0x28c0'0000;

    // Both instructions must write the same register, and the load must
    // use that register as its base; otherwise the pair cannot be
    // rewritten in place.
    return get_rd(insn1) == get_rd(insn2) && get_rd(insn2) == get_rj(insn2) &&
           is_ld_d;
  }
  return false;
}

template <>
void write_plt_header<E>(Context<E> &ctx, u8 *buf) {
constexpr ul32 insn_64[] = {
Expand Down Expand Up @@ -371,44 +395,36 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
write_k12(loc, GOT + G + A);
break;
case R_LARCH_GOT_PC_HI20:
switch (removed_bytes) {
// pcalau12i/ld.d has been relaxed to pcaddi, the first insn has been removed.
case 4:
// loc stores 'ld.d', rewrite ld.d with pcaddi
*(ul32 *)(loc) = 0x1800'0000 | get_rd(*(ul32 *)loc);
write_j20(loc, (S + A - P) >> 2);
i += 3;
break;
case 0:
if (ctx.arg.relax &&
sym.is_pcrel_linktime_const(ctx) &&
i + 3 < rels.size() &&
rels[i + 1].r_type == R_LARCH_RELAX &&
rels[i + 3].r_type == R_LARCH_RELAX &&
rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 &&
rels[i + 2].r_offset == rel.r_offset + 4) {
u32 insn1 = *(ul32 *)(contents.data() + rel.r_offset);
u32 insn2 = *(ul32 *)(contents.data() + rels[i + 2].r_offset);
u32 rd = get_rd(insn1);

if (rd == get_rd(insn2) && rd == get_rj(insn2)) {
// relax pcalau12i/ld.d to pcalau12i/addi.d
// reloc the pcalau12i as R_LARCH_PCALA_HI20
write_j20(loc, hi20(S + A, P));

// rewrite the ld.d insn with addi.d insn
*(ul32 *)(loc + 4) = 0x02c00000 | rd | (rd << 5);
write_k12(loc + 4, S + rels[i + 2].r_addend);
i += 3;
break;
}
if (removed_bytes == 0) {
// If the PC-relative symbol address is known at link-time, we can
// rewrite the following GOT load
//
// pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20
// ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12
//
// with the following address materialization
//
// pcalau12i $t0, 0
// addi.d $t0, $t0, 0
if (is_relaxable_got_load(ctx, *this, i)) {
i64 dist = compute_distance(ctx, sym, *this, rel);
if (-(1LL << 31) <= dist && dist < (1LL << 31)) {
u32 rd = get_rd(*(ul32 *)loc);
*(ul32 *)(loc + 4) = 0x02c0'0000 | (rd << 5) | rd; // addi.d

write_j20(loc, hi20(S + A, P));
write_k12(loc + 4, S + A);
i += 3;
break;
}

// relax not applied.
write_j20(loc, hi20(GOT + G + A, P));
break;
default:
unreachable();
}
write_j20(loc, hi20(GOT + G + A, P));
} else {
// Rewrite pcalau12i + ld.d with pcaddi
assert(removed_bytes == 4);
*(ul32 *)(loc) = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi
write_j20(loc, (S + A - P) >> 2);
i += 3;
}
break;
case R_LARCH_GOT64_PC_LO20:
Expand Down Expand Up @@ -860,34 +876,20 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec, bool use_rvc) {
delta += 4;
break;
case R_LARCH_GOT_PC_HI20:
// The following two instructions are used to load a
// symbol value from the GOT
// The following two instructions are used to load a symbol address
// from the GOT.
//
// pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20
// ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12
//
// If the symbol is defined in the file current relocation belongs to,
// we can relax them to the following instructions and avoid memory load.
// If the PC-relative symbol address is known at link-time, we can
// relax them to the following instruction.
//
// pcalau12i $t0, 0
// addi.d $t0, $t0, 0
if (ctx.arg.relax &&
sym.is_pcrel_linktime_const(ctx) &&
i + 3 < rels.size() &&
rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 &&
rels[i + 2].r_offset == rels[i].r_offset + 4 &&
rels[i + 3].r_type == R_LARCH_RELAX) {
u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset);
u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4);

// relax pcalau12i/ld.d to pcalau12i/addi.d
if (get_rd(insn1) != get_rd(insn2) || get_rd(insn2) != get_rj(insn2))
continue;

i64 dist = compute_distance(ctx, sym, isec, r);
// the second phase: relax pcalau12i/addi.d to pcaddi
if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21))
delta += 4;
// pcaddi $t0, <offset>
if (is_relaxable_got_load(ctx, isec, i)) {
i64 dist = compute_distance(ctx, sym, isec, r);
if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21))
delta += 4;
}
break;
}
Expand Down
33 changes: 33 additions & 0 deletions test/elf/loongarch64_relax-got-load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Checks how a GOT load (pcalau12i + ld.d) in get_foo() is emitted for
# LoongArch64 under three link configurations: relaxation disabled,
# relaxation enabled with the symbol nearby, and relaxation enabled with
# the symbol placed far away from the code.
#
# NOTE(review): $CC, $QEMU, $OBJDUMP and $t are presumably provided by
# common.inc -- confirm there.
. $(dirname $0)/common.inc

# a.o: defines the global variable `foo`.
cat <<EOF | $CC -o $t/a.o -c -xc - -fPIC
int foo = 3;
EOF

# b.o: get_foo() reads the external `foo`; compiled with -fPIC -O.
cat <<EOF | $CC -o $t/b.o -c -xc - -fPIC -O
extern int foo;
int get_foo() { return foo; }
EOF

# c.o: main() prints get_foo() so the result can be checked at runtime.
cat <<EOF | $CC -o $t/c.o -c -xc - -fPIC
#include <stdio.h>
int get_foo();
int main() { printf("%d\n", get_foo()); }
EOF

# Case 1: --no-relax. The program must still print 3, and get_foo must
# keep the original pcalau12i + ld.d pair.
$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -pie -Wl,--no-relax
$QEMU $t/exe1 | grep -q '^3$'
$OBJDUMP -d $t/exe1 | grep -A2 '<get_foo>:' | grep -Fqw pcalau12i
$OBJDUMP -d $t/exe1 | grep -A2 '<get_foo>:' | grep -Fqw ld.d

# Case 2: --relax with default layout. The first instruction of get_foo
# is expected to be a single pcaddi.
$CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax
$QEMU $t/exe2 | grep -q '^3$'
$OBJDUMP -d $t/exe2 | grep -A1 '<get_foo>:' | grep -Fqw pcaddi

# Case 3: --relax with text and data placed 0x1000000 bytes apart. Here
# get_foo is expected to contain pcalau12i + addi.d instead of pcaddi.
$CC -B. -o $t/exe3 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax \
-Wl,-Ttext=0x1000000,-Tdata=0x2000000

$QEMU $t/exe3 | grep -q '^3$'
$OBJDUMP -d $t/exe3 | grep -A2 '<get_foo>:' | grep -Fqw pcalau12i
$OBJDUMP -d $t/exe3 | grep -A2 '<get_foo>:' | grep -Fqw addi.d
35 changes: 0 additions & 35 deletions test/elf/loongarch64_relax-got.sh

This file was deleted.

0 comments on commit 1aa966c

Please sign in to comment.