Skip to content

Commit

Permalink
Update stub decoding for .NET 8 for disassemblers (#2416)
Browse files Browse the repository at this point in the history
* Update stub decoding for .NET 8 for disassemblers

The call counting stub, stub precode and fixup precode were modified in
.NET 8 to use a larger size for the interleaved code / data blocks. The
stub decoders in the Intel and Arm64 disassemblers need to be updated to
take that into account.
  • Loading branch information
janvorli authored Aug 25, 2023
1 parent e0c667f commit d391085
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 59 deletions.
94 changes: 56 additions & 38 deletions src/BenchmarkDotNet/Disassemblers/Arm64Disassembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -141,45 +141,63 @@ public void Feed(Arm64Instruction instruction)

internal class Arm64Disassembler : ClrMdV2Disassembler
{
// See dotnet/runtime src/coreclr/vm/arm64/thunktemplates.asm/.S for the stub code
// ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell)
// ldrh w10, [x9]
// subs w10, w10, #0x1
private static byte[] callCountingStubTemplate = new byte[12] { 0x09, 0x00, 0x00, 0x58, 0x2a, 0x01, 0x40, 0x79, 0x4a, 0x05, 0x00, 0x71 };
// ldr x10, DATA_SLOT(StubPrecode, Target)
// ldr x12, DATA_SLOT(StubPrecode, MethodDesc)
// br x10
private static byte[] stubPrecodeTemplate = new byte[12] { 0x4a, 0x00, 0x00, 0x58, 0xec, 0x00, 0x00, 0x58, 0x40, 0x01, 0x1f, 0xd6 };
// ldr x11, DATA_SLOT(FixupPrecode, Target)
// br x11
// ldr x12, DATA_SLOT(FixupPrecode, MethodDesc)
private static byte[] fixupPrecodeTemplate = new byte[12] { 0x0b, 0x00, 0x00, 0x58, 0x60, 0x01, 0x1f, 0xd6, 0x0c, 0x00, 0x00, 0x58 };

static Arm64Disassembler()
// Holds the three arm64 stub byte templates (call counting stub, stub precode, fixup precode)
// together with the stub page size, all computed once in the constructor from the
// runtime version carried by the State argument and from Environment.SystemPageSize.
internal sealed class RuntimeSpecificData
{
// NOTE(review): this region looks like a diff rendered without +/- markers. The next five lines
// reference `callCountingStubTemplate` before it is declared and match the body of the
// `static Arm64Disassembler()` constructor shown just above the class; the same applies to the
// repeated stubPrecodeTemplate / fixupPrecodeTemplate patch groups further down. Presumably the
// first copy of each repeated group is the removed (pre-change) code - confirm against the repository.
// The stubs code depends on the current OS memory page size, so we need to update the templates to reflect that
int pageSizeShifted = Environment.SystemPageSize / 32;
// Calculate the ldr x9, #offset instruction with offset based on the page size
callCountingStubTemplate[1] = (byte)(pageSizeShifted & 0xff);
callCountingStubTemplate[2] = (byte)(pageSizeShifted >> 8);
// See dotnet/runtime src/coreclr/vm/arm64/thunktemplates.asm/.S for the stub code
// Each template below is 12 bytes, i.e. the three 4-byte instructions listed above it.
// Bytes 1 and 2 (and 5/6 or 9/10 where noted) are overwritten by the constructor.
// ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell)
// ldrh w10, [x9]
// subs w10, w10, #0x1
internal readonly byte[] callCountingStubTemplate = new byte[12] { 0x09, 0x00, 0x00, 0x58, 0x2a, 0x01, 0x40, 0x79, 0x4a, 0x05, 0x00, 0x71 };
// ldr x10, DATA_SLOT(StubPrecode, Target)
// ldr x12, DATA_SLOT(StubPrecode, MethodDesc)
// br x10
internal readonly byte[] stubPrecodeTemplate = new byte[12] { 0x4a, 0x00, 0x00, 0x58, 0xec, 0x00, 0x00, 0x58, 0x40, 0x01, 0x1f, 0xd6 };
// ldr x11, DATA_SLOT(FixupPrecode, Target)
// br x11
// ldr x12, DATA_SLOT(FixupPrecode, MethodDesc)
internal readonly byte[] fixupPrecodeTemplate = new byte[12] { 0x0b, 0x00, 0x00, 0x58, 0x60, 0x01, 0x1f, 0xd6, 0x0c, 0x00, 0x00, 0x58 };
// Stub page size in bytes; it determines the offsets patched into the templates.
internal readonly ulong stubPageSize;

// Computes stubPageSize for the runtime described by `state` and patches the
// page-size-dependent bytes of the three templates accordingly.
internal RuntimeSpecificData(State state)
{
// Start from the OS page size of the current process.
stubPageSize = (ulong)Environment.SystemPageSize;
if (state.RuntimeVersion.Major >= 8)
{
// In .NET 8, the stub page size was changed to min 16kB
stubPageSize = Math.Max(stubPageSize, 16384);
}

// The stubs code depends on the current OS memory page size, so we need to update the templates to reflect that
// NOTE(review): the division by 32 presumably places the word-scaled page offset into the
// ldr (literal) immediate field as seen from byte 1 of the instruction - confirm against the Arm ARM.
ulong pageSizeShifted = stubPageSize / 32;
// Calculate the ldr x9, #offset instruction with offset based on the page size
callCountingStubTemplate[1] = (byte)(pageSizeShifted & 0xff);
callCountingStubTemplate[2] = (byte)(pageSizeShifted >> 8);

// Calculate the ldr x10, #offset instruction with offset based on the page size
stubPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
stubPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
// Calculate the ldr x12, #offset instruction with offset based on the page size
stubPrecodeTemplate[5] = (byte)((pageSizeShifted - 1) & 0xff);
stubPrecodeTemplate[6] = (byte)((pageSizeShifted - 1) >> 8);
// Calculate the ldr x10, #offset instruction with offset based on the page size
stubPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
stubPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
// Calculate the ldr x12, #offset instruction with offset based on the page size
// (this second instruction is patched with pageSizeShifted - 1, unlike the first)
stubPrecodeTemplate[5] = (byte)((pageSizeShifted - 1) & 0xff);
stubPrecodeTemplate[6] = (byte)((pageSizeShifted - 1) >> 8);

// Calculate the ldr x11, #offset instruction with offset based on the page size
fixupPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
fixupPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
// Calculate the ldr x12, #offset instruction with offset based on the page size
fixupPrecodeTemplate[9] = (byte)(pageSizeShifted & 0xff);
fixupPrecodeTemplate[10] = (byte)(pageSizeShifted >> 8);
// Calculate the ldr x11, #offset instruction with offset based on the page size
fixupPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
fixupPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
// Calculate the ldr x12, #offset instruction with offset based on the page size
fixupPrecodeTemplate[9] = (byte)(pageSizeShifted & 0xff);
fixupPrecodeTemplate[10] = (byte)(pageSizeShifted >> 8);
}
}

private static readonly Dictionary<Version, RuntimeSpecificData> runtimeSpecificData = new ();

protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, State state, int depth, ClrMethod currentMethod, DisassemblySyntax syntax)
{
if (!runtimeSpecificData.TryGetValue(state.RuntimeVersion, out RuntimeSpecificData data))
{
runtimeSpecificData.Add(state.RuntimeVersion, data = new RuntimeSpecificData(state));
}

const Arm64DisassembleMode disassembleMode = Arm64DisassembleMode.Arm;
using (CapstoneArm64Disassembler disassembler = CapstoneDisassembler.CreateArm64Disassembler(disassembleMode))
{
Expand Down Expand Up @@ -210,21 +228,21 @@ protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, Stat

if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length)
{
if (buffer.SequenceEqual(callCountingStubTemplate))
if (buffer.SequenceEqual(data.callCountingStubTemplate))
{
const ulong TargetMethodAddressSlotOffset = 8;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + TargetMethodAddressSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + TargetMethodAddressSlotOffset);
}
else if (buffer.SequenceEqual(stubPrecodeTemplate))
else if (buffer.SequenceEqual(data.stubPrecodeTemplate))
{
const ulong MethodDescSlotOffset = 0;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
isPrestubMD = true;
}
else if (buffer.SequenceEqual(fixupPrecodeTemplate))
else if (buffer.SequenceEqual(data.fixupPrecodeTemplate))
{
const ulong MethodDescSlotOffset = 8;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
isPrestubMD = true;
}
}
Expand Down
70 changes: 49 additions & 21 deletions src/BenchmarkDotNet/Disassemblers/IntelDisassembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,52 @@ namespace BenchmarkDotNet.Disassemblers
{
internal class IntelDisassembler : ClrMdV2Disassembler
{
// See dotnet/runtime src/coreclr/vm/amd64/thunktemplates.asm/.S for the stub code
// mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell)]
// dec WORD PTR [rax]
// je LOCAL_LABEL(CountReachedZero)
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod)]
// LOCAL_LABEL(CountReachedZero):
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached)]
private static byte[] callCountingStubTemplate = new byte[10] { 0x48, 0x8b, 0x05, 0xf9, 0x0f, 0x00, 0x00, 0x66, 0xff, 0x08 };
// mov r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)]
// jmp [rip + DATA_SLOT(StubPrecode, Target)]
private static byte[] stubPrecodeTemplate = new byte[13] { 0x4c, 0x8b, 0x15, 0xf9, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfb, 0x0f, 0x00, 0x00 };
// jmp [rip + DATA_SLOT(FixupPrecode, Target)]
// mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc)]
// jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk)]
private static byte[] fixupPrecodeTemplate = new byte[19] { 0xff, 0x25, 0xfa, 0x0f, 0x00, 0x00, 0x4c, 0x8b, 0x15, 0xfb, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfd, 0x0f, 0x00, 0x00 };
/// <summary>
/// Per-runtime-version data used to recognise amd64 runtime stubs: the leading bytes of the
/// call counting stub, stub precode and fixup precode, plus the stub page size.
/// </summary>
internal sealed class RuntimeSpecificData
{
    // Value written over the second byte of each rip-relative displacement for .NET 8+.
    // It turns displacements of the form 0x0fxx (data one 4kB page after the code)
    // into 0x3fxx (data one 16kB page after the code).
    private const byte LargePageDisplacementByte = 0x3f;

    // Stub page size used by runtimes from .NET 8 onwards.
    private const ulong LargeStubPageSize = 16384;

    // See dotnet/runtime src/coreclr/vm/amd64/thunktemplates.asm/.S for the stub code.
    // The initial values below encode the 4kB layout; the constructor patches them when needed.

    // mov    rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell)]
    // dec    WORD PTR [rax]
    // je     LOCAL_LABEL(CountReachedZero)
    // jmp    QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod)]
    // LOCAL_LABEL(CountReachedZero):
    // jmp    QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached)]
    // (only the first 10 bytes of the stub are kept in the template)
    internal readonly byte[] callCountingStubTemplate = new byte[]
    {
        0x48, 0x8b, 0x05, 0xf9, 0x0f, 0x00, 0x00,
        0x66, 0xff, 0x08,
    };

    // mov    r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)]
    // jmp    [rip + DATA_SLOT(StubPrecode, Target)]
    internal readonly byte[] stubPrecodeTemplate = new byte[]
    {
        0x4c, 0x8b, 0x15, 0xf9, 0x0f, 0x00, 0x00,
        0xff, 0x25, 0xfb, 0x0f, 0x00, 0x00,
    };

    // jmp    [rip + DATA_SLOT(FixupPrecode, Target)]
    // mov    r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc)]
    // jmp    [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk)]
    internal readonly byte[] fixupPrecodeTemplate = new byte[]
    {
        0xff, 0x25, 0xfa, 0x0f, 0x00, 0x00,
        0x4c, 0x8b, 0x15, 0xfb, 0x0f, 0x00, 0x00,
        0xff, 0x25, 0xfd, 0x0f, 0x00, 0x00,
    };

    // Stub page size in bytes for the inspected runtime.
    internal readonly ulong stubPageSize;

    /// <summary>
    /// Selects the stub page size and template displacements for the runtime described by
    /// <paramref name="state"/>.
    /// </summary>
    /// <param name="state">Disassembler state; only its <c>RuntimeVersion.Major</c> is read.</param>
    internal RuntimeSpecificData(State state)
    {
        bool hasLargeStubPages = state.RuntimeVersion.Major >= 8;
        if (!hasLargeStubPages)
        {
            // Before .NET 8 the templates already match; the stub page size is the OS page size.
            stubPageSize = (ulong)Environment.SystemPageSize;
            return;
        }

        // In .NET 8, the stub page size was changed to 16kB
        stubPageSize = LargeStubPageSize;

        // Update the templates so that the offsets are correct
        PatchDisplacements(callCountingStubTemplate, 4);
        PatchDisplacements(stubPrecodeTemplate, 4, 10);
        PatchDisplacements(fixupPrecodeTemplate, 3, 10, 16);
    }

    // Overwrites the byte at each given index of the template with the 16kB displacement byte.
    private static void PatchDisplacements(byte[] template, params int[] byteIndexes)
    {
        foreach (int byteIndex in byteIndexes)
        {
            template[byteIndex] = LargePageDisplacementByte;
        }
    }
}

private static readonly Dictionary<Version, RuntimeSpecificData> runtimeSpecificData = new ();

protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, State state, int depth, ClrMethod currentMethod, DisassemblySyntax syntax)
{
if (!runtimeSpecificData.TryGetValue(state.RuntimeVersion, out RuntimeSpecificData data))
{
runtimeSpecificData.Add(state.RuntimeVersion, data = new RuntimeSpecificData(state));
}

var reader = new ByteArrayCodeReader(code);
var decoder = Decoder.Create(state.Runtime.DataTarget.DataReader.PointerSize * 8, reader);
decoder.IP = startAddress;
Expand All @@ -53,27 +81,27 @@ protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, Stat

FlushCachedDataIfNeeded(state.Runtime.DataTarget.DataReader, address, buffer);

if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(callCountingStubTemplate))
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.callCountingStubTemplate))
{
const ulong TargetMethodAddressSlotOffset = 8;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + TargetMethodAddressSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + TargetMethodAddressSlotOffset);
}
else
{
buffer = new byte[13];
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(stubPrecodeTemplate))
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.stubPrecodeTemplate))
{
const ulong MethodDescSlotOffset = 0;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
isPrestubMD = true;
}
else
{
buffer = new byte[19];
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(fixupPrecodeTemplate))
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.fixupPrecodeTemplate))
{
const ulong MethodDescSlotOffset = 8;
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
isPrestubMD = true;
}

Expand Down

0 comments on commit d391085

Please sign in to comment.