Skip to content

Commit

Permalink
hashtest: add function to encode arbitrary test instructions
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 651506548
  • Loading branch information
ncbray authored and copybara-github committed Jul 11, 2024
1 parent 8389ccc commit b29ead0
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 0 deletions.
2 changes: 2 additions & 0 deletions fuzzer/hashtest/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ cc_library(
"debugging.cc",
"prefilter.cc",
"register_info.cc",
"synthesize_instruction.cc",
"xed_operand_util.cc",
],
hdrs = [
Expand All @@ -31,6 +32,7 @@ cc_library(
"prefilter.h",
"rand_util.h",
"register_info.h",
"synthesize_instruction.h",
"xed_operand_util.h",
],
deps = [
Expand Down
260 changes: 260 additions & 0 deletions fuzzer/hashtest/synthesize_instruction.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
// Copyright 2024 The Silifuzz Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "./fuzzer/hashtest/synthesize_instruction.h"

#include <bitset>
#include <cstddef>
#include <cstdint>
#include <random>
#include <utility>
#include <vector>

#include "./fuzzer/hashtest/candidate.h"
#include "./fuzzer/hashtest/debugging.h"
#include "./fuzzer/hashtest/rand_util.h"
#include "./fuzzer/hashtest/register_info.h"
#include "./fuzzer/hashtest/xed_operand_util.h"
#include "./instruction/xed_util.h"
#include "./util/checks.h"

namespace silifuzz {

namespace {

// Internal helper for PrepareFixedRegisters.
template <size_t N>
void PrepareFixedRegisters(RegisterBank bank, const std::bitset<N>& read,
const std::bitset<N>& written, std::bitset<N>& tmp,
std::vector<RegisterID>& needs_init,
std::vector<unsigned int>& is_written) {
for (size_t i = 0; i < N; i++) {
if (read.test(i) || written.test(i)) {
// Validate the register is in the tmp set.
CHECK(tmp.test(i));
tmp[i] = false;
}
if (read.test(i)) {
needs_init.push_back(
RegisterID{.bank = bank, .index = static_cast<unsigned int>(i)});
}
if (written.test(i)) {
is_written.push_back(i);
}
}
}

// Remove the fixed registers from the temp set and generate lists of fixed
// registers that may be read and written by the instruction.
void PrepareFixedRegisters(const RegisterReadWrite& fixed_reg,
RegisterMask& tmp,
std::vector<RegisterID>& needs_init,
std::vector<unsigned int>& is_written) {
PrepareFixedRegisters(RegisterBank::kGP, fixed_reg.read.gp,
fixed_reg.written.gp, tmp.gp, needs_init, is_written);
PrepareFixedRegisters(RegisterBank::kVec, fixed_reg.read.vec,
fixed_reg.written.vec, tmp.vec, needs_init, is_written);
PrepareFixedRegisters(RegisterBank::kMask, fixed_reg.read.mask,
fixed_reg.written.mask, tmp.mask, needs_init,
is_written);
PrepareFixedRegisters(RegisterBank::kMMX, fixed_reg.read.mmx,
fixed_reg.written.mmx, tmp.mmx, needs_init, is_written);
}

template <size_t N>
unsigned int HandleOperand(RegisterBank bank, Rng& rng, std::bitset<N>& tmp,
std::bitset<N>& entropy, bool read, bool written,
std::vector<RegisterID>& needs_init,
std::vector<unsigned int>& is_written) {
if (written) {
// Write to temp registers.
unsigned int index = PopRandomBit(rng, tmp);
if (read) {
needs_init.push_back(RegisterID{.bank = bank, .index = index});
}
is_written.push_back(index);
return index;
} else {
// Read directly from entropy.
return PopRandomBit(rng, entropy);
}
}

} // namespace

// TODO(ncbray): support "high byte" iforms. Unfortunately these can only target
// AH, BH, CH, and DH so this looks very similar to supporting fixed registers.
xed_encoder_operand_t GPRegOperand(unsigned int index, size_t width) {
return xed_reg(RegisterIDToXedReg(
RegisterID{.bank = RegisterBank::kGP, .index = index}, width));
}

xed_encoder_operand_t VecRegOperand(unsigned int index, size_t width) {
return xed_reg(RegisterIDToXedReg(
RegisterID{.bank = RegisterBank::kVec, .index = index}, width));
}

xed_encoder_operand_t MaskRegOperand(unsigned int index) {
CHECK_LT(index, 8);
return xed_reg(static_cast<xed_reg_enum_t>(XED_REG_MASK_FIRST + index));
}

xed_encoder_operand_t MMXRegOperand(unsigned int index) {
CHECK_LT(index, 8);
return xed_reg(static_cast<xed_reg_enum_t>(XED_REG_MMX_FIRST + index));
}

[[nodiscard]] bool SynthesizeTestInstruction(
const InstructionCandidate& candidate, RegisterPool& rpool, Rng& rng,
unsigned int effective_op_width, std::vector<RegisterID>& needs_init,
std::vector<unsigned int>& reg_is_written, uint8_t* ibuf,
size_t& ibuf_len) {
const RegisterBank mode = candidate.OutputMode();
const xed_inst_t* instruction = candidate.instruction;

// Use a writemask ~1/3rd of the time.
// On one hand we want to test writemasks, on the other hand they
// discard output bits.
bool use_writemask =
candidate.writemask && std::bernoulli_distribution(0.333)(rng);
// Zero half the time.
// Note: it appears that masked writes to mask registers must always be
// zeroing, although the disassembly doesn't include a {z}.
bool zero_writemask =
use_writemask &&
(std::bernoulli_distribution(0.5)(rng) || mode == RegisterBank::kMask);

// Remove the fixed registers from the tmp bitmask.
PrepareFixedRegisters(candidate.fixed_reg, rpool.tmp, needs_init,
reg_is_written);

InstructionBuilder builder(xed_inst_iclass(instruction), effective_op_width);

// Generate each operand
for (size_t operand_index = 0;
operand_index < xed_inst_noperands(instruction); ++operand_index) {
const xed_operand_t* const operand =
xed_inst_operand(instruction, operand_index);

if (OperandIsRegister(operand)) {
bool written = xed_operand_written(operand);
bool read = xed_operand_read(operand) ||
xed_operand_conditional_write(operand) ||
(written && use_writemask && !zero_writemask);

if (OperandIsExplicit(operand)) {
xed_encoder_operand_t op = {.type = XED_ENCODER_OPERAND_TYPE_INVALID};

if (OperandIsGPRegister(operand)) {
// Explicit general purpose register.
CHECK(!written || mode == RegisterBank::kGP);
unsigned int index = HandleOperand(
RegisterBank::kGP, rng, rpool.tmp.gp, rpool.entropy.gp, read,
written, needs_init, reg_is_written);
op =
GPRegOperand(index, OperandBitWidth(operand, effective_op_width));
} else if (OperandIsVectorRegister(operand)) {
// Explicit vector register.
CHECK(!written || mode == RegisterBank::kVec);
unsigned int index = HandleOperand(
RegisterBank::kVec, rng, rpool.tmp.vec, rpool.entropy.vec, read,
written, needs_init, reg_is_written);
op = VecRegOperand(index, VectorWidth(operand));
} else if (OperandIsMaskRegister(operand)) {
// Explicit mask register.
if (OperandIsWritemask(operand)) {
CHECK(!written);
if (use_writemask) {
unsigned int index = PopRandomBit(rng, rpool.entropy.mask);
op = MaskRegOperand(index);
} else {
// When k0 is used as writemask, this means "ignore writemask".
op = MaskRegOperand(0);
}
} else {
CHECK(!written || mode == RegisterBank::kMask);
unsigned int index = HandleOperand(
RegisterBank::kMask, rng, rpool.tmp.mask, rpool.entropy.mask,
read, written, needs_init, reg_is_written);
op = MaskRegOperand(index);
}
} else if (OperandIsMMXRegister(operand)) {
// Explicit MMX register.
CHECK(!written || mode == RegisterBank::kMMX);
unsigned int index = HandleOperand(
RegisterBank::kMMX, rng, rpool.tmp.mmx, rpool.entropy.mmx, read,
written, needs_init, reg_is_written);
op = MMXRegOperand(index);
} else {
DieBecauseOperand(instruction, operand);
}
builder.AddOperands(std::move(op));
} else if (OperandIsFlagRegister(operand)) {
// Nothing needed, this is XED explicitly annotating flag access.
CHECK(OperandIsSuppressed(operand));
} else if (OperandIsImplicit(operand) || OperandIsSuppressed(operand)) {
// Note: we're handling suppressed operands here so that we can validate
// they are not anything unexpected. They will not affect the encoded
// instruction, supressed operands cannot be affected in any way.
xed_encoder_operand_t op = {.type = XED_ENCODER_OPERAND_TYPE_INVALID};
if (xed_operand_type(operand) == XED_OPERAND_TYPE_REG) {
// A fixed register.
op = xed_reg(xed_operand_reg(operand));
} else if (xed_operand_type(operand) == XED_OPERAND_TYPE_NT_LOOKUP_FN) {
// A fixed GP register of variable width.
xed_nonterminal_enum_t name = xed_operand_nonterminal_name(operand);
RegisterID reg_id = XedNonterminalToRegisterID(name);
CHECK(reg_id.bank == RegisterBank::kGP);
op = GPRegOperand(reg_id.index,
OperandBitWidth(operand, effective_op_width));
} else {
DieBecauseOperand(instruction, operand);
}
// Implicit operands must be emitted, suppressed must not.
if (OperandIsImplicit(operand)) {
builder.AddOperands(std::move(op));
}
} else {
DieBecauseOperand(instruction, operand);
}
} else if (OperandIsImmediate(operand)) {
// Note: IMM1 only used for memory ops?
CHECK_EQ(xed_operand_name(operand), XED_OPERAND_IMM0);
if (OperandIsExplicit(operand)) {
// Note: XED appears to truncate out-of-range immediates, so we don't
// bother doing it here.
// TODO(ncbray): bias towards "interesting" intermediates such as 0, 1,
// -1, etc?
builder.AddOperands(
xed_simm0(rng(), OperandBitWidth(operand, effective_op_width)));
} else if (OperandIsImplicit(operand)) {
// Implicit immediates appear to always be 1?
builder.AddOperands(xed_simm0(1, 8));
} else {
// A supressed immediate doesn't make sense?
DieBecauseOperand(instruction, operand);
}
} else {
DieBecauseOperand(instruction, operand);
}
}

if (zero_writemask) {
builder.AddOperands(xed_other(XED_OPERAND_ZEROING, 1));
}

return builder.Encode(ibuf, ibuf_len);
}

} // namespace silifuzz
62 changes: 62 additions & 0 deletions fuzzer/hashtest/synthesize_instruction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024 The Silifuzz Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_
#define THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

#include "./fuzzer/hashtest/candidate.h"
#include "./fuzzer/hashtest/register_info.h"

namespace silifuzz {

// RNG used for random instruction and test generation.
using Rng = std::mt19937_64;

// The layout of registers for generating instructions and tests.
// `tmp` registers can be used for any purpose. If a register is "fixed" for any
// instruction (for example the SSE4.1 version of BLENDVPS will always read from
// XMM0) that register must be contained in `tmp`.
// `entropy` registers contain high-entropy values. These registers contain
// state that is updated and persists throughout the test. They are mutually
// exclusive with `tmp` registers.
// `vec_width` is the maximum vector register width that should be used.
// `mask_width` is the maximum mask register width that should be used.
struct RegisterPool {
RegisterMask tmp;
RegisterMask entropy;
size_t vec_width;
size_t mask_width;
};

// Synthesize a randomized instruction based on `candidate`.
// Used temp and entropy registers will be removed from `rpool`.
[[nodiscard]] bool SynthesizeTestInstruction(
const InstructionCandidate& candidate, RegisterPool& rpool, Rng& rng,
unsigned int effective_op_width, std::vector<RegisterID>& needs_init,
std::vector<unsigned int>& is_written, uint8_t* ibuf, size_t& ibuf_len);

// Helpers for generating XED register operands.
xed_encoder_operand_t GPRegOperand(unsigned int index, size_t width);
xed_encoder_operand_t VecRegOperand(unsigned int index, size_t width);
xed_encoder_operand_t MaskRegOperand(unsigned int index);
xed_encoder_operand_t MMXRegOperand(unsigned int index);

} // namespace silifuzz

#endif // THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_

0 comments on commit b29ead0

Please sign in to comment.