From b29ead013ffe6541b5862478f20565294b5ba987 Mon Sep 17 00:00:00 2001 From: Nick Bray Date: Thu, 11 Jul 2024 12:57:12 -0700 Subject: [PATCH] hashtest: add function to encode arbitrary test instructions PiperOrigin-RevId: 651506548 --- fuzzer/hashtest/BUILD | 2 + fuzzer/hashtest/synthesize_instruction.cc | 260 ++++++++++++++++++++++ fuzzer/hashtest/synthesize_instruction.h | 62 ++++++ 3 files changed, 324 insertions(+) create mode 100644 fuzzer/hashtest/synthesize_instruction.cc create mode 100644 fuzzer/hashtest/synthesize_instruction.h diff --git a/fuzzer/hashtest/BUILD b/fuzzer/hashtest/BUILD index bc7752f0..aa32fe5e 100644 --- a/fuzzer/hashtest/BUILD +++ b/fuzzer/hashtest/BUILD @@ -23,6 +23,7 @@ cc_library( "debugging.cc", "prefilter.cc", "register_info.cc", + "synthesize_instruction.cc", "xed_operand_util.cc", ], hdrs = [ @@ -31,6 +32,7 @@ cc_library( "prefilter.h", "rand_util.h", "register_info.h", + "synthesize_instruction.h", "xed_operand_util.h", ], deps = [ diff --git a/fuzzer/hashtest/synthesize_instruction.cc b/fuzzer/hashtest/synthesize_instruction.cc new file mode 100644 index 00000000..eb5c717c --- /dev/null +++ b/fuzzer/hashtest/synthesize_instruction.cc @@ -0,0 +1,260 @@ +// Copyright 2024 The Silifuzz Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "./fuzzer/hashtest/synthesize_instruction.h"
+
+#include <bitset>
+#include <cstddef>
+#include <cstdint>
+#include <random>
+#include <utility>
+#include <vector>
+
+#include "./fuzzer/hashtest/candidate.h"
+#include "./fuzzer/hashtest/debugging.h"
+#include "./fuzzer/hashtest/rand_util.h"
+#include "./fuzzer/hashtest/register_info.h"
+#include "./fuzzer/hashtest/xed_operand_util.h"
+#include "./instruction/xed_util.h"
+#include "./util/checks.h"
+
+namespace silifuzz {
+
+namespace {
+
+// Per-bank helper: drops fixed regs from `tmp`, records reads and writes.
+template <size_t N>
+void PrepareFixedRegisters(RegisterBank bank, const std::bitset<N>& read,
+                           const std::bitset<N>& written, std::bitset<N>& tmp,
+                           std::vector<RegisterID>& needs_init,
+                           std::vector<unsigned int>& is_written) {
+  for (size_t i = 0; i < N; i++) {
+    if (read.test(i) || written.test(i)) {
+      // A fixed register must still be in the tmp set; claim it here.
+      CHECK(tmp.test(i));
+      tmp[i] = false;
+    }
+    if (read.test(i)) {
+      needs_init.push_back(
+          RegisterID{.bank = bank, .index = static_cast<unsigned int>(i)});
+    }
+    if (written.test(i)) {
+      is_written.push_back(i);
+    }
+  }
+}
+
+// Remove the fixed registers from the temp set and generate lists of fixed
+// registers that may be read and written by the instruction.
+void PrepareFixedRegisters(const RegisterReadWrite& fixed_reg,
+                           RegisterMask& tmp,
+                           std::vector<RegisterID>& needs_init,
+                           std::vector<unsigned int>& is_written) {
+  PrepareFixedRegisters(RegisterBank::kGP, fixed_reg.read.gp,
+                        fixed_reg.written.gp, tmp.gp, needs_init, is_written);
+  PrepareFixedRegisters(RegisterBank::kVec, fixed_reg.read.vec,
+                        fixed_reg.written.vec, tmp.vec, needs_init, is_written);
+  PrepareFixedRegisters(RegisterBank::kMask, fixed_reg.read.mask,
+                        fixed_reg.written.mask, tmp.mask, needs_init,
+                        is_written);
+  PrepareFixedRegisters(RegisterBank::kMMX, fixed_reg.read.mmx,
+                        fixed_reg.written.mmx, tmp.mmx, needs_init, is_written);
+}
+
+template <size_t N>
+unsigned int HandleOperand(RegisterBank bank, Rng& rng, std::bitset<N>& tmp,
+                           std::bitset<N>& entropy, bool read, bool written,
+                           std::vector<RegisterID>& needs_init,
+                           std::vector<unsigned int>& is_written) {
+  if (written) {
+    // Written operands take a tmp register; if also read it needs init.
+    unsigned int index = PopRandomBit(rng, tmp);
+    if (read) {
+      needs_init.push_back(RegisterID{.bank = bank, .index = index});
+    }
+    is_written.push_back(index);
+    return index;
+  } else {
+    // Read-only operands take their register directly from entropy.
+    return PopRandomBit(rng, entropy);
+  }
+}
+
+}  // namespace
+
+// TODO(ncbray): support "high byte" iforms. Unfortunately these can only target
+// AH, BH, CH, and DH so this looks very similar to supporting fixed registers.
+xed_encoder_operand_t GPRegOperand(unsigned int index, size_t width) {
+  return xed_reg(RegisterIDToXedReg(
+      RegisterID{.bank = RegisterBank::kGP, .index = index}, width));
+}
+
+xed_encoder_operand_t VecRegOperand(unsigned int index, size_t width) {
+  return xed_reg(RegisterIDToXedReg(
+      RegisterID{.bank = RegisterBank::kVec, .index = index}, width));
+}
+
+xed_encoder_operand_t MaskRegOperand(unsigned int index) {
+  CHECK_LT(index, 8);
+  return xed_reg(static_cast<xed_reg_enum_t>(XED_REG_MASK_FIRST + index));
+}
+
+xed_encoder_operand_t MMXRegOperand(unsigned int index) {
+  CHECK_LT(index, 8);
+  return xed_reg(static_cast<xed_reg_enum_t>(XED_REG_MMX_FIRST + index));
+}
+
+[[nodiscard]] bool SynthesizeTestInstruction(
+    const InstructionCandidate& candidate, RegisterPool& rpool, Rng& rng,
+    unsigned int effective_op_width, std::vector<RegisterID>& needs_init,
+    std::vector<unsigned int>& reg_is_written, uint8_t* ibuf,
+    size_t& ibuf_len) {
+  const RegisterBank mode = candidate.OutputMode();
+  const xed_inst_t* instruction = candidate.instruction;
+
+  // Use a writemask ~1/3rd of the time.
+  // On one hand we want to test writemasks, on the other hand they
+  // discard output bits.
+  bool use_writemask =
+      candidate.writemask && std::bernoulli_distribution(0.333)(rng);
+  // Zero half the time.
+  // Note: it appears that masked writes to mask registers must always be
+  // zeroing, although the disassembly doesn't include a {z}.
+  bool zero_writemask =
+      use_writemask &&
+      (std::bernoulli_distribution(0.5)(rng) || mode == RegisterBank::kMask);
+
+  // Remove the fixed registers from the tmp bitmask.
+  PrepareFixedRegisters(candidate.fixed_reg, rpool.tmp, needs_init,
+                        reg_is_written);
+
+  InstructionBuilder builder(xed_inst_iclass(instruction), effective_op_width);
+
+  // Generate each operand, in XED operand order.
+  for (size_t operand_index = 0;
+       operand_index < xed_inst_noperands(instruction); ++operand_index) {
+    const xed_operand_t* const operand =
+        xed_inst_operand(instruction, operand_index);
+
+    if (OperandIsRegister(operand)) {
+      bool written = xed_operand_written(operand);
+      bool read = xed_operand_read(operand) ||
+                  xed_operand_conditional_write(operand) ||
+                  (written && use_writemask && !zero_writemask);
+
+      if (OperandIsExplicit(operand)) {
+        xed_encoder_operand_t op = {.type = XED_ENCODER_OPERAND_TYPE_INVALID};
+
+        if (OperandIsGPRegister(operand)) {
+          // Explicit general purpose register.
+          CHECK(!written || mode == RegisterBank::kGP);
+          unsigned int index = HandleOperand(
+              RegisterBank::kGP, rng, rpool.tmp.gp, rpool.entropy.gp, read,
+              written, needs_init, reg_is_written);
+          op =
+              GPRegOperand(index, OperandBitWidth(operand, effective_op_width));
+        } else if (OperandIsVectorRegister(operand)) {
+          // Explicit vector register.
+          CHECK(!written || mode == RegisterBank::kVec);
+          unsigned int index = HandleOperand(
+              RegisterBank::kVec, rng, rpool.tmp.vec, rpool.entropy.vec, read,
+              written, needs_init, reg_is_written);
+          op = VecRegOperand(index, VectorWidth(operand));
+        } else if (OperandIsMaskRegister(operand)) {
+          // Explicit mask register.
+          if (OperandIsWritemask(operand)) {
+            CHECK(!written);
+            if (use_writemask) {
+              unsigned int index = PopRandomBit(rng, rpool.entropy.mask);
+              op = MaskRegOperand(index);
+            } else {
+              // When k0 is used as writemask, this means "ignore writemask".
+              op = MaskRegOperand(0);
+            }
+          } else {
+            CHECK(!written || mode == RegisterBank::kMask);
+            unsigned int index = HandleOperand(
+                RegisterBank::kMask, rng, rpool.tmp.mask, rpool.entropy.mask,
+                read, written, needs_init, reg_is_written);
+            op = MaskRegOperand(index);
+          }
+        } else if (OperandIsMMXRegister(operand)) {
+          // Explicit MMX register.
+          CHECK(!written || mode == RegisterBank::kMMX);
+          unsigned int index = HandleOperand(
+              RegisterBank::kMMX, rng, rpool.tmp.mmx, rpool.entropy.mmx, read,
+              written, needs_init, reg_is_written);
+          op = MMXRegOperand(index);
+        } else {
+          DieBecauseOperand(instruction, operand);
+        }
+        builder.AddOperands(std::move(op));
+      } else if (OperandIsFlagRegister(operand)) {
+        // Nothing needed, this is XED explicitly annotating flag access.
+        CHECK(OperandIsSuppressed(operand));
+      } else if (OperandIsImplicit(operand) || OperandIsSuppressed(operand)) {
+        // Note: we're handling suppressed operands here so that we can validate
+        // they are not anything unexpected. They will not affect the encoded
+        // instruction, suppressed operands cannot be affected in any way.
+        xed_encoder_operand_t op = {.type = XED_ENCODER_OPERAND_TYPE_INVALID};
+        if (xed_operand_type(operand) == XED_OPERAND_TYPE_REG) {
+          // A fixed register.
+          op = xed_reg(xed_operand_reg(operand));
+        } else if (xed_operand_type(operand) == XED_OPERAND_TYPE_NT_LOOKUP_FN) {
+          // A fixed GP register of variable width.
+          xed_nonterminal_enum_t name = xed_operand_nonterminal_name(operand);
+          RegisterID reg_id = XedNonterminalToRegisterID(name);
+          CHECK(reg_id.bank == RegisterBank::kGP);
+          op = GPRegOperand(reg_id.index,
+                            OperandBitWidth(operand, effective_op_width));
+        } else {
+          DieBecauseOperand(instruction, operand);
+        }
+        // Implicit operands must be emitted, suppressed must not.
+        if (OperandIsImplicit(operand)) {
+          builder.AddOperands(std::move(op));
+        }
+      } else {
+        DieBecauseOperand(instruction, operand);
+      }
+    } else if (OperandIsImmediate(operand)) {
+      // Note: IMM1 only used for memory ops?
+      CHECK_EQ(xed_operand_name(operand), XED_OPERAND_IMM0);
+      if (OperandIsExplicit(operand)) {
+        // Note: XED appears to truncate out-of-range immediates, so we don't
+        // bother doing it here.
+        // TODO(ncbray): bias towards "interesting" immediates such as 0, 1,
+        // -1, etc?
+        builder.AddOperands(
+            xed_simm0(rng(), OperandBitWidth(operand, effective_op_width)));
+      } else if (OperandIsImplicit(operand)) {
+        // Implicit immediates appear to always be 1?
+        builder.AddOperands(xed_simm0(1, 8));
+      } else {
+        // A suppressed immediate doesn't make sense?
+        DieBecauseOperand(instruction, operand);
+      }
+    } else {
+      DieBecauseOperand(instruction, operand);
+    }
+  }
+
+  if (zero_writemask) {
+    builder.AddOperands(xed_other(XED_OPERAND_ZEROING, 1));
+  }
+
+  return builder.Encode(ibuf, ibuf_len);
+}
+
+}  // namespace silifuzz
diff --git a/fuzzer/hashtest/synthesize_instruction.h b/fuzzer/hashtest/synthesize_instruction.h
new file mode 100644
index 00000000..56ffec9c
--- /dev/null
+++ b/fuzzer/hashtest/synthesize_instruction.h
@@ -0,0 +1,62 @@
+// Copyright 2024 The Silifuzz Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_
+#define THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <random>
+#include <vector>
+
+#include "./fuzzer/hashtest/candidate.h"
+#include "./fuzzer/hashtest/register_info.h"
+
+namespace silifuzz {
+
+// RNG used for random instruction and test generation.
+using Rng = std::mt19937_64;
+
+// The layout of registers for generating instructions and tests.
+// `tmp` registers can be used for any purpose. If a register is "fixed" for any
+// instruction (for example the SSE4.1 version of BLENDVPS will always read from
+// XMM0) that register must be contained in `tmp`.
+// `entropy` registers contain high-entropy values. These registers contain
+// state that is updated and persists throughout the test. They are mutually
+// exclusive with `tmp` registers.
+// `vec_width` is the maximum vector register width that should be used.
+// `mask_width` is the maximum mask register width that should be used.
+struct RegisterPool {
+  RegisterMask tmp;
+  RegisterMask entropy;
+  size_t vec_width;
+  size_t mask_width;
+};
+
+// Synthesize a randomized instruction based on `candidate`.
+// Used temp and entropy registers will be removed from `rpool`.
+[[nodiscard]] bool SynthesizeTestInstruction(
+    const InstructionCandidate& candidate, RegisterPool& rpool, Rng& rng,
+    unsigned int effective_op_width, std::vector<RegisterID>& needs_init,
+    std::vector<unsigned int>& is_written, uint8_t* ibuf, size_t& ibuf_len);
+
+// Helpers for generating XED register operands.
+xed_encoder_operand_t GPRegOperand(unsigned int index, size_t width);
+xed_encoder_operand_t VecRegOperand(unsigned int index, size_t width);
+xed_encoder_operand_t MaskRegOperand(unsigned int index);
+xed_encoder_operand_t MMXRegOperand(unsigned int index);
+
+}  // namespace silifuzz
+
+#endif  // THIRD_PARTY_SILIFUZZ_FUZZER_HASHTEST_SYNTHESIZE_INSTRUCTION_H_