Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Vector Uop Generation Support for Mask Generation and Widening Instructions #181

Merged
merged 16 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 173 additions & 47 deletions arches/isa_json/gen_uarch_rv64v_json.py

Large diffs are not rendered by default.

551 changes: 461 additions & 90 deletions arches/isa_json/olympia_uarch_rv64v.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion core/Dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ namespace olympia

InstGroupPtr insts_dispatched =
sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
;

bool keep_dispatching = true;
for (uint32_t i = 0; (i < num_dispatch) && keep_dispatching; ++i)
{
Expand Down
112 changes: 33 additions & 79 deletions core/Dispatch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,34 +123,24 @@ namespace olympia

///////////////////////////////////////////////////////////////////////
// Stall counters
enum StallReason
enum StallReason : uint16_t
{
CMOV_BUSY = InstArchInfo::TargetPipe::CMOV, // Could not send any or all instructions --
// CMOV busy
DIV_BUSY =
InstArchInfo::TargetPipe::DIV, // Could not send any or all instructions -- DIV busy
FADDSUB_BUSY = InstArchInfo::TargetPipe::FADDSUB, // Could not send any or all
// instructions -- FADDSUB busy
FLOAT_BUSY = InstArchInfo::TargetPipe::FLOAT, // Could not send any or all instructions
// -- FLOAT busy
FMAC_BUSY = InstArchInfo::TargetPipe::FMAC, // Could not send any or all instructions --
// FMAC busy
I2F_BUSY =
InstArchInfo::TargetPipe::I2F, // Could not send any or all instructions -- I2F busy
F2I_BUSY =
InstArchInfo::TargetPipe::F2I, // Could not send any or all instructions -- F2I busy
INT_BUSY =
InstArchInfo::TargetPipe::INT, // Could not send any or all instructions -- INT busy
LSU_BUSY =
InstArchInfo::TargetPipe::LSU, // Could not send any or all instructions -- LSU busy
MUL_BUSY =
InstArchInfo::TargetPipe::MUL, // Could not send any or all instructions -- MUL busy
BR_BUSY =
InstArchInfo::TargetPipe::BR, // Could not send any or all instructions -- BR busy
VINT_BUSY = InstArchInfo::TargetPipe::VINT,
VMUL_BUSY = InstArchInfo::TargetPipe::VMUL,
VDIV_BUSY = InstArchInfo::TargetPipe::VDIV,
VSET_BUSY = InstArchInfo::TargetPipe::VSET,
BR_BUSY = InstArchInfo::TargetPipe::BR,
CMOV_BUSY = InstArchInfo::TargetPipe::CMOV,
DIV_BUSY = InstArchInfo::TargetPipe::DIV,
FADDSUB_BUSY = InstArchInfo::TargetPipe::FADDSUB,
FLOAT_BUSY = InstArchInfo::TargetPipe::FLOAT,
FMAC_BUSY = InstArchInfo::TargetPipe::FMAC,
I2F_BUSY = InstArchInfo::TargetPipe::I2F,
F2I_BUSY = InstArchInfo::TargetPipe::F2I,
INT_BUSY = InstArchInfo::TargetPipe::INT,
LSU_BUSY = InstArchInfo::TargetPipe::LSU,
MUL_BUSY = InstArchInfo::TargetPipe::MUL,
VINT_BUSY = InstArchInfo::TargetPipe::VINT,
VMASK_BUSY = InstArchInfo::TargetPipe::VMASK,
VMUL_BUSY = InstArchInfo::TargetPipe::VMUL,
VDIV_BUSY = InstArchInfo::TargetPipe::VDIV,
VSET_BUSY = InstArchInfo::TargetPipe::VSET,
NO_ROB_CREDITS = InstArchInfo::TargetPipe::SYS, // No credits from the ROB
NOT_STALLED, // Made forward progress (dispatched all instructions or no instructions)
N_STALL_REASONS
Expand Down Expand Up @@ -186,6 +176,8 @@ namespace olympia
sparta::Counter::COUNT_NORMAL, getClock()),
sparta::CycleCounter(getStatisticSet(), "stall_vint_busy", "VINT busy",
sparta::Counter::COUNT_NORMAL, getClock()),
sparta::CycleCounter(getStatisticSet(), "stall_vmask_busy", "VMASK busy",
sparta::Counter::COUNT_NORMAL, getClock()),
sparta::CycleCounter(getStatisticSet(), "stall_vmul_busy", "VMUL busy",
sparta::Counter::COUNT_NORMAL, getClock()),
sparta::CycleCounter(getStatisticSet(), "stall_vdiv_busy", "VDIV busy",
Expand Down Expand Up @@ -223,6 +215,8 @@ namespace olympia
sparta::Counter::COUNT_NORMAL),
sparta::Counter(getStatisticSet(), "count_vint_insts", "Total VINT insts",
sparta::Counter::COUNT_NORMAL),
sparta::Counter(getStatisticSet(), "count_vmask_insts", "Total VMASK insts",
sparta::Counter::COUNT_NORMAL),
sparta::Counter(getStatisticSet(), "count_vmul_insts", "Total VMUL insts",
sparta::Counter::COUNT_NORMAL),
sparta::Counter(getStatisticSet(), "count_vdiv_insts", "Total VDIV insts",
Expand Down Expand Up @@ -282,63 +276,23 @@ namespace olympia

inline std::ostream & operator<<(std::ostream & os, const Dispatch::StallReason & stall)
{
switch (stall)
if (stall == Dispatch::StallReason::NOT_STALLED)
{
case Dispatch::StallReason::NOT_STALLED:
os << "NOT_STALLED";
break;
case Dispatch::StallReason::NO_ROB_CREDITS:
}
else if (stall == Dispatch::StallReason::NO_ROB_CREDITS)
{
os << "NO_ROB_CREDITS";
break;
case Dispatch::StallReason::LSU_BUSY:
os << "LSU_BUSY";
break;
case Dispatch::StallReason::CMOV_BUSY:
os << "CMOV_BUSY";
break;
case Dispatch::StallReason::DIV_BUSY:
os << "DIV_BUSY";
break;
case Dispatch::StallReason::FADDSUB_BUSY:
os << "FADDSUB_BUSY";
break;
case Dispatch::StallReason::FLOAT_BUSY:
os << "FLOAT_BUSY";
break;
case Dispatch::StallReason::FMAC_BUSY:
os << "FMAC_BUSY";
break;
case Dispatch::StallReason::I2F_BUSY:
os << "I2F_BUSY";
break;
case Dispatch::StallReason::F2I_BUSY:
os << "F2I_BUSY";
break;
case Dispatch::StallReason::INT_BUSY:
os << "INT_BUSY";
break;
case Dispatch::StallReason::MUL_BUSY:
os << "MUL_BUSY";
break;
case Dispatch::StallReason::BR_BUSY:
os << "BR_BUSY";
break;
case Dispatch::StallReason::VINT_BUSY:
os << "VINT_BUSY";
break;
case Dispatch::StallReason::VMUL_BUSY:
os << "VMUL_BUSY";
break;
case Dispatch::StallReason::VDIV_BUSY:
os << "VDIV_BUSY";
break;
case Dispatch::StallReason::VSET_BUSY:
os << "VSET_BUSY";
break;
case Dispatch::StallReason::N_STALL_REASONS:
}
else if (stall != Dispatch::StallReason::N_STALL_REASONS)
{
os << InstArchInfo::execution_pipe_string_map.at((InstArchInfo::TargetPipe)stall) << "_BUSY";
}
else
{
sparta_assert(false, "How'd we get here?");
}

return os;
}
} // namespace olympia
} // namespace olympia
22 changes: 9 additions & 13 deletions core/ExecutePipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ namespace olympia
unit_busy_ == false,
"ExecutePipe is receiving a new instruction when it's already busy!!");
}

// Get instruction latency
uint32_t exe_time = ignore_inst_execute_time_ ? execute_time_ : ex_inst->getExecuteTime();

if (!ex_inst->isVset() && ex_inst->isVector())
{
// have to factor in vlen, sew, valu length to calculate how many passes are needed
Expand All @@ -72,14 +75,14 @@ namespace olympia
// will truncate, but we have each adder support the largest SEW possible
if (ex_inst->getPipe() == InstArchInfo::TargetPipe::VINT)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to define additional execution resources for other kinds of vector operations, such as boolean logicals, compares, min/max, multiplies, and divides. I'm not sure which operations should be supported by the VALU adder.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can just support all of them under the VALU adder, and then have a separate unit for masks, slides, and more complex computations like scatter/gather. In the case of scatter/gather, I guess we just define a new unit that has a state machine to track a VALU that is used only by that unit.

{
// First time seeing this uop, determine number of passes needed
if (num_passes_needed_ == 0)
{
// number of elements we operate on is dependent on either the AVL or current
// VLMAX we divide VLMAX by LMUL, because we UOp fracture, so we divide by LMUL
// for current instruction VL
uint32_t vl = ex_inst->getVL() < ex_inst->getVLMAX() / ex_inst->getLMUL()
? ex_inst->getVL()
: ex_inst->getVLMAX() / ex_inst->getLMUL();
// The number of non-tail elements in the uop is used to determine how many
// passes are needed
const uint32_t num_elems_per_uop = ex_inst->getVLMAX() / ex_inst->getLMUL();
const uint32_t num_elems_remaining = ex_inst->getVL() - (num_elems_per_uop * (ex_inst->getUOpID() - 1));
const uint32_t vl = std::min(num_elems_per_uop, num_elems_remaining);
const uint32_t num_passes = std::ceil(vl / valu_adder_num_);
if (num_passes > 1)
{
Expand Down Expand Up @@ -169,13 +172,6 @@ namespace olympia
ex_inst->setStatus(Inst::Status::COMPLETED);
complete_event_.collect(*ex_inst);
ILOG("Completing inst: " << ex_inst);
if (ex_inst->isUOp())
{
sparta_assert(!ex_inst->getUOpParent().expired(),
"UOp instruction parent shared pointer is expired");
auto shared_ex_inst = ex_inst->getUOpParent().lock();
shared_ex_inst->incrementUOpDoneCount();
}
out_execute_pipe_.send(1);
}

Expand Down
58 changes: 21 additions & 37 deletions core/Inst.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,14 @@ namespace olympia

// Set the instruction's target PC (branch target or load/store target)
void setTargetVAddr(sparta::memory::addr_t target_vaddr) { target_vaddr_ = target_vaddr; }
sparta::memory::addr_t getTargetVAddr() const { return target_vaddr_; }

void setVCSRs(const VCSRs * inputVCSRs)
{
VCSRs_.setVCSRs(inputVCSRs->vl, inputVCSRs->sew, inputVCSRs->lmul, inputVCSRs->vta);
kathlenemagnus marked this conversation as resolved.
Show resolved Hide resolved
}

const VCSRs * getVCSRs() const { return &VCSRs_; }

// Set lmul from vset (vsetivli, vsetvli)
void setLMUL(uint32_t lmul)
Expand All @@ -270,41 +278,20 @@ namespace olympia
// vta = false means undisturbed, maintain original destination values
void setVTA(bool vta) { VCSRs_.vta = vta; }

void setTail(bool has_tail) { has_tail_ = has_tail; }

void setVCSRs(const VCSRs * inputVCSRs)
{
VCSRs_.setVCSRs(inputVCSRs->vl, inputVCSRs->sew, inputVCSRs->lmul, inputVCSRs->vta);
}

const VCSRs * getVCSRs() const { return &VCSRs_; }

void setUOpParent(sparta::SpartaWeakPointer<olympia::Inst> & uop_parent)
{
uop_parent_ = uop_parent;
}

void setUOpCount(uint64_t uop_count) { uop_count_ = uop_count; }

void incrementUOpDoneCount() { uop_done_count_++; }

sparta::memory::addr_t getTargetVAddr() const { return target_vaddr_; }

uint32_t getSEW() const { return VCSRs_.sew; }

uint32_t getLMUL() const { return VCSRs_.lmul; }

uint32_t getVL() const { return VCSRs_.vl; }

uint32_t getVTA() const { return VCSRs_.vta; }

uint32_t getVLMAX() const { return VCSRs_.vlmax; }

uint64_t getUOpDoneCount() { return uop_done_count_; }

sparta::SpartaWeakPointer<olympia::Inst> getUOpParent() { return uop_parent_; }
void setTail(bool has_tail) { has_tail_ = has_tail; }
bool hasTail() const { return has_tail_; }

uint64_t getUOpCount() const { return uop_count_; }
void setUOpParent(sparta::SpartaWeakPointer<olympia::Inst> & parent_uop)
{
parent_uop_ = parent_uop;
}
sparta::SpartaWeakPointer<olympia::Inst> getUOpParent() { return parent_uop_; }

// Branch instruction was taken (always set for JAL/JALR)
void setTakenBranch(bool taken) { is_taken_branch_ = taken; }
Expand Down Expand Up @@ -361,6 +348,8 @@ namespace olympia

uint32_t getExecuteTime() const { return inst_arch_info_->getExecutionTime(); }

InstArchInfo::UopGenType getUopGenType() const { return inst_arch_info_->getUopGenType(); }

uint64_t getRAdr() const { return target_vaddr_ | 0x8000000; } // faked

bool isSpeculative() const { return is_speculative_; }
Expand All @@ -383,10 +372,6 @@ namespace olympia

bool isVector() const { return is_vector_; }

bool hasTail() const { return has_tail_; }

// bool isVX() const {} // checking if instruction is a vector-scalar

// Rename information
core_types::RegisterBitMask & getSrcRegisterBitMask(const core_types::RegFile rf)
{
Expand Down Expand Up @@ -480,17 +465,16 @@ namespace olympia
const bool is_csr_;
const bool is_vector_;
const bool is_return_;
bool has_tail_; // Does this vector instruction have a tail?
uint64_t uop_done_count_ =
1; // start at 1 because the uop count includes the parent instruction
uint64_t uop_count_ = 0;

VCSRs VCSRs_;
bool has_tail_; // Does this vector uop have a tail?
kathlenemagnus marked this conversation as resolved.
Show resolved Hide resolved

// blocking vset is a vset that needs to read a value from a register value. A blocking vset
// can't be resolved until after execution, so we need to block on it due to UOp fracturing
bool is_blocking_vset_ = false;

sparta::SpartaWeakPointer<olympia::Inst> uop_parent_;
sparta::SpartaWeakPointer<olympia::Inst> parent_uop_;

// Did this instruction mispredict?
bool is_mispredicted_ = false;
bool is_taken_branch_ = false;
Expand Down
66 changes: 57 additions & 9 deletions core/InstArchInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,53 @@
namespace olympia
{
const InstArchInfo::TargetPipeMap InstArchInfo::execution_pipe_map = {
{"br", InstArchInfo::TargetPipe::BR}, {"cmov", InstArchInfo::TargetPipe::CMOV},
{"div", InstArchInfo::TargetPipe::DIV}, {"faddsub", InstArchInfo::TargetPipe::FADDSUB},
{"float", InstArchInfo::TargetPipe::FLOAT}, {"fmac", InstArchInfo::TargetPipe::FMAC},
{"i2f", InstArchInfo::TargetPipe::I2F}, {"f2i", InstArchInfo::TargetPipe::F2I},
{"int", InstArchInfo::TargetPipe::INT}, {"lsu", InstArchInfo::TargetPipe::LSU},
{"mul", InstArchInfo::TargetPipe::MUL}, {"vint", InstArchInfo::TargetPipe::VINT},
{"vset", InstArchInfo::TargetPipe::VSET}, {"vmul", InstArchInfo::TargetPipe::VMUL},
{"vdiv", InstArchInfo::TargetPipe::VDIV}, {"sys", InstArchInfo::TargetPipe::SYS},
{"?", InstArchInfo::TargetPipe::UNKNOWN}};
{"br", InstArchInfo::TargetPipe::BR},
{"cmov", InstArchInfo::TargetPipe::CMOV},
{"div", InstArchInfo::TargetPipe::DIV},
{"faddsub", InstArchInfo::TargetPipe::FADDSUB},
{"float", InstArchInfo::TargetPipe::FLOAT},
{"fmac", InstArchInfo::TargetPipe::FMAC},
{"i2f", InstArchInfo::TargetPipe::I2F},
{"f2i", InstArchInfo::TargetPipe::F2I},
{"int", InstArchInfo::TargetPipe::INT},
{"lsu", InstArchInfo::TargetPipe::LSU},
{"mul", InstArchInfo::TargetPipe::MUL},
{"vint", InstArchInfo::TargetPipe::VINT},
{"vmask", InstArchInfo::TargetPipe::VMASK},
{"vset", InstArchInfo::TargetPipe::VSET},
{"vmul", InstArchInfo::TargetPipe::VMUL},
{"vdiv", InstArchInfo::TargetPipe::VDIV},
{"sys", InstArchInfo::TargetPipe::SYS},
{"?", InstArchInfo::TargetPipe::UNKNOWN}
};

// Display name for each TargetPipe value: the reverse direction of
// execution_pipe_map, with the pipe names spelled in upper case.
// The Dispatch::StallReason stream operator looks a pipe up in this table
// with .at() and appends "_BUSY", so any TargetPipe that can be used as a
// stall reason needs an entry here.
// NOTE(review): TargetPipeStringMap is declared elsewhere; presumably a
// std::map-like type whose .at() throws on a missing key -- confirm.
const InstArchInfo::TargetPipeStringMap InstArchInfo::execution_pipe_string_map = {
{InstArchInfo::TargetPipe::BR, "BR"},
{InstArchInfo::TargetPipe::CMOV, "CMOV"},
{InstArchInfo::TargetPipe::DIV, "DIV"},
{InstArchInfo::TargetPipe::FADDSUB, "FADDSUB"},
{InstArchInfo::TargetPipe::FLOAT, "FLOAT"},
{InstArchInfo::TargetPipe::FMAC, "FMAC"},
{InstArchInfo::TargetPipe::I2F, "I2F"},
{InstArchInfo::TargetPipe::F2I, "F2I"},
{InstArchInfo::TargetPipe::INT, "INT"},
{InstArchInfo::TargetPipe::LSU, "LSU"},
{InstArchInfo::TargetPipe::MUL, "MUL"},
{InstArchInfo::TargetPipe::VINT, "VINT"},
{InstArchInfo::TargetPipe::VMASK, "VMASK"},
{InstArchInfo::TargetPipe::VSET, "VSET"},
{InstArchInfo::TargetPipe::VMUL, "VMUL"},
{InstArchInfo::TargetPipe::VDIV, "VDIV"},
{InstArchInfo::TargetPipe::SYS, "SYS"},
// Placeholder name for an unrecognized pipe; same "?" spelling as the
// corresponding key in execution_pipe_map.
{InstArchInfo::TargetPipe::UNKNOWN, "?"}
};

// Maps the string given in an instruction's JSON "uop_gen" field to the
// corresponding UopGenType. InstArchInfo::update() looks the name up in this
// table and asserts when it is not found, so every "uop_gen" value used in
// the JSON must be listed here.
const InstArchInfo::UopGenMap InstArchInfo::uop_gen_type_map = {
{"ARITH", InstArchInfo::UopGenType::ARITH},
{"ARITH_SINGLE_DEST", InstArchInfo::UopGenType::ARITH_SINGLE_DEST},
{"ARITH_WIDE_DEST", InstArchInfo::UopGenType::ARITH_WIDE_DEST},
{"NONE", InstArchInfo::UopGenType::NONE}
};

void InstArchInfo::update(const nlohmann::json & jobj)
{
Expand All @@ -33,6 +71,16 @@ namespace olympia
execute_time_ = jobj["latency"].get<uint32_t>();
}

if (jobj.find("uop_gen") != jobj.end())
{
auto uop_gen_name = jobj["uop_gen"].get<std::string>();
const auto itr = uop_gen_type_map.find(uop_gen_name);
sparta_assert(itr != uop_gen_type_map.end(),
"Unknown uop gen: " << uop_gen_name << " for inst: "
<< jobj["mnemonic"].get<std::string>());
uop_gen_ = itr->second;
}

is_load_store_ = (tgt_pipe_ == TargetPipe::LSU);
is_vset_ = {tgt_pipe_ == TargetPipe::VSET};
}
Expand Down
Loading
Loading