Skip to content

Commit

Permalink
1st commit of fusion capable decoder, json support (#163)
Browse files Browse the repository at this point in the history
fusion decoder implementation. This is suitable for analysis.

Adds fusion parameters to fusion.yaml and specifies the fusion groups in an
external JSON definition file

New statistics for fused instructions and fusion group stats

HCache as separate structure in fusion/fusion

Includes the FSL.md domain language description

Inst.hpp modified to add FUSED and FUSION_GHOST as extended
status values

Support for non-sequential program IDs. Each instruction has a PID increment
value and associated methods

Formatting, clean up of Inst.hpp

JSON support added to Fusion.hpp

New exceptions for JSON errors

Fusion default transform no longer modifies the input buffer

HCache testbench functions

---------

Co-authored-by: Jeff Nye <jeff@www.condorcomputing.com>
  • Loading branch information
jeffnye-gh and Jeff Nye authored Apr 15, 2024
1 parent 2ed051b commit a0f965f
Show file tree
Hide file tree
Showing 38 changed files with 3,958 additions and 964 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,15 @@ set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")

# Include directories
include_directories (core mss sim)
include_directories (SYSTEM fusion)
include_directories (SYSTEM mavis)
include_directories (SYSTEM stf_lib)

# Mavis, the Core, MSS, and the simulator
# Mavis, the Core, MSS, the simulator and Fusion
add_subdirectory (mavis)
add_subdirectory (core)
add_subdirectory (mss)
add_subdirectory (fusion)

# Add STF library to the build
add_subdirectory (${STF_LIB_BASE})
Expand Down
13 changes: 13 additions & 0 deletions arches/fusion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Architecture overlay that layers the fusion-related decode parameters on top
# of big_core.yaml. The notes below describe how core/Decode.cpp consumes each
# value; parameters whose use is not visible there are marked as such.
include: big_core.yaml
top.cpu.core0:
decode:
params:
# Upper bound on the number of instructions taken from the fetch queue in one
# decode pass (also limited by available uop credits and queue occupancy).
num_to_decode: 8
# Master switch: when false the decoder creates no fuser and skips all
# fusion buffering and matching.
fusion_enable: true
# When true (and fusion is enabled) the decoded instruction group is printed
# to stdout before being sent on.
fusion_debug: false
# NOTE(review): stored by Decode, but its use is not visible in Decode.cpp;
# presumably a per-group enable mask -- confirm.
fusion_enable_register: 0xFFFFFFFF
# Maximum number of consecutive decode attempts that may be deferred while
# fewer than fusion_max_group_size instructions are available.
fusion_max_latency: 8
# Watchdog limit on match/process iterations per decode pass; reaching it
# raises a SpartaException.
fusion_match_max_tries: 1023
# Decode waits (up to fusion_max_latency) until at least this many
# instructions can be decoded together.
fusion_max_group_size: 8
# NOTE(review): stored by Decode, but its use is not visible in Decode.cpp;
# presumably the output path of a summary report -- confirm.
fusion_summary_report: fusion_summary.txt
# List of fusion group definition files handed to the fuser at construction.
fusion_group_definitions: [ arches/fusion/dhrystone.json ]
37 changes: 37 additions & 0 deletions arches/fusion/dhrystone.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"fusiongroups" : [
{ "name" : "uf039", "uids" : ["0xd","0xa"], "tx" : "dfltXform_" },
{ "name" : "uf038", "uids" : ["0x3","0xe"], "tx" : "dfltXform_" },
{ "name" : "uf037", "uids" : ["0x20","0x4"], "tx" : "dfltXform_" },
{ "name" : "uf036", "uids" : ["0x9","0x2d"], "tx" : "dfltXform_" },
{ "name" : "uf035", "uids" : ["0x18","0xe"], "tx" : "dfltXform_" },
{ "name" : "uf034", "uids" : ["0x20","0x18"], "tx" : "dfltXform_" },
{ "name" : "uf033", "uids" : ["0xe","0xd","0xa"], "tx" : "dfltXform_" },
{ "name" : "uf032", "uids" : ["0x10","0x10"], "tx" : "dfltXform_" },
{ "name" : "uf031", "uids" : ["0x18","0x20"], "tx" : "dfltXform_" },
{ "name" : "uf030", "uids" : ["0x22","0x26"], "tx" : "dfltXform_" },
{ "name" : "uf029", "uids" : ["0x26","0x34"], "tx" : "dfltXform_" },
{ "name" : "uf028", "uids" : ["0x21","0x20"], "tx" : "dfltXform_" },
{ "name" : "uf027", "uids" : ["0x34","0x35"], "tx" : "dfltXform_" },
{ "name" : "uf026", "uids" : ["0x2d","0x22"], "tx" : "dfltXform_" },
{ "name" : "uf025", "uids" : ["0x2e","0x2d"], "tx" : "dfltXform_" },
{ "name" : "uf024", "uids" : ["0x2e","0x21"], "tx" : "dfltXform_" },
{ "name" : "uf023", "uids" : ["0xd","0xa","0x22"], "tx" : "dfltXform_" },
{ "name" : "uf022", "uids" : ["0x26","0x34","0x9"], "tx" : "dfltXform_" },
{ "name" : "uf021", "uids" : ["0xa","0x22","0x26"], "tx" : "dfltXform_" },
{ "name" : "uf020", "uids" : ["0x18","0x20","0x4"], "tx" : "dfltXform_" },
{ "name" : "uf019", "uids" : ["0x22","0x26","0x34"], "tx" : "dfltXform_" },
{ "name" : "uf018", "uids" : ["0x2e","0x21","0x20"], "tx" : "dfltXform_" },
{ "name" : "uf017", "uids" : ["0x21","0x20","0x18"], "tx" : "dfltXform_" },
{ "name" : "uf016", "uids" : ["0x20","0x18","0x20"], "tx" : "dfltXform_" },
{ "name" : "uf008", "uids" : ["0xd","0x35"], "tx" : "dfltXform_" },
{ "name" : "uf007", "uids" : ["0xa","0x22"], "tx" : "dfltXform_" },
{ "name" : "uf005", "uids" : ["0xe","0xd"], "tx" : "dfltXform_" },
{ "name" : "uf004", "uids" : ["0xe","0x34"], "tx" : "dfltXform_" },
{ "name" : "uf003", "uids" : ["0x34","0x9"], "tx" : "dfltXform_" },
{ "name" : "uf002", "uids" : ["0x2e","0x35"], "tx" : "dfltXform_" },
{ "name" : "uf001", "uids" : ["0x35","0x35"], "tx" : "dfltXform_" },
{ "name" : "uf213", "uids" : ["0x2e","0x2e"], "tx" : "dfltXform_" },
{ "name" : "uf000", "uids" : ["0x35","0x2e"], "tx" : "dfltXform_" }
]
}
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
project (core)
add_library(core
FusionDecode.cpp
Core.cpp
SimpleBranchPred.cpp
Fetch.cpp
Expand Down
146 changes: 124 additions & 22 deletions core/Decode.cpp
Original file line number Diff line number Diff line change
@@ -1,45 +1,94 @@
// <Decode.cpp> -*- C++ -*-


#include <algorithm>

#include "Decode.hpp"
#include "fusion/FusionTypes.hpp"

#include "sparta/events/StartupEvent.hpp"
#include "sparta/utils/LogUtils.hpp"

#include <algorithm>
#include <iostream>

using namespace std;

namespace olympia
{
constexpr char Decode::name[];

// Constructs the Decode unit from its tree node and parameter set.
//
// The member-initializer list builds the fetch queue, constructs the fusion
// statistics counters against unit_stat_set_, and copies the decode width and
// the fusion settings out of *p. The body then calls initializeFusion_(),
// enables collection on the fetch queue, registers the consumer handlers for
// the fetch-queue, uop-credit and flush in-ports, and registers
// sendInitialCredits_ as a startup event.
//
// NOTE(review): this span interleaves the pre-commit and post-commit versions
// of several lines (two signatures, two num_to_decode_ initializers, two
// formattings of the handler registrations) because the diff markers were
// lost. Confirm against the repository before treating it as compilable.
Decode::Decode(sparta::TreeNode * node,
const DecodeParameterSet * p) :
Decode::Decode(sparta::TreeNode* node, const DecodeParameterSet* p) :
sparta::Unit(node),

// Queue of fetched instructions awaiting decode, sized by fetch_queue_size
fetch_queue_("FetchQueue", p->fetch_queue_size, node->getClock(), &unit_stat_set_),
num_to_decode_(p->num_to_decode)

// Fusion statistics; each counter's name and description are the string
// arguments given here.
fusion_num_fuse_instructions_(&unit_stat_set_, "fusion_num_fuse_instructions",
"The number of custom instructions created by fusion",
sparta::Counter::COUNT_NORMAL),

fusion_num_ghost_instructions_(&unit_stat_set_, "fusion_num_ghost_instructions",
"The number of instructions eliminated by fusion",
sparta::Counter::COUNT_NORMAL),

fusion_num_groups_defined_(&unit_stat_set_, "fusion_num_groups_defined",
"Number of fusion groups compiled or read at run time",
sparta::Counter::COUNT_LATEST),

fusion_num_groups_utilized_(&unit_stat_set_, "fusion_num_groups_utilized",
"Incremented on first use of a fusion group",
sparta::Counter::COUNT_LATEST),

fusion_pred_cycles_saved_(&unit_stat_set_, "fusion_pred_cycles_saved",
"Optimistic prediction of the cycles saved by fusion",
sparta::Counter::COUNT_NORMAL),

// Decode width and fusion configuration copied from the parameter set
num_to_decode_(p->num_to_decode),
fusion_enable_(p->fusion_enable),
fusion_debug_(p->fusion_debug),
fusion_enable_register_(p->fusion_enable_register),
fusion_max_latency_(p->fusion_max_latency),
fusion_match_max_tries_(p->fusion_match_max_tries),
fusion_max_group_size_(p->fusion_max_group_size),
fusion_summary_report_(p->fusion_summary_report),
fusion_group_definitions_(p->fusion_group_definitions)
{
// Creates the fuser (or leaves it null) before any handler can run
initializeFusion_();

fetch_queue_.enableCollection(node);

// In-port handlers: new instructions from fetch, credits returned for the
// uop queue, and flush requests.
fetch_queue_write_in_.
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
uop_queue_credits_in_.
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
in_reorder_flush_.
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));
fetch_queue_write_in_.registerConsumerHandler(
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
uop_queue_credits_in_.registerConsumerHandler(
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
in_reorder_flush_.registerConsumerHandler(
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));

// Initial fetch credits are sent from a startup event, not from here
sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(Decode, sendInitialCredits_));
}

// Send fetch the initial credit count
// The credit count sent on fetch_queue_credits_outp_ is the fetch queue's
// capacity. The constructor registers this method as a startup-event handler.
// NOTE(review): the bodiless signature on the next line appears to be the
// pre-commit form of the definition that follows it -- confirm.
void Decode::sendInitialCredits_()
void Decode::sendInitialCredits_() { fetch_queue_credits_outp_.send(fetch_queue_.capacity()); }

// -------------------------------------------------------------------
// Sets up the fusion state; called from the constructor.
//
// With fusion_enable_ set, this builds fuser_ from the configured
// fusion_group_definitions_, assigns hcache_ an HCache constructed with
// FusionGroupType::jenkins_1aat, and records the size of the fuser's fusion
// group container in fusion_num_groups_defined_. With fusion disabled,
// fuser_ is set to nullptr.
// -------------------------------------------------------------------
void Decode::initializeFusion_()
{
// NOTE(review): this send() looks like the removed body of the pre-commit
// sendInitialCredits_ that ended up here when the diff markers were lost --
// confirm against the repository.
fetch_queue_credits_outp_.send(fetch_queue_.capacity());
if (fusion_enable_)
{
fuser_ = std::make_unique<FusionType>(fusion_group_definitions_);
hcache_ = fusion::HCache(FusionGroupType::jenkins_1aat);
fusion_num_groups_defined_ = fuser_->getFusionGroupContainer().size();
}
else
{
fuser_ = nullptr;
}
}

// Receive Uop credits from Dispatch
void Decode::receiveUopQueueCredits_(const uint32_t & credits) {
void Decode::receiveUopQueueCredits_(const uint32_t & credits)
{
uop_queue_credits_ += credits;
if (fetch_queue_.size() > 0) {
if (fetch_queue_.size() > 0)
{
ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
}

Expand All @@ -52,12 +101,13 @@ namespace olympia
// Consumer handler for fetch_queue_write_in_ (registered in the constructor).
// Pushes every instruction of the incoming group onto fetch_queue_, logging
// each one, then schedules the decode event with a zero-cycle delay if any
// uop queue credits are available.
//
// NOTE(review): the paired `for` lines and paired `if` lines below appear to
// be the pre- and post-commit formatting of the same statement, not nested
// statements -- confirm against the repository.
void Decode::fetchBufferAppended_(const InstGroupPtr & insts)
{
// Cache the instructions in the instruction queue if we can't decode this cycle
for(auto & i : *insts)
for (auto & i : *insts)
{
fetch_queue_.push(i);
ILOG("Received: " << i);
}
// Decode can only make progress when there is somewhere to send the result
if (uop_queue_credits_ > 0) {
if (uop_queue_credits_ > 0)
{
ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
}
}
Expand All @@ -76,25 +126,76 @@ namespace olympia
uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size());
num_decode = std::min(num_decode, num_to_decode_);

if(num_decode > 0)
// buffer to maximize the chances of a group match limited
// by max allowed latency, bounded by max group size
if (fusion_enable_)
{
if (num_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_)
{
++latency_count_;
return;
}
}

latency_count_ = 0;

if (num_decode > 0)
{
InstGroupPtr insts =
sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);

InstUidListType uids;
// Send instructions on their way to rename
for(uint32_t i = 0; i < num_decode; ++i) {
for (uint32_t i = 0; i < num_decode; ++i)
{
const auto & inst = fetch_queue_.read(0);
insts->emplace_back(inst);
inst->setStatus(Inst::Status::DECODED);

if (fusion_enable_)
{
uids.push_back(inst->getMavisUid());
}

ILOG("Decoded: " << inst);

fetch_queue_.pop();
}

if (fusion_enable_)
{
MatchInfoListType matches;
uint32_t max_itrs = 0;
FusionGroupContainerType & container = fuser_->getFusionGroupContainer();
do
{
matchFusionGroups_(matches, insts, uids, container);
processMatches_(matches, insts, uids);
// Future feature whereIsEgon(insts,numGhosts);
++max_itrs;
} while (matches.size() > 0 && max_itrs < fusion_match_max_tries_);

if (max_itrs >= fusion_match_max_tries_)
{
throw sparta::SpartaException("Fusion group match watch dog exceeded.");
}
}

// Debug statement
if (fusion_debug_ && fusion_enable_)
infoInsts_(cout, insts);
// Send decoded instructions to rename
uop_queue_outp_.send(insts);

// TODO: whereisegon() would remove the ghosts,
// Commented out for now, in practice insts
// would be smaller due to the fused ops
// uint32_t unfusedInstsSize = insts->size();

// Decrement internal Uop Queue credits
sparta_assert(uop_queue_credits_ >= insts->size(),
"Attempt to decrement d0q credits below what is available");

uop_queue_credits_ -= insts->size();

// Send credits back to Fetch to get more instructions
Expand All @@ -103,8 +204,9 @@ namespace olympia

// If we still have credits to send instructions as well as
// instructions in the queue, schedule another decode session
if(uop_queue_credits_ > 0 && fetch_queue_.size() > 0) {
if (uop_queue_credits_ > 0 && fetch_queue_.size() > 0)
{
ev_decode_insts_event_.schedule(1);
}
}
}
} // namespace olympia
Loading

0 comments on commit a0f965f

Please sign in to comment.