-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Full SME(1) instruction support and STREAMING Groups #415
base: dev
Are you sure you want to change the base?
Changes from 39 commits
201fc1e
fa95a72
8f6051a
989c994
801a47d
2d4b07b
2523db9
2b299e7
fcd1594
c710e78
5aacba5
7974237
0ebddc1
958d686
eacc73f
2fe1ea6
aa5e01b
16c0ce4
d73a4a3
a423a3e
500dd5b
95459c5
8a1ca0b
a897e3d
87ead9b
8a91683
fe74782
34d5287
5b7626f
c91f7cc
2c3aedf
cda97ba
e5fecac
f64e251
e65a3f1
17c7ac5
8c0fbe1
85cb8f5
79d026d
3426968
d5c631c
4ad3b6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LOAD_FP/STORE_ADDRESS_FP/STORE_DATA_FP don't exist. Also, this may be GitHub not showing it correctly, but the background should just be the alpha channel. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,12 @@ class Architecture : public arch::Architecture { | |
/** Returns the current value of SVCRval_. */ | ||
uint64_t getSVCRval() const; | ||
|
||
/** Returns if SVE Streaming Mode is enabled. */ | ||
bool isStreamingModeEnabled() const; | ||
|
||
/** Returns if the SME ZA Register is enabled. */ | ||
bool isZA_RegisterEnabled() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the underscore in ZA_Register needed/canonical? Looks a bit weird jumping between camel and snake cases |
||
|
||
/** Update the value of SVCRval_. */ | ||
void setSVCRval(const uint64_t newVal) const; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,37 +72,53 @@ const uint16_t LOAD_SVE = 62; | |
const uint16_t STORE_ADDRESS_SVE = 63; | ||
const uint16_t STORE_DATA_SVE = 64; | ||
const uint16_t STORE_SVE = 65; | ||
const uint16_t PREDICATE = 66; | ||
const uint16_t LOAD = 67; | ||
const uint16_t STORE_ADDRESS = 68; | ||
const uint16_t STORE_DATA = 69; | ||
const uint16_t STORE = 70; | ||
const uint16_t BRANCH = 71; | ||
const uint16_t SME = 72; | ||
const uint16_t SME_SIMPLE = 73; | ||
const uint16_t SME_SIMPLE_ARTH = 74; | ||
const uint16_t SME_SIMPLE_ARTH_NOSHIFT = 75; | ||
const uint16_t SME_SIMPLE_LOGICAL = 76; | ||
const uint16_t SME_SIMPLE_LOGICAL_NOSHIFT = 77; | ||
const uint16_t SME_SIMPLE_CMP = 78; | ||
const uint16_t SME_SIMPLE_CVT = 79; | ||
const uint16_t SME_MUL = 80; | ||
const uint16_t SME_DIV_OR_SQRT = 81; | ||
const uint16_t LOAD_SME = 82; | ||
const uint16_t STORE_ADDRESS_SME = 83; | ||
const uint16_t STORE_DATA_SME = 84; | ||
const uint16_t STORE_SME = 85; | ||
const uint16_t ALL = 86; | ||
const uint16_t NONE = 87; | ||
const uint16_t STREAMING_SVE = 66; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I looked at this file in isolation, I assumed that the ordering of these was arbitrary. However, it seems to be important for how instruction.cc works. So I'd be in favour of adding a comment here to explain that the ordering of these instructions is important and can't be changed without looking at instruction.cc. |
||
const uint16_t STREAMING_SVE_SIMPLE = 67; | ||
const uint16_t STREAMING_SVE_SIMPLE_ARTH = 68; | ||
const uint16_t STREAMING_SVE_SIMPLE_ARTH_NOSHIFT = 69; | ||
const uint16_t STREAMING_SVE_SIMPLE_LOGICAL = 70; | ||
const uint16_t STREAMING_SVE_SIMPLE_LOGICAL_NOSHIFT = 71; | ||
const uint16_t STREAMING_SVE_SIMPLE_CMP = 72; | ||
const uint16_t STREAMING_SVE_SIMPLE_CVT = 73; | ||
const uint16_t STREAMING_SVE_MUL = 74; | ||
const uint16_t STREAMING_SVE_DIV_OR_SQRT = 75; | ||
const uint16_t LOAD_STREAMING_SVE = 76; | ||
const uint16_t STORE_ADDRESS_STREAMING_SVE = 77; | ||
const uint16_t STORE_DATA_STREAMING_SVE = 78; | ||
const uint16_t STORE_STREAMING_SVE = 79; | ||
const uint16_t SME = 80; | ||
const uint16_t SME_SIMPLE = 81; | ||
const uint16_t SME_SIMPLE_ARTH = 82; | ||
const uint16_t SME_SIMPLE_ARTH_NOSHIFT = 83; | ||
const uint16_t SME_SIMPLE_LOGICAL = 84; | ||
const uint16_t SME_SIMPLE_LOGICAL_NOSHIFT = 85; | ||
const uint16_t SME_SIMPLE_CMP = 86; | ||
const uint16_t SME_SIMPLE_CVT = 87; | ||
const uint16_t SME_MUL = 88; | ||
const uint16_t SME_DIV_OR_SQRT = 89; | ||
const uint16_t LOAD_SME = 90; | ||
const uint16_t STORE_ADDRESS_SME = 91; | ||
const uint16_t STORE_DATA_SME = 92; | ||
const uint16_t STORE_SME = 93; | ||
const uint16_t PREDICATE = 94; | ||
const uint16_t STREAMING_PREDICATE = 95; | ||
const uint16_t LOAD = 96; | ||
const uint16_t STORE_ADDRESS = 97; | ||
const uint16_t STORE_DATA = 98; | ||
const uint16_t STORE = 99; | ||
const uint16_t BRANCH = 100; | ||
const uint16_t ALL = 101; | ||
const uint16_t NONE = 102; | ||
} // namespace InstructionGroups | ||
|
||
/** The number of aarch64 instruction groups. */ | ||
static constexpr uint8_t NUM_GROUPS = 88; | ||
static constexpr uint8_t NUM_GROUPS = 103; | ||
|
||
const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = { | ||
{InstructionGroups::ALL, | ||
{InstructionGroups::INT, InstructionGroups::FP, InstructionGroups::SVE, | ||
InstructionGroups::PREDICATE, InstructionGroups::SME, | ||
InstructionGroups::STREAMING_SVE, InstructionGroups::SME, | ||
InstructionGroups::PREDICATE, InstructionGroups::STREAMING_PREDICATE, | ||
InstructionGroups::LOAD, InstructionGroups::STORE, | ||
InstructionGroups::BRANCH}}, | ||
{InstructionGroups::INT, | ||
|
@@ -176,6 +192,19 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = { | |
{InstructionGroups::SVE_SIMPLE_ARTH_NOSHIFT}}, | ||
{InstructionGroups::SVE_SIMPLE_LOGICAL, | ||
{InstructionGroups::SVE_SIMPLE_LOGICAL_NOSHIFT}}, | ||
{InstructionGroups::STREAMING_SVE, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE, | ||
InstructionGroups::STREAMING_SVE_DIV_OR_SQRT, | ||
InstructionGroups::STREAMING_SVE_MUL}}, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH, | ||
InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL, | ||
InstructionGroups::STREAMING_SVE_SIMPLE_CMP, | ||
InstructionGroups::STREAMING_SVE_SIMPLE_CVT}}, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH_NOSHIFT}}, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL, | ||
{InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL_NOSHIFT}}, | ||
{InstructionGroups::SME, | ||
{InstructionGroups::SME_SIMPLE, InstructionGroups::SME_DIV_OR_SQRT, | ||
InstructionGroups::SME_MUL}}, | ||
|
@@ -189,11 +218,11 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = { | |
{InstructionGroups::LOAD, | ||
{InstructionGroups::LOAD_INT, InstructionGroups::LOAD_SCALAR, | ||
InstructionGroups::LOAD_VECTOR, InstructionGroups::LOAD_SVE, | ||
InstructionGroups::LOAD_SME}}, | ||
InstructionGroups::LOAD_STREAMING_SVE, InstructionGroups::LOAD_SME}}, | ||
{InstructionGroups::STORE, | ||
{InstructionGroups::STORE_INT, InstructionGroups::STORE_SCALAR, | ||
InstructionGroups::STORE_VECTOR, InstructionGroups::STORE_SVE, | ||
InstructionGroups::STORE_SME}}, | ||
InstructionGroups::STORE_STREAMING_SVE, InstructionGroups::STORE_SME}}, | ||
{InstructionGroups::STORE_INT, | ||
{InstructionGroups::STORE_ADDRESS_INT, InstructionGroups::STORE_DATA_INT}}, | ||
{InstructionGroups::STORE_SCALAR, | ||
|
@@ -204,17 +233,22 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = { | |
InstructionGroups::STORE_DATA_VECTOR}}, | ||
{InstructionGroups::STORE_SVE, | ||
{InstructionGroups::STORE_ADDRESS_SVE, InstructionGroups::STORE_DATA_SVE}}, | ||
{InstructionGroups::STORE_STREAMING_SVE, | ||
{InstructionGroups::STORE_ADDRESS_STREAMING_SVE, | ||
InstructionGroups::STORE_DATA_STREAMING_SVE}}, | ||
{InstructionGroups::STORE_SME, | ||
{InstructionGroups::STORE_ADDRESS_SME, InstructionGroups::STORE_DATA_SME}}, | ||
{InstructionGroups::STORE_ADDRESS, | ||
{InstructionGroups::STORE_ADDRESS_INT, | ||
InstructionGroups::STORE_ADDRESS_SCALAR, | ||
InstructionGroups::STORE_ADDRESS_VECTOR, | ||
InstructionGroups::STORE_ADDRESS_SVE, | ||
InstructionGroups::STORE_ADDRESS_STREAMING_SVE, | ||
InstructionGroups::STORE_ADDRESS_SME}}, | ||
{InstructionGroups::STORE_DATA, | ||
{InstructionGroups::STORE_DATA_INT, InstructionGroups::STORE_DATA_SCALAR, | ||
InstructionGroups::STORE_DATA_VECTOR, InstructionGroups::STORE_DATA_SVE, | ||
InstructionGroups::STORE_DATA_STREAMING_SVE, | ||
InstructionGroups::STORE_DATA_SME}}}; | ||
|
||
} // namespace aarch64 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -188,6 +188,20 @@ uint8_t Architecture::predecode(const uint8_t* ptr, uint16_t bytesAvailable, | |
newInsn.setExecutionInfo(getExecutionInfo(newInsn)); | ||
// Cache the instruction | ||
iter = decodeCache_.insert({insn, newInsn}).first; | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no data on this but doing this process for every AArch64 instruction may have a detrimental effect on performance. Will need to run through the new CI/CD when it's ready to determine this. Would argue that such a change shouldn't be merged until we know there's no significant performance regression. |
||
Instruction& cachedInsn = decodeCache_.at(insn); | ||
// Check if SVE or Predicate instructions need their group updating due to | ||
// SVE Streaming Mode activeness being different from when the instruction | ||
// was first decoded. | ||
if (cachedInsn.checkStreamingGroup()) { | ||
// If the instruction's group has changed then update its execution info. | ||
// The newly set group is most likely to be the most accurate, as an | ||
// incorrect group allocation is only achieved when an exception/flush is | ||
// triggered by changing the SVE Streaming Mode state. | ||
cachedInsn.setExecutionInfo(getExecutionInfo(cachedInsn)); | ||
} | ||
// Need to re-set iterator after updating the decodeCache_ structure | ||
iter = decodeCache_.find(insn); | ||
} | ||
|
||
// Split instruction into 1 or more defined micro-ops | ||
|
@@ -281,6 +295,12 @@ void Architecture::setSVCRval(const uint64_t newVal) const { | |
SVCRval_ = newVal; | ||
} | ||
|
||
// 0th bit of SVCR register determines if streaming-mode is enabled. | ||
bool Architecture::isStreamingModeEnabled() const { return SVCRval_ & 1; } | ||
|
||
// 1st bit of SVCR register determines if ZA register is enabled. | ||
bool Architecture::isZA_RegisterEnabled() const { return SVCRval_ & 2; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment about underscore |
||
|
||
} // namespace aarch64 | ||
} // namespace arch | ||
} // namespace simeng |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't these expansions redundant due to the inheritance mappings defined?