change spec from v0.3 to v0.4, add config example for cpf

XUANTIE-RV · Dec 15, 2023 · 6810049 · 6810049
1 parent de18701
commit 6810049
Show file tree

Hide file tree

Showing 17 changed files with 1,780 additions and 1,141 deletions.
diff --git a/demos/toolchain/xuantie-qemu-x86_64-Ubuntu-18.04.tar.gz b/demos/toolchain/xuantie-qemu-x86_64-Ubuntu-18.04.tar.gz
diff --git a/spec/LICENSE b/spec/LICENSE
diff --git a/spec/Makefile b/spec/Makefile
@@ -13,7 +13,6 @@ build_spec:
     --failure-level=ERROR \
     --require=asciidoctor-bibtex \
     --require=asciidoctor-diagram \
-    --require=asciidoctor-mathematical \
     --out-file=$(SPEC_RESULT) \
     $(SPEC_SOURCE)
 

diff --git a/spec/images/CVT_P1.png b/spec/images/CVT_P1.png
diff --git a/spec/images/CVT_P2.png b/spec/images/CVT_P2.png
diff --git a/spec/images/FI_CVT_P1.png b/spec/images/FI_CVT_P1.png
diff --git a/spec/images/FI_CVT_P2.png b/spec/images/FI_CVT_P2.png
diff --git a/spec/images/IF_CVT_P1.png b/spec/images/IF_CVT_P1.png
diff --git a/spec/images/IF_CVT_P2.png b/spec/images/IF_CVT_P2.png
diff --git a/spec/matrix_body.adoc b/spec/matrix_body.adoc
diff --git a/spec/matrix_instruction_format.adoc b/spec/matrix_instruction_format.adoc
@@ -0,0 +1,299 @@
+[[chapter4]]
+
+
+== Instruction Format
+
+Matrix instructions use custom-1 (0101011) as major opcode and the func3 is 3'b000.
+bit[26:25] is uop filed, indicating the operation type.
+
+[width="100%",cols="1,2,5",options="header",]
+|===
+|uop[1:0] |type |meaning
+|00 |Matrix-Matrix(mm)   |matrix computation, source and destination operands are matrix
+|01 |Matrix-Vector(mv.i) |matrix computation, one source operand is vector, row index provided by uimm3
+|10 |Matrix memory access |normal, streaming and whole regsiter loads/stores
+|11 |Matrix MISC   |special instructions and configuration instructions 
+
+|===
+
+The instruction formats are:
+
+[width="100%",cols="2,2,2,1,2,1,1,1,2,2,2,2,6",options="header",]
+|===
+||31 27|26 25 |24 |23  21|20|19|18|17  15 |14 12 |11 10 |9 7 |6     0 
+^|Calc|func |01/00|size| ms2 3+|ms1 | md/ms3 |func3 |size |uimm3 |major opcode
+^|Load/Store|func |10 3+|rs2 3+|rs1 |func3 |size |md/ms3 |major opcode
+.4+^|MISC
+|func |11 5+|uimm7 |0|func3 2+|rd|major opcode
+|func |11 3+|0 3+|rs1| func3 2+|rd|major opcode
+|func |11 5+|0 |md | func3 2+|0 |major opcode
+|func |11 3+|rs2 3+|rs1 |func3 |size |md/ms3 |major opcode
+|===
+
+
+=== Arithmetic Instructions 
+
+The arithmetic instructions format:
+
+[width="99%",cols="1,1,1,1,1,1,1,1,1,2",options="header",]
+|===
+|31 28 |27 25 |24 |23 21 |20 18 |17 15 |14 12 |11 10 |9 7 |6 0
+|func |uop |size |ms2 |ms1 |md/ms3 |func3 |size |uimm3 |major opcode
+|===
+
+
+Size field indicates the element, set to 0 if not needed.
+
+[width="80%",cols=",",align="center",options="header",]
+|===
+|size[1:0] |element data width
+|00 |8-bit
+|01 |16-bit
+|10 |32-bit
+|11 |64-bit
+|===
+
+The instruction encoding list is in following tables.
+
+==== Matrix Move
+
+Move between matrix instructions and mzero(section 4.3.2) reuse arithmetic instruction format.
+|===
+|31 27 | 26 25 | 24 | 23 21 | 20 18 | 17 15 | 14 12 | 11 10 | 9 7 | 6 0 | 
+| 00000 | 00 | 0 | 000 | ms1 | md | func3 | 00 | 001 | major opcode | mmov.mm 
+| 00000 | 01 | 0 | 000 | ms1 | md | func3 | 00 | uimm3 | major opcode | mmov.mv.i 
+|===
+
+==== Matrix Multiplication
+Matrix multiplication instructions use arithmetic instruction format where the func domain are 00001/00010.
+
+|===
+|31 27 | 26 25 | 24 | 23 21 | 20 18 | 17 15 | 14 12 | 11 10 | 9 7 | 6 0 | 
+| 00001 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 01 | 000 | major opcode | fmmacc.h 
+| 00001 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 10 | 000 | major opcode | fmmacc.s 
+| 00001 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 11 | 000 | major opcode | fmmacc.d 
+| 00001 | 00 | 1 | ms2 | ms1 | md/ms3 | func3 | 01 | 000 | major opcode | fwmmacc.h 
+| 00001 | 00 | 1 | ms2 | ms1 | md/ms3 | func3 | 10 | 000 | major opcode | fwmmacc.s 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 00 | 000 | major opcode | mmaqa.b 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 00 | 001 | major opcode | mmaqau.b 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 00 | 010 | major opcode | mmaqaus.b 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 00 | 011 | major opcode | mmaqasu.b 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 01 | 000 | major opcode | mmaqa.h 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 01 | 001 | major opcode | mmaqau.h 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 01 | 010 | major opcode | mmaqaus.h 
+| 00010 | 00 | 0 | ms2 | ms1 | md/ms3 | func3 | 01 | 011 | major opcode | mmaqasu.h 
+|===
+
+==== Integer Pointwise
+
+Integer pointwise instructions use arithmetic instruction format where the func domains are 00011/00100/00101/00110/00111/01000/01001/01010/01011/01100/01101/01110/01111.
+
+|===
+|31 27 | 26 25 | 24 | 23 21 | 20 18 | 17 15 | 14 12 | 11 10 | 9 7 | 6 0 | 
+|00011	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	madd.s.mm
+|00011	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	madd.s.mv.i
+|00100	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	msub.s.mm
+|00100	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	msub.s.mv.i
+|00101	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	msra.s.mm
+|00101	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	msra.s.mv.i
+|00110	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	msrl.s.mm
+|00110	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	msrl.s.mv.i
+|00111	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	msll.s.mm
+|00111	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	msll.s.mv.i
+|01000	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mn4clip.s.mm
+|01000	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mn4clip.s.mv.i
+|01001	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mn4clipu.s.mm
+|01001	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mn4clipu.s.mv.i
+|01010	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mmul.s.mm
+|01010	|10 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mmul.s.mv.i
+|01011	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mmulh.s.mm
+|01011	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mmulh.s.mv.i
+|01100	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mmax.s.mm
+|01100	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mmax.s.mv.i
+|01101	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mumax.s.mm
+|01101	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mumax.s.mv.i
+|01110	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mmin.s.mm
+|01110	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mmin.s.mv.i
+|01111	|00 |0|	ms2|ms1|md|func3|10|	000	|major opcode|	mumin.s.mm
+|01111	|01 |0|	ms2|ms1|md|func3|10|	uimm3	|major opcode|	mumin.s.mv.i
+|00011	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	madd.d.mm
+|00011	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	madd.d.mv.i
+|00100	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	msub.d.mm
+|00100	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	msub.d.mv.i
+|00101	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	msra.d.mm
+|00101	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	msra.d.mv.i
+|00110	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	msrl
+|00110	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	msrl
+|00111	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	msll
+|00111	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	msll
+|01000	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mn4clip.d.mm
+|01000	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mn4clip.d.mv.i
+|01001	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mn4clipu.d.mm
+|01001	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mn4clipu.d.mv.i
+|01010	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mmul.d.mm
+|01010	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mmul.d.mv.i
+|01011	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mmulh.d.mm
+|01011	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mmulh.d.mv.i
+|01100	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mmax.d.mm
+|01100	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mmax.d.mv.i
+|01101	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mumax.d.mm
+|01101	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mumax.d.mv.i
+|01110	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mmin.d.mm
+|01110	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mmin.d.mv.i
+|01111	|00 |0|	ms2|ms1|md|func3|11|	000	|major opcode|	mumin.d.mm
+|01111	|01 |0|	ms2|ms1|md|func3|11|	uimm3	|major opcode|	mumin.d.mv.i
+|===
+
+==== Float Pointwise
+
+Float pointwise instructions use arithmetic instruction format where the func domains are 10000/10001/10010/10011/10100/10101.
+
+|===
+|31 27 | 26 25 | 24 | 23 21 | 20 18 | 17 15 | 14 12 | 11 10 | 9 7 | 6 0 | 
+|10000|	00|	0|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfadd.h.mm
+|10000|	01|	0|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfadd.h.mv.i
+|10000|	00|	1|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfwadd.h.mm
+|10000|	01|	1|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfwadd.h.mv.i
+|10001|	00|	0|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfsub.h.mm
+|10001|	01|	0|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfsub.h.mv.i
+|10001|	00|	1|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfwsub.h.mm
+|10001|	01|	1|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfwsub.h.mv.i
+|10010|	00|	0|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfmul.h.mm
+|10010|	01|	0|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfmul.h.mv.i
+|10010|	00|	1|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfwmul.h.mm
+|10010|	01|	1|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfwmul.h.mv.i
+|10011|	00|	0|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfmax.h.mm
+|10011|	01|	0|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfmax.h.mv.i
+|10100|	00|	0|	ms2|	ms1|	md|	func3|	01|	000	|major opcode	|mfmin.h.mm
+|10100|	01|	0|	ms2|	ms1|	md|	func3|	01|	uimm3	|major opcode	|mfmin.h.mv.i
+|10000|	00|	0|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfadd.s.mm
+|10000|	01|	0|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfadd.s.mv.i
+|10000|	00|	1|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfwadd.s.mm
+|10000|	01|	1|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfwadd.s.mv.i
+|10001|	00|	0|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfsub.s.mm
+|10001|	01|	0|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfsub.s.mv.i
+|10001|	00|	1|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfwsub.s.mm
+|10001|	01|	1|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfwsub.s.mv.i
+|10010|	00|	0|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfmul.s.mm
+|10010|	01|	0|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfmul.s.mv.i
+|10010|	00|	1|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfwmul.s.mm
+|10010|	01|	1|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfwmul.s.mv.i
+|10011|	00|	0|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfmax.s.mm
+|10011|	01|	0|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfmax.s.mv.i
+|10100|	00|	0|	ms2|	ms1|	md|	func3|	10|	000	|major opcode	|mfmin.s.mm
+|10100|	01|	0|	ms2|	ms1|	md|	func3|	10|	uimm3	|major opcode	|mfmin.s.mv.i
+|10000|	00|	0|	ms2|	ms1|	md|	func3|	11|	000	|major opcode	|mfadd.d.mm
+|10000|	01|	0|	ms2|	ms1|	md|	func3|	11|	uimm3	|major opcode	|mfadd.d.mv.i
+|10001|	00|	0|	ms2|	ms1|	md|	func3|	11|	000	|major opcode	|mfsub.d.mm
+|10001|	01|	0|	ms2|	ms1|	md|	func3|	11|	uimm3	|major opcode	|mfsub.d.mv.i
+|10010|	00|	0|	ms2|	ms1|	md|	func3|	11|	000	|major opcode	|mfmul.d.mm
+|10010|	01|	0|	ms2|	ms1|	md|	func3|	11|	uimm3	|major opcode	|mfmul.d.mv.i
+|10011|	00|	0|	ms2|	ms1|	md|	func3|	11|	000	|major opcode	|mfmax.d.mm
+|10011|	01|	0|	ms2|	ms1|	md|	func3|	11|	uimm3	|major opcode	|mfmax.d.mv.i
+|10100|	00|	0|	ms2|	ms1|	md|	func3|	11|	000	|major opcode	|mfmin.d.mm
+|10100|	01|	0|	ms2|	ms1|	md|	func3|	11|	uimm3	|major opcode	|mfmin.d.mv.i
+|10101|	00|	0|	000|	ms1|	md|	func3|	10|	000	|major opcode	|mfncvt.s.mm
+|10101|	00|	0|	000|	ms1|	md|	func3|	11|	000	|major opcode	|mfncvt.d.mm
+|10101|	00|	0|	000|	ms1|	md|	func3|	01|	001	|major opcode	|mfwcvt.h.mm
+|10101|	00|	0|	000|	ms1|	md|	func3|	10|	001	|major opcode	|mfwcvt.s.mm
+|===
+
+====  Float Integer Conversion
+
+Float integer conversion instructions use arithmetic instruction format where the func domains are 10110.
+
+|===
+|31 27 | 26 25 | 24 | 23 21 | 20 18 | 17 15 | 14 12 | 11 10 | 9 7 | 6 0 | 
+|10110	|00	|0	|000	|ms1	|md	|func3	|10	|000	|major opcode	|mufcvt.w.mm
+|10110	|00	|0	|000	|ms1	|md	|func3	|00	|001	|major opcode	|mufwcvt.b.mm
+|10110	|00	|0	|000	|ms1	|md	|func3	|10	|100	|major opcode	|msfcvt.w.mm
+|10110	|00	|0	|000	|ms1	|md	|func3	|00	|101	|major opcode	|msfwcvt.b.mm
+|10110	|00	|0	|001	|ms1	|md	|func3	|10	|000	|major opcode	|mfucvt.s.mm
+|10110	|00	|0	|001	|ms1	|md	|func3	|01	|001	|major opcode	|mfuncvt.h.mm
+|10110	|00	|0	|001	|ms1	|md	|func3	|10	|100	|major opcode	|mfscvt.s.mm
+|10110	|00	|0	|001	|ms1	|md	|func3	|01	|101	|major opcode	|mfsncvt.h.mm
+|===
+
+=== Matrix Load/Store Instructions
+
+The matrix load/store instruction format:
+
+[width="100%",cols="1,1,1,1,1,1,1,2",options="header",]
+|===
+|31 27 |26 25 |24 20 |19 15 |14 12 |11 10 |9 7 |6  0
+|func |10 |rs2 |rs1 |func3 |size |md/ms3 |major opcode
+|===
+bit[27] = 1 indicates store operations, while bit[27] = 0 indicates load operations. bit[28] = 1 indicates streaming memory access and bit[29]=1 indicates whole register memory access.
+
+
+[width="99%",cols="1,1,1,1,1,1,1,2,3",options="header",]
+|===
+|31 27 |26 25 |24 20 |19 15 |14 12 |11 10 |9 7 |6 0 |
+|00000	|10|	rs2	|rs1	|func3	|size	|md	|major opcode	|mld<b/h/w/d> 
+|00001	|10|	rs2	|rs1	|func3	|size	|ms3	|major opcode	|mst<b/h/w/d> 
+|00010	|10|	rs2	|rs1	|func3	|size	|md	|major opcode	|mld.<b/h/w/d>.s
+|00011	|10|	rs2	|rs1	|func3	|size	|ms3	|major opcode	|mst.<b/h/w/d>.s
+|00100	|10|	{00,nf}	|rs1	|func3	|size	|md	|major opcode	|mld<1/2/4/8>m<b/h/w/d>
+|00101	|10|	{00,nf}	|rs1	|func3	|size	|md	|major opcode	|mst<1/2/4/8>m <b/h/w/d>
+|===
+
+=== Other Instructions
+
+==== configuration
+
+The uop of configuration instructions is 2'b11.
+
+[width="99%",cols="1,1,1,1,1,1,1,2,2",options="header",]
+|===
+|31 |30 27 |26 25 |24 20 |19 15 |14 12 |11 7 |6 0 |
+| 0 | 0001 | 11 2+| {uimm7,000} | func3 | rd | major opcode | mcfgki 
+| 0 | 0011 | 11 2+| {uimm7,000} | func3 | rd | major opcode | mcfgmi 
+| 0 | 0101 | 11 2+| {uimm7,000} | func3 | rd | major opcode | mcfgni 
+| 1 | 0001 | 1 | 00000 | rs1 | func3 | rd | major opcode | mcfgk 
+| 1 | 0011 | 11 | 00000 | rs1 | func3 | rd | major opcode | mcfgm 
+| 1 | 0101 | 11 | 00000 | rs1 | func3 | rd | major opcode | mcfgn 
+| 1 | 1111 | 11 | 00000 | rs1 | func3 | rd | major opcode | mcfg 
+|===
+
+==== mzero
+The mzero instruction shares the 2'b00 uop with the arithmetic instructions.
+[width="99%",cols="1,1,1,1,1,1,1,1,1,2,2",options="header",]
+|===
+|31 27 |26 25 |24 |23 21 |20 18 |17 15 |14 12 |11 10 |9 7 |6 0| 
+|11111 |00 |0 |000 |000 |md |func3 |00 |000 |major code |mzero      
+|===
+
+==== mrelease
+
+The mrelease instruction uses the configuration 2'b11 uop.
+
+[width="99%",cols="1,1,1,1,1,1,1,2,2",options="header",]
+|===
+|31 |30 27 |26 25 |24 20 |19 15 |14 12 |11 7 |6 0 |
+| 0 | 1111 | 11 | 00000 | 00000 | func3 | 00000 | major opcode | mrelease 
+|===
+
+==== move from matrix
+[width="99%",cols="1,1,1,1,1,1,1,1,2,2",options="header",]
+|===
+|31 27 |26 25 |24 |23 21 |20 |19 15 |14 12 |11 7 |6 0 |
+| 00000 | 11 | 0 | ms2 | 0 | rs1 | func3 | rd | major opcode | mmovb.x.m 
+| 00000 | 11 | 0 | ms2 | 1 | rs1 | func3 | rd | major opcode | mmovh.x.m 
+| 00000 | 11 | 1 | ms2 | 0 | rs1 | func3 | rd | major opcode | mmovw.x.m 
+| 00000 | 11 | 1 | ms2 | 1 | rs1 | func3 | rd | major opcode | mmovd.x.m 
+|===
+
+==== move GPR to matrix
+[width="99%",cols="1,1,1,1,1,1,1,2,2",options="header",]
+|===
+|31 28 |27 25 |24 20 |19 15 |14 12 |11 10 |9 7 |6 0 |
+| 00010 | 11 | rs2 | 0000 | func3 | 00 | md | major opcode | mdupb.m.x 
+| 00010 | 11 | rs2 | 0000 | func3 | 01 | md | major opcode | mduph.m.x 
+| 00010 | 11 | rs2 | 0000 | func3 | 10 | md | major opcode | mdupw.m.x 
+| 00010 | 11 | rs2 | 0000 | func3 | 11 | md | major opcode | mdupd.m.x 
+| 00100 | 11 | rs2 | rs1 | func3 | 00 | md | major opcode | mmovb.m.x 
+| 00100 | 11 | rs2 | rs1 | func3 | 01 | md | major opcode | mmovh.m.x 
+| 00100 | 11 | rs2 | rs1 | func3 | 10 | md | major opcode | mmovw.m.x 
+| 00100 | 11 | rs2 | rs1 | func3 | 11 | md | major opcode | mmovd.m.x 
+|===
+
diff --git a/spec/matrix_instruction_list.adoc b/spec/matrix_instruction_list.adoc
@@ -0,0 +1,62 @@
+[[chapter6]]
+
+== Instruction List
+
+There are 39 instructions extended for matrix, some are optional for hardware implementations.
+
+[width="100%",cols="1,2,4",options="header",]
+|===
+|catagory |instructions |
+.10+^|matrix multiplication(10) 
+|fmmacc. |float matrix multiplication
+|fwmmacc. |float widen matrix multiplication
+|mmaqa. |signed integer 4x matrix multiplication
+|mmaqau. |unsigned integer 4x matrix multiplication
+|mmaqasu. |signed-unsigned integer 4x matrix multiplication
+|mmaqaus. |unsigned-signed integer 4x matrix multiplication
+|pmmaqa. |int4 signed integer matrix multiplication
+|pmmaqau. |int4 unsigned integer matrix multiplication
+|pmmaqasu. |int4 signed -unsigned integer matrix multiplication
+|pmmaqaus. |int4 unsigned -signed integer matrix multiplication
+.6+^|matrix load/store(6) 
+|mld.<b/h/w/d> |matrix load to matrix registers
+|mst.<b/h/w/d> |matrix store from matrix registers
+|mld.<b/h/w/d>.s|stream load to matrix registers
+|mst.<b/h/w/d>.s|stream matrix store from matrix registers
+|mld<1/2/4/8>m.<b/h/w/d> |load to whole matrix register
+|mst<1/2/4/8>m.<b/h/w/d> |store to whole matrix register
+^|matrix movement(1) 
+|mmov.mm/mmov.mv.x
+mmov.mv.i/
+mmov.<b/h/w/d>.m.x/
+mdup.<b/h/w/d>.m.x/
+mmov.<b/h/w/d>.x.m 
+|move from/to matrix registers 
+ .7+^|matrix integer pointwise arithmetic (7) 
+|madd/msub.<s/d>.<mm/mv>.<i> |
+|mshift.<s/d>.<mm/mv>.<i> |
+|mn4clip.<s/d>.<mm/mv>.<i> |
+|mn4clipu.<s/d>.<mm/mv>.<i> |
+|mmul.<s/d>.<mm/mv>.<i> |
+|mmulh.<s/d>.<mm/mv>.<i> |
+|mmax/mmin/mumax/mumin/<s/d>.<mm/mv>.<i> |
+ .7+^|matrix float pointwise arithmetic (7) 
+|mfadd/mfsub.<h/s/d>.<mm/mv>.<i> |
+|mfwadd/mfwsub.<h/s>.<mm/mv>.<i> |
+|mfmul.<h/s/d>.<mm/mv>.<i> |
+|mfwmul.<h/s>.<mm/mv>.<i> |
+|mfmax/mfmin.<h/s/d>.<mm/mv>.<i>|
+|mfncvt.<s/d>.mm |
+|mfwcvt.<h/s>.mm|
+ .4+^|matrix float integer conversion (4) 
+|mufcvt/msfcvt.s.mm|
+|mufwcvt/msfwcvt.b.mm |
+|mfucvt/mfscvt.s.mm |
+|mfuncvt/mfsncvt.b.mm |
+.2+^|configuration(2) 
+|mcfgi |
+|mcfg |
+.2+^|others(2) 
+|mrelease |clear ms to CLEAN
+|mzero| 
+|===