From 73192ae02a1ae6ef7940b4d01b9008a62b6bdf3b Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 18 Mar 2024 22:07:16 +0800 Subject: [PATCH] T-Head C9xx cores implement an older version (0.7.1) of the vector specification. Relevant changes concerning the kernel are: - different placement of the SR_VS bit for the vector unit status - different encoding of the vsetvli instruction - different instructions for loads and stores In addition, the cores have a fixed VLEN of 128. The in-kernel access to vector instances is limited to the save and restore of process states, so the above-mentioned areas can simply be handled via the alternatives framework, similar to other T-Head-specific issues. Signed-off-by: Heiko Stuebner Co-developed-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Chen Pei Signed-off-by: Chen Pei --- arch/riscv/Kconfig.errata | 13 +++ arch/riscv/errata/thead/errata.c | 20 +++++ arch/riscv/include/asm/csr.h | 12 ++- arch/riscv/include/asm/errata_list.h | 46 +++++++++- arch/riscv/include/asm/vector.h | 129 +++++++++++++++++++++++---- arch/riscv/kernel/vector.c | 8 ++ 6 files changed, 207 insertions(+), 21 deletions(-) diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index e2c731cfed8cc..f5f8013dca8bb 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -99,4 +99,17 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_VECTOR + bool "Apply T-Head Vector errata" + depends on ERRATA_THEAD && RISCV_ISA_V && !ARCH_RV32I + default y + help + The T-Head C9xx cores implement an earlier version (0.7.1) + of the vector extension. + + This will apply the necessary errata to handle the non-standard + behaviour when switching to and from vector mode for processes. + + If you don't know what to do here, say "Y". 
+ endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 0554ed4bf087c..bb729cfd6f5a3 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static bool errata_probe_pbmt(unsigned int stage, @@ -33,6 +34,22 @@ static bool errata_probe_pbmt(unsigned int stage, return false; } +static bool errata_probe_vector(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return true; +} + static bool errata_probe_cmo(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + if (errata_probe_vector(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index fef5c42a986dc..39234a148d47d 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -38,6 +38,16 @@ #define SR_VS_CLEAN _AC(0x00000400, UXL) #define SR_VS_DIRTY _AC(0x00000600, UXL) +#define SR_VS_THEAD _AC(0x01800000, UXL) /* Vector Status */ +#define SR_VS_OFF_THEAD _AC(0x00000000, UXL) +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UXL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UXL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UXL) +/* VCSR flags */ +#define VCSR_VXRM_MASK 3 +#define VCSR_VXRM_SHIFT 1 +#define VCSR_VXSAT_MASK 1 + #define SR_MS _AC(0x06000000, UXL) /* Matrix Status */ #define SR_MS_OFF _AC(0x00000000, UXL) #define SR_MS_INITIAL _AC(0x02000000, UXL) @@ -50,7 +60,7 @@ #define SR_XS_CLEAN _AC(0x00010000, UXL) #define SR_XS_DIRTY _AC(0x00018000, UXL) -#define SR_FS_VS (SR_FS | SR_VS | SR_MS) /* Vector and Floating-Point Unit */ +#define SR_FS_VS (SR_FS | SR_VS | SR_MS | SR_VS_THEAD) /* Vector and Floating-Point Unit */ #if __riscv_xlen == 32 #define SR_SD _AC(0x80000000, UXL) /* FS/VS/XS dirty */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 69d41b1d491a8..824057111d92f 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -26,7 +26,8 @@ #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_CMO 1 #define ERRATA_THEAD_PMU 2 -#define ERRATA_THEAD_NUMBER 3 +#define ERRATA_THEAD_VECTOR 3 +#define ERRATA_THEAD_NUMBER 4 #endif #ifdef __ASSEMBLY__ @@ -164,6 +165,49 @@ asm volatile(ALTERNATIVE( \ : "=r" (__ovl) : \ : "memory") +#define THEAD_C9XX_CSR_VXSAT 0x9 +#define THEAD_C9XX_CSR_VXRM 0xa + + /* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + + /* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. 
+ */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 +#define ALT_SR_VS_THEAD_SHIFT 23 + +#ifdef CONFIG_ERRATA_THEAD_VECTOR +#define ALT_SR_VS(_val, prot) \ + asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ + : "=r"(_val) \ + : "I"(prot >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(ALT_SR_VS_THEAD_SHIFT)) +#else +#define ALT_SR_VS(_val, prot) _val = prot; +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index ce34e8acf2738..81136697c9d19 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -19,6 +19,7 @@ #include #include #include +#include static __always_inline bool has_matrix(void) { @@ -113,62 +114,117 @@ static __always_inline bool has_vector(void) static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; + xlen_t sr_vs, sr_vs_clean; + + ALT_SR_VS(sr_vs, SR_VS); + ALT_SR_VS(sr_vs_clean, SR_VS_CLEAN); + + regs->status = (regs->status & ~sr_vs) | sr_vs_clean; } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; + xlen_t sr_vs, sr_vs_dirty; + + ALT_SR_VS(sr_vs, SR_VS); + ALT_SR_VS(sr_vs_dirty, SR_VS_DIRTY); + + regs->status = (regs->status & ~sr_vs) | sr_vs_dirty; } static inline void riscv_v_vstate_off(struct pt_regs *regs) { regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; + regs->status = (regs->status & ~SR_VS_THEAD) | SR_VS_OFF_THEAD; } static inline void riscv_v_vstate_on(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; + xlen_t sr_vs, sr_vs_initial; + + ALT_SR_VS(sr_vs, SR_VS); + ALT_SR_VS(sr_vs_initial, SR_VS_INITIAL); + + regs->status = (regs->status & ~sr_vs) | sr_vs_initial; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { - return (regs->status & SR_VS) != 0; + xlen_t sr_vs; + + ALT_SR_VS(sr_vs, SR_VS); + + return (regs->status & sr_vs) != 0; } static __always_inline void riscv_v_enable(void) { - csr_set(CSR_SSTATUS, SR_VS); + xlen_t sr_vs; + + ALT_SR_VS(sr_vs, SR_VS); + + csr_set(CSR_SSTATUS, sr_vs); } static __always_inline void riscv_v_disable(void) { - csr_clear(CSR_SSTATUS, SR_VS); + csr_clear(CSR_SSTATUS, SR_VS | SR_VS_THEAD); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + asm volatile (ALTERNATIVE( "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" "csrr %3, " __stringify(CSR_VCSR) "\n\t" "csrr %4, " __stringify(CSR_VLENB) "\n\t" + __nops(4), + "csrs sstatus, t1\n\t" + "csrr %0, " __stringify(CSR_VSTART) "\n\t" + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" + "csrr %2, " __stringify(CSR_VL) "\n\t" + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "csrr t4, " 
__stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" + "or %3, %3, t4\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr), "=r" (dest->vlenb) : "r"(t1) : "t4"); } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + asm volatile (ALTERNATIVE( ".option push\n\t" ".option arch, +v\n\t" "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" "csrw " __stringify(CSR_VCSR) ", %3\n\t" + __nops(6), + "csrs sstatus, t1\n\t" + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvl x0, %2, %1\n\t" + ".option pop\n\t" + "csrw " __stringify(CSR_VSTART) ", %0\n\t" + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + "r" (src->vcsr), "r"(t1) : "t4"); } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -178,7 +234,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( + asm volatile (ALTERNATIVE( + "nop\n\t" ".option push\n\t" ".option arch, +v\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" @@ -189,8 +246,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, "vse8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + ".option pop\n\t", + "mv t0, %1\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V8T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V16T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); riscv_v_disable(); } @@ -200,7 +267,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( + asm volatile (ALTERNATIVE( + "nop\n\t" ".option push\n\t" ".option arch, +v\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" @@ -211,8 +279,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ "vle8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + ".option pop\n\t", + "mv t0, %1\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V8T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V16T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); __vstate_csr_restore(restore_from); riscv_v_disable(); } @@ -238,7 +316,11 @@ static inline void __riscv_v_vstate_discard(void) static inline void riscv_v_vstate_discard(struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_OFF) + xlen_t sr_vs; + + ALT_SR_VS(sr_vs, SR_VS); + + if ((regs->status & sr_vs) == SR_VS_OFF) return; __riscv_v_vstate_discard(); @@ -248,7 +330,12 @@ static inline void riscv_v_vstate_discard(struct 
pt_regs *regs) static inline void riscv_v_vstate_save(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_DIRTY) { + xlen_t sr_vs, sr_vs_dirty; + + ALT_SR_VS(sr_vs, SR_VS); + ALT_SR_VS(sr_vs_dirty, SR_VS_DIRTY); + + if ((regs->status & sr_vs) == sr_vs_dirty) { struct __riscv_v_ext_state *vstate = &task->thread.vstate; __riscv_v_vstate_save(vstate, vstate->datap); @@ -270,7 +357,11 @@ static inline void riscv_m_mstate_save(struct task_struct *task, static inline void riscv_v_vstate_restore(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + xlen_t sr_vs; + + ALT_SR_VS(sr_vs, SR_VS); + + if ((regs->status & sr_vs) != SR_VS_OFF) { struct __riscv_v_ext_state *vstate = &task->thread.vstate; __riscv_v_vstate_restore(vstate, vstate->datap); diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index ee75b57905a42..e3d363e153805 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); @@ -29,6 +30,13 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; + if (riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && + riscv_cached_marchid(0) == 0 && + riscv_cached_mimpid(0) == 0) { + riscv_v_vsize = 128 / 8 * 32; + return 0; + } + /* There are 32 vector registers with vlenb length. */ riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32;
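Note for reviewers: the stand-alone user-space sketch below is not part of the patch; it only illustrates, with the constants the patch adds, the bit-level assumptions the alternatives rely on. It shows the standard SR_VS field at sstatus bits [10:9] versus the T-Head 0.7.1 field at bits [24:23] as rebuilt by ALT_SR_VS's li+slli pair, the folding of the separate T-Head vxrm/vxsat CSRs into the saved vcsr slot (and the split on restore), and the 512-byte save area implied by the fixed VLEN of 128. File and variable names here are made up for the example.

/*
 * sr_vs_sketch.c - illustrative only, mirrors the #defines added above.
 * Build: cc -o sr_vs_sketch sr_vs_sketch.c && ./sr_vs_sketch
 */
#include <stdio.h>

/* Standard vector status field, sstatus bits [10:9]. */
#define SR_VS                 0x00000600UL
/* T-Head 0.7.1 vector status field, sstatus bits [24:23] (SR_VS_THEAD). */
#define SR_VS_THEAD           0x01800000UL

/* Shift amounts used by ALT_SR_VS to rebuild the masks with li + slli. */
#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
#define ALT_SR_VS_THEAD_SHIFT      23

/* vcsr layout on standard vector 1.0: vcsr = (vxrm << 1) | vxsat. */
#define VCSR_VXRM_SHIFT 1
#define VCSR_VXRM_MASK  3
#define VCSR_VXSAT_MASK 1

int main(void)
{
	/* ALT_SR_VS effectively does: li tmp, (mask >> shift); slli tmp, tmp, shift */
	unsigned long std_mask   = (SR_VS >> ALT_SR_VS_VECTOR_1_0_SHIFT)
					<< ALT_SR_VS_VECTOR_1_0_SHIFT;
	unsigned long thead_mask = (SR_VS_THEAD >> ALT_SR_VS_THEAD_SHIFT)
					<< ALT_SR_VS_THEAD_SHIFT;

	printf("standard SR_VS mask: 0x%lx, T-Head SR_VS mask: 0x%lx\n",
	       std_mask, thead_mask);

	/*
	 * __vstate_csr_save() on T-Head reads vxrm and vxsat from separate
	 * CSRs and folds them into the vcsr slot of the saved state.
	 */
	unsigned long vxrm = 2, vxsat = 1;	/* example CSR contents */
	unsigned long vcsr = (vxrm << VCSR_VXRM_SHIFT) | vxsat;

	/* __vstate_csr_restore() splits the slot back into the two CSRs. */
	unsigned long vxrm_back  = (vcsr >> VCSR_VXRM_SHIFT) & VCSR_VXRM_MASK;
	unsigned long vxsat_back = vcsr & VCSR_VXSAT_MASK;

	printf("vcsr=0x%lx -> vxrm=%lu vxsat=%lu\n", vcsr, vxrm_back, vxsat_back);

	/* Fixed VLEN of 128 bits -> 16 bytes per register, 32 registers. */
	printf("per-task vector save area: %d bytes\n", 128 / 8 * 32);
	return 0;
}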