From 1e28b9695216e1de2abc153253feec70fd61d700 Mon Sep 17 00:00:00 2001 From: Rairii <2650838+Wack0@users.noreply.github.com> Date: Sat, 3 Aug 2024 19:54:57 +0100 Subject: [PATCH] inc(runtime): changes for uni-north, slight bugfixes detect big endian hardware by IBAT3L bit 0 (which HAL should set as early as possible) remove all the old cursed-emulator stuff, replace with the MSR_LE address munge operation (applying it twice is equal to no operation) fix Mmio(Read|Write)Buf32 to actually operate on 32-bit data add PCI slot interrupt info to runtime headers (for uni-north) --- inc/runtime.h | 101 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/inc/runtime.h b/inc/runtime.h index 440ac56..ce490b6 100644 --- a/inc/runtime.h +++ b/inc/runtime.h @@ -3,6 +3,7 @@ // if we're building for NT, then we're LE. #ifndef ARC_LE +#define __BUILDING_FOR_NT__ #define ARC_LE #define ARC_BE __attribute__((scalar_storage_order("big-endian"))) #define ARC_ALIGNED(x) __attribute__((aligned(x))) @@ -16,28 +17,68 @@ typedef struct ARC_BE _U32BE { } U32BE, *PU32BE; static inline BOOLEAN _Mmio_IsLittleEndian(void) { -#ifdef __FORCE_MSR_LE__ // used for HALs where it is known at compile time +#ifdef __FORCE_MSR_LE__ // used for HALs where the status of device endianness is known at compile time +#ifdef __BIG_ENDIAN_SYSTEM__ // uni-north removed the endianness switch bit from bandit + return FALSE; +#else return TRUE; +#endif #else + // All systems are running MSR_LE. + // NT does not use any IBAT other than IBAT0. + // Therefore, use IBAT3L bit 0. (bit 31, from a little endian viewpoint) + // The HAL on a fully big-endian system can set this bit as early as possible; + // IBAT3U is left at zero, to ensure the BAT won't ever be used. + ULONG _ibat3l; + __asm__ __volatile( "mfspr %0, 535" : "=r" ((_ibat3l)) ); + return (_ibat3l & 1) == 0; +#if 0 // old code checking MSR_LE ULONG _msr; __asm__ __volatile( "mfmsr %0" : "=r" ((_msr)) ); return (_msr & 1) != 0; #endif +#endif } -// following macro emits an "EMU_OP .", -// loader hooks will patch any EMU_OP to nop -// and skip following instruction for emu/aot. -#define FOLLOWING_ACCESS_BIG_ASM ".long 0xe0000000 \n " -#define FOLLOWING_ACCESS_BIG __asm__ __volatile__ ( FOLLOWING_ACCESS_BIG_ASM ) #else static inline BOOLEAN _Mmio_IsLittleEndian(void) { +#ifdef __BIG_ENDIAN_SYSTEM__ + return 0; +#else return __LITTLE_ENDIAN__; +#endif } -#define FOLLOWING_ACCESS_BIG_ASM #endif +// Given the address, and the length to access, munge the address to counteract what the CPU does with MSR_LE enabled. +// Length must be 1, 2, 4, or 8, this won't be checked for. +static inline ULONG _Mmio_MungeAddressConstant(ULONG Length) { + // 1 => 7, 2 => 6, 4 => 4, 8 => 0 + // this is enough, and should be calculated at compile time :) + return (8 - Length); +} +static inline PVOID _Mmio_MungeAddressForBig(PVOID Address, ULONG Length) { + // do nothing for 64 bits. + if (Length == 8) return Address; + + ULONG Addr32 = (ULONG)Address; + ULONG AlignOffset = Addr32 & (Length - 1); + if (AlignOffset == 0) { + // Aligned access, just XOR with munge constant. + return (PVOID)(Addr32 ^ _Mmio_MungeAddressConstant(Length)); + } + + // Unaligned access. + // Convert the address to an aligned address. + Addr32 &= ~(Length - 1); + // XOR with munge constant + Addr32 ^= _Mmio_MungeAddressConstant(Length); + // And subtract the align offset. + return (PVOID)(Addr32 - AlignOffset); +} + static inline UCHAR MmioRead8(PVOID addr) { + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 1); return *(volatile UCHAR*)addr; } @@ -49,8 +90,8 @@ static inline ULONG MmioRead32(PVOID addr) "lwbrx %0,0,%1 ; sync" : "=r"(x) : "r"(addr)); return x; } + addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lwz %0,0(%1) ; sync" : "=r"(x) : "b"(addr)); return x; } @@ -62,8 +103,8 @@ static inline void MmioWrite32(PVOID addr, ULONG x) "stwbrx %0,0,%1 ; eieio" : : "r"(x), "b"(addr)); return; } + addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "stw %0,0(%1) ; eieio" : : "r"(x), "b"(addr)); } @@ -75,8 +116,8 @@ static inline ULONG MmioRead32L(PVOID addr) "lwz %0,0(%1) ; sync" : "=r"(x) : "b"(addr)); return x; } + addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lwbrx %0,0,%1 ; sync" : "=r"(x) : "r"(addr)); return x; } @@ -88,8 +129,8 @@ static inline void MmioWrite32L(PVOID addr, ULONG x) "stw %0,0(%1) ; eieio" : : "r"(x), "b"(addr)); return; } + addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "stwbrx %0,0,%1 ; eieio" : : "r"(x), "r"(addr)); } @@ -101,8 +142,8 @@ static inline USHORT MmioRead16(PVOID addr) "lhbrx %0,0,%1 ; sync" : "=r"(x) : "r"(addr)); return x; } + addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lhz %0,0(%1) ; sync" : "=r"(x) : "b"(addr)); return x; } @@ -114,8 +155,8 @@ static inline void MmioWrite16(PVOID addr, USHORT x) "sthbrx %0,0,%1 ; eieio" : : "r"(x), "b"(addr)); return; } + addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "sth %0,0(%1) ; eieio" : : "r"(x), "b"(addr)); } @@ -127,8 +168,8 @@ static inline USHORT MmioRead16L(PVOID addr) "lhz %0,0(%1) ; sync" : "=r"(x) : "b"(addr)); return x; } + addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lhbrx %0,0,%1 ; sync" : "=r"(x) : "r"(addr)); return x; } @@ -140,13 +181,13 @@ static inline void MmioWrite16L(PVOID addr, USHORT x) "sth %0,0(%1) ; eieio" : : "r"(x), "b"(addr)); return; } + addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "sthbrx %0,0,%1 ; eieio" : : "r"(x), "r"(addr)); } static inline void MmioWrite8(PVOID addr, UCHAR x) { - + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 1); __asm__ __volatile__( "stb %0,0(%1) ; eieio" : : "r"(x), "b"(addr)); } @@ -173,11 +214,11 @@ static inline void MmioReadBuf16L(PVOID addr, PUSHORT buf, ULONG len) { __MMIO_BUF_READ_BODY(MmioRead16L); } -static inline void MmioReadBuf32(PVOID addr, PUSHORT buf, ULONG len) { +static inline void MmioReadBuf32(PVOID addr, PULONG buf, ULONG len) { __MMIO_BUF_READ_BODY(MmioRead32); } -static inline void MmioReadBuf32L(PVOID addr, PUSHORT buf, ULONG len) { +static inline void MmioReadBuf32L(PVOID addr, PULONG buf, ULONG len) { __MMIO_BUF_READ_BODY(MmioRead32L); } @@ -193,11 +234,11 @@ static inline void MmioWriteBuf16L(PVOID addr, PUSHORT buf, ULONG len) { __MMIO_BUF_WRITE_BODY(MmioWrite16L); } -static inline void MmioWriteBuf32(PVOID addr, PUSHORT buf, ULONG len) { +static inline void MmioWriteBuf32(PVOID addr, PULONG buf, ULONG len) { __MMIO_BUF_WRITE_BODY(MmioWrite32); } -static inline void MmioWriteBuf32L(PVOID addr, PUSHORT buf, ULONG len) { +static inline void MmioWriteBuf32L(PVOID addr, PULONG buf, ULONG len) { __MMIO_BUF_WRITE_BODY(MmioWrite32L); } @@ -205,8 +246,8 @@ static inline ULONG NativeRead32(PVOID addr) { ULONG x; if (_Mmio_IsLittleEndian()) return *(PULONG)addr; + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lwz %0,0(%1)" : "=r"(x) : "b"(addr)); return x; } @@ -217,8 +258,8 @@ static inline void NativeWrite32(PVOID addr, ULONG x) *(PULONG)addr = x; return; } + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 4); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "stw %0,0(%1)" : : "r"(x), "b"(addr)); } @@ -226,8 +267,8 @@ static inline USHORT NativeRead16(PVOID addr) { USHORT x; if (_Mmio_IsLittleEndian()) return *(PUSHORT)addr; + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "lhz %0,0(%1)" : "=r"(x) : "b"(addr)); return x; } @@ -238,8 +279,8 @@ static inline void NativeWrite16(PVOID addr, USHORT x) *(PUSHORT)addr = x; return; } + if (!_Mmio_IsLittleEndian()) addr = _Mmio_MungeAddressForBig(addr, 2); __asm__ __volatile__( - FOLLOWING_ACCESS_BIG_ASM "sth %0,0(%1)" : : "r"(x), "b"(addr)); } @@ -253,7 +294,8 @@ enum { RUNTIME_IN_EMULATOR, RUNTIME_DECREMENTER_FREQUENCY, RUNTIME_RAMDISK, - RUNTIME_HAS_CUDA + RUNTIME_HAS_CUDA, + RUNTIME_PCI_INTERRUPTS }; typedef struct ARC_LE _FRAME_BUFFER { @@ -277,17 +319,24 @@ typedef struct ARC_LE _RAMDISK_DESC { MEMORY_AREA Buffer; } RAMDISK_DESC, *PRAMDISK_DESC; -#ifndef FOLLOWING_ACCESS_BIG +typedef struct ARC_LE _PCI_INTERRUPT { + UCHAR Slot; + UCHAR Vector; +} PCI_INTERRUPT, *PPCI_INTERRUPT; + +#ifndef __BUILDING_FOR_NT__ typedef struct ARC_LE { U32LE RuntimePointers[16]; FRAME_BUFFER RuntimeFb; RAMDISK_DESC Ramdisk; + PCI_INTERRUPT PciInterrupts[32]; } RUNTIME_AREA, * PRUNTIME_AREA; #define s_RuntimeArea ((PRUNTIME_AREA)0x80005000) #define s_RuntimePointers s_RuntimeArea->RuntimePointers #define s_RuntimeFb s_RuntimeArea->RuntimeFb #define s_RuntimeRamdisk s_RuntimeArea->Ramdisk +#define s_RuntimePciInt s_RuntimeArea->PciInterrupts #endif #define STACK_ALIGN(type, name, cnt, alignment) UCHAR _al__##name[((sizeof(type)*(cnt)) + (alignment) + (((sizeof(type)*(cnt))%(alignment)) > 0 ? ((alignment) - ((sizeof(type)*(cnt))%(alignment))) : 0))]; \