From f34c895c3b308772d09780e1f4dc2fde7ebe6046 Mon Sep 17 00:00:00 2001 From: r-a-sattarov Date: Sat, 14 Oct 2023 01:32:54 +0300 Subject: [PATCH] E2K: added initial support of MCST Elbrus 2000 CPU architecture --- mathlib/mathlib_base.cpp | 2 +- mathlib/sse.cpp | 4 ++++ public/materialsystem/imesh.h | 4 ++-- public/mathlib/mathlib.h | 8 ++++---- public/steam/steamtypes.h | 8 ++------ public/tier0/platform.h | 13 +++++++++---- scripts/waifulib/compiler_optimizations.py | 8 +++++++- tier0/cpu.cpp | 22 +++++++++++++++++----- tier0/cpu_posix.cpp | 5 ++--- tier1/processor_detect_linux.cpp | 2 +- tier1/reliabletimer.cpp | 15 +-------------- vtf/vtf.cpp | 4 +++- wscript | 3 +++ 13 files changed, 56 insertions(+), 42 deletions(-) diff --git a/mathlib/mathlib_base.cpp b/mathlib/mathlib_base.cpp index 872ffec5e6..9f5aef32d5 100644 --- a/mathlib/mathlib_base.cpp +++ b/mathlib/mathlib_base.cpp @@ -3281,7 +3281,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright { s_bSSEEnabled = true; -#ifndef PLATFORM_WINDOWS_PC64 +#if !defined(PLATFORM_WINDOWS_PC64) && (defined(__i386__) || defined(__amd64__)) // These are not yet available. // Select the SSE specific routines if available pfVectorNormalize = _VectorNormalize; diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp index 6122b66487..f8567e5f83 100644 --- a/mathlib/sse.cpp +++ b/mathlib/sse.cpp @@ -15,6 +15,8 @@ #include "sse2neon.h" #endif +#if !defined(__e2k__) + #include "sse.h" // memdbgon must be the last include file in a .cpp file!!! @@ -1127,3 +1129,5 @@ vec_t DotProduct (const vec_t *a, const vec_t *c) */ #endif // COMPILER_MSVC64 + +#endif // !defined(__e2k__) diff --git a/public/materialsystem/imesh.h b/public/materialsystem/imesh.h index 6a952e0099..5e19d4cb55 100644 --- a/public/materialsystem/imesh.h +++ b/public/materialsystem/imesh.h @@ -1152,7 +1152,7 @@ inline void CVertexBuilder::FastAdvanceNVertices( int n ) //----------------------------------------------------------------------------- inline void CVertexBuilder::FastVertex( const ModelVertexDX7_t &vertex ) { -#if defined(__arm__) || defined(__aarch64__) || defined(PLATFORM_WINDOWS_PC64) +#if defined(__arm__) || defined(__aarch64__) || defined(__e2k__) || defined(PLATFORM_WINDOWS_PC64) FastVertexSSE( vertex ); #else Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed @@ -1354,7 +1354,7 @@ inline void CVertexBuilder::Fast4VerticesSSE( inline void CVertexBuilder::FastVertex( const ModelVertexDX8_t &vertex ) { -#if defined(__arm__) || defined(__aarch64__) || defined(PLATFORM_WINDOWS_PC64) +#if defined(__arm__) || defined(__aarch64__) || defined(__e2k__) || defined(PLATFORM_WINDOWS_PC64) FastVertexSSE( vertex ); #else Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed diff --git a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h index 6503da0f59..c3cc63c451 100644 --- a/public/mathlib/mathlib.h +++ b/public/mathlib/mathlib.h @@ -1201,8 +1201,8 @@ FORCEINLINE int RoundFloatToInt(float f) }; flResult = __fctiw( f ); return pResult[1]; -#elif defined (__arm__) || defined (__aarch64__) - return (int)(f + 0.5f); +#elif defined(__arm__) || defined(__aarch64__) || defined(__e2k__) + return (int)(f + 0.5f); #else #error Unknown architecture #endif @@ -1233,8 +1233,8 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) Assert( pIntResult[1] >= 0 ); return pResult[1]; #else // !X360 -#if defined(__arm__) || defined(__aarch64__) - return (unsigned long)(f + 0.5f); +#if defined(__arm__) || defined(__aarch64__) || defined(__e2k__) + return (unsigned long)(f + 0.5f); #elif defined( PLATFORM_WINDOWS_PC64 ) uint nRet = ( uint ) f; if ( nRet & 1 ) diff --git a/public/steam/steamtypes.h b/public/steam/steamtypes.h index c32150e2b1..7d7546536c 100644 --- a/public/steam/steamtypes.h +++ b/public/steam/steamtypes.h @@ -24,10 +24,6 @@ typedef unsigned char uint8; #define POSIX 1 #endif -#if defined(__x86_64__) || defined(_WIN64) || defined(__aarch64__) -#define X64BITS -#endif - // Make sure VALVE_BIG_ENDIAN gets set on PS3, may already be set previously in Valve internal code. #if !defined(VALVE_BIG_ENDIAN) && defined(_PS3) #define VALVE_BIG_ENDIAN @@ -48,7 +44,7 @@ typedef unsigned __int64 uint64; typedef int64 lint64; typedef uint64 ulint64; -#ifdef X64BITS +#ifdef PLATFORM_64BITS typedef __int64 intp; // intp is an integer that can accomodate a pointer typedef unsigned __int64 uintp; // (ie, sizeof(intp) >= sizeof(int) && sizeof(intp) >= sizeof(void *) #else @@ -74,7 +70,7 @@ typedef unsigned long long uint64; typedef long int lint64; typedef unsigned long int ulint64; -#ifdef X64BITS +#ifdef PLATFORM_64BITS typedef long long intp; typedef unsigned long long uintp; #else diff --git a/public/tier0/platform.h b/public/tier0/platform.h index a8ad1b02b1..2592fb6cd2 100644 --- a/public/tier0/platform.h +++ b/public/tier0/platform.h @@ -9,7 +9,7 @@ #ifndef PLATFORM_H #define PLATFORM_H -#if defined(__x86_64__) || defined(_WIN64) || defined(__aarch64__) +#if defined(__x86_64__) || defined(_WIN64) || defined(__aarch64__) || defined(__e2k__) #define PLATFORM_64BITS 1 #endif @@ -17,6 +17,11 @@ #define COMPILER_GCC 1 #endif +#if defined(__LCC__) && defined(__MCST__) +// MCST LCC (eLbrus Compiler Collection) +#define COMPILER_MCST_LCC 1 +#endif + #ifdef __GLIBC__ #define PLATFORM_GLIBC 1 #endif @@ -898,7 +903,7 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT) #endif #endif -#elif defined (__arm__) || defined (__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) || defined(__e2k__) inline void SetupFPUControlWord() {} #else inline void SetupFPUControlWord() @@ -1069,7 +1074,7 @@ inline T QWordSwapC( T dw ) // The typically used methods. //------------------------------------- -#if (defined(__i386__) || defined(__amd64__) || defined(__arm__) || defined(__aarch64__)) && !defined(VALVE_LITTLE_ENDIAN) +#if (defined(__i386__) || defined(__amd64__) || defined(__arm__) || defined(__aarch64__) || defined(__e2k__)) && !defined(VALVE_LITTLE_ENDIAN) #define VALVE_LITTLE_ENDIAN 1 #endif @@ -1235,7 +1240,7 @@ PLATFORM_INTERFACE struct tm * Plat_localtime( const time_t *timep, struct tm * inline uint64 Plat_Rdtsc() { -#if (defined( __arm__ ) || defined( __aarch64__ )) && defined (POSIX) +#if (defined( __arm__ ) || defined( __aarch64__ ) || defined(__e2k__)) && defined (POSIX) struct timespec t; clock_gettime( CLOCK_REALTIME, &t); return t.tv_sec * 1000000000ULL + t.tv_nsec; diff --git a/scripts/waifulib/compiler_optimizations.py b/scripts/waifulib/compiler_optimizations.py index fe64efc3ab..dff044d3f7 100644 --- a/scripts/waifulib/compiler_optimizations.py +++ b/scripts/waifulib/compiler_optimizations.py @@ -30,7 +30,7 @@ } ''' -VALID_BUILD_TYPES = ['fastnative', 'fast', 'release', 'debug', 'nooptimize', 'sanitize', 'none'] +VALID_BUILD_TYPES = ['native','fastnative', 'fast', 'release', 'debug', 'nooptimize', 'sanitize', 'none'] LINKFLAGS = { 'common': { @@ -65,6 +65,12 @@ 'clang': ['-O2', '-march=native'], 'default': ['-O3'] }, + 'native': { + 'msvc': ['/O2', '/Oy', '/MT'], + 'gcc': ['-O2', '-march=native'], + 'clang': ['-O2', '-march=native'], + 'default': ['-O3'] + }, 'release': { 'msvc': ['/O2', '/MT'], 'owcc': ['-O3', '-fomit-leaf-frame-pointer', '-fomit-frame-pointer', '-finline-functions', '-finline-limit=512'], diff --git a/tier0/cpu.cpp b/tier0/cpu.cpp index 0a1d84be0c..d598ab1d41 100644 --- a/tier0/cpu.cpp +++ b/tier0/cpu.cpp @@ -22,7 +22,7 @@ const tchar* GetProcessorVendorId(); static bool cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx) { -#if defined (__arm__) || defined (__aarch64__) || defined( _X360 ) +#if defined(__arm__) || defined(__aarch64__) || defined(__e2k__) || defined(_X360) return false; #elif defined(GNUC) @@ -337,6 +337,8 @@ const tchar* GetProcessorVendorId() return "PPC"; #elif defined ( __arm__ ) return "ARM"; +#elif defined (__e2k__) + return "MCST"; #else uint32 unused, VendorIDRegisters[3]; @@ -372,10 +374,12 @@ const tchar* GetProcessorArchName() return "amd64"; #elif defined(__i386__) || defined(_X86_) || defined(_M_IX86) return "i386"; -#elif defined __aarch64__ - return "aarch64"; -#elif defined __arm__ || defined _M_ARM - return "arm"; +#elif defined(__aarch64__) + return "aarch64"; +#elif defined(__arm__) || defined(_M_ARM) + return "arm"; +#elif defined(__e2k__) + return "e2k"; #else #error "Unknown architecture" #endif @@ -528,6 +532,13 @@ const CPUInformation* GetCPUInformation() pi.m_nPhysicalProcessors = 3; pi.m_nLogicalProcessors = 6; #elif defined(_LINUX) + #if defined(__e2k__) // MCST Elbrus 2000 + // e2k CPU don't have "core id" and "physical id" in "/proc/cpuinfo" (but have "processor") + // and don't have Hyper-Threading (HT) technology + // used sysconf() to count CPU cores + pi.m_nLogicalProcessors = sysconf( _SC_NPROCESSORS_CONF ); // _SC_NPROCESSORS_ONLN may not be reliable on ARM/Android + pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors; // hack for CPU without Hyper-Threading (HT) technology + #else // TODO: poll /dev/cpuinfo when we have some benefits from multithreading FILE *fpCpuInfo = fopen( "/proc/cpuinfo", "r" ); if ( fpCpuInfo ) @@ -584,6 +595,7 @@ const CPUInformation* GetCPUInformation() pi.m_nLogicalProcessors = 1; Assert( !"couldn't read cpu information from /proc/cpuinfo" ); } + #endif // if e2k #elif defined(OSX) || defined(PLATFORM_BSD) int mib[2], num_cpu = 1; size_t len; diff --git a/tier0/cpu_posix.cpp b/tier0/cpu_posix.cpp index a86f40a77b..9800e8bf2c 100644 --- a/tier0/cpu_posix.cpp +++ b/tier0/cpu_posix.cpp @@ -124,7 +124,7 @@ uint64 CalculateCPUFreq() } } -#if !defined(__arm__) && !defined(__aarch64__) +#if defined(__i386__) || defined(_M_IX86) // fallback mechanism to calculate when failed // Compute the period. Loop until we get 3 consecutive periods that // are the same to within a small error. The error is chosen @@ -178,7 +178,6 @@ uint64 CalculateCPUFreq() #else // ARM hard-coded frequency return (uint64)2000000000; -#endif // if !ARM +#endif // if i386 #endif // if APPLE } - diff --git a/tier1/processor_detect_linux.cpp b/tier1/processor_detect_linux.cpp index 64d771a248..e0e81c3bf9 100644 --- a/tier1/processor_detect_linux.cpp +++ b/tier1/processor_detect_linux.cpp @@ -13,7 +13,7 @@ bool CheckMMXTechnology(void) { return false; } bool CheckSSETechnology(void) { return false; } bool CheckSSE2Technology(void) { return false; } bool Check3DNowTechnology(void) { return false; } -#elif defined (__arm__) || defined (__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) || defined(__e2k__) bool CheckMMXTechnology(void) { return false; } bool CheckSSETechnology(void) { return false; } bool CheckSSE2Technology(void) { return false; } diff --git a/tier1/reliabletimer.cpp b/tier1/reliabletimer.cpp index f575a67982..2189c185fe 100644 --- a/tier1/reliabletimer.cpp +++ b/tier1/reliabletimer.cpp @@ -82,20 +82,7 @@ int64 CReliableTimer::GetPerformanceCountNow() CycleCount.Sample(); return CycleCount.GetLongCycles(); } -#elif defined( _PS3 ) - // use handy macro to grab tb - uint64 ulNow; - SYS_TIMEBASE_GET( ulNow ); - return ulNow; -#elif (defined( __arm__ ) || defined( __aarch64__ )) && defined (POSIX) - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return ts.tv_sec * 1000000000ULL + ts.tv_nsec; #else - uint64 un64; - __asm__ __volatile__ ( - "rdtsc\n\t" - : "=A" (un64) ); - return (int64)un64; + return (int64)Plat_Rdtsc(); #endif } diff --git a/vtf/vtf.cpp b/vtf/vtf.cpp index 0dad472231..f8ad13e863 100644 --- a/vtf/vtf.cpp +++ b/vtf/vtf.cpp @@ -951,7 +951,8 @@ static bool ReadHeaderFromBufferPastBaseHeader( CUtlBuffer &buf, VTFFileHeader_t bool CVTFTexture::ReadHeader( CUtlBuffer &buf, VTFFileHeader_t &header ) { - if ( IsX360() && SetupByteSwap( buf ) ) +#ifdef _X360 + if ( SetupByteSwap( buf ) ) { VTFFileBaseHeader_t baseHeader; m_Swap.SwapFieldsToTargetEndian( &baseHeader, (VTFFileBaseHeader_t*)buf.PeekGet() ); @@ -979,6 +980,7 @@ bool CVTFTexture::ReadHeader( CUtlBuffer &buf, VTFFileHeader_t &header ) } } } +#endif memset( &header, 0, sizeof(VTFFileHeader_t) ); buf.Get( &header, sizeof(VTFFileBaseHeader_t) ); diff --git a/wscript b/wscript index 7f760500ba..941ce7b80d 100644 --- a/wscript +++ b/wscript @@ -502,6 +502,9 @@ def configure(conf): if conf.env.DEST_OS == 'freebsd': linkflags += ['-lexecinfo'] + if conf.env.DEST_OS != 'darwin': + linkflags += ['-latomic'] + if conf.env.DEST_OS != 'win32': cflags += flags linkflags += flags