Merge pull request #28 from r-a-sattarov/master

E2K: added initial support of MCST Elbrus 2000 CPU architecture
SwagSoftware · Mar 20, 2022 · 8f16ca0 · 8f16ca0
2 parents 6180b3c + 4ab5ddf
commit 8f16ca0
Show file tree

Hide file tree

Showing 23 changed files with 336 additions and 60 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -40,7 +40,11 @@ add_subdirectory(vgui2/vgui_controls)
 add_subdirectory(vgui2/vgui_surfacelib)
 add_subdirectory(soundsystem/lowlevel)
 add_subdirectory(thirdparty/quickhull)
-add_subdirectory(thirdparty/gperftools-2.8.1) #We include this version instead of distro-pkg because there is a false positive in ASAN
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k")
+    # We use the system gperftools-2.5 on OS Elbrus, so the bundled copy below is not added to the build
+else()
+    add_subdirectory(thirdparty/gperftools-2.8.1) #We include this version instead of distro-pkg because there is a false positive in ASAN
+endif()
 add_subdirectory(thirdparty/protobuf-2.5.0/cmake)
 add_subdirectory(utils/bzip2)
 add_subdirectory(utils/jpeglib)

diff --git a/cmake/source_exe_posix_base.cmake b/cmake/source_exe_posix_base.cmake
@@ -42,7 +42,11 @@ if( LINUXALL AND NOT DEDICATED )
     if( LINUX64 )
         #target_link_libraries(${OUTBINNAME} "${SRCDIR}/thirdparty/gperftools-2.0/.libs/x86_64/libtcmalloc_minimal.so")# [$LINUX64]
         #SWITCH BACK to a new version in /thirdparty. Unfortunately ASAN detects a false positive in this library and we need to edit the source.
-        target_link_libraries(${OUTBINNAME} tcmalloc_minimal)
+        if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k")
+            target_link_libraries(${OUTBINNAME} "/usr/lib/libtcmalloc_minimal.so.4.3.0") # use the system gperftools-2.5 on OS Elbrus; NOTE(review): absolute, version-pinned .so path - confirm it matches the installed package
+        else()
+            target_link_libraries(${OUTBINNAME} tcmalloc_minimal)
+        endif()
     else()
         #$ImpLibExternal	"$SRCDIR/thirdparty/gperftools-2.0/.libs/tcmalloc_minimal" [$LINUX32]
         message(FATAL_ERROR "linux32 not supported in cmake")

diff --git a/cmake/source_posix_base.cmake b/cmake/source_posix_base.cmake
@@ -43,7 +43,12 @@ else()
             message("^^ Not Setting -O for Target")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}")
         else()
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}")
+            if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k")
+                # -O3 on mcst-lcc is approximately equivalent to -O2 on gcc for x86/ARM. NOTE(review): this branch does not append LINUX_DEBUG_FLAGS, unlike the else() branch - confirm that is intentional
+                set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 ${LINUX_FLAGS_COMMON}")
+            else()
+                set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 ${LINUX_DEBUG_FLAGS} ${LINUX_FLAGS_COMMON}")
+            endif()
         endif()
     endif()
 endif()

diff --git a/engine/engine_inc.cmake b/engine/engine_inc.cmake
@@ -50,7 +50,11 @@ if( LINUXALL AND (NOT DEDICATED) )
     target_link_libraries(${OUTBINNAME} SDL2 rt openal)
 endif()
 if( LINUXALL )
-    target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib32 -L/usr/lib)
+    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "e2k") # e2k: same link options as below, minus the /usr/lib32 search path
+        target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib)
+    else()
+        target_link_options(${OUTBINNAME} PRIVATE -L/usr/lib32 -L/usr/lib)
+    endif()
     target_compile_options(${OUTBINNAME} PRIVATE -Wno-narrowing -fpermissive) #downgrade some errors to fix build
 endif()
 

diff --git a/ivp/ivp_collision/ivp_compact_ledge.hxx b/ivp/ivp_collision/ivp_compact_ledge.hxx
@@ -255,7 +255,7 @@ const IVP_Compact_Triangle *IVP_Compact_Edge::get_triangle() const
     //lwss - x64 fixes ( original is x86 )
 #if defined(__i386__)
     return (IVP_Compact_Triangle *)(((unsigned int)this) & 0xfffffff0);
-#elif defined( __x86_64__ )
+#elif defined(__x86_64__) || defined(__e2k__)
     return (IVP_Compact_Triangle *)(((unsigned long int)this) & 0xFFFFFFFFFFFFFFF0);
 #else
 #error fix this for your platform

diff --git a/materialsystem/shadersystem.cpp b/materialsystem/shadersystem.cpp
@@ -315,14 +315,14 @@ void CShaderSystem::LoadAllShaderDLLs( )
 #if defined( _PS3 ) || defined( _OSX )
 	LoadShaderDLL( "stdshader_dx9" DLL_EXT_STRING );
 #else // _PS3 || _OSX
-
+#ifndef __e2k__ // Don't load stdshader_dbg module on Elbrus (prevent "Module stdshader_dbg failed to load! Error: ((null))" message)
 	// 360 has the the debug shaders in its dx9 dll
 	if ( IsPC() || !IsX360() )
 	{
 		// Always need the debug shaders
 		LoadShaderDLL( "stdshader_dbg" );
 	}
-
+#endif
 	// Load up standard shader DLLs...
 	int dxSupportLevel = HardwareConfig()->GetMaxDXSupportLevel();
 	Assert( dxSupportLevel >= 60 );

diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp
@@ -80,7 +80,54 @@ void  __cdecl _SSE_VectorMA( const float *start, float scale, const float *direc
 // SSE implementations of optimized routines:
 //-----------------------------------------------------------------------------
 
+#ifdef POSIX
+const __m128  f3  = _mm_set_ss(3.0f);  // 3 as SSE value
+const __m128  f05 = _mm_set_ss(0.5f);  // 0.5 as SSE value
+#endif
+
+float _SSE_RSqrtAccurate(float a) // Returns a refined 1/sqrt(a): rsqrtss estimate r followed by one Newton-Raphson step, 0.5 * r * (3 - a*r*r)
+{
+
+#ifdef _WIN32
+	float x; // result, written by the asm block below
+	float half = 0.5f;
+	float three = 3.f;
+
+	__asm
+	{
+		movss   xmm3, a;
+		movss   xmm1, half;
+		movss   xmm2, three;
+		rsqrtss xmm0, xmm3;
+
+		mulss   xmm3, xmm0;
+		mulss   xmm1, xmm0;
+		mulss   xmm3, xmm0;
+		subss   xmm2, xmm3;
+		mulss   xmm1, xmm2;
+
+		movss   x,    xmm1;
+	}
 
+	return x;
+#elif POSIX
+	__m128  xx = _mm_load_ss( &a ); // a in the low lane
+	__m128  xr = _mm_rsqrt_ss( xx ); // r = initial estimate of 1/sqrt(a)
+	__m128  xt;
+
+	xt = _mm_mul_ss( xr, xr ); // r*r
+	xt = _mm_mul_ss( xt, xx ); // a*r*r
+	xt = _mm_sub_ss( f3, xt ); // 3 - a*r*r (f3 is the file-scope POSIX constant 3.0f)
+	xt = _mm_mul_ss( xt, f05 ); // 0.5 * (3 - a*r*r) (f05 is the file-scope POSIX constant 0.5f)
+	xr = _mm_mul_ss( xr, xt ); // r * 0.5 * (3 - a*r*r): the refined estimate
+
+	_mm_store_ss( &a, xr ); // the by-value parameter is reused as storage for the result
+	return a;
+#else
+	#error "Not Implemented"
+#endif
+
+}
 
 float FASTCALL _SSE_VectorNormalize (Vector& vec)
 {
@@ -91,7 +138,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
 #ifdef _WIN32
 	__declspec(align(16)) float result[4];
 #elif POSIX
-	 float result[4] __attribute__((aligned(16)));
+	float result[4] __attribute__((aligned(16)));
 #endif
 
 	float *v = &vec[0];
@@ -133,7 +180,11 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
 		r[ 0 ] = vec.x * recipSqrt;
 		r[ 1 ] = vec.y * recipSqrt;
 		r[ 2 ] = vec.z * recipSqrt;
-
+#elif defined __e2k__
+		float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
+		r[0] = v[0] * rsqrt;
+		r[1] = v[1] * rsqrt;
+		r[2] = v[2] * rsqrt;
 #elif POSIX
 		__asm__ __volatile__(
 #ifdef ALIGNED_VECTOR
@@ -451,7 +502,7 @@ float FastCos( float x )
 		movss   x,    xmm0
 
 	}
-#elif defined( _WIN64 )
+#elif defined( _WIN64 ) || defined( __e2k__ )
 	return cosf( x );
 #elif POSIX
 

diff --git a/public/glmgr/glmgrbasics.h b/public/glmgr/glmgrbasics.h
@@ -178,11 +178,13 @@ float	GLMKnobToggle( char *knobname );
 #if GLMDEBUG
 inline void GLMDebugger( void )
 {
+#ifndef __e2k__
 	if (GLMDebugChannelMask() & (1<<eDebugger))
 	{
 		asm ( "int $3" );
 	}
-
+#endif // ifndef __e2k__
+
 	if (GLMDebugChannelMask() & (1<<eGLProfiler))
 	{
 		// we call an obscure GL function which we know has been breakpointed in the OGLP function list

diff --git a/public/localize/ilocalize.h b/public/localize/ilocalize.h
@@ -17,7 +17,7 @@
 
 // unicode character type
 // for more unicode manipulation functions #include <wchar.h>
-#if !defined( _WCHAR_T_DEFINED ) && !defined( _PS3 ) && !defined(__clang__)
+#if !defined( _WCHAR_T_DEFINED ) && !defined( _PS3 ) && !defined(__clang__) && !defined(__e2k__)
 typedef unsigned short wchar_t;
 #define _WCHAR_T_DEFINED
 #endif

diff --git a/public/materialsystem/imesh.h b/public/materialsystem/imesh.h
@@ -1324,7 +1324,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 		movntps [edi + 16], xmm1
 		movntps [edi + 32], xmm2
 	}
-#elif defined(GNUC)
+#elif defined(GNUC) && !defined(__e2k__)
 	const void *pRead = &vertex;
 	void *pCurrPos = m_pCurrPosition;
 	__asm__ __volatile__ (
@@ -1335,7 +1335,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 						  "movntps %%xmm0, (%1)\n"
 						  "movntps %%xmm1, 16(%1)\n"
 						  "movntps %%xmm2, 32(%1)\n"
-						  "movntps %%xmm3, 48(%1)\n"						  
+						  "movntps %%xmm3, 48(%1)\n"
 						  :: "r" (pRead), "r" (pCurrPos) : "memory");
 #else
 	Error( "Implement CMeshBuilder::FastVertexSSE((dx8)" );

diff --git a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h
@@ -532,7 +532,7 @@ void inline SinCos( float radians, float * RESTRICT sine, float * RESTRICT cosin
 		fstp DWORD PTR [edx]
 		fstp DWORD PTR [eax]
 	}
-#elif defined( GNUC )
+#elif defined( GNUC ) && !defined( __e2k__ )
     //lwss - remove 'register' keyword
 	//register double __cosr, __sinr;
 	double __cosr, __sinr;
@@ -1682,7 +1682,7 @@ FORCEINLINE int RoundFloatToInt(float f)
 		fld f
 		fistp nResult
 	}
-#elif GNUC
+#elif defined( GNUC ) && !defined( __e2k__ )
 	__asm __volatile__ (
 		"fistpl %0;": "=m" (nResult): "t" (f) : "st"
 	);
@@ -1729,7 +1729,7 @@ FORCEINLINE unsigned char RoundFloatToByte(float f)
 		fld f
 		fistp nResult
 	}
-#elif GNUC
+#elif defined( GNUC ) && !defined( __e2k__ )
 	__asm __volatile__ (
 		"fistpl %0;": "=m" (nResult): "t" (f) : "st"
 	);
@@ -1767,7 +1767,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
 	return __fctiw( f );
 #endif
 #else  // !X360
-	
+
 #if defined( COMPILER_MSVC32 )
 	unsigned char nResult[8];
 	__asm
@@ -1776,7 +1776,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
 		fistp       qword ptr nResult
 	}
 	return *((unsigned long*)nResult);
-#elif defined( COMPILER_GCC )
+#elif defined( COMPILER_GCC ) && !defined( __e2k__ )
 	unsigned char nResult[8];
 	__asm __volatile__ (
 		"fistpl %0;": "=m" (nResult): "t" (f) : "st"

diff --git a/public/saverestoretypes.h b/public/saverestoretypes.h
@@ -64,7 +64,7 @@ class CSaveRestoreSegment
 	const char *StringFromSymbol( int token );
 
 private:
-#ifndef _WIN32
+#if !defined _WIN32 && !defined __e2k__
 	unsigned _rotr ( unsigned val, int shift);
 #endif
 	unsigned int HashString( const char *pszToken );
@@ -521,7 +521,7 @@ inline const char *CSaveRestoreSegment::StringFromSymbol( int token )
 	return "<<illegal>>";
 }
 
-#ifndef _WIN32
+#if !defined _WIN32 && !defined __e2k__
 inline unsigned CSaveRestoreSegment::_rotr ( unsigned val, int shift)
 {
 		register unsigned lobit;        /* non-zero means lo bit set */

diff --git a/public/steam/steamtypes.h b/public/steam/steamtypes.h
@@ -24,7 +24,7 @@ typedef unsigned char uint8;
 	#define POSIX 1
 #endif
 
-#if defined(__x86_64__) || defined(_WIN64)
+#if defined(__x86_64__) || defined(_WIN64) || defined(__e2k__)
 #define X64BITS
 #endif
 

diff --git a/public/tier0/hardware_clock_fast.h b/public/tier0/hardware_clock_fast.h
@@ -4,7 +4,7 @@
 
 #include "tier0/platform.h"
 
-#ifdef GNUC
+#if defined GNUC && !defined __e2k__
 inline int GetHardwareClockFast( void )
 {
 	unsigned long long int nRet;
@@ -35,15 +35,18 @@ inline int GetHardwareClockFast()
 }
 #else
 
+#ifdef __e2k__
+#include <x86intrin.h>
+#else
 #include <intrin.h>
-
+#endif // ifdef __e2k__
 
 inline int GetHardwareClockFast()
 {
 	return __rdtsc();
 }
-#endif
+#endif // ifdef _X360
 
-#endif
+#endif // defined GNUC && !defined __e2k__
 
-#endif
+#endif // ifndef TIER0_HARDWARE_TIMER
diff --git a/public/tier0/microprofiler.h b/public/tier0/microprofiler.h
@@ -26,8 +26,11 @@ PLATFORM_INTERFACE int64 GetHardwareClockReliably();
 #include <intrin.h>	// get __rdtsc
 #endif
 
+#ifdef __e2k__
+#include <x86intrin.h>
+#endif
 
-#if defined(_LINUX) || defined( OSX )
+#if (defined(_LINUX) || defined( OSX )) && !defined(__e2k__)
 inline unsigned long long GetTimebaseRegister( void )
 {
 #ifdef PLATFORM_64BITS

diff --git a/public/tier0/platform.h b/public/tier0/platform.h
@@ -9,10 +9,14 @@
 #ifndef PLATFORM_H
 #define PLATFORM_H
 
-#if defined(__x86_64__) || defined(_WIN64)
+#if defined(__x86_64__) || defined(_WIN64) || defined(__e2k__)
 #define PLATFORM_64BITS 1
 #endif
 
+#if defined(__e2k__)
+#define PLATFORM_E2K 1
+#endif
+
 #if defined( LINUX ) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
 // based on some Jonathan Wakely macros on the net...
 #define GCC_DIAG_STR(s) #s
@@ -1154,7 +1158,7 @@ typedef void * HINSTANCE;
 		#endif
 	#elif defined( OSX )
 		#define DebuggerBreak()  if ( Plat_IsInDebugSession() ) asm( "int3" ); else { raise(SIGTRAP); }
-	#elif defined( PLATFORM_CYGWIN ) || defined( PLATFORM_POSIX )
+	#elif ( defined( PLATFORM_CYGWIN ) || defined( PLATFORM_POSIX ) ) && !defined( __e2k__ )
 		#define DebuggerBreak()		__asm__( "int $0x3;")
 	#else
 		#define DebuggerBreak()	raise(SIGTRAP)
@@ -1386,7 +1390,7 @@ typedef int socklen_t;
 // Works for PS3 
 	inline void SetupFPUControlWord()
 	{
-#ifdef _PS3
+#if defined ( _PS3 ) || defined ( __e2k__ )
 // TODO: PS3 compiler spits out the following errors:
 // C:/tmp/ccIN0aaa.s: Assembler messages:
 // C:/tmp/ccIN0aaa.s(80): Error: Unrecognized opcode: `fnstcw'
@@ -1829,6 +1833,10 @@ extern "C" unsigned __int64 __rdtsc();
 #pragma intrinsic(__rdtsc)
 #endif
 
+#if defined( __e2k__ )
+#include <x86intrin.h> // get __rdtsc
+#endif
+
 inline uint64 Plat_Rdtsc()
 {
 #if defined( _X360 )
@@ -1850,6 +1858,8 @@ inline uint64 Plat_Rdtsc()
 	uint32 lo, hi;
 	__asm__ __volatile__ ( "rdtsc" : "=a" (lo), "=d" (hi));
 	return ( ( ( uint64 )hi ) << 32 ) | lo;
+#elif defined( __e2k__ )
+	return ( uint64 )__rdtsc();
 #else
 #error
 #endif