Hopefully fixed some mistakes

dhewm · Nov 18, 2024 · f50471b · f50471b
1 parent 0a1f2a3
commit f50471b
Show file tree

Hide file tree

Showing 8 changed files with 477 additions and 211 deletions.
diff --git a/neo/idlib/math/Simd.cpp b/neo/idlib/math/Simd.cpp
diff --git a/neo/idlib/math/Simd_AVX.cpp b/neo/idlib/math/Simd_AVX.cpp
@@ -35,19 +35,6 @@ If you have questions concerning this license or the applicable additional terms
 //
 //===============================================================
 
-#if defined(__GNUC__) && defined(__SSE3__)
-
-/*
-============
-idSIMD_SSE3::GetName
-============
-*/
-const char *idSIMD_AVX::GetName( void ) const {
-	return "MMX & SSE & SSE2 & SSE3 & AVX";
-}
-
-#elif defined(_MSC_VER) && defined(_M_IX86)
-
 #include <immintrin.h>
 
 #include "idlib/geometry/DrawVert.h"
@@ -76,6 +63,7 @@ void VPCALL idSIMD_AVX::CullByFrustum( idDrawVert *verts, const int numVerts, co
 	const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
 	const __m256 eps = _mm256_set1_ps( epsilon );
 	const byte mask6 = ( 1 << 6 ) - 1;
+
 	for( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m256 vX = _mm256_set1_ps( vec.x );
@@ -111,6 +99,7 @@ void VPCALL idSIMD_AVX::CullByFrustum2( idDrawVert *verts, const int numVerts, c
 	const __m256 eps = _mm256_set1_ps( epsilon );
 	static const __m256 epsM = _mm256_set1_ps( -epsilon );
 	const short mask6 = ( 1 << 6 ) - 1;
+
 	for( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m256 vX = _mm256_set1_ps( vec.x );
@@ -133,5 +122,3 @@ void VPCALL idSIMD_AVX::CullByFrustum2( idDrawVert *verts, const int numVerts, c
 	}
 	_mm256_zeroupper();
 }
-
-#endif
diff --git a/neo/idlib/math/Simd_AVX.h b/neo/idlib/math/Simd_AVX.h
@@ -28,13 +28,10 @@
 
 class idSIMD_AVX : public idSIMD_SSE3 {
 public:
-#if defined(__GNUC__) && defined(__AVX__)
-	virtual const char *VPCALL GetName( void ) const;
-#elif defined(_MSC_VER) && defined(_M_IX86)
+	// Revelator: these work with GCC, Clang, or MSVC, on both x86 and x64 (no inline assembly is used)
 	virtual const char *VPCALL GetName( void ) const;
 	virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
 	virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
-#endif
 };
 
 #endif /* !__MATH_SIMD_AVX_H__ */
diff --git a/neo/idlib/math/Simd_AVX2.cpp b/neo/idlib/math/Simd_AVX2.cpp
@@ -31,23 +31,10 @@ If you have questions concerning this license or the applicable additional terms
 
 //===============================================================
 //
-//	AVX implementation of idSIMDProcessor
+//	AVX2 implementation of idSIMDProcessor
 //
 //===============================================================
 
-#if defined(__GNUC__) && defined(__SSE3__)
-
-/*
-============
-idSIMD_SSE3::GetName
-============
-*/
-const char *idSIMD_AVX:2:GetName( void ) const {
-	return "MMX & SSE & SSE2 & SSE3 & AVX & AVX2";
-}
-
-#elif defined(_MSC_VER) && defined(_M_IX86)
-
 #include <immintrin.h>
 
 #include "idlib/geometry/DrawVert.h"
@@ -76,6 +63,7 @@ void VPCALL idSIMD_AVX2::CullByFrustum( idDrawVert *verts, const int numVerts, c
 	const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
 	const __m256 eps = _mm256_set1_ps( epsilon );
 	const byte mask6 = (1 << 6) - 1;
+
 	for ( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m256 vX = _mm256_set1_ps( vec.x );
@@ -105,6 +93,7 @@ void VPCALL idSIMD_AVX2::CullByFrustum2( idDrawVert *verts, const int numVerts,
 	const __m256 eps = _mm256_set1_ps( epsilon );
 	static const __m256 epsM = _mm256_set1_ps( -epsilon );
 	const short mask6 = (1 << 6) - 1;
+
 	for ( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m256 vX = _mm256_set1_ps( vec.x );
@@ -121,6 +110,3 @@ void VPCALL idSIMD_AVX2::CullByFrustum2( idDrawVert *verts, const int numVerts,
 	}
 	_mm256_zeroupper();
 }
-
-#endif
-
diff --git a/neo/idlib/math/Simd_AVX2.h b/neo/idlib/math/Simd_AVX2.h
@@ -28,13 +28,10 @@
 
 class idSIMD_AVX2 : public idSIMD_AVX {
 public:
-#if defined(__GNUC__) && defined(__AVX__)
-	virtual const char *VPCALL GetName( void ) const;
-#elif defined(_MSC_VER) && defined(_M_IX86)
+	// Revelator: these work with GCC, Clang, or MSVC, on both x86 and x64 (no inline assembly is used)
 	virtual const char *VPCALL GetName( void ) const;
 	virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
 	virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
-#endif
 };
 
 #endif /* !__MATH_SIMD_AVX2_H__ */
diff --git a/neo/idlib/math/Simd_AltiVec.cpp b/neo/idlib/math/Simd_AltiVec.cpp
@@ -416,7 +416,6 @@ inline vector float VectorSin16( vector float v ) {
 #if 0
 	// load up half PI and use it to calculate the rest of the values. This is
 	// sometimes cheaper than loading them from memory
-
 	vector float halfPI = (vector float) ( 0.5f * 3.14159265358979323846f );
 	vector float PI = vec_add( halfPI, halfPI );
 	vector float oneandhalfPI = vec_add( PI, halfPI );
@@ -603,7 +602,6 @@ inline void FastScalarInvSqrt_x6( float *arg1, float *arg2, float *arg3, float *
 #endif
 }
 
-
 // End Helper Functions
 
 #ifdef ENABLE_SIMPLE_MATH

diff --git a/neo/idlib/math/Simd_SSE.cpp b/neo/idlib/math/Simd_SSE.cpp
@@ -27,7 +27,6 @@ If you have questions concerning this license or the applicable additional terms
 */
 
 #include "sys/platform.h"
-#include "idlib/geometry/DrawVert.h"
 #include "idlib/math/Simd_SSE.h"
 
 //===============================================================
@@ -36,6 +35,16 @@ If you have questions concerning this license or the applicable additional terms
 //                                                        E
 //===============================================================
 
+#include <xmmintrin.h>
+
+#include "idlib/geometry/DrawVert.h"
+#include "idlib/geometry/JointTransform.h"
+#include "idlib/math/Vector.h"
+#include "idlib/math/Matrix.h"
+#include "idlib/math/Quat.h"
+#include "idlib/math/Plane.h"
+#include "renderer/Model.h"
+
 #define DRAWVERT_SIZE				60
 #define DRAWVERT_XYZ_OFFSET			(0*4)
 #define DRAWVERT_ST_OFFSET			(3*4)
@@ -46,8 +55,6 @@ If you have questions concerning this license or the applicable additional terms
 
 #if defined(__GNUC__) && defined(__SSE__)
 
-#include <xmmintrin.h>
-
 #define SHUFFLEPS( x, y, z, w )		(( (x) & 3 ) << 6 | ( (y) & 3 ) << 4 | ( (z) & 3 ) << 2 | ( (w) & 3 ))
 #define R_SHUFFLEPS( x, y, z, w )	(( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))
 
@@ -626,15 +633,6 @@ void VPCALL idSIMD_SSE::Dot( float *dst, const idVec3 &constant, const idPlane *
 
 #elif defined(_MSC_VER) && defined(_M_IX86)
 
-#include <xmmintrin.h>
-
-#include "idlib/geometry/JointTransform.h"
-#include "idlib/math/Vector.h"
-#include "idlib/math/Matrix.h"
-#include "idlib/math/Quat.h"
-#include "idlib/math/Plane.h"
-#include "renderer/Model.h"
-
 #define SHUFFLEPS( x, y, z, w )		(( (x) & 3 ) << 6 | ( (y) & 3 ) << 4 | ( (z) & 3 ) << 2 | ( (w) & 3 ))
 #define R_SHUFFLEPS( x, y, z, w )	(( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))
 
@@ -18093,6 +18091,8 @@ void VPCALL idSIMD_SSE::MixedSoundToSamples( short *samples, const float *mixBuf
 #endif
 }
 
+#endif  /* _MSC_VER */
+
 /*
 ============
 idSIMD_SSE::CullByFrustum
@@ -18107,31 +18107,32 @@ void VPCALL idSIMD_SSE::CullByFrustum( idDrawVert *verts, const int numVerts, co
 	__m128 fC56 = _mm_set_ps( 0, 0, frustum[5][2], frustum[4][2] );
 	__m128 fD14 = _mm_set_ps( frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
 	__m128 fD56 = _mm_set_ps( 0, 0, frustum[5][3], frustum[4][3] );
+
 	for ( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m128 vX = _mm_set1_ps( vec.x );
 		__m128 vY = _mm_set1_ps( vec.y );
 		__m128 vZ = _mm_set1_ps( vec.z );
 		__m128 d14 = _mm_add_ps(
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fA14, vX ),
-		                     _mm_mul_ps( fB14, vY )
-		                 ),
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fC14, vZ ),
-		                     fD14
-		                 )
-		             );
+			_mm_add_ps(
+				_mm_mul_ps( fA14, vX ),
+				_mm_mul_ps( fB14, vY )
+			),
+			_mm_add_ps(
+				_mm_mul_ps( fC14, vZ ),
+				fD14
+			)
+		);
 		__m128 d56 = _mm_add_ps(
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fA56, vX ),
-		                     _mm_mul_ps( fB56, vY )
-		                 ),
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fC56, vZ ),
-		                     fD56
-		                 )
-		             );
+			_mm_add_ps(
+				_mm_mul_ps( fA56, vX ),
+				_mm_mul_ps( fB56, vY )
+			),
+			_mm_add_ps(
+				_mm_mul_ps( fC56, vZ ),
+				fD56
+			)
+		);
 		const short mask6 = ( 1 << 6 ) - 1;
 		__m128 eps = _mm_set1_ps( epsilon );
 		int mask_lo14 = _mm_movemask_ps( _mm_cmplt_ps( d14, eps ) );
@@ -18155,31 +18156,32 @@ void VPCALL idSIMD_SSE::CullByFrustum2( idDrawVert *verts, const int numVerts, c
 	__m128 fC56 = _mm_set_ps( 0, 0, frustum[5][2], frustum[4][2] );
 	__m128 fD14 = _mm_set_ps( frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
 	__m128 fD56 = _mm_set_ps( 0, 0, frustum[5][3], frustum[4][3] );
+
 	for ( int j = 0; j < numVerts; j++ ) {
 		idVec3 &vec = verts[j].xyz;
 		__m128 vX = _mm_set1_ps( vec.x );
 		__m128 vY = _mm_set1_ps( vec.y );
 		__m128 vZ = _mm_set1_ps( vec.z );
 		__m128 d14 = _mm_add_ps(
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fA14, vX ),
-		                     _mm_mul_ps( fB14, vY )
-		                 ),
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fC14, vZ ),
-		                     fD14
-		                 )
-		             );
+			_mm_add_ps(
+				_mm_mul_ps( fA14, vX ),
+				_mm_mul_ps( fB14, vY )
+			),
+			_mm_add_ps(
+				_mm_mul_ps( fC14, vZ ),
+				fD14
+			)
+		);
 		__m128 d56 = _mm_add_ps(
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fA56, vX ),
-		                     _mm_mul_ps( fB56, vY )
-		                 ),
-		                 _mm_add_ps(
-		                     _mm_mul_ps( fC56, vZ ),
-		                     fD56
-		                 )
-		             );
+			_mm_add_ps(
+				_mm_mul_ps( fA56, vX ),
+				_mm_mul_ps( fB56, vY )
+			),
+			_mm_add_ps(
+				_mm_mul_ps( fC56, vZ ),
+				fD56
+			)
+		);
 		const short mask6 = ( 1 << 6 ) - 1;
 		__m128 eps = _mm_set1_ps( epsilon );
 		int mask_lo14 = _mm_movemask_ps( _mm_cmplt_ps( d14, eps ) );
@@ -18192,5 +18194,3 @@ void VPCALL idSIMD_SSE::CullByFrustum2( idDrawVert *verts, const int numVerts, c
 		pointCull[j] = mask_lo & mask6 | ( mask_hi & mask6 ) << 6;
 	}
 }
-
-#endif /* _MSC_VER */
diff --git a/neo/idlib/math/Simd_SSE.h b/neo/idlib/math/Simd_SSE.h
@@ -46,7 +46,7 @@ class idSIMD_SSE : public idSIMD_MMX {
 	using idSIMD_MMX::MinMax;
 
 	virtual const char *VPCALL GetName( void ) const;
-	virtual void VPCALL Dot( float *dst,			const idPlane &constant,const idDrawVert *src,	const int count );
+	virtual void VPCALL Dot( float *dst,			const idPlane &constant, const idDrawVert *src,	const int count );
 	virtual	void VPCALL MinMax( idVec3 &min,		idVec3 &max,			const idDrawVert *src,	const int *indexes,		const int count );
 	virtual void VPCALL Dot( float *dst,			const idVec3 &constant,	const idPlane *src,		const int count );
 
@@ -69,9 +69,9 @@ class idSIMD_SSE : public idSIMD_MMX {
 	virtual void VPCALL Dot( float *dst,			const idVec3 &constant,	const idVec3 *src,		const int count );
 	virtual void VPCALL Dot( float *dst,			const idVec3 &constant,	const idPlane *src,		const int count );
 	virtual void VPCALL Dot( float *dst,			const idVec3 &constant,	const idDrawVert *src,	const int count );
-	virtual void VPCALL Dot( float *dst,			const idPlane &constant,const idVec3 *src,		const int count );
-	virtual void VPCALL Dot( float *dst,			const idPlane &constant,const idPlane *src,		const int count );
-	virtual void VPCALL Dot( float *dst,			const idPlane &constant,const idDrawVert *src,	const int count );
+	virtual void VPCALL Dot( float *dst,			const idPlane &constant, const idVec3 *src,		const int count );
+	virtual void VPCALL Dot( float *dst,			const idPlane &constant, const idPlane *src,		const int count );
+	virtual void VPCALL Dot( float *dst,			const idPlane &constant, const idDrawVert *src,	const int count );
 	virtual void VPCALL Dot( float *dst,			const idVec3 *src0,		const idVec3 *src1,		const int count );
 	virtual void VPCALL Dot( float &dot,			const float *src1,		const float *src2,		const int count );
 
@@ -143,9 +143,12 @@ class idSIMD_SSE : public idSIMD_MMX {
 	virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
 	virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples );
 
-	virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
-	virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
 #endif
+
+	// Revelator: these work with GCC, Clang, or MSVC, on both x86 and x64 (no inline assembly is used)
+	virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
+	virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
+
 };
 
 #endif /* !__MATH_SIMD_SSE_H__ */