Skip to content

Commit

Permalink
Hopefully fixed some mistakes
Browse files Browse the repository at this point in the history
  • Loading branch information
revelator authored Nov 18, 2024
1 parent 0a1f2a3 commit f50471b
Show file tree
Hide file tree
Showing 8 changed files with 477 additions and 211 deletions.
524 changes: 411 additions & 113 deletions neo/idlib/math/Simd.cpp

Large diffs are not rendered by default.

17 changes: 2 additions & 15 deletions neo/idlib/math/Simd_AVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,6 @@ If you have questions concerning this license or the applicable additional terms
//
//===============================================================

#if defined(__GNUC__) && defined(__SSE3__)

/*
============
idSIMD_SSE3::GetName
============
*/
const char *idSIMD_AVX::GetName( void ) const {
return "MMX & SSE & SSE2 & SSE3 & AVX";
}

#elif defined(_MSC_VER) && defined(_M_IX86)

#include <immintrin.h>

#include "idlib/geometry/DrawVert.h"
Expand Down Expand Up @@ -76,6 +63,7 @@ void VPCALL idSIMD_AVX::CullByFrustum( idDrawVert *verts, const int numVerts, co
const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
const __m256 eps = _mm256_set1_ps( epsilon );
const byte mask6 = ( 1 << 6 ) - 1;

for( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
Expand Down Expand Up @@ -111,6 +99,7 @@ void VPCALL idSIMD_AVX::CullByFrustum2( idDrawVert *verts, const int numVerts, c
const __m256 eps = _mm256_set1_ps( epsilon );
static const __m256 epsM = _mm256_set1_ps( -epsilon );
const short mask6 = ( 1 << 6 ) - 1;

for( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
Expand All @@ -133,5 +122,3 @@ void VPCALL idSIMD_AVX::CullByFrustum2( idDrawVert *verts, const int numVerts, c
}
_mm256_zeroupper();
}

#endif
5 changes: 1 addition & 4 deletions neo/idlib/math/Simd_AVX.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@

class idSIMD_AVX : public idSIMD_SSE3 {
public:
#if defined(__GNUC__) && defined(__AVX__)
virtual const char *VPCALL GetName( void ) const;
#elif defined(_MSC_VER) && defined(_M_IX86)
// Revelator: these work whether in gcc clang or msvc x86 or x64 (no inline assembly used)
virtual const char *VPCALL GetName( void ) const;
virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
#endif
};

#endif /* !__MATH_SIMD_AVX_H__ */
20 changes: 3 additions & 17 deletions neo/idlib/math/Simd_AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,10 @@ If you have questions concerning this license or the applicable additional terms

//===============================================================
//
// AVX implementation of idSIMDProcessor
// AVX2 implementation of idSIMDProcessor
//
//===============================================================

#if defined(__GNUC__) && defined(__SSE3__)

/*
============
idSIMD_SSE3::GetName
============
*/
const char *idSIMD_AVX:2:GetName( void ) const {
return "MMX & SSE & SSE2 & SSE3 & AVX & AVX2";
}

#elif defined(_MSC_VER) && defined(_M_IX86)

#include <immintrin.h>

#include "idlib/geometry/DrawVert.h"
Expand Down Expand Up @@ -76,6 +63,7 @@ void VPCALL idSIMD_AVX2::CullByFrustum( idDrawVert *verts, const int numVerts, c
const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
const __m256 eps = _mm256_set1_ps( epsilon );
const byte mask6 = (1 << 6) - 1;

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
Expand Down Expand Up @@ -105,6 +93,7 @@ void VPCALL idSIMD_AVX2::CullByFrustum2( idDrawVert *verts, const int numVerts,
const __m256 eps = _mm256_set1_ps( epsilon );
static const __m256 epsM = _mm256_set1_ps( -epsilon );
const short mask6 = (1 << 6) - 1;

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
Expand All @@ -121,6 +110,3 @@ void VPCALL idSIMD_AVX2::CullByFrustum2( idDrawVert *verts, const int numVerts,
}
_mm256_zeroupper();
}

#endif

5 changes: 1 addition & 4 deletions neo/idlib/math/Simd_AVX2.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@

class idSIMD_AVX2 : public idSIMD_AVX {
public:
#if defined(__GNUC__) && defined(__AVX__)
virtual const char *VPCALL GetName( void ) const;
#elif defined(_MSC_VER) && defined(_M_IX86)
// Revelator: these work whether gcc clang or msvc in x86 or x64 (no inline assembly used)
virtual const char *VPCALL GetName( void ) const;
virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
#endif
};

#endif /* !__MATH_SIMD_AVX2_H__ */
2 changes: 0 additions & 2 deletions neo/idlib/math/Simd_AltiVec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,6 @@ inline vector float VectorSin16( vector float v ) {
#if 0
// load up half PI and use it to calculate the rest of the values. This is
// sometimes cheaper than loading them from memory

vector float halfPI = (vector float) ( 0.5f * 3.14159265358979323846f );
vector float PI = vec_add( halfPI, halfPI );
vector float oneandhalfPI = vec_add( PI, halfPI );
Expand Down Expand Up @@ -603,7 +602,6 @@ inline void FastScalarInvSqrt_x6( float *arg1, float *arg2, float *arg3, float *
#endif
}


// End Helper Functions

#ifdef ENABLE_SIMPLE_MATH
Expand Down
100 changes: 50 additions & 50 deletions neo/idlib/math/Simd_SSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ If you have questions concerning this license or the applicable additional terms
*/

#include "sys/platform.h"
#include "idlib/geometry/DrawVert.h"
#include "idlib/math/Simd_SSE.h"

//===============================================================
Expand All @@ -36,6 +35,16 @@ If you have questions concerning this license or the applicable additional terms
// E
//===============================================================

#include <xmmintrin.h>

#include "idlib/geometry/DrawVert.h"
#include "idlib/geometry/JointTransform.h"
#include "idlib/math/Vector.h"
#include "idlib/math/Matrix.h"
#include "idlib/math/Quat.h"
#include "idlib/math/Plane.h"
#include "renderer/Model.h"

#define DRAWVERT_SIZE 60
#define DRAWVERT_XYZ_OFFSET (0*4)
#define DRAWVERT_ST_OFFSET (3*4)
Expand All @@ -46,8 +55,6 @@ If you have questions concerning this license or the applicable additional terms

#if defined(__GNUC__) && defined(__SSE__)

#include <xmmintrin.h>

#define SHUFFLEPS( x, y, z, w ) (( (x) & 3 ) << 6 | ( (y) & 3 ) << 4 | ( (z) & 3 ) << 2 | ( (w) & 3 ))
#define R_SHUFFLEPS( x, y, z, w ) (( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))

Expand Down Expand Up @@ -626,15 +633,6 @@ void VPCALL idSIMD_SSE::Dot( float *dst, const idVec3 &constant, const idPlane *

#elif defined(_MSC_VER) && defined(_M_IX86)

#include <xmmintrin.h>

#include "idlib/geometry/JointTransform.h"
#include "idlib/math/Vector.h"
#include "idlib/math/Matrix.h"
#include "idlib/math/Quat.h"
#include "idlib/math/Plane.h"
#include "renderer/Model.h"

#define SHUFFLEPS( x, y, z, w ) (( (x) & 3 ) << 6 | ( (y) & 3 ) << 4 | ( (z) & 3 ) << 2 | ( (w) & 3 ))
#define R_SHUFFLEPS( x, y, z, w ) (( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))

Expand Down Expand Up @@ -18093,6 +18091,8 @@ void VPCALL idSIMD_SSE::MixedSoundToSamples( short *samples, const float *mixBuf
#endif
}

#endif /* _MSC_VER */

/*
============
idSIMD_SSE::CullByFrustum
Expand All @@ -18107,31 +18107,32 @@ void VPCALL idSIMD_SSE::CullByFrustum( idDrawVert *verts, const int numVerts, co
__m128 fC56 = _mm_set_ps( 0, 0, frustum[5][2], frustum[4][2] );
__m128 fD14 = _mm_set_ps( frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
__m128 fD56 = _mm_set_ps( 0, 0, frustum[5][3], frustum[4][3] );

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m128 vX = _mm_set1_ps( vec.x );
__m128 vY = _mm_set1_ps( vec.y );
__m128 vZ = _mm_set1_ps( vec.z );
__m128 d14 = _mm_add_ps(
_mm_add_ps(
_mm_mul_ps( fA14, vX ),
_mm_mul_ps( fB14, vY )
),
_mm_add_ps(
_mm_mul_ps( fC14, vZ ),
fD14
)
);
_mm_add_ps(
_mm_mul_ps( fA14, vX ),
_mm_mul_ps( fB14, vY )
),
_mm_add_ps(
_mm_mul_ps( fC14, vZ ),
fD14
)
);
__m128 d56 = _mm_add_ps(
_mm_add_ps(
_mm_mul_ps( fA56, vX ),
_mm_mul_ps( fB56, vY )
),
_mm_add_ps(
_mm_mul_ps( fC56, vZ ),
fD56
)
);
_mm_add_ps(
_mm_mul_ps( fA56, vX ),
_mm_mul_ps( fB56, vY )
),
_mm_add_ps(
_mm_mul_ps( fC56, vZ ),
fD56
)
);
const short mask6 = ( 1 << 6 ) - 1;
__m128 eps = _mm_set1_ps( epsilon );
int mask_lo14 = _mm_movemask_ps( _mm_cmplt_ps( d14, eps ) );
Expand All @@ -18155,31 +18156,32 @@ void VPCALL idSIMD_SSE::CullByFrustum2( idDrawVert *verts, const int numVerts, c
__m128 fC56 = _mm_set_ps( 0, 0, frustum[5][2], frustum[4][2] );
__m128 fD14 = _mm_set_ps( frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
__m128 fD56 = _mm_set_ps( 0, 0, frustum[5][3], frustum[4][3] );

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m128 vX = _mm_set1_ps( vec.x );
__m128 vY = _mm_set1_ps( vec.y );
__m128 vZ = _mm_set1_ps( vec.z );
__m128 d14 = _mm_add_ps(
_mm_add_ps(
_mm_mul_ps( fA14, vX ),
_mm_mul_ps( fB14, vY )
),
_mm_add_ps(
_mm_mul_ps( fC14, vZ ),
fD14
)
);
_mm_add_ps(
_mm_mul_ps( fA14, vX ),
_mm_mul_ps( fB14, vY )
),
_mm_add_ps(
_mm_mul_ps( fC14, vZ ),
fD14
)
);
__m128 d56 = _mm_add_ps(
_mm_add_ps(
_mm_mul_ps( fA56, vX ),
_mm_mul_ps( fB56, vY )
),
_mm_add_ps(
_mm_mul_ps( fC56, vZ ),
fD56
)
);
_mm_add_ps(
_mm_mul_ps( fA56, vX ),
_mm_mul_ps( fB56, vY )
),
_mm_add_ps(
_mm_mul_ps( fC56, vZ ),
fD56
)
);
const short mask6 = ( 1 << 6 ) - 1;
__m128 eps = _mm_set1_ps( epsilon );
int mask_lo14 = _mm_movemask_ps( _mm_cmplt_ps( d14, eps ) );
Expand All @@ -18192,5 +18194,3 @@ void VPCALL idSIMD_SSE::CullByFrustum2( idDrawVert *verts, const int numVerts, c
pointCull[j] = mask_lo & mask6 | ( mask_hi & mask6 ) << 6;
}
}

#endif /* _MSC_VER */
15 changes: 9 additions & 6 deletions neo/idlib/math/Simd_SSE.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class idSIMD_SSE : public idSIMD_MMX {
using idSIMD_MMX::MinMax;

virtual const char *VPCALL GetName( void ) const;
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant, const idDrawVert *src, const int count );
virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count );

Expand All @@ -69,9 +69,9 @@ class idSIMD_SSE : public idSIMD_MMX {
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idVec3 *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idPlane *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant, const idVec3 *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant, const idPlane *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant, const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count );
virtual void VPCALL Dot( float &dot, const float *src1, const float *src2, const int count );

Expand Down Expand Up @@ -143,9 +143,12 @@ class idSIMD_SSE : public idSIMD_MMX {
virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples );

virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );
#endif

// Revelator: these work whether in gcc clang or msvc x86 or x64 (no inline assembly used)
virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );

};

#endif /* !__MATH_SIMD_SSE_H__ */

0 comments on commit f50471b

Please sign in to comment.