Restored generic (non-SIMD) code

This commit is contained in:
Brian Harris
2013-05-29 13:12:13 -05:00
parent be311f42e1
commit 9c37079c16
23 changed files with 3328 additions and 24 deletions

View File

@@ -193,6 +193,7 @@ Assumes input is in the range [-1, 1]
ID_INLINE void VertexFloatToByte( const float & x, const float & y, const float & z, byte * bval ) {
assert_4_byte_aligned( bval ); // for __stvebx
#ifdef ID_WIN_X86_SSE2_INTRIN
const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f };
const __m128 vector_float_half = { 0.5f, 0.5f, 0.5f, 0.5f };
@@ -209,6 +210,13 @@ ID_INLINE void VertexFloatToByte( const float & x, const float & y, const float
bval[1] = (byte)_mm_extract_epi16( xyz16, 1 );
bval[2] = (byte)_mm_extract_epi16( xyz16, 2 );
#else
bval[0] = VERTEX_FLOAT_TO_BYTE( x );
bval[1] = VERTEX_FLOAT_TO_BYTE( y );
bval[2] = VERTEX_FLOAT_TO_BYTE( z );
#endif
}
/*
@@ -609,6 +617,7 @@ ID_INLINE void WriteDrawVerts16( idDrawVert * destVerts, const idDrawVert * loca
assert_16_byte_aligned( destVerts );
assert_16_byte_aligned( localVerts );
#ifdef ID_WIN_X86_SSE2_INTRIN
for ( int i = 0; i < numVerts; i++ ) {
__m128i v0 = _mm_load_si128( (const __m128i *)( (byte *)( localVerts + i ) + 0 ) );
@@ -617,6 +626,11 @@ ID_INLINE void WriteDrawVerts16( idDrawVert * destVerts, const idDrawVert * loca
_mm_stream_si128( (__m128i *)( (byte *)( destVerts + i ) + 16 ), v1 );
}
#else
memcpy( destVerts, localVerts, numVerts * sizeof( idDrawVert ) );
#endif
}
/*

View File

@@ -29,6 +29,7 @@ If you have questions concerning this license or the applicable additional terms
#ifndef __DRAWVERT_INTRINSICS_H__
#define __DRAWVERT_INTRINSICS_H__
#ifdef ID_WIN_X86_SSE2_INTRIN
static const __m128i vector_int_f32_sign_mask = _mm_set1_epi32( 1U << IEEE_FLT_SIGN_BIT );
static const __m128i vector_int_f32_exponent_mask = _mm_set1_epi32( ( ( 1U << IEEE_FLT_EXPONENT_BITS ) - 1 ) << IEEE_FLT_MANTISSA_BITS );
@@ -50,12 +51,14 @@ static const __m128 vector_float_last_one = { 0.0f, 0.0f, 0.0f, 1.0
static const __m128 vector_float_1_over_255 = { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f };
static const __m128 vector_float_1_over_4 = { 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f };
#endif
/*
====================
FastF32toF16
====================
*/
#ifdef ID_WIN_X86_SSE2_INTRIN
ID_INLINE_EXTERN __m128i FastF32toF16( __m128i f32_bits ) {
__m128i f16_sign = _mm_srli_epi32( _mm_and_si128( f32_bits, vector_int_f32_sign_mask ), f32_to_f16_sign_shift );
@@ -77,6 +80,7 @@ ID_INLINE_EXTERN __m128i FastF32toF16( __m128i f32_bits ) {
return _mm_packs_epi32( flt16, flt16 );
}
#endif
ID_INLINE_EXTERN halfFloat_t Scalar_FastF32toF16( float f32 ) {
const int f32_sign_mask = 1U << IEEE_FLT_SIGN_BIT;
@@ -115,6 +119,7 @@ ID_INLINE_EXTERN halfFloat_t Scalar_FastF32toF16( float f32 ) {
LoadSkinnedDrawVertPosition
====================
*/
#ifdef ID_WIN_X86_SSE2_INTRIN
ID_INLINE_EXTERN __m128 LoadSkinnedDrawVertPosition( const idDrawVert & base, const idJointMat * joints ) {
const idJointMat & j0 = joints[base.color[0]];
@@ -176,6 +181,7 @@ ID_INLINE_EXTERN __m128 LoadSkinnedDrawVertPosition( const idDrawVert & base, co
return r0;
}
#endif
ID_INLINE_EXTERN idVec3 Scalar_LoadSkinnedDrawVertPosition( const idDrawVert & vert, const idJointMat * joints ) {
const idJointMat & j0 = joints[vert.color[0]];

File diff suppressed because it is too large Load Diff