mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2026-03-20 09:00:25 +01:00
Restored generic (non-SIMD) code
This commit is contained in:
@@ -102,6 +102,7 @@ static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS,
|
||||
assert_16_byte_aligned( texCoordT );
|
||||
assert_16_byte_aligned( verts );
|
||||
|
||||
#ifdef ID_WIN_X86_SSE2_INTRIN
|
||||
|
||||
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
|
||||
|
||||
@@ -176,6 +177,39 @@ static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS,
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
|
||||
|
||||
for ( int i = 0; i < numVerts; ) {
|
||||
|
||||
const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
|
||||
|
||||
for ( ; i <= nextNumVerts; i++ ) {
|
||||
const idVec3 & v = vertsODS[i].xyz;
|
||||
|
||||
const float d0 = planes[0].Distance( v );
|
||||
const float d1 = planes[1].Distance( v );
|
||||
const float d2 = 1.0f - d0;
|
||||
const float d3 = 1.0f - d1;
|
||||
|
||||
halfFloat_t s = Scalar_FastF32toF16( d0 );
|
||||
halfFloat_t t = Scalar_FastF32toF16( d1 );
|
||||
|
||||
texCoordS[i] = s;
|
||||
texCoordT[i] = t;
|
||||
|
||||
byte bits;
|
||||
bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
|
||||
|
||||
cullBits[i] = bits;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -189,6 +223,7 @@ static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS,
|
||||
assert_16_byte_aligned( texCoordT );
|
||||
assert_16_byte_aligned( verts );
|
||||
|
||||
#ifdef ID_WIN_X86_SSE2_INTRIN
|
||||
|
||||
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
|
||||
|
||||
@@ -263,6 +298,39 @@ static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS,
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
|
||||
|
||||
for ( int i = 0; i < numVerts; ) {
|
||||
|
||||
const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
|
||||
|
||||
for ( ; i <= nextNumVerts; i++ ) {
|
||||
const idVec3 transformed = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints );
|
||||
|
||||
const float d0 = planes[0].Distance( transformed );
|
||||
const float d1 = planes[1].Distance( transformed );
|
||||
const float d2 = 1.0f - d0;
|
||||
const float d3 = 1.0f - d1;
|
||||
|
||||
halfFloat_t s = Scalar_FastF32toF16( d0 );
|
||||
halfFloat_t t = Scalar_FastF32toF16( d1 );
|
||||
|
||||
texCoordS[i] = s;
|
||||
texCoordT[i] = t;
|
||||
|
||||
byte bits;
|
||||
bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
|
||||
bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
|
||||
|
||||
cullBits[i] = bits;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -446,6 +514,7 @@ static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t *
|
||||
assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 );
|
||||
assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 );
|
||||
|
||||
#ifdef ID_WIN_X86_SSE2_INTRIN
|
||||
|
||||
const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 );
|
||||
const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 );
|
||||
@@ -482,6 +551,25 @@ static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t *
|
||||
|
||||
_mm_sfence();
|
||||
|
||||
#else
|
||||
|
||||
// copy vertices
|
||||
for ( int i = 0; i < overlay->numVerts; i++ ) {
|
||||
const overlayVertex_t &overlayVert = overlay->verts[i];
|
||||
|
||||
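// NOTE: the whole-vertex copy followed by the st[0]/st[1] patches writes to write-combined memory out of order, which is bad for performance; the SIMD path above does the right thing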
|
||||
verts[numVerts + i] = sourceVerts[overlayVert.vertexNum];
|
||||
verts[numVerts + i].st[0] = overlayVert.st[0];
|
||||
verts[numVerts + i].st[1] = overlayVert.st[1];
|
||||
}
|
||||
|
||||
// copy indexes
|
||||
for ( int i = 0; i < overlay->numIndexes; i += 2 ) {
|
||||
assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts );
|
||||
WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] );
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user