Restored generic (non-SIMD) code

This commit is contained in:
Brian Harris
2013-05-29 13:12:13 -05:00
parent be311f42e1
commit 9c37079c16
23 changed files with 3328 additions and 24 deletions

View File

@@ -102,6 +102,7 @@ static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS,
assert_16_byte_aligned( texCoordT );
assert_16_byte_aligned( verts );
#ifdef ID_WIN_X86_SSE2_INTRIN
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
@@ -176,6 +177,39 @@ static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS,
}
}
#else
// Generic (non-SIMD) path: processes one vertex per inner-loop iteration and
// writes one S coordinate, one T coordinate and one cull byte per vertex.
// NOTE(review): idODSStreamedArray is declared elsewhere; it appears to hand out
// the vertex array in batches -- confirm against its definition.
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
// i is advanced only by the inner loop, so the outer loop has no increment.
for ( int i = 0; i < numVerts; ) {
// FetchNextBatch() - 1 is used as the inclusive index of the last vertex handled in this pass.
const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
for ( ; i <= nextNumVerts; i++ ) {
const idVec3 & v = vertsODS[i].xyz;
// d0/d1: distance of the vertex position to each of the two planes.
const float d0 = planes[0].Distance( v );
const float d1 = planes[1].Distance( v );
// d2/d3 become negative once d0/d1 exceed 1.0.
const float d2 = 1.0f - d0;
const float d3 = 1.0f - d1;
// The plane distances are stored as the texture coordinates, converted to halfFloat_t.
halfFloat_t s = Scalar_FastF32toF16( d0 );
halfFloat_t t = Scalar_FastF32toF16( d1 );
texCoordS[i] = s;
texCoordT[i] = t;
// Cull byte layout: bit 0 = sign of d0, bit 1 = sign of d1,
// bit 2 = sign of ( 1 - d0 ), bit 3 = sign of ( 1 - d1 ).
// NOTE(review): assumes IEEE_FLT_SIGNBITSET yields 0 or 1 -- confirm.
byte bits;
bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
cullBits[i] = bits;
}
}
#endif
}
/*
@@ -189,6 +223,7 @@ static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS,
assert_16_byte_aligned( texCoordT );
assert_16_byte_aligned( verts );
#ifdef ID_WIN_X86_SSE2_INTRIN
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts );
@@ -263,6 +298,39 @@ static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS,
}
}
#else
// Generic (non-SIMD) path: processes one vertex per inner-loop iteration and
// writes one S coordinate, one T coordinate and one cull byte per vertex.
// NOTE(review): idODSStreamedArray is declared elsewhere; it appears to hand out
// the vertex array in batches -- confirm against its definition.
idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts );
// i is advanced only by the inner loop, so the outer loop has no increment.
for ( int i = 0; i < numVerts; ) {
// FetchNextBatch() - 1 is used as the inclusive index of the last vertex handled in this pass.
const int nextNumVerts = vertsODS.FetchNextBatch() - 1;
for ( ; i <= nextNumVerts; i++ ) {
// Position comes from the helper, given the vertex and the joints array
// (presumably the joint-skinned position -- confirm against the helper).
const idVec3 transformed = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints );
// d0/d1: distance of that position to each of the two planes.
const float d0 = planes[0].Distance( transformed );
const float d1 = planes[1].Distance( transformed );
// d2/d3 become negative once d0/d1 exceed 1.0.
const float d2 = 1.0f - d0;
const float d3 = 1.0f - d1;
// The plane distances are stored as the texture coordinates, converted to halfFloat_t.
halfFloat_t s = Scalar_FastF32toF16( d0 );
halfFloat_t t = Scalar_FastF32toF16( d1 );
texCoordS[i] = s;
texCoordT[i] = t;
// Cull byte layout: bit 0 = sign of d0, bit 1 = sign of d1,
// bit 2 = sign of ( 1 - d0 ), bit 3 = sign of ( 1 - d1 ).
// NOTE(review): assumes IEEE_FLT_SIGNBITSET yields 0 or 1 -- confirm.
byte bits;
bits = IEEE_FLT_SIGNBITSET( d0 ) << 0;
bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1;
bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2;
bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3;
cullBits[i] = bits;
}
}
#endif
}
/*
@@ -446,6 +514,7 @@ static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t *
assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 );
assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 );
#ifdef ID_WIN_X86_SSE2_INTRIN
const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 );
const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 );
@@ -482,6 +551,25 @@ static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t *
_mm_sfence();
#else
// Generic (non-SIMD) path.
// copy vertices
// Each overlay vertex names a source vertex by index; the source vertex is copied
// to the output starting at offset numVerts, then its st is replaced with the
// overlay's own st values.
for ( int i = 0; i < overlay->numVerts; i++ ) {
const overlayVertex_t &overlayVert = overlay->verts[i];
// NOTE: bad out-of-order write-combined write, SIMD code does the right thing
verts[numVerts + i] = sourceVerts[overlayVert.vertexNum];
verts[numVerts + i].st[0] = overlayVert.st[0];
verts[numVerts + i].st[1] = overlayVert.st[1];
}
// copy indexes
// Indexes are handled two per iteration, so this loop relies on
// overlay->numIndexes being even. Each overlay-local index is rebased by
// numVerts and the pair is written at offset numIndexes in the output.
for ( int i = 0; i < overlay->numIndexes; i += 2 ) {
// Overlay indexes must refer to vertices of this overlay.
assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts );
WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] );
}
#endif
}
/*