mirror of
https://github.com/id-Software/Quake-2.git
synced 2026-03-19 16:39:43 +01:00
The original Quake 2 sources as originally released under the GPL license on December 21, 2001.
This commit is contained in:
884
ref_soft/r_spr8.asm
Normal file
884
ref_soft/r_spr8.asm
Normal file
@@ -0,0 +1,884 @@
|
||||
.386P
|
||||
.model FLAT
|
||||
;
|
||||
; d_spr8.s
|
||||
; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
|
||||
;
|
||||
|
||||
include qasm.inc
|
||||
include d_if.inc
|
||||
|
||||
if id386
|
||||
|
||||
;----------------------------------------------------------------------
|
||||
; 8-bpp horizontal span drawing code for polygons, with transparency.
|
||||
;----------------------------------------------------------------------
|
||||
|
||||
_TEXT SEGMENT
|
||||
|
||||
; out-of-line, rarely-needed clamping code
|
||||
|
||||
; Out-of-line clamp stubs. Each one is reached by a conditional jump from the
; main path, loads the clamped value into the register under test, and jumps
; back to the matching LClampReentryN label.

; Stub 0: span-start s in esi. Entered with "ja" after "cmp esi,ebx"
; (ebx = _bbextents); the flags of that compare are still live here, so "jg"
; separates "above the extent" from "negative".
LClampHigh0:
        mov     esi,ds:dword ptr[_bbextents]    ; too high: s = _bbextents
        jmp     LClampReentry0
LClampHighOrLow0:
        jg      LClampHigh0
        xor     esi,esi                         ; negative: s = 0
        jmp     LClampReentry0

; Stub 1: span-start t in edx, compared against ebp = _bbextentt.
LClampHigh1:
        mov     edx,ds:dword ptr[_bbextentt]    ; too high: t = _bbextentt
        jmp     LClampReentry1
LClampHighOrLow1:
        jg      LClampHigh1
        xor     edx,edx                         ; negative: t = 0
        jmp     LClampReentry1

; Stubs 2/3: s (ebp) and t (ecx) at the end of a full 8-pixel segment.
; The low bound for segment-end values is 2048 rather than 0.
LClampLow2:
        mov     ebp,2048
        jmp     LClampReentry2
LClampHigh2:
        mov     ebp,ds:dword ptr[_bbextents]
        jmp     LClampReentry2

LClampLow3:
        mov     ecx,2048
        jmp     LClampReentry3
LClampHigh3:
        mov     ecx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry3

; Stubs 4/5: s (eax) and t (ebx) at the end of the last, partial segment.
LClampLow4:
        mov     eax,2048
        jmp     LClampReentry4
LClampHigh4:
        mov     eax,ds:dword ptr[_bbextents]
        jmp     LClampReentry4

LClampLow5:
        mov     ebx,2048
        jmp     LClampReentry5
LClampHigh5:
        mov     ebx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry5
|
||||
|
||||
|
||||
; Stack offset of the span-list pointer argument once the prologue has run:
; 4 bytes of return address plus the 16 bytes pushed by the four PUSHes below.
pspans  equ     4+16

        align   4
        public  _D_SpriteDrawSpansXXX
;----------------------------------------------------------------------
; _D_SpriteDrawSpansXXX
;   In:   [esp+4] on entry = pointer to the first span descriptor
;         (fields are addressed through sspan_t_u / sspan_t_v /
;         sspan_t_count; descriptors are sspan_t_size bytes apart).
;   Saves and restores ebp, edi, esi and ebx.
;----------------------------------------------------------------------
_D_SpriteDrawSpansXXX:
        push    ebp
        push    edi
        push    esi
        push    ebx

; Per-call constants, with the integer loads interleaved between the FP
; operations:
;   sdivz8stepu / tdivz8stepu / zi8stepu = the u gradients multiplied by fp_8
;       (the step across one 8-pixel segment)
;   izistep = _d_zistepu * fp_64kx64k converted to integer, then rotated by 16
;   pbase   = _cacheblock
;   ebx     = span pointer, ecx = pixel count of the first span
        fld     ds:dword ptr[_d_sdivzstepu]
        fmul    ds:dword ptr[fp_8]              ; sdivz8
        mov     edx,ds:dword ptr[_cacheblock]
        fld     ds:dword ptr[_d_tdivzstepu]
        fmul    ds:dword ptr[fp_8]              ; tdivz8 | sdivz8
        mov     ebx,ds:dword ptr[pspans+esp]    ; first span descriptor
        fld     ds:dword ptr[_d_zistepu]
        fmul    ds:dword ptr[fp_8]              ; zi8 | tdivz8 | sdivz8
        mov     ds:dword ptr[pbase],edx         ; pbase = cacheblock
        fld     ds:dword ptr[_d_zistepu]
        fmul    ds:dword ptr[fp_64kx64k]        ; izistep | zi8 | tdivz8 | sdivz8
        fxch    st(3)                           ; sdivz8 | zi8 | tdivz8 | izistep
        fstp    ds:dword ptr[sdivz8stepu]
        fstp    ds:dword ptr[zi8stepu]
        fstp    ds:dword ptr[tdivz8stepu]
        fistp   ds:dword ptr[izistep]           ; FP stack is empty again
        mov     eax,ds:dword ptr[izistep]
        ror     eax,16                          ; upper 16 bits into the low word
        mov     ecx,ds:dword ptr[sspan_t_count+ebx]
        mov     ds:dword ptr[izistep],eax

; A first span with count <= 0 is skipped without being drawn.
        test    ecx,ecx
        jle     LNextSpan
|
||||
|
||||
;----------------------------------------------------------------------
; Per-span setup.
;   In:   ebx = span descriptor, ecx = pixel count (ecx is not modified here)
;   Out:  FP stack = 1/z | fp_64k/(1/z) | t/z | s/z
;         izi = fixed-point 1/z rotated by 16, pz = first z-buffer entry,
;         edi = first destination pixel,
;         esi = _sadjust, edx = _tadjust, span pointer saved on the stack.
; In the stack pictures below "du" is the span's u and "dv" its v, and
; "z*64k" is shorthand for fp_64k/(1/z).
;----------------------------------------------------------------------
LSpanLoop:
        fild    ds:dword ptr[sspan_t_v+ebx]     ; dv
        fild    ds:dword ptr[sspan_t_u+ebx]     ; du | dv

        fld     st(1)                           ; dv | du | dv
        fmul    ds:dword ptr[_d_sdivzstepv]     ; dv*sstepv | du | dv
        fld     st(1)                           ; du | dv*sstepv | du | dv
        fmul    ds:dword ptr[_d_sdivzstepu]     ; du*sstepu | dv*sstepv | du | dv
        fld     st(2)                           ; du | du*sstepu | dv*sstepv | du | dv
        fmul    ds:dword ptr[_d_tdivzstepu]     ; du*tstepu | du*sstepu | dv*sstepv | du | dv
        fxch    st(1)                           ; du*sstepu | du*tstepu | dv*sstepv | du | dv
        faddp   st(2),st(0)                     ; du*tstepu | S | du | dv
                                                ;   S = du*sstepu + dv*sstepv
        fxch    st(1)                           ; S | du*tstepu | du | dv
        fld     st(3)                           ; dv | S | du*tstepu | du | dv
        fmul    ds:dword ptr[_d_tdivzstepv]     ; dv*tstepv | S | du*tstepu | du | dv
        fxch    st(1)                           ; S | dv*tstepv | du*tstepu | du | dv
        fadd    ds:dword ptr[_d_sdivzorigin]    ; s/z | dv*tstepv | du*tstepu | du | dv
        fxch    st(4)                           ; dv | dv*tstepv | du*tstepu | du | s/z
        fmul    ds:dword ptr[_d_zistepv]        ; dv*zstepv | dv*tstepv | du*tstepu | du | s/z
        fxch    st(1)                           ; dv*tstepv | dv*zstepv | du*tstepu | du | s/z
        faddp   st(2),st(0)                     ; dv*zstepv | T | du | s/z
                                                ;   T = dv*tstepv + du*tstepu
        fxch    st(2)                           ; du | T | dv*zstepv | s/z
        fmul    ds:dword ptr[_d_zistepu]        ; du*zstepu | T | dv*zstepv | s/z
        fxch    st(1)                           ; T | du*zstepu | dv*zstepv | s/z
        fadd    ds:dword ptr[_d_tdivzorigin]    ; t/z | du*zstepu | dv*zstepv | s/z
        fxch    st(2)                           ; dv*zstepv | du*zstepu | t/z | s/z
        faddp   st(1),st(0)                     ; Z | t/z | s/z
                                                ;   Z = dv*zstepv + du*zstepu

        fld     ds:dword ptr[fp_64k]            ; fp_64k | Z | t/z | s/z
        fxch    st(1)                           ; Z | fp_64k | t/z | s/z
        fadd    ds:dword ptr[_d_ziorigin]       ; 1/z | fp_64k | t/z | s/z

        fld     st(0)                           ; 1/z | 1/z | fp_64k | t/z | s/z
        fmul    ds:dword ptr[fp_64kx64k]        ; izi | 1/z | fp_64k | t/z | s/z
        fxch    st(1)                           ; 1/z | izi | fp_64k | t/z | s/z

; Start the divide that yields z*64k in st(2); the integer work below runs
; while it is in progress.
        fdiv    st(2),st(0)                     ; 1/z | izi | z*64k | t/z | s/z
        fxch    st(1)                           ; izi | 1/z | z*64k | t/z | s/z

        fistp   ds:dword ptr[izi]               ; 1/z | z*64k | t/z | s/z
        mov     ebp,ds:dword ptr[izi]

; pz = _d_pzbuffer + v * _d_zrowbytes + u * 2  (one word per pixel)
        ror     ebp,16                          ; upper 16 bits into the low word
        mov     eax,ds:dword ptr[sspan_t_v+ebx]
        mov     ds:dword ptr[izi],ebp           ; izi is kept in rotated form
        mov     ebp,ds:dword ptr[sspan_t_u+ebx]
        imul    ds:dword ptr[_d_zrowbytes]      ; edx:eax = v * rowbytes (edx clobbered)
        shl     ebp,1
        add     eax,ds:dword ptr[_d_pzbuffer]
        add     eax,ebp
        mov     ds:dword ptr[pz],eax

; edi = _d_viewbuffer + _d_scantable[v] + u
        mov     ebp,ds:dword ptr[_d_viewbuffer]
        mov     eax,ds:dword ptr[sspan_t_v+ebx]
        push    ebx                             ; span pointer, popped after LEndSpan
        mov     edx,ds:dword ptr[_tadjust]
        mov     esi,ds:dword ptr[_sadjust]
        mov     edi,ds:dword ptr[_d_scantable+eax*4]
        add     edi,ebp
        mov     ebp,ds:dword ptr[sspan_t_u+ebx]
        add     edi,ebp
|
||||
|
||||
;
|
||||
; now start the FDIV for the end of the span
|
||||
;
|
||||
; Pick one of three ways to finish the span-start s/t and to launch the
; divide for the end of the first segment:
;   count > 8  : LSetupNotLast1, step the gradients by a full 8 pixels
;   count == 1 : LCleanup1, no second divide is needed
;   otherwise  : fall through, step by (count - 1) pixels
; All three paths first convert the start point with the same sequence:
;   s = (s/z) * z*64k  and  t = (t/z) * z*64k, stored as integers in s and t.
        cmp     ecx,8
        ja      LSetupNotLast1

        dec     ecx                             ; ecx = count - 1 from here on
        jz      LCleanup1
        mov     ds:dword ptr[spancountminus1],ecx

        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z
        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z

; Advance 1/z, t/z and s/z by (count - 1) u-steps ("n" = spancountminus1).
        fild    ds:dword ptr[spancountminus1]   ; n | 1/z | t/z | s/z
        fld     ds:dword ptr[_d_tdivzstepu]     ; tstepu | n | ...
        fld     ds:dword ptr[_d_zistepu]        ; zstepu | tstepu | n | ...
        fmul    st(0),st(2)                     ; zstepu*n | tstepu | n | ...
        fxch    st(1)                           ; tstepu | zstepu*n | n | ...
        fmul    st(0),st(2)                     ; tstepu*n | zstepu*n | n | ...
        fxch    st(2)                           ; n | zstepu*n | tstepu*n | ...
        fmul    ds:dword ptr[_d_sdivzstepu]     ; sstepu*n | zstepu*n | tstepu*n | 1/z | t/z | s/z
        fxch    st(1)                           ; zstepu*n | sstepu*n | tstepu*n | 1/z | t/z | s/z
        faddp   st(3),st(0)                     ; sstepu*n | tstepu*n | 1/z' | t/z | s/z
        fxch    st(1)                           ; tstepu*n | sstepu*n | 1/z' | t/z | s/z
        faddp   st(3),st(0)                     ; sstepu*n | 1/z' | t/z' | s/z
        faddp   st(3),st(0)                     ; 1/z' | t/z' | s/z'

        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; z*64k | 1/z' | t/z' | s/z'
                                                ; left in flight on purpose
        jmp     LFDIVInFlight1

; One-pixel span: only the start point is needed.
LCleanup1:
        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z
        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z
        jmp     LFDIVInFlight1

        align   4
; More than 8 pixels: the first segment is a full one.
LSetupNotLast1:
        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z
        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z

        fadd    ds:dword ptr[zi8stepu]          ; 1/z' | t/z | s/z
        fxch    st(2)                           ; s/z | t/z | 1/z'
        fadd    ds:dword ptr[sdivz8stepu]       ; s/z' | t/z | 1/z'
        fxch    st(2)                           ; 1/z' | t/z | s/z'
        fld     ds:dword ptr[tdivz8stepu]       ; tdivz8 | 1/z' | t/z | s/z'
        faddp   st(2),st(0)                     ; 1/z' | t/z' | s/z'
        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; z*64k | 1/z' | t/z' | s/z'
                                                ; left in flight on purpose
|
||||
; Span-start texture coordinates.
;   In:   esi = _sadjust, edx = _tadjust, s/t = unadjusted start values,
;         ecx = count (if > 8) or count - 1
;   Out:  s, t = adjusted and clamped start values
;         sfracf, tfracf = their low 16 bits, left-justified
;         esi = pbase + (s >> 16) + (t >> 16) * _cachewidth
LFDIVInFlight1:
        add     esi,ds:dword ptr[s]
        add     edx,ds:dword ptr[t]
        mov     ebx,ds:dword ptr[_bbextents]
        mov     ebp,ds:dword ptr[_bbextentt]
        cmp     esi,ebx
        ja      LClampHighOrLow0                ; unsigned: catches negative and too-high
LClampReentry0:
        mov     ds:dword ptr[s],esi
        mov     ebx,ds:dword ptr[pbase]
        shl     esi,16
        cmp     edx,ebp                         ; flags survive the MOV below
        mov     ds:dword ptr[sfracf],esi
        ja      LClampHighOrLow1
LClampReentry1:
        mov     ds:dword ptr[t],edx
        mov     esi,ds:dword ptr[s]
        shl     edx,16
        mov     eax,ds:dword ptr[t]
        sar     esi,16                          ; integer part of s
        mov     ds:dword ptr[tfracf],edx

        sar     eax,16                          ; integer part of t
        add     esi,ebx
        imul    eax,ds:dword ptr[_cachewidth]
        add     esi,eax                         ; esi = first source texel

; Anything that is not "more than 8 remaining" is handled as the last segment.
        cmp     ecx,8
        jna     LLastSegment
|
||||
|
||||
;
|
||||
; not the last segment; do full 8-wide segment
|
||||
;
|
||||
; Full 8-pixel segment: compute s/t at the segment end and the per-pixel
; steps that lead there.
;   In:   FP stack = z*64k | 1/z | t/z | s/z (divide started earlier),
;         ecx = pixels remaining (> 8), s/t = values at segment start
;   Out:  snext/tnext = clamped end values, sstep/tstep = fractional steps,
;         advancetable[0..1] = whole-texel advances, ebx = sfracf, edx = tfracf,
;         remaining count (minus this segment) pushed on the stack
LNotLastSegment:
        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[snext]             ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[tnext]             ; 1/z | t/z | s/z
        mov     eax,ds:dword ptr[snext]
        mov     edx,ds:dword ptr[tnext]

        sub     ecx,8                           ; count off this segment's pixels
        mov     ebp,ds:dword ptr[_sadjust]
        push    ecx                             ; popped again after pixel 4
        mov     ecx,ds:dword ptr[_tadjust]

        add     ebp,eax                         ; ebp = adjusted snext
        add     ecx,edx                         ; ecx = adjusted tnext

        mov     eax,ds:dword ptr[_bbextents]
        mov     edx,ds:dword ptr[_bbextentt]

; Clamp both end values to [2048, extent].
        cmp     ebp,2048
        jl      LClampLow2
        cmp     ebp,eax
        ja      LClampHigh2
LClampReentry2:

        cmp     ecx,2048
        jl      LClampLow3
        cmp     ecx,edx
        ja      LClampHigh3
LClampReentry3:

        mov     ds:dword ptr[snext],ebp
        mov     ds:dword ptr[tnext],ecx

        sub     ebp,ds:dword ptr[s]             ; ebp = s delta across the segment
        sub     ecx,ds:dword ptr[t]             ; ecx = t delta across the segment

; Set up advancetable. The shifts by 19 and 13 are the usual 16-bit
; integer/fraction split combined with a divide by 8 (2^3) for the eight
; pixels of the segment.
        mov     eax,ecx
        mov     edx,ebp
        sar     edx,19                          ; whole-texel s step per pixel
        mov     ebx,ds:dword ptr[_cachewidth]
        sar     eax,19                          ; whole-texel t step per pixel
        jz      LIsZero                         ; skip the multiply when it is 0
        imul    eax,ebx                         ; ... converted to a row offset
LIsZero:
        add     eax,edx                         ; tstep_int * cachewidth + sstep_int
        mov     edx,ds:dword ptr[tfracf]
        mov     ds:dword ptr[advancetable+4],eax ; advance when t does not carry
        add     eax,ebx                         ; one extra row
        shl     ebp,13                          ; left-justify the s step fraction
        mov     ds:dword ptr[sstep],ebp
        mov     ebx,ds:dword ptr[sfracf]
        shl     ecx,13                          ; left-justify the t step fraction
        mov     ds:dword ptr[advancetable],eax  ; advance when t carries
        mov     ds:dword ptr[tstep],ecx
|
||||
|
||||
; Pixels 0-4 of a full segment.
; Registers: ecx = pz, ebp = rotated 1/z (bp is the 16-bit value compared with
; the z-buffer), esi = source texel, edi = destination, ebx/edx = s/t fractions.
; For every pixel:
;   - skip it when bp is (signed) less than the stored z word, or when the
;     texel equals TRANSPARENT_COLOR; otherwise store bp and the texel
;   - add izistep to ebp with end-around carry (both are kept rotated by 16)
;   - add tstep to edx; SBB turns the carry into eax = -1 / 0, which selects
;     advancetable[0] (t carried) or advancetable[1]
;   - add sstep to ebx; its carry is folded into esi by the final ADC
        mov     ecx,ds:dword ptr[pz]
        mov     ebp,ds:dword ptr[izi]

; pixel 0
        cmp     bp,ds:word ptr[ecx]
        jl      Lp1
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp1
        mov     ds:word ptr[ecx],bp
        mov     ds:byte ptr[edi],al
Lp1:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 1
        cmp     bp,ds:word ptr[2+ecx]
        jl      Lp2
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp2
        mov     ds:word ptr[2+ecx],bp
        mov     ds:byte ptr[1+edi],al
Lp2:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 2
        cmp     bp,ds:word ptr[4+ecx]
        jl      Lp3
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp3
        mov     ds:word ptr[4+ecx],bp
        mov     ds:byte ptr[2+edi],al
Lp3:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 3
        cmp     bp,ds:word ptr[6+ecx]
        jl      Lp4
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp4
        mov     ds:word ptr[6+ecx],bp
        mov     ds:byte ptr[3+edi],al
Lp4:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 4
        cmp     bp,ds:word ptr[8+ecx]
        jl      Lp5
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp5
        mov     ds:word ptr[8+ecx],bp
        mov     ds:byte ptr[4+edi],al
Lp5:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
|
||||
;
|
||||
; start FDIV for end of next segment in flight, so it can overlap
|
||||
;
|
||||
; Mid-segment: launch the divide for the end of the NEXT segment so that it
; overlaps with pixels 5-7.
;   In:   top of stack = pixels remaining after this segment,
;         FP stack = 1/z | t/z | s/z
;   Out:  eax = remaining count (if > 8) or remaining - 1;
;         FP stack = z*64k | 1/z' | t/z' | s/z' unless exactly one pixel
;         remains, in which case no divide is started
        pop     eax
        cmp     eax,8                           ; more than one segment after this?
        ja      LSetupNotLast2

        dec     eax
        jz      LFDIVInFlight2                  ; a single pixel needs no step
        mov     ds:dword ptr[spancountminus1],eax
        fild    ds:dword ptr[spancountminus1]   ; n | 1/z | t/z | s/z

        fld     ds:dword ptr[_d_zistepu]        ; zstepu | n | 1/z | t/z | s/z
        fmul    st(0),st(1)                     ; zstepu*n | n | 1/z | t/z | s/z
        fld     ds:dword ptr[_d_tdivzstepu]     ; tstepu | zstepu*n | n | 1/z | t/z | s/z
        fmul    st(0),st(2)                     ; tstepu*n | zstepu*n | n | 1/z | t/z | s/z
        fxch    st(1)                           ; zstepu*n | tstepu*n | n | 1/z | t/z | s/z
        faddp   st(3),st(0)                     ; tstepu*n | n | 1/z' | t/z | s/z
        fxch    st(1)                           ; n | tstepu*n | 1/z' | t/z | s/z
        fmul    ds:dword ptr[_d_sdivzstepu]     ; sstepu*n | tstepu*n | 1/z' | t/z | s/z
        fxch    st(1)                           ; tstepu*n | sstepu*n | 1/z' | t/z | s/z
        faddp   st(3),st(0)                     ; sstepu*n | 1/z' | t/z' | s/z
        fld     ds:dword ptr[fp_64k]            ; fp_64k | sstepu*n | 1/z' | t/z' | s/z
        fxch    st(1)                           ; sstepu*n | fp_64k | 1/z' | t/z' | s/z
        faddp   st(4),st(0)                     ; fp_64k | 1/z' | t/z' | s/z'

        fdiv    st(0),st(1)                     ; z*64k | 1/z' | t/z' | s/z'
                                                ; left in flight on purpose
        jmp     LFDIVInFlight2

        align   4
; At least one more full segment follows: step by the precomputed 8-pixel
; increments instead.
LSetupNotLast2:
        fadd    ds:dword ptr[zi8stepu]          ; 1/z' | t/z | s/z
        fxch    st(2)                           ; s/z | t/z | 1/z'
        fadd    ds:dword ptr[sdivz8stepu]       ; s/z' | t/z | 1/z'
        fxch    st(2)                           ; 1/z' | t/z | s/z'
        fld     ds:dword ptr[tdivz8stepu]       ; tdivz8 | 1/z' | t/z | s/z'
        faddp   st(2),st(0)                     ; 1/z' | t/z' | s/z'
        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; z*64k | 1/z' | t/z' | s/z'
                                                ; left in flight on purpose
|
||||
; Pixels 5-7 of a full segment, then roll the segment-end values over to
; become the start of the next segment.
;   In:   eax = count value to carry into the next iteration
;   Out:  ecx = that count; edi, pz advanced by 8 pixels; s/t = snext/tnext;
;         sfracf/tfracf/izi updated from ebx/edx/ebp
LFDIVInFlight2:
        push    eax                             ; eax is scratch in the pixel code

; pixel 5
        cmp     bp,ds:word ptr[10+ecx]
        jl      Lp6
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp6
        mov     ds:word ptr[10+ecx],bp
        mov     ds:byte ptr[5+edi],al
Lp6:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 6
        cmp     bp,ds:word ptr[12+ecx]
        jl      Lp7
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp7
        mov     ds:word ptr[12+ecx],bp
        mov     ds:byte ptr[6+edi],al
Lp7:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

; pixel 7
        cmp     bp,ds:word ptr[14+ecx]
        jl      Lp8
        mov     al,ds:byte ptr[esi]
        cmp     al,offset TRANSPARENT_COLOR
        jz      Lp8
        mov     ds:word ptr[14+ecx],bp
        mov     ds:byte ptr[7+edi],al
Lp8:
        add     ebp,ds:dword ptr[izistep]
        adc     ebp,0
        add     edx,ds:dword ptr[tstep]
        sbb     eax,eax
        add     ebx,ds:dword ptr[sstep]
        adc     esi,ds:dword ptr[advancetable+4+eax*4]

        add     edi,8                           ; 8 destination bytes
        add     ecx,16                          ; 8 z-buffer words
        mov     ds:dword ptr[tfracf],edx
        mov     edx,ds:dword ptr[snext]
        mov     ds:dword ptr[sfracf],ebx
        mov     ebx,ds:dword ptr[tnext]
        mov     ds:dword ptr[s],edx             ; s = snext
        mov     ds:dword ptr[t],ebx             ; t = tnext

        mov     ds:dword ptr[pz],ecx
        mov     ds:dword ptr[izi],ebp

        pop     ecx                             ; count pushed at LFDIVInFlight2

        cmp     ecx,8                           ; several segments still to go?
        ja      LNotLastSegment
|
||||
|
||||
;
|
||||
; last segment of scan
|
||||
;
|
||||
; Last segment of the span.
;   In:   ecx = number of steps in the segment (pixel count - 1, 0..7)
;         FP stack = z*64k | 1/z | t/z | s/z when ecx != 0
; The end point is the last pixel itself, not one past it, so the step is
; (end - start) / ecx rather than a fixed shift.
LLastSegment:
        test    ecx,ecx
        jz      LNoSteps                        ; one pixel: nothing to step

        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[snext]             ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[tnext]             ; 1/z | t/z | s/z

        mov     ebx,ds:dword ptr[_tadjust]
        mov     eax,ds:dword ptr[_sadjust]

        add     eax,ds:dword ptr[snext]         ; eax = adjusted snext
        add     ebx,ds:dword ptr[tnext]         ; ebx = adjusted tnext

        mov     ebp,ds:dword ptr[_bbextents]
        mov     edx,ds:dword ptr[_bbextentt]

; Clamp both end values to [2048, extent].
        cmp     eax,2048
        jl      LClampLow4
        cmp     eax,ebp
        ja      LClampHigh4
LClampReentry4:
        mov     ds:dword ptr[snext],eax

        cmp     ebx,2048
        jl      LClampLow5
        cmp     ebx,edx
        ja      LClampHigh5
LClampReentry5:

        cmp     ecx,1
        je      LOnlyOneStep                    ; one step: the delta is the step

        sub     eax,ds:dword ptr[s]             ; s delta
        sub     ebx,ds:dword ptr[t]             ; t delta

; Double the deltas before multiplying by the reciprocal table entry for
; ecx steps; the high half of each product (edx) is kept as the step.
        add     eax,eax
        add     ebx,ebx
        imul    ds:dword ptr[reciprocal_table-8+ecx*4]  ; edx:eax = s delta * recip
        mov     ebp,edx                         ; ebp = sstep

        mov     eax,ebx
        imul    ds:dword ptr[reciprocal_table-8+ecx*4]  ; edx = tstep
|
||||
|
||||
; Set up the steps for the last segment and dispatch into the unrolled tail.
;   In:   ebp = sstep, edx = tstep (16.16), ecx = number of steps
;   Out:  sstep/tstep = fractional steps, left-justified
;         advancetable[0..1] = whole-texel advances
;         ebx = sfracf, edx = tfracf, ecx = pz, ebp = izi
; The handler address from spr8entryvec_table is pushed and then consumed by
; RET, which therefore acts as an indirect jump.
LSetEntryvec:
        mov     ebx,ds:dword ptr[spr8entryvec_table+ecx*4]
        mov     eax,edx                         ; keep tstep for its fraction
        push    ebx                             ; target of the RET below
        mov     ecx,ebp
        sar     ecx,16                          ; whole-texel s step
        mov     ebx,ds:dword ptr[_cachewidth]
        sar     edx,16                          ; whole-texel t step
        jz      LIsZeroLast                     ; skip the multiply when it is 0
        imul    edx,ebx                         ; ... converted to a row offset
LIsZeroLast:
        add     edx,ecx                         ; tstep_int * cachewidth + sstep_int
        mov     ecx,ds:dword ptr[tfracf]
        mov     ds:dword ptr[advancetable+4],edx ; advance when t does not carry
        add     edx,ebx                         ; one extra row
        shl     ebp,16                          ; left-justify the s step fraction
        mov     ebx,ds:dword ptr[sfracf]
        shl     eax,16                          ; left-justify the t step fraction
        mov     ds:dword ptr[advancetable],edx  ; advance when t carries

        mov     ds:dword ptr[tstep],eax
        mov     ds:dword ptr[sstep],ebp
        mov     edx,ecx                         ; edx = tfracf

        mov     ecx,ds:dword ptr[pz]
        mov     ebp,ds:dword ptr[izi]

        ret                                     ; jump to the handler pushed above
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
; Exactly one pixel left in the span: draw it with the final-pixel code at
; LEndSpan, which addresses [14+ecx] and [7+edi], so both pointers are biased
; to make those offsets land on the current pixel.
;
; ebp must hold the rotated 1/z (izi) for LEndSpan's z test and z write.
; When this label is reached straight from span setup (a one-pixel span),
; ebp still contains _bbextentt from the clamp code, so izi is reloaded here.
; When it is reached after a full segment, izi was just stored from ebp, so
; the reload leaves ebp unchanged.
LNoSteps:
        mov     ecx,ds:dword ptr[pz]
        mov     ebp,ds:dword ptr[izi]           ; 1/z for the z-buffer compare
        sub     edi,7                           ; adjust for hardwired offset
        sub     ecx,14
        jmp     LEndSpan
|
||||
|
||||
|
||||
; Two pixels in the last segment, hence a single step: the step is simply the
; whole delta, so no reciprocal multiply is needed.
;   In:   eax = clamped snext, ebx = clamped tnext
;   Out:  ebp = sstep, edx = tstep, as expected by LSetEntryvec
LOnlyOneStep:
        sub     eax,ds:dword ptr[s]
        mov     ebp,eax                         ; sstep = snext - s
        sub     ebx,ds:dword ptr[t]
        mov     edx,ebx                         ; tstep = tnext - t
        jmp     LSetEntryvec
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
public Spr8Entry2_8
|
||||
Spr8Entry2_8:
|
||||
sub edi,6 ; adjust for hardwired offsets
|
||||
sub ecx,12
|
||||
mov al,ds:byte ptr[esi]
|
||||
jmp LLEntry2_8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
; Entries for last segments of 3 to 7 pixels. Each one biases edi (bytes) and
; ecx (z-buffer words) by the number of pixels being skipped, so that the
; fixed offsets used in the unrolled tail address the first pixel, and then
; jumps into the tail at the matching LLEntryN_8 label.

        public  Spr8Entry3_8
Spr8Entry3_8:
        sub     edi,5                           ; 3 pixels: skip 5
        sub     ecx,10
        jmp     LLEntry3_8

;----------------------------------------

        public  Spr8Entry4_8
Spr8Entry4_8:
        sub     edi,4                           ; 4 pixels: skip 4
        sub     ecx,8
        jmp     LLEntry4_8

;----------------------------------------

        public  Spr8Entry5_8
Spr8Entry5_8:
        sub     edi,3                           ; 5 pixels: skip 3
        sub     ecx,6
        jmp     LLEntry5_8

;----------------------------------------

        public  Spr8Entry6_8
Spr8Entry6_8:
        sub     edi,2                           ; 6 pixels: skip 2
        sub     ecx,4
        jmp     LLEntry6_8

;----------------------------------------

        public  Spr8Entry7_8
Spr8Entry7_8:
        sub     edi,1                           ; 7 pixels: skip 1
        sub     ecx,2
        jmp     LLEntry7_8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
public Spr8Entry8_8
|
||||
Spr8Entry8_8:
|
||||
cmp bp,ds:word ptr[ecx]
|
||||
jl Lp9
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp9
|
||||
mov ds:word ptr[ecx],bp
|
||||
mov ds:byte ptr[edi],al
|
||||
Lp9:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry7_8:
|
||||
cmp bp,ds:word ptr[2+ecx]
|
||||
jl Lp10
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp10
|
||||
mov ds:word ptr[2+ecx],bp
|
||||
mov ds:byte ptr[1+edi],al
|
||||
Lp10:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry6_8:
|
||||
cmp bp,ds:word ptr[4+ecx]
|
||||
jl Lp11
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp11
|
||||
mov ds:word ptr[4+ecx],bp
|
||||
mov ds:byte ptr[2+edi],al
|
||||
Lp11:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry5_8:
|
||||
cmp bp,ds:word ptr[6+ecx]
|
||||
jl Lp12
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp12
|
||||
mov ds:word ptr[6+ecx],bp
|
||||
mov ds:byte ptr[3+edi],al
|
||||
Lp12:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry4_8:
|
||||
cmp bp,ds:word ptr[8+ecx]
|
||||
jl Lp13
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp13
|
||||
mov ds:word ptr[8+ecx],bp
|
||||
mov ds:byte ptr[4+edi],al
|
||||
Lp13:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry3_8:
|
||||
cmp bp,ds:word ptr[10+ecx]
|
||||
jl Lp14
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp14
|
||||
mov ds:word ptr[10+ecx],bp
|
||||
mov ds:byte ptr[5+edi],al
|
||||
Lp14:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
LLEntry2_8:
|
||||
cmp bp,ds:word ptr[12+ecx]
|
||||
jl Lp15
|
||||
mov al,ds:byte ptr[esi]
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp15
|
||||
mov ds:word ptr[12+ecx],bp
|
||||
mov ds:byte ptr[6+edi],al
|
||||
Lp15:
|
||||
add ebp,ds:dword ptr[izistep]
|
||||
adc ebp,0
|
||||
add edx,ds:dword ptr[tstep]
|
||||
sbb eax,eax
|
||||
add ebx,ds:dword ptr[sstep]
|
||||
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
||||
|
||||
LEndSpan:
|
||||
cmp bp,ds:word ptr[14+ecx]
|
||||
jl Lp16
|
||||
mov al,ds:byte ptr[esi] ; load first texel in segment
|
||||
cmp al,offset TRANSPARENT_COLOR
|
||||
jz Lp16
|
||||
mov ds:word ptr[14+ecx],bp
|
||||
mov ds:byte ptr[7+edi],al
|
||||
; End of a span: drop the three values still on the FP stack (1/z, t/z, s/z),
; recover the span pointer pushed during span setup, and move on.
Lp16:
        fstp    st(0)
        fstp    st(0)
        fstp    st(0)

        pop     ebx                             ; span pointer

; Advance to the next descriptor. A positive count is drawn, a zero count is
; skipped, and a negative count ends the list.
LNextSpan:
        add     ebx,offset sspan_t_size
        mov     ecx,ds:dword ptr[sspan_t_count+ebx]
        test    ecx,ecx
        jg      LSpanLoop                       ; draw this span
        jz      LNextSpan                       ; empty span, try the next one

; Epilogue: restore the registers saved on entry, in reverse order.
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
|
||||
|
||||
_TEXT ENDS
|
||||
endif ; id386
|
||||
END
|
||||
Reference in New Issue
Block a user