shithub: qk1

ref: 47eca22c0a296502f838d1b3676b0cd8f5ba57b9
parent: 83d4cf3fd1a257df21468f5bb3b4329e5cf8a40f
author: Konstantinn Bonnet <[email protected]>
date: Wed Jan 28 15:52:07 EST 2015

remove asm, not used on amd64 anyway
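
The files deleted below are the id386-guarded x86 assembly fast paths of the software renderer; an amd64 build never assembles them and uses the portable C versions instead (D_DrawSpans8, D_DrawZSpans, D_DrawParticle, which live in d_scan.c and d_part.c in the stock Quake source). The heart of the span drawers is perspective-correct texture mapping: s/z, t/z and 1/z vary linearly in screen space, so the asm steps them across the span and recovers s and t with one divide per 8- or 16-pixel segment, overlapping the FDIV with integer work. A minimal C sketch of the idea, with illustrative names rather than the engine's, and a divide per pixel instead of per segment:

	typedef unsigned char pixel_t;

	void
	drawspan_sketch(pixel_t *dest, pixel_t *tex, int texwidth, int count,
		float sdivz, float tdivz, float zi,
		float sdivzstep, float tdivzstep, float zistep)
	{
		while(count-- > 0){
			float z = 1.0f / zi;		/* the divide the asm amortizes per segment */
			int s = (int)(sdivz * z);	/* texel column */
			int t = (int)(tdivz * z);	/* texel row */
			*dest++ = tex[t*texwidth + s];
			sdivz += sdivzstep;
			tdivz += tdivzstep;
			zi += zistep;
		}
	}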

--- a/asm_draw.h
+++ /dev/null
@@ -1,132 +1,0 @@
-//
-// asm_draw.h
-//
-// Include file for asm drawing routines.
-//
-
-//
-// !!! note that this file must match the corresponding C structures at all
-// times !!!
-//
-
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define	NEAR_CLIP	0.01
-
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define	CYCLE	128
-
-// espan_t structure
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define espan_t_u    	0
-#define espan_t_v	    4
-#define espan_t_count   8
-#define espan_t_pnext	12
-#define espan_t_size    16
-
-// sspan_t structure
-// !!! if this is changed, it must be changed in d_local.h too !!!
-#define sspan_t_u    	0
-#define sspan_t_v	    4
-#define sspan_t_count   8
-#define sspan_t_size    12
-
-// spanpackage_t structure
-// !!! if this is changed, it must be changed in d_polyset.c too !!!
-#define spanpackage_t_pdest				0
-#define spanpackage_t_pz				4
-#define spanpackage_t_count				8
-#define spanpackage_t_ptex				12
-#define spanpackage_t_sfrac				16
-#define spanpackage_t_tfrac				20
-#define spanpackage_t_light				24
-#define spanpackage_t_zi				28
-#define spanpackage_t_size				32 
-
-// edge_t structure
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define et_u			0
-#define et_u_step		4
-#define et_prev			8
-#define et_next			12
-#define et_surfs		16
-#define et_nextremove	20
-#define et_nearzi		24
-#define et_owner		28
-#define et_size			32
-
-// surf_t structure
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define SURF_T_SHIFT	6
-#define st_next			0
-#define st_prev			4
-#define st_spans		8
-#define st_key			12
-#define st_last_u		16
-#define st_spanstate	20
-#define st_flags		24
-#define st_data			28
-#define st_entity		32
-#define st_nearzi		36
-#define st_insubmodel	40
-#define st_d_ziorigin	44
-#define st_d_zistepu	48
-#define st_d_zistepv	52
-#define st_pad			56
-#define st_size			64
-
-// clipplane_t structure
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define cp_normal		0
-#define cp_dist			12
-#define cp_next			16
-#define cp_leftedge		20
-#define cp_rightedge	21
-#define cp_reserved		22
-#define cp_size			24
-
-// medge_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define me_v				0
-#define me_cachededgeoffset	4
-#define me_size				8
-
-// mvertex_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define mv_position		0
-#define mv_size			12
-
-// refdef_t structure
-// !!! if this is changed, it must be changed in render.h too !!!
-#define rd_vrect					0
-#define rd_aliasvrect				20
-#define rd_vrectright				40
-#define rd_vrectbottom				44
-#define rd_aliasvrectright			48
-#define rd_aliasvrectbottom			52
-#define rd_vrectrightedge			56
-#define rd_fvrectx					60
-#define rd_fvrecty					64
-#define rd_fvrectx_adj				68
-#define rd_fvrecty_adj				72
-#define rd_vrect_x_adj_shift20		76
-#define rd_vrectright_adj_shift20	80
-#define rd_fvrectright_adj			84
-#define rd_fvrectbottom_adj			88
-#define rd_fvrectright				92
-#define rd_fvrectbottom				96
-#define rd_horizontalFieldOfView	100
-#define rd_xOrigin					104
-#define rd_yOrigin					108
-#define rd_vieworg					112
-#define rd_viewangles				124
-#define rd_ambientlight				136
-#define rd_size						140
-
-// mtriangle_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define mtri_facesfront		0
-#define mtri_vertindex		4
-#define mtri_size			16	// !!! if this changes, array indexing in !!!
-								// !!! d_polysa.s must be changed to match !!!
-#define mtri_shift			4
-
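
The #defines in asm_draw.h are hand-maintained byte offsets into the renderer's C structures, which is also why this assembly cannot survive a 64-bit build: any structure containing a pointer changes layout. For instance, the espan_t offsets above (u=0, v=4, count=8, pnext=12, size=16) match the 32-bit layout of the declaration in r_shared.h in the stock source (assumed unchanged here):

	typedef struct espan_s {
		int		u, v, count;
		struct espan_s	*pnext;
	} espan_t;

On amd64 the pointer widens to 8 bytes, pnext moves to offset 16 and the struct grows to 24 bytes, so espan_t_pnext and espan_t_size above would both be stale.
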
--- a/asm_i386.h
+++ /dev/null
@@ -1,78 +1,0 @@
-#ifndef __ASM_I386__
-#define __ASM_I386__
-
-#ifdef ELF
-#define C(label) label
-#endif
-#ifndef ELF
-#define C(label) _##label
-#endif
-
-//
-// !!! note that this file must match the corresponding C structures at all
-// times !!!
-//
-
-// plane_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-// !!! if the size of this is changed, the array lookup in SV_HullPointContents
-//     must be changed too !!!
-#define pl_normal	0
-#define pl_dist		12
-#define pl_type		16
-#define pl_signbits	17
-#define pl_pad		18
-#define pl_size		20
-
-// hull_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define	hu_clipnodes		0
-#define	hu_planes			4
-#define	hu_firstclipnode	8
-#define	hu_lastclipnode		12
-#define	hu_clip_mins		16
-#define	hu_clip_maxs		28
-#define hu_size  			40
-
-// dnode_t structure
-// !!! if this is changed, it must be changed in bspfile.h too !!!
-#define	nd_planenum		0
-#define	nd_children		4
-#define	nd_mins			8
-#define	nd_maxs			20
-#define	nd_firstface	32
-#define	nd_numfaces		36
-#define nd_size			40
-
-// sfxcache_t structure
-// !!! if this is changed, it much be changed in sound.h too !!!
-#define sfxc_length		0
-#define sfxc_loopstart	4
-#define sfxc_speed		8
-#define sfxc_width		12
-#define sfxc_stereo		16
-#define sfxc_data		20
-
-// channel_t structure
-// !!! if this is changed, it much be changed in sound.h too !!!
-#define ch_sfx			0
-#define ch_leftvol		4
-#define ch_rightvol		8
-#define ch_end			12
-#define ch_pos			16
-#define ch_looping		20
-#define ch_entnum		24
-#define ch_entchannel	28
-#define ch_origin		32
-#define ch_dist_mult	44
-#define ch_master_vol	48
-#define ch_size			52
-
-// portable_samplepair_t structure
-// !!! if this is changed, it much be changed in sound.h too !!!
-#define psp_left		0
-#define psp_right		4
-#define psp_size		8
-
-#endif
-
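
asm_i386.h provides the C() macro, which prefixes an underscore for non-ELF object formats so the assembly and the C compiler agree on symbol names, plus more hand-kept offsets. The pl_* offsets (normal=0, dist=12, type=16, signbits=17, pad=18, size=20) correspond to the mplane_t declaration in stock Quake's model.h (assumed); a sketch:

	typedef float vec3_t[3];

	typedef struct mplane_s {
		vec3_t		normal;		/* 12 bytes at offset 0 */
		float		dist;		/* offset 12 */
		unsigned char	type;		/* offset 16: axis type for fast side tests */
		unsigned char	signbits;	/* offset 17: signx + (signy<<1) + (signz<<2) */
		unsigned char	pad[2];		/* offsets 18-19; sizeof == 20 */
	} mplane_t;
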
--- a/block16.h
+++ /dev/null
@@ -1,123 +1,0 @@
-LEnter16_16:
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch0:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch1:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch2:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch3:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch4:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch5:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch6:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch7:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-LEnter8_16:
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch8:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch9:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch10:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch11:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-LEnter4_16:
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch12:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch13:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
-
-LEnter2_16:
-	movb	(%esi),%al
-	movb	(%esi,%ebx,),%cl
-	movb	%dh,%ah
-	addl	%ebp,%edx
-	movb	%dh,%ch
-	leal	(%esi,%ebx,2),%esi
-	movw	0x12345678(,%eax,2),%ax
-LBPatch14:
-	addl	%ebp,%edx
-	movw	%ax,(%edi)
-	movw	0x12345678(,%ecx,2),%cx
-LBPatch15:
-	movw	%cx,2(%edi)
-	addl	$0x4,%edi
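
block16.h is a sequence of unrolled two-pixel blocks for the 16-bpp surface path. The 0x12345678 immediates following each LBPatch label are rewritten at load time with the address of a lookup table, so the table pointer never occupies a register. Each block forms a 16-bit index from the current light level (carried in %dh) and a source texel, fetches a 16-bpp pixel from the table, and stores it. A hedged C sketch of that per-pixel step; the table name and its exact contents are assumptions, only the indexing is taken from the code above:

	unsigned short
	light_texel16(unsigned char texel, unsigned char light,
		unsigned short *table /* 64K 16-bit entries, address patched in at startup */)
	{
		/* mirrors: movb (%esi),%al; movb %dh,%ah; movw table(,%eax,2),%ax */
		return table[(light << 8) | texel];
	}
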
--- a/d_draw.s
+++ /dev/null
@@ -1,1018 +1,0 @@
-//
-// d_draw.s
-// x86 assembly-language horizontal 8-bpp span-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef	id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with no transparency.
-//
-// Assumes there is at least one span in pspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
-	.text
-
-// out-of-line, rarely-needed clamping code
-
-LClampHigh0:
-	movl	C(bbextents),%esi
-	jmp		LClampReentry0
-LClampHighOrLow0:
-	jg		LClampHigh0
-	xorl	%esi,%esi
-	jmp		LClampReentry0
-
-LClampHigh1:
-	movl	C(bbextentt),%edx
-	jmp		LClampReentry1
-LClampHighOrLow1:
-	jg		LClampHigh1
-	xorl	%edx,%edx
-	jmp		LClampReentry1
-
-LClampLow2:
-	movl	$2048,%ebp
-	jmp		LClampReentry2
-LClampHigh2:
-	movl	C(bbextents),%ebp
-	jmp		LClampReentry2
-
-LClampLow3:
-	movl	$2048,%ecx
-	jmp		LClampReentry3
-LClampHigh3:
-	movl	C(bbextentt),%ecx
-	jmp		LClampReentry3
-
-LClampLow4:
-	movl	$2048,%eax
-	jmp		LClampReentry4
-LClampHigh4:
-	movl	C(bbextents),%eax
-	jmp		LClampReentry4
-
-LClampLow5:
-	movl	$2048,%ebx
-	jmp		LClampReentry5
-LClampHigh5:
-	movl	C(bbextentt),%ebx
-	jmp		LClampReentry5
-
-
-#define pspans	4+16
-
-	.align 4
-.globl C(D_DrawSpans8)
-C(D_DrawSpans8):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//
-// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
-// and span list pointers
-//
-// TODO: any overlap from rearranging?
-	flds	C(d_sdivzstepu)
-	fmuls	fp_8
-	movl	C(cacheblock),%edx
-	flds	C(d_tdivzstepu)
-	fmuls	fp_8
-	movl	pspans(%esp),%ebx	// point to the first span descriptor
-	flds	C(d_zistepu)
-	fmuls	fp_8
-	movl	%edx,pbase			// pbase = cacheblock
-	fstps	zi8stepu
-	fstps	tdivz8stepu
-	fstps	sdivz8stepu
-
-LSpanLoop:
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
-	fildl	espan_t_v(%ebx)
-	fildl	espan_t_u(%ebx)
-
-	fld		%st(1)			// dv | du | dv
-	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
-	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
-	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
-							//  du*d_sdivzstepu; stays in %st(2) at end
-	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
-							//  s/z
-	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
-							//  du*d_tdivzstepu | du | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
-							//  du*d_tdivzstepu | du | s/z
-	faddp	%st(0),%st(2)	// dv*d_zistepv |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
-	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fmuls	C(d_zistepu)		// du*d_zistepu |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  du*d_zistepu | dv*d_zistepv | s/z
-	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
-							//  du*d_tdivzstepu; stays in %st(1) at end
-	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
-	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
-
-	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
-	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
-	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
-							//  du*d_zistepu; stays in %st(0) at end
-							// 1/z | fp_64k | t/z | s/z
-//
-// calculate and clamp s & t
-//
-	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z
-
-//
-// point %edi to the first pixel in the span
-//
-	movl	C(d_viewbuffer),%ecx
-	movl	espan_t_v(%ebx),%eax
-	movl	%ebx,pspantemp	// preserve spans pointer
-
-	movl	C(tadjust),%edx
-	movl	C(sadjust),%esi
-	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
-	addl	%ecx,%edi
-	movl	espan_t_u(%ebx),%ecx
-	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
-	movl	espan_t_count(%ebx),%ecx
-
-//
-// now start the FDIV for the end of the span
-//
-	cmpl	$8,%ecx
-	ja		LSetupNotLast1
-
-	decl	%ecx
-	jz		LCleanup1		// if only one pixel, no need to start an FDIV
-	movl	%ecx,spancountminus1
-
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fildl	spancountminus1
-
-	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
-	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
-	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
-	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
-	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
-	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
-	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
-							//  C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
-							//  C(d_tdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)
-
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-	jmp		LFDIVInFlight1
-
-	.align	4
-LSetupNotLast1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fadds	zi8stepu
-	fxch	%st(2)
-	fadds	sdivz8stepu
-	fxch	%st(2)
-	flds	tdivz8stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight1:
-
-	addl	s,%esi
-	addl	t,%edx
-	movl	C(bbextents),%ebx
-	movl	C(bbextentt),%ebp
-	cmpl	%ebx,%esi
-	ja		LClampHighOrLow0
-LClampReentry0:
-	movl	%esi,s
-	movl	pbase,%ebx
-	shll	$16,%esi
-	cmpl	%ebp,%edx
-	movl	%esi,sfracf
-	ja		LClampHighOrLow1
-LClampReentry1:
-	movl	%edx,t
-	movl	s,%esi					// sfrac = scans->sfrac;
-	shll	$16,%edx
-	movl	t,%eax					// tfrac = scans->tfrac;
-	sarl	$16,%esi
-	movl	%edx,tfracf
-
-//
-// calculate the texture starting address
-//
-	sarl	$16,%eax
-	movl	C(cachewidth),%edx
-	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
-	addl	%ebx,%esi
-	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
-									//           ((tfrac >> 16) * cachewidth);
-
-//
-// determine whether last span or not
-//
-	cmpl	$8,%ecx
-	jna		LLastSegment
-
-//
-// not the last segment; do full 8-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-	movl	snext,%eax
-	movl	tnext,%edx
-
-	movb	(%esi),%bl	// get first source texel
-	subl	$8,%ecx		// count off this segments' pixels
-	movl	C(sadjust),%ebp
-	movl	%ecx,counttemp	// remember count of remaining pixels
-
-	movl	C(tadjust),%ecx
-	movb	%bl,(%edi)	// store first dest pixel
-
-	addl	%eax,%ebp
-	addl	%edx,%ecx
-
-	movl	C(bbextents),%eax
-	movl	C(bbextentt),%edx
-
-	cmpl	$2048,%ebp
-	jl		LClampLow2
-	cmpl	%eax,%ebp
-	ja		LClampHigh2
-LClampReentry2:
-
-	cmpl	$2048,%ecx
-	jl		LClampLow3
-	cmpl	%edx,%ecx
-	ja		LClampHigh3
-LClampReentry3:
-
-	movl	%ebp,snext
-	movl	%ecx,tnext
-
-	subl	s,%ebp
-	subl	t,%ecx
-	
-//
-// set up advancetable
-//
-	movl	%ecx,%eax
-	movl	%ebp,%edx
-	sarl	$19,%eax			// tstep >>= 16;
-	jz		LZero
-	sarl	$19,%edx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-	imull	%ebx,%eax
-	jmp		LSetUp1
-
-LZero:
-	sarl	$19,%edx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-
-LSetUp1:
-
-	addl	%edx,%eax			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%edx
-	movl	%eax,advancetable+4	// advance base in t
-	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$13,%ebp			// left-justify sstep fractional part
-	movl	sfracf,%ebx
-	shll	$13,%ecx			// left-justify tstep fractional part
-	movl	%eax,advancetable	// advance extra in t
-
-	movl	%ecx,tstep
-	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac
-
-	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
-	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
-	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	(%esi),%al
-	addl	%ebp,%ebx
-	movb	%al,1(%edi)
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,2(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,3(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
-	movl	counttemp,%ecx
-	cmpl	$8,%ecx			// more than one segment after this?
-	ja		LSetupNotLast2	// yes
-
-	decl	%ecx
-	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
-	movl	%ecx,spancountminus1
-	fildl	spancountminus1
-
-	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
-	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
-	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
-	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
-	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
-	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
-	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
-	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
-	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
-	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
-	faddp	%st(0),%st(4)	// 64k
-
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight2
-
-	.align	4
-LSetupNotLast2:
-	fadds	zi8stepu
-	fxch	%st(2)
-	fadds	sdivz8stepu
-	fxch	%st(2)
-	flds	tdivz8stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight2:
-	movl	%ecx,counttemp
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,4(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,5(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,6(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	$8,%edi
-	movl	%edx,tfracf
-	movl	snext,%edx
-	movl	%ebx,sfracf
-	movl	tnext,%ebx
-	movl	%edx,s
-	movl	%ebx,t
-
-	movl	counttemp,%ecx		// retrieve count
-
-//
-// determine whether last span or not
-//
-	cmpl	$8,%ecx				// are there multiple segments remaining?
-	movb	%al,-1(%edi)
-	ja		LNotLastSegment		// yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
-	testl	%ecx,%ecx
-	jz		LNoSteps		// just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-
-	movb	(%esi),%al		// load first texel in segment
-	movl	C(tadjust),%ebx
-	movb	%al,(%edi)		// store first pixel in segment
-	movl	C(sadjust),%eax
-
-	addl	snext,%eax
-	addl	tnext,%ebx
-
-	movl	C(bbextents),%ebp
-	movl	C(bbextentt),%edx
-
-	cmpl	$2048,%eax
-	jl		LClampLow4
-	cmpl	%ebp,%eax
-	ja		LClampHigh4
-LClampReentry4:
-	movl	%eax,snext
-
-	cmpl	$2048,%ebx
-	jl		LClampLow5
-	cmpl	%edx,%ebx
-	ja		LClampHigh5
-LClampReentry5:
-
-	cmpl	$1,%ecx			// don't bother 
-	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
-							//  of the segment length
-	subl	s,%eax
-	subl	t,%ebx
-
-	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
-	addl	%ebx,%ebx		//  reciprocal yields 16.48
-
-	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
-	movl	%edx,%ebp
-
-	movl	%ebx,%eax
-	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
-
-LSetEntryvec:
-//
-// set up advancetable
-//
-	movl	entryvec_table(,%ecx,4),%ebx
-	movl	%edx,%eax
-	movl	%ebx,jumptemp		// entry point into code for RET later
-	movl	%ebp,%ecx
-	sarl	$16,%edx			// tstep >>= 16;
-	movl	C(cachewidth),%ebx
-	sarl	$16,%ecx			// sstep >>= 16;
-	imull	%ebx,%edx
-
-	addl	%ecx,%edx			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%ecx
-	movl	%edx,advancetable+4	// advance base in t
-	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$16,%ebp			// left-justify sstep fractional part
-	movl	sfracf,%ebx
-	shll	$16,%eax			// left-justify tstep fractional part
-	movl	%edx,advancetable	// advance extra in t
-
-	movl	%eax,tstep
-	movl	%ecx,%edx
-	addl	%eax,%edx
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	jmp		*jumptemp			// jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
-	movb	(%esi),%al		// load first texel in segment
-	subl	$7,%edi			// adjust for hardwired offset
-	jmp		LEndSpan
-
-
-LOnlyOneStep:
-	subl	s,%eax
-	subl	t,%ebx
-	movl	%eax,%ebp
-	movl	%ebx,%edx
-	jmp		LSetEntryvec
-
-//----------------------------------------
-
-.globl	Entry2_8
-Entry2_8:
-	subl	$6,%edi		// adjust for hardwired offsets
-	movb	(%esi),%al
-	jmp		LLEntry2_8
-
-//----------------------------------------
-
-.globl	Entry3_8
-Entry3_8:
-	subl	$5,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	jmp		LLEntry3_8
-
-//----------------------------------------
-
-.globl	Entry4_8
-Entry4_8:
-	subl	$4,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LLEntry4_8
-
-//----------------------------------------
-
-.globl	Entry5_8
-Entry5_8:
-	subl	$3,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LLEntry5_8
-
-//----------------------------------------
-
-.globl	Entry6_8
-Entry6_8:
-	subl	$2,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LLEntry6_8
-
-//----------------------------------------
-
-.globl	Entry7_8
-Entry7_8:
-	decl	%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LLEntry7_8
-
-//----------------------------------------
-
-.globl	Entry8_8
-Entry8_8:
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,1(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LLEntry7_8:
-	sbbl	%ecx,%ecx
-	movb	%al,2(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LLEntry6_8:
-	sbbl	%ecx,%ecx
-	movb	%al,3(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LLEntry5_8:
-	sbbl	%ecx,%ecx
-	movb	%al,4(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LLEntry4_8:
-	sbbl	%ecx,%ecx
-	movb	%al,5(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-LLEntry3_8:
-	movb	%al,6(%edi)
-	movb	(%esi),%al
-LLEntry2_8:
-
-LEndSpan:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
-	fstp %st(0)
-	fstp %st(0)
-	fstp %st(0)
-
-	movl	pspantemp,%ebx				// restore spans pointer
-	movl	espan_t_pnext(%ebx),%ebx	// point to next span
-	testl	%ebx,%ebx			// any more spans?
-	movb	%al,7(%edi)
-	jnz		LSpanLoop			// more spans
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span z drawing codefor polygons, with no transparency.
-//
-// Assumes there is at least one span in pzspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
-	.text
-
-// z-clamp on a non-negative gradient span
-LClamp:
-	movl	$0x40000000,%edx
-	xorl	%ebx,%ebx
-	fstp	%st(0)
-	jmp		LZDraw
-
-// z-clamp on a negative gradient span
-LClampNeg:
-	movl	$0x40000000,%edx
-	xorl	%ebx,%ebx
-	fstp	%st(0)
-	jmp		LZDrawNeg
-
-
-#define pzspans	4+16
-
-.globl C(D_DrawZSpans)
-C(D_DrawZSpans):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-	flds	C(d_zistepu)
-	movl	C(d_zistepu),%eax
-	movl	pzspans(%esp),%esi
-	testl	%eax,%eax
-	jz		LFNegSpan
-
-	fmuls	Float2ToThe31nd
-	fistpl	izistep		// note: we are relying on FP exceptions being turned
-						// off here to avoid range problems
-	movl	izistep,%ebx	// remains loaded for all spans
-
-LFSpanLoop:
-// set up the initial 1/z value
-	fildl	espan_t_v(%esi)
-	fildl	espan_t_u(%esi)
-	movl	espan_t_v(%esi),%ecx
-	movl	C(d_pzbuffer),%edi
-	fmuls	C(d_zistepu)
-	fxch	%st(1)
-	fmuls	C(d_zistepv)
-	fxch	%st(1)
-	fadds	C(d_ziorigin)
-	imull	C(d_zrowbytes),%ecx
-	faddp	%st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
-	fcoms	float_point5
-	addl	%ecx,%edi
-	movl	espan_t_u(%esi),%edx
-	addl	%edx,%edx				// word count
-	movl	espan_t_count(%esi),%ecx
-	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
-	pushl	%esi		// preserve spans pointer
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jz		LClamp
-
-	fmuls	Float2ToThe31nd
-	fistpl	izi			// note: we are relying on FP exceptions being turned
-						// off here to avoid problems when the span is closer
-						// than 1/(2**31)
-	movl	izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDraw:
-
-// do a single pixel up front, if necessary to dword align the destination
-	testl	$2,%edi
-	jz		LFMiddle
-	movl	%edx,%eax
-	addl	%ebx,%edx
-	shrl	$16,%eax
-	decl	%ecx
-	movw	%ax,(%edi)
-	addl	$2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFMiddle:
-	pushl	%ecx
-	shrl	$1,%ecx				// count / 2
-	jz		LFLast				// no aligned dwords to do
-	shrl	$1,%ecx				// (count / 2) / 2
-	jnc		LFMiddleLoop		// even number of aligned dwords to do
-
-	movl	%edx,%eax
-	addl	%ebx,%edx
-	shrl	$16,%eax
-	movl	%edx,%esi
-	addl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%eax
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	andl	%ecx,%ecx
-	jz		LFLast
-
-LFMiddleLoop:
-	movl	%edx,%eax
-	addl	%ebx,%edx
-	shrl	$16,%eax
-	movl	%edx,%esi
-	addl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%eax
-	movl	%edx,%ebp
-	movl	%eax,(%edi)
-	addl	%ebx,%edx
-	shrl	$16,%ebp
-	movl	%edx,%esi
-	addl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%ebp
-	movl	%ebp,4(%edi)	// FIXME: eliminate register contention
-	addl	$8,%edi
-
-	decl	%ecx
-	jnz		LFMiddleLoop
-
-LFLast:
-	popl	%ecx			// retrieve count
-	popl	%esi			// retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
-	andl	$1,%ecx			// is there an odd pixel left to do?
-	jz		LFSpanDone		// no
-	shrl	$16,%edx
-	movw	%dx,(%edi)		// do the final pixel's z
-
-LFSpanDone:
-	movl	espan_t_pnext(%esi),%esi
-	testl	%esi,%esi
-	jnz		LFSpanLoop
-
-	jmp		LFDone
-
-LFNegSpan:
-	fmuls	FloatMinus2ToThe31nd
-	fistpl	izistep		// note: we are relying on FP exceptions being turned
-						// off here to avoid range problems
-	movl	izistep,%ebx	// remains loaded for all spans
-
-LFNegSpanLoop:
-// set up the initial 1/z value
-	fildl	espan_t_v(%esi)
-	fildl	espan_t_u(%esi)
-	movl	espan_t_v(%esi),%ecx
-	movl	C(d_pzbuffer),%edi
-	fmuls	C(d_zistepu)
-	fxch	%st(1)
-	fmuls	C(d_zistepv)
-	fxch	%st(1)
-	fadds	C(d_ziorigin)
-	imull	C(d_zrowbytes),%ecx
-	faddp	%st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
-	fcoms	float_point5
-	addl	%ecx,%edi
-	movl	espan_t_u(%esi),%edx
-	addl	%edx,%edx				// word count
-	movl	espan_t_count(%esi),%ecx
-	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
-	pushl	%esi		// preserve spans pointer
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jz		LClampNeg
-
-	fmuls	Float2ToThe31nd
-	fistpl	izi			// note: we are relying on FP exceptions being turned
-						// off here to avoid problems when the span is closer
-						// than 1/(2**31)
-	movl	izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDrawNeg:
-
-// do a single pixel up front, if necessary to dword align the destination
-	testl	$2,%edi
-	jz		LFNegMiddle
-	movl	%edx,%eax
-	subl	%ebx,%edx
-	shrl	$16,%eax
-	decl	%ecx
-	movw	%ax,(%edi)
-	addl	$2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFNegMiddle:
-	pushl	%ecx
-	shrl	$1,%ecx				// count / 2
-	jz		LFNegLast			// no aligned dwords to do
-	shrl	$1,%ecx				// (count / 2) / 2
-	jnc		LFNegMiddleLoop		// even number of aligned dwords to do
-
-	movl	%edx,%eax
-	subl	%ebx,%edx
-	shrl	$16,%eax
-	movl	%edx,%esi
-	subl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%eax
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	andl	%ecx,%ecx
-	jz		LFNegLast
-
-LFNegMiddleLoop:
-	movl	%edx,%eax
-	subl	%ebx,%edx
-	shrl	$16,%eax
-	movl	%edx,%esi
-	subl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%eax
-	movl	%edx,%ebp
-	movl	%eax,(%edi)
-	subl	%ebx,%edx
-	shrl	$16,%ebp
-	movl	%edx,%esi
-	subl	%ebx,%edx
-	andl	$0xFFFF0000,%esi
-	orl		%esi,%ebp
-	movl	%ebp,4(%edi)	// FIXME: eliminate register contention
-	addl	$8,%edi
-
-	decl	%ecx
-	jnz		LFNegMiddleLoop
-
-LFNegLast:
-	popl	%ecx			// retrieve count
-	popl	%esi			// retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
-	andl	$1,%ecx			// is there an odd pixel left to do?
-	jz		LFNegSpanDone	// no
-	shrl	$16,%edx
-	movw	%dx,(%edi)		// do the final pixel's z
-
-LFNegSpanDone:
-	movl	espan_t_pnext(%esi),%esi
-	testl	%esi,%esi
-	jnz		LFNegSpanLoop
-
-LFDone:
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-#endif	// id386
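
d_draw.s held two routines: D_DrawSpans8, the 8-pixel-subdivision perspective span drawer, and D_DrawZSpans, which fills the 16-bit z-buffer for each span by stepping 1/z in 16.16 fixed point. The asm splits into positive- and negative-gradient loops and packs two z values per aligned dword purely for Pentium-era pairing; the portable inner loop (as in the stock d_scan.c, assumed) reduces to:

	/* izi is 1/z in 16.16 fixed point, izistep its per-pixel step */
	void
	drawzspan_sketch(unsigned short *pz, int count, int izi, int izistep)
	{
		while(count-- > 0){
			*pz++ = (unsigned short)(izi >> 16);	/* keep the integer 16 bits */
			izi += izistep;
		}
	}
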
--- a/d_draw16.s
+++ /dev/null
@@ -1,955 +1,0 @@
-//
-// d_draw16.s
-// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
-// subdivision.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef	id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with no transparency and
-// 16-pixel subdivision.
-//
-// Assumes there is at least one span in pspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
-	.data
-
-	.text
-
-// out-of-line, rarely-needed clamping code
-
-LClampHigh0:
-	movl	C(bbextents),%esi
-	jmp		LClampReentry0
-LClampHighOrLow0:
-	jg		LClampHigh0
-	xorl	%esi,%esi
-	jmp		LClampReentry0
-
-LClampHigh1:
-	movl	C(bbextentt),%edx
-	jmp		LClampReentry1
-LClampHighOrLow1:
-	jg		LClampHigh1
-	xorl	%edx,%edx
-	jmp		LClampReentry1
-
-LClampLow2:
-	movl	$4096,%ebp
-	jmp		LClampReentry2
-LClampHigh2:
-	movl	C(bbextents),%ebp
-	jmp		LClampReentry2
-
-LClampLow3:
-	movl	$4096,%ecx
-	jmp		LClampReentry3
-LClampHigh3:
-	movl	C(bbextentt),%ecx
-	jmp		LClampReentry3
-
-LClampLow4:
-	movl	$4096,%eax
-	jmp		LClampReentry4
-LClampHigh4:
-	movl	C(bbextents),%eax
-	jmp		LClampReentry4
-
-LClampLow5:
-	movl	$4096,%ebx
-	jmp		LClampReentry5
-LClampHigh5:
-	movl	C(bbextentt),%ebx
-	jmp		LClampReentry5
-
-
-#define pspans	4+16
-
-	.align 4
-.globl C(D_DrawSpans16)
-C(D_DrawSpans16):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//
-// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
-// and span list pointers
-//
-// TODO: any overlap from rearranging?
-	flds	C(d_sdivzstepu)
-	fmuls	fp_16
-	movl	C(cacheblock),%edx
-	flds	C(d_tdivzstepu)
-	fmuls	fp_16
-	movl	pspans(%esp),%ebx	// point to the first span descriptor
-	flds	C(d_zistepu)
-	fmuls	fp_16
-	movl	%edx,pbase			// pbase = cacheblock
-	fstps	zi16stepu
-	fstps	tdivz16stepu
-	fstps	sdivz16stepu
-
-LSpanLoop:
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
-	fildl	espan_t_v(%ebx)
-	fildl	espan_t_u(%ebx)
-
-	fld		%st(1)			// dv | du | dv
-	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
-	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
-	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
-							//  du*d_sdivzstepu; stays in %st(2) at end
-	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
-							//  s/z
-	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
-							//  du*d_tdivzstepu | du | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
-							//  du*d_tdivzstepu | du | s/z
-	faddp	%st(0),%st(2)	// dv*d_zistepv |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
-	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fmuls	C(d_zistepu)		// du*d_zistepu |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  du*d_zistepu | dv*d_zistepv | s/z
-	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
-							//  du*d_tdivzstepu; stays in %st(1) at end
-	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
-	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
-
-	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
-	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
-	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
-							//  du*d_zistepu; stays in %st(0) at end
-							// 1/z | fp_64k | t/z | s/z
-//
-// calculate and clamp s & t
-//
-	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z
-
-//
-// point %edi to the first pixel in the span
-//
-	movl	C(d_viewbuffer),%ecx
-	movl	espan_t_v(%ebx),%eax
-	movl	%ebx,pspantemp	// preserve spans pointer
-
-	movl	C(tadjust),%edx
-	movl	C(sadjust),%esi
-	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
-	addl	%ecx,%edi
-	movl	espan_t_u(%ebx),%ecx
-	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
-	movl	espan_t_count(%ebx),%ecx
-
-//
-// now start the FDIV for the end of the span
-//
-	cmpl	$16,%ecx
-	ja		LSetupNotLast1
-
-	decl	%ecx
-	jz		LCleanup1		// if only one pixel, no need to start an FDIV
-	movl	%ecx,spancountminus1
-
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fildl	spancountminus1
-
-	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
-	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
-	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
-	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
-	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
-	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
-	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
-							//  C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
-							//  C(d_tdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)
-
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-	jmp		LFDIVInFlight1
-
-	.align	4
-LSetupNotLast1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fadds	zi16stepu
-	fxch	%st(2)
-	fadds	sdivz16stepu
-	fxch	%st(2)
-	flds	tdivz16stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight1:
-
-	addl	s,%esi
-	addl	t,%edx
-	movl	C(bbextents),%ebx
-	movl	C(bbextentt),%ebp
-	cmpl	%ebx,%esi
-	ja		LClampHighOrLow0
-LClampReentry0:
-	movl	%esi,s
-	movl	pbase,%ebx
-	shll	$16,%esi
-	cmpl	%ebp,%edx
-	movl	%esi,sfracf
-	ja		LClampHighOrLow1
-LClampReentry1:
-	movl	%edx,t
-	movl	s,%esi					// sfrac = scans->sfrac;
-	shll	$16,%edx
-	movl	t,%eax					// tfrac = scans->tfrac;
-	sarl	$16,%esi
-	movl	%edx,tfracf
-
-//
-// calculate the texture starting address
-//
-	sarl	$16,%eax
-	movl	C(cachewidth),%edx
-	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
-	addl	%ebx,%esi
-	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
-									//           ((tfrac >> 16) * cachewidth);
-//
-// determine whether last span or not
-//
-	cmpl	$16,%ecx
-	jna		LLastSegment
-
-//
-// not the last segment; do full 16-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-	movl	snext,%eax
-	movl	tnext,%edx
-
-	movb	(%esi),%bl	// get first source texel
-	subl	$16,%ecx		// count off this segments' pixels
-	movl	C(sadjust),%ebp
-	movl	%ecx,counttemp	// remember count of remaining pixels
-
-	movl	C(tadjust),%ecx
-	movb	%bl,(%edi)	// store first dest pixel
-
-	addl	%eax,%ebp
-	addl	%edx,%ecx
-
-	movl	C(bbextents),%eax
-	movl	C(bbextentt),%edx
-
-	cmpl	$4096,%ebp
-	jl		LClampLow2
-	cmpl	%eax,%ebp
-	ja		LClampHigh2
-LClampReentry2:
-
-	cmpl	$4096,%ecx
-	jl		LClampLow3
-	cmpl	%edx,%ecx
-	ja		LClampHigh3
-LClampReentry3:
-
-	movl	%ebp,snext
-	movl	%ecx,tnext
-
-	subl	s,%ebp
-	subl	t,%ecx
-	
-//
-// set up advancetable
-//
-	movl	%ecx,%eax
-	movl	%ebp,%edx
-	sarl	$20,%eax			// tstep >>= 16;
-	jz		LZero
-	sarl	$20,%edx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-	imull	%ebx,%eax
-	jmp		LSetUp1
-
-LZero:
-	sarl	$20,%edx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-
-LSetUp1:
-
-	addl	%edx,%eax			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%edx
-	movl	%eax,advancetable+4	// advance base in t
-	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$12,%ebp			// left-justify sstep fractional part
-	movl	sfracf,%ebx
-	shll	$12,%ecx			// left-justify tstep fractional part
-	movl	%eax,advancetable	// advance extra in t
-
-	movl	%ecx,tstep
-	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac
-
-	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
-	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
-	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	(%esi),%al
-	addl	%ebp,%ebx
-	movb	%al,1(%edi)
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,2(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,3(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,4(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,5(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,6(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,7(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
-	movl	counttemp,%ecx
-	cmpl	$16,%ecx			// more than one segment after this?
-	ja		LSetupNotLast2	// yes
-
-	decl	%ecx
-	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
-	movl	%ecx,spancountminus1
-	fildl	spancountminus1
-
-	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
-	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
-	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
-	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
-	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
-	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
-	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
-	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
-	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
-	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
-	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
-	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
-	faddp	%st(0),%st(4)	// 64k
-
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight2
-
-	.align	4
-LSetupNotLast2:
-	fadds	zi16stepu
-	fxch	%st(2)
-	fadds	sdivz16stepu
-	fxch	%st(2)
-	flds	tdivz16stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight2:
-	movl	%ecx,counttemp
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,8(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,9(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,10(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,11(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,12(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,13(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,14(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	$16,%edi
-	movl	%edx,tfracf
-	movl	snext,%edx
-	movl	%ebx,sfracf
-	movl	tnext,%ebx
-	movl	%edx,s
-	movl	%ebx,t
-
-	movl	counttemp,%ecx		// retrieve count
-
-//
-// determine whether last span or not
-//
-	cmpl	$16,%ecx				// are there multiple segments remaining?
-	movb	%al,-1(%edi)
-	ja		LNotLastSegment		// yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
-	testl	%ecx,%ecx
-	jz		LNoSteps		// just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-
-	movb	(%esi),%al		// load first texel in segment
-	movl	C(tadjust),%ebx
-	movb	%al,(%edi)		// store first pixel in segment
-	movl	C(sadjust),%eax
-
-	addl	snext,%eax
-	addl	tnext,%ebx
-
-	movl	C(bbextents),%ebp
-	movl	C(bbextentt),%edx
-
-	cmpl	$4096,%eax
-	jl		LClampLow4
-	cmpl	%ebp,%eax
-	ja		LClampHigh4
-LClampReentry4:
-	movl	%eax,snext
-
-	cmpl	$4096,%ebx
-	jl		LClampLow5
-	cmpl	%edx,%ebx
-	ja		LClampHigh5
-LClampReentry5:
-
-	cmpl	$1,%ecx			// don't bother 
-	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
-							//  of the segment length
-	subl	s,%eax
-	subl	t,%ebx
-
-	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
-	addl	%ebx,%ebx		//  reciprocal yields 16.48
-
-	imull	reciprocal_table_16-8(,%ecx,4)	// sstep = (snext - s) /
-											//  (spancount-1)
-	movl	%edx,%ebp
-
-	movl	%ebx,%eax
-	imull	reciprocal_table_16-8(,%ecx,4)	// tstep = (tnext - t) /
-											//  (spancount-1)
-LSetEntryvec:
-//
-// set up advancetable
-//
-	movl	entryvec_table_16(,%ecx,4),%ebx
-	movl	%edx,%eax
-	movl	%ebx,jumptemp		// entry point into code for RET later
-	movl	%ebp,%ecx
-	sarl	$16,%edx			// tstep >>= 16;
-	movl	C(cachewidth),%ebx
-	sarl	$16,%ecx			// sstep >>= 16;
-	imull	%ebx,%edx
-
-	addl	%ecx,%edx			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%ecx
-	movl	%edx,advancetable+4	// advance base in t
-	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$16,%ebp			// left-justify sstep fractional part
-	movl	sfracf,%ebx
-	shll	$16,%eax			// left-justify tstep fractional part
-	movl	%edx,advancetable	// advance extra in t
-
-	movl	%eax,tstep
-	movl	%ecx,%edx
-	addl	%eax,%edx
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	jmp		*jumptemp			// jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
-	movb	(%esi),%al		// load first texel in segment
-	subl	$15,%edi			// adjust for hardwired offset
-	jmp		LEndSpan
-
-
-LOnlyOneStep:
-	subl	s,%eax
-	subl	t,%ebx
-	movl	%eax,%ebp
-	movl	%ebx,%edx
-	jmp		LSetEntryvec
-
-//----------------------------------------
-
-.globl	Entry2_16, Entry3_16, Entry4_16, Entry5_16
-.globl	Entry6_16, Entry7_16, Entry8_16, Entry9_16
-.globl	Entry10_16, Entry11_16, Entry12_16, Entry13_16
-.globl	Entry14_16, Entry15_16, Entry16_16
-
-Entry2_16:
-	subl	$14,%edi		// adjust for hardwired offsets
-	movb	(%esi),%al
-	jmp		LEntry2_16
-
-//----------------------------------------
-
-Entry3_16:
-	subl	$13,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	jmp		LEntry3_16
-
-//----------------------------------------
-
-Entry4_16:
-	subl	$12,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry4_16
-
-//----------------------------------------
-
-Entry5_16:
-	subl	$11,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry5_16
-
-//----------------------------------------
-
-Entry6_16:
-	subl	$10,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry6_16
-
-//----------------------------------------
-
-Entry7_16:
-	subl	$9,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry7_16
-
-//----------------------------------------
-
-Entry8_16:
-	subl	$8,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry8_16
-
-//----------------------------------------
-
-Entry9_16:
-	subl	$7,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry9_16
-
-//----------------------------------------
-
-Entry10_16:
-	subl	$6,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry10_16
-
-//----------------------------------------
-
-Entry11_16:
-	subl	$5,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry11_16
-
-//----------------------------------------
-
-Entry12_16:
-	subl	$4,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry12_16
-
-//----------------------------------------
-
-Entry13_16:
-	subl	$3,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry13_16
-
-//----------------------------------------
-
-Entry14_16:
-	subl	$2,%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry14_16
-
-//----------------------------------------
-
-Entry15_16:
-	decl	%edi		// adjust for hardwired offsets
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-	jmp		LEntry15_16
-
-//----------------------------------------
-
-Entry16_16:
-	addl	%eax,%edx
-	movb	(%esi),%al
-	sbbl	%ecx,%ecx
-	addl	%ebp,%ebx
-	adcl	advancetable+4(,%ecx,4),%esi
-
-	addl	tstep,%edx
-	sbbl	%ecx,%ecx
-	movb	%al,1(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry15_16:
-	sbbl	%ecx,%ecx
-	movb	%al,2(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry14_16:
-	sbbl	%ecx,%ecx
-	movb	%al,3(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry13_16:
-	sbbl	%ecx,%ecx
-	movb	%al,4(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry12_16:
-	sbbl	%ecx,%ecx
-	movb	%al,5(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry11_16:
-	sbbl	%ecx,%ecx
-	movb	%al,6(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry10_16:
-	sbbl	%ecx,%ecx
-	movb	%al,7(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry9_16:
-	sbbl	%ecx,%ecx
-	movb	%al,8(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry8_16:
-	sbbl	%ecx,%ecx
-	movb	%al,9(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry7_16:
-	sbbl	%ecx,%ecx
-	movb	%al,10(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry6_16:
-	sbbl	%ecx,%ecx
-	movb	%al,11(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry5_16:
-	sbbl	%ecx,%ecx
-	movb	%al,12(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-	addl	tstep,%edx
-LEntry4_16:
-	sbbl	%ecx,%ecx
-	movb	%al,13(%edi)
-	addl	%ebp,%ebx
-	movb	(%esi),%al
-	adcl	advancetable+4(,%ecx,4),%esi
-LEntry3_16:
-	movb	%al,14(%edi)
-	movb	(%esi),%al
-LEntry2_16:
-
-LEndSpan:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
-	fstp %st(0)
-	fstp %st(0)
-	fstp %st(0)
-
-	movl	pspantemp,%ebx				// restore spans pointer
-	movl	espan_t_pnext(%ebx),%ebx	// point to next span
-	testl	%ebx,%ebx			// any more spans?
-	movb	%al,15(%edi)
-	jnz		LSpanLoop			// more spans
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-#endif	// id386
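
d_draw16.s is the same drawer with 16-pixel subdivision: one FDIV per 16 pixels instead of per 8, with the per-pixel step shifts adjusted to match (>>20 and <<12 where the 8-pixel version used >>19 and <<13, folding the divide by the segment length into the shift). The last, variable-length segment avoids a real divide as well: the s and t deltas are doubled into 15.17 fixed point and multiplied by a 1.31 fixed-point reciprocal of (spancount-1) from reciprocal_table_16, and the high 32 bits of the 64-bit product are the 16.16 step. A sketch of that arithmetic with illustrative names:

	/* delta = snext - s in 16.16; recip = (1<<31)/(spancount-1), i.e. 1.31 fixed point */
	int
	last_segment_step(int delta, unsigned int recip)
	{
		long long p = (long long)delta * 2 * (long long)recip;	/* 15.17 * 1.31 -> 16.48 */
		return (int)(p >> 32);					/* keep the 16.16 part */
	}
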
--- a/d_ifacea.h
+++ /dev/null
@@ -1,79 +1,0 @@
-//
-// d_ifacea.h
-//
-// Include file for asm driver interface.
-//
-
-//
-// !!! note that this file must match the corresponding C structures in
-// d_iface.h at all times !!!
-//
-
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define ALIAS_ONSEAM				0x0020
-
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define TURB_TEX_SIZE	64		// base turbulent texture size
-
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define	CYCLE	128
-
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define	MAXHEIGHT	1024
-
-// !!! if this is changed, it must be changed in quakedef.h too !!!
-#define CACHE_SIZE	32		// used to align key data structures
-
-// particle_t structure
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-// driver-usable fields
-#define pt_org				0
-#define pt_color			12
-// drivers never touch the following fields
-#define pt_next				16
-#define pt_vel				20
-#define pt_ramp				32
-#define pt_die				36
-#define pt_type				40
-#define pt_size				44
-
-#define PARTICLE_Z_CLIP	8.0
-
-// finalvert_t structure
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define fv_v				0	// !!! if this is moved, cases where the !!!
-								// !!! address of this field is pushed in !!!
-								// !!! d_polysa.s must be changed !!!
-#define fv_flags			24
-#define fv_reserved			28
-#define fv_size				32
-#define fv_shift			5
-
-
-// stvert_t structure
-// !!! if this is changed, it must be changed in modelgen.h too !!!
-#define stv_onseam	0
-#define stv_s		4
-#define stv_t		8
-#define stv_size	12
-
-
-// trivertx_t structure
-// !!! if this is changed, it must be changed in modelgen.h too !!!
-#define tv_v				0
-#define tv_lightnormalindex	3
-#define tv_size				4
-
-// affinetridesc_t structure
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define atd_pskin			0
-#define atd_pskindesc		4
-#define atd_skinwidth		8
-#define atd_skinheight		12
-#define atd_ptriangles		16
-#define atd_pfinalverts		20
-#define atd_numtriangles	24
-#define atd_drawtype		28
-#define atd_seamfixupX16	32
-#define atd_size			36
-
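
The pt_* offsets above have to be kept in lockstep with the C-side particle_t by hand. A hedged sketch of what that C side presumably looks like (the authoritative definition lives in d_iface.h), with compile-time checks; the numbers only hold for the 32-bit pointer size this assembly assumed.

#include <stddef.h>

typedef float vec3_t[3];

// illustrative layout matching the pt_* offsets above, 32-bit pointers assumed
typedef struct particle_s {
	vec3_t			org;	// pt_org   =  0
	float			color;	// pt_color = 12
	struct particle_s	*next;	// pt_next  = 16
	vec3_t			vel;	// pt_vel   = 20
	float			ramp;	// pt_ramp  = 32
	float			die;	// pt_die   = 36
	int			type;	// pt_type  = 40 (an enum in the real header)
} particle_t;

_Static_assert(offsetof(particle_t, color) == 12, "pt_color drifted");
_Static_assert(offsetof(particle_t, type) == 40, "pt_type drifted");
_Static_assert(sizeof(particle_t) == 44, "pt_size drifted");
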
--- a/d_parta.s
+++ /dev/null
@@ -1,458 +1,0 @@
-//
-// d_parta.s
-// x86 assembly-language 8-bpp particle-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "d_ifacea.h"
-#include "asm_draw.h"
-
-#ifdef	id386
-
-//----------------------------------------------------------------------
-// 8-bpp particle drawing code.
-//----------------------------------------------------------------------
-
-//FIXME: comments, full optimization
-
-//----------------------------------------------------------------------
-// 8-bpp particle queueing code.
-//----------------------------------------------------------------------
-
-	.text
-
-#define P	12+4
-
-	.align 4
-.globl C(D_DrawParticle)
-C(D_DrawParticle):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi				// preserve register variables
-	pushl	%ebx
-
-	movl	P(%esp),%edi
-
-// FIXME: better FP overlap in general here
-
-// transform point
-//	VectorSubtract (p->org, r_origin, local);
-	flds	C(r_origin)
-	fsubrs	pt_org(%edi)
-	flds	pt_org+4(%edi)
-	fsubs	C(r_origin)+4
-	flds	pt_org+8(%edi)
-	fsubs	C(r_origin)+8
-	fxch	%st(2)			// local[0] | local[1] | local[2]
-
-//	transformed[2] = DotProduct(local, r_ppn);		
-	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
-	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
-	flds	C(r_ppn)+4	// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
-	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
-	flds	C(r_ppn)+8	// r_ppn[2] | dot1 | dot0 | local[0] |
-						//  local[1] | local[2]
-	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
-	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
-	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
-						  //  local[2]
-	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
-	fld		%st(0)		// z | z | local[0] | local[1] |
-						//  local[2]
-	fdivrs	float_1		// 1/z | z | local[0] | local[1] | local[2]
-	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]
-
-//	if (transformed[2] < PARTICLE_Z_CLIP)
-//		return;
-	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
-	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z
-
-	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
-	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z 
-	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
-						//  local[1] | 1/z 
-
-	fnstsw	%ax
-	testb	$1,%ah
-	jnz		LPop6AndDone
-
-//	transformed[1] = DotProduct(local, r_pup);
-	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z 
-	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
-						//  local[0] | local[1] | 1/z 
-	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
-						//  local[1] | 1/z 
-	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
-						//  local[1] | 1/z 
-	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
-						//  local[1] | 1/z 
-	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z 
-	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z 
-
-//	transformed[0] = DotProduct(local, r_pright);
-	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
-	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
-	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
-	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
-	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
-	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
-	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
-
-	faddp	%st(0),%st(1)	// x | y | 1/z
-	fxch	%st(1)			// y | x | 1/z
-
-// project the point
-	fmul	%st(2),%st(0)	// y/z | x | 1/z
-	fxch	%st(1)			// x | y/z | 1/z
-	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
-	fxch	%st(1)			// y/z | x/z | 1/z
-	fsubrs	C(ycenter)		// v | x/z | 1/z
-	fxch	%st(1)			// x/z | v | 1/z
-	fadds	C(xcenter)		// u | v | 1/z
-// FIXME: preadjust xcenter and ycenter
-	fxch	%st(1)			// v | u | 1/z
-	fadds	float_point5	// v | u | 1/z
-	fxch	%st(1)			// u | v | 1/z
-	fadds	float_point5	// u | v | 1/z
-	fxch	%st(2)			// 1/z | v | u
-	fmuls	DP_32768		// 1/z * 0x8000 | v | u
-	fxch	%st(2)			// u | v | 1/z * 0x8000
-
-// FIXME: use Terje's fp->int trick here?
-// FIXME: check we're getting proper rounding here
-	fistpl	DP_u			// v | 1/z * 0x8000
-	fistpl	DP_v			// 1/z * 0x8000
-
-	movl	DP_u,%eax
-	movl	DP_v,%edx
-
-// if ((v > d_vrectbottom_particle) || 
-// 	(u > d_vrectright_particle) ||
-// 	(v < d_vrecty) ||
-// 	(u < d_vrectx))
-// {
-// 	continue;
-// }
-
-	movl	C(d_vrectbottom_particle),%ebx
-	movl	C(d_vrectright_particle),%ecx
-	cmpl	%ebx,%edx
-	jg		LPop1AndDone
-	cmpl	%ecx,%eax
-	jg		LPop1AndDone
-	movl	C(d_vrecty),%ebx
-	movl	C(d_vrectx),%ecx
-	cmpl	%ebx,%edx
-	jl		LPop1AndDone
-
-	cmpl	%ecx,%eax
-	jl		LPop1AndDone
-
-	flds	pt_color(%edi)	// color | 1/z * 0x8000
-// FIXME: use Terje's fast fp->int trick?
-	fistpl	DP_Color		// 1/z * 0x8000
-
-	movl	C(d_viewbuffer),%ebx
-
-	addl	%eax,%ebx
-	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel
-
-	imull	C(d_zrowbytes),%edx		// point to the z pixel
-
-	leal	(%edx,%eax,2),%edx
-	movl	C(d_pzbuffer),%eax
-
-	fistpl	izi
-
-	addl	%ebx,%edi
-	addl	%eax,%edx
-
-// pix = izi >> d_pix_shift;
-
-	movl	izi,%eax
-	movl	C(d_pix_shift),%ecx
-	shrl	%cl,%eax
-	movl	izi,%ebp
-
-// if (pix < d_pix_min)
-// 		pix = d_pix_min;
-// else if (pix > d_pix_max)
-//  	pix = d_pix_max;
-
-	movl	C(d_pix_min),%ebx
-	movl	C(d_pix_max),%ecx
-	cmpl	%ebx,%eax
-	jnl		LTestPixMax
-	movl	%ebx,%eax
-	jmp		LTestDone
-
-LTestPixMax:
-	cmpl	%ecx,%eax
-	jng		LTestDone
-	movl	%ecx,%eax
-LTestDone:
-
-	movb	DP_Color,%ch
-
-	movl	C(d_y_aspect_shift),%ebx
-	testl	%ebx,%ebx
-	jnz		LDefault
-
-	cmpl	$4,%eax
-	ja		LDefault
-
-	jmp		DP_EntryTable-4(,%eax,4)
-
-// 1x1
-.globl	DP_1x1
-DP_1x1:
-	cmpw	%bp,(%edx)		// just one pixel to do
-	jg		LDone
-	movw	%bp,(%edx)
-	movb	%ch,(%edi)
-	jmp		LDone
-
-// 2x2
-.globl	DP_2x2
-DP_2x2:
-	pushl	%esi
-	movl	C(screenwidth),%ebx
-	movl	C(d_zrowbytes),%esi
-
-	cmpw	%bp,(%edx)
-	jg		L2x2_1
-	movw	%bp,(%edx)
-	movb	%ch,(%edi)
-L2x2_1:
-	cmpw	%bp,2(%edx)
-	jg		L2x2_2
-	movw	%bp,2(%edx)
-	movb	%ch,1(%edi)
-L2x2_2:
-	cmpw	%bp,(%edx,%esi,1)
-	jg		L2x2_3
-	movw	%bp,(%edx,%esi,1)
-	movb	%ch,(%edi,%ebx,1)
-L2x2_3:
-	cmpw	%bp,2(%edx,%esi,1)
-	jg		L2x2_4
-	movw	%bp,2(%edx,%esi,1)
-	movb	%ch,1(%edi,%ebx,1)
-L2x2_4:
-
-	popl	%esi
-	jmp		LDone
-
-// 3x3
-.globl	DP_3x3
-DP_3x3:
-	pushl	%esi
-	movl	C(screenwidth),%ebx
-	movl	C(d_zrowbytes),%esi
-
-	cmpw	%bp,(%edx)
-	jg		L3x3_1
-	movw	%bp,(%edx)
-	movb	%ch,(%edi)
-L3x3_1:
-	cmpw	%bp,2(%edx)
-	jg		L3x3_2
-	movw	%bp,2(%edx)
-	movb	%ch,1(%edi)
-L3x3_2:
-	cmpw	%bp,4(%edx)
-	jg		L3x3_3
-	movw	%bp,4(%edx)
-	movb	%ch,2(%edi)
-L3x3_3:
-
-	cmpw	%bp,(%edx,%esi,1)
-	jg		L3x3_4
-	movw	%bp,(%edx,%esi,1)
-	movb	%ch,(%edi,%ebx,1)
-L3x3_4:
-	cmpw	%bp,2(%edx,%esi,1)
-	jg		L3x3_5
-	movw	%bp,2(%edx,%esi,1)
-	movb	%ch,1(%edi,%ebx,1)
-L3x3_5:
-	cmpw	%bp,4(%edx,%esi,1)
-	jg		L3x3_6
-	movw	%bp,4(%edx,%esi,1)
-	movb	%ch,2(%edi,%ebx,1)
-L3x3_6:
-
-	cmpw	%bp,(%edx,%esi,2)
-	jg		L3x3_7
-	movw	%bp,(%edx,%esi,2)
-	movb	%ch,(%edi,%ebx,2)
-L3x3_7:
-	cmpw	%bp,2(%edx,%esi,2)
-	jg		L3x3_8
-	movw	%bp,2(%edx,%esi,2)
-	movb	%ch,1(%edi,%ebx,2)
-L3x3_8:
-	cmpw	%bp,4(%edx,%esi,2)
-	jg		L3x3_9
-	movw	%bp,4(%edx,%esi,2)
-	movb	%ch,2(%edi,%ebx,2)
-L3x3_9:
-
-	popl	%esi
-	jmp		LDone
-
-
-// 4x4
-.globl	DP_4x4
-DP_4x4:
-	pushl	%esi
-	movl	C(screenwidth),%ebx
-	movl	C(d_zrowbytes),%esi
-
-	cmpw	%bp,(%edx)
-	jg		L4x4_1
-	movw	%bp,(%edx)
-	movb	%ch,(%edi)
-L4x4_1:
-	cmpw	%bp,2(%edx)
-	jg		L4x4_2
-	movw	%bp,2(%edx)
-	movb	%ch,1(%edi)
-L4x4_2:
-	cmpw	%bp,4(%edx)
-	jg		L4x4_3
-	movw	%bp,4(%edx)
-	movb	%ch,2(%edi)
-L4x4_3:
-	cmpw	%bp,6(%edx)
-	jg		L4x4_4
-	movw	%bp,6(%edx)
-	movb	%ch,3(%edi)
-L4x4_4:
-
-	cmpw	%bp,(%edx,%esi,1)
-	jg		L4x4_5
-	movw	%bp,(%edx,%esi,1)
-	movb	%ch,(%edi,%ebx,1)
-L4x4_5:
-	cmpw	%bp,2(%edx,%esi,1)
-	jg		L4x4_6
-	movw	%bp,2(%edx,%esi,1)
-	movb	%ch,1(%edi,%ebx,1)
-L4x4_6:
-	cmpw	%bp,4(%edx,%esi,1)
-	jg		L4x4_7
-	movw	%bp,4(%edx,%esi,1)
-	movb	%ch,2(%edi,%ebx,1)
-L4x4_7:
-	cmpw	%bp,6(%edx,%esi,1)
-	jg		L4x4_8
-	movw	%bp,6(%edx,%esi,1)
-	movb	%ch,3(%edi,%ebx,1)
-L4x4_8:
-
-	leal	(%edx,%esi,2),%edx
-	leal	(%edi,%ebx,2),%edi
-
-	cmpw	%bp,(%edx)
-	jg		L4x4_9
-	movw	%bp,(%edx)
-	movb	%ch,(%edi)
-L4x4_9:
-	cmpw	%bp,2(%edx)
-	jg		L4x4_10
-	movw	%bp,2(%edx)
-	movb	%ch,1(%edi)
-L4x4_10:
-	cmpw	%bp,4(%edx)
-	jg		L4x4_11
-	movw	%bp,4(%edx)
-	movb	%ch,2(%edi)
-L4x4_11:
-	cmpw	%bp,6(%edx)
-	jg		L4x4_12
-	movw	%bp,6(%edx)
-	movb	%ch,3(%edi)
-L4x4_12:
-
-	cmpw	%bp,(%edx,%esi,1)
-	jg		L4x4_13
-	movw	%bp,(%edx,%esi,1)
-	movb	%ch,(%edi,%ebx,1)
-L4x4_13:
-	cmpw	%bp,2(%edx,%esi,1)
-	jg		L4x4_14
-	movw	%bp,2(%edx,%esi,1)
-	movb	%ch,1(%edi,%ebx,1)
-L4x4_14:
-	cmpw	%bp,4(%edx,%esi,1)
-	jg		L4x4_15
-	movw	%bp,4(%edx,%esi,1)
-	movb	%ch,2(%edi,%ebx,1)
-L4x4_15:
-	cmpw	%bp,6(%edx,%esi,1)
-	jg		L4x4_16
-	movw	%bp,6(%edx,%esi,1)
-	movb	%ch,3(%edi,%ebx,1)
-L4x4_16:
-
-	popl	%esi
-	jmp		LDone
-
-// default case, handling any size particle
-LDefault:
-
-// count = pix << d_y_aspect_shift;
-
-	movl	%eax,%ebx
-	movl	%eax,DP_Pix
-	movb	C(d_y_aspect_shift),%cl
-	shll	%cl,%ebx
-
-// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
-// {
-// 	for (i=0 ; i<pix ; i++)
-// 	{
-// 		if (pz[i] <= izi)
-// 		{
-// 			pz[i] = izi;
-// 			pdest[i] = color;
-// 		}
-// 	}
-// }
-
-LGenRowLoop:
-	movl	DP_Pix,%eax
-
-LGenColLoop:
-	cmpw	%bp,-2(%edx,%eax,2)
-	jg		LGSkip
-	movw	%bp,-2(%edx,%eax,2)
-	movb	%ch,-1(%edi,%eax,1)
-LGSkip:
-	decl	%eax			// --pix
-	jnz		LGenColLoop
-
-	addl	C(d_zrowbytes),%edx
-	addl	C(screenwidth),%edi
-
-	decl	%ebx			// --count
-	jnz		LGenRowLoop
-
-LDone:
-	popl	%ebx				// restore register variables
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-LPop6AndDone:
-	fstp	%st(0)
-	fstp	%st(0)
-	fstp	%st(0)
-	fstp	%st(0)
-	fstp	%st(0)
-LPop1AndDone:
-	fstp	%st(0)
-	jmp		LDone
-
-#endif	// id386
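
The commented pseudocode in D_DrawParticle above fully describes the generic LDefault fill; the DP_1x1 through DP_4x4 entries are that loop unrolled for the common block sizes. A hedged C version combining the fill with the pix clamp, with parameters standing in for the d_pix_*, d_y_aspect_shift, d_zwidth and screenwidth globals the assembly reads.

typedef unsigned char byte;

// izi is the particle's 1/z in the z-buffer's fixed-point scale; pz walks
// the z-buffer (stride zwidth shorts), pdest the 8-bpp view buffer
static void draw_particle_block_sketch(byte *pdest, short *pz, int zwidth,
	int screenwidth, int izi, byte color,
	int pix_shift, int pix_min, int pix_max, int y_aspect_shift)
{
	int pix, count, i;

	pix = izi >> pix_shift;		// nearer particles get bigger blocks
	if (pix < pix_min)
		pix = pix_min;
	else if (pix > pix_max)
		pix = pix_max;

	for (count = pix << y_aspect_shift; count;
	    count--, pz += zwidth, pdest += screenwidth) {
		for (i = 0; i < pix; i++) {
			if (pz[i] <= izi) {	// particle is in front: claim z and color
				pz[i] = izi;
				pdest[i] = color;
			}
		}
	}
}
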
--- a/d_polysa.s
+++ /dev/null
@@ -1,1723 +1,0 @@
-//
-// d_polysa.s
-// x86 assembly-language polygon model drawing code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef	id386
-
-// !!! if this is changed, it must be changed in d_polyse.c too !!!
-#define DPS_MAXSPANS			MAXHEIGHT+1	
-									// 1 extra for spanpackage that marks end
-
-//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
-#define SPAN_SIZE (1024+1+1+1)*32
-
-
-	.data
-
-	.align	4
-p10_minus_p20:	.single		0
-p01_minus_p21:	.single		0
-temp0:			.single		0
-temp1:			.single		0
-Ltemp:			.single		0
-
-aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
-				.long	LDraw4, LDraw3, LDraw2, LDraw1
-
-lzistepx:		.long	0
-
-
-	.text
-
-	.extern C(D_PolysetSetEdgeTable)
-	.extern C(D_RasterizeAliasPolySmooth)
-
-//----------------------------------------------------------------------
-// affine triangle gradient calculation code
-//----------------------------------------------------------------------
-
-#define skinwidth	4+0
-
-.globl C(D_PolysetCalcGradients)
-C(D_PolysetCalcGradients):
-
-//	p00_minus_p20 = r_p0[0] - r_p2[0];
-//	p01_minus_p21 = r_p0[1] - r_p2[1];
-//	p10_minus_p20 = r_p1[0] - r_p2[0];
-//	p11_minus_p21 = r_p1[1] - r_p2[1];
-//
-//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
-//			     p00_minus_p20 * p11_minus_p21);
-//
-//	ystepdenominv = -xstepdenominv;
-
-	fildl	C(r_p0)+0		// r_p0[0]
-	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
-	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
-	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
-	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
-	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
-							//  r_p2[0] | r_p0[0]
-	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
-							//  r_p2[0] | r_p0[0]
-	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
-							//  r_p2[0] | r_p0[0]
-	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
-							//  r_p2[0] | r_p0[0]
-	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
-							//  r_p1[1] | r_p2[0] | r_p0[0]
-	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
-							//  r_p1[1] | r_p2[0] | p10_minus_p20
-	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
-							//  p00_minus_p20 | p10_minus_p20
-	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
-							//  p00_minus_p20 | p10_minus_p20
-	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
-							//  p00_minus_p20 | p10_minus_p20
-	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
-							//  p00_minus_p20 | p10_minus_p20
-	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
-							//  p00_minus_p20 | p10_minus_p20
-	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
-							//  p00_minus_p20 | d_xdenom
-	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
-							//  p00_minus_p20 | d_xdenom
-	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
-	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21
-
-//// ceil () for light so positive steps are exaggerated, negative steps
-//// diminished,  pushing us away from underflow toward overflow. Underflow is
-//// very visible, overflow is very unlikely, because of ambient lighting
-//	t0 = r_p0[4] - r_p2[4];
-//	t1 = r_p1[4] - r_p2[4];
-
-	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-
-//	r_lstepx = (int)
-//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
-//	r_lstepy = (int)
-//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
-
-	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
-							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(2)			// xstepdenominv |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmuls	float_minus_1	// ystepdenominv |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//   xstepdenominv |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//   | ystepdenominv | xstepdenominv |
-							//   p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//   xstepdenominv | ystepdenominv |
-							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv |
-							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//  xstepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fldcw	ceil_cw
-	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fldcw	single_cw
-
-//	t0 = r_p0[2] - r_p2[2];
-//	t1 = r_p1[2] - r_p2[2];
-
-	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
-							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-
-//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-//			xstepdenominv);
-//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-//			ystepdenominv);
-
-	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21

-	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
-							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//   ystepdenominv |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//   ystepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//  xstepdenominv |
-							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv |
-							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//  xstepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-
-//	t0 = r_p0[3] - r_p2[3];
-//	t1 = r_p1[3] - r_p2[3];
-
-	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-
-//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-//			xstepdenominv);
-//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-//			ystepdenominv);
-
-	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
-							//  t0*p11_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
-							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//   ystepdenominv |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//  xstepdenominv |
-							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
-							//  ystepdenominv |
-							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
-							//  xstepdenominv | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-
-//	t0 = r_p0[5] - r_p2[5];
-//	t1 = r_p1[5] - r_p2[5];
-
-	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  p11_minus_p21
-	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
-	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-
-//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-//			xstepdenominv);
-//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-//			ystepdenominv);
-
-	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | p11_minus_p21
-	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | t0*p11_minus_p21
-	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | t0*p11_minus_p21
-	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
-							//  p00_minus_p20 | t0*p11_minus_p21
-	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 |
-							//  t0*p11_minus_p21
-	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
-							//  xstepdenominv | p00_minus_p20 |
-							//  t0*p11_minus_p21
-	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  t0*p11_minus_p21
-	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
-							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
-							//  t0*p11_minus_p21
-	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
-							//  ystepdenominv | xstepdenominv |
-							//  t1*p00_minus_p20 | t0*p11_minus_p21
-	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
-							//  ystepdenominv | xstepdenominv |
-							//  t1*p00_minus_p20 | t0*p10_minus_p20
-	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  ystepdenominv | xstepdenominv |
-							//  t1*p00_minus_p20 | t0*p10_minus_p20
-	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
-							//  xstepdenominv |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  t0*p10_minus_p20
-	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20
-	fxch	%st(1)			// xstepdenominv | ystepdenominv |
-							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20
-	fmulp	%st(0),%st(2)	// ystepdenominv |
-							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
-							//  xstepdenominv |
-							//  t1*p00_minus_p20 - t0*p10_minus_p20
-	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
-							//  xstepdenominv |
-							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
-							//  ystepdenominv
-	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
-							//  ystepdenominv
-	fistpl	C(r_zistepy)
-
-//	a_sstepxfrac = r_sstepx << 16;
-//	a_tstepxfrac = r_tstepx << 16;
-//
-//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
-//			(r_sstepx >> 16);
-
-	movl	C(r_sstepx),%eax
-	movl	C(r_tstepx),%edx
-	shll	$16,%eax
-	shll	$16,%edx
-	movl	%eax,C(a_sstepxfrac)
-	movl	%edx,C(a_tstepxfrac)
-
-	movl	C(r_sstepx),%ecx
-	movl	C(r_tstepx),%eax
-	sarl	$16,%ecx
-	sarl	$16,%eax
-	imull	skinwidth(%esp)
-	addl	%ecx,%eax
-	movl	%eax,C(a_ststepxwhole)
-
-	ret
-
-
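
The FPU shuffling in D_PolysetCalcGradients evaluates the commented expressions four times over (light, s, t, 1/z); only the light pair goes through the ceil_cw rounding mode. A hedged C digest of one pair, with the light case shown in full; note the assembly reads the reciprocal it finds in d_xdenom rather than recomputing it as below.

#include <math.h>

// r_p0/r_p1/r_p2 are the six-int vertex records (u, v, s, t, light, 1/z)
// filled in by the dispatcher; only the light gradients are spelled out
static void calc_gradients_sketch(const int r_p0[6], const int r_p1[6],
	const int r_p2[6], int *r_lstepx, int *r_lstepy)
{
	float p00_minus_p20 = r_p0[0] - r_p2[0];
	float p01_minus_p21 = r_p0[1] - r_p2[1];
	float p10_minus_p20 = r_p1[0] - r_p2[0];
	float p11_minus_p21 = r_p1[1] - r_p2[1];
	float xstepdenominv, ystepdenominv, t0, t1;

	xstepdenominv = 1.0f / (p10_minus_p20 * p01_minus_p21 -
			p00_minus_p20 * p11_minus_p21);
	ystepdenominv = -xstepdenominv;

	// ceil() pushes the light steps away from underflow (very visible)
	// toward overflow (hidden by ambient light), as the comments explain
	t0 = r_p0[4] - r_p2[4];
	t1 = r_p1[4] - r_p2[4];
	*r_lstepx = (int)ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
	*r_lstepy = (int)ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

	// r_sstepx/y, r_tstepx/y and r_zistepx/y repeat the same two expressions
	// with plain truncation, using r_p*[2], r_p*[3] and r_p*[5]; the epilogue
	// then forms a_sstepxfrac = r_sstepx << 16, a_tstepxfrac = r_tstepx << 16
	// and a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16)
}
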
-//----------------------------------------------------------------------
-// recursive subdivision affine triangle drawing code
-//
-// not C-callable because of stdcall return
-//----------------------------------------------------------------------
-
-#define lp1	4+16
-#define lp2	8+16
-#define lp3	12+16
-
-.globl C(D_PolysetRecursiveTriangle)
-C(D_PolysetRecursiveTriangle):
-	pushl	%ebp				// preserve caller stack frame pointer
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-
-//	int		*temp;
-//	int		d;
-//	int		new[6];
-//	int		i;
-//	int		z;
-//	short	*zbuf;
-	movl	lp2(%esp),%esi
-	movl	lp1(%esp),%ebx
-	movl	lp3(%esp),%edi
-
-//	d = lp2[0] - lp1[0];
-//	if (d < -1 || d > 1)
-//		goto split;
-	movl	0(%esi),%eax
-
-	movl	0(%ebx),%edx
-	movl	4(%esi),%ebp
-
-	subl	%edx,%eax
-	movl	4(%ebx),%ecx
-
-	subl	%ecx,%ebp
-	incl	%eax
-
-	cmpl	$2,%eax
-	ja		LSplit
-
-//	d = lp2[1] - lp1[1];
-//	if (d < -1 || d > 1)
-//		goto split;
-	movl	0(%edi),%eax
-	incl	%ebp
-
-	cmpl	$2,%ebp
-	ja		LSplit
-
-//	d = lp3[0] - lp2[0];
-//	if (d < -1 || d > 1)
-//		goto split2;
-	movl	0(%esi),%edx
-	movl	4(%edi),%ebp
-
-	subl	%edx,%eax
-	movl	4(%esi),%ecx
-
-	subl	%ecx,%ebp
-	incl	%eax
-
-	cmpl	$2,%eax
-	ja		LSplit2
-
-//	d = lp3[1] - lp2[1];
-//	if (d < -1 || d > 1)
-//		goto split2;
-	movl	0(%ebx),%eax
-	incl	%ebp
-
-	cmpl	$2,%ebp
-	ja		LSplit2
-
-//	d = lp1[0] - lp3[0];
-//	if (d < -1 || d > 1)
-//		goto split3;
-	movl	0(%edi),%edx
-	movl	4(%ebx),%ebp
-
-	subl	%edx,%eax
-	movl	4(%edi),%ecx
-
-	subl	%ecx,%ebp
-	incl	%eax
-
-	incl	%ebp
-	movl	%ebx,%edx
-
-	cmpl	$2,%eax
-	ja		LSplit3
-
-//	d = lp1[1] - lp3[1];
-//	if (d < -1 || d > 1)
-//	{
-//split3:
-//		temp = lp1;
-//		lp1 = lp3;
-//		lp3 = lp2;
-//		lp2 = temp;
-//		goto split;
-//	}
-//
-//	return;			// entire tri is filled
-//
-	cmpl	$2,%ebp
-	jna		LDone
-
-LSplit3:
-	movl	%edi,%ebx
-	movl	%esi,%edi
-	movl	%edx,%esi
-	jmp		LSplit
-
-//split2:
-LSplit2:
-
-//	temp = lp1;
-//	lp1 = lp2;
-//	lp2 = lp3;
-//	lp3 = temp;
-	movl	%ebx,%eax
-	movl	%esi,%ebx
-	movl	%edi,%esi
-	movl	%eax,%edi
-
-//split:
-LSplit:
-
-	subl	$24,%esp		// allocate space for a new vertex
-
-//// split this edge
-//	new[0] = (lp1[0] + lp2[0]) >> 1;
-//	new[1] = (lp1[1] + lp2[1]) >> 1;
-//	new[2] = (lp1[2] + lp2[2]) >> 1;
-//	new[3] = (lp1[3] + lp2[3]) >> 1;
-//	new[5] = (lp1[5] + lp2[5]) >> 1;
-	movl	8(%ebx),%eax
-
-	movl	8(%esi),%edx
-	movl	12(%ebx),%ecx
-
-	addl	%edx,%eax
-	movl	12(%esi),%edx
-
-	sarl	$1,%eax
-	addl	%edx,%ecx
-
-	movl	%eax,8(%esp)
-	movl	20(%ebx),%eax
-
-	sarl	$1,%ecx
-	movl	20(%esi),%edx
-
-	movl	%ecx,12(%esp)
-	addl	%edx,%eax
-
-	movl	0(%ebx),%ecx
-	movl	0(%esi),%edx
-
-	sarl	$1,%eax
-	addl	%ecx,%edx
-
-	movl	%eax,20(%esp)
-	movl	4(%ebx),%eax
-
-	sarl	$1,%edx
-	movl	4(%esi),%ebp
-
-	movl	%edx,0(%esp)
-	addl	%eax,%ebp
-
-	sarl	$1,%ebp
-	movl	%ebp,4(%esp)
-
-//// draw the point if splitting a leading edge
-//	if (lp2[1] > lp1[1])
-//		goto nodraw;
-	cmpl	%eax,4(%esi)
-	jg		LNoDraw
-
-//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
-//		goto nodraw;
-	movl	0(%esi),%edx
-	jnz		LDraw
-
-	cmpl	%ecx,%edx
-	jl		LNoDraw
-
-LDraw:
-
-// z = new[5] >> 16;
-	movl	20(%esp),%edx
-	movl	4(%esp),%ecx
-
-	sarl	$16,%edx
-	movl	0(%esp),%ebp
-
-//	zbuf = zspantable[new[1]] + new[0];
-	movl	C(zspantable)(,%ecx,4),%eax
-
-//	if (z >= *zbuf)
-//	{
-	cmpw	(%eax,%ebp,2),%dx
-	jnge	LNoDraw
-
-//		int		pix;
-//		
-//		*zbuf = z;
-	movw	%dx,(%eax,%ebp,2)
-
-//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
-	movl	12(%esp),%eax
-
-	sarl	$16,%eax
-	movl	8(%esp),%edx
-
-	sarl	$16,%edx
-	subl	%ecx,%ecx
-
-	movl	C(skintable)(,%eax,4),%eax
-	movl	4(%esp),%ebp
-
-	movb	(%eax,%edx,),%cl
-	movl	C(d_pcolormap),%edx
-
-	movb	(%edx,%ecx,),%dl
-	movl	0(%esp),%ecx
-
-//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
-	movl	C(d_scantable)(,%ebp,4),%eax
-	addl	%eax,%ecx
-	movl	C(d_viewbuffer),%eax
-	movb	%dl,(%eax,%ecx,1)
-
-//	}
-//
-//nodraw:
-LNoDraw:
-
-//// recursively continue
-//	D_PolysetRecursiveTriangle (lp3, lp1, new);
-	pushl	%esp
-	pushl	%ebx
-	pushl	%edi
-	call	C(D_PolysetRecursiveTriangle)
-
-//	D_PolysetRecursiveTriangle (lp3, new, lp2);
-	movl	%esp,%ebx
-	pushl	%esi
-	pushl	%ebx
-	pushl	%edi
-	call	C(D_PolysetRecursiveTriangle)
-	addl	$24,%esp
-
-LDone:
-	popl	%ebx				// restore register variables
-	popl	%edi
-	popl	%esi
-	popl	%ebp				// restore caller stack frame pointer
-	ret		$12
-
-
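
A hedged C restatement of the recursion the comments above describe; zspantable, skintable, d_scantable, d_viewbuffer and d_pcolormap are the engine globals the assembly references and are assumed declared elsewhere, so treat this as an outline rather than a drop-in. Note that v[4] (light) is never interpolated here: the colormap row was already fixed per triangle through d_pcolormap.

// each vertex is six ints: u, v, s<<16, t<<16, light, 1/z<<16
static void recursive_triangle_sketch(int *lp1, int *lp2, int *lp3)
{
	int	*temp, d, new_v[6], z, pix;
	short	*zbuf;

	d = lp2[0] - lp1[0];
	if (d < -1 || d > 1)
		goto split;
	d = lp2[1] - lp1[1];
	if (d < -1 || d > 1)
		goto split;

	d = lp3[0] - lp2[0];
	if (d < -1 || d > 1)
		goto split2;
	d = lp3[1] - lp2[1];
	if (d < -1 || d > 1)
		goto split2;

	d = lp1[0] - lp3[0];
	if (d < -1 || d > 1)
		goto split3;
	d = lp1[1] - lp3[1];
	if (d < -1 || d > 1) {
split3:
		temp = lp1;
		lp1 = lp3;
		lp3 = lp2;
		lp2 = temp;
		goto split;
	}
	return;			// entire tri is filled

split2:
	temp = lp1;
	lp1 = lp2;
	lp2 = lp3;
	lp3 = temp;

split:
	// split the lp1-lp2 edge; v[4] is not averaged, see above
	new_v[0] = (lp1[0] + lp2[0]) >> 1;
	new_v[1] = (lp1[1] + lp2[1]) >> 1;
	new_v[2] = (lp1[2] + lp2[2]) >> 1;
	new_v[3] = (lp1[3] + lp2[3]) >> 1;
	new_v[5] = (lp1[5] + lp2[5]) >> 1;

	// draw the split point only when splitting a leading edge
	if (lp2[1] > lp1[1] || (lp2[1] == lp1[1] && lp2[0] < lp1[0]))
		goto nodraw;

	z = new_v[5] >> 16;
	zbuf = zspantable[new_v[1]] + new_v[0];
	if (z >= *zbuf) {
		*zbuf = z;
		pix = d_pcolormap[skintable[new_v[3] >> 16][new_v[2] >> 16]];
		d_viewbuffer[d_scantable[new_v[1]] + new_v[0]] = pix;
	}

nodraw:
	// recursively continue with the two halves
	recursive_triangle_sketch(lp3, lp1, new_v);
	recursive_triangle_sketch(lp3, new_v, lp2);
}
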
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for affine polygons, with smooth
-// shading and no transparency
-//----------------------------------------------------------------------
-
-#define pspans	4+8
-
-.globl C(D_PolysetAff8Start)
-C(D_PolysetAff8Start):
-
-.globl C(D_PolysetDrawSpans8)
-C(D_PolysetDrawSpans8):
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-	movl	pspans(%esp),%esi	// point to the first span descriptor
-	movl	C(r_zistepx),%ecx
-
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-
-	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
-	movl	spanpackage_t_count(%esi),%edx
-
-	movl	%ecx,lzistepx
-
-LSpanLoop:
-
-//		lcount = d_aspancount - pspanpackage->count;
-//
-//		errorterm += erroradjustup;
-//		if (errorterm >= 0)
-//		{
-//			d_aspancount += d_countextrastep;
-//			errorterm -= erroradjustdown;
-//		}
-//		else
-//		{
-//			d_aspancount += ubasestep;
-//		}
-	movl	C(d_aspancount),%eax
-	subl	%edx,%eax
-
-	movl	C(erroradjustup),%edx
-	movl	C(errorterm),%ebx
-	addl	%edx,%ebx
-	js		LNoTurnover
-
-	movl	C(erroradjustdown),%edx
-	movl	C(d_countextrastep),%edi
-	subl	%edx,%ebx
-	movl	C(d_aspancount),%ebp
-	movl	%ebx,C(errorterm)
-	addl	%edi,%ebp
-	movl	%ebp,C(d_aspancount)
-	jmp		LRightEdgeStepped
-
-LNoTurnover:
-	movl	C(d_aspancount),%edi
-	movl	C(ubasestep),%edx
-	movl	%ebx,C(errorterm)
-	addl	%edx,%edi
-	movl	%edi,C(d_aspancount)
-
-LRightEdgeStepped:
-	cmpl	$1,%eax
-
-	jl		LNextSpan
-	jz		LExactlyOneLong
-
-//
-// set up advancetable
-//
-	movl	C(a_ststepxwhole),%ecx
-	movl	C(r_affinetridesc)+atd_skinwidth,%edx
-
-	movl	%ecx,advancetable+4	// advance base in t
-	addl	%edx,%ecx
-
-	movl	%ecx,advancetable	// advance extra in t
-	movl	C(a_tstepxfrac),%ecx
-
-	movw	C(r_lstepx),%cx
-	movl	%eax,%edx			// count
-
-	movl	%ecx,tstep
-	addl	$7,%edx
-
-	shrl	$3,%edx				// count of full and partial loops
-	movl	spanpackage_t_sfrac(%esi),%ebx
-
-	movw	%dx,%bx
-	movl	spanpackage_t_pz(%esi),%ecx
-
-	negl	%eax
-
-	movl	spanpackage_t_pdest(%esi),%edi
-	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1
-
-	subl	%eax,%edi	// compensate for hardwired offsets
-	subl	%eax,%ecx
-
-	subl	%eax,%ecx
-	movl	spanpackage_t_tfrac(%esi),%edx
-
-	movw	spanpackage_t_light(%esi),%dx
-	movl	spanpackage_t_zi(%esi),%ebp
-
-	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
-	pushl	%esi
-
-	movl	spanpackage_t_ptex(%esi),%esi
-	jmp		aff8entryvec_table(,%eax,4)
-
-// %bx = count of full and partial loops
-// %ebx high word = sfrac
-// %ecx = pz
-// %dx = light
-// %edx high word = tfrac
-// %esi = ptex
-// %edi = pdest
-// %ebp = 1/z
-// tstep low word = C(r_lstepx)
-// tstep high word = C(a_tstepxfrac)
-// C(a_sstepxfrac) low word = 0
-// C(a_sstepxfrac) high word = C(a_sstepxfrac)
-
-LDrawLoop:
-
-// FIXME: do we need to clamp light? We may need at least a buffer bit to
-// keep it from poking into tfrac and causing problems
-
-LDraw8:
-	cmpw	(%ecx),%bp
-	jl		Lp1
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch8:
-	movb	%al,(%edi)
-Lp1:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw7:
-	cmpw	2(%ecx),%bp
-	jl		Lp2
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,2(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch7:
-	movb	%al,1(%edi)
-Lp2:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw6:
-	cmpw	4(%ecx),%bp
-	jl		Lp3
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,4(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch6:
-	movb	%al,2(%edi)
-Lp3:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw5:
-	cmpw	6(%ecx),%bp
-	jl		Lp4
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,6(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch5:
-	movb	%al,3(%edi)
-Lp4:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw4:
-	cmpw	8(%ecx),%bp
-	jl		Lp5
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,8(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch4:
-	movb	%al,4(%edi)
-Lp5:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw3:
-	cmpw	10(%ecx),%bp
-	jl		Lp6
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,10(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch3:
-	movb	%al,5(%edi)
-Lp6:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw2:
-	cmpw	12(%ecx),%bp
-	jl		Lp7
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,12(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch2:
-	movb	%al,6(%edi)
-Lp7:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LDraw1:
-	cmpw	14(%ecx),%bp
-	jl		Lp8
-	xorl	%eax,%eax
-	movb	%dh,%ah
-	movb	(%esi),%al
-	movw	%bp,14(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch1:
-	movb	%al,7(%edi)
-Lp8:
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	lzistepx,%ebp
-	adcl	$0,%ebp
-	addl	C(a_sstepxfrac),%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	addl	$8,%edi
-	addl	$16,%ecx
-
-	decw	%bx
-	jnz		LDrawLoop
-
-	popl	%esi				// restore spans pointer
-LNextSpan:
-	addl	$(spanpackage_t_size),%esi	// point to next span
-LNextSpanESISet:
-	movl	spanpackage_t_count(%esi),%edx
-	cmpl	$-999999,%edx		// any more spans?
-	jnz		LSpanLoop			// yes
-
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	popl	%ebx				// restore register variables
-	popl	%esi
-	ret
-
-
-// draw a one-long span
-
-LExactlyOneLong:
-
-	movl	spanpackage_t_pz(%esi),%ecx
-	movl	spanpackage_t_zi(%esi),%ebp
-
-	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
-	movl	spanpackage_t_ptex(%esi),%ebx
-
-	cmpw	(%ecx),%bp
-	jl		LNextSpan
-	xorl	%eax,%eax
-	movl	spanpackage_t_pdest(%esi),%edi
-	movb	spanpackage_t_light+1(%esi),%ah
-	addl	$(spanpackage_t_size),%esi	// point to next span
-	movb	(%ebx),%al
-	movw	%bp,(%ecx)
-	movb	0x12345678(%eax),%al
-LPatch9:
-	movb	%al,(%edi)
-
-	jmp		LNextSpanESISet
-
-.globl C(D_PolysetAff8End)
-C(D_PolysetAff8End):
-
-
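
D_PolysetDrawSpans8 walks the spanpackage list (the count == -999999 entry is the end sentinel) and runs the eight-pixel LDraw loop above for each span; the 0x12345678 displacements are placeholders that D_Aff8Patch below overwrites with the colormap base at load time. A hedged C picture of one pixel of that loop; the fixed-point s/t, light and 1/z stepping between pixels follows the same advance-table pattern sketched for the span code earlier.

typedef unsigned char byte;

// zi is 1/z in 16.16; the colormap row is selected by the top byte of the
// interpolated light, much as the patched "movb disp32(%eax),%al" does once
// D_Aff8Patch has written the colormap base into disp32
static void polyset_pixel_sketch(byte *pdest, short *pz, const byte *ptex,
	const byte *colormap, unsigned light, unsigned zi)
{
	if ((int)(zi >> 16) >= *pz) {
		*pz = (short)(zi >> 16);
		*pdest = colormap[(light & 0xFF00) + *ptex];
	}
}
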
-#define pcolormap		4
-
-.globl C(D_Aff8Patch)
-C(D_Aff8Patch):
-	movl	pcolormap(%esp),%eax
-	movl	%eax,LPatch1-4
-	movl	%eax,LPatch2-4
-	movl	%eax,LPatch3-4
-	movl	%eax,LPatch4-4
-	movl	%eax,LPatch5-4
-	movl	%eax,LPatch6-4
-	movl	%eax,LPatch7-4
-	movl	%eax,LPatch8-4
-	movl	%eax,LPatch9-4
-
-	ret
-
-
-//----------------------------------------------------------------------
-// Alias model polygon dispatching code, combined with subdivided affine
-// triangle drawing code
-//----------------------------------------------------------------------
-
-.globl C(D_PolysetDraw)
-C(D_PolysetDraw):
-
-//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
-//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
-//						// one extra because of cache line pretouching
-//
-//	a_spans = (spanpackage_t *)
-//			(((intptr)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
-	subl	$(SPAN_SIZE),%esp
-	movl	%esp,%eax
-	addl	$(CACHE_SIZE - 1),%eax
-	andl	$(~(CACHE_SIZE - 1)),%eax
-	movl	%eax,C(a_spans)
-
-//	if (r_affinetridesc.drawtype)
-//		D_DrawSubdiv ();
-//	else
-//		D_DrawNonSubdiv ();
-	movl	C(r_affinetridesc)+atd_drawtype,%eax
-	testl	%eax,%eax
-	jz		C(D_DrawNonSubdiv)
-
-	pushl	%ebp				// preserve caller stack frame pointer
-
-//	lnumtriangles = r_affinetridesc.numtriangles;
-	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
-
-	pushl	%esi				// preserve register variables
-	shll	$4,%ebp
-
-	pushl	%ebx
-//	ptri = r_affinetridesc.ptriangles;
-	movl	C(r_affinetridesc)+atd_ptriangles,%ebx
-
-	pushl	%edi
-
-//	mtriangle_t		*ptri;
-//	finalvert_t		*pfv, *index0, *index1, *index2;
-//	int				i;
-//	int				lnumtriangles;
-//	int				s0, s1, s2;
-
-//	pfv = r_affinetridesc.pfinalverts;
-	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
-
-//	for (i=0 ; i<lnumtriangles ; i++)
-//	{
-
-Llooptop:
-
-//		index0 = pfv + ptri[i].vertindex[0];
-//		index1 = pfv + ptri[i].vertindex[1];
-//		index2 = pfv + ptri[i].vertindex[2];
-	movl	mtri_vertindex-16+0(%ebx,%ebp,),%ecx
-	movl	mtri_vertindex-16+4(%ebx,%ebp,),%esi
-
-	shll	$(fv_shift),%ecx
-	movl	mtri_vertindex-16+8(%ebx,%ebp,),%edx
-
-	shll	$(fv_shift),%esi
-	addl	%edi,%ecx
-
-	shll	$(fv_shift),%edx
-	addl	%edi,%esi
-
-	addl	%edi,%edx
-
-//		if (((index0->v[1]-index1->v[1]) *
-//				(index0->v[0]-index2->v[0]) -
-//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
-//		{
-//			continue;
-//		}
-//
-//		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
-	fildl	fv_v+4(%ecx)	// i0v1
-	fildl	fv_v+4(%esi)	// i1v1 | i0v1
-	fildl	fv_v+0(%ecx)	// i0v0 | i1v1 | i0v1
-	fildl	fv_v+0(%edx)	// i2v0 | i0v0 | i1v1 | i0v1
-	fxch	%st(2)			// i1v1 | i0v0 | i2v0 | i0v1
-	fsubr	%st(3),%st(0)	// i0v1-i1v1 | i0v0 | i2v0 | i0v1
-	fildl	fv_v+0(%esi)	// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
-	fxch	%st(2)			// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
-	fsub	%st(0),%st(3)	// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
-	fildl	fv_v+4(%edx)	// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
-	fxch	%st(1)			// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
-	fsubp	%st(0),%st(3)	// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
-	fxch	%st(1)			// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
-	fmulp	%st(0),%st(3)	// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
-	fsubrp	%st(0),%st(3)	// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
-	movl	fv_v+16(%ecx),%eax
-	andl	$0xFF00,%eax
-	fmulp	%st(0),%st(2)	// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
-	addl	C(acolormap),%eax
-	fsubp	%st(0),%st(1)	// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
-	movl	%eax,C(d_pcolormap)
-	fstps	Ltemp
-	movl	Ltemp,%eax
-	subl	$0x80000001,%eax
-	jc		Lskip
-
-//		if (ptri[i].facesfront)
-//		{
-//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
-	movl	mtri_facesfront-16(%ebx,%ebp,),%eax
-	testl	%eax,%eax
-	jz		Lfacesback
-
-	pushl	%edx
-	pushl	%esi
-	pushl	%ecx
-	call	C(D_PolysetRecursiveTriangle)
-
-	subl	$16,%ebp
-	jnz		Llooptop
-	jmp		Ldone2
-
-//		}
-//		else
-//		{
-Lfacesback:
-
-//			s0 = index0->v[2];
-//			s1 = index1->v[2];
-//			s2 = index2->v[2];
-	movl	fv_v+8(%ecx),%eax
-	pushl	%eax
-	movl	fv_v+8(%esi),%eax
-	pushl	%eax
-	movl	fv_v+8(%edx),%eax
-	pushl	%eax
-	pushl	%ecx
-	pushl	%edx
-
-//			if (index0->flags & ALIAS_ONSEAM)
-//				index0->v[2] += r_affinetridesc.seamfixupX16;
-	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
-	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
-	jz		Lp11
-	addl	%eax,fv_v+8(%ecx)
-Lp11:
-
-//			if (index1->flags & ALIAS_ONSEAM)
-//				index1->v[2] += r_affinetridesc.seamfixupX16;
-	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
-	jz		Lp12
-	addl	%eax,fv_v+8(%esi)
-Lp12:
-
-//			if (index2->flags & ALIAS_ONSEAM)
-//				index2->v[2] += r_affinetridesc.seamfixupX16;
-	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
-	jz		Lp13
-	addl	%eax,fv_v+8(%edx)
-Lp13:
-
-//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
-	pushl	%edx
-	pushl	%esi
-	pushl	%ecx
-	call	C(D_PolysetRecursiveTriangle)
-
-//			index0->v[2] = s0;
-//			index1->v[2] = s1;
-//			index2->v[2] = s2;
-	popl	%edx
-	popl	%ecx
-	popl	%eax
-	movl	%eax,fv_v+8(%edx)
-	popl	%eax
-	movl	%eax,fv_v+8(%esi)
-	popl	%eax
-	movl	%eax,fv_v+8(%ecx)
-
-//		}
-//	}
-Lskip:
-	subl	$16,%ebp
-	jnz		Llooptop
-
-Ldone2:
-	popl	%edi				// restore the caller's stack frame
-	popl	%ebx
-	popl	%esi				// restore register variables
-	popl	%ebp
-
-	addl	$(SPAN_SIZE),%esp
-
-	ret
-
-
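
A hedged C reading of the dispatch loop above, leaning on the engine's d_iface.h declarations (mtriangle_t, finalvert_t, affinetridesc_t, acolormap); it is the commented pseudocode stitched back together, not a drop-in replacement. The SPAN_SIZE stack buffer allocated at entry is released at Ldone2, or by D_DrawNonSubdiv when drawtype is zero and control jumps there instead.

static void polyset_draw_subdiv_sketch(void)
{
	mtriangle_t	*ptri = r_affinetridesc.ptriangles;
	finalvert_t	*pfv = r_affinetridesc.pfinalverts;
	int		i, lnumtriangles = r_affinetridesc.numtriangles;

	for (i = 0; i < lnumtriangles; i++) {
		finalvert_t *index0 = pfv + ptri[i].vertindex[0];
		finalvert_t *index1 = pfv + ptri[i].vertindex[1];
		finalvert_t *index2 = pfv + ptri[i].vertindex[2];

		// the signed-area test from the comments rejects this triangle
		if (((index0->v[1] - index1->v[1]) * (index0->v[0] - index2->v[0]) -
		     (index0->v[0] - index1->v[0]) * (index0->v[1] - index2->v[1])) >= 0)
			continue;

		// pick the colormap row once per triangle from vertex 0's light
		d_pcolormap = &((unsigned char *)acolormap)[index0->v[4] & 0xFF00];

		if (ptri[i].facesfront) {
			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
		} else {
			// on-seam verts get the seam fixup for back-facing
			// triangles, then their s is restored afterwards
			int s0 = index0->v[2], s1 = index1->v[2], s2 = index2->v[2];

			if (index0->flags & ALIAS_ONSEAM)
				index0->v[2] += r_affinetridesc.seamfixupX16;
			if (index1->flags & ALIAS_ONSEAM)
				index1->v[2] += r_affinetridesc.seamfixupX16;
			if (index2->flags & ALIAS_ONSEAM)
				index2->v[2] += r_affinetridesc.seamfixupX16;

			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);

			index0->v[2] = s0;
			index1->v[2] = s1;
			index2->v[2] = s2;
		}
	}
}
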
-//----------------------------------------------------------------------
-// Alias model triangle left-edge scanning code
-//----------------------------------------------------------------------
-
-#define height	4+16
-
-.globl C(D_PolysetScanLeftEdge)
-C(D_PolysetScanLeftEdge):
-	pushl	%ebp				// preserve caller stack frame pointer
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-
-	movl	height(%esp),%eax
-	movl	C(d_sfrac),%ecx
-	andl	$0xFFFF,%eax
-	movl	C(d_ptex),%ebx
-	orl		%eax,%ecx
-	movl	C(d_pedgespanpackage),%esi
-	movl	C(d_tfrac),%edx
-	movl	C(d_light),%edi
-	movl	C(d_zi),%ebp
-
-// %eax: scratch
-// %ebx: d_ptex
-// %ecx: d_sfrac in high word, count in low word
-// %edx: d_tfrac
-// %esi: d_pedgespanpackage, errorterm, scratch alternately
-// %edi: d_light
-// %ebp: d_zi
-
-//	do
-//	{
-
-LScanLoop:
-
-//		d_pedgespanpackage->ptex = ptex;
-//		d_pedgespanpackage->pdest = d_pdest;
-//		d_pedgespanpackage->pz = d_pz;
-//		d_pedgespanpackage->count = d_aspancount;
-//		d_pedgespanpackage->light = d_light;
-//		d_pedgespanpackage->zi = d_zi;
-//		d_pedgespanpackage->sfrac = d_sfrac << 16;
-//		d_pedgespanpackage->tfrac = d_tfrac << 16;
-	movl	%ebx,spanpackage_t_ptex(%esi)
-	movl	C(d_pdest),%eax
-	movl	%eax,spanpackage_t_pdest(%esi)
-	movl	C(d_pz),%eax
-	movl	%eax,spanpackage_t_pz(%esi)
-	movl	C(d_aspancount),%eax
-	movl	%eax,spanpackage_t_count(%esi)
-	movl	%edi,spanpackage_t_light(%esi)
-	movl	%ebp,spanpackage_t_zi(%esi)
-	movl	%ecx,spanpackage_t_sfrac(%esi)
-	movl	%edx,spanpackage_t_tfrac(%esi)
-
-// pretouch the next cache line
-	movb	spanpackage_t_size(%esi),%al
-
-//		d_pedgespanpackage++;
-	addl	$(spanpackage_t_size),%esi
-	movl	C(erroradjustup),%eax
-	movl	%esi,C(d_pedgespanpackage)
-
-//		errorterm += erroradjustup;
-	movl	C(errorterm),%esi
-	addl	%eax,%esi
-	movl	C(d_pdest),%eax
-
-//		if (errorterm >= 0)
-//		{
-	js		LNoLeftEdgeTurnover
-
-//			errorterm -= erroradjustdown;
-//			d_pdest += d_pdestextrastep;
-	subl	C(erroradjustdown),%esi
-	addl	C(d_pdestextrastep),%eax
-	movl	%esi,C(errorterm)
-	movl	%eax,C(d_pdest)
-
-//			d_pz += d_pzextrastep;
-//			d_aspancount += d_countextrastep;
-//			d_ptex += d_ptexextrastep;
-//			d_sfrac += d_sfracextrastep;
-//			d_ptex += d_sfrac >> 16;
-//			d_sfrac &= 0xFFFF;
-//			d_tfrac += d_tfracextrastep;
-	movl	C(d_pz),%eax
-	movl	C(d_aspancount),%esi
-	addl	C(d_pzextrastep),%eax
-	addl	C(d_sfracextrastep),%ecx
-	adcl	C(d_ptexextrastep),%ebx
-	addl	C(d_countextrastep),%esi
-	movl	%eax,C(d_pz)
-	movl	C(d_tfracextrastep),%eax
-	movl	%esi,C(d_aspancount)
-	addl	%eax,%edx
-
-//			if (d_tfrac & 0x10000)
-//			{
-	jnc		LSkip1
-
-//				d_ptex += r_affinetridesc.skinwidth;
-//				d_tfrac &= 0xFFFF;
-	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
-
-//			}
-
-LSkip1:
-
-//			d_light += d_lightextrastep;
-//			d_zi += d_ziextrastep;
-	addl	C(d_lightextrastep),%edi
-	addl	C(d_ziextrastep),%ebp
-
-//		}
-	movl	C(d_pedgespanpackage),%esi
-	decl	%ecx
-	testl	$0xFFFF,%ecx
-	jnz		LScanLoop
-
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-	popl	%ebp
-	ret
-
-//		else
-//		{
-
-LNoLeftEdgeTurnover:
-	movl	%esi,C(errorterm)
-
-//			d_pdest += d_pdestbasestep;
-	addl	C(d_pdestbasestep),%eax
-	movl	%eax,C(d_pdest)
-
-//			d_pz += d_pzbasestep;
-//			d_aspancount += ubasestep;
-//			d_ptex += d_ptexbasestep;
-//			d_sfrac += d_sfracbasestep;
-//			d_ptex += d_sfrac >> 16;
-//			d_sfrac &= 0xFFFF;
-	movl	C(d_pz),%eax
-	movl	C(d_aspancount),%esi
-	addl	C(d_pzbasestep),%eax
-	addl	C(d_sfracbasestep),%ecx
-	adcl	C(d_ptexbasestep),%ebx
-	addl	C(ubasestep),%esi
-	movl	%eax,C(d_pz)
-	movl	%esi,C(d_aspancount)
-
-//			d_tfrac += d_tfracbasestep;
-	movl	C(d_tfracbasestep),%esi
-	addl	%esi,%edx
-
-//			if (d_tfrac & 0x10000)
-//			{
-	jnc		LSkip2
-
-//				d_ptex += r_affinetridesc.skinwidth;
-//				d_tfrac &= 0xFFFF;
-	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
-
-//			}
-
-LSkip2:
-
-//			d_light += d_lightbasestep;
-//			d_zi += d_zibasestep;
-	addl	C(d_lightbasestep),%edi
-	addl	C(d_zibasestep),%ebp
-
-//		}
-//	} while (--height);
-	movl	C(d_pedgespanpackage),%esi
-	decl	%ecx
-	testl	$0xFFFF,%ecx
-	jnz		LScanLoop
-
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-	popl	%ebp
-	ret
-
-
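
A hedged C transcription of the commented algorithm above; the d_* edge-stepping globals and spanpackage_t belong to the engine (d_polyse.c and the offsets in this file), so this is an outline, not a drop-in. Per scanline it queues one spanpackage (the assembly also pretouches the next package's cache line), then takes either the extra or the base step set depending on the Bresenham-style errorterm.

static void scan_left_edge_sketch(int height)
{
	do {
		d_pedgespanpackage->ptex = d_ptex;
		d_pedgespanpackage->pdest = d_pdest;
		d_pedgespanpackage->pz = d_pz;
		d_pedgespanpackage->count = d_aspancount;
		d_pedgespanpackage->light = d_light;
		d_pedgespanpackage->zi = d_zi;
		d_pedgespanpackage->sfrac = d_sfrac << 16;
		d_pedgespanpackage->tfrac = d_tfrac << 16;
		d_pedgespanpackage++;

		errorterm += erroradjustup;
		if (errorterm >= 0) {		// take the "extra" step this line
			errorterm -= erroradjustdown;
			d_pdest += d_pdestextrastep;
			d_pz += d_pzextrastep;
			d_aspancount += d_countextrastep;
			d_ptex += d_ptexextrastep;
			d_sfrac += d_sfracextrastep;
			d_ptex += d_sfrac >> 16;
			d_sfrac &= 0xFFFF;
			d_tfrac += d_tfracextrastep;
			if (d_tfrac & 0x10000) {	// t wrapped: next skin row
				d_ptex += r_affinetridesc.skinwidth;
				d_tfrac &= 0xFFFF;
			}
			d_light += d_lightextrastep;
			d_zi += d_ziextrastep;
		} else {			// take the "base" step
			d_pdest += d_pdestbasestep;
			d_pz += d_pzbasestep;
			d_aspancount += ubasestep;
			d_ptex += d_ptexbasestep;
			d_sfrac += d_sfracbasestep;
			d_ptex += d_sfrac >> 16;
			d_sfrac &= 0xFFFF;
			d_tfrac += d_tfracbasestep;
			if (d_tfrac & 0x10000) {
				d_ptex += r_affinetridesc.skinwidth;
				d_tfrac &= 0xFFFF;
			}
			d_light += d_lightbasestep;
			d_zi += d_zibasestep;
		}
	} while (--height);
}
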
-//----------------------------------------------------------------------
-// Alias model vertex drawing code
-//----------------------------------------------------------------------
-
-#define fv			4+8
-#define	numverts	8+8
-
-.globl C(D_PolysetDrawFinalVerts)
-C(D_PolysetDrawFinalVerts):
-	pushl	%ebp				// preserve caller stack frame pointer
-	pushl	%ebx
-
-//	int		i, z;
-//	short	*zbuf;
-
-	movl	numverts(%esp),%ecx
-	movl	fv(%esp),%ebx
-
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-
-LFVLoop:
-
-//	for (i=0 ; i<numverts ; i++, fv++)
-//	{
-//	// valid triangle coordinates for filling can include the bottom and
-//	// right clip edges, due to the fill rule; these shouldn't be drawn
-//		if ((fv->v[0] < r_refdef.vrectright) &&
-//			(fv->v[1] < r_refdef.vrectbottom))
-//		{
-	movl	fv_v+0(%ebx),%eax
-	movl	C(r_refdef)+rd_vrectright,%edx
-	cmpl	%edx,%eax
-	jge		LNextVert
-	movl	fv_v+4(%ebx),%esi
-	movl	C(r_refdef)+rd_vrectbottom,%edx
-	cmpl	%edx,%esi
-	jge		LNextVert
-
-//			zbuf = zspantable[fv->v[1]] + fv->v[0];
-	movl	C(zspantable)(,%esi,4),%edi
-
-//			z = fv->v[5]>>16;
-	movl	fv_v+20(%ebx),%edx
-	shrl	$16,%edx
-
-//			if (z >= *zbuf)
-//			{
-//				int		pix;
-	cmpw	(%edi,%eax,2),%dx
-	jl		LNextVert
-
-//				*zbuf = z;
-	movw	%dx,(%edi,%eax,2)
-
-//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
-	movl	fv_v+12(%ebx),%edi
-	shrl	$16,%edi
-	movl	C(skintable)(,%edi,4),%edi
-	movl	fv_v+8(%ebx),%edx
-	shrl	$16,%edx
-	movb	(%edi,%edx),%dl
-
-//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
-	movl	fv_v+16(%ebx),%edi
-	andl	$0xFF00,%edi
-	andl	$0x00FF,%edx
-	addl	%edx,%edi
-	movl	C(acolormap),%edx
-	movb	(%edx,%edi,1),%dl
-
-//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
-	movl	C(d_scantable)(,%esi,4),%edi
-	movl	C(d_viewbuffer),%esi
-	addl	%eax,%edi
-	movb	%dl,(%esi,%edi)
-
-//			}
-//		}
-//	}
-LNextVert:
-	addl	$(fv_size),%ebx
-	decl	%ecx
-	jnz		LFVLoop
-
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-
-
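
A hedged C reading of the vertex-dot loop above; finalvert_t, r_refdef, zspantable, skintable, d_scantable, d_viewbuffer and acolormap are the engine declarations the assembly leans on.

static void draw_final_verts_sketch(finalvert_t *fv, int numverts)
{
	int i;

	for (i = 0; i < numverts; i++, fv++) {
		// the fill rule lets valid coordinates touch the bottom and
		// right clip edges, and those must not be drawn as points
		if (fv->v[0] < r_refdef.vrectright && fv->v[1] < r_refdef.vrectbottom) {
			short *zbuf = zspantable[fv->v[1]] + fv->v[0];
			int z = fv->v[5] >> 16;

			if (z >= *zbuf) {
				int pix;

				*zbuf = z;
				pix = skintable[fv->v[3] >> 16][fv->v[2] >> 16];
				pix = ((unsigned char *)acolormap)[pix + (fv->v[4] & 0xFF00)];
				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
			}
		}
	}
}
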
-//----------------------------------------------------------------------
-// Alias model non-subdivided polygon dispatching code
-//
-// not C-callable because of stack buffer cleanup
-//----------------------------------------------------------------------
-
-.globl C(D_DrawNonSubdiv)
-C(D_DrawNonSubdiv):
-	pushl	%ebp				// preserve caller stack frame pointer
-	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
-	pushl	%ebx
-	shll	$(mtri_shift),%ebp
-	pushl	%esi				// preserve register variables
-	movl	C(r_affinetridesc)+atd_ptriangles,%esi
-	pushl	%edi
-
-//	mtriangle_t		*ptri;
-//	finalvert_t		*pfv, *index0, *index1, *index2;
-//	int				i;
-//	int				lnumtriangles;
-
-//	pfv = r_affinetridesc.pfinalverts;
-//	ptri = r_affinetridesc.ptriangles;
-//	lnumtriangles = r_affinetridesc.numtriangles;
-
-LNDLoop:
-
-//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
-//	{
-//		index0 = pfv + ptri->vertindex[0];
-//		index1 = pfv + ptri->vertindex[1];
-//		index2 = pfv + ptri->vertindex[2];
-	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
-	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
-	shll	$(fv_shift),%ecx
-	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
-	shll	$(fv_shift),%edx
-	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
-	shll	$(fv_shift),%ebx
-	addl	%edi,%ecx
-	addl	%edi,%edx
-	addl	%edi,%ebx
-
-//		d_xdenom = (index0->v[1]-index1->v[1]) *
-//				(index0->v[0]-index2->v[0]) -
-//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
-	movl	fv_v+4(%ecx),%eax
-	movl	fv_v+0(%ecx),%esi
-	subl	fv_v+4(%edx),%eax
-	subl	fv_v+0(%ebx),%esi
-	imull	%esi,%eax
-	movl	fv_v+0(%ecx),%esi
-	movl	fv_v+4(%ecx),%edi
-	subl	fv_v+0(%edx),%esi
-	subl	fv_v+4(%ebx),%edi
-	imull	%esi,%edi
-	subl	%edi,%eax
-
-//		if (d_xdenom >= 0)
-//		{
-//			continue;
-	jns		LNextTri
-
-//		}
-
-	movl	%eax,C(d_xdenom)
-	fildl	C(d_xdenom)
-
-//		r_p0[0] = index0->v[0];		// u
-//		r_p0[1] = index0->v[1];		// v
-//		r_p0[2] = index0->v[2];		// s
-//		r_p0[3] = index0->v[3];		// t
-//		r_p0[4] = index0->v[4];		// light
-//		r_p0[5] = index0->v[5];		// iz
-	movl	fv_v+0(%ecx),%eax
-	movl	fv_v+4(%ecx),%esi
-	movl	%eax,C(r_p0)+0
-	movl	%esi,C(r_p0)+4
-	movl	fv_v+8(%ecx),%eax
-	movl	fv_v+12(%ecx),%esi
-	movl	%eax,C(r_p0)+8
-	movl	%esi,C(r_p0)+12
-	movl	fv_v+16(%ecx),%eax
-	movl	fv_v+20(%ecx),%esi
-	movl	%eax,C(r_p0)+16
-	movl	%esi,C(r_p0)+20
-
-	fdivrs	float_1
-
-//		r_p1[0] = index1->v[0];
-//		r_p1[1] = index1->v[1];
-//		r_p1[2] = index1->v[2];
-//		r_p1[3] = index1->v[3];
-//		r_p1[4] = index1->v[4];
-//		r_p1[5] = index1->v[5];
-	movl	fv_v+0(%edx),%eax
-	movl	fv_v+4(%edx),%esi
-	movl	%eax,C(r_p1)+0
-	movl	%esi,C(r_p1)+4
-	movl	fv_v+8(%edx),%eax
-	movl	fv_v+12(%edx),%esi
-	movl	%eax,C(r_p1)+8
-	movl	%esi,C(r_p1)+12
-	movl	fv_v+16(%edx),%eax
-	movl	fv_v+20(%edx),%esi
-	movl	%eax,C(r_p1)+16
-	movl	%esi,C(r_p1)+20
-
-//		r_p2[0] = index2->v[0];
-//		r_p2[1] = index2->v[1];
-//		r_p2[2] = index2->v[2];
-//		r_p2[3] = index2->v[3];
-//		r_p2[4] = index2->v[4];
-//		r_p2[5] = index2->v[5];
-	movl	fv_v+0(%ebx),%eax
-	movl	fv_v+4(%ebx),%esi
-	movl	%eax,C(r_p2)+0
-	movl	%esi,C(r_p2)+4
-	movl	fv_v+8(%ebx),%eax
-	movl	fv_v+12(%ebx),%esi
-	movl	%eax,C(r_p2)+8
-	movl	%esi,C(r_p2)+12
-	movl	fv_v+16(%ebx),%eax
-	movl	fv_v+20(%ebx),%esi
-	movl	%eax,C(r_p2)+16
-	movl	C(r_affinetridesc)+atd_ptriangles,%edi
-	movl	%esi,C(r_p2)+20
-	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
-
-//		if (!ptri->facesfront)
-//		{
-	testl	%eax,%eax
-	jnz		LFacesFront
-
-//			if (index0->flags & ALIAS_ONSEAM)
-//				r_p0[2] += r_affinetridesc.seamfixupX16;
-	movl	fv_flags(%ecx),%eax
-	movl	fv_flags(%edx),%esi
-	movl	fv_flags(%ebx),%edi
-	testl	$(ALIAS_ONSEAM),%eax
-	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
-	jz		LOnseamDone0
-	addl	%eax,C(r_p0)+8
-LOnseamDone0:
-
-//			if (index1->flags & ALIAS_ONSEAM)
-// 				r_p1[2] += r_affinetridesc.seamfixupX16;
-	testl	$(ALIAS_ONSEAM),%esi
-	jz		LOnseamDone1
-	addl	%eax,C(r_p1)+8
-LOnseamDone1:
-
-//			if (index2->flags & ALIAS_ONSEAM)
-//				r_p2[2] += r_affinetridesc.seamfixupX16;
-	testl	$(ALIAS_ONSEAM),%edi
-	jz		LOnseamDone2
-	addl	%eax,C(r_p2)+8
-LOnseamDone2:
-
-//		}
-
-LFacesFront:
-
-	fstps	C(d_xdenom)
-
-//		D_PolysetSetEdgeTable ();
-//		D_RasterizeAliasPolySmooth ();
-		call	C(D_PolysetSetEdgeTable)
-		call	C(D_RasterizeAliasPolySmooth)
-
-LNextTri:
-		movl	C(r_affinetridesc)+atd_ptriangles,%esi
-		subl	$16,%ebp
-		jnz		LNDLoop
-//	}
-
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-
-	addl	$(SPAN_SIZE),%esp
-
-	ret
-
-
-#endif	// id386
-
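The routine above is the hand-scheduled form of the triangle-setup loop the stock renderer keeps in C (d_polyse.c), and its commented-out C lines spell out the logic. A minimal C sketch of that loop follows; the structs, globals and the ALIAS_ONSEAM value are pared-down stand-ins declared only so the sketch is self-contained.

/*
 * Hedged sketch of the triangle loop D_DrawNonSubdiv implements above; it
 * follows the commented-out C embedded in the asm.
 */
#include <string.h>

typedef struct { int vertindex[3]; int facesfront; } mtriangle_t;
typedef struct { int v[6]; int flags; } finalvert_t;

extern struct {
	finalvert_t *pfinalverts;
	mtriangle_t *ptriangles;
	int numtriangles;
	int seamfixupX16;
} r_affinetridesc;
extern int r_p0[6], r_p1[6], r_p2[6];
extern int d_xdenom;
void D_PolysetSetEdgeTable(void);
void D_RasterizeAliasPolySmooth(void);

#define ALIAS_ONSEAM 0x0020	/* assumed to match d_iface.h */

void
D_DrawNonSubdiv_sketch(void)
{
	finalvert_t *pfv, *i0, *i1, *i2;
	mtriangle_t *ptri;
	int i;

	pfv = r_affinetridesc.pfinalverts;
	ptri = r_affinetridesc.ptriangles;
	for(i = 0; i < r_affinetridesc.numtriangles; i++, ptri++){
		i0 = pfv + ptri->vertindex[0];
		i1 = pfv + ptri->vertindex[1];
		i2 = pfv + ptri->vertindex[2];

		/* skip back-facing and degenerate triangles */
		d_xdenom = (i0->v[1]-i1->v[1]) * (i0->v[0]-i2->v[0])
			- (i0->v[0]-i1->v[0]) * (i0->v[1]-i2->v[1]);
		if(d_xdenom >= 0)
			continue;

		memcpy(r_p0, i0->v, sizeof r_p0);
		memcpy(r_p1, i1->v, sizeof r_p1);
		memcpy(r_p2, i2->v, sizeof r_p2);

		/* back-facing skin triangles use the back half of the skin */
		if(!ptri->facesfront){
			if(i0->flags & ALIAS_ONSEAM)
				r_p0[2] += r_affinetridesc.seamfixupX16;
			if(i1->flags & ALIAS_ONSEAM)
				r_p1[2] += r_affinetridesc.seamfixupX16;
			if(i2->flags & ALIAS_ONSEAM)
				r_p2[2] += r_affinetridesc.seamfixupX16;
		}
		D_PolysetSetEdgeTable();
		D_RasterizeAliasPolySmooth();
	}
}
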
--- a/d_scana.s
+++ /dev/null
@@ -1,70 +1,0 @@
-//
-// d_scana.s
-// x86 assembly-language turbulent texture mapping code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-	.data
-
-	.text
-
-//----------------------------------------------------------------------
-// turbulent texture mapping code
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(D_DrawTurbulent8Span)
-C(D_DrawTurbulent8Span):
-	pushl	%ebp				// preserve caller's stack frame pointer
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-
-	movl	C(r_turb_s),%esi
-	movl	C(r_turb_t),%ecx
-	movl	C(r_turb_pdest),%edi
-	movl	C(r_turb_spancount),%ebx
-
-Llp:
-	movl	%ecx,%eax
-	movl	%esi,%edx
-	sarl	$16,%eax
-	movl	C(r_turb_turb),%ebp
-	sarl	$16,%edx
-	andl	$(CYCLE-1),%eax
-	andl	$(CYCLE-1),%edx
-	movl	(%ebp,%eax,4),%eax
-	movl	(%ebp,%edx,4),%edx
-	addl	%esi,%eax
-	sarl	$16,%eax
-	addl	%ecx,%edx
-	sarl	$16,%edx
-	andl	$(TURB_TEX_SIZE-1),%eax
-	andl	$(TURB_TEX_SIZE-1),%edx
-	shll	$6,%edx
-	movl	C(r_turb_pbase),%ebp
-	addl	%eax,%edx
-	incl	%edi
-	addl	C(r_turb_sstep),%esi
-	addl	C(r_turb_tstep),%ecx
-	movb	(%ebp,%edx,1),%dl
-	decl	%ebx
-	movb	%dl,-1(%edi)
-	jnz		Llp
-
-	movl	%edi,C(r_turb_pdest)
-
-	popl	%ebx				// restore register variables
-	popl	%edi
-	popl	%esi
-	popl	%ebp				// restore caller's stack frame pointer
-	ret
-
-#endif	// id386
-
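The span loop above also exists as plain C in the stock renderer (d_scan.c). A self-contained sketch of the same warp, assuming the usual 64x64 turbulence texture (which is what the <<6 row stride implies) and a CYCLE of 128; the r_turb_* globals are declared here only so the sketch stands alone.

typedef unsigned char byte;

#define CYCLE		128	/* must match r_local.h */
#define TURB_TEX_SIZE	64	/* assumed 64x64 warp texture (<<6 stride) */

extern int	r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;	/* 16.16 */
extern int	*r_turb_turb;		/* CYCLE-entry sine table, 16.16 */
extern int	r_turb_spancount;
extern byte	*r_turb_pbase, *r_turb_pdest;

void
D_DrawTurbulent8Span_sketch(void)
{
	int s, t;

	do{
		/* warp each axis by a sine offset indexed with the other axis */
		s = ((r_turb_s + r_turb_turb[(r_turb_t>>16) & (CYCLE-1)]) >> 16)
			& (TURB_TEX_SIZE-1);
		t = ((r_turb_t + r_turb_turb[(r_turb_s>>16) & (CYCLE-1)]) >> 16)
			& (TURB_TEX_SIZE-1);
		*r_turb_pdest++ = r_turb_pbase[(t<<6) + s];
		r_turb_s += r_turb_sstep;
		r_turb_t += r_turb_tstep;
	}while(--r_turb_spancount > 0);
}
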
--- a/d_spr8.s
+++ /dev/null
@@ -1,881 +1,0 @@
-//
-// d_spr8.s
-// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with transparency.
-//----------------------------------------------------------------------
-
-	.text
-
-// out-of-line, rarely-needed clamping code
-
-LClampHigh0:
-	movl	C(bbextents),%esi
-	jmp		LClampReentry0
-LClampHighOrLow0:
-	jg		LClampHigh0
-	xorl	%esi,%esi
-	jmp		LClampReentry0
-
-LClampHigh1:
-	movl	C(bbextentt),%edx
-	jmp		LClampReentry1
-LClampHighOrLow1:
-	jg		LClampHigh1
-	xorl	%edx,%edx
-	jmp		LClampReentry1
-
-LClampLow2:
-	movl	$2048,%ebp
-	jmp		LClampReentry2
-LClampHigh2:
-	movl	C(bbextents),%ebp
-	jmp		LClampReentry2
-
-LClampLow3:
-	movl	$2048,%ecx
-	jmp		LClampReentry3
-LClampHigh3:
-	movl	C(bbextentt),%ecx
-	jmp		LClampReentry3
-
-LClampLow4:
-	movl	$2048,%eax
-	jmp		LClampReentry4
-LClampHigh4:
-	movl	C(bbextents),%eax
-	jmp		LClampReentry4
-
-LClampLow5:
-	movl	$2048,%ebx
-	jmp		LClampReentry5
-LClampHigh5:
-	movl	C(bbextentt),%ebx
-	jmp		LClampReentry5
-
-
-#define pspans	4+16
-
-	.align 4
-.globl C(D_SpriteDrawSpans)
-C(D_SpriteDrawSpans):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//
-// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
-// and span list pointers, and 1/z step in 0.32 fixed-point
-//
-// FIXME: any overlap from rearranging?
-	flds	C(d_sdivzstepu)
-	fmuls	fp_8
-	movl	C(cacheblock),%edx
-	flds	C(d_tdivzstepu)
-	fmuls	fp_8
-	movl	pspans(%esp),%ebx	// point to the first span descriptor
-	flds	C(d_zistepu)
-	fmuls	fp_8
-	movl	%edx,pbase			// pbase = cacheblock
-	flds	C(d_zistepu)
-	fmuls	fp_64kx64k
-	fxch	%st(3)
-	fstps	sdivz8stepu
-	fstps	zi8stepu
-	fstps	tdivz8stepu
-	fistpl	izistep
-	movl	izistep,%eax
-	rorl	$16,%eax		// put upper 16 bits in low word
-	movl	sspan_t_count(%ebx),%ecx
-	movl	%eax,izistep
-
-	cmpl	$0,%ecx
-	jle		LNextSpan
-
-LSpanLoop:
-
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
-	fildl	sspan_t_v(%ebx)
-	fildl	sspan_t_u(%ebx)
-
-	fld		%st(1)			// dv | du | dv
-	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
-	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
-	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
-							//  dv*d_sdivzstepv | du | dv
-	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
-							//  du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  du*d_tdivzstepu | du | dv
-	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
-							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
-	fadds	C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
-							//  du*d_sdivzstepu; stays in %st(2) at end
-	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
-							//  s/z
-	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
-							//  du*d_tdivzstepu | du | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
-							//  du*d_tdivzstepu | du | s/z
-	faddp	%st(0),%st(2)	// dv*d_zistepv |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
-	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fmuls	C(d_zistepu)		// du*d_zistepu |
-							//  dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  dv*d_zistepv | s/z
-	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
-							//  du*d_zistepu | dv*d_zistepv | s/z
-	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
-							//  du*d_tdivzstepu; stays in %st(1) at end
-	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
-	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
-
-	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
-	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
-	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
-							//  du*d_zistepu; stays in %st(0) at end
-							// 1/z | fp_64k | t/z | s/z
-
-	fld		%st(0)			// FIXME: get rid of stall on FMUL?
-	fmuls	fp_64kx64k
-	fxch	%st(1)
-
-//
-// calculate and clamp s & t
-//
-	fdivr	%st(0),%st(2)	// 1/z | z*64k | t/z | s/z
-	fxch	%st(1)
-
-	fistpl	izi				// 0.32 fixed-point 1/z
-	movl	izi,%ebp
-
-//
-// set pz to point to the first z-buffer pixel in the span
-//
-	rorl	$16,%ebp		// put upper 16 bits in low word
-	movl	sspan_t_v(%ebx),%eax
-	movl	%ebp,izi
-	movl	sspan_t_u(%ebx),%ebp
-	imull	C(d_zrowbytes)
-	shll	$1,%ebp					// a word per pixel
-	addl	C(d_pzbuffer),%eax
-	addl	%ebp,%eax
-	movl	%eax,pz
-
-//
-// point %edi to the first pixel in the span
-//
-	movl	C(d_viewbuffer),%ebp
-	movl	sspan_t_v(%ebx),%eax
-	pushl	%ebx		// preserve spans pointer
-	movl	C(tadjust),%edx
-	movl	C(sadjust),%esi
-	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
-	addl	%ebp,%edi
-	movl	sspan_t_u(%ebx),%ebp
-	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];
-
-//
-// now start the FDIV for the end of the span
-//
-	cmpl	$8,%ecx
-	ja		LSetupNotLast1
-
-	decl	%ecx
-	jz		LCleanup1		// if only one pixel, no need to start an FDIV
-	movl	%ecx,spancountminus1
-
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fildl	spancountminus1
-
-	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
-	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
-	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
-	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
-	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
-	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
-	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
-							//  _d_tdivzstepu*scm1
-	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
-							//  _d_tdivzstepu*scm1
-	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
-	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
-	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
-	faddp	%st(0),%st(3)
-
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-	jmp		LFDIVInFlight1
-
-	.align	4
-LSetupNotLast1:
-// finish up the s and t calcs
-	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
-
-	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
-	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
-	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
-	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
-	fxch	%st(1)			// s | t | 1/z | t/z | s/z
-	fistpl	s				// 1/z | t | t/z | s/z
-	fistpl	t				// 1/z | t/z | s/z
-
-	fadds	zi8stepu
-	fxch	%st(2)
-	fadds	sdivz8stepu
-	fxch	%st(2)
-	flds	tdivz8stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight1:
-
-	addl	s,%esi
-	addl	t,%edx
-	movl	C(bbextents),%ebx
-	movl	C(bbextentt),%ebp
-	cmpl	%ebx,%esi
-	ja		LClampHighOrLow0
-LClampReentry0:
-	movl	%esi,s
-	movl	pbase,%ebx
-	shll	$16,%esi
-	cmpl	%ebp,%edx
-	movl	%esi,sfracf
-	ja		LClampHighOrLow1
-LClampReentry1:
-	movl	%edx,t
-	movl	s,%esi					// sfrac = scans->sfrac;
-	shll	$16,%edx
-	movl	t,%eax					// tfrac = scans->tfrac;
-	sarl	$16,%esi
-	movl	%edx,tfracf
-
-//
-// calculate the texture starting address
-//
-	sarl	$16,%eax
-	addl	%ebx,%esi
-	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
-	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
-									//           ((tfrac >> 16) * cachewidth);
-
-//
-// determine whether last span or not
-//
-	cmpl	$8,%ecx
-	jna		LLastSegment
-
-//
-// not the last segment; do full 8-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-	movl	snext,%eax
-	movl	tnext,%edx
-
-	subl	$8,%ecx		// count off this segments' pixels
-	movl	C(sadjust),%ebp
-	pushl	%ecx		// remember count of remaining pixels
-	movl	C(tadjust),%ecx
-
-	addl	%eax,%ebp
-	addl	%edx,%ecx
-
-	movl	C(bbextents),%eax
-	movl	C(bbextentt),%edx
-
-	cmpl	$2048,%ebp
-	jl		LClampLow2
-	cmpl	%eax,%ebp
-	ja		LClampHigh2
-LClampReentry2:
-
-	cmpl	$2048,%ecx
-	jl		LClampLow3
-	cmpl	%edx,%ecx
-	ja		LClampHigh3
-LClampReentry3:
-
-	movl	%ebp,snext
-	movl	%ecx,tnext
-
-	subl	s,%ebp
-	subl	t,%ecx
-	
-//
-// set up advancetable
-//
-	movl	%ecx,%eax
-	movl	%ebp,%edx
-	sarl	$19,%edx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-	sarl	$19,%eax			// tstep >>= 16;
-	jz		LIsZero
-	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
-LIsZero:
-	addl	%edx,%eax			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%edx
-	movl	%eax,advancetable+4	// advance base in t
-	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$13,%ebp			// left-justify sstep fractional part
-	movl	%ebp,sstep
-	movl	sfracf,%ebx
-	shll	$13,%ecx			// left-justify tstep fractional part
-	movl	%eax,advancetable	// advance extra in t
-	movl	%ecx,tstep
-
-	movl	pz,%ecx
-	movl	izi,%ebp
-
-	cmpw	(%ecx),%bp
-	jl		Lp1
-	movb	(%esi),%al			// get first source texel
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp1
-	movw	%bp,(%ecx)
-	movb	%al,(%edi)			// store first dest pixel
-Lp1:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx			// advance tfrac fractional part by tstep frac
-
-	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
-	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
-	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel
-
-	cmpw	2(%ecx),%bp
-	jl		Lp2
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp2
-	movw	%bp,2(%ecx)
-	movb	%al,1(%edi)
-Lp2:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	cmpw	4(%ecx),%bp
-	jl		Lp3
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp3
-	movw	%bp,4(%ecx)
-	movb	%al,2(%edi)
-Lp3:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	cmpw	6(%ecx),%bp
-	jl		Lp4
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp4
-	movw	%bp,6(%ecx)
-	movb	%al,3(%edi)
-Lp4:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	cmpw	8(%ecx),%bp
-	jl		Lp5
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp5
-	movw	%bp,8(%ecx)
-	movb	%al,4(%edi)
-Lp5:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
-	popl	%eax
-	cmpl	$8,%eax			// more than one segment after this?
-	ja		LSetupNotLast2	// yes
-
-	decl	%eax
-	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
-	movl	%eax,spancountminus1
-	fildl	spancountminus1
-
-	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
-	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
-	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
-	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
-	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
-	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
-	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
-	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
-	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
-	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
-	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
-	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
-	faddp	%st(0),%st(4)	// 64k
-
-	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
-							//  overlap
-	jmp		LFDIVInFlight2
-
-	.align	4
-LSetupNotLast2:
-	fadds	zi8stepu
-	fxch	%st(2)
-	fadds	sdivz8stepu
-	fxch	%st(2)
-	flds	tdivz8stepu
-	faddp	%st(0),%st(2)
-	flds	fp_64k
-	fdiv	%st(1),%st(0)	// z = 1/1/z
-							// this is what we've gone to all this trouble to
-							//  overlap
-LFDIVInFlight2:
-	pushl	%eax
-
-	cmpw	10(%ecx),%bp
-	jl		Lp6
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp6
-	movw	%bp,10(%ecx)
-	movb	%al,5(%edi)
-Lp6:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	cmpw	12(%ecx),%bp
-	jl		Lp7
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp7
-	movw	%bp,12(%ecx)
-	movb	%al,6(%edi)
-Lp7:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	cmpw	14(%ecx),%bp
-	jl		Lp8
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp8
-	movw	%bp,14(%ecx)
-	movb	%al,7(%edi)
-Lp8:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-	addl	$8,%edi
-	addl	$16,%ecx
-	movl	%edx,tfracf
-	movl	snext,%edx
-	movl	%ebx,sfracf
-	movl	tnext,%ebx
-	movl	%edx,s
-	movl	%ebx,t
-
-	movl	%ecx,pz
-	movl	%ebp,izi
-
-	popl	%ecx				// retrieve count
-
-//
-// determine whether last span or not
-//
-	cmpl	$8,%ecx				// are there multiple segments remaining?
-	ja		LNotLastSegment		// yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
-	testl	%ecx,%ecx
-	jz		LNoSteps		// just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
-	fld		%st(0)			// duplicate it
-	fmul	%st(4),%st(0)	// s = s/z * z
-	fxch	%st(1)
-	fmul	%st(3),%st(0)	// t = t/z * z
-	fxch	%st(1)
-	fistpl	snext
-	fistpl	tnext
-
-	movl	C(tadjust),%ebx
-	movl	C(sadjust),%eax
-
-	addl	snext,%eax
-	addl	tnext,%ebx
-
-	movl	C(bbextents),%ebp
-	movl	C(bbextentt),%edx
-
-	cmpl	$2048,%eax
-	jl		LClampLow4
-	cmpl	%ebp,%eax
-	ja		LClampHigh4
-LClampReentry4:
-	movl	%eax,snext
-
-	cmpl	$2048,%ebx
-	jl		LClampLow5
-	cmpl	%edx,%ebx
-	ja		LClampHigh5
-LClampReentry5:
-
-	cmpl	$1,%ecx			// don't bother 
-	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
-							//  of the segment length
-	subl	s,%eax
-	subl	t,%ebx
-
-	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
-	addl	%ebx,%ebx		//  reciprocal yields 16.48
-	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
-	movl	%edx,%ebp
-
-	movl	%ebx,%eax
-	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
-
-LSetEntryvec:
-//
-// set up advancetable
-//
-	movl	spr8entryvec_table(,%ecx,4),%ebx
-	movl	%edx,%eax
-	pushl	%ebx				// entry point into code for RET later
-	movl	%ebp,%ecx
-	sarl	$16,%ecx			// sstep >>= 16;
-	movl	C(cachewidth),%ebx
-	sarl	$16,%edx			// tstep >>= 16;
-	jz		LIsZeroLast
-	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
-LIsZeroLast:
-	addl	%ecx,%edx			// add in sstep
-								// (tstep >> 16) * cachewidth + (sstep >> 16);
-	movl	tfracf,%ecx
-	movl	%edx,advancetable+4	// advance base in t
-	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
-								//  (sstep >> 16);
-	shll	$16,%ebp			// left-justify sstep fractional part
-	movl	sfracf,%ebx
-	shll	$16,%eax			// left-justify tstep fractional part
-	movl	%edx,advancetable	// advance extra in t
-
-	movl	%eax,tstep
-	movl	%ebp,sstep
-	movl	%ecx,%edx
-
-	movl	pz,%ecx
-	movl	izi,%ebp
-
-	ret							// jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
-	movl	pz,%ecx
-	subl	$7,%edi			// adjust for hardwired offset
-	subl	$14,%ecx
-	jmp		LEndSpan
-
-
-LOnlyOneStep:
-	subl	s,%eax
-	subl	t,%ebx
-	movl	%eax,%ebp
-	movl	%ebx,%edx
-	jmp		LSetEntryvec
-
-//----------------------------------------
-
-.globl	Spr8Entry2_8
-Spr8Entry2_8:
-	subl	$6,%edi		// adjust for hardwired offsets
-	subl	$12,%ecx
-	movb	(%esi),%al
-	jmp		LLEntry2_8
-
-//----------------------------------------
-
-.globl	Spr8Entry3_8
-Spr8Entry3_8:
-	subl	$5,%edi		// adjust for hardwired offsets
-	subl	$10,%ecx
-	jmp		LLEntry3_8
-
-//----------------------------------------
-
-.globl	Spr8Entry4_8
-Spr8Entry4_8:
-	subl	$4,%edi		// adjust for hardwired offsets
-	subl	$8,%ecx
-	jmp		LLEntry4_8
-
-//----------------------------------------
-
-.globl	Spr8Entry5_8
-Spr8Entry5_8:
-	subl	$3,%edi		// adjust for hardwired offsets
-	subl	$6,%ecx
-	jmp		LLEntry5_8
-
-//----------------------------------------
-
-.globl	Spr8Entry6_8
-Spr8Entry6_8:
-	subl	$2,%edi		// adjust for hardwired offsets
-	subl	$4,%ecx
-	jmp		LLEntry6_8
-
-//----------------------------------------
-
-.globl	Spr8Entry7_8
-Spr8Entry7_8:
-	decl	%edi		// adjust for hardwired offsets
-	subl	$2,%ecx
-	jmp		LLEntry7_8
-
-//----------------------------------------
-
-.globl	Spr8Entry8_8
-Spr8Entry8_8:
-	cmpw	(%ecx),%bp
-	jl		Lp9
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp9
-	movw	%bp,(%ecx)
-	movb	%al,(%edi)
-Lp9:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry7_8:
-	cmpw	2(%ecx),%bp
-	jl		Lp10
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp10
-	movw	%bp,2(%ecx)
-	movb	%al,1(%edi)
-Lp10:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry6_8:
-	cmpw	4(%ecx),%bp
-	jl		Lp11
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp11
-	movw	%bp,4(%ecx)
-	movb	%al,2(%edi)
-Lp11:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry5_8:
-	cmpw	6(%ecx),%bp
-	jl		Lp12
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp12
-	movw	%bp,6(%ecx)
-	movb	%al,3(%edi)
-Lp12:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry4_8:
-	cmpw	8(%ecx),%bp
-	jl		Lp13
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp13
-	movw	%bp,8(%ecx)
-	movb	%al,4(%edi)
-Lp13:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry3_8:
-	cmpw	10(%ecx),%bp
-	jl		Lp14
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp14
-	movw	%bp,10(%ecx)
-	movb	%al,5(%edi)
-Lp14:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-LLEntry2_8:
-	cmpw	12(%ecx),%bp
-	jl		Lp15
-	movb	(%esi),%al
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp15
-	movw	%bp,12(%ecx)
-	movb	%al,6(%edi)
-Lp15:
-	addl	izistep,%ebp
-	adcl	$0,%ebp
-	addl	tstep,%edx
-	sbbl	%eax,%eax
-	addl	sstep,%ebx
-	adcl	advancetable+4(,%eax,4),%esi
-
-LEndSpan:
-	cmpw	14(%ecx),%bp
-	jl		Lp16
-	movb	(%esi),%al		// load first texel in segment
-	cmpb	$(TRANSPARENT_COLOR),%al
-	jz		Lp16
-	movw	%bp,14(%ecx)
-	movb	%al,7(%edi)
-Lp16:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
-	fstp %st(0)
-	fstp %st(0)
-	fstp %st(0)
-
-	popl	%ebx				// restore spans pointer
-LNextSpan:
-	addl	$(sspan_t_size),%ebx // point to next span
-	movl	sspan_t_count(%ebx),%ecx
-	cmpl	$0,%ecx				// any more spans?
-	jg		LSpanLoop			// yes
-	jz		LNextSpan			// yes, but this one's empty
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-#endif	// id386
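Most of D_SpriteDrawSpans above is perspective-correct s/t stepping in 8-pixel segments, overlapped with an FDIV left in flight; the per-pixel work is just a z-test plus a transparent-colour skip. A hedged sketch of that inner step, with the z-buffer and 1/z types assumed; the advancetable trick in the asm simply folds the t-fraction carry into choosing one of two precomputed source-pointer advances.

typedef unsigned char byte;

#define TRANSPARENT_COLOR	255	/* must match d_iface.h */

static void
sprite_pixel(byte *pdest, short *pz, unsigned *izi, unsigned izistep,
	const byte *psource)
{
	byte btemp;

	btemp = *psource;
	if(*pz <= (int)(*izi>>16) && btemp != TRANSPARENT_COLOR){
		*pz = *izi>>16;		/* sprite is nearer: write depth... */
		*pdest = btemp;		/* ...and colour */
	}
	*izi += izistep;		/* step 0.32 fixed-point 1/z */
}
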
--- a/d_varsa.s
+++ /dev/null
@@ -1,186 +1,0 @@
-//
-// d_varsa.s
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef	id386
-
-	.data
-
-//-------------------------------------------------------
-// global refresh variables
-//-------------------------------------------------------
-
-// FIXME: put all refresh variables into one contiguous block. Make into one
-// big structure, like cl or sv?
-
-	.align	4
-.globl	C(d_sdivzstepu)
-.globl	C(d_tdivzstepu)
-.globl	C(d_zistepu)
-.globl	C(d_sdivzstepv)
-.globl	C(d_tdivzstepv)
-.globl	C(d_zistepv)
-.globl	C(d_sdivzorigin)
-.globl	C(d_tdivzorigin)
-.globl	C(d_ziorigin)
-C(d_sdivzstepu):	.single	0
-C(d_tdivzstepu):	.single	0
-C(d_zistepu):		.single	0
-C(d_sdivzstepv):	.single	0
-C(d_tdivzstepv):	.single	0
-C(d_zistepv):		.single	0
-C(d_sdivzorigin):	.single	0
-C(d_tdivzorigin):	.single	0
-C(d_ziorigin):		.single	0
-
-.globl	C(sadjust)
-.globl	C(tadjust)
-.globl	C(bbextents)
-.globl	C(bbextentt)
-C(sadjust):			.long	0
-C(tadjust):			.long	0
-C(bbextents):		.long	0
-C(bbextentt):		.long	0
-
-.globl	C(cacheblock)
-.globl	C(d_viewbuffer)
-.globl	C(cachewidth)
-.globl	C(d_pzbuffer)
-.globl	C(d_zrowbytes)
-.globl	C(d_zwidth)
-C(cacheblock):		.long	0
-C(cachewidth):		.long	0
-C(d_viewbuffer):	.long	0
-C(d_pzbuffer):		.long	0
-C(d_zrowbytes):		.long	0
-C(d_zwidth):		.long	0
-
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl	izi
-izi:			.long	0
-
-.globl	pbase, s, t, sfracf, tfracf, snext, tnext
-.globl	spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
-.globl	zi8stepu, sdivz8stepu, tdivz8stepu, pz
-s:				.long	0
-t:				.long	0
-snext:			.long	0
-tnext:			.long	0
-sfracf:			.long	0
-tfracf:			.long	0
-pbase:			.long	0
-zi8stepu:		.long	0
-sdivz8stepu:	.long	0
-tdivz8stepu:	.long	0
-zi16stepu:		.long	0
-sdivz16stepu:	.long	0
-tdivz16stepu:	.long	0
-spancountminus1: .long	0
-pz:				.long	0
-
-.globl	izistep
-izistep:				.long	0
-
-//-------------------------------------------------------
-// local variables for d_draw16.s
-//-------------------------------------------------------
-
-.globl	reciprocal_table_16, entryvec_table_16
-// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
-// 1/14, and 1/15 in 0.32 form
-reciprocal_table_16:	.long	0x40000000, 0x2aaaaaaa, 0x20000000
-						.long	0x19999999, 0x15555555, 0x12492492
-						.long	0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
-						.long	0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
-
-	.extern Entry2_16
-	.extern Entry3_16
-	.extern Entry4_16
-	.extern Entry5_16
-	.extern Entry6_16
-	.extern Entry7_16
-	.extern Entry8_16
-	.extern Entry9_16
-	.extern Entry10_16
-	.extern Entry11_16
-	.extern Entry12_16
-	.extern Entry13_16
-	.extern Entry14_16
-	.extern Entry15_16
-	.extern Entry16_16
-
-entryvec_table_16:	.long	0, Entry2_16, Entry3_16, Entry4_16
-					.long	Entry5_16, Entry6_16, Entry7_16, Entry8_16
-					.long	Entry9_16, Entry10_16, Entry11_16, Entry12_16
-					.long	Entry13_16, Entry14_16, Entry15_16, Entry16_16
-
-//-------------------------------------------------------
-// local variables for d_parta.s
-//-------------------------------------------------------
-.globl	DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
-DP_Count:		.long	0
-DP_u:			.long	0
-DP_v:			.long	0
-DP_32768:		.single	32768.0
-DP_Color:		.long	0
-DP_Pix:			.long	0
-
-
-	.extern DP_1x1
-	.extern DP_2x2
-	.extern DP_3x3
-	.extern DP_4x4
-
-DP_EntryTable:	.long	DP_1x1, DP_2x2, DP_3x3, DP_4x4
-
-//
-// advancetable is 8 bytes, but points to the middle of that range so negative
-// offsets will work
-//
-.globl	advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
-advancetable:	.long	0, 0
-sstep:			.long	0
-tstep:			.long	0
-
-pspantemp:		.long	0
-counttemp:		.long	0
-jumptemp:		.long	0
-
-// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
-.globl	reciprocal_table, entryvec_table
-reciprocal_table:	.long	0x40000000, 0x2aaaaaaa, 0x20000000
-					.long	0x19999999, 0x15555555, 0x12492492
-
-	.extern Entry2_8
-	.extern Entry3_8
-	.extern Entry4_8
-	.extern Entry5_8
-	.extern Entry6_8
-	.extern Entry7_8
-	.extern Entry8_8
-
-entryvec_table:	.long	0, Entry2_8, Entry3_8, Entry4_8
-				.long	Entry5_8, Entry6_8, Entry7_8, Entry8_8
-
-	.extern Spr8Entry2_8
-	.extern Spr8Entry3_8
-	.extern Spr8Entry4_8
-	.extern Spr8Entry5_8
-	.extern Spr8Entry6_8
-	.extern Spr8Entry7_8
-	.extern Spr8Entry8_8
-	
-.globl spr8entryvec_table
-spr8entryvec_table:	.long	0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
-					.long	Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
-
-#endif	// id386
-
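The reciprocal tables above let the span code replace a divide by (spancount-1) with a multiply. The values check out as 1/n scaled by 2^31; the "0.32 form" comments aside, d_spr8.s doubles its operands first and treats these as 1.31 reciprocals. A throwaway generator that reproduces the entries:

#include <stdio.h>

int
main(void)
{
	int n;

	printf("reciprocal_table (1/2 .. 1/7):\n");
	for(n = 2; n <= 7; n++)
		printf("\t.long\t0x%lx\n", 0x80000000UL/n);

	printf("reciprocal_table_16 (1/2 .. 1/15):\n");
	for(n = 2; n <= 15; n++)
		printf("\t.long\t0x%lx\n", 0x80000000UL/n);
	return 0;
}
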
--- a/math.s
+++ /dev/null
@@ -1,399 +1,0 @@
-//
-// math.s
-// x86 assembly-language math routines.
-
-#define GLQUAKE	1	// don't include unneeded defs
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-
-#ifdef	id386
-
-	.data
-
-	.align	4
-Ljmptab:	.long	Lcase0, Lcase1, Lcase2, Lcase3
-			.long	Lcase4, Lcase5, Lcase6, Lcase7
-
-	.text
-
-// TODO: rounding needed?
-// stack parameter offset
-#define	val	4
-
-.globl C(Invert24To16)
-C(Invert24To16):
-
-	movl	val(%esp),%ecx
-	movl	$0x100,%edx		// 0x10000000000 as dividend
-	cmpl	%edx,%ecx
-	jle		LOutOfRange
-
-	subl	%eax,%eax
-	divl	%ecx
-
-	ret
-
-LOutOfRange:
-	movl	$0xFFFFFFFF,%eax
-	ret
-
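Invert24To16 above divides 2^40 by an 8.24 fixed-point value to get its 16.16 reciprocal; the val <= 0x100 guard exists because the 32-bit DIV would overflow at 256 and below. A hedged C sketch using 64-bit arithmetic in place of the EDX:EAX dividend:

#include <stdint.h>

typedef int32_t fixed16_t;	/* 16.16 fixed point */

fixed16_t
Invert24To16_sketch(fixed16_t val)
{
	if(val <= 0x100)
		return (fixed16_t)0xFFFFFFFF;
	return (fixed16_t)(((uint64_t)1<<40) / (uint32_t)val);
}
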
-#define	in	4
-#define out	8
-
-	.align 2
-.globl C(TransformVector)
-C(TransformVector):
-	movl	in(%esp),%eax
-	movl	out(%esp),%edx
-
-	flds	(%eax)		// in[0]
-	fmuls	C(vright)		// in[0]*vright[0]
-	flds	(%eax)		// in[0] | in[0]*vright[0]
-	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
-	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
-	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
-
-	flds	4(%eax)		// in[1] | ...
-	fmuls	C(vright)+4	// in[1]*vright[1] | ...
-	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
-	fmuls	C(vup)+4		// in[1]*vup[1] | in[1]*vright[1] | ...
-	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
-	fmuls	C(vpn)+4		// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
-	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
-
-	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
-	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
-	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
-
-	flds	8(%eax)		// in[2] | ...
-	fmuls	C(vright)+8	// in[2]*vright[2] | ...
-	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
-	fmuls	C(vup)+8		// in[2]*vup[2] | in[2]*vright[2] | ...
-	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
-	fmuls	C(vpn)+8		// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
-	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
-
-	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
-	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
-	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
-
-	fstps	8(%edx)		// out[2]
-	fstps	4(%edx)		// out[1]
-	fstps	(%edx)		// out[0]
-
-	ret
-
-
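TransformVector above is three dot products: it expresses a world-space vector in the view basis. A hedged C sketch; vright, vup and vpn are the renderer's basis vectors, declared here only so the sketch stands alone.

typedef float vec3_t[3];

extern vec3_t vright, vup, vpn;

#define DotProduct(a, b) ((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2])

void
TransformVector_sketch(const vec3_t in, vec3_t out)
{
	out[0] = DotProduct(in, vright);	/* right component */
	out[1] = DotProduct(in, vup);		/* up component */
	out[2] = DotProduct(in, vpn);		/* forward component */
}
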
-#define EMINS	4+4
-#define EMAXS	4+8
-#define P		4+12
-
-	.align 2
-.globl C(BoxOnPlaneSide)
-C(BoxOnPlaneSide):
-	pushl	%ebx
-
-	movl	P(%esp),%edx
-	movl	EMINS(%esp),%ecx
-	xorl	%eax,%eax
-	movl	EMAXS(%esp),%ebx
-	movb	pl_signbits(%edx),%al
-	cmpb	$8,%al
-	jge		Lerror
-	flds	pl_normal(%edx)		// p->normal[0]
-	fld		%st(0)				// p->normal[0] | p->normal[0]
-	jmp		Ljmptab(,%eax,4)
-
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase0:
-	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
-	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
-								//  p->normal[0]
-	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
-								//  p->normal[1]
-	fmuls	(%ecx)				// p->normal[0]*emins[0] |
-								//  p->normal[0]*emaxs[0] | p->normal[1]
-	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fld		%st(0)				// p->normal[1] | p->normal[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
-								//  p->normal[1] | p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
-								//  p->normal[2] | p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
-								//  p->normal[1]*emaxs[1] |
-								//  p->normal[2] | p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
-								//  p->normal[1]*emins[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fld		%st(0)				// p->normal[2] | p->normal[2] |
-								//  p->normal[1]*emaxs[1] |
-								//  p->normal[1]*emins[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
-								//  p->normal[2] |
-								//  p->normal[1]*emaxs[1] |
-								//  p->normal[1]*emins[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[0]*emins[0]
-	fxch	%st(5)				// p->normal[0]*emins[0] |
-								//  p->normal[2] |
-								//  p->normal[1]*emaxs[1] |
-								//  p->normal[1]*emins[1] |
-								//  p->normal[0]*emaxs[0] |
-								//  p->normal[2]*emaxs[2]
-	faddp	%st(0),%st(3)		//p->normal[2] |
-								// p->normal[1]*emaxs[1] |
-								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// p->normal[0]*emaxs[0] |
-								// p->normal[2]*emaxs[2]
-	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
-								// p->normal[1]*emaxs[1] |
-								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// p->normal[0]*emaxs[0] |
-								// p->normal[2]*emaxs[2]
-	fxch	%st(1)				//p->normal[1]*emaxs[1] |
-								// p->normal[2]*emins[2] |
-								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// p->normal[0]*emaxs[0] |
-								// p->normal[2]*emaxs[2]
-	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
-								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
-								// p->normal[2]*emaxs[2]
-	fxch	%st(3)				//p->normal[2]*emaxs[2] +
-								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
-								// p->normal[2]*emins[2]
-	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
-								// dist1 | p->normal[2]*emins[2]
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase1:
-	fmuls	(%ecx)				// emins[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ebx)				// emaxs[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ebx)				// emaxs[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ecx)				// emins[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase2:
-	fmuls	(%ebx)				// emaxs[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ecx)				// emins[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ecx)				// emins[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ebx)				// emaxs[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase3:
-	fmuls	(%ecx)				// emins[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ebx)				// emaxs[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ecx)				// emins[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ebx)				// emaxs[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase4:
-	fmuls	(%ebx)				// emaxs[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ecx)				// emins[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ebx)				// emaxs[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ecx)				// emins[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase5:
-	fmuls	(%ecx)				// emins[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ebx)				// emaxs[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ebx)				// emaxs[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ecx)				// emins[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase6:
-	fmuls	(%ebx)				// emaxs[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ecx)				// emins[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ecx)				// emins[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ebx)				// emaxs[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-	jmp		LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase7:
-	fmuls	(%ecx)				// emins[0]
-	flds	pl_normal+4(%edx)
-	fxch	%st(2)
-	fmuls	(%ebx)				// emaxs[0]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	4(%ecx)				// emins[1]
-	flds	pl_normal+8(%edx)
-	fxch	%st(2)
-	fmuls	4(%ebx)				// emaxs[1]
-	fxch	%st(2)
-	fld		%st(0)
-	fmuls	8(%ecx)				// emins[2]
-	fxch	%st(5)
-	faddp	%st(0),%st(3)
-	fmuls	8(%ebx)				// emaxs[2]
-	fxch	%st(1)
-	faddp	%st(0),%st(3)
-	fxch	%st(3)
-	faddp	%st(0),%st(2)
-
-LSetSides:
-
-//	sides = 0;
-//	if (dist1 >= p->dist)
-//		sides = 1;
-//	if (dist2 < p->dist)
-//		sides |= 2;
-
-	faddp	%st(0),%st(2)		// dist1 | dist2
-	fcomps	pl_dist(%edx)
-	xorl	%ecx,%ecx
-	fnstsw	%ax
-	fcomps	pl_dist(%edx)
-	andb	$1,%ah
-	xorb	$1,%ah
-	addb	%ah,%cl
-
-	fnstsw	%ax
-	andb	$1,%ah
-	addb	%ah,%ah
-	addb	%ah,%cl
-
-//	return sides;
-
-	popl	%ebx
-	movl	%ecx,%eax	// return status
-
-	ret
-
-
-Lerror:
-	call	C(BOPS_Error)
-
-#endif	// id386
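BoxOnPlaneSide above unrolls the eight signbits cases to pick, per axis, which box corner maximises dist1 and which minimises dist2, then classifies both against the plane distance. The same logic written as a loop, with a pared-down mplane_t assumed for the sketch:

typedef float vec3_t[3];
typedef struct {
	vec3_t normal;
	float dist;
	unsigned char signbits;
} mplane_t;

int
BoxOnPlaneSide_sketch(vec3_t emins, vec3_t emaxs, mplane_t *p)
{
	float dist1, dist2;
	int i, sides;

	dist1 = dist2 = 0;
	for(i = 0; i < 3; i++){
		if(p->signbits & (1<<i)){		/* normal[i] < 0 */
			dist1 += p->normal[i]*emins[i];
			dist2 += p->normal[i]*emaxs[i];
		}else{
			dist1 += p->normal[i]*emaxs[i];
			dist2 += p->normal[i]*emins[i];
		}
	}
	sides = 0;
	if(dist1 >= p->dist)
		sides = 1;
	if(dist2 < p->dist)
		sides |= 2;
	return sides;		/* 1: front, 2: back, 3: straddles the plane */
}
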
--- a/mkfile
+++ b/mkfile
@@ -80,31 +80,10 @@
 	snd_mem.o\
 	snd_mix.o\
 	snd_linux.o\
-	d_draw.o\
-	d_draw16.o\
-	d_parta.o\
-	d_polysa.o\
-	d_scana.o\
-	d_spr8.o\
-	d_varsa.o\
-	math.o\
-	r_aliasa.o\
-	r_drawa.o\
-	r_edgea.o\
-	r_varsa.o\
-	surf16.o\
-	surf8.o\
-	worlda.o\
-	r_aclipa.o\
-	snd_mixa.o\
-	#sys_dosa.o\
 
 HFILES=\
 	adivtab.h\
 	anorms.h\
-	asm_draw.h\
-	asm_i386.h\
-	block16.h\
 	bspfile.h\
 	cdaudio.h\
 	client.h\
@@ -113,7 +92,6 @@
 	console.h\
 	crc.h\
 	cvar.h\
-	d_ifacea.h\
 	d_iface.h\
 	d_local.h\
 	draw.h\
@@ -131,7 +109,6 @@
 	progdefs.h\
 	progs.h\
 	protocol.h\
-	quakeasm.h\
 	quakedef.h\
 	render.h\
 	r_local.h\
@@ -149,8 +126,3 @@
 	zone.h\
 
 <$PLAN9/src/mkone
-
-AS=gcc
-
-%.$O:	%.s
-	$AS $AFLAGS -o $target -c $stem.s
--- a/quakeasm.h
+++ /dev/null
@@ -1,248 +1,0 @@
-//
-// quakeasm.h: general asm header file
-//
-
-//#define GLQUAKE	1
-
-#ifdef __i386__
-#define id386
-#endif
-
-// !!! must be kept the same as in d_iface.h !!!
-#define TRANSPARENT_COLOR	255
-
-#ifndef GLQUAKE
-	.extern C(d_zistepu)
-	.extern C(d_pzbuffer)
-	.extern C(d_zistepv)
-	.extern C(d_zrowbytes)
-	.extern C(d_ziorigin)
-	.extern C(r_turb_s)
-	.extern C(r_turb_t)
-	.extern C(r_turb_pdest)
-	.extern C(r_turb_spancount)
-	.extern C(r_turb_turb)
-	.extern C(r_turb_pbase)
-	.extern C(r_turb_sstep)
-	.extern C(r_turb_tstep)
-	.extern	C(r_bmodelactive)
-	.extern	C(d_sdivzstepu)
-	.extern	C(d_tdivzstepu)
-	.extern	C(d_sdivzstepv)
-	.extern	C(d_tdivzstepv)
-	.extern	C(d_sdivzorigin)
-	.extern	C(d_tdivzorigin)
-	.extern	C(sadjust)
-	.extern	C(tadjust)
-	.extern	C(bbextents)
-	.extern	C(bbextentt)
-	.extern	C(cacheblock)
-	.extern	C(d_viewbuffer)
-	.extern	C(cachewidth)
-	.extern	C(d_pzbuffer)
-	.extern	C(d_zrowbytes)
-	.extern	C(d_zwidth)
-	.extern C(d_scantable)
-	.extern C(r_lightptr)
-	.extern C(r_numvblocks)
-	.extern C(prowdestbase)
-	.extern C(pbasesource)
-	.extern C(r_lightwidth)
-	.extern C(lightright)
-	.extern C(lightrightstep)
-	.extern C(lightdeltastep)
-	.extern C(lightdelta)
-	.extern C(lightright)
-	.extern C(lightdelta)
-	.extern C(sourcetstep)
-	.extern C(surfrowbytes)
-	.extern C(lightrightstep)
-	.extern C(lightdeltastep)
-	.extern C(r_sourcemax)
-	.extern C(r_stepback)
-	.extern C(colormap)
-	.extern C(blocksize)
-	.extern C(sourcesstep)
-	.extern C(lightleft)
-	.extern C(blockdivshift)
-	.extern C(blockdivmask)
-	.extern C(lightleftstep)
-	.extern C(r_origin)
-	.extern C(r_ppn)
-	.extern C(r_pup)
-	.extern C(r_pright)
-	.extern C(ycenter)
-	.extern C(xcenter)
-	.extern C(d_vrectbottom_particle)
-	.extern C(d_vrectright_particle)
-	.extern C(d_vrecty)
-	.extern C(d_vrectx)
-	.extern C(d_pix_shift)
-	.extern C(d_pix_min)
-	.extern C(d_pix_max)
-	.extern C(d_y_aspect_shift)
-	.extern C(screenwidth)
-	.extern C(r_leftclipped)
-	.extern C(r_leftenter)
-	.extern C(r_rightclipped)
-	.extern C(r_rightenter)
-	.extern C(modelorg)
-	.extern C(xscale)
-	.extern C(r_refdef)
-	.extern C(yscale)
-	.extern C(r_leftexit)
-	.extern C(r_rightexit)
-	.extern C(r_lastvertvalid)
-	.extern C(cacheoffset)
-	.extern C(newedges)
-	.extern C(removeedges)
-	.extern C(r_pedge)
-	.extern C(r_framecount)
-	.extern C(r_u1)
-	.extern C(r_emitted)
-	.extern C(edge_p)
-	.extern C(surface_p)
-	.extern C(surfaces)
-	.extern C(r_lzi1)
-	.extern C(r_v1)
-	.extern C(r_ceilv1)
-	.extern C(r_nearzi)
-	.extern C(r_nearzionly)
-	.extern C(edge_aftertail)
-	.extern C(edge_tail)
-	.extern C(current_iv)
-	.extern C(edge_head_u_shift20)
-	.extern C(span_p)
-	.extern C(edge_head)
-	.extern C(fv)
-	.extern C(edge_tail_u_shift20)
-	.extern C(r_apverts)
-	.extern C(r_anumverts)
-	.extern C(aliastransform)
-	.extern C(r_avertexnormals)
-	.extern C(r_plightvec)
-	.extern C(r_ambientlight)
-	.extern C(r_shadelight)
-	.extern C(aliasxcenter)
-	.extern C(aliasycenter)
-	.extern C(a_sstepxfrac)
-	.extern C(r_affinetridesc)
-	.extern C(acolormap)
-	.extern C(d_pcolormap)
-	.extern C(r_affinetridesc)
-	.extern C(d_sfrac)
-	.extern C(d_ptex)
-	.extern C(d_pedgespanpackage)
-	.extern C(d_tfrac)
-	.extern C(d_light)
-	.extern C(d_zi)
-	.extern C(d_pdest)
-	.extern C(d_pz)
-	.extern C(d_aspancount)
-	.extern C(erroradjustup)
-	.extern C(errorterm)
-	.extern C(d_xdenom)
-	.extern C(r_p0)
-	.extern C(r_p1)
-	.extern C(r_p2)
-	.extern C(a_tstepxfrac)
-	.extern C(r_sstepx)
-	.extern C(r_tstepx)
-	.extern C(a_ststepxwhole)
-	.extern C(zspantable)
-	.extern C(skintable)
-	.extern C(r_zistepx)
-	.extern C(erroradjustdown)
-	.extern C(d_countextrastep)
-	.extern C(ubasestep)
-	.extern C(a_ststepxwhole)
-	.extern C(a_tstepxfrac)
-	.extern C(r_lstepx)
-	.extern C(a_spans)
-	.extern C(erroradjustdown)
-	.extern C(d_pdestextrastep)
-	.extern C(d_pzextrastep)
-	.extern C(d_sfracextrastep)
-	.extern C(d_ptexextrastep)
-	.extern C(d_countextrastep)
-	.extern C(d_tfracextrastep)
-	.extern C(d_lightextrastep)
-	.extern C(d_ziextrastep)
-	.extern C(d_pdestbasestep)
-	.extern C(d_pzbasestep)
-	.extern C(d_sfracbasestep)
-	.extern C(d_ptexbasestep)
-	.extern C(ubasestep)
-	.extern C(d_tfracbasestep)
-	.extern C(d_lightbasestep)
-	.extern C(d_zibasestep)
-	.extern C(zspantable)
-	.extern C(r_lstepy)
-	.extern C(r_sstepy)
-	.extern C(r_tstepy)
-	.extern C(r_zistepy)
-	.extern C(D_PolysetSetEdgeTable)
-	.extern C(D_RasterizeAliasPolySmooth)
-
-	.extern float_point5
-	.extern Float2ToThe31nd
-	.extern izistep
-	.extern izi
-	.extern FloatMinus2ToThe31nd
-	.extern float_1
-	.extern float_particle_z_clip
-	.extern float_minus_1
-	.extern float_0
-	.extern fp_16
-	.extern fp_64k
-	.extern fp_1m
-	.extern fp_1m_minus_1
-	.extern fp_8 
-	.extern entryvec_table
-	.extern advancetable
-	.extern sstep
-	.extern tstep
-	.extern pspantemp
-	.extern counttemp
-	.extern jumptemp
-	.extern reciprocal_table
-	.extern DP_Count
-	.extern DP_u
-	.extern DP_v
-	.extern DP_32768
-	.extern DP_Color
-	.extern DP_Pix
-	.extern DP_EntryTable
-	.extern	pbase
-	.extern s
-	.extern t
-	.extern sfracf
-	.extern tfracf
-	.extern snext
-	.extern tnext
-	.extern	spancountminus1
-	.extern zi16stepu
-	.extern sdivz16stepu
-	.extern tdivz16stepu
-	.extern	zi8stepu
-	.extern sdivz8stepu
-	.extern tdivz8stepu
-	.extern reciprocal_table_16
-	.extern entryvec_table_16
-	.extern ceil_cw
-	.extern single_cw
-	.extern fp_64kx64k
-	.extern pz
-	.extern spr8entryvec_table
-#endif
-
-	.extern C(snd_scaletable)
-	.extern C(paintbuffer)
-	.extern C(snd_linear_count)
-	.extern C(snd_p)
-	.extern C(snd_vol)
-	.extern C(snd_out)
-	.extern C(vright)
-	.extern C(vup)
-	.extern C(vpn)
-	.extern C(BOPS_Error)
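Every symbol above goes through the C() wrapper from asm_i386.h, whose job is to absorb the leading underscore that a.out-style toolchains prepend to C symbol names so the same .s source links under either convention. The exact definition is not shown in this patch; a hedged guess at the usual form:

#ifdef ELF
#define C(label)	label
#else
#define C(label)	_##label
#endif
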
--- a/r_aclipa.s
+++ /dev/null
@@ -1,197 +1,0 @@
-//
-// r_aclipa.s
-// x86 assembly-language Alias model vertex clipping code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-	.data
-Ltemp0:	.long	0
-Ltemp1:	.long	0
-
-	.text
-
-#define pfv0		8+4
-#define pfv1		8+8
-#define out			8+12
-
-.globl C(R_Alias_clip_bottom)
-C(R_Alias_clip_bottom):
-	pushl	%esi
-	pushl	%edi
-
-	movl	pfv0(%esp),%esi
-	movl	pfv1(%esp),%edi
-
-	movl	C(r_refdef)+rd_aliasvrectbottom,%eax
-
-LDoForwardOrBackward:
-
-	movl	fv_v+4(%esi),%edx
-	movl	fv_v+4(%edi),%ecx
-
-	cmpl	%ecx,%edx
-	jl		LDoForward
-
-	movl	fv_v+4(%esi),%ecx
-	movl	fv_v+4(%edi),%edx
-	movl	pfv0(%esp),%edi
-	movl	pfv1(%esp),%esi
-
-LDoForward:
-
-	subl	%edx,%ecx
-	subl	%edx,%eax
-	movl	%ecx,Ltemp1
-	movl	%eax,Ltemp0
-	fildl	Ltemp1
-	fildl	Ltemp0
-	movl	out(%esp),%edx
-	movl	$2,%eax
-
-	fdivp	%st(0),%st(1)					// scale
-
-LDo3Forward:
-	fildl	fv_v+0(%esi)	// fv0v0 | scale
-	fildl	fv_v+0(%edi)	// fv1v0 | fv0v0 | scale
-	fildl	fv_v+4(%esi)	// fv0v1 | fv1v0 | fv0v0 | scale
-	fildl	fv_v+4(%edi)	// fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
-	fildl	fv_v+8(%esi)	// fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
-	fildl	fv_v+8(%edi)	// fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
-							//  scale
-	fxch	%st(5)			// fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
-							//  scale
-	fsubr	%st(0),%st(4)	// fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
-							//  fv1v2 | scale
-	fxch	%st(3)			// fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
-							//  fv1v2 | scale
-	fsubr	%st(0),%st(2)	// fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
-							//  fv1v0-fv0v0 | fv1v2 | scale
-	fxch	%st(1)			// fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
-							//  fv1v0-fv0v0 | fv1v2 | scale
-	fsubr	%st(0),%st(5)	// fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
-							//  fv1v0-fv0v0 | fv1v2-fv0v2 | scale
-	fxch	%st(6)			// scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
-							//  fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
-	fmul	%st(0),%st(4)	// scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
-							//  (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
-	addl	$12,%edi
-	fmul	%st(0),%st(2)	// scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
-							//  (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
-	addl	$12,%esi
-	addl	$12,%edx
-	fmul	%st(0),%st(5)	// scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
-							//  (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
-							//  fv0v2
-	fxch	%st(3)			// fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
-							//  (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
-							//  fv0v2
-	faddp	%st(0),%st(4)	// fv0v1 | (fv1v1-fv0v1)*scale | scale |
-							//  fv0v0+(fv1v0-fv0v0)*scale |
-							//  (fv1v2-fv0v2)*scale | fv0v2
-	faddp	%st(0),%st(1)	// fv0v1+(fv1v1-fv0v1)*scale | scale |
-							//  fv0v0+(fv1v0-fv0v0)*scale |
-							//  (fv1v2-fv0v2)*scale | fv0v2
-	fxch	%st(4)			// fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
-							//  (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
-	faddp	%st(0),%st(3)	// scale | fv0v0+(fv1v0-fv0v0)*scale |
-							//  fv0v2+(fv1v2-fv0v2)*scale |
-							//  fv0v1+(fv1v1-fv0v1)*scale
-	fxch	%st(1)			// fv0v0+(fv1v0-fv0v0)*scale | scale | 
-							//  fv0v2+(fv1v2-fv0v2)*scale |
-							//  fv0v1+(fv1v1-fv0v1)*scale
-	fadds	float_point5
-	fxch	%st(3)			// fv0v1+(fv1v1-fv0v1)*scale | scale | 
-							//  fv0v2+(fv1v2-fv0v2)*scale |
-							//  fv0v0+(fv1v0-fv0v0)*scale
-	fadds	float_point5
-	fxch	%st(2)			// fv0v2+(fv1v2-fv0v2)*scale | scale | 
-							//  fv0v1+(fv1v1-fv0v1)*scale |
-							//  fv0v0+(fv1v0-fv0v0)*scale
-	fadds	float_point5
-	fxch	%st(3)			// fv0v0+(fv1v0-fv0v0)*scale | scale | 
-							//  fv0v1+(fv1v1-fv0v1)*scale |
-							//  fv0v2+(fv1v2-fv0v2)*scale
-	fistpl	fv_v+0-12(%edx)	// scale | fv0v1+(fv1v1-fv0v1)*scale |
-							//  fv0v2+(fv1v2-fv0v2)*scale
-	fxch	%st(1)			// fv0v1+(fv1v1-fv0v1)*scale | scale |
-							//  fv0v2+(fv1v2-fv0v2)*scale | scale
-	fistpl	fv_v+4-12(%edx)	// scale | fv0v2+(fv1v2-fv0v2)*scale
-	fxch	%st(1)			// fv0v2+(fv1v2-fv0v2)*sc | scale
-	fistpl	fv_v+8-12(%edx)	// scale
-
-	decl	%eax
-	jnz		LDo3Forward
-
-	fstp	%st(0)
-
-	popl	%edi
-	popl	%esi
-
-	ret
-
-
-.globl C(R_Alias_clip_top)
-C(R_Alias_clip_top):
-	pushl	%esi
-	pushl	%edi
-
-	movl	pfv0(%esp),%esi
-	movl	pfv1(%esp),%edi
-
-	movl	C(r_refdef)+rd_aliasvrect+4,%eax
-	jmp		LDoForwardOrBackward
-
-
-
-.globl C(R_Alias_clip_right)
-C(R_Alias_clip_right):
-	pushl	%esi
-	pushl	%edi
-
-	movl	pfv0(%esp),%esi
-	movl	pfv1(%esp),%edi
-
-	movl	C(r_refdef)+rd_aliasvrectright,%eax
-
-LRightLeftEntry:
-
-
-	movl	fv_v+4(%esi),%edx
-	movl	fv_v+4(%edi),%ecx
-
-	cmpl	%ecx,%edx
-	movl	fv_v+0(%esi),%edx
-
-	movl	fv_v+0(%edi),%ecx
-	jl		LDoForward2
-
-	movl	fv_v+0(%esi),%ecx
-	movl	fv_v+0(%edi),%edx
-	movl	pfv0(%esp),%edi
-	movl	pfv1(%esp),%esi
-
-LDoForward2:
-
-	jmp		LDoForward
-
-
-.globl C(R_Alias_clip_left)
-C(R_Alias_clip_left):
-	pushl	%esi
-	pushl	%edi
-
-	movl	pfv0(%esp),%esi
-	movl	pfv1(%esp),%edi
-
-	movl	C(r_refdef)+rd_aliasvrect+0,%eax
-	jmp		LRightLeftEntry
-
-
-#endif	// id386
-
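The clip routines above all funnel into the same interpolation: compute how far along the edge the clip boundary sits, then lerp all six finalvert components to that point (top, left and right differ only in which vrect edge and which vertex component drive the scale). A hedged C sketch of the bottom case; finalvert_t and the refdef field are stand-ins declared for the sketch.

typedef struct { int v[6]; int flags; } finalvert_t;

extern int r_aliasvrectbottom;	/* stand-in for r_refdef.aliasvrectbottom */

void
R_Alias_clip_bottom_sketch(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out)
{
	finalvert_t *tmp;
	float scale;
	int i;

	/* interpolate from the vertex with the smaller screen v, as the asm
	   does, so both edge directions round identically */
	if(pfv0->v[1] >= pfv1->v[1]){
		tmp = pfv0;
		pfv0 = pfv1;
		pfv1 = tmp;
	}
	scale = (float)(r_aliasvrectbottom - pfv0->v[1])
		/ (pfv1->v[1] - pfv0->v[1]);
	for(i = 0; i < 6; i++)
		out->v[i] = pfv0->v[i] + (pfv1->v[i] - pfv0->v[i])*scale + 0.5;
}
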
--- a/r_aliasa.s
+++ /dev/null
@@ -1,218 +1,0 @@
-//
-// r_aliasa.s
-// x86 assembly-language Alias model transform and project code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-	.data
-
-Lfloat_1:	.single	1.0
-Ltemp:		.long	0
-Lcoords:	.long	0, 0, 0
-
-	.text
-
-#define fv			12+4
-#define pstverts	12+8
-
-.globl C(R_AliasTransformAndProjectFinalVerts)
-C(R_AliasTransformAndProjectFinalVerts):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-
-//	int			i, temp;
-//	float		lightcos, *plightnormal, zi;
-//	trivertx_t	*pverts;
-
-//	pverts = r_apverts;
-	movl	C(r_apverts),%esi
-
-//	for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
-//	{
-	movl	pstverts(%esp),%ebp
-	movl	fv(%esp),%edi
-	movl	C(r_anumverts),%ecx
-	subl	%edx,%edx
-
-Lloop:
-
-//	// transform and project
-//		zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
-//				aliastransform[2][3]);
-	movb	(%esi),%dl
-	movb	%dl,Lcoords
-	fildl	Lcoords				// v[0]
-	movb	1(%esi),%dl
-	movb	%dl,Lcoords+4
-	fildl	Lcoords+4			// v[1] | v[0]
-	movb	2(%esi),%dl	
-	movb	%dl,Lcoords+8
-	fildl	Lcoords+8			// v[2] | v[1] | v[0]
-
-	fld		%st(2)				// v[0] | v[2] | v[1] | v[0]
-	fmuls	C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
-	fld		%st(2)				// v[1] | accum | v[2] | v[1] | v[0]
-	fmuls	C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
-	fxch	%st(1)				// accum | accum2 | v[2] | v[1] | v[0]
-	fadds	C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
-	fld		%st(2)				// v[2] | accum | accum2 | v[2] | v[1] | v[0]
-	fmuls	C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
-								 //  v[0]
-	fxch	%st(1)				// accum | accum3 | accum2 | v[2] | v[1] | v[0]
-	faddp	%st(0),%st(2)		// accum3 | accum | v[2] | v[1] | v[0]
-	movb	tv_lightnormalindex(%esi),%dl
-	movl	stv_s(%ebp),%eax
-	movl	%eax,fv_v+8(%edi)
-	faddp	%st(0),%st(1)		// z | v[2] | v[1] | v[0]
-
-	movl	stv_t(%ebp),%eax
-	movl	%eax,fv_v+12(%edi)
-
-//	// lighting
-//		plightnormal = r_avertexnormals[pverts->lightnormalindex];
-
-	fdivrs	Lfloat_1			// zi | v[2] | v[1] | v[0]
-
-//		fv->v[2] = pstverts->s;
-//		fv->v[3] = pstverts->t;
-//		fv->flags = pstverts->onseam;
-	movl	stv_onseam(%ebp),%eax
-	movl	%eax,fv_flags(%edi)
-
-	movl	fv_size(%edi),%eax
-	movl	stv_size(%ebp),%eax
-	movl	4(%esi),%eax
-
-	leal	(%edx,%edx,2),%eax	// index*3
-
-	fxch	%st(3)				// v[0] | v[2] | v[1] | zi
-
-//		lightcos = DotProduct (plightnormal, r_plightvec);
-	flds	C(r_avertexnormals)(,%eax,4)
-	fmuls	C(r_plightvec)
-	flds	C(r_avertexnormals)+4(,%eax,4)
-	fmuls	C(r_plightvec)+4
-	flds	C(r_avertexnormals)+8(,%eax,4)
-	fmuls	C(r_plightvec)+8
-	fxch	%st(1)
-	faddp	%st(0),%st(2)
-	fld		%st(2)				 // v[0] | laccum | laccum2 | v[0] | v[2] |
-								 //  v[1] | zi
-	fmuls	C(aliastransform)+0  // xaccum | laccum | laccum2 | v[0] | v[2] |
-								 //  v[1] | zi
-	fxch	%st(2)				 // laccum2 | laccum | xaccum | v[0] | v[2] |
-								 //  v[1] | zi
-	faddp	%st(0),%st(1)		 // laccum | xaccum | v[0] | v[2] | v[1] | zi
-
-//		temp = r_ambientlight;
-//		if (lightcos < 0)
-//		{
-	fsts	Ltemp
-	movl	C(r_ambientlight),%eax
-	movb	Ltemp+3,%dl
-	testb	$0x80,%dl
-	jz		Lsavelight	// no need to clamp if only ambient lit, because
-						//  r_ambientlight is preclamped
-
-//			temp += (int)(r_shadelight * lightcos);
-	fmuls	C(r_shadelight)
-// FIXME: fast float->int conversion?
-	fistpl	Ltemp
-	addl	Ltemp,%eax
-
-//		// clamp; because we limited the minimum ambient and shading light, we
-//		// don't have to clamp low light, just bright
-//			if (temp < 0)
-//				temp = 0;
-	jns		Lp1
-	subl	%eax,%eax
-
-//		}
-
-Lp1:
-
-//		fv->v[4] = temp;
-//
-//	// x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
-//	// scaled up by 1/2**31, and the scaling cancels out for x and y in the
-//	// projection
-//		fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
-//				aliastransform[0][3]) * zi) + aliasxcenter;
-//		fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
-//				aliastransform[1][3]) * zi) + aliasycenter;
-//		fv->v[5] = zi;
-	fxch	%st(1)				 // v[0] | xaccum | v[2] | v[1] | zi
-	fmuls	C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
-	fxch	%st(3)				 // v[1] | xaccum | v[2] | yaccum | zi
-	fld		%st(0)				 // v[1] | v[1] | xaccum | v[2] | yaccum | zi
-	fmuls	C(aliastransform)+4	 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
-	fxch	%st(1)				 // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
-	movl	%eax,fv_v+16(%edi)
-	fmuls	C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
-								 //  zi
-	fxch	%st(2)				 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
-								 //  zi
-	fadds	C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
-								 //  zi
-	fxch	%st(4)				 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
-								 //  zi
-	fadds	C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
-								 //  zi
-	fxch	%st(3)				 // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
-								 //  zi
-	fld		%st(0)				 // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
-								 //  xaccum | zi
-	fmuls	C(aliastransform)+8	 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
-								 //  xaccum | zi
-	fxch	%st(1)				 // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
-								 //  xaccum | zi
-	fmuls	C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
-								 // yaccum | xaccum | zi
-	fxch	%st(5)				 // xaccum | xaccum3 | xaccum2 | yaccum2 |
-								 // yaccum | yaccum3 | zi
-	faddp	%st(0),%st(2)		 // xaccum3 | xaccum | yaccum2 | yaccum |
-								 //  yaccum3 | zi
-	fxch	%st(3)				 // yaccum | xaccum | yaccum2 | xaccum3 |
-								 //  yaccum3 | zi
-	faddp	%st(0),%st(2)		 // xaccum | yaccum | xaccum3 | yaccum3 | zi
-	addl	$(tv_size),%esi
-	faddp	%st(0),%st(2)		 // yaccum | x | yaccum3 | zi
-	faddp	%st(0),%st(2)		 // x | y | zi
-	addl	$(stv_size),%ebp
-	fmul	%st(2),%st(0)		 // x/z | y | zi
-	fxch	%st(1)				 // y | x/z | zi
-	fmul	%st(2),%st(0)		 // y/z | x/z | zi
-	fxch	%st(1)				 // x/z | y/z | zi
-	fadds	C(aliasxcenter)		 // u | y/z | zi
-	fxch	%st(1)				 // y/z | u | zi
-	fadds	C(aliasycenter)		 // v | u | zi
-	fxch	%st(2)				 // zi | u | v
-// FIXME: fast float->int conversion?
-	fistpl	fv_v+20(%edi)		 // u | v
-	fistpl	fv_v+0(%edi)		 // v
-	fistpl	fv_v+4(%edi)
-
-//	}
-
-	addl	$(fv_size),%edi
-	decl	%ecx
-	jnz		Lloop
-
-	popl	%esi				// restore register variables
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-Lsavelight:
-	fstp	%st(0)
-	jmp		Lp1
-
-#endif	// id386
-
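R_AliasTransformAndProjectFinalVerts above follows its commented-out C closely: compute 1/z, project x and y, copy the skin s/t and onseam flag, then light the vertex from the ambient and shade terms. A hedged sketch of that per-vertex loop; the types and globals it touches are pared-down stand-ins declared only so the sketch is self-contained, and zi arrives pre-scaled by the transform setup, which is why storing it as an integer keeps precision.

typedef unsigned char byte;
typedef float vec3_t[3];
typedef struct { int v[6]; int flags; } finalvert_t;
typedef struct { byte v[3]; byte lightnormalindex; } trivertx_t;
typedef struct { int onseam; int s, t; } stvert_t;

#define DotProduct(a, b) ((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2])

extern float		aliastransform[3][4];
extern float		aliasxcenter, aliasycenter;
extern float		r_avertexnormals[][3];
extern vec3_t		r_plightvec;
extern int		r_ambientlight;
extern float		r_shadelight;
extern trivertx_t	*r_apverts;
extern int		r_anumverts;

void
R_AliasTransformAndProjectFinalVerts_sketch(finalvert_t *fv, stvert_t *pstverts)
{
	trivertx_t *pverts;
	float *plightnormal, lightcos, zi;
	int i, temp;

	pverts = r_apverts;
	for(i = 0; i < r_anumverts; i++, fv++, pverts++, pstverts++){
		/* transform and project: 1/z first, then x and y scaled by it */
		zi = 1.0 / (DotProduct(pverts->v, aliastransform[2])
			+ aliastransform[2][3]);
		fv->v[5] = zi;
		fv->v[0] = (DotProduct(pverts->v, aliastransform[0])
			+ aliastransform[0][3])*zi + aliasxcenter;
		fv->v[1] = (DotProduct(pverts->v, aliastransform[1])
			+ aliastransform[1][3])*zi + aliasycenter;

		/* skin coordinates and seam flag come straight from the stvert */
		fv->v[2] = pstverts->s;
		fv->v[3] = pstverts->t;
		fv->flags = pstverts->onseam;

		/* lighting: ambient plus clamped shade contribution */
		plightnormal = r_avertexnormals[pverts->lightnormalindex];
		lightcos = DotProduct(plightnormal, r_plightvec);
		temp = r_ambientlight;
		if(lightcos < 0){
			temp += (int)(r_shadelight*lightcos);
			if(temp < 0)
				temp = 0;
		}
		fv->v[4] = temp;
	}
}
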
--- a/r_drawa.s
+++ /dev/null
@@ -1,819 +1,0 @@
-//
-// r_drawa.s
-// x86 assembly-language edge clipping and emission code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef	id386
-
-// !!! if these are changed, they must be changed in r_draw.c too !!!
-#define FULLY_CLIPPED_CACHED	0x80000000
-#define FRAMECOUNT_MASK			0x7FFFFFFF
-
-	.data
-
-Ld0:			.single		0.0
-Ld1:			.single		0.0
-Lstack:			.long		0
-Lfp_near_clip:	.single		NEAR_CLIP
-Lceilv0:		.long		0
-Lv:				.long		0
-Lu0:			.long		0
-Lv0:			.long		0
-Lzi0:			.long		0
-
-	.text
-
-//----------------------------------------------------------------------
-// edge clipping code
-//----------------------------------------------------------------------
-
-#define pv0		4+12
-#define pv1		8+12
-#define clip	12+12
-
-	.align 4
-.globl C(R_ClipEdge)
-C(R_ClipEdge):
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-	movl	%esp,Lstack			// for clearing the stack later
-
-//	float		d0, d1, f;
-//	mvertex_t	clipvert;
-
-	movl	clip(%esp),%ebx
-	movl	pv0(%esp),%esi
-	movl	pv1(%esp),%edx
-
-//	if (clip)
-//	{
-	testl	%ebx,%ebx
-	jz		Lemit
-
-//		do
-//		{
-
-Lcliploop:
-
-//			d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
-//			d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
-	flds	mv_position+0(%esi)
-	fmuls	cp_normal+0(%ebx)
-	flds	mv_position+4(%esi)
-	fmuls	cp_normal+4(%ebx)
-	flds	mv_position+8(%esi)
-	fmuls	cp_normal+8(%ebx)
-	fxch	%st(1)
-	faddp	%st(0),%st(2)		// d0mul2 | d0add0
-
-	flds	mv_position+0(%edx)
-	fmuls	cp_normal+0(%ebx)
-	flds	mv_position+4(%edx)
-	fmuls	cp_normal+4(%ebx)
-	flds	mv_position+8(%edx)
-	fmuls	cp_normal+8(%ebx)
-	fxch	%st(1)
-	faddp	%st(0),%st(2)		// d1mul2 | d1add0 | d0mul2 | d0add0
-	fxch	%st(3)				// d0add0 | d1add0 | d0mul2 | d1mul2
-
-	faddp	%st(0),%st(2)		// d1add0 | dot0 | d1mul2 
-	faddp	%st(0),%st(2)		// dot0 | dot1
-
-	fsubs	cp_dist(%ebx)		// d0 | dot1
-	fxch	%st(1)				// dot1 | d0
-	fsubs	cp_dist(%ebx)		// d1 | d0
-	fxch	%st(1)
-	fstps	Ld0
-	fstps	Ld1
-
-//			if (d0 >= 0)
-//			{
-	movl	Ld0,%eax
-	movl	Ld1,%ecx
-	orl		%eax,%ecx
-	js		Lp2
-
-// both points are unclipped
-
-Lcontinue:
-
-//
-//				R_ClipEdge (&clipvert, pv1, clip->next);
-//				return;
-//			}
-//		} while ((clip = clip->next) != NULL);
-	movl	cp_next(%ebx),%ebx
-	testl	%ebx,%ebx
-	jnz		Lcliploop
-
-//	}
-
-//// add the edge
-//	R_EmitEdge (pv0, pv1);
-Lemit:
-
-//
-// set integer rounding to ceil mode, set to single precision
-//
-// FIXME: do away with by manually extracting integers from floats?
-// FIXME: set less often
-	fldcw	ceil_cw
-
-//	edge_t	*edge, *pcheck;
-//	int		u_check;
-//	float	u, u_step;
-//	vec3_t	local, transformed;
-//	float	*world;
-//	int		v, v2, ceilv0;
-//	float	scale, lzi0, u0, v0;
-//	int		side;
-
-//	if (r_lastvertvalid)
-//	{
-	cmpl	$0,C(r_lastvertvalid)
-	jz		LCalcFirst
-
-//		u0 = r_u1;
-//		v0 = r_v1;
-//		lzi0 = r_lzi1;
-//		ceilv0 = r_ceilv1;
-	movl	C(r_lzi1),%eax
-	movl	C(r_u1),%ecx
-	movl	%eax,Lzi0
-	movl	%ecx,Lu0
-	movl	C(r_v1),%ecx
-	movl	C(r_ceilv1),%eax
-	movl	%ecx,Lv0
-	movl	%eax,Lceilv0
-	jmp		LCalcSecond
-
-//	}
-
-LCalcFirst:
-
-//	else
-//	{
-//		world = &pv0->position[0];
-
-	call	LTransformAndProject	// v0 | lzi0 | u0
-
-	fsts	Lv0
-	fxch	%st(2)					// u0 | lzi0 | v0
-	fstps	Lu0						// lzi0 | v0
-	fstps	Lzi0					// v0
-
-//		ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
-	fistpl	Lceilv0
-
-//	}
-
-LCalcSecond:
-
-//	world = &pv1->position[0];
-	movl	%edx,%esi
-
-	call	LTransformAndProject	// v1 | lzi1 | u1
-
-	flds	Lu0						// u0 | v1 | lzi1 | u1
-	fxch	%st(3)					// u1 | v1 | lzi1 | u0
-	flds	Lzi0					// lzi0 | u1 | v1 | lzi1 | u0
-	fxch	%st(3)					// lzi1 | u1 | v1 | lzi0 | u0
-	flds	Lv0						// v0 | lzi1 | u1 | v1 | lzi0 | u0
-	fxch	%st(3)					// v1 | lzi1 | u1 | v0 | lzi0 | u0
-
-//	r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
-	fistl	C(r_ceilv1)
-
-	fldcw	single_cw				// put back normal floating-point state
-
-	fsts	C(r_v1)
-	fxch	%st(4)					// lzi0 | lzi1 | u1 | v0 | v1 | u0
-
-//	if (r_lzi1 > lzi0)
-//		lzi0 = r_lzi1;
-	fcom	%st(1)
-	fnstsw	%ax
-	testb	$1,%ah
-	jz		LP0
-	fstp	%st(0)
-	fld		%st(0)
-LP0:
-
-	fxch	%st(1)					// lzi1 | lzi0 | u1 | v0 | v1 | u0
-	fstps	C(r_lzi1)				// lzi0 | u1 | v0 | v1 | u0
-	fxch	%st(1)
-	fsts	C(r_u1)
-	fxch	%st(1)
-
-//	if (lzi0 > r_nearzi)	// for mipmap finding
-//		r_nearzi = lzi0;
-	fcoms	C(r_nearzi)
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jnz		LP1
-	fsts	C(r_nearzi)
-LP1:
-
-// // for right edges, all we want is the effect on 1/z
-//	if (r_nearzionly)
-//		return;
-	movl	C(r_nearzionly),%eax
-	testl	%eax,%eax
-	jz		LP2
-LPop5AndDone:
-	movl	C(cacheoffset),%eax
-	movl	C(r_framecount),%edx
-	cmpl	$0x7FFFFFFF,%eax
-	jz		LDoPop
-	andl	$(FRAMECOUNT_MASK),%edx
-	orl		$(FULLY_CLIPPED_CACHED),%edx
-	movl	%edx,C(cacheoffset)
-
-LDoPop:
-	fstp	%st(0)			// u1 | v0 | v1 | u0
-	fstp	%st(0)			// v0 | v1 | u0
-	fstp	%st(0)			// v1 | u0
-	fstp	%st(0)			// u0
-	fstp	%st(0)
-	jmp		Ldone
-
-LP2:
-
-// // create the edge
-//	if (ceilv0 == r_ceilv1)
-//		return;		// horizontal edge
-	movl	Lceilv0,%ebx
-	movl	C(edge_p),%edi
-	movl	C(r_ceilv1),%ecx
-	movl	%edi,%edx
-	movl	C(r_pedge),%esi
-	addl	$(et_size),%edx
-	cmpl	%ecx,%ebx
-	jz		LPop5AndDone
-
-	movl	C(r_pedge),%eax
-	movl	%eax,et_owner(%edi)
-
-//	side = ceilv0 > r_ceilv1;
-//
-//	edge->nearzi = lzi0;
-	fstps	et_nearzi(%edi)		// u1 | v0 | v1 | u0
-
-//	if (side == 1)
-//	{
-	jc		LSide0
-
-LSide1:
-
-//	// leading edge (go from p2 to p1)
-
-//		u_step = ((u0 - r_u1) / (v0 - r_v1));
-	fsubrp	%st(0),%st(3)		// v0 | v1 | u0-u1
-	fsub	%st(1),%st(0)		// v0-v1 | v1 | u0-u1
-	fdivrp	%st(0),%st(2)		// v1 | ustep
-
-//	r_emitted = 1;
-	movl	$1,C(r_emitted)
-
-//	edge = edge_p++;
-	movl	%edx,C(edge_p)
-
-// pretouch next edge
-	movl	(%edx),%eax
-
-//		v2 = ceilv0 - 1;
-//		v = r_ceilv1;
-	movl	%ecx,%eax
-	leal	-1(%ebx),%ecx
-	movl	%eax,%ebx
-
-//		edge->surfs[0] = 0;
-//		edge->surfs[1] = surface_p - surfaces;
-	movl	C(surface_p),%eax
-	movl	C(surfaces),%esi
-	subl	%edx,%edx
-	subl	%esi,%eax
-	shrl	$(SURF_T_SHIFT),%eax
-	movl	%edx,et_surfs(%edi)
-	movl	%eax,et_surfs+2(%edi)
-
-	subl	%esi,%esi
-
-//		u = r_u1 + ((float)v - r_v1) * u_step;
-	movl	%ebx,Lv
-	fildl	Lv					// v | v1 | ustep
-	fsubp	%st(0),%st(1)		// v-v1 | ustep
-	fmul	%st(1),%st(0)		// (v-v1)*ustep | ustep
-	fadds	C(r_u1)				// u | ustep
-
-	jmp		LSideDone
-
-//	}
-
-LSide0:
-
-//	else
-//	{
-//	// trailing edge (go from p1 to p2)
-
-//		u_step = ((r_u1 - u0) / (r_v1 - v0));
-	fsub	%st(3),%st(0)		// u1-u0 | v0 | v1 | u0
-	fxch	%st(2)				// v1 | v0 | u1-u0 | u0
-	fsub	%st(1),%st(0)		// v1-v0 | v0 | u1-u0 | u0
-	fdivrp	%st(0),%st(2)		// v0 | ustep | u0
-
-//	r_emitted = 1;
-	movl	$1,C(r_emitted)
-
-//	edge = edge_p++;
-	movl	%edx,C(edge_p)
-
-// pretouch next edge
-	movl	(%edx),%eax
-
-//		v = ceilv0;
-//		v2 = r_ceilv1 - 1;
-	decl	%ecx
-
-//		edge->surfs[0] = surface_p - surfaces;
-//		edge->surfs[1] = 0;
-	movl	C(surface_p),%eax
-	movl	C(surfaces),%esi
-	subl	%edx,%edx
-	subl	%esi,%eax
-	shrl	$(SURF_T_SHIFT),%eax
-	movl	%edx,et_surfs+2(%edi)
-	movl	%eax,et_surfs(%edi)
-
-	movl	$1,%esi
-
-//		u = u0 + ((float)v - v0) * u_step;
-	movl	%ebx,Lv
-	fildl	Lv					// v | v0 | ustep | u0
-	fsubp	%st(0),%st(1)		// v-v0 | ustep | u0
-	fmul	%st(1),%st(0)		// (v-v0)*ustep | ustep | u0
-	faddp	%st(0),%st(2)		// ustep | u
-	fxch	%st(1)				// u | ustep
-
-//	}
-
-LSideDone:
-
-//	edge->u_step = u_step*0x100000;
-//	edge->u = u*0x100000 + 0xFFFFF;
-
-	fmuls	fp_1m				// u*0x100000 | ustep
-	fxch	%st(1)				// ustep | u*0x100000
-	fmuls	fp_1m				// ustep*0x100000 | u*0x100000
-	fxch	%st(1)				// u*0x100000 | ustep*0x100000
-	fadds	fp_1m_minus_1		// u*0x100000 + 0xFFFFF | ustep*0x100000
-	fxch	%st(1)				// ustep*0x100000 | u*0x100000 + 0xFFFFF
-	fistpl	et_u_step(%edi)		// u*0x100000 + 0xFFFFF
-	fistpl	et_u(%edi)
-
-// // we need to do this to avoid stepping off the edges if a very nearly
-// // horizontal edge is less than epsilon above a scan, and numeric error
-// // causes it to incorrectly extend to the scan, and the extension of the
-// // line goes off the edge of the screen
-// // FIXME: is this actually needed?
-//	if (edge->u < r_refdef.vrect_x_adj_shift20)
-//		edge->u = r_refdef.vrect_x_adj_shift20;
-//	if (edge->u > r_refdef.vrectright_adj_shift20)
-//		edge->u = r_refdef.vrectright_adj_shift20;
-	movl	et_u(%edi),%eax
-	movl	C(r_refdef)+rd_vrect_x_adj_shift20,%edx
-	cmpl	%edx,%eax
-	jl		LP4
-	movl	C(r_refdef)+rd_vrectright_adj_shift20,%edx
-	cmpl	%edx,%eax
-	jng		LP5
-LP4:
-	movl	%edx,et_u(%edi)
-	movl	%edx,%eax
-LP5:
-
-// // sort the edge in normally
-//	u_check = edge->u;
-//
-//	if (edge->surfs[0])
-//		u_check++;	// sort trailers after leaders
-	addl	%esi,%eax
-
-//	if (!newedges[v] || newedges[v]->u >= u_check)
-//	{
-	movl	C(newedges)(,%ebx,4),%esi
-	testl	%esi,%esi
-	jz		LDoFirst
-	cmpl	%eax,et_u(%esi)
-	jl		LNotFirst
-LDoFirst:
-
-//		edge->next = newedges[v];
-//		newedges[v] = edge;
-	movl	%esi,et_next(%edi)
-	movl	%edi,C(newedges)(,%ebx,4)
-
-	jmp		LSetRemove
-
-//	}
-
-LNotFirst:
-
-//	else
-//	{
-//		pcheck = newedges[v];
-//
-//		while (pcheck->next && pcheck->next->u < u_check)
-//			pcheck = pcheck->next;
-LFindInsertLoop:
-	movl	%esi,%edx
-	movl	et_next(%esi),%esi
-	testl	%esi,%esi
-	jz		LInsertFound
-	cmpl	%eax,et_u(%esi)
-	jl		LFindInsertLoop
-
-LInsertFound:
-
-//		edge->next = pcheck->next;
-//		pcheck->next = edge;
-	movl	%esi,et_next(%edi)
-	movl	%edi,et_next(%edx)
-
-//	}
-
-LSetRemove:
-
-//	edge->nextremove = removeedges[v2];
-//	removeedges[v2] = edge;
-	movl	C(removeedges)(,%ecx,4),%eax
-	movl	%edi,C(removeedges)(,%ecx,4)
-	movl	%eax,et_nextremove(%edi)
-
-Ldone:
-	movl	Lstack,%esp			// clear temporary variables from stack
-
-	popl	%ebx				// restore register variables
-	popl	%edi
-	popl	%esi
-	ret
-
-// at least one point is clipped
-
-Lp2:
-	testl	%eax,%eax
-	jns		Lp1
-
-//			else
-//			{
-//			// point 0 is clipped
-
-//				if (d1 < 0)
-//				{
-	movl	Ld1,%eax
-	testl	%eax,%eax
-	jns		Lp3
-
-//				// both points are clipped
-//				// we do cache fully clipped edges
-//					if (!r_leftclipped)
-	movl	C(r_leftclipped),%eax
-	movl	C(r_pedge),%ecx
-	testl	%eax,%eax
-	jnz		Ldone
-
-//						cacheoffset = FULLY_CLIPPED_CACHED |
-//								(r_framecount & FRAMECOUNT_MASK);
-	movl	C(r_framecount),%eax
-	andl	$(FRAMECOUNT_MASK),%eax
-	orl		$(FULLY_CLIPPED_CACHED),%eax
-	movl	%eax,C(cacheoffset)
-
-//					return;
-	jmp		Ldone
-
-//				}
-
-Lp1:
-
-//			// point 0 is unclipped
-//				if (d1 >= 0)
-//				{
-//				// both points are unclipped
-//					continue;
-
-//			// only point 1 is clipped
-
-//				f = d0 / (d0 - d1);
-	flds	Ld0
-	flds	Ld1
-	fsubr	%st(1),%st(0)
-
-//			// we don't cache partially clipped edges
-	movl	$0x7FFFFFFF,C(cacheoffset)
-
-	fdivrp	%st(0),%st(1)
-
-	subl	$(mv_size),%esp			// allocate space for clipvert
-
-//				clipvert.position[0] = pv0->position[0] +
-//						f * (pv1->position[0] - pv0->position[0]);
-//				clipvert.position[1] = pv0->position[1] +
-//						f * (pv1->position[1] - pv0->position[1]);
-//				clipvert.position[2] = pv0->position[2] +
-//						f * (pv1->position[2] - pv0->position[2]);
-	flds	mv_position+8(%edx)
-	fsubs	mv_position+8(%esi)
-	flds	mv_position+4(%edx)
-	fsubs	mv_position+4(%esi)
-	flds	mv_position+0(%edx)
-	fsubs	mv_position+0(%esi)		// 0 | 1 | 2
-
-// replace pv1 with the clip point
-	movl	%esp,%edx
-	movl	cp_leftedge(%ebx),%eax
-	testb	%al,%al
-
-	fmul	%st(3),%st(0)
-	fxch	%st(1)					// 1 | 0 | 2
-	fmul	%st(3),%st(0)
-	fxch	%st(2)					// 2 | 0 | 1
-	fmulp	%st(0),%st(3)			// 0 | 1 | 2
-	fadds	mv_position+0(%esi)
-	fxch	%st(1)					// 1 | 0 | 2
-	fadds	mv_position+4(%esi)
-	fxch	%st(2)					// 2 | 0 | 1
-	fadds	mv_position+8(%esi)
-	fxch	%st(1)					// 0 | 2 | 1
-	fstps	mv_position+0(%esp)		// 2 | 1
-	fstps	mv_position+8(%esp)		// 1
-	fstps	mv_position+4(%esp)
-
-//				if (clip->leftedge)
-//				{
-	jz		Ltestright
-
-//					r_leftclipped = true;
-//					r_leftexit = clipvert;
-	movl	$1,C(r_leftclipped)
-	movl	mv_position+0(%esp),%eax
-	movl	%eax,C(r_leftexit)+mv_position+0
-	movl	mv_position+4(%esp),%eax
-	movl	%eax,C(r_leftexit)+mv_position+4
-	movl	mv_position+8(%esp),%eax
-	movl	%eax,C(r_leftexit)+mv_position+8
-
-	jmp		Lcontinue
-
-//				}
-
-Ltestright:
-//				else if (clip->rightedge)
-//				{
-	testb	%ah,%ah
-	jz		Lcontinue
-
-//					r_rightclipped = true;
-//					r_rightexit = clipvert;
-	movl	$1,C(r_rightclipped)
-	movl	mv_position+0(%esp),%eax
-	movl	%eax,C(r_rightexit)+mv_position+0
-	movl	mv_position+4(%esp),%eax
-	movl	%eax,C(r_rightexit)+mv_position+4
-	movl	mv_position+8(%esp),%eax
-	movl	%eax,C(r_rightexit)+mv_position+8
-
-//				}
-//
-//				R_ClipEdge (pv0, &clipvert, clip->next);
-//				return;
-//			}
-	jmp		Lcontinue
-
-//			}
-
-Lp3:
-
-//			// only point 0 is clipped
-//				r_lastvertvalid = false;
-
-	movl	$0,C(r_lastvertvalid)
-
-//				f = d0 / (d0 - d1);
-	flds	Ld0
-	flds	Ld1
-	fsubr	%st(1),%st(0)
-
-//			// we don't cache partially clipped edges
-	movl	$0x7FFFFFFF,C(cacheoffset)
-
-	fdivrp	%st(0),%st(1)
-
-	subl	$(mv_size),%esp			// allocate space for clipvert
-
-//				clipvert.position[0] = pv0->position[0] +
-//						f * (pv1->position[0] - pv0->position[0]);
-//				clipvert.position[1] = pv0->position[1] +
-//						f * (pv1->position[1] - pv0->position[1]);
-//				clipvert.position[2] = pv0->position[2] +
-//						f * (pv1->position[2] - pv0->position[2]);
-	flds	mv_position+8(%edx)
-	fsubs	mv_position+8(%esi)
-	flds	mv_position+4(%edx)
-	fsubs	mv_position+4(%esi)
-	flds	mv_position+0(%edx)
-	fsubs	mv_position+0(%esi)		// 0 | 1 | 2
-
-	movl	cp_leftedge(%ebx),%eax
-	testb	%al,%al
-
-	fmul	%st(3),%st(0)
-	fxch	%st(1)					// 1 | 0 | 2
-	fmul	%st(3),%st(0)
-	fxch	%st(2)					// 2 | 0 | 1
-	fmulp	%st(0),%st(3)			// 0 | 1 | 2
-	fadds	mv_position+0(%esi)
-	fxch	%st(1)					// 1 | 0 | 2
-	fadds	mv_position+4(%esi)
-	fxch	%st(2)					// 2 | 0 | 1
-	fadds	mv_position+8(%esi)
-	fxch	%st(1)					// 0 | 2 | 1
-	fstps	mv_position+0(%esp)		// 2 | 1
-	fstps	mv_position+8(%esp)		// 1
-	fstps	mv_position+4(%esp)
-
-// replace pv0 with the clip point
-	movl	%esp,%esi
-
-//				if (clip->leftedge)
-//				{
-	jz		Ltestright2
-
-//					r_leftclipped = true;
-//					r_leftenter = clipvert;
-	movl	$1,C(r_leftclipped)
-	movl	mv_position+0(%esp),%eax
-	movl	%eax,C(r_leftenter)+mv_position+0
-	movl	mv_position+4(%esp),%eax
-	movl	%eax,C(r_leftenter)+mv_position+4
-	movl	mv_position+8(%esp),%eax
-	movl	%eax,C(r_leftenter)+mv_position+8
-
-	jmp		Lcontinue
-
-//				}
-
-Ltestright2:
-//				else if (clip->rightedge)
-//				{
-	testb	%ah,%ah
-	jz		Lcontinue
-
-//					r_rightclipped = true;
-//					r_rightenter = clipvert;
-	movl	$1,C(r_rightclipped)
-	movl	mv_position+0(%esp),%eax
-	movl	%eax,C(r_rightenter)+mv_position+0
-	movl	mv_position+4(%esp),%eax
-	movl	%eax,C(r_rightenter)+mv_position+4
-	movl	mv_position+8(%esp),%eax
-	movl	%eax,C(r_rightenter)+mv_position+8
-
-//				}
-	jmp		Lcontinue
-
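Both clipped-vertex paths above (Lp1, where only point 1 is clipped, and Lp3, where only point 0 is clipped) compute the same parametric intersection spelled out in the interleaved C comments. Consolidated, it is a plain linear interpolation along the edge; a minimal sketch, with mvertex_t reduced to the one field the code touches:

typedef struct {
	float	position[3];
} mvertex_t;

/* intersection of edge pv0->pv1 with the clip plane, given the signed
   distances d0 (at pv0) and d1 (at pv1), which have opposite signs */
static void
clip_point(const mvertex_t *pv0, const mvertex_t *pv1,
	float d0, float d1, mvertex_t *clipvert)
{
	float	f;
	int	i;

	f = d0 / (d0 - d1);
	for (i = 0; i < 3; i++)
		clipvert->position[i] = pv0->position[i] +
				f * (pv1->position[i] - pv0->position[i]);
}
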
-// %esi = vec3_t point to transform and project
-// %edx preserved
-LTransformAndProject:
-
-//	// transform and project
-//		VectorSubtract (world, modelorg, local);
-	flds	mv_position+0(%esi)
-	fsubs	C(modelorg)+0
-	flds	mv_position+4(%esi)
-	fsubs	C(modelorg)+4
-	flds	mv_position+8(%esi)	
-	fsubs	C(modelorg)+8
-	fxch	%st(2)				// local[0] | local[1] | local[2]
-
-//		TransformVector (local, transformed);
-//	
-//		if (transformed[2] < NEAR_CLIP)
-//			transformed[2] = NEAR_CLIP;
-//	
-//		lzi0 = 1.0 / transformed[2];
-	fld		%st(0)				// local[0] | local[0] | local[1] | local[2]
-	fmuls	C(vpn)+0			// zm0 | local[0] | local[1] | local[2]
-	fld		%st(1)				// local[0] | zm0 | local[0] | local[1] |
-								//  local[2]
-	fmuls	C(vright)+0			// xm0 | zm0 | local[0] | local[1] | local[2]
-	fxch	%st(2)				// local[0] | zm0 | xm0 | local[1] | local[2]
-	fmuls	C(vup)+0			// ym0 |  zm0 | xm0 | local[1] | local[2]
-	fld		%st(3)				// local[1] | ym0 |  zm0 | xm0 | local[1] |
-								//  local[2]
-	fmuls	C(vpn)+4			// zm1 | ym0 | zm0 | xm0 | local[1] |
-								//  local[2]
-	fld		%st(4)				// local[1] | zm1 | ym0 | zm0 | xm0 |
-								//  local[1] | local[2]
-	fmuls	C(vright)+4			// xm1 | zm1 | ym0 |  zm0 | xm0 |
-								//  local[1] | local[2]
-	fxch	%st(5)				// local[1] | zm1 | ym0 | zm0 | xm0 |
-								//  xm1 | local[2]
-	fmuls	C(vup)+4			// ym1 | zm1 | ym0 | zm0 | xm0 |
-								//  xm1 | local[2]
-	fxch	%st(1)				// zm1 | ym1 | ym0 | zm0 | xm0 |
-								//  xm1 | local[2]
-	faddp	%st(0),%st(3)		// ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
-	fxch	%st(3)				// xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
-	faddp	%st(0),%st(4)		// ym0 | zm2 | ym1 | xm2 | local[2]
-	faddp	%st(0),%st(2)		// zm2 | ym2 | xm2 | local[2]
-	fld		%st(3)				// local[2] | zm2 | ym2 | xm2 | local[2]
-	fmuls	C(vpn)+8			// zm3 | zm2 | ym2 | xm2 | local[2]
-	fld		%st(4)				// local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
-	fmuls	C(vright)+8			// xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
-	fxch	%st(5)				// local[2] | zm3 | zm2 | ym2 | xm2 | xm3
-	fmuls	C(vup)+8			// ym3 | zm3 | zm2 | ym2 | xm2 | xm3
-	fxch	%st(1)				// zm3 | ym3 | zm2 | ym2 | xm2 | xm3
-	faddp	%st(0),%st(2)		// ym3 | zm4 | ym2 | xm2 | xm3
-	fxch	%st(4)				// xm3 | zm4 | ym2 | xm2 | ym3
-	faddp	%st(0),%st(3)		// zm4 | ym2 | xm4 | ym3
-	fxch	%st(1)				// ym2 | zm4 | xm4 | ym3
-	faddp	%st(0),%st(3)		// zm4 | xm4 | ym4
-
-	fcoms	Lfp_near_clip
-	fnstsw	%ax
-	testb	$1,%ah
-	jz		LNoClip
-	fstp	%st(0)
-	flds	Lfp_near_clip
-
-LNoClip:
-
-	fdivrs	float_1				// lzi0 | x | y
-	fxch	%st(1)				// x | lzi0 | y
-
-//	// FIXME: build x/yscale into transform?
-//		scale = xscale * lzi0;
-//		u0 = (xcenter + scale*transformed[0]);
-	flds	C(xscale)			// xscale | x | lzi0 | y
-	fmul	%st(2),%st(0)		// scale | x | lzi0 | y
-	fmulp	%st(0),%st(1)		// scale*x | lzi0 | y
-	fadds	C(xcenter)			// u0 | lzi0 | y
-
-//		if (u0 < r_refdef.fvrectx_adj)
-//			u0 = r_refdef.fvrectx_adj;
-//		if (u0 > r_refdef.fvrectright_adj)
-//			u0 = r_refdef.fvrectright_adj;
-// FIXME: use integer compares of floats?
-	fcoms	C(r_refdef)+rd_fvrectx_adj
-	fnstsw	%ax
-	testb	$1,%ah
-	jz		LClampP0
-	fstp	%st(0)
-	flds	C(r_refdef)+rd_fvrectx_adj
-LClampP0:
-	fcoms	C(r_refdef)+rd_fvrectright_adj
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jnz		LClampP1
-	fstp	%st(0)
-	flds	C(r_refdef)+rd_fvrectright_adj
-LClampP1:
-
-	fld		%st(1)				// lzi0 | u0 | lzi0 | y
-
-//		scale = yscale * lzi0;
-//		v0 = (ycenter - scale*transformed[1]);
-	fmuls	C(yscale)			// scale | u0 | lzi0 | y
-	fmulp	%st(0),%st(3)		// u0 | lzi0 | scale*y
-	fxch	%st(2)				// scale*y | lzi0 | u0
-	fsubrs	C(ycenter)			// v0 | lzi0 | u0
-
-//		if (v0 < r_refdef.fvrecty_adj)
-//			v0 = r_refdef.fvrecty_adj;
-//		if (v0 > r_refdef.fvrectbottom_adj)
-//			v0 = r_refdef.fvrectbottom_adj;
-// FIXME: use integer compares of floats?
-	fcoms	C(r_refdef)+rd_fvrecty_adj
-	fnstsw	%ax
-	testb	$1,%ah
-	jz		LClampP2
-	fstp	%st(0)
-	flds	C(r_refdef)+rd_fvrecty_adj
-LClampP2:
-	fcoms	C(r_refdef)+rd_fvrectbottom_adj
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jnz		LClampP3
-	fstp	%st(0)
-	flds	C(r_refdef)+rd_fvrectbottom_adj
-LClampP3:
-	ret
-
-#endif	// id386
-
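LTransformAndProject above is the per-vertex transform and projection used by the clipper: subtract the view origin, rotate into the (vright, vup, vpn) basis, clamp z to NEAR_CLIP, then project with 1/z. A self-contained sketch that packs the relevant globals into an illustrative view-state struct; the clamping of u and v against the adjusted view rectangle done by the asm is left out for brevity:

#define NEAR_CLIP	0.01f

typedef struct {
	float	vright[3], vup[3], vpn[3];	/* view basis vectors */
	float	modelorg[3];			/* view origin in model space */
	float	xscale, yscale;
	float	xcenter, ycenter;
} viewstate_t;

static float
dot3(const float *a, const float *b)
{
	return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];
}

static void
transform_and_project(const viewstate_t *vs, const float *world,
	float *u, float *v, float *lzi)
{
	float	local[3], tx, ty, tz, zi;

	local[0] = world[0] - vs->modelorg[0];
	local[1] = world[1] - vs->modelorg[1];
	local[2] = world[2] - vs->modelorg[2];

	tx = dot3(local, vs->vright);
	ty = dot3(local, vs->vup);
	tz = dot3(local, vs->vpn);
	if (tz < NEAR_CLIP)
		tz = NEAR_CLIP;

	zi = 1.0f / tz;		/* also used later for mipmap selection */
	*lzi = zi;
	*u = vs->xcenter + vs->xscale * zi * tx;
	*v = vs->ycenter - vs->yscale * zi * ty;
}
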
--- a/r_edgea.s
+++ /dev/null
@@ -1,731 +1,0 @@
-//
-// r_edgea.s
-// x86 assembly-language edge-processing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef	id386
-
-	.data
-Ltemp:					.long	0
-float_1_div_0100000h:	.long	0x35800000	// 1.0/(float)0x100000
-float_point_999:		.single	0.999
-float_1_point_001:		.single	1.001
-
-	.text
-
-//--------------------------------------------------------------------
-
-#define edgestoadd	4+8		// note odd stack offsets because of interleaving
-#define edgelist	8+12	// with pushes
-
-.globl C(R_EdgeCodeStart)
-C(R_EdgeCodeStart):
-
-.globl C(R_InsertNewEdges)
-C(R_InsertNewEdges):
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	movl	edgestoadd(%esp),%edx
-	pushl	%ebx
-	movl	edgelist(%esp),%ecx
-
-LDoNextEdge:
-	movl	et_u(%edx),%eax
-	movl	%edx,%edi
-
-LContinueSearch:
-	movl	et_u(%ecx),%ebx
-	movl	et_next(%ecx),%esi
-	cmpl	%ebx,%eax
-	jle		LAddedge
-	movl	et_u(%esi),%ebx
-	movl	et_next(%esi),%ecx
-	cmpl	%ebx,%eax
-	jle		LAddedge2
-	movl	et_u(%ecx),%ebx
-	movl	et_next(%ecx),%esi
-	cmpl	%ebx,%eax
-	jle		LAddedge
-	movl	et_u(%esi),%ebx
-	movl	et_next(%esi),%ecx
-	cmpl	%ebx,%eax
-	jg		LContinueSearch
-
-LAddedge2:
-	movl	et_next(%edx),%edx
-	movl	et_prev(%esi),%ebx
-	movl	%esi,et_next(%edi)
-	movl	%ebx,et_prev(%edi)
-	movl	%edi,et_next(%ebx)
-	movl	%edi,et_prev(%esi)
-	movl	%esi,%ecx
-
-	cmpl	$0,%edx
-	jnz		LDoNextEdge
-	jmp		LDone
-
-	.align 4
-LAddedge:
-	movl	et_next(%edx),%edx
-	movl	et_prev(%ecx),%ebx
-	movl	%ecx,et_next(%edi)
-	movl	%ebx,et_prev(%edi)
-	movl	%edi,et_next(%ebx)
-	movl	%edi,et_prev(%ecx)
-
-	cmpl	$0,%edx
-	jnz		LDoNextEdge
-
-LDone:
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-
-	ret
-
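R_InsertNewEdges above is a four-way unrolled sorted insertion: each new edge, already ordered by u, is linked into the u-sorted active edge list just before the first edge whose u is not smaller. The rolled-up equivalent, assuming (as the caller arranges) that the list ends in a sentinel edge whose u no new edge can exceed:

typedef struct edge_s {
	int		u, u_step;	/* fixed point, 20 fractional bits */
	struct edge_s	*prev, *next;
} edge_t;

static void
insert_new_edges(edge_t *edgestoadd, edge_t *edgelist)
{
	edge_t	*next_edge;

	do {
		next_edge = edgestoadd->next;

		/* find the first active edge at or beyond our u */
		while (edgelist->u < edgestoadd->u)
			edgelist = edgelist->next;

		/* link in just before it */
		edgestoadd->next = edgelist;
		edgestoadd->prev = edgelist->prev;
		edgelist->prev->next = edgestoadd;
		edgelist->prev = edgestoadd;
	} while ((edgestoadd = next_edge) != NULL);
}

Because both lists are sorted, the search resumes from the current position rather than the list head for each added edge, which keeps the whole insertion pass roughly linear.
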
-//--------------------------------------------------------------------
-
-#define predge	4+4
-
-.globl C(R_RemoveEdges)
-C(R_RemoveEdges):
-	pushl	%ebx
-	movl	predge(%esp),%eax
-
-Lre_loop:
-	movl	et_next(%eax),%ecx
-	movl	et_nextremove(%eax),%ebx
-	movl	et_prev(%eax),%edx
-	testl	%ebx,%ebx
-	movl	%edx,et_prev(%ecx)
-	jz		Lre_done
-	movl	%ecx,et_next(%edx)
-
-	movl	et_next(%ebx),%ecx
-	movl	et_prev(%ebx),%edx
-	movl	et_nextremove(%ebx),%eax
-	movl	%edx,et_prev(%ecx)
-	testl	%eax,%eax
-	movl	%ecx,et_next(%edx)
-	jnz		Lre_loop
-
-	popl	%ebx
-	ret
-
-Lre_done:
-	movl	%ecx,et_next(%edx)
-	popl	%ebx
-
-	ret
-
-//--------------------------------------------------------------------
-
-#define pedgelist	4+4		// note odd stack offset because of interleaving
-							// with pushes
-
-.globl C(R_StepActiveU)
-C(R_StepActiveU):
-	pushl	%edi
-	movl	pedgelist(%esp),%edx
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-	movl	et_prev(%edx),%esi
-
-LNewEdge:
-	movl	et_u(%esi),%edi
-
-LNextEdge:
-	movl	et_u(%edx),%eax
-	movl	et_u_step(%edx),%ebx
-	addl	%ebx,%eax
-	movl	et_next(%edx),%esi
-	movl	%eax,et_u(%edx)
-	cmpl	%edi,%eax
-	jl		LPushBack
-
-	movl	et_u(%esi),%edi
-	movl	et_u_step(%esi),%ebx
-	addl	%ebx,%edi
-	movl	et_next(%esi),%edx
-	movl	%edi,et_u(%esi)
-	cmpl	%eax,%edi
-	jl		LPushBack2
-
-	movl	et_u(%edx),%eax
-	movl	et_u_step(%edx),%ebx
-	addl	%ebx,%eax
-	movl	et_next(%edx),%esi
-	movl	%eax,et_u(%edx)
-	cmpl	%edi,%eax
-	jl		LPushBack
-
-	movl	et_u(%esi),%edi
-	movl	et_u_step(%esi),%ebx
-	addl	%ebx,%edi
-	movl	et_next(%esi),%edx
-	movl	%edi,et_u(%esi)
-	cmpl	%eax,%edi
-	jnl		LNextEdge
-
-LPushBack2:
-	movl	%edx,%ebx
-	movl	%edi,%eax
-	movl	%esi,%edx
-	movl	%ebx,%esi
-
-LPushBack:
-// push it back to keep it sorted
-	movl	et_prev(%edx),%ecx
-	movl	et_next(%edx),%ebx
-
-// done if the -1 in edge_aftertail triggered this
-	cmpl	$(C(edge_aftertail)),%edx
-	jz		LUDone
-
-// pull the edge out of the edge list
-	movl	et_prev(%ecx),%edi
-	movl	%ecx,et_prev(%esi)
-	movl	%ebx,et_next(%ecx)
-
-// find out where the edge goes in the edge list
-LPushBackLoop:
-	movl	et_prev(%edi),%ecx
-	movl	et_u(%edi),%ebx
-	cmpl	%ebx,%eax
-	jnl		LPushBackFound
-
-	movl	et_prev(%ecx),%edi
-	movl	et_u(%ecx),%ebx
-	cmpl	%ebx,%eax
-	jl		LPushBackLoop
-
-	movl	%ecx,%edi
-
-// put the edge back into the edge list
-LPushBackFound:
-	movl	et_next(%edi),%ebx
-	movl	%edi,et_prev(%edx)
-	movl	%ebx,et_next(%edx)
-	movl	%edx,et_next(%edi)
-	movl	%edx,et_prev(%ebx)
-
-	movl	%esi,%edx
-	movl	et_prev(%esi),%esi
-
-	cmpl	$(C(edge_tail)),%edx
-	jnz		LNewEdge
-
-LUDone:
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-
-	ret
-
-//--------------------------------------------------------------------
-
-#define surf	4		// note this is loaded before any pushes
-
-	.align 4
-TrailingEdge:
-	movl	st_spanstate(%esi),%eax	// check for edge inversion
-	decl	%eax
-	jnz		LInverted
-
-	movl	%eax,st_spanstate(%esi)
-	movl	st_insubmodel(%esi),%ecx
-	movl	0x12345678,%edx		// surfaces[1].st_next
-LPatch0:
-	movl	C(r_bmodelactive),%eax
-	subl	%ecx,%eax
-	cmpl	%esi,%edx
-	movl	%eax,C(r_bmodelactive)
-	jnz		LNoEmit				// surface isn't on top, just remove
-
-// emit a span (current top going away)
-	movl	et_u(%ebx),%eax
-	shrl	$20,%eax				// iu = integral pixel u
-	movl	st_last_u(%esi),%edx
-	movl	st_next(%esi),%ecx
-	cmpl	%edx,%eax
-	jle		LNoEmit2				// iu <= surf->last_u, so nothing to emit
-
-	movl	%eax,st_last_u(%ecx)	// surf->next->last_u = iu;
-	subl	%edx,%eax
-	movl	%edx,espan_t_u(%ebp)		// span->u = surf->last_u;
-
-	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
-	movl	C(current_iv),%eax
-	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
-	movl	st_spans(%esi),%eax
-	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf->spans;
-	movl	%ebp,st_spans(%esi)			// surf->spans = span;
-	addl	$(espan_t_size),%ebp
-
-	movl	st_next(%esi),%edx		// remove the surface from the surface
-	movl	st_prev(%esi),%esi		// stack
-
-	movl	%edx,st_next(%esi)
-	movl	%esi,st_prev(%edx)
-	ret
-
-LNoEmit2:
-	movl	%eax,st_last_u(%ecx)	// surf->next->last_u = iu;
-	movl	st_next(%esi),%edx		// remove the surface from the surface
-	movl	st_prev(%esi),%esi		// stack
-
-	movl	%edx,st_next(%esi)
-	movl	%esi,st_prev(%edx)
-	ret
-
-LNoEmit:
-	movl	st_next(%esi),%edx		// remove the surface from the surface
-	movl	st_prev(%esi),%esi		// stack
-
-	movl	%edx,st_next(%esi)
-	movl	%esi,st_prev(%edx)
-	ret
-
-LInverted:
-	movl	%eax,st_spanstate(%esi)
-	ret
-
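TrailingEdge above emits a span whenever the surface being removed was the one on top of the stack: the span runs from the column saved in surf->last_u to the integral pixel column of the current edge (et_u >> 20, since u carries 20 fractional bits). A sketch of just the emission step with simplified stand-in types; the asm additionally stores the new column into the next surface's last_u so the following span starts where this one ends:

typedef struct espan_s {
	int		u, v, count;
	struct espan_s	*pnext;
} espan_t;

typedef struct surf_s {
	espan_t		*spans;
	int		last_u;
	/* other surf_t fields elided */
} surf_t;

/* returns the advanced span pointer (span_p lives in %ebp in the asm) */
static espan_t *
emit_span(surf_t *surf, int iu, int current_iv, espan_t *span_p)
{
	if (iu > surf->last_u) {
		span_p->u = surf->last_u;
		span_p->count = iu - span_p->u;
		span_p->v = current_iv;
		span_p->pnext = surf->spans;
		surf->spans = span_p;
		span_p++;
	}
	return span_p;
}
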
-//--------------------------------------------------------------------
-
-// trailing edge only
-Lgs_trailing:
-	pushl	$Lgs_nextedge
-	jmp		TrailingEdge
-
-
-.globl C(R_GenerateSpans)
-C(R_GenerateSpans):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-// clear active surfaces to just the background surface
-	movl	C(surfaces),%eax
-	movl	C(edge_head_u_shift20),%edx
-	addl	$(st_size),%eax
-// %ebp = span_p throughout
-	movl	C(span_p),%ebp
-
-	movl	$0,C(r_bmodelactive)
-
-	movl	%eax,st_next(%eax)
-	movl	%eax,st_prev(%eax)
-	movl	%edx,st_last_u(%eax)
-	movl	C(edge_head)+et_next,%ebx		// edge=edge_head.next
-
-// generate spans
-	cmpl	$(C(edge_tail)),%ebx		// done if empty list
-	jz		Lgs_lastspan
-
-Lgs_edgeloop:
-
-	movl	et_surfs(%ebx),%edi
-	movl	C(surfaces),%eax
-	movl	%edi,%esi
-	andl	$0xFFFF0000,%edi
-	andl	$0xFFFF,%esi
-	jz		Lgs_leading		// not a trailing edge
-
-// it has a left surface, so a surface is going away for this span
-	shll	$(SURF_T_SHIFT),%esi
-	addl	%eax,%esi
-	testl	%edi,%edi
-	jz		Lgs_trailing
-
-// both leading and trailing
-	call	TrailingEdge
-	movl	C(surfaces),%eax
-
-// ---------------------------------------------------------------
-// handle a leading edge
-// ---------------------------------------------------------------
-
-Lgs_leading:
-	shrl	$16-SURF_T_SHIFT,%edi
-	movl	C(surfaces),%eax
-	addl	%eax,%edi
-	movl	0x12345678,%esi		// surf2 = surfaces[1].next;
-LPatch2:
-	movl	st_spanstate(%edi),%edx
-	movl	st_insubmodel(%edi),%eax
-	testl	%eax,%eax
-	jnz		Lbmodel_leading
-
-// handle a leading non-bmodel edge
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
-	testl	%edx,%edx
-	jnz		Lxl_done
-
-
-// if (surf->key < surf2->key)
-//		goto newtop;
-	incl	%edx
-	movl	st_key(%edi),%eax
-	movl	%edx,st_spanstate(%edi)
-	movl	st_key(%esi),%ecx
-	cmpl	%ecx,%eax
-	jl		Lnewtop
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// 		surf2 = surf2->next;
-// } while (surf->key >= surf2->key);
-Lsortloopnb:
-	movl	st_next(%esi),%esi
-	movl	st_key(%esi),%ecx
-	cmpl	%ecx,%eax
-	jge		Lsortloopnb
-
-	jmp		LInsertAndExit
-
-
-// handle a leading bmodel edge
-	.align	4
-Lbmodel_leading:
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
-	testl	%edx,%edx
-	jnz		Lxl_done
-
-	movl	C(r_bmodelactive),%ecx
-	incl	%edx
-	incl	%ecx
-	movl	%edx,st_spanstate(%edi)
-	movl	%ecx,C(r_bmodelactive)
-
-// if (surf->key < surf2->key)
-//		goto newtop;
-	movl	st_key(%edi),%eax
-	movl	st_key(%esi),%ecx
-	cmpl	%ecx,%eax
-	jl		Lnewtop
-
-// if ((surf->key == surf2->key) && surf->insubmodel)
-// {
-	jz		Lzcheck_for_newtop
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// 		surf2 = surf2->next;
-// } while (surf->key > surf2->key);
-Lsortloop:
-	movl	st_next(%esi),%esi
-	movl	st_key(%esi),%ecx
-	cmpl	%ecx,%eax
-	jg		Lsortloop
-
-	jne		LInsertAndExit
-
-// Do 1/z sorting to see if we've arrived in the right position
-	movl	et_u(%ebx),%eax
-	subl	$0xFFFFF,%eax
-	movl	%eax,Ltemp
-	fildl	Ltemp
-
-	fmuls	float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
-								//      (1.0 / 0x100000);
-
-	fld		%st(0)				// fu | fu
-	fmuls	st_d_zistepu(%edi)	// fu*surf->d_zistepu | fu
-	flds	C(fv)					// fv | fu*surf->d_zistepu | fu
-	fmuls	st_d_zistepv(%edi)	// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
-	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
-	fadds	st_d_ziorigin(%edi)	// fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-
-	flds	st_d_zistepu(%esi)	// surf2->d_zistepu |
-								//  fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-	fmul	%st(3),%st(0)		// fu*surf2->d_zistepu |
-								//  fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fu*surf2->d_zistepu |
-								//  fv*surf->d_zistepv | fu
-	faddp	%st(0),%st(2)		// fu*surf2->d_zistepu | newzi | fu
-
-	flds	C(fv)					// fv | fu*surf2->d_zistepu | newzi | fu
-	fmuls	st_d_zistepv(%esi)	// fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-	fld		%st(2)				// newzi | fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-	fmuls	float_point_999		// newzibottom | fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-
-	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
-								//  newzibottom | newzi | fu
-	fadds	st_d_ziorigin(%esi)	// fu*surf2->d_zistepu + surf2->d_ziorigin |
-								//  fv*surf2->d_zistepv | newzibottom | newzi |
-								//  fu
-	faddp	%st(0),%st(1)		// testzi | newzibottom | newzi | fu
-	fxch	%st(1)				// newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-//     goto Lgotposition;
-
-	fcomp	%st(1)				// testzi | newzi | fu
-
-	fxch	%st(1)				// newzi | testzi | fu
-	fmuls	float_1_point_001	// newzitop | testzi | fu
-	fxch	%st(1)				// testzi | newzitop | fu
-
-	fnstsw	%ax
-	testb	$0x01,%ah
-	jz		Lgotposition_fpop3
-
-// if (newzitop >= testzi)
-// {
-
-	fcomp	%st(1)				// newzitop | fu
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jz		Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-//     goto newtop;
-
-	flds	st_d_zistepu(%edi)	// surf->d_zistepu | newzitop| fu
-	fcomps	st_d_zistepu(%esi)	// newzitop | fu
-	fnstsw	%ax
-	testb	$0x01,%ah
-	jz		Lgotposition_fpop2
-
-	fstp	%st(0)				// clear the FPstack
-	fstp	%st(0)
-	movl	st_key(%edi),%eax
-	jmp		Lsortloop
-
-
-Lgotposition_fpop3:
-	fstp	%st(0)
-Lgotposition_fpop2:
-	fstp	%st(0)
-	fstp	%st(0)
-	jmp		LInsertAndExit
-
-
-// emit a span (obscures current top)
-
-Lnewtop_fpop3:
-	fstp	%st(0)
-Lnewtop_fpop2:
-	fstp	%st(0)
-	fstp	%st(0)
-	movl	st_key(%edi),%eax		// reload the sorting key
-
-Lnewtop:
-	movl	et_u(%ebx),%eax
-	movl	st_last_u(%esi),%edx
-	shrl	$20,%eax				// iu = integral pixel u
-	movl	%eax,st_last_u(%edi)	// surf->last_u = iu;
-	cmpl	%edx,%eax
-	jle		LInsertAndExit			// iu <= surf->last_u, so nothing to emit
-
-	subl	%edx,%eax
-	movl	%edx,espan_t_u(%ebp)		// span->u = surf->last_u;
-
-	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
-	movl	C(current_iv),%eax
-	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
-	movl	st_spans(%esi),%eax
-	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf->spans;
-	movl	%ebp,st_spans(%esi)			// surf->spans = span;
-	addl	$(espan_t_size),%ebp
-
-LInsertAndExit:
-// insert before surf2
-	movl	%esi,st_next(%edi)		// surf->next = surf2;
-	movl	st_prev(%esi),%eax
-	movl	%eax,st_prev(%edi)		// surf->prev = surf2->prev;
-	movl	%edi,st_prev(%esi)		// surf2->prev = surf;
-	movl	%edi,st_next(%eax)		// surf2->prev->next = surf;
-
-// ---------------------------------------------------------------
-// leading edge done
-// ---------------------------------------------------------------
-
-// ---------------------------------------------------------------
-// see if there are any more edges
-// ---------------------------------------------------------------
-
-Lgs_nextedge:
-	movl	et_next(%ebx),%ebx
-	cmpl	$(C(edge_tail)),%ebx
-	jnz		Lgs_edgeloop
-
-// clean up at the right edge
-Lgs_lastspan:
-
-// now that we've reached the right edge of the screen, we're done with any
-// unfinished surfaces, so emit a span for whatever's on top
-	movl	0x12345678,%esi		// surfaces[1].st_next
-LPatch3:
-	movl	C(edge_tail_u_shift20),%eax
-	xorl	%ecx,%ecx
-	movl	st_last_u(%esi),%edx
-	subl	%edx,%eax
-	jle		Lgs_resetspanstate
-
-	movl	%edx,espan_t_u(%ebp)
-	movl	%eax,espan_t_count(%ebp)
-	movl	C(current_iv),%eax
-	movl	%eax,espan_t_v(%ebp)
-	movl	st_spans(%esi),%eax
-	movl	%eax,espan_t_pnext(%ebp)
-	movl	%ebp,st_spans(%esi)
-	addl	$(espan_t_size),%ebp
-
-// reset spanstate for all surfaces in the surface stack
-Lgs_resetspanstate:
-	movl	%ecx,st_spanstate(%esi)
-	movl	st_next(%esi),%esi
-	cmpl	$0x12345678,%esi		// &surfaces[1]
-LPatch4:
-	jnz		Lgs_resetspanstate
-
-// store the final span_p
-	movl	%ebp,C(span_p)
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-
-// ---------------------------------------------------------------
-// 1/z sorting for bmodels in the same leaf
-// ---------------------------------------------------------------
-	.align	4
-Lxl_done:
-	incl	%edx
-	movl	%edx,st_spanstate(%edi)
-
-	jmp		Lgs_nextedge
-
-
-	.align	4
-Lzcheck_for_newtop:
-	movl	et_u(%ebx),%eax
-	subl	$0xFFFFF,%eax
-	movl	%eax,Ltemp
-	fildl	Ltemp
-
-	fmuls	float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
-								//      (1.0 / 0x100000);
-
-	fld		%st(0)				// fu | fu
-	fmuls	st_d_zistepu(%edi)	// fu*surf->d_zistepu | fu
-	flds	C(fv)				// fv | fu*surf->d_zistepu | fu
-	fmuls	st_d_zistepv(%edi)	// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
-	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
-	fadds	st_d_ziorigin(%edi)	// fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-
-	flds	st_d_zistepu(%esi)	// surf2->d_zistepu |
-								//  fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-	fmul	%st(3),%st(0)		// fu*surf2->d_zistepu |
-								//  fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fv*surf->d_zistepv | fu
-	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
-								//  fu*surf2->d_zistepu |
-								//  fv*surf->d_zistepv | fu
-	faddp	%st(0),%st(2)		// fu*surf2->d_zistepu | newzi | fu
-
-	flds	C(fv)				// fv | fu*surf2->d_zistepu | newzi | fu
-	fmuls	st_d_zistepv(%esi)	// fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-	fld		%st(2)				// newzi | fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-	fmuls	float_point_999		// newzibottom | fv*surf2->d_zistepv |
-								//  fu*surf2->d_zistepu | newzi | fu
-
-	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
-								//  newzibottom | newzi | fu
-	fadds	st_d_ziorigin(%esi)	// fu*surf2->d_zistepu + surf2->d_ziorigin |
-								//  fv*surf2->d_zistepv | newzibottom | newzi |
-								//  fu
-	faddp	%st(0),%st(1)		// testzi | newzibottom | newzi | fu
-	fxch	%st(1)				// newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-//     goto newtop;
-
-	fcomp	%st(1)				// testzi | newzi | fu
-
-	fxch	%st(1)				// newzi | testzi | fu
-	fmuls	float_1_point_001	// newzitop | testzi | fu
-	fxch	%st(1)				// testzi | newzitop | fu
-
-	fnstsw	%ax
-	testb	$0x01,%ah
-	jz		Lnewtop_fpop3
-
-// if (newzitop >= testzi)
-// {
-
-	fcomp	%st(1)				// newzitop | fu
-	fnstsw	%ax
-	testb	$0x45,%ah
-	jz		Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-//     goto newtop;
-
-	flds	st_d_zistepu(%edi)	// surf->d_zistepu | newzitop | fu
-	fcomps	st_d_zistepu(%esi)	// newzitop | fu
-	fnstsw	%ax
-	testb	$0x01,%ah
-	jz		Lnewtop_fpop2
-
-Lsortloop_fpop2:
-	fstp	%st(0)				// clear the FP stack
-	fstp	%st(0)
-	movl	st_key(%edi),%eax
-	jmp		Lsortloop
-
-
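The flag tests above (Lsortloop through Lsortloop_fpop2, and the duplicated Lzcheck_for_newtop copy) implement the 1/z tie-break: evaluate both surfaces' 1/z planes at the edge's (fu, fv) and only put the new surface on top when it wins by a small relative margin, falling back to comparing d_zistepu when the two are within tolerance. A sketch of the decision using the 0.999 and 1.001 constants from the data section, with the surfaces reduced to the plane terms actually read:

typedef struct {
	float	d_ziorigin, d_zistepu, d_zistepv;
} zplane_t;

static int
new_surface_is_nearer(const zplane_t *surf, const zplane_t *surf2,
	float fu, float fv)
{
	float	newzi, testzi;

	newzi  = surf->d_ziorigin  + fv*surf->d_zistepv  + fu*surf->d_zistepu;
	testzi = surf2->d_ziorigin + fv*surf2->d_zistepv + fu*surf2->d_zistepu;

	if (newzi * 0.999 >= testzi)	/* clearly in front */
		return 1;
	if (newzi * 1.001 >= testzi)	/* too close to call: use the slope */
		return surf->d_zistepu >= surf2->d_zistepu;
	return 0;			/* clearly behind */
}

The tolerance gives nearly coplanar brush-model faces a stable ordering against the world polygons they sit on.
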
-.globl C(R_EdgeCodeEnd)
-C(R_EdgeCodeEnd):
-
-
-//----------------------------------------------------------------------
-// Surface array address code patching routine
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(R_SurfacePatch)
-C(R_SurfacePatch):
-
-	movl	C(surfaces),%eax
-	addl	$(st_size),%eax
-	movl	%eax,LPatch4-4
-
-	addl	$(st_next),%eax
-	movl	%eax,LPatch0-4
-	movl	%eax,LPatch2-4
-	movl	%eax,LPatch3-4
-
-	ret
-
-#endif	// id386
-
--- a/r_varsa.s
+++ /dev/null
@@ -1,45 +1,0 @@
-//
-// r_varsa.s
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-	.data
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl	float_1, float_particle_z_clip, float_point5
-.globl	float_minus_1, float_0
-float_0:		.single	0.0
-float_1:		.single	1.0
-float_minus_1:	.single	-1.0
-float_particle_z_clip:	.single	PARTICLE_Z_CLIP
-float_point5:	.single	0.5
-
-.globl	fp_16, fp_64k, fp_1m, fp_64kx64k
-.globl	fp_1m_minus_1
-.globl	fp_8 
-fp_1m:			.single	1048576.0
-fp_1m_minus_1:	.single	1048575.0
-fp_64k:			.single	65536.0
-fp_8:			.single	8.0
-fp_16:			.single	16.0
-fp_64kx64k:		.long	0x4f000000	// (float)0x8000*0x10000
-
-
-.globl	FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
-FloatZero:				.long	0
-Float2ToThe31nd:		.long	0x4f000000
-FloatMinus2ToThe31nd:	.long	0xcf000000
-
-.globl	C(r_bmodelactive)
-C(r_bmodelactive):	.long	0
-
-#endif	// id386
-
--- a/snd_mixa.s
+++ /dev/null
@@ -1,199 +1,0 @@
-//
-// snd_mixa.s
-// x86 assembly-language sound code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-#ifdef	id386
-
-	.text
-
-//----------------------------------------------------------------------
-// 8-bit sound-mixing code
-//----------------------------------------------------------------------
-
-#define ch		4+16
-#define sc		8+16
-#define count	12+16
-
-.globl C(SND_PaintChannelFrom8)
-C(SND_PaintChannelFrom8):
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-	pushl	%ebp
-
-//	int 	data;
-//	int		*lscale, *rscale;
-//	unsigned char *sfx;
-//	int		i;
-
-	movl	ch(%esp),%ebx
-	movl	sc(%esp),%esi
-
-//	if (ch->leftvol > 255)
-//		ch->leftvol = 255;
-//	if (ch->rightvol > 255)
-//		ch->rightvol = 255;
-	movl	ch_leftvol(%ebx),%eax
-	movl	ch_rightvol(%ebx),%edx
-	cmpl	$255,%eax
-	jna		LLeftSet
-	movl	$255,%eax
-LLeftSet:
-	cmpl	$255,%edx
-	jna		LRightSet
-	movl	$255,%edx
-LRightSet:
-
-//	lscale = snd_scaletable[ch->leftvol >> 3];
-//	rscale = snd_scaletable[ch->rightvol >> 3];
-//	sfx = (signed char *)sc->data + ch->pos;
-//	ch->pos += count;
-	andl	$0xF8,%eax
-	addl	$(sfxc_data),%esi
-	andl	$0xF8,%edx
-	movl	ch_pos(%ebx),%edi
-	movl	count(%esp),%ecx
-	addl	%edi,%esi
-	shll	$7,%eax
-	addl	%ecx,%edi
-	shll	$7,%edx
-	movl	%edi,ch_pos(%ebx)
-	addl	$(C(snd_scaletable)),%eax
-	addl	$(C(snd_scaletable)),%edx
-	subl	%ebx,%ebx
-	movb	-1(%esi,%ecx,1),%bl
-
-	testl	$1,%ecx
-	jz		LMix8Loop
-
-	movl	(%eax,%ebx,4),%edi
-	movl	(%edx,%ebx,4),%ebp
-	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
-	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
-	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
-	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
-	movb	-2(%esi,%ecx,1),%bl
-
-	decl	%ecx
-	jz		LDone
-
-//	for (i=0 ; i<count ; i++)
-//	{
-LMix8Loop:
-
-//		data = sfx[i];
-//		paintbuffer[i].left += lscale[data];
-//		paintbuffer[i].right += rscale[data];
-	movl	(%eax,%ebx,4),%edi
-	movl	(%edx,%ebx,4),%ebp
-	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
-	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
-	movb	-2(%esi,%ecx,1),%bl
-	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
-	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
-
-	movl	(%eax,%ebx,4),%edi
-	movl	(%edx,%ebx,4),%ebp
-	movb	-3(%esi,%ecx,1),%bl
-	addl	C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
-	addl	C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
-	movl	%edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
-	movl	%ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
-
-//	}
-	subl	$2,%ecx
-	jnz		LMix8Loop
-
-LDone:
-	popl	%ebp
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-
-	ret
-
-
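SND_PaintChannelFrom8 above mixes 8-bit samples into the 32-bit stereo paint buffer two samples per iteration, with the volume multiply folded into a lookup in snd_scaletable[vol >> 3][sample]. The straight C form of the same loop (types follow the commented declarations, with the scale-table entries as the 32-bit values the 4-byte indexed loads imply):

typedef struct {
	int	left, right;
} portable_samplepair_t;

static void
paint_channel_from8(portable_samplepair_t *paintbuffer,
	const unsigned char *sfx, int count,
	int leftvol, int rightvol, int snd_scaletable[32][256])
{
	int	*lscale, *rscale;
	int	i, data;

	if (leftvol > 255)
		leftvol = 255;
	if (rightvol > 255)
		rightvol = 255;
	lscale = snd_scaletable[leftvol >> 3];
	rscale = snd_scaletable[rightvol >> 3];

	for (i = 0; i < count; i++) {
		data = sfx[i];
		paintbuffer[i].left += lscale[data];
		paintbuffer[i].right += rscale[data];
	}
}
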
-//----------------------------------------------------------------------
-// Transfer of stereo buffer to 16-bit DMA buffer code
-//----------------------------------------------------------------------
-
-.globl C(Snd_WriteLinearBlastStereo16)
-C(Snd_WriteLinearBlastStereo16):
-	pushl	%esi				// preserve register variables
-	pushl	%edi
-	pushl	%ebx
-
-//	int		i;
-//	int		val;
-	movl	C(snd_linear_count),%ecx
-	movl	C(snd_p),%ebx
-	movl	C(snd_vol),%esi
-	movl	C(snd_out),%edi
-
-//	for (i=0 ; i<snd_linear_count ; i+=2)
-//	{
-LWLBLoopTop:
-
-//		val = (snd_p[i]*snd_vol)>>8;
-//		if (val > 0x7fff)
-//			snd_out[i] = 0x7fff;
-//		else if (val < (short)0x8000)
-//			snd_out[i] = (short)0x8000;
-//		else
-//			snd_out[i] = val;
-	movl	-8(%ebx,%ecx,4),%eax
-	imull	%esi,%eax
-	sarl	$8,%eax
-	cmpl	$0x7FFF,%eax
-	jg		LClampHigh
-	cmpl	$0xFFFF8000,%eax
-	jnl		LClampDone
-	movl	$0xFFFF8000,%eax
-	jmp		LClampDone
-LClampHigh:
-	movl	$0x7FFF,%eax
-LClampDone:
-
-//		val = (snd_p[i+1]*snd_vol)>>8;
-//		if (val > 0x7fff)
-//			snd_out[i+1] = 0x7fff;
-//		else if (val < (short)0x8000)
-//			snd_out[i+1] = (short)0x8000;
-//		else
-//			snd_out[i+1] = val;
-	movl	-4(%ebx,%ecx,4),%edx
-	imull	%esi,%edx
-	sarl	$8,%edx
-	cmpl	$0x7FFF,%edx
-	jg		LClampHigh2
-	cmpl	$0xFFFF8000,%edx
-	jnl		LClampDone2
-	movl	$0xFFFF8000,%edx
-	jmp		LClampDone2
-LClampHigh2:
-	movl	$0x7FFF,%edx
-LClampDone2:
-	shll	$16,%edx
-	andl	$0xFFFF,%eax
-	orl		%eax,%edx
-	movl	%edx,-4(%edi,%ecx,2)
-
-//	}
-	subl	$2,%ecx
-	jnz		LWLBLoopTop
-
-//	snd_p += snd_linear_count;
-
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-
-	ret
-
-
-#endif	// id386
-
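Snd_WriteLinearBlastStereo16 above walks the mixed buffer from the end, scales each 32-bit sample by the master volume, clamps to the signed 16-bit range, and packs each left/right pair into a single 32-bit store. A forward-walking sketch that produces the same output on a little-endian target (snd_linear_count counts individual samples, as above):

static void
write_linear_blast_stereo16(const int *snd_p, short *snd_out,
	int snd_linear_count, int snd_vol)
{
	int	i, val;

	for (i = 0; i < snd_linear_count; i++) {
		val = (snd_p[i] * snd_vol) >> 8;
		if (val > 0x7fff)
			snd_out[i] = 0x7fff;
		else if (val < (short)0x8000)
			snd_out[i] = (short)0x8000;
		else
			snd_out[i] = val;
	}
}
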
--- a/surf16.s
+++ /dev/null
@@ -1,153 +1,0 @@
-//
-// surf16.s
-// x86 assembly-language 16 bpp surface block drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// Surface block drawer
-//----------------------------------------------------------------------
-
-	.data
-
-k:			.long	0
-loopentry:	.long	0
-
-	.align	4
-blockjumptable16:
-	.long	LEnter2_16
-	.long	LEnter4_16
-	.long	0, LEnter8_16
-	.long	0, 0, 0, LEnter16_16
-
-
-	.text
-
-	.align 4
-.globl C(R_Surf16Start)
-C(R_Surf16Start):
-
-	.align 4
-.globl C(R_DrawSurfaceBlock16)
-C(R_DrawSurfaceBlock16):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-	movl	C(blocksize),%eax
-	movl	C(prowdestbase),%edi
-	movl	C(pbasesource),%esi
-	movl	C(sourcesstep),%ebx
-	movl	blockjumptable16-4(,%eax,2),%ecx
-	movl	%eax,k
-	movl	%ecx,loopentry
-	movl	C(lightleft),%edx
-	movl	C(lightright),%ebp
-
-Lblockloop16:
-
-	subl	%edx,%ebp
-	movb	C(blockdivshift),%cl
-	sarl	%cl,%ebp
-	jns		Lp1_16
-	testl	C(blockdivmask),%ebp
-	jz		Lp1_16
-	incl	%ebp
-Lp1_16:
-
-	subl	%eax,%eax
-	subl	%ecx,%ecx	// high words must be 0 in loop for addressing
-
-	jmp		*loopentry
-
-	.align	4
-
-#include "block16.h"
-
-	movl	C(pbasesource),%esi
-	movl	C(lightleft),%edx
-	movl	C(lightright),%ebp
-	movl	C(sourcetstep),%eax
-	movl	C(lightrightstep),%ecx
-	movl	C(prowdestbase),%edi
-
-	addl	%eax,%esi
-	addl	%ecx,%ebp
-
-	movl	C(lightleftstep),%eax
-	movl	C(surfrowbytes),%ecx
-
-	addl	%eax,%edx
-	addl	%ecx,%edi
-
-	movl	%esi,C(pbasesource)
-	movl	%ebp,C(lightright)
-	movl	k,%eax
-	movl	%edx,C(lightleft)
-	decl	%eax
-	movl	%edi,C(prowdestbase)
-	movl	%eax,k
-	jnz		Lblockloop16
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-.globl C(R_Surf16End)
-C(R_Surf16End):
-
-//----------------------------------------------------------------------
-// Code patching routines
-//----------------------------------------------------------------------
-	.data
-
-	.align 4
-LPatchTable16:
-	.long	LBPatch0-4
-	.long	LBPatch1-4
-	.long	LBPatch2-4
-	.long	LBPatch3-4
-	.long	LBPatch4-4
-	.long	LBPatch5-4
-	.long	LBPatch6-4
-	.long	LBPatch7-4
-	.long	LBPatch8-4
-	.long	LBPatch9-4
-	.long	LBPatch10-4
-	.long	LBPatch11-4
-	.long	LBPatch12-4
-	.long	LBPatch13-4
-	.long	LBPatch14-4
-	.long	LBPatch15-4
-
-	.text
-
-	.align 4
-.globl C(R_Surf16Patch)
-C(R_Surf16Patch):
-	pushl	%ebx
-
-	movl	C(colormap),%eax
-	movl	$LPatchTable16,%ebx
-	movl	$16,%ecx
-LPatchLoop16:
-	movl	(%ebx),%edx
-	addl	$4,%ebx
-	movl	%eax,(%edx)
-	decl	%ecx
-	jnz		LPatchLoop16
-
-	popl	%ebx
-
-	ret
-
-
-#endif	// id386
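
The 0x12345678 constants in the drawer above are placeholders for the colormap address: each one is the 32-bit displacement of a memory operand, the LBPatchN label sits just past it, and the patch table stores label minus 4 so R_Surf16Patch (like R_Surf8Patch and R_SurfacePatch elsewhere in this patch) can overwrite the placeholder at startup. Conceptually it is just this, assuming the code segment is writable, which the original DOS build could count on:

#include <string.h>

static void
patch_displacements(void *patch_points[], int count, void *target)
{
	int	i;

	/* on the 32-bit target a pointer is exactly the 4-byte
	   displacement being overwritten */
	for (i = 0; i < count; i++)
		memcpy(patch_points[i], &target, sizeof target);
}
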
--- a/surf8.s
+++ /dev/null
@@ -1,764 +1,0 @@
-//
-// surf8.s
-// x86 assembly-language 8 bpp surface block drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef	id386
-
-	.data
-
-sb_v:		.long	0
-
-	.text
-
-	.align 4
-.globl C(R_Surf8Start)
-C(R_Surf8Start):
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 0
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(R_DrawSurfaceBlock8_mip0)
-C(R_DrawSurfaceBlock8_mip0):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//		for (v=0 ; v<numvblocks ; v++)
-//		{
-	movl	C(r_lightptr),%ebx
-	movl	C(r_numvblocks),%eax
-
-	movl	%eax,sb_v
-	movl	C(prowdestbase),%edi
-
-	movl	C(pbasesource),%esi
-
-Lv_loop_mip0:
-
-//			lightleft = lightptr[0];
-//			lightright = lightptr[1];
-//			lightdelta = (lightleft - lightright) & 0xFFFFF;
-	movl	(%ebx),%eax			// lightleft
-	movl	4(%ebx),%edx		// lightright
-
-	movl	%eax,%ebp
-	movl	C(r_lightwidth),%ecx
-
-	movl	%edx,C(lightright)
-	subl	%edx,%ebp
-
-	andl	$0xFFFFF,%ebp
-	leal	(%ebx,%ecx,4),%ebx
-
-//			lightptr += lightwidth;
-	movl	%ebx,C(r_lightptr)
-
-//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-//					0xF0000000;
-	movl	4(%ebx),%ecx	// lightptr[1]
-	movl	(%ebx),%ebx		// lightptr[0]
-
-	subl	%eax,%ebx
-	subl	%edx,%ecx
-
-	sarl	$4,%ecx
-	orl		$0xF0000000,%ebp
-
-	sarl	$4,%ebx
-	movl	%ecx,C(lightrightstep)
-
-	subl	%ecx,%ebx
-	andl	$0xFFFFF,%ebx
-
-	orl		$0xF0000000,%ebx
-	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
-
-	movl	%ebx,C(lightdeltastep)
-	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
-
-Lblockloop8_mip0:
-	movl	%ebp,C(lightdelta)
-	movb	14(%esi),%cl
-
-	sarl	$4,%ebp
-	movb	%dh,%bh
-
-	movb	15(%esi),%bl
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch0:
-	movb	13(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch1:
-	movb	12(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	addl	%ebp,%edx
-	movb	0x12345678(%ebx),%ah
-LBPatch2:
-
-	movb	11(%esi),%bl
-	movb	0x12345678(%ecx),%al
-LBPatch3:
-
-	movb	10(%esi),%cl
-	movl	%eax,12(%edi)
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch4:
-	movb	9(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch5:
-	movb	8(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	addl	%ebp,%edx
-	movb	0x12345678(%ebx),%ah
-LBPatch6:
-
-	movb	7(%esi),%bl
-	movb	0x12345678(%ecx),%al
-LBPatch7:
-
-	movb	6(%esi),%cl
-	movl	%eax,8(%edi)
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch8:
-	movb	5(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch9:
-	movb	4(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	addl	%ebp,%edx
-	movb	0x12345678(%ebx),%ah
-LBPatch10:
-
-	movb	3(%esi),%bl
-	movb	0x12345678(%ecx),%al
-LBPatch11:
-
-	movb	2(%esi),%cl
-	movl	%eax,4(%edi)
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch12:
-	movb	1(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch13:
-	movb	(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	movb	0x12345678(%ebx),%ah
-LBPatch14:
-	movl	C(lightright),%edx
-
-	movb	0x12345678(%ecx),%al
-LBPatch15:
-	movl	C(lightdelta),%ebp
-
-	movl	%eax,(%edi)
-
-	addl	C(sourcetstep),%esi
-	addl	C(surfrowbytes),%edi
-
-	addl	C(lightrightstep),%edx
-	addl	C(lightdeltastep),%ebp
-
-	movl	%edx,C(lightright)
-	jc		Lblockloop8_mip0
-
-//			if (pbasesource >= r_sourcemax)
-//				pbasesource -= stepback;
-
-	cmpl	C(r_sourcemax),%esi
-	jb		LSkip_mip0
-	subl	C(r_stepback),%esi
-LSkip_mip0:
-
-	movl	C(r_lightptr),%ebx
-	decl	sb_v
-
-	jnz		Lv_loop_mip0
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-
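R_DrawSurfaceBlock8_mip0 above is the fully unrolled form of a short loop: step a fixed-point light value across each 16-texel row and use bits 8..15 of it to pick a colormap row for every source texel (the colormap base is what the LBPatch slots get patched with). One row of the rolled-up version:

/* one 16-texel row of the mip0 block: interpolate the light level from
   lightright to lightleft and shade each texel through the colormap */
static void
draw_surface_row16(unsigned char *prowdest, const unsigned char *psource,
	int lightleft, int lightright, const unsigned char *colormap)
{
	int	light, lightstep, b;

	lightstep = (lightleft - lightright) >> 4;
	light = lightright;
	for (b = 15; b >= 0; b--) {
		prowdest[b] = colormap[(light & 0xFF00) + psource[b]];
		light += lightstep;
	}
}

The mip1, mip2 and mip3 drawers below are the same idea over 8-, 4- and 2-texel rows, with correspondingly smaller shifts.
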
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 1
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(R_DrawSurfaceBlock8_mip1)
-C(R_DrawSurfaceBlock8_mip1):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//		for (v=0 ; v<numvblocks ; v++)
-//		{
-	movl	C(r_lightptr),%ebx
-	movl	C(r_numvblocks),%eax
-
-	movl	%eax,sb_v
-	movl	C(prowdestbase),%edi
-
-	movl	C(pbasesource),%esi
-
-Lv_loop_mip1:
-
-//			lightleft = lightptr[0];
-//			lightright = lightptr[1];
-//			lightdelta = (lightleft - lightright) & 0xFFFFF;
-	movl	(%ebx),%eax			// lightleft
-	movl	4(%ebx),%edx		// lightright
-
-	movl	%eax,%ebp
-	movl	C(r_lightwidth),%ecx
-
-	movl	%edx,C(lightright)
-	subl	%edx,%ebp
-
-	andl	$0xFFFFF,%ebp
-	leal	(%ebx,%ecx,4),%ebx
-
-//			lightptr += lightwidth;
-	movl	%ebx,C(r_lightptr)
-
-//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-//					0xF0000000;
-	movl	4(%ebx),%ecx	// lightptr[1]
-	movl	(%ebx),%ebx		// lightptr[0]
-
-	subl	%eax,%ebx
-	subl	%edx,%ecx
-
-	sarl	$3,%ecx
-	orl		$0x70000000,%ebp
-
-	sarl	$3,%ebx
-	movl	%ecx,C(lightrightstep)
-
-	subl	%ecx,%ebx
-	andl	$0xFFFFF,%ebx
-
-	orl		$0xF0000000,%ebx
-	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
-
-	movl	%ebx,C(lightdeltastep)
-	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
-
-Lblockloop8_mip1:
-	movl	%ebp,C(lightdelta)
-	movb	6(%esi),%cl
-
-	sarl	$3,%ebp
-	movb	%dh,%bh
-
-	movb	7(%esi),%bl
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch22:
-	movb	5(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch23:
-	movb	4(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	addl	%ebp,%edx
-	movb	0x12345678(%ebx),%ah
-LBPatch24:
-
-	movb	3(%esi),%bl
-	movb	0x12345678(%ecx),%al
-LBPatch25:
-
-	movb	2(%esi),%cl
-	movl	%eax,4(%edi)
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch26:
-	movb	1(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch27:
-	movb	(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	movb	0x12345678(%ebx),%ah
-LBPatch28:
-	movl	C(lightright),%edx
-
-	movb	0x12345678(%ecx),%al
-LBPatch29:
-	movl	C(lightdelta),%ebp
-
-	movl	%eax,(%edi)
-	movl	C(sourcetstep),%eax
-
-	addl	%eax,%esi
-	movl	C(surfrowbytes),%eax
-
-	addl	%eax,%edi
-	movl	C(lightrightstep),%eax
-
-	addl	%eax,%edx
-	movl	C(lightdeltastep),%eax
-
-	addl	%eax,%ebp
-	movl	%edx,C(lightright)
-
-	jc		Lblockloop8_mip1
-
-//			if (pbasesource >= r_sourcemax)
-//				pbasesource -= stepback;
-
-	cmpl	C(r_sourcemax),%esi
-	jb		LSkip_mip1
-	subl	C(r_stepback),%esi
-LSkip_mip1:
-
-	movl	C(r_lightptr),%ebx
-	decl	sb_v
-
-	jnz		Lv_loop_mip1
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 2
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(R_DrawSurfaceBlock8_mip2)
-C(R_DrawSurfaceBlock8_mip2):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//		for (v=0 ; v<numvblocks ; v++)
-//		{
-	movl	C(r_lightptr),%ebx
-	movl	C(r_numvblocks),%eax
-
-	movl	%eax,sb_v
-	movl	C(prowdestbase),%edi
-
-	movl	C(pbasesource),%esi
-
-Lv_loop_mip2:
-
-//			lightleft = lightptr[0];
-//			lightright = lightptr[1];
-//			lightdelta = (lightleft - lightright) & 0xFFFFF;
-	movl	(%ebx),%eax			// lightleft
-	movl	4(%ebx),%edx		// lightright
-
-	movl	%eax,%ebp
-	movl	C(r_lightwidth),%ecx
-
-	movl	%edx,C(lightright)
-	subl	%edx,%ebp
-
-	andl	$0xFFFFF,%ebp
-	leal	(%ebx,%ecx,4),%ebx
-
-//			lightptr += lightwidth;
-	movl	%ebx,C(r_lightptr)
-
-//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-//					0xF0000000;
-	movl	4(%ebx),%ecx	// lightptr[1]
-	movl	(%ebx),%ebx		// lightptr[0]
-
-	subl	%eax,%ebx
-	subl	%edx,%ecx
-
-	sarl	$2,%ecx
-	orl		$0x30000000,%ebp
-
-	sarl	$2,%ebx
-	movl	%ecx,C(lightrightstep)
-
-	subl	%ecx,%ebx
-
-	andl	$0xFFFFF,%ebx
-
-	orl		$0xF0000000,%ebx
-	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
-
-	movl	%ebx,C(lightdeltastep)
-	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
-
-Lblockloop8_mip2:
-	movl	%ebp,C(lightdelta)
-	movb	2(%esi),%cl
-
-	sarl	$2,%ebp
-	movb	%dh,%bh
-
-	movb	3(%esi),%bl
-	addl	%ebp,%edx
-
-	movb	%dh,%ch
-	addl	%ebp,%edx
-
-	movb	0x12345678(%ebx),%ah
-LBPatch18:
-	movb	1(%esi),%bl
-
-	movb	0x12345678(%ecx),%al
-LBPatch19:
-	movb	(%esi),%cl
-
-	movb	%dh,%bh
-	addl	%ebp,%edx
-
-	rorl	$16,%eax
-	movb	%dh,%ch
-
-	movb	0x12345678(%ebx),%ah
-LBPatch20:
-	movl	C(lightright),%edx
-
-	movb	0x12345678(%ecx),%al
-LBPatch21:
-	movl	C(lightdelta),%ebp
-
-	movl	%eax,(%edi)
-	movl	C(sourcetstep),%eax
-
-	addl	%eax,%esi
-	movl	C(surfrowbytes),%eax
-
-	addl	%eax,%edi
-	movl	C(lightrightstep),%eax
-
-	addl	%eax,%edx
-	movl	C(lightdeltastep),%eax
-
-	addl	%eax,%ebp
-	movl	%edx,C(lightright)
-
-	jc		Lblockloop8_mip2
-
-//			if (pbasesource >= r_sourcemax)
-//				pbasesource -= stepback;
-
-	cmpl	C(r_sourcemax),%esi
-	jb		LSkip_mip2
-	subl	C(r_stepback),%esi
-LSkip_mip2:
-
-	movl	C(r_lightptr),%ebx
-	decl	sb_v
-
-	jnz		Lv_loop_mip2
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 3
-//----------------------------------------------------------------------
-
-	.align 4
-.globl C(R_DrawSurfaceBlock8_mip3)
-C(R_DrawSurfaceBlock8_mip3):
-	pushl	%ebp				// preserve caller's stack frame
-	pushl	%edi
-	pushl	%esi				// preserve register variables
-	pushl	%ebx
-
-//		for (v=0 ; v<numvblocks ; v++)
-//		{
-	movl	C(r_lightptr),%ebx
-	movl	C(r_numvblocks),%eax
-
-	movl	%eax,sb_v
-	movl	C(prowdestbase),%edi
-
-	movl	C(pbasesource),%esi
-
-Lv_loop_mip3:
-
-//			lightleft = lightptr[0];
-//			lightright = lightptr[1];
-//			lightdelta = (lightleft - lightright) & 0xFFFFF;
-	movl	(%ebx),%eax			// lightleft
-	movl	4(%ebx),%edx		// lightright
-
-	movl	%eax,%ebp
-	movl	C(r_lightwidth),%ecx
-
-	movl	%edx,C(lightright)
-	subl	%edx,%ebp
-
-	andl	$0xFFFFF,%ebp
-	leal	(%ebx,%ecx,4),%ebx
-
-	movl	%ebp,C(lightdelta)
-//			lightptr += lightwidth;
-	movl	%ebx,C(r_lightptr)
-
-//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-//					0xF0000000;
-	movl	4(%ebx),%ecx	// lightptr[1]
-	movl	(%ebx),%ebx		// lightptr[0]
-
-	subl	%eax,%ebx
-	subl	%edx,%ecx
-
-	sarl	$1,%ecx
-
-	sarl	$1,%ebx
-	movl	%ecx,C(lightrightstep)
-
-	subl	%ecx,%ebx
-	andl	$0xFFFFF,%ebx
-
-	sarl	$1,%ebp
-	orl		$0xF0000000,%ebx
-
-	movl	%ebx,C(lightdeltastep)
-	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
-
-	movb	1(%esi),%bl
-	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
-
-	movb	%dh,%bh
-	movb	(%esi),%cl
-
-	addl	%ebp,%edx
-	movb	%dh,%ch
-
-	movb	0x12345678(%ebx),%al
-LBPatch16:
-	movl	C(lightright),%edx
-
-	movb	%al,1(%edi)
-	movb	0x12345678(%ecx),%al
-LBPatch17:
-
-	movb	%al,(%edi)
-	movl	C(sourcetstep),%eax
-
-	addl	%eax,%esi
-	movl	C(surfrowbytes),%eax
-
-	addl	%eax,%edi
-	movl	C(lightdeltastep),%eax
-
-	movl	C(lightdelta),%ebp
-	movb	(%esi),%cl
-
-	addl	%eax,%ebp
-	movl	C(lightrightstep),%eax
-
-	sarl	$1,%ebp
-	addl	%eax,%edx
-
-	movb	%dh,%bh
-	movb	1(%esi),%bl
-
-	addl	%ebp,%edx
-	movb	%dh,%ch
-
-	movb	0x12345678(%ebx),%al
-LBPatch30:
-	movl	C(sourcetstep),%edx
-
-	movb	%al,1(%edi)
-	movb	0x12345678(%ecx),%al
-LBPatch31:
-
-	movb	%al,(%edi)
-	movl	C(surfrowbytes),%ebp
-
-	addl	%edx,%esi
-	addl	%ebp,%edi
-
-//			if (pbasesource >= r_sourcemax)
-//				pbasesource -= stepback;
-
-	cmpl	C(r_sourcemax),%esi
-	jb		LSkip_mip3
-	subl	C(r_stepback),%esi
-LSkip_mip3:
-
-	movl	C(r_lightptr),%ebx
-	decl	sb_v
-
-	jnz		Lv_loop_mip3
-
-	popl	%ebx				// restore register variables
-	popl	%esi
-	popl	%edi
-	popl	%ebp				// restore the caller's stack frame
-	ret
-
-
-.globl C(R_Surf8End)
-C(R_Surf8End):
-
-//----------------------------------------------------------------------
-// Code patching routines
-//----------------------------------------------------------------------
-	.data
-
-	.align 4
-LPatchTable8:
-	.long	LBPatch0-4
-	.long	LBPatch1-4
-	.long	LBPatch2-4
-	.long	LBPatch3-4
-	.long	LBPatch4-4
-	.long	LBPatch5-4
-	.long	LBPatch6-4
-	.long	LBPatch7-4
-	.long	LBPatch8-4
-	.long	LBPatch9-4
-	.long	LBPatch10-4
-	.long	LBPatch11-4
-	.long	LBPatch12-4
-	.long	LBPatch13-4
-	.long	LBPatch14-4
-	.long	LBPatch15-4
-	.long	LBPatch16-4
-	.long	LBPatch17-4
-	.long	LBPatch18-4
-	.long	LBPatch19-4
-	.long	LBPatch20-4
-	.long	LBPatch21-4
-	.long	LBPatch22-4
-	.long	LBPatch23-4
-	.long	LBPatch24-4
-	.long	LBPatch25-4
-	.long	LBPatch26-4
-	.long	LBPatch27-4
-	.long	LBPatch28-4
-	.long	LBPatch29-4
-	.long	LBPatch30-4
-	.long	LBPatch31-4
-
-	.text
-
-	.align 4
-.globl C(R_Surf8Patch)
-C(R_Surf8Patch):
-	pushl	%ebx
-
-	movl	C(colormap),%eax
-	movl	$LPatchTable8,%ebx
-	movl	$32,%ecx
-LPatchLoop8:
-	movl	(%ebx),%edx
-	addl	$4,%ebx
-	movl	%eax,(%edx)
-	decl	%ecx
-	jnz		LPatchLoop8
-
-	popl	%ebx
-
-	ret
-
-#endif	// id386
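The code-patching machinery that closes the removed file deserves a note: every texel fetch above goes through a bogus 0x12345678 displacement, LPatchTable8 records the address of that displacement inside each instruction (the LBPatchN label minus 4), and R_Surf8Patch loops over the table writing the current colormap pointer into all 32 placeholders. A minimal C sketch of that loop, with colormap and patch_table as stand-ins for the real symbols:

#include <stdint.h>
#include <string.h>

#define NUM_PATCHES 32

extern uint8_t *colormap;                 /* shading table; name assumed */
extern uint8_t *patch_table[NUM_PATCHES]; /* each entry = LBPatchN - 4 */

static void
surf8_patch_sketch(void)
{
	uint32_t addr = (uint32_t)(uintptr_t)colormap;
	int i;

	/* overwrite the 32-bit 0x12345678 displacement in every patched movb */
	for(i = 0; i < NUM_PATCHES; i++)
		memcpy(patch_table[i], &addr, sizeof addr);
}

Self-modifying code of this kind also assumes a writable text segment, which is one more reason to drop the file rather than port it.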
--- a/sys_dosa.s
+++ /dev/null
@@ -1,95 +1,0 @@
-//
-// sys_dosa.s
-// x86 assembly-language DOS-dependent routines.
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-
-	.data
-
-	.align	4
-fpenv:
-	.long	0, 0, 0, 0, 0, 0, 0, 0
-
-	.text
-
-.globl C(MaskExceptions)
-C(MaskExceptions):
-	fnstenv	fpenv
-	orl		$0x3F,fpenv
-	fldenv	fpenv
-
-	ret
-
-/*
-.globl C(unmaskexceptions)
-C(unmaskexceptions):
-	fnstenv	fpenv
-	andl		$0xFFFFFFE0,fpenv
-	fldenv	fpenv
-
-	ret
-*/
-
-	.data
-
-	.align	4
-.globl	ceil_cw, single_cw, full_cw, cw, pushed_cw
-ceil_cw:	.long	0
-single_cw:	.long	0
-full_cw:	.long	0
-cw:			.long	0
-pushed_cw:	.long	0
-
-	.text
-
-.globl C(Sys_LowFPPrecision)
-C(Sys_LowFPPrecision):
-	fldcw	single_cw
-
-	ret
-
-.globl C(Sys_HighFPPrecision)
-C(Sys_HighFPPrecision):
-	fldcw	full_cw
-
-	ret
-
-.globl C(Sys_PushFPCW_SetHigh)
-C(Sys_PushFPCW_SetHigh):
-	fnstcw	pushed_cw
-	fldcw	full_cw
-
-	ret
-
-.globl C(Sys_PopFPCW)
-C(Sys_PopFPCW):
-	fldcw	pushed_cw
-
-	ret
-
-.globl C(Sys_SetFPCW)
-C(Sys_SetFPCW):
-	fnstcw	cw
-	movl	cw,%eax
-#ifdef	id386
-	andb	$0xF0,%ah
-	orb		$0x03,%ah	// round mode, 64-bit precision
-#endif
-	movl	%eax,full_cw
-
-#ifdef	id386
-	andb	$0xF0,%ah
-	orb		$0x0C,%ah	// chop mode, single precision
-#endif
-	movl	%eax,single_cw
-
-#ifdef	id386
-	andb	$0xF0,%ah
-	orb		$0x08,%ah	// ceil mode, single precision
-#endif
-	movl	%eax,ceil_cw
-
-	ret
-
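For reference, Sys_SetFPCW above only rewrites bits 8-11 of the x87 control word: precision control sits in bits 8-9 and rounding control in bits 10-11. A C sketch of the three control-word values it derives from the word saved by fnstcw (the fldcw loads themselves are omitted):

#include <stdint.h>

static uint16_t full_cw, single_cw, ceil_cw;

/* cw is the value fnstcw stored */
static void
set_fpcw_sketch(uint16_t cw)
{
	full_cw   = (cw & 0xF0FF) | 0x0300;	/* round to nearest, 64-bit precision */
	single_cw = (cw & 0xF0FF) | 0x0C00;	/* chop (truncate), single precision */
	ceil_cw   = (cw & 0xF0FF) | 0x0800;	/* round toward +inf, single precision */
}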
--- /dev/null
+++ b/u/asm_draw.h
@@ -1,0 +1,132 @@
+//
+// asm_draw.h
+//
+// Include file for asm drawing routines.
+//
+
+//
+// !!! note that this file must match the corresponding C structures at all
+// times !!!
+//
+
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define	NEAR_CLIP	0.01
+
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define	CYCLE	128
+
+// espan_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define espan_t_u    	0
+#define espan_t_v	    4
+#define espan_t_count   8
+#define espan_t_pnext	12
+#define espan_t_size    16
+
+// sspan_t structure
+// !!! if this is changed, it must be changed in d_local.h too !!!
+#define sspan_t_u    	0
+#define sspan_t_v	    4
+#define sspan_t_count   8
+#define sspan_t_size    12
+
+// spanpackage_t structure
+// !!! if this is changed, it must be changed in d_polyset.c too !!!
+#define spanpackage_t_pdest				0
+#define spanpackage_t_pz				4
+#define spanpackage_t_count				8
+#define spanpackage_t_ptex				12
+#define spanpackage_t_sfrac				16
+#define spanpackage_t_tfrac				20
+#define spanpackage_t_light				24
+#define spanpackage_t_zi				28
+#define spanpackage_t_size				32 
+
+// edge_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define et_u			0
+#define et_u_step		4
+#define et_prev			8
+#define et_next			12
+#define et_surfs		16
+#define et_nextremove	20
+#define et_nearzi		24
+#define et_owner		28
+#define et_size			32
+
+// surf_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define SURF_T_SHIFT	6
+#define st_next			0
+#define st_prev			4
+#define st_spans		8
+#define st_key			12
+#define st_last_u		16
+#define st_spanstate	20
+#define st_flags		24
+#define st_data			28
+#define st_entity		32
+#define st_nearzi		36
+#define st_insubmodel	40
+#define st_d_ziorigin	44
+#define st_d_zistepu	48
+#define st_d_zistepv	52
+#define st_pad			56
+#define st_size			64
+
+// clipplane_t structure
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define cp_normal		0
+#define cp_dist			12
+#define cp_next			16
+#define cp_leftedge		20
+#define cp_rightedge	21
+#define cp_reserved		22
+#define cp_size			24
+
+// medge_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define me_v				0
+#define me_cachededgeoffset	4
+#define me_size				8
+
+// mvertex_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define mv_position		0
+#define mv_size			12
+
+// refdef_t structure
+// !!! if this is changed, it must be changed in render.h too !!!
+#define rd_vrect					0
+#define rd_aliasvrect				20
+#define rd_vrectright				40
+#define rd_vrectbottom				44
+#define rd_aliasvrectright			48
+#define rd_aliasvrectbottom			52
+#define rd_vrectrightedge			56
+#define rd_fvrectx					60
+#define rd_fvrecty					64
+#define rd_fvrectx_adj				68
+#define rd_fvrecty_adj				72
+#define rd_vrect_x_adj_shift20		76
+#define rd_vrectright_adj_shift20	80
+#define rd_fvrectright_adj			84
+#define rd_fvrectbottom_adj			88
+#define rd_fvrectright				92
+#define rd_fvrectbottom				96
+#define rd_horizontalFieldOfView	100
+#define rd_xOrigin					104
+#define rd_yOrigin					108
+#define rd_vieworg					112
+#define rd_viewangles				124
+#define rd_ambientlight				136
+#define rd_size						140
+
+// mtriangle_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define mtri_facesfront		0
+#define mtri_vertindex		4
+#define mtri_size			16	// !!! if this changes, array indexing in !!!
+								// !!! d_polysa.s must be changed to match !!!
+#define mtri_shift			4
+
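Since u/asm_draw.h has to track the C structures by hand, one cheap safeguard on the C side is a compile-time offset check. A sketch for espan_t, assuming the usual field names (u, v, count, pnext), a 32-bit target (the only one this assembly runs on), and a compiler with _Static_assert:

#include <stddef.h>

typedef struct espan_s {
	int	u, v, count;
	struct espan_s	*pnext;
} espan_t;

_Static_assert(offsetof(espan_t, u) == 0, "espan_t_u");
_Static_assert(offsetof(espan_t, v) == 4, "espan_t_v");
_Static_assert(offsetof(espan_t, count) == 8, "espan_t_count");
_Static_assert(offsetof(espan_t, pnext) == 12, "espan_t_pnext");
_Static_assert(sizeof(espan_t) == 16, "espan_t_size");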
--- /dev/null
+++ b/u/asm_i386.h
@@ -1,0 +1,78 @@
+#ifndef __ASM_I386__
+#define __ASM_I386__
+
+#ifdef ELF
+#define C(label) label
+#endif
+#ifndef ELF
+#define C(label) _##label
+#endif
+
+//
+// !!! note that this file must match the corresponding C structures at all
+// times !!!
+//
+
+// plane_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+// !!! if the size of this is changed, the array lookup in SV_HullPointContents
+//     must be changed too !!!
+#define pl_normal	0
+#define pl_dist		12
+#define pl_type		16
+#define pl_signbits	17
+#define pl_pad		18
+#define pl_size		20
+
+// hull_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define	hu_clipnodes		0
+#define	hu_planes			4
+#define	hu_firstclipnode	8
+#define	hu_lastclipnode		12
+#define	hu_clip_mins		16
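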
+#define	hu_clip_maxs		28
+#define hu_size  			40
+
+// dnode_t structure
+// !!! if this is changed, it must be changed in bspfile.h too !!!
+#define	nd_planenum		0
+#define	nd_children		4
+#define	nd_mins			8
+#define	nd_maxs			20
+#define	nd_firstface	32
+#define	nd_numfaces		36
+#define nd_size			40
+
+// sfxcache_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define sfxc_length		0
+#define sfxc_loopstart	4
+#define sfxc_speed		8
+#define sfxc_width		12
+#define sfxc_stereo		16
+#define sfxc_data		20
+
+// channel_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define ch_sfx			0
+#define ch_leftvol		4
+#define ch_rightvol		8
+#define ch_end			12
+#define ch_pos			16
+#define ch_looping		20
+#define ch_entnum		24
+#define ch_entchannel	28
+#define ch_origin		32
+#define ch_dist_mult	44
+#define ch_master_vol	48
+#define ch_size			52
+
+// portable_samplepair_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define psp_left		0
+#define psp_right		4
+#define psp_size		8
+
+#endif
+
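The plane_t offsets in u/asm_i386.h encode a specific layout, including two bytes of explicit padding so pl_size comes out to 20 rather than 18. The mplane_t declaration they assume looks roughly like this (field names follow the usual model.h; treat it as a sketch, not a quote of the header):

typedef float vec3_t[3];
typedef unsigned char byte;

typedef struct mplane_s {
	vec3_t	normal;		/* pl_normal   = 0, 12 bytes */
	float	dist;		/* pl_dist     = 12 */
	byte	type;		/* pl_type     = 16 */
	byte	signbits;	/* pl_signbits = 17 */
	byte	pad[2];		/* pl_pad      = 18, pl_size = 20 */
} mplane_t;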
--- /dev/null
+++ b/u/block16.h
@@ -1,0 +1,123 @@
+LEnter16_16:
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch0:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch1:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch2:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch3:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch4:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch5:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch6:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch7:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+LEnter8_16:
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch8:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch9:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch10:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch11:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+LEnter4_16:
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch12:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch13:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
+
+LEnter2_16:
+	movb	(%esi),%al
+	movb	(%esi,%ebx,),%cl
+	movb	%dh,%ah
+	addl	%ebp,%edx
+	movb	%dh,%ch
+	leal	(%esi,%ebx,2),%esi
+	movw	0x12345678(,%eax,2),%ax
+LBPatch14:
+	addl	%ebp,%edx
+	movw	%ax,(%edi)
+	movw	0x12345678(,%ecx,2),%cx
+LBPatch15:
+	movw	%cx,2(%edi)
+	addl	$0x4,%edi
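Each unrolled step in u/block16.h turns two 8-bit texels into 16-bpp pixels: the texel supplies the low byte of the lookup index, the high byte of the current light accumulator (%dh) supplies the second byte, and the word fetch goes through a table whose address is later patched over the 0x12345678 placeholder. A C sketch of one such step; colormap16 and the parameter names are assumptions:

#include <stdint.h>

extern uint16_t colormap16[256 * 64];	/* patched in place of 0x12345678 */

static void
block16_step_sketch(uint8_t **psrc, uint16_t **pdst, int srcstep,
	uint32_t *light, uint32_t lightstep)
{
	uint8_t *src = *psrc;
	uint16_t *dst = *pdst;
	uint32_t l = *light;

	dst[0] = colormap16[(l & 0xFF00) | src[0]];		/* %ah:%al lookup */
	l += lightstep;
	dst[1] = colormap16[(l & 0xFF00) | src[srcstep]];	/* %ch:%cl lookup */
	l += lightstep;

	*light = l;
	*psrc = src + 2*srcstep;	/* leal (%esi,%ebx,2),%esi */
	*pdst = dst + 2;		/* addl $0x4,%edi */
}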
--- /dev/null
+++ b/u/d_draw.s
@@ -1,0 +1,1018 @@
+//
+// d_draw.s
+// x86 assembly-language horizontal 8-bpp span-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef	id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with no transparency.
+//
+// Assumes there is at least one span in pspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+	.text
+
+// out-of-line, rarely-needed clamping code
+
+LClampHigh0:
+	movl	C(bbextents),%esi
+	jmp		LClampReentry0
+LClampHighOrLow0:
+	jg		LClampHigh0
+	xorl	%esi,%esi
+	jmp		LClampReentry0
+
+LClampHigh1:
+	movl	C(bbextentt),%edx
+	jmp		LClampReentry1
+LClampHighOrLow1:
+	jg		LClampHigh1
+	xorl	%edx,%edx
+	jmp		LClampReentry1
+
+LClampLow2:
+	movl	$2048,%ebp
+	jmp		LClampReentry2
+LClampHigh2:
+	movl	C(bbextents),%ebp
+	jmp		LClampReentry2
+
+LClampLow3:
+	movl	$2048,%ecx
+	jmp		LClampReentry3
+LClampHigh3:
+	movl	C(bbextentt),%ecx
+	jmp		LClampReentry3
+
+LClampLow4:
+	movl	$2048,%eax
+	jmp		LClampReentry4
+LClampHigh4:
+	movl	C(bbextents),%eax
+	jmp		LClampReentry4
+
+LClampLow5:
+	movl	$2048,%ebx
+	jmp		LClampReentry5
+LClampHigh5:
+	movl	C(bbextentt),%ebx
+	jmp		LClampReentry5
+
+
+#define pspans	4+16
+
+	.align 4
+.globl C(D_DrawSpans8)
+C(D_DrawSpans8):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//
+// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
+// and span list pointers
+//
+// TODO: any overlap from rearranging?
+	flds	C(d_sdivzstepu)
+	fmuls	fp_8
+	movl	C(cacheblock),%edx
+	flds	C(d_tdivzstepu)
+	fmuls	fp_8
+	movl	pspans(%esp),%ebx	// point to the first span descriptor
+	flds	C(d_zistepu)
+	fmuls	fp_8
+	movl	%edx,pbase			// pbase = cacheblock
+	fstps	zi8stepu
+	fstps	tdivz8stepu
+	fstps	sdivz8stepu
+
+LSpanLoop:
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+	fildl	espan_t_v(%ebx)
+	fildl	espan_t_u(%ebx)
+
+	fld		%st(1)			// dv | du | dv
+	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
+	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+							//  du*d_sdivzstepu; stays in %st(2) at end
+	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+							//  s/z
+	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
+							//  du*d_tdivzstepu | du | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
+							//  du*d_tdivzstepu | du | s/z
+	faddp	%st(0),%st(2)	// dv*d_zistepv |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fmuls	C(d_zistepu)		// du*d_zistepu |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  du*d_zistepu | dv*d_zistepv | s/z
+	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+							//  du*d_tdivzstepu; stays in %st(1) at end
+	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
+	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
+							//  du*d_zistepu; stays in %st(0) at end
+							// 1/z | fp_64k | t/z | s/z
+//
+// calculate and clamp s & t
+//
+	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z
+
+//
+// point %edi to the first pixel in the span
+//
+	movl	C(d_viewbuffer),%ecx
+	movl	espan_t_v(%ebx),%eax
+	movl	%ebx,pspantemp	// preserve spans pointer
+
+	movl	C(tadjust),%edx
+	movl	C(sadjust),%esi
+	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
+	addl	%ecx,%edi
+	movl	espan_t_u(%ebx),%ecx
+	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
+	movl	espan_t_count(%ebx),%ecx
+
+//
+// now start the FDIV for the end of the span
+//
+	cmpl	$8,%ecx
+	ja		LSetupNotLast1
+
+	decl	%ecx
+	jz		LCleanup1		// if only one pixel, no need to start an FDIV
+	movl	%ecx,spancountminus1
+
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fildl	spancountminus1
+
+	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
+	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
+	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
+	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
+	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
+							//  C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
+							//  C(d_tdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)
+
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight1
+
+LCleanup1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+	jmp		LFDIVInFlight1
+
+	.align	4
+LSetupNotLast1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fadds	zi8stepu
+	fxch	%st(2)
+	fadds	sdivz8stepu
+	fxch	%st(2)
+	flds	tdivz8stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight1:
+
+	addl	s,%esi
+	addl	t,%edx
+	movl	C(bbextents),%ebx
+	movl	C(bbextentt),%ebp
+	cmpl	%ebx,%esi
+	ja		LClampHighOrLow0
+LClampReentry0:
+	movl	%esi,s
+	movl	pbase,%ebx
+	shll	$16,%esi
+	cmpl	%ebp,%edx
+	movl	%esi,sfracf
+	ja		LClampHighOrLow1
+LClampReentry1:
+	movl	%edx,t
+	movl	s,%esi					// sfrac = scans->sfrac;
+	shll	$16,%edx
+	movl	t,%eax					// tfrac = scans->tfrac;
+	sarl	$16,%esi
+	movl	%edx,tfracf
+
+//
+// calculate the texture starting address
+//
+	sarl	$16,%eax
+	movl	C(cachewidth),%edx
+	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
+	addl	%ebx,%esi
+	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
+									//           ((tfrac >> 16) * cachewidth);
+
+//
+// determine whether last span or not
+//
+	cmpl	$8,%ecx
+	jna		LLastSegment
+
+//
+// not the last segment; do full 8-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+	movl	snext,%eax
+	movl	tnext,%edx
+
+	movb	(%esi),%bl	// get first source texel
+	subl	$8,%ecx		// count off this segment's pixels
+	movl	C(sadjust),%ebp
+	movl	%ecx,counttemp	// remember count of remaining pixels
+
+	movl	C(tadjust),%ecx
+	movb	%bl,(%edi)	// store first dest pixel
+
+	addl	%eax,%ebp
+	addl	%edx,%ecx
+
+	movl	C(bbextents),%eax
+	movl	C(bbextentt),%edx
+
+	cmpl	$2048,%ebp
+	jl		LClampLow2
+	cmpl	%eax,%ebp
+	ja		LClampHigh2
+LClampReentry2:
+
+	cmpl	$2048,%ecx
+	jl		LClampLow3
+	cmpl	%edx,%ecx
+	ja		LClampHigh3
+LClampReentry3:
+
+	movl	%ebp,snext
+	movl	%ecx,tnext
+
+	subl	s,%ebp
+	subl	t,%ecx
+	
+//
+// set up advancetable
+//
+	movl	%ecx,%eax
+	movl	%ebp,%edx
+	sarl	$19,%eax			// whole part of per-pixel t step: (tnext-t)/8 >> 16
+	jz		LZero
+	sarl	$19,%edx			// whole part of per-pixel s step: (snext-s)/8 >> 16
+	movl	C(cachewidth),%ebx
+	imull	%ebx,%eax
+	jmp		LSetUp1
+
+LZero:
+	sarl	$19,%edx			// whole part of per-pixel s step: (snext-s)/8 >> 16
+	movl	C(cachewidth),%ebx
+
+LSetUp1:
+
+	addl	%edx,%eax			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%edx
+	movl	%eax,advancetable+4	// advance base in t
+	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$13,%ebp			// left-justify sstep fractional part
+	movl	sfracf,%ebx
+	shll	$13,%ecx			// left-justify tstep fractional part
+	movl	%eax,advancetable	// advance extra in t
+
+	movl	%ecx,tstep
+	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac
+
+	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
+	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
+	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	(%esi),%al
+	addl	%ebp,%ebx
+	movb	%al,1(%edi)
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,2(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,3(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+	movl	counttemp,%ecx
+	cmpl	$8,%ecx			// more than one segment after this?
+	ja		LSetupNotLast2	// yes
+
+	decl	%ecx
+	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
+	movl	%ecx,spancountminus1
+	fildl	spancountminus1
+
+	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
+	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
+	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
+	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
+	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
+	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
+	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
+	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
+	faddp	%st(0),%st(4)	// 64k
+
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight2
+
+	.align	4
+LSetupNotLast2:
+	fadds	zi8stepu
+	fxch	%st(2)
+	fadds	sdivz8stepu
+	fxch	%st(2)
+	flds	tdivz8stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight2:
+	movl	%ecx,counttemp
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,4(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,5(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,6(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	$8,%edi
+	movl	%edx,tfracf
+	movl	snext,%edx
+	movl	%ebx,sfracf
+	movl	tnext,%ebx
+	movl	%edx,s
+	movl	%ebx,t
+
+	movl	counttemp,%ecx		// retrieve count
+
+//
+// determine whether last span or not
+//
+	cmpl	$8,%ecx				// are there multiple segments remaining?
+	movb	%al,-1(%edi)
+	ja		LNotLastSegment		// yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+	testl	%ecx,%ecx
+	jz		LNoSteps		// just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+
+	movb	(%esi),%al		// load first texel in segment
+	movl	C(tadjust),%ebx
+	movb	%al,(%edi)		// store first pixel in segment
+	movl	C(sadjust),%eax
+
+	addl	snext,%eax
+	addl	tnext,%ebx
+
+	movl	C(bbextents),%ebp
+	movl	C(bbextentt),%edx
+
+	cmpl	$2048,%eax
+	jl		LClampLow4
+	cmpl	%ebp,%eax
+	ja		LClampHigh4
+LClampReentry4:
+	movl	%eax,snext
+
+	cmpl	$2048,%ebx
+	jl		LClampLow5
+	cmpl	%edx,%ebx
+	ja		LClampHigh5
+LClampReentry5:
+
+	cmpl	$1,%ecx			// don't bother 
+	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
+							//  of the segment length
+	subl	s,%eax
+	subl	t,%ebx
+
+	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
+	addl	%ebx,%ebx		//  reciprocal yields 16.48
+
+	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
+	movl	%edx,%ebp
+
+	movl	%ebx,%eax
+	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
+
+LSetEntryvec:
+//
+// set up advancetable
+//
+	movl	entryvec_table(,%ecx,4),%ebx
+	movl	%edx,%eax
+	movl	%ebx,jumptemp		// entry point into code for RET later
+	movl	%ebp,%ecx
+	sarl	$16,%edx			// tstep >>= 16;
+	movl	C(cachewidth),%ebx
+	sarl	$16,%ecx			// sstep >>= 16;
+	imull	%ebx,%edx
+
+	addl	%ecx,%edx			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%ecx
+	movl	%edx,advancetable+4	// advance base in t
+	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$16,%ebp			// left-justify sstep fractional part
+	movl	sfracf,%ebx
+	shll	$16,%eax			// left-justify tstep fractional part
+	movl	%edx,advancetable	// advance extra in t
+
+	movl	%eax,tstep
+	movl	%ecx,%edx
+	addl	%eax,%edx
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	jmp		*jumptemp			// jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps:
+	movb	(%esi),%al		// load first texel in segment
+	subl	$7,%edi			// adjust for hardwired offset
+	jmp		LEndSpan
+
+
+LOnlyOneStep:
+	subl	s,%eax
+	subl	t,%ebx
+	movl	%eax,%ebp
+	movl	%ebx,%edx
+	jmp		LSetEntryvec
+
+//----------------------------------------
+
+.globl	Entry2_8
+Entry2_8:
+	subl	$6,%edi		// adjust for hardwired offsets
+	movb	(%esi),%al
+	jmp		LLEntry2_8
+
+//----------------------------------------
+
+.globl	Entry3_8
+Entry3_8:
+	subl	$5,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	jmp		LLEntry3_8
+
+//----------------------------------------
+
+.globl	Entry4_8
+Entry4_8:
+	subl	$4,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LLEntry4_8
+
+//----------------------------------------
+
+.globl	Entry5_8
+Entry5_8:
+	subl	$3,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LLEntry5_8
+
+//----------------------------------------
+
+.globl	Entry6_8
+Entry6_8:
+	subl	$2,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LLEntry6_8
+
+//----------------------------------------
+
+.globl	Entry7_8
+Entry7_8:
+	decl	%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LLEntry7_8
+
+//----------------------------------------
+
+.globl	Entry8_8
+Entry8_8:
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,1(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LLEntry7_8:
+	sbbl	%ecx,%ecx
+	movb	%al,2(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LLEntry6_8:
+	sbbl	%ecx,%ecx
+	movb	%al,3(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LLEntry5_8:
+	sbbl	%ecx,%ecx
+	movb	%al,4(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LLEntry4_8:
+	sbbl	%ecx,%ecx
+	movb	%al,5(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+LLEntry3_8:
+	movb	%al,6(%edi)
+	movb	(%esi),%al
+LLEntry2_8:
+
+LEndSpan:
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+	fstp %st(0)
+	fstp %st(0)
+	fstp %st(0)
+
+	movl	pspantemp,%ebx				// restore spans pointer
+	movl	espan_t_pnext(%ebx),%ebx	// point to next span
+	testl	%ebx,%ebx			// any more spans?
+	movb	%al,7(%edi)
+	jnz		LSpanLoop			// more spans
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span z drawing code for polygons, with no transparency.
+//
+// Assumes there is at least one span in pzspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+	.text
+
+// z-clamp on a non-negative gradient span
+LClamp:
+	movl	$0x40000000,%edx
+	xorl	%ebx,%ebx
+	fstp	%st(0)
+	jmp		LZDraw
+
+// z-clamp on a negative gradient span
+LClampNeg:
+	movl	$0x40000000,%edx
+	xorl	%ebx,%ebx
+	fstp	%st(0)
+	jmp		LZDrawNeg
+
+
+#define pzspans	4+16
+
+.globl C(D_DrawZSpans)
+C(D_DrawZSpans):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+	flds	C(d_zistepu)
+	movl	C(d_zistepu),%eax
+	movl	pzspans(%esp),%esi
+	testl	%eax,%eax
+	jz		LFNegSpan
+
+	fmuls	Float2ToThe31nd
+	fistpl	izistep		// note: we are relying on FP exceptions being turned
+						// off here to avoid range problems
+	movl	izistep,%ebx	// remains loaded for all spans
+
+LFSpanLoop:
+// set up the initial 1/z value
+	fildl	espan_t_v(%esi)
+	fildl	espan_t_u(%esi)
+	movl	espan_t_v(%esi),%ecx
+	movl	C(d_pzbuffer),%edi
+	fmuls	C(d_zistepu)
+	fxch	%st(1)
+	fmuls	C(d_zistepv)
+	fxch	%st(1)
+	fadds	C(d_ziorigin)
+	imull	C(d_zrowbytes),%ecx
+	faddp	%st(0),%st(1)
+
+// clamp if z is nearer than 2 (1/z > 0.5)
+	fcoms	float_point5
+	addl	%ecx,%edi
+	movl	espan_t_u(%esi),%edx
+	addl	%edx,%edx				// word count
+	movl	espan_t_count(%esi),%ecx
+	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
+	pushl	%esi		// preserve spans pointer
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jz		LClamp
+
+	fmuls	Float2ToThe31nd
+	fistpl	izi			// note: we are relying on FP exceptions being turned
+						// off here to avoid problems when the span is closer
+						// than 1/(2**31)
+	movl	izi,%edx
+
+// at this point:
+// %ebx = izistep
+// %ecx = count
+// %edx = izi
+// %edi = pdest
+
+LZDraw:
+
+// do a single pixel up front, if necessary to dword align the destination
+	testl	$2,%edi
+	jz		LFMiddle
+	movl	%edx,%eax
+	addl	%ebx,%edx
+	shrl	$16,%eax
+	decl	%ecx
+	movw	%ax,(%edi)
+	addl	$2,%edi
+
+// do the middle of the span a pair of aligned dwords at a time
+LFMiddle:
+	pushl	%ecx
+	shrl	$1,%ecx				// count / 2
+	jz		LFLast				// no aligned dwords to do
+	shrl	$1,%ecx				// (count / 2) / 2
+	jnc		LFMiddleLoop		// even number of aligned dwords to do
+
+	movl	%edx,%eax
+	addl	%ebx,%edx
+	shrl	$16,%eax
+	movl	%edx,%esi
+	addl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%eax
+	movl	%eax,(%edi)
+	addl	$4,%edi
+	andl	%ecx,%ecx
+	jz		LFLast
+
+LFMiddleLoop:
+	movl	%edx,%eax
+	addl	%ebx,%edx
+	shrl	$16,%eax
+	movl	%edx,%esi
+	addl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%eax
+	movl	%edx,%ebp
+	movl	%eax,(%edi)
+	addl	%ebx,%edx
+	shrl	$16,%ebp
+	movl	%edx,%esi
+	addl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%ebp
+	movl	%ebp,4(%edi)	// FIXME: eliminate register contention
+	addl	$8,%edi
+
+	decl	%ecx
+	jnz		LFMiddleLoop
+
+LFLast:
+	popl	%ecx			// retrieve count
+	popl	%esi			// retrieve span pointer
+
+// do the last, unaligned pixel, if there is one
+	andl	$1,%ecx			// is there an odd pixel left to do?
+	jz		LFSpanDone		// no
+	shrl	$16,%edx
+	movw	%dx,(%edi)		// do the final pixel's z
+
+LFSpanDone:
+	movl	espan_t_pnext(%esi),%esi
+	testl	%esi,%esi
+	jnz		LFSpanLoop
+
+	jmp		LFDone
+
+LFNegSpan:
+	fmuls	FloatMinus2ToThe31nd
+	fistpl	izistep		// note: we are relying on FP exceptions being turned
+						// off here to avoid range problems
+	movl	izistep,%ebx	// remains loaded for all spans
+
+LFNegSpanLoop:
+// set up the initial 1/z value
+	fildl	espan_t_v(%esi)
+	fildl	espan_t_u(%esi)
+	movl	espan_t_v(%esi),%ecx
+	movl	C(d_pzbuffer),%edi
+	fmuls	C(d_zistepu)
+	fxch	%st(1)
+	fmuls	C(d_zistepv)
+	fxch	%st(1)
+	fadds	C(d_ziorigin)
+	imull	C(d_zrowbytes),%ecx
+	faddp	%st(0),%st(1)
+
+// clamp if z is nearer than 2 (1/z > 0.5)
+	fcoms	float_point5
+	addl	%ecx,%edi
+	movl	espan_t_u(%esi),%edx
+	addl	%edx,%edx				// word count
+	movl	espan_t_count(%esi),%ecx
+	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
+	pushl	%esi		// preserve spans pointer
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jz		LClampNeg
+
+	fmuls	Float2ToThe31nd
+	fistpl	izi			// note: we are relying on FP exceptions being turned
+						// off here to avoid problems when the span is closer
+						// than 1/(2**31)
+	movl	izi,%edx
+
+// at this point:
+// %ebx = izistep
+// %ecx = count
+// %edx = izi
+// %edi = pdest
+
+LZDrawNeg:
+
+// do a single pixel up front, if necessary to dword align the destination
+	testl	$2,%edi
+	jz		LFNegMiddle
+	movl	%edx,%eax
+	subl	%ebx,%edx
+	shrl	$16,%eax
+	decl	%ecx
+	movw	%ax,(%edi)
+	addl	$2,%edi
+
+// do the middle of the span a pair of aligned dwords at a time
+LFNegMiddle:
+	pushl	%ecx
+	shrl	$1,%ecx				// count / 2
+	jz		LFNegLast			// no aligned dwords to do
+	shrl	$1,%ecx				// (count / 2) / 2
+	jnc		LFNegMiddleLoop		// even number of aligned dwords to do
+
+	movl	%edx,%eax
+	subl	%ebx,%edx
+	shrl	$16,%eax
+	movl	%edx,%esi
+	subl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%eax
+	movl	%eax,(%edi)
+	addl	$4,%edi
+	andl	%ecx,%ecx
+	jz		LFNegLast
+
+LFNegMiddleLoop:
+	movl	%edx,%eax
+	subl	%ebx,%edx
+	shrl	$16,%eax
+	movl	%edx,%esi
+	subl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%eax
+	movl	%edx,%ebp
+	movl	%eax,(%edi)
+	subl	%ebx,%edx
+	shrl	$16,%ebp
+	movl	%edx,%esi
+	subl	%ebx,%edx
+	andl	$0xFFFF0000,%esi
+	orl		%esi,%ebp
+	movl	%ebp,4(%edi)	// FIXME: eliminate register contention
+	addl	$8,%edi
+
+	decl	%ecx
+	jnz		LFNegMiddleLoop
+
+LFNegLast:
+	popl	%ecx			// retrieve count
+	popl	%esi			// retrieve span pointer
+
+// do the last, unaligned pixel, if there is one
+	andl	$1,%ecx			// is there an odd pixel left to do?
+	jz		LFNegSpanDone	// no
+	shrl	$16,%edx
+	movw	%dx,(%edi)		// do the final pixel's z
+
+LFNegSpanDone:
+	movl	espan_t_pnext(%esi),%esi
+	testl	%esi,%esi
+	jnz		LFNegSpanLoop
+
+LFDone:
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+#endif	// id386
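D_DrawSpans8 is easier to read next to the algorithm it implements: perspective-correct s and t are recomputed from s/z, t/z and 1/z only at 8-pixel segment boundaries (one FDIV per segment, started early so it overlaps the integer pixel loop), and inside a segment s and t advance linearly in 16.16 fixed point. A condensed C sketch of that structure; the extern declarations are included only so the sketch stands alone, and the carry/advancetable tricks of the assembly are deliberately left out:

typedef struct espan_s { int u, v, count; struct espan_s *pnext; } espan_t;

extern unsigned char *d_viewbuffer, *pbase;
extern int screenwidth, cachewidth, sadjust, tadjust, bbextents, bbextentt;
extern float d_sdivzorigin, d_sdivzstepu, d_sdivzstepv;
extern float d_tdivzorigin, d_tdivzstepu, d_tdivzstepv;
extern float d_ziorigin, d_zistepu, d_zistepv;

void
draw_spans8_sketch(espan_t *pspan)
{
	do{
		unsigned char *pdest = d_viewbuffer + screenwidth*pspan->v + pspan->u;
		int count = pspan->count;

		/* s/z, t/z, 1/z at the left edge of the span */
		float du = pspan->u, dv = pspan->v;
		float sdivz = d_sdivzorigin + dv*d_sdivzstepv + du*d_sdivzstepu;
		float tdivz = d_tdivzorigin + dv*d_tdivzstepv + du*d_tdivzstepu;
		float zi = d_ziorigin + dv*d_zistepv + du*d_zistepu;
		float z = 65536.0f / zi;	/* scaled so s,t land in 16.16 */

		int s = (int)(sdivz*z) + sadjust;
		int t = (int)(tdivz*z) + tadjust;
		if(s < 0) s = 0; else if(s > bbextents) s = bbextents;
		if(t < 0) t = 0; else if(t > bbextentt) t = bbextentt;

		while(count > 0){
			int spancount = count < 8 ? count : 8;
			/* full segments look 8 pixels ahead; the last one only
			   spancount-1, so the final pixel lands exactly on snext */
			int adv = count > 8 ? 8 : count - 1;
			int snext = s, tnext = t, sstep = 0, tstep = 0;

			count -= spancount;
			if(adv > 0){
				sdivz += d_sdivzstepu*adv;
				tdivz += d_tdivzstepu*adv;
				zi += d_zistepu*adv;
				z = 65536.0f / zi;	/* the one divide per segment */
				snext = (int)(sdivz*z) + sadjust;
				tnext = (int)(tdivz*z) + tadjust;
				/* low clamp of 8 guards against overstepping on small negative steps */
				if(snext < 8) snext = 8; else if(snext > bbextents) snext = bbextents;
				if(tnext < 8) tnext = 8; else if(tnext > bbextentt) tnext = bbextentt;
				sstep = (snext - s) / adv;
				tstep = (tnext - t) / adv;
			}
			do{	/* linear walk across the segment */
				*pdest++ = pbase[(s>>16) + (t>>16)*cachewidth];
				s += sstep;
				t += tstep;
			}while(--spancount > 0);
			s = snext;
			t = tnext;
		}
	}while((pspan = pspan->pnext) != NULL);
}

Everything else in the routine (advancetable, the sbbl/adcl carry trick, the Entry*_8 jump table) exists to run that inner walk without a per-pixel multiply.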
--- /dev/null
+++ b/u/d_draw16.s
@@ -1,0 +1,955 @@
+//
+// d_draw16.s
+// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
+// subdivision.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef	id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with no transparency and
+// 16-pixel subdivision.
+//
+// Assumes there is at least one span in pspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+	.data
+
+	.text
+
+// out-of-line, rarely-needed clamping code
+
+LClampHigh0:
+	movl	C(bbextents),%esi
+	jmp		LClampReentry0
+LClampHighOrLow0:
+	jg		LClampHigh0
+	xorl	%esi,%esi
+	jmp		LClampReentry0
+
+LClampHigh1:
+	movl	C(bbextentt),%edx
+	jmp		LClampReentry1
+LClampHighOrLow1:
+	jg		LClampHigh1
+	xorl	%edx,%edx
+	jmp		LClampReentry1
+
+LClampLow2:
+	movl	$4096,%ebp
+	jmp		LClampReentry2
+LClampHigh2:
+	movl	C(bbextents),%ebp
+	jmp		LClampReentry2
+
+LClampLow3:
+	movl	$4096,%ecx
+	jmp		LClampReentry3
+LClampHigh3:
+	movl	C(bbextentt),%ecx
+	jmp		LClampReentry3
+
+LClampLow4:
+	movl	$4096,%eax
+	jmp		LClampReentry4
+LClampHigh4:
+	movl	C(bbextents),%eax
+	jmp		LClampReentry4
+
+LClampLow5:
+	movl	$4096,%ebx
+	jmp		LClampReentry5
+LClampHigh5:
+	movl	C(bbextentt),%ebx
+	jmp		LClampReentry5
+
+
+#define pspans	4+16
+
+	.align 4
+.globl C(D_DrawSpans16)
+C(D_DrawSpans16):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//
+// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
+// and span list pointers
+//
+// TODO: any overlap from rearranging?
+	flds	C(d_sdivzstepu)
+	fmuls	fp_16
+	movl	C(cacheblock),%edx
+	flds	C(d_tdivzstepu)
+	fmuls	fp_16
+	movl	pspans(%esp),%ebx	// point to the first span descriptor
+	flds	C(d_zistepu)
+	fmuls	fp_16
+	movl	%edx,pbase			// pbase = cacheblock
+	fstps	zi16stepu
+	fstps	tdivz16stepu
+	fstps	sdivz16stepu
+
+LSpanLoop:
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+	fildl	espan_t_v(%ebx)
+	fildl	espan_t_u(%ebx)
+
+	fld		%st(1)			// dv | du | dv
+	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
+	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+							//  du*d_sdivzstepu; stays in %st(2) at end
+	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+							//  s/z
+	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
+							//  du*d_tdivzstepu | du | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
+							//  du*d_tdivzstepu | du | s/z
+	faddp	%st(0),%st(2)	// dv*d_zistepv |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fmuls	C(d_zistepu)		// du*d_zistepu |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  du*d_zistepu | dv*d_zistepv | s/z
+	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+							//  du*d_tdivzstepu; stays in %st(1) at end
+	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
+	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
+							//  du*d_zistepu; stays in %st(0) at end
+							// 1/z | fp_64k | t/z | s/z
+//
+// calculate and clamp s & t
+//
+	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z
+
+//
+// point %edi to the first pixel in the span
+//
+	movl	C(d_viewbuffer),%ecx
+	movl	espan_t_v(%ebx),%eax
+	movl	%ebx,pspantemp	// preserve spans pointer
+
+	movl	C(tadjust),%edx
+	movl	C(sadjust),%esi
+	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
+	addl	%ecx,%edi
+	movl	espan_t_u(%ebx),%ecx
+	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
+	movl	espan_t_count(%ebx),%ecx
+
+//
+// now start the FDIV for the end of the span
+//
+	cmpl	$16,%ecx
+	ja		LSetupNotLast1
+
+	decl	%ecx
+	jz		LCleanup1		// if only one pixel, no need to start an FDIV
+	movl	%ecx,spancountminus1
+
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fildl	spancountminus1
+
+	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
+	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
+	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
+	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
+	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
+							//  C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
+							//  C(d_tdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)
+
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight1
+
+LCleanup1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+	jmp		LFDIVInFlight1
+
+	.align	4
+LSetupNotLast1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fadds	zi16stepu
+	fxch	%st(2)
+	fadds	sdivz16stepu
+	fxch	%st(2)
+	flds	tdivz16stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight1:
+
+	addl	s,%esi
+	addl	t,%edx
+	movl	C(bbextents),%ebx
+	movl	C(bbextentt),%ebp
+	cmpl	%ebx,%esi
+	ja		LClampHighOrLow0
+LClampReentry0:
+	movl	%esi,s
+	movl	pbase,%ebx
+	shll	$16,%esi
+	cmpl	%ebp,%edx
+	movl	%esi,sfracf
+	ja		LClampHighOrLow1
+LClampReentry1:
+	movl	%edx,t
+	movl	s,%esi					// sfrac = scans->sfrac;
+	shll	$16,%edx
+	movl	t,%eax					// tfrac = scans->tfrac;
+	sarl	$16,%esi
+	movl	%edx,tfracf
+
+//
+// calculate the texture starting address
+//
+	sarl	$16,%eax
+	movl	C(cachewidth),%edx
+	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
+	addl	%ebx,%esi
+	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
+									//           ((tfrac >> 16) * cachewidth);
+//
+// determine whether last span or not
+//
+	cmpl	$16,%ecx
+	jna		LLastSegment
+
+//
+// not the last segment; do full 16-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+	movl	snext,%eax
+	movl	tnext,%edx
+
+	movb	(%esi),%bl	// get first source texel
+	subl	$16,%ecx		// count off this segment's pixels
+	movl	C(sadjust),%ebp
+	movl	%ecx,counttemp	// remember count of remaining pixels
+
+	movl	C(tadjust),%ecx
+	movb	%bl,(%edi)	// store first dest pixel
+
+	addl	%eax,%ebp
+	addl	%edx,%ecx
+
+	movl	C(bbextents),%eax
+	movl	C(bbextentt),%edx
+
+	cmpl	$4096,%ebp
+	jl		LClampLow2
+	cmpl	%eax,%ebp
+	ja		LClampHigh2
+LClampReentry2:
+
+	cmpl	$4096,%ecx
+	jl		LClampLow3
+	cmpl	%edx,%ecx
+	ja		LClampHigh3
+LClampReentry3:
+
+	movl	%ebp,snext
+	movl	%ecx,tnext
+
+	subl	s,%ebp
+	subl	t,%ecx
+	
+//
+// set up advancetable
+//
+	movl	%ecx,%eax
+	movl	%ebp,%edx
+	sarl	$20,%eax			// whole part of per-pixel t step: (tnext-t)/16 >> 16
+	jz		LZero
+	sarl	$20,%edx			// whole part of per-pixel s step: (snext-s)/16 >> 16
+	movl	C(cachewidth),%ebx
+	imull	%ebx,%eax
+	jmp		LSetUp1
+
+LZero:
+	sarl	$20,%edx			// whole part of per-pixel s step: (snext-s)/16 >> 16
+	movl	C(cachewidth),%ebx
+
+LSetUp1:
+
+	addl	%edx,%eax			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%edx
+	movl	%eax,advancetable+4	// advance base in t
+	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$12,%ebp			// left-justify sstep fractional part
+	movl	sfracf,%ebx
+	shll	$12,%ecx			// left-justify tstep fractional part
+	movl	%eax,advancetable	// advance extra in t
+
+	movl	%ecx,tstep
+	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac
+
+	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
+	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
+	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	(%esi),%al
+	addl	%ebp,%ebx
+	movb	%al,1(%edi)
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,2(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,3(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,4(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,5(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,6(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,7(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+	movl	counttemp,%ecx
+	cmpl	$16,%ecx			// more than one segment after this?
+	ja		LSetupNotLast2	// yes
+
+	decl	%ecx
+	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
+	movl	%ecx,spancountminus1
+	fildl	spancountminus1
+
+	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
+	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
+	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
+	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
+	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
+	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
+	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
+	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
+	faddp	%st(0),%st(4)	// 64k
+
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight2
+
+	.align	4
+LSetupNotLast2:
+	fadds	zi16stepu
+	fxch	%st(2)
+	fadds	sdivz16stepu
+	fxch	%st(2)
+	flds	tdivz16stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight2:
+	movl	%ecx,counttemp
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,8(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,9(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,10(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,11(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,12(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,13(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,14(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	$16,%edi
+	movl	%edx,tfracf
+	movl	snext,%edx
+	movl	%ebx,sfracf
+	movl	tnext,%ebx
+	movl	%edx,s
+	movl	%ebx,t
+
+	movl	counttemp,%ecx		// retrieve count
+
+//
+// determine whether last span or not
+//
+	cmpl	$16,%ecx				// are there multiple segments remaining?
+	movb	%al,-1(%edi)
+	ja		LNotLastSegment		// yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+	testl	%ecx,%ecx
+	jz		LNoSteps		// just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+
+	movb	(%esi),%al		// load first texel in segment
+	movl	C(tadjust),%ebx
+	movb	%al,(%edi)		// store first pixel in segment
+	movl	C(sadjust),%eax
+
+	addl	snext,%eax
+	addl	tnext,%ebx
+
+	movl	C(bbextents),%ebp
+	movl	C(bbextentt),%edx
+
+	cmpl	$4096,%eax
+	jl		LClampLow4
+	cmpl	%ebp,%eax
+	ja		LClampHigh4
+LClampReentry4:
+	movl	%eax,snext
+
+	cmpl	$4096,%ebx
+	jl		LClampLow5
+	cmpl	%edx,%ebx
+	ja		LClampHigh5
+LClampReentry5:
+
+	cmpl	$1,%ecx			// don't bother 
+	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
+							//  of the segment length
+	subl	s,%eax
+	subl	t,%ebx
+
+	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
+	addl	%ebx,%ebx		//  reciprocal yields 16.48
+
+	imull	reciprocal_table_16-8(,%ecx,4)	// sstep = (snext - s) /
+											//  (spancount-1)
+	movl	%edx,%ebp
+
+	movl	%ebx,%eax
+	imull	reciprocal_table_16-8(,%ecx,4)	// tstep = (tnext - t) /
+											//  (spancount-1)
+LSetEntryvec:
+//
+// set up advancetable
+//
+	movl	entryvec_table_16(,%ecx,4),%ebx
+	movl	%edx,%eax
+	movl	%ebx,jumptemp		// entry point into code for RET later
+	movl	%ebp,%ecx
+	sarl	$16,%edx			// tstep >>= 16;
+	movl	C(cachewidth),%ebx
+	sarl	$16,%ecx			// sstep >>= 16;
+	imull	%ebx,%edx
+
+	addl	%ecx,%edx			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%ecx
+	movl	%edx,advancetable+4	// advance base in t
+	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$16,%ebp			// left-justify sstep fractional part
+	movl	sfracf,%ebx
+	shll	$16,%eax			// left-justify tstep fractional part
+	movl	%edx,advancetable	// advance extra in t
+
+	movl	%eax,tstep
+	movl	%ecx,%edx
+	addl	%eax,%edx
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	jmp		*jumptemp			// jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps:
+	movb	(%esi),%al		// load first texel in segment
+	subl	$15,%edi			// adjust for hardwired offset
+	jmp		LEndSpan
+
+
+LOnlyOneStep:
+	subl	s,%eax
+	subl	t,%ebx
+	movl	%eax,%ebp
+	movl	%ebx,%edx
+	jmp		LSetEntryvec
+
+//----------------------------------------
+
+.globl	Entry2_16, Entry3_16, Entry4_16, Entry5_16
+.globl	Entry6_16, Entry7_16, Entry8_16, Entry9_16
+.globl	Entry10_16, Entry11_16, Entry12_16, Entry13_16
+.globl	Entry14_16, Entry15_16, Entry16_16
+
+Entry2_16:
+	subl	$14,%edi		// adjust for hardwired offsets
+	movb	(%esi),%al
+	jmp		LEntry2_16
+
+//----------------------------------------
+
+Entry3_16:
+	subl	$13,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	jmp		LEntry3_16
+
+//----------------------------------------
+
+Entry4_16:
+	subl	$12,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry4_16
+
+//----------------------------------------
+
+Entry5_16:
+	subl	$11,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry5_16
+
+//----------------------------------------
+
+Entry6_16:
+	subl	$10,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry6_16
+
+//----------------------------------------
+
+Entry7_16:
+	subl	$9,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry7_16
+
+//----------------------------------------
+
+Entry8_16:
+	subl	$8,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry8_16
+
+//----------------------------------------
+
+Entry9_16:
+	subl	$7,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry9_16
+
+//----------------------------------------
+
+Entry10_16:
+	subl	$6,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry10_16
+
+//----------------------------------------
+
+Entry11_16:
+	subl	$5,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry11_16
+
+//----------------------------------------
+
+Entry12_16:
+	subl	$4,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry12_16
+
+//----------------------------------------
+
+Entry13_16:
+	subl	$3,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry13_16
+
+//----------------------------------------
+
+Entry14_16:
+	subl	$2,%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry14_16
+
+//----------------------------------------
+
+Entry15_16:
+	decl	%edi		// adjust for hardwired offsets
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+	jmp		LEntry15_16
+
+//----------------------------------------
+
+Entry16_16:
+	addl	%eax,%edx
+	movb	(%esi),%al
+	sbbl	%ecx,%ecx
+	addl	%ebp,%ebx
+	adcl	advancetable+4(,%ecx,4),%esi
+
+	addl	tstep,%edx
+	sbbl	%ecx,%ecx
+	movb	%al,1(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry15_16:
+	sbbl	%ecx,%ecx
+	movb	%al,2(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry14_16:
+	sbbl	%ecx,%ecx
+	movb	%al,3(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry13_16:
+	sbbl	%ecx,%ecx
+	movb	%al,4(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry12_16:
+	sbbl	%ecx,%ecx
+	movb	%al,5(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry11_16:
+	sbbl	%ecx,%ecx
+	movb	%al,6(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry10_16:
+	sbbl	%ecx,%ecx
+	movb	%al,7(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry9_16:
+	sbbl	%ecx,%ecx
+	movb	%al,8(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry8_16:
+	sbbl	%ecx,%ecx
+	movb	%al,9(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry7_16:
+	sbbl	%ecx,%ecx
+	movb	%al,10(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry6_16:
+	sbbl	%ecx,%ecx
+	movb	%al,11(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry5_16:
+	sbbl	%ecx,%ecx
+	movb	%al,12(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+	addl	tstep,%edx
+LEntry4_16:
+	sbbl	%ecx,%ecx
+	movb	%al,13(%edi)
+	addl	%ebp,%ebx
+	movb	(%esi),%al
+	adcl	advancetable+4(,%ecx,4),%esi
+LEntry3_16:
+	movb	%al,14(%edi)
+	movb	(%esi),%al
+LEntry2_16:
+
+LEndSpan:
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+	fstp %st(0)
+	fstp %st(0)
+	fstp %st(0)
+
+	movl	pspantemp,%ebx				// restore spans pointer
+	movl	espan_t_pnext(%ebx),%ebx	// point to next span
+	testl	%ebx,%ebx			// any more spans?
+	movb	%al,15(%edi)
+	jnz		LSpanLoop			// more spans
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+#endif	// id386
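
A rough C analogue of the Entry2_16..Entry16_16 entry points above: each one adjusts the destination pointer for the hardwired 1..15 offsets (Entry2_16 subtracts 14, Entry15_16 subtracts 1) and then jumps part-way into a fully unrolled loop, the same idea as Duff's device. The sketch below is a simplification that only copies bytes; the real code also steps the texture coordinates between pixels, and every name here is illustrative, not taken from the source.

	void span_tail_16(unsigned char *pdest, const unsigned char *psrc, int count)
	{
		/* count is 1..16: enter the unrolled chain part-way through,
		   as EntryN_16 adjusts %edi by 16-N and jumps to LEntryN_16;
		   the case fall-throughs are intentional */
		int adj = 16 - count;

		pdest -= adj;		/* "adjust for hardwired offsets" */
		psrc -= adj;

		switch (count) {
		case 16: pdest[0]  = psrc[0];
		case 15: pdest[1]  = psrc[1];
		case 14: pdest[2]  = psrc[2];
		case 13: pdest[3]  = psrc[3];
		case 12: pdest[4]  = psrc[4];
		case 11: pdest[5]  = psrc[5];
		case 10: pdest[6]  = psrc[6];
		case 9:  pdest[7]  = psrc[7];
		case 8:  pdest[8]  = psrc[8];
		case 7:  pdest[9]  = psrc[9];
		case 6:  pdest[10] = psrc[10];
		case 5:  pdest[11] = psrc[11];
		case 4:  pdest[12] = psrc[12];
		case 3:  pdest[13] = psrc[13];
		case 2:  pdest[14] = psrc[14];
		case 1:  pdest[15] = psrc[15];
		}
	}

Jumping into the unrolled body avoids a per-pixel loop test for the remainder of a span while keeping a single copy of the pixel-write code.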
--- /dev/null
+++ b/u/d_ifacea.h
@@ -1,0 +1,79 @@
+//
+// d_ifacea.h
+//
+// Include file for asm driver interface.
+//
+
+//
+// !!! note that this file must match the corresponding C structures in
+// d_iface.h at all times !!!
+//
+
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define ALIAS_ONSEAM				0x0020
+
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define TURB_TEX_SIZE	64		// base turbulent texture size
+
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define	CYCLE	128
+
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define	MAXHEIGHT	1024
+
+// !!! if this is changed, it must be changed in quakedef.h too !!!
+#define CACHE_SIZE	32		// used to align key data structures
+
+// particle_t structure
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+// driver-usable fields
+#define pt_org				0
+#define pt_color			12
+// drivers never touch the following fields
+#define pt_next				16
+#define pt_vel				20
+#define pt_ramp				32
+#define pt_die				36
+#define pt_type				40
+#define pt_size				44
+
+#define PARTICLE_Z_CLIP	8.0
+
+// finalvert_t structure
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define fv_v				0	// !!! if this is moved, cases where the !!!
+								// !!! address of this field is pushed in !!!
+								// !!! d_polysa.s must be changed !!!
+#define fv_flags			24
+#define fv_reserved			28
+#define fv_size				32
+#define fv_shift			5
+
+
+// stvert_t structure
+// !!! if this is changed, it must be changed in modelgen.h too !!!
+#define stv_onseam	0
+#define stv_s		4
+#define stv_t		8
+#define stv_size	12
+
+
+// trivertx_t structure
+// !!! if this is changed, it must be changed in modelgen.h too !!!
+#define tv_v				0
+#define tv_lightnormalindex	3
+#define tv_size				4
+
+// affinetridesc_t structure
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define atd_pskin			0
+#define atd_pskindesc		4
+#define atd_skinwidth		8
+#define atd_skinheight		12
+#define atd_ptriangles		16
+#define atd_pfinalverts		20
+#define atd_numtriangles	24
+#define atd_drawtype		28
+#define atd_seamfixupX16	32
+#define atd_size			36
+
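
The pt_* offsets above are consistent with a C layout along these lines, assuming 4-byte floats, ints and pointers as on ia32; the field types are inferred from the offsets, not copied from d_iface.h.

	typedef struct particle_s {
		/* driver-usable fields */
		float			org[3];		/* pt_org   =  0 */
		float			color;		/* pt_color = 12 */
		/* drivers never touch the following fields */
		struct particle_s	*next;		/* pt_next  = 16 */
		float			vel[3];		/* pt_vel   = 20 */
		float			ramp;		/* pt_ramp  = 32 */
		float			die;		/* pt_die   = 36 */
		int			type;		/* pt_type  = 40 */
	} particle_t;					/* pt_size  = 44 */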
--- /dev/null
+++ b/u/d_parta.s
@@ -1,0 +1,458 @@
+//
+// d_parta.s
+// x86 assembly-language 8-bpp particle-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "d_ifacea.h"
+#include "asm_draw.h"
+
+#ifdef	id386
+
+//----------------------------------------------------------------------
+// 8-bpp particle drawing code.
+//----------------------------------------------------------------------
+
+//FIXME: comments, full optimization
+
+//----------------------------------------------------------------------
+// 8-bpp particle queueing code.
+//----------------------------------------------------------------------
+
+	.text
+
+#define P	12+4
+
+	.align 4
+.globl C(D_DrawParticle)
+C(D_DrawParticle):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi				// preserve register variables
+	pushl	%ebx
+
+	movl	P(%esp),%edi
+
+// FIXME: better FP overlap in general here
+
+// transform point
+//	VectorSubtract (p->org, r_origin, local);
+	flds	C(r_origin)
+	fsubrs	pt_org(%edi)
+	flds	pt_org+4(%edi)
+	fsubs	C(r_origin)+4
+	flds	pt_org+8(%edi)
+	fsubs	C(r_origin)+8
+	fxch	%st(2)			// local[0] | local[1] | local[2]
+
+//	transformed[2] = DotProduct(local, r_ppn);		
+	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
+	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
+	flds	C(r_ppn)+4	// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
+	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
+	flds	C(r_ppn)+8	// r_ppn[2] | dot1 | dot0 | local[0] |
+						//  local[1] | local[2]
+	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
+	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
+	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
+						  //  local[2]
+	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
+	fld		%st(0)		// z | z | local[0] | local[1] |
+						//  local[2]
+	fdivrs	float_1		// 1/z | z | local[0] | local[1] | local[2]
+	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]
+
+//	if (transformed[2] < PARTICLE_Z_CLIP)
+//		return;
+	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
+	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z
+
+	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
+	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z 
+	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
+						//  local[1] | 1/z 
+
+	fnstsw	%ax
+	testb	$1,%ah
+	jnz		LPop6AndDone
+
+//	transformed[1] = DotProduct(local, r_pup);
+	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z 
+	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
+						//  local[0] | local[1] | 1/z 
+	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
+						//  local[1] | 1/z 
+	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
+						//  local[1] | 1/z 
+	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
+						//  local[1] | 1/z 
+	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z 
+	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z 
+
+//	transformed[0] = DotProduct(local, r_pright);
+	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
+	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
+	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
+	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
+	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
+	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
+	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
+
+	faddp	%st(0),%st(1)	// x | y | 1/z
+	fxch	%st(1)			// y | x | 1/z
+
+// project the point
+	fmul	%st(2),%st(0)	// y/z | x | 1/z
+	fxch	%st(1)			// x | y/z | 1/z
+	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
+	fxch	%st(1)			// y/z | x/z | 1/z
+	fsubrs	C(ycenter)		// v | x/z | 1/z
+	fxch	%st(1)			// x/z | v | 1/z
+	fadds	C(xcenter)		// u | v | 1/z
+// FIXME: preadjust xcenter and ycenter
+	fxch	%st(1)			// v | u | 1/z
+	fadds	float_point5	// v | u | 1/z
+	fxch	%st(1)			// u | v | 1/z
+	fadds	float_point5	// u | v | 1/z
+	fxch	%st(2)			// 1/z | v | u
+	fmuls	DP_32768		// 1/z * 0x8000 | v | u
+	fxch	%st(2)			// u | v | 1/z * 0x8000
+
+// FIXME: use Terje's fp->int trick here?
+// FIXME: check we're getting proper rounding here
+	fistpl	DP_u			// v | 1/z * 0x8000
+	fistpl	DP_v			// 1/z * 0x8000
+
+	movl	DP_u,%eax
+	movl	DP_v,%edx
+
+// if ((v > d_vrectbottom_particle) || 
+// 	(u > d_vrectright_particle) ||
+// 	(v < d_vrecty) ||
+// 	(u < d_vrectx))
+// {
+// 	continue;
+// }
+
+	movl	C(d_vrectbottom_particle),%ebx
+	movl	C(d_vrectright_particle),%ecx
+	cmpl	%ebx,%edx
+	jg		LPop1AndDone
+	cmpl	%ecx,%eax
+	jg		LPop1AndDone
+	movl	C(d_vrecty),%ebx
+	movl	C(d_vrectx),%ecx
+	cmpl	%ebx,%edx
+	jl		LPop1AndDone
+
+	cmpl	%ecx,%eax
+	jl		LPop1AndDone
+
+	flds	pt_color(%edi)	// color | 1/z * 0x8000
+// FIXME: use Terje's fast fp->int trick?
+	fistpl	DP_Color		// 1/z * 0x8000
+
+	movl	C(d_viewbuffer),%ebx
+
+	addl	%eax,%ebx
+	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel
+
+	imull	C(d_zrowbytes),%edx		// point to the z pixel
+
+	leal	(%edx,%eax,2),%edx
+	movl	C(d_pzbuffer),%eax
+
+	fistpl	izi
+
+	addl	%ebx,%edi
+	addl	%eax,%edx
+
+// pix = izi >> d_pix_shift;
+
+	movl	izi,%eax
+	movl	C(d_pix_shift),%ecx
+	shrl	%cl,%eax
+	movl	izi,%ebp
+
+// if (pix < d_pix_min)
+// 		pix = d_pix_min;
+// else if (pix > d_pix_max)
+//  	pix = d_pix_max;
+
+	movl	C(d_pix_min),%ebx
+	movl	C(d_pix_max),%ecx
+	cmpl	%ebx,%eax
+	jnl		LTestPixMax
+	movl	%ebx,%eax
+	jmp		LTestDone
+
+LTestPixMax:
+	cmpl	%ecx,%eax
+	jng		LTestDone
+	movl	%ecx,%eax
+LTestDone:
+
+	movb	DP_Color,%ch
+
+	movl	C(d_y_aspect_shift),%ebx
+	testl	%ebx,%ebx
+	jnz		LDefault
+
+	cmpl	$4,%eax
+	ja		LDefault
+
+	jmp		DP_EntryTable-4(,%eax,4)
+
+// 1x1
+.globl	DP_1x1
+DP_1x1:
+	cmpw	%bp,(%edx)		// just one pixel to do
+	jg		LDone
+	movw	%bp,(%edx)
+	movb	%ch,(%edi)
+	jmp		LDone
+
+// 2x2
+.globl	DP_2x2
+DP_2x2:
+	pushl	%esi
+	movl	C(screenwidth),%ebx
+	movl	C(d_zrowbytes),%esi
+
+	cmpw	%bp,(%edx)
+	jg		L2x2_1
+	movw	%bp,(%edx)
+	movb	%ch,(%edi)
+L2x2_1:
+	cmpw	%bp,2(%edx)
+	jg		L2x2_2
+	movw	%bp,2(%edx)
+	movb	%ch,1(%edi)
+L2x2_2:
+	cmpw	%bp,(%edx,%esi,1)
+	jg		L2x2_3
+	movw	%bp,(%edx,%esi,1)
+	movb	%ch,(%edi,%ebx,1)
+L2x2_3:
+	cmpw	%bp,2(%edx,%esi,1)
+	jg		L2x2_4
+	movw	%bp,2(%edx,%esi,1)
+	movb	%ch,1(%edi,%ebx,1)
+L2x2_4:
+
+	popl	%esi
+	jmp		LDone
+
+// 3x3
+.globl	DP_3x3
+DP_3x3:
+	pushl	%esi
+	movl	C(screenwidth),%ebx
+	movl	C(d_zrowbytes),%esi
+
+	cmpw	%bp,(%edx)
+	jg		L3x3_1
+	movw	%bp,(%edx)
+	movb	%ch,(%edi)
+L3x3_1:
+	cmpw	%bp,2(%edx)
+	jg		L3x3_2
+	movw	%bp,2(%edx)
+	movb	%ch,1(%edi)
+L3x3_2:
+	cmpw	%bp,4(%edx)
+	jg		L3x3_3
+	movw	%bp,4(%edx)
+	movb	%ch,2(%edi)
+L3x3_3:
+
+	cmpw	%bp,(%edx,%esi,1)
+	jg		L3x3_4
+	movw	%bp,(%edx,%esi,1)
+	movb	%ch,(%edi,%ebx,1)
+L3x3_4:
+	cmpw	%bp,2(%edx,%esi,1)
+	jg		L3x3_5
+	movw	%bp,2(%edx,%esi,1)
+	movb	%ch,1(%edi,%ebx,1)
+L3x3_5:
+	cmpw	%bp,4(%edx,%esi,1)
+	jg		L3x3_6
+	movw	%bp,4(%edx,%esi,1)
+	movb	%ch,2(%edi,%ebx,1)
+L3x3_6:
+
+	cmpw	%bp,(%edx,%esi,2)
+	jg		L3x3_7
+	movw	%bp,(%edx,%esi,2)
+	movb	%ch,(%edi,%ebx,2)
+L3x3_7:
+	cmpw	%bp,2(%edx,%esi,2)
+	jg		L3x3_8
+	movw	%bp,2(%edx,%esi,2)
+	movb	%ch,1(%edi,%ebx,2)
+L3x3_8:
+	cmpw	%bp,4(%edx,%esi,2)
+	jg		L3x3_9
+	movw	%bp,4(%edx,%esi,2)
+	movb	%ch,2(%edi,%ebx,2)
+L3x3_9:
+
+	popl	%esi
+	jmp		LDone
+
+
+// 4x4
+.globl	DP_4x4
+DP_4x4:
+	pushl	%esi
+	movl	C(screenwidth),%ebx
+	movl	C(d_zrowbytes),%esi
+
+	cmpw	%bp,(%edx)
+	jg		L4x4_1
+	movw	%bp,(%edx)
+	movb	%ch,(%edi)
+L4x4_1:
+	cmpw	%bp,2(%edx)
+	jg		L4x4_2
+	movw	%bp,2(%edx)
+	movb	%ch,1(%edi)
+L4x4_2:
+	cmpw	%bp,4(%edx)
+	jg		L4x4_3
+	movw	%bp,4(%edx)
+	movb	%ch,2(%edi)
+L4x4_3:
+	cmpw	%bp,6(%edx)
+	jg		L4x4_4
+	movw	%bp,6(%edx)
+	movb	%ch,3(%edi)
+L4x4_4:
+
+	cmpw	%bp,(%edx,%esi,1)
+	jg		L4x4_5
+	movw	%bp,(%edx,%esi,1)
+	movb	%ch,(%edi,%ebx,1)
+L4x4_5:
+	cmpw	%bp,2(%edx,%esi,1)
+	jg		L4x4_6
+	movw	%bp,2(%edx,%esi,1)
+	movb	%ch,1(%edi,%ebx,1)
+L4x4_6:
+	cmpw	%bp,4(%edx,%esi,1)
+	jg		L4x4_7
+	movw	%bp,4(%edx,%esi,1)
+	movb	%ch,2(%edi,%ebx,1)
+L4x4_7:
+	cmpw	%bp,6(%edx,%esi,1)
+	jg		L4x4_8
+	movw	%bp,6(%edx,%esi,1)
+	movb	%ch,3(%edi,%ebx,1)
+L4x4_8:
+
+	leal	(%edx,%esi,2),%edx
+	leal	(%edi,%ebx,2),%edi
+
+	cmpw	%bp,(%edx)
+	jg		L4x4_9
+	movw	%bp,(%edx)
+	movb	%ch,(%edi)
+L4x4_9:
+	cmpw	%bp,2(%edx)
+	jg		L4x4_10
+	movw	%bp,2(%edx)
+	movb	%ch,1(%edi)
+L4x4_10:
+	cmpw	%bp,4(%edx)
+	jg		L4x4_11
+	movw	%bp,4(%edx)
+	movb	%ch,2(%edi)
+L4x4_11:
+	cmpw	%bp,6(%edx)
+	jg		L4x4_12
+	movw	%bp,6(%edx)
+	movb	%ch,3(%edi)
+L4x4_12:
+
+	cmpw	%bp,(%edx,%esi,1)
+	jg		L4x4_13
+	movw	%bp,(%edx,%esi,1)
+	movb	%ch,(%edi,%ebx,1)
+L4x4_13:
+	cmpw	%bp,2(%edx,%esi,1)
+	jg		L4x4_14
+	movw	%bp,2(%edx,%esi,1)
+	movb	%ch,1(%edi,%ebx,1)
+L4x4_14:
+	cmpw	%bp,4(%edx,%esi,1)
+	jg		L4x4_15
+	movw	%bp,4(%edx,%esi,1)
+	movb	%ch,2(%edi,%ebx,1)
+L4x4_15:
+	cmpw	%bp,6(%edx,%esi,1)
+	jg		L4x4_16
+	movw	%bp,6(%edx,%esi,1)
+	movb	%ch,3(%edi,%ebx,1)
+L4x4_16:
+
+	popl	%esi
+	jmp		LDone
+
+// default case, handling any size particle
+LDefault:
+
+// count = pix << d_y_aspect_shift;
+
+	movl	%eax,%ebx
+	movl	%eax,DP_Pix
+	movb	C(d_y_aspect_shift),%cl
+	shll	%cl,%ebx
+
+// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
+// {
+// 	for (i=0 ; i<pix ; i++)
+// 	{
+// 		if (pz[i] <= izi)
+// 		{
+// 			pz[i] = izi;
+// 			pdest[i] = color;
+// 		}
+// 	}
+// }
+
+LGenRowLoop:
+	movl	DP_Pix,%eax
+
+LGenColLoop:
+	cmpw	%bp,-2(%edx,%eax,2)
+	jg		LGSkip
+	movw	%bp,-2(%edx,%eax,2)
+	movb	%ch,-1(%edi,%eax,1)
+LGSkip:
+	decl	%eax			// --pix
+	jnz		LGenColLoop
+
+	addl	C(d_zrowbytes),%edx
+	addl	C(screenwidth),%edi
+
+	decl	%ebx			// --count
+	jnz		LGenRowLoop
+
+LDone:
+	popl	%ebx				// restore register variables
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+LPop6AndDone:
+	fstp	%st(0)
+	fstp	%st(0)
+	fstp	%st(0)
+	fstp	%st(0)
+	fstp	%st(0)
+LPop1AndDone:
+	fstp	%st(0)
+	jmp		LDone
+
+#endif	// id386
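
A compilable restatement of the commented-out C inside D_DrawParticle's default (LDefault) path: clamp the projected particle size, then fill a pix-wide block row by row, writing a pixel only where it passes the z test. The globals named in the comments are passed as parameters here purely for illustration.

	void draw_particle_block(unsigned char *pdest, short *pz, int izi,
		int pix, int d_pix_min, int d_pix_max, int d_y_aspect_shift,
		int d_zwidth, int screenwidth, unsigned char color)
	{
		int i, count;

		if (pix < d_pix_min)
			pix = d_pix_min;
		else if (pix > d_pix_max)
			pix = d_pix_max;

		count = pix << d_y_aspect_shift;

		for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) {
			for (i = 0; i < pix; i++) {
				if (pz[i] <= izi) {	/* particle is in front */
					pz[i] = izi;
					pdest[i] = color;
				}
			}
		}
	}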
--- /dev/null
+++ b/u/d_polysa.s
@@ -1,0 +1,1723 @@
+//
+// d_polysa.s
+// x86 assembly-language polygon model drawing code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef	id386
+
+// !!! if this is changed, it must be changed in d_polyse.c too !!!
+#define DPS_MAXSPANS			MAXHEIGHT+1	
+									// 1 extra for spanpackage that marks end
+
+//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
+#define SPAN_SIZE (1024+1+1+1)*32
+
+
+	.data
+
+	.align	4
+p10_minus_p20:	.single		0
+p01_minus_p21:	.single		0
+temp0:			.single		0
+temp1:			.single		0
+Ltemp:			.single		0
+
+aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
+				.long	LDraw4, LDraw3, LDraw2, LDraw1
+
+lzistepx:		.long	0
+
+
+	.text
+
+	.extern C(D_PolysetSetEdgeTable)
+	.extern C(D_RasterizeAliasPolySmooth)
+
+//----------------------------------------------------------------------
+// affine triangle gradient calculation code
+//----------------------------------------------------------------------
+
+#define skinwidth	4+0
+
+.globl C(D_PolysetCalcGradients)
+C(D_PolysetCalcGradients):
+
+//	p00_minus_p20 = r_p0[0] - r_p2[0];
+//	p01_minus_p21 = r_p0[1] - r_p2[1];
+//	p10_minus_p20 = r_p1[0] - r_p2[0];
+//	p11_minus_p21 = r_p1[1] - r_p2[1];
+//
+//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
+//			     p00_minus_p20 * p11_minus_p21);
+//
+//	ystepdenominv = -xstepdenominv;
+
+	fildl	C(r_p0)+0		// r_p0[0]
+	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
+	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
+	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
+	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
+	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
+							//  r_p2[0] | r_p0[0]
+	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
+							//  r_p2[0] | r_p0[0]
+	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
+							//  r_p2[0] | r_p0[0]
+	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
+							//  r_p2[0] | r_p0[0]
+	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
+							//  r_p1[1] | r_p2[0] | r_p0[0]
+	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
+							//  r_p1[1] | r_p2[0] | p10_minus_p20
+	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
+							//  p00_minus_p20 | p10_minus_p20
+	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
+							//  p00_minus_p20 | p10_minus_p20
+	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
+							//  p00_minus_p20 | p10_minus_p20
+	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
+							//  p00_minus_p20 | p10_minus_p20
+	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
+							//  p00_minus_p20 | p10_minus_p20
+	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
+							//  p00_minus_p20 | d_xdenom
+	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
+							//  p00_minus_p20 | d_xdenom
+	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
+	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21
+
+//// ceil () for light so positive steps are exaggerated, negative steps
+//// diminished,  pushing us away from underflow toward overflow. Underflow is
+//// very visible, overflow is very unlikely, because of ambient lighting
+//	t0 = r_p0[4] - r_p2[4];
+//	t1 = r_p1[4] - r_p2[4];
+
+	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+
+//	r_lstepx = (int)
+//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
+//	r_lstepy = (int)
+//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
+
+	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
+							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(2)			// xstepdenominv |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmuls	float_minus_1	// ystepdenominv |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//   xstepdenominv |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//   | ystepdenominv | xstepdenominv |
+							//   p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//   xstepdenominv | ystepdenominv |
+							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv |
+							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//  xstepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fldcw	ceil_cw
+	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fldcw	single_cw
+
+//	t0 = r_p0[2] - r_p2[2];
+//	t1 = r_p1[2] - r_p2[2];
+
+	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
+							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+
+//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+//			xstepdenominv);
+//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+//			ystepdenominv);
+
+	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv
+	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
+							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//   ystepdenominv |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//   ystepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//  xstepdenominv |
+							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv |
+							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//  xstepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+
+//	t0 = r_p0[3] - r_p2[3];
+//	t1 = r_p1[3] - r_p2[3];
+
+	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+
+//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+//			xstepdenominv);
+//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+//			ystepdenominv);
+
+	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+							//  t0*p11_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
+							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//   ystepdenominv |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//  xstepdenominv |
+							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
+							//  ystepdenominv |
+							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
+							//  xstepdenominv | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+
+//	t0 = r_p0[5] - r_p2[5];
+//	t1 = r_p1[5] - r_p2[5];
+
+	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  p11_minus_p21
+	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
+	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+
+//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+//			xstepdenominv);
+//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+//			ystepdenominv);
+
+	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | p11_minus_p21
+	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | t0*p11_minus_p21
+	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | t0*p11_minus_p21
+	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
+							//  p00_minus_p20 | t0*p11_minus_p21
+	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 |
+							//  t0*p11_minus_p21
+	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
+							//  xstepdenominv | p00_minus_p20 |
+							//  t0*p11_minus_p21
+	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  t0*p11_minus_p21
+	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
+							//  t0*p11_minus_p21
+	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
+							//  ystepdenominv | xstepdenominv |
+							//  t1*p00_minus_p20 | t0*p11_minus_p21
+	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
+							//  ystepdenominv | xstepdenominv |
+							//  t1*p00_minus_p20 | t0*p10_minus_p20
+	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  ystepdenominv | xstepdenominv |
+							//  t1*p00_minus_p20 | t0*p10_minus_p20
+	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
+							//  xstepdenominv |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  t0*p10_minus_p20
+	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20
+	fxch	%st(1)			// xstepdenominv | ystepdenominv |
+							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20
+	fmulp	%st(0),%st(2)	// ystepdenominv |
+							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
+							//  xstepdenominv |
+							//  t1*p00_minus_p20 - t0*p10_minus_p20
+	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
+							//  xstepdenominv |
+							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
+							//  ystepdenominv
+	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
+							//  ystepdenominv
+	fistpl	C(r_zistepy)
+
+//	a_sstepxfrac = r_sstepx << 16;
+//	a_tstepxfrac = r_tstepx << 16;
+//
+//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
+//			(r_sstepx >> 16);
+
+	movl	C(r_sstepx),%eax
+	movl	C(r_tstepx),%edx
+	shll	$16,%eax
+	shll	$16,%edx
+	movl	%eax,C(a_sstepxfrac)
+	movl	%edx,C(a_tstepxfrac)
+
+	movl	C(r_sstepx),%ecx
+	movl	C(r_tstepx),%eax
+	sarl	$16,%ecx
+	sarl	$16,%eax
+	imull	skinwidth(%esp)
+	addl	%ecx,%eax
+	movl	%eax,C(a_ststepxwhole)
+
+	ret
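
Condensing the commented-out C above: each interpolant's x and y steps come from the same pair of determinant expressions scaled by xstepdenominv and its negation, with only the light steps rounded up via ceil(). A minimal sketch, using out-parameters in place of the r_* globals; r_pN[] hold u, v, s, t, light, 1/z as set up in D_DrawNonSubdiv below.

	#include <math.h>

	void calc_gradients_sketch(const int r_p0[6], const int r_p1[6],
		const int r_p2[6], int *lstepx, int *lstepy, int *sstepx, int *sstepy)
	{
		float p00_minus_p20 = r_p0[0] - r_p2[0];
		float p01_minus_p21 = r_p0[1] - r_p2[1];
		float p10_minus_p20 = r_p1[0] - r_p2[0];
		float p11_minus_p21 = r_p1[1] - r_p2[1];
		float xstepdenominv, ystepdenominv, t0, t1;

		xstepdenominv = 1.0f / (p10_minus_p20 * p01_minus_p21 -
				p00_minus_p20 * p11_minus_p21);
		ystepdenominv = -xstepdenominv;

		/* light: ceil() exaggerates positive steps and diminishes negative
		   ones, pushing away from visible underflow */
		t0 = r_p0[4] - r_p2[4];
		t1 = r_p1[4] - r_p2[4];
		*lstepx = (int)ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
		*lstepy = (int)ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

		/* s (and likewise t and 1/z in the assembly): plain truncation */
		t0 = r_p0[2] - r_p2[2];
		t1 = r_p1[2] - r_p2[2];
		*sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
		*sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
	}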
+
+
+//----------------------------------------------------------------------
+// recursive subdivision affine triangle drawing code
+//
+// not C-callable because of stdcall return
+//----------------------------------------------------------------------
+
+#define lp1	4+16
+#define lp2	8+16
+#define lp3	12+16
+
+.globl C(D_PolysetRecursiveTriangle)
+C(D_PolysetRecursiveTriangle):
+	pushl	%ebp				// preserve caller stack frame pointer
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+
+//	int		*temp;
+//	int		d;
+//	int		new[6];
+//	int		i;
+//	int		z;
+//	short	*zbuf;
+	movl	lp2(%esp),%esi
+	movl	lp1(%esp),%ebx
+	movl	lp3(%esp),%edi
+
+//	d = lp2[0] - lp1[0];
+//	if (d < -1 || d > 1)
+//		goto split;
+	movl	0(%esi),%eax
+
+	movl	0(%ebx),%edx
+	movl	4(%esi),%ebp
+
+	subl	%edx,%eax
+	movl	4(%ebx),%ecx
+
+	subl	%ecx,%ebp
+	incl	%eax
+
+	cmpl	$2,%eax
+	ja		LSplit
+
+//	d = lp2[1] - lp1[1];
+//	if (d < -1 || d > 1)
+//		goto split;
+	movl	0(%edi),%eax
+	incl	%ebp
+
+	cmpl	$2,%ebp
+	ja		LSplit
+
+//	d = lp3[0] - lp2[0];
+//	if (d < -1 || d > 1)
+//		goto split2;
+	movl	0(%esi),%edx
+	movl	4(%edi),%ebp
+
+	subl	%edx,%eax
+	movl	4(%esi),%ecx
+
+	subl	%ecx,%ebp
+	incl	%eax
+
+	cmpl	$2,%eax
+	ja		LSplit2
+
+//	d = lp3[1] - lp2[1];
+//	if (d < -1 || d > 1)
+//		goto split2;
+	movl	0(%ebx),%eax
+	incl	%ebp
+
+	cmpl	$2,%ebp
+	ja		LSplit2
+
+//	d = lp1[0] - lp3[0];
+//	if (d < -1 || d > 1)
+//		goto split3;
+	movl	0(%edi),%edx
+	movl	4(%ebx),%ebp
+
+	subl	%edx,%eax
+	movl	4(%edi),%ecx
+
+	subl	%ecx,%ebp
+	incl	%eax
+
+	incl	%ebp
+	movl	%ebx,%edx
+
+	cmpl	$2,%eax
+	ja		LSplit3
+
+//	d = lp1[1] - lp3[1];
+//	if (d < -1 || d > 1)
+//	{
+//split3:
+//		temp = lp1;
+//		lp1 = lp3;
+//		lp3 = lp2;
+//		lp2 = temp;
+//		goto split;
+//	}
+//
+//	return;			// entire tri is filled
+//
+	cmpl	$2,%ebp
+	jna		LDone
+
+LSplit3:
+	movl	%edi,%ebx
+	movl	%esi,%edi
+	movl	%edx,%esi
+	jmp		LSplit
+
+//split2:
+LSplit2:
+
+//	temp = lp1;
+//	lp1 = lp2;
+//	lp2 = lp3;
+//	lp3 = temp;
+	movl	%ebx,%eax
+	movl	%esi,%ebx
+	movl	%edi,%esi
+	movl	%eax,%edi
+
+//split:
+LSplit:
+
+	subl	$24,%esp		// allocate space for a new vertex
+
+//// split this edge
+//	new[0] = (lp1[0] + lp2[0]) >> 1;
+//	new[1] = (lp1[1] + lp2[1]) >> 1;
+//	new[2] = (lp1[2] + lp2[2]) >> 1;
+//	new[3] = (lp1[3] + lp2[3]) >> 1;
+//	new[5] = (lp1[5] + lp2[5]) >> 1;
+	movl	8(%ebx),%eax
+
+	movl	8(%esi),%edx
+	movl	12(%ebx),%ecx
+
+	addl	%edx,%eax
+	movl	12(%esi),%edx
+
+	sarl	$1,%eax
+	addl	%edx,%ecx
+
+	movl	%eax,8(%esp)
+	movl	20(%ebx),%eax
+
+	sarl	$1,%ecx
+	movl	20(%esi),%edx
+
+	movl	%ecx,12(%esp)
+	addl	%edx,%eax
+
+	movl	0(%ebx),%ecx
+	movl	0(%esi),%edx
+
+	sarl	$1,%eax
+	addl	%ecx,%edx
+
+	movl	%eax,20(%esp)
+	movl	4(%ebx),%eax
+
+	sarl	$1,%edx
+	movl	4(%esi),%ebp
+
+	movl	%edx,0(%esp)
+	addl	%eax,%ebp
+
+	sarl	$1,%ebp
+	movl	%ebp,4(%esp)
+
+//// draw the point if splitting a leading edge
+//	if (lp2[1] > lp1[1])
+//		goto nodraw;
+	cmpl	%eax,4(%esi)
+	jg		LNoDraw
+
+//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
+//		goto nodraw;
+	movl	0(%esi),%edx
+	jnz		LDraw
+
+	cmpl	%ecx,%edx
+	jl		LNoDraw
+
+LDraw:
+
+// z = new[5] >> 16;
+	movl	20(%esp),%edx
+	movl	4(%esp),%ecx
+
+	sarl	$16,%edx
+	movl	0(%esp),%ebp
+
+//	zbuf = zspantable[new[1]] + new[0];
+	movl	C(zspantable)(,%ecx,4),%eax
+
+//	if (z >= *zbuf)
+//	{
+	cmpw	(%eax,%ebp,2),%dx
+	jnge	LNoDraw
+
+//		int		pix;
+//		
+//		*zbuf = z;
+	movw	%dx,(%eax,%ebp,2)
+
+//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
+	movl	12(%esp),%eax
+
+	sarl	$16,%eax
+	movl	8(%esp),%edx
+
+	sarl	$16,%edx
+	subl	%ecx,%ecx
+
+	movl	C(skintable)(,%eax,4),%eax
+	movl	4(%esp),%ebp
+
+	movb	(%eax,%edx,),%cl
+	movl	C(d_pcolormap),%edx
+
+	movb	(%edx,%ecx,),%dl
+	movl	0(%esp),%ecx
+
+//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
+	movl	C(d_scantable)(,%ebp,4),%eax
+	addl	%eax,%ecx
+	movl	C(d_viewbuffer),%eax
+	movb	%dl,(%eax,%ecx,1)
+
+//	}
+//
+//nodraw:
+LNoDraw:
+
+//// recursively continue
+//	D_PolysetRecursiveTriangle (lp3, lp1, new);
+	pushl	%esp
+	pushl	%ebx
+	pushl	%edi
+	call	C(D_PolysetRecursiveTriangle)
+
+//	D_PolysetRecursiveTriangle (lp3, new, lp2);
+	movl	%esp,%ebx
+	pushl	%esi
+	pushl	%ebx
+	pushl	%edi
+	call	C(D_PolysetRecursiveTriangle)
+	addl	$24,%esp
+
+LDone:
+	popl	%ebx				// restore register variables
+	popl	%edi
+	popl	%esi
+	popl	%ebp				// restore caller stack frame pointer
+	ret		$12
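
A condensed C sketch of the recursion, assembled from the commented-out C in the routine above; the midpoint draw with its z-buffer test (the LDraw block) is elided here.

	static void recursive_triangle(int *lp1, int *lp2, int *lp3)
	{
		int	*temp;
		int	d;
		int	new[6];

		/* if no edge is longer than a pixel, the triangle is already filled */
		d = lp2[0] - lp1[0];
		if (d < -1 || d > 1) goto split;
		d = lp2[1] - lp1[1];
		if (d < -1 || d > 1) goto split;

		d = lp3[0] - lp2[0];
		if (d < -1 || d > 1) goto split2;
		d = lp3[1] - lp2[1];
		if (d < -1 || d > 1) goto split2;

		d = lp1[0] - lp3[0];
		if (d < -1 || d > 1) goto split3;
		d = lp1[1] - lp3[1];
		if (d >= -1 && d <= 1)
			return;		/* entire tri is filled */

	split3:	/* long edge is lp3..lp1: rotate so it becomes lp1..lp2 */
		temp = lp1; lp1 = lp3; lp3 = lp2; lp2 = temp;
		goto split;

	split2:	/* long edge is lp2..lp3: rotate so it becomes lp1..lp2 */
		temp = lp1; lp1 = lp2; lp2 = lp3; lp3 = temp;

	split:	/* split the lp1..lp2 edge at its midpoint: u, v, s, t and 1/z */
		new[0] = (lp1[0] + lp2[0]) >> 1;
		new[1] = (lp1[1] + lp2[1]) >> 1;
		new[2] = (lp1[2] + lp2[2]) >> 1;
		new[3] = (lp1[3] + lp2[3]) >> 1;
		new[5] = (lp1[5] + lp2[5]) >> 1;

		/* ...draw the new point here if splitting a leading edge (LDraw)... */

		recursive_triangle(lp3, lp1, new);
		recursive_triangle(lp3, new, lp2);
	}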
+
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for affine polygons, with smooth
+// shading and no transparency
+//----------------------------------------------------------------------
+
+#define pspans	4+8
+
+.globl C(D_PolysetAff8Start)
+C(D_PolysetAff8Start):
+
+.globl C(D_PolysetDrawSpans8)
+C(D_PolysetDrawSpans8):
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+	movl	pspans(%esp),%esi	// point to the first span descriptor
+	movl	C(r_zistepx),%ecx
+
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+
+	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
+	movl	spanpackage_t_count(%esi),%edx
+
+	movl	%ecx,lzistepx
+
+LSpanLoop:
+
+//		lcount = d_aspancount - pspanpackage->count;
+//
+//		errorterm += erroradjustup;
+//		if (errorterm >= 0)
+//		{
+//			d_aspancount += d_countextrastep;
+//			errorterm -= erroradjustdown;
+//		}
+//		else
+//		{
+//			d_aspancount += ubasestep;
+//		}
+	movl	C(d_aspancount),%eax
+	subl	%edx,%eax
+
+	movl	C(erroradjustup),%edx
+	movl	C(errorterm),%ebx
+	addl	%edx,%ebx
+	js		LNoTurnover
+
+	movl	C(erroradjustdown),%edx
+	movl	C(d_countextrastep),%edi
+	subl	%edx,%ebx
+	movl	C(d_aspancount),%ebp
+	movl	%ebx,C(errorterm)
+	addl	%edi,%ebp
+	movl	%ebp,C(d_aspancount)
+	jmp		LRightEdgeStepped
+
+LNoTurnover:
+	movl	C(d_aspancount),%edi
+	movl	C(ubasestep),%edx
+	movl	%ebx,C(errorterm)
+	addl	%edx,%edi
+	movl	%edi,C(d_aspancount)
+
+LRightEdgeStepped:
+	cmpl	$1,%eax
+
+	jl		LNextSpan
+	jz		LExactlyOneLong
+
+//
+// set up advancetable
+//
+	movl	C(a_ststepxwhole),%ecx
+	movl	C(r_affinetridesc)+atd_skinwidth,%edx
+
+	movl	%ecx,advancetable+4	// advance base in t
+	addl	%edx,%ecx
+
+	movl	%ecx,advancetable	// advance extra in t
+	movl	C(a_tstepxfrac),%ecx
+
+	movw	C(r_lstepx),%cx
+	movl	%eax,%edx			// count
+
+	movl	%ecx,tstep
+	addl	$7,%edx
+
+	shrl	$3,%edx				// count of full and partial loops
+	movl	spanpackage_t_sfrac(%esi),%ebx
+
+	movw	%dx,%bx
+	movl	spanpackage_t_pz(%esi),%ecx
+
+	negl	%eax
+
+	movl	spanpackage_t_pdest(%esi),%edi
+	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1
+
+	subl	%eax,%edi	// compensate for hardwired offsets
+	subl	%eax,%ecx
+
+	subl	%eax,%ecx
+	movl	spanpackage_t_tfrac(%esi),%edx
+
+	movw	spanpackage_t_light(%esi),%dx
+	movl	spanpackage_t_zi(%esi),%ebp
+
+	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
+	pushl	%esi
+
+	movl	spanpackage_t_ptex(%esi),%esi
+	jmp		aff8entryvec_table(,%eax,4)
+
+// %bx = count of full and partial loops
+// %ebx high word = sfrac
+// %ecx = pz
+// %dx = light
+// %edx high word = tfrac
+// %esi = ptex
+// %edi = pdest
+// %ebp = 1/z
+// tstep low word = C(r_lstepx)
+// tstep high word = C(a_tstepxfrac)
+// C(a_sstepxfrac) low word = 0
+// C(a_sstepxfrac) high word = C(a_sstepxfrac)
+
+LDrawLoop:
+
+// FIXME: do we need to clamp light? We may need at least a buffer bit to
+// keep it from poking into tfrac and causing problems
+
+LDraw8:
+	cmpw	(%ecx),%bp
+	jl		Lp1
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch8:
+	movb	%al,(%edi)
+Lp1:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw7:
+	cmpw	2(%ecx),%bp
+	jl		Lp2
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,2(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch7:
+	movb	%al,1(%edi)
+Lp2:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw6:
+	cmpw	4(%ecx),%bp
+	jl		Lp3
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,4(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch6:
+	movb	%al,2(%edi)
+Lp3:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw5:
+	cmpw	6(%ecx),%bp
+	jl		Lp4
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,6(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch5:
+	movb	%al,3(%edi)
+Lp4:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw4:
+	cmpw	8(%ecx),%bp
+	jl		Lp5
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,8(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch4:
+	movb	%al,4(%edi)
+Lp5:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw3:
+	cmpw	10(%ecx),%bp
+	jl		Lp6
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,10(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch3:
+	movb	%al,5(%edi)
+Lp6:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw2:
+	cmpw	12(%ecx),%bp
+	jl		Lp7
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,12(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch2:
+	movb	%al,6(%edi)
+Lp7:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LDraw1:
+	cmpw	14(%ecx),%bp
+	jl		Lp8
+	xorl	%eax,%eax
+	movb	%dh,%ah
+	movb	(%esi),%al
+	movw	%bp,14(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch1:
+	movb	%al,7(%edi)
+Lp8:
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	lzistepx,%ebp
+	adcl	$0,%ebp
+	addl	C(a_sstepxfrac),%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	addl	$8,%edi
+	addl	$16,%ecx
+
+	decw	%bx
+	jnz		LDrawLoop
+
+	popl	%esi				// restore spans pointer
+LNextSpan:
+	addl	$(spanpackage_t_size),%esi	// point to next span
+LNextSpanESISet:
+	movl	spanpackage_t_count(%esi),%edx
+	cmpl	$-999999,%edx		// any more spans?
+	jnz		LSpanLoop			// yes
+
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	popl	%ebx				// restore register variables
+	popl	%esi
+	ret
+
+
+// draw a one-long span
+
+LExactlyOneLong:
+
+	movl	spanpackage_t_pz(%esi),%ecx
+	movl	spanpackage_t_zi(%esi),%ebp
+
+	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
+	movl	spanpackage_t_ptex(%esi),%ebx
+
+	cmpw	(%ecx),%bp
+	jl		LNextSpan
+	xorl	%eax,%eax
+	movl	spanpackage_t_pdest(%esi),%edi
+	movb	spanpackage_t_light+1(%esi),%ah
+	addl	$(spanpackage_t_size),%esi	// point to next span
+	movb	(%ebx),%al
+	movw	%bp,(%ecx)
+	movb	0x12345678(%eax),%al
+LPatch9:
+	movb	%al,(%edi)
+
+	jmp		LNextSpanESISet
+
+.globl C(D_PolysetAff8End)
+C(D_PolysetAff8End):
+
+
+#define pcolormap		4
+
+.globl C(D_Aff8Patch)
+C(D_Aff8Patch):
+	movl	pcolormap(%esp),%eax
+	movl	%eax,LPatch1-4
+	movl	%eax,LPatch2-4
+	movl	%eax,LPatch3-4
+	movl	%eax,LPatch4-4
+	movl	%eax,LPatch5-4
+	movl	%eax,LPatch6-4
+	movl	%eax,LPatch7-4
+	movl	%eax,LPatch8-4
+	movl	%eax,LPatch9-4
+
+	ret
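
For reference, a sketch of what each patched "movb 0x12345678(%eax),%al" computes once D_Aff8Patch has stored the colormap base over the placeholder displacement (the four bytes immediately before each LPatchN label); the parameter names here are illustrative only.

	static unsigned char lit_texel(const unsigned char *colormap,
		const unsigned char *ptex, int light)
	{
		/* the high byte of light selects the colormap row,
		   the texel selects the column */
		return colormap[(light & 0xFF00) + *ptex];
	}

Patching the displacement in place keeps the colormap base out of the register set, which matters in an inner loop that already uses all seven general-purpose registers.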
+
+
+//----------------------------------------------------------------------
+// Alias model polygon dispatching code, combined with subdivided affine
+// triangle drawing code
+//----------------------------------------------------------------------
+
+.globl C(D_PolysetDraw)
+C(D_PolysetDraw):
+
+//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
+//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
+//						// one extra because of cache line pretouching
+//
+//	a_spans = (spanpackage_t *)
+//			(((intptr)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
+	subl	$(SPAN_SIZE),%esp
+	movl	%esp,%eax
+	addl	$(CACHE_SIZE - 1),%eax
+	andl	$(~(CACHE_SIZE - 1)),%eax
+	movl	%eax,C(a_spans)
+
+//	if (r_affinetridesc.drawtype)
+//		D_DrawSubdiv ();
+//	else
+//		D_DrawNonSubdiv ();
+	movl	C(r_affinetridesc)+atd_drawtype,%eax
+	testl	%eax,%eax
+	jz		C(D_DrawNonSubdiv)
+
+	pushl	%ebp				// preserve caller stack frame pointer
+
+//	lnumtriangles = r_affinetridesc.numtriangles;
+	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
+
+	pushl	%esi				// preserve register variables
+	shll	$4,%ebp
+
+	pushl	%ebx
+//	ptri = r_affinetridesc.ptriangles;
+	movl	C(r_affinetridesc)+atd_ptriangles,%ebx
+
+	pushl	%edi
+
+//	mtriangle_t		*ptri;
+//	finalvert_t		*pfv, *index0, *index1, *index2;
+//	int				i;
+//	int				lnumtriangles;
+//	int				s0, s1, s2;
+
+//	pfv = r_affinetridesc.pfinalverts;
+	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
+
+//	for (i=0 ; i<lnumtriangles ; i++)
+//	{
+
+Llooptop:
+
+//		index0 = pfv + ptri[i].vertindex[0];
+//		index1 = pfv + ptri[i].vertindex[1];
+//		index2 = pfv + ptri[i].vertindex[2];
+	movl	mtri_vertindex-16+0(%ebx,%ebp,),%ecx
+	movl	mtri_vertindex-16+4(%ebx,%ebp,),%esi
+
+	shll	$(fv_shift),%ecx
+	movl	mtri_vertindex-16+8(%ebx,%ebp,),%edx
+
+	shll	$(fv_shift),%esi
+	addl	%edi,%ecx
+
+	shll	$(fv_shift),%edx
+	addl	%edi,%esi
+
+	addl	%edi,%edx
+
+//		if (((index0->v[1]-index1->v[1]) *
+//				(index0->v[0]-index2->v[0]) -
+//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
+//		{
+//			continue;
+//		}
+//
+//		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
+	fildl	fv_v+4(%ecx)	// i0v1
+	fildl	fv_v+4(%esi)	// i1v1 | i0v1
+	fildl	fv_v+0(%ecx)	// i0v0 | i1v1 | i0v1
+	fildl	fv_v+0(%edx)	// i2v0 | i0v0 | i1v1 | i0v1
+	fxch	%st(2)			// i1v1 | i0v0 | i2v0 | i0v1
+	fsubr	%st(3),%st(0)	// i0v1-i1v1 | i0v0 | i2v0 | i0v1
+	fildl	fv_v+0(%esi)	// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
+	fxch	%st(2)			// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
+	fsub	%st(0),%st(3)	// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
+	fildl	fv_v+4(%edx)	// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
+	fxch	%st(1)			// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
+	fsubp	%st(0),%st(3)	// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
+	fxch	%st(1)			// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
+	fmulp	%st(0),%st(3)	// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
+	fsubrp	%st(0),%st(3)	// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
+	movl	fv_v+16(%ecx),%eax
+	andl	$0xFF00,%eax
+	fmulp	%st(0),%st(2)	// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
+	addl	C(acolormap),%eax
+	fsubp	%st(0),%st(1)	// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
+	movl	%eax,C(d_pcolormap)
+	fstps	Ltemp
+	movl	Ltemp,%eax
+	subl	$0x80000001,%eax
+	jc		Lskip
+
+//		if (ptri[i].facesfront)
+//		{
+//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
+	movl	mtri_facesfront-16(%ebx,%ebp,),%eax
+	testl	%eax,%eax
+	jz		Lfacesback
+
+	pushl	%edx
+	pushl	%esi
+	pushl	%ecx
+	call	C(D_PolysetRecursiveTriangle)
+
+	subl	$16,%ebp
+	jnz		Llooptop
+	jmp		Ldone2
+
+//		}
+//		else
+//		{
+Lfacesback:
+
+//			s0 = index0->v[2];
+//			s1 = index1->v[2];
+//			s2 = index2->v[2];
+	movl	fv_v+8(%ecx),%eax
+	pushl	%eax
+	movl	fv_v+8(%esi),%eax
+	pushl	%eax
+	movl	fv_v+8(%edx),%eax
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+
+//			if (index0->flags & ALIAS_ONSEAM)
+//				index0->v[2] += r_affinetridesc.seamfixupX16;
+	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
+	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
+	jz		Lp11
+	addl	%eax,fv_v+8(%ecx)
+Lp11:
+
+//			if (index1->flags & ALIAS_ONSEAM)
+//				index1->v[2] += r_affinetridesc.seamfixupX16;
+	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
+	jz		Lp12
+	addl	%eax,fv_v+8(%esi)
+Lp12:
+
+//			if (index2->flags & ALIAS_ONSEAM)
+//				index2->v[2] += r_affinetridesc.seamfixupX16;
+	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
+	jz		Lp13
+	addl	%eax,fv_v+8(%edx)
+Lp13:
+
+//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
+	pushl	%edx
+	pushl	%esi
+	pushl	%ecx
+	call	C(D_PolysetRecursiveTriangle)
+
+//			index0->v[2] = s0;
+//			index1->v[2] = s1;
+//			index2->v[2] = s2;
+	popl	%edx
+	popl	%ecx
+	popl	%eax
+	movl	%eax,fv_v+8(%edx)
+	popl	%eax
+	movl	%eax,fv_v+8(%esi)
+	popl	%eax
+	movl	%eax,fv_v+8(%ecx)
+
+//		}
+//	}
+Lskip:
+	subl	$16,%ebp
+	jnz		Llooptop
+
+Ldone2:
+	popl	%edi				// restore the caller's stack frame
+	popl	%ebx
+	popl	%esi				// restore register variables
+	popl	%ebp
+
+	addl	$(SPAN_SIZE),%esp
+
+	ret
+
+
+//----------------------------------------------------------------------
+// Alias model triangle left-edge scanning code
+//----------------------------------------------------------------------
+
+#define height	4+16
+
+.globl C(D_PolysetScanLeftEdge)
+C(D_PolysetScanLeftEdge):
+	pushl	%ebp				// preserve caller stack frame pointer
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+
+	movl	height(%esp),%eax
+	movl	C(d_sfrac),%ecx
+	andl	$0xFFFF,%eax
+	movl	C(d_ptex),%ebx
+	orl		%eax,%ecx
+	movl	C(d_pedgespanpackage),%esi
+	movl	C(d_tfrac),%edx
+	movl	C(d_light),%edi
+	movl	C(d_zi),%ebp
+
+// %eax: scratch
+// %ebx: d_ptex
+// %ecx: d_sfrac in high word, count in low word
+// %edx: d_tfrac
+// %esi: d_pedgespanpackage, errorterm, scratch alternately
+// %edi: d_light
+// %ebp: d_zi
+
+//	do
+//	{
+
+LScanLoop:
+
+//		d_pedgespanpackage->ptex = ptex;
+//		d_pedgespanpackage->pdest = d_pdest;
+//		d_pedgespanpackage->pz = d_pz;
+//		d_pedgespanpackage->count = d_aspancount;
+//		d_pedgespanpackage->light = d_light;
+//		d_pedgespanpackage->zi = d_zi;
+//		d_pedgespanpackage->sfrac = d_sfrac << 16;
+//		d_pedgespanpackage->tfrac = d_tfrac << 16;
+	movl	%ebx,spanpackage_t_ptex(%esi)
+	movl	C(d_pdest),%eax
+	movl	%eax,spanpackage_t_pdest(%esi)
+	movl	C(d_pz),%eax
+	movl	%eax,spanpackage_t_pz(%esi)
+	movl	C(d_aspancount),%eax
+	movl	%eax,spanpackage_t_count(%esi)
+	movl	%edi,spanpackage_t_light(%esi)
+	movl	%ebp,spanpackage_t_zi(%esi)
+	movl	%ecx,spanpackage_t_sfrac(%esi)
+	movl	%edx,spanpackage_t_tfrac(%esi)
+
+// pretouch the next cache line
+	movb	spanpackage_t_size(%esi),%al
+
+//		d_pedgespanpackage++;
+	addl	$(spanpackage_t_size),%esi
+	movl	C(erroradjustup),%eax
+	movl	%esi,C(d_pedgespanpackage)
+
+//		errorterm += erroradjustup;
+	movl	C(errorterm),%esi
+	addl	%eax,%esi
+	movl	C(d_pdest),%eax
+
+//		if (errorterm >= 0)
+//		{
+	js		LNoLeftEdgeTurnover
+
+//			errorterm -= erroradjustdown;
+//			d_pdest += d_pdestextrastep;
+	subl	C(erroradjustdown),%esi
+	addl	C(d_pdestextrastep),%eax
+	movl	%esi,C(errorterm)
+	movl	%eax,C(d_pdest)
+
+//			d_pz += d_pzextrastep;
+//			d_aspancount += d_countextrastep;
+//			d_ptex += d_ptexextrastep;
+//			d_sfrac += d_sfracextrastep;
+//			d_ptex += d_sfrac >> 16;
+//			d_sfrac &= 0xFFFF;
+//			d_tfrac += d_tfracextrastep;
+	movl	C(d_pz),%eax
+	movl	C(d_aspancount),%esi
+	addl	C(d_pzextrastep),%eax
+	addl	C(d_sfracextrastep),%ecx
+	adcl	C(d_ptexextrastep),%ebx
+	addl	C(d_countextrastep),%esi
+	movl	%eax,C(d_pz)
+	movl	C(d_tfracextrastep),%eax
+	movl	%esi,C(d_aspancount)
+	addl	%eax,%edx
+
+//			if (d_tfrac & 0x10000)
+//			{
+	jnc		LSkip1
+
+//				d_ptex += r_affinetridesc.skinwidth;
+//				d_tfrac &= 0xFFFF;
+	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
+
+//			}
+
+LSkip1:
+
+//			d_light += d_lightextrastep;
+//			d_zi += d_ziextrastep;
+	addl	C(d_lightextrastep),%edi
+	addl	C(d_ziextrastep),%ebp
+
+//		}
+	movl	C(d_pedgespanpackage),%esi
+	decl	%ecx
+	testl	$0xFFFF,%ecx
+	jnz		LScanLoop
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	ret
+
+//		else
+//		{
+
+LNoLeftEdgeTurnover:
+	movl	%esi,C(errorterm)
+
+//			d_pdest += d_pdestbasestep;
+	addl	C(d_pdestbasestep),%eax
+	movl	%eax,C(d_pdest)
+
+//			d_pz += d_pzbasestep;
+//			d_aspancount += ubasestep;
+//			d_ptex += d_ptexbasestep;
+//			d_sfrac += d_sfracbasestep;
+//			d_ptex += d_sfrac >> 16;
+//			d_sfrac &= 0xFFFF;
+	movl	C(d_pz),%eax
+	movl	C(d_aspancount),%esi
+	addl	C(d_pzbasestep),%eax
+	addl	C(d_sfracbasestep),%ecx
+	adcl	C(d_ptexbasestep),%ebx
+	addl	C(ubasestep),%esi
+	movl	%eax,C(d_pz)
+	movl	%esi,C(d_aspancount)
+
+//			d_tfrac += d_tfracbasestep;
+	movl	C(d_tfracbasestep),%esi
+	addl	%esi,%edx
+
+//			if (d_tfrac & 0x10000)
+//			{
+	jnc		LSkip2
+
+//				d_ptex += r_affinetridesc.skinwidth;
+//				d_tfrac &= 0xFFFF;
+	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
+
+//			}
+
+LSkip2:
+
+//			d_light += d_lightbasestep;
+//			d_zi += d_zibasestep;
+	addl	C(d_lightbasestep),%edi
+	addl	C(d_zibasestep),%ebp
+
+//		}
+//	} while (--height);
+	movl	C(d_pedgespanpackage),%esi
+	decl	%ecx
+	testl	$0xFFFF,%ecx
+	jnz		LScanLoop
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	ret
+
+
+//----------------------------------------------------------------------
+// Alias model vertex drawing code
+//----------------------------------------------------------------------
+
+#define fv			4+8
+#define	numverts	8+8
+
+.globl C(D_PolysetDrawFinalVerts)
+C(D_PolysetDrawFinalVerts):
+	pushl	%ebp				// preserve caller stack frame pointer
+	pushl	%ebx
+
+//	int		i, z;
+//	short	*zbuf;
+
+	movl	numverts(%esp),%ecx
+	movl	fv(%esp),%ebx
+
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+
+LFVLoop:
+
+//	for (i=0 ; i<numverts ; i++, fv++)
+//	{
+//	// valid triangle coordinates for filling can include the bottom and
+//	// right clip edges, due to the fill rule; these shouldn't be drawn
+//		if ((fv->v[0] < r_refdef.vrectright) &&
+//			(fv->v[1] < r_refdef.vrectbottom))
+//		{
+	movl	fv_v+0(%ebx),%eax
+	movl	C(r_refdef)+rd_vrectright,%edx
+	cmpl	%edx,%eax
+	jge		LNextVert
+	movl	fv_v+4(%ebx),%esi
+	movl	C(r_refdef)+rd_vrectbottom,%edx
+	cmpl	%edx,%esi
+	jge		LNextVert
+
+//			zbuf = zspantable[fv->v[1]] + fv->v[0];
+	movl	C(zspantable)(,%esi,4),%edi
+
+//			z = fv->v[5]>>16;
+	movl	fv_v+20(%ebx),%edx
+	shrl	$16,%edx
+
+//			if (z >= *zbuf)
+//			{
+//				int		pix;
+	cmpw	(%edi,%eax,2),%dx
+	jl		LNextVert
+
+//				*zbuf = z;
+	movw	%dx,(%edi,%eax,2)
+
+//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
+	movl	fv_v+12(%ebx),%edi
+	shrl	$16,%edi
+	movl	C(skintable)(,%edi,4),%edi
+	movl	fv_v+8(%ebx),%edx
+	shrl	$16,%edx
+	movb	(%edi,%edx),%dl
+
+//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
+	movl	fv_v+16(%ebx),%edi
+	andl	$0xFF00,%edi
+	andl	$0x00FF,%edx
+	addl	%edx,%edi
+	movl	C(acolormap),%edx
+	movb	(%edx,%edi,1),%dl
+
+//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
+	movl	C(d_scantable)(,%esi,4),%edi
+	movl	C(d_viewbuffer),%esi
+	addl	%eax,%edi
+	movb	%dl,(%esi,%edi)
+
+//			}
+//		}
+//	}
+LNextVert:
+	addl	$(fv_size),%ebx
+	decl	%ecx
+	jnz		LFVLoop
+
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+
+
+//----------------------------------------------------------------------
+// Alias model non-subdivided polygon dispatching code
+//
+// not C-callable because of stack buffer cleanup
+//----------------------------------------------------------------------
+
+.globl C(D_DrawNonSubdiv)
+C(D_DrawNonSubdiv):
+	pushl	%ebp				// preserve caller stack frame pointer
+	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
+	pushl	%ebx
+	shll	$(mtri_shift),%ebp
+	pushl	%esi				// preserve register variables
+	movl	C(r_affinetridesc)+atd_ptriangles,%esi
+	pushl	%edi
+
+//	mtriangle_t		*ptri;
+//	finalvert_t		*pfv, *index0, *index1, *index2;
+//	int				i;
+//	int				lnumtriangles;
+
+//	pfv = r_affinetridesc.pfinalverts;
+//	ptri = r_affinetridesc.ptriangles;
+//	lnumtriangles = r_affinetridesc.numtriangles;
+
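+// %ebp starts at numtriangles*mtri_size and serves as a descending byte
+// offset: the -mtri_size bias in the addressing below selects the current
+// triangle, and LNextTri subtracts 16 (presumably mtri_size) until %ebp hits
+// zero, so the triangles are processed last to first
+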
+LNDLoop:
+
+//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
+//	{
+//		index0 = pfv + ptri->vertindex[0];
+//		index1 = pfv + ptri->vertindex[1];
+//		index2 = pfv + ptri->vertindex[2];
+	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
+	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
+	shll	$(fv_shift),%ecx
+	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
+	shll	$(fv_shift),%edx
+	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
+	shll	$(fv_shift),%ebx
+	addl	%edi,%ecx
+	addl	%edi,%edx
+	addl	%edi,%ebx
+
+//		d_xdenom = (index0->v[1]-index1->v[1]) *
+//				(index0->v[0]-index2->v[0]) -
+//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
+	movl	fv_v+4(%ecx),%eax
+	movl	fv_v+0(%ecx),%esi
+	subl	fv_v+4(%edx),%eax
+	subl	fv_v+0(%ebx),%esi
+	imull	%esi,%eax
+	movl	fv_v+0(%ecx),%esi
+	movl	fv_v+4(%ecx),%edi
+	subl	fv_v+0(%edx),%esi
+	subl	fv_v+4(%ebx),%edi
+	imull	%esi,%edi
+	subl	%edi,%eax
+
+//		if (d_xdenom >= 0)
+//		{
+//			continue;
+	jns		LNextTri
+
+//		}
+
+	movl	%eax,C(d_xdenom)
+	fildl	C(d_xdenom)
+
+//		r_p0[0] = index0->v[0];		// u
+//		r_p0[1] = index0->v[1];		// v
+//		r_p0[2] = index0->v[2];		// s
+//		r_p0[3] = index0->v[3];		// t
+//		r_p0[4] = index0->v[4];		// light
+//		r_p0[5] = index0->v[5];		// iz
+	movl	fv_v+0(%ecx),%eax
+	movl	fv_v+4(%ecx),%esi
+	movl	%eax,C(r_p0)+0
+	movl	%esi,C(r_p0)+4
+	movl	fv_v+8(%ecx),%eax
+	movl	fv_v+12(%ecx),%esi
+	movl	%eax,C(r_p0)+8
+	movl	%esi,C(r_p0)+12
+	movl	fv_v+16(%ecx),%eax
+	movl	fv_v+20(%ecx),%esi
+	movl	%eax,C(r_p0)+16
+	movl	%esi,C(r_p0)+20
+
+	fdivrs	float_1
+
+//		r_p1[0] = index1->v[0];
+//		r_p1[1] = index1->v[1];
+//		r_p1[2] = index1->v[2];
+//		r_p1[3] = index1->v[3];
+//		r_p1[4] = index1->v[4];
+//		r_p1[5] = index1->v[5];
+	movl	fv_v+0(%edx),%eax
+	movl	fv_v+4(%edx),%esi
+	movl	%eax,C(r_p1)+0
+	movl	%esi,C(r_p1)+4
+	movl	fv_v+8(%edx),%eax
+	movl	fv_v+12(%edx),%esi
+	movl	%eax,C(r_p1)+8
+	movl	%esi,C(r_p1)+12
+	movl	fv_v+16(%edx),%eax
+	movl	fv_v+20(%edx),%esi
+	movl	%eax,C(r_p1)+16
+	movl	%esi,C(r_p1)+20
+
+//		r_p2[0] = index2->v[0];
+//		r_p2[1] = index2->v[1];
+//		r_p2[2] = index2->v[2];
+//		r_p2[3] = index2->v[3];
+//		r_p2[4] = index2->v[4];
+//		r_p2[5] = index2->v[5];
+	movl	fv_v+0(%ebx),%eax
+	movl	fv_v+4(%ebx),%esi
+	movl	%eax,C(r_p2)+0
+	movl	%esi,C(r_p2)+4
+	movl	fv_v+8(%ebx),%eax
+	movl	fv_v+12(%ebx),%esi
+	movl	%eax,C(r_p2)+8
+	movl	%esi,C(r_p2)+12
+	movl	fv_v+16(%ebx),%eax
+	movl	fv_v+20(%ebx),%esi
+	movl	%eax,C(r_p2)+16
+	movl	C(r_affinetridesc)+atd_ptriangles,%edi
+	movl	%esi,C(r_p2)+20
+	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
+
+//		if (!ptri->facesfront)
+//		{
+	testl	%eax,%eax
+	jnz		LFacesFront
+
+//			if (index0->flags & ALIAS_ONSEAM)
+//				r_p0[2] += r_affinetridesc.seamfixupX16;
+	movl	fv_flags(%ecx),%eax
+	movl	fv_flags(%edx),%esi
+	movl	fv_flags(%ebx),%edi
+	testl	$(ALIAS_ONSEAM),%eax
+	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
+	jz		LOnseamDone0
+	addl	%eax,C(r_p0)+8
+LOnseamDone0:
+
+//			if (index1->flags & ALIAS_ONSEAM)
+// 				r_p1[2] += r_affinetridesc.seamfixupX16;
+	testl	$(ALIAS_ONSEAM),%esi
+	jz		LOnseamDone1
+	addl	%eax,C(r_p1)+8
+LOnseamDone1:
+
+//			if (index2->flags & ALIAS_ONSEAM)
+//				r_p2[2] += r_affinetridesc.seamfixupX16;
+	testl	$(ALIAS_ONSEAM),%edi
+	jz		LOnseamDone2
+	addl	%eax,C(r_p2)+8
+LOnseamDone2:
+
+//		}
+
+LFacesFront:
+
+	fstps	C(d_xdenom)
+
+//		D_PolysetSetEdgeTable ();
+//		D_RasterizeAliasPolySmooth ();
+		call	C(D_PolysetSetEdgeTable)
+		call	C(D_RasterizeAliasPolySmooth)
+
+LNextTri:
+		movl	C(r_affinetridesc)+atd_ptriangles,%esi
+		subl	$16,%ebp
+		jnz		LNDLoop
+//	}
+
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+
+	addl	$(SPAN_SIZE),%esp
+
+	ret
+
+
+#endif	// id386
+
--- /dev/null
+++ b/u/d_scana.s
@@ -1,0 +1,70 @@
+//
+// d_scana.s
+// x86 assembly-language turbulent texture mapping code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+	.data
+
+	.text
+
+//----------------------------------------------------------------------
+// turbulent texture mapping code
+//----------------------------------------------------------------------
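+// for each destination pixel, the loop below offsets s by the turbulence
+// value indexed by t and offsets t by the value indexed by s, wraps both to
+// the turbulent texture (the shll $6 implies a 64-texel row), and fetches
+// the texel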
+
+	.align 4
+.globl C(D_DrawTurbulent8Span)
+C(D_DrawTurbulent8Span):
+	pushl	%ebp				// preserve caller's stack frame pointer
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+
+	movl	C(r_turb_s),%esi
+	movl	C(r_turb_t),%ecx
+	movl	C(r_turb_pdest),%edi
+	movl	C(r_turb_spancount),%ebx
+
+Llp:
+	movl	%ecx,%eax
+	movl	%esi,%edx
+	sarl	$16,%eax
+	movl	C(r_turb_turb),%ebp
+	sarl	$16,%edx
+	andl	$(CYCLE-1),%eax
+	andl	$(CYCLE-1),%edx
+	movl	(%ebp,%eax,4),%eax
+	movl	(%ebp,%edx,4),%edx
+	addl	%esi,%eax
+	sarl	$16,%eax
+	addl	%ecx,%edx
+	sarl	$16,%edx
+	andl	$(TURB_TEX_SIZE-1),%eax
+	andl	$(TURB_TEX_SIZE-1),%edx
+	shll	$6,%edx
+	movl	C(r_turb_pbase),%ebp
+	addl	%eax,%edx
+	incl	%edi
+	addl	C(r_turb_sstep),%esi
+	addl	C(r_turb_tstep),%ecx
+	movb	(%ebp,%edx,1),%dl
+	decl	%ebx
+	movb	%dl,-1(%edi)
+	jnz		Llp
+
+	movl	%edi,C(r_turb_pdest)
+
+	popl	%ebx				// restore register variables
+	popl	%edi
+	popl	%esi
+	popl	%ebp				// restore caller's stack frame pointer
+	ret
+
+#endif	// id386
+
--- /dev/null
+++ b/u/d_spr8.s
@@ -1,0 +1,881 @@
+//
+// d_spr8.s
+// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with transparency.
+//----------------------------------------------------------------------
+
+	.text
+
+// out-of-line, rarely-needed clamping code
+
+LClampHigh0:
+	movl	C(bbextents),%esi
+	jmp		LClampReentry0
+LClampHighOrLow0:
+	jg		LClampHigh0
+	xorl	%esi,%esi
+	jmp		LClampReentry0
+
+LClampHigh1:
+	movl	C(bbextentt),%edx
+	jmp		LClampReentry1
+LClampHighOrLow1:
+	jg		LClampHigh1
+	xorl	%edx,%edx
+	jmp		LClampReentry1
+
+LClampLow2:
+	movl	$2048,%ebp
+	jmp		LClampReentry2
+LClampHigh2:
+	movl	C(bbextents),%ebp
+	jmp		LClampReentry2
+
+LClampLow3:
+	movl	$2048,%ecx
+	jmp		LClampReentry3
+LClampHigh3:
+	movl	C(bbextentt),%ecx
+	jmp		LClampReentry3
+
+LClampLow4:
+	movl	$2048,%eax
+	jmp		LClampReentry4
+LClampHigh4:
+	movl	C(bbextents),%eax
+	jmp		LClampReentry4
+
+LClampLow5:
+	movl	$2048,%ebx
+	jmp		LClampReentry5
+LClampHigh5:
+	movl	C(bbextentt),%ebx
+	jmp		LClampReentry5
+
+
+#define pspans	4+16
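+// (the span list pointer sits 4 bytes past the return address, plus 16 for
+//  the four registers pushed below)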
+
+	.align 4
+.globl C(D_SpriteDrawSpans)
+C(D_SpriteDrawSpans):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//
+// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
+// and span list pointers, and 1/z step in 0.32 fixed-point
+//
+// FIXME: any overlap from rearranging?
+	flds	C(d_sdivzstepu)
+	fmuls	fp_8
+	movl	C(cacheblock),%edx
+	flds	C(d_tdivzstepu)
+	fmuls	fp_8
+	movl	pspans(%esp),%ebx	// point to the first span descriptor
+	flds	C(d_zistepu)
+	fmuls	fp_8
+	movl	%edx,pbase			// pbase = cacheblock
+	flds	C(d_zistepu)
+	fmuls	fp_64kx64k
+	fxch	%st(3)
+	fstps	sdivz8stepu
+	fstps	zi8stepu
+	fstps	tdivz8stepu
+	fistpl	izistep
+	movl	izistep,%eax
+	rorl	$16,%eax		// put upper 16 bits in low word
+	movl	sspan_t_count(%ebx),%ecx
+	movl	%eax,izistep
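+
+// izi and izistep are kept rotated 16 bits so the upper half of the 0.32 1/z
+// sits in the low word of %ebp, letting the z test compare %bp directly
+// against the 16-bit z-buffer entries; the adcl $0 after each add carries
+// overflow from the fractional half back into that upper half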
+
+	cmpl	$0,%ecx
+	jle		LNextSpan
+
+LSpanLoop:
+
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+	fildl	sspan_t_v(%ebx)
+	fildl	sspan_t_u(%ebx)
+
+	fld		%st(1)			// dv | du | dv
+	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
+	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
+							//  dv*d_sdivzstepv | du | dv
+	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
+							//  du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  du*d_tdivzstepu | du | dv
+	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
+							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+	fadds	C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+							//  du*d_sdivzstepu; stays in %st(2) at end
+	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+							//  s/z
+	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
+							//  du*d_tdivzstepu | du | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
+							//  du*d_tdivzstepu | du | s/z
+	faddp	%st(0),%st(2)	// dv*d_zistepv |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fmuls	C(d_zistepu)		// du*d_zistepu |
+							//  dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  dv*d_zistepv | s/z
+	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
+							//  du*d_zistepu | dv*d_zistepv | s/z
+	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+							//  du*d_tdivzstepu; stays in %st(1) at end
+	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
+	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
+							//  du*d_zistepu; stays in %st(0) at end
+							// 1/z | fp_64k | t/z | s/z
+
+	fld		%st(0)			// FIXME: get rid of stall on FMUL?
+	fmuls	fp_64kx64k
+	fxch	%st(1)
+
+//
+// calculate and clamp s & t
+//
+	fdivr	%st(0),%st(2)	// 1/z | z*64k | t/z | s/z
+	fxch	%st(1)
+
+	fistpl	izi				// 0.32 fixed-point 1/z
+	movl	izi,%ebp
+
+//
+// set pz to point to the first z-buffer pixel in the span
+//
+	rorl	$16,%ebp		// put upper 16 bits in low word
+	movl	sspan_t_v(%ebx),%eax
+	movl	%ebp,izi
+	movl	sspan_t_u(%ebx),%ebp
+	imull	C(d_zrowbytes)
+	shll	$1,%ebp					// a word per pixel
+	addl	C(d_pzbuffer),%eax
+	addl	%ebp,%eax
+	movl	%eax,pz
+
+//
+// point %edi to the first pixel in the span
+//
+	movl	C(d_viewbuffer),%ebp
+	movl	sspan_t_v(%ebx),%eax
+	pushl	%ebx		// preserve spans pointer
+	movl	C(tadjust),%edx
+	movl	C(sadjust),%esi
+	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
+	addl	%ebp,%edi
+	movl	sspan_t_u(%ebx),%ebp
+	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];
+
+//
+// now start the FDIV for the end of the span
+//
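+// (the 1/z divide is issued early so the slow FDIV executes in the FPU while
+//  the integer code below runs; its result isn't picked up until the
+//  "pick up after the FDIV" code in the segment loops)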
+	cmpl	$8,%ecx
+	ja		LSetupNotLast1
+
+	decl	%ecx
+	jz		LCleanup1		// if only one pixel, no need to start an FDIV
+	movl	%ecx,spancountminus1
+
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fildl	spancountminus1
+
+	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
+	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
+	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
+	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
+	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
+	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
+							//  _d_tdivzstepu*scm1
+	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
+							//  _d_tdivzstepu*scm1
+	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
+	faddp	%st(0),%st(3)
+
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight1
+
+LCleanup1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+	jmp		LFDIVInFlight1
+
+	.align	4
+LSetupNotLast1:
+// finish up the s and t calcs
+	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
+
+	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
+	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
+	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
+	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
+	fxch	%st(1)			// s | t | 1/z | t/z | s/z
+	fistpl	s				// 1/z | t | t/z | s/z
+	fistpl	t				// 1/z | t/z | s/z
+
+	fadds	zi8stepu
+	fxch	%st(2)
+	fadds	sdivz8stepu
+	fxch	%st(2)
+	flds	tdivz8stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight1:
+
+	addl	s,%esi
+	addl	t,%edx
+	movl	C(bbextents),%ebx
+	movl	C(bbextentt),%ebp
+	cmpl	%ebx,%esi
+	ja		LClampHighOrLow0
+LClampReentry0:
+	movl	%esi,s
+	movl	pbase,%ebx
+	shll	$16,%esi
+	cmpl	%ebp,%edx
+	movl	%esi,sfracf
+	ja		LClampHighOrLow1
+LClampReentry1:
+	movl	%edx,t
+	movl	s,%esi					// sfrac = scans->sfrac;
+	shll	$16,%edx
+	movl	t,%eax					// tfrac = scans->tfrac;
+	sarl	$16,%esi
+	movl	%edx,tfracf
+
+//
+// calculate the texture starting address
+//
+	sarl	$16,%eax
+	addl	%ebx,%esi
+	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
+	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
+									//           ((tfrac >> 16) * cachewidth);
+
+//
+// determine whether last span or not
+//
+	cmpl	$8,%ecx
+	jna		LLastSegment
+
+//
+// not the last segment; do full 8-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+	movl	snext,%eax
+	movl	tnext,%edx
+
+	subl	$8,%ecx		// count off this segment's pixels
+	movl	C(sadjust),%ebp
+	pushl	%ecx		// remember count of remaining pixels
+	movl	C(tadjust),%ecx
+
+	addl	%eax,%ebp
+	addl	%edx,%ecx
+
+	movl	C(bbextents),%eax
+	movl	C(bbextentt),%edx
+
+	cmpl	$2048,%ebp
+	jl		LClampLow2
+	cmpl	%eax,%ebp
+	ja		LClampHigh2
+LClampReentry2:
+
+	cmpl	$2048,%ecx
+	jl		LClampLow3
+	cmpl	%edx,%ecx
+	ja		LClampHigh3
+LClampReentry3:
+
+	movl	%ebp,snext
+	movl	%ecx,tnext
+
+	subl	s,%ebp
+	subl	t,%ecx
+	
+//
+// set up advancetable
+//
+	movl	%ecx,%eax
+	movl	%ebp,%edx
+	sarl	$19,%edx			// sstep >>= 16;
+	movl	C(cachewidth),%ebx
+	sarl	$19,%eax			// tstep >>= 16;
+	jz		LIsZero
+	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
+LIsZero:
+	addl	%edx,%eax			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%edx
+	movl	%eax,advancetable+4	// advance base in t
+	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$13,%ebp			// left-justify sstep fractional part
+	movl	%ebp,sstep
+	movl	sfracf,%ebx
+	shll	$13,%ecx			// left-justify tstep fractional part
+	movl	%eax,advancetable	// advance extra in t
+	movl	%ecx,tstep
+
+	movl	pz,%ecx
+	movl	izi,%ebp
+
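+// each of the eight unrolled blocks below z-tests the span's 1/z (upper 16
+// bits, in %bp) against the 16-bit z-buffer entry, skips TRANSPARENT_COLOR
+// texels, and otherwise writes both z and the pixel; the sbbl %eax,%eax turns
+// the tfrac carry into 0 or -1 so the adcl picks either the "advance extra"
+// or the "advance base" advancetable entry while also adding the sfrac carry
+// into the source pointer
+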
+	cmpw	(%ecx),%bp
+	jl		Lp1
+	movb	(%esi),%al			// get first source texel
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp1
+	movw	%bp,(%ecx)
+	movb	%al,(%edi)			// store first dest pixel
+Lp1:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx			// advance tfrac fractional part by tstep frac
+
+	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
+	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
+	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel
+
+	cmpw	2(%ecx),%bp
+	jl		Lp2
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp2
+	movw	%bp,2(%ecx)
+	movb	%al,1(%edi)
+Lp2:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	cmpw	4(%ecx),%bp
+	jl		Lp3
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp3
+	movw	%bp,4(%ecx)
+	movb	%al,2(%edi)
+Lp3:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	cmpw	6(%ecx),%bp
+	jl		Lp4
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp4
+	movw	%bp,6(%ecx)
+	movb	%al,3(%edi)
+Lp4:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	cmpw	8(%ecx),%bp
+	jl		Lp5
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp5
+	movw	%bp,8(%ecx)
+	movb	%al,4(%edi)
+Lp5:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+	popl	%eax
+	cmpl	$8,%eax			// more than one segment after this?
+	ja		LSetupNotLast2	// yes
+
+	decl	%eax
+	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
+	movl	%eax,spancountminus1
+	fildl	spancountminus1
+
+	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
+	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
+	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
+	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
+	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
+	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
+	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
+	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
+	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
+	faddp	%st(0),%st(4)	// 64k
+
+	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
+							//  overlap
+	jmp		LFDIVInFlight2
+
+	.align	4
+LSetupNotLast2:
+	fadds	zi8stepu
+	fxch	%st(2)
+	fadds	sdivz8stepu
+	fxch	%st(2)
+	flds	tdivz8stepu
+	faddp	%st(0),%st(2)
+	flds	fp_64k
+	fdiv	%st(1),%st(0)	// z = 1/1/z
+							// this is what we've gone to all this trouble to
+							//  overlap
+LFDIVInFlight2:
+	pushl	%eax
+
+	cmpw	10(%ecx),%bp
+	jl		Lp6
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp6
+	movw	%bp,10(%ecx)
+	movb	%al,5(%edi)
+Lp6:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	cmpw	12(%ecx),%bp
+	jl		Lp7
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp7
+	movw	%bp,12(%ecx)
+	movb	%al,6(%edi)
+Lp7:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	cmpw	14(%ecx),%bp
+	jl		Lp8
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp8
+	movw	%bp,14(%ecx)
+	movb	%al,7(%edi)
+Lp8:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+	addl	$8,%edi
+	addl	$16,%ecx
+	movl	%edx,tfracf
+	movl	snext,%edx
+	movl	%ebx,sfracf
+	movl	tnext,%ebx
+	movl	%edx,s
+	movl	%ebx,t
+
+	movl	%ecx,pz
+	movl	%ebp,izi
+
+	popl	%ecx				// retrieve count
+
+//
+// determine whether last span or not
+//
+	cmpl	$8,%ecx				// are there multiple segments remaining?
+	ja		LNotLastSegment		// yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+	testl	%ecx,%ecx
+	jz		LNoSteps		// just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+	fld		%st(0)			// duplicate it
+	fmul	%st(4),%st(0)	// s = s/z * z
+	fxch	%st(1)
+	fmul	%st(3),%st(0)	// t = t/z * z
+	fxch	%st(1)
+	fistpl	snext
+	fistpl	tnext
+
+	movl	C(tadjust),%ebx
+	movl	C(sadjust),%eax
+
+	addl	snext,%eax
+	addl	tnext,%ebx
+
+	movl	C(bbextents),%ebp
+	movl	C(bbextentt),%edx
+
+	cmpl	$2048,%eax
+	jl		LClampLow4
+	cmpl	%ebp,%eax
+	ja		LClampHigh4
+LClampReentry4:
+	movl	%eax,snext
+
+	cmpl	$2048,%ebx
+	jl		LClampLow5
+	cmpl	%edx,%ebx
+	ja		LClampHigh5
+LClampReentry5:
+
+	cmpl	$1,%ecx			// don't bother with the reciprocal multiply:
+	je		LOnlyOneStep	//  if there are two pixels in the segment, there's
+							//  only one step, of the segment length
+	subl	s,%eax
+	subl	t,%ebx
+
+	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
+	addl	%ebx,%ebx		//  reciprocal yields 16.48
+	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
+	movl	%edx,%ebp
+
+	movl	%ebx,%eax
+	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
+
+LSetEntryvec:
+//
+// set up advancetable
+//
+	movl	spr8entryvec_table(,%ecx,4),%ebx
+	movl	%edx,%eax
+	pushl	%ebx				// entry point into code for RET later
+	movl	%ebp,%ecx
+	sarl	$16,%ecx			// sstep >>= 16;
+	movl	C(cachewidth),%ebx
+	sarl	$16,%edx			// tstep >>= 16;
+	jz		LIsZeroLast
+	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
+LIsZeroLast:
+	addl	%ecx,%edx			// add in sstep
+								// (tstep >> 16) * cachewidth + (sstep >> 16);
+	movl	tfracf,%ecx
+	movl	%edx,advancetable+4	// advance base in t
+	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
+								//  (sstep >> 16);
+	shll	$16,%ebp			// left-justify sstep fractional part
+	movl	sfracf,%ebx
+	shll	$16,%eax			// left-justify tstep fractional part
+	movl	%edx,advancetable	// advance extra in t
+
+	movl	%eax,tstep
+	movl	%ebp,sstep
+	movl	%ecx,%edx
+
+	movl	pz,%ecx
+	movl	izi,%ebp
+
+	ret							// jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps:
+	movl	pz,%ecx
+	subl	$7,%edi			// adjust for hardwired offset
+	subl	$14,%ecx
+	jmp		LEndSpan
+
+
+LOnlyOneStep:
+	subl	s,%eax
+	subl	t,%ebx
+	movl	%eax,%ebp
+	movl	%ebx,%edx
+	jmp		LSetEntryvec
+
+//----------------------------------------
+
+.globl	Spr8Entry2_8
+Spr8Entry2_8:
+	subl	$6,%edi		// adjust for hardwired offsets
+	subl	$12,%ecx
+	movb	(%esi),%al
+	jmp		LLEntry2_8
+
+//----------------------------------------
+
+.globl	Spr8Entry3_8
+Spr8Entry3_8:
+	subl	$5,%edi		// adjust for hardwired offsets
+	subl	$10,%ecx
+	jmp		LLEntry3_8
+
+//----------------------------------------
+
+.globl	Spr8Entry4_8
+Spr8Entry4_8:
+	subl	$4,%edi		// adjust for hardwired offsets
+	subl	$8,%ecx
+	jmp		LLEntry4_8
+
+//----------------------------------------
+
+.globl	Spr8Entry5_8
+Spr8Entry5_8:
+	subl	$3,%edi		// adjust for hardwired offsets
+	subl	$6,%ecx
+	jmp		LLEntry5_8
+
+//----------------------------------------
+
+.globl	Spr8Entry6_8
+Spr8Entry6_8:
+	subl	$2,%edi		// adjust for hardwired offsets
+	subl	$4,%ecx
+	jmp		LLEntry6_8
+
+//----------------------------------------
+
+.globl	Spr8Entry7_8
+Spr8Entry7_8:
+	decl	%edi		// adjust for hardwired offsets
+	subl	$2,%ecx
+	jmp		LLEntry7_8
+
+//----------------------------------------
+
+.globl	Spr8Entry8_8
+Spr8Entry8_8:
+	cmpw	(%ecx),%bp
+	jl		Lp9
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp9
+	movw	%bp,(%ecx)
+	movb	%al,(%edi)
+Lp9:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry7_8:
+	cmpw	2(%ecx),%bp
+	jl		Lp10
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp10
+	movw	%bp,2(%ecx)
+	movb	%al,1(%edi)
+Lp10:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry6_8:
+	cmpw	4(%ecx),%bp
+	jl		Lp11
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp11
+	movw	%bp,4(%ecx)
+	movb	%al,2(%edi)
+Lp11:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry5_8:
+	cmpw	6(%ecx),%bp
+	jl		Lp12
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp12
+	movw	%bp,6(%ecx)
+	movb	%al,3(%edi)
+Lp12:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry4_8:
+	cmpw	8(%ecx),%bp
+	jl		Lp13
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp13
+	movw	%bp,8(%ecx)
+	movb	%al,4(%edi)
+Lp13:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry3_8:
+	cmpw	10(%ecx),%bp
+	jl		Lp14
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp14
+	movw	%bp,10(%ecx)
+	movb	%al,5(%edi)
+Lp14:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+LLEntry2_8:
+	cmpw	12(%ecx),%bp
+	jl		Lp15
+	movb	(%esi),%al
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp15
+	movw	%bp,12(%ecx)
+	movb	%al,6(%edi)
+Lp15:
+	addl	izistep,%ebp
+	adcl	$0,%ebp
+	addl	tstep,%edx
+	sbbl	%eax,%eax
+	addl	sstep,%ebx
+	adcl	advancetable+4(,%eax,4),%esi
+
+LEndSpan:
+	cmpw	14(%ecx),%bp
+	jl		Lp16
+	movb	(%esi),%al		// load first texel in segment
+	cmpb	$(TRANSPARENT_COLOR),%al
+	jz		Lp16
+	movw	%bp,14(%ecx)
+	movb	%al,7(%edi)
+Lp16:
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+	fstp %st(0)
+	fstp %st(0)
+	fstp %st(0)
+
+	popl	%ebx				// restore spans pointer
+LNextSpan:
+	addl	$(sspan_t_size),%ebx // point to next span
+	movl	sspan_t_count(%ebx),%ecx
+	cmpl	$0,%ecx				// any more spans?
+	jg		LSpanLoop			// yes
+	jz		LNextSpan			// yes, but this one's empty
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+#endif	// id386
--- /dev/null
+++ b/u/d_varsa.s
@@ -1,0 +1,186 @@
+//
+// d_varsa.s
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef	id386
+
+	.data
+
+//-------------------------------------------------------
+// global refresh variables
+//-------------------------------------------------------
+
+// FIXME: put all refresh variables into one contiguous block. Make into one
+// big structure, like cl or sv?
+
+	.align	4
+.globl	C(d_sdivzstepu)
+.globl	C(d_tdivzstepu)
+.globl	C(d_zistepu)
+.globl	C(d_sdivzstepv)
+.globl	C(d_tdivzstepv)
+.globl	C(d_zistepv)
+.globl	C(d_sdivzorigin)
+.globl	C(d_tdivzorigin)
+.globl	C(d_ziorigin)
+C(d_sdivzstepu):	.single	0
+C(d_tdivzstepu):	.single	0
+C(d_zistepu):		.single	0
+C(d_sdivzstepv):	.single	0
+C(d_tdivzstepv):	.single	0
+C(d_zistepv):		.single	0
+C(d_sdivzorigin):	.single	0
+C(d_tdivzorigin):	.single	0
+C(d_ziorigin):		.single	0
+
+.globl	C(sadjust)
+.globl	C(tadjust)
+.globl	C(bbextents)
+.globl	C(bbextentt)
+C(sadjust):			.long	0
+C(tadjust):			.long	0
+C(bbextents):		.long	0
+C(bbextentt):		.long	0
+
+.globl	C(cacheblock)
+.globl	C(d_viewbuffer)
+.globl	C(cachewidth)
+.globl	C(d_pzbuffer)
+.globl	C(d_zrowbytes)
+.globl	C(d_zwidth)
+C(cacheblock):		.long	0
+C(cachewidth):		.long	0
+C(d_viewbuffer):	.long	0
+C(d_pzbuffer):		.long	0
+C(d_zrowbytes):		.long	0
+C(d_zwidth):		.long	0
+
+
+//-------------------------------------------------------
+// ASM-only variables
+//-------------------------------------------------------
+.globl	izi
+izi:			.long	0
+
+.globl	pbase, s, t, sfracf, tfracf, snext, tnext
+.globl	spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
+.globl	zi8stepu, sdivz8stepu, tdivz8stepu, pz
+s:				.long	0
+t:				.long	0
+snext:			.long	0
+tnext:			.long	0
+sfracf:			.long	0
+tfracf:			.long	0
+pbase:			.long	0
+zi8stepu:		.long	0
+sdivz8stepu:	.long	0
+tdivz8stepu:	.long	0
+zi16stepu:		.long	0
+sdivz16stepu:	.long	0
+tdivz16stepu:	.long	0
+spancountminus1: .long	0
+pz:				.long	0
+
+.globl	izistep
+izistep:				.long	0
+
+//-------------------------------------------------------
+// local variables for d_draw16.s
+//-------------------------------------------------------
+
+.globl	reciprocal_table_16, entryvec_table_16
+// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
+// 1/14, and 1/15 in 0.32 form
+reciprocal_table_16:	.long	0x40000000, 0x2aaaaaaa, 0x20000000
+						.long	0x19999999, 0x15555555, 0x12492492
+						.long	0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
+						.long	0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
+
+	.extern Entry2_16
+	.extern Entry3_16
+	.extern Entry4_16
+	.extern Entry5_16
+	.extern Entry6_16
+	.extern Entry7_16
+	.extern Entry8_16
+	.extern Entry9_16
+	.extern Entry10_16
+	.extern Entry11_16
+	.extern Entry12_16
+	.extern Entry13_16
+	.extern Entry14_16
+	.extern Entry15_16
+	.extern Entry16_16
+
+entryvec_table_16:	.long	0, Entry2_16, Entry3_16, Entry4_16
+					.long	Entry5_16, Entry6_16, Entry7_16, Entry8_16
+					.long	Entry9_16, Entry10_16, Entry11_16, Entry12_16
+					.long	Entry13_16, Entry14_16, Entry15_16, Entry16_16
+
+//-------------------------------------------------------
+// local variables for d_parta.s
+//-------------------------------------------------------
+.globl	DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
+DP_Count:		.long	0
+DP_u:			.long	0
+DP_v:			.long	0
+DP_32768:		.single	32768.0
+DP_Color:		.long	0
+DP_Pix:			.long	0
+
+
+	.extern DP_1x1
+	.extern DP_2x2
+	.extern DP_3x3
+	.extern DP_4x4
+
+DP_EntryTable:	.long	DP_1x1, DP_2x2, DP_3x3, DP_4x4
+
+//
+// advancetable is 8 bytes; the span code indexes it as advancetable+4, the
+// middle of that range, so a carry-derived index of -1 still lands inside it
+//
+.globl	advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
+advancetable:	.long	0, 0
+sstep:			.long	0
+tstep:			.long	0
+
+pspantemp:		.long	0
+counttemp:		.long	0
+jumptemp:		.long	0
+
+// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
+.globl	reciprocal_table, entryvec_table
+reciprocal_table:	.long	0x40000000, 0x2aaaaaaa, 0x20000000
+					.long	0x19999999, 0x15555555, 0x12492492
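+// (numerically these are floor(2^31 / n); the sprite span code in d_spr8.s
+//  doubles its s/t deltas before the imull, so the high dword of the product
+//  is the per-pixel step it wants. reciprocal_table_16 above is scaled the
+//  same way)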
+
+	.extern Entry2_8
+	.extern Entry3_8
+	.extern Entry4_8
+	.extern Entry5_8
+	.extern Entry6_8
+	.extern Entry7_8
+	.extern Entry8_8
+
+entryvec_table:	.long	0, Entry2_8, Entry3_8, Entry4_8
+				.long	Entry5_8, Entry6_8, Entry7_8, Entry8_8
+
+	.extern Spr8Entry2_8
+	.extern Spr8Entry3_8
+	.extern Spr8Entry4_8
+	.extern Spr8Entry5_8
+	.extern Spr8Entry6_8
+	.extern Spr8Entry7_8
+	.extern Spr8Entry8_8
+	
+.globl spr8entryvec_table
+spr8entryvec_table:	.long	0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
+					.long	Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
+
+#endif	// id386
+
--- /dev/null
+++ b/u/math.s
@@ -1,0 +1,399 @@
+//
+// math.s
+// x86 assembly-language math routines.
+
+#define GLQUAKE	1	// don't include unneeded defs
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+
+#ifdef	id386
+
+	.data
+
+	.align	4
+Ljmptab:	.long	Lcase0, Lcase1, Lcase2, Lcase3
+			.long	Lcase4, Lcase5, Lcase6, Lcase7
+
+	.text
+
+// TODO: rounding needed?
+// stack parameter offset
+#define	val	4
+
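+// computes the 16.16 reciprocal of a value with 24 fractional bits: edx:eax
+// is set to 2^40, and 2^40 / (x * 2^24) == (1/x) * 2^16; inputs of 0x100 or
+// less would overflow the 32-bit quotient, hence the saturated 0xFFFFFFFF
+// return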
+.globl C(Invert24To16)
+C(Invert24To16):
+
+	movl	val(%esp),%ecx
+	movl	$0x100,%edx		// 0x10000000000 as dividend
+	cmpl	%edx,%ecx
+	jle		LOutOfRange
+
+	subl	%eax,%eax
+	divl	%ecx
+
+	ret
+
+LOutOfRange:
+	movl	$0xFFFFFFFF,%eax
+	ret
+
+#define	in	4
+#define out	8
+
+	.align 2
+.globl C(TransformVector)
+C(TransformVector):
+	movl	in(%esp),%eax
+	movl	out(%esp),%edx
+
+	flds	(%eax)		// in[0]
+	fmuls	C(vright)		// in[0]*vright[0]
+	flds	(%eax)		// in[0] | in[0]*vright[0]
+	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
+	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
+	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
+
+	flds	4(%eax)		// in[1] | ...
+	fmuls	C(vright)+4	// in[1]*vright[1] | ...
+	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
+	fmuls	C(vup)+4		// in[1]*vup[1] | in[1]*vright[1] | ...
+	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
+	fmuls	C(vpn)+4		// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
+	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
+
+	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
+	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
+	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
+
+	flds	8(%eax)		// in[2] | ...
+	fmuls	C(vright)+8	// in[2]*vright[2] | ...
+	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
+	fmuls	C(vup)+8		// in[2]*vup[2] | in[2]*vright[2] | ...
+	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
+	fmuls	C(vpn)+8		// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
+	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
+
+	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
+	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
+	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
+
+	fstps	8(%edx)		// out[2]
+	fstps	4(%edx)		// out[1]
+	fstps	(%edx)		// out[0]
+
+	ret
+
+
+#define EMINS	4+4
+#define EMAXS	4+8
+#define P		4+12
+
+	.align 2
+.globl C(BoxOnPlaneSide)
+C(BoxOnPlaneSide):
+	pushl	%ebx
+
+	movl	P(%esp),%edx
+	movl	EMINS(%esp),%ecx
+	xorl	%eax,%eax
+	movl	EMAXS(%esp),%ebx
+	movb	pl_signbits(%edx),%al
+	cmpb	$8,%al
+	jge		Lerror
+	flds	pl_normal(%edx)		// p->normal[0]
+	fld		%st(0)				// p->normal[0] | p->normal[0]
+	jmp		Ljmptab(,%eax,4)
+
+
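+// p->signbits has one bit per axis, set when that normal component is
+// negative; for a negative component the box minimum maximizes the dot
+// product, so that axis contributes emins to dist1 (the maximum) and emaxs
+// to dist2 (the minimum). each Lcase below is one of the eight sign
+// combinations
+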
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase0:
+	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
+	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
+								//  p->normal[0]
+	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
+								//  p->normal[1]
+	fmuls	(%ecx)				// p->normal[0]*emins[0] |
+								//  p->normal[0]*emaxs[0] | p->normal[1]
+	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fld		%st(0)				// p->normal[1] | p->normal[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
+								//  p->normal[1] | p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
+								//  p->normal[2] | p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
+								//  p->normal[1]*emaxs[1] |
+								//  p->normal[2] | p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
+								//  p->normal[1]*emins[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fld		%st(0)				// p->normal[2] | p->normal[2] |
+								//  p->normal[1]*emaxs[1] |
+								//  p->normal[1]*emins[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
+								//  p->normal[2] |
+								//  p->normal[1]*emaxs[1] |
+								//  p->normal[1]*emins[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[0]*emins[0]
+	fxch	%st(5)				// p->normal[0]*emins[0] |
+								//  p->normal[2] |
+								//  p->normal[1]*emaxs[1] |
+								//  p->normal[1]*emins[1] |
+								//  p->normal[0]*emaxs[0] |
+								//  p->normal[2]*emaxs[2]
+	faddp	%st(0),%st(3)		//p->normal[2] |
+								// p->normal[1]*emaxs[1] |
+								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// p->normal[0]*emaxs[0] |
+								// p->normal[2]*emaxs[2]
+	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
+								// p->normal[1]*emaxs[1] |
+								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// p->normal[0]*emaxs[0] |
+								// p->normal[2]*emaxs[2]
+	fxch	%st(1)				//p->normal[1]*emaxs[1] |
+								// p->normal[2]*emins[2] |
+								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// p->normal[0]*emaxs[0] |
+								// p->normal[2]*emaxs[2]
+	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
+								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+								// p->normal[2]*emaxs[2]
+	fxch	%st(3)				//p->normal[2]*emaxs[2] +
+								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+								// p->normal[2]*emins[2]
+	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+								// dist1 | p->normal[2]*emins[2]
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase1:
+	fmuls	(%ecx)				// emins[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ebx)				// emaxs[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ebx)				// emaxs[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ecx)				// emins[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase2:
+	fmuls	(%ebx)				// emaxs[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ecx)				// emins[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ecx)				// emins[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ebx)				// emaxs[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase3:
+	fmuls	(%ecx)				// emins[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ebx)				// emaxs[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ecx)				// emins[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ebx)				// emaxs[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase4:
+	fmuls	(%ebx)				// emaxs[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ecx)				// emins[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ebx)				// emaxs[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ecx)				// emins[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase5:
+	fmuls	(%ecx)				// emins[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ebx)				// emaxs[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ebx)				// emaxs[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ecx)				// emins[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase6:
+	fmuls	(%ebx)				// emaxs[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ecx)				// emins[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ecx)				// emins[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ebx)				// emaxs[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+	jmp		LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase7:
+	fmuls	(%ecx)				// emins[0]
+	flds	pl_normal+4(%edx)
+	fxch	%st(2)
+	fmuls	(%ebx)				// emaxs[0]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	4(%ecx)				// emins[1]
+	flds	pl_normal+8(%edx)
+	fxch	%st(2)
+	fmuls	4(%ebx)				// emaxs[1]
+	fxch	%st(2)
+	fld		%st(0)
+	fmuls	8(%ecx)				// emins[2]
+	fxch	%st(5)
+	faddp	%st(0),%st(3)
+	fmuls	8(%ebx)				// emaxs[2]
+	fxch	%st(1)
+	faddp	%st(0),%st(3)
+	fxch	%st(3)
+	faddp	%st(0),%st(2)
+
+LSetSides:
+
+//	sides = 0;
+//	if (dist1 >= p->dist)
+//		sides = 1;
+//	if (dist2 < p->dist)
+//		sides |= 2;
+
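+// each fcomps sets C0 when st(0) < p->dist, and fnstsw copies the status
+// word into %ax so C0 lands in bit 0 of %ah; the xorb inverts it for the
+// dist1 test (dist1 >= dist -> bit 0) and addb %ah,%ah shifts the dist2
+// result into bit 1
+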
+	faddp	%st(0),%st(2)		// dist1 | dist2
+	fcomps	pl_dist(%edx)
+	xorl	%ecx,%ecx
+	fnstsw	%ax
+	fcomps	pl_dist(%edx)
+	andb	$1,%ah
+	xorb	$1,%ah
+	addb	%ah,%cl
+
+	fnstsw	%ax
+	andb	$1,%ah
+	addb	%ah,%ah
+	addb	%ah,%cl
+
+//	return sides;
+
+	popl	%ebx
+	movl	%ecx,%eax	// return status
+
+	ret
+
+
+Lerror:
+	call	C(BOPS_Error)
+
+#endif	// id386
--- /dev/null
+++ b/u/quakeasm.h
@@ -1,0 +1,248 @@
+//
+// quakeasm.h: general asm header file
+//
+
+//#define GLQUAKE	1
+
+#ifdef __i386__
+#define id386
+#endif
+
+// !!! must be kept the same as in d_iface.h !!!
+#define TRANSPARENT_COLOR	255
+
+#ifndef GLQUAKE
+	.extern C(d_zistepu)
+	.extern C(d_pzbuffer)
+	.extern C(d_zistepv)
+	.extern C(d_zrowbytes)
+	.extern C(d_ziorigin)
+	.extern C(r_turb_s)
+	.extern C(r_turb_t)
+	.extern C(r_turb_pdest)
+	.extern C(r_turb_spancount)
+	.extern C(r_turb_turb)
+	.extern C(r_turb_pbase)
+	.extern C(r_turb_sstep)
+	.extern C(r_turb_tstep)
+	.extern	C(r_bmodelactive)
+	.extern	C(d_sdivzstepu)
+	.extern	C(d_tdivzstepu)
+	.extern	C(d_sdivzstepv)
+	.extern	C(d_tdivzstepv)
+	.extern	C(d_sdivzorigin)
+	.extern	C(d_tdivzorigin)
+	.extern	C(sadjust)
+	.extern	C(tadjust)
+	.extern	C(bbextents)
+	.extern	C(bbextentt)
+	.extern	C(cacheblock)
+	.extern	C(d_viewbuffer)
+	.extern	C(cachewidth)
+	.extern	C(d_pzbuffer)
+	.extern	C(d_zrowbytes)
+	.extern	C(d_zwidth)
+	.extern C(d_scantable)
+	.extern C(r_lightptr)
+	.extern C(r_numvblocks)
+	.extern C(prowdestbase)
+	.extern C(pbasesource)
+	.extern C(r_lightwidth)
+	.extern C(lightright)
+	.extern C(lightrightstep)
+	.extern C(lightdeltastep)
+	.extern C(lightdelta)
+	.extern C(lightright)
+	.extern C(lightdelta)
+	.extern C(sourcetstep)
+	.extern C(surfrowbytes)
+	.extern C(lightrightstep)
+	.extern C(lightdeltastep)
+	.extern C(r_sourcemax)
+	.extern C(r_stepback)
+	.extern C(colormap)
+	.extern C(blocksize)
+	.extern C(sourcesstep)
+	.extern C(lightleft)
+	.extern C(blockdivshift)
+	.extern C(blockdivmask)
+	.extern C(lightleftstep)
+	.extern C(r_origin)
+	.extern C(r_ppn)
+	.extern C(r_pup)
+	.extern C(r_pright)
+	.extern C(ycenter)
+	.extern C(xcenter)
+	.extern C(d_vrectbottom_particle)
+	.extern C(d_vrectright_particle)
+	.extern C(d_vrecty)
+	.extern C(d_vrectx)
+	.extern C(d_pix_shift)
+	.extern C(d_pix_min)
+	.extern C(d_pix_max)
+	.extern C(d_y_aspect_shift)
+	.extern C(screenwidth)
+	.extern C(r_leftclipped)
+	.extern C(r_leftenter)
+	.extern C(r_rightclipped)
+	.extern C(r_rightenter)
+	.extern C(modelorg)
+	.extern C(xscale)
+	.extern C(r_refdef)
+	.extern C(yscale)
+	.extern C(r_leftexit)
+	.extern C(r_rightexit)
+	.extern C(r_lastvertvalid)
+	.extern C(cacheoffset)
+	.extern C(newedges)
+	.extern C(removeedges)
+	.extern C(r_pedge)
+	.extern C(r_framecount)
+	.extern C(r_u1)
+	.extern C(r_emitted)
+	.extern C(edge_p)
+	.extern C(surface_p)
+	.extern C(surfaces)
+	.extern C(r_lzi1)
+	.extern C(r_v1)
+	.extern C(r_ceilv1)
+	.extern C(r_nearzi)
+	.extern C(r_nearzionly)
+	.extern C(edge_aftertail)
+	.extern C(edge_tail)
+	.extern C(current_iv)
+	.extern C(edge_head_u_shift20)
+	.extern C(span_p)
+	.extern C(edge_head)
+	.extern C(fv)
+	.extern C(edge_tail_u_shift20)
+	.extern C(r_apverts)
+	.extern C(r_anumverts)
+	.extern C(aliastransform)
+	.extern C(r_avertexnormals)
+	.extern C(r_plightvec)
+	.extern C(r_ambientlight)
+	.extern C(r_shadelight)
+	.extern C(aliasxcenter)
+	.extern C(aliasycenter)
+	.extern C(a_sstepxfrac)
+	.extern C(r_affinetridesc)
+	.extern C(acolormap)
+	.extern C(d_pcolormap)
+	.extern C(r_affinetridesc)
+	.extern C(d_sfrac)
+	.extern C(d_ptex)
+	.extern C(d_pedgespanpackage)
+	.extern C(d_tfrac)
+	.extern C(d_light)
+	.extern C(d_zi)
+	.extern C(d_pdest)
+	.extern C(d_pz)
+	.extern C(d_aspancount)
+	.extern C(erroradjustup)
+	.extern C(errorterm)
+	.extern C(d_xdenom)
+	.extern C(r_p0)
+	.extern C(r_p1)
+	.extern C(r_p2)
+	.extern C(a_tstepxfrac)
+	.extern C(r_sstepx)
+	.extern C(r_tstepx)
+	.extern C(a_ststepxwhole)
+	.extern C(zspantable)
+	.extern C(skintable)
+	.extern C(r_zistepx)
+	.extern C(erroradjustdown)
+	.extern C(d_countextrastep)
+	.extern C(ubasestep)
+	.extern C(a_ststepxwhole)
+	.extern C(a_tstepxfrac)
+	.extern C(r_lstepx)
+	.extern C(a_spans)
+	.extern C(erroradjustdown)
+	.extern C(d_pdestextrastep)
+	.extern C(d_pzextrastep)
+	.extern C(d_sfracextrastep)
+	.extern C(d_ptexextrastep)
+	.extern C(d_countextrastep)
+	.extern C(d_tfracextrastep)
+	.extern C(d_lightextrastep)
+	.extern C(d_ziextrastep)
+	.extern C(d_pdestbasestep)
+	.extern C(d_pzbasestep)
+	.extern C(d_sfracbasestep)
+	.extern C(d_ptexbasestep)
+	.extern C(ubasestep)
+	.extern C(d_tfracbasestep)
+	.extern C(d_lightbasestep)
+	.extern C(d_zibasestep)
+	.extern C(zspantable)
+	.extern C(r_lstepy)
+	.extern C(r_sstepy)
+	.extern C(r_tstepy)
+	.extern C(r_zistepy)
+	.extern C(D_PolysetSetEdgeTable)
+	.extern C(D_RasterizeAliasPolySmooth)
+
+	.extern float_point5
+	.extern Float2ToThe31nd
+	.extern izistep
+	.extern izi
+	.extern FloatMinus2ToThe31nd
+	.extern float_1
+	.extern float_particle_z_clip
+	.extern float_minus_1
+	.extern float_0
+	.extern fp_16
+	.extern fp_64k
+	.extern fp_1m
+	.extern fp_1m_minus_1
+	.extern fp_8 
+	.extern entryvec_table
+	.extern advancetable
+	.extern sstep
+	.extern tstep
+	.extern pspantemp
+	.extern counttemp
+	.extern jumptemp
+	.extern reciprocal_table
+	.extern DP_Count
+	.extern DP_u
+	.extern DP_v
+	.extern DP_32768
+	.extern DP_Color
+	.extern DP_Pix
+	.extern DP_EntryTable
+	.extern	pbase
+	.extern s
+	.extern t
+	.extern sfracf
+	.extern tfracf
+	.extern snext
+	.extern tnext
+	.extern	spancountminus1
+	.extern zi16stepu
+	.extern sdivz16stepu
+	.extern tdivz16stepu
+	.extern	zi8stepu
+	.extern sdivz8stepu
+	.extern tdivz8stepu
+	.extern reciprocal_table_16
+	.extern entryvec_table_16
+	.extern ceil_cw
+	.extern single_cw
+	.extern fp_64kx64k
+	.extern pz
+	.extern spr8entryvec_table
+#endif
+
+	.extern C(snd_scaletable)
+	.extern C(paintbuffer)
+	.extern C(snd_linear_count)
+	.extern C(snd_p)
+	.extern C(snd_vol)
+	.extern C(snd_out)
+	.extern C(vright)
+	.extern C(vup)
+	.extern C(vpn)
+	.extern C(BOPS_Error)
--- /dev/null
+++ b/u/r_aclipa.s
@@ -1,0 +1,197 @@
+//
+// r_aclipa.s
+// x86 assembly-language Alias model vertex clipping code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+	.data
+Ltemp0:	.long	0
+Ltemp1:	.long	0
+
+	.text
+
+#define pfv0		8+4
+#define pfv1		8+8
+#define out			8+12
+
+.globl C(R_Alias_clip_bottom)
+C(R_Alias_clip_bottom):
+	pushl	%esi
+	pushl	%edi
+
+	movl	pfv0(%esp),%esi
+	movl	pfv1(%esp),%edi
+
+	movl	C(r_refdef)+rd_aliasvrectbottom,%eax
+
+LDoForwardOrBackward:
+
+	movl	fv_v+4(%esi),%edx
+	movl	fv_v+4(%edi),%ecx
+
+	cmpl	%ecx,%edx
+	jl		LDoForward
+
+	movl	fv_v+4(%esi),%ecx
+	movl	fv_v+4(%edi),%edx
+	movl	pfv0(%esp),%edi
+	movl	pfv1(%esp),%esi
+
+LDoForward:
+
+	subl	%edx,%ecx
+	subl	%edx,%eax
+	movl	%ecx,Ltemp1
+	movl	%eax,Ltemp0
+	fildl	Ltemp1
+	fildl	Ltemp0
+	movl	out(%esp),%edx
+	movl	$2,%eax
+
+	fdivp	%st(0),%st(1)					// scale
+
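+// the fraction just computed is how far the clip boundary lies between the
+// two verts; the two passes of this loop (eax counts down from 2) interpolate
+// all six finalvert components, three per pass, biasing by 0.5 before each
+// integer store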
+LDo3Forward:
+	fildl	fv_v+0(%esi)	// fv0v0 | scale
+	fildl	fv_v+0(%edi)	// fv1v0 | fv0v0 | scale
+	fildl	fv_v+4(%esi)	// fv0v1 | fv1v0 | fv0v0 | scale
+	fildl	fv_v+4(%edi)	// fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
+	fildl	fv_v+8(%esi)	// fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
+	fildl	fv_v+8(%edi)	// fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
+							//  scale
+	fxch	%st(5)			// fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
+							//  scale
+	fsubr	%st(0),%st(4)	// fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
+							//  fv1v2 | scale
+	fxch	%st(3)			// fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
+							//  fv1v2 | scale
+	fsubr	%st(0),%st(2)	// fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
+							//  fv1v0-fv0v0 | fv1v2 | scale
+	fxch	%st(1)			// fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+							//  fv1v0-fv0v0 | fv1v2 | scale
+	fsubr	%st(0),%st(5)	// fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+							//  fv1v0-fv0v0 | fv1v2-fv0v2 | scale
+	fxch	%st(6)			// scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+							//  fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
+	fmul	%st(0),%st(4)	// scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+							//  (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
+	addl	$12,%edi
+	fmul	%st(0),%st(2)	// scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
+							//  (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
+	addl	$12,%esi
+	addl	$12,%edx
+	fmul	%st(0),%st(5)	// scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
+							//  (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
+							//  fv0v2
+	fxch	%st(3)			// fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
+							//  (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
+							//  fv0v2
+	faddp	%st(0),%st(4)	// fv0v1 | (fv1v1-fv0v1)*scale | scale |
+							//  fv0v0+(fv1v0-fv0v0)*scale |
+							//  (fv1v2-fv0v2)*scale | fv0v2
+	faddp	%st(0),%st(1)	// fv0v1+(fv1v1-fv0v1)*scale | scale |
+							//  fv0v0+(fv1v0-fv0v0)*scale |
+							//  (fv1v2-fv0v2)*scale | fv0v2
+	fxch	%st(4)			// fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
+							//  (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
+	faddp	%st(0),%st(3)	// scale | fv0v0+(fv1v0-fv0v0)*scale |
+							//  fv0v2+(fv1v2-fv0v2)*scale |
+							//  fv0v1+(fv1v1-fv0v1)*scale
+	fxch	%st(1)			// fv0v0+(fv1v0-fv0v0)*scale | scale | 
+							//  fv0v2+(fv1v2-fv0v2)*scale |
+							//  fv0v1+(fv1v1-fv0v1)*scale
+	fadds	float_point5
+	fxch	%st(3)			// fv0v1+(fv1v1-fv0v1)*scale | scale | 
+							//  fv0v2+(fv1v2-fv0v2)*scale |
+							//  fv0v0+(fv1v0-fv0v0)*scale
+	fadds	float_point5
+	fxch	%st(2)			// fv0v2+(fv1v2-fv0v2)*scale | scale | 
+							//  fv0v1+(fv1v1-fv0v1)*scale |
+							//  fv0v0+(fv1v0-fv0v0)*scale
+	fadds	float_point5
+	fxch	%st(3)			// fv0v0+(fv1v0-fv0v0)*scale | scale | 
+							//  fv0v1+(fv1v1-fv0v1)*scale |
+							//  fv0v2+(fv1v2-fv0v2)*scale
+	fistpl	fv_v+0-12(%edx)	// scale | fv0v1+(fv1v1-fv0v1)*scale |
+							//  fv0v2+(fv1v2-fv0v2)*scale
+	fxch	%st(1)			// fv0v1+(fv1v1-fv0v1)*scale | scale |
+							//  fv0v2+(fv1v2-fv0v2)*scale | scale
+	fistpl	fv_v+4-12(%edx)	// scale | fv0v2+(fv1v2-fv0v2)*scale
+	fxch	%st(1)			// fv0v2+(fv1v2-fv0v2)*sc | scale
+	fistpl	fv_v+8-12(%edx)	// scale
+
+	decl	%eax
+	jnz		LDo3Forward
+
+	fstp	%st(0)
+
+	popl	%edi
+	popl	%esi
+
+	ret
+
+
+.globl C(R_Alias_clip_top)
+C(R_Alias_clip_top):
+	pushl	%esi
+	pushl	%edi
+
+	movl	pfv0(%esp),%esi
+	movl	pfv1(%esp),%edi
+
+	movl	C(r_refdef)+rd_aliasvrect+4,%eax
+	jmp		LDoForwardOrBackward
+
+
+
+.globl C(R_Alias_clip_right)
+C(R_Alias_clip_right):
+	pushl	%esi
+	pushl	%edi
+
+	movl	pfv0(%esp),%esi
+	movl	pfv1(%esp),%edi
+
+	movl	C(r_refdef)+rd_aliasvrectright,%eax
+
+LRightLeftEntry:
+
+
+	movl	fv_v+4(%esi),%edx
+	movl	fv_v+4(%edi),%ecx
+
+	cmpl	%ecx,%edx
+	movl	fv_v+0(%esi),%edx
+
+	movl	fv_v+0(%edi),%ecx
+	jl		LDoForward2
+
+	movl	fv_v+0(%esi),%ecx
+	movl	fv_v+0(%edi),%edx
+	movl	pfv0(%esp),%edi
+	movl	pfv1(%esp),%esi
+
+LDoForward2:
+
+	jmp		LDoForward
+
+
+.globl C(R_Alias_clip_left)
+C(R_Alias_clip_left):
+	pushl	%esi
+	pushl	%edi
+
+	movl	pfv0(%esp),%esi
+	movl	pfv1(%esp),%edi
+
+	movl	C(r_refdef)+rd_aliasvrect+0,%eax
+	jmp		LRightLeftEntry
+
+
+#endif	// id386
+
--- /dev/null
+++ b/u/r_aliasa.s
@@ -1,0 +1,218 @@
+//
+// r_aliasa.s
+// x86 assembly-language Alias model transform and project code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+	.data
+
+Lfloat_1:	.single	1.0
+Ltemp:		.long	0
+Lcoords:	.long	0, 0, 0
+
+	.text
+
+#define fv			12+4
+#define pstverts	12+8
+
+.globl C(R_AliasTransformAndProjectFinalVerts)
+C(R_AliasTransformAndProjectFinalVerts):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+
+//	int			i, temp;
+//	float		lightcos, *plightnormal, zi;
+//	trivertx_t	*pverts;
+
+//	pverts = r_apverts;
+	movl	C(r_apverts),%esi
+
+//	for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
+//	{
+	movl	pstverts(%esp),%ebp
+	movl	fv(%esp),%edi
+	movl	C(r_anumverts),%ecx
+	subl	%edx,%edx
+
+Lloop:
+
+//	// transform and project
+//		zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
+//				aliastransform[2][3]);
+	movb	(%esi),%dl
+	movb	%dl,Lcoords
+	fildl	Lcoords				// v[0]
+	movb	1(%esi),%dl
+	movb	%dl,Lcoords+4
+	fildl	Lcoords+4			// v[1] | v[0]
+	movb	2(%esi),%dl	
+	movb	%dl,Lcoords+8
+	fildl	Lcoords+8			// v[2] | v[1] | v[0]
+
+	fld		%st(2)				// v[0] | v[2] | v[1] | v[0]
+	fmuls	C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
+	fld		%st(2)				// v[1] | accum | v[2] | v[1] | v[0]
+	fmuls	C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
+	fxch	%st(1)				// accum | accum2 | v[2] | v[1] | v[0]
+	fadds	C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
+	fld		%st(2)				// v[2] | accum | accum2 | v[2] | v[1] | v[0]
+	fmuls	C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
+								 //  v[0]
+	fxch	%st(1)				// accum | accum3 | accum2 | v[2] | v[1] | v[0]
+	faddp	%st(0),%st(2)		// accum3 | accum | v[2] | v[1] | v[0]
+	movb	tv_lightnormalindex(%esi),%dl
+	movl	stv_s(%ebp),%eax
+	movl	%eax,fv_v+8(%edi)
+	faddp	%st(0),%st(1)		// z | v[2] | v[1] | v[0]
+
+	movl	stv_t(%ebp),%eax
+	movl	%eax,fv_v+12(%edi)
+
+//	// lighting
+//		plightnormal = r_avertexnormals[pverts->lightnormalindex];
+
+	fdivrs	Lfloat_1			// zi | v[2] | v[1] | v[0]
+
+//		fv->v[2] = pstverts->s;
+//		fv->v[3] = pstverts->t;
+//		fv->flags = pstverts->onseam;
+	movl	stv_onseam(%ebp),%eax
+	movl	%eax,fv_flags(%edi)
+
+	movl	fv_size(%edi),%eax
+	movl	stv_size(%ebp),%eax
+	movl	4(%esi),%eax
+
+	leal	(%edx,%edx,2),%eax	// index*3
+
+	fxch	%st(3)				// v[0] | v[2] | v[1] | zi
+
+//		lightcos = DotProduct (plightnormal, r_plightvec);
+	flds	C(r_avertexnormals)(,%eax,4)
+	fmuls	C(r_plightvec)
+	flds	C(r_avertexnormals)+4(,%eax,4)
+	fmuls	C(r_plightvec)+4
+	flds	C(r_avertexnormals)+8(,%eax,4)
+	fmuls	C(r_plightvec)+8
+	fxch	%st(1)
+	faddp	%st(0),%st(2)
+	fld		%st(2)				 // v[0] | laccum | laccum2 | v[0] | v[2] |
+								 //  v[1] | zi
+	fmuls	C(aliastransform)+0  // xaccum | laccum | laccum2 | v[0] | v[2] |
+								 //  v[1] | zi
+	fxch	%st(2)				 // laccum2 | laccum | xaccum | v[0] | v[2] |
+								 //  v[1] | zi
+	faddp	%st(0),%st(1)		 // laccum | xaccum | v[0] | v[2] | v[1] | zi
+
+//		temp = r_ambientlight;
+//		if (lightcos < 0)
+//		{
+	fsts	Ltemp
+	movl	C(r_ambientlight),%eax
+	movb	Ltemp+3,%dl
+	testb	$0x80,%dl
+	jz		Lsavelight	// no need to clamp if only ambient lit, because
+						//  r_ambientlight is preclamped
+
+//			temp += (int)(r_shadelight * lightcos);
+	fmuls	C(r_shadelight)
+// FIXME: fast float->int conversion?
+	fistpl	Ltemp
+	addl	Ltemp,%eax
+
+//		// clamp; because we limited the minimum ambient and shading light, we
+//		// don't have to clamp low light, just bright
+//			if (temp < 0)
+//				temp = 0;
+	jns		Lp1
+	subl	%eax,%eax
+
+//		}
+
+Lp1:
+
+//		fv->v[4] = temp;
+//
+//	// x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
+//	// scaled up by 1/2**31, and the scaling cancels out for x and y in the
+//	// projection
+//		fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
+//				aliastransform[0][3]) * zi) + aliasxcenter;
+//		fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
+//				aliastransform[1][3]) * zi) + aliasycenter;
+//		fv->v[5] = zi;
+	fxch	%st(1)				 // v[0] | xaccum | v[2] | v[1] | zi
+	fmuls	C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
+	fxch	%st(3)				 // v[1] | xaccum | v[2] | yaccum | zi
+	fld		%st(0)				 // v[1] | v[1] | xaccum | v[2] | yaccum | zi
+	fmuls	C(aliastransform)+4	 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
+	fxch	%st(1)				 // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
+	movl	%eax,fv_v+16(%edi)
+	fmuls	C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
+								 //  zi
+	fxch	%st(2)				 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
+								 //  zi
+	fadds	C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
+								 //  zi
+	fxch	%st(4)				 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
+								 //  zi
+	fadds	C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
+								 //  zi
+	fxch	%st(3)				 // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
+								 //  zi
+	fld		%st(0)				 // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
+								 //  xaccum | zi
+	fmuls	C(aliastransform)+8	 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
+								 //  xaccum | zi
+	fxch	%st(1)				 // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
+								 //  xaccum | zi
+	fmuls	C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
+								 // yaccum | xaccum | zi
+	fxch	%st(5)				 // xaccum | xaccum3 | xaccum2 | yaccum2 |
+								 // yaccum | yaccum3 | zi
+	faddp	%st(0),%st(2)		 // xaccum3 | xaccum | yaccum2 | yaccum |
+								 //  yaccum3 | zi
+	fxch	%st(3)				 // yaccum | xaccum | yaccum2 | xaccum3 |
+								 //  yaccum3 | zi
+	faddp	%st(0),%st(2)		 // xaccum | yaccum | xaccum3 | yaccum3 | zi
+	addl	$(tv_size),%esi
+	faddp	%st(0),%st(2)		 // yaccum | x | yaccum3 | zi
+	faddp	%st(0),%st(2)		 // x | y | zi
+	addl	$(stv_size),%ebp
+	fmul	%st(2),%st(0)		 // x/z | y | zi
+	fxch	%st(1)				 // y | x/z | zi
+	fmul	%st(2),%st(0)		 // y/z | x/z | zi
+	fxch	%st(1)				 // x/z | y/z | zi
+	fadds	C(aliasxcenter)		 // u | y/z | zi
+	fxch	%st(1)				 // y/z | u | zi
+	fadds	C(aliasycenter)		 // v | u | zi
+	fxch	%st(2)				 // zi | u | v
+// FIXME: fast float->int conversion?
+	fistpl	fv_v+20(%edi)		 // u | v
+	fistpl	fv_v+0(%edi)		 // v
+	fistpl	fv_v+4(%edi)
+
+//	}
+
+	addl	$(fv_size),%edi
+	decl	%ecx
+	jnz		Lloop
+
+	popl	%esi				// restore register variables
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+Lsavelight:
+	fstp	%st(0)
+	jmp		Lp1
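+
+// Reference C sketch of the whole loop above, pieced together from the
+// interleaved comments (the 1/2**31 scaling noted earlier cancels for x and
+// y and is left out here):
+//
+//	for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
+//	{
+//		zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
+//				aliastransform[2][3]);
+//		fv->v[2] = pstverts->s;
+//		fv->v[3] = pstverts->t;
+//		fv->flags = pstverts->onseam;
+//
+//		plightnormal = r_avertexnormals[pverts->lightnormalindex];
+//		lightcos = DotProduct (plightnormal, r_plightvec);
+//		temp = r_ambientlight;
+//		if (lightcos < 0)
+//		{
+//			temp += (int)(r_shadelight * lightcos);
+//			if (temp < 0)
+//				temp = 0;
+//		}
+//		fv->v[4] = temp;
+//
+//		fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
+//				aliastransform[0][3]) * zi) + aliasxcenter;
+//		fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
+//				aliastransform[1][3]) * zi) + aliasycenter;
+//		fv->v[5] = zi;
+//	}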
+
+#endif	// id386
+
--- /dev/null
+++ b/u/r_drawa.s
@@ -1,0 +1,819 @@
+//
+// r_drawa.s
+// x86 assembly-language edge clipping and emission code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef	id386
+
+// !!! if these are changed, they must be changed in r_draw.c too !!!
+#define FULLY_CLIPPED_CACHED	0x80000000
+#define FRAMECOUNT_MASK			0x7FFFFFFF
+
+	.data
+
+Ld0:			.single		0.0
+Ld1:			.single		0.0
+Lstack:			.long		0
+Lfp_near_clip:	.single		NEAR_CLIP
+Lceilv0:		.long		0
+Lv:				.long		0
+Lu0:			.long		0
+Lv0:			.long		0
+Lzi0:			.long		0
+
+	.text
+
+//----------------------------------------------------------------------
+// edge clipping code
+//----------------------------------------------------------------------
+
+#define pv0		4+12
+#define pv1		8+12
+#define clip	12+12
+
+	.align 4
+.globl C(R_ClipEdge)
+C(R_ClipEdge):
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+	movl	%esp,Lstack			// for clearing the stack later
+
+//	float		d0, d1, f;
+//	mvertex_t	clipvert;
+
+	movl	clip(%esp),%ebx
+	movl	pv0(%esp),%esi
+	movl	pv1(%esp),%edx
+
+//	if (clip)
+//	{
+	testl	%ebx,%ebx
+	jz		Lemit
+
+//		do
+//		{
+
+Lcliploop:
+
+//			d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
+//			d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
+	flds	mv_position+0(%esi)
+	fmuls	cp_normal+0(%ebx)
+	flds	mv_position+4(%esi)
+	fmuls	cp_normal+4(%ebx)
+	flds	mv_position+8(%esi)
+	fmuls	cp_normal+8(%ebx)
+	fxch	%st(1)
+	faddp	%st(0),%st(2)		// d0mul2 | d0add0
+
+	flds	mv_position+0(%edx)
+	fmuls	cp_normal+0(%ebx)
+	flds	mv_position+4(%edx)
+	fmuls	cp_normal+4(%ebx)
+	flds	mv_position+8(%edx)
+	fmuls	cp_normal+8(%ebx)
+	fxch	%st(1)
+	faddp	%st(0),%st(2)		// d1mul2 | d1add0 | d0mul2 | d0add0
+	fxch	%st(3)				// d0add0 | d1add0 | d0mul2 | d1mul2
+
+	faddp	%st(0),%st(2)		// d1add0 | dot0 | d1mul2 
+	faddp	%st(0),%st(2)		// dot0 | dot1
+
+	fsubs	cp_dist(%ebx)		// d0 | dot1
+	fxch	%st(1)				// dot1 | d0
+	fsubs	cp_dist(%ebx)		// d1 | d0
+	fxch	%st(1)
+	fstps	Ld0
+	fstps	Ld1
+
+//			if (d0 >= 0)
+//			{
+	movl	Ld0,%eax
+	movl	Ld1,%ecx
+	orl		%eax,%ecx
+	js		Lp2
+
+// both points are unclipped
+
+Lcontinue:
+
+//
+//				R_ClipEdge (&clipvert, pv1, clip->next);
+//				return;
+//			}
+//		} while ((clip = clip->next) != NULL);
+	movl	cp_next(%ebx),%ebx
+	testl	%ebx,%ebx
+	jnz		Lcliploop
+
+//	}
+
+//// add the edge
+//	R_EmitEdge (pv0, pv1);
+Lemit:
+
+//
+// set integer rounding to ceil mode, set to single precision
+//
+// FIXME: do away with by manually extracting integers from floats?
+// FIXME: set less often
+	fldcw	ceil_cw
+
+//	edge_t	*edge, *pcheck;
+//	int		u_check;
+//	float	u, u_step;
+//	vec3_t	local, transformed;
+//	float	*world;
+//	int		v, v2, ceilv0;
+//	float	scale, lzi0, u0, v0;
+//	int		side;
+
+//	if (r_lastvertvalid)
+//	{
+	cmpl	$0,C(r_lastvertvalid)
+	jz		LCalcFirst
+
+//		u0 = r_u1;
+//		v0 = r_v1;
+//		lzi0 = r_lzi1;
+//		ceilv0 = r_ceilv1;
+	movl	C(r_lzi1),%eax
+	movl	C(r_u1),%ecx
+	movl	%eax,Lzi0
+	movl	%ecx,Lu0
+	movl	C(r_v1),%ecx
+	movl	C(r_ceilv1),%eax
+	movl	%ecx,Lv0
+	movl	%eax,Lceilv0
+	jmp		LCalcSecond
+
+//	}
+
+LCalcFirst:
+
+//	else
+//	{
+//		world = &pv0->position[0];
+
+	call	LTransformAndProject	// v0 | lzi0 | u0
+
+	fsts	Lv0
+	fxch	%st(2)					// u0 | lzi0 | v0
+	fstps	Lu0						// lzi0 | v0
+	fstps	Lzi0					// v0
+
+//		ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
+	fistpl	Lceilv0
+
+//	}
+
+LCalcSecond:
+
+//	world = &pv1->position[0];
+	movl	%edx,%esi
+
+	call	LTransformAndProject	// v1 | lzi1 | u1
+
+	flds	Lu0						// u0 | v1 | lzi1 | u1
+	fxch	%st(3)					// u1 | v1 | lzi1 | u0
+	flds	Lzi0					// lzi0 | u1 | v1 | lzi1 | u0
+	fxch	%st(3)					// lzi1 | u1 | v1 | lzi0 | u0
+	flds	Lv0						// v0 | lzi1 | u1 | v1 | lzi0 | u0
+	fxch	%st(3)					// v1 | lzi1 | u1 | v0 | lzi0 | u0
+
+//	r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
+	fistl	C(r_ceilv1)
+
+	fldcw	single_cw				// put back normal floating-point state
+
+	fsts	C(r_v1)
+	fxch	%st(4)					// lzi0 | lzi1 | u1 | v0 | v1 | u0
+
+//	if (r_lzi1 > lzi0)
+//		lzi0 = r_lzi1;
+	fcom	%st(1)
+	fnstsw	%ax
+	testb	$1,%ah
+	jz		LP0
+	fstp	%st(0)
+	fld		%st(0)
+LP0:
+
+	fxch	%st(1)					// lzi1 | lzi0 | u1 | v0 | v1 | u0
+	fstps	C(r_lzi1)				// lzi0 | u1 | v0 | v1 | u0
+	fxch	%st(1)
+	fsts	C(r_u1)
+	fxch	%st(1)
+
+//	if (lzi0 > r_nearzi)	// for mipmap finding
+//		r_nearzi = lzi0;
+	fcoms	C(r_nearzi)
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jnz		LP1
+	fsts	C(r_nearzi)
+LP1:
+
+// // for right edges, all we want is the effect on 1/z
+//	if (r_nearzionly)
+//		return;
+	movl	C(r_nearzionly),%eax
+	testl	%eax,%eax
+	jz		LP2
+LPop5AndDone:
+	movl	C(cacheoffset),%eax
+	movl	C(r_framecount),%edx
+	cmpl	$0x7FFFFFFF,%eax
+	jz		LDoPop
+	andl	$(FRAMECOUNT_MASK),%edx
+	orl		$(FULLY_CLIPPED_CACHED),%edx
+	movl	%edx,C(cacheoffset)
+
+LDoPop:
+	fstp	%st(0)			// u1 | v0 | v1 | u0
+	fstp	%st(0)			// v0 | v1 | u0
+	fstp	%st(0)			// v1 | u0
+	fstp	%st(0)			// u0
+	fstp	%st(0)
+	jmp		Ldone
+
+LP2:
+
+// // create the edge
+//	if (ceilv0 == r_ceilv1)
+//		return;		// horizontal edge
+	movl	Lceilv0,%ebx
+	movl	C(edge_p),%edi
+	movl	C(r_ceilv1),%ecx
+	movl	%edi,%edx
+	movl	C(r_pedge),%esi
+	addl	$(et_size),%edx
+	cmpl	%ecx,%ebx
+	jz		LPop5AndDone
+
+	movl	C(r_pedge),%eax
+	movl	%eax,et_owner(%edi)
+
+//	side = ceilv0 > r_ceilv1;
+//
+//	edge->nearzi = lzi0;
+	fstps	et_nearzi(%edi)		// u1 | v0 | v1 | u0
+
+//	if (side == 1)
+//	{
+	jc		LSide0
+
+LSide1:
+
+//	// leading edge (go from p2 to p1)
+
+//		u_step = ((u0 - r_u1) / (v0 - r_v1));
+	fsubrp	%st(0),%st(3)		// v0 | v1 | u0-u1
+	fsub	%st(1),%st(0)		// v0-v1 | v1 | u0-u1
+	fdivrp	%st(0),%st(2)		// v1 | ustep
+
+//	r_emitted = 1;
+	movl	$1,C(r_emitted)
+
+//	edge = edge_p++;
+	movl	%edx,C(edge_p)
+
+// pretouch next edge
+	movl	(%edx),%eax
+
+//		v2 = ceilv0 - 1;
+//		v = r_ceilv1;
+	movl	%ecx,%eax
+	leal	-1(%ebx),%ecx
+	movl	%eax,%ebx
+
+//		edge->surfs[0] = 0;
+//		edge->surfs[1] = surface_p - surfaces;
+	movl	C(surface_p),%eax
+	movl	C(surfaces),%esi
+	subl	%edx,%edx
+	subl	%esi,%eax
+	shrl	$(SURF_T_SHIFT),%eax
+	movl	%edx,et_surfs(%edi)
+	movl	%eax,et_surfs+2(%edi)
+
+	subl	%esi,%esi
+
+//		u = r_u1 + ((float)v - r_v1) * u_step;
+	movl	%ebx,Lv
+	fildl	Lv					// v | v1 | ustep
+	fsubp	%st(0),%st(1)		// v-v1 | ustep
+	fmul	%st(1),%st(0)		// (v-v1)*ustep | ustep
+	fadds	C(r_u1)				// u | ustep
+
+	jmp		LSideDone
+
+//	}
+
+LSide0:
+
+//	else
+//	{
+//	// trailing edge (go from p1 to p2)
+
+//		u_step = ((r_u1 - u0) / (r_v1 - v0));
+	fsub	%st(3),%st(0)		// u1-u0 | v0 | v1 | u0
+	fxch	%st(2)				// v1 | v0 | u1-u0 | u0
+	fsub	%st(1),%st(0)		// v1-v0 | v0 | u1-u0 | u0
+	fdivrp	%st(0),%st(2)		// v0 | ustep | u0
+
+//	r_emitted = 1;
+	movl	$1,C(r_emitted)
+
+//	edge = edge_p++;
+	movl	%edx,C(edge_p)
+
+// pretouch next edge
+	movl	(%edx),%eax
+
+//		v = ceilv0;
+//		v2 = r_ceilv1 - 1;
+	decl	%ecx
+
+//		edge->surfs[0] = surface_p - surfaces;
+//		edge->surfs[1] = 0;
+	movl	C(surface_p),%eax
+	movl	C(surfaces),%esi
+	subl	%edx,%edx
+	subl	%esi,%eax
+	shrl	$(SURF_T_SHIFT),%eax
+	movl	%edx,et_surfs+2(%edi)
+	movl	%eax,et_surfs(%edi)
+
+	movl	$1,%esi
+
+//		u = u0 + ((float)v - v0) * u_step;
+	movl	%ebx,Lv
+	fildl	Lv					// v | v0 | ustep | u0
+	fsubp	%st(0),%st(1)		// v-v0 | ustep | u0
+	fmul	%st(1),%st(0)		// (v-v0)*ustep | ustep | u0
+	faddp	%st(0),%st(2)		// ustep | u
+	fxch	%st(1)				// u | ustep
+
+//	}
+
+LSideDone:
+
+//	edge->u_step = u_step*0x100000;
+//	edge->u = u*0x100000 + 0xFFFFF;
+
+	fmuls	fp_1m				// u*0x100000 | ustep
+	fxch	%st(1)				// ustep | u*0x100000
+	fmuls	fp_1m				// ustep*0x100000 | u*0x100000
+	fxch	%st(1)				// u*0x100000 | ustep*0x100000
+	fadds	fp_1m_minus_1		// u*0x100000 + 0xFFFFF | ustep*0x100000
+	fxch	%st(1)				// ustep*0x100000 | u*0x100000 + 0xFFFFF
+	fistpl	et_u_step(%edi)		// u*0x100000 + 0xFFFFF
+	fistpl	et_u(%edi)
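+
+// (edge->u and edge->u_step are stored in fixed point with 20 fractional
+// bits -- hence the *0x100000 -- and the span code recovers the integer
+// pixel u with a shrl $20; the +0xFFFFF bias makes that later truncation
+// round up rather than down)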
+
+// // we need to do this to avoid stepping off the edges if a very nearly
+// // horizontal edge is less than epsilon above a scan, and numeric error
+// // causes it to incorrectly extend to the scan, and the extension of the
+// // line goes off the edge of the screen
+// // FIXME: is this actually needed?
+//	if (edge->u < r_refdef.vrect_x_adj_shift20)
+//		edge->u = r_refdef.vrect_x_adj_shift20;
+//	if (edge->u > r_refdef.vrectright_adj_shift20)
+//		edge->u = r_refdef.vrectright_adj_shift20;
+	movl	et_u(%edi),%eax
+	movl	C(r_refdef)+rd_vrect_x_adj_shift20,%edx
+	cmpl	%edx,%eax
+	jl		LP4
+	movl	C(r_refdef)+rd_vrectright_adj_shift20,%edx
+	cmpl	%edx,%eax
+	jng		LP5
+LP4:
+	movl	%edx,et_u(%edi)
+	movl	%edx,%eax
+LP5:
+
+// // sort the edge in normally
+//	u_check = edge->u;
+//
+//	if (edge->surfs[0])
+//		u_check++;	// sort trailers after leaders
+	addl	%esi,%eax
+
+//	if (!newedges[v] || newedges[v]->u >= u_check)
+//	{
+	movl	C(newedges)(,%ebx,4),%esi
+	testl	%esi,%esi
+	jz		LDoFirst
+	cmpl	%eax,et_u(%esi)
+	jl		LNotFirst
+LDoFirst:
+
+//		edge->next = newedges[v];
+//		newedges[v] = edge;
+	movl	%esi,et_next(%edi)
+	movl	%edi,C(newedges)(,%ebx,4)
+
+	jmp		LSetRemove
+
+//	}
+
+LNotFirst:
+
+//	else
+//	{
+//		pcheck = newedges[v];
+//
+//		while (pcheck->next && pcheck->next->u < u_check)
+//			pcheck = pcheck->next;
+LFindInsertLoop:
+	movl	%esi,%edx
+	movl	et_next(%esi),%esi
+	testl	%esi,%esi
+	jz		LInsertFound
+	cmpl	%eax,et_u(%esi)
+	jl		LFindInsertLoop
+
+LInsertFound:
+
+//		edge->next = pcheck->next;
+//		pcheck->next = edge;
+	movl	%esi,et_next(%edi)
+	movl	%edi,et_next(%edx)
+
+//	}
+
+LSetRemove:
+
+//	edge->nextremove = removeedges[v2];
+//	removeedges[v2] = edge;
+	movl	C(removeedges)(,%ecx,4),%eax
+	movl	%edi,C(removeedges)(,%ecx,4)
+	movl	%eax,et_nextremove(%edi)
+
+Ldone:
+	movl	Lstack,%esp			// clear temporary variables from stack
+
+	popl	%ebx				// restore register variables
+	popl	%edi
+	popl	%esi
+	ret
+
+// at least one point is clipped
+
+Lp2:
+	testl	%eax,%eax
+	jns		Lp1
+
+//			else
+//			{
+//			// point 0 is clipped
+
+//				if (d1 < 0)
+//				{
+	movl	Ld1,%eax
+	testl	%eax,%eax
+	jns		Lp3
+
+//				// both points are clipped
+//				// we do cache fully clipped edges
+//					if (!leftclipped)
+	movl	C(r_leftclipped),%eax
+	movl	C(r_pedge),%ecx
+	testl	%eax,%eax
+	jnz		Ldone
+
+//						r_pedge->framecount = r_framecount;
+	movl	C(r_framecount),%eax
+	andl	$(FRAMECOUNT_MASK),%eax
+	orl		$(FULLY_CLIPPED_CACHED),%eax
+	movl	%eax,C(cacheoffset)
+
+//					return;
+	jmp		Ldone
+
+//				}
+
+Lp1:
+
+//			// point 0 is unclipped
+//				if (d1 >= 0)
+//				{
+//				// both points are unclipped
+//					continue;
+
+//			// only point 1 is clipped
+
+//				f = d0 / (d0 - d1);
+	flds	Ld0
+	flds	Ld1
+	fsubr	%st(1),%st(0)
+
+//			// we don't cache partially clipped edges
+	movl	$0x7FFFFFFF,C(cacheoffset)
+
+	fdivrp	%st(0),%st(1)
+
+	subl	$(mv_size),%esp			// allocate space for clipvert
+
+//				clipvert.position[0] = pv0->position[0] +
+//						f * (pv1->position[0] - pv0->position[0]);
+//				clipvert.position[1] = pv0->position[1] +
+//						f * (pv1->position[1] - pv0->position[1]);
+//				clipvert.position[2] = pv0->position[2] +
+//						f * (pv1->position[2] - pv0->position[2]);
+	flds	mv_position+8(%edx)
+	fsubs	mv_position+8(%esi)
+	flds	mv_position+4(%edx)
+	fsubs	mv_position+4(%esi)
+	flds	mv_position+0(%edx)
+	fsubs	mv_position+0(%esi)		// 0 | 1 | 2
+
+// replace pv1 with the clip point
+	movl	%esp,%edx
+	movl	cp_leftedge(%ebx),%eax
+	testb	%al,%al
+
+	fmul	%st(3),%st(0)
+	fxch	%st(1)					// 1 | 0 | 2
+	fmul	%st(3),%st(0)
+	fxch	%st(2)					// 2 | 0 | 1
+	fmulp	%st(0),%st(3)			// 0 | 1 | 2
+	fadds	mv_position+0(%esi)
+	fxch	%st(1)					// 1 | 0 | 2
+	fadds	mv_position+4(%esi)
+	fxch	%st(2)					// 2 | 0 | 1
+	fadds	mv_position+8(%esi)
+	fxch	%st(1)					// 0 | 2 | 1
+	fstps	mv_position+0(%esp)		// 2 | 1
+	fstps	mv_position+8(%esp)		// 1
+	fstps	mv_position+4(%esp)
+
+//				if (clip->leftedge)
+//				{
+	jz		Ltestright
+
+//					r_leftclipped = true;
+//					r_leftexit = clipvert;
+	movl	$1,C(r_leftclipped)
+	movl	mv_position+0(%esp),%eax
+	movl	%eax,C(r_leftexit)+mv_position+0
+	movl	mv_position+4(%esp),%eax
+	movl	%eax,C(r_leftexit)+mv_position+4
+	movl	mv_position+8(%esp),%eax
+	movl	%eax,C(r_leftexit)+mv_position+8
+
+	jmp		Lcontinue
+
+//				}
+
+Ltestright:
+//				else if (clip->rightedge)
+//				{
+	testb	%ah,%ah
+	jz		Lcontinue
+
+//					r_rightclipped = true;
+//					r_rightexit = clipvert;
+	movl	$1,C(r_rightclipped)
+	movl	mv_position+0(%esp),%eax
+	movl	%eax,C(r_rightexit)+mv_position+0
+	movl	mv_position+4(%esp),%eax
+	movl	%eax,C(r_rightexit)+mv_position+4
+	movl	mv_position+8(%esp),%eax
+	movl	%eax,C(r_rightexit)+mv_position+8
+
+//				}
+//
+//				R_ClipEdge (pv0, &clipvert, clip->next);
+//				return;
+//			}
+	jmp		Lcontinue
+
+//			}
+
+Lp3:
+
+//			// only point 0 is clipped
+//				r_lastvertvalid = false;
+
+	movl	$0,C(r_lastvertvalid)
+
+//				f = d0 / (d0 - d1);
+	flds	Ld0
+	flds	Ld1
+	fsubr	%st(1),%st(0)
+
+//			// we don't cache partially clipped edges
+	movl	$0x7FFFFFFF,C(cacheoffset)
+
+	fdivrp	%st(0),%st(1)
+
+	subl	$(mv_size),%esp			// allocate space for clipvert
+
+//				clipvert.position[0] = pv0->position[0] +
+//						f * (pv1->position[0] - pv0->position[0]);
+//				clipvert.position[1] = pv0->position[1] +
+//						f * (pv1->position[1] - pv0->position[1]);
+//				clipvert.position[2] = pv0->position[2] +
+//						f * (pv1->position[2] - pv0->position[2]);
+	flds	mv_position+8(%edx)
+	fsubs	mv_position+8(%esi)
+	flds	mv_position+4(%edx)
+	fsubs	mv_position+4(%esi)
+	flds	mv_position+0(%edx)
+	fsubs	mv_position+0(%esi)		// 0 | 1 | 2
+
+	movl	cp_leftedge(%ebx),%eax
+	testb	%al,%al
+
+	fmul	%st(3),%st(0)
+	fxch	%st(1)					// 1 | 0 | 2
+	fmul	%st(3),%st(0)
+	fxch	%st(2)					// 2 | 0 | 1
+	fmulp	%st(0),%st(3)			// 0 | 1 | 2
+	fadds	mv_position+0(%esi)
+	fxch	%st(1)					// 1 | 0 | 2
+	fadds	mv_position+4(%esi)
+	fxch	%st(2)					// 2 | 0 | 1
+	fadds	mv_position+8(%esi)
+	fxch	%st(1)					// 0 | 2 | 1
+	fstps	mv_position+0(%esp)		// 2 | 1
+	fstps	mv_position+8(%esp)		// 1
+	fstps	mv_position+4(%esp)
+
+// replace pv0 with the clip point
+	movl	%esp,%esi
+
+//				if (clip->leftedge)
+//				{
+	jz		Ltestright2
+
+//					r_leftclipped = true;
+//					r_leftenter = clipvert;
+	movl	$1,C(r_leftclipped)
+	movl	mv_position+0(%esp),%eax
+	movl	%eax,C(r_leftenter)+mv_position+0
+	movl	mv_position+4(%esp),%eax
+	movl	%eax,C(r_leftenter)+mv_position+4
+	movl	mv_position+8(%esp),%eax
+	movl	%eax,C(r_leftenter)+mv_position+8
+
+	jmp		Lcontinue
+
+//				}
+
+Ltestright2:
+//				else if (clip->rightedge)
+//				{
+	testb	%ah,%ah
+	jz		Lcontinue
+
+//					r_rightclipped = true;
+//					r_rightenter = clipvert;
+	movl	$1,C(r_rightclipped)
+	movl	mv_position+0(%esp),%eax
+	movl	%eax,C(r_rightenter)+mv_position+0
+	movl	mv_position+4(%esp),%eax
+	movl	%eax,C(r_rightenter)+mv_position+4
+	movl	mv_position+8(%esp),%eax
+	movl	%eax,C(r_rightenter)+mv_position+8
+
+//				}
+	jmp		Lcontinue
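+
+// Both clip branches above follow the same C shape (reference sketch taken
+// from the commented-out source), with d0 and d1 the signed distances of
+// pv0 and pv1 from the clip plane:
+//
+//	f = d0 / (d0 - d1);
+//	for (i=0 ; i<3 ; i++)
+//		clipvert.position[i] = pv0->position[i] +
+//				f * (pv1->position[i] - pv0->position[i]);
+//	if (clip->leftedge)
+//		r_leftclipped = true;	// clipvert saved in r_leftexit (pv1 clipped)
+//								//  or r_leftenter (pv0 clipped)
+//	else if (clip->rightedge)
+//		r_rightclipped = true;	// likewise r_rightexit / r_rightenter
+//
+// and then the loop continues with the clipped endpoint replaced by
+// &clipvert (the original C recurses via R_ClipEdge (..., clip->next)).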
+
+// %esi = vec3_t point to transform and project
+// %edx preserved
+LTransformAndProject:
+
+//	// transform and project
+//		VectorSubtract (world, modelorg, local);
+	flds	mv_position+0(%esi)
+	fsubs	C(modelorg)+0
+	flds	mv_position+4(%esi)
+	fsubs	C(modelorg)+4
+	flds	mv_position+8(%esi)	
+	fsubs	C(modelorg)+8
+	fxch	%st(2)				// local[0] | local[1] | local[2]
+
+//		TransformVector (local, transformed);
+//	
+//		if (transformed[2] < NEAR_CLIP)
+//			transformed[2] = NEAR_CLIP;
+//	
+//		lzi0 = 1.0 / transformed[2];
+	fld		%st(0)				// local[0] | local[0] | local[1] | local[2]
+	fmuls	C(vpn)+0			// zm0 | local[0] | local[1] | local[2]
+	fld		%st(1)				// local[0] | zm0 | local[0] | local[1] |
+								//  local[2]
+	fmuls	C(vright)+0			// xm0 | zm0 | local[0] | local[1] | local[2]
+	fxch	%st(2)				// local[0] | zm0 | xm0 | local[1] | local[2]
+	fmuls	C(vup)+0			// ym0 |  zm0 | xm0 | local[1] | local[2]
+	fld		%st(3)				// local[1] | ym0 |  zm0 | xm0 | local[1] |
+								//  local[2]
+	fmuls	C(vpn)+4			// zm1 | ym0 | zm0 | xm0 | local[1] |
+								//  local[2]
+	fld		%st(4)				// local[1] | zm1 | ym0 | zm0 | xm0 |
+								//  local[1] | local[2]
+	fmuls	C(vright)+4			// xm1 | zm1 | ym0 |  zm0 | xm0 |
+								//  local[1] | local[2]
+	fxch	%st(5)				// local[1] | zm1 | ym0 | zm0 | xm0 |
+								//  xm1 | local[2]
+	fmuls	C(vup)+4			// ym1 | zm1 | ym0 | zm0 | xm0 |
+								//  xm1 | local[2]
+	fxch	%st(1)				// zm1 | ym1 | ym0 | zm0 | xm0 |
+								//  xm1 | local[2]
+	faddp	%st(0),%st(3)		// ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
+	fxch	%st(3)				// xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
+	faddp	%st(0),%st(4)		// ym0 | zm2 | ym1 | xm2 | local[2]
+	faddp	%st(0),%st(2)		// zm2 | ym2 | xm2 | local[2]
+	fld		%st(3)				// local[2] | zm2 | ym2 | xm2 | local[2]
+	fmuls	C(vpn)+8			// zm3 | zm2 | ym2 | xm2 | local[2]
+	fld		%st(4)				// local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
+	fmuls	C(vright)+8			// xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
+	fxch	%st(5)				// local[2] | zm3 | zm2 | ym2 | xm2 | xm3
+	fmuls	C(vup)+8			// ym3 | zm3 | zm2 | ym2 | xm2 | xm3
+	fxch	%st(1)				// zm3 | ym3 | zm2 | ym2 | xm2 | xm3
+	faddp	%st(0),%st(2)		// ym3 | zm4 | ym2 | xm2 | xm3
+	fxch	%st(4)				// xm3 | zm4 | ym2 | xm2 | ym3
+	faddp	%st(0),%st(3)		// zm4 | ym2 | xm4 | ym3
+	fxch	%st(1)				// ym2 | zm4 | xm4 | ym3
+	faddp	%st(0),%st(3)		// zm4 | xm4 | ym4
+
+	fcoms	Lfp_near_clip
+	fnstsw	%ax
+	testb	$1,%ah
+	jz		LNoClip
+	fstp	%st(0)
+	flds	Lfp_near_clip
+
+LNoClip:
+
+	fdivrs	float_1				// lzi0 | x | y
+	fxch	%st(1)				// x | lzi0 | y
+
+//	// FIXME: build x/yscale into transform?
+//		scale = xscale * lzi0;
+//		u0 = (xcenter + scale*transformed[0]);
+	flds	C(xscale)			// xscale | x | lzi0 | y
+	fmul	%st(2),%st(0)		// scale | x | lzi0 | y
+	fmulp	%st(0),%st(1)		// scale*x | lzi0 | y
+	fadds	C(xcenter)			// u0 | lzi0 | y
+
+//		if (u0 < r_refdef.fvrectx_adj)
+//			u0 = r_refdef.fvrectx_adj;
+//		if (u0 > r_refdef.fvrectright_adj)
+//			u0 = r_refdef.fvrectright_adj;
+// FIXME: use integer compares of floats?
+	fcoms	C(r_refdef)+rd_fvrectx_adj
+	fnstsw	%ax
+	testb	$1,%ah
+	jz		LClampP0
+	fstp	%st(0)
+	flds	C(r_refdef)+rd_fvrectx_adj
+LClampP0:
+	fcoms	C(r_refdef)+rd_fvrectright_adj
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jnz		LClampP1
+	fstp	%st(0)
+	flds	C(r_refdef)+rd_fvrectright_adj
+LClampP1:
+
+	fld		%st(1)				// lzi0 | u0 | lzi0 | y
+
+//		scale = yscale * lzi0;
+//		v0 = (ycenter - scale*transformed[1]);
+	fmuls	C(yscale)			// scale | u0 | lzi0 | y
+	fmulp	%st(0),%st(3)		// u0 | lzi0 | scale*y
+	fxch	%st(2)				// scale*y | lzi0 | u0
+	fsubrs	C(ycenter)			// v0 | lzi0 | u0
+
+//		if (v0 < r_refdef.fvrecty_adj)
+//			v0 = r_refdef.fvrecty_adj;
+//		if (v0 > r_refdef.fvrectbottom_adj)
+//			v0 = r_refdef.fvrectbottom_adj;
+// FIXME: use integer compares of floats?
+	fcoms	C(r_refdef)+rd_fvrecty_adj
+	fnstsw	%ax
+	testb	$1,%ah
+	jz		LClampP2
+	fstp	%st(0)
+	flds	C(r_refdef)+rd_fvrecty_adj
+LClampP2:
+	fcoms	C(r_refdef)+rd_fvrectbottom_adj
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jnz		LClampP3
+	fstp	%st(0)
+	flds	C(r_refdef)+rd_fvrectbottom_adj
+LClampP3:
+	ret
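+
+// Reference C sketch of LTransformAndProject (from the comments above, with
+// the TransformVector step written out against vright/vup/vpn, which is what
+// the FP code computes):
+//
+//	VectorSubtract (world, modelorg, local);
+//	transformed[0] = DotProduct (local, vright);
+//	transformed[1] = DotProduct (local, vup);
+//	transformed[2] = DotProduct (local, vpn);
+//	if (transformed[2] < NEAR_CLIP)
+//		transformed[2] = NEAR_CLIP;
+//	lzi0 = 1.0 / transformed[2];
+//	u0 = xcenter + xscale*lzi0*transformed[0];
+//	if (u0 < r_refdef.fvrectx_adj)
+//		u0 = r_refdef.fvrectx_adj;
+//	if (u0 > r_refdef.fvrectright_adj)
+//		u0 = r_refdef.fvrectright_adj;
+//	v0 = ycenter - yscale*lzi0*transformed[1];
+//	if (v0 < r_refdef.fvrecty_adj)
+//		v0 = r_refdef.fvrecty_adj;
+//	if (v0 > r_refdef.fvrectbottom_adj)
+//		v0 = r_refdef.fvrectbottom_adj;
+//
+// returning v0 | lzi0 | u0 on the FP stack, as noted at the call sites.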
+
+#endif	// id386
+
--- /dev/null
+++ b/u/r_edgea.s
@@ -1,0 +1,731 @@
+//
+// r_edgea.s
+// x86 assembly-language edge-processing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef	id386
+
+	.data
+Ltemp:					.long	0
+float_1_div_0100000h:	.long	0x35800000	// 1.0/(float)0x100000
+float_point_999:		.single	0.999
+float_1_point_001:		.single	1.001
+
+	.text
+
+//--------------------------------------------------------------------
+
+#define edgestoadd	4+8		// note odd stack offsets because of interleaving
+#define edgelist	8+12	// with pushes
+
+.globl C(R_EdgeCodeStart)
+C(R_EdgeCodeStart):
+
+.globl C(R_InsertNewEdges)
+C(R_InsertNewEdges):
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	movl	edgestoadd(%esp),%edx
+	pushl	%ebx
+	movl	edgelist(%esp),%ecx
+
+LDoNextEdge:
+	movl	et_u(%edx),%eax
+	movl	%edx,%edi
+
+LContinueSearch:
+	movl	et_u(%ecx),%ebx
+	movl	et_next(%ecx),%esi
+	cmpl	%ebx,%eax
+	jle		LAddedge
+	movl	et_u(%esi),%ebx
+	movl	et_next(%esi),%ecx
+	cmpl	%ebx,%eax
+	jle		LAddedge2
+	movl	et_u(%ecx),%ebx
+	movl	et_next(%ecx),%esi
+	cmpl	%ebx,%eax
+	jle		LAddedge
+	movl	et_u(%esi),%ebx
+	movl	et_next(%esi),%ecx
+	cmpl	%ebx,%eax
+	jg		LContinueSearch
+
+LAddedge2:
+	movl	et_next(%edx),%edx
+	movl	et_prev(%esi),%ebx
+	movl	%esi,et_next(%edi)
+	movl	%ebx,et_prev(%edi)
+	movl	%edi,et_next(%ebx)
+	movl	%edi,et_prev(%esi)
+	movl	%esi,%ecx
+
+	cmpl	$0,%edx
+	jnz		LDoNextEdge
+	jmp		LDone
+
+	.align 4
+LAddedge:
+	movl	et_next(%edx),%edx
+	movl	et_prev(%ecx),%ebx
+	movl	%ecx,et_next(%edi)
+	movl	%ebx,et_prev(%edi)
+	movl	%edi,et_next(%ebx)
+	movl	%edi,et_prev(%ecx)
+
+	cmpl	$0,%edx
+	jnz		LDoNextEdge
+
+LDone:
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+
+	ret
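+
+// R_InsertNewEdges above is an unrolled insertion into the sorted (by u)
+// doubly-linked edge list; roughly, in C (reference sketch, not the original
+// source):
+//
+//	for ( ; edgestoadd ; edgestoadd = next)
+//	{
+//		next = edgestoadd->next;
+//		while (edgelist->u < edgestoadd->u)
+//			edgelist = edgelist->next;
+//		edgestoadd->next = edgelist;		// link in front of edgelist
+//		edgestoadd->prev = edgelist->prev;
+//		edgelist->prev->next = edgestoadd;
+//		edgelist->prev = edgestoadd;
+//	}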
+
+//--------------------------------------------------------------------
+
+#define predge	4+4
+
+.globl C(R_RemoveEdges)
+C(R_RemoveEdges):
+	pushl	%ebx
+	movl	predge(%esp),%eax
+
+Lre_loop:
+	movl	et_next(%eax),%ecx
+	movl	et_nextremove(%eax),%ebx
+	movl	et_prev(%eax),%edx
+	testl	%ebx,%ebx
+	movl	%edx,et_prev(%ecx)
+	jz		Lre_done
+	movl	%ecx,et_next(%edx)
+
+	movl	et_next(%ebx),%ecx
+	movl	et_prev(%ebx),%edx
+	movl	et_nextremove(%ebx),%eax
+	movl	%edx,et_prev(%ecx)
+	testl	%eax,%eax
+	movl	%ecx,et_next(%edx)
+	jnz		Lre_loop
+
+	popl	%ebx
+	ret
+
+Lre_done:
+	movl	%ecx,et_next(%edx)
+	popl	%ebx
+
+	ret
+
+//--------------------------------------------------------------------
+
+#define pedgelist	4+4		// note odd stack offset because of interleaving
+							// with pushes
+
+.globl C(R_StepActiveU)
+C(R_StepActiveU):
+	pushl	%edi
+	movl	pedgelist(%esp),%edx
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+	movl	et_prev(%edx),%esi
+
+LNewEdge:
+	movl	et_u(%esi),%edi
+
+LNextEdge:
+	movl	et_u(%edx),%eax
+	movl	et_u_step(%edx),%ebx
+	addl	%ebx,%eax
+	movl	et_next(%edx),%esi
+	movl	%eax,et_u(%edx)
+	cmpl	%edi,%eax
+	jl		LPushBack
+
+	movl	et_u(%esi),%edi
+	movl	et_u_step(%esi),%ebx
+	addl	%ebx,%edi
+	movl	et_next(%esi),%edx
+	movl	%edi,et_u(%esi)
+	cmpl	%eax,%edi
+	jl		LPushBack2
+
+	movl	et_u(%edx),%eax
+	movl	et_u_step(%edx),%ebx
+	addl	%ebx,%eax
+	movl	et_next(%edx),%esi
+	movl	%eax,et_u(%edx)
+	cmpl	%edi,%eax
+	jl		LPushBack
+
+	movl	et_u(%esi),%edi
+	movl	et_u_step(%esi),%ebx
+	addl	%ebx,%edi
+	movl	et_next(%esi),%edx
+	movl	%edi,et_u(%esi)
+	cmpl	%eax,%edi
+	jnl		LNextEdge
+
+LPushBack2:
+	movl	%edx,%ebx
+	movl	%edi,%eax
+	movl	%esi,%edx
+	movl	%ebx,%esi
+
+LPushBack:
+// push it back to keep it sorted
+	movl	et_prev(%edx),%ecx
+	movl	et_next(%edx),%ebx
+
+// done if the -1 in edge_aftertail triggered this
+	cmpl	$(C(edge_aftertail)),%edx
+	jz		LUDone
+
+// pull the edge out of the edge list
+	movl	et_prev(%ecx),%edi
+	movl	%ecx,et_prev(%esi)
+	movl	%ebx,et_next(%ecx)
+
+// find out where the edge goes in the edge list
+LPushBackLoop:
+	movl	et_prev(%edi),%ecx
+	movl	et_u(%edi),%ebx
+	cmpl	%ebx,%eax
+	jnl		LPushBackFound
+
+	movl	et_prev(%ecx),%edi
+	movl	et_u(%ecx),%ebx
+	cmpl	%ebx,%eax
+	jl		LPushBackLoop
+
+	movl	%ecx,%edi
+
+// put the edge back into the edge list
+LPushBackFound:
+	movl	et_next(%edi),%ebx
+	movl	%edi,et_prev(%edx)
+	movl	%ebx,et_next(%edx)
+	movl	%edx,et_next(%edi)
+	movl	%edx,et_prev(%ebx)
+
+	movl	%esi,%edx
+	movl	et_prev(%esi),%esi
+
+	cmpl	$(C(edge_tail)),%edx
+	jnz		LNewEdge
+
+LUDone:
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+
+	ret
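+
+// In C terms (a rough reading of the code above, not the original source):
+// R_StepActiveU adds u_step to every active edge's u, and whenever an edge
+// steps to the left of the edge preceding it, that edge is unlinked and
+// walked backwards through the list until it is back in u order; the routine
+// ends either when the edge_aftertail sentinel (the -1 u) is what triggers a
+// push back, or when the forward walk reaches edge_tail.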
+
+//--------------------------------------------------------------------
+
+#define surf	4		// note this is loaded before any pushes
+
+	.align 4
+TrailingEdge:
+	movl	st_spanstate(%esi),%eax	// check for edge inversion
+	decl	%eax
+	jnz		LInverted
+
+	movl	%eax,st_spanstate(%esi)
+	movl	st_insubmodel(%esi),%ecx
+	movl	0x12345678,%edx		// surfaces[1].st_next
+LPatch0:
+	movl	C(r_bmodelactive),%eax
+	subl	%ecx,%eax
+	cmpl	%esi,%edx
+	movl	%eax,C(r_bmodelactive)
+	jnz		LNoEmit				// surface isn't on top, just remove
+
+// emit a span (current top going away)
+	movl	et_u(%ebx),%eax
+	shrl	$20,%eax				// iu = integral pixel u
+	movl	st_last_u(%esi),%edx
+	movl	st_next(%esi),%ecx
+	cmpl	%edx,%eax
+	jle		LNoEmit2				// iu <= surf->last_u, so nothing to emit
+
+	movl	%eax,st_last_u(%ecx)	// surf->next->last_u = iu;
+	subl	%edx,%eax
+	movl	%edx,espan_t_u(%ebp)		// span->u = surf->last_u;
+
+	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
+	movl	C(current_iv),%eax
+	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
+	movl	st_spans(%esi),%eax
+	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf->spans;
+	movl	%ebp,st_spans(%esi)			// surf->spans = span;
+	addl	$(espan_t_size),%ebp
+
+	movl	st_next(%esi),%edx		// remove the surface from the surface
+	movl	st_prev(%esi),%esi		// stack
+
+	movl	%edx,st_next(%esi)
+	movl	%esi,st_prev(%edx)
+	ret
+
+LNoEmit2:
+	movl	%eax,st_last_u(%ecx)	// surf->next->last_u = iu;
+	movl	st_next(%esi),%edx		// remove the surface from the surface
+	movl	st_prev(%esi),%esi		// stack
+
+	movl	%edx,st_next(%esi)
+	movl	%esi,st_prev(%edx)
+	ret
+
+LNoEmit:
+	movl	st_next(%esi),%edx		// remove the surface from the surface
+	movl	st_prev(%esi),%esi		// stack
+
+	movl	%edx,st_next(%esi)
+	movl	%esi,st_prev(%edx)
+	ret
+
+LInverted:
+	movl	%eax,st_spanstate(%esi)
+	ret
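+
+// The span emission in TrailingEdge, in C terms (reference sketch built from
+// the inline comments; this is the path taken when surf was the surface on
+// top of the stack):
+//
+//	iu = edge->u >> 20;
+//	if (iu > surf->last_u)
+//	{	// the departing surface was visible: emit its span
+//		span_p->u = surf->last_u;
+//		span_p->count = iu - span_p->u;
+//		span_p->v = current_iv;
+//		span_p->pnext = surf->spans;
+//		surf->spans = span_p;
+//		span_p++;
+//	}
+//	surf->next->last_u = iu;
+//	// then surf is unlinked from the active surface stack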
+
+//--------------------------------------------------------------------
+
+// trailing edge only
+Lgs_trailing:
+	pushl	$Lgs_nextedge
+	jmp		TrailingEdge
+
+
+.globl C(R_GenerateSpans)
+C(R_GenerateSpans):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+// clear active surfaces to just the background surface
+	movl	C(surfaces),%eax
+	movl	C(edge_head_u_shift20),%edx
+	addl	$(st_size),%eax
+// %ebp = span_p throughout
+	movl	C(span_p),%ebp
+
+	movl	$0,C(r_bmodelactive)
+
+	movl	%eax,st_next(%eax)
+	movl	%eax,st_prev(%eax)
+	movl	%edx,st_last_u(%eax)
+	movl	C(edge_head)+et_next,%ebx		// edge=edge_head.next
+
+// generate spans
+	cmpl	$(C(edge_tail)),%ebx		// done if empty list
+	jz		Lgs_lastspan
+
+Lgs_edgeloop:
+
+	movl	et_surfs(%ebx),%edi
+	movl	C(surfaces),%eax
+	movl	%edi,%esi
+	andl	$0xFFFF0000,%edi
+	andl	$0xFFFF,%esi
+	jz		Lgs_leading		// not a trailing edge
+
+// it has a left surface, so a surface is going away for this span
+	shll	$(SURF_T_SHIFT),%esi
+	addl	%eax,%esi
+	testl	%edi,%edi
+	jz		Lgs_trailing
+
+// both leading and trailing
+	call	TrailingEdge
+	movl	C(surfaces),%eax
+
+// ---------------------------------------------------------------
+// handle a leading edge
+// ---------------------------------------------------------------
+
+Lgs_leading:
+	shrl	$16-SURF_T_SHIFT,%edi
+	movl	C(surfaces),%eax
+	addl	%eax,%edi
+	movl	0x12345678,%esi		// surf2 = surfaces[1].next;
+LPatch2:
+	movl	st_spanstate(%edi),%edx
+	movl	st_insubmodel(%edi),%eax
+	testl	%eax,%eax
+	jnz		Lbmodel_leading
+
+// handle a leading non-bmodel edge
+
+// don't start a span if this is an inverted span, with the end edge preceding
+// the start edge (that is, we've already seen the end edge)
+	testl	%edx,%edx
+	jnz		Lxl_done
+
+
+// if (surf->key < surf2->key)
+//		goto newtop;
+	incl	%edx
+	movl	st_key(%edi),%eax
+	movl	%edx,st_spanstate(%edi)
+	movl	st_key(%esi),%ecx
+	cmpl	%ecx,%eax
+	jl		Lnewtop
+
+// main sorting loop to search through surface stack until insertion point
+// found. Always terminates because background surface is sentinel
+// do
+// {
+// 		surf2 = surf2->next;
+// } while (surf->key >= surf2->key);
+Lsortloopnb:
+	movl	st_next(%esi),%esi
+	movl	st_key(%esi),%ecx
+	cmpl	%ecx,%eax
+	jge		Lsortloopnb
+
+	jmp		LInsertAndExit
+
+
+// handle a leading bmodel edge
+	.align	4
+Lbmodel_leading:
+
+// don't start a span if this is an inverted span, with the end edge preceding
+// the start edge (that is, we've already seen the end edge)
+	testl	%edx,%edx
+	jnz		Lxl_done
+
+	movl	C(r_bmodelactive),%ecx
+	incl	%edx
+	incl	%ecx
+	movl	%edx,st_spanstate(%edi)
+	movl	%ecx,C(r_bmodelactive)
+
+// if (surf->key < surf2->key)
+//		goto newtop;
+	movl	st_key(%edi),%eax
+	movl	st_key(%esi),%ecx
+	cmpl	%ecx,%eax
+	jl		Lnewtop
+
+// if ((surf->key == surf2->key) && surf->insubmodel)
+// {
+	jz		Lzcheck_for_newtop
+
+// main sorting loop to search through surface stack until insertion point
+// found. Always terminates because background surface is sentinel
+// do
+// {
+// 		surf2 = surf2->next;
+// } while (surf->key > surf2->key);
+Lsortloop:
+	movl	st_next(%esi),%esi
+	movl	st_key(%esi),%ecx
+	cmpl	%ecx,%eax
+	jg		Lsortloop
+
+	jne		LInsertAndExit
+
+// Do 1/z sorting to see if we've arrived in the right position
+	movl	et_u(%ebx),%eax
+	subl	$0xFFFFF,%eax
+	movl	%eax,Ltemp
+	fildl	Ltemp
+
+	fmuls	float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
+								//      (1.0 / 0x100000);
+
+	fld		%st(0)				// fu | fu
+	fmuls	st_d_zistepu(%edi)	// fu*surf->d_zistepu | fu
+	flds	C(fv)					// fv | fu*surf->d_zistepu | fu
+	fmuls	st_d_zistepv(%edi)	// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
+	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
+	fadds	st_d_ziorigin(%edi)	// fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+
+	flds	st_d_zistepu(%esi)	// surf2->d_zistepu |
+								//  fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+	fmul	%st(3),%st(0)		// fu*surf2->d_zistepu |
+								//  fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fu*surf2->d_zistepu |
+								//  fv*surf->d_zistepv | fu
+	faddp	%st(0),%st(2)		// fu*surf2->d_zistepu | newzi | fu
+
+	flds	C(fv)					// fv | fu*surf2->d_zistepu | newzi | fu
+	fmuls	st_d_zistepv(%esi)	// fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+	fld		%st(2)				// newzi | fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+	fmuls	float_point_999		// newzibottom | fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+
+	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
+								//  newzibottom | newzi | fu
+	fadds	st_d_ziorigin(%esi)	// fu*surf2->d_zistepu + surf2->d_ziorigin |
+								//  fv*surf2->d_zistepv | newzibottom | newzi |
+								//  fu
+	faddp	%st(0),%st(1)		// testzi | newzibottom | newzi | fu
+	fxch	%st(1)				// newzibottom | testzi | newzi | fu
+
+// if (newzibottom >= testzi)
+//     goto Lgotposition;
+
+	fcomp	%st(1)				// testzi | newzi | fu
+
+	fxch	%st(1)				// newzi | testzi | fu
+	fmuls	float_1_point_001	// newzitop | testzi | fu
+	fxch	%st(1)				// testzi | newzitop | fu
+
+	fnstsw	%ax
+	testb	$0x01,%ah
+	jz		Lgotposition_fpop3
+
+// if (newzitop >= testzi)
+// {
+
+	fcomp	%st(1)				// newzitop | fu
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jz		Lsortloop_fpop2
+
+// if (surf->d_zistepu >= surf2->d_zistepu)
+//     goto Lgotposition;
+
+	flds	st_d_zistepu(%edi)	// surf->d_zistepu | newzitop| fu
+	fcomps	st_d_zistepu(%esi)	// newzitop | fu
+	fnstsw	%ax
+	testb	$0x01,%ah
+	jz		Lgotposition_fpop2
+
+	fstp	%st(0)				// clear the FPstack
+	fstp	%st(0)
+	movl	st_key(%edi),%eax
+	jmp		Lsortloop
+
+
+Lgotposition_fpop3:
+	fstp	%st(0)
+Lgotposition_fpop2:
+	fstp	%st(0)
+	fstp	%st(0)
+	jmp		LInsertAndExit
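+
+// The 1/z test above, in C terms (reference sketch assembled from the FP
+// stack comments; both surfaces are evaluated at the left end of the span):
+//
+//	fu = (float)(edge->u - 0xFFFFF) * (1.0 / 0x100000);
+//	newzi  = surf->d_ziorigin  + fv*surf->d_zistepv  + fu*surf->d_zistepu;
+//	testzi = surf2->d_ziorigin + fv*surf2->d_zistepv + fu*surf2->d_zistepu;
+//	if (newzi*0.999 >= testzi)
+//		goto Lgotposition;		// surf goes in front of surf2
+//	if (newzi*1.001 >= testzi && surf->d_zistepu >= surf2->d_zistepu)
+//		goto Lgotposition;
+//	// otherwise keep searching from surf2 (Lsortloop)
+//
+// (the Lzcheck_for_newtop copy further down is the same test, except that it
+// jumps to Lnewtop.)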
+
+
+// emit a span (obscures current top)
+
+Lnewtop_fpop3:
+	fstp	%st(0)
+Lnewtop_fpop2:
+	fstp	%st(0)
+	fstp	%st(0)
+	movl	st_key(%edi),%eax		// reload the sorting key
+
+Lnewtop:
+	movl	et_u(%ebx),%eax
+	movl	st_last_u(%esi),%edx
+	shrl	$20,%eax				// iu = integral pixel u
+	movl	%eax,st_last_u(%edi)	// surf->last_u = iu;
+	cmpl	%edx,%eax
+	jle		LInsertAndExit			// iu <= surf->last_u, so nothing to emit
+
+	subl	%edx,%eax
+	movl	%edx,espan_t_u(%ebp)		// span->u = surf->last_u;
+
+	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
+	movl	C(current_iv),%eax
+	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
+	movl	st_spans(%esi),%eax
+	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf->spans;
+	movl	%ebp,st_spans(%esi)			// surf->spans = span;
+	addl	$(espan_t_size),%ebp
+
+LInsertAndExit:
+// insert before surf2
+	movl	%esi,st_next(%edi)		// surf->next = surf2;
+	movl	st_prev(%esi),%eax
+	movl	%eax,st_prev(%edi)		// surf->prev = surf2->prev;
+	movl	%edi,st_prev(%esi)		// surf2->prev = surf;
+	movl	%edi,st_next(%eax)		// surf2->prev->next = surf;
+
+// ---------------------------------------------------------------
+// leading edge done
+// ---------------------------------------------------------------
+
+// ---------------------------------------------------------------
+// see if there are any more edges
+// ---------------------------------------------------------------
+
+Lgs_nextedge:
+	movl	et_next(%ebx),%ebx
+	cmpl	$(C(edge_tail)),%ebx
+	jnz		Lgs_edgeloop
+
+// clean up at the right edge
+Lgs_lastspan:
+
+// now that we've reached the right edge of the screen, we're done with any
+// unfinished surfaces, so emit a span for whatever's on top
+	movl	0x12345678,%esi		// surfaces[1].st_next
+LPatch3:
+	movl	C(edge_tail_u_shift20),%eax
+	xorl	%ecx,%ecx
+	movl	st_last_u(%esi),%edx
+	subl	%edx,%eax
+	jle		Lgs_resetspanstate
+
+	movl	%edx,espan_t_u(%ebp)
+	movl	%eax,espan_t_count(%ebp)
+	movl	C(current_iv),%eax
+	movl	%eax,espan_t_v(%ebp)
+	movl	st_spans(%esi),%eax
+	movl	%eax,espan_t_pnext(%ebp)
+	movl	%ebp,st_spans(%esi)
+	addl	$(espan_t_size),%ebp
+
+// reset spanstate for all surfaces in the surface stack
+Lgs_resetspanstate:
+	movl	%ecx,st_spanstate(%esi)
+	movl	st_next(%esi),%esi
+	cmpl	$0x12345678,%esi		// &surfaces[1]
+LPatch4:
+	jnz		Lgs_resetspanstate
+
+// store the final span_p
+	movl	%ebp,C(span_p)
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+
+// ---------------------------------------------------------------
+// 1/z sorting for bmodels in the same leaf
+// ---------------------------------------------------------------
+	.align	4
+Lxl_done:
+	incl	%edx
+	movl	%edx,st_spanstate(%edi)
+
+	jmp		Lgs_nextedge
+
+
+	.align	4
+Lzcheck_for_newtop:
+	movl	et_u(%ebx),%eax
+	subl	$0xFFFFF,%eax
+	movl	%eax,Ltemp
+	fildl	Ltemp
+
+	fmuls	float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
+								//      (1.0 / 0x100000);
+
+	fld		%st(0)				// fu | fu
+	fmuls	st_d_zistepu(%edi)	// fu*surf->d_zistepu | fu
+	flds	C(fv)				// fv | fu*surf->d_zistepu | fu
+	fmuls	st_d_zistepv(%edi)	// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
+	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
+	fadds	st_d_ziorigin(%edi)	// fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+
+	flds	st_d_zistepu(%esi)	// surf2->d_zistepu |
+								//  fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+	fmul	%st(3),%st(0)		// fu*surf2->d_zistepu |
+								//  fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fv*surf->d_zistepv | fu
+	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
+								//  fu*surf2->d_zistepu |
+								//  fv*surf->d_zistepv | fu
+	faddp	%st(0),%st(2)		// fu*surf2->d_zistepu | newzi | fu
+
+	flds	C(fv)				// fv | fu*surf2->d_zistepu | newzi | fu
+	fmuls	st_d_zistepv(%esi)	// fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+	fld		%st(2)				// newzi | fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+	fmuls	float_point_999		// newzibottom | fv*surf2->d_zistepv |
+								//  fu*surf2->d_zistepu | newzi | fu
+
+	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
+								//  newzibottom | newzi | fu
+	fadds	st_d_ziorigin(%esi)	// fu*surf2->d_zistepu + surf2->d_ziorigin |
+								//  fv*surf2->d_zistepv | newzibottom | newzi |
+								//  fu
+	faddp	%st(0),%st(1)		// testzi | newzibottom | newzi | fu
+	fxch	%st(1)				// newzibottom | testzi | newzi | fu
+
+// if (newzibottom >= testzi)
+//     goto newtop;
+
+	fcomp	%st(1)				// testzi | newzi | fu
+
+	fxch	%st(1)				// newzi | testzi | fu
+	fmuls	float_1_point_001	// newzitop | testzi | fu
+	fxch	%st(1)				// testzi | newzitop | fu
+
+	fnstsw	%ax
+	testb	$0x01,%ah
+	jz		Lnewtop_fpop3
+
+// if (newzitop >= testzi)
+// {
+
+	fcomp	%st(1)				// newzitop | fu
+	fnstsw	%ax
+	testb	$0x45,%ah
+	jz		Lsortloop_fpop2
+
+// if (surf->d_zistepu >= surf2->d_zistepu)
+//     goto newtop;
+
+	flds	st_d_zistepu(%edi)	// surf->d_zistepu | newzitop | fu
+	fcomps	st_d_zistepu(%esi)	// newzitop | fu
+	fnstsw	%ax
+	testb	$0x01,%ah
+	jz		Lnewtop_fpop2
+
+Lsortloop_fpop2:
+	fstp	%st(0)				// clear the FP stack
+	fstp	%st(0)
+	movl	st_key(%edi),%eax
+	jmp		Lsortloop
+
+
+.globl C(R_EdgeCodeEnd)
+C(R_EdgeCodeEnd):
+
+
+//----------------------------------------------------------------------
+// Surface array address code patching routine
+//----------------------------------------------------------------------
+
+	.align 4
+.globl C(R_SurfacePatch)
+C(R_SurfacePatch):
+
+	movl	C(surfaces),%eax
+	addl	$(st_size),%eax
+	movl	%eax,LPatch4-4
+
+	addl	$(st_next),%eax
+	movl	%eax,LPatch0-4
+	movl	%eax,LPatch2-4
+	movl	%eax,LPatch3-4
+
+	ret
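+
+// (the 0x12345678 absolute addresses at LPatch0/2/3/4 above are placeholders:
+// once the surfaces array exists, R_SurfacePatch stores &surfaces[1] --
+// respectively &surfaces[1].next -- over the 4-byte operand that ends just
+// before each label, which is why the stores go to LPatchN-4)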
+
+#endif	// id386
+
--- /dev/null
+++ b/u/r_varsa.s
@@ -1,0 +1,45 @@
+//
+// r_varsa.s
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+	.data
+
+//-------------------------------------------------------
+// ASM-only variables
+//-------------------------------------------------------
+.globl	float_1, float_particle_z_clip, float_point5
+.globl	float_minus_1, float_0
+float_0:		.single	0.0
+float_1:		.single	1.0
+float_minus_1:	.single	-1.0
+float_particle_z_clip:	.single	PARTICLE_Z_CLIP
+float_point5:	.single	0.5
+
+.globl	fp_16, fp_64k, fp_1m, fp_64kx64k
+.globl	fp_1m_minus_1
+.globl	fp_8 
+fp_1m:			.single	1048576.0
+fp_1m_minus_1:	.single	1048575.0
+fp_64k:			.single	65536.0
+fp_8:			.single	8.0
+fp_16:			.single	16.0
+fp_64kx64k:		.long	0x4f000000	// (float)0x8000*0x10000
+
+
+.globl	FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
+FloatZero:				.long	0
+Float2ToThe31nd:		.long	0x4f000000
+FloatMinus2ToThe31nd:	.long	0xcf000000
+
+.globl	C(r_bmodelactive)
+C(r_bmodelactive):	.long	0
+
+#endif	// id386
+
--- /dev/null
+++ b/u/snd_mixa.s
@@ -1,0 +1,199 @@
+//
+// snd_mixa.s
+// x86 assembly-language sound code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+#ifdef	id386
+
+	.text
+
+//----------------------------------------------------------------------
+// 8-bit sound-mixing code
+//----------------------------------------------------------------------
+
+#define ch		4+16
+#define sc		8+16
+#define count	12+16
+
+.globl C(SND_PaintChannelFrom8)
+C(SND_PaintChannelFrom8):
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+	pushl	%ebp
+
+//	int 	data;
+//	short	*lscale, *rscale;
+//	unsigned char *sfx;
+//	int		i;
+
+	movl	ch(%esp),%ebx
+	movl	sc(%esp),%esi
+
+//	if (ch->leftvol > 255)
+//		ch->leftvol = 255;
+//	if (ch->rightvol > 255)
+//		ch->rightvol = 255;
+	movl	ch_leftvol(%ebx),%eax
+	movl	ch_rightvol(%ebx),%edx
+	cmpl	$255,%eax
+	jna		LLeftSet
+	movl	$255,%eax
+LLeftSet:
+	cmpl	$255,%edx
+	jna		LRightSet
+	movl	$255,%edx
+LRightSet:
+
+//	lscale = snd_scaletable[ch->leftvol >> 3];
+//	rscale = snd_scaletable[ch->rightvol >> 3];
+//	sfx = (signed char *)sc->data + ch->pos;
+//	ch->pos += count;
+	andl	$0xF8,%eax
+	addl	$(sfxc_data),%esi
+	andl	$0xF8,%edx
+	movl	ch_pos(%ebx),%edi
+	movl	count(%esp),%ecx
+	addl	%edi,%esi
+	shll	$7,%eax
+	addl	%ecx,%edi
+	shll	$7,%edx
+	movl	%edi,ch_pos(%ebx)
+	addl	$(C(snd_scaletable)),%eax
+	addl	$(C(snd_scaletable)),%edx
+	subl	%ebx,%ebx
+	movb	-1(%esi,%ecx,1),%bl
+
+	testl	$1,%ecx
+	jz		LMix8Loop
+
+	movl	(%eax,%ebx,4),%edi
+	movl	(%edx,%ebx,4),%ebp
+	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+	movb	-2(%esi,%ecx,1),%bl
+
+	decl	%ecx
+	jz		LDone
+
+//	for (i=0 ; i<count ; i++)
+//	{
+LMix8Loop:
+
+//		data = sfx[i];
+//		paintbuffer[i].left += lscale[data];
+//		paintbuffer[i].right += rscale[data];
+	movl	(%eax,%ebx,4),%edi
+	movl	(%edx,%ebx,4),%ebp
+	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+	movb	-2(%esi,%ecx,1),%bl
+	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+
+	movl	(%eax,%ebx,4),%edi
+	movl	(%edx,%ebx,4),%ebp
+	movb	-3(%esi,%ecx,1),%bl
+	addl	C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
+	addl	C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
+	movl	%edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
+	movl	%ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
+
+//	}
+	subl	$2,%ecx
+	jnz		LMix8Loop
+
+LDone:
+	popl	%ebp
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	ret
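+
+// Reference C sketch of the mixer above (from the commented-out source; the
+// table indexing assumes snd_scaletable[32][256] of ints, which is what the
+// andl $0xF8 / shll $7 address arithmetic implies):
+//
+//	lscale = snd_scaletable[ch->leftvol >> 3];
+//	rscale = snd_scaletable[ch->rightvol >> 3];
+//	sfx = (signed char *)sc->data + ch->pos;
+//	ch->pos += count;
+//	for (i=0 ; i<count ; i++)
+//	{
+//		data = sfx[i];			// the asm zero-extends the sample byte
+//		paintbuffer[i].left += lscale[data];
+//		paintbuffer[i].right += rscale[data];
+//	}
+//
+// (the asm walks i downwards and is unrolled two samples per pass.)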
+
+
+//----------------------------------------------------------------------
+// Transfer of stereo buffer to 16-bit DMA buffer code
+//----------------------------------------------------------------------
+
+.globl C(Snd_WriteLinearBlastStereo16)
+C(Snd_WriteLinearBlastStereo16):
+	pushl	%esi				// preserve register variables
+	pushl	%edi
+	pushl	%ebx
+
+//	int		i;
+//	int		val;
+	movl	C(snd_linear_count),%ecx
+	movl	C(snd_p),%ebx
+	movl	C(snd_vol),%esi
+	movl	C(snd_out),%edi
+
+//	for (i=0 ; i<snd_linear_count ; i+=2)
+//	{
+LWLBLoopTop:
+
+//		val = (snd_p[i]*snd_vol)>>8;
+//		if (val > 0x7fff)
+//			snd_out[i] = 0x7fff;
+//		else if (val < (short)0x8000)
+//			snd_out[i] = (short)0x8000;
+//		else
+//			snd_out[i] = val;
+	movl	-8(%ebx,%ecx,4),%eax
+	imull	%esi,%eax
+	sarl	$8,%eax
+	cmpl	$0x7FFF,%eax
+	jg		LClampHigh
+	cmpl	$0xFFFF8000,%eax
+	jnl		LClampDone
+	movl	$0xFFFF8000,%eax
+	jmp		LClampDone
+LClampHigh:
+	movl	$0x7FFF,%eax
+LClampDone:
+
+//		val = (snd_p[i+1]*snd_vol)>>8;
+//		if (val > 0x7fff)
+//			snd_out[i+1] = 0x7fff;
+//		else if (val < (short)0x8000)
+//			snd_out[i+1] = (short)0x8000;
+//		else
+//			snd_out[i+1] = val;
+	movl	-4(%ebx,%ecx,4),%edx
+	imull	%esi,%edx
+	sarl	$8,%edx
+	cmpl	$0x7FFF,%edx
+	jg		LClampHigh2
+	cmpl	$0xFFFF8000,%edx
+	jnl		LClampDone2
+	movl	$0xFFFF8000,%edx
+	jmp		LClampDone2
+LClampHigh2:
+	movl	$0x7FFF,%edx
+LClampDone2:
+	shll	$16,%edx
+	andl	$0xFFFF,%eax
+	orl		%eax,%edx
+	movl	%edx,-4(%edi,%ecx,2)
+
+//	}
+	subl	$2,%ecx
+	jnz		LWLBLoopTop
+
+//	snd_p += snd_linear_count;
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	ret
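+
+// In C terms (reference sketch from the comments above): each pair of mixed
+// 32-bit samples is scaled by snd_vol, clamped to signed 16 bits, and the
+// two results are packed into a single 32-bit store -- snd_out[i] in the low
+// half, snd_out[i+1] in the high half:
+//
+//	for (i=0 ; i<snd_linear_count ; i+=2)
+//	{
+//		val0 = (snd_p[i]*snd_vol) >> 8;
+//		if (val0 > 0x7fff)
+//			val0 = 0x7fff;
+//		else if (val0 < (short)0x8000)
+//			val0 = (short)0x8000;
+//		val1 = (snd_p[i+1]*snd_vol) >> 8;
+//		if (val1 > 0x7fff)
+//			val1 = 0x7fff;
+//		else if (val1 < (short)0x8000)
+//			val1 = (short)0x8000;
+//		*(int *)&snd_out[i] = (val0 & 0xffff) | (val1 << 16);
+//	}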
+
+
+#endif	// id386
+
--- /dev/null
+++ b/u/surf16.s
@@ -1,0 +1,153 @@
+//
+// surf16.s
+// x86 assembly-language 16 bpp surface block drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// Surface block drawer
+//----------------------------------------------------------------------
+
+	.data
+
+k:			.long	0
+loopentry:	.long	0
+
+	.align	4
+blockjumptable16:
+	.long	LEnter2_16
+	.long	LEnter4_16
+	.long	0, LEnter8_16
+	.long	0, 0, 0, LEnter16_16
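+
+// (indexed below as blockjumptable16-4(,blocksize,2), i.e. byte offset
+// blocksize*2-4, so blocksize values 2, 4, 8 and 16 land on the
+// LEnter2/4/8/16_16 entries and the zero slots are never used)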
+
+
+	.text
+
+	.align 4
+.globl C(R_Surf16Start)
+C(R_Surf16Start):
+
+	.align 4
+.globl C(R_DrawSurfaceBlock16)
+C(R_DrawSurfaceBlock16):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+	movl	C(blocksize),%eax
+	movl	C(prowdestbase),%edi
+	movl	C(pbasesource),%esi
+	movl	C(sourcesstep),%ebx
+	movl	blockjumptable16-4(,%eax,2),%ecx
+	movl	%eax,k
+	movl	%ecx,loopentry
+	movl	C(lightleft),%edx
+	movl	C(lightright),%ebp
+
+Lblockloop16:
+
+	subl	%edx,%ebp
+	movb	C(blockdivshift),%cl
+	sarl	%cl,%ebp
+	jns		Lp1_16
+	testl	C(blockdivmask),%ebp
+	jz		Lp1_16
+	incl	%ebp
+Lp1_16:
+
+	subl	%eax,%eax
+	subl	%ecx,%ecx	// high words must be 0 in loop for addressing
+
+	jmp		*loopentry
+
+	.align	4
+
+#include "block16.h"
+
+	movl	C(pbasesource),%esi
+	movl	C(lightleft),%edx
+	movl	C(lightright),%ebp
+	movl	C(sourcetstep),%eax
+	movl	C(lightrightstep),%ecx
+	movl	C(prowdestbase),%edi
+
+	addl	%eax,%esi
+	addl	%ecx,%ebp
+
+	movl	C(lightleftstep),%eax
+	movl	C(surfrowbytes),%ecx
+
+	addl	%eax,%edx
+	addl	%ecx,%edi
+
+	movl	%esi,C(pbasesource)
+	movl	%ebp,C(lightright)
+	movl	k,%eax
+	movl	%edx,C(lightleft)
+	decl	%eax
+	movl	%edi,C(prowdestbase)
+	movl	%eax,k
+	jnz		Lblockloop16
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+.globl C(R_Surf16End)
+C(R_Surf16End):
+
+//----------------------------------------------------------------------
+// Code patching routines
+//----------------------------------------------------------------------
+	.data
+
+	.align 4
+LPatchTable16:
+	.long	LBPatch0-4
+	.long	LBPatch1-4
+	.long	LBPatch2-4
+	.long	LBPatch3-4
+	.long	LBPatch4-4
+	.long	LBPatch5-4
+	.long	LBPatch6-4
+	.long	LBPatch7-4
+	.long	LBPatch8-4
+	.long	LBPatch9-4
+	.long	LBPatch10-4
+	.long	LBPatch11-4
+	.long	LBPatch12-4
+	.long	LBPatch13-4
+	.long	LBPatch14-4
+	.long	LBPatch15-4
+
+	.text
+
+	.align 4
+.globl C(R_Surf16Patch)
+C(R_Surf16Patch):
+	pushl	%ebx
+
+	movl	C(colormap),%eax
+	movl	$LPatchTable16,%ebx
+	movl	$16,%ecx
+LPatchLoop16:
+	movl	(%ebx),%edx
+	addl	$4,%ebx
+	movl	%eax,(%edx)
+	decl	%ecx
+	jnz		LPatchLoop16
+
+	popl	%ebx
+
+	ret
+
+
+#endif	// id386
--- /dev/null
+++ b/u/surf8.s
@@ -1,0 +1,764 @@
+//
+// surf8.s
+// x86 assembly-language 8 bpp surface block drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef	id386
+
+	.data
+
+sb_v:		.long	0
+
+	.text
+
+	.align 4
+.globl C(R_Surf8Start)
+C(R_Surf8Start):
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 0
+//----------------------------------------------------------------------
+
+	.align 4
+.globl C(R_DrawSurfaceBlock8_mip0)
+C(R_DrawSurfaceBlock8_mip0):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//		for (v=0 ; v<numvblocks ; v++)
+//		{
+	movl	C(r_lightptr),%ebx
+	movl	C(r_numvblocks),%eax
+
+	movl	%eax,sb_v
+	movl	C(prowdestbase),%edi
+
+	movl	C(pbasesource),%esi
+
+Lv_loop_mip0:
+
+//			lightleft = lightptr[0];
+//			lightright = lightptr[1];
+//			lightdelta = (lightleft - lightright) & 0xFFFFF;
+	movl	(%ebx),%eax			// lightleft
+	movl	4(%ebx),%edx		// lightright
+
+	movl	%eax,%ebp
+	movl	C(r_lightwidth),%ecx
+
+	movl	%edx,C(lightright)
+	subl	%edx,%ebp
+
+	andl	$0xFFFFF,%ebp
+	leal	(%ebx,%ecx,4),%ebx
+
+//			lightptr += lightwidth;
+	movl	%ebx,C(r_lightptr)
+
+//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+//					0xF0000000;
+	movl	4(%ebx),%ecx	// lightptr[1]
+	movl	(%ebx),%ebx		// lightptr[0]
+
+	subl	%eax,%ebx
+	subl	%edx,%ecx
+
+	sarl	$4,%ecx
+	orl		$0xF0000000,%ebp
+
+	sarl	$4,%ebx
+	movl	%ecx,C(lightrightstep)
+
+	subl	%ecx,%ebx
+	andl	$0xFFFFF,%ebx
+
+	orl		$0xF0000000,%ebx
+	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
+
+	movl	%ebx,C(lightdeltastep)
+	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
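+
+// (roughly: the orl $0xF0000000 of lightdelta/lightdeltastep above also acts
+// as the row counter for the 16 rows of a mip0 block -- each addl of
+// lightdeltastep at the bottom of the loop carries out of the top nibble and
+// counts it down by one until it reaches zero, at which point the jc falls
+// through; the mip1 and mip2 drawers use $0x70000000 and $0x30000000 for 8
+// and 4 rows the same way)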
+
+Lblockloop8_mip0:
+	movl	%ebp,C(lightdelta)
+	movb	14(%esi),%cl
+
+	sarl	$4,%ebp
+	movb	%dh,%bh
+
+	movb	15(%esi),%bl
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch0:
+	movb	13(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch1:
+	movb	12(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	addl	%ebp,%edx
+	movb	0x12345678(%ebx),%ah
+LBPatch2:
+
+	movb	11(%esi),%bl
+	movb	0x12345678(%ecx),%al
+LBPatch3:
+
+	movb	10(%esi),%cl
+	movl	%eax,12(%edi)
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch4:
+	movb	9(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch5:
+	movb	8(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	addl	%ebp,%edx
+	movb	0x12345678(%ebx),%ah
+LBPatch6:
+
+	movb	7(%esi),%bl
+	movb	0x12345678(%ecx),%al
+LBPatch7:
+
+	movb	6(%esi),%cl
+	movl	%eax,8(%edi)
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch8:
+	movb	5(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch9:
+	movb	4(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	addl	%ebp,%edx
+	movb	0x12345678(%ebx),%ah
+LBPatch10:
+
+	movb	3(%esi),%bl
+	movb	0x12345678(%ecx),%al
+LBPatch11:
+
+	movb	2(%esi),%cl
+	movl	%eax,4(%edi)
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch12:
+	movb	1(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch13:
+	movb	(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	movb	0x12345678(%ebx),%ah
+LBPatch14:
+	movl	C(lightright),%edx
+
+	movb	0x12345678(%ecx),%al
+LBPatch15:
+	movl	C(lightdelta),%ebp
+
+	movl	%eax,(%edi)
+
+	addl	C(sourcetstep),%esi
+	addl	C(surfrowbytes),%edi
+
+	addl	C(lightrightstep),%edx
+	addl	C(lightdeltastep),%ebp
+
+	movl	%edx,C(lightright)
+	jc		Lblockloop8_mip0
+
+//			if (pbasesource >= r_sourcemax)
+//				pbasesource -= stepback;
+
+	cmpl	C(r_sourcemax),%esi
+	jb		LSkip_mip0
+	subl	C(r_stepback),%esi
+LSkip_mip0:
+
+	movl	C(r_lightptr),%ebx
+	decl	sb_v
+
+	jnz		Lv_loop_mip0
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 1
+//----------------------------------------------------------------------
+
+	.align 4
+.globl C(R_DrawSurfaceBlock8_mip1)
+C(R_DrawSurfaceBlock8_mip1):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//		for (v=0 ; v<numvblocks ; v++)
+//		{
+	movl	C(r_lightptr),%ebx
+	movl	C(r_numvblocks),%eax
+
+	movl	%eax,sb_v
+	movl	C(prowdestbase),%edi
+
+	movl	C(pbasesource),%esi
+
+Lv_loop_mip1:
+
+//			lightleft = lightptr[0];
+//			lightright = lightptr[1];
+//			lightdelta = (lightleft - lightright) & 0xFFFFF;
+	movl	(%ebx),%eax			// lightleft
+	movl	4(%ebx),%edx		// lightright
+
+	movl	%eax,%ebp
+	movl	C(r_lightwidth),%ecx
+
+	movl	%edx,C(lightright)
+	subl	%edx,%ebp
+
+	andl	$0xFFFFF,%ebp
+	leal	(%ebx,%ecx,4),%ebx
+
+//			lightptr += lightwidth;
+	movl	%ebx,C(r_lightptr)
+
+//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+//					0xF0000000;
+	movl	4(%ebx),%ecx	// lightptr[1]
+	movl	(%ebx),%ebx		// lightptr[0]
+
+	subl	%eax,%ebx
+	subl	%edx,%ecx
+
+	sarl	$3,%ecx
+	orl		$0x70000000,%ebp
+
+	sarl	$3,%ebx
+	movl	%ecx,C(lightrightstep)
+
+	subl	%ecx,%ebx
+	andl	$0xFFFFF,%ebx
+
+	orl		$0xF0000000,%ebx
+	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
+
+	movl	%ebx,C(lightdeltastep)
+	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
+
+Lblockloop8_mip1:
+	movl	%ebp,C(lightdelta)
+	movb	6(%esi),%cl
+
+	sarl	$3,%ebp
+	movb	%dh,%bh
+
+	movb	7(%esi),%bl
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch22:
+	movb	5(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch23:
+	movb	4(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	addl	%ebp,%edx
+	movb	0x12345678(%ebx),%ah
+LBPatch24:
+
+	movb	3(%esi),%bl
+	movb	0x12345678(%ecx),%al
+LBPatch25:
+
+	movb	2(%esi),%cl
+	movl	%eax,4(%edi)
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch26:
+	movb	1(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch27:
+	movb	(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	movb	0x12345678(%ebx),%ah
+LBPatch28:
+	movl	C(lightright),%edx
+
+	movb	0x12345678(%ecx),%al
+LBPatch29:
+	movl	C(lightdelta),%ebp
+
+	movl	%eax,(%edi)
+	movl	C(sourcetstep),%eax
+
+	addl	%eax,%esi
+	movl	C(surfrowbytes),%eax
+
+	addl	%eax,%edi
+	movl	C(lightrightstep),%eax
+
+	addl	%eax,%edx
+	movl	C(lightdeltastep),%eax
+
+	addl	%eax,%ebp
+	movl	%edx,C(lightright)
+
+	jc		Lblockloop8_mip1
+
+//			if (pbasesource >= r_sourcemax)
+//				pbasesource -= stepback;
+
+	cmpl	C(r_sourcemax),%esi
+	jb		LSkip_mip1
+	subl	C(r_stepback),%esi
+LSkip_mip1:
+
+	movl	C(r_lightptr),%ebx
+	decl	sb_v
+
+	jnz		Lv_loop_mip1
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 2
+//----------------------------------------------------------------------
+
+	.align 4
+.globl C(R_DrawSurfaceBlock8_mip2)
+C(R_DrawSurfaceBlock8_mip2):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//		for (v=0 ; v<numvblocks ; v++)
+//		{
+	movl	C(r_lightptr),%ebx
+	movl	C(r_numvblocks),%eax
+
+	movl	%eax,sb_v
+	movl	C(prowdestbase),%edi
+
+	movl	C(pbasesource),%esi
+
+Lv_loop_mip2:
+
+//			lightleft = lightptr[0];
+//			lightright = lightptr[1];
+//			lightdelta = (lightleft - lightright) & 0xFFFFF;
+	movl	(%ebx),%eax			// lightleft
+	movl	4(%ebx),%edx		// lightright
+
+	movl	%eax,%ebp
+	movl	C(r_lightwidth),%ecx
+
+	movl	%edx,C(lightright)
+	subl	%edx,%ebp
+
+	andl	$0xFFFFF,%ebp
+	leal	(%ebx,%ecx,4),%ebx
+
+//			lightptr += lightwidth;
+	movl	%ebx,C(r_lightptr)
+
+//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+//					0xF0000000;
+	movl	4(%ebx),%ecx	// lightptr[1]
+	movl	(%ebx),%ebx		// lightptr[0]
+
+	subl	%eax,%ebx
+	subl	%edx,%ecx
+
+	sarl	$2,%ecx
+	orl		$0x30000000,%ebp
+
+	sarl	$2,%ebx
+	movl	%ecx,C(lightrightstep)
+
+	subl	%ecx,%ebx
+
+	andl	$0xFFFFF,%ebx
+
+	orl		$0xF0000000,%ebx
+	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
+
+	movl	%ebx,C(lightdeltastep)
+	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
+
+Lblockloop8_mip2:
+	movl	%ebp,C(lightdelta)
+	movb	2(%esi),%cl
+
+	sarl	$2,%ebp
+	movb	%dh,%bh
+
+	movb	3(%esi),%bl
+	addl	%ebp,%edx
+
+	movb	%dh,%ch
+	addl	%ebp,%edx
+
+	movb	0x12345678(%ebx),%ah
+LBPatch18:
+	movb	1(%esi),%bl
+
+	movb	0x12345678(%ecx),%al
+LBPatch19:
+	movb	(%esi),%cl
+
+	movb	%dh,%bh
+	addl	%ebp,%edx
+
+	rorl	$16,%eax
+	movb	%dh,%ch
+
+	movb	0x12345678(%ebx),%ah
+LBPatch20:
+	movl	C(lightright),%edx
+
+	movb	0x12345678(%ecx),%al
+LBPatch21:
+	movl	C(lightdelta),%ebp
+
+	movl	%eax,(%edi)
+	movl	C(sourcetstep),%eax
+
+	addl	%eax,%esi
+	movl	C(surfrowbytes),%eax
+
+	addl	%eax,%edi
+	movl	C(lightrightstep),%eax
+
+	addl	%eax,%edx
+	movl	C(lightdeltastep),%eax
+
+	addl	%eax,%ebp
+	movl	%edx,C(lightright)
+
+	jc		Lblockloop8_mip2
+
+//			if (pbasesource >= r_sourcemax)
+//				pbasesource -= stepback;
+
+	cmpl	C(r_sourcemax),%esi
+	jb		LSkip_mip2
+	subl	C(r_stepback),%esi
+LSkip_mip2:
+
+	movl	C(r_lightptr),%ebx
+	decl	sb_v
+
+	jnz		Lv_loop_mip2
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+
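The block loops above keep their row count in the light-delta register instead of a separate counter: lightdelta is OR'd with 0x30000000 here (mip 2) and lightdeltastep with 0xF0000000, so the addl of the step at the bottom of the loop carries out of bit 31 exactly three times before the top nibble reaches 0xF, and the jc both advances the interpolation and stops after the four rows of a 4x4 block. A stand-alone C check of that arithmetic, simplified to the counter nibbles only:

	#include <stdio.h>

	int
	main(void)
	{
		unsigned int lightdelta = 0x30000000;		/* orl $0x30000000,%ebp */
		unsigned int lightdeltastep = 0xF0000000;	/* orl $0xF0000000,%ebx */
		unsigned long long sum;
		int rows = 0;

		do{
			rows++;
			sum = (unsigned long long)lightdelta + lightdeltastep;
			lightdelta = (unsigned int)sum;
		}while(sum >> 32);		/* the carry that jc tests */
		printf("rows drawn: %d\n", rows);	/* prints 4 */
		return 0;
	}
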
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 3
+//----------------------------------------------------------------------
+
+	.align 4
+.globl C(R_DrawSurfaceBlock8_mip3)
+C(R_DrawSurfaceBlock8_mip3):
+	pushl	%ebp				// preserve caller's stack frame
+	pushl	%edi
+	pushl	%esi				// preserve register variables
+	pushl	%ebx
+
+//		for (v=0 ; v<numvblocks ; v++)
+//		{
+	movl	C(r_lightptr),%ebx
+	movl	C(r_numvblocks),%eax
+
+	movl	%eax,sb_v
+	movl	C(prowdestbase),%edi
+
+	movl	C(pbasesource),%esi
+
+Lv_loop_mip3:
+
+//			lightleft = lightptr[0];
+//			lightright = lightptr[1];
+//			lightdelta = (lightleft - lightright) & 0xFFFFF;
+	movl	(%ebx),%eax			// lightleft
+	movl	4(%ebx),%edx		// lightright
+
+	movl	%eax,%ebp
+	movl	C(r_lightwidth),%ecx
+
+	movl	%edx,C(lightright)
+	subl	%edx,%ebp
+
+	andl	$0xFFFFF,%ebp
+	leal	(%ebx,%ecx,4),%ebx
+
+	movl	%ebp,C(lightdelta)
+//			lightptr += lightwidth;
+	movl	%ebx,C(r_lightptr)
+
+//			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+//			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+//			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+//					0xF0000000;
+	movl	4(%ebx),%ecx	// lightptr[1]
+	movl	(%ebx),%ebx		// lightptr[0]
+
+	subl	%eax,%ebx
+	subl	%edx,%ecx
+
+	sarl	$1,%ecx
+
+	sarl	$1,%ebx
+	movl	%ecx,C(lightrightstep)
+
+	subl	%ecx,%ebx
+	andl	$0xFFFFF,%ebx
+
+	sarl	$1,%ebp
+	orl		$0xF0000000,%ebx
+
+	movl	%ebx,C(lightdeltastep)
+	subl	%ebx,%ebx	// high word must be 0 in loop for addressing
+
+	movb	1(%esi),%bl
+	subl	%ecx,%ecx	// high word must be 0 in loop for addressing
+
+	movb	%dh,%bh
+	movb	(%esi),%cl
+
+	addl	%ebp,%edx
+	movb	%dh,%ch
+
+	movb	0x12345678(%ebx),%al
+LBPatch16:
+	movl	C(lightright),%edx
+
+	movb	%al,1(%edi)
+	movb	0x12345678(%ecx),%al
+LBPatch17:
+
+	movb	%al,(%edi)
+	movl	C(sourcetstep),%eax
+
+	addl	%eax,%esi
+	movl	C(surfrowbytes),%eax
+
+	addl	%eax,%edi
+	movl	C(lightdeltastep),%eax
+
+	movl	C(lightdelta),%ebp
+	movb	(%esi),%cl
+
+	addl	%eax,%ebp
+	movl	C(lightrightstep),%eax
+
+	sarl	$1,%ebp
+	addl	%eax,%edx
+
+	movb	%dh,%bh
+	movb	1(%esi),%bl
+
+	addl	%ebp,%edx
+	movb	%dh,%ch
+
+	movb	0x12345678(%ebx),%al
+LBPatch30:
+	movl	C(sourcetstep),%edx
+
+	movb	%al,1(%edi)
+	movb	0x12345678(%ecx),%al
+LBPatch31:
+
+	movb	%al,(%edi)
+	movl	C(surfrowbytes),%ebp
+
+	addl	%edx,%esi
+	addl	%ebp,%edi
+
+//			if (pbasesource >= r_sourcemax)
+//				pbasesource -= stepback;
+
+	cmpl	C(r_sourcemax),%esi
+	jb		LSkip_mip3
+	subl	C(r_stepback),%esi
+LSkip_mip3:
+
+	movl	C(r_lightptr),%ebx
+	decl	sb_v
+
+	jnz		Lv_loop_mip3
+
+	popl	%ebx				// restore register variables
+	popl	%esi
+	popl	%edi
+	popl	%ebp				// restore the caller's stack frame
+	ret
+
+
+.globl C(R_Surf8End)
+C(R_Surf8End):
+
+//----------------------------------------------------------------------
+// Code patching routines
+//----------------------------------------------------------------------
+	.data
+
+	.align 4
+LPatchTable8:
+	.long	LBPatch0-4
+	.long	LBPatch1-4
+	.long	LBPatch2-4
+	.long	LBPatch3-4
+	.long	LBPatch4-4
+	.long	LBPatch5-4
+	.long	LBPatch6-4
+	.long	LBPatch7-4
+	.long	LBPatch8-4
+	.long	LBPatch9-4
+	.long	LBPatch10-4
+	.long	LBPatch11-4
+	.long	LBPatch12-4
+	.long	LBPatch13-4
+	.long	LBPatch14-4
+	.long	LBPatch15-4
+	.long	LBPatch16-4
+	.long	LBPatch17-4
+	.long	LBPatch18-4
+	.long	LBPatch19-4
+	.long	LBPatch20-4
+	.long	LBPatch21-4
+	.long	LBPatch22-4
+	.long	LBPatch23-4
+	.long	LBPatch24-4
+	.long	LBPatch25-4
+	.long	LBPatch26-4
+	.long	LBPatch27-4
+	.long	LBPatch28-4
+	.long	LBPatch29-4
+	.long	LBPatch30-4
+	.long	LBPatch31-4
+
+	.text
+
+	.align 4
+.globl C(R_Surf8Patch)
+C(R_Surf8Patch):
+	pushl	%ebx
+
+	movl	C(colormap),%eax
+	movl	$LPatchTable8,%ebx
+	movl	$32,%ecx
+LPatchLoop8:
+	movl	(%ebx),%edx
+	addl	$4,%ebx
+	movl	%eax,(%edx)
+	decl	%ecx
+	jnz		LPatchLoop8
+
+	popl	%ebx
+
+	ret
+
+#endif	// id386
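The 0x12345678 displacements in the colormap loads above are placeholders: each LBPatchN label sits immediately after such a load, so LBPatchN-4 is the address of that instruction's 32-bit displacement, and R_Surf8Patch writes the run-time colormap pointer into all 32 of them before any drawer runs (self-modifying code, so the text segment has to be writable). In rough C, with patchtable8 as a hypothetical view of LPatchTable8:

	/* hedged sketch of R_Surf8Patch; patchtable8 stands in for LPatchTable8,
	   an array of 32 pointers to displacement fields inside the code */
	extern unsigned char *colormap;
	extern unsigned char **patchtable8[32];

	static void
	surf8patch_sketch(void)
	{
		int i;

		for(i = 0; i < 32; i++)
			*patchtable8[i] = colormap;	/* movl %eax,(%edx) */
	}
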
--- /dev/null
+++ b/u/sys_dosa.s
@@ -1,0 +1,95 @@
+//
+// sys_dosa.s
+// x86 assembly-language DOS-dependent routines.
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+
+	.data
+
+	.align	4
+fpenv:
+	.long	0, 0, 0, 0, 0, 0, 0, 0
+
+	.text
+
+.globl C(MaskExceptions)
+C(MaskExceptions):
+	fnstenv	fpenv
+	orl		$0x3F,fpenv
+	fldenv	fpenv
+
+	ret
+
+/*
+.globl C(unmaskexceptions)
+C(unmaskexceptions):
+	fnstenv	fpenv
+	andl		$0xFFFFFFE0,fpenv
+	fldenv	fpenv
+
+	ret
+*/
+
+	.data
+
+	.align	4
+.globl	ceil_cw, single_cw, full_cw, cw, pushed_cw
+ceil_cw:	.long	0
+single_cw:	.long	0
+full_cw:	.long	0
+cw:			.long	0
+pushed_cw:	.long	0
+
+	.text
+
+.globl C(Sys_LowFPPrecision)
+C(Sys_LowFPPrecision):
+	fldcw	single_cw
+
+	ret
+
+.globl C(Sys_HighFPPrecision)
+C(Sys_HighFPPrecision):
+	fldcw	full_cw
+
+	ret
+
+.globl C(Sys_PushFPCW_SetHigh)
+C(Sys_PushFPCW_SetHigh):
+	fnstcw	pushed_cw
+	fldcw	full_cw
+
+	ret
+
+.globl C(Sys_PopFPCW)
+C(Sys_PopFPCW):
+	fldcw	pushed_cw
+
+	ret
+
+.globl C(Sys_SetFPCW)
+C(Sys_SetFPCW):
+	fnstcw	cw
+	movl	cw,%eax
+#ifdef	id386
+	andb	$0xF0,%ah
+	orb		$0x03,%ah	// round mode, 64-bit precision
+#endif
+	movl	%eax,full_cw
+
+#ifdef	id386
+	andb	$0xF0,%ah
+	orb		$0x0C,%ah	// chop mode, single precision
+#endif
+	movl	%eax,single_cw
+
+#ifdef	id386
+	andb	$0xF0,%ah
+	orb		$0x08,%ah	// ceil mode, single precision
+#endif
+	movl	%eax,ceil_cw
+
+	ret
+
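MaskExceptions sets the six exception-mask bits (0x3F) in the control word of the stored x87 environment, and Sys_SetFPCW builds the three control words that the other entry points load with fldcw. In the x87 control word, bits 8-9 select precision (00 single, 11 64-bit) and bits 10-11 the rounding mode (00 nearest, 10 toward +infinity, 11 toward zero), which is all the andb/orb pairs on %ah compute. A hedged C sketch of just that bit arithmetic (the fnstcw/fldcw themselves have no portable C equivalent and stay in assembly):

	/* sketch of how Sys_SetFPCW derives the three stored control words */
	static void
	setfpcw_sketch(unsigned int cw, unsigned int *full_cw,
		unsigned int *single_cw, unsigned int *ceil_cw)
	{
		cw &= ~0x0F00U;			/* andb $0xF0,%ah: clear PC and RC fields */
		*full_cw = cw | 0x0300;		/* round to nearest, 64-bit precision */
		*single_cw = cw | 0x0C00;	/* chop (toward zero), single precision */
		*ceil_cw = cw | 0x0800;		/* round toward +infinity, single precision */
	}
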
--- /dev/null
+++ b/u/worlda.s
@@ -1,0 +1,125 @@
+//
+// worlda.s
+// x86 assembly-language server testing stuff
+//
+
+#define GLQUAKE	1	// don't include unneeded defs
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+	.data
+
+Ltemp:	.long	0
+
+	.text
+
+//----------------------------------------------------------------------
+// hull-point test
+//----------------------------------------------------------------------
+
+#define hull	4+8				// because only partially pushed
+#define	num		8+4				// because only partially pushed
+#define p		12+12			// because only partially pushed
+
+	.align 4
+.globl C(SV_HullPointContents)
+C(SV_HullPointContents):
+	pushl	%edi				// preserve register variables
+	movl	num(%esp),%eax
+	testl	%eax,%eax
+	js		Lhquickout
+
+//	float		d;
+//	dclipnode_t	*node;
+//	mplane_t	*plane;
+
+	pushl	%ebx
+	movl	hull(%esp),%ebx
+
+	pushl	%ebp
+	movl	p(%esp),%edx
+
+	movl	hu_clipnodes(%ebx),%edi
+	movl	hu_planes(%ebx),%ebp
+
+	subl	%ebx,%ebx
+	pushl	%esi
+
+// %ebx: 0
+// %eax: num
+// %edx: p
+// %edi: hull->clipnodes
+// %ebp: hull->planes
+
+//	while (num >= 0)
+//	{
+
+Lhloop:
+
+//		node = hull->clipnodes + num;
+//		plane = hull->planes + node->planenum;
+// !!! if the size of dclipnode_t changes, the scaling of %eax needs to be
+//     changed !!!
+	movl	nd_planenum(%edi,%eax,8),%ecx
+	movl	nd_children(%edi,%eax,8),%eax
+	movl	%eax,%esi
+	rorl	$16,%eax
+	leal	(%ecx,%ecx,4),%ecx
+
+//		if (plane->type < 3)
+//			d = p[plane->type] - plane->dist;
+	movb	pl_type(%ebp,%ecx,4),%bl
+	cmpb	$3,%bl
+	jb		Lnodot
+
+//		else
+//			d = DotProduct (plane->normal, p) - plane->dist;
+	flds	pl_normal(%ebp,%ecx,4)
+	fmuls	0(%edx)
+	flds	pl_normal+4(%ebp,%ecx,4)
+	fmuls	4(%edx)
+	flds	pl_normal+8(%ebp,%ecx,4)
+	fmuls	8(%edx)
+	fxch	%st(1)
+	faddp	%st(0),%st(2)
+	faddp	%st(0),%st(1)
+	fsubs	pl_dist(%ebp,%ecx,4)
+	jmp		Lsub
+
+Lnodot:
+	flds	pl_dist(%ebp,%ecx,4)
+	fsubrs	(%edx,%ebx,4)
+
+Lsub:
+	sarl	$16,%eax
+	sarl	$16,%esi
+
+//		if (d < 0)
+//			num = node->children[1];
+//		else
+//			num = node->children[0];
+	fstps	Ltemp
+	movl	Ltemp,%ecx
+	sarl	$31,%ecx
+	andl	%ecx,%esi
+	xorl	$0xFFFFFFFF,%ecx
+	andl	%ecx,%eax
+	orl		%esi,%eax
+	jns		Lhloop
+
+//	return num;
+Lhdone:
+	popl	%esi
+	popl	%ebp
+	popl	%ebx				// restore register variables
+
+Lhquickout:
+	popl	%edi
+
+	ret
+
+#endif	// id386
+
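The routine is the assembly form of the loop quoted in its own comments, with the final if/else replaced by a branch-free select: sarl $31 smears the sign bit of d across a mask that picks children[1] when d < 0 and children[0] otherwise. A hedged C rendering, with stand-in struct layouts inferred from the offsets and scaling the assembly uses (dclipnode_t indexed by 8 bytes, mplane_t by 20):

	typedef float vec3_t[3];
	typedef struct { int planenum; short children[2]; } dclipnode_t;	/* 8 bytes */
	typedef struct { vec3_t normal; float dist; unsigned char type, signbits, pad[2]; } mplane_t;	/* 20 bytes */
	typedef struct { dclipnode_t *clipnodes; mplane_t *planes; } hull_t;	/* partial stand-in */

	#define DotProduct(a,b) ((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2])

	static int
	hullpointcontents_sketch(hull_t *hull, int num, vec3_t p)
	{
		float d;
		dclipnode_t *node;
		mplane_t *plane;

		while(num >= 0){
			node = hull->clipnodes + num;
			plane = hull->planes + node->planenum;
			if(plane->type < 3)
				d = p[plane->type] - plane->dist;
			else
				d = DotProduct(plane->normal, p) - plane->dist;
			/* branch-free form used above:
			   mask = d < 0 ? ~0 : 0;
			   num = (children[1] & mask) | (children[0] & ~mask) */
			num = node->children[d < 0 ? 1 : 0];
		}
		return num;
	}
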
--- a/worlda.s
+++ /dev/null
@@ -1,125 +1,0 @@
-//
-// worlda.s
-// x86 assembly-language server testing stuff
-//
-
-#define GLQUAKE	1	// don't include unneeded defs
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-	.data
-
-Ltemp:	.long	0
-
-	.text
-
-//----------------------------------------------------------------------
-// hull-point test
-//----------------------------------------------------------------------
-
-#define hull	4+8				// because only partially pushed
-#define	num		8+4				// because only partially pushed
-#define p		12+12			// because only partially pushed
-
-	.align 4
-.globl C(SV_HullPointContents)
-C(SV_HullPointContents):
-	pushl	%edi				// preserve register variables
-	movl	num(%esp),%eax
-	testl	%eax,%eax
-	js		Lhquickout
-
-//	float		d;
-//	dclipnode_t	*node;
-//	mplane_t	*plane;
-
-	pushl	%ebx
-	movl	hull(%esp),%ebx
-
-	pushl	%ebp
-	movl	p(%esp),%edx
-
-	movl	hu_clipnodes(%ebx),%edi
-	movl	hu_planes(%ebx),%ebp
-
-	subl	%ebx,%ebx
-	pushl	%esi
-
-// %ebx: 0
-// %eax: num
-// %edx: p
-// %edi: hull->clipnodes
-// %ebp: hull->planes
-
-//	while (num >= 0)
-//	{
-
-Lhloop:
-
-//		node = hull->clipnodes + num;
-//		plane = hull->planes + node->planenum;
-// !!! if the size of dclipnode_t changes, the scaling of %eax needs to be
-//     changed !!!
-	movl	nd_planenum(%edi,%eax,8),%ecx
-	movl	nd_children(%edi,%eax,8),%eax
-	movl	%eax,%esi
-	rorl	$16,%eax
-	leal	(%ecx,%ecx,4),%ecx
-
-//		if (plane->type < 3)
-//			d = p[plane->type] - plane->dist;
-	movb	pl_type(%ebp,%ecx,4),%bl
-	cmpb	$3,%bl
-	jb		Lnodot
-
-//		else
-//			d = DotProduct (plane->normal, p) - plane->dist;
-	flds	pl_normal(%ebp,%ecx,4)
-	fmuls	0(%edx)
-	flds	pl_normal+4(%ebp,%ecx,4)
-	fmuls	4(%edx)
-	flds	pl_normal+8(%ebp,%ecx,4)
-	fmuls	8(%edx)
-	fxch	%st(1)
-	faddp	%st(0),%st(2)
-	faddp	%st(0),%st(1)
-	fsubs	pl_dist(%ebp,%ecx,4)
-	jmp		Lsub
-
-Lnodot:
-	flds	pl_dist(%ebp,%ecx,4)
-	fsubrs	(%edx,%ebx,4)
-
-Lsub:
-	sarl	$16,%eax
-	sarl	$16,%esi
-
-//		if (d < 0)
-//			num = node->children[1];
-//		else
-//			num = node->children[0];
-	fstps	Ltemp
-	movl	Ltemp,%ecx
-	sarl	$31,%ecx
-	andl	%ecx,%esi
-	xorl	$0xFFFFFFFF,%ecx
-	andl	%ecx,%eax
-	orl		%esi,%eax
-	jns		Lhloop
-
-//	return num;
-Lhdone:
-	popl	%esi
-	popl	%ebp
-	popl	%ebx				// restore register variables
-
-Lhquickout:
-	popl	%edi
-
-	ret
-
-#endif	// id386
-