ref: 967b1248f82e5f64eab4dbf45898a37851b71fa2
parent: 88ccea37f65b87312292b2b1a107c39927c6a2c3
author: cinap_lenrek <[email protected]>
date: Wed Feb 27 13:29:08 EST 2019
libip: move optimized 386 assembly version of ptclbsum() from kernel to libip
--- a/sys/src/9/pc/pc
+++ b/sys/src/9/pc/pc
@@ -22,7 +22,7 @@
ether netif
bridge netif log
- ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum386 inferno
+ ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium inferno
draw screen vga vgax swcursor
mouse mouse
--- a/sys/src/9/pc/ptclbsum386.s
+++ /dev/null
@@ -1,126 +1,0 @@
-TEXT ptclbsum(SB), $0
- MOVL addr+0(FP), SI
- MOVL len+4(FP), CX
-
- XORL AX, AX /* sum */
-
- TESTL $1, SI /* byte aligned? */
- MOVL SI, DI
- JEQ _2align
-
- DECL CX
- JLT _return
-
- MOVB 0x00(SI), AH
- INCL SI
-
-_2align:
- TESTL $2, SI /* word aligned? */
- JEQ _32loop
-
- CMPL CX, $2 /* less than 2 bytes? */
- JLT _1dreg
- SUBL $2, CX
-
- XORL BX, BX
- MOVW 0x00(SI), BX
- ADDL BX, AX
- ADCL $0, AX
- LEAL 2(SI), SI
-
-_32loop:
- CMPL CX, $0x20
- JLT _8loop
-
- MOVL CX, BP
- SHRL $5, BP
- ANDL $0x1F, CX
-
-_32loopx:
- MOVL 0x00(SI), BX
- MOVL 0x1C(SI), DX
- ADCL BX, AX
- MOVL 0x04(SI), BX
- ADCL DX, AX
- MOVL 0x10(SI), DX
- ADCL BX, AX
- MOVL 0x08(SI), BX
- ADCL DX, AX
- MOVL 0x14(SI), DX
- ADCL BX, AX
- MOVL 0x0C(SI), BX
- ADCL DX, AX
- MOVL 0x18(SI), DX
- ADCL BX, AX
- LEAL 0x20(SI), SI
- ADCL DX, AX
-
- DECL BP
- JNE _32loopx
-
- ADCL $0, AX
-
-_8loop:
- CMPL CX, $0x08
- JLT _2loop
-
- MOVL CX, BP
- SHRL $3, BP
- ANDL $0x07, CX
-
-_8loopx:
- MOVL 0x00(SI), BX
- ADCL BX, AX
- MOVL 0x04(SI), DX
- ADCL DX, AX
-
- LEAL 0x08(SI), SI
- DECL BP
- JNE _8loopx
-
- ADCL $0, AX
-
-_2loop:
- CMPL CX, $0x02
- JLT _1dreg
-
- MOVL CX, BP
- SHRL $1, BP
- ANDL $0x01, CX
-
-_2loopx:
- MOVWLZX 0x00(SI), BX
- ADCL BX, AX
-
- LEAL 0x02(SI), SI
- DECL BP
- JNE _2loopx
-
- ADCL $0, AX
-
-_1dreg:
- TESTL $1, CX /* 1 byte left? */
- JEQ _fold
-
- XORL BX, BX
- MOVB 0x00(SI), BX
- ADDL BX, AX
- ADCL $0, AX
-
-_fold:
- MOVL AX, BX
- SHRL $16, BX
- JEQ _swab
-
- ANDL $0xFFFF, AX
- ADDL BX, AX
- JMP _fold
-
-_swab:
- TESTL $1, addr+0(FP)
- /*TESTL $1, DI*/
- JNE _return
- XCHGB AH, AL
-
-_return:
- RET
--- a/sys/src/9/xen/mkfile
+++ b/sys/src/9/xen/mkfile
@@ -128,9 +128,6 @@
<../port/portmkfile
<|../port/mkbootrules $CONF
-ptclbsum386.$O: ../pc/ptclbsum386.s
- $AS $AFLAGS ../pc/ptclbsum386.s
-
# we inherited these.. revisit.
$ETHER: ../port/etherif.h ../port/netif.h
$SDEV: ../port/sd.h
--- a/sys/src/9/xen/xenpcf
+++ b/sys/src/9/xen/xenpcf
@@ -20,7 +20,7 @@
xenstore
ether netif
- ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum386 inferno
+ ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium inferno
sd
--- a/sys/src/libip/mkfile
+++ b/sys/src/libip/mkfile
@@ -24,3 +24,6 @@
${LIB:/$objtype/%=/386/%}\
</sys/src/cmd/mksyslib
+
+ptclbsum.8: ptclbsum386.s
+ $AS -o $target ptclbsum386.s
--- /dev/null
+++ b/sys/src/libip/ptclbsum386.s
@@ -1,0 +1,126 @@
+TEXT ptclbsum(SB), $0 /* ptclbsum(addr, len): sum len bytes at addr as 16-bit words with end-around carry, fold to 16 bits, leave result in AX (not complemented) */
+ MOVL addr+0(FP), SI /* SI = addr: read pointer, advanced as bytes are consumed */
+ MOVL len+4(FP), CX /* CX = len: bytes still to be summed */
+
+ XORL AX, AX /* AX = running 32-bit sum, starts at 0 */
+
+ TESTL $1, SI /* is the start address odd (bit 0 set)? */
+ MOVL SI, DI /* DI = copy of addr; only the commented-out test in _swab would read it (MOVL leaves the flags from TESTL alone) */
+ JEQ _2align /* even start: no leading byte to peel off */
+
+ DECL CX /* account for the leading odd byte */
+ JLT _return /* len was <= 0: nothing to sum, AX is still 0 */
+
+ MOVB 0x00(SI), AH /* leading byte goes into bits 8-15 of the sum */
+ INCL SI /* SI is now even */
+
+_2align: /* SI is even here; make it 4-byte aligned for the 32-bit loads */
+ TESTL $2, SI /* bit 1 set means 2-byte but not 4-byte aligned */
+ JEQ _32loop /* already 4-byte aligned */
+
+ CMPL CX, $2 /* less than 2 bytes? */
+ JLT _1dreg /* yes (signed compare): only a possible single trailing byte remains */
+ SUBL $2, CX /* consume one 16-bit word */
+
+ XORL BX, BX /* clear BX so the 16-bit load below ends up zero-extended */
+ MOVW 0x00(SI), BX /* BX = 16-bit word at SI */
+ ADDL BX, AX /* add it to the sum */
+ ADCL $0, AX /* wrap the carry back into the sum */
+ LEAL 2(SI), SI /* SI += 2, now 4-byte aligned */
+
+_32loop: /* bulk phase: 32 bytes per iteration */
+ CMPL CX, $0x20 /* at least 32 bytes left? */
+ JLT _8loop /* no: try 8-byte chunks */
+
+ MOVL CX, BP /* BP will count 32-byte chunks */
+ SHRL $5, BP /* BP = CX / 32 */
+ ANDL $0x1F, CX /* CX = leftover bytes; ANDL also clears CF for the first ADCL below */
+
+_32loopx: /* eight 32-bit words at offsets 0x00-0x1C; CF chains from each ADCL to the next, also across iterations */
+ MOVL 0x00(SI), BX /* loads alternate between BX and DX, interleaved with the adds */
+ MOVL 0x1C(SI), DX /* word order is irrelevant to the sum, so offsets are taken out of sequence */
+ ADCL BX, AX /* sum += word 0x00 + CF */
+ MOVL 0x04(SI), BX
+ ADCL DX, AX /* sum += word 0x1C + CF */
+ MOVL 0x10(SI), DX
+ ADCL BX, AX /* sum += word 0x04 + CF */
+ MOVL 0x08(SI), BX
+ ADCL DX, AX /* sum += word 0x10 + CF */
+ MOVL 0x14(SI), DX
+ ADCL BX, AX /* sum += word 0x08 + CF */
+ MOVL 0x0C(SI), BX
+ ADCL DX, AX /* sum += word 0x14 + CF */
+ MOVL 0x18(SI), DX
+ ADCL BX, AX /* sum += word 0x0C + CF */
+ LEAL 0x20(SI), SI /* SI += 32 via LEAL, which does not disturb CF */
+ ADCL DX, AX /* sum += word 0x18 + CF */
+
+ DECL BP /* one chunk done; DECL sets ZF but leaves CF intact */
+ JNE _32loopx /* more chunks: the pending carry is consumed by the next ADCL */
+
+ ADCL $0, AX /* add in the carry left by the last ADCL */
+
+_8loop: /* medium phase: 8 bytes per iteration */
+ CMPL CX, $0x08 /* at least 8 bytes left? */
+ JLT _2loop /* no: try 2-byte words */
+
+ MOVL CX, BP /* BP will count 8-byte chunks */
+ SHRL $3, BP /* BP = CX / 8 */
+ ANDL $0x07, CX /* CX = leftover bytes; ANDL also clears CF for the first ADCL below */
+
+_8loopx: /* two 32-bit words per iteration, carry chained as in _32loopx */
+ MOVL 0x00(SI), BX
+ ADCL BX, AX /* sum += word 0x00 + CF */
+ MOVL 0x04(SI), DX
+ ADCL DX, AX /* sum += word 0x04 + CF */
+
+ LEAL 0x08(SI), SI /* SI += 8 without touching CF */
+ DECL BP /* DECL leaves CF intact */
+ JNE _8loopx
+
+ ADCL $0, AX /* add in the carry left by the last ADCL */
+
+_2loop: /* small phase: one 16-bit word per iteration */
+ CMPL CX, $0x02 /* at least 2 bytes left? */
+ JLT _1dreg /* no: handle a possible single trailing byte */
+
+ MOVL CX, BP /* BP will count 16-bit words */
+ SHRL $1, BP /* BP = CX / 2 */
+ ANDL $0x01, CX /* CX = 0 or 1 leftover byte; ANDL also clears CF for the first ADCL below */
+
+_2loopx:
+ MOVWLZX 0x00(SI), BX /* BX = zero-extended 16-bit word at SI */
+ ADCL BX, AX /* sum += word + CF */
+
+ LEAL 0x02(SI), SI /* SI += 2 without touching CF */
+ DECL BP /* DECL leaves CF intact */
+ JNE _2loopx
+
+ ADCL $0, AX /* add in the carry left by the last ADCL */
+
+_1dreg: /* tail: at most one byte remains */
+ TESTL $1, CX /* 1 byte left? */
+ JEQ _fold /* no: go reduce the sum */
+
+ XORL BX, BX /* clear BX so the byte load below ends up zero-extended */
+ MOVB 0x00(SI), BX /* BX = trailing byte in bits 0-7 */
+ ADDL BX, AX /* add it to the sum */
+ ADCL $0, AX /* wrap the carry back into the sum */
+
+_fold: /* reduce the 32-bit sum to 16 bits */
+ MOVL AX, BX
+ SHRL $16, BX /* BX = upper 16 bits of the sum; ZF set when they are zero */
+ JEQ _swab /* nothing above bit 15: folding is finished */
+
+ ANDL $0xFFFF, AX /* keep the lower 16 bits */
+ ADDL BX, AX /* lower half + upper half */
+ JMP _fold /* the add can carry into bit 16, so check again */
+
+_swab: /* final byte-order fix-up, decided by the parity of the original addr */
+ TESTL $1, addr+0(FP) /* re-read the original argument: was addr odd? */
+ /*TESTL $1, DI*/
+ JNE _return /* odd start: leading byte was placed in AH, bytes are left as they are */
+ XCHGB AH, AL /* even start: swap the two result bytes; presumably converts the little-endian word sum to big-endian (network) order - TODO confirm against callers */
+
+_return:
+ RET /* result is in AX */