ref: dbe0a995f03f26ea2b6859d21df3bd67856d672d
parent: 59d16c3900ebfdf0725ebab61e77499dfb7f86de
author: mischief <[email protected]>
date: Mon Aug 24 21:58:41 EDT 2015
libc: import more endianness fixes (thanks cherry9) from https://bitbucket.org/cherry9/plan9-loongson/
--- /dev/null
+++ b/sys/src/libc/spim/memccpy.s
@@ -1,0 +1,20 @@
+TEXT memccpy(SB), $0 /* void *memccpy(void *s1, void *s2, int c, ulong n) */
+ MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
+ MOVW n+12(FP), R1 /* R1 is count, and the nil result if it is zero */
+ BEQ R1, done
+ MOVW s2+4(FP), R2 /* R2 is source */
+ MOVW s1+0(FP), R3 /* R3 is destination */
+ MOVBU c+8(FP), R4 /* R4 is the stop byte */
+ ADDU R1, R2, R5 /* R5 is end of source */
+copy:
+ MOVBU (R2), R6
+ ADDU $1, R2
+ MOVBU R6, (R3) /* the stop byte itself is copied too */
+ ADDU $1, R3
+ BEQ R4, R6, found
+ BNE R2, R5, copy
+ MOVW $0, R1 /* n bytes copied without meeting c */
+ RET
+
+found: MOVW R3, R1 /* points just past the copied stop byte */
+done: RET
--- /dev/null
+++ b/sys/src/libc/spim/memchr.s
@@ -1,0 +1,39 @@
+TEXT memchr(SB), $0 /* void *memchr(void *s1, int c, ulong n) */
+ MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
+
+ MOVW s1+0(FP), R2 /* R2 is scan pointer */
+ MOVW n+8(FP), R1 /* R1 is count */
+ MOVBU c+4(FP), R3 /* R3 is the byte sought */
+ ADDU R1, R2, R6 /* R6 is end pointer */
+
+ AND $(~1), R1, R5
+ ADDU R2, R5 /* R5 is end of the whole byte pairs */
+ BEQ R2, R5, tail
+
+pair: /* two bytes per pass */
+ MOVBU 0(R2), R4
+ MOVBU 1(R2), R7
+ BEQ R3, R4, first
+ ADDU $2, R2
+ BEQ R3, R7, prev
+ BNE R2, R5, pair
+
+tail: /* at most one byte left */
+ BEQ R2, R6, none
+
+single:
+ MOVBU (R2), R4
+ ADDU $1, R2
+ BEQ R3, R4, prev
+ BNE R2, R6, single
+none:
+ MOVW R0, R1 /* not found: nil */
+ RET
+
+first:
+ MOVW R2, R1 /* matched before R2 was advanced */
+ RET
+
+prev:
+ SUBU $1,R2, R1 /* R2 already stepped past the match */
+ RET
--- /dev/null
+++ b/sys/src/libc/spim/memcmp.s
@@ -1,0 +1,117 @@
+TEXT memcmp(SB), $0
+ MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
+
+/*
+ * int memcmp(void *s1, void *s2, ulong n)
+ * R1 returns 0 if the n bytes match, else 1 or -1 from an
+ * unsigned compare of the first pair of bytes that differ.
+ *
+ * words are only tested for equality.  on a mismatch the byte
+ * loop at out rescans from the start of the current word or
+ * 16-byte group, so the answer does not depend on byte order.
+ *
+ * the figures once quoted here (aligned about 1.0us/call and
+ * 17.4mb/sec, unaligned about 3.1mb/sec) were for the original
+ * big-endian code and have not been re-measured.
+ */
+
+ MOVW n+8(FP), R3 /* R3 is count */
+ MOVW s1+0(FP), R4 /* R4 is pointer1 */
+ MOVW s2+4(FP), R5 /* R5 is pointer2 */
+ ADDU R3,R4, R6 /* R6 is end pointer1 */
+
+/*
+ * fewer than 4 bytes cannot hold a full
+ * word, so go straight to the byte loop.
+ */
+ SGT $4,R3, R1
+ BNE R1, out
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+ XOR R4,R5, R1
+ AND $3, R1
+ BNE R1, out
+
+/*
+ * byte at a time to word align
+ */
+l1:
+ AND $3,R4, R1
+ BEQ R1, l2
+ MOVBU 0(R4), R8
+ MOVBU 0(R5), R9
+ ADDU $1, R4
+ BNE R8,R9, ne
+ ADDU $1, R5
+ JMP l1
+
+/*
+ * turn R3 into end pointer1-15
+ * cmp 16 at a time while theres room.
+ * the pointers only advance after all
+ * four words matched, so out can rescan.
+ */
+l2:
+ ADDU $-15,R6, R3
+l3:
+ SGTU R3,R4, R1
+ BEQ R1, l4
+ MOVW 0(R4), R8
+ MOVW 0(R5), R9
+ MOVW 4(R4), R10
+ BNE R8,R9, out
+ MOVW 4(R5), R11
+ MOVW 8(R4), R8
+ BNE R10,R11, out
+ MOVW 8(R5), R9
+ MOVW 12(R4), R10
+ BNE R8,R9, out
+ MOVW 12(R5), R11
+ BNE R10,R11, out
+ ADDU $16, R4
+ ADDU $16, R5
+ JMP l3
+
+/*
+ * turn R3 into end pointer1-3
+ * cmp 4 at a time while theres room
+ */
+l4:
+ ADDU $-3,R6, R3
+l5:
+ SGTU R3,R4, R1
+ BEQ R1, out
+ MOVW 0(R4), R8
+ MOVW 0(R5), R9
+ BNE R8,R9, out /* byte loop decides the order */
+ ADDU $4, R4
+ ADDU $4, R5
+ JMP l5
+
+/*
+ * last loop, cmp byte at a time.
+ * also entered on a word mismatch.
+ */
+out:
+ SGTU R6,R4, R1
+ BEQ R1, ret /* reached the end: R1 is 0, equal */
+ MOVBU 0(R4), R8
+ MOVBU 0(R5), R9
+ ADDU $1, R4
+ BNE R8,R9, ne
+ ADDU $1, R5
+ JMP out
+
+/*
+ * R8 and R9 hold the first pair of bytes that differ
+ */
+ne:
+ SGTU R8,R9, R1
+ BNE R1, ret /* byte1 > byte2: return 1 */
+ MOVW $-1,R1
+ret:
+ RET
+ END
--- a/sys/src/libc/spim/mkfile
+++ b/sys/src/libc/spim/mkfile
@@ -9,13 +9,9 @@
getfcr.s\
main9.s\
main9p.s\
- memccpy.s\
- memchr.s\
- memcmp.s\
memmove.s\
memset.s\
setjmp.s\
- strchr.s\
strcmp.s\
strcpy.s\
tas.s\
@@ -26,6 +22,10 @@
notejmp.c\
SFILES=\
+ memccpy.s\
+ memchr.s\
+ memcmp.s\
+ strchr.s\
vlop.s\
CFILES=\
--- /dev/null
+++ b/sys/src/libc/spim/strchr.s
@@ -1,0 +1,63 @@
+TEXT strchr(SB), $0 /* char *strchr(char *s, int c) */
+ MOVW R1, 0(FP) /* first argument arrives in R1; store it in its frame slot */
+ MOVB c+4(FP), R4 /* R4 is c, loaded with MOVB like the string bytes below */
+ MOVW s+0(FP), R3 /* R3 is the string pointer */
+
+ BEQ R4, l2
+
+/*
+ * char is not null
+ */
+l1:
+ MOVB (R3), R1
+ ADDU $1, R3
+ BEQ R1, ret /* end of string: R1 is 0, not found */
+ BNE R1,R4, l1
+ JMP rm1
+
+/*
+ * char is null
+ * align to word
+ */
+l2:
+ AND $3,R3, R1
+ BEQ R1, l3
+ MOVB (R3), R1
+ ADDU $1, R3
+ BNE R1, l2
+ JMP rm1
+
+l3:
+ MOVW $0xff000000, R6 /* byte at offset 3 of a little-endian word */
+ MOVW $0x00ff0000, R7 /* byte at offset 2 */
+
+l4:
+ MOVW (R3), R5
+ ADDU $4, R3
+ AND $0xff,R5, R1 /* byte at offset 0 is the low byte */
+ AND $0xff00,R5, R2 /* byte at offset 1 */
+ BEQ R1, b0
+ AND R7,R5, R1
+ BEQ R2, b1
+ AND R6,R5, R2
+ BEQ R1, b2
+ BNE R2, l4
+
+rm1:
+ ADDU $-1,R3, R1
+ JMP ret
+
+b2:
+ ADDU $-2,R3, R1
+ JMP ret
+
+b1:
+ ADDU $-3,R3, R1
+ JMP ret
+
+b0:
+ ADDU $-4,R3, R1
+ JMP ret
+
+ret:
+ RET