ref: e14726be72b7e9b3bc91fb5b18aa588a39a227ba
parent: 26a9351dfdfb784400792a06ceb6b76fda488c03
author: Roberto E. Vargas Caballero <[email protected]>
date: Tue Sep 14 11:11:29 EDT 2021
libc/amd64: Add assembly functions for string.h Amd64 supports string operations via instruction-repeat prefixes, which can improve the performance of these functions.
--- a/src/libc/arch/amd64/Makefile
+++ b/src/libc/arch/amd64/Makefile
@@ -1,20 +1,27 @@
.POSIX:
+PROJECTDIR = ../../../..
DIRS =\
- netbsd\
- openbsd\
+ darwin\
dragonfly\
linux\
- darwin\
+ netbsd\
+ openbsd\
-PROJECTDIR =../../../..
include $(PROJECTDIR)/scripts/rules.mk
include ../../rules.mk
OBJS =\
longjmp.$O\
- setjmp.$O\
+ memchr.$O\
+ memcmp.$O\
memcpy.$O\
+ memmove.$O\
+ memset.$O\
+ setjmp.$O\
+ strchr.$O\
+ strcmp.$O\
+ strcpy.$O\
all: $(OBJS) $(SYS)
--- /dev/null
+++ b/src/libc/arch/amd64/memchr.s
@@ -1,0 +1,23 @@
+	.file "memchr.s"
+
+	.text
+	.globl memchr,_memchr	/* void *memchr(const void *s, int c, size_t n); rdi=s, sil=c, rdx=n */
+_memchr:
+memchr:
+	cmpq $0,%rdx	/* n == 0: nothing to scan */
+	je notfound
+
+	movq %rdx,%rcx	/* rcx = byte count for rep */
+	movb %sil,%al	/* al = byte scasb compares against */
+	cld	/* DF=0: scan ascending */
+	repne	/* stop on match or when rcx hits 0 */
+	scasb
+	je found	/* ZF set iff the last compared byte matched */
+
+notfound:
+	xor %eax,%eax	/* NULL */
+	ret
+
+found:
+	leaq -1(%rdi),%rax	/* scasb leaves rdi one past the match */
+	ret
--- /dev/null
+++ b/src/libc/arch/amd64/memcmp.s
@@ -1,0 +1,30 @@
+	.file "memcmp.s"
+
+	.text
+	.globl memcmp,_memcmp	/* int memcmp(const void *s1, const void *s2, size_t n) */
+_memcmp:
+memcmp:
+	cmpq $0,%rdx	/* n == 0: equal by definition */
+	je equa	/* was 'EQUA': undefined symbol, labels are case-sensitive */
+	cld	/* DF=0: compare ascending */
+	movq %rdx,%rcx
+	movq %rdi,%r8	/* swap args so cmpsb computes s1[i] - s2[i] */
+	movq %rsi,%rdi
+	movq %r8,%rsi
+	repe	/* stop at first mismatch or after n bytes */
+	cmpsb
+	je equa	/* all n bytes matched */
+	jb less	/* unsigned byte compare, as memcmp requires */
+	ja grea
+
+equa:
+	movq $0,%rax
+	ret
+
+less:
+	movq $-1,%rax
+	ret
+
+grea:
+	movq $1,%rax
+	ret
--- a/src/libc/arch/amd64/memcpy.s
+++ b/src/libc/arch/amd64/memcpy.s
@@ -1,12 +1,13 @@
 	.file "memcpy.s"
+
 	.text
 	.globl memcpy,_memcpy
-memcpy:
 _memcpy:
+memcpy:
+	cld	/* DF=0: rep movsb copies ascending */
 	mov %rdi,%rax
 	mov %rdx,%rcx
-	cld
 	rep
 	movsb
 	ret
--- /dev/null
+++ b/src/libc/arch/amd64/memmove.s
@@ -1,0 +1,30 @@
+	.file "memmove.s"
+
+	.text
+	.globl memmove,_memmove	/* void *memmove(void *dst, const void *src, size_t n) */
+_memmove:
+memmove:
+	movq %rdi,%rax	/* return value: dst */
+
+	movq %rdx,%rcx	/* rcx = byte count for rep */
+
+	cmpq %rdi,%rsi	/* pointers are unsigned: use ja/jb, not jg/jl */
+	ja forward	/* src > dst: ascending copy is overlap-safe */
+	jb backward	/* src < dst: copy descending to handle overlap */
+	ret	/* src == dst: nothing to do */
+
+forward:
+	cld	/* DF=0: ascending copy */
+	rep
+	movsb
+	ret
+
+backward:
+	std	/* DF=1: descending copy */
+	movq %rdx,%r8
+	subq $1,%r8	/* point rdi/rsi at the last byte of each buffer */
+	addq %r8,%rdi
+	addq %r8,%rsi
+	rep; movsb
+	cld	/* SysV ABI requires DF clear on function return */
+	ret
--- /dev/null
+++ b/src/libc/arch/amd64/memset.s
@@ -1,0 +1,14 @@
+	.file "memset.s"
+
+	.text
+	.globl memset,_memset	/* void *memset(void *s, int c, size_t n) */
+_memset:
+memset:
+	cld	/* DF=0: fill ascending */
+	movq %rdi,%r8	/* save s; stosb advances rdi */
+	movq %rdx,%rcx	/* rcx = byte count for rep */
+	movl %esi,%eax	/* al = (unsigned char)c, the byte stosb stores */
+	rep
+	stosb
+	movq %r8,%rax	/* return s */
+	ret
--- /dev/null
+++ b/src/libc/arch/amd64/strchr.s
@@ -1,0 +1,30 @@
+	.file "strchr.s"
+
+	.text
+	.globl strchr,_strchr	/* char *strchr(const char *s, int c) */
+_strchr:
+strchr:
+	movq %rdi,%r8	/* save s */
+
+	movb $0,%al	/* first pass: locate s's NUL terminator */
+	movq $-1,%rcx
+	cld
+	repne
+	scasb
+
+	movq %rdi,%rcx	/* rcx = strlen(s)+1: count INCLUDES the NUL, so */
+	subq %r8,%rcx	/* c == '\0' finds the terminator (C standard) */
+
+	movq %r8,%rdi	/* second pass: scan s (NUL included) for c */
+	movb %sil,%al
+	repne
+	scasb
+	je found
+
+none:
+	xor %eax,%eax	/* not found: NULL */
+	ret
+
+found:
+	leaq -1(%rdi),%rax	/* scasb leaves rdi one past the match */
+	ret
--- /dev/null
+++ b/src/libc/arch/amd64/strcmp.s
@@ -1,0 +1,39 @@
+	.file "strcmp.s"
+
+	.text
+	.globl strcmp,_strcmp	/* int strcmp(const char *s1, const char *s2) */
+_strcmp:
+strcmp:
+	movq %rdi,%r8	/* save s1 */
+	movb $0,%al	/* first pass: locate s1's NUL terminator */
+	movq $-1,%rcx
+	cld
+	repne
+	scasb
+
+	movq %rdi,%rcx	/* rcx = strlen(s1)+1: count INCLUDES the NUL so a */
+	subq %r8,%rcx	/* proper prefix ("ab" vs "abc") compares unequal */
+	movq %r8,%rdi
+
+	xor %eax,%eax	/* scratch; rcx >= 1, so cmpsb always executes */
+
+	movq %rdi,%r8	/* swap args so cmpsb computes s1[i] - s2[i] */
+	movq %rsi,%rdi
+	movq %r8,%rsi
+	repe
+	cmpsb
+	je equa	/* matched through the NUL: strings equal */
+	jb less	/* unsigned byte compare, as strcmp requires */
+	ja grea
+
+grea:
+	movq $1,%rax
+	ret
+
+less:
+	movq $-1,%rax
+	ret
+
+equa:
+	movq $0,%rax
+	ret
--- /dev/null
+++ b/src/libc/arch/amd64/strcpy.s
@@ -1,0 +1,22 @@
+	.file "strcpy.s"
+
+	.text
+	.globl strcpy,_strcpy	/* char *strcpy(char *dst, const char *src) */
+
+strcpy:
+_strcpy:
+	movq %rdi,%r9	/* save dst; rdi is clobbered by the scan below */
+
+	movb $0,%al	/* scan src for its NUL terminator */
+	movq $-1,%rcx
+	cld
+	movq %rsi,%rdi
+	repne
+	scasb
+	movq %rdi,%rcx	/* rcx = strlen(src)+1 (NUL included) */
+	subq %rsi,%rcx	/* rsi still = src: scasb advances only rdi */
+
+	movq %r9,%rdi	/* copy rcx bytes src -> dst */
+	mov %rdi,%rax	/* return dst */
+	rep; movsb
+	ret