ref: 57e07865953412c7cf8ae579d7a748c1540d629e
parent: 9d790238f28647de47d7dadebe48a1f567729dc2
author: cinap_lenrek <[email protected]>
date: Thu May 9 07:40:54 EDT 2019
libc: use MOVP instruction for arm64 memmove() and memset()
--- a/sys/src/ape/lib/ap/arm64/memmove.s
+++ b/sys/src/ape/lib/ap/arm64/memmove.s
@@ -9,15 +9,15 @@
_forward:
ADD R0, R2, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _floop1
ADD R0, R4, R4
-_floop8:
- MOV (R1)8!, R5
- MOV R5, (R0)8!
+_floop16:
+ MOVP (R1)16!, R5, R6
+ MOVP R5, R6, (R0)16!
CMP R4, R0
- BNE _floop8
+ BNE _floop16
_floop1:
CMP R3, R0
@@ -32,15 +32,15 @@
_backward:
ADD R2, R1, R1
ADD R2, R0, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _bloop1
SUB R4, R3, R4
-_bloop8:
- MOV -8(R1)!, R5
- MOV R5, -8(R3)!
+_bloop16:
+ MOVP -16(R1)!, R5, R6
+ MOVP R5, R6, -16(R3)!
CMP R4, R3
- BNE _bloop8
+ BNE _bloop16
_bloop1:
CMP R0, R3
--- a/sys/src/ape/lib/ap/arm64/memset.s
+++ b/sys/src/ape/lib/ap/arm64/memset.s
@@ -3,7 +3,7 @@
MOVWU n+16(FP), R2
ADD R0, R2, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _loop1
ADD R0, R4, R4
@@ -11,10 +11,10 @@
ORR R1<<16, R1
ORR R1<<32, R1
-_loop8:
- MOV R1, (R0)8!
+_loop16:
+ MOVP R1, R1, (R0)16!
CMP R4, R0
- BNE _loop8
+ BNE _loop16
_loop1:
CMP R3, R0
--- a/sys/src/libc/arm64/memmove.s
+++ b/sys/src/libc/arm64/memmove.s
@@ -9,15 +9,15 @@
_forward:
ADD R0, R2, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _floop1
ADD R0, R4, R4
-_floop8:
- MOV (R1)8!, R5
- MOV R5, (R0)8!
+_floop16:
+ MOVP (R1)16!, R5, R6
+ MOVP R5, R6, (R0)16!
CMP R4, R0
- BNE _floop8
+ BNE _floop16
_floop1:
CMP R3, R0
@@ -32,15 +32,15 @@
_backward:
ADD R2, R1, R1
ADD R2, R0, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _bloop1
SUB R4, R3, R4
-_bloop8:
- MOV -8(R1)!, R5
- MOV R5, -8(R3)!
+_bloop16:
+ MOVP -16(R1)!, R5, R6
+ MOVP R5, R6, -16(R3)!
CMP R4, R3
- BNE _bloop8
+ BNE _bloop16
_bloop1:
CMP R0, R3
--- a/sys/src/libc/arm64/memset.s
+++ b/sys/src/libc/arm64/memset.s
@@ -3,7 +3,7 @@
MOVWU n+16(FP), R2
ADD R0, R2, R3
- BIC $7, R2, R4
+ BIC $15, R2, R4
CBZ R4, _loop1
ADD R0, R4, R4
@@ -11,10 +11,10 @@
ORR R1<<16, R1
ORR R1<<32, R1
-_loop8:
- MOV R1, (R0)8!
+_loop16:
+ MOVP R1, R1, (R0)16!
CMP R4, R0
- BNE _loop8
+ BNE _loop16
_loop1:
CMP R3, R0