shithub: riscv

ref: 1d82e3f42aa82b12edba2e057c1a6cc090ee1c80
dir: /sys/src/ape/lib/ap/power/memcmp.s/

View raw version
	TEXT	memcmp(SB), $0
/*
 * memcmp: compare the first n bytes at s1 with the first n bytes at s2.
 *
 * arguments: s1 arrives in R3 (and is stored to its frame slot below);
 *	s2 and n are read from the frame at s2+4(FP) and n+8(FP).
 * result in R3: 0 if every compared byte matches; otherwise 1 or -1,
 *	chosen by an unsigned comparison (CMPU) of the first word or
 *	partial word that differs.
 * registers written: R3-R5, R9-R14, XER, CTR and the condition codes.
 *
 * NOTE(review): comparing a word at a time orders the same way as
 * comparing a byte at a time only if the lowest-addressed byte is the
 * most significant byte of the loaded word (big-endian) -- confirm
 * for the target.
 */

/*
 * BDNZ label: BC with BO=16 decrements CTR and branches to the label
 * while CTR is still non-zero.
 */
#define	BDNZ	BC	16,0,
	MOVW R3, s1+0(FP)		/* R3 is pointer1 */

/*
 * performance (figures as recorded by the original author; the
 * hardware they were measured on is not stated here):
 *	67mb/sec aligned; 16mb/sec unaligned
 */

	MOVW	n+8(FP), R4		/* R4 is count */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */

/*
 * let LSW do the work for 4 bytes or fewer; aligned and unaligned.
 * a count of zero or less compares equal.
 * NOTE(review): CMP/BLE is a signed test, so a count with the top bit
 * set is also treated as "nothing to compare" -- confirm acceptable.
 */
	CMP	R4, $0
	BLE	eq
	CMP	R4, $4
	BLE	out

	XOR	R3, R5, R9	/* low two bits differ => the pointers can never both be word aligned */
	ANDCC	$3, R9
	BNE	l4	/* pointers have different alignment; use LSW loop */

/*
 * do enough bytes to align pointers:
 * R9 = 4 - (R3 & 3) is the number of bytes up to the next word boundary.
 * LSW with no immediate count takes its byte count from XER and loads
 * the bytes left-justified into the register, zero-filling the rest,
 * so the two partial words can be compared directly.
 */
	ANDCC	$3,R3, R9
	BEQ	l2		/* already word aligned */
	SUBC	R9, $4, R9	/* R9 = 4 - R9 */
	MOVW	R9, XER		/* byte count for the two LSWs below */
	LSW	(R3), R10
	ADD	R9, R3
	LSW	(R5), R14
	ADD	R9, R5
	SUB	R9, R4		/* count -= bytes consumed */
	CMPU	R10, R14
	BNE	ne

/*
 * compare 16 at a time
 * R9 = R4>>4 is the number of whole 16-byte chunks; it becomes the
 * loop count in CTR.  MOVWU 4(Rx) advances Rx by 4 and loads from the
 * new address, so both pointers are biased by -4 before the loop and
 * restored by +4 after it.  loads and compares are interleaved: each
 * pair of words is compared while the next pair is being fetched.
 */
l2:
	SRAWCC	$4, R4, R9
	BLE	l4		/* fewer than 16 bytes left */
	MOVW	R9, CTR
	SUB	$4, R3
	SUB	$4, R5
l3:
	MOVWU	4(R3), R10	/* word 0 of each buffer */
	MOVWU	4(R5), R12
	MOVWU	4(R3), R11	/* word 1 */
	MOVWU	4(R5), R13
	CMPU	R10, R12	/* compare word 0 */
	BNE	ne
	MOVWU	4(R3), R10	/* word 2 */
	MOVWU	4(R5), R12
	CMPU	R11, R13	/* compare word 1 */
	BNE	ne
	MOVWU	4(R3), R11	/* word 3 */
	MOVWU	4(R5), R13
	CMPU	R10, R12	/* compare word 2 */
	BNE	ne
	CMPU	R11, R13	/* compare word 3 */
	BNE	ne
	BDNZ	l3
	ADD	$4, R3		/* undo the -4 bias */
	ADD	$4, R5
	RLWNMCC	$0, R4, $15, R4	/* residue: R4 &= 15 */
	BEQ	eq

/*
 * do remaining words with LSW; also does unaligned case
 * R9 = R4>>2 is the number of whole words; LSW with an immediate
 * count of 4 loads one word's worth of bytes from any alignment.
 */
l4:
	SRAWCC	$2, R4, R9
	BLE	out		/* fewer than 4 bytes left */
	MOVW	R9, CTR
l5:
	LSW	(R3), $4, R10
	ADD	$4, R3
	LSW	(R5), $4, R11
	ADD	$4, R5
	CMPU	R10, R11
	BNE	ne
	BDNZ	l5
	RLWNMCC	$0, R4, $3, R4	/* residue: R4 &= 3 */
	BEQ	eq

/*
 * do remaining bytes with final LSW
 * R4 is 1 to 4 here: either the original count was 4 or less, or it
 * is the non-zero residue left by the loops above.
 */
out:
	MOVW	R4, XER		/* byte count for the two LSWs below */
	LSW	(R3), R10
	LSW	(R5), R11
	CMPU	R10, R11
	BNE	ne

/*
 * every byte matched (or there was nothing to compare)
 */
eq:
	MOVW	$0, R3
	RETURN

/*
 * a difference was found.  the BGE below still tests the condition
 * codes left by the CMPU that branched here (the code relies on the
 * intervening MOVW not changing them): first operand higher => 1,
 * lower => -1.
 */
ne:
	MOVW	$1, R3
	BGE	ret
	MOVW	$-1,R3
ret:
	RETURN
	END