shithub: riscv

Download patch

ref: aa3c0e55f3bb5b3b014ecb6ab1e268fa217d56a7
parent: 48980502820f92ecd647d9f454513ee88f382ed7
author: cinap_lenrek <[email protected]>
date: Sun Nov 26 20:31:19 EST 2017

libsec: optimize aesCBCencrypt()/aesCBCdecrypt()

- get rid of the temporary block copies and most memmove() calls by running the cipher in place
- when the data pointer is 4-byte aligned, do the xor and copying inline as 32-bit word operations

speedup for auth/aescbc encryption depends on arch:

- zynq	7%	(arm)
- t23	13%	(386)
- x230	20%	(amd64, aes-ni)
- apu2	25%	(amd64, aes-ni)

--- a/sys/src/libsec/port/aesCBC.c
+++ b/sys/src/libsec/port/aesCBC.c
@@ -9,24 +9,37 @@
 void
 aesCBCencrypt(uchar *p, int len, AESstate *s)
 {
-	uchar *p2, *ip, *eip;
-	uchar q[AESbsize];
+	uchar *ip, *eip;
 
-	for(; len >= AESbsize; len -= AESbsize){
-		p2 = p;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; )
-			*p2++ ^= *ip++;
-		aes_encrypt(s->ekey, s->rounds, p, q);
-		memmove(s->ivec, q, AESbsize);
-		memmove(p, q, AESbsize);
-		p += AESbsize;
+	if(((p-(uchar*)0) & 3) == 0){
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			((u32int*)ip)[0] ^= ((u32int*)p)[0];
+			((u32int*)ip)[1] ^= ((u32int*)p)[1];
+			((u32int*)ip)[2] ^= ((u32int*)p)[2];
+			((u32int*)ip)[3] ^= ((u32int*)p)[3];
+
+			aes_encrypt(s->ekey, s->rounds, ip, ip);
+
+			((u32int*)p)[0] = ((u32int*)ip)[0];
+			((u32int*)p)[1] = ((u32int*)ip)[1];
+			((u32int*)p)[2] = ((u32int*)ip)[2];
+			((u32int*)p)[3] = ((u32int*)ip)[3];
+			p += AESbsize;
+		}
+	} else {
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; )
+				*ip++ ^= *p++;
+			aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+			memmove(p - AESbsize, s->ivec, AESbsize);
+		}
 	}
 
 	if(len > 0){
 		ip = s->ivec;
-		aes_encrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
 		for(eip = ip+len; ip < eip; )
 			*p++ ^= *ip++;
 	}
@@ -36,24 +49,45 @@
 aesCBCdecrypt(uchar *p, int len, AESstate *s)
 {
 	uchar *ip, *eip, *tp;
-	uchar tmp[AESbsize], q[AESbsize];
+	u32int t[4];
 
-	for(; len >= AESbsize; len -= AESbsize){
-		memmove(tmp, p, AESbsize);
-		aes_decrypt(s->dkey, s->rounds, p, q);
-		memmove(p, q, AESbsize);
-		tp = tmp;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; ){
-			*p++ ^= *ip;
-			*ip++ = *tp++;
+	if(((p-(uchar*)0) & 3) == 0){
+		for(; len >= AESbsize; len -= AESbsize){
+			t[0] = ((u32int*)p)[0];
+			t[1] = ((u32int*)p)[1];
+			t[2] = ((u32int*)p)[2];
+			t[3] = ((u32int*)p)[3];
+
+			aes_decrypt(s->dkey, s->rounds, p, p);
+
+			ip = s->ivec;
+			((u32int*)p)[0] ^= ((u32int*)ip)[0];
+			((u32int*)p)[1] ^= ((u32int*)ip)[1];
+			((u32int*)p)[2] ^= ((u32int*)ip)[2];
+			((u32int*)p)[3] ^= ((u32int*)ip)[3];
+			p += AESbsize;
+
+			((u32int*)ip)[0] = t[0];
+			((u32int*)ip)[1] = t[1];
+			((u32int*)ip)[2] = t[2];
+			((u32int*)ip)[3] = t[3];
 		}
+	} else {
+		for(; len >= AESbsize; len -= AESbsize){
+			tp = (uchar*)t;
+			memmove(tp, p, AESbsize);
+			aes_decrypt(s->dkey, s->rounds, p, p);
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
 	}
 
 	if(len > 0){
 		ip = s->ivec;
-		aes_encrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
 		for(eip = ip+len; ip < eip; )
 			*p++ ^= *ip++;
 	}