shithub: femtolisp

Download patch

ref: e4cc50bddd8707dc8feffc623eda092a86e03d8e
parent: 1639dd6badc1c236034e3cac5b4da2f209b040a0
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Tue Nov 12 22:44:50 EST 2024

remove bitvector ops

--- a/bitreverse.c
+++ /dev/null
@@ -1,16 +1,0 @@
-#include "platform.h"
-
-uint32_t
-__builtin_bitreverse32(uint32_t x)
-{
-	uint32_t m;
-
-	x = (x >> 16)      | (x << 16);       m = 0xff00ff00;
-	x = ((x & m) >> 8) | ((x & ~m) << 8);
-	m = 0xf0f0f0f0;
-	x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
-	x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
-	x = ((x & m) >> 1) | ((x & ~m) << 1);
-
-	return x;
-}
--- a/bitvector-ops.c
+++ /dev/null
@@ -1,447 +1,0 @@
-#include "llt.h"
-
-#define ONES32 ((uint32_t)0xffffffffUL)
-
-// greater than this # of words we use malloc instead of alloca
-#define MALLOC_CUTOFF 2000
-
-// shift all bits in a long bit vector
-// n is # of int32s to consider, s is shift distance
-// lowest bit-index is bit 0 of word 0
-// TODO: handle boundary case of shift distance >= data size?
-void
-bitvector_shr(uint32_t *b, size_t n, uint32_t s)
-{
-	uint32_t i;
-	if(s == 0 || n == 0)
-		return;
-	i = s >> 5;
-	if(i){
-		n -= i;
-		memmove(b, &b[i], n*4);
-		memset(&b[n], 0, i*4);
-		s &= 31;
-	}
-	for(i = 0; i < n-1; i++)
-		b[i] = (b[i] >> s) | (b[i+1] << (32-s));
-	b[i] >>= s;
-}
-
-// out-of-place version, good for re-aligning a strided submatrix to
-// linear representation when a copy is needed
-// assumes that dest has the same amount of space as source, even if it
-// wouldn't have been necessary to hold the shifted bits
-void
-bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s)
-{
-	uint32_t i, j;
-	if(n == 0)
-		return;
-	if(s == 0){
-		memmove(dest, b, n*4);
-		return;
-	}
-	j = s >> 5;
-	if(j){
-		n -= j;
-		memset(&dest[n], 0, j*4);
-		s &= 31;
-		b = &b[j];
-	}
-	for(i = 0; i < n-1; i++)
-		dest[i] = (b[i] >> s) | (b[i+1] << (32-s));
-	dest[i] = b[i]>>s;
-}
-
-void
-bitvector_shl(uint32_t *b, size_t n, uint32_t s)
-{
-	uint32_t i, scrap = 0, temp;
-	if(s == 0 || n == 0)
-		return;
-	i = s >> 5;
-	if(i){
-		n -= i;
-		memmove(&b[i], b, n*4);
-		memset(b, 0, i*4);
-		s &= 31;
-		b = &b[i];
-	}
-	for(i = 0; i < n; i++){
-		temp = (b[i] << s) | scrap;
-		scrap = b[i] >> (32-s);
-		b[i] = temp;
-	}
-}
-
-// if dest has more space than source, set scrap to true to keep the
-// top bits that would otherwise be shifted out
-void
-bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap)
-{
-	uint32_t i, j, sc = 0;
-	if(n == 0)
-		return;
-	if(s == 0){
-		memmove(dest, b, n*4);
-		return;
-	}
-	j = s >> 5;
-	if(j){
-		n -= j;
-		memset(dest, 0, j*4);
-		s &= 31;
-		dest = &dest[j];
-	}
-	for(i = 0; i < n; i++){
-		dest[i] = (b[i] << s) | sc;
-		sc = b[i] >> (32-s);
-	}
-	if(scrap)
-		dest[i] = sc;
-}
-
-// set nbits to c, starting at given bit offset
-// assumes offs < 32
-void
-bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits)
-{
-	uint32_t i, nw, tail, mask;
-
-	if(nbits == 0)
-		return;
-	nw = (offs+nbits+31)>>5;
-
-	if(nw == 1){
-		mask = (lomask(nbits)<<offs);
-		if(c)
-			b[0] |= mask;
-		else
-			b[0] &= ~mask;
-		return;
-	}
-
-	mask = lomask(offs);
-	if(c)
-		b[0] |= ~mask;
-	else
-		b[0] &= mask;
-
-	mask = c ? ONES32 : 0;
-
-	for(i = 1; i < nw-1; i++)
-		b[i] = mask;
-
-	tail = (offs+nbits) & 31;
-	if(tail == 0)
-		b[i] = mask;
-	else{
-		mask = lomask(tail);
-		if(c)
-			b[i] |= mask;
-		else
-			b[i] &= ~mask;
-	}
-}
-
-void
-bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
-	uint32_t i, nw, tail, mask;
-
-	if(nbits == 0)
-		return;
-	nw = (offs+nbits+31)>>5;
-
-	if(nw == 1){
-		mask = lomask(nbits)<<offs;
-		b[0] ^= mask;
-		return;
-	}
-
-	mask = ~lomask(offs);
-	b[0] ^= mask;
-
-	for(i = 1; i < nw-1; i++)
-		b[i] = ~b[i];
-
-	tail = (offs+nbits)&31;
-	if(tail == 0)
-		b[i] = ~b[i];
-	else{
-		mask = lomask(tail);
-		b[i] ^= mask;
-	}
-}
-
-// constant-space bit vector copy in a single pass, with arbitrary
-// offsets and lengths. to get this right, there are 16 cases to handle!
-#define BITVECTOR_COPY_OP(name, OP) \
-void \
-bitvector_##name(uint32_t *dest, uint32_t doffs, uint32_t *src, uint32_t soffs, uint32_t nbits) \
-{ \
-	uint32_t i, s, nw, tail, snw, mask, scrap; \
-	if(nbits == 0) \
-		return; \
-	nw = (doffs+nbits+31)>>5; \
-	if(soffs == doffs){ \
-		if(nw == 1){ \
-			mask = (lomask(nbits)<<doffs); \
-			dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
-			return; \
-		} \
-		mask = ~lomask(doffs); \
-		dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
-		for(i = 1; i < nw-1; i++) \
-			dest[i] = OP(src[i]); \
-		tail = (doffs+nbits)&31; \
-		if(tail == 0) \
-			dest[i] = src[i]; \
-		else { \
-			mask = lomask(tail); \
-			dest[i] = (dest[i] & ~mask) | (OP(src[i]) & mask); \
-		} \
-		return; \
-	} \
-	snw = (soffs+nbits+31)>>5; \
-	if(soffs < doffs){ \
-		s = doffs-soffs; \
-		if(nw == 1){ \
-			mask = lomask(nbits) << doffs; \
-			dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
-			return; \
-		} \
-		mask = ~lomask(doffs); \
-		dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
-		scrap = OP(src[0])>>(32-s); \
-		for(i = 1; i < snw-1; i++){ \
-			dest[i] = (OP(src[i])<<s) | scrap; \
-			scrap = OP(src[i])>>(32-s); \
-		} \
-		tail = (doffs+nbits)&31; \
-		mask = tail ? lomask(tail) : ONES32; \
-		if(snw == nw) \
-			dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s)|scrap) & mask); \
-		else{ /* snw < nw */ \
-			if(snw == 1) \
-				dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s) | scrap) & mask); \
-			else{ \
-				dest[i] = (OP(src[i])<<s) | scrap; \
-				scrap = OP(src[i])>>(32-s); \
-				i++; \
-				dest[i] = (dest[i] & ~mask) | (scrap & mask); \
-			} \
-		} \
-	}else{ \
-		s = soffs-doffs; \
-		if(snw == 1){ \
-			mask = (lomask(nbits)<<doffs); \
-			dest[0] = (dest[0] & ~mask) | ((OP(src[0])>>s) & mask); \
-			return; \
-		} \
-		if(nw == 1){ \
-			mask = (lomask(nbits)<<doffs); \
-			dest[0] = (dest[0] & ~mask) | \
-				(((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
-			return; \
-		} \
-		mask = ~lomask(doffs); \
-		dest[0] = (dest[0] & ~mask) | (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
-		for(i = 1; i < nw-1; i++) \
-			dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \
-		tail = (doffs+nbits)&31; \
-		mask = tail ? lomask(tail) : ONES32; \
-		if(snw == nw){ \
-			dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \
-		} \
-		else /* snw > nw */ { \
-			dest[i] = (dest[i] & ~mask) | \
-				(((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \
-		} \
-	} \
-}
-
-#define BV_COPY(a) (a)
-#define BV_NOT(a) (~(a))
-BITVECTOR_COPY_OP(copy, BV_COPY)
-BITVECTOR_COPY_OP(not_to, BV_NOT)
-
-// copy from source to dest while reversing bit-order
-// assumes dest offset == 0
-// assumes source and dest don't overlap
-// assumes offset < 32
-void
-bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits)
-{
-	uint32_t i, nw, tail;
-
-	if(nbits == 0)
-		return;
-
-	nw = (soffs+nbits+31)>>5;
-	// first, reverse the words while reversing bit order within each word
-	for(i = 0; i < nw/2; i++){
-		dest[i] = __builtin_bitreverse32(src[nw-i-1]);
-		dest[nw-i-1] = __builtin_bitreverse32(src[i]);
-	}
-	if(nw&0x1)
-		dest[i] = __builtin_bitreverse32(src[i]);
-
-	tail = (soffs+nbits)&31;
-	if(tail)
-		bitvector_shr(dest, nw, 32-tail);
-}
-
-void
-bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
-	uint32_t i, nw, tail, *temp, a[MALLOC_CUTOFF];
-
-	if(nbits == 0)
-		return;
-
-	nw = (offs+nbits+31)>>5;
-	temp = (nw > MALLOC_CUTOFF) ? LLT_ALLOC(nw*4) : a;
-	for(i = 0; i < nw/2; i++){
-		temp[i]	= __builtin_bitreverse32(b[nw-i-1]);
-		temp[nw-i-1] = __builtin_bitreverse32(b[i]);
-	}
-	if(nw & 1)
-		temp[i] = __builtin_bitreverse32(b[i]);
-
-	tail = (offs+nbits)&31;
-	bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
-	if(nw > MALLOC_CUTOFF)
-		LLT_FREE(temp);
-}
-
-uint64_t
-bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits)
-{
-	size_t i, nw;
-	uint32_t ntail;
-	uint64_t ans;
-
-	if(nbits == 0)
-		return 0;
-	nw = ((uint64_t)offs+nbits+31)>>5;
-
-	if(nw == 1)
-		return __builtin_popcount(b[0] & (lomask(nbits)<<offs));
-
-	ans = __builtin_popcount(b[0]>>offs);  // first end cap
-
-	for(i = 1; i < nw-1; i++)
-		ans += __builtin_popcount(b[i]);
-
-	ntail = (offs + (uint32_t)nbits) & 31;
-	ans += __builtin_popcount(b[i] & (ntail > 0 ? lomask(ntail) : ONES32));  // last end cap
-
-	return ans;
-}
-
-uint32_t
-bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
-	uint32_t i, nw, tail, mask;
-
-	if(nbits == 0)
-		return 0;
-	nw = (offs+nbits+31)>>5;
-
-	if(nw == 1){
-		mask = (lomask(nbits)<<offs);
-		if((b[0] & mask) != mask)
-			return 1;
-		return 0;
-	}
-
-	mask = ~lomask(offs);
-	if((b[0] & mask) != mask)
-		return 1;
-
-	for(i = 1; i < nw-1; i++)
-		if(b[i] != ONES32)
-			return 1;
-
-	tail = (offs+nbits)&31;
-	if(tail == 0)
-		return b[i] != ONES32;
-	mask = lomask(tail);
-	return (b[i] & mask) != mask;
-}
-
-uint32_t
-bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
-	uint32_t i, nw, tail, mask;
-
-	if(nbits == 0)
-		return 0;
-	nw = (offs+nbits+31)>>5;
-
-	if(nw == 1){
-		mask = lomask(nbits)<<offs;
-		return (b[0] & mask) != 0;
-	}
-
-	mask = ~lomask(offs);
-	if((b[0] & mask) != 0)
-		return 1;
-
-	for(i = 1; i < nw-1; i++){
-		if(b[i] != 0)
-			return 1;
-	}
-
-	tail = (offs+nbits)&31;
-	if(tail == 0)
-		return b[i] != 0;
-	return (b[i] & lomask(tail)) != 0;
-}
-
-static void
-adjust_offset_to(uint32_t *dest, uint32_t *src, uint32_t nw, uint32_t soffs, uint32_t newoffs)
-{
-	if(newoffs > soffs)
-		bitvector_shl_to(dest, src, nw, newoffs-soffs, 1);
-	else
-		bitvector_shr_to(dest, src, nw, soffs-newoffs);
-}
-
-#define BITVECTOR_BINARY_OP_TO(opname, OP) \
-void \
-bitvector_##opname##_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits) \
-{ \
-	uint32_t nw = (doffs+nbits+31)>>5; \
-	uint32_t atmp[MALLOC_CUTOFF+1]; \
-	uint32_t *temp = nw>MALLOC_CUTOFF ? LLT_ALLOC((nw+1)*4) : atmp; \
-	uint32_t i, anw, bnw; \
-	if(aoffs == boffs){ \
-		anw = (aoffs+nbits+31)>>5; \
-	}else if(aoffs == doffs){ \
-		bnw = (boffs+nbits+31)>>5; \
-		adjust_offset_to(temp, b, bnw, boffs, aoffs); \
-		b = temp; \
-		anw = nw; \
-	}else{ \
-		anw = (aoffs+nbits+31)>>5; \
-		bnw = (boffs+nbits+31)>>5; \
-		adjust_offset_to(temp, a, anw, aoffs, boffs); \
-		a = temp; \
-		aoffs = boffs; \
-		anw = bnw; \
-	} \
-	for(i = 0; i < anw; i++) \
-		temp[i] = OP(a[i], b[i]); \
-	bitvector_copy(dest, doffs, temp, aoffs, nbits); \
-	if(nw>MALLOC_CUTOFF) \
-		LLT_FREE(temp); \
-}
-
-#define BV_AND(a, b) ((a)&(b))
-#define BV_OR(a, b) ((a)|(b))
-#define BV_XOR(a, b) ((a)^(b))
-BITVECTOR_BINARY_OP_TO(and, BV_AND)
-BITVECTOR_BINARY_OP_TO(or, BV_OR)
-BITVECTOR_BINARY_OP_TO(xor, BV_XOR)
--- a/bitvector.h
+++ b/bitvector.h
@@ -1,24 +1,5 @@
-// a mask with n set lo or hi bits
-#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
-
 uint32_t *bitvector_new(uint64_t n, int initzero);
 uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
 size_t bitvector_nwords(uint64_t nbits);
 void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
 uint32_t bitvector_get(uint32_t *b, uint64_t n);
-void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap);
-void bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits);
-void bitvector_copy(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_not_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits);
-void bitvector_and_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_or_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_xor_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-uint64_t bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits);
-uint32_t bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits);
-uint32_t bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits);
--- a/meson.build
+++ b/meson.build
@@ -57,7 +57,6 @@
 	'3rd/utf/runeistype.c',
 	'3rd/utf/runetotype.c',
 	'3rd/utf/utfnlen.c',
-	'bitvector-ops.c',
 	'bitvector.c',
 	'builtins.c',
 	'cvalues.c',
@@ -85,20 +84,6 @@
 ]
 
 cc = meson.get_compiler('c')
-
-bitreverse32_code = '''#include <stdio.h>
-int main(int argc, char **argv) { return __builtin_bitreverse32(argc); }
-'''
-have_bitreverse32 = cc.links(bitreverse32_code, name: '__builtin_bitreverse32')
-
-if have_bitreverse32
-	add_project_arguments(
-		'-DHAVE_BITREVERSE32',
-		language: 'c',
-	)
-else
-	src += ['bitreverse.c']
-endif
 
 if cc.get_id() == 'clang'
 	add_project_arguments(
--- a/mkfile
+++ b/mkfile
@@ -13,8 +13,6 @@
 	3rd/mt19937-64.$O\
 	3rd/wcwidth.$O\
 	3rd/spooky.$O\
-	bitreverse.$O\
-	bitvector-ops.$O\
 	bitvector.$O\
 	builtins.$O\
 	cvalues.$O\
@@ -30,7 +28,6 @@
 	iostream.$O\
 	main_plan9.$O\
 	operators.$O\
-	popcount.$O\
 	print.$O\
 	ptrhash.$O\
 	random.$O\
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -119,6 +119,3 @@
 typedef enum { false, true } bool;
 
 int wcwidth(Rune c);
-
-uint32_t __builtin_popcount(uint32_t);
-uint32_t __builtin_bitreverse32(uint32_t x);
--- a/popcount.c
+++ /dev/null
@@ -1,12 +1,0 @@
-#include "platform.h"
-
-uint32_t
-__builtin_popcount(uint32_t b)
-{
-	b = b - ((b>>1)&0x55555555);
-	b = ((b>>2)&0x33333333) + (b&0x33333333);
-	b = ((b>>4)+b)&0x0f0f0f0f;
-	b += (b>>8);
-	b += (b>>16);
-	return b & 0x3f;
-}