shithub: riscv

ref: 4757debd0b5935dae94944c35c3cac74b14aa209
dir: /sys/src/cmd/dict/mkindex.c/

View raw version
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Use this to start making an index for a new dictionary.
 * Get the dictionary-specific nextoff and printentry(_,'h')
 * functions working, add a record to the dicts[] array below,
 * and run this program to get a list of offset,headword
 * pairs.
 */
/*
 * File-scope state.  Several of these (breaklen, outinhibit, debug)
 * are not read in this file; presumably they are referenced by the
 * shared dictionary code declared in dict.h -- TODO confirm.
 */
Biobuf	boutbuf;	/* output buffer; main() binds it to fd 1 with Binit */
Biobuf	*bdict;	/* dictionary file, opened read-only from dict->path in main() */
Biobuf	*bout = &boutbuf;	/* output stream used for all Bprint output here */
int	linelen;	/* approximate length of the current output line; reset per entry in main() */
int	breaklen = 2000;	/* NOTE(review): looks like a line-break threshold used by the printing code -- confirm */
int	outinhibit;	/* not touched in this file */
int	debug;	/* incremented once per -D flag */

Dict	*dict;	/* current dictionary */

/* Read the entry starting at the given byte offset of bdict (defined below). */
Entry	getentry(long);

/*
 * Print the command synopsis on file descriptor 2 and terminate
 * the program with exit status "usage".  Does not return.
 */
void
usage(void)
{
	fprint(2, "usage: %s [-D] [-d dictname]\n", argv0);
	exits("usage");
}

/*
 * Walk the selected dictionary from offset 0 to end of file and,
 * for every entry, write its byte offset, a tab, and then whatever
 * the dictionary's printentry routine emits in 'h' (headword) mode.
 *
 * Options:
 *	-d name	select the dicts[] record whose name matches;
 *		the default is dicts[0]
 *	-D	increment the debug counter
 *
 * Exits with status "nodict" if the named dictionary is unknown or
 * its file cannot be opened, "usage" on a bad option, and a null
 * status otherwise.
 */
void
main(int argc, char **argv)
{
	int i;
	long a, ae, next;
	char *p;
	Entry e;

	Binit(&boutbuf, 1, OWRITE);
	dict = &dicts[0];
	ARGBEGIN {
		case 'd':
			p = EARGF(usage());
			dict = nil;

			/* dicts[] is terminated by a record with a nil name */
			for(i=0; dicts[i].name; i++) {
				if(strcmp(p, dicts[i].name)==0) {
					dict = &dicts[i];
					break;
				}
			}
			if(dict == nil) {
				err("unknown dictionary: %s", p);
				exits("nodict");
			}
			break;
		case 'D':
			debug++;
			break;
		default:
			usage();
	}ARGEND
	USED(argc,argv);
	bdict = Bopen(dict->path, OREAD);
	if(!bdict) {
		err("can't open dictionary %s", dict->path);
		exits("nodict");
	}
	/* seeking to the end yields the file size, the upper bound for offsets */
	ae = Bseek(bdict, 0, 2);
	a = 0;
	while(a < ae) {
		linelen = 0;
		e = getentry(a);
		Bprint(bout, "%ld\t", a);
		linelen = 4;	/* only has to be approx right */
		(*dict->printentry)(e, 'h');

		/*
		 * nextoff can report failure with a negative value
		 * (getentry checks for exactly that).  A negative or
		 * non-advancing offset would still satisfy a < ae and
		 * the walk would never finish, so stop instead.
		 */
		next = (*dict->nextoff)(a+1);
		if(next <= a)
			break;
		a = next;
	}
	exits(0);
}

/*
 * Read the dictionary entry that begins at byte offset b of bdict.
 *
 * The entry extends to the offset returned by the dictionary's
 * nextoff routine for b+1; if nextoff fails (negative result) but b
 * is still inside the file, the entry extends to end of file.
 *
 * Returns an Entry with doff set to b and start/end delimiting the
 * bytes actually read.  The bytes live in a static buffer that is
 * reused (and possibly moved by realloc) on the next call, so the
 * result is only valid until getentry is called again.
 *
 * On failure (b at or beyond end of file) an error is reported via
 * err() and both start and end are nil.  An entry of zero length,
 * or one whose read fails, is returned with end == start.
 * Allocation failure is fatal.
 */
Entry
getentry(long b)
{
	long e, n, dtop;
	static Entry ans;
	/*
	 * The buffer is tracked separately from ans.start so that
	 * reporting failure (ans.start = nil) neither leaks it nor
	 * leaves buflen describing storage we no longer point at.
	 */
	static void *buf;
	static long buflen = 0;

	e = (*dict->nextoff)(b+1);
	ans.doff = b;
	if(e < 0) {
		dtop = Bseek(bdict, 0L, 2);
		if(b >= dtop) {
			err("couldn't seek to entry");
			ans.start = 0;
			ans.end = 0;
			/* nothing to read; do not fall through with a negative length */
			return ans;
		}
		e = dtop;
	}
	n = e-b;
	if(n > buflen) {
		if((buf = realloc(buf, n)) == nil)
			sysfatal("realloc: %r");
		buflen = n;
	}
	ans.start = buf;
	ans.end = ans.start;	/* empty until data has actually been read */
	if(n <= 0)
		return ans;
	Bseek(bdict, b, 0);
	n = Bread(bdict, ans.start, n);
	/* a failed read returns a negative count; leave the entry empty then */
	if(n > 0)
		ans.end = ans.start + n;
	return ans;
}