ref: 11025d6f4a2c3c7e75010f85c2d3108e602270b7
dir: /sys/src/cmd/venti/srv/whack.c/
#include "stdinc.h" #include "whack.h" typedef struct Huff Huff; int compressblocks = 1; enum { MaxFastLen = 9, BigLenCode = 0x1f4, /* minimum code for large lenth encoding */ BigLenBits = 9, BigLenBase = 4, /* starting items to encode for big lens */ MinOffBits = 6, MaxOffBits = MinOffBits + 8, MaxLen = 2051 /* max. length encodable in 24 bits */ }; enum { StatBytes, StatOutBytes, StatLits, StatMatches, StatLitBits, StatOffBits, StatLenBits, MaxStat }; struct Huff { short bits; /* length of the code */ ulong encode; /* the code */ }; static Huff lentab[MaxFastLen] = { {2, 0x2}, /* 10 */ {3, 0x6}, /* 110 */ {5, 0x1c}, /* 11100 */ {5, 0x1d}, /* 11101 */ {6, 0x3c}, /* 111100 */ {7, 0x7a}, /* 1111010 */ {7, 0x7b}, /* 1111011 */ {8, 0xf8}, /* 11111000 */ {8, 0xf9}, /* 11111001 */ }; static int thwmaxcheck; void whackinit(Whack *tw, int level) { thwmaxcheck = (1 << level); thwmaxcheck -= thwmaxcheck >> 2; if(thwmaxcheck < 2) thwmaxcheck = 2; else if(thwmaxcheck > 1024) thwmaxcheck = 1024; memset(tw, 0, sizeof *tw); tw->begin = 2 * WhackMaxOff; } /* * find a string in the dictionary */ static int whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now) { ushort then, off, last; int bestoff, bestlen, check; uchar *s, *t; s = *ss; if(esrc < s + MinMatch) return -1; if(s + MaxLen < esrc) esrc = s + MaxLen; bestoff = 0; bestlen = 0; check = thwmaxcheck; last = 0; for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){ off = now - then; if(off <= last || off > WhackMaxOff) break; /* * don't need to check for the end because * 1) s too close check above */ t = s - off; if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){ if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){ t += 3; for(s += 3; s < esrc; s++){ if(*s != *t) break; t++; } if(s - *ss > bestlen){ bestlen = s - *ss; bestoff = off; if(bestlen > thwmaxcheck) break; } } } s = *ss; last = off; } *ss += bestlen; return bestoff; } /* * knuth vol. 3 multiplicative hashing * each byte x chosen according to rules * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 * with reasonable spread between the bytes & their complements * * the 3 byte value appears to be as almost good as the 4 byte value, * and might be faster on some machines */ /* #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask) */ #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask) /* * lz77 compression with single lookup in a hash table for each block */ int whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats]) { uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax; ulong cont, code, wbits; ushort now; int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits; if(!compressblocks || n < MinMatch) return -1; wdst = dst; wdmax = dst + n; now = w->begin; s = src; w->data = s; cont = (s[0] << 16) | (s[1] << 8) | s[2]; esrc = s + n; half = s + (n >> 1); wnbits = 0; wbits = 0; lits = 0; matches = 0; offbits = 0; lenbits = 0; lithist = ~0; while(s < esrc){ h = hashit(cont); sss = s; toff = whackmatch(w, &sss, esrc, h, now); ss = sss; len = ss - s; for(; wnbits >= 8; wnbits -= 8){ if(wdst >= wdmax){ w->begin = now; return -1; } *wdst++ = wbits >> (wnbits - 8); } if(len < MinMatch){ toff = *s; lithist = (lithist << 1) | toff < 32 | toff > 127; if(lithist & 0x1e){ wbits = (wbits << 9) | toff; wnbits += 9; }else if(lithist & 1){ toff = (toff + 64) & 0xff; if(toff < 96){ wbits = (wbits << 10) | toff; wnbits += 10; }else{ wbits = (wbits << 11) | toff; wnbits += 11; } }else{ wbits = (wbits << 8) | toff; wnbits += 8; } lits++; /* * speed hack * check for compression progress, bail if none achieved */ if(s > half){ if(4 * (s - src) < 5 * lits){ w->begin = now; return -1; } half = esrc; } if(s + MinMatch <= esrc){ w->next[now & (WhackMaxOff - 1)] = w->hash[h]; w->hash[h] = now; if(s + MinMatch < esrc) cont = (cont << 8) | s[MinMatch]; } now++; s++; continue; } matches++; /* * length of match */ if(len > MaxLen){ len = MaxLen; ss = s + len; } len -= MinMatch; if(len < MaxFastLen){ bits = lentab[len].bits; wbits = (wbits << bits) | lentab[len].encode; wnbits += bits; lenbits += bits; }else{ code = BigLenCode; bits = BigLenBits; use = BigLenBase; len -= MaxFastLen; while(len >= use){ len -= use; code = (code + use) << 1; use <<= (bits & 1) ^ 1; bits++; } wbits = (wbits << bits) | (code + len); wnbits += bits; lenbits += bits; for(; wnbits >= 8; wnbits -= 8){ if(wdst >= wdmax){ w->begin = now; return -1; } *wdst++ = wbits >> (wnbits - 8); } } /* * offset in history */ toff--; for(bits = MinOffBits; toff >= (1 << bits); bits++) ; if(bits < MaxOffBits-1){ wbits = (wbits << 3) | (bits - MinOffBits); if(bits != MinOffBits) bits--; wnbits += bits + 3; offbits += bits + 3; }else{ wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1)); bits--; wnbits += bits + 4; offbits += bits + 4; } wbits = (wbits << bits) | toff & ((1 << bits) - 1); for(; s != ss; s++){ if(s + MinMatch <= esrc){ h = hashit(cont); w->next[now & (WhackMaxOff - 1)] = w->hash[h]; w->hash[h] = now; if(s + MinMatch < esrc) cont = (cont << 8) | s[MinMatch]; } now++; } } w->begin = now; stats[StatBytes] += esrc - src; stats[StatLits] += lits; stats[StatMatches] += matches; stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits; stats[StatOffBits] += offbits; stats[StatLenBits] += lenbits; if(wnbits & 7){ wbits <<= 8 - (wnbits & 7); wnbits += 8 - (wnbits & 7); } for(; wnbits >= 8; wnbits -= 8){ if(wdst >= wdmax) return -1; *wdst++ = wbits >> (wnbits - 8); } stats[StatOutBytes] += wdst - dst; return wdst - dst; } int whackblock(uchar *dst, uchar *src, int ssize) { Whack w; ulong stats[MaxStat]; int r; whackinit(&w, 6); r = whack(&w, dst, src, ssize, stats); return r; }