ref: 1d891d163e814a93ee5346cf05b4fac297bf3a5c
dir: /sys/src/cmd/sed.c/
/* * sed -- stream editor */ #include <u.h> #include <libc.h> #include <bio.h> #include <regexp.h> enum { DEPTH = 20, /* max nesting depth of {} */ MAXCMDS = 512, /* max sed commands */ ADDSIZE = 10000, /* size of add & read buffer */ MAXADDS = 20, /* max pending adds and reads */ LBSIZE = 8192, /* input line size */ LABSIZE = 50, /* max number of labels */ MAXSUB = 10, /* max number of sub reg exp */ MAXFILES = 120, /* max output files */ }; /* * An address is a line #, a R.E., "$", a reference to the last * R.E., or nothing. */ typedef struct { enum { A_NONE, A_DOL, A_LINE, A_RE, A_LAST, }type; union { long line; /* Line # */ Reprog *rp; /* Compiled R.E. */ }; } Addr; typedef struct SEDCOM { Addr ad1; /* optional start address */ Addr ad2; /* optional end address */ union { Reprog *re1; /* compiled R.E. */ Rune *text; /* added text or file name */ struct SEDCOM *lb1; /* destination command of branch */ }; Rune *rhs; /* Right-hand side of substitution */ Biobuf* fcode; /* File ID for read and write */ char command; /* command code -see below */ char gfl; /* 'Global' flag for substitutions */ char pfl; /* 'print' flag for substitutions */ char active; /* 1 => data between start and end */ char negfl; /* negation flag */ } SedCom; /* Command Codes for field SedCom.command */ #define ACOM 01 #define BCOM 020 #define CCOM 02 #define CDCOM 025 #define CNCOM 022 #define COCOM 017 #define CPCOM 023 #define DCOM 03 #define ECOM 015 #define EQCOM 013 #define FCOM 016 #define GCOM 027 #define CGCOM 030 #define HCOM 031 #define CHCOM 032 #define ICOM 04 #define LCOM 05 #define NCOM 012 #define PCOM 010 #define QCOM 011 #define RCOM 06 #define SCOM 07 #define TCOM 021 #define WCOM 014 #define CWCOM 024 #define YCOM 026 #define XCOM 033 typedef struct label { /* Label symbol table */ Rune uninm[9]; /* Label name */ SedCom *chain; SedCom *address; /* Command associated with label */ } Label; typedef struct FILE_CACHE { /* Data file control block */ struct FILE_CACHE *next; /* Forward Link */ char *name; /* Name of file */ } FileCache; SedCom pspace[MAXCMDS]; /* Command storage */ SedCom *pend = pspace+MAXCMDS; /* End of command storage */ SedCom *rep = pspace; /* Current fill point */ Reprog *lastre; /* Last regular expression */ Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ Rune *addend = addspace+ADDSIZE; SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ SedCom **aptr = abuf; struct { /* Sed program input control block */ enum PTYPE { /* Either on command line or in file */ P_ARG, P_FILE, } type; union PCTL { /* Pointer to data */ Biobuf *bp; char *curr; }; } prog; Rune genbuf[LBSIZE+1]; /* Miscellaneous buffer */ FileCache *fhead; /* Head of File Cache Chain */ FileCache *ftail; /* Tail of File Cache Chain */ Rune *loc1; /* Start of pattern match */ Rune *loc2; /* End of pattern match */ Rune seof; /* Pattern delimiter char */ Rune linebuf[LBSIZE+1]; /* Input data buffer */ Rune *lbend = linebuf+LBSIZE; /* End of buffer */ Rune *spend = linebuf; /* End of input data */ Rune *cp; /* Current scan point in linebuf */ Rune holdsp[LBSIZE+1]; /* Hold buffer */ Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ Rune *hspend = holdsp; /* End of hold data */ int nflag; /* Command line flags */ int gflag; int dolflag; /* Set when at true EOF */ int sflag; /* Set when substitution done */ int jflag; /* Set when jump required */ int delflag; /* Delete current line when set */ long lnum; /* Input line count */ char fname[MAXFILES][40]; /* File name cache */ Biobuf *fcode[MAXFILES]; /* File ID cache */ int nfiles; /* Cache fill point */ Biobuf fout; /* Output stream */ Biobuf stdin; /* Default input */ Biobuf* f; /* Input data */ Label ltab[LABSIZE]; /* Label name symbol table */ Label *labend = ltab+LABSIZE; /* End of label table */ Label *lab = ltab+1; /* Current Fill point */ int depth; /* {} stack pointer */ Rune bad; /* Dummy err ptr reference */ Rune *badp = &bad; char CGMES[] = "%S command garbled: %S"; char TMMES[] = "Too much text: %S"; char LTL[] = "Label too long: %S"; char AD0MES[] = "No addresses allowed: %S"; char AD1MES[] = "Only one address allowed: %S"; void address(Addr *); void arout(void); int cmp(char *, char *); int rcmp(Rune *, Rune *); void command(SedCom *); Reprog *compile(void); Rune *compsub(Rune *, Rune *); void dechain(void); void dosub(Rune *); void enroll(char *); void errexit(void); int executable(SedCom *); void execute(void); void fcomp(void); long getrune(void); Rune *gline(Rune *); int match(Reprog *, Rune *); void newfile(enum PTYPE, char *); int opendata(void); Biobuf *open_file(char *); Rune *place(Rune *, Rune *, Rune *); void quit(char *, ...); int rline(Rune *, Rune *); Label *search(Label *); int substitute(SedCom *); char *text(char *); Rune *stext(Rune *, Rune *); int ycomp(SedCom *); char * trans(int c); void putline(Biobuf *bp, Rune *buf, int n); void main(int argc, char **argv) { int compfl; lnum = 0; Binit(&fout, 1, OWRITE); Blethal(&fout, nil); fcode[nfiles++] = &fout; compfl = 0; if(argc == 1) exits(nil); ARGBEGIN{ case 'e': if (argc <= 1) quit("missing pattern"); newfile(P_ARG, ARGF()); fcomp(); compfl = 1; continue; case 'f': if(argc <= 1) quit("no pattern-file"); newfile(P_FILE, ARGF()); fcomp(); compfl = 1; continue; case 'g': gflag++; continue; case 'n': nflag++; continue; default: quit("Unknown flag: %c", ARGC()); } ARGEND if(compfl == 0) { if (--argc < 0) quit("missing pattern"); newfile(P_ARG, *argv++); fcomp(); } if(depth) quit("Too many {'s"); ltab[0].address = rep; dechain(); if(argc <= 0) enroll(nil); /* Add stdin to cache */ else while(--argc >= 0) enroll(*argv++); execute(); exits(nil); } void fcomp(void) { int i; Label *lpt; Rune *tp; SedCom *pt, *pt1; static Rune *p = addspace; static SedCom **cmpend[DEPTH]; /* stack of {} operations */ while (rline(linebuf, lbend) >= 0) { cp = linebuf; comploop: while(*cp == L' ' || *cp == L'\t') cp++; if(*cp == L'\0' || *cp == L'#') continue; if(*cp == L';') { cp++; goto comploop; } address(&rep->ad1); if (rep->ad1.type != A_NONE) { if (rep->ad1.type == A_LAST) { if (!lastre) quit("First RE may not be null"); rep->ad1.type = A_RE; rep->ad1.rp = lastre; } if(*cp == L',' || *cp == L';') { cp++; address(&rep->ad2); if (rep->ad2.type == A_LAST) { rep->ad2.type = A_RE; rep->ad2.rp = lastre; } } else rep->ad2.type = A_NONE; } while(*cp == L' ' || *cp == L'\t') cp++; swit: switch(*cp++) { default: quit("Unrecognized command: %S", linebuf); case '!': rep->negfl = 1; goto swit; case '{': rep->command = BCOM; rep->negfl = !rep->negfl; cmpend[depth++] = &rep->lb1; if(++rep >= pend) quit("Too many commands: %S", linebuf); if(*cp == '\0') continue; goto comploop; case '}': if(rep->ad1.type != A_NONE) quit(AD0MES, linebuf); if(--depth < 0) quit("Too many }'s"); *cmpend[depth] = rep; if(*cp == 0) continue; goto comploop; case '=': rep->command = EQCOM; if(rep->ad2.type != A_NONE) quit(AD1MES, linebuf); break; case ':': if(rep->ad1.type != A_NONE) quit(AD0MES, linebuf); while(*cp == L' ') cp++; tp = lab->uninm; while (*cp && *cp != L';' && *cp != L' ' && *cp != L'\t' && *cp != L'#') { *tp++ = *cp++; if(tp >= &lab->uninm[8]) quit(LTL, linebuf); } *tp = L'\0'; if (*lab->uninm == L'\0') /* no label? */ quit(CGMES, L":", linebuf); if(lpt = search(lab)) { if(lpt->address) quit("Duplicate labels: %S", linebuf); } else { lab->chain = 0; lpt = lab; if(++lab >= labend) quit("Too many labels: %S", linebuf); } lpt->address = rep; if (*cp == L'#') continue; rep--; /* reuse this slot */ break; case 'a': rep->command = ACOM; if(rep->ad2.type != A_NONE) quit(AD1MES, linebuf); if(*cp == L'\\') cp++; if(*cp++ != L'\n') quit(CGMES, L"a", linebuf); rep->text = p; p = stext(p, addend); break; case 'c': rep->command = CCOM; if(*cp == L'\\') cp++; if(*cp++ != L'\n') quit(CGMES, L"c", linebuf); rep->text = p; p = stext(p, addend); break; case 'i': rep->command = ICOM; if(rep->ad2.type != A_NONE) quit(AD1MES, linebuf); if(*cp == L'\\') cp++; if(*cp++ != L'\n') quit(CGMES, L"i", linebuf); rep->text = p; p = stext(p, addend); break; case 'g': rep->command = GCOM; break; case 'G': rep->command = CGCOM; break; case 'h': rep->command = HCOM; break; case 'H': rep->command = CHCOM; break; case 't': rep->command = TCOM; goto jtcommon; case 'b': rep->command = BCOM; jtcommon: while(*cp == L' ') cp++; if(*cp == L'\0' || *cp == L';') { /* no label; jump to end */ if(pt = ltab[0].chain) { while((pt1 = pt->lb1) != nil) pt = pt1; pt->lb1 = rep; } else ltab[0].chain = rep; break; } /* copy label into lab->uninm */ tp = lab->uninm; while((*tp = *cp++) != L'\0' && *tp != L';') if(++tp >= &lab->uninm[8]) quit(LTL, linebuf); cp--; *tp = L'\0'; if (*lab->uninm == L'\0') /* shouldn't get here */ quit(CGMES, L"b or t", linebuf); if((lpt = search(lab)) != nil) { if(lpt->address) rep->lb1 = lpt->address; else { for(pt = lpt->chain; pt != nil && (pt1 = pt->lb1) != nil; pt = pt1) ; if (pt) pt->lb1 = rep; } } else { /* add new label */ lab->chain = rep; lab->address = 0; if(++lab >= labend) quit("Too many labels: %S", linebuf); } break; case 'n': rep->command = NCOM; break; case 'N': rep->command = CNCOM; break; case 'p': rep->command = PCOM; break; case 'P': rep->command = CPCOM; break; case 'r': rep->command = RCOM; if(rep->ad2.type != A_NONE) quit(AD1MES, linebuf); if(*cp++ != L' ') quit(CGMES, L"r", linebuf); rep->text = p; p = stext(p, addend); break; case 'd': rep->command = DCOM; break; case 'D': rep->command = CDCOM; rep->lb1 = pspace; break; case 'q': rep->command = QCOM; if(rep->ad2.type != A_NONE) quit(AD1MES, linebuf); break; case 'l': rep->command = LCOM; break; case 's': rep->command = SCOM; seof = *cp++; if ((rep->re1 = compile()) == 0) { if(!lastre) quit("First RE may not be null"); rep->re1 = lastre; } rep->rhs = p; if((p = compsub(p, addend)) == 0) quit(CGMES, L"s", linebuf); if(*cp == L'g') { cp++; rep->gfl++; } else if(gflag) rep->gfl++; if(*cp == L'p') { cp++; rep->pfl = 1; } if(*cp == L'P') { cp++; rep->pfl = 2; } if(*cp == L'w') { cp++; if(*cp++ != L' ') quit(CGMES, L"s", linebuf); text(fname[nfiles]); for(i = nfiles - 1; i >= 0; i--) if(cmp(fname[nfiles], fname[i]) == 0) { rep->fcode = fcode[i]; goto done; } if(nfiles >= MAXFILES) quit("Too many files in w commands 1"); rep->fcode = open_file(fname[nfiles]); } break; case 'w': rep->command = WCOM; if(*cp++ != L' ') quit(CGMES, L"w", linebuf); text(fname[nfiles]); for(i = nfiles - 1; i >= 0; i--) if(cmp(fname[nfiles], fname[i]) == 0) { rep->fcode = fcode[i]; goto done; } if(nfiles >= MAXFILES){ fprint(2, "sed: Too many files in w commands 2 \n"); fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); errexit(); } rep->fcode = open_file(fname[nfiles]); break; case 'x': rep->command = XCOM; break; case 'y': rep->command = YCOM; seof = *cp++; if (ycomp(rep) == 0) quit(CGMES, L"y", linebuf); break; } done: if(++rep >= pend) quit("Too many commands, last: %S", linebuf); if(*cp++ != L'\0') { if(cp[-1] == L';') goto comploop; quit(CGMES, cp - 1, linebuf); } } } Biobuf * open_file(char *name) { int fd; Biobuf *bp; if ((bp = malloc(sizeof(Biobuf))) == 0) quit("Out of memory"); if ((fd = open(name, OWRITE)) < 0 && (fd = create(name, OWRITE, 0666)) < 0) quit("Cannot create %s", name); Binit(bp, fd, OWRITE); Blethal(bp, nil); Bseek(bp, 0, 2); fcode[nfiles++] = bp; return bp; } Rune * compsub(Rune *rhs, Rune *end) { Rune r; while ((r = *cp++) != '\0') { if(r == '\\') { if (rhs < end) *rhs++ = Runemax; else return 0; r = *cp++; if(r == 'n') r = '\n'; } else { if(r == seof) { if (rhs < end) *rhs++ = '\0'; else return 0; return rhs; } } if (rhs < end) *rhs++ = r; else return 0; } return 0; } Reprog * compile(void) { Rune c; char *ep; char expbuf[512]; if((c = *cp++) == seof) /* L'//' */ return 0; ep = expbuf; do { if (c == L'\0' || c == L'\n') quit(TMMES, linebuf); if (c == L'\\') { if (ep >= expbuf+sizeof(expbuf)) quit(TMMES, linebuf); ep += runetochar(ep, &c); if ((c = *cp++) == L'n') c = L'\n'; } if (ep >= expbuf + sizeof(expbuf)) quit(TMMES, linebuf); ep += runetochar(ep, &c); } while ((c = *cp++) != seof); *ep = 0; return lastre = regcomp(expbuf); } void regerror(char *s) { USED(s); quit(CGMES, L"r.e.-using", linebuf); } void newfile(enum PTYPE type, char *name) { if (type == P_ARG) prog.curr = name; else { if ((prog.bp = Bopen(name, OREAD)) == 0) quit("Cannot open pattern-file: %s\n", name); Blethal(prog.bp, nil); } prog.type = type; } int rline(Rune *buf, Rune *end) { long c; Rune r; while ((c = getrune()) >= 0) { r = c; if (r == '\\') { if (buf <= end) *buf++ = r; if ((c = getrune()) < 0) break; r = c; } else if (r == '\n') { *buf = '\0'; return 1; } if (buf <= end) *buf++ = r; } *buf = '\0'; return -1; } long getrune(void) { long c; Rune r; char *p; if (prog.type == P_ARG) { if ((p = prog.curr) != 0) { if (*p) { prog.curr += chartorune(&r, p); c = r; } else { c = '\n'; /* fake an end-of-line */ prog.curr = 0; } } else c = -1; } else if ((c = Bgetrune(prog.bp)) < 0) Bterm(prog.bp); return c; } void address(Addr *ap) { int c; long lno; if((c = *cp++) == '$') ap->type = A_DOL; else if(c == '/') { seof = c; if (ap->rp = compile()) ap->type = A_RE; else ap->type = A_LAST; } else if (c >= '0' && c <= '9') { lno = c - '0'; while ((c = *cp) >= '0' && c <= '9') lno = lno*10 + *cp++ - '0'; if(!lno) quit("line number 0 is illegal",0); ap->type = A_LINE; ap->line = lno; } else { cp--; ap->type = A_NONE; } } cmp(char *a, char *b) /* compare characters */ { while(*a == *b++) if (*a == '\0') return 0; else a++; return 1; } rcmp(Rune *a, Rune *b) /* compare runes */ { while(*a == *b++) if (*a == '\0') return 0; else a++; return 1; } char * text(char *p) /* extract character string */ { Rune r; while(*cp == ' ' || *cp == '\t') cp++; while (*cp) { if ((r = *cp++) == '\\' && (r = *cp++) == '\0') break; if (r == '\n') while (*cp == ' ' || *cp == '\t') cp++; p += runetochar(p, &r); } *p++ = '\0'; return p; } Rune * stext(Rune *p, Rune *end) /* extract rune string */ { while(*cp == L' ' || *cp == L'\t') cp++; while (*cp) { if (*cp == L'\\' && *++cp == L'\0') break; if (p >= end-1) quit(TMMES, linebuf); if ((*p++ = *cp++) == L'\n') while(*cp == L' ' || *cp == L'\t') cp++; } *p++ = 0; return p; } Label * search(Label *ptr) { Label *rp; for (rp = ltab; rp < ptr; rp++) if(rcmp(rp->uninm, ptr->uninm) == 0) return(rp); return(0); } void dechain(void) { Label *lptr; SedCom *rptr, *trptr; for(lptr = ltab; lptr < lab; lptr++) { if(lptr->address == 0) quit("Undefined label: %S", lptr->uninm); if(lptr->chain) { rptr = lptr->chain; while((trptr = rptr->lb1) != nil) { rptr->lb1 = lptr->address; rptr = trptr; } rptr->lb1 = lptr->address; } } } int ycomp(SedCom *r) { int i; Rune *rp, *sp, *tsp; Rune c, highc; highc = 0; for(tsp = cp; *tsp != seof; tsp++) { if(*tsp == L'\\') tsp++; if(*tsp == L'\n' || *tsp == L'\0') return 0; if (*tsp > highc) highc = *tsp; } tsp++; if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil) quit("Out of memory"); *rp++ = highc; /* save upper bound */ for (i = 0; i <= highc; i++) rp[i] = i; sp = cp; while((c = *sp++) != seof) { if(c == L'\\' && *sp == L'n') { sp++; c = L'\n'; } if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') { rp[c] = L'\n'; tsp++; } if(rp[c] == seof || rp[c] == L'\0') { free(r->re1); r->re1 = nil; return 0; } } if(*tsp != seof) { free(r->re1); r->re1 = nil; return 0; } cp = tsp+1; return 1; } void execute(void) { SedCom *ipc; while (spend = gline(linebuf)){ for(ipc = pspace; ipc->command; ) { if (!executable(ipc)) { ipc++; continue; } command(ipc); if(delflag) break; if(jflag) { jflag = 0; if((ipc = ipc->lb1) == 0) break; } else ipc++; } if(!nflag && !delflag) putline(&fout, linebuf, spend - linebuf); if(aptr > abuf) arout(); delflag = 0; } } /* determine if a statement should be applied to an input line */ int executable(SedCom *ipc) { if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ if (ipc->active == 1) /* Second line */ ipc->active = 2; switch(ipc->ad2.type) { case A_NONE: /* No second addr; use first */ ipc->active = 0; break; case A_DOL: /* Accept everything */ return !ipc->negfl; case A_LINE: /* Line at end of range? */ if (lnum <= ipc->ad2.line) { if (ipc->ad2.line == lnum) ipc->active = 0; return !ipc->negfl; } ipc->active = 0; /* out of range */ return ipc->negfl; case A_RE: /* Check for matching R.E. */ if (match(ipc->ad2.rp, linebuf)) ipc->active = 0; return !ipc->negfl; default: quit("Internal error"); } } switch (ipc->ad1.type) { /* Check first address */ case A_NONE: /* Everything matches */ return !ipc->negfl; case A_DOL: /* Only last line */ if (dolflag) return !ipc->negfl; break; case A_LINE: /* Check line number */ if (ipc->ad1.line == lnum) { ipc->active = 1; /* In range */ return !ipc->negfl; } break; case A_RE: /* Check R.E. */ if (match(ipc->ad1.rp, linebuf)) { ipc->active = 1; /* In range */ return !ipc->negfl; } break; default: quit("Internal error"); } return ipc->negfl; } int match(Reprog *pattern, Rune *buf) { if (!pattern) return 0; subexp[0].rsp = buf; subexp[0].ep = 0; if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) { loc1 = subexp[0].rsp; loc2 = subexp[0].rep; return 1; } loc1 = loc2 = 0; return 0; } int substitute(SedCom *ipc) { int len; if(!match(ipc->re1, linebuf)) return 0; /* * we have at least one match. some patterns, e.g. '$' or '^', can * produce 0-length matches, so during a global substitute we must * bump to the character after a 0-length match to keep from looping. */ sflag = 1; if(ipc->gfl == 0) /* single substitution */ dosub(ipc->rhs); else do{ /* global substitution */ len = loc2 - loc1; /* length of match */ dosub(ipc->rhs); /* dosub moves loc2 */ if(*loc2 == 0) /* end of string */ break; if(len == 0) /* zero-length R.E. match */ loc2++; /* bump over 0-length match */ if(*loc2 == 0) /* end of string */ break; } while(match(ipc->re1, loc2)); return 1; } void dosub(Rune *rhsbuf) { int c, n; Rune *lp, *sp, *rp; lp = linebuf; sp = genbuf; rp = rhsbuf; while (lp < loc1) *sp++ = *lp++; while(c = *rp++) { if (c == '&') { sp = place(sp, loc1, loc2); continue; } if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') { n = c-'0'; if (subexp[n].rsp && subexp[n].rep) { sp = place(sp, subexp[n].rsp, subexp[n].rep); continue; } else { quit("Invalid back reference \\%d", n); } } *sp++ = c; if (sp >= &genbuf[LBSIZE]) quit("Output line too long"); } lp = loc2; loc2 = sp - genbuf + linebuf; while (*sp++ = *lp++) if (sp >= &genbuf[LBSIZE]) quit("Output line too long"); lp = linebuf; sp = genbuf; while (*lp++ = *sp++) ; spend = lp - 1; } Rune * place(Rune *sp, Rune *l1, Rune *l2) { while (l1 < l2) { *sp++ = *l1++; if (sp >= &genbuf[LBSIZE]) quit("Output line too long"); } return sp; } char * trans(int c) { static char buf[] = "\\x0000"; static char hex[] = "0123456789abcdef"; switch(c) { case '\b': return "\\b"; case '\n': return "\\n"; case '\r': return "\\r"; case '\t': return "\\t"; case '\\': return "\\\\"; } buf[2] = hex[(c>>12)&0xF]; buf[3] = hex[(c>>8)&0xF]; buf[4] = hex[(c>>4)&0xF]; buf[5] = hex[c&0xF]; return buf; } void command(SedCom *ipc) { int i, c; char *ucp; Rune *execp, *p1, *p2, *rp; switch(ipc->command) { case ACOM: *aptr++ = ipc; if(aptr >= abuf+MAXADDS) quit("Too many appends after line %ld", lnum); *aptr = 0; break; case CCOM: delflag = 1; if(ipc->active == 1) { for(rp = ipc->text; *rp; rp++) Bputrune(&fout, *rp); Bputc(&fout, '\n'); } break; case DCOM: delflag++; break; case CDCOM: p1 = p2 = linebuf; while(*p1 != '\n') { if(*p1++ == 0) { delflag++; return; } } p1++; while(*p2++ = *p1++) ; spend = p2 - 1; jflag++; break; case EQCOM: Bprint(&fout, "%ld\n", lnum); break; case GCOM: p1 = linebuf; p2 = holdsp; while(*p1++ = *p2++) ; spend = p1 - 1; break; case CGCOM: *spend++ = '\n'; p1 = spend; p2 = holdsp; while(*p1++ = *p2++) if(p1 >= lbend) break; spend = p1 - 1; break; case HCOM: p1 = holdsp; p2 = linebuf; while(*p1++ = *p2++); hspend = p1 - 1; break; case CHCOM: *hspend++ = '\n'; p1 = hspend; p2 = linebuf; while(*p1++ = *p2++) if(p1 >= hend) break; hspend = p1 - 1; break; case ICOM: for(rp = ipc->text; *rp; rp++) Bputrune(&fout, *rp); Bputc(&fout, '\n'); break; case BCOM: jflag = 1; break; case LCOM: c = 0; for (i = 0, rp = linebuf; *rp; rp++) { c = *rp; if(c >= 0x20 && c < 0x7F && c != '\\') { Bputc(&fout, c); if(i++ > 71) { Bprint(&fout, "\\\n"); i = 0; } } else { for (ucp = trans(*rp); *ucp; ucp++){ c = *ucp; Bputc(&fout, c); if(i++ > 71) { Bprint(&fout, "\\\n"); i = 0; } } } } if(c == ' ') Bprint(&fout, "\\n"); Bputc(&fout, '\n'); break; case NCOM: if(!nflag) putline(&fout, linebuf, spend-linebuf); if(aptr > abuf) arout(); if((execp = gline(linebuf)) == 0) { delflag = 1; break; } spend = execp; break; case CNCOM: if(aptr > abuf) arout(); *spend++ = '\n'; if((execp = gline(spend)) == 0) { delflag = 1; break; } spend = execp; break; case PCOM: putline(&fout, linebuf, spend-linebuf); break; case CPCOM: cpcom: for(rp = linebuf; *rp && *rp != '\n'; rp++) Bputc(&fout, *rp); Bputc(&fout, '\n'); break; case QCOM: if(!nflag) putline(&fout, linebuf, spend-linebuf); if(aptr > abuf) arout(); exits(nil); case RCOM: *aptr++ = ipc; if(aptr >= &abuf[MAXADDS]) quit("Too many reads after line %ld", lnum); *aptr = 0; break; case SCOM: i = substitute(ipc); if(i && ipc->pfl) if(ipc->pfl == 1) putline(&fout, linebuf, spend-linebuf); else goto cpcom; if(i && ipc->fcode) goto wcom; break; case TCOM: if(sflag) { sflag = 0; jflag = 1; } break; case WCOM: wcom: putline(ipc->fcode,linebuf, spend - linebuf); break; case XCOM: p1 = linebuf; p2 = genbuf; while(*p2++ = *p1++) ; p1 = holdsp; p2 = linebuf; while(*p2++ = *p1++) ; spend = p2 - 1; p1 = genbuf; p2 = holdsp; while(*p2++ = *p1++) ; hspend = p2 - 1; break; case YCOM: p1 = linebuf; p2 = ipc->text; for (i = *p2++; *p1; p1++) if (*p1 <= i) *p1 = p2[*p1]; break; } } void putline(Biobuf *bp, Rune *buf, int n) { while (n--) Bputrune(bp, *buf++); Bputc(bp, '\n'); } void arout(void) { int c; char *s, *e; char buf[128]; Rune *p1; Biobuf *fi; for (aptr = abuf; *aptr; aptr++) { if((*aptr)->command == ACOM) { for(p1 = (*aptr)->text; *p1; p1++ ) Bputrune(&fout, *p1); Bputc(&fout, '\n'); } else { for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++) s += runetochar(s, p1); *s = '\0'; if((fi = Bopen(buf, OREAD)) == 0) continue; Blethal(fi, nil); while((c = Bgetc(fi)) >= 0) Bputc(&fout, c); Bterm(fi); } } aptr = abuf; *aptr = 0; } void errexit(void) { exits("error"); } void quit(char *fmt, ...) { char *p, *ep; char msg[256]; va_list arg; ep = msg + sizeof msg; p = seprint(msg, ep, "sed: "); va_start(arg, fmt); p = vseprint(p, ep, fmt, arg); va_end(arg); p = seprint(p, ep, "\n"); write(2, msg, p - msg); errexit(); } Rune * gline(Rune *addr) { long c; Rune *p; static long peekc = 0; if (f == 0 && opendata() < 0) return 0; sflag = 0; lnum++; /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ do { p = addr; for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) { if (c == '\n') { if ((peekc = Bgetrune(f)) < 0 && fhead == nil) dolflag = 1; *p = '\0'; return p; } if (c && p < lbend) *p++ = c; } /* return partial final line, adding implicit newline */ if(p != addr) { *p = '\0'; peekc = -1; if (fhead == nil) dolflag = 1; return p; } peekc = 0; Bterm(f); } while (opendata() > 0); /* Switch to next stream */ f = 0; return 0; } /* * Data file input section - the intent is to transparently * catenate all data input streams. */ void enroll(char *filename) /* Add a file to the input file cache */ { FileCache *fp; if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil) quit("Out of memory"); if (ftail == nil) fhead = fp; else ftail->next = fp; ftail = fp; fp->next = nil; fp->name = filename; /* 0 => stdin */ } int opendata(void) { if (fhead == nil) return -1; if (fhead->name) { if ((f = Bopen(fhead->name, OREAD)) == nil) quit("Can't open %s", fhead->name); } else { Binit(&stdin, 0, OREAD); f = &stdin; } Blethal(f, nil); fhead = fhead->next; return 1; }