ref: 4dbb99f478d68eaeea4c043a839454cb807c2c7d
dir: /sys/src/cmd/aux/wikifmt.c/
/*
 * google code wiki to html converter.
 * https://code.google.com/p/support/wiki/WikiSyntax
 *
 * main() reads the whole input into memory directly after a
 * HUNK-sized output buffer.  The parser walks the input with the
 * global cursor pos (bounded by epos) and appends html to the
 * output buffer, which is written to fd 1 whenever it fills up.
 */
#include <u.h>
#include <libc.h>

enum {
	HUNK = 8*1024,	/* output buffer size and read(2) chunk size */
};

char *buf;	/* start of the output buffer */
char *pos;	/* current input position */
char *epos;	/* end of the input region currently being parsed */
char *opos;	/* next free byte in the output buffer */

int inquote = 0;	/* indent of the enclosing blockquote, 0 if none */
int intable = 0;	/* 0: no table, 1: table open, 2: table row open */
int inlist = 0;		/* indent of the enclosing list, 0 if none */
int indent = -1;	/* indent of the current line, -1: not measured yet */

void body(void);

/*
 * Report whether the input at pos starts with s (compared
 * case-insensitively) without looking past epos.
 */
int
match(char *s)
{
	int n;

	n = strlen(s);
	if(pos+n > epos)
		return 0;
	return cistrncmp(pos, s, n) == 0;
}

/*
 * Like match(), but on success also advance pos past s.
 */
int
got(char *s)
{
	if(!match(s))
		return 0;
	pos += strlen(s);
	return 1;
}

/*
 * Find the first case-insensitive occurrence of s that starts at
 * or after pos and ends at or before e.  A nil e means epos.
 * Returns a pointer to the occurrence, or nil if there is none.
 */
char*
look(char *s, char *e)
{
	char *p;
	int n;

	if(e == nil)
		e = epos;
	n = strlen(s);
	e -= n;
	for(p = pos; p <= e; p++)
		if(cistrncmp(p, s, n) == 0)
			return p;
	return nil;
}

/*
 * Advance pos over spaces and tabs.
 */
void
eatspace(void)
{
	while(pos < epos && (*pos == ' ' || *pos == '\t'))
		pos++;
}

/*
 * Move s backwards over trailing spaces and tabs, but never
 * before pos, and return the adjusted pointer.
 */
char*
trimback(char *s)
{
	while(s > pos && strchr("\t ", s[-1]) != nil)
		s--;
	return s;
}

/*
 * Write the pending contents of the output buffer to fd 1 and
 * reset it.  A short or failed write is fatal.
 */
void
flush(void)
{
	int n;

	n = opos - buf;
	if(n <= 0)
		return;
	if(write(1, buf, n) != n)
		sysfatal("write: %r");
	opos = buf;
}

/*
 * Append n bytes at s to the output buffer.  Data that does not
 * fit in the remaining space is split; the buffer is flushed as
 * soon as it becomes full, so there is always room on entry.
 */
void
output(char *s, int n)
{
	int r;

	if(n <= 0)
		return;
	r = HUNK - (opos - buf);
	if(n > r){
		output(s, r);
		output(s+r, n-r);
	} else {
		memmove(opos, s, n);
		opos += n;
		if(r == n)
			flush();
	}
}

/*
 * Append the NUL terminated string s to the output.
 */
void
string(char *s)
{
	output(s, strlen(s));
}

/*
 * Copy the input from pos up to e to the output, replacing the
 * html special characters <, > and & by character references.
 * Leaves pos at e (pos is left alone if it is already past e).
 */
void
escape(char *e)
{
	char *p, *ent;

	for(p = pos; p < e; p++){
		switch(*p){
		case '<':
			ent = "&lt;";
			break;
		case '>':
			ent = "&gt;";
			break;
		case '&':
			ent = "&amp;";
			break;
		default:
			continue;
		}
		/* emit the literal run before the special character */
		output(pos, p - pos);
		pos = p+1;
		string(ent);
	}
	output(pos, p - pos);
	pos = p;
}

/*
 * Run body() on the input from pos up to e with trailing blanks
 * trimmed, by temporarily lowering epos.  Leaves pos at e.
 */
void
ebody(char *e)
{
	char *t;

	t = epos;
	epos = trimback(e);
	body();
	pos = e;
	epos = t;
}

/*
 * Handle inline markup delimited by term on both sides.
 * Returns 0 without consuming anything if the input does not
 * start with term.  Otherwise term is consumed: when a closing
 * term exists before epos the enclosed text is formatted inside
 * <tag>...</tag>, else term is emitted literally.  Returns 1.
 */
int
tag(char *term, char *tag)
{
	char *e;

	if(!got(term))
		return 0;
	if(e = look(term, nil)){
		eatspace();
		string("<");
		string(tag);
		string(">");
		ebody(e);
		string("</");
		string(tag);
		string(">");
		pos += strlen(term);
	} else
		string(term);
	return 1;
}

/*
 * Handle a heading line introduced by one to six '=' characters.
 * Emits <hN> with a named anchor derived from the title followed
 * by the formatted title.  Returns 0 without consuming anything
 * if the input does not start with '='.
 */
int
heading(void)
{
	char *o, *s, *e;
	int n;

	/* try the longest marker first; s ends up at the matched suffix */
	for(s = "======"; *s; s++)
		if(got(s))
			break;
	if(*s == 0)
		return 0;
	n = strlen(s);

	/* the title ends at the first '=' on the line, else at end of line */
	e = look("=", look("\n", nil));
	if(e == nil)
		e = look("\n", nil);
	if(e == nil)
		e = epos;

	eatspace();
	string("<h");
	output("0123456"+n, 1);
	string("><a name=\"");

	/*
	 * First pass over the title: build the anchor name.  Blanks
	 * become underscores, html special characters become
	 * character references and bytes not listed are dropped.
	 */
	o = pos;
	s = trimback(e);
	while(pos < s){
		if((*pos >= 'a' && *pos <= 'z')
		|| (*pos >= 'A' && *pos <= 'Z')
		|| (*pos >= '0' && *pos <= '9')
		|| (strchr("!#$%()_+,-./{|}~:;=?@[\\]^_`", *pos) != 0))
			output(pos, 1);
		else if(*pos == ' ' || *pos == '\t')
			output("_", 1);
		else if(*pos == '<')
			string("&lt;");
		else if(*pos == '>')
			string("&gt;");
		else if(*pos == '&')
			string("&amp;");
		else if(*pos == '"')
			string("&quot;");
		else if(*pos == '\'')
			string("&#39;");
		pos++;
	}
	string("\"></a>");

	/* second pass: format the title itself */
	pos = o;
	ebody(e);

	/* skip the closing marker */
	while(got("="))
		;
	string("</h");
	output("0123456"+n, 1);
	string(">");
	return 1;
}

/*
 * Emit a link or image for the target starting at pos.
 *
 * e is the end of a bracketed link ("[target label]") or nil for
 * a bare url.  The target ends at e, at epos, or at the first
 * NUL, blank, line break, bracket or angle bracket.  Targets
 * ending in .png, .jpg or .gif become <img>; everything else
 * becomes <a>, labelled with the text between the target and e
 * if there is any, else with the target itself.
 */
void
link(char *e)
{
	char *s, *o;
	int isimg;

	s = o = pos;
	while(s < epos){
		if(e != nil && s >= e)
			break;
		if(*s == 0 || strchr("<>[] \t\r\n", *s) != nil)
			break;
		s++;
	}

	/*
	 * Targets shorter than an image suffix are ordinary links.
	 * (They used to fall through a dangling else and vanish.)
	 */
	isimg = s-4 >= o
		&& (cistrncmp(s-4, ".png", 4) == 0
		|| cistrncmp(s-4, ".jpg", 4) == 0
		|| cistrncmp(s-4, ".gif", 4) == 0);

	if(!isimg){
		string("<a href=\"");
		escape(s);
		string("\">");
		eatspace();
		if(e != nil && pos < e)
			ebody(e);
		else {
			/* no label: show the target itself */
			pos = o;
			escape(s);
		}
		string("</a>");
	} else {
		string("<img src=\"");
		escape(s);
		string("\">");
	}
}

/*
 * Format the input from pos up to epos.
 *
 * Calls itself for nested lists and blockquotes; such a nested
 * call returns when a new line is indented less than the
 * enclosing list or quote (inlist, inquote).
 */
void
body(void)
{
	char *s;
	int t;

Next:
	if(pos >= epos)
		return;

	/* one line break starts a new line, further ones make a <br> */
	if(got("\n") || got("\r\n"))
		indent = -1;
	if(got("\n") || got("\r\n")){
		string("<br>");
		while(got("\n") || got("\r\n"))
			;
	}

	if(indent == -1){
		/* start of a line: measure its indentation */
		indent = 0;
		for(;;){
			if(got(" "))
				indent++;
			else if(got("\t")){
				/* advance to the next multiple of 8 */
				indent += 8;
				indent -= indent % 8;
			}
			else
				break;
		}

		/* a line without a cell separator ends the table */
		if(intable && look("||", look("\n", nil)) == nil){
			string("</table>");
			intable = 0;
		}

		string("\n");

		/* less indented than the enclosing block: let the caller close it */
		if((indent < inlist) || (indent < inquote))
			return;

		while(indent > 0){
			if(pos >= epos)
				return;
			if(got("*") || got("#")){
				s = pos-1;	/* the list marker */
				eatspace();
				if(indent > inlist){
					/* deeper than the current list: open a new one */
					if(*s == '*')
						string("<ul><li>");
					else
						string("<ol><li>");
					t = inlist;
					inlist = indent;
					body();
					inlist = t;
					if(*s == '*')
						string("</li></ul>");
					else
						string("</li></ol>");
				} else {
					/* next item of the current list */
					string("</li><li>");
					break;
				}
			}
			else if(indent > inquote){
				string("<blockquote>");
				t = inquote;
				inquote = indent;
				body();
				inquote = t;
				string("</blockquote>");
			}
			else
				break;
		}

		if(indent == 0){
			/* an unindented line starting with '#' is skipped entirely */
			if(got("#")){
				if((pos = look("\n", nil)) == nil)
					pos = epos;
				goto Next;
			}
			if(heading())
				goto Next;
			if(got("----")){
				while(got("-"))
					;
				string("<hr>");
				goto Next;
			}
		}
	}

	if(got("`")){
		/* `verbatim text` */
		if(s = look("`", nil)){
			escape(s);
			pos = s+1;
		} else
			string("`");
	}
	else if(got("<")){
		/* html tags in the input are passed through unchanged */
		string("<");
		if(s = look(">", nil)){
			s++;
			output(pos, s - pos);
			pos = s;
		}
	}
	else if(got("[")){
		if(s = look("]", nil)){
			link(s);
			pos = s+1;
		} else
			string("[");
	}
	else if(tag("*", "b")
	|| tag("_", "i")
	|| tag("^", "sup")
	|| tag(",,", "sub")
	|| tag("~~", "strike")){
		/* handled by tag() */
	}
	else if(got("{{{")){
		if(s = look("}}}", nil)){
			/* code spanning several lines is <pre>, otherwise <tt> */
			if(look("\n", s)){
				string("<pre>");
				escape(s);
				string("</pre>");
			} else {
				string("<tt>");
				escape(s);
				string("</tt>");
			}
			pos = s+3;
		} else
			string("{{{");
	}
	else if(got("||")){
		if(s = look("||", look("\n", nil))){
			/* another separator follows on this line: a cell */
			eatspace();
			switch(intable){
			case 0:
				string("<table>");
				intable++;
				/* no break */
			case 1:
				string("<tr>");
				intable++;
			}
			string("<td>");
			ebody(s);
			string("</td>");
		} else if(intable){
			/* last separator on the line closes the row */
			string("</tr>");
			intable = 1;
		}
	}
	else if(match("http://"))
		link(nil);
	else if(match("https://"))
		link(nil);
	else if(match("ftp://"))
		link(nil);
	else{
		/* ordinary byte: copy it through */
		output(pos, 1);
		pos++;
	}
	goto Next;
}

/*
 * Print the usage message and exit.
 */
void
usage(void)
{
	fprint(2, "usage: %s [ file ]\n", argv0);
	exits("usage");
}

/*
 * Read the named file (or standard input) completely, format it
 * and write the html to standard output.
 */
void
main(int argc, char *argv[])
{
	int n;

	ARGBEGIN{
	default:
		usage();
	}ARGEND;

	if(argc != 0 && argc != 1)
		usage();
	if(*argv){
		if((n = open(*argv, OREAD)) < 0)
			sysfatal("open %s: %r", *argv);
		if(dup(n, 0) < 0)
			sysfatal("dup: %r");
	}

	/* the output buffer comes first, the input is placed right after it */
	buf = opos = sbrk(HUNK);
	if(buf == (char*)-1)
		sysfatal("sbrk: %r");
	pos = epos = buf + HUNK;
	for(;;){
		/* room for one more read, plus slack for the newline added below */
		if(brk(epos + HUNK + 8) < 0)
			sysfatal("brk: %r");
		if((n = read(0, epos, HUNK)) < 0)
			sysfatal("read: %r");
		if(n == 0)
			break;
		epos += n;
	}

	/* make sure the last line is terminated */
	if(epos > pos && epos[-1] != '\n')
		*epos++ = '\n';

	body();
	flush();
	exits(0);
}