shithub: lu9-lpeg

Download patch

ref: b7d6be96fcc372886415e8fd55e3febff8340b95
author: kvik <[email protected]>
date: Sat Feb 6 17:28:13 EST 2021

import release 1.0.2

--- /dev/null
+++ b/HISTORY
@@ -1,0 +1,100 @@
+HISTORY for LPeg 1.0.2
+
+* Changes from version 1.0.1 to 1.0.2
+  ---------------------------------
+  + some bugs fixed
+
+* Changes from version 0.12 to 1.0.1
+  ---------------------------------
+  + group "names" can be any Lua value
+  + some bugs fixed
+  + other small improvements
+
+* Changes from version 0.11 to 0.12
+  ---------------------------------
+  + no "unsigned short" limit for pattern sizes
+  + mathtime captures considered nullable
+  + some bugs fixed
+
+* Changes from version 0.10 to 0.11
+  -------------------------------
+  + complete reimplementation of the code generator
+  + new syntax for table captures
+  + new functions in module 're'
+  + other small improvements
+
+* Changes from version 0.9 to 0.10
+  -------------------------------
+  + backtrack stack has configurable size
+  + better error messages
+  + Notation for non-terminals in 're' back to A instead o <A>
+  + experimental look-behind pattern
+  + support for external extensions
+  + works with Lua 5.2
+  + consumes less C stack
+
+  - "and" predicates do not keep captures
+
+* Changes from version 0.8 to 0.9
+  -------------------------------
+  + The accumulator capture was replaced by a fold capture;
+    programs that used the old 'lpeg.Ca' will need small changes.
+  + Some support for character classes from old C locales.
+  + A new named-group capture.
+
+* Changes from version 0.7 to 0.8
+  -------------------------------
+  + New "match-time" capture.
+  + New "argument capture" that allows passing arguments into the pattern.
+  + Better documentation for 're'.
+  + Several small improvements for 're'.
+  + The 're' module has an incompatibility with previous versions:
+    now, any use of a non-terminal must be enclosed in angle brackets
+    (like <B>).
+
+* Changes from version 0.6 to 0.7
+  -------------------------------
+  + Several improvements in module 're':
+    - better documentation;
+    - support for most captures (all but accumulator);
+    - limited repetitions p{n,m}.
+  + Small improvements in efficiency.
+  + Several small bugs corrected (special thanks to Hans Hagen
+    and Taco Hoekwater).
+
+* Changes from version 0.5 to 0.6
+  -------------------------------
+  + Support for non-numeric indices in grammars.
+  + Some bug fixes (thanks to the luatex team).
+  + Some new optimizations; (thanks to Mike Pall).
+  + A new page layout (thanks to Andre Carregal).
+  + Minimal documentation for module 're'.
+
+* Changes from version 0.4 to 0.5
+  -------------------------------
+  + Several optimizations.
+  + lpeg.P now accepts booleans.
+  + Some new examples.
+  + A proper license.
+  + Several small improvements.
+
+* Changes from version 0.3 to 0.4
+  -------------------------------
+  + Static check for loops in repetitions and grammars.
+  + Removed label option in captures.
+  + The implementation of captures uses less memory.
+
+* Changes from version 0.2 to 0.3
+  -------------------------------
+  + User-defined patterns in Lua.
+  + Several new captures.
+
+* Changes from version 0.1 to 0.2
+  -------------------------------
+  + Several small corrections.
+  + Handles embedded zeros like any other character.
+  + Capture "name" can be any Lua value.
+  + Unlimited number of captures.
+  + Match gets an optional initial position.
+
+(end of HISTORY)
--- /dev/null
+++ b/lpcap.c
@@ -1,0 +1,555 @@
+/*
+** $Id: lpcap.c $
+** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+*/
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lpcap.h"
+#include "lptypes.h"
+
+
+#define captype(cap)	((cap)->kind)
+
+#define isclosecap(cap)	(captype(cap) == Cclose)
+
+#define closeaddr(c)	((c)->s + (c)->siz - 1)
+
+#define isfullcap(cap)	((cap)->siz != 0)
+
+#define getfromktable(cs,v)	lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
+
+#define pushluaval(cs)		getfromktable(cs, (cs)->cap->idx)
+
+
+
+/*
+** Put at the cache for Lua values the value indexed by 'v' in ktable
+** of the running pattern (if it is not there yet); returns its index.
+*/
+static int updatecache (CapState *cs, int v) {
+  int idx = cs->ptop + 1;  /* stack index of cache for Lua values */
+  if (v != cs->valuecached) {  /* not there? */
+    getfromktable(cs, v);  /* get value from 'ktable' */
+    lua_replace(cs->L, idx);  /* put it at reserved stack position */
+    cs->valuecached = v;  /* keep track of what is there */
+  }
+  return idx;
+}
+
+
+static int pushcapture (CapState *cs);
+
+
+/*
+** Goes back in a list of captures looking for an open capture
+** corresponding to a close
+*/
+static Capture *findopen (Capture *cap) {
+  int n = 0;  /* number of closes waiting an open */
+  for (;;) {
+    cap--;
+    if (isclosecap(cap)) n++;  /* one more open to skip */
+    else if (!isfullcap(cap))
+      if (n-- == 0) return cap;
+  }
+}
+
+
+/*
+** Go to the next capture
+*/
+static void nextcap (CapState *cs) {
+  Capture *cap = cs->cap;
+  if (!isfullcap(cap)) {  /* not a single capture? */
+    int n = 0;  /* number of opens waiting a close */
+    for (;;) {  /* look for corresponding close */
+      cap++;
+      if (isclosecap(cap)) {
+        if (n-- == 0) break;
+      }
+      else if (!isfullcap(cap)) n++;
+    }
+  }
+  cs->cap = cap + 1;  /* + 1 to skip last close (or entire single capture) */
+}
+
+
+/*
+** Push on the Lua stack all values generated by nested captures inside
+** the current capture. Returns number of values pushed. 'addextra'
+** makes it push the entire match after all captured values. The
+** entire match is pushed also if there are no other nested values,
+** so the function never returns zero.
+*/
+static int pushnestedvalues (CapState *cs, int addextra) {
+  Capture *co = cs->cap;
+  if (isfullcap(cs->cap++)) {  /* no nested captures? */
+    lua_pushlstring(cs->L, co->s, co->siz - 1);  /* push whole match */
+    return 1;  /* that is it */
+  }
+  else {
+    int n = 0;
+    while (!isclosecap(cs->cap))  /* repeat for all nested patterns */
+      n += pushcapture(cs);
+    if (addextra || n == 0) {  /* need extra? */
+      lua_pushlstring(cs->L, co->s, cs->cap->s - co->s);  /* push whole match */
+      n++;
+    }
+    cs->cap++;  /* skip close entry */
+    return n;
+  }
+}
+
+
+/*
+** Push only the first value generated by nested captures
+*/
+static void pushonenestedvalue (CapState *cs) {
+  int n = pushnestedvalues(cs, 0);
+  if (n > 1)
+    lua_pop(cs->L, n - 1);  /* pop extra values */
+}
+
+
+/*
+** Try to find a named group capture with the name given at the top of
+** the stack; goes backward from 'cap'.
+*/
+static Capture *findback (CapState *cs, Capture *cap) {
+  lua_State *L = cs->L;
+  while (cap-- > cs->ocap) {  /* repeat until end of list */
+    if (isclosecap(cap))
+      cap = findopen(cap);  /* skip nested captures */
+    else if (!isfullcap(cap))
+      continue; /* opening an enclosing capture: skip and get previous */
+    if (captype(cap) == Cgroup) {
+      getfromktable(cs, cap->idx);  /* get group name */
+      if (lp_equal(L, -2, -1)) {  /* right group? */
+        lua_pop(L, 2);  /* remove reference name and group name */
+        return cap;
+      }
+      else lua_pop(L, 1);  /* remove group name */
+    }
+  }
+  luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
+  return NULL;  /* to avoid warnings */
+}
+
+
+/*
+** Back-reference capture. Return number of values pushed.
+*/
+static int backrefcap (CapState *cs) {
+  int n;
+  Capture *curr = cs->cap;
+  pushluaval(cs);  /* reference name */
+  cs->cap = findback(cs, curr);  /* find corresponding group */
+  n = pushnestedvalues(cs, 0);  /* push group's values */
+  cs->cap = curr + 1;
+  return n;
+}
+
+
+/*
+** Table capture: creates a new table and populates it with nested
+** captures.
+*/
+static int tablecap (CapState *cs) {
+  lua_State *L = cs->L;
+  int n = 0;
+  lua_newtable(L);
+  if (isfullcap(cs->cap++))
+    return 1;  /* table is empty */
+  while (!isclosecap(cs->cap)) {
+    if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) {  /* named group? */
+      pushluaval(cs);  /* push group name */
+      pushonenestedvalue(cs);
+      lua_settable(L, -3);
+    }
+    else {  /* not a named group */
+      int i;
+      int k = pushcapture(cs);
+      for (i = k; i > 0; i--)  /* store all values into table */
+        lua_rawseti(L, -(i + 1), n + i);
+      n += k;
+    }
+  }
+  cs->cap++;  /* skip close entry */
+  return 1;  /* number of values pushed (only the table) */
+}
+
+
+/*
+** Table-query capture
+*/
+static int querycap (CapState *cs) {
+  int idx = cs->cap->idx;
+  pushonenestedvalue(cs);  /* get nested capture */
+  lua_gettable(cs->L, updatecache(cs, idx));  /* query cap. value at table */
+  if (!lua_isnil(cs->L, -1))
+    return 1;
+  else {  /* no value */
+    lua_pop(cs->L, 1);  /* remove nil */
+    return 0;
+  }
+}
+
+
+/*
+** Fold capture
+*/
+static int foldcap (CapState *cs) {
+  int n;
+  lua_State *L = cs->L;
+  int idx = cs->cap->idx;
+  if (isfullcap(cs->cap++) ||  /* no nested captures? */
+      isclosecap(cs->cap) ||  /* no nested captures (large subject)? */
+      (n = pushcapture(cs)) == 0)  /* nested captures with no values? */
+    return luaL_error(L, "no initial value for fold capture");
+  if (n > 1)
+    lua_pop(L, n - 1);  /* leave only one result for accumulator */
+  while (!isclosecap(cs->cap)) {
+    lua_pushvalue(L, updatecache(cs, idx));  /* get folding function */
+    lua_insert(L, -2);  /* put it before accumulator */
+    n = pushcapture(cs);  /* get next capture's values */
+    lua_call(L, n + 1, 1);  /* call folding function */
+  }
+  cs->cap++;  /* skip close entry */
+  return 1;  /* only accumulator left on the stack */
+}
+
+
+/*
+** Function capture
+*/
+static int functioncap (CapState *cs) {
+  int n;
+  int top = lua_gettop(cs->L);
+  pushluaval(cs);  /* push function */
+  n = pushnestedvalues(cs, 0);  /* push nested captures */
+  lua_call(cs->L, n, LUA_MULTRET);  /* call function */
+  return lua_gettop(cs->L) - top;  /* return function's results */
+}
+
+
+/*
+** Select capture
+*/
+static int numcap (CapState *cs) {
+  int idx = cs->cap->idx;  /* value to select */
+  if (idx == 0) {  /* no values? */
+    nextcap(cs);  /* skip entire capture */
+    return 0;  /* no value produced */
+  }
+  else {
+    int n = pushnestedvalues(cs, 0);
+    if (n < idx)  /* invalid index? */
+      return luaL_error(cs->L, "no capture '%d'", idx);
+    else {
+      lua_pushvalue(cs->L, -(n - idx + 1));  /* get selected capture */
+      lua_replace(cs->L, -(n + 1));  /* put it in place of 1st capture */
+      lua_pop(cs->L, n - 1);  /* remove other captures */
+      return 1;
+    }
+  }
+}
+
+
+/*
+** Return the stack index of the first runtime capture in the given
+** list of captures (or zero if no runtime captures)
+*/
+int finddyncap (Capture *cap, Capture *last) {
+  for (; cap < last; cap++) {
+    if (cap->kind == Cruntime)
+      return cap->idx;  /* stack position of first capture */
+  }
+  return 0;  /* no dynamic captures in this segment */
+}
+
+
+/*
+** Calls a runtime capture. Returns number of captures "removed" by the
+** call, that is, those inside the group capture. Captures to be added
+** are on the Lua stack.
+*/
+int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
+  int n, id;
+  lua_State *L = cs->L;
+  int otop = lua_gettop(L);
+  Capture *open = findopen(close);  /* get open group capture */
+  assert(captype(open) == Cgroup);
+  id = finddyncap(open, close);  /* get first dynamic capture argument */
+  close->kind = Cclose;  /* closes the group */
+  close->s = s;
+  cs->cap = open; cs->valuecached = 0;  /* prepare capture state */
+  luaL_checkstack(L, 4, "too many runtime captures");
+  pushluaval(cs);  /* push function to be called */
+  lua_pushvalue(L, SUBJIDX);  /* push original subject */
+  lua_pushinteger(L, s - cs->s + 1);  /* push current position */
+  n = pushnestedvalues(cs, 0);  /* push nested captures */
+  lua_call(L, n + 2, LUA_MULTRET);  /* call dynamic function */
+  if (id > 0) {  /* are there old dynamic captures to be removed? */
+    int i;
+    for (i = id; i <= otop; i++)
+      lua_remove(L, id);  /* remove old dynamic captures */
+    *rem = otop - id + 1;  /* total number of dynamic captures removed */
+  }
+  else
+    *rem = 0;  /* no dynamic captures removed */
+  return close - open - 1;  /* number of captures to be removed */
+}
+
+
+/*
+** Auxiliary structure for substitution and string captures: keep
+** information about nested captures for future use, avoiding to push
+** string results into Lua
+*/
+typedef struct StrAux {
+  int isstring;  /* whether capture is a string */
+  union {
+    Capture *cp;  /* if not a string, respective capture */
+    struct {  /* if it is a string... */
+      const char *s;  /* ... starts here */
+      const char *e;  /* ... ends here */
+    } s;
+  } u;
+} StrAux;
+
+#define MAXSTRCAPS	10
+
+/*
+** Collect values from current capture into array 'cps'. Current
+** capture must be Cstring (first call) or Csimple (recursive calls).
+** (In first call, fills %0 with whole match for Cstring.)
+** Returns number of elements in the array that were filled.
+*/
+static int getstrcaps (CapState *cs, StrAux *cps, int n) {
+  int k = n++;
+  cps[k].isstring = 1;  /* get string value */
+  cps[k].u.s.s = cs->cap->s;  /* starts here */
+  if (!isfullcap(cs->cap++)) {  /* nested captures? */
+    while (!isclosecap(cs->cap)) {  /* traverse them */
+      if (n >= MAXSTRCAPS)  /* too many captures? */
+        nextcap(cs);  /* skip extra captures (will not need them) */
+      else if (captype(cs->cap) == Csimple)  /* string? */
+        n = getstrcaps(cs, cps, n);  /* put info. into array */
+      else {
+        cps[n].isstring = 0;  /* not a string */
+        cps[n].u.cp = cs->cap;  /* keep original capture */
+        nextcap(cs);
+        n++;
+      }
+    }
+    cs->cap++;  /* skip close */
+  }
+  cps[k].u.s.e = closeaddr(cs->cap - 1);  /* ends here */
+  return n;
+}
+
+
+/*
+** add next capture value (which should be a string) to buffer 'b'
+*/
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
+
+
+/*
+** String capture: add result to buffer 'b' (instead of pushing
+** it into the stack)
+*/
+static void stringcap (luaL_Buffer *b, CapState *cs) {
+  StrAux cps[MAXSTRCAPS];
+  int n;
+  size_t len, i;
+  const char *fmt;  /* format string */
+  fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
+  n = getstrcaps(cs, cps, 0) - 1;  /* collect nested captures */
+  for (i = 0; i < len; i++) {  /* traverse them */
+    if (fmt[i] != '%')  /* not an escape? */
+      luaL_addchar(b, fmt[i]);  /* add it to buffer */
+    else if (fmt[++i] < '0' || fmt[i] > '9')  /* not followed by a digit? */
+      luaL_addchar(b, fmt[i]);  /* add to buffer */
+    else {
+      int l = fmt[i] - '0';  /* capture index */
+      if (l > n)
+        luaL_error(cs->L, "invalid capture index (%d)", l);
+      else if (cps[l].isstring)
+        luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
+      else {
+        Capture *curr = cs->cap;
+        cs->cap = cps[l].u.cp;  /* go back to evaluate that nested capture */
+        if (!addonestring(b, cs, "capture"))
+          luaL_error(cs->L, "no values in capture index %d", l);
+        cs->cap = curr;  /* continue from where it stopped */
+      }
+    }
+  }
+}
+
+
+/*
+** Substitution capture: add result to buffer 'b'
+*/
+static void substcap (luaL_Buffer *b, CapState *cs) {
+  const char *curr = cs->cap->s;
+  if (isfullcap(cs->cap))  /* no nested captures? */
+    luaL_addlstring(b, curr, cs->cap->siz - 1);  /* keep original text */
+  else {
+    cs->cap++;  /* skip open entry */
+    while (!isclosecap(cs->cap)) {  /* traverse nested captures */
+      const char *next = cs->cap->s;
+      luaL_addlstring(b, curr, next - curr);  /* add text up to capture */
+      if (addonestring(b, cs, "replacement"))
+        curr = closeaddr(cs->cap - 1);  /* continue after match */
+      else  /* no capture value */
+        curr = next;  /* keep original text in final result */
+    }
+    luaL_addlstring(b, curr, cs->cap->s - curr);  /* add last piece of text */
+  }
+  cs->cap++;  /* go to next capture */
+}
+
+
+/*
+** Evaluates a capture and adds its first value to buffer 'b'; returns
+** whether there was a value
+*/
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
+  switch (captype(cs->cap)) {
+    case Cstring:
+      stringcap(b, cs);  /* add capture directly to buffer */
+      return 1;
+    case Csubst:
+      substcap(b, cs);  /* add capture directly to buffer */
+      return 1;
+    default: {
+      lua_State *L = cs->L;
+      int n = pushcapture(cs);
+      if (n > 0) {
+        if (n > 1) lua_pop(L, n - 1);  /* only one result */
+        if (!lua_isstring(L, -1))
+          luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
+        luaL_addvalue(b);
+      }
+      return n;
+    }
+  }
+}
+
+
+#if !defined(MAXRECLEVEL)
+#define MAXRECLEVEL	200
+#endif
+
+
+/*
+** Push all values of the current capture into the stack; returns
+** number of values pushed
+*/
+static int pushcapture (CapState *cs) {
+  lua_State *L = cs->L;
+  int res;
+  luaL_checkstack(L, 4, "too many captures");
+  if (cs->reclevel++ > MAXRECLEVEL)
+    return luaL_error(L, "subcapture nesting too deep");
+  switch (captype(cs->cap)) {
+    case Cposition: {
+      lua_pushinteger(L, cs->cap->s - cs->s + 1);
+      cs->cap++;
+      res = 1;
+      break;
+    }
+    case Cconst: {
+      pushluaval(cs);
+      cs->cap++;
+      res = 1;
+      break;
+    }
+    case Carg: {
+      int arg = (cs->cap++)->idx;
+      if (arg + FIXEDARGS > cs->ptop)
+        return luaL_error(L, "reference to absent extra argument #%d", arg);
+      lua_pushvalue(L, arg + FIXEDARGS);
+      res = 1;
+      break;
+    }
+    case Csimple: {
+      int k = pushnestedvalues(cs, 1);
+      lua_insert(L, -k);  /* make whole match be first result */
+      res = k;
+      break;
+    }
+    case Cruntime: {
+      lua_pushvalue(L, (cs->cap++)->idx);  /* value is in the stack */
+      res = 1;
+      break;
+    }
+    case Cstring: {
+      luaL_Buffer b;
+      luaL_buffinit(L, &b);
+      stringcap(&b, cs);
+      luaL_pushresult(&b);
+      res = 1;
+      break;
+    }
+    case Csubst: {
+      luaL_Buffer b;
+      luaL_buffinit(L, &b);
+      substcap(&b, cs);
+      luaL_pushresult(&b);
+      res = 1;
+      break;
+    }
+    case Cgroup: {
+      if (cs->cap->idx == 0)  /* anonymous group? */
+        res = pushnestedvalues(cs, 0);  /* add all nested values */
+      else {  /* named group: add no values */
+        nextcap(cs);  /* skip capture */
+        res = 0;
+      }
+      break;
+    }
+    case Cbackref: res = backrefcap(cs); break;
+    case Ctable: res = tablecap(cs); break;
+    case Cfunction: res = functioncap(cs); break;
+    case Cnum: res = numcap(cs); break;
+    case Cquery: res = querycap(cs); break;
+    case Cfold: res = foldcap(cs); break;
+    default: assert(0); res = 0;
+  }
+  cs->reclevel--;
+  return res;
+}
+
+
+/*
+** Prepare a CapState structure and traverse the entire list of
+** captures in the stack pushing its results. 's' is the subject
+** string, 'r' is the final position of the match, and 'ptop' 
+** the index in the stack where some useful values were pushed.
+** Returns the number of results pushed. (If the list produces no
+** results, push the final position of the match.)
+*/
+int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
+  Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
+  int n = 0;
+  if (!isclosecap(capture)) {  /* is there any capture? */
+    CapState cs;
+    cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
+    cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
+    do {  /* collect their values */
+      n += pushcapture(&cs);
+    } while (!isclosecap(cs.cap));
+  }
+  if (n == 0) {  /* no capture values? */
+    lua_pushinteger(L, r - s + 1);  /* return only end position */
+    n = 1;
+  }
+  return n;
+}
+
+
--- /dev/null
+++ b/lpcap.h
@@ -1,0 +1,57 @@
+/*
+** $Id: lpcap.h $
+*/
+
+#if !defined(lpcap_h)
+#define lpcap_h
+
+
+#include "lptypes.h"
+
+
+/* kinds of captures */
+typedef enum CapKind {
+  Cclose,  /* not used in trees */
+  Cposition,
+  Cconst,  /* ktable[key] is Lua constant */
+  Cbackref,  /* ktable[key] is "name" of group to get capture */
+  Carg,  /* 'key' is arg's number */
+  Csimple,  /* next node is pattern */
+  Ctable,  /* next node is pattern */
+  Cfunction,  /* ktable[key] is function; next node is pattern */
+  Cquery,  /* ktable[key] is table; next node is pattern */
+  Cstring,  /* ktable[key] is string; next node is pattern */
+  Cnum,  /* numbered capture; 'key' is number of value to return */
+  Csubst,  /* substitution capture; next node is pattern */
+  Cfold,  /* ktable[key] is function; next node is pattern */
+  Cruntime,  /* not used in trees (is uses another type for tree) */
+  Cgroup  /* ktable[key] is group's "name" */
+} CapKind;
+
+
+typedef struct Capture {
+  const char *s;  /* subject position */
+  unsigned short idx;  /* extra info (group name, arg index, etc.) */
+  byte kind;  /* kind of capture */
+  byte siz;  /* size of full capture + 1 (0 = not a full capture) */
+} Capture;
+
+
+typedef struct CapState {
+  Capture *cap;  /* current capture */
+  Capture *ocap;  /* (original) capture list */
+  lua_State *L;
+  int ptop;  /* index of last argument to 'match' */
+  const char *s;  /* original string */
+  int valuecached;  /* value stored in cache slot */
+  int reclevel;  /* recursion level */
+} CapState;
+
+
+int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
+int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
+int finddyncap (Capture *cap, Capture *last);
+
+#endif
+
+
--- /dev/null
+++ b/lpcode.c
@@ -1,0 +1,1014 @@
+/*
+** $Id: lpcode.c $
+** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+*/
+
+#include <limits.h>
+
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lptypes.h"
+#include "lpcode.h"
+
+
+/* signals a "no-instruction */
+#define NOINST		-1
+
+
+
+static const Charset fullset_ =
+  {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
+
+static const Charset *fullset = &fullset_;
+
+/*
+** {======================================================
+** Analysis and some optimizations
+** =======================================================
+*/
+
+/*
+** Check whether a charset is empty (returns IFail), singleton (IChar),
+** full (IAny), or none of those (ISet). When singleton, '*c' returns
+** which character it is. (When generic set, the set was the input,
+** so there is no need to return it.)
+*/
+static Opcode charsettype (const byte *cs, int *c) {
+  int count = 0;  /* number of characters in the set */
+  int i;
+  int candidate = -1;  /* candidate position for the singleton char */
+  for (i = 0; i < CHARSETSIZE; i++) {  /* for each byte */
+    int b = cs[i];
+    if (b == 0) {  /* is byte empty? */
+      if (count > 1)  /* was set neither empty nor singleton? */
+        return ISet;  /* neither full nor empty nor singleton */
+      /* else set is still empty or singleton */
+    }
+    else if (b == 0xFF) {  /* is byte full? */
+      if (count < (i * BITSPERCHAR))  /* was set not full? */
+        return ISet;  /* neither full nor empty nor singleton */
+      else count += BITSPERCHAR;  /* set is still full */
+    }
+    else if ((b & (b - 1)) == 0) {  /* has byte only one bit? */
+      if (count > 0)  /* was set not empty? */
+        return ISet;  /* neither full nor empty nor singleton */
+      else {  /* set has only one char till now; track it */
+        count++;
+        candidate = i;
+      }
+    }
+    else return ISet;  /* byte is neither empty, full, nor singleton */
+  }
+  switch (count) {
+    case 0: return IFail;  /* empty set */
+    case 1: {  /* singleton; find character bit inside byte */
+      int b = cs[candidate];
+      *c = candidate * BITSPERCHAR;
+      if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
+      if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
+      if ((b & 0x02) != 0) { *c += 1; }
+      return IChar;
+    }
+    default: {
+       assert(count == CHARSETSIZE * BITSPERCHAR);  /* full set */
+       return IAny;
+    }
+  }
+}
+
+
+/*
+** A few basic operations on Charsets
+*/
+static void cs_complement (Charset *cs) {
+  loopset(i, cs->cs[i] = ~cs->cs[i]);
+}
+
+static int cs_equal (const byte *cs1, const byte *cs2) {
+  loopset(i, if (cs1[i] != cs2[i]) return 0);
+  return 1;
+}
+
+static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
+  loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
+  return 1;
+}
+
+
+/*
+** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
+** charset and return 1; else return 0.
+*/
+int tocharset (TTree *tree, Charset *cs) {
+  switch (tree->tag) {
+    case TSet: {  /* copy set */
+      loopset(i, cs->cs[i] = treebuffer(tree)[i]);
+      return 1;
+    }
+    case TChar: {  /* only one char */
+      assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
+      loopset(i, cs->cs[i] = 0);  /* erase all chars */
+      setchar(cs->cs, tree->u.n);  /* add that one */
+      return 1;
+    }
+    case TAny: {
+      loopset(i, cs->cs[i] = 0xFF);  /* add all characters to the set */
+      return 1;
+    }
+    default: return 0;
+  }
+}
+
+
+/*
+** Visit a TCall node taking care to stop recursion. If node not yet
+** visited, return 'f(sib2(tree))', otherwise return 'def' (default
+** value)
+*/
+static int callrecursive (TTree *tree, int f (TTree *t), int def) {
+  int key = tree->key;
+  assert(tree->tag == TCall);
+  assert(sib2(tree)->tag == TRule);
+  if (key == 0)  /* node already visited? */
+    return def;  /* return default value */
+  else {  /* first visit */
+    int result;
+    tree->key = 0;  /* mark call as already visited */
+    result = f(sib2(tree));  /* go to called rule */
+    tree->key = key;  /* restore tree */
+    return result;
+  }
+}
+
+
+/*
+** Check whether a pattern tree has captures
+*/
+int hascaptures (TTree *tree) {
+ tailcall:
+  switch (tree->tag) {
+    case TCapture: case TRunTime:
+      return 1;
+    case TCall:
+      return callrecursive(tree, hascaptures, 0);
+    case TRule:  /* do not follow siblings */
+      tree = sib1(tree); goto tailcall;
+    case TOpenCall: assert(0);
+    default: {
+      switch (numsiblings[tree->tag]) {
+        case 1:  /* return hascaptures(sib1(tree)); */
+          tree = sib1(tree); goto tailcall;
+        case 2:
+          if (hascaptures(sib1(tree)))
+            return 1;
+          /* else return hascaptures(sib2(tree)); */
+          tree = sib2(tree); goto tailcall;
+        default: assert(numsiblings[tree->tag] == 0); return 0;
+      }
+    }
+  }
+}
+
+
+/*
+** Checks how a pattern behaves regarding the empty string,
+** in one of two different ways:
+** A pattern is *nullable* if it can match without consuming any character;
+** A pattern is *nofail* if it never fails for any string
+** (including the empty string).
+** The difference is only for predicates and run-time captures;
+** for other patterns, the two properties are equivalent.
+** (With predicates, &'a' is nullable but not nofail. Of course,
+** nofail => nullable.)
+** These functions are all convervative in the following way:
+**    p is nullable => nullable(p)
+**    nofail(p) => p cannot fail
+** The function assumes that TOpenCall is not nullable;
+** this will be checked again when the grammar is fixed.
+** Run-time captures can do whatever they want, so the result
+** is conservative.
+*/
+int checkaux (TTree *tree, int pred) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny:
+    case TFalse: case TOpenCall:
+      return 0;  /* not nullable */
+    case TRep: case TTrue:
+      return 1;  /* no fail */
+    case TNot: case TBehind:  /* can match empty, but can fail */
+      if (pred == PEnofail) return 0;
+      else return 1;  /* PEnullable */
+    case TAnd:  /* can match empty; fail iff body does */
+      if (pred == PEnullable) return 1;
+      /* else return checkaux(sib1(tree), pred); */
+      tree = sib1(tree); goto tailcall;
+    case TRunTime:  /* can fail; match empty iff body does */
+      if (pred == PEnofail) return 0;
+      /* else return checkaux(sib1(tree), pred); */
+      tree = sib1(tree); goto tailcall;
+    case TSeq:
+      if (!checkaux(sib1(tree), pred)) return 0;
+      /* else return checkaux(sib2(tree), pred); */
+      tree = sib2(tree); goto tailcall;
+    case TChoice:
+      if (checkaux(sib2(tree), pred)) return 1;
+      /* else return checkaux(sib1(tree), pred); */
+      tree = sib1(tree); goto tailcall;
+    case TCapture: case TGrammar: case TRule:
+      /* return checkaux(sib1(tree), pred); */
+      tree = sib1(tree); goto tailcall;
+    case TCall:  /* return checkaux(sib2(tree), pred); */
+      tree = sib2(tree); goto tailcall;
+    default: assert(0); return 0;
+  }
+}
+
+
+/*
+** number of characters to match a pattern (or -1 if variable)
+*/
+int fixedlen (TTree *tree) {
+  int len = 0;  /* to accumulate in tail calls */
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny:
+      return len + 1;
+    case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
+      return len;
+    case TRep: case TRunTime: case TOpenCall:
+      return -1;
+    case TCapture: case TRule: case TGrammar:
+      /* return fixedlen(sib1(tree)); */
+      tree = sib1(tree); goto tailcall;
+    case TCall: {
+      int n1 = callrecursive(tree, fixedlen, -1);
+      if (n1 < 0)
+        return -1;
+      else
+        return len + n1;
+    }
+    case TSeq: {
+      int n1 = fixedlen(sib1(tree));
+      if (n1 < 0)
+        return -1;
+      /* else return fixedlen(sib2(tree)) + len; */
+      len += n1; tree = sib2(tree); goto tailcall;
+    }
+    case TChoice: {
+      int n1 = fixedlen(sib1(tree));
+      int n2 = fixedlen(sib2(tree));
+      if (n1 != n2 || n1 < 0)
+        return -1;
+      else
+        return len + n1;
+    }
+    default: assert(0); return 0;
+  };
+}
+
+
+/*
+** Computes the 'first set' of a pattern.
+** The result is a conservative aproximation:
+**   match p ax -> x (for some x) ==> a belongs to first(p)
+** or
+**   a not in first(p) ==> match p ax -> fail (for all x)
+**
+** The set 'follow' is the first set of what follows the
+** pattern (full set if nothing follows it).
+**
+** The function returns 0 when this resulting set can be used for
+** test instructions that avoid the pattern altogether.
+** A non-zero return can happen for two reasons:
+** 1) match p '' -> ''            ==> return has bit 1 set
+** (tests cannot be used because they would always fail for an empty input);
+** 2) there is a match-time capture ==> return has bit 2 set
+** (optimizations should not bypass match-time captures).
+*/
+static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny: {
+      tocharset(tree, firstset);
+      return 0;
+    }
+    case TTrue: {
+      loopset(i, firstset->cs[i] = follow->cs[i]);
+      return 1;  /* accepts the empty string */
+    }
+    case TFalse: {
+      loopset(i, firstset->cs[i] = 0);
+      return 0;
+    }
+    case TChoice: {
+      Charset csaux;
+      int e1 = getfirst(sib1(tree), follow, firstset);
+      int e2 = getfirst(sib2(tree), follow, &csaux);
+      loopset(i, firstset->cs[i] |= csaux.cs[i]);
+      return e1 | e2;
+    }
+    case TSeq: {
+      if (!nullable(sib1(tree))) {
+        /* when p1 is not nullable, p2 has nothing to contribute;
+           return getfirst(sib1(tree), fullset, firstset); */
+        tree = sib1(tree); follow = fullset; goto tailcall;
+      }
+      else {  /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
+        Charset csaux;
+        int e2 = getfirst(sib2(tree), follow, &csaux);
+        int e1 = getfirst(sib1(tree), &csaux, firstset);
+        if (e1 == 0) return 0;  /* 'e1' ensures that first can be used */
+        else if ((e1 | e2) & 2)  /* one of the children has a matchtime? */
+          return 2;  /* pattern has a matchtime capture */
+        else return e2;  /* else depends on 'e2' */
+      }
+    }
+    case TRep: {
+      getfirst(sib1(tree), follow, firstset);
+      loopset(i, firstset->cs[i] |= follow->cs[i]);
+      return 1;  /* accept the empty string */
+    }
+    case TCapture: case TGrammar: case TRule: {
+      /* return getfirst(sib1(tree), follow, firstset); */
+      tree = sib1(tree); goto tailcall;
+    }
+    case TRunTime: {  /* function invalidates any follow info. */
+      int e = getfirst(sib1(tree), fullset, firstset);
+      if (e) return 2;  /* function is not "protected"? */
+      else return 0;  /* pattern inside capture ensures first can be used */
+    }
+    case TCall: {
+      /* return getfirst(sib2(tree), follow, firstset); */
+      tree = sib2(tree); goto tailcall;
+    }
+    case TAnd: {
+      int e = getfirst(sib1(tree), follow, firstset);
+      loopset(i, firstset->cs[i] &= follow->cs[i]);
+      return e;
+    }
+    case TNot: {
+      if (tocharset(sib1(tree), firstset)) {
+        cs_complement(firstset);
+        return 1;
+      }
+      /* else go through */
+    }
+    case TBehind: {  /* instruction gives no new information */
+      /* call 'getfirst' only to check for math-time captures */
+      int e = getfirst(sib1(tree), follow, firstset);
+      loopset(i, firstset->cs[i] = follow->cs[i]);  /* uses follow */
+      return e | 1;  /* always can accept the empty string */
+    }
+    default: assert(0); return 0;
+  }
+}
+
+
+/*
+** If 'headfail(tree)' true, then 'tree' can fail only depending on the
+** next character of the subject.
+*/
+static int headfail (TTree *tree) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny: case TFalse:
+      return 1;
+    case TTrue: case TRep: case TRunTime: case TNot:
+    case TBehind:
+      return 0;
+    case TCapture: case TGrammar: case TRule: case TAnd:
+      tree = sib1(tree); goto tailcall;  /* return headfail(sib1(tree)); */
+    case TCall:
+      tree = sib2(tree); goto tailcall;  /* return headfail(sib2(tree)); */
+    case TSeq:
+      if (!nofail(sib2(tree))) return 0;
+      /* else return headfail(sib1(tree)); */
+      tree = sib1(tree); goto tailcall;
+    case TChoice:
+      if (!headfail(sib1(tree))) return 0;
+      /* else return headfail(sib2(tree)); */
+      tree = sib2(tree); goto tailcall;
+    default: assert(0); return 0;
+  }
+}
+
+
+/*
+** Check whether the code generation for the given tree can benefit
+** from a follow set (to avoid computing the follow set when it is
+** not needed)
+*/
+static int needfollow (TTree *tree) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny:
+    case TFalse: case TTrue: case TAnd: case TNot:
+    case TRunTime: case TGrammar: case TCall: case TBehind:
+      return 0;
+    case TChoice: case TRep:
+      return 1;
+    case TCapture:
+      tree = sib1(tree); goto tailcall;
+    case TSeq:
+      tree = sib2(tree); goto tailcall;
+    default: assert(0); return 0;
+  } 
+}
+
+/* }====================================================== */
+
+
+
+/*
+** {======================================================
+** Code generation
+** =======================================================
+*/
+
+
+/*
+** size of an instruction
+*/
+int sizei (const Instruction *i) {
+  switch((Opcode)i->i.code) {
+    case ISet: case ISpan: return CHARSETINSTSIZE;
+    case ITestSet: return CHARSETINSTSIZE + 1;
+    case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
+    case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
+      return 2;
+    default: return 1;
+  }
+}
+
+
+/*
+** state for the compiler
+*/
+typedef struct CompileState {
+  Pattern *p;  /* pattern being compiled */
+  int ncode;  /* next position in p->code to be filled */
+  lua_State *L;
+} CompileState;
+
+
+/*
+** code generation is recursive; 'opt' indicates that the code is being
+** generated as the last thing inside an optional pattern (so, if that
+** code is optional too, it can reuse the 'IChoice' already in place for
+** the outer pattern). 'tt' points to a previous test protecting this
+** code (or NOINST). 'fl' is the follow set of the pattern.
+*/
+static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
+                     const Charset *fl);
+
+
+void realloccode (lua_State *L, Pattern *p, int nsize) {
+  void *ud;
+  lua_Alloc f = lua_getallocf(L, &ud);
+  void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
+                                  nsize * sizeof(Instruction));
+  if (newblock == NULL && nsize > 0)
+    luaL_error(L, "not enough memory");
+  p->code = (Instruction *)newblock;
+  p->codesize = nsize;
+}
+
+
+static int nextinstruction (CompileState *compst) {
+  int size = compst->p->codesize;
+  if (compst->ncode >= size)
+    realloccode(compst->L, compst->p, size * 2);
+  return compst->ncode++;
+}
+
+
+#define getinstr(cs,i)		((cs)->p->code[i])
+
+
+static int addinstruction (CompileState *compst, Opcode op, int aux) {
+  int i = nextinstruction(compst);
+  getinstr(compst, i).i.code = op;
+  getinstr(compst, i).i.aux = aux;
+  return i;
+}
+
+
+/*
+** Add an instruction followed by space for an offset (to be set later)
+*/
+static int addoffsetinst (CompileState *compst, Opcode op) {
+  int i = addinstruction(compst, op, 0);  /* instruction */
+  addinstruction(compst, (Opcode)0, 0);  /* open space for offset */
+  assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
+  return i;
+}
+
+
+/*
+** Set the offset of an instruction
+*/
+static void setoffset (CompileState *compst, int instruction, int offset) {
+  getinstr(compst, instruction + 1).offset = offset;
+}
+
+
+/*
+** Add a capture instruction:
+** 'op' is the capture instruction; 'cap' the capture kind;
+** 'key' the key into ktable; 'aux' is the optional capture offset
+**
+*/
+static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
+                       int aux) {
+  int i = addinstruction(compst, op, joinkindoff(cap, aux));
+  getinstr(compst, i).i.key = key;
+  return i;
+}
+
+
+#define gethere(compst) 	((compst)->ncode)
+
+#define target(code,i)		((i) + code[i + 1].offset)
+
+
+/*
+** Patch 'instruction' to jump to 'target'
+*/
+static void jumptothere (CompileState *compst, int instruction, int target) {
+  if (instruction >= 0)
+    setoffset(compst, instruction, target - instruction);
+}
+
+
+/*
+** Patch 'instruction' to jump to current position
+*/
+static void jumptohere (CompileState *compst, int instruction) {
+  jumptothere(compst, instruction, gethere(compst));
+}
+
+
+/*
+** Code an IChar instruction, or IAny if there is an equivalent
+** test dominating it
+*/
+static void codechar (CompileState *compst, int c, int tt) {
+  if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
+                 getinstr(compst, tt).i.aux == c)
+    addinstruction(compst, IAny, 0);
+  else
+    addinstruction(compst, IChar, c);
+}
+
+
+/*
+** Add a charset posfix to an instruction
+*/
+static void addcharset (CompileState *compst, const byte *cs) {
+  int p = gethere(compst);
+  int i;
+  for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
+    nextinstruction(compst);  /* space for buffer */
+  /* fill buffer with charset */
+  loopset(j, getinstr(compst, p).buff[j] = cs[j]);
+}
+
+
+/*
+** code a char set, optimizing unit sets for IChar, "complete"
+** sets for IAny, and empty sets for IFail; also use an IAny
+** when instruction is dominated by an equivalent test.
+*/
+static void codecharset (CompileState *compst, const byte *cs, int tt) {
+  int c = 0;  /* (=) to avoid warnings */
+  Opcode op = charsettype(cs, &c);
+  switch (op) {
+    case IChar: codechar(compst, c, tt); break;
+    case ISet: {  /* non-trivial set? */
+      if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
+          cs_equal(cs, getinstr(compst, tt + 2).buff))
+        addinstruction(compst, IAny, 0);
+      else {
+        addinstruction(compst, ISet, 0);
+        addcharset(compst, cs);
+      }
+      break;
+    }
+    default: addinstruction(compst, op, c); break;
+  }
+}
+
+
+/*
+** code a test set, optimizing unit sets for ITestChar, "complete"
+** sets for ITestAny, and empty sets for IJmp (always fails).
+** 'e' is true iff test should accept the empty string. (Test
+** instructions in the current VM never accept the empty string.)
+*/
+static int codetestset (CompileState *compst, Charset *cs, int e) {
+  if (e) return NOINST;  /* no test */
+  else {
+    int c = 0;
+    Opcode op = charsettype(cs->cs, &c);
+    switch (op) {
+      case IFail: return addoffsetinst(compst, IJmp);  /* always jump */
+      case IAny: return addoffsetinst(compst, ITestAny);
+      case IChar: {
+        int i = addoffsetinst(compst, ITestChar);
+        getinstr(compst, i).i.aux = c;
+        return i;
+      }
+      case ISet: {
+        int i = addoffsetinst(compst, ITestSet);
+        addcharset(compst, cs->cs);
+        return i;
+      }
+      default: assert(0); return 0;
+    }
+  }
+}
+
+
+/*
+** Find the final destination of a sequence of jumps
+*/
+static int finaltarget (Instruction *code, int i) {
+  while (code[i].i.code == IJmp)
+    i = target(code, i);
+  return i;
+}
+
+
+/*
+** final label (after traversing any jumps)
+*/
+static int finallabel (Instruction *code, int i) {
+  return finaltarget(code, target(code, i));
+}
+
+
+/*
+** <behind(p)> == behind n; <p>   (where n = fixedlen(p))
+*/
+static void codebehind (CompileState *compst, TTree *tree) {
+  if (tree->u.n > 0)
+    addinstruction(compst, IBehind, tree->u.n);
+  codegen(compst, sib1(tree), 0, NOINST, fullset);
+}
+
+
+/*
+** Choice; optimizations:
+** - when p1 is headfail or
+** when first(p1) and first(p2) are disjoint, than
+** a character not in first(p1) cannot go to p1, and a character
+** in first(p1) cannot go to p2 (at it is not in first(p2)).
+** (The optimization is not valid if p1 accepts the empty string,
+** as then there is no character at all...)
+** - when p2 is empty and opt is true; a IPartialCommit can reuse
+** the Choice already active in the stack.
+*/
+static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
+                        const Charset *fl) {
+  int emptyp2 = (p2->tag == TTrue);
+  Charset cs1, cs2;
+  int e1 = getfirst(p1, fullset, &cs1);
+  if (headfail(p1) ||
+      (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
+    /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
+    int test = codetestset(compst, &cs1, 0);
+    int jmp = NOINST;
+    codegen(compst, p1, 0, test, fl);
+    if (!emptyp2)
+      jmp = addoffsetinst(compst, IJmp); 
+    jumptohere(compst, test);
+    codegen(compst, p2, opt, NOINST, fl);
+    jumptohere(compst, jmp);
+  }
+  else if (opt && emptyp2) {
+    /* p1? == IPartialCommit; p1 */
+    jumptohere(compst, addoffsetinst(compst, IPartialCommit));
+    codegen(compst, p1, 1, NOINST, fullset);
+  }
+  else {
+    /* <p1 / p2> == 
+        test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
+    int pcommit;
+    int test = codetestset(compst, &cs1, e1);
+    int pchoice = addoffsetinst(compst, IChoice);
+    codegen(compst, p1, emptyp2, test, fullset);
+    pcommit = addoffsetinst(compst, ICommit);
+    jumptohere(compst, pchoice);
+    jumptohere(compst, test);
+    codegen(compst, p2, opt, NOINST, fl);
+    jumptohere(compst, pcommit);
+  }
+}
+
+
+/*
+** And predicate
+** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
+** (valid only when 'p' has no captures)
+*/
+static void codeand (CompileState *compst, TTree *tree, int tt) {
+  int n = fixedlen(tree);
+  if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
+    codegen(compst, tree, 0, tt, fullset);
+    if (n > 0)
+      addinstruction(compst, IBehind, n);
+  }
+  else {  /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
+    int pcommit;
+    int pchoice = addoffsetinst(compst, IChoice);
+    codegen(compst, tree, 0, tt, fullset);
+    pcommit = addoffsetinst(compst, IBackCommit);
+    jumptohere(compst, pchoice);
+    addinstruction(compst, IFail, 0);
+    jumptohere(compst, pcommit);
+  }
+}
+
+
+/*
+** Captures: if pattern has fixed (and not too big) length, and it
+** has no nested captures, use a single IFullCapture instruction
+** after the match; otherwise, enclose the pattern with OpenCapture -
+** CloseCapture.
+*/
+static void codecapture (CompileState *compst, TTree *tree, int tt,
+                         const Charset *fl) {
+  int len = fixedlen(sib1(tree));
+  if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
+    codegen(compst, sib1(tree), 0, tt, fl);
+    addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
+  }
+  else {
+    addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
+    codegen(compst, sib1(tree), 0, tt, fl);
+    addinstcap(compst, ICloseCapture, Cclose, 0, 0);
+  }
+}
+
+
+static void coderuntime (CompileState *compst, TTree *tree, int tt) {
+  addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
+  codegen(compst, sib1(tree), 0, tt, fullset);
+  addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
+}
+
+
+/*
+** Repetion; optimizations:
+** When pattern is a charset, can use special instruction ISpan.
+** When pattern is head fail, or if it starts with characters that
+** are disjoint from what follows the repetions, a simple test
+** is enough (a fail inside the repetition would backtrack to fail
+** again in the following pattern, so there is no need for a choice).
+** When 'opt' is true, the repetion can reuse the Choice already
+** active in the stack.
+*/
+static void coderep (CompileState *compst, TTree *tree, int opt,
+                     const Charset *fl) {
+  Charset st;
+  if (tocharset(tree, &st)) {
+    addinstruction(compst, ISpan, 0);
+    addcharset(compst, st.cs);
+  }
+  else {
+    int e1 = getfirst(tree, fullset, &st);
+    if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
+      /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
+      int jmp;
+      int test = codetestset(compst, &st, 0);
+      codegen(compst, tree, 0, test, fullset);
+      jmp = addoffsetinst(compst, IJmp);
+      jumptohere(compst, test);
+      jumptothere(compst, jmp, test);
+    }
+    else {
+      /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
+      /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
+      int commit, l2;
+      int test = codetestset(compst, &st, e1);
+      int pchoice = NOINST;
+      if (opt)
+        jumptohere(compst, addoffsetinst(compst, IPartialCommit));
+      else
+        pchoice = addoffsetinst(compst, IChoice);
+      l2 = gethere(compst);
+      codegen(compst, tree, 0, NOINST, fullset);
+      commit = addoffsetinst(compst, IPartialCommit);
+      jumptothere(compst, commit, l2);
+      jumptohere(compst, pchoice);
+      jumptohere(compst, test);
+    }
+  }
+}
+
+
+/*
+** Not predicate; optimizations:
+** In any case, if first test fails, 'not' succeeds, so it can jump to
+** the end. If pattern is headfail, that is all (it cannot fail
+** in other parts); this case includes 'not' of simple sets. Otherwise,
+** use the default code (a choice plus a failtwice).
+*/
+static void codenot (CompileState *compst, TTree *tree) {
+  Charset st;
+  int e = getfirst(tree, fullset, &st);
+  int test = codetestset(compst, &st, e);
+  if (headfail(tree))  /* test (fail(p1)) -> L1; fail; L1:  */
+    addinstruction(compst, IFail, 0);
+  else {
+    /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1:  */
+    int pchoice = addoffsetinst(compst, IChoice);
+    codegen(compst, tree, 0, NOINST, fullset);
+    addinstruction(compst, IFailTwice, 0);
+    jumptohere(compst, pchoice);
+  }
+  jumptohere(compst, test);
+}
+
+
+/*
+** change open calls to calls, using list 'positions' to find
+** correct offsets; also optimize tail calls
+*/
+static void correctcalls (CompileState *compst, int *positions,
+                          int from, int to) {
+  int i;
+  Instruction *code = compst->p->code;
+  for (i = from; i < to; i += sizei(&code[i])) {
+    if (code[i].i.code == IOpenCall) {
+      int n = code[i].i.key;  /* rule number */
+      int rule = positions[n];  /* rule position */
+      assert(rule == from || code[rule - 1].i.code == IRet);
+      if (code[finaltarget(code, i + 2)].i.code == IRet)  /* call; ret ? */
+        code[i].i.code = IJmp;  /* tail call */
+      else
+        code[i].i.code = ICall;
+      jumptothere(compst, i, rule);  /* call jumps to respective rule */
+    }
+  }
+  assert(i == to);
+}
+
+
+/*
+** Code for a grammar:
+** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
+*/
+static void codegrammar (CompileState *compst, TTree *grammar) {
+  int positions[MAXRULES];
+  int rulenumber = 0;
+  TTree *rule;
+  int firstcall = addoffsetinst(compst, ICall);  /* call initial rule */
+  int jumptoend = addoffsetinst(compst, IJmp);  /* jump to the end */
+  int start = gethere(compst);  /* here starts the initial rule */
+  jumptohere(compst, firstcall);
+  for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
+    positions[rulenumber++] = gethere(compst);  /* save rule position */
+    codegen(compst, sib1(rule), 0, NOINST, fullset);  /* code rule */
+    addinstruction(compst, IRet, 0);
+  }
+  assert(rule->tag == TTrue);
+  jumptohere(compst, jumptoend);
+  correctcalls(compst, positions, start, gethere(compst));
+}
+
+
+static void codecall (CompileState *compst, TTree *call) {
+  int c = addoffsetinst(compst, IOpenCall);  /* to be corrected later */
+  getinstr(compst, c).i.key = sib2(call)->cap;  /* rule number */
+  assert(sib2(call)->tag == TRule);
+}
+
+
+/*
+** Code first child of a sequence
+** (second child is called in-place to allow tail call)
+** Return 'tt' for second child
+*/
+static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
+                     int tt, const Charset *fl) {
+  if (needfollow(p1)) {
+    Charset fl1;
+    getfirst(p2, fl, &fl1);  /* p1 follow is p2 first */
+    codegen(compst, p1, 0, tt, &fl1);
+  }
+  else  /* use 'fullset' as follow */
+    codegen(compst, p1, 0, tt, fullset);
+  if (fixedlen(p1) != 0)  /* can 'p1' consume anything? */
+    return  NOINST;  /* invalidate test */
+  else return tt;  /* else 'tt' still protects sib2 */
+}
+
+
+/*
+** Main code-generation function: dispatch to auxiliar functions
+** according to kind of tree. ('needfollow' should return true
+** only for consructions that use 'fl'.)
+*/
+static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
+                     const Charset *fl) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: codechar(compst, tree->u.n, tt); break;
+    case TAny: addinstruction(compst, IAny, 0); break;
+    case TSet: codecharset(compst, treebuffer(tree), tt); break;
+    case TTrue: break;
+    case TFalse: addinstruction(compst, IFail, 0); break;
+    case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
+    case TRep: coderep(compst, sib1(tree), opt, fl); break;
+    case TBehind: codebehind(compst, tree); break;
+    case TNot: codenot(compst, sib1(tree)); break;
+    case TAnd: codeand(compst, sib1(tree), tt); break;
+    case TCapture: codecapture(compst, tree, tt, fl); break;
+    case TRunTime: coderuntime(compst, tree, tt); break;
+    case TGrammar: codegrammar(compst, tree); break;
+    case TCall: codecall(compst, tree); break;
+    case TSeq: {
+      tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl);  /* code 'p1' */
+      /* codegen(compst, p2, opt, tt, fl); */
+      tree = sib2(tree); goto tailcall;
+    }
+    default: assert(0);
+  }
+}
+
+
+/*
+** Optimize jumps and other jump-like instructions.
+** * Update labels of instructions with labels to their final
+** destinations (e.g., choice L1; ... L1: jmp L2: becomes
+** choice L2)
+** * Jumps to other instructions that do jumps become those
+** instructions (e.g., jump to return becomes a return; jump
+** to commit becomes a commit)
+*/
+static void peephole (CompileState *compst) {
+  Instruction *code = compst->p->code;
+  int i;
+  for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
+   redo:
+    switch (code[i].i.code) {
+      case IChoice: case ICall: case ICommit: case IPartialCommit:
+      case IBackCommit: case ITestChar: case ITestSet:
+      case ITestAny: {  /* instructions with labels */
+        jumptothere(compst, i, finallabel(code, i));  /* optimize label */
+        break;
+      }
+      case IJmp: {
+        int ft = finaltarget(code, i);
+        switch (code[ft].i.code) {  /* jumping to what? */
+          case IRet: case IFail: case IFailTwice:
+          case IEnd: {  /* instructions with unconditional implicit jumps */
+            code[i] = code[ft];  /* jump becomes that instruction */
+            code[i + 1].i.code = IAny;  /* 'no-op' for target position */
+            break;
+          }
+          case ICommit: case IPartialCommit:
+          case IBackCommit: {  /* inst. with unconditional explicit jumps */
+            int fft = finallabel(code, ft);
+            code[i] = code[ft];  /* jump becomes that instruction... */
+            jumptothere(compst, i, fft);  /* but must correct its offset */
+            goto redo;  /* reoptimize its label */
+          }
+          default: {
+            jumptothere(compst, i, ft);  /* optimize label */
+            break;
+          }
+        }
+        break;
+      }
+      default: break;
+    }
+  }
+  assert(code[i - 1].i.code == IEnd);
+}
+
+
+/*
+** Compile a pattern
+*/
+Instruction *compile (lua_State *L, Pattern *p) {
+  CompileState compst;
+  compst.p = p;  compst.ncode = 0;  compst.L = L;
+  realloccode(L, p, 2);  /* minimum initial size */
+  codegen(&compst, p->tree, 0, NOINST, fullset);
+  addinstruction(&compst, IEnd, 0);
+  realloccode(L, p, compst.ncode);  /* set final size */
+  peephole(&compst);
+  return p->code;
+}
+
+
+/* }====================================================== */
+
--- /dev/null
+++ b/lpcode.h
@@ -1,0 +1,40 @@
+/*
+** $Id: lpcode.h $
+*/
+
+#if !defined(lpcode_h)
+#define lpcode_h
+
+#include "lua.h"
+
+#include "lptypes.h"
+#include "lptree.h"
+#include "lpvm.h"
+
+int tocharset (TTree *tree, Charset *cs);
+int checkaux (TTree *tree, int pred);
+int fixedlen (TTree *tree);
+int hascaptures (TTree *tree);
+int lp_gc (lua_State *L);
+Instruction *compile (lua_State *L, Pattern *p);
+void realloccode (lua_State *L, Pattern *p, int nsize);
+int sizei (const Instruction *i);
+
+
+#define PEnullable      0
+#define PEnofail        1
+
+/*
+** nofail(t) implies that 't' cannot fail with any input
+*/
+#define nofail(t)	checkaux(t, PEnofail)
+
+/*
+** (not nullable(t)) implies 't' cannot match without consuming
+** something
+*/
+#define nullable(t)	checkaux(t, PEnullable)
+
+
+
+#endif
binary files /dev/null b/lpeg-128.gif differ
--- /dev/null
+++ b/lpeg.html
@@ -1,0 +1,1439 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+    <title>LPeg - Parsing Expression Grammars For Lua</title>
+    <link rel="stylesheet"
+          href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
+          type="text/css"/>
+	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+</head>
+<body>
+
+<!-- $Id: lpeg.html $ -->
+
+<div id="container">
+	
+<div id="product">
+  <div id="product_logo">
+    <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
+    <img alt="LPeg logo" src="lpeg-128.gif"/></a>
+    
+  </div>
+  <div id="product_name"><big><strong>LPeg</strong></big></div>
+  <div id="product_description">
+     Parsing Expression Grammars For Lua, version 1.0
+  </div>
+</div> <!-- id="product" -->
+
+<div id="main">
+	
+<div id="navigation">
+<h1>LPeg</h1>
+
+<ul>
+  <li><strong>Home</strong>
+  <ul>
+    <li><a href="#intro">Introduction</a></li>
+    <li><a href="#func">Functions</a></li>
+    <li><a href="#basic">Basic Constructions</a></li>
+    <li><a href="#grammar">Grammars</a></li>
+    <li><a href="#captures">Captures</a></li>
+    <li><a href="#ex">Some Examples</a></li>
+    <li><a href="re.html">The <code>re</code> Module</a></li>
+    <li><a href="#download">Download</a></li>
+    <li><a href="#license">License</a></li>
+  </ul>
+  </li>
+</ul>
+</div> <!-- id="navigation" -->
+
+<div id="content">
+
+
+<h2><a name="intro">Introduction</a></h2>
+
+<p>
+<em>LPeg</em> is a new pattern-matching library for Lua,
+based on
+<a href="http://pdos.csail.mit.edu/%7Ebaford/packrat/">
+Parsing Expression Grammars</a> (PEGs).
+This text is a reference manual for the library.
+For a more formal treatment of LPeg,
+as well as some discussion about its implementation,
+see
+<a href="http://www.inf.puc-rio.br/~roberto/docs/peg.pdf">
+A Text Pattern-Matching Tool based on Parsing Expression Grammars</a>.
+(You may also be interested in my
+<a href="http://vimeo.com/1485123">talk about LPeg</a>
+given at the III Lua Workshop.)
+</p>
+
+<p>
+Following the Snobol tradition,
+LPeg defines patterns as first-class objects.
+That is, patterns are regular Lua values
+(represented by userdata).
+The library offers several functions to create
+and compose patterns.
+With the use of metamethods,
+several of these functions are provided as infix or prefix
+operators.
+On the one hand,
+the result is usually much more verbose than the typical
+encoding of patterns using the so called
+<em>regular expressions</em>
+(which typically are not regular expressions in the formal sense).
+On the other hand,
+first-class patterns allow much better documentation
+(as it is easy to comment the code,
+to break complex definitions in smaller parts, etc.)
+and are extensible,
+as we can define new functions to create and compose patterns.
+</p>
+
+<p>
+For a quick glance of the library,
+the following table summarizes its basic operations
+for creating patterns:
+</p>
+<table border="1">
+<tbody><tr><td><b>Operator</b></td><td><b>Description</b></td></tr>
+<tr><td><a href="#op-p"><code>lpeg.P(string)</code></a></td>
+  <td>Matches <code>string</code> literally</td></tr>
+<tr><td><a href="#op-p"><code>lpeg.P(n)</code></a></td>
+  <td>Matches exactly <code>n</code> characters</td></tr>
+<tr><td><a href="#op-s"><code>lpeg.S(string)</code></a></td>
+  <td>Matches any character in <code>string</code> (Set)</td></tr>
+<tr><td><a href="#op-r"><code>lpeg.R("<em>xy</em>")</code></a></td>
+  <td>Matches any character between <em>x</em> and <em>y</em> (Range)</td></tr>
+<tr><td><a href="#op-pow"><code>patt^n</code></a></td>
+  <td>Matches at least <code>n</code> repetitions of <code>patt</code></td></tr>
+<tr><td><a href="#op-pow"><code>patt^-n</code></a></td>
+  <td>Matches at most <code>n</code> repetitions of <code>patt</code></td></tr>
+<tr><td><a href="#op-mul"><code>patt1 * patt2</code></a></td>
+  <td>Matches <code>patt1</code> followed by <code>patt2</code></td></tr>
+<tr><td><a href="#op-add"><code>patt1 + patt2</code></a></td>
+  <td>Matches <code>patt1</code> or <code>patt2</code>
+      (ordered choice)</td></tr>
+<tr><td><a href="#op-sub"><code>patt1 - patt2</code></a></td>
+  <td>Matches <code>patt1</code> if <code>patt2</code> does not match</td></tr>
+<tr><td><a href="#op-unm"><code>-patt</code></a></td>
+  <td>Equivalent to <code>("" - patt)</code></td></tr>
+<tr><td><a href="#op-len"><code>#patt</code></a></td>
+  <td>Matches <code>patt</code> but consumes no input</td></tr>
+<tr><td><a href="#op-behind"><code>lpeg.B(patt)</code></a></td>
+  <td>Matches <code>patt</code> behind the current position,
+      consuming no input</td></tr>
+</tbody></table>
+
+<p>As a very simple example,
+<code>lpeg.R("09")^1</code> creates a pattern that
+matches a non-empty sequence of digits.
+As a not so simple example,
+<code>-lpeg.P(1)</code>
+(which can be written as <code>lpeg.P(-1)</code>,
+or simply <code>-1</code> for operations expecting a pattern)
+matches an empty string only if it cannot match a single character;
+so, it succeeds only at the end of the subject.
+</p>
+
+<p>
+LPeg also offers the <a href="re.html"><code>re</code> module</a>,
+which implements patterns following a regular-expression style
+(e.g., <code>[09]+</code>).
+(This module is 260 lines of Lua code,
+and of course it uses LPeg to parse regular expressions and
+translate them to regular LPeg patterns.)
+</p>
+
+
+<h2><a name="func">Functions</a></h2>
+
+
+<h3><a name="f-match"></a><code>lpeg.match (pattern, subject [, init])</code></h3>
+<p>
+The matching function.
+It attempts to match the given pattern against the subject string.
+If the match succeeds,
+returns the index in the subject of the first character after the match,
+or the <a href="#captures">captured values</a>
+(if the pattern captured any value).
+</p>
+
+<p>
+An optional numeric argument <code>init</code> makes the match
+start at that position in the subject string.
+As usual in Lua libraries,
+a negative value counts from the end.
+</p>
+
+<p>
+Unlike typical pattern-matching functions,
+<code>match</code> works only in <em>anchored</em> mode;
+that is, it tries to match the pattern with a prefix of
+the given subject string (at position <code>init</code>),
+not with an arbitrary substring of the subject.
+So, if we want to find a pattern anywhere in a string,
+we must either write a loop in Lua or write a pattern that
+matches anywhere.
+This second approach is easy and quite efficient;
+see <a href="#ex">examples</a>.
+</p>
+
+<h3><a name="f-type"></a><code>lpeg.type (value)</code></h3>
+<p>
+If the given value is a pattern,
+returns the string <code>"pattern"</code>.
+Otherwise returns nil.
+</p>
+
+<h3><a name="f-version"></a><code>lpeg.version ()</code></h3>
+<p>
+Returns a string with the running version of LPeg.
+</p>
+
+<h3><a name="f-setstack"></a><code>lpeg.setmaxstack (max)</code></h3>
+<p>
+Sets a limit for the size of the backtrack stack used by LPeg to
+track calls and choices.
+(The default limit is 400.)
+Most well-written patterns need little backtrack levels and
+therefore you seldom need to change this limit;
+before changing it you should try to rewrite your
+pattern to avoid the need for extra space.
+Nevertheless, a few useful patterns may overflow.
+Also, with recursive grammars,
+subjects with deep recursion may also need larger limits.
+</p>
+
+
+<h2><a name="basic">Basic Constructions</a></h2>
+
+<p>
+The following operations build patterns.
+All operations that expect a pattern as an argument
+may receive also strings, tables, numbers, booleans, or functions,
+which are translated to patterns according to
+the rules of function <a href="#op-p"><code>lpeg.P</code></a>.
+</p>
+
+
+
+<h3><a name="op-p"></a><code>lpeg.P (value)</code></h3>
+<p>
+Converts the given value into a proper pattern,
+according to the following rules:
+</p>
+<ul>
+
+<li><p>
+If the argument is a pattern,
+it is returned unmodified.
+</p></li>
+
+<li><p>
+If the argument is a string,
+it is translated to a pattern that matches the string literally.
+</p></li>
+
+<li><p>
+If the argument is a non-negative number <em>n</em>,
+the result is a pattern that matches exactly <em>n</em> characters.
+</p></li>
+
+<li><p>
+If the argument is a negative number <em>-n</em>,
+the result is a pattern that
+succeeds only if the input string has less than <em>n</em> characters left:
+<code>lpeg.P(-n)</code>
+is equivalent to <code>-lpeg.P(n)</code>
+(see the  <a href="#op-unm">unary minus operation</a>).
+</p></li>
+
+<li><p>
+If the argument is a boolean,
+the result is a pattern that always succeeds or always fails
+(according to the boolean value),
+without consuming any input.
+</p></li>
+
+<li><p>
+If the argument is a table,
+it is interpreted as a grammar
+(see <a href="#grammar">Grammars</a>).
+</p></li>
+
+<li><p>
+If the argument is a function,
+returns a pattern equivalent to a
+<a href="#matchtime">match-time capture</a> over the empty string.
+</p></li>
+
+</ul>
+
+
+<h3><a name="op-behind"></a><code>lpeg.B(patt)</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string at the current position
+is preceded by <code>patt</code>.
+Pattern <code>patt</code> must match only strings
+with some fixed length,
+and it cannot contain captures.
+</p>
+
+<p>
+Like the <a href="#op-len">and predicate</a>,
+this pattern never consumes any input,
+independently of success or failure.
+</p>
+
+
+<h3><a name="op-r"></a><code>lpeg.R ({range})</code></h3>
+<p>
+Returns a pattern that matches any single character
+belonging to one of the given <em>ranges</em>.
+Each <code>range</code> is a string <em>xy</em> of length 2,
+representing all characters with code
+between the codes of <em>x</em> and <em>y</em>
+(both inclusive).
+</p>
+
+<p>
+As an example, the pattern
+<code>lpeg.R("09")</code> matches any digit,
+and <code>lpeg.R("az", "AZ")</code> matches any ASCII letter.
+</p>
+
+
+<h3><a name="op-s"></a><code>lpeg.S (string)</code></h3>
+<p>
+Returns a pattern that matches any single character that
+appears in the given string.
+(The <code>S</code> stands for <em>Set</em>.)
+</p>
+
+<p>
+As an example, the pattern
+<code>lpeg.S("+-*/")</code> matches any arithmetic operator.
+</p>
+
+<p>
+Note that, if <code>s</code> is a character
+(that is, a string of length 1),
+then <code>lpeg.P(s)</code> is equivalent to <code>lpeg.S(s)</code>
+which is equivalent to <code>lpeg.R(s..s)</code>.
+Note also that both <code>lpeg.S("")</code> and <code>lpeg.R()</code>
+are patterns that always fail.
+</p>
+
+
+<h3><a name="op-v"></a><code>lpeg.V (v)</code></h3>
+<p>
+This operation creates a non-terminal (a <em>variable</em>)
+for a grammar.
+The created non-terminal refers to the rule indexed by <code>v</code>
+in the enclosing grammar.
+(See <a href="#grammar">Grammars</a> for details.)
+</p>
+
+
+<h3><a name="op-locale"></a><code>lpeg.locale ([table])</code></h3>
+<p>
+Returns a table with patterns for matching some character classes
+according to the current locale.
+The table has fields named
+<code>alnum</code>,
+<code>alpha</code>,
+<code>cntrl</code>,
+<code>digit</code>,
+<code>graph</code>,
+<code>lower</code>,
+<code>print</code>,
+<code>punct</code>,
+<code>space</code>,
+<code>upper</code>, and
+<code>xdigit</code>,
+each one containing a correspondent pattern.
+Each pattern matches any single character that belongs to its class.
+</p>
+
+<p>
+If called with an argument <code>table</code>,
+then it creates those fields inside the given table and
+returns that table. 
+</p>
+
+
+<h3><a name="op-len"></a><code>#patt</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string matches <code>patt</code>,
+but without consuming any input,
+independently of success or failure.
+(This pattern is called an <em>and predicate</em>
+and it is equivalent to
+<em>&amp;patt</em> in the original PEG notation.)
+</p>
+
+
+<p>
+This pattern never produces any capture.
+</p>
+
+
+<h3><a name="op-unm"></a><code>-patt</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string does not match <code>patt</code>.
+It does not consume any input,
+independently of success or failure.
+(This pattern is equivalent to
+<em>!patt</em> in the original PEG notation.)
+</p>
+
+<p>
+As an example, the pattern
+<code>-lpeg.P(1)</code> matches only the end of string.
+</p>
+
+<p>
+This pattern never produces any captures,
+because either <code>patt</code> fails
+or <code>-patt</code> fails.
+(A failing pattern never produces captures.)
+</p>
+
+
+<h3><a name="op-add"></a><code>patt1 + patt2</code></h3>
+<p>
+Returns a pattern equivalent to an <em>ordered choice</em>
+of <code>patt1</code> and <code>patt2</code>.
+(This is denoted by <em>patt1 / patt2</em> in the original PEG notation,
+not to be confused with the <code>/</code> operation in LPeg.)
+It matches either <code>patt1</code> or <code>patt2</code>,
+with no backtracking once one of them succeeds.
+The identity element for this operation is the pattern
+<code>lpeg.P(false)</code>,
+which always fails.
+</p>
+
+<p>
+If both <code>patt1</code> and <code>patt2</code> are
+character sets,
+this operation is equivalent to set union.
+</p>
+<pre class="example">
+lower = lpeg.R("az")
+upper = lpeg.R("AZ")
+letter = lower + upper
+</pre>
+
+
+<h3><a name="op-sub"></a><code>patt1 - patt2</code></h3>
+<p>
+Returns a pattern equivalent to <em>!patt2 patt1</em>.
+This pattern asserts that the input does not match
+<code>patt2</code> and then matches <code>patt1</code>.
+</p>
+
+<p>
+When successful,
+this pattern produces all captures from <code>patt1</code>.
+It never produces any capture from <code>patt2</code>
+(as either <code>patt2</code> fails or
+<code>patt1 - patt2</code> fails).
+</p>
+
+<p>
+If both <code>patt1</code> and <code>patt2</code> are
+character sets,
+this operation is equivalent to set difference.
+Note that <code>-patt</code> is equivalent to <code>"" - patt</code>
+(or <code>0 - patt</code>).
+If <code>patt</code> is a character set,
+<code>1 - patt</code> is its complement.
+</p>
+
+
+<h3><a name="op-mul"></a><code>patt1 * patt2</code></h3>
+<p>
+Returns a pattern that matches <code>patt1</code>
+and then matches <code>patt2</code>,
+starting where <code>patt1</code> finished.
+The identity element for this operation is the
+pattern <code>lpeg.P(true)</code>,
+which always succeeds.
+</p>
+
+<p>
+(LPeg uses the <code>*</code> operator
+[instead of the more obvious <code>..</code>]
+both because it has
+the right priority and because in formal languages it is
+common to use a dot for denoting concatenation.)
+</p>
+
+
+<h3><a name="op-pow"></a><code>patt^n</code></h3>
+<p>
+If <code>n</code> is nonnegative,
+this pattern is
+equivalent to <em>patt<sup>n</sup> patt*</em>:
+It matches <code>n</code> or more occurrences of <code>patt</code>.
+</p>
+
+<p>
+Otherwise, when <code>n</code> is negative,
+this pattern is equivalent to <em>(patt?)<sup>-n</sup></em>:
+It matches at most <code>|n|</code>
+occurrences of <code>patt</code>.
+</p>
+
+<p>
+In particular, <code>patt^0</code> is equivalent to <em>patt*</em>,
+<code>patt^1</code> is equivalent to <em>patt+</em>,
+and <code>patt^-1</code> is equivalent to <em>patt?</em>
+in the original PEG notation.
+</p>
+
+<p>
+In all cases,
+the resulting pattern is greedy with no backtracking
+(also called a <em>possessive</em> repetition).
+That is, it matches only the longest possible sequence
+of matches for <code>patt</code>.
+</p>
+
+
+
+<h2><a name="grammar">Grammars</a></h2>
+
+<p>
+With the use of Lua variables,
+it is possible to define patterns incrementally,
+with each new pattern using previously defined ones.
+However, this technique does not allow the definition of
+recursive patterns.
+For recursive patterns,
+we need real grammars.
+</p>
+
+<p>
+LPeg represents grammars with tables,
+where each entry is a rule.
+</p>
+
+<p>
+The call <code>lpeg.V(v)</code>
+creates a pattern that represents the nonterminal
+(or <em>variable</em>) with index <code>v</code> in a grammar.
+Because the grammar still does not exist when
+this function is evaluated,
+the result is an <em>open reference</em> to the respective rule.
+</p>
+
+<p>
+A table is <em>fixed</em> when it is converted to a pattern
+(either by calling <code>lpeg.P</code> or by using it wherein a
+pattern is expected).
+Then every open reference created by <code>lpeg.V(v)</code>
+is corrected to refer to the rule indexed by <code>v</code> in the table.
+</p>
+
+<p>
+When a table is fixed,
+the result is a pattern that matches its <em>initial rule</em>.
+The entry with index 1 in the table defines its initial rule.
+If that entry is a string,
+it is assumed to be the name of the initial rule.
+Otherwise, LPeg assumes that the entry 1 itself is the initial rule.
+</p>
+
+<p>
+As an example,
+the following grammar matches strings of a's and b's that
+have the same number of a's and b's:
+</p>
+<pre class="example">
+equalcount = lpeg.P{
+  "S";   -- initial rule name
+  S = "a" * lpeg.V"B" + "b" * lpeg.V"A" + "",
+  A = "a" * lpeg.V"S" + "b" * lpeg.V"A" * lpeg.V"A",
+  B = "b" * lpeg.V"S" + "a" * lpeg.V"B" * lpeg.V"B",
+} * -1
+</pre>
+<p>
+It is equivalent to the following grammar in standard PEG notation:
+</p>
+<pre class="example">
+  S <- 'a' B / 'b' A / ''
+  A <- 'a' S / 'b' A A
+  B <- 'b' S / 'a' B B
+</pre>
+
+
+<h2><a name="captures">Captures</a></h2>
+
+<p>
+A <em>capture</em> is a pattern that produces values
+(the so called <em>semantic information</em>)
+according to what it matches.
+LPeg offers several kinds of captures,
+which produces values based on matches and combine these values to
+produce new values.
+Each capture may produce zero or more values.
+</p>
+
+<p>
+The following table summarizes the basic captures:
+</p>
+<table border="1">
+<tbody><tr><td><b>Operation</b></td><td><b>What it Produces</b></td></tr>
+<tr><td><a href="#cap-c"><code>lpeg.C(patt)</code></a></td>
+  <td>the match for <code>patt</code> plus all captures
+      made by <code>patt</code></td></tr>
+<tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td>
+    <td>the value of the n<sup>th</sup> extra argument to
+        <code>lpeg.match</code> (matches the empty string)</td></tr>
+<tr><td><a href="#cap-b"><code>lpeg.Cb(name)</code></a></td>
+    <td>the values produced by the previous
+        group capture named <code>name</code>
+        (matches the empty string)</td></tr>
+<tr><td><a href="#cap-cc"><code>lpeg.Cc(values)</code></a></td>
+    <td>the given values (matches the empty string)</td></tr>
+<tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td>
+  <td>a <em>folding</em> of the captures from <code>patt</code></td></tr>
+<tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, name])</code></a></td>
+    <td>the values produced by <code>patt</code>,
+        optionally tagged with <code>name</code></td></tr>
+<tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td>
+    <td>the current position (matches the empty string)</td></tr>
+<tr><td><a href="#cap-s"><code>lpeg.Cs(patt)</code></a></td>
+  <td>the match for <code>patt</code>
+      with the values from nested captures replacing their matches</td></tr>
+<tr><td><a href="#cap-t"><code>lpeg.Ct(patt)</code></a></td>
+  <td>a table with all captures from <code>patt</code></td></tr>
+<tr><td><a href="#cap-string"><code>patt / string</code></a></td>
+  <td><code>string</code>, with some marks replaced by captures
+      of <code>patt</code></td></tr>
+<tr><td><a href="#cap-num"><code>patt / number</code></a></td>
+  <td>the n-th value captured by <code>patt</code>,
+or no value when <code>number</code> is zero.</td></tr>
+<tr><td><a href="#cap-query"><code>patt / table</code></a></td>
+  <td><code>table[c]</code>, where <code>c</code> is the (first)
+      capture of <code>patt</code></td></tr>
+<tr><td><a href="#cap-func"><code>patt / function</code></a></td>
+  <td>the returns of <code>function</code> applied to the captures
+      of <code>patt</code></td></tr>
+<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
+  <td>the returns of <code>function</code> applied to the captures
+      of <code>patt</code>; the application is done at match time</td></tr>
+</tbody></table>
+
+<p>
+A capture pattern produces its values only when it succeeds.
+For instance,
+the pattern <code>lpeg.C(lpeg.P"a"^-1)</code>
+produces the empty string when there is no <code>"a"</code>
+(because the pattern <code>"a"?</code> succeeds),
+while the pattern <code>lpeg.C("a")^-1</code>
+does not produce any value when there is no <code>"a"</code>
+(because the pattern <code>"a"</code> fails).
+A pattern inside a loop or inside a recursive structure
+produces values for each match.
+</p>
+
+<p>
+Usually,
+LPeg does not specify when (and if) it evaluates its captures.
+(As an example,
+consider the pattern <code>lpeg.P"a" / func / 0</code>.
+Because the "division" by 0 instructs LPeg to throw away the
+results from the pattern,
+LPeg may or may not call <code>func</code>.)
+Therefore, captures should avoid side effects.
+Moreover,
+most captures cannot affect the way a pattern matches a subject.
+The only exception to this rule is the
+so-called <a href="#matchtime"><em>match-time capture</em></a>.
+When a match-time capture matches,
+it forces the immediate evaluation of all its nested captures
+and then calls its corresponding function,
+which defines whether the match succeeds and also
+what values are produced.
+</p>
+
+<h3><a name="cap-c"></a><code>lpeg.C (patt)</code></h3>
+<p>
+Creates a <em>simple capture</em>,
+which captures the substring of the subject that matches <code>patt</code>.
+The captured value is a string.
+If <code>patt</code> has other captures,
+their values are returned after this one.
+</p>
+
+
+<h3><a name="cap-arg"></a><code>lpeg.Carg (n)</code></h3>
+<p>
+Creates an <em>argument capture</em>.
+This pattern matches the empty string and
+produces the value given as the n<sup>th</sup> extra
+argument given in the call to <code>lpeg.match</code>.
+</p>
+
+
+<h3><a name="cap-b"></a><code>lpeg.Cb (name)</code></h3>
+<p>
+Creates a <em>back capture</em>.
+This pattern matches the empty string and
+produces the values produced by the <em>most recent</em>
+<a href="#cap-g">group capture</a> named <code>name</code>
+(where <code>name</code> can be any Lua value).
+</p>
+
+<p>
+<em>Most recent</em> means the last
+<em>complete</em>
+<em>outermost</em>
+group capture with the given name.
+A <em>Complete</em> capture means that the entire pattern
+corresponding to the capture has matched.
+An <em>Outermost</em> capture means that the capture is not inside
+another complete capture.
+</p>
+
+<p>
+In the same way that LPeg does not specify when it evaluates captures,
+it does not specify whether it reuses
+values previously produced by the group
+or re-evaluates them.
+</p>
+
+<h3><a name="cap-cc"></a><code>lpeg.Cc ([value, ...])</code></h3>
+<p>
+Creates a <em>constant capture</em>.
+This pattern matches the empty string and
+produces all given values as its captured values.
+</p>
+
+
+<h3><a name="cap-f"></a><code>lpeg.Cf (patt, func)</code></h3>
+<p>
+Creates a <em>fold capture</em>.
+If <code>patt</code> produces a list of captures
+<em>C<sub>1</sub> C<sub>2</sub> ... C<sub>n</sub></em>,
+this capture will produce the value
+<em>func(...func(func(C<sub>1</sub>, C<sub>2</sub>), C<sub>3</sub>)...,
+         C<sub>n</sub>)</em>,
+that is, it will <em>fold</em>
+(or <em>accumulate</em>, or <em>reduce</em>)
+the captures from <code>patt</code> using function <code>func</code>.
+</p>
+
+<p>
+This capture assumes that <code>patt</code> should produce
+at least one capture with at least one value (of any type),
+which becomes the initial value of an <em>accumulator</em>.
+(If you need a specific initial value,
+you may prefix a <a href="#cap-cc">constant capture</a> to <code>patt</code>.)
+For each subsequent capture,
+LPeg calls <code>func</code>
+with this accumulator as the first argument and all values produced
+by the capture as extra arguments;
+the first result from this call
+becomes the new value for the accumulator.
+The final value of the accumulator becomes the captured value.
+</p>
+
+<p>
+As an example,
+the following pattern matches a list of numbers separated
+by commas and returns their addition:
+</p>
+<pre class="example">
+-- matches a numeral and captures its numerical value
+number = lpeg.R"09"^1 / tonumber
+
+-- matches a list of numbers, capturing their values
+list = number * ("," * number)^0
+
+-- auxiliary function to add two numbers
+function add (acc, newvalue) return acc + newvalue end
+
+-- folds the list of numbers adding them
+sum = lpeg.Cf(list, add)
+
+-- example of use
+print(sum:match("10,30,43"))   --&gt; 83
+</pre>
+
+
+<h3><a name="cap-g"></a><code>lpeg.Cg (patt [, name])</code></h3>
+<p>
+Creates a <em>group capture</em>.
+It groups all values returned by <code>patt</code>
+into a single capture.
+The group may be anonymous (if no name is given)
+or named with the given name
+(which can be any non-nil Lua value).
+</p>
+
+<p>
+An anonymous group serves to join values from several captures into
+a single capture.
+A named group has a different behavior.
+In most situations, a named group returns no values at all.
+Its values are only relevant for a following
+<a href="#cap-b">back capture</a> or when used
+inside a <a href="#cap-t">table capture</a>.
+</p>
+
+
+<h3><a name="cap-p"></a><code>lpeg.Cp ()</code></h3>
+<p>
+Creates a <em>position capture</em>.
+It matches the empty string and
+captures the position in the subject where the match occurs.
+The captured value is a number.
+</p>
+
+
+<h3><a name="cap-s"></a><code>lpeg.Cs (patt)</code></h3>
+<p>
+Creates a <em>substitution capture</em>,
+which captures the substring of the subject that matches <code>patt</code>,
+with <em>substitutions</em>.
+For any capture inside <code>patt</code> with a value,
+the substring that matched the capture is replaced by the capture value
+(which should be a string).
+The final captured value is the string resulting from
+all replacements.
+</p>
+
+
+<h3><a name="cap-t"></a><code>lpeg.Ct (patt)</code></h3>
+<p>
+Creates a <em>table capture</em>.
+This capture returns a table with all values from all anonymous captures
+made by <code>patt</code> inside this table in successive integer keys,
+starting at 1.
+Moreover,
+for each named capture group created by <code>patt</code>,
+the first value of the group is put into the table
+with the group name as its key.
+The captured value is only the table.
+</p>
+
+
+<h3><a name="cap-string"></a><code>patt / string</code></h3>
+<p>
+Creates a <em>string capture</em>.
+It creates a capture string based on <code>string</code>.
+The captured value is a copy of <code>string</code>,
+except that the character <code>%</code> works as an escape character:
+any sequence in <code>string</code> of the form <code>%<em>n</em></code>,
+with <em>n</em> between 1 and 9,
+stands for the match of the <em>n</em>-th capture in <code>patt</code>.
+The sequence <code>%0</code> stands for the whole match.
+The sequence <code>%%</code> stands for a single&nbsp;<code>%</code>.
+</p>
+
+
+<h3><a name="cap-num"></a><code>patt / number</code></h3>
+<p>
+Creates a <em>numbered capture</em>.
+For a non-zero number,
+the captured value is the n-th value
+captured by <code>patt</code>. 
+When <code>number</code> is zero,
+there are no captured values.
+</p>
+
+
+<h3><a name="cap-query"></a><code>patt / table</code></h3>
+<p>
+Creates a <em>query capture</em>.
+It indexes the given table using as key the first value captured by
+<code>patt</code>,
+or the whole match if <code>patt</code> produced no value.
+The value at that index is the final value of the capture.
+If the table does not have that key,
+there is no captured value.
+</p>
+
+
+<h3><a name="cap-func"></a><code>patt / function</code></h3>
+<p>
+Creates a <em>function capture</em>.
+It calls the given function passing all captures made by
+<code>patt</code> as arguments,
+or the whole match if <code>patt</code> made no capture.
+The values returned by the function
+are the final values of the capture.
+In particular,
+if <code>function</code> returns no value,
+there is no captured value.
+</p>
+
+
+<h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
+<p>
+Creates a <em>match-time capture</em>.
+Unlike all other captures,
+this one is evaluated immediately when a match occurs
+(even if it is part of a larger pattern that fails later).
+It forces the immediate evaluation of all its nested captures
+and then calls <code>function</code>.
+</p>
+
+<p>
+The given function gets as arguments the entire subject,
+the current position (after the match of <code>patt</code>),
+plus any capture values produced by <code>patt</code>.
+</p>
+
+<p>
+The first value returned by <code>function</code>
+defines how the match happens.
+If the call returns a number,
+the match succeeds
+and the returned number becomes the new current position.
+(Assuming a subject <em>s</em> and current position <em>i</em>,
+the returned number must be in the range <em>[i, len(s) + 1]</em>.)
+If the call returns <b>true</b>,
+the match succeeds without consuming any input.
+(So, to return <b>true</b> is equivalent to return <em>i</em>.)
+If the call returns <b>false</b>, <b>nil</b>, or no value,
+the match fails.
+</p>
+
+<p>
+Any extra values returned by the function become the
+values produced by the capture.
+</p>
+
+
+
+
+<h2><a name="ex">Some Examples</a></h2>
+
+<h3>Using a Pattern</h3>
+<p>
+This example shows a very simple but complete program
+that builds and uses a pattern:
+</p>
+<pre class="example">
+local lpeg = require "lpeg"
+
+-- matches a word followed by end-of-string
+p = lpeg.R"az"^1 * -1
+
+print(p:match("hello"))        --> 6
+print(lpeg.match(p, "hello"))  --> 6
+print(p:match("1 hello"))      --> nil
+</pre>
+<p>
+The pattern is simply a sequence of one or more lower-case letters
+followed by the end of string (-1).
+The program calls <code>match</code> both as a method
+and as a function.
+In both sucessful cases,
+the match returns 
+the index of the first character after the match,
+which is the string length plus one.
+</p>
+
+
+<h3>Name-value lists</h3>
+<p>
+This example parses a list of name-value pairs and returns a table
+with those pairs:
+</p>
+<pre class="example">
+lpeg.locale(lpeg)   -- adds locale entries into 'lpeg' table
+
+local space = lpeg.space^0
+local name = lpeg.C(lpeg.alpha^1) * space
+local sep = lpeg.S(",;") * space
+local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
+local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
+t = list:match("a=b, c = hi; next = pi")  --> { a = "b", c = "hi", next = "pi" }
+</pre>
+<p>
+Each pair has the format <code>name = name</code> followed by
+an optional separator (a comma or a semicolon).
+The <code>pair</code> pattern encloses the pair in a group pattern,
+so that the names become the values of a single capture.
+The <code>list</code> pattern then folds these captures.
+It starts with an empty table,
+created by a table capture matching an empty string;
+then for each capture (a pair of names) it applies <code>rawset</code>
+over the accumulator (the table) and the capture values (the pair of names).
+<code>rawset</code> returns the table itself,
+so the accumulator is always the table.
+</p>
+
+<h3>Splitting a string</h3>
+<p>
+The following code builds a pattern that
+splits a string using a given pattern
+<code>sep</code> as a separator:
+</p>
+<pre class="example">
+function split (s, sep)
+  sep = lpeg.P(sep)
+  local elem = lpeg.C((1 - sep)^0)
+  local p = elem * (sep * elem)^0
+  return lpeg.match(p, s)
+end
+</pre>
+<p>
+First the function ensures that <code>sep</code> is a proper pattern.
+The pattern <code>elem</code> is a repetition of zero of more
+arbitrary characters as long as there is not a match against
+the separator.
+It also captures its match.
+The pattern <code>p</code> matches a list of elements separated
+by <code>sep</code>.
+</p>
+
+<p>
+If the split results in too many values,
+it may overflow the maximum number of values
+that can be returned by a Lua function.
+In this case,
+we can collect these values in a table:
+</p>
+<pre class="example">
+function split (s, sep)
+  sep = lpeg.P(sep)
+  local elem = lpeg.C((1 - sep)^0)
+  local p = lpeg.Ct(elem * (sep * elem)^0)   -- make a table capture
+  return lpeg.match(p, s)
+end
+</pre>
+
+
+<h3>Searching for a pattern</h3>
+<p>
+The primitive <code>match</code> works only in anchored mode.
+If we want to find a pattern anywhere in a string,
+we must write a pattern that matches anywhere.
+</p>
+
+<p>
+Because patterns are composable,
+we can write a function that,
+given any arbitrary pattern <code>p</code>,
+returns a new pattern that searches for <code>p</code>
+anywhere in a string.
+There are several ways to do the search.
+One way is like this:
+</p>
+<pre class="example">
+function anywhere (p)
+  return lpeg.P{ p + 1 * lpeg.V(1) }
+end
+</pre>
+<p>
+This grammar has a straight reading:
+it matches <code>p</code> or skips one character and tries again.
+</p>
+
+<p>
+If we want to know where the pattern is in the string
+(instead of knowing only that it is there somewhere),
+we can add position captures to the pattern:
+</p>
+<pre class="example">
+local I = lpeg.Cp()
+function anywhere (p)
+  return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
+end
+
+print(anywhere("world"):match("hello world!"))   -> 7   12
+</pre>
+
+<p>
+Another option for the search is like this:
+</p>
+<pre class="example">
+local I = lpeg.Cp()
+function anywhere (p)
+  return (1 - lpeg.P(p))^0 * I * p * I
+end
+</pre>
+<p>
+Again the pattern has a straight reading:
+it skips as many characters as possible while not matching <code>p</code>,
+and then matches <code>p</code> (plus appropriate captures).
+</p>
+
+<p>
+If we want to look for a pattern only at word boundaries,
+we can use the following transformer:
+</p>
+
+<pre class="example">
+local t = lpeg.locale()
+
+function atwordboundary (p)
+  return lpeg.P{
+    [1] = p + t.alpha^0 * (1 - t.alpha)^1 * lpeg.V(1)
+  }
+end
+</pre>
+
+
+<h3><a name="balanced"></a>Balanced parentheses</h3>
+<p>
+The following pattern matches only strings with balanced parentheses:
+</p>
+<pre class="example">
+b = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" }
+</pre>
+<p>
+Reading the first (and only) rule of the given grammar,
+we have that a balanced string is
+an open parenthesis,
+followed by zero or more repetitions of either
+a non-parenthesis character or
+a balanced string (<code>lpeg.V(1)</code>),
+followed by a closing parenthesis.
+</p>
+
+
+<h3>Global substitution</h3>
+<p>
+The next example does a job somewhat similar to <code>string.gsub</code>.
+It receives a pattern and a replacement value,
+and substitutes the replacement value for all occurrences of the pattern
+in a given string:
+</p>
+<pre class="example">
+function gsub (s, patt, repl)
+  patt = lpeg.P(patt)
+  patt = lpeg.Cs((patt / repl + 1)^0)
+  return lpeg.match(patt, s)
+end
+</pre>
+<p>
+As in <code>string.gsub</code>,
+the replacement value can be a string,
+a function, or a table.
+</p>
+
+
+<h3><a name="CSV"></a>Comma-Separated Values (CSV)</h3>
+<p>
+This example breaks a string into comma-separated values,
+returning all fields:
+</p>
+<pre class="example">
+local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
+                    lpeg.C((1 - lpeg.S',\n"')^0)
+
+local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
+
+function csv (s)
+  return lpeg.match(record, s)
+end
+</pre>
+<p>
+A field is either a quoted field
+(which may contain any character except an individual quote,
+which may be written as two quotes that are replaced by one)
+or an unquoted field
+(which cannot contain commas, newlines, or quotes).
+A record is a list of fields separated by commas,
+ending with a newline or the string end (-1).
+</p>
+
+<p>
+As it is,
+the previous pattern returns each field as a separated result.
+If we add a table capture in the definition of <code>record</code>,
+the pattern will return instead a single table
+containing all fields:
+</p>
+<pre>
+local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
+</pre>
+
+
+<h3>UTF-8 and Latin 1</h3>
+<p>
+It is not difficult to use LPeg to convert a string from
+UTF-8 encoding to Latin 1 (ISO 8859-1):
+</p>
+
+<pre class="example">
+-- convert a two-byte UTF-8 sequence to a Latin 1 character
+local function f2 (s)
+  local c1, c2 = string.byte(s, 1, 2)
+  return string.char(c1 * 64 + c2 - 12416)
+end
+
+local utf8 = lpeg.R("\0\127")
+           + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
+
+local decode_pattern = lpeg.Cs(utf8^0) * -1
+</pre>
+<p>
+In this code,
+the definition of UTF-8 is already restricted to the
+Latin 1 range (from 0 to 255).
+Any encoding outside this range (as well as any invalid encoding)
+will not match that pattern.
+</p>
+
+<p>
+As the definition of <code>decode_pattern</code> demands that
+the pattern matches the whole input (because of the -1 at its end),
+any invalid string will simply fail to match,
+without any useful information about the problem.
+We can improve this situation redefining <code>decode_pattern</code>
+as follows:
+</p>
+<pre class="example">
+local function er (_, i) error("invalid encoding at position " .. i) end
+
+local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
+</pre>
+<p>
+Now, if the pattern <code>utf8^0</code> stops
+before the end of the string,
+an appropriate error function is called.
+</p>
+
+
+<h3>UTF-8 and Unicode</h3>
+<p>
+We can extend the previous patterns to handle all Unicode code points.
+Of course,
+we cannot translate them to Latin 1 or any other one-byte encoding.
+Instead, our translation results in a array with the code points
+represented as numbers.
+The full code is here:
+</p>
+<pre class="example">
+-- decode a two-byte UTF-8 sequence
+local function f2 (s)
+  local c1, c2 = string.byte(s, 1, 2)
+  return c1 * 64 + c2 - 12416
+end
+
+-- decode a three-byte UTF-8 sequence
+local function f3 (s)
+  local c1, c2, c3 = string.byte(s, 1, 3)
+  return (c1 * 64 + c2) * 64 + c3 - 925824
+end
+
+-- decode a four-byte UTF-8 sequence
+local function f4 (s)
+  local c1, c2, c3, c4 = string.byte(s, 1, 4)
+  return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
+end
+
+local cont = lpeg.R("\128\191")   -- continuation byte
+
+local utf8 = lpeg.R("\0\127") / string.byte
+           + lpeg.R("\194\223") * cont / f2
+           + lpeg.R("\224\239") * cont * cont / f3
+           + lpeg.R("\240\244") * cont * cont * cont / f4
+
+local decode_pattern = lpeg.Ct(utf8^0) * -1
+</pre>
+
+
+<h3>Lua's long strings</h3>
+<p>
+A long string in Lua starts with the pattern <code>[=*[</code>
+and ends at the first occurrence of <code>]=*]</code> with
+exactly the same number of equal signs.
+If the opening brackets are followed by a newline,
+this newline is discarded
+(that is, it is not part of the string).
+</p>
+
+<p>
+To match a long string in Lua,
+the pattern must capture the first repetition of equal signs and then,
+whenever it finds a candidate for closing the string,
+check whether it has the same number of equal signs.
+</p>
+
+<pre class="example">
+equals = lpeg.P"="^0
+open = "[" * lpeg.Cg(equals, "init") * "[" * lpeg.P"\n"^-1
+close = "]" * lpeg.C(equals) * "]"
+closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end)
+string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close / 1
+</pre>
+
+<p>
+The <code>open</code> pattern matches <code>[=*[</code>,
+capturing the repetitions of equal signs in a group named <code>init</code>;
+it also discharges an optional newline, if present.
+The <code>close</code> pattern matches <code>]=*]</code>,
+also capturing the repetitions of equal signs.
+The <code>closeeq</code> pattern first matches <code>close</code>;
+then it uses a back capture to recover the capture made
+by the previous <code>open</code>,
+which is named <code>init</code>;
+finally it uses a match-time capture to check
+whether both captures are equal.
+The <code>string</code> pattern starts with an <code>open</code>,
+then it goes as far as possible until matching <code>closeeq</code>,
+and then matches the final <code>close</code>.
+The final numbered capture simply discards
+the capture made by <code>close</code>.
+</p>
+
+
+<h3>Arithmetic expressions</h3>
+<p>
+This example is a complete parser and evaluator for simple
+arithmetic expressions.
+We write it in two styles.
+The first approach first builds a syntax tree and then
+traverses this tree to compute the expression value:
+</p>
+<pre class="example">
+-- Lexical Elements
+local Space = lpeg.S(" \n\t")^0
+local Number = lpeg.C(lpeg.P"-"^-1 * lpeg.R("09")^1) * Space
+local TermOp = lpeg.C(lpeg.S("+-")) * Space
+local FactorOp = lpeg.C(lpeg.S("*/")) * Space
+local Open = "(" * Space
+local Close = ")" * Space
+
+-- Grammar
+local Exp, Term, Factor = lpeg.V"Exp", lpeg.V"Term", lpeg.V"Factor"
+G = lpeg.P{ Exp,
+  Exp = lpeg.Ct(Term * (TermOp * Term)^0);
+  Term = lpeg.Ct(Factor * (FactorOp * Factor)^0);
+  Factor = Number + Open * Exp * Close;
+}
+
+G = Space * G * -1
+
+-- Evaluator
+function eval (x)
+  if type(x) == "string" then
+    return tonumber(x)
+  else
+    local op1 = eval(x[1])
+    for i = 2, #x, 2 do
+      local op = x[i]
+      local op2 = eval(x[i + 1])
+      if (op == "+") then op1 = op1 + op2
+      elseif (op == "-") then op1 = op1 - op2
+      elseif (op == "*") then op1 = op1 * op2
+      elseif (op == "/") then op1 = op1 / op2
+      end
+    end
+    return op1
+  end
+end
+
+-- Parser/Evaluator
+function evalExp (s)
+  local t = lpeg.match(G, s)
+  if not t then error("syntax error", 2) end
+  return eval(t)
+end
+
+-- small example
+print(evalExp"3 + 5*9 / (1+1) - 12")   --> 13.5
+</pre>
+
+<p>
+The second style computes the expression value on the fly,
+without building the syntax tree.
+The following grammar takes this approach.
+(It assumes the same lexical elements as before.)
+</p>
+<pre class="example">
+-- Auxiliary function
+function eval (v1, op, v2)
+  if (op == "+") then return v1 + v2
+  elseif (op == "-") then return v1 - v2
+  elseif (op == "*") then return v1 * v2
+  elseif (op == "/") then return v1 / v2
+  end
+end
+
+-- Grammar
+local V = lpeg.V
+G = lpeg.P{ "Exp",
+  Exp = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);
+  Term = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);
+  Factor = Number / tonumber + Open * V"Exp" * Close;
+}
+
+-- small example
+print(lpeg.match(G, "3 + 5*9 / (1+1) - 12"))   --> 13.5
+</pre>
+<p>
+Note the use of the fold (accumulator) capture.
+To compute the value of an expression,
+the accumulator starts with the value of the first term,
+and then applies <code>eval</code> over
+the accumulator, the operator,
+and the new term for each repetition.
+</p>
+
+
+
+<h2><a name="download"></a>Download</h2>
+
+<p>LPeg 
+<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.2.tar.gz">source code</a>.</p>
+
+
+<h2><a name="license">License</a></h2>
+
+<p>
+Copyright &copy; 2007-2019 Lua.org, PUC-Rio.
+</p>
+<p>
+Permission is hereby granted, free of charge,
+to any person obtaining a copy of this software and
+associated documentation files (the "Software"),
+to deal in the Software without restriction,
+including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software,
+and to permit persons to whom the Software is
+furnished to do so,
+subject to the following conditions:
+</p>
+
+<p>
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions of the Software.
+</p>
+
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+</p>
+
+</div> <!-- id="content" -->
+
+</div> <!-- id="main" -->
+
+</div> <!-- id="container" -->
+
+</body>
+</html> 
--- /dev/null
+++ b/lpprint.c
@@ -1,0 +1,244 @@
+/*
+** $Id: lpprint.c $
+** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+*/
+
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+
+
+#include "lptypes.h"
+#include "lpprint.h"
+#include "lpcode.h"
+
+
+#if defined(LPEG_DEBUG)
+
+/*
+** {======================================================
+** Printing patterns (for debugging)
+** =======================================================
+*/
+
+
+void printcharset (const byte *st) {
+  int i;
+  printf("[");
+  for (i = 0; i <= UCHAR_MAX; i++) {
+    int first = i;
+    while (testchar(st, i) && i <= UCHAR_MAX) i++;
+    if (i - 1 == first)  /* unary range? */
+      printf("(%02x)", first);
+    else if (i - 1 > first)  /* non-empty range? */
+      printf("(%02x-%02x)", first, i - 1);
+  }
+  printf("]");
+}
+
+
+static const char *capkind (int kind) {
+  const char *const modes[] = {
+    "close", "position", "constant", "backref",
+    "argument", "simple", "table", "function",
+    "query", "string", "num", "substitution", "fold",
+    "runtime", "group"};
+  return modes[kind];
+}
+
+
+static void printjmp (const Instruction *op, const Instruction *p) {
+  printf("-> %d", (int)(p + (p + 1)->offset - op));
+}
+
+
+void printinst (const Instruction *op, const Instruction *p) {
+  const char *const names[] = {
+    "any", "char", "set",
+    "testany", "testchar", "testset",
+    "span", "behind",
+    "ret", "end",
+    "choice", "jmp", "call", "open_call",
+    "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
+     "fullcapture", "opencapture", "closecapture", "closeruntime"
+  };
+  printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
+  switch ((Opcode)p->i.code) {
+    case IChar: {
+      printf("'%c'", p->i.aux);
+      break;
+    }
+    case ITestChar: {
+      printf("'%c'", p->i.aux); printjmp(op, p);
+      break;
+    }
+    case IFullCapture: {
+      printf("%s (size = %d)  (idx = %d)",
+             capkind(getkind(p)), getoff(p), p->i.key);
+      break;
+    }
+    case IOpenCapture: {
+      printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
+      break;
+    }
+    case ISet: {
+      printcharset((p+1)->buff);
+      break;
+    }
+    case ITestSet: {
+      printcharset((p+2)->buff); printjmp(op, p);
+      break;
+    }
+    case ISpan: {
+      printcharset((p+1)->buff);
+      break;
+    }
+    case IOpenCall: {
+      printf("-> %d", (p + 1)->offset);
+      break;
+    }
+    case IBehind: {
+      printf("%d", p->i.aux);
+      break;
+    }
+    case IJmp: case ICall: case ICommit: case IChoice:
+    case IPartialCommit: case IBackCommit: case ITestAny: {
+      printjmp(op, p);
+      break;
+    }
+    default: break;
+  }
+  printf("\n");
+}
+
+
+void printpatt (Instruction *p, int n) {
+  Instruction *op = p;
+  while (p < op + n) {
+    printinst(op, p);
+    p += sizei(p);
+  }
+}
+
+
+#if defined(LPEG_DEBUG)
+static void printcap (Capture *cap) {
+  printf("%s (idx: %d - size: %d) -> %p\n",
+         capkind(cap->kind), cap->idx, cap->siz, cap->s);
+}
+
+
+void printcaplist (Capture *cap, Capture *limit) {
+  printf(">======\n");
+  for (; cap->s && (limit == NULL || cap < limit); cap++)
+    printcap(cap);
+  printf("=======\n");
+}
+#endif
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Printing trees (for debugging)
+** =======================================================
+*/
+
+static const char *tagnames[] = {
+  "char", "set", "any",
+  "true", "false",
+  "rep",
+  "seq", "choice",
+  "not", "and",
+  "call", "opencall", "rule", "grammar",
+  "behind",
+  "capture", "run-time"
+};
+
+
+void printtree (TTree *tree, int ident) {
+  int i;
+  for (i = 0; i < ident; i++) printf(" ");
+  printf("%s", tagnames[tree->tag]);
+  switch (tree->tag) {
+    case TChar: {
+      int c = tree->u.n;
+      if (isprint(c))
+        printf(" '%c'\n", c);
+      else
+        printf(" (%02X)\n", c);
+      break;
+    }
+    case TSet: {
+      printcharset(treebuffer(tree));
+      printf("\n");
+      break;
+    }
+    case TOpenCall: case TCall: {
+      assert(sib2(tree)->tag == TRule);
+      printf(" key: %d  (rule: %d)\n", tree->key, sib2(tree)->cap);
+      break;
+    }
+    case TBehind: {
+      printf(" %d\n", tree->u.n);
+        printtree(sib1(tree), ident + 2);
+      break;
+    }
+    case TCapture: {
+      printf(" kind: '%s'  key: %d\n", capkind(tree->cap), tree->key);
+      printtree(sib1(tree), ident + 2);
+      break;
+    }
+    case TRule: {
+      printf(" n: %d  key: %d\n", tree->cap, tree->key);
+      printtree(sib1(tree), ident + 2);
+      break;  /* do not print next rule as a sibling */
+    }
+    case TGrammar: {
+      TTree *rule = sib1(tree);
+      printf(" %d\n", tree->u.n);  /* number of rules */
+      for (i = 0; i < tree->u.n; i++) {
+        printtree(rule, ident + 2);
+        rule = sib2(rule);
+      }
+      assert(rule->tag == TTrue);  /* sentinel */
+      break;
+    }
+    default: {
+      int sibs = numsiblings[tree->tag];
+      printf("\n");
+      if (sibs >= 1) {
+        printtree(sib1(tree), ident + 2);
+        if (sibs >= 2)
+          printtree(sib2(tree), ident + 2);
+      }
+      break;
+    }
+  }
+}
+
+
+void printktable (lua_State *L, int idx) {
+  int n, i;
+  lua_getuservalue(L, idx);
+  if (lua_isnil(L, -1))  /* no ktable? */
+    return;
+  n = lua_rawlen(L, -1);
+  printf("[");
+  for (i = 1; i <= n; i++) {
+    printf("%d = ", i);
+    lua_rawgeti(L, -1, i);
+    if (lua_isstring(L, -1))
+      printf("%s  ", lua_tostring(L, -1));
+    else
+      printf("%s  ", lua_typename(L, lua_type(L, -1)));
+    lua_pop(L, 1);
+  }
+  printf("]\n");
+  /* leave ktable at the stack */
+}
+
+/* }====================================================== */
+
+#endif
--- /dev/null
+++ b/lpprint.h
@@ -1,0 +1,36 @@
+/*
+** $Id: lpprint.h $
+*/
+
+
+#if !defined(lpprint_h)
+#define lpprint_h
+
+
+#include "lptree.h"
+#include "lpvm.h"
+
+
+#if defined(LPEG_DEBUG)
+
+void printpatt (Instruction *p, int n);
+void printtree (TTree *tree, int ident);
+void printktable (lua_State *L, int idx);
+void printcharset (const byte *st);
+void printcaplist (Capture *cap, Capture *limit);
+void printinst (const Instruction *op, const Instruction *p);
+
+#else
+
+#define printktable(L,idx)  \
+	luaL_error(L, "function only implemented in debug mode")
+#define printtree(tree,i)  \
+	luaL_error(L, "function only implemented in debug mode")
+#define printpatt(p,n)  \
+	luaL_error(L, "function only implemented in debug mode")
+
+#endif
+
+
+#endif
+
--- /dev/null
+++ b/lptree.c
@@ -1,0 +1,1305 @@
+/*
+** $Id: lptree.c $
+** Copyright 2013, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+*/
+
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lptypes.h"
+#include "lpcap.h"
+#include "lpcode.h"
+#include "lpprint.h"
+#include "lptree.h"
+
+
+/* number of siblings for each tree */
+const byte numsiblings[] = {
+  0, 0, 0,	/* char, set, any */
+  0, 0,		/* true, false */	
+  1,		/* rep */
+  2, 2,		/* seq, choice */
+  1, 1,		/* not, and */
+  0, 0, 2, 1,  /* call, opencall, rule, grammar */
+  1,  /* behind */
+  1, 1  /* capture, runtime capture */
+};
+
+
+static TTree *newgrammar (lua_State *L, int arg);
+
+
+/*
+** returns a reasonable name for value at index 'idx' on the stack
+*/
+static const char *val2str (lua_State *L, int idx) {
+  const char *k = lua_tostring(L, idx);
+  if (k != NULL)
+    return lua_pushfstring(L, "%s", k);
+  else
+    return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
+}
+
+
+/*
+** Fix a TOpenCall into a TCall node, using table 'postable' to
+** translate a key to its rule address in the tree. Raises an
+** error if key does not exist.
+*/
+static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
+  int n;
+  lua_rawgeti(L, -1, t->key);  /* get rule's name */
+  lua_gettable(L, postable);  /* query name in position table */
+  n = lua_tonumber(L, -1);  /* get (absolute) position */
+  lua_pop(L, 1);  /* remove position */
+  if (n == 0) {  /* no position? */
+    lua_rawgeti(L, -1, t->key);  /* get rule's name again */
+    luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
+  }
+  t->tag = TCall;
+  t->u.ps = n - (t - g);  /* position relative to node */
+  assert(sib2(t)->tag == TRule);
+  sib2(t)->key = t->key;  /* fix rule's key */
+}
+
+
+/*
+** Transform left associative constructions into right
+** associative ones, for sequence and choice; that is:
+** (t11 + t12) + t2  =>  t11 + (t12 + t2)
+** (t11 * t12) * t2  =>  t11 * (t12 * t2)
+** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
+*/
+static void correctassociativity (TTree *tree) {
+  TTree *t1 = sib1(tree);
+  assert(tree->tag == TChoice || tree->tag == TSeq);
+  while (t1->tag == tree->tag) {
+    int n1size = tree->u.ps - 1;  /* t1 == Op t11 t12 */
+    int n11size = t1->u.ps - 1;
+    int n12size = n1size - n11size - 1;
+    memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */
+    tree->u.ps = n11size + 1;
+    sib2(tree)->tag = tree->tag;
+    sib2(tree)->u.ps = n12size + 1;
+  }
+}
+
+
+/*
+** Make final adjustments in a tree. Fix open calls in tree 't',
+** making them refer to their respective rules or raising appropriate
+** errors (if not inside a grammar). Correct associativity of associative
+** constructions (making them right associative). Assume that tree's
+** ktable is at the top of the stack (for error messages).
+*/
+static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
+ tailcall:
+  switch (t->tag) {
+    case TGrammar:  /* subgrammars were already fixed */
+      return;
+    case TOpenCall: {
+      if (g != NULL)  /* inside a grammar? */
+        fixonecall(L, postable, g, t);
+      else {  /* open call outside grammar */
+        lua_rawgeti(L, -1, t->key);
+        luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
+      }
+      break;
+    }
+    case TSeq: case TChoice:
+      correctassociativity(t);
+      break;
+  }
+  switch (numsiblings[t->tag]) {
+    case 1: /* finalfix(L, postable, g, sib1(t)); */
+      t = sib1(t); goto tailcall;
+    case 2:
+      finalfix(L, postable, g, sib1(t));
+      t = sib2(t); goto tailcall;  /* finalfix(L, postable, g, sib2(t)); */
+    default: assert(numsiblings[t->tag] == 0); break;
+  }
+}
+
+
+
+/*
+** {===================================================================
+** KTable manipulation
+**
+** - The ktable of a pattern 'p' can be shared by other patterns that
+** contain 'p' and no other constants. Because of this sharing, we
+** should not add elements to a 'ktable' unless it was freshly created
+** for the new pattern.
+**
+** - The maximum index in a ktable is USHRT_MAX, because trees and
+** patterns use unsigned shorts to store those indices.
+** ====================================================================
+*/
+
+/*
+** Create a new 'ktable' to the pattern at the top of the stack.
+*/
+static void newktable (lua_State *L, int n) {
+  lua_createtable(L, n, 0);  /* create a fresh table */
+  lua_setuservalue(L, -2);  /* set it as 'ktable' for pattern */
+}
+
+
+/*
+** Add element 'idx' to 'ktable' of pattern at the top of the stack;
+** Return index of new element.
+** If new element is nil, does not add it to table (as it would be
+** useless) and returns 0, as ktable[0] is always nil.
+*/
+static int addtoktable (lua_State *L, int idx) {
+  if (lua_isnil(L, idx))  /* nil value? */
+    return 0;
+  else {
+    int n;
+    lua_getuservalue(L, -1);  /* get ktable from pattern */
+    n = lua_rawlen(L, -1);
+    if (n >= USHRT_MAX)
+      luaL_error(L, "too many Lua values in pattern");
+    lua_pushvalue(L, idx);  /* element to be added */
+    lua_rawseti(L, -2, ++n);
+    lua_pop(L, 1);  /* remove 'ktable' */
+    return n;
+  }
+}
+
+
+/*
+** Return the number of elements in the ktable at 'idx'.
+** In Lua 5.2/5.3, default "environment" for patterns is nil, not
+** a table. Treat it as an empty table. In Lua 5.1, assumes that
+** the environment has no numeric indices (len == 0)
+*/
+static int ktablelen (lua_State *L, int idx) {
+  if (!lua_istable(L, idx)) return 0;
+  else return lua_rawlen(L, idx);
+}
+
+
+/*
+** Concatentate the contents of table 'idx1' into table 'idx2'.
+** (Assume that both indices are negative.)
+** Return the original length of table 'idx2' (or 0, if no
+** element was added, as there is no need to correct any index).
+*/
+static int concattable (lua_State *L, int idx1, int idx2) {
+  int i;
+  int n1 = ktablelen(L, idx1);
+  int n2 = ktablelen(L, idx2);
+  if (n1 + n2 > USHRT_MAX)
+    luaL_error(L, "too many Lua values in pattern");
+  if (n1 == 0) return 0;  /* nothing to correct */
+  for (i = 1; i <= n1; i++) {
+    lua_rawgeti(L, idx1, i);
+    lua_rawseti(L, idx2 - 1, n2 + i);  /* correct 'idx2' */
+  }
+  return n2;
+}
+
+
+/*
+** When joining 'ktables', constants from one of the subpatterns must
+** be renumbered; 'correctkeys' corrects their indices (adding 'n'
+** to each of them)
+*/
+static void correctkeys (TTree *tree, int n) {
+  if (n == 0) return;  /* no correction? */
+ tailcall:
+  switch (tree->tag) {
+    case TOpenCall: case TCall: case TRunTime: case TRule: {
+      if (tree->key > 0)
+        tree->key += n;
+      break;
+    }
+    case TCapture: {
+      if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
+        tree->key += n;
+      break;
+    }
+    default: break;
+  }
+  switch (numsiblings[tree->tag]) {
+    case 1:  /* correctkeys(sib1(tree), n); */
+      tree = sib1(tree); goto tailcall;
+    case 2:
+      correctkeys(sib1(tree), n);
+      tree = sib2(tree); goto tailcall;  /* correctkeys(sib2(tree), n); */
+    default: assert(numsiblings[tree->tag] == 0); break;
+  }
+}
+
+
+/*
+** Join the ktables from p1 and p2 the ktable for the new pattern at the
+** top of the stack, reusing them when possible.
+*/
+static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
+  int n1, n2;
+  lua_getuservalue(L, p1);  /* get ktables */
+  lua_getuservalue(L, p2);
+  n1 = ktablelen(L, -2);
+  n2 = ktablelen(L, -1);
+  if (n1 == 0 && n2 == 0)  /* are both tables empty? */
+    lua_pop(L, 2);  /* nothing to be done; pop tables */
+  else if (n2 == 0 || lp_equal(L, -2, -1)) {  /* 2nd table empty or equal? */
+    lua_pop(L, 1);  /* pop 2nd table */
+    lua_setuservalue(L, -2);  /* set 1st ktable into new pattern */
+  }
+  else if (n1 == 0) {  /* first table is empty? */
+    lua_setuservalue(L, -3);  /* set 2nd table into new pattern */
+    lua_pop(L, 1);  /* pop 1st table */
+  }
+  else {
+    lua_createtable(L, n1 + n2, 0);  /* create ktable for new pattern */
+    /* stack: new p; ktable p1; ktable p2; new ktable */
+    concattable(L, -3, -1);  /* from p1 into new ktable */
+    concattable(L, -2, -1);  /* from p2 into new ktable */
+    lua_setuservalue(L, -4);  /* new ktable becomes 'p' environment */
+    lua_pop(L, 2);  /* pop other ktables */
+    correctkeys(t2, n1);  /* correction for indices from p2 */
+  }
+}
+
+
+/*
+** copy 'ktable' of element 'idx' to new tree (on top of stack)
+*/
+static void copyktable (lua_State *L, int idx) {
+  lua_getuservalue(L, idx);
+  lua_setuservalue(L, -2);
+}
+
+
+/*
+** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
+** from tree at the top of the stack, and correct corresponding
+** tree.
+*/
+static void mergektable (lua_State *L, int idx, TTree *stree) {
+  int n;
+  lua_getuservalue(L, -1);  /* get ktables */
+  lua_getuservalue(L, idx);
+  n = concattable(L, -1, -2);
+  lua_pop(L, 2);  /* remove both ktables */
+  correctkeys(stree, n);
+}
+
+
+/*
+** Create a new 'ktable' to the pattern at the top of the stack, adding
+** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
+** Return index of new element.
+*/
+static int addtonewktable (lua_State *L, int p, int idx) {
+  newktable(L, 1);
+  if (p)
+    mergektable(L, p, NULL);
+  return addtoktable(L, idx);
+}
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Tree generation
+** =======================================================
+*/
+
+/*
+** In 5.2, could use 'luaL_testudata'...
+*/
+static int testpattern (lua_State *L, int idx) {
+  if (lua_touserdata(L, idx)) {  /* value is a userdata? */
+    if (lua_getmetatable(L, idx)) {  /* does it have a metatable? */
+      luaL_getmetatable(L, PATTERN_T);
+      if (lua_rawequal(L, -1, -2)) {  /* does it have the correct mt? */
+        lua_pop(L, 2);  /* remove both metatables */
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+
+static Pattern *getpattern (lua_State *L, int idx) {
+  return (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
+}
+
+
+static int getsize (lua_State *L, int idx) {
+  return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
+}
+
+
+static TTree *gettree (lua_State *L, int idx, int *len) {
+  Pattern *p = getpattern(L, idx);
+  if (len)
+    *len = getsize(L, idx);
+  return p->tree;
+}
+
+
+/*
+** create a pattern. Set its uservalue (the 'ktable') equal to its
+** metatable. (It could be any empty sequence; the metatable is at
+** hand here, so we use it.)
+*/
+static TTree *newtree (lua_State *L, int len) {
+  size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
+  Pattern *p = (Pattern *)lua_newuserdata(L, size);
+  luaL_getmetatable(L, PATTERN_T);
+  lua_pushvalue(L, -1);
+  lua_setuservalue(L, -3);
+  lua_setmetatable(L, -2);
+  p->code = NULL;  p->codesize = 0;
+  return p->tree;
+}
+
+
+static TTree *newleaf (lua_State *L, int tag) {
+  TTree *tree = newtree(L, 1);
+  tree->tag = tag;
+  return tree;
+}
+
+
+static TTree *newcharset (lua_State *L) {
+  TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
+  tree->tag = TSet;
+  loopset(i, treebuffer(tree)[i] = 0);
+  return tree;
+}
+
+
+/*
+** add to tree a sequence where first sibling is 'sib' (with size
+** 'sibsize'); returns position for second sibling
+*/
+static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
+  tree->tag = TSeq; tree->u.ps = sibsize + 1;
+  memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
+  return sib2(tree);
+}
+
+
+/*
+** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
+** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
+** must build a sequence of sequence of sequence...)
+*/
+static void fillseq (TTree *tree, int tag, int n, const char *s) {
+  int i;
+  for (i = 0; i < n - 1; i++) {  /* initial n-1 copies of Seq tag; Seq ... */
+    tree->tag = TSeq; tree->u.ps = 2;
+    sib1(tree)->tag = tag;
+    sib1(tree)->u.n = s ? (byte)s[i] : 0;
+    tree = sib2(tree);
+  }
+  tree->tag = tag;  /* last one does not need TSeq */
+  tree->u.n = s ? (byte)s[i] : 0;
+}
+
+
+/*
+** Numbers as patterns:
+** 0 == true (always match); n == TAny repeated 'n' times;
+** -n == not (TAny repeated 'n' times)
+*/
+static TTree *numtree (lua_State *L, int n) {
+  if (n == 0)
+    return newleaf(L, TTrue);
+  else {
+    TTree *tree, *nd;
+    if (n > 0)
+      tree = nd = newtree(L, 2 * n - 1);
+    else {  /* negative: code it as !(-n) */
+      n = -n;
+      tree = newtree(L, 2 * n);
+      tree->tag = TNot;
+      nd = sib1(tree);
+    }
+    fillseq(nd, TAny, n, NULL);  /* sequence of 'n' any's */
+    return tree;
+  }
+}
+
+
+/*
+** Convert value at index 'idx' to a pattern
+*/
+static TTree *getpatt (lua_State *L, int idx, int *len) {
+  TTree *tree;
+  switch (lua_type(L, idx)) {
+    case LUA_TSTRING: {
+      size_t slen;
+      const char *s = lua_tolstring(L, idx, &slen);  /* get string */
+      if (slen == 0)  /* empty? */
+        tree = newleaf(L, TTrue);  /* always match */
+      else {
+        tree = newtree(L, 2 * (slen - 1) + 1);
+        fillseq(tree, TChar, slen, s);  /* sequence of 'slen' chars */
+      }
+      break;
+    }
+    case LUA_TNUMBER: {
+      int n = lua_tointeger(L, idx);
+      tree = numtree(L, n);
+      break;
+    }
+    case LUA_TBOOLEAN: {
+      tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
+      break;
+    }
+    case LUA_TTABLE: {
+      tree = newgrammar(L, idx);
+      break;
+    }
+    case LUA_TFUNCTION: {
+      tree = newtree(L, 2);
+      tree->tag = TRunTime;
+      tree->key = addtonewktable(L, 0, idx);
+      sib1(tree)->tag = TTrue;
+      break;
+    }
+    default: {
+      return gettree(L, idx, len);
+    }
+  }
+  lua_replace(L, idx);  /* put new tree into 'idx' slot */
+  if (len)
+    *len = getsize(L, idx);
+  return tree;
+}
+
+
+/*
+** create a new tree, whith a new root and one sibling.
+** Sibling must be on the Lua stack, at index 1.
+*/
+static TTree *newroot1sib (lua_State *L, int tag) {
+  int s1;
+  TTree *tree1 = getpatt(L, 1, &s1);
+  TTree *tree = newtree(L, 1 + s1);  /* create new tree */
+  tree->tag = tag;
+  memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
+  copyktable(L, 1);
+  return tree;
+}
+
+
+/*
+** create a new tree, whith a new root and 2 siblings.
+** Siblings must be on the Lua stack, first one at index 1.
+*/
+static TTree *newroot2sib (lua_State *L, int tag) {
+  int s1, s2;
+  TTree *tree1 = getpatt(L, 1, &s1);
+  TTree *tree2 = getpatt(L, 2, &s2);
+  TTree *tree = newtree(L, 1 + s1 + s2);  /* create new tree */
+  tree->tag = tag;
+  tree->u.ps =  1 + s1;
+  memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
+  memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
+  joinktables(L, 1, sib2(tree), 2);
+  return tree;
+}
+
+
+static int lp_P (lua_State *L) {
+  luaL_checkany(L, 1);
+  getpatt(L, 1, NULL);
+  lua_settop(L, 1);
+  return 1;
+}
+
+
+/*
+** sequence operator; optimizations:
+** false x => false, x true => x, true x => x
+** (cannot do x . false => false because x may have runtime captures)
+*/
+static int lp_seq (lua_State *L) {
+  TTree *tree1 = getpatt(L, 1, NULL);
+  TTree *tree2 = getpatt(L, 2, NULL);
+  if (tree1->tag == TFalse || tree2->tag == TTrue)
+    lua_pushvalue(L, 1);  /* false . x == false, x . true = x */
+  else if (tree1->tag == TTrue)
+    lua_pushvalue(L, 2);  /* true . x = x */
+  else
+    newroot2sib(L, TSeq);
+  return 1;
+}
+
+
+/*
+** choice operator; optimizations:
+** charset / charset => charset
+** true / x => true, x / false => x, false / x => x
+** (x / true is not equivalent to true)
+*/
+static int lp_choice (lua_State *L) {
+  Charset st1, st2;
+  TTree *t1 = getpatt(L, 1, NULL);
+  TTree *t2 = getpatt(L, 2, NULL);
+  if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
+    TTree *t = newcharset(L);
+    loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
+  }
+  else if (nofail(t1) || t2->tag == TFalse)
+    lua_pushvalue(L, 1);  /* true / x => true, x / false => x */
+  else if (t1->tag == TFalse)
+    lua_pushvalue(L, 2);  /* false / x => x */
+  else
+    newroot2sib(L, TChoice);
+  return 1;
+}
+
+
+/*
+** p^n
+*/
+static int lp_star (lua_State *L) {
+  int size1;
+  int n = (int)luaL_checkinteger(L, 2);
+  TTree *tree1 = getpatt(L, 1, &size1);
+  if (n >= 0) {  /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
+    TTree *tree = newtree(L, (n + 1) * (size1 + 1));
+    if (nullable(tree1))
+      luaL_error(L, "loop body may accept empty string");
+    while (n--)  /* repeat 'n' times */
+      tree = seqaux(tree, tree1, size1);
+    tree->tag = TRep;
+    memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
+  }
+  else {  /* choice (seq tree1 ... choice tree1 true ...) true */
+    TTree *tree;
+    n = -n;
+    /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
+    tree = newtree(L, n * (size1 + 3) - 1);
+    for (; n > 1; n--) {  /* repeat (n - 1) times */
+      tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
+      sib2(tree)->tag = TTrue;
+      tree = sib1(tree);
+      tree = seqaux(tree, tree1, size1);
+    }
+    tree->tag = TChoice; tree->u.ps = size1 + 1;
+    sib2(tree)->tag = TTrue;
+    memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
+  }
+  copyktable(L, 1);
+  return 1;
+}
+
+
+/*
+** #p == &p
+*/
+static int lp_and (lua_State *L) {
+  newroot1sib(L, TAnd);
+  return 1;
+}
+
+
+/*
+** -p == !p
+*/
+static int lp_not (lua_State *L) {
+  newroot1sib(L, TNot);
+  return 1;
+}
+
+
+/*
+** [t1 - t2] == Seq (Not t2) t1
+** If t1 and t2 are charsets, make their difference.
+*/
+static int lp_sub (lua_State *L) {
+  Charset st1, st2;
+  int s1, s2;
+  TTree *t1 = getpatt(L, 1, &s1);
+  TTree *t2 = getpatt(L, 2, &s2);
+  if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
+    TTree *t = newcharset(L);
+    loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
+  }
+  else {
+    TTree *tree = newtree(L, 2 + s1 + s2);
+    tree->tag = TSeq;  /* sequence of... */
+    tree->u.ps =  2 + s2;
+    sib1(tree)->tag = TNot;  /* ...not... */
+    memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree));  /* ...t2 */
+    memcpy(sib2(tree), t1, s1 * sizeof(TTree));  /* ... and t1 */
+    joinktables(L, 1, sib1(tree), 2);
+  }
+  return 1;
+}
+
+
+static int lp_set (lua_State *L) {
+  size_t l;
+  const char *s = luaL_checklstring(L, 1, &l);
+  TTree *tree = newcharset(L);
+  while (l--) {
+    setchar(treebuffer(tree), (byte)(*s));
+    s++;
+  }
+  return 1;
+}
+
+
+static int lp_range (lua_State *L) {
+  int arg;
+  int top = lua_gettop(L);
+  TTree *tree = newcharset(L);
+  for (arg = 1; arg <= top; arg++) {
+    int c;
+    size_t l;
+    const char *r = luaL_checklstring(L, arg, &l);
+    luaL_argcheck(L, l == 2, arg, "range must have two characters");
+    for (c = (byte)r[0]; c <= (byte)r[1]; c++)
+      setchar(treebuffer(tree), c);
+  }
+  return 1;
+}
+
+
+/*
+** Look-behind predicate
+*/
+static int lp_behind (lua_State *L) {
+  TTree *tree;
+  TTree *tree1 = getpatt(L, 1, NULL);
+  int n = fixedlen(tree1);
+  luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length");
+  luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
+  luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
+  tree = newroot1sib(L, TBehind);
+  tree->u.n = n;
+  return 1;
+}
+
+
+/*
+** Create a non-terminal
+*/
+static int lp_V (lua_State *L) {
+  TTree *tree = newleaf(L, TOpenCall);
+  luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
+  tree->key = addtonewktable(L, 0, 1);
+  return 1;
+}
+
+
+/*
+** Create a tree for a non-empty capture, with a body and
+** optionally with an associated Lua value (at index 'labelidx' in the
+** stack)
+*/
+static int capture_aux (lua_State *L, int cap, int labelidx) {
+  TTree *tree = newroot1sib(L, TCapture);
+  tree->cap = cap;
+  tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
+  return 1;
+}
+
+
+/*
+** Fill a tree with an empty capture, using an empty (TTrue) sibling.
+** (The 'key' field must be filled by the caller to finish the tree.)
+*/
+static TTree *auxemptycap (TTree *tree, int cap) {
+  tree->tag = TCapture;
+  tree->cap = cap;
+  sib1(tree)->tag = TTrue;
+  return tree;
+}
+
+
+/*
+** Create a tree for an empty capture.
+*/
+static TTree *newemptycap (lua_State *L, int cap, int key) {
+  TTree *tree = auxemptycap(newtree(L, 2), cap);
+  tree->key = key;
+  return tree;
+}
+
+
+/*
+** Create a tree for an empty capture with an associated Lua value.
+*/
+static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
+  TTree *tree = auxemptycap(newtree(L, 2), cap);
+  tree->key = addtonewktable(L, 0, idx);
+  return tree;
+}
+
+
+/*
+** Captures with syntax p / v
+** (function capture, query capture, string capture, or number capture)
+*/
+static int lp_divcapture (lua_State *L) {
+  switch (lua_type(L, 2)) {
+    case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
+    case LUA_TTABLE: return capture_aux(L, Cquery, 2);
+    case LUA_TSTRING: return capture_aux(L, Cstring, 2);
+    case LUA_TNUMBER: {
+      int n = lua_tointeger(L, 2);
+      TTree *tree = newroot1sib(L, TCapture);
+      luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number");
+      tree->cap = Cnum;
+      tree->key = n;
+      return 1;
+    }
+    default: return luaL_argerror(L, 2, "invalid replacement value");
+  }
+}
+
+
+static int lp_substcapture (lua_State *L) {
+  return capture_aux(L, Csubst, 0);
+}
+
+
+static int lp_tablecapture (lua_State *L) {
+  return capture_aux(L, Ctable, 0);
+}
+
+
+static int lp_groupcapture (lua_State *L) {
+  if (lua_isnoneornil(L, 2))
+    return capture_aux(L, Cgroup, 0);
+  else
+    return capture_aux(L, Cgroup, 2);
+}
+
+
+static int lp_foldcapture (lua_State *L) {
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+  return capture_aux(L, Cfold, 2);
+}
+
+
+static int lp_simplecapture (lua_State *L) {
+  return capture_aux(L, Csimple, 0);
+}
+
+
+static int lp_poscapture (lua_State *L) {
+  newemptycap(L, Cposition, 0);
+  return 1;
+}
+
+
+static int lp_argcapture (lua_State *L) {
+  int n = (int)luaL_checkinteger(L, 1);
+  luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
+  newemptycap(L, Carg, n);
+  return 1;
+}
+
+
+static int lp_backref (lua_State *L) {
+  luaL_checkany(L, 1);
+  newemptycapkey(L, Cbackref, 1);
+  return 1;
+}
+
+
+/*
+** Constant capture
+*/
+static int lp_constcapture (lua_State *L) {
+  int i;
+  int n = lua_gettop(L);  /* number of values */
+  if (n == 0)  /* no values? */
+    newleaf(L, TTrue);  /* no capture */
+  else if (n == 1)
+    newemptycapkey(L, Cconst, 1);  /* single constant capture */
+  else {  /* create a group capture with all values */
+    TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
+    newktable(L, n);  /* create a 'ktable' for new tree */
+    tree->tag = TCapture;
+    tree->cap = Cgroup;
+    tree->key = 0;
+    tree = sib1(tree);
+    for (i = 1; i <= n - 1; i++) {
+      tree->tag = TSeq;
+      tree->u.ps = 3;  /* skip TCapture and its sibling */
+      auxemptycap(sib1(tree), Cconst);
+      sib1(tree)->key = addtoktable(L, i);
+      tree = sib2(tree);
+    }
+    auxemptycap(tree, Cconst);
+    tree->key = addtoktable(L, i);
+  }
+  return 1;
+}
+
+
+static int lp_matchtime (lua_State *L) {
+  TTree *tree;
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+  tree = newroot1sib(L, TRunTime);
+  tree->key = addtonewktable(L, 1, 2);
+  return 1;
+}
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Grammar - Tree generation
+** =======================================================
+*/
+
+/*
+** push on the stack the index and the pattern for the
+** initial rule of grammar at index 'arg' in the stack;
+** also add that index into position table.
+*/
+static void getfirstrule (lua_State *L, int arg, int postab) {
+  lua_rawgeti(L, arg, 1);  /* access first element */
+  if (lua_isstring(L, -1)) {  /* is it the name of initial rule? */
+    lua_pushvalue(L, -1);  /* duplicate it to use as key */
+    lua_gettable(L, arg);  /* get associated rule */
+  }
+  else {
+    lua_pushinteger(L, 1);  /* key for initial rule */
+    lua_insert(L, -2);  /* put it before rule */
+  }
+  if (!testpattern(L, -1)) {  /* initial rule not a pattern? */
+    if (lua_isnil(L, -1))
+      luaL_error(L, "grammar has no initial rule");
+    else
+      luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
+  }
+  lua_pushvalue(L, -2);  /* push key */
+  lua_pushinteger(L, 1);  /* push rule position (after TGrammar) */
+  lua_settable(L, postab);  /* insert pair at position table */
+}
+
+/*
+** traverse grammar at index 'arg', pushing all its keys and patterns
+** into the stack. Create a new table (before all pairs key-pattern) to
+** collect all keys and their associated positions in the final tree
+** (the "position table").
+** Return the number of rules and (in 'totalsize') the total size
+** for the new tree.
+*/
+static int collectrules (lua_State *L, int arg, int *totalsize) {
+  int n = 1;  /* to count number of rules */
+  int postab = lua_gettop(L) + 1;  /* index of position table */
+  int size;  /* accumulator for total size */
+  lua_newtable(L);  /* create position table */
+  getfirstrule(L, arg, postab);
+  size = 2 + getsize(L, postab + 2);  /* TGrammar + TRule + rule */
+  lua_pushnil(L);  /* prepare to traverse grammar table */
+  while (lua_next(L, arg) != 0) {
+    if (lua_tonumber(L, -2) == 1 ||
+        lp_equal(L, -2, postab + 1)) {  /* initial rule? */
+      lua_pop(L, 1);  /* remove value (keep key for lua_next) */
+      continue;
+    }
+    if (!testpattern(L, -1))  /* value is not a pattern? */
+      luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
+    luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
+    lua_pushvalue(L, -2);  /* push key (to insert into position table) */
+    lua_pushinteger(L, size);
+    lua_settable(L, postab);
+    size += 1 + getsize(L, -1);  /* update size */
+    lua_pushvalue(L, -2);  /* push key (for next lua_next) */
+    n++;
+  }
+  *totalsize = size + 1;  /* TTrue to finish list of rules */
+  return n;
+}
+
+
+static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
+  int i;
+  TTree *nd = sib1(grammar);  /* auxiliary pointer to traverse the tree */
+  for (i = 0; i < n; i++) {  /* add each rule into new tree */
+    int ridx = frule + 2*i + 1;  /* index of i-th rule */
+    int rulesize;
+    TTree *rn = gettree(L, ridx, &rulesize);
+    nd->tag = TRule;
+    nd->key = 0;  /* will be fixed when rule is used */
+    nd->cap = i;  /* rule number */
+    nd->u.ps = rulesize + 1;  /* point to next rule */
+    memcpy(sib1(nd), rn, rulesize * sizeof(TTree));  /* copy rule */
+    mergektable(L, ridx, sib1(nd));  /* merge its ktable into new one */
+    nd = sib2(nd);  /* move to next rule */
+  }
+  nd->tag = TTrue;  /* finish list of rules */
+}
+
+
+/*
+** Check whether a tree has potential infinite loops
+*/
+static int checkloops (TTree *tree) {
+ tailcall:
+  if (tree->tag == TRep && nullable(sib1(tree)))
+    return 1;
+  else if (tree->tag == TGrammar)
+    return 0;  /* sub-grammars already checked */
+  else {
+    switch (numsiblings[tree->tag]) {
+      case 1:  /* return checkloops(sib1(tree)); */
+        tree = sib1(tree); goto tailcall;
+      case 2:
+        if (checkloops(sib1(tree))) return 1;
+        /* else return checkloops(sib2(tree)); */
+        tree = sib2(tree); goto tailcall;
+      default: assert(numsiblings[tree->tag] == 0); return 0;
+    }
+  }
+}
+
+
+/*
+** Give appropriate error message for 'verifyrule'. If a rule appears
+** twice in 'passed', there is path from it back to itself without
+** advancing the subject.
+*/
+static int verifyerror (lua_State *L, int *passed, int npassed) {
+  int i, j;
+  for (i = npassed - 1; i >= 0; i--) {  /* search for a repetition */
+    for (j = i - 1; j >= 0; j--) {
+      if (passed[i] == passed[j]) {
+        lua_rawgeti(L, -1, passed[i]);  /* get rule's key */
+        return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
+      }
+    }
+  }
+  return luaL_error(L, "too many left calls in grammar");
+}
+
+
+/*
+** Check whether a rule can be left recursive; raise an error in that
+** case; otherwise return 1 iff pattern is nullable.
+** The return value is used to check sequences, where the second pattern
+** is only relevant if the first is nullable.
+** Parameter 'nb' works as an accumulator, to allow tail calls in
+** choices. ('nb' true makes function returns true.)
+** Parameter 'passed' is a list of already visited rules, 'npassed'
+** counts the elements in 'passed'.
+** Assume ktable at the top of the stack.
+*/
+static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
+                       int nb) {
+ tailcall:
+  switch (tree->tag) {
+    case TChar: case TSet: case TAny:
+    case TFalse:
+      return nb;  /* cannot pass from here */
+    case TTrue:
+    case TBehind:  /* look-behind cannot have calls */
+      return 1;
+    case TNot: case TAnd: case TRep:
+      /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
+      tree = sib1(tree); nb = 1; goto tailcall;
+    case TCapture: case TRunTime:
+      /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
+      tree = sib1(tree); goto tailcall;
+    case TCall:
+      /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
+      tree = sib2(tree); goto tailcall;
+    case TSeq:  /* only check 2nd child if first is nb */
+      if (!verifyrule(L, sib1(tree), passed, npassed, 0))
+        return nb;
+      /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
+      tree = sib2(tree); goto tailcall;
+    case TChoice:  /* must check both children */
+      nb = verifyrule(L, sib1(tree), passed, npassed, nb);
+      /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
+      tree = sib2(tree); goto tailcall;
+    case TRule:
+      if (npassed >= MAXRULES)
+        return verifyerror(L, passed, npassed);
+      else {
+        passed[npassed++] = tree->key;
+        /* return verifyrule(L, sib1(tree), passed, npassed); */
+        tree = sib1(tree); goto tailcall;
+      }
+    case TGrammar:
+      return nullable(tree);  /* sub-grammar cannot be left recursive */
+    default: assert(0); return 0;
+  }
+}
+
+
+static void verifygrammar (lua_State *L, TTree *grammar) {
+  int passed[MAXRULES];
+  TTree *rule;
+  /* check left-recursive rules */
+  for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
+    if (rule->key == 0) continue;  /* unused rule */
+    verifyrule(L, sib1(rule), passed, 0, 0);
+  }
+  assert(rule->tag == TTrue);
+  /* check infinite loops inside rules */
+  for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
+    if (rule->key == 0) continue;  /* unused rule */
+    if (checkloops(sib1(rule))) {
+      lua_rawgeti(L, -1, rule->key);  /* get rule's key */
+      luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
+    }
+  }
+  assert(rule->tag == TTrue);
+}
+
+
+/*
+** Give a name for the initial rule if it is not referenced
+*/
+static void initialrulename (lua_State *L, TTree *grammar, int frule) {
+  if (sib1(grammar)->key == 0) {  /* initial rule is not referenced? */
+    int n = lua_rawlen(L, -1) + 1;  /* index for name */
+    lua_pushvalue(L, frule);  /* rule's name */
+    lua_rawseti(L, -2, n);  /* ktable was on the top of the stack */
+    sib1(grammar)->key = n;
+  }
+}
+
+
+static TTree *newgrammar (lua_State *L, int arg) {
+  int treesize;
+  int frule = lua_gettop(L) + 2;  /* position of first rule's key */
+  int n = collectrules(L, arg, &treesize);
+  TTree *g = newtree(L, treesize);
+  luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
+  g->tag = TGrammar;  g->u.n = n;
+  lua_newtable(L);  /* create 'ktable' */
+  lua_setuservalue(L, -2);
+  buildgrammar(L, g, frule, n);
+  lua_getuservalue(L, -1);  /* get 'ktable' for new tree */
+  finalfix(L, frule - 1, g, sib1(g));
+  initialrulename(L, g, frule);
+  verifygrammar(L, g);
+  lua_pop(L, 1);  /* remove 'ktable' */
+  lua_insert(L, -(n * 2 + 2));  /* move new table to proper position */
+  lua_pop(L, n * 2 + 1);  /* remove position table + rule pairs */
+  return g;  /* new table at the top of the stack */
+}
+
+/* }====================================================== */
+
+
+static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
+  lua_getuservalue(L, idx);  /* push 'ktable' (may be used by 'finalfix') */
+  finalfix(L, 0, NULL, p->tree);
+  lua_pop(L, 1);  /* remove 'ktable' */
+  return compile(L, p);
+}
+
+
+static int lp_printtree (lua_State *L) {
+  TTree *tree = getpatt(L, 1, NULL);
+  int c = lua_toboolean(L, 2);
+  if (c) {
+    lua_getuservalue(L, 1);  /* push 'ktable' (may be used by 'finalfix') */
+    finalfix(L, 0, NULL, tree);
+    lua_pop(L, 1);  /* remove 'ktable' */
+  }
+  printktable(L, 1);
+  printtree(tree, 0);
+  return 0;
+}
+
+
+static int lp_printcode (lua_State *L) {
+  Pattern *p = getpattern(L, 1);
+  printktable(L, 1);
+  if (p->code == NULL)  /* not compiled yet? */
+    prepcompile(L, p, 1);
+  printpatt(p->code, p->codesize);
+  return 0;
+}
+
+
+/*
+** Get the initial position for the match, interpreting negative
+** values from the end of the subject
+*/
+static size_t initposition (lua_State *L, size_t len) {
+  lua_Integer ii = luaL_optinteger(L, 3, 1);
+  if (ii > 0) {  /* positive index? */
+    if ((size_t)ii <= len)  /* inside the string? */
+      return (size_t)ii - 1;  /* return it (corrected to 0-base) */
+    else return len;  /* crop at the end */
+  }
+  else {  /* negative index */
+    if ((size_t)(-ii) <= len)  /* inside the string? */
+      return len - ((size_t)(-ii));  /* return position from the end */
+    else return 0;  /* crop at the beginning */
+  }
+}
+
+
+/*
+** Main match function
+*/
+static int lp_match (lua_State *L) {
+  Capture capture[INITCAPSIZE];
+  const char *r;
+  size_t l;
+  Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
+  Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
+  const char *s = luaL_checklstring(L, SUBJIDX, &l);
+  size_t i = initposition(L, l);
+  int ptop = lua_gettop(L);
+  lua_pushnil(L);  /* initialize subscache */
+  lua_pushlightuserdata(L, capture);  /* initialize caplistidx */
+  lua_getuservalue(L, 1);  /* initialize penvidx */
+  r = match(L, s, s + i, s + l, code, capture, ptop);
+  if (r == NULL) {
+    lua_pushnil(L);
+    return 1;
+  }
+  return getcaptures(L, s, r, ptop);
+}
+
+
+
+/*
+** {======================================================
+** Library creation and functions not related to matching
+** =======================================================
+*/
+
+/* maximum limit for stack size */
+#define MAXLIM		(INT_MAX / 100)
+
+static int lp_setmax (lua_State *L) {
+  lua_Integer lim = luaL_checkinteger(L, 1);
+  luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range");
+  lua_settop(L, 1);
+  lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+  return 0;
+}
+
+
+static int lp_version (lua_State *L) {
+  lua_pushstring(L, VERSION);
+  return 1;
+}
+
+
+static int lp_type (lua_State *L) {
+  if (testpattern(L, 1))
+    lua_pushliteral(L, "pattern");
+  else
+    lua_pushnil(L);
+  return 1;
+}
+
+
+int lp_gc (lua_State *L) {
+  Pattern *p = getpattern(L, 1);
+  realloccode(L, p, 0);  /* delete code block */
+  return 0;
+}
+
+
+static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
+  TTree *t = newcharset(L);
+  int i;
+  for (i = 0; i <= UCHAR_MAX; i++)
+    if (catf(i)) setchar(treebuffer(t), i);
+  lua_setfield(L, -2, catname);
+}
+
+
+static int lp_locale (lua_State *L) {
+  if (lua_isnoneornil(L, 1)) {
+    lua_settop(L, 0);
+    lua_createtable(L, 0, 12);
+  }
+  else {
+    luaL_checktype(L, 1, LUA_TTABLE);
+    lua_settop(L, 1);
+  }
+  createcat(L, "alnum", isalnum);
+  createcat(L, "alpha", isalpha);
+  createcat(L, "cntrl", iscntrl);
+  createcat(L, "digit", isdigit);
+  createcat(L, "graph", isgraph);
+  createcat(L, "lower", islower);
+  createcat(L, "print", isprint);
+  createcat(L, "punct", ispunct);
+  createcat(L, "space", isspace);
+  createcat(L, "upper", isupper);
+  createcat(L, "xdigit", isxdigit);
+  return 1;
+}
+
+
+static struct luaL_Reg pattreg[] = {
+  {"ptree", lp_printtree},
+  {"pcode", lp_printcode},
+  {"match", lp_match},
+  {"B", lp_behind},
+  {"V", lp_V},
+  {"C", lp_simplecapture},
+  {"Cc", lp_constcapture},
+  {"Cmt", lp_matchtime},
+  {"Cb", lp_backref},
+  {"Carg", lp_argcapture},
+  {"Cp", lp_poscapture},
+  {"Cs", lp_substcapture},
+  {"Ct", lp_tablecapture},
+  {"Cf", lp_foldcapture},
+  {"Cg", lp_groupcapture},
+  {"P", lp_P},
+  {"S", lp_set},
+  {"R", lp_range},
+  {"locale", lp_locale},
+  {"version", lp_version},
+  {"setmaxstack", lp_setmax},
+  {"type", lp_type},
+  {NULL, NULL}
+};
+
+
+static struct luaL_Reg metareg[] = {
+  {"__mul", lp_seq},
+  {"__add", lp_choice},
+  {"__pow", lp_star},
+  {"__gc", lp_gc},
+  {"__len", lp_and},
+  {"__div", lp_divcapture},
+  {"__unm", lp_not},
+  {"__sub", lp_sub},
+  {NULL, NULL}
+};
+
+
+int luaopen_lpeg (lua_State *L);
+int luaopen_lpeg (lua_State *L) {
+  luaL_newmetatable(L, PATTERN_T);
+  lua_pushnumber(L, MAXBACK);  /* initialize maximum backtracking */
+  lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+  luaL_setfuncs(L, metareg, 0);
+  luaL_newlib(L, pattreg);
+  lua_pushvalue(L, -1);
+  lua_setfield(L, -3, "__index");
+  return 1;
+}
+
+/* }====================================================== */
--- /dev/null
+++ b/lptree.h
@@ -1,0 +1,82 @@
+/*  
+** $Id: lptree.h $
+*/
+
+#if !defined(lptree_h)
+#define lptree_h
+
+
+#include "lptypes.h" 
+
+
+/*
+** types of trees
+*/
+typedef enum TTag {
+  TChar = 0,  /* 'n' = char */
+  TSet,  /* the set is stored in next CHARSETSIZE bytes */
+  TAny,
+  TTrue,
+  TFalse,
+  TRep,  /* 'sib1'* */
+  TSeq,  /* 'sib1' 'sib2' */
+  TChoice,  /* 'sib1' / 'sib2' */
+  TNot,  /* !'sib1' */
+  TAnd,  /* &'sib1' */
+  TCall,  /* ktable[key] is rule's key; 'sib2' is rule being called */
+  TOpenCall,  /* ktable[key] is rule's key */
+  TRule,  /* ktable[key] is rule's key (but key == 0 for unused rules);
+             'sib1' is rule's pattern;
+             'sib2' is next rule; 'cap' is rule's sequential number */
+  TGrammar,  /* 'sib1' is initial (and first) rule */
+  TBehind,  /* 'sib1' is pattern, 'n' is how much to go back */
+  TCapture,  /* captures: 'cap' is kind of capture (enum 'CapKind');
+                ktable[key] is Lua value associated with capture;
+                'sib1' is capture body */
+  TRunTime  /* run-time capture: 'key' is Lua function;
+               'sib1' is capture body */
+} TTag;
+
+
+/*
+** Tree trees
+** The first child of a tree (if there is one) is immediately after
+** the tree.  A reference to a second child (ps) is its position
+** relative to the position of the tree itself.
+*/
+typedef struct TTree {
+  byte tag;
+  byte cap;  /* kind of capture (if it is a capture) */
+  unsigned short key;  /* key in ktable for Lua data (0 if no key) */
+  union {
+    int ps;  /* occasional second child */
+    int n;  /* occasional counter */
+  } u;
+} TTree;
+
+
+/*
+** A complete pattern has its tree plus, if already compiled,
+** its corresponding code
+*/
+typedef struct Pattern {
+  union Instruction *code;
+  int codesize;
+  TTree tree[1];
+} Pattern;
+
+
+/* number of children for each tree */
+extern const byte numsiblings[];
+
+/* access to children */
+#define sib1(t)         ((t) + 1)
+#define sib2(t)         ((t) + (t)->u.ps)
+
+
+
+
+
+
+#endif
+
--- /dev/null
+++ b/lptypes.h
@@ -1,0 +1,145 @@
+/*
+** $Id: lptypes.h $
+** LPeg - PEG pattern matching for Lua
+** Copyright 2007-2019, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+** written by Roberto Ierusalimschy
+*/
+
+#if !defined(lptypes_h)
+#define lptypes_h
+
+
+#include <assert.h>
+#include <limits.h>
+
+#include "lua.h"
+
+
+#define VERSION         "1.0.2"
+
+
+#define PATTERN_T	"lpeg-pattern"
+#define MAXSTACKIDX	"lpeg-maxstack"
+
+
+/*
+** compatibility with Lua 5.1
+*/
+#if (LUA_VERSION_NUM == 501)
+
+#define lp_equal	lua_equal
+
+#define lua_getuservalue	lua_getfenv
+#define lua_setuservalue	lua_setfenv
+
+#define lua_rawlen		lua_objlen
+
+#define luaL_setfuncs(L,f,n)	luaL_register(L,NULL,f)
+#define luaL_newlib(L,f)	luaL_register(L,"lpeg",f)
+
+#endif
+
+
+#if !defined(lp_equal)
+#define lp_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ)
+#endif
+
+
+/* default maximum size for call/backtrack stack */
+#if !defined(MAXBACK)
+#define MAXBACK         400
+#endif
+
+
+/* maximum number of rules in a grammar (limited by 'unsigned char') */
+#if !defined(MAXRULES)
+#define MAXRULES        250
+#endif
+
+
+
+/* initial size for capture's list */
+#define INITCAPSIZE	32
+
+
+/* index, on Lua stack, for subject */
+#define SUBJIDX		2
+
+/* number of fixed arguments to 'match' (before capture arguments) */
+#define FIXEDARGS	3
+
+/* index, on Lua stack, for capture list */
+#define caplistidx(ptop)	((ptop) + 2)
+
+/* index, on Lua stack, for pattern's ktable */
+#define ktableidx(ptop)		((ptop) + 3)
+
+/* index, on Lua stack, for backtracking stack */
+#define stackidx(ptop)	((ptop) + 4)
+
+
+
+typedef unsigned char byte;
+
+
+#define BITSPERCHAR		8
+
+#define CHARSETSIZE		((UCHAR_MAX/BITSPERCHAR) + 1)
+
+
+
+typedef struct Charset {
+  byte cs[CHARSETSIZE];
+} Charset;
+
+
+
+#define loopset(v,b)    { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
+
+/* access to charset */
+#define treebuffer(t)      ((byte *)((t) + 1))
+
+/* number of slots needed for 'n' bytes */
+#define bytes2slots(n)  (((n) - 1) / sizeof(TTree) + 1)
+
+/* set 'b' bit in charset 'cs' */
+#define setchar(cs,b)   ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
+
+
+/*
+** in capture instructions, 'kind' of capture and its offset are
+** packed in field 'aux', 4 bits for each
+*/
+#define getkind(op)		((op)->i.aux & 0xF)
+#define getoff(op)		(((op)->i.aux >> 4) & 0xF)
+#define joinkindoff(k,o)	((k) | ((o) << 4))
+
+#define MAXOFF		0xF
+#define MAXAUX		0xFF
+
+
+/* maximum number of bytes to look behind */
+#define MAXBEHIND	MAXAUX
+
+
+/* maximum size (in elements) for a pattern */
+#define MAXPATTSIZE	(SHRT_MAX - 10)
+
+
+/* size (in elements) for an instruction plus extra l bytes */
+#define instsize(l)  (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
+
+
+/* size (in elements) for a ISet instruction */
+#define CHARSETINSTSIZE		instsize(CHARSETSIZE)
+
+/* size (in elements) for a IFunc instruction */
+#define funcinstsize(p)		((p)->i.aux + 2)
+
+
+
+#define testchar(st,c)	(((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
+
+
+#endif
+
--- /dev/null
+++ b/lpvm.c
@@ -1,0 +1,374 @@
+/*
+** $Id: lpvm.c $
+** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+*/
+
+#include <limits.h>
+#include <string.h>
+
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lpcap.h"
+#include "lptypes.h"
+#include "lpvm.h"
+#include "lpprint.h"
+
+
+/* initial size for call/backtrack stack */
+#if !defined(INITBACK)
+#define INITBACK	MAXBACK
+#endif
+
+
+#define getoffset(p)	(((p) + 1)->offset)
+
+static const Instruction giveup = {{IGiveup, 0, 0}};
+
+
+/*
+** {======================================================
+** Virtual Machine
+** =======================================================
+*/
+
+
+typedef struct Stack {
+  const char *s;  /* saved position (or NULL for calls) */
+  const Instruction *p;  /* next instruction */
+  int caplevel;
+} Stack;
+
+
+#define getstackbase(L, ptop)	((Stack *)lua_touserdata(L, stackidx(ptop)))
+
+
+/*
+** Ensures the size of array 'capture' (with size '*capsize' and
+** 'captop' elements being used) is enough to accomodate 'n' extra
+** elements plus one.  (Because several opcodes add stuff to the capture
+** array, it is simpler to ensure the array always has at least one free
+** slot upfront and check its size later.)
+*/
+static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
+                                       int captop, int n, int ptop) {
+  if (*capsize - captop > n)
+    return capture;  /* no need to grow array */
+  else {  /* must grow */
+    Capture *newc;
+    int newsize = captop + n + 1;  /* minimum size needed */
+    if (newsize < INT_MAX/((int)sizeof(Capture) * 2))
+      newsize *= 2;  /* twice that size, if not too big */
+    else if (newsize >= INT_MAX/((int)sizeof(Capture)))
+      luaL_error(L, "too many captures");
+    newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
+    memcpy(newc, capture, captop * sizeof(Capture));
+    *capsize = newsize;
+    lua_replace(L, caplistidx(ptop));
+    return newc;
+  }
+}
+
+
+/*
+** Double the size of the stack
+*/
+static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
+  Stack *stack = getstackbase(L, ptop);
+  Stack *newstack;
+  int n = *stacklimit - stack;  /* current stack size */
+  int max, newn;
+  lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+  max = lua_tointeger(L, -1);  /* maximum allowed size */
+  lua_pop(L, 1);
+  if (n >= max)  /* already at maximum size? */
+    luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
+  newn = 2 * n;  /* new size */
+  if (newn > max) newn = max;
+  newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
+  memcpy(newstack, stack, n * sizeof(Stack));
+  lua_replace(L, stackidx(ptop));
+  *stacklimit = newstack + newn;
+  return newstack + n;  /* return next position */
+}
+
+
+/*
+** Interpret the result of a dynamic capture: false -> fail;
+** true -> keep current position; number -> next position.
+** Return new subject position. 'fr' is stack index where
+** is the result; 'curr' is current subject position; 'limit'
+** is subject's size.
+*/
+static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
+  lua_Integer res;
+  if (!lua_toboolean(L, fr)) {  /* false value? */
+    lua_settop(L, fr - 1);  /* remove results */
+    return -1;  /* and fail */
+  }
+  else if (lua_isboolean(L, fr))  /* true? */
+    res = curr;  /* keep current position */
+  else {
+    res = lua_tointeger(L, fr) - 1;  /* new position */
+    if (res < curr || res > limit)
+      luaL_error(L, "invalid position returned by match-time capture");
+  }
+  lua_remove(L, fr);  /* remove first result (offset) */
+  return res;
+}
+
+
+/*
+** Add capture values returned by a dynamic capture to the list
+** 'capture', nested inside a group. 'fd' indexes the first capture
+** value, 'n' is the number of values (at least 1). The open group
+** capture is already in 'capture', before the place for the new entries.
+*/
+static void adddyncaptures (const char *s, Capture *capture, int n, int fd) {
+  int i;
+  assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
+  capture[-1].idx = 0;  /* make group capture an anonymous group */
+  for (i = 0; i < n; i++) {  /* add runtime captures */
+    capture[i].kind = Cruntime;
+    capture[i].siz = 1;  /* mark it as closed */
+    capture[i].idx = fd + i;  /* stack index of capture value */
+    capture[i].s = s;
+  }
+  capture[n].kind = Cclose;  /* close group */
+  capture[n].siz = 1;
+  capture[n].s = s;
+}
+
+
+/*
+** Remove dynamic captures from the Lua stack (called in case of failure)
+*/
+static int removedyncap (lua_State *L, Capture *capture,
+                         int level, int last) {
+  int id = finddyncap(capture + level, capture + last);  /* index of 1st cap. */
+  int top = lua_gettop(L);
+  if (id == 0) return 0;  /* no dynamic captures? */
+  lua_settop(L, id - 1);  /* remove captures */
+  return top - id + 1;  /* number of values removed */
+}
+
+
+/*
+** Opcode interpreter
+*/
+const char *match (lua_State *L, const char *o, const char *s, const char *e,
+                   Instruction *op, Capture *capture, int ptop) {
+  Stack stackbase[INITBACK];
+  Stack *stacklimit = stackbase + INITBACK;
+  Stack *stack = stackbase;  /* point to first empty slot in stack */
+  int capsize = INITCAPSIZE;
+  int captop = 0;  /* point to first empty slot in captures */
+  int ndyncap = 0;  /* number of dynamic captures (in Lua stack) */
+  const Instruction *p = op;  /* current instruction */
+  stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
+  lua_pushlightuserdata(L, stackbase);
+  for (;;) {
+#if defined(DEBUG)
+      printf("-------------------------------------\n");
+      printcaplist(capture, capture + captop);
+      printf("s: |%s| stck:%d, dyncaps:%d, caps:%d  ",
+             s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
+      printinst(op, p);
+#endif
+    assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
+    switch ((Opcode)p->i.code) {
+      case IEnd: {
+        assert(stack == getstackbase(L, ptop) + 1);
+        capture[captop].kind = Cclose;
+        capture[captop].s = NULL;
+        return s;
+      }
+      case IGiveup: {
+        assert(stack == getstackbase(L, ptop));
+        return NULL;
+      }
+      case IRet: {
+        assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
+        p = (--stack)->p;
+        continue;
+      }
+      case IAny: {
+        if (s < e) { p++; s++; }
+        else goto fail;
+        continue;
+      }
+      case ITestAny: {
+        if (s < e) p += 2;
+        else p += getoffset(p);
+        continue;
+      }
+      case IChar: {
+        if ((byte)*s == p->i.aux && s < e) { p++; s++; }
+        else goto fail;
+        continue;
+      }
+      case ITestChar: {
+        if ((byte)*s == p->i.aux && s < e) p += 2;
+        else p += getoffset(p);
+        continue;
+      }
+      case ISet: {
+        int c = (byte)*s;
+        if (testchar((p+1)->buff, c) && s < e)
+          { p += CHARSETINSTSIZE; s++; }
+        else goto fail;
+        continue;
+      }
+      case ITestSet: {
+        int c = (byte)*s;
+        if (testchar((p + 2)->buff, c) && s < e)
+          p += 1 + CHARSETINSTSIZE;
+        else p += getoffset(p);
+        continue;
+      }
+      case IBehind: {
+        int n = p->i.aux;
+        if (n > s - o) goto fail;
+        s -= n; p++;
+        continue;
+      }
+      case ISpan: {
+        for (; s < e; s++) {
+          int c = (byte)*s;
+          if (!testchar((p+1)->buff, c)) break;
+        }
+        p += CHARSETINSTSIZE;
+        continue;
+      }
+      case IJmp: {
+        p += getoffset(p);
+        continue;
+      }
+      case IChoice: {
+        if (stack == stacklimit)
+          stack = doublestack(L, &stacklimit, ptop);
+        stack->p = p + getoffset(p);
+        stack->s = s;
+        stack->caplevel = captop;
+        stack++;
+        p += 2;
+        continue;
+      }
+      case ICall: {
+        if (stack == stacklimit)
+          stack = doublestack(L, &stacklimit, ptop);
+        stack->s = NULL;
+        stack->p = p + 2;  /* save return address */
+        stack++;
+        p += getoffset(p);
+        continue;
+      }
+      case ICommit: {
+        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+        stack--;
+        p += getoffset(p);
+        continue;
+      }
+      case IPartialCommit: {
+        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+        (stack - 1)->s = s;
+        (stack - 1)->caplevel = captop;
+        p += getoffset(p);
+        continue;
+      }
+      case IBackCommit: {
+        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+        s = (--stack)->s;
+        captop = stack->caplevel;
+        p += getoffset(p);
+        continue;
+      }
+      case IFailTwice:
+        assert(stack > getstackbase(L, ptop));
+        stack--;
+        /* go through */
+      case IFail:
+      fail: { /* pattern failed: try to backtrack */
+        do {  /* remove pending calls */
+          assert(stack > getstackbase(L, ptop));
+          s = (--stack)->s;
+        } while (s == NULL);
+        if (ndyncap > 0)  /* is there matchtime captures? */
+          ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
+        captop = stack->caplevel;
+        p = stack->p;
+#if defined(DEBUG)
+        printf("**FAIL**\n");
+#endif
+        continue;
+      }
+      case ICloseRunTime: {
+        CapState cs;
+        int rem, res, n;
+        int fr = lua_gettop(L) + 1;  /* stack index of first result */
+        cs.reclevel = 0; cs.L = L;
+        cs.s = o; cs.ocap = capture; cs.ptop = ptop;
+        n = runtimecap(&cs, capture + captop, s, &rem);  /* call function */
+        captop -= n;  /* remove nested captures */
+        ndyncap -= rem;  /* update number of dynamic captures */
+        fr -= rem;  /* 'rem' items were popped from Lua stack */
+        res = resdyncaptures(L, fr, s - o, e - o);  /* get result */
+        if (res == -1)  /* fail? */
+          goto fail;
+        s = o + res;  /* else update current position */
+        n = lua_gettop(L) - fr + 1;  /* number of new captures */
+        ndyncap += n;  /* update number of dynamic captures */
+        if (n == 0)  /* no new captures? */
+          captop--;  /* remove open group */
+        else {  /* new captures; keep original open group */
+          if (fr + n >= SHRT_MAX)
+            luaL_error(L, "too many results in match-time capture");
+          /* add new captures + close group to 'capture' list */
+          capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
+          adddyncaptures(s, capture + captop, n, fr);
+          captop += n + 1;  /* new captures + close group */
+        }
+        p++;
+        continue;
+      }
+      case ICloseCapture: {
+        const char *s1 = s;
+        assert(captop > 0);
+        /* if possible, turn capture into a full capture */
+        if (capture[captop - 1].siz == 0 &&
+            s1 - capture[captop - 1].s < UCHAR_MAX) {
+          capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
+          p++;
+          continue;
+        }
+        else {
+          capture[captop].siz = 1;  /* mark entry as closed */
+          capture[captop].s = s;
+          goto pushcapture;
+        }
+      }
+      case IOpenCapture:
+        capture[captop].siz = 0;  /* mark entry as open */
+        capture[captop].s = s;
+        goto pushcapture;
+      case IFullCapture:
+        capture[captop].siz = getoff(p) + 1;  /* save capture size */
+        capture[captop].s = s - getoff(p);
+        /* goto pushcapture; */
+      pushcapture: {
+        capture[captop].idx = p->i.key;
+        capture[captop].kind = getkind(p);
+        captop++;
+        capture = growcap(L, capture, &capsize, captop, 0, ptop);
+        p++;
+        continue;
+      }
+      default: assert(0); return NULL;
+    }
+  }
+}
+
+/* }====================================================== */
+
+
--- /dev/null
+++ b/lpvm.h
@@ -1,0 +1,58 @@
+/*
+** $Id: lpvm.h $
+*/
+
+#if !defined(lpvm_h)
+#define lpvm_h
+
+#include "lpcap.h"
+
+
+/* Virtual Machine's instructions */
+typedef enum Opcode {
+  IAny, /* if no char, fail */
+  IChar,  /* if char != aux, fail */
+  ISet,  /* if char not in buff, fail */
+  ITestAny,  /* in no char, jump to 'offset' */
+  ITestChar,  /* if char != aux, jump to 'offset' */
+  ITestSet,  /* if char not in buff, jump to 'offset' */
+  ISpan,  /* read a span of chars in buff */
+  IBehind,  /* walk back 'aux' characters (fail if not possible) */
+  IRet,  /* return from a rule */
+  IEnd,  /* end of pattern */
+  IChoice,  /* stack a choice; next fail will jump to 'offset' */
+  IJmp,  /* jump to 'offset' */
+  ICall,  /* call rule at 'offset' */
+  IOpenCall,  /* call rule number 'key' (must be closed to a ICall) */
+  ICommit,  /* pop choice and jump to 'offset' */
+  IPartialCommit,  /* update top choice to current position and jump */
+  IBackCommit,  /* "fails" but jump to its own 'offset' */
+  IFailTwice,  /* pop one choice and then fail */
+  IFail,  /* go back to saved state on choice and jump to saved offset */
+  IGiveup,  /* internal use */
+  IFullCapture,  /* complete capture of last 'off' chars */
+  IOpenCapture,  /* start a capture */
+  ICloseCapture,
+  ICloseRunTime
+} Opcode;
+
+
+
+typedef union Instruction {
+  struct Inst {
+    byte code;
+    byte aux;
+    short key;
+  } i;
+  int offset;
+  byte buff[1];
+} Instruction;
+
+
+void printpatt (Instruction *p, int n);
+const char *match (lua_State *L, const char *o, const char *s, const char *e,
+                   Instruction *op, Capture *capture, int ptop);
+
+
+#endif
+
--- /dev/null
+++ b/makefile
@@ -1,0 +1,55 @@
+LIBNAME = lpeg
+LUADIR = ../lua/
+
+COPT = -O2 -DNDEBUG
+# COPT = -g
+
+CWARNS = -Wall -Wextra -pedantic \
+	-Waggregate-return \
+	-Wcast-align \
+	-Wcast-qual \
+	-Wdisabled-optimization \
+	-Wpointer-arith \
+	-Wshadow \
+	-Wsign-compare \
+	-Wundef \
+	-Wwrite-strings \
+	-Wbad-function-cast \
+	-Wdeclaration-after-statement \
+	-Wmissing-prototypes \
+	-Wnested-externs \
+	-Wstrict-prototypes \
+# -Wunreachable-code \
+
+
+CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
+CC = gcc
+
+FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
+
+# For Linux
+linux:
+	$(MAKE) lpeg.so "DLLFLAGS = -shared -fPIC"
+
+# For Mac OS
+macosx:
+	$(MAKE) lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
+
+lpeg.so: $(FILES)
+	env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
+
+$(FILES): makefile
+
+test: test.lua re.lua lpeg.so
+	./test.lua
+
+clean:
+	rm -f $(FILES) lpeg.so
+
+
+lpcap.o: lpcap.c lpcap.h lptypes.h
+lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
+lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
+lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
+lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
+
--- /dev/null
+++ b/re.html
@@ -1,0 +1,494 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+    <title>LPeg.re - Regex syntax for LPEG</title>
+    <link rel="stylesheet"
+          href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
+          type="text/css"/>
+	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+</head>
+<body>
+
+<!-- $Id: re.html $ -->
+
+<div id="container">
+	
+<div id="product">
+  <div id="product_logo">
+    <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
+    <img alt="LPeg logo" src="lpeg-128.gif"/>
+    </a>
+  </div>
+  <div id="product_name"><big><strong>LPeg.re</strong></big></div>
+  <div id="product_description">
+     Regex syntax for LPEG
+  </div>
+</div> <!-- id="product" -->
+
+<div id="main">
+	
+<div id="navigation">
+<h1>re</h1>
+
+<ul>
+  <li><a href="#basic">Basic Constructions</a></li>
+  <li><a href="#func">Functions</a></li>
+  <li><a href="#ex">Some Examples</a></li>
+  <li><a href="#license">License</a></li>
+  </ul>
+  </li>
+</ul>
+</div> <!-- id="navigation" -->
+
+<div id="content">
+
+<h2><a name="basic"></a>The <code>re</code> Module</h2>
+
+<p>
+The <code>re</code> module
+(provided by file <code>re.lua</code> in the distribution)
+supports a somewhat conventional regex syntax
+for pattern usage within <a href="lpeg.html">LPeg</a>.
+</p>
+
+<p>
+The next table summarizes <code>re</code>'s syntax.
+A <code>p</code> represents an arbitrary pattern;
+<code>num</code> represents a number (<code>[0-9]+</code>);
+<code>name</code> represents an identifier
+(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
+Constructions are listed in order of decreasing precedence.
+<table border="1">
+<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
+<tr><td><code>( p )</code></td> <td>grouping</td></tr>
+<tr><td><code>'string'</code></td> <td>literal string</td></tr>
+<tr><td><code>"string"</code></td> <td>literal string</td></tr>
+<tr><td><code>[class]</code></td> <td>character class</td></tr>
+<tr><td><code>.</code></td> <td>any character</td></tr>
+<tr><td><code>%name</code></td>
+  <td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
+<tr><td><code>name</code></td><td>non terminal</td></tr>
+<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
+<tr><td><code>{}</code></td> <td>position capture</td></tr>
+<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
+<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
+<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
+<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
+<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
+<tr><td><code>=name</code></td> <td>back reference
+</td></tr>
+<tr><td><code>p ?</code></td> <td>optional match</td></tr>
+<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
+<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
+<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
+<tr><td><code>p^+num</code></td>
+      <td>at least <code>n</code> repetitions</td></tr>
+<tr><td><code>p^-num</code></td>
+      <td>at most <code>n</code> repetitions</td></tr>
+<tr><td><code>p -&gt; 'string'</code></td> <td>string capture</td></tr>
+<tr><td><code>p -&gt; "string"</code></td> <td>string capture</td></tr>
+<tr><td><code>p -&gt; num</code></td> <td>numbered capture</td></tr>
+<tr><td><code>p -&gt; name</code></td> <td>function/query/string capture
+equivalent to <code>p / defs[name]</code></td></tr>
+<tr><td><code>p =&gt; name</code></td> <td>match-time capture
+equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
+<tr><td><code>p ~&gt; name</code></td> <td>fold capture
+equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr>
+<tr><td><code>& p</code></td> <td>and predicate</td></tr>
+<tr><td><code>! p</code></td> <td>not predicate</td></tr>
+<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
+<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
+<tr><td>(<code>name &lt;- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
+</tbody></table>
+<p>
+Any space appearing in a syntax description can be
+replaced by zero or more space characters and Lua-style comments
+(<code>--</code> until end of line).
+</p>
+
+<p>
+Character classes define sets of characters.
+An initial <code>^</code> complements the resulting set.
+A range <em>x</em><code>-</code><em>y</em> includes in the set
+all characters with codes between the codes of <em>x</em> and <em>y</em>.
+A pre-defined class <code>%</code><em>name</em> includes all
+characters of that class.
+A simple character includes itself in the set.
+The only special characters inside a class are <code>^</code>
+(special only if it is the first character);
+<code>]</code>
+(can be included in the set as the first character,
+after the optional <code>^</code>);
+<code>%</code> (special only if followed by a letter);
+and <code>-</code>
+(can be included in the set as the first or the last character).
+</p>
+
+<p>
+Currently the pre-defined classes are similar to those from the
+Lua's string library
+(<code>%a</code> for letters,
+<code>%A</code> for non letters, etc.).
+There is also a class <code>%nl</code>
+containing only the newline character,
+which is particularly handy for grammars written inside long strings,
+as long strings do not interpret escape sequences like <code>\n</code>.
+</p>
+
+
+<h2><a name="func">Functions</a></h2>
+
+<h3><code>re.compile (string, [, defs])</code></h3>
+<p>
+Compiles the given string and
+returns an equivalent LPeg pattern.
+The given string may define either an expression or a grammar.
+The optional <code>defs</code> table provides extra Lua values
+to be used by the pattern.
+</p>
+
+<h3><code>re.find (subject, pattern [, init])</code></h3>
+<p>
+Searches the given pattern in the given subject.
+If it finds a match,
+returns the index where this occurrence starts and
+the index where it ends.
+Otherwise, returns nil.
+</p>
+
+<p>
+An optional numeric argument <code>init</code> makes the search
+starts at that position in the subject string.
+As usual in Lua libraries,
+a negative value counts from the end.
+</p>
+
+<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
+<p>
+Does a <em>global substitution</em>,
+replacing all occurrences of <code>pattern</code>
+in the given <code>subject</code> by <code>replacement</code>.
+
+<h3><code>re.match (subject, pattern)</code></h3>
+<p>
+Matches the given pattern against the given subject,
+returning all captures.
+</p>
+
+<h3><code>re.updatelocale ()</code></h3>
+<p>
+Updates the pre-defined character classes to the current locale.
+</p>
+
+
+<h2><a name="ex">Some Examples</a></h2>
+
+<h3>A complete simple program</h3>
+<p>
+The next code shows a simple complete Lua program using
+the <code>re</code> module:
+</p>
+<pre class="example">
+local re = require"re"
+
+-- find the position of the first numeral in a string
+print(re.find("the number 423 is odd", "[0-9]+"))  --&gt; 12    14
+
+-- returns all words in a string
+print(re.match("the number 423 is odd", "({%a+} / .)*"))
+--&gt; the    number    is    odd
+
+-- returns the first numeral in a string
+print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
+--&gt; 423
+
+print(re.gsub("hello World", "[aeiou]", "."))
+--&gt; h.ll. W.rld
+</pre>
+
+
+<h3>Balanced parentheses</h3>
+<p>
+The following call will produce the same pattern produced by the
+Lua expression in the
+<a href="lpeg.html#balanced">balanced parentheses</a> example:
+</p>
+<pre class="example">
+b = re.compile[[  balanced &lt;- "(" ([^()] / balanced)* ")"  ]]
+</pre>
+
+<h3>String reversal</h3>
+<p>
+The next example reverses a string:
+</p>
+<pre class="example">
+rev = re.compile[[ R &lt;- (!.) -&gt; '' / ({.} R) -&gt; '%2%1']]
+print(rev:match"0123456789")   --&gt; 9876543210
+</pre>
+
+<h3>CSV decoder</h3>
+<p>
+The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
+</p>
+<pre class="example">
+record = re.compile[[
+  record &lt;- {| field (',' field)* |} (%nl / !.)
+  field &lt;- escaped / nonescaped
+  nonescaped &lt;- { [^,"%nl]* }
+  escaped &lt;- '"' {~ ([^"] / '""' -&gt; '"')* ~} '"'
+]]
+</pre>
+
+<h3>Lua's long strings</h3>
+<p>
+The next example matches Lua long strings:
+</p>
+<pre class="example">
+c = re.compile([[
+  longstring &lt;- ('[' {:eq: '='* :} '[' close)
+  close &lt;- ']' =eq ']' / . close
+]])
+
+print(c:match'[==[]]===]]]]==]===[]')   --&gt; 17
+</pre>
+
+<h3>Abstract Syntax Trees</h3>
+<p>
+This example shows a simple way to build an
+abstract syntax tree (AST) for a given grammar.
+To keep our example simple,
+let us consider the following grammar
+for lists of names:
+</p>
+<pre class="example">
+p = re.compile[[
+      listname &lt;- (name s)*
+      name &lt;- [a-z][a-z]*
+      s &lt;- %s*
+]]
+</pre>
+<p>
+Now, we will add captures to build a corresponding AST.
+As a first step, the pattern will build a table to
+represent each non terminal;
+terminals will be represented by their corresponding strings:
+</p>
+<pre class="example">
+c = re.compile[[
+      listname &lt;- {| (name s)* |}
+      name &lt;- {| {[a-z][a-z]*} |}
+      s &lt;- %s*
+]]
+</pre>
+<p>
+Now, a match against <code>"hi hello bye"</code>
+results in the table
+<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
+</p>
+<p>
+For such a simple grammar,
+this AST is more than enough;
+actually, the tables around each single name
+are already overkilling.
+More complex grammars,
+however, may need some more structure.
+Specifically,
+it would be useful if each table had
+a <code>tag</code> field telling what non terminal
+that table represents.
+We can add such a tag using
+<a href="lpeg.html#cap-g">named group captures</a>:
+</p>
+<pre class="example">
+x = re.compile[[
+      listname <- {| {:tag: '' -> 'list':} (name s)* |}
+      name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
+      s <- ' '*
+]]
+</pre>
+<p>
+With these group captures,
+a match against <code>"hi hello bye"</code>
+results in the following table:
+</p>
+<pre class="example">
+{tag="list",
+  {tag="id", "hi"},
+  {tag="id", "hello"},
+  {tag="id", "bye"}
+}
+</pre>
+
+
+<h3>Indented blocks</h3>
+<p>
+This example breaks indented blocks into tables,
+respecting the indentation:
+</p>
+<pre class="example">
+p = re.compile[[
+  block &lt;- {| {:ident:' '*:} line
+           ((=ident !' ' line) / &(=ident ' ') block)* |}
+  line &lt;- {[^%nl]*} %nl
+]]
+</pre>
+<p>
+As an example,
+consider the following text:
+</p>
+<pre class="example">
+t = p:match[[
+first line
+  subline 1
+  subline 2
+second line
+third line
+  subline 3.1
+    subline 3.1.1
+  subline 3.2
+]]
+</pre>
+<p>
+The resulting table <code>t</code> will be like this:
+</p>
+<pre class="example">
+   {'first line'; {'subline 1'; 'subline 2'; ident = '  '};
+    'second line';
+    'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = '    '};
+                    'subline 3.2'; ident = '  '};
+    ident = ''}
+</pre>
+
+<h3>Macro expander</h3>
+<p>
+This example implements a simple macro expander.
+Macros must be defined as part of the pattern,
+following some simple rules:
+</p>
+<pre class="example">
+p = re.compile[[
+      text &lt;- {~ item* ~}
+      item &lt;- macro / [^()] / '(' item* ')'
+      arg &lt;- ' '* {~ (!',' item)* ~}
+      args &lt;- '(' arg (',' arg)* ')'
+      -- now we define some macros
+      macro &lt;- ('apply' args) -&gt; '%1(%2)'
+             / ('add' args) -&gt; '%1 + %2'
+             / ('mul' args) -&gt; '%1 * %2'
+]]
+
+print(p:match"add(mul(a,b), apply(f,x))")   --&gt; a * b + f(x)
+</pre>
+<p>
+A <code>text</code> is a sequence of items,
+wherein we apply a substitution capture to expand any macros.
+An <code>item</code> is either a macro,
+any character different from parentheses,
+or a parenthesized expression.
+A macro argument (<code>arg</code>) is a sequence
+of items different from a comma.
+(Note that a comma may appear inside an item,
+e.g., inside a parenthesized expression.)
+Again we do a substitution capture to expand any macro
+in the argument before expanding the outer macro.
+<code>args</code> is a list of arguments separated by commas.
+Finally we define the macros.
+Each macro is a string substitution;
+it replaces the macro name and its arguments by its corresponding string,
+with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
+</p>
+
+<h3>Patterns</h3>
+<p>
+This example shows the complete syntax
+of patterns accepted by <code>re</code>.
+</p>
+<pre class="example">
+p = [=[
+
+pattern         &lt;- exp !.
+exp             &lt;- S (grammar / alternative)
+
+alternative     &lt;- seq ('/' S seq)*
+seq             &lt;- prefix*
+prefix          &lt;- '&amp;' S prefix / '!' S prefix / suffix
+suffix          &lt;- primary S (([+*?]
+                            / '^' [+-]? num
+                            / '-&gt;' S (string / '{}' / name)
+                            / '=&gt;' S name) S)*
+
+primary         &lt;- '(' exp ')' / string / class / defined
+                 / '{:' (name ':')? exp ':}'
+                 / '=' name
+                 / '{}'
+                 / '{~' exp '~}'
+                 / '{' exp '}'
+                 / '.'
+                 / name S !arrow
+                 / '&lt;' name '&gt;'          -- old-style non terminals
+
+grammar         &lt;- definition+
+definition      &lt;- name S arrow exp
+
+class           &lt;- '[' '^'? item (!']' item)* ']'
+item            &lt;- defined / range / .
+range           &lt;- . '-' [^]]
+
+S               &lt;- (%s / '--' [^%nl]*)*   -- spaces and comments
+name            &lt;- [A-Za-z][A-Za-z0-9_]*
+arrow           &lt;- '&lt;-'
+num             &lt;- [0-9]+
+string          &lt;- '"' [^"]* '"' / "'" [^']* "'"
+defined         &lt;- '%' name
+
+]=]
+
+print(re.match(p, p))   -- a self description must match itself
+</pre>
+
+
+
+<h2><a name="license">License</a></h2>
+
+<p>
+Copyright &copy; 2008-2015 Lua.org, PUC-Rio.
+</p>
+<p>
+Permission is hereby granted, free of charge,
+to any person obtaining a copy of this software and
+associated documentation files (the "Software"),
+to deal in the Software without restriction,
+including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software,
+and to permit persons to whom the Software is
+furnished to do so,
+subject to the following conditions:
+</p>
+
+<p>
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions of the Software.
+</p>
+
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+</p>
+
+</div> <!-- id="content" -->
+
+</div> <!-- id="main" -->
+
+</div> <!-- id="container" -->
+
+</body>
+</html> 
--- /dev/null
+++ b/re.lua
@@ -1,0 +1,267 @@
+-- $Id: re.lua $
+
+-- imported functions and modules
+local tonumber, type, print, error = tonumber, type, print, error
+local setmetatable = setmetatable
+local m = require"lpeg"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end
+
+
+local any = m.P(1)
+
+
+-- Pre-defined names
+local Predef = { nl = m.P"\n" }
+
+
+local mem
+local fmem
+local gmem
+
+
+local function updatelocale ()
+  mm.locale(Predef)
+  Predef.a = Predef.alpha
+  Predef.c = Predef.cntrl
+  Predef.d = Predef.digit
+  Predef.g = Predef.graph
+  Predef.l = Predef.lower
+  Predef.p = Predef.punct
+  Predef.s = Predef.space
+  Predef.u = Predef.upper
+  Predef.w = Predef.alnum
+  Predef.x = Predef.xdigit
+  Predef.A = any - Predef.a
+  Predef.C = any - Predef.c
+  Predef.D = any - Predef.d
+  Predef.G = any - Predef.g
+  Predef.L = any - Predef.l
+  Predef.P = any - Predef.p
+  Predef.S = any - Predef.s
+  Predef.U = any - Predef.u
+  Predef.W = any - Predef.w
+  Predef.X = any - Predef.x
+  mem = {}    -- restart memoization
+  fmem = {}
+  gmem = {}
+  local mt = {__mode = "v"}
+  setmetatable(mem, mt)
+  setmetatable(fmem, mt)
+  setmetatable(gmem, mt)
+end
+
+
+updatelocale()
+
+
+
+local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
+
+
+local function patt_error (s, i)
+  local msg = (#s < i + 20) and s:sub(i)
+                             or s:sub(i,i+20) .. "..."
+  msg = ("pattern error near '%s'"):format(msg)
+  error(msg, 2)
+end
+
+local function mult (p, n)
+  local np = mm.P(true)
+  while n >= 1 do
+    if n%2 >= 1 then np = np * p end
+    p = p * p
+    n = n/2
+  end
+  return np
+end
+
+local function equalcap (s, i, c)
+  if type(c) ~= "string" then return nil end
+  local e = #c + i
+  if s:sub(i, e - 1) == c then return e else return nil end
+end
+
+
+local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
+
+local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
+
+local arrow = S * "<-"
+
+local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
+
+name = m.C(name)
+
+
+-- a defined name only have meaning in a given environment
+local Def = name * m.Carg(1)
+
+
+local function getdef (id, defs)
+  local c = defs and defs[id]
+  if not c then error("undefined name: " .. id) end
+  return c
+end
+
+-- match a name and return a group of its corresponding definition
+-- and 'f' (to be folded in 'Suffix')
+local function defwithfunc (f)
+  return m.Cg(Def / getdef * m.Cc(f))
+end
+
+
+local num = m.C(m.R"09"^1) * S / tonumber
+
+local String = "'" * m.C((any - "'")^0) * "'" +
+               '"' * m.C((any - '"')^0) * '"'
+
+
+local defined = "%" * Def / function (c,Defs)
+  local cat =  Defs and Defs[c] or Predef[c]
+  if not cat then error ("name '" .. c .. "' undefined") end
+  return cat
+end
+
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
+
+local item = (defined + Range + m.C(any)) / m.P
+
+local Class =
+    "["
+  * (m.C(m.P"^"^-1))    -- optional complement symbol
+  * m.Cf(item * (item - "]")^0, mt.__add) /
+                          function (c, p) return c == "^" and any - p or p end
+  * "]"
+
+local function adddef (t, k, exp)
+  if t[k] then
+    error("'"..k.."' already defined as a rule")
+  else
+    t[k] = exp
+  end
+  return t
+end
+
+local function firstdef (n, r) return adddef({n}, n, r) end
+
+
+local function NT (n, b)
+  if not b then
+    error("rule '"..n.."' used outside a grammar")
+  else return mm.V(n)
+  end
+end
+
+
+local exp = m.P{ "Exp",
+  Exp = S * ( m.V"Grammar"
+            + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
+  Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
+        * (#seq_follow + patt_error);
+  Prefix = "&" * S * m.V"Prefix" / mt.__len
+         + "!" * S * m.V"Prefix" / mt.__unm
+         + m.V"Suffix";
+  Suffix = m.Cf(m.V"Primary" * S *
+          ( ( m.P"+" * m.Cc(1, mt.__pow)
+            + m.P"*" * m.Cc(0, mt.__pow)
+            + m.P"?" * m.Cc(-1, mt.__pow)
+            + "^" * ( m.Cg(num * m.Cc(mult))
+                    + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
+                    )
+            + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
+                         + m.P"{}" * m.Cc(nil, m.Ct)
+                         + defwithfunc(mt.__div)
+                         )
+            + "=>" * S * defwithfunc(m.Cmt)
+            + "~>" * S * defwithfunc(m.Cf)
+            ) * S
+          )^0, function (a,b,f) return f(a,b) end );
+  Primary = "(" * m.V"Exp" * ")"
+            + String / mm.P
+            + Class
+            + defined
+            + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
+                     function (n, p) return mm.Cg(p, n) end
+            + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+            + m.P"{}" / mm.Cp
+            + "{~" * m.V"Exp" * "~}" / mm.Cs
+            + "{|" * m.V"Exp" * "|}" / mm.Ct
+            + "{" * m.V"Exp" * "}" / mm.C
+            + m.P"." * m.Cc(any)
+            + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
+  Definition = name * arrow * m.V"Exp";
+  Grammar = m.Cg(m.Cc(true), "G") *
+            m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
+              adddef) / mm.P
+}
+
+local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
+
+
+local function compile (p, defs)
+  if mm.type(p) == "pattern" then return p end   -- already compiled
+  local cp = pattern:match(p, 1, defs)
+  if not cp then error("incorrect pattern", 3) end
+  return cp
+end
+
+local function match (s, p, i)
+  local cp = mem[p]
+  if not cp then
+    cp = compile(p)
+    mem[p] = cp
+  end
+  return cp:match(s, i or 1)
+end
+
+local function find (s, p, i)
+  local cp = fmem[p]
+  if not cp then
+    cp = compile(p) / 0
+    cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
+    fmem[p] = cp
+  end
+  local i, e = cp:match(s, i or 1)
+  if i then return i, e - 1
+  else return i
+  end
+end
+
+local function gsub (s, p, rep)
+  local g = gmem[p] or {}   -- ensure gmem[p] is not collected while here
+  gmem[p] = g
+  local cp = g[rep]
+  if not cp then
+    cp = compile(p)
+    cp = mm.Cs((cp / rep + 1)^0)
+    g[rep] = cp
+  end
+  return cp:match(s)
+end
+
+
+-- exported names
+local re = {
+  compile = compile,
+  match = match,
+  find = find,
+  gsub = gsub,
+  updatelocale = updatelocale,
+}
+
+if version == "Lua 5.1" then _G.re = re end
+
+return re
--- /dev/null
+++ b/test.lua
@@ -1,0 +1,1523 @@
+#!/usr/bin/env lua
+
+-- $Id: test.lua $
+
+-- require"strict"    -- just to be pedantic
+
+local m = require"lpeg"
+
+
+-- for general use
+local a, b, c, d, e, f, g, p, t
+
+
+-- compatibility with Lua 5.2
+local unpack = rawget(table, "unpack") or unpack
+local loadstring = rawget(_G, "loadstring") or load
+
+
+local any = m.P(1)
+local space = m.S" \t\n"^0
+
+local function checkeq (x, y, p)
+if p then print(x,y) end
+  if type(x) ~= "table" then assert(x == y)
+  else
+    for k,v in pairs(x) do checkeq(v, y[k], p) end
+    for k,v in pairs(y) do checkeq(v, x[k], p) end
+  end
+end
+
+
+local mt = getmetatable(m.P(1))
+
+
+local allchar = {}
+for i=0,255 do allchar[i + 1] = i end
+allchar = string.char(unpack(allchar))
+assert(#allchar == 256)
+
+local function cs2str (c)
+  return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
+end
+
+local function eqcharset (c1, c2)
+  assert(cs2str(c1) == cs2str(c2))
+end
+
+
+print"General tests for LPeg library"
+
+assert(type(m.version()) == "string")
+print("version " .. m.version())
+assert(m.type("alo") ~= "pattern")
+assert(m.type(io.input) ~= "pattern")
+assert(m.type(m.P"alo") == "pattern")
+
+-- tests for some basic optimizations
+assert(m.match(m.P(false) + "a", "a") == 2)
+assert(m.match(m.P(true) + "a", "a") == 1)
+assert(m.match("a" + m.P(false), "b") == nil)
+assert(m.match("a" + m.P(true), "b") == 1)
+
+assert(m.match(m.P(false) * "a", "a") == nil)
+assert(m.match(m.P(true) * "a", "a") == 2)
+assert(m.match("a" * m.P(false), "a") == nil)
+assert(m.match("a" * m.P(true), "a") == 2)
+
+assert(m.match(#m.P(false) * "a", "a") == nil)
+assert(m.match(#m.P(true) * "a", "a") == 2)
+assert(m.match("a" * #m.P(false), "a") == nil)
+assert(m.match("a" * #m.P(true), "a") == 2)
+
+
+-- tests for locale
+do
+  assert(m.locale(m) == m)
+  local t = {}
+  assert(m.locale(t, m) == t)
+  local x = m.locale()
+  for n,v in pairs(x) do
+    assert(type(n) == "string")
+    eqcharset(v, m[n])
+  end
+end
+
+
+assert(m.match(3, "aaaa"))
+assert(m.match(4, "aaaa"))
+assert(not m.match(5, "aaaa"))
+assert(m.match(-3, "aa"))
+assert(not m.match(-3, "aaa"))
+assert(not m.match(-3, "aaaa"))
+assert(not m.match(-4, "aaaa"))
+assert(m.P(-5):match"aaaa")
+
+assert(m.match("a", "alo") == 2)
+assert(m.match("al", "alo") == 3)
+assert(not m.match("alu", "alo"))
+assert(m.match(true, "") == 1)
+
+local digit = m.S"0123456789"
+local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+local lower = m.S"abcdefghijklmnopqrstuvwxyz"
+local letter = m.S"" + upper + lower
+local alpha = letter + digit + m.R()
+
+eqcharset(m.S"", m.P(false))
+eqcharset(upper, m.R("AZ"))
+eqcharset(lower, m.R("az"))
+eqcharset(upper + lower, m.R("AZ", "az"))
+eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
+eqcharset(digit, m.S"01234567" + "8" + "9")
+eqcharset(upper, letter - lower)
+eqcharset(m.S(""), m.R())
+assert(cs2str(m.S("")) == "")
+
+eqcharset(m.S"\0", "\0")
+eqcharset(m.S"\1\0\2", m.R"\0\2")
+eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
+eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
+
+local word = alpha^1 * (1 - alpha)^0
+
+assert((word^0 * -1):match"alo alo")
+assert(m.match(word^1 * -1, "alo alo"))
+assert(m.match(word^2 * -1, "alo alo"))
+assert(not m.match(word^3 * -1, "alo alo"))
+
+assert(not m.match(word^-1 * -1, "alo alo"))
+assert(m.match(word^-2 * -1, "alo alo"))
+assert(m.match(word^-3 * -1, "alo alo"))
+
+local eos = m.P(-1)
+
+assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
+assert(not m.match(digit^0 * letter * eos, "1257a1"))
+
+b = {
+  [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
+}
+
+assert(m.match(b, "(al())()"))
+assert(not m.match(b * eos, "(al())()"))
+assert(m.match(b * eos, "((al())()(é))"))
+assert(not m.match(b, "(al()()"))
+
+assert(not m.match(letter^1 - "for", "foreach"))
+assert(m.match(letter^1 - ("for" * eos), "foreach"))
+assert(not m.match(letter^1 - ("for" * eos), "for"))
+
+function basiclookfor (p)
+  return m.P {
+    [1] = p + (1 * m.V(1))
+  }
+end
+
+function caplookfor (p)
+  return basiclookfor(p:C())
+end
+
+assert(m.match(caplookfor(letter^1), "   4achou123...") == "achou")
+a = {m.match(caplookfor(letter^1)^0, " two words, one more  ")}
+checkeq(a, {"two", "words", "one", "more"})
+
+assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), "  (  (a)") == 7)
+
+a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
+checkeq(a, {"123", "d"})
+
+-- bug in LPeg 0.12  (nil value does not create a 'ktable')
+assert(m.match(m.Cc(nil), "") == nil)
+
+a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
+checkeq(a, {"abcd", "l"})
+
+a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
+checkeq(a, {10,20,30,2})
+a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
+checkeq(a, {1,10,20,30,2})
+a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
+checkeq(a, {1,10,20,30,2})
+a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
+checkeq(a, {1,7,8,10,20,30,2})
+a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
+checkeq(a, {1,2,3,4})
+
+a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
+checkeq(a, {1, 5})
+
+
+t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
+checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
+
+-- bug in 0.12 ('hascapture' did not check for captures inside a rule)
+do
+  local pat = m.P{
+    'S';
+    S1 = m.C('abc') + 3,
+    S = #m.V('S1')    -- rule has capture, but '#' must ignore it
+  }
+  assert(pat:match'abc' == 1)
+end
+
+
+-- bug: loop in 'hascaptures'
+do
+  local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
+  assert(p:match("xxx") == "")
+end
+
+
+
+-- test for small capture boundary
+for i = 250,260 do
+  assert(#m.match(m.C(i), string.rep('a', i)) == i)
+  assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
+end
+
+-- tests for any*n and any*-n
+for n = 1, 550, 13 do
+  local x_1 = string.rep('x', n - 1)
+  local x = x_1 .. 'a'
+  assert(not m.P(n):match(x_1))
+  assert(m.P(n):match(x) == n + 1)
+  assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
+  assert(m.C(n):match(x) == x)
+  assert(m.C(m.C(n)):match(x) == x)
+  assert(m.P(-n):match(x_1) == 1)
+  assert(not m.P(-n):match(x))
+  assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
+  local n3 = math.floor(n/3)
+  assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
+end
+
+-- true values
+assert(m.P(0):match("x") == 1)
+assert(m.P(0):match("") == 1)
+assert(m.C(0):match("x") == "")
+
+assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
+assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
+assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
+p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
+
+
+-- test for alternation optimization
+assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
+assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
+assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
+assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
+assert(m.match("a" * m.P"b"^0 * "c"  + "cd" + "ax" + "cy", "ax") == 3)
+assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
+assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
+assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
+assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
+assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
+assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
+assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
+assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
+
+assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
+
+
+-- bug in 0.12 (rc1)
+assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
+
+assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
+    4*10 + 1)
+
+-- optimizations with optional parts
+assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
+assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
+assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
+assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
+
+assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
+
+p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
+assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
+
+
+-- bug in 0.12.2
+-- p = { ('ab' ('c' 'ef'?)*)? }
+p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
+s = "abcefccefc"
+assert(s == p:match(s))
+ 
+
+pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
+assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
+  m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
+print"+"
+
+
+-- tests for capture optimizations
+assert(m.match((m.P(3) +  4 * m.Cp()) * "a", "abca") == 5)
+t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
+checkeq(t, {3, 6})
+
+
+-- tests for numbered captures
+p = m.C(1)
+assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
+assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
+assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
+assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
+
+a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
+assert(a == "a" and b == "efg" and c == "h")
+
+-- test for table captures
+t = m.match(m.Ct(letter^1), "alo")
+checkeq(t, {})
+
+t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
+assert(n == "t" and table.concat(t) == "alo")
+
+t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
+assert(table.concat(t, ";") == "alo;a;l;o")
+
+t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
+assert(table.concat(t, ";") == "alo;a;l;o")
+
+t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
+assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
+
+t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
+checkeq(t, {"alo", "a", "o"})
+
+
+-- tests for groups
+p = m.Cg(1)   -- no capture
+assert(p:match('x') == 'x')
+p = m.Cg(m.P(true)/function () end * 1)   -- no value
+assert(p:match('x') == 'x')
+p = m.Cg(m.Cg(m.Cg(m.C(1))))
+assert(p:match('x') == 'x')
+p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
+t = {p:match'abc'}
+checkeq(t, {'a', 'b', 'c', 1, 2})
+
+p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
+t = p:match''
+checkeq(t, {hi = 10, ho = 20})
+t = p:match'abc'
+checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
+
+-- non-string group names
+p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
+t = p:match('abcdefghij')
+assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')
+
+
+-- test for error messages
+local function checkerr (msg, f, ...)
+  local st, err = pcall(f, ...)
+  assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
+end
+
+checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
+checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
+checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
+checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
+checkerr("undefined in given grammar", m.match, { m.V{} }, "")
+
+checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
+checkerr("grammar has no initial rule", m.P, { [print] = {} })
+
+-- grammar with a long call chain before left recursion
+p = {'a',
+  a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
+  b = m.V'c',
+  c = m.V'd',
+  d = m.V'e',
+  e = m.V'f',
+  f = m.V'g',
+  g = m.P''
+}
+checkerr("rule 'a' may be left recursive", m.match, p, "a")
+
+-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
+-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
+-- that is optimized to ICommit L1
+
+p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
+assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
+
+
+do
+  -- large dynamic Cc
+  local lim = 2^16 - 1
+  local c = 0
+  local function seq (n) 
+    if n == 1 then c = c + 1; return m.Cc(c)
+    else
+      local m = math.floor(n / 2)
+      return seq(m) * seq(n - m)
+    end
+  end
+  p = m.Ct(seq(lim))
+  t = p:match('')
+  assert(t[lim] == lim)
+  checkerr("too many", function () p = p / print end)
+  checkerr("too many", seq, lim + 1)
+end
+
+
+do
+  -- nesting of captures too deep
+  local p = m.C(1)
+  for i = 1, 300 do
+    p = m.Ct(p)
+  end
+  checkerr("too deep", p.match, p, "x")
+end
+
+
+-- tests for non-pattern as arguments to pattern functions
+
+p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
+assert(m.match(p, "aaabaac") == 7)
+
+p = m.P'abc' * 2 * -5 * true * 'de'  -- mix of numbers and strings and booleans
+
+assert(p:match("abc01de") == 8)
+assert(p:match("abc01de3456") == nil)
+
+p = 'abc' * (2 * (-5 * (true * m.P'de')))
+
+assert(p:match("abc01de") == 8)
+assert(p:match("abc01de3456") == nil)
+
+p = { m.V(2), m.P"abc" } *
+     (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
+assert(p:match("abcaaaxx") == 7)
+assert(p:match("abcxx") == 6)
+
+
+-- a large table capture
+t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
+assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
+
+print('+')
+
+
+-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
+m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
+
+local V = m.V
+
+local Space = m.S(" \n\t")^0
+local Number = m.C(m.R("09")^1) * Space
+local FactorOp = m.C(m.S("+-")) * Space
+local TermOp = m.C(m.S("*/")) * Space
+local Open = "(" * Space
+local Close = ")" * Space
+
+
+local function f_factor (v1, op, v2, d)
+  assert(d == nil)
+  if op == "+" then return v1 + v2
+  else return v1 - v2
+  end
+end
+
+
+local function f_term (v1, op, v2, d)
+  assert(d == nil)
+  if op == "*" then return v1 * v2
+  else return v1 / v2
+  end
+end
+
+G = m.P{ "Exp",
+  Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
+  Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
+  Term = Number / tonumber  +  Open * V"Exp" * Close;
+}
+
+G = Space * G * -1
+
+for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1-  8"} do
+  assert(m.match(G, s) == loadstring("return "..s)())
+end
+
+
+-- test for grammars (errors deep in calling non-terminals)
+g = m.P{
+  [1] = m.V(2) + "a",
+  [2] = "a" * m.V(3) * "x",
+  [3] = "b" * m.V(3) + "c"
+}
+
+assert(m.match(g, "abbbcx") == 7)
+assert(m.match(g, "abbbbx") == 2)
+
+
+-- tests for \0
+assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
+assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
+assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
+assert(not m.match(-4, "\0\1\0a"))
+assert(m.match("\0\1\0a", "\0\1\0a") == 5)
+assert(m.match("\0\0\0", "\0\0\0") == 4)
+assert(not m.match("\0\0\0", "\0\0"))
+
+
+-- tests for predicates
+assert(not m.match(-m.P("a") * 2, "alo"))
+assert(m.match(- -m.P("a") * 2, "alo") == 3)
+assert(m.match(#m.P("a") * 2, "alo") == 3)
+assert(m.match(##m.P("a") * 2, "alo") == 3)
+assert(not m.match(##m.P("c") * 2, "alo"))
+assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+
+
+-- fixed length
+do
+  -- 'and' predicate using fixed length
+  local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
+  assert(p:match("acd") == "ac")
+
+  p = #m.P{ "a" * m.V(2), m.P"b" } * 2
+  assert(p:match("abc") == 3)
+
+  p = #(m.P"abc" * m.B"c")
+  assert(p:match("abc") == 1 and not p:match("ab"))
+ 
+  p = m.P{ "a" * m.V(2), m.P"b"^1 }
+  checkerr("pattern may not have fixed length", m.B, p)
+
+  p = "abc" * (m.P"b"^1 + m.P"a"^0)
+  checkerr("pattern may not have fixed length", m.B, p)
+end
+
+
+p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
+assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
+
+p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
+assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
+
+
+
+-- look-behind predicate
+assert(not m.match(m.B'a', 'a'))
+assert(m.match(1 * m.B'a', 'a') == 2)
+assert(not m.match(m.B(1), 'a'))
+assert(m.match(1 * m.B(1), 'a') == 2)
+assert(m.match(-m.B(1), 'a') == 1)
+assert(m.match(m.B(250), string.rep('a', 250)) == nil)
+assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
+
+-- look-behind with an open call
+checkerr("pattern may not have fixed length", m.B, m.V'S1')
+checkerr("too long to look behind", m.B, 260)
+
+B = #letter * -m.B(letter) + -letter * m.B(letter)
+x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
+checkeq(m.match(x, 'ar cal  c'), {1,3,4,7,9,10})
+checkeq(m.match(x, ' ar cal  '), {2,4,5,8})
+checkeq(m.match(x, '   '), {})
+checkeq(m.match(x, 'aloalo'), {1,7})
+
+assert(m.match(B, "a") == 1)
+assert(m.match(1 * B, "a") == 2)
+assert(not m.B(1 - letter):match(""))
+assert((-m.B(letter)):match("") == 1)
+
+assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
+assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
+assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
+
+-- look-behind with grammars
+assert(m.match('a' * m.B{'x', x = m.P(3)},  'aaa') == nil)
+assert(m.match('aa' * m.B{'x', x = m.P('aaa')},  'aaaa') == nil)
+assert(m.match('aaa' * m.B{'x', x = m.P('aaa')},  'aaaaa') == 4)
+
+
+
+-- bug in 0.9
+assert(m.match(('a' * #m.P'b'), "ab") == 2)
+assert(not m.match(('a' * #m.P'b'), "a"))
+
+assert(not m.match(#m.S'567', ""))
+assert(m.match(#m.S'567' * 1, "6") == 2)
+
+
+-- tests for Tail Calls
+
+p = m.P{ 'a' * m.V(1) + '' }
+assert(p:match(string.rep('a', 1000)) == 1001)
+
+-- create a grammar for a simple DFA for even number of 0s and 1s
+--
+--  ->1 <---0---> 2
+--    ^           ^
+--    |           |
+--    1           1
+--    |           |
+--    V           V
+--    3 <---0---> 4
+--
+-- this grammar should keep no backtracking information
+
+p = m.P{
+  [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
+  [2] = '0' * m.V(1) + '1' * m.V(4),
+  [3] = '0' * m.V(4) + '1' * m.V(1),
+  [4] = '0' * m.V(3) + '1' * m.V(2),
+}
+
+assert(p:match(string.rep("00", 10000)))
+assert(p:match(string.rep("01", 10000)))
+assert(p:match(string.rep("011", 10000)))
+assert(not p:match(string.rep("011", 10000) .. "1"))
+assert(not p:match(string.rep("011", 10001)))
+
+
+-- this grammar does need backtracking info.
+local lim = 10000
+p = m.P{ '0' * m.V(1) + '0' }
+checkerr("stack overflow", m.match, p, string.rep("0", lim))
+m.setmaxstack(2*lim)
+checkerr("stack overflow", m.match, p, string.rep("0", lim))
+m.setmaxstack(2*lim + 4)
+assert(m.match(p, string.rep("0", lim)) == lim + 1)
+
+-- this repetition should not need stack space (only the call does)
+p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
+m.setmaxstack(200)
+assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
+
+m.setmaxstack(100)   -- restore low limit
+
+-- tests for optional start position
+assert(m.match("a", "abc", 1))
+assert(m.match("b", "abc", 2))
+assert(m.match("c", "abc", 3))
+assert(not m.match(1, "abc", 4))
+assert(m.match("a", "abc", -3))
+assert(m.match("b", "abc", -2))
+assert(m.match("c", "abc", -1))
+assert(m.match("abc", "abc", -4))   -- truncate to position 1
+
+assert(m.match("", "abc", 10))   -- empty string is everywhere!
+assert(m.match("", "", 10))
+assert(not m.match(1, "", 1))
+assert(not m.match(1, "", -1))
+assert(not m.match(1, "", 0))
+
+print("+")
+
+
+-- tests for argument captures
+checkerr("invalid argument", m.Carg, 0)
+checkerr("invalid argument", m.Carg, -1)
+checkerr("invalid argument", m.Carg, 2^18)
+checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
+assert(m.match(m.Carg(1), 'a', 1, print) == print)
+x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
+checkeq(x, {10, 20})
+
+assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
+                     m.Cmt(m.Cb("a"), function (s,i,x)
+                                        assert(s == "a" and i == 1);
+                                        return i, x+1
+                                      end) *
+                     m.Carg(2), function (s,i,a,b,c)
+                                  assert(s == "a" and i == 1 and c == nil);
+				  return i, 2*a + 3*b
+                                end) * "a",
+               "a", 1, false, 100, 1000) == 2*1001 + 3*100)
+
+
+-- tests for Lua functions
+
+t = {}
+s = ""
+p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
+s = "hi, this is a test"
+assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
+assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
+
+assert(not m.match(p, s))
+
+p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
+assert(m.match(p, "alo"))
+
+p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
+assert(not m.match(p, "alo"))
+
+
+t = {}
+p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
+s = "hi, this is a test"
+assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
+assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
+
+t = {}
+p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
+                         return i <= s1:len() and i end) * 1
+s = "hi, this is a test"
+assert(m.match(p^0, s) == string.len(s) + 1)
+assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
+
+p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
+assert(m.match(p, "aaaa") == 5)
+assert(m.match(p, "abaa") == 2)
+assert(not m.match(p, "baaa"))
+
+checkerr("invalid position", m.match, function () return 2^20 end, s)
+checkerr("invalid position", m.match, function () return 0 end, s)
+checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
+checkerr("invalid position", m.match,
+             m.P(1)^0 * function (_, i) return i - 1 end, s)
+assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
+checkerr("invalid position", m.match,
+             m.P(1)^0 * function (_, i) return i + 1 end, s)
+assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
+checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
+assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
+assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
+       string.len(s) + 1)
+for i = 1, string.len(s) + 1 do
+  assert(m.match(function (_, _) return i end, s) == i)
+end
+
+p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
+  +  m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
+  * -1
+assert(p:match(string.rep('a', 14000)))
+
+-- tests for Function Replacements
+f = function (a, ...) if a ~= "x" then return {a, ...} end end
+
+t = m.match(m.C(1)^0/f, "abc")
+checkeq(t, {"a", "b", "c"})
+
+t = m.match(m.C(1)^0/f/f, "abc")
+checkeq(t, {{"a", "b", "c"}})
+
+t = m.match(m.P(1)^0/f/f, "abc")   -- no capture
+checkeq(t, {{"abc"}})
+
+t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
+checkeq(t, {{"abc"}, 4})
+
+t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
+checkeq(t, {{"a", "b", "c"}, 4})
+
+t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
+checkeq(t, {4})
+
+t = m.match(m.C(m.C(1)^0)/f, "abc")
+checkeq(t, {"abc", "a", "b", "c"})
+
+g = function (...) return 1, ... end
+t = {m.match(m.C(1)^0/g/g, "abc")}
+checkeq(t, {1, 1, "a", "b", "c"})
+
+t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
+t1 = {1,1,nil,nil,4,nil,3,nil,nil}
+for i=1,10 do assert(t[i] == t1[i]) end
+
+-- bug in 0.12.2: ktable with only nil could be eliminated when joining
+-- with a pattern without ktable
+assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)
+
+t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
+checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
+
+t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
+checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
+
+-- tests for Query Replacements
+
+assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
+assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
+assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
+t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
+checkeq(t, {40})
+
+assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
+assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
+assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
+assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
+assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
+assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
+assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
+assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
+assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
+assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
+
+assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
+
+assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
+assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
+assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
+assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
+assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
+assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
+   "411 - abc ")
+
+assert(m.match(m.P(1)/"%0", "abc") == "a")
+checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
+checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
+
+p = m.C(1)
+p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
+assert(p:match("1234567890") == "9 - 1")
+
+assert(m.match(m.Cc(print), "") == print)
+
+-- too many captures (just ignore extra ones)
+p = m.C(1)^0 / "%2-%9-%0-%9"
+assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
+s = string.rep("12345678901234567890", 20)
+assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
+
+-- string captures with non-string subcaptures
+p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
+assert(p:match'x' == 'alo - x - alo')
+
+checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
+
+-- long strings for string capture
+l = 10000
+s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
+
+p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
+
+assert(p:match(s) == string.rep('c', l) ..
+                     string.rep('b', l) .. 
+                     string.rep('a', l))
+
+print"+"
+
+-- accumulator capture
+function f (x) return x + 1 end
+assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
+
+t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
+checkeq(t, {1})
+p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
+         rawset)
+t = p:match("a=b;c=du;xux=yuy;")
+checkeq(t, {a="b", c="du", xux="yuy"})
+
+
+-- errors in accumulator capture
+
+-- no initial capture
+checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
+-- no initial capture (very long match forces fold to be a pair open-close)
+checkerr("no initial value", m.match, m.Cf(m.P(500), print),
+                               string.rep('a', 600))
+
+-- nested capture produces no initial value
+checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
+
+
+-- tests for loop checker
+
+local function isnullable (p)
+  checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
+end
+
+isnullable(m.P("x")^-4)
+assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
+assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
+isnullable("")
+isnullable(m.P("x")^0)
+isnullable(m.P("x")^-1)
+isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
+isnullable(-m.P("ab"))
+isnullable(- -m.P("ab"))
+isnullable(# #(m.P("ab") + "xy"))
+isnullable(- #m.P("ab")^0)
+isnullable(# -m.P("ab")^1)
+isnullable(#m.V(3))
+isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
+isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
+assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
+       == 3)
+assert(m.match(m.P""^-3, "a") == 1)
+
+local function find (p, s)
+  return m.match(basiclookfor(p), s)
+end
+
+
+local function badgrammar (g, expected)
+  local stat, msg = pcall(m.P, g)
+  assert(not stat)
+  if expected then assert(find(expected, msg)) end
+end
+
+badgrammar({[1] = m.V(1)}, "rule '1'")
+badgrammar({[1] = m.V(2)}, "rule '2'")   -- invalid non-terminal
+badgrammar({[1] = m.V"x"}, "rule 'x'")   -- invalid non-terminal
+badgrammar({[1] = m.V{}}, "rule '(a table)'")   -- invalid non-terminal
+badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
+badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
+badgrammar({[1] = -1 * m.V(1)}, "rule '1'")  -- left-recursive
+badgrammar({[1] = -1 + m.V(1)}, "rule '1'")  -- left-recursive
+badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'")  -- left-recursive
+badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'")  -- inf. loop
+badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'")  -- inf. loop
+badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'")  -- inf. loop
+badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
+badgrammar({ -(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
+badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'")  -- left recursive
+badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
+
+assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
+assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
+
+
+-- good x bad grammars
+m.P{ ('a' * m.V(1))^-1 }
+m.P{ -('a' * m.V(1)) }
+m.P{ ('abc' * m.V(1))^-1 }
+m.P{ -('abc' * m.V(1)) }
+badgrammar{ #m.P('abc') * m.V(1) }
+badgrammar{ -('a' + m.V(1)) }
+m.P{ #('a' * m.V(1)) }
+badgrammar{ #('a' + m.V(1)) }
+m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
+badgrammar{ m.B{ m.P'abc' } * m.V(1) }
+badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
+
+
+-- simple tests for maximum sizes:
+local p = m.P"a"
+for i=1,14 do p = p * p end
+
+p = {}
+for i=1,100 do p[i] = m.P"a" end
+p = m.P(p)
+
+
+-- strange values for rule labels
+
+p = m.P{ "print",
+     print = m.V(print),
+     [print] = m.V(_G),
+     [_G] = m.P"a",
+   }
+
+assert(p:match("a"))
+
+-- initial rule
+g = {}
+for i = 1, 10 do g["i"..i] =  "a" * m.V("i"..i+1) end
+g.i11 = m.P""
+for i = 1, 10 do
+  g[1] = "i"..i
+  local p = m.P(g)
+  assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
+end
+
+print"+"
+
+
+-- tests for back references
+checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
+checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
+
+p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
+t = p:match("ab")
+checkeq(t, {"a", "b"})
+
+p = m.P(true)
+for i = 1, 10 do p = p * m.Cg(1, i) end
+for i = 1, 10 do
+  local p = p * m.Cb(i)
+  assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
+end
+
+
+t = {}
+function foo (p) t[#t + 1] = p; return p .. "x" end
+
+p = m.Cg(m.C(2)    / foo, "x") * m.Cb"x" *
+    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
+    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
+    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
+x = {p:match'ab'}
+checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
+checkeq(t, {'ab',
+            'ab', 'abx',
+            'ab', 'abx', 'abxx',
+            'ab', 'abx', 'abxx', 'abxxx'})
+
+
+
+-- tests for match-time captures
+
+p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
+  + 'acd'
+
+assert(p:match('abc') == 3)
+assert(p:match('acd') == 4)
+
+local function id (s, i, ...)
+  return true, ...
+end
+
+assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
+              m.R'09'^1 /  string.char +
+              m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
+
+p = m.P{'S',
+  S = m.V'atom' * space
+    + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
+  atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
+}
+x = p:match"(a g () ((b) c) (d (e)))"
+checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
+
+x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
+assert(#x == 500)
+
+local function id(s, i, x)
+  if x == 'a' then return i, 1, 3, 7
+  else return nil, 2, 4, 6, 8
+  end   
+end     
+
+p = ((m.P(id) * 1 + m.Cmt(2, id) * 1  + m.Cmt(1, id) * 1))^0
+assert(table.concat{p:match('abababab')} == string.rep('137', 4))
+
+local function ref (s, i, x)
+  return m.match(x, s, i - x:len())
+end
+
+assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
+assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
+assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
+
+ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
+
+assert(m.Cmt(1, ref):match'2')
+assert(not m.Cmt(1, ref):match'1')
+assert(m.Cmt(m.P(1)^0, ref):match'03')
+
+function ref (s, i, a, b)
+  if a == b then return i, a:upper() end
+end
+
+p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
+p = (any - p)^0 * p * any^0 * -1
+
+assert(p:match'abbbc-bc ddaa' == 'BC')
+
+do   -- match-time captures cannot be optimized away
+  local touch = 0
+  f = m.P(function () touch = touch + 1; return true end)
+
+  local function check(n) n = n or 1; assert(touch == n); touch = 0 end
+
+  assert(m.match(f * false + 'b', 'a') == nil); check()
+  assert(m.match(f * false + 'b', '') == nil); check()
+  assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
+  assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
+  assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
+  assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
+  assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
+  assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
+  assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
+     check()
+  assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
+  assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
+  assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
+  assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
+  assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
+end
+
+c = '[' * m.Cg(m.P'='^0, "init") * '[' *
+    { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
+                                               return s1 == s2 end)
+       + 1 * m.V(1) } / 0
+
+assert(c:match'[==[]]====]]]]==]===[]' == 18)
+assert(c:match'[[]=]====]=]]]==]===[]' == 14)
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+
+-- old bug: optimization of concat with fail removed match-time capture
+p = m.Cmt(0, function (s) p = s end) * m.P(false)
+assert(not p:match('alo'))
+assert(p == 'alo')
+
+
+-- ensure that failed match-time captures are not kept on Lua stack
+do
+  local t = {__mode = "kv"}; setmetatable(t,t)
+  local c = 0
+
+  local function foo (s,i)
+    collectgarbage();
+    assert(next(t) == "__mode" and next(t, "__mode") == nil)
+    local x = {}
+    t[x] = true
+    c = c + 1
+    return i, x
+  end
+
+  local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
+  p:match(string.rep('1', 10))
+  assert(c == 11)
+end
+
+
+-- Return a match-time capture that returns 'n' captures
+local function manyCmt (n)
+    return m.Cmt("a", function ()
+             local a = {}; for i = 1, n do a[i] = n - i end
+             return true, unpack(a)
+           end)
+end
+
+-- bug in 1.0: failed match-time that used previous match-time results
+do
+  local x
+  local function aux (...) x = #{...}; return false end
+  local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
+  assert(#res == 10 and res[1] == 9 and res[10] == 0)
+end
+
+
+-- bug in 1.0: problems with math-times returning too many captures
+do
+  local lim = 2^11 - 10
+  local res = {m.match(manyCmt(lim), "a")}
+  assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
+  checkerr("too many", m.match, manyCmt(2^15), "a")
+end
+
+p = (m.P(function () return true, "a" end) * 'a'
+  + m.P(function (s, i) return i, "aa", 20 end) * 'b'
+  + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
+
+t = {p:match('abacc')}
+checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
+
+
+-------------------------------------------------------------------
+-- Tests for 're' module
+-------------------------------------------------------------------
+
+local re = require "re"
+
+local match, compile = re.match, re.compile
+
+
+
+assert(match("a", ".") == 2)
+assert(match("a", "''") == 1)
+assert(match("", " ! . ") == 1)
+assert(not match("a", " ! . "))
+assert(match("abcde", "  ( . . ) * ") == 5)
+assert(match("abbcde", " [a-c] +") == 5)
+assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
+assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
+assert(match("abbc--", " [a-c] + +") == 5)
+assert(match("abbc--", " [ac-] +") == 2)
+assert(match("abbc--", " [-acb] + ") == 7)
+assert(not match("abbcde", " [b-z] + "))
+assert(match("abb\"de", '"abb"["]"de"') == 7)
+assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
+assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
+
+assert(re.match("aaand", "[a]^2") == 3)
+
+local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "((&&'e' {})? .)*")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
+checkeq(t, {4, 5, 7})
+
+assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
+assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
+assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
+
+assert(match("abc", "a <- (. a)?") == 4)
+b = "balanced <- '(' ([^()] / balanced)* ')'"
+assert(match("(abc)", b))
+assert(match("(a(b)((c) (d)))", b))
+assert(not match("(a(b ((c) (d)))", b))
+
+b = compile[[  balanced <- "(" ([^()] / balanced)* ")" ]]
+assert(b == m.P(b))
+assert(b:match"((((a))(b)))")
+
+local g = [[
+  S <- "0" B / "1" A / ""   -- balanced strings
+  A <- "0" S / "1" A A      -- one more 0
+  B <- "1" S / "0" B B      -- one more 1
+]]
+assert(match("00011011", g) == 9)
+
+local g = [[
+  S <- ("0" B / "1" A)*
+  A <- "0" / "1" A A
+  B <- "1" / "0" B B
+]]
+assert(match("00011011", g) == 9)
+assert(match("000110110", g) == 9)
+assert(match("011110110", g) == 3)
+assert(match("000110010", g) == 1)
+
+s = "aaaaaaaaaaaaaaaaaaaaaaaa"
+assert(match(s, "'a'^3") == 4)
+assert(match(s, "'a'^0") == 1)
+assert(match(s, "'a'^+3") == s:len() + 1)
+assert(not match(s, "'a'^+30"))
+assert(match(s, "'a'^-30") == s:len() + 1)
+assert(match(s, "'a'^-5") == 6)
+for i = 1, s:len() do
+  assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
+  assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
+  assert(match(s, string.format("'a'^%d", i)) == i + 1)
+end
+assert(match("01234567890123456789", "[0-9]^3+") == 19)
+
+
+assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
+t = match("0123456789", "{| {.}* |}")
+checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
+assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
+
+assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
+assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
+assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
+
+assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
+assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
+
+eqcharset(compile"[]]", "]")
+eqcharset(compile"[][]", m.S"[]")
+eqcharset(compile"[]-]", m.S"-]")
+eqcharset(compile"[-]", m.S"-")
+eqcharset(compile"[az-]", m.S"a-z")
+eqcharset(compile"[-az]", m.S"a-z")
+eqcharset(compile"[a-z]", m.R"az")
+eqcharset(compile"[]['\"]", m.S[[]['"]])
+
+eqcharset(compile"[^]]", any - "]")
+eqcharset(compile"[^][]", any - m.S"[]")
+eqcharset(compile"[^]-]", any - m.S"-]")
+eqcharset(compile"[^]-]", any - m.S"-]")
+eqcharset(compile"[^-]", any - m.S"-")
+eqcharset(compile"[^az-]", any - m.S"a-z")
+eqcharset(compile"[^-az]", any - m.S"a-z")
+eqcharset(compile"[^a-z]", any - m.R"az")
+eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
+
+-- tests for comments in 're'
+e = compile[[
+A  <- _B   -- \t \n %nl .<> <- -> --
+_B <- 'x'  --]]
+assert(e:match'xy' == 2)
+
+-- tests for 're' with pre-definitions
+defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
+e = compile("%letters (%letters / %digits)*", defs)
+assert(e:match"x123" == 5)
+e = compile("%_", defs)
+assert(e:match"__" == 3)
+
+e = compile([[
+  S <- A+
+  A <- %letters+ B
+  B <- %digits+
+]], defs)
+
+e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
+assert(e:match("2.34") == math.sin(2.34))
+
+
+function eq (_, _, a, b) return a == b end
+
+c = re.compile([[
+  longstring <- '[' {:init: '='* :} '[' close
+  close <- ']' =init ']' / . close
+]])
+
+assert(c:match'[==[]]===]]]]==]===[]' == 17)
+assert(c:match'[[]=]====]=]]]==]===[]' == 14)
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
+
+assert(c:match'[==[]]===]]]]==]')
+assert(c:match'[[]=]====]=][]==]===[]]')
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+assert(re.find("hi alalo", "{:x:..:} =x") == 4)
+assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
+assert(not re.find("hi alalo", "{:x:..:} =x", 5))
+assert(re.find("hi alalo", "{'al'}", 5) == 6)
+assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
+assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
+
+-- re.find discards any captures
+local a,b,c = re.find("alo", "{.}{'o'}")
+assert(a == 2 and b == 3 and c == nil)
+
+local function match (s,p)
+  local i,e = re.find(s,p)
+  if i then return s:sub(i, e) end
+end
+assert(match("alo alo", '[a-z]+') == "alo")
+assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
+assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
+
+assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
+assert(re.gsub("alo alo", "%w+", ".") == ". .")
+assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
+               "hI, hOw ArE yOU")
+
+s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
+c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
+assert(re.gsub(s, c, "%2") == 'hi  and =]')
+assert(re.gsub(s, c, "%0") == s)
+assert(re.gsub('[=[hi]=]', c, "%2") == '=')
+
+assert(re.find("", "!.") == 1)
+assert(re.find("alo", "!.") == 4)
+
+function addtag (s, i, t, tag) t.tag = tag; return i, t end
+
+c = re.compile([[
+  doc <- block !.
+  block <- (start {| (block / { [^<]+ })* |} end?) => addtag
+  start <- '<' {:tag: [a-z]+ :} '>'
+  end <- '</' { =tag } '>'
+]], {addtag = addtag})
+
+x = c:match[[
+<x>hi<b>hello</b>but<b>totheend</x>]]
+checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
+                     {'totheend'}})
+
+
+-- test for folding captures
+c = re.compile([[
+  S <- (number (%s+ number)*) ~> add
+  number <- %d+ -> tonumber
+]], {tonumber = tonumber, add = function (a,b) return a + b end})
+assert(c:match("3 401 50") == 3 + 401 + 50)
+
+-- tests for look-ahead captures
+x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
+checkeq(x, {"", "alo", ""})
+
+assert(re.match("aloalo",
+   "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
+       == "AallooAalloo")
+
+-- bug in 0.9 (and older versions), due to captures in look-aheads
+x = re.compile[[   {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~}  ]]
+assert(x:match"alo alo" == "+ +")
+
+-- valid capture in look-ahead (used inside the look-ahead itself)
+x = re.compile[[
+      S <- &({:two: .. :} . =two) {[a-z]+} / . S
+]]
+assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
+
+
+p = re.compile[[
+  block <- {| {:ident:space*:} line
+           ((=ident !space line) / &(=ident space) block)* |}
+  line <- {[^%nl]*} %nl
+  space <- '_'     -- should be ' ', but '_' is simpler for editors
+]]
+
+t= p:match[[
+1
+__1.1
+__1.2
+____1.2.1
+____
+2
+__2.1
+]]
+checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
+            "2", {"2.1", ident = "__"}, ident = ""})
+
+
+-- nested grammars
+p = re.compile[[
+       s <- a b !.
+       b <- ( x <- ('b' x)? )
+       a <- ( x <- 'a' x? )
+]]
+
+assert(p:match'aaabbb')
+assert(p:match'aaa')
+assert(not p:match'bbb')
+assert(not p:match'aaabbba')
+
+-- testing groups
+t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
+checkeq(t, {"a", "bc", "b", "c", "c", ""})
+
+t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
+checkeq(t, {a="1", b="2", c="4"})
+t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
+checkeq(t, {a="1", b="2", c="4"})
+t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
+checkeq(t, {"1", b="2", "4", "5"})
+t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
+checkeq(t, {"1", "23", "4", "5"})
+t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
+checkeq(t, {"1", "23", "4", "5"})
+
+
+-- testing pre-defined names
+assert(os.setlocale("C") == "C")
+
+function eqlpeggsub (p1, p2)
+  local s1 = cs2str(re.compile(p1))
+  local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
+  -- if s1 ~= s2 then print(#s1,#s2) end
+  assert(s1 == s2)
+end
+
+
+eqlpeggsub("%w", "%w")
+eqlpeggsub("%a", "%a")
+eqlpeggsub("%l", "%l")
+eqlpeggsub("%u", "%u")
+eqlpeggsub("%p", "%p")
+eqlpeggsub("%d", "%d")
+eqlpeggsub("%x", "%x")
+eqlpeggsub("%s", "%s")
+eqlpeggsub("%c", "%c")
+
+eqlpeggsub("%W", "%W")
+eqlpeggsub("%A", "%A")
+eqlpeggsub("%L", "%L")
+eqlpeggsub("%U", "%U")
+eqlpeggsub("%P", "%P")
+eqlpeggsub("%D", "%D")
+eqlpeggsub("%X", "%X")
+eqlpeggsub("%S", "%S")
+eqlpeggsub("%C", "%C")
+
+eqlpeggsub("[%w]", "%w")
+eqlpeggsub("[_%w]", "_%w")
+eqlpeggsub("[^%w]", "%W")
+eqlpeggsub("[%W%S]", "%W%S")
+
+re.updatelocale()
+
+
+-- testing nested substitutions x string captures
+
+p = re.compile[[
+      text <- {~ item* ~}
+      item <- macro / [^()] / '(' item* ')'
+      arg <- ' '* {~ (!',' item)* ~}
+      args <- '(' arg (',' arg)* ')'
+      macro <- ('apply' args) -> '%1(%2)'
+             / ('add' args) -> '%1 + %2'
+             / ('mul' args) -> '%1 * %2'
+]]
+
+assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
+
+rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
+
+assert(rev:match"0123456789" == "9876543210")
+
+
+-- testing error messages in re
+
+local function errmsg (p, err)
+  checkerr(err, re.compile, p)
+end
+
+errmsg('aaaa', "rule 'aaaa'")
+errmsg('a', 'outside')
+errmsg('b <- a', 'undefined')
+errmsg("x <- 'a'  x <- 'b'", 'already defined')
+errmsg("'a' -", "near '-'")
+
+
+print"OK"
+
+