diff src/ploki/re.c @ 4223:ac0403686959

<oerjan> rm -rf src/ploki; mv ploki src
author HackBot
date Fri, 20 Dec 2013 22:18:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ploki/re.c	Fri Dec 20 22:18:50 2013 +0000
@@ -0,0 +1,2661 @@
+#include "config.h"
+#include "IO.h"
+#include "Str.h"
+#include "hash.h"
+#include "main_io.h"
+#include "main_opt.h"
+#include "re.h"
+#include "xmalloc.h"
+#include "zz.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+enum {CLASS_SIZE = UCHAR_MAX / CHAR_BIT + 1u};
+
+static unsigned char dc_word[CLASS_SIZE];
+static unsigned char dc_alpha[CLASS_SIZE];
+static unsigned char dc_cntrl[CLASS_SIZE];
+static unsigned char dc_digit[CLASS_SIZE];
+static unsigned char dc_lower[CLASS_SIZE];
+static unsigned char dc_print[CLASS_SIZE];
+static unsigned char dc_space[CLASS_SIZE];
+static unsigned char dc_upper[CLASS_SIZE];
+static unsigned char dc_xdigit[CLASS_SIZE];
+
+static void fill(unsigned char *v, int (*pred)(int)) {
+	unsigned char c;
+
+	for (c = UCHAR_MAX; c; --c) {
+		v[c / CHAR_BIT] |= !!pred(c) << c % CHAR_BIT;
+	}
+	v[0] |= !!pred(0);
+}
+
+static void nfill(unsigned char *v, int (*pred)(int)) {
+	unsigned char c;
+
+	for (c = UCHAR_MAX; c; --c) {
+		v[c / CHAR_BIT] |= !pred(c) << c % CHAR_BIT;
+	}
+	v[0] |= !pred(0);
+}
+
+ATTR_CONST
+static int siword(int c) {
+	return c == '_' || isalnum(c);
+}
+
+enum {MAGIC = 23};
+
+static Hash rcache;
+static String cached[MAGIC];
+static size_t cachfill, cachlast;
+
+static size_t hash(const void *s, size_t seed) {
+	return St_hash(s, seed);
+}
+
+static int compar(const void *a, const void *b) {
+	return St_cmp(a, b);
+}
+
+enum t_re_node {
+	RE_ALTER,
+	RE_ANYCH,
+	RE_AT_BOL,
+	RE_AT_BOS,
+	RE_AT_EOL,
+	RE_AT_EOS,
+	RE_AT_MATCH,
+	RE_AT_NOMATCH,
+	RE_AT_NWBOUND,
+	RE_AT_WBOUND,
+	RE_BACKCHECK,
+	RE_BACKREF,
+	RE_BEGIN_CAPTURE,
+	RE_CLASS,
+	RE_DEFCLASS,
+	RE_END_CAPTURE,
+	RE_INDEP,
+	RE_LITERAL,
+	RE_N_CLASS,
+	RE_N_DEFCLASS,
+	RE_PARTIAL,
+	RE_REP_FRUGAL,
+	RE_REP_GREEDY,
+	RE_SELECT,
+	RE_STUFF_FRUGAL,
+	RE_STUFF_GREEDY,
+	RE_XREP_FRUGAL,
+	RE_XREP_GREEDY
+};
+
+#define CMN_HDR  enum t_re_node type; union my_node *next
+
+typedef union my_node {
+	struct {
+		CMN_HDR;
+	} x;
+	struct {
+		CMN_HDR;
+		union my_node *arg;
+	} alter;
+	struct {
+		CMN_HDR;
+		size_t n;
+	} backref;
+	struct {
+		CMN_HDR;
+		size_t n;
+	} capture;
+	struct {
+		CMN_HDR;
+		unsigned char vec[CLASS_SIZE];
+	} class;
+	struct {
+		CMN_HDR;
+		unsigned char *vec;
+	} defclass;
+	struct {
+		CMN_HDR;
+		union my_node *arg;
+	} indep;
+	struct {
+		CMN_HDR;
+		unsigned char *buf;
+		size_t len;
+	} literal;
+	struct {
+		CMN_HDR;
+		union my_node *arg;
+		size_t min, max;
+		size_t n;
+	} rep;
+	struct {
+		CMN_HDR;
+		union my_node *arg;
+		union my_node *cond;
+	} select;
+} re_node;
+
+#include "re_block.c.h"
+
+enum re_flags {
+	FL_NONE = 0,
+	FL_ANCHOR_START = 1
+};
+
+struct cap_state {
+	size_t start, end, pending;
+};
+
+struct my_regex {
+	re_node *start;
+	size_t repets;
+	size_t *repbuf;
+	t_block alloc;
+	size_t captures;
+	struct cap_state *cap_ptr;
+	size_t refs;
+	enum re_flags flags;
+	size_t minlen;
+};
+
+static void delv(void *v) {
+	re_free(v);
+}
+
+void re_init(void) {
+	fill(dc_word, siword);
+	fill(dc_alpha, isalpha);
+	fill(dc_cntrl, iscntrl);
+	fill(dc_digit, isdigit);
+	fill(dc_lower, islower);
+	fill(dc_print, isprint);
+	fill(dc_space, isspace);
+	fill(dc_upper, isupper);
+	fill(dc_xdigit, isxdigit);
+
+	h_init(&rcache, hash, compar, NULL, delv);
+}
+
+void re_end(void) {
+	h_end(&rcache);
+	while (cachfill) {
+		--cachfill;
+		St_clear(&cached[cachfill]);
+	}
+	cachlast = 0;
+}
+
+
+enum   {ESCAPE     =        '!'};
+#define MEATCHARP(c) strchr("!|({<[]>})*+?:.'^$`", c)
+#define ENDBRANCHP(c) strchr("|({<[", c)
+
+struct parse_context {
+	t_regex *base;
+	t_block *block;
+	const String *s;
+	size_t *p;
+};
+
+static void skipspace(const String *const s, size_t *const p) {
+restart:
+	while (isspace(ST_INDEX(s, *p))) {
+		--*p;
+	}
+	if (ST_INDEX(s, *p) == ']' && ST_INDEX(s, *p - 1u) == '#') {
+		*p -= 2u;
+		while (ST_INDEX(s, *p) != EOF) {
+			if (ST_INDEX(s, *p) == '[') {
+				--*p;
+				goto restart;
+			}
+			if (ST_INDEX(s, *p) == ESCAPE && ST_INDEX(s, *p - 1u) != EOF) {
+				--*p;
+			}
+			--*p;
+		}
+	}
+}
+
+#define PRAZ(name) static re_node *name(const struct parse_context *con, re_node *next, re_node *curolan_zebeth)
+#define SNORK const String *const s = con->s; size_t *const p = con->p
+#define CALL(name, nextn, glob) name(con, nextn, glob)
+#define TAILC(name) return CALL(name, next, curolan_zebeth)
+
+PRAZ(quantifire);
+PRAZ(branch);
+PRAZ(alternation);
+
+PRAZ(atom) {
+	SNORK;
+	(void)curolan_zebeth;
+
+	skipspace(s, p);
+	switch (ST_INDEX(s, *p)) {
+		case '(':
+		case '<':
+		case '{':
+		case '[':
+		case EOF: {
+			re_node *cur = bl_node(con->block);
+			cur->literal.type = RE_LITERAL;
+			cur->literal.next = next;
+			cur->literal.buf = NULL;
+			cur->literal.len = 0;
+			return cur;
+		}
+
+		case ESCAPE: {
+			re_node *cur = bl_node(con->block);
+			--*p;
+			if (isdigit(ST_INDEX(s, *p))) {
+				do {
+					--*p;
+				} while (isdigit(ST_INDEX(s, *p)));
+				cur->backref.type = RE_BACKREF;
+				cur->backref.next = next;
+				cur->backref.n = strtoul(St_ptr(s) + *p + 1, NULL, 10);
+			} else if (isalpha(ST_INDEX(s, *p))) {
+				switch (ST_INDEX(s, *p)) {
+					case 'A':
+						cur->x.type = RE_AT_BOL;
+						cur->x.next = next;
+						--*p;
+						break;
+					case 'Z':
+						cur->x.type = RE_AT_EOL;
+						cur->x.next = next;
+						--*p;
+						break;
+
+					case 'b':
+						cur->x.type = RE_AT_WBOUND;
+						cur->x.next = next;
+						--*p;
+						break;
+					case 'B':
+						cur->x.type = RE_AT_NWBOUND;
+						cur->x.next = next;
+						--*p;
+						break;
+
+					case 'c':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_cntrl;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'C':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_cntrl;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'd':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_digit;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'D':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_digit;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'l':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_lower;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'L':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_lower;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'p':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_print;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'P':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_print;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'q':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_alpha;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'Q':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_alpha;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 's':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_space;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'S':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_space;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'u':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_upper;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'U':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_upper;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'w':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_word;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'W':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_word;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					case 'x':
+						cur->defclass.type = RE_DEFCLASS;
+						cur->defclass.vec = dc_xdigit;
+						cur->defclass.next = next;
+						--*p;
+						break;
+					case 'X':
+						cur->defclass.type = RE_N_DEFCLASS;
+						cur->defclass.vec = dc_xdigit;
+						cur->defclass.next = next;
+						--*p;
+						break;
+
+					default:
+						goto default_escape;
+				}
+
+			} else default_escape: {
+				cur->literal.type = RE_LITERAL;
+				cur->literal.next = next;
+				cur->literal.buf = bl_string(con->block, cur->literal.len = 1);
+				if (ST_INDEX(s, *p) == EOF) {
+					cur->literal.buf[0] = ESCAPE;
+				} else {
+					cur->literal.buf[0] = ST_INDEX(s, *p);
+					--*p;
+				}
+			}
+			return cur;
+		}
+
+		case ')': {
+			re_node *cur;
+			--*p;
+			cur = CALL(alternation, next, next);
+			skipspace(s, p);
+			if (ST_INDEX(s, *p) == '(') {
+				--*p;
+			}
+			return cur;
+		}
+
+		case ']': {
+			re_node *cur;
+			switch (ST_INDEX(s, *p - 1u)) {
+				case '&':
+					*p -= 2u;
+					cur = bl_node(con->block);
+					cur->indep.type = RE_AT_MATCH;
+					cur->indep.next = next;
+					cur->indep.arg = CALL(alternation, NULL, NULL);
+					break;
+
+				case '^':
+					*p -= 2u;
+					cur = bl_node(con->block);
+					cur->indep.type = RE_AT_NOMATCH;
+					cur->indep.next = next;
+					cur->indep.arg = CALL(alternation, NULL, NULL);
+					break;
+
+				case '?':
+					*p -= 2u;
+					cur = bl_node(con->block);
+					cur->select.type = RE_SELECT;
+					cur->select.cond = CALL(quantifire, NULL, NULL);
+					cur->select.arg = CALL(branch, next, next);
+					skipspace(s, p);
+					if (ST_INDEX(s, *p) == '|') {
+						--*p;
+						cur->select.next = CALL(alternation, next, next);
+					} else {
+						cur->select.next = next;
+					}
+					break;
+
+				case '0': case '1': case '2': case '3': case '4':
+				case '5': case '6': case '7': case '8': case '9': {
+					size_t i;
+
+					for (i = *p - 2u; isdigit(ST_INDEX(s, i)); --i)
+						;
+					if (ST_INDEX(s, i) != '[') {
+						goto std_atom;
+					}
+					cur = bl_node(con->block);
+					cur->backref.type = RE_BACKCHECK;
+					cur->backref.next = next;
+					cur->backref.n = strtoul(St_ptr(s) + i + 1u, NULL, 10);
+					*p = i - 1u;
+
+					return cur;
+				}
+
+				default:
+					goto std_atom;
+			}
+
+			skipspace(s, p);
+			if (ST_INDEX(s, *p) == '[') {
+				--*p;
+			}
+			return cur;
+		}
+
+		case '>': {
+			re_node *cur = bl_node(con->block);
+			--*p;
+			cur->indep.type = RE_INDEP;
+			cur->indep.next = next;
+			cur->indep.arg = CALL(alternation, NULL, NULL);
+			skipspace(s, p);
+			if (ST_INDEX(s, *p) == '<') {
+				--*p;
+			}
+			return cur;
+		}
+
+		case '}': {
+			re_node *begin = bl_node(con->block);
+			re_node *end = bl_node(con->block);
+			--*p;
+			begin->capture.n = end->capture.n = con->base->captures++;
+			end->capture.type = RE_END_CAPTURE;
+			end->capture.next = next;
+			begin->capture.type = RE_BEGIN_CAPTURE;
+			begin->capture.next = CALL(alternation, end, end);
+			skipspace(s, p);
+			if (ST_INDEX(s, *p) == '{') {
+				--*p;
+			}
+			return begin;
+		}
+
+		case '^': {
+			re_node *cur = bl_node(con->block);
+			cur->x.type = RE_AT_BOS;
+			cur->x.next = next;
+			--*p;
+			return cur;
+		}
+
+		case '$': {
+			re_node *cur = bl_node(con->block);
+			cur->x.type = RE_AT_EOS;
+			cur->x.next = next;
+			--*p;
+			return cur;
+		}
+
+		case '.': {
+			re_node *cur = bl_node(con->block);
+			cur->x.type = RE_ANYCH;
+			cur->x.next = next;
+			--*p;
+			return cur;
+		}
+
+		case '`': {
+			String tmp;
+			int c;
+			re_node *cur = bl_node(con->block);
+			cur->literal.type = RE_PARTIAL;
+			cur->literal.next = next;
+
+			--*p;
+			St_init(&tmp);
+			while (
+				(c = ST_INDEX(s, *p)) != EOF &&
+				c != '`'
+			) {
+				--*p;
+				if (c == ESCAPE && ST_INDEX(s, *p) != EOF) {
+					c = ST_INDEX(s, *p);
+					--*p;
+				}
+				St_cat_c(&tmp, c);
+			}
+			if (c == '`') {
+				--*p;
+			}
+			St_reverse(&tmp);
+
+			cur->literal.buf = bl_string(con->block, cur->literal.len = St_len(&tmp));
+			memcpy(cur->literal.buf, St_ptr(&tmp), St_len(&tmp));
+			St_clear(&tmp);
+
+			return cur;
+		}
+
+		case '\'': {
+			int c;
+			re_node *cur = bl_node(con->block);
+			cur->class.type = RE_CLASS;
+			cur->class.next = next;
+			memset(cur->class.vec, '\0', sizeof cur->class.vec);
+			for (--*p; (c = ST_INDEX(s, *p)) != EOF && c != '\''; --*p) {
+				if (c == ESCAPE) {
+					switch (c = ST_INDEX(s, *p - 1)) {
+						case EOF:
+							cur->class.vec[ESCAPE / CHAR_BIT] |= 1u << ESCAPE % CHAR_BIT;
+							break;
+
+						case 'c':
+							fill(cur->class.vec, iscntrl);
+							--*p;
+							break;
+						case 'C':
+							nfill(cur->class.vec, iscntrl);
+							--*p;
+							break;
+
+						case 'd':
+							fill(cur->class.vec, isdigit);
+							--*p;
+							break;
+						case 'D':
+							nfill(cur->class.vec, isdigit);
+							--*p;
+							break;
+
+						case 'l':
+							fill(cur->class.vec, islower);
+							--*p;
+							break;
+						case 'L':
+							nfill(cur->class.vec, islower);
+							--*p;
+							break;
+
+						case 'p':
+							fill(cur->class.vec, isprint);
+							--*p;
+							break;
+						case 'P':
+							nfill(cur->class.vec, isprint);
+							--*p;
+							break;
+
+						case 'q':
+							fill(cur->class.vec, isalpha);
+							--*p;
+							break;
+						case 'Q':
+							nfill(cur->class.vec, isalpha);
+							--*p;
+							break;
+
+						case 's':
+							fill(cur->class.vec, isspace);
+							--*p;
+							break;
+						case 'S':
+							nfill(cur->class.vec, isspace);
+							--*p;
+							break;
+
+						case 'u':
+							fill(cur->class.vec, isupper);
+							--*p;
+							break;
+						case 'U':
+							nfill(cur->class.vec, isupper);
+							--*p;
+							break;
+
+						case 'w':
+							fill(cur->class.vec, siword);
+							--*p;
+							break;
+						case 'W':
+							nfill(cur->class.vec, siword);
+							--*p;
+							break;
+
+						case 'x':
+							fill(cur->class.vec, isxdigit);
+							--*p;
+							break;
+						case 'X':
+							nfill(cur->class.vec, isxdigit);
+							--*p;
+							break;
+
+						default:
+							cur->class.vec[c / CHAR_BIT] |= 1u << c % CHAR_BIT;
+							--*p;
+							break;
+					}
+				} else {
+					int b;
+					if (ST_INDEX(s, *p - 1) == '-' && (b = ST_INDEX(s, *p - 2)) != EOF) {
+						if (b == ESCAPE && (b = ST_INDEX(s, *p - 3)) == EOF) {
+							b = ESCAPE;
+						}
+						if (b > c) {
+							goto normal_char;
+						}
+						for (; b < c; ++b) {
+							cur->class.vec[b / CHAR_BIT] |= 1u << b % CHAR_BIT;
+						}
+						cur->class.vec[b / CHAR_BIT] |= 1u << b % CHAR_BIT;
+						*p -= 2;
+						if (ST_INDEX(s, *p) == ESCAPE && ST_INDEX(s, *p - 1) != EOF) {
+							--*p;
+						}
+					} else if (ST_INDEX(s, *p) == ']' && *p >= 8 && ST_INDEX(s, *p - 1) == ':') {
+
+#define CXCLASS_CHECK(n, t, f, o, b) if (1) { \
+	if (memcmp(St_ptr(s) + *p - (n), (t), (n) - 1) != 0) { \
+		goto normal_char; \
+	} \
+	(b)(cur->class.vec, (f)); \
+	*p -= (o); \
+	break; \
+} else (void)0
+
+#define CCLASS_CHECK(t, f, o) CXCLASS_CHECK(sizeof (t), t, f, o, fill)
+#define CNCLASS_CHECK(t, f, o) CXCLASS_CHECK(sizeof (t), t, f, o, nfill)
+
+						if (ST_INDEX(s, *p - 8) == '[' && ST_INDEX(s, *p - 7) == ':') {
+							switch (ST_INDEX(s, *p - 6)) {
+								case 'a':
+									if (ST_INDEX(s, *p - 5) != 'l') {
+										goto normal_char;
+									}
+									switch (ST_INDEX(s, *p - 4)) {
+										case 'n': CCLASS_CHECK("um", isalnum, 8);
+										case 'p': CCLASS_CHECK("ha", isalpha, 8);
+										default: goto normal_char;
+									}
+									break;
+
+								case 'c': CCLASS_CHECK("ntrl", iscntrl, 8);
+								case 'd': CCLASS_CHECK("igit", isdigit, 8);
+								case 'g': CCLASS_CHECK("raph", isgraph, 8);
+								case 'l': CCLASS_CHECK("ower", islower, 8);
+
+								case 'p':
+									switch (ST_INDEX(s, *p - 5)) {
+										case 'r': CCLASS_CHECK("int", isprint, 8);
+										case 'u': CCLASS_CHECK("nct", ispunct, 8);
+										default: goto normal_char;
+									}
+									break;
+
+								case 's': CCLASS_CHECK("pace", isspace, 8);
+								case 'u': CCLASS_CHECK("pper", isupper, 8);
+								default: goto normal_char;
+							}
+						} else if (*p >= 9 && ST_INDEX(s, *p - 9) == '[' && ST_INDEX(s, *p - 8) == ':') {
+							switch (ST_INDEX(s, *p - 7)) {
+								case 'x':
+									CCLASS_CHECK("digit", isxdigit, 9);
+
+								case '^':
+									switch (ST_INDEX(s, *p - 6)) {
+										case 'a':
+											if (ST_INDEX(s, *p - 5) != 'l') {
+												goto normal_char;
+											}
+											switch (ST_INDEX(s, *p - 4)) {
+												case 'n': CNCLASS_CHECK("um", isalnum, 9);
+												case 'p': CNCLASS_CHECK("ha", isalpha, 9);
+												default: goto normal_char;
+											}
+											break;
+
+										case 'c': CNCLASS_CHECK("ntrl", iscntrl, 9);
+										case 'd': CNCLASS_CHECK("igit", isdigit, 9);
+										case 'g': CNCLASS_CHECK("raph", isgraph, 9);
+										case 'l': CNCLASS_CHECK("ower", islower, 9);
+
+										case 'p':
+											switch (ST_INDEX(s, *p - 5)) {
+												case 'r': CNCLASS_CHECK("int", isprint, 9);
+												case 'u': CNCLASS_CHECK("nct", ispunct, 9);
+												default: goto normal_char;
+											}
+											break;
+
+										case 's': CNCLASS_CHECK("pace", isspace, 9);
+										case 'u': CNCLASS_CHECK("pper", isupper, 9);
+										default: goto normal_char;
+									}
+									break;
+
+								default: goto normal_char;
+							}
+						} else if (*p >= 10 && memcmp(St_ptr(s) + *p - 10, "[:^xdigit", 9) == 0) {
+							nfill(cur->class.vec, isxdigit);
+							*p -= 10;
+						} else {
+							goto normal_char;
+						}
+					} else if (ST_INDEX(s, *p) == '^' && ((b = ST_INDEX(s, *p - 1) == '\'') || b == EOF)) {
+						cur->class.type = RE_N_CLASS;
+					} else normal_char: {
+						cur->class.vec[c / CHAR_BIT] |= 1u << c % CHAR_BIT;
+					}
+				}
+			}
+			if (c == '\'') {
+				--*p;
+			}
+
+			if (cur->class.type == RE_CLASS) {
+				size_t i;
+				for (i = 0; i < CLASS_SIZE; ++i) {
+					if (cur->class.vec[i] != (unsigned char)~0u) {
+						return cur;
+					}
+				}
+				cur->x.type = RE_ANYCH;
+			} else {
+				size_t i;
+				assert(cur->class.type == RE_N_CLASS);
+				for (i = 0; i < CLASS_SIZE; ++i) {
+					if (cur->class.vec[i] != 0) {
+						return cur;
+					}
+				}
+				cur->x.type = RE_ANYCH;
+			}
+
+			return cur;
+		}
+
+std_atom:
+		default: {
+			re_node *cur = bl_node(con->block);
+			cur->literal.type = RE_LITERAL;
+
+			/*XXX?*/
+			if (next && next->x.type == RE_LITERAL) {
+				if (next->literal.len) {
+					cur->literal.buf = bl_string(con->block, cur->literal.len = 1u + next->literal.len);
+					memcpy(cur->literal.buf + 1, next->literal.buf, next->literal.len);
+				}
+				cur->literal.next = next->literal.next;
+			} else {
+				cur->literal.next = next;
+				cur->literal.buf = bl_string(con->block, cur->literal.len = 1);
+			}
+
+			cur->literal.buf[0] = ST_INDEX(s, *p);
+			--*p;
+			return cur;
+		}
+	}
+}
+
+PRAZ(literal) {
+	SNORK;
+	int c;
+	String tmp;
+	re_node *cur;
+
+	skipspace(s, p);
+	if ((ST_INDEX(s, *p) != ESCAPE || isalnum(ST_INDEX(s, *p - 1))) && MEATCHARP(ST_INDEX(s, *p))) {
+		TAILC(atom);
+	}
+
+	cur = bl_node(con->block);
+	cur->literal.type = RE_LITERAL;
+	cur->literal.next = next;
+
+	St_init(&tmp);
+	while (
+			(c = ST_INDEX(s, *p)) != EOF &&
+			(
+			 !MEATCHARP(c) ||
+			 (c == ESCAPE && !isalnum(ST_INDEX(s, *p - 1)))
+			)
+		  ) {
+		--*p;
+		if (c == ESCAPE && ST_INDEX(s, *p) != EOF) {
+			c = ST_INDEX(s, *p);
+			--*p;
+		}
+		St_cat_c(&tmp, c);
+		skipspace(s, p);
+	}
+	St_reverse(&tmp);
+
+	if (next && next->x.type == RE_LITERAL) {
+		if (next->literal.len) {
+			St_cat_m(&tmp, next->literal.buf, next->literal.len);
+		}
+		cur->literal.next = next->literal.next;
+	}
+
+	cur->literal.buf = bl_string(con->block, cur->literal.len = St_len(&tmp));
+	memcpy(cur->literal.buf, St_ptr(&tmp), St_len(&tmp));
+	St_clear(&tmp);
+
+	return cur;
+}
+
+#define RETURN_REP(lo, hi, grdy) \
+do { \
+	re_node *cur = bl_node(con->block); \
+	cur->rep.type = (grdy); \
+	cur->rep.next = next; \
+	cur->rep.min = (lo); \
+	cur->rep.max = (hi); \
+	cur->rep.n = con->base->repets++; \
+	cur->rep.arg = CALL(atom, cur, NULL); \
+	return cur; \
+} while (0)
+
+#define STD_REP_CASE(lo, hi, grdy) \
+do { \
+	--*p; \
+	RETURN_REP(lo, hi, grdy); \
+} while (0)
+
+#define CUSTOM_REP_CASE(off, grdy, mismatch) \
+do { \
+	size_t from, to; \
+	to = *p - 1 - (off); \
+	while (isdigit(ST_INDEX(s, to))) { \
+		--to; \
+	} \
+	if (ST_INDEX(s, to) == ':') { \
+		*p = to - 1; \
+		to = ST_INDEX(s, to + 1) == ':' ? (size_t)-1 : strtoul(St_ptr(s) + to + 1, NULL, 10); \
+		from = to; \
+	} else { \
+		if (ST_INDEX(s, to) != ',') { \
+			mismatch; \
+		} \
+		from = to - 1; \
+		if (!isdigit(ST_INDEX(s, from))) { \
+			mismatch; \
+		} \
+		do { \
+			--from; \
+		} while (isdigit(ST_INDEX(s, from))); \
+		if (ST_INDEX(s, from) != ':') { \
+			mismatch; \
+		} \
+		*p = from - 1; \
+		from = strtoul(St_ptr(s) + from + 1, NULL, 10); \
+		to = ST_INDEX(s, to + 1) == ':' ? (size_t)-1 : strtoul(St_ptr(s) + to + 1, NULL, 10); \
+	} \
+	if (to == 1) { \
+		if (from == 1) { \
+			TAILC(atom); \
+		} \
+		if (from == 0) { \
+			re_node *cur = bl_node(con->block); \
+			cur->alter.type = RE_ALTER; \
+			if ((grdy) == RE_REP_GREEDY) { \
+				cur->alter.next = next; \
+				cur->alter.arg = CALL(atom, next, NULL); \
+			} else { \
+				cur->alter.arg = next; \
+				cur->alter.next = CALL(atom, next, NULL); \
+			} \
+			return cur; \
+		} \
+	} \
+	RETURN_REP(from, to, grdy); \
+} while (0)
+
+PRAZ(quantifire) {
+	SNORK;
+
+	skipspace(s, p);
+	switch (ST_INDEX(s, *p)) {
+		case EOF: return next;
+
+		case '?':
+			switch (ST_INDEX(s, *p - 1)) {
+				case ':': CUSTOM_REP_CASE(1, RE_REP_FRUGAL, goto hook);
+				case '*': --*p; STD_REP_CASE(0, -1, RE_REP_FRUGAL);
+				case '+': --*p; STD_REP_CASE(1, -1, RE_REP_FRUGAL);
+
+				case '?': {
+					re_node *cur = bl_node(con->block);
+					*p -= 2;
+					cur->alter.type = RE_ALTER;
+					cur->alter.next = CALL(atom, next, NULL);
+					cur->alter.arg = next;
+					return cur;
+				}
+
+hook:
+				default: {
+					re_node *cur = bl_node(con->block);
+					--*p;
+					cur->alter.type = RE_ALTER;
+					cur->alter.next = next;
+					cur->alter.arg = CALL(atom, next, NULL);
+					return cur;
+				}
+			}
+
+		case ':': CUSTOM_REP_CASE(0, RE_REP_GREEDY, goto noquanti);
+		case '*': STD_REP_CASE(0, -1, RE_REP_GREEDY);
+		case '+': STD_REP_CASE(1, -1, RE_REP_GREEDY);
+
+noquanti:
+		default: TAILC(literal);
+	}
+}
+
+PRAZ(branch) {
+	SNORK;
+	skipspace(s, p);
+	while (ST_INDEX(s, *p) != EOF && !ENDBRANCHP(ST_INDEX(s, *p))) {
+		next = CALL(quantifire, next, curolan_zebeth);
+		skipspace(s, p);
+	}
+	return next;
+}
+
+PRAZ(alternation) {
+	SNORK;
+	re_node *cur;
+
+	skipspace(s, p);
+	if (ST_INDEX(s, *p) == EOF) {
+		return next;
+	}
+
+	next = CALL(branch, next, curolan_zebeth);
+	skipspace(s, p);
+	if (ST_INDEX(s, *p) != '|') {
+		return next;
+	}
+
+	--*p;
+	cur = bl_node(con->block);
+	cur->alter.type = RE_ALTER;
+	cur->alter.next = next;
+	cur->alter.arg = CALL(alternation, curolan_zebeth, curolan_zebeth);
+	return cur;
+}
+
+ATTR_CONST
+static size_t minimum(const size_t a, const size_t b) {
+	return a < b ? a : b;
+}
+
+ATTR_CONST
+static size_t maximum(const size_t a, const size_t b) {
+	return a > b ? a : b;
+}
+
+ATTR_PURE
+static size_t subminlen(const re_node *node, const re_node *end, size_t start) {
+	if (!node || node == end) {
+		return start;
+	}
+
+	switch (node->x.type) {
+		case RE_ALTER:
+			return minimum(
+					subminlen(node->alter.arg, end, start),
+					subminlen(node->alter.next, end, start)
+					);
+
+		case RE_ANYCH:
+		case RE_CLASS:
+		case RE_DEFCLASS:
+		case RE_N_CLASS:
+		case RE_N_DEFCLASS:
+			return subminlen(node->x.next, end, start + 1u);
+
+		case RE_AT_BOL:
+		case RE_AT_BOS:
+		case RE_AT_EOL:
+		case RE_AT_EOS:
+		case RE_AT_NWBOUND:
+		case RE_AT_WBOUND:
+		case RE_BACKCHECK:
+		case RE_BACKREF:
+		case RE_BEGIN_CAPTURE:
+		case RE_END_CAPTURE:
+		case RE_PARTIAL:
+		case RE_STUFF_FRUGAL:
+		case RE_STUFF_GREEDY:
+			return subminlen(node->x.next, end, start);
+
+		case RE_AT_MATCH:
+			return maximum(
+					subminlen(node->indep.arg, end, start),
+					subminlen(node->indep.next, end, start)
+					);
+
+		case RE_AT_NOMATCH:
+			return subminlen(node->indep.next, end, start);
+
+		case RE_INDEP:
+			return subminlen(node->indep.next, end, start + subminlen(node->indep.arg, NULL, 0));
+
+		case RE_LITERAL:
+			return subminlen(node->literal.next, end, start + node->literal.len);
+
+		case RE_SELECT:
+			return minimum(
+					subminlen(node->select.cond, end, start) + subminlen(node->select.arg, end, start),
+					subminlen(node->select.next, end, start)
+					);
+
+		case RE_REP_FRUGAL:
+		case RE_REP_GREEDY:
+			if (node->rep.min == 0) {
+				return subminlen(node->rep.next, end, start);
+			}
+			return subminlen(node->rep.next, end, start + node->rep.min * subminlen(node->rep.arg, node, 0));
+
+		case RE_XREP_FRUGAL:
+		case RE_XREP_GREEDY:
+			if (node->rep.min == 0) {
+				return subminlen(node->rep.next, end, start);
+			}
+			return subminlen(node->rep.next, end, start + node->rep.min * subminlen(node->rep.arg, NULL, 0));
+	}
+
+	NOTREACHED;
+}
+
+ATTR_PURE
+static size_t minlen(const re_node *node) {
+	return subminlen(node, NULL, 0);
+}
+
+static void reinit(t_regex *re) {
+	const struct cap_state null = { -1, -1, -1 };
+	size_t i;
+	for (i = 0; i < re->captures; ++i) {
+		re->cap_ptr[i] = null;
+	}
+}
+
+static re_node **notcomplex(re_node **node, const re_node *end) {
+	assert(*node != NULL);
+
+	if (*node == end) {
+		return node;
+	}
+
+	switch ((*node)->x.type) {
+		case RE_ALTER:
+		case RE_AT_MATCH:
+		case RE_AT_NOMATCH:
+		case RE_BACKCHECK:
+		case RE_BACKREF:
+		case RE_BEGIN_CAPTURE:
+		case RE_END_CAPTURE:
+		case RE_INDEP:
+		case RE_PARTIAL:
+		case RE_REP_FRUGAL:
+		case RE_REP_GREEDY:
+		case RE_SELECT:
+		case RE_STUFF_FRUGAL:
+		case RE_STUFF_GREEDY:
+			return NULL;
+
+		case RE_XREP_FRUGAL:
+		case RE_XREP_GREEDY:
+			if ((*node)->rep.min == (*node)->rep.max) {
+				return notcomplex(&(*node)->rep.next, end);
+			}
+			return NULL;
+
+		case RE_ANYCH:
+		case RE_AT_BOL:
+		case RE_AT_BOS:
+		case RE_AT_EOL:
+		case RE_AT_EOS:
+		case RE_AT_NWBOUND:
+		case RE_AT_WBOUND:
+		case RE_CLASS:
+		case RE_DEFCLASS:
+		case RE_LITERAL:
+		case RE_N_CLASS:
+		case RE_N_DEFCLASS:
+			return notcomplex(&(*node)->x.next, end);
+	}
+
+	NOTREACHED;
+}
+
+static void trysimpl(re_node *node) {
+	assert(node->x.type == RE_REP_FRUGAL || node->x.type == RE_REP_GREEDY);
+
+	{
+		re_node **const tmp = notcomplex(&node->rep.arg, node);
+		if (!tmp) {
+			return;
+		}
+		*tmp = NULL;
+	}
+
+	switch (node->rep.type) {
+		case RE_REP_FRUGAL:
+			if (
+					node->rep.arg->x.type == RE_ANYCH &&
+					node->rep.arg->x.next == NULL &&
+					node->rep.max == (size_t)-1
+			   ) {
+				if (node->rep.min == 0) {
+					node->x.type = RE_STUFF_FRUGAL;
+					break;
+				}
+				if (node->rep.min == 1) {
+					node->rep.arg->x.next = node->x.next;
+					node->rep.arg->x.type = RE_STUFF_FRUGAL;
+					node->x.next = node->rep.arg;
+					node->x.type = RE_ANYCH;
+					break;
+				}
+			}
+
+			node->rep.n = minlen(node->rep.arg);
+			node->rep.type = RE_XREP_FRUGAL;
+			break;
+
+		case RE_REP_GREEDY:
+			if (
+					node->rep.arg->x.type == RE_ANYCH &&
+					node->rep.arg->x.next == NULL &&
+					node->rep.max == (size_t)-1
+			   ) {
+				if (node->rep.min == 0) {
+					node->x.type = RE_STUFF_GREEDY;
+					break;
+				}
+				if (node->rep.min == 1) {
+					node->rep.arg->x.next = node->x.next;
+					node->rep.arg->x.type = RE_STUFF_GREEDY;
+					node->x.next = node->rep.arg;
+					node->x.type = RE_ANYCH;
+					break;
+				}
+			}
+
+			node->rep.n = minlen(node->rep.arg);
+			node->rep.type = RE_XREP_GREEDY;
+			break;
+
+		default:
+			NOTREACHED;
+	}
+
+}
+
+static void simplerep(re_node *node, const re_node *const end) {
+	if (!node || node == end) {
+		return;
+	}
+
+	switch (node->x.type) {
+		case RE_ALTER:
+			simplerep(node->alter.arg, end);
+			simplerep(node->alter.next, end);
+			return;
+
+		case RE_AT_MATCH:
+		case RE_AT_NOMATCH:
+		case RE_INDEP:
+			simplerep(node->indep.arg, end);
+			simplerep(node->indep.next, end);
+			return;
+
+		case RE_ANYCH:
+		case RE_AT_BOL:
+		case RE_AT_BOS:
+		case RE_AT_EOL:
+		case RE_AT_EOS:
+		case RE_AT_NWBOUND:
+		case RE_AT_WBOUND:
+		case RE_BACKCHECK:
+		case RE_BACKREF:
+		case RE_BEGIN_CAPTURE:
+		case RE_CLASS:
+		case RE_DEFCLASS:
+		case RE_END_CAPTURE:
+		case RE_LITERAL:
+		case RE_N_CLASS:
+		case RE_N_DEFCLASS:
+		case RE_PARTIAL:
+			simplerep(node->x.next, end);
+			return;
+
+		case RE_REP_FRUGAL:
+		case RE_REP_GREEDY:
+			simplerep(node->rep.next, end);
+			simplerep(node->rep.arg, node);
+			trysimpl(node);
+			return;
+
+		case RE_SELECT:
+			simplerep(node->select.cond, end);
+			simplerep(node->select.arg, end);
+			simplerep(node->select.next, end);
+			return;
+
+		case RE_STUFF_FRUGAL:
+		case RE_STUFF_GREEDY:
+		case RE_XREP_FRUGAL:
+		case RE_XREP_GREEDY:
+			return;
+	}
+
+	NOTREACHED;
+}
+
+ATTR_PURE
+static int anchorbegp(const re_node *node) {
+	return
+		node &&
+		(
+		 node->x.type == RE_AT_BOS ||
+		 node->x.type == RE_STUFF_FRUGAL ||
+		 node->x.type == RE_STUFF_GREEDY ||
+		 (
+		  node->x.type == RE_ALTER &&
+		  anchorbegp(node->alter.arg) &&
+		  anchorbegp(node->alter.next)
+		 ) ||
+		 (
+		  node->x.type == RE_INDEP &&
+		  anchorbegp(node->indep.arg)
+		 ) ||
+		 (
+		  node->x.type == RE_AT_MATCH &&
+		  (
+		   anchorbegp(node->indep.arg) ||
+		   anchorbegp(node->indep.next)
+		  )
+		 ) ||
+		 (
+		  (
+		   node->x.type == RE_AT_NOMATCH ||
+		   node->x.type == RE_AT_BOL ||
+		   node->x.type == RE_AT_EOL ||
+		   node->x.type == RE_AT_EOS ||
+		   node->x.type == RE_AT_NWBOUND ||
+		   node->x.type == RE_AT_WBOUND ||
+		   node->x.type == RE_BEGIN_CAPTURE ||
+		   node->x.type == RE_BACKCHECK
+		  ) &&
+		  anchorbegp(node->x.next)
+		 )
+		)
+		;
+}
+
+static void dostuff(t_regex *re) {
+	simplerep(re->start, NULL);
+
+	if (!re->start || anchorbegp(re->start)) {
+		re->flags |= FL_ANCHOR_START;
+	}
+}
+
+t_regex *re_compile(const String *s) {
+	size_t p;
+	struct parse_context dcon;
+	t_regex *re;
+	void *pre;
+
+	if (h_get(&rcache, s, &pre) == H_OK) {
+		re = pre;
+		++re->refs;
+		return re;
+	}
+
+	re = xmalloc(1, sizeof *re);
+	re->flags = FL_NONE;
+	re->refs = 0;
+	re->repets = 0;
+	bl_init(&re->alloc);
+	re->cap_ptr = NULL;
+	re->captures = 0;
+	dcon.base = re;
+	dcon.block = &re->alloc;
+	dcon.s = s;
+	p = St_len(s) - 1;
+	dcon.p = &p;
+
+	re->start = alternation(&dcon, NULL, NULL);
+
+	if (re->captures) {
+		re->cap_ptr = xmalloc(re->captures, sizeof *re->cap_ptr);
+		reinit(re);
+	}
+
+	dostuff(re);
+	re->minlen = minlen(re->start);
+	re->repbuf = xmalloc(re->repets, sizeof *re->repbuf);
+
+	if (cachfill < sizeof cached / sizeof cached[0]) {
+		St_init(&cached[cachfill]);
+		St_cpy(&cached[cachfill], s);
+		++re->refs;
+		h_put(&rcache, &cached[cachfill], re, 1);
+		++cachfill;
+	} else {
+		h_del(&rcache, &cached[cachlast]);
+		St_cpy(&cached[cachlast], s);
+		++re->refs;
+		h_put(&rcache, &cached[cachlast], re, 1);
+		cachlast = (cachlast + 1u) % (sizeof cached / sizeof cached[0]);
+	}
+
+	if (Opt.debug & DBG_REGEX) {
+		io_write_s(Err, "Compiled ");
+		re_dump(re, io_fp(Err));
+	}
+
+	return re;
+}
+
+t_regex *re_dup(t_regex *re) {
+	++re->refs;
+	return re;
+}
+
+void re_free(t_regex *re) {
+	if (re->refs) {
+		--re->refs;
+		return;
+	}
+	bl_free(&re->alloc);
+	xfree(re->repbuf);
+	xfree(re->cap_ptr);
+	xfree(re);
+}
+
+ATTR_PURE
+static int match_class(unsigned char c, const unsigned char *vec) {
+	return vec[c / CHAR_BIT] & 1u << c % CHAR_BIT;
+}
+
+static void *mem_dup(const void *p, size_t n, size_t m) {
+	void *q = xmalloc(n, m);
+	memcpy(q, p, n * m);
+	return q;
+}
+
+#define NO_MATCH ((size_t)-1)
+
+#define Mmatch0(Xmatch, Xindex) \
+	size_t tmp; \
+ \
+	if (!node) { \
+		return o; \
+	} \
+ \
+	switch (node->x.type) { \
+		case RE_ALTER: \
+			if ((tmp = Xmatch(base, node->alter.arg, s, o)) == NO_MATCH) { \
+				return Xmatch(base, node->alter.next, s, o); \
+			} \
+			return tmp; \
+ \
+		case RE_ANYCH: \
+			if (Xindex(s, o) != EOF) { \
+				return Xmatch(base, node->x.next, s, o + 1u); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_BOL: \
+			if (o == 0 || Xindex(s, o - 1u) == '\n') { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_BOS: \
+			if (o == 0) { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_EOL: \
+			if ( \
+					Xindex(s, o) == '\n' || \
+					( \
+					 o && Xindex(s, o) == EOF && Xindex(s, o - 1u) != '\n' \
+					) \
+			   ) { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_EOS: \
+			if (Xindex(s, o) == EOF) { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_MATCH: { \
+			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
+			tmp = Xmatch(base, node->indep.arg, s, o); \
+			if (tmp != NO_MATCH) { \
+				tmp = Xmatch(base, node->indep.next, s, o); \
+				if (tmp == NO_MATCH) { \
+					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
+				} \
+			} \
+			xfree(prev); \
+			return tmp; \
+		} \
+ \
+		case RE_AT_NOMATCH: { \
+			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
+			tmp = Xmatch(base, node->indep.arg, s, o); \
+			if (tmp == NO_MATCH) { \
+				tmp = Xmatch(base, node->indep.next, s, o); \
+			} else { \
+				memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
+				tmp = NO_MATCH; \
+			} \
+			xfree(prev); \
+			return tmp; \
+		} \
+ \
+		case RE_AT_NWBOUND: \
+			if (siword(Xindex(s, o - 1u)) == siword(Xindex(s, o))) { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_AT_WBOUND: \
+			if (siword(Xindex(s, o - 1u)) != siword(Xindex(s, o))) { \
+				return Xmatch(base, node->x.next, s, o); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_BACKCHECK: \
+			if ( \
+					node->backref.n < base->captures && \
+					base->cap_ptr[node->backref.n].start + 1u && \
+					base->cap_ptr[node->backref.n].end + 1u && \
+					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start \
+			   ) { \
+				return Xmatch(base, node->backref.next, s, o); \
+			} \
+			return NO_MATCH
+
+
+#define Mmatch1(Xmatch, Xindex) \
+		case RE_BEGIN_CAPTURE: { \
+			const size_t prev = base->cap_ptr[node->capture.n].pending; \
+			base->cap_ptr[node->capture.n].pending = o; \
+			if ((tmp = Xmatch(base, node->capture.next, s, o)) == NO_MATCH) { \
+				base->cap_ptr[node->capture.n].pending = prev; \
+			} \
+			return tmp; \
+		} \
+ \
+		case RE_CLASS: \
+			if ( \
+				Xindex(s, o) != EOF && \
+				match_class(Xindex(s, o), node->class.vec) \
+			) { \
+				return Xmatch(base, node->class.next, s, o + 1u); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_DEFCLASS: \
+			if ( \
+				Xindex(s, o) != EOF && \
+				match_class(Xindex(s, o), node->defclass.vec) \
+			) { \
+				return Xmatch(base, node->defclass.next, s, o + 1u); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_END_CAPTURE: { \
+			const size_t prevbeg = base->cap_ptr[node->capture.n].start; \
+			const size_t prevend = base->cap_ptr[node->capture.n].end; \
+			if (base->cap_ptr[node->capture.n].pending + 1u == 0u) { \
+				return NO_MATCH; \
+			} \
+ \
+			base->cap_ptr[node->capture.n].end = o; \
+			base->cap_ptr[node->capture.n].start = base->cap_ptr[node->capture.n].pending; \
+			if ((tmp = Xmatch(base, node->capture.next, s, o)) == NO_MATCH) { \
+				base->cap_ptr[node->capture.n].end = prevend; \
+				base->cap_ptr[node->capture.n].start = prevbeg; \
+			} \
+			return tmp; \
+		} \
+ \
+		case RE_INDEP: { \
+			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
+			if ((tmp = Xmatch(base, node->indep.arg, s, o)) != NO_MATCH) { \
+				tmp = Xmatch(base, node->indep.next, s, tmp); \
+				if (tmp == NO_MATCH) { \
+					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
+				} \
+			} \
+			xfree(prev); \
+			return tmp; \
+		} \
+ \
+		case RE_N_CLASS: \
+			if ( \
+				Xindex(s, o) != EOF && \
+				!match_class(Xindex(s, o), node->class.vec) \
+			) { \
+				return Xmatch(base, node->class.next, s, o + 1u); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_N_DEFCLASS: \
+			if ( \
+				Xindex(s, o) != EOF && \
+				!match_class(Xindex(s, o), node->defclass.vec) \
+			) { \
+				return Xmatch(base, node->defclass.next, s, o + 1u); \
+			} \
+			return NO_MATCH; \
+ \
+		case RE_PARTIAL: { \
+			size_t n; \
+			for (n = 0; n < node->literal.len; ++n) { \
+				if (node->literal.buf[n] != Xindex(s, o + n)) { \
+					break; \
+				} \
+			} \
+			for (; n; --n) { \
+				if ((tmp = Xmatch(base, node->literal.next, s, o + n)) != NO_MATCH) { \
+					return tmp; \
+				} \
+			} \
+			return Xmatch(base, node->literal.next, s, o); \
+		} \
+ \
+		case RE_SELECT: { \
+			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
+			if ((tmp = Xmatch(base, node->select.cond, s, o)) != NO_MATCH) { \
+				tmp = Xmatch(base, node->select.arg, s, tmp); \
+				if (tmp == NO_MATCH) { \
+					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
+				} \
+			} else { \
+				tmp = Xmatch(base, node->select.next, s, o); \
+			} \
+			xfree(prev); \
+			return tmp; \
+		} \
+ \
+		case RE_REP_FRUGAL: \
+			if (base->repbuf[node->rep.n] >= node->rep.max) { \
+				const size_t n = base->repbuf[node->rep.n]; \
+ \
+				base->repbuf[node->rep.n] = 0; \
+				tmp = Xmatch(base, node->rep.next, s, o); \
+				base->repbuf[node->rep.n] = n; \
+				return tmp; \
+			} else if (base->repbuf[node->rep.n] >= node->rep.min) { \
+				const size_t n = base->repbuf[node->rep.n]; \
+ \
+				base->repbuf[node->rep.n] = 0; \
+				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
+					base->repbuf[node->rep.n] = n; \
+					return tmp; \
+				} \
+				base->repbuf[node->rep.n] = n + 1u; \
+				tmp = Xmatch(base, node->rep.arg, s, o); \
+				--base->repbuf[node->rep.n]; \
+				return tmp; \
+			} else { \
+				++base->repbuf[node->rep.n]; \
+				tmp = Xmatch(base, node->rep.arg, s, o); \
+				--base->repbuf[node->rep.n]; \
+				return tmp; \
+			} \
+ \
+		case RE_REP_GREEDY: \
+			if (base->repbuf[node->rep.n] >= node->rep.max) { \
+				const size_t n = base->repbuf[node->rep.n]; \
+ \
+				base->repbuf[node->rep.n] = 0; \
+				tmp = Xmatch(base, node->rep.next, s, o); \
+				base->repbuf[node->rep.n] = n; \
+				return tmp; \
+			} else if (base->repbuf[node->rep.n] >= node->rep.min) { \
+				const size_t n = base->repbuf[node->rep.n]; \
+ \
+				++base->repbuf[node->rep.n]; \
+				if ((tmp = Xmatch(base, node->rep.arg, s, o)) != NO_MATCH) { \
+					--base->repbuf[node->rep.n]; \
+					return tmp; \
+				} \
+				base->repbuf[node->rep.n] = 0; \
+				tmp = Xmatch(base, node->rep.next, s, o); \
+				base->repbuf[node->rep.n] = n; \
+				return tmp; \
+			} else { \
+				++base->repbuf[node->rep.n]; \
+				tmp = Xmatch(base, node->rep.arg, s, o); \
+				--base->repbuf[node->rep.n]; \
+				return tmp; \
+			} \
+ \
+		case RE_XREP_FRUGAL: { \
+			size_t n; \
+ \
+			for (n = 0; n < node->rep.min; ++n) { \
+				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
+					return NO_MATCH; \
+				} \
+				o = tmp; \
+			} \
+ \
+			for (; n < node->rep.max; ++n) { \
+				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
+					return tmp; \
+				} \
+				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
+					return NO_MATCH; \
+				} \
+				o = tmp; \
+			} \
+			return Xmatch(base, node->rep.next, s, o); \
+		} \
+ \
+		case RE_XREP_GREEDY: { \
+			size_t n; \
+ \
+			for (n = 0; n < node->rep.min; ++n) { \
+				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
+					return NO_MATCH; \
+				} \
+				o = tmp; \
+			} \
+ \
+			for (; n < node->rep.max; ++n) { \
+				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
+					break; \
+				} \
+				o = tmp; \
+			} \
+ \
+			for (; n > node->rep.min; --n) { \
+				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
+					return tmp; \
+				} \
+				o -= node->rep.n; \
+			} \
+ \
+			return Xmatch(base, node->rep.next, s, o); \
+		} \
+	} \
+ \
+	NOTREACHED
+
+
+static size_t match(const t_regex *base, const re_node *node, const String *s, size_t o) {
+
+	Mmatch0(match, ST_INDEX);
+
+		case RE_BACKREF:
+			if (
+					node->backref.n < base->captures &&
+					base->cap_ptr[node->backref.n].start + 1u &&
+					base->cap_ptr[node->backref.n].end + 1u &&
+					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start &&
+					(
+					 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start <= St_len(s) - o &&
+					 memcmp(
+						 St_ptr(s) + base->cap_ptr[node->backref.n].start,
+						 St_ptr(s) + o,
+						 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start
+						 ) == 0
+					)
+			   ) {
+				return match(base, node->backref.next, s, o + base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start);
+			}
+			return NO_MATCH;
+
+
+		case RE_LITERAL:
+			if (
+					node->literal.len == 0 ||
+					(
+					 node->literal.len <= St_len(s) - o &&
+					 memcmp(node->literal.buf, St_ptr(s) + o, node->literal.len) == 0
+					)
+			   ) {
+				return match(base, node->literal.next, s, o + node->literal.len);
+			}
+			return NO_MATCH;
+
+		case RE_STUFF_FRUGAL:
+			if (!node->x.next) {
+				return o;
+			}
+
+			if (node->x.next->x.type == RE_LITERAL && node->x.next->literal.len) {
+				for (
+						o = St_stro_m(s, o, node->x.next->literal.buf, node->x.next->literal.len);
+						o + 1u;
+						o = St_stro_m(s, o + 1u, node->x.next->literal.buf, node->x.next->literal.len)
+						) {
+					if ((tmp = match(base, node->x.next->literal.next, s, o + node->x.next->literal.len)) != NO_MATCH) {
+						return tmp;
+					}
+				}
+				return NO_MATCH;
+			}
+
+			for (; o <= St_len(s); ++o) {
+				if ((tmp = match(base, node->x.next, s, o)) != NO_MATCH) {
+					return tmp;
+				}
+			}
+			return NO_MATCH;
+
+		case RE_STUFF_GREEDY: {
+			size_t n;
+
+			if (!node->x.next) {
+				return St_len(s);
+			}
+
+			if (node->x.next->x.type == RE_LITERAL && node->x.next->literal.len) {
+				for (
+						n = St_rstr_m(s, node->x.next->literal.buf, node->x.next->literal.len);
+						n > o && n + 1u;
+						n = St_rstro_m(s, n - 1u, node->x.next->literal.buf, node->x.next->literal.len)
+						) {
+					if ((tmp = match(base, node->x.next->literal.next, s, n + node->x.next->literal.len)) != NO_MATCH) {
+						return tmp;
+					}
+					if (!n) {
+						break;
+					}
+				}
+				if (n == o) {
+					return match(base, node->x.next->literal.next, s, n + node->x.next->literal.len);
+				}
+				return NO_MATCH;
+			}
+
+			for (n = St_len(s); n > o; --n) {
+				if ((tmp = match(base, node->x.next, s, n)) != NO_MATCH) {
+					return tmp;
+				}
+			}
+			return match(base, node->x.next, s, o);
+		}
+
+	Mmatch1(match, ST_INDEX);
+}
+
+static size_t iomatch(const t_regex *base, const re_node *node, IO *s, size_t o) {
+
+	Mmatch0(iomatch, io_peek);
+
+		case RE_BACKREF:
+			if (
+					node->backref.n < base->captures &&
+					base->cap_ptr[node->backref.n].start + 1u &&
+					base->cap_ptr[node->backref.n].end + 1u &&
+					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start &&
+					(
+					 io_xcmp(
+						 s,
+						 base->cap_ptr[node->backref.n].start,
+						 o,
+						 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start
+						 ) == 0
+					)
+			   ) {
+				return iomatch(base, node->backref.next, s, o + base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start);
+			}
+			return NO_MATCH;
+
+
+		case RE_LITERAL:
+			if (
+					node->literal.len == 0 ||
+					(
+					 io_cmppeek(s, o, node->literal.buf, node->literal.len) == 0
+					)
+			   ) {
+				return iomatch(base, node->literal.next, s, o + node->literal.len);
+			}
+			return NO_MATCH;
+
+		case RE_STUFF_FRUGAL:
+			if (!node->x.next) {
+				return o;
+			}
+
+			for (; io_peek(s, o) != EOF; ++o) {
+				if ((tmp = iomatch(base, node->x.next, s, o)) != NO_MATCH) {
+					return tmp;
+				}
+			}
+			return iomatch(base, node->x.next, s, o);
+
+		case RE_STUFF_GREEDY: {
+			size_t n;
+
+			for (n = o; io_peek(s, n) != EOF; ++n)
+				;
+
+			if (!node->x.next) {
+				return n;
+			}
+
+			for (; n > o; --n) {
+				if ((tmp = iomatch(base, node->x.next, s, n)) != NO_MATCH) {
+					return tmp;
+				}
+			}
+			return iomatch(base, node->x.next, s, o);
+		}
+
+	Mmatch1(iomatch, io_peek);
+}
+
+#define DEFRBUF ((void)0)
+#define MRETURN(x) return (x)
+#define INITRBUF(re) memset((re)->repbuf, '\0', (re)->repets * sizeof *(re)->repbuf)
+
+int re_iomatch(t_regex *re, IO *io, size_t *ms, size_t *me) {
+	size_t i;
+	DEFRBUF;
+	INITRBUF(re);
+
+	assert(re != NULL);
+	assert(io != NULL);
+	assert(io_bufred(io));
+	assert(ms != NULL);
+	assert(me != NULL);
+
+	reinit(re);
+
+	if (re->flags & FL_ANCHOR_START) {
+		if ((*me = iomatch(re, re->start, io, 0)) == NO_MATCH) {
+			MRETURN(0);
+		}
+		*ms = 0;
+		MRETURN(1);
+	}
+
+	for (i = 0; io_peek(io, i) != EOF; ++i) {
+		if ((*me = iomatch(re, re->start, io, i)) != NO_MATCH) {
+			*ms = i;
+			MRETURN(1);
+		}
+	}
+
+	if ((*me = iomatch(re, re->start, io, i)) != NO_MATCH) {
+		*ms = i;
+		MRETURN(1);
+	}
+	MRETURN(0);
+}
+
+int re_match(t_regex *re, const String *s, size_t *ms, size_t *me) {
+	size_t i;
+	DEFRBUF;
+	INITRBUF(re);
+
+	assert(re != NULL);
+	assert(s != NULL);
+	assert(ms != NULL);
+	assert(me != NULL);
+
+	if (re->minlen > St_len(s)) {
+		MRETURN(0);
+	}
+
+	reinit(re);
+
+	if (re->flags & FL_ANCHOR_START) {
+		if ((*me = match(re, re->start, s, 0)) == NO_MATCH) {
+			MRETURN(0);
+		}
+		*ms = 0;
+		MRETURN(1);
+	}
+
+	if (re->start->x.type == RE_LITERAL && re->start->literal.len) {
+		for (
+				i = St_str_m(s, re->start->literal.buf, re->start->literal.len);
+				i + 1u;
+				i = St_stro_m(s, i + 1u, re->start->literal.buf, re->start->literal.len)
+			) {
+			if ((*me = match(re, re->start->literal.next, s, i + re->start->literal.len)) != NO_MATCH) {
+				*ms = i;
+				MRETURN(1);
+			}
+		}
+		MRETURN(0);
+	}
+
+	for (i = 0; i <= St_len(s) - re->minlen; ++i) {
+		if ((*me = match(re, re->start, s, i)) != NO_MATCH) {
+			*ms = i;
+			MRETURN(1);
+		}
+	}
+
+	MRETURN(0);
+}
+
+size_t re_cabra(const t_regex *re) {
+	return re->captures;
+}
+
+int re_backref(const t_regex *re, size_t i, size_t *a, size_t *z) {
+	if (i >= re->captures) {
+		return -1;
+	}
+	if (!(re->cap_ptr[i].start + 1u && re->cap_ptr[i].end + 1u)) {
+		return 1;
+	}
+	*a = re->cap_ptr[i].start;
+	*z = re->cap_ptr[i].end;
+	return 0;
+}
+
+ATTR_PURE
+static const re_node *branchend(const re_node *a, const re_node *b) {
+	for (; a; a = a->x.next) {
+		const re_node *q = b;
+		for (; q; q = q->x.next) {
+			if (a == q) {
+				return q;
+			}
+		}
+	}
+	return NULL;
+}
+
+static void decom_class(String *s, const unsigned char vec[HAVE_C99(static CLASS_SIZE)]) {
+	unsigned char i;
+	int flag = 0;
+	char big_enough[100];
+
+	for (i = 0; i < UCHAR_MAX; ++i) {
+		if (vec[i / CHAR_BIT] & 1u << i % CHAR_BIT) {
+			if (vec[(i + 1u) / CHAR_BIT] & 1u << (i + 1u) % CHAR_BIT) {
+				if (!flag) {
+					flag = 1;
+					goto mess;
+				}
+				flag = 1;
+			} else {
+				if (flag) {
+					if (i > 1u && vec[(i - 2u) / CHAR_BIT] & 1u << (i - 2u) % CHAR_BIT) {
+						St_cat_c(s, '-');
+					}
+					flag = 0;
+				}
+mess:
+				if (!isprint(i)) {
+					St_cat_c(s, '\\');
+					switch (i) {
+						case '\a': St_cat_c(s, 'a'); break;
+						case '\b': St_cat_c(s, 'b'); break;
+						case '\f': St_cat_c(s, 'f'); break;
+						case '\n': St_cat_c(s, 'n'); break;
+						case '\r': St_cat_c(s, 'r'); break;
+						case '\t': St_cat_c(s, 't'); break;
+						case '\v': St_cat_c(s, 'v'); break;
+						default:
+								   sprintf(big_enough, "%03o", (unsigned)i);
+								   St_cat_s(s, big_enough);
+								   break;
+					}
+				} else {
+					if (strchr("\\\"", (char)i)) {
+						St_cat_c(s, '\\');
+					}
+					St_cat_c(s, i);
+					if (strchr("!'-", (char)i)) {
+						St_cat_c(s, ESCAPE);
+					}
+				}
+			}
+		}
+	}
+	if (vec[i / CHAR_BIT] & 1u << i % CHAR_BIT) {
+		if (flag) {
+			if (vec[(i - 2u) / CHAR_BIT] & 1u << (i - 2u) % CHAR_BIT) {
+				St_cat_c(s, '-');
+			}
+			flag = 0;
+		}
+		if (!isprint(i)) {
+			St_cat_c(s, '\\');
+			switch (i) {
+				case '\a': St_cat_c(s, 'a'); break;
+				case '\b': St_cat_c(s, 'b'); break;
+				case '\f': St_cat_c(s, 'f'); break;
+				case '\n': St_cat_c(s, 'n'); break;
+				case '\r': St_cat_c(s, 'r'); break;
+				case '\t': St_cat_c(s, 't'); break;
+				case '\v': St_cat_c(s, 'v'); break;
+				default:
+						   sprintf(big_enough, "%03o", (unsigned)i);
+						   St_cat_s(s, big_enough);
+						   break;
+			}
+		} else {
+			if (strchr("\\\"", (char)i)) {
+				St_cat_c(s, '\\');
+			}
+			St_cat_c(s, i);
+			if (strchr("!'-", (char)i)) {
+				St_cat_c(s, ESCAPE);
+			}
+		}
+	}
+}
+
+static void dumpclass(FILE *fp, const unsigned char vec[HAVE_C99(static CLASS_SIZE)]) {
+	String tmp;
+	St_init(&tmp);
+	decom_class(&tmp, vec);
+	ST_WRITE(&tmp, fp);
+	St_clear(&tmp);
+}
+
+ATTR_CONST
+static int cdefclass(const unsigned char *ptr) {
+	if (ptr == dc_word)   { return 'w'; }
+	if (ptr == dc_alpha)  { return 'q'; }
+	if (ptr == dc_cntrl)  { return 'c'; }
+	if (ptr == dc_digit)  { return 'd'; }
+	if (ptr == dc_lower)  { return 'l'; }
+	if (ptr == dc_print)  { return 'p'; }
+	if (ptr == dc_space)  { return 's'; }
+	if (ptr == dc_upper)  { return 'u'; }
+	if (ptr == dc_xdigit) { return 'x'; }
+	NOTREACHED;
+}
+
+static void decom_liter(String *s, const re_node *node) {
+	size_t i;
+	assert(node->x.type == RE_LITERAL || node->x.type == RE_PARTIAL);
+	for (i = 0; i < node->literal.len; ++i) {
+		if (!isprint((unsigned char)node->literal.buf[i])) {
+			St_cat_c(s, '\\');
+			switch (node->literal.buf[i]) {
+				case '\a': St_cat_c(s, 'a'); break;
+				case '\b': St_cat_c(s, 'b'); break;
+				case '\f': St_cat_c(s, 'f'); break;
+				case '\n': St_cat_c(s, 'n'); break;
+				case '\r': St_cat_c(s, 'r'); break;
+				case '\t': St_cat_c(s, 't'); break;
+				case '\v': St_cat_c(s, 'v'); break;
+				default: {
+					char big_enough[100];
+					sprintf(big_enough, "%03o", (unsigned)(unsigned char)node->literal.buf[i]);
+					St_cat_s(s, big_enough);
+					break;
+				}
+			}
+		} else {
+			if (node->x.type == RE_LITERAL && strchr("\"\\", node->literal.buf[i])) {
+				St_cat_c(s, '\\');
+			}
+			St_cat_c(s, node->literal.buf[i]);
+			if (MEATCHARP(node->literal.buf[i])) {
+				St_cat_c(s, ESCAPE);
+			}
+		}
+	}
+}
+
+static void dumpliter(FILE *fp, const re_node *node) {
+	String tmp;
+	St_init(&tmp);
+	decom_liter(&tmp, node);
+	ST_WRITE(&tmp, fp);
+	St_clear(&tmp);
+}
+
+static void do_indent(FILE *fp, size_t n) {
+	for (; n; --n) {
+		fputs("  ", fp);
+	}
+}
+
+static void dumpnode(FILE *fp, const re_node *node, const re_node *end, const size_t indent) {
+	if (node == end) {
+		return;
+	}
+
+	assert(node != NULL);
+	fprintf(fp, "[%p] ", (const void *)node);
+	do_indent(fp, indent);
+
+	switch (node->x.type) {
+		case RE_ALTER: {
+			const re_node *const be = branchend(node->alter.arg, node->alter.next);
+			fputs("branch {\n", fp);
+			dumpnode(fp, node->alter.arg, be, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} or {\n", fp);
+			dumpnode(fp, node->alter.next, be, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /branch\n", fp);
+			dumpnode(fp, be, end, indent);
+			return;
+		}
+
+		case RE_ANYCH:
+			fputs("anychar\n", fp);
+			break;
+
+		case RE_AT_BOL:
+			fputs("at beginning of line\n", fp);
+			break;
+
+		case RE_AT_BOS:
+			fputs("at beginning of string\n", fp);
+			break;
+
+		case RE_AT_EOL:
+			fputs("at end of line\n", fp);
+			break;
+
+		case RE_AT_EOS:
+			fputs("at end of string\n", fp);
+			break;
+
+		case RE_AT_MATCH:
+			fputs("at match {\n", fp);
+			dumpnode(fp, node->indep.arg, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /at match\n", fp);
+			break;
+
+		case RE_AT_NOMATCH:
+			fputs("not at match {\n", fp);
+			dumpnode(fp, node->indep.arg, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /not at match\n", fp);
+			break;
+
+		case RE_AT_NWBOUND:
+			fputs("not at word boundary\n", fp);
+			break;
+
+		case RE_AT_WBOUND:
+			fputs("at word boundary\n", fp);
+			break;
+
+		case RE_BACKCHECK:
+			fprintf(fp, "check capture (%lu)\n", (unsigned long)node->backref.n);
+			break;
+
+		case RE_BACKREF:
+			fprintf(fp, "backref (%lu)\n", (unsigned long)node->backref.n);
+			break;
+
+		case RE_BEGIN_CAPTURE:
+			fprintf(fp, "begin capture (%lu)\n", (unsigned long)node->backref.n);
+			break;
+
+		case RE_CLASS:
+			fputs("character class '", fp);
+			dumpclass(fp, node->class.vec);
+			fputs("'\n", fp);
+			break;
+
+		case RE_DEFCLASS:
+			fprintf(fp, "character class %c!\n", cdefclass(node->defclass.vec));
+			break;
+
+		case RE_END_CAPTURE:
+			fprintf(fp, "end capture (%lu)\n", (unsigned long)node->backref.n);
+			break;
+
+		case RE_INDEP:
+			fputs("independent submatch {\n", fp);
+			dumpnode(fp, node->indep.arg, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /independent submatch\n", fp);
+			break;
+
+		case RE_LITERAL:
+			fputs("literal \"", fp);
+			dumpliter(fp, node);
+			fputs("\"\n", fp);
+			break;
+
+		case RE_N_CLASS:
+			fputs("negated character class '", fp);
+			dumpclass(fp, node->class.vec);
+			fputs("'\n", fp);
+			break;
+
+		case RE_N_DEFCLASS:
+			fprintf(fp, "character class %c!\n", toupper(cdefclass(node->defclass.vec)));
+			break;
+
+		case RE_PARTIAL:
+			fputs("abbreviation `", fp);
+			dumpliter(fp, node);
+			fputs("`\n", fp);
+			break;
+
+		case RE_REP_FRUGAL:
+			fputs("repetition ", fp);
+			if (node->rep.min == node->rep.max) {
+				fprintf(fp, ":%lu:?", (unsigned long)node->rep.min);
+			} else if (node->rep.max != (size_t)-1) {
+				fprintf(fp, ":%lu,%lu:?", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
+			} else {
+				switch (node->rep.min) {
+					case 0: fputs("*?", fp); break;
+					case 1: fputs("+?", fp); break;
+					default: fprintf(fp, ":%lu,:?", (unsigned long)node->rep.min); break;
+				}
+			}
+			fputs(" {\n", fp);
+			dumpnode(fp, node->rep.arg, node, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /repetition\n", fp);
+			break;
+
+		case RE_REP_GREEDY:
+			fputs("repetition ", fp);
+			if (node->rep.min == node->rep.max) {
+				fprintf(fp, ":%lu:", (unsigned long)node->rep.min);
+			} else if (node->rep.max != (size_t)-1) {
+				fprintf(fp, ":%lu,%lu:", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
+			} else {
+				switch (node->rep.min) {
+					case 0: fputs("*", fp); break;
+					case 1: fputs("+", fp); break;
+					default: fprintf(fp, ":%lu,:", (unsigned long)node->rep.min); break;
+				}
+			}
+			fputs(" {\n", fp);
+			dumpnode(fp, node->rep.arg, node, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /repetition\n", fp);
+			break;
+
+		case RE_SELECT: {
+			const re_node *const be = branchend(node->select.arg, node->select.next);
+			fputs("if {\n", fp);
+			dumpnode(fp, node->select.cond, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} then {\n", fp);
+			dumpnode(fp, node->select.arg, be, indent + 1);
+			if (node->select.next != be) {
+				fprintf(fp, "[%p] ", (const void *)node);
+				do_indent(fp, indent);
+				fputs("} else {\n", fp);
+				dumpnode(fp, node->select.next, be, indent + 1);
+			}
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /if\n", fp);
+			dumpnode(fp, be, end, indent);
+			return;
+		}
+
+		case RE_STUFF_FRUGAL:
+			fputs(".*?\n", fp);
+			break;
+
+		case RE_STUFF_GREEDY:
+			fputs(".*\n", fp);
+			break;
+
+		case RE_XREP_FRUGAL:
+			fputs("simple repetition ", fp);
+			if (node->rep.min == node->rep.max) {
+				fprintf(fp, ":%lu:?", (unsigned long)node->rep.min);
+			} else if (node->rep.max != (size_t)-1) {
+				fprintf(fp, ":%lu,%lu:?", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
+			} else {
+				switch (node->rep.min) {
+					case 0: fputs("*?", fp); break;
+					case 1: fputs("+?", fp); break;
+					default: fprintf(fp, ":%lu,:?", (unsigned long)node->rep.min); break;
+				}
+			}
+			fputs(" {\n", fp);
+			dumpnode(fp, node->rep.arg, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /simple repetition\n", fp);
+			break;
+
+		case RE_XREP_GREEDY:
+			fputs("simple repetition ", fp);
+			if (node->rep.min == node->rep.max) {
+				fprintf(fp, ":%lu:", (unsigned long)node->rep.min);
+			} else if (node->rep.max != (size_t)-1) {
+				fprintf(fp, ":%lu,%lu:", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
+			} else {
+				switch (node->rep.min) {
+					case 0: fputs("*", fp); break;
+					case 1: fputs("+", fp); break;
+					default: fprintf(fp, ":%lu,:", (unsigned long)node->rep.min); break;
+				}
+			}
+			fputs(" {\n", fp);
+			dumpnode(fp, node->rep.arg, NULL, indent + 1);
+			fprintf(fp, "[%p] ", (const void *)node);
+			do_indent(fp, indent);
+			fputs("} /simple repetition\n", fp);
+			break;
+	}
+	dumpnode(fp, node->x.next, end, indent);
+}
+
+void re_dump(const t_regex *re, FILE *fp) {
+	fprintf(fp, "RE: minlen=%lu, anchor=%s\n", (unsigned long)re->minlen, re->flags & FL_ANCHOR_START ? "start" : "none");
+	dumpnode(fp, re->start, NULL, 0);
+}
+
+static void cat_n(String *s, unsigned long n) {
+	char big_enough[100];
+	sprintf(big_enough, "%lu", n);
+	St_cat_s(s, big_enough);
+}
+
+static void decom_repeat(String *s, const re_node *node) {
+	assert(
+		node->x.type == RE_REP_FRUGAL ||
+		node->x.type == RE_REP_GREEDY ||
+		node->x.type == RE_XREP_FRUGAL ||
+		node->x.type == RE_XREP_GREEDY
+	);
+
+	if (node->rep.min == 0 && node->rep.max == (size_t)-1) {
+		St_cat_c(s, '*');
+	} else if (node->rep.min == 0 && node->rep.max == 1) {
+		St_cat_c(s, '?');
+	} else if (node->rep.min == 1 && node->rep.max == (size_t)-1) {
+		St_cat_c(s, '+');
+	} else if (node->rep.min == node->rep.max) {
+		St_cat_c(s, ':');
+		cat_n(s, node->rep.min);
+		St_cat_c(s, ':');
+	} else if (node->rep.max == (size_t)-1) {
+		St_cat_c(s, ':');
+		cat_n(s, node->rep.min);
+		St_cat_m(s, ",:", 2);
+	} else {
+		St_cat_c(s, ':');
+		cat_n(s, node->rep.min);
+		St_cat_c(s, ',');
+		cat_n(s, node->rep.max);
+		St_cat_c(s, ':');
+	}
+
+	if (node->rep.type == RE_REP_FRUGAL || node->rep.type == RE_XREP_FRUGAL) {
+		St_cat_c(s, '?');
+	}
+}
+
+static void decom_node(String *s, const re_node *node, const re_node *end) {
+	if (node == end) {
+		return;
+	}
+
+	assert(node != NULL);
+	switch (node->x.type) {
+		case RE_ALTER: {
+			const re_node *const be = branchend(node->alter.arg, node->alter.next);
+			St_cat_c(s, '(');
+			decom_node(s, node->alter.arg, be);
+			St_cat_c(s, '|');
+			decom_node(s, node->alter.next, be);
+			St_cat_c(s, ')');
+			decom_node(s, be, end);
+			break;
+		}
+
+		case RE_ANYCH:
+			St_cat_c(s, '.');
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_BOL:
+			St_cat_m(s, "A!", 2);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_BOS:
+			St_cat_c(s, '^');
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_EOL:
+			St_cat_m(s, "Z!", 2);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_EOS:
+			St_cat_c(s, '$');
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_MATCH:
+			St_cat_c(s, '[');
+			decom_node(s, node->indep.arg, NULL);
+			St_cat_m(s, "&]", 2);
+			decom_node(s, node->indep.next, end);
+			break;
+
+		case RE_AT_NOMATCH:
+			St_cat_c(s, '[');
+			decom_node(s, node->indep.arg, NULL);
+			St_cat_m(s, "^]", 2);
+			decom_node(s, node->indep.next, end);
+			break;
+
+		case RE_AT_NWBOUND:
+			St_cat_m(s, "B!", 2);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_AT_WBOUND:
+			St_cat_m(s, "b!", 2);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_BACKREF:
+		case RE_BACKCHECK: {
+			String tmp;
+			if (node->x.type == RE_BACKCHECK) {
+				St_cat_c(s, '[');
+			}
+			St_init(&tmp);
+#if HAVE_VSNPRINTF_P
+			St_xprintf(&tmp, "%lu", (unsigned long)node->backref.n);
+#else
+			St_num(&tmp, node->backref.n);
+#endif
+			St_cat(s, &tmp);
+			St_clear(&tmp);
+			if (node->x.type == RE_BACKCHECK) {
+				St_cat_c(s, ']');
+			} else {
+				St_cat_c(s, ESCAPE);
+			}
+			decom_node(s, node->backref.next, end);
+			break;
+		}
+
+		case RE_BEGIN_CAPTURE:
+			St_cat_c(s, '{');
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_CLASS:
+			St_cat_c(s, '\'');
+			decom_class(s, node->class.vec);
+			St_cat_c(s, '\'');
+			decom_node(s, node->class.next, end);
+			break;
+
+		case RE_DEFCLASS:
+			St_cat_c(s, cdefclass(node->defclass.vec));
+			St_cat_c(s, ESCAPE);
+			decom_node(s, node->defclass.next, end);
+			break;
+
+		case RE_END_CAPTURE:
+			St_cat_c(s, '}');
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_INDEP:
+			St_cat_c(s, '<');
+			decom_node(s, node->indep.arg, NULL);
+			St_cat_c(s, '>');
+			decom_node(s, node->indep.next, end);
+			break;
+
+		case RE_LITERAL:
+			decom_liter(s, node);
+			decom_node(s, node->literal.next, end);
+			break;
+
+		case RE_N_CLASS:
+			St_cat_m(s, "'^", 2);
+			decom_class(s, node->class.vec);
+			St_cat_c(s, '\'');
+			decom_node(s, node->class.next, end);
+			break;
+
+		case RE_N_DEFCLASS:
+			St_cat_c(s, toupper(cdefclass(node->defclass.vec)));
+			St_cat_c(s, ESCAPE);
+			decom_node(s, node->defclass.next, end);
+			break;
+
+		case RE_PARTIAL:
+			St_cat_c(s, '`');
+			decom_liter(s, node);
+			St_cat_c(s, '`');
+			break;
+
+		case RE_REP_FRUGAL:
+		case RE_REP_GREEDY:
+			St_cat_c(s, '(');
+			decom_node(s, node->rep.arg, node);
+			St_cat_c(s, ')');
+			decom_repeat(s, node);
+			decom_node(s, node->rep.next, end);
+			break;
+
+		case RE_SELECT: {
+			const re_node *const be = branchend(node->select.arg, node->select.next);
+			St_cat_c(s, '[');
+			if (be != node->select.next) {
+				decom_node(s, node->select.next, be);
+				St_cat_c(s, '|');
+			}
+			decom_node(s, node->select.arg, be);
+			St_cat_c(s, '(');
+			decom_node(s, node->select.cond, NULL);
+			St_cat_c(s, ')');
+			St_cat_m(s, "?]", 2);
+			decom_node(s, be, end);
+			break;
+		}
+
+		case RE_STUFF_FRUGAL:
+			St_cat_m(s, ".*?", 3);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_STUFF_GREEDY:
+			St_cat_m(s, ".*", 2);
+			decom_node(s, node->x.next, end);
+			break;
+
+		case RE_XREP_FRUGAL:
+		case RE_XREP_GREEDY:
+			St_cat_c(s, '(');
+			decom_node(s, node->rep.arg, NULL);
+			St_cat_c(s, ')');
+			decom_repeat(s, node);
+			decom_node(s, node->rep.next, end);
+			break;
+	}
+}
+
+void re_decompile(const t_regex *re, String *s) {
+	St_zero(s);
+	decom_node(s, re->start, NULL);
+}