view src/ploki/re.c @ 5556:f2e89077ff1d

<oerjan> learn Turkey was the center of an empire that gobbled up much of Eastern Europe and the Middle East, something which brought them into conflict with Ostrich. In the 19th century the overstuffed empire started declining, and after the Great War it was cut up like so much Shish Kebab.
author HackBot
date Fri, 12 Jun 2015 06:39:15 +0000
parents ac0403686959
children
line wrap: on
line source

#include "config.h"
#include "IO.h"
#include "Str.h"
#include "hash.h"
#include "main_io.h"
#include "main_opt.h"
#include "re.h"
#include "xmalloc.h"
#include "zz.h"

#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

enum {CLASS_SIZE = UCHAR_MAX / CHAR_BIT + 1u};

static unsigned char dc_word[CLASS_SIZE];
static unsigned char dc_alpha[CLASS_SIZE];
static unsigned char dc_cntrl[CLASS_SIZE];
static unsigned char dc_digit[CLASS_SIZE];
static unsigned char dc_lower[CLASS_SIZE];
static unsigned char dc_print[CLASS_SIZE];
static unsigned char dc_space[CLASS_SIZE];
static unsigned char dc_upper[CLASS_SIZE];
static unsigned char dc_xdigit[CLASS_SIZE];

static void fill(unsigned char *v, int (*pred)(int)) {
	unsigned char c;

	for (c = UCHAR_MAX; c; --c) {
		v[c / CHAR_BIT] |= !!pred(c) << c % CHAR_BIT;
	}
	v[0] |= !!pred(0);
}

static void nfill(unsigned char *v, int (*pred)(int)) {
	unsigned char c;

	for (c = UCHAR_MAX; c; --c) {
		v[c / CHAR_BIT] |= !pred(c) << c % CHAR_BIT;
	}
	v[0] |= !pred(0);
}

ATTR_CONST
static int siword(int c) {
	return c == '_' || isalnum(c);
}

enum {MAGIC = 23};

static Hash rcache;
static String cached[MAGIC];
static size_t cachfill, cachlast;

static size_t hash(const void *s, size_t seed) {
	return St_hash(s, seed);
}

static int compar(const void *a, const void *b) {
	return St_cmp(a, b);
}

enum t_re_node {
	RE_ALTER,
	RE_ANYCH,
	RE_AT_BOL,
	RE_AT_BOS,
	RE_AT_EOL,
	RE_AT_EOS,
	RE_AT_MATCH,
	RE_AT_NOMATCH,
	RE_AT_NWBOUND,
	RE_AT_WBOUND,
	RE_BACKCHECK,
	RE_BACKREF,
	RE_BEGIN_CAPTURE,
	RE_CLASS,
	RE_DEFCLASS,
	RE_END_CAPTURE,
	RE_INDEP,
	RE_LITERAL,
	RE_N_CLASS,
	RE_N_DEFCLASS,
	RE_PARTIAL,
	RE_REP_FRUGAL,
	RE_REP_GREEDY,
	RE_SELECT,
	RE_STUFF_FRUGAL,
	RE_STUFF_GREEDY,
	RE_XREP_FRUGAL,
	RE_XREP_GREEDY
};

#define CMN_HDR  enum t_re_node type; union my_node *next

typedef union my_node {
	struct {
		CMN_HDR;
	} x;
	struct {
		CMN_HDR;
		union my_node *arg;
	} alter;
	struct {
		CMN_HDR;
		size_t n;
	} backref;
	struct {
		CMN_HDR;
		size_t n;
	} capture;
	struct {
		CMN_HDR;
		unsigned char vec[CLASS_SIZE];
	} class;
	struct {
		CMN_HDR;
		unsigned char *vec;
	} defclass;
	struct {
		CMN_HDR;
		union my_node *arg;
	} indep;
	struct {
		CMN_HDR;
		unsigned char *buf;
		size_t len;
	} literal;
	struct {
		CMN_HDR;
		union my_node *arg;
		size_t min, max;
		size_t n;
	} rep;
	struct {
		CMN_HDR;
		union my_node *arg;
		union my_node *cond;
	} select;
} re_node;

#include "re_block.c.h"

enum re_flags {
	FL_NONE = 0,
	FL_ANCHOR_START = 1
};

struct cap_state {
	size_t start, end, pending;
};

struct my_regex {
	re_node *start;
	size_t repets;
	size_t *repbuf;
	t_block alloc;
	size_t captures;
	struct cap_state *cap_ptr;
	size_t refs;
	enum re_flags flags;
	size_t minlen;
};

static void delv(void *v) {
	re_free(v);
}

void re_init(void) {
	fill(dc_word, siword);
	fill(dc_alpha, isalpha);
	fill(dc_cntrl, iscntrl);
	fill(dc_digit, isdigit);
	fill(dc_lower, islower);
	fill(dc_print, isprint);
	fill(dc_space, isspace);
	fill(dc_upper, isupper);
	fill(dc_xdigit, isxdigit);

	h_init(&rcache, hash, compar, NULL, delv);
}

void re_end(void) {
	h_end(&rcache);
	while (cachfill) {
		--cachfill;
		St_clear(&cached[cachfill]);
	}
	cachlast = 0;
}


enum   {ESCAPE     =        '!'};
#define MEATCHARP(c) strchr("!|({<[]>})*+?:.'^$`", c)
#define ENDBRANCHP(c) strchr("|({<[", c)

struct parse_context {
	t_regex *base;
	t_block *block;
	const String *s;
	size_t *p;
};

static void skipspace(const String *const s, size_t *const p) {
restart:
	while (isspace(ST_INDEX(s, *p))) {
		--*p;
	}
	if (ST_INDEX(s, *p) == ']' && ST_INDEX(s, *p - 1u) == '#') {
		*p -= 2u;
		while (ST_INDEX(s, *p) != EOF) {
			if (ST_INDEX(s, *p) == '[') {
				--*p;
				goto restart;
			}
			if (ST_INDEX(s, *p) == ESCAPE && ST_INDEX(s, *p - 1u) != EOF) {
				--*p;
			}
			--*p;
		}
	}
}

#define PRAZ(name) static re_node *name(const struct parse_context *con, re_node *next, re_node *curolan_zebeth)
#define SNORK const String *const s = con->s; size_t *const p = con->p
#define CALL(name, nextn, glob) name(con, nextn, glob)
#define TAILC(name) return CALL(name, next, curolan_zebeth)

PRAZ(quantifire);
PRAZ(branch);
PRAZ(alternation);

PRAZ(atom) {
	SNORK;
	(void)curolan_zebeth;

	skipspace(s, p);
	switch (ST_INDEX(s, *p)) {
		case '(':
		case '<':
		case '{':
		case '[':
		case EOF: {
			re_node *cur = bl_node(con->block);
			cur->literal.type = RE_LITERAL;
			cur->literal.next = next;
			cur->literal.buf = NULL;
			cur->literal.len = 0;
			return cur;
		}

		case ESCAPE: {
			re_node *cur = bl_node(con->block);
			--*p;
			if (isdigit(ST_INDEX(s, *p))) {
				do {
					--*p;
				} while (isdigit(ST_INDEX(s, *p)));
				cur->backref.type = RE_BACKREF;
				cur->backref.next = next;
				cur->backref.n = strtoul(St_ptr(s) + *p + 1, NULL, 10);
			} else if (isalpha(ST_INDEX(s, *p))) {
				switch (ST_INDEX(s, *p)) {
					case 'A':
						cur->x.type = RE_AT_BOL;
						cur->x.next = next;
						--*p;
						break;
					case 'Z':
						cur->x.type = RE_AT_EOL;
						cur->x.next = next;
						--*p;
						break;

					case 'b':
						cur->x.type = RE_AT_WBOUND;
						cur->x.next = next;
						--*p;
						break;
					case 'B':
						cur->x.type = RE_AT_NWBOUND;
						cur->x.next = next;
						--*p;
						break;

					case 'c':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_cntrl;
						cur->defclass.next = next;
						--*p;
						break;
					case 'C':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_cntrl;
						cur->defclass.next = next;
						--*p;
						break;

					case 'd':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_digit;
						cur->defclass.next = next;
						--*p;
						break;
					case 'D':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_digit;
						cur->defclass.next = next;
						--*p;
						break;

					case 'l':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_lower;
						cur->defclass.next = next;
						--*p;
						break;
					case 'L':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_lower;
						cur->defclass.next = next;
						--*p;
						break;

					case 'p':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_print;
						cur->defclass.next = next;
						--*p;
						break;
					case 'P':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_print;
						cur->defclass.next = next;
						--*p;
						break;

					case 'q':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_alpha;
						cur->defclass.next = next;
						--*p;
						break;
					case 'Q':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_alpha;
						cur->defclass.next = next;
						--*p;
						break;

					case 's':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_space;
						cur->defclass.next = next;
						--*p;
						break;
					case 'S':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_space;
						cur->defclass.next = next;
						--*p;
						break;

					case 'u':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_upper;
						cur->defclass.next = next;
						--*p;
						break;
					case 'U':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_upper;
						cur->defclass.next = next;
						--*p;
						break;

					case 'w':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_word;
						cur->defclass.next = next;
						--*p;
						break;
					case 'W':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_word;
						cur->defclass.next = next;
						--*p;
						break;

					case 'x':
						cur->defclass.type = RE_DEFCLASS;
						cur->defclass.vec = dc_xdigit;
						cur->defclass.next = next;
						--*p;
						break;
					case 'X':
						cur->defclass.type = RE_N_DEFCLASS;
						cur->defclass.vec = dc_xdigit;
						cur->defclass.next = next;
						--*p;
						break;

					default:
						goto default_escape;
				}

			} else default_escape: {
				cur->literal.type = RE_LITERAL;
				cur->literal.next = next;
				cur->literal.buf = bl_string(con->block, cur->literal.len = 1);
				if (ST_INDEX(s, *p) == EOF) {
					cur->literal.buf[0] = ESCAPE;
				} else {
					cur->literal.buf[0] = ST_INDEX(s, *p);
					--*p;
				}
			}
			return cur;
		}

		case ')': {
			re_node *cur;
			--*p;
			cur = CALL(alternation, next, next);
			skipspace(s, p);
			if (ST_INDEX(s, *p) == '(') {
				--*p;
			}
			return cur;
		}

		case ']': {
			re_node *cur;
			switch (ST_INDEX(s, *p - 1u)) {
				case '&':
					*p -= 2u;
					cur = bl_node(con->block);
					cur->indep.type = RE_AT_MATCH;
					cur->indep.next = next;
					cur->indep.arg = CALL(alternation, NULL, NULL);
					break;

				case '^':
					*p -= 2u;
					cur = bl_node(con->block);
					cur->indep.type = RE_AT_NOMATCH;
					cur->indep.next = next;
					cur->indep.arg = CALL(alternation, NULL, NULL);
					break;

				case '?':
					*p -= 2u;
					cur = bl_node(con->block);
					cur->select.type = RE_SELECT;
					cur->select.cond = CALL(quantifire, NULL, NULL);
					cur->select.arg = CALL(branch, next, next);
					skipspace(s, p);
					if (ST_INDEX(s, *p) == '|') {
						--*p;
						cur->select.next = CALL(alternation, next, next);
					} else {
						cur->select.next = next;
					}
					break;

				case '0': case '1': case '2': case '3': case '4':
				case '5': case '6': case '7': case '8': case '9': {
					size_t i;

					for (i = *p - 2u; isdigit(ST_INDEX(s, i)); --i)
						;
					if (ST_INDEX(s, i) != '[') {
						goto std_atom;
					}
					cur = bl_node(con->block);
					cur->backref.type = RE_BACKCHECK;
					cur->backref.next = next;
					cur->backref.n = strtoul(St_ptr(s) + i + 1u, NULL, 10);
					*p = i - 1u;

					return cur;
				}

				default:
					goto std_atom;
			}

			skipspace(s, p);
			if (ST_INDEX(s, *p) == '[') {
				--*p;
			}
			return cur;
		}

		case '>': {
			re_node *cur = bl_node(con->block);
			--*p;
			cur->indep.type = RE_INDEP;
			cur->indep.next = next;
			cur->indep.arg = CALL(alternation, NULL, NULL);
			skipspace(s, p);
			if (ST_INDEX(s, *p) == '<') {
				--*p;
			}
			return cur;
		}

		case '}': {
			re_node *begin = bl_node(con->block);
			re_node *end = bl_node(con->block);
			--*p;
			begin->capture.n = end->capture.n = con->base->captures++;
			end->capture.type = RE_END_CAPTURE;
			end->capture.next = next;
			begin->capture.type = RE_BEGIN_CAPTURE;
			begin->capture.next = CALL(alternation, end, end);
			skipspace(s, p);
			if (ST_INDEX(s, *p) == '{') {
				--*p;
			}
			return begin;
		}

		case '^': {
			re_node *cur = bl_node(con->block);
			cur->x.type = RE_AT_BOS;
			cur->x.next = next;
			--*p;
			return cur;
		}

		case '$': {
			re_node *cur = bl_node(con->block);
			cur->x.type = RE_AT_EOS;
			cur->x.next = next;
			--*p;
			return cur;
		}

		case '.': {
			re_node *cur = bl_node(con->block);
			cur->x.type = RE_ANYCH;
			cur->x.next = next;
			--*p;
			return cur;
		}

		case '`': {
			String tmp;
			int c;
			re_node *cur = bl_node(con->block);
			cur->literal.type = RE_PARTIAL;
			cur->literal.next = next;

			--*p;
			St_init(&tmp);
			while (
				(c = ST_INDEX(s, *p)) != EOF &&
				c != '`'
			) {
				--*p;
				if (c == ESCAPE && ST_INDEX(s, *p) != EOF) {
					c = ST_INDEX(s, *p);
					--*p;
				}
				St_cat_c(&tmp, c);
			}
			if (c == '`') {
				--*p;
			}
			St_reverse(&tmp);

			cur->literal.buf = bl_string(con->block, cur->literal.len = St_len(&tmp));
			memcpy(cur->literal.buf, St_ptr(&tmp), St_len(&tmp));
			St_clear(&tmp);

			return cur;
		}

		case '\'': {
			int c;
			re_node *cur = bl_node(con->block);
			cur->class.type = RE_CLASS;
			cur->class.next = next;
			memset(cur->class.vec, '\0', sizeof cur->class.vec);
			for (--*p; (c = ST_INDEX(s, *p)) != EOF && c != '\''; --*p) {
				if (c == ESCAPE) {
					switch (c = ST_INDEX(s, *p - 1)) {
						case EOF:
							cur->class.vec[ESCAPE / CHAR_BIT] |= 1u << ESCAPE % CHAR_BIT;
							break;

						case 'c':
							fill(cur->class.vec, iscntrl);
							--*p;
							break;
						case 'C':
							nfill(cur->class.vec, iscntrl);
							--*p;
							break;

						case 'd':
							fill(cur->class.vec, isdigit);
							--*p;
							break;
						case 'D':
							nfill(cur->class.vec, isdigit);
							--*p;
							break;

						case 'l':
							fill(cur->class.vec, islower);
							--*p;
							break;
						case 'L':
							nfill(cur->class.vec, islower);
							--*p;
							break;

						case 'p':
							fill(cur->class.vec, isprint);
							--*p;
							break;
						case 'P':
							nfill(cur->class.vec, isprint);
							--*p;
							break;

						case 'q':
							fill(cur->class.vec, isalpha);
							--*p;
							break;
						case 'Q':
							nfill(cur->class.vec, isalpha);
							--*p;
							break;

						case 's':
							fill(cur->class.vec, isspace);
							--*p;
							break;
						case 'S':
							nfill(cur->class.vec, isspace);
							--*p;
							break;

						case 'u':
							fill(cur->class.vec, isupper);
							--*p;
							break;
						case 'U':
							nfill(cur->class.vec, isupper);
							--*p;
							break;

						case 'w':
							fill(cur->class.vec, siword);
							--*p;
							break;
						case 'W':
							nfill(cur->class.vec, siword);
							--*p;
							break;

						case 'x':
							fill(cur->class.vec, isxdigit);
							--*p;
							break;
						case 'X':
							nfill(cur->class.vec, isxdigit);
							--*p;
							break;

						default:
							cur->class.vec[c / CHAR_BIT] |= 1u << c % CHAR_BIT;
							--*p;
							break;
					}
				} else {
					int b;
					if (ST_INDEX(s, *p - 1) == '-' && (b = ST_INDEX(s, *p - 2)) != EOF) {
						if (b == ESCAPE && (b = ST_INDEX(s, *p - 3)) == EOF) {
							b = ESCAPE;
						}
						if (b > c) {
							goto normal_char;
						}
						for (; b < c; ++b) {
							cur->class.vec[b / CHAR_BIT] |= 1u << b % CHAR_BIT;
						}
						cur->class.vec[b / CHAR_BIT] |= 1u << b % CHAR_BIT;
						*p -= 2;
						if (ST_INDEX(s, *p) == ESCAPE && ST_INDEX(s, *p - 1) != EOF) {
							--*p;
						}
					} else if (ST_INDEX(s, *p) == ']' && *p >= 8 && ST_INDEX(s, *p - 1) == ':') {

#define CXCLASS_CHECK(n, t, f, o, b) if (1) { \
	if (memcmp(St_ptr(s) + *p - (n), (t), (n) - 1) != 0) { \
		goto normal_char; \
	} \
	(b)(cur->class.vec, (f)); \
	*p -= (o); \
	break; \
} else (void)0

#define CCLASS_CHECK(t, f, o) CXCLASS_CHECK(sizeof (t), t, f, o, fill)
#define CNCLASS_CHECK(t, f, o) CXCLASS_CHECK(sizeof (t), t, f, o, nfill)

						if (ST_INDEX(s, *p - 8) == '[' && ST_INDEX(s, *p - 7) == ':') {
							switch (ST_INDEX(s, *p - 6)) {
								case 'a':
									if (ST_INDEX(s, *p - 5) != 'l') {
										goto normal_char;
									}
									switch (ST_INDEX(s, *p - 4)) {
										case 'n': CCLASS_CHECK("um", isalnum, 8);
										case 'p': CCLASS_CHECK("ha", isalpha, 8);
										default: goto normal_char;
									}
									break;

								case 'c': CCLASS_CHECK("ntrl", iscntrl, 8);
								case 'd': CCLASS_CHECK("igit", isdigit, 8);
								case 'g': CCLASS_CHECK("raph", isgraph, 8);
								case 'l': CCLASS_CHECK("ower", islower, 8);

								case 'p':
									switch (ST_INDEX(s, *p - 5)) {
										case 'r': CCLASS_CHECK("int", isprint, 8);
										case 'u': CCLASS_CHECK("nct", ispunct, 8);
										default: goto normal_char;
									}
									break;

								case 's': CCLASS_CHECK("pace", isspace, 8);
								case 'u': CCLASS_CHECK("pper", isupper, 8);
								default: goto normal_char;
							}
						} else if (*p >= 9 && ST_INDEX(s, *p - 9) == '[' && ST_INDEX(s, *p - 8) == ':') {
							switch (ST_INDEX(s, *p - 7)) {
								case 'x':
									CCLASS_CHECK("digit", isxdigit, 9);

								case '^':
									switch (ST_INDEX(s, *p - 6)) {
										case 'a':
											if (ST_INDEX(s, *p - 5) != 'l') {
												goto normal_char;
											}
											switch (ST_INDEX(s, *p - 4)) {
												case 'n': CNCLASS_CHECK("um", isalnum, 9);
												case 'p': CNCLASS_CHECK("ha", isalpha, 9);
												default: goto normal_char;
											}
											break;

										case 'c': CNCLASS_CHECK("ntrl", iscntrl, 9);
										case 'd': CNCLASS_CHECK("igit", isdigit, 9);
										case 'g': CNCLASS_CHECK("raph", isgraph, 9);
										case 'l': CNCLASS_CHECK("ower", islower, 9);

										case 'p':
											switch (ST_INDEX(s, *p - 5)) {
												case 'r': CNCLASS_CHECK("int", isprint, 9);
												case 'u': CNCLASS_CHECK("nct", ispunct, 9);
												default: goto normal_char;
											}
											break;

										case 's': CNCLASS_CHECK("pace", isspace, 9);
										case 'u': CNCLASS_CHECK("pper", isupper, 9);
										default: goto normal_char;
									}
									break;

								default: goto normal_char;
							}
						} else if (*p >= 10 && memcmp(St_ptr(s) + *p - 10, "[:^xdigit", 9) == 0) {
							nfill(cur->class.vec, isxdigit);
							*p -= 10;
						} else {
							goto normal_char;
						}
					} else if (ST_INDEX(s, *p) == '^' && ((b = ST_INDEX(s, *p - 1) == '\'') || b == EOF)) {
						cur->class.type = RE_N_CLASS;
					} else normal_char: {
						cur->class.vec[c / CHAR_BIT] |= 1u << c % CHAR_BIT;
					}
				}
			}
			if (c == '\'') {
				--*p;
			}

			if (cur->class.type == RE_CLASS) {
				size_t i;
				for (i = 0; i < CLASS_SIZE; ++i) {
					if (cur->class.vec[i] != (unsigned char)~0u) {
						return cur;
					}
				}
				cur->x.type = RE_ANYCH;
			} else {
				size_t i;
				assert(cur->class.type == RE_N_CLASS);
				for (i = 0; i < CLASS_SIZE; ++i) {
					if (cur->class.vec[i] != 0) {
						return cur;
					}
				}
				cur->x.type = RE_ANYCH;
			}

			return cur;
		}

std_atom:
		default: {
			re_node *cur = bl_node(con->block);
			cur->literal.type = RE_LITERAL;

			/*XXX?*/
			if (next && next->x.type == RE_LITERAL) {
				if (next->literal.len) {
					cur->literal.buf = bl_string(con->block, cur->literal.len = 1u + next->literal.len);
					memcpy(cur->literal.buf + 1, next->literal.buf, next->literal.len);
				}
				cur->literal.next = next->literal.next;
			} else {
				cur->literal.next = next;
				cur->literal.buf = bl_string(con->block, cur->literal.len = 1);
			}

			cur->literal.buf[0] = ST_INDEX(s, *p);
			--*p;
			return cur;
		}
	}
}

PRAZ(literal) {
	SNORK;
	int c;
	String tmp;
	re_node *cur;

	skipspace(s, p);
	if ((ST_INDEX(s, *p) != ESCAPE || isalnum(ST_INDEX(s, *p - 1))) && MEATCHARP(ST_INDEX(s, *p))) {
		TAILC(atom);
	}

	cur = bl_node(con->block);
	cur->literal.type = RE_LITERAL;
	cur->literal.next = next;

	St_init(&tmp);
	while (
			(c = ST_INDEX(s, *p)) != EOF &&
			(
			 !MEATCHARP(c) ||
			 (c == ESCAPE && !isalnum(ST_INDEX(s, *p - 1)))
			)
		  ) {
		--*p;
		if (c == ESCAPE && ST_INDEX(s, *p) != EOF) {
			c = ST_INDEX(s, *p);
			--*p;
		}
		St_cat_c(&tmp, c);
		skipspace(s, p);
	}
	St_reverse(&tmp);

	if (next && next->x.type == RE_LITERAL) {
		if (next->literal.len) {
			St_cat_m(&tmp, next->literal.buf, next->literal.len);
		}
		cur->literal.next = next->literal.next;
	}

	cur->literal.buf = bl_string(con->block, cur->literal.len = St_len(&tmp));
	memcpy(cur->literal.buf, St_ptr(&tmp), St_len(&tmp));
	St_clear(&tmp);

	return cur;
}

#define RETURN_REP(lo, hi, grdy) \
do { \
	re_node *cur = bl_node(con->block); \
	cur->rep.type = (grdy); \
	cur->rep.next = next; \
	cur->rep.min = (lo); \
	cur->rep.max = (hi); \
	cur->rep.n = con->base->repets++; \
	cur->rep.arg = CALL(atom, cur, NULL); \
	return cur; \
} while (0)

#define STD_REP_CASE(lo, hi, grdy) \
do { \
	--*p; \
	RETURN_REP(lo, hi, grdy); \
} while (0)

#define CUSTOM_REP_CASE(off, grdy, mismatch) \
do { \
	size_t from, to; \
	to = *p - 1 - (off); \
	while (isdigit(ST_INDEX(s, to))) { \
		--to; \
	} \
	if (ST_INDEX(s, to) == ':') { \
		*p = to - 1; \
		to = ST_INDEX(s, to + 1) == ':' ? (size_t)-1 : strtoul(St_ptr(s) + to + 1, NULL, 10); \
		from = to; \
	} else { \
		if (ST_INDEX(s, to) != ',') { \
			mismatch; \
		} \
		from = to - 1; \
		if (!isdigit(ST_INDEX(s, from))) { \
			mismatch; \
		} \
		do { \
			--from; \
		} while (isdigit(ST_INDEX(s, from))); \
		if (ST_INDEX(s, from) != ':') { \
			mismatch; \
		} \
		*p = from - 1; \
		from = strtoul(St_ptr(s) + from + 1, NULL, 10); \
		to = ST_INDEX(s, to + 1) == ':' ? (size_t)-1 : strtoul(St_ptr(s) + to + 1, NULL, 10); \
	} \
	if (to == 1) { \
		if (from == 1) { \
			TAILC(atom); \
		} \
		if (from == 0) { \
			re_node *cur = bl_node(con->block); \
			cur->alter.type = RE_ALTER; \
			if ((grdy) == RE_REP_GREEDY) { \
				cur->alter.next = next; \
				cur->alter.arg = CALL(atom, next, NULL); \
			} else { \
				cur->alter.arg = next; \
				cur->alter.next = CALL(atom, next, NULL); \
			} \
			return cur; \
		} \
	} \
	RETURN_REP(from, to, grdy); \
} while (0)

PRAZ(quantifire) {
	SNORK;

	skipspace(s, p);
	switch (ST_INDEX(s, *p)) {
		case EOF: return next;

		case '?':
			switch (ST_INDEX(s, *p - 1)) {
				case ':': CUSTOM_REP_CASE(1, RE_REP_FRUGAL, goto hook);
				case '*': --*p; STD_REP_CASE(0, -1, RE_REP_FRUGAL);
				case '+': --*p; STD_REP_CASE(1, -1, RE_REP_FRUGAL);

				case '?': {
					re_node *cur = bl_node(con->block);
					*p -= 2;
					cur->alter.type = RE_ALTER;
					cur->alter.next = CALL(atom, next, NULL);
					cur->alter.arg = next;
					return cur;
				}

hook:
				default: {
					re_node *cur = bl_node(con->block);
					--*p;
					cur->alter.type = RE_ALTER;
					cur->alter.next = next;
					cur->alter.arg = CALL(atom, next, NULL);
					return cur;
				}
			}

		case ':': CUSTOM_REP_CASE(0, RE_REP_GREEDY, goto noquanti);
		case '*': STD_REP_CASE(0, -1, RE_REP_GREEDY);
		case '+': STD_REP_CASE(1, -1, RE_REP_GREEDY);

noquanti:
		default: TAILC(literal);
	}
}

PRAZ(branch) {
	SNORK;
	skipspace(s, p);
	while (ST_INDEX(s, *p) != EOF && !ENDBRANCHP(ST_INDEX(s, *p))) {
		next = CALL(quantifire, next, curolan_zebeth);
		skipspace(s, p);
	}
	return next;
}

PRAZ(alternation) {
	SNORK;
	re_node *cur;

	skipspace(s, p);
	if (ST_INDEX(s, *p) == EOF) {
		return next;
	}

	next = CALL(branch, next, curolan_zebeth);
	skipspace(s, p);
	if (ST_INDEX(s, *p) != '|') {
		return next;
	}

	--*p;
	cur = bl_node(con->block);
	cur->alter.type = RE_ALTER;
	cur->alter.next = next;
	cur->alter.arg = CALL(alternation, curolan_zebeth, curolan_zebeth);
	return cur;
}

ATTR_CONST
static size_t minimum(const size_t a, const size_t b) {
	return a < b ? a : b;
}

ATTR_CONST
static size_t maximum(const size_t a, const size_t b) {
	return a > b ? a : b;
}

ATTR_PURE
static size_t subminlen(const re_node *node, const re_node *end, size_t start) {
	if (!node || node == end) {
		return start;
	}

	switch (node->x.type) {
		case RE_ALTER:
			return minimum(
					subminlen(node->alter.arg, end, start),
					subminlen(node->alter.next, end, start)
					);

		case RE_ANYCH:
		case RE_CLASS:
		case RE_DEFCLASS:
		case RE_N_CLASS:
		case RE_N_DEFCLASS:
			return subminlen(node->x.next, end, start + 1u);

		case RE_AT_BOL:
		case RE_AT_BOS:
		case RE_AT_EOL:
		case RE_AT_EOS:
		case RE_AT_NWBOUND:
		case RE_AT_WBOUND:
		case RE_BACKCHECK:
		case RE_BACKREF:
		case RE_BEGIN_CAPTURE:
		case RE_END_CAPTURE:
		case RE_PARTIAL:
		case RE_STUFF_FRUGAL:
		case RE_STUFF_GREEDY:
			return subminlen(node->x.next, end, start);

		case RE_AT_MATCH:
			return maximum(
					subminlen(node->indep.arg, end, start),
					subminlen(node->indep.next, end, start)
					);

		case RE_AT_NOMATCH:
			return subminlen(node->indep.next, end, start);

		case RE_INDEP:
			return subminlen(node->indep.next, end, start + subminlen(node->indep.arg, NULL, 0));

		case RE_LITERAL:
			return subminlen(node->literal.next, end, start + node->literal.len);

		case RE_SELECT:
			return minimum(
					subminlen(node->select.cond, end, start) + subminlen(node->select.arg, end, start),
					subminlen(node->select.next, end, start)
					);

		case RE_REP_FRUGAL:
		case RE_REP_GREEDY:
			if (node->rep.min == 0) {
				return subminlen(node->rep.next, end, start);
			}
			return subminlen(node->rep.next, end, start + node->rep.min * subminlen(node->rep.arg, node, 0));

		case RE_XREP_FRUGAL:
		case RE_XREP_GREEDY:
			if (node->rep.min == 0) {
				return subminlen(node->rep.next, end, start);
			}
			return subminlen(node->rep.next, end, start + node->rep.min * subminlen(node->rep.arg, NULL, 0));
	}

	NOTREACHED;
}

ATTR_PURE
static size_t minlen(const re_node *node) {
	return subminlen(node, NULL, 0);
}

static void reinit(t_regex *re) {
	const struct cap_state null = { -1, -1, -1 };
	size_t i;
	for (i = 0; i < re->captures; ++i) {
		re->cap_ptr[i] = null;
	}
}

static re_node **notcomplex(re_node **node, const re_node *end) {
	assert(*node != NULL);

	if (*node == end) {
		return node;
	}

	switch ((*node)->x.type) {
		case RE_ALTER:
		case RE_AT_MATCH:
		case RE_AT_NOMATCH:
		case RE_BACKCHECK:
		case RE_BACKREF:
		case RE_BEGIN_CAPTURE:
		case RE_END_CAPTURE:
		case RE_INDEP:
		case RE_PARTIAL:
		case RE_REP_FRUGAL:
		case RE_REP_GREEDY:
		case RE_SELECT:
		case RE_STUFF_FRUGAL:
		case RE_STUFF_GREEDY:
			return NULL;

		case RE_XREP_FRUGAL:
		case RE_XREP_GREEDY:
			if ((*node)->rep.min == (*node)->rep.max) {
				return notcomplex(&(*node)->rep.next, end);
			}
			return NULL;

		case RE_ANYCH:
		case RE_AT_BOL:
		case RE_AT_BOS:
		case RE_AT_EOL:
		case RE_AT_EOS:
		case RE_AT_NWBOUND:
		case RE_AT_WBOUND:
		case RE_CLASS:
		case RE_DEFCLASS:
		case RE_LITERAL:
		case RE_N_CLASS:
		case RE_N_DEFCLASS:
			return notcomplex(&(*node)->x.next, end);
	}

	NOTREACHED;
}

static void trysimpl(re_node *node) {
	assert(node->x.type == RE_REP_FRUGAL || node->x.type == RE_REP_GREEDY);

	{
		re_node **const tmp = notcomplex(&node->rep.arg, node);
		if (!tmp) {
			return;
		}
		*tmp = NULL;
	}

	switch (node->rep.type) {
		case RE_REP_FRUGAL:
			if (
					node->rep.arg->x.type == RE_ANYCH &&
					node->rep.arg->x.next == NULL &&
					node->rep.max == (size_t)-1
			   ) {
				if (node->rep.min == 0) {
					node->x.type = RE_STUFF_FRUGAL;
					break;
				}
				if (node->rep.min == 1) {
					node->rep.arg->x.next = node->x.next;
					node->rep.arg->x.type = RE_STUFF_FRUGAL;
					node->x.next = node->rep.arg;
					node->x.type = RE_ANYCH;
					break;
				}
			}

			node->rep.n = minlen(node->rep.arg);
			node->rep.type = RE_XREP_FRUGAL;
			break;

		case RE_REP_GREEDY:
			if (
					node->rep.arg->x.type == RE_ANYCH &&
					node->rep.arg->x.next == NULL &&
					node->rep.max == (size_t)-1
			   ) {
				if (node->rep.min == 0) {
					node->x.type = RE_STUFF_GREEDY;
					break;
				}
				if (node->rep.min == 1) {
					node->rep.arg->x.next = node->x.next;
					node->rep.arg->x.type = RE_STUFF_GREEDY;
					node->x.next = node->rep.arg;
					node->x.type = RE_ANYCH;
					break;
				}
			}

			node->rep.n = minlen(node->rep.arg);
			node->rep.type = RE_XREP_GREEDY;
			break;

		default:
			NOTREACHED;
	}

}

static void simplerep(re_node *node, const re_node *const end) {
	if (!node || node == end) {
		return;
	}

	switch (node->x.type) {
		case RE_ALTER:
			simplerep(node->alter.arg, end);
			simplerep(node->alter.next, end);
			return;

		case RE_AT_MATCH:
		case RE_AT_NOMATCH:
		case RE_INDEP:
			simplerep(node->indep.arg, end);
			simplerep(node->indep.next, end);
			return;

		case RE_ANYCH:
		case RE_AT_BOL:
		case RE_AT_BOS:
		case RE_AT_EOL:
		case RE_AT_EOS:
		case RE_AT_NWBOUND:
		case RE_AT_WBOUND:
		case RE_BACKCHECK:
		case RE_BACKREF:
		case RE_BEGIN_CAPTURE:
		case RE_CLASS:
		case RE_DEFCLASS:
		case RE_END_CAPTURE:
		case RE_LITERAL:
		case RE_N_CLASS:
		case RE_N_DEFCLASS:
		case RE_PARTIAL:
			simplerep(node->x.next, end);
			return;

		case RE_REP_FRUGAL:
		case RE_REP_GREEDY:
			simplerep(node->rep.next, end);
			simplerep(node->rep.arg, node);
			trysimpl(node);
			return;

		case RE_SELECT:
			simplerep(node->select.cond, end);
			simplerep(node->select.arg, end);
			simplerep(node->select.next, end);
			return;

		case RE_STUFF_FRUGAL:
		case RE_STUFF_GREEDY:
		case RE_XREP_FRUGAL:
		case RE_XREP_GREEDY:
			return;
	}

	NOTREACHED;
}

ATTR_PURE
static int anchorbegp(const re_node *node) {
	return
		node &&
		(
		 node->x.type == RE_AT_BOS ||
		 node->x.type == RE_STUFF_FRUGAL ||
		 node->x.type == RE_STUFF_GREEDY ||
		 (
		  node->x.type == RE_ALTER &&
		  anchorbegp(node->alter.arg) &&
		  anchorbegp(node->alter.next)
		 ) ||
		 (
		  node->x.type == RE_INDEP &&
		  anchorbegp(node->indep.arg)
		 ) ||
		 (
		  node->x.type == RE_AT_MATCH &&
		  (
		   anchorbegp(node->indep.arg) ||
		   anchorbegp(node->indep.next)
		  )
		 ) ||
		 (
		  (
		   node->x.type == RE_AT_NOMATCH ||
		   node->x.type == RE_AT_BOL ||
		   node->x.type == RE_AT_EOL ||
		   node->x.type == RE_AT_EOS ||
		   node->x.type == RE_AT_NWBOUND ||
		   node->x.type == RE_AT_WBOUND ||
		   node->x.type == RE_BEGIN_CAPTURE ||
		   node->x.type == RE_BACKCHECK
		  ) &&
		  anchorbegp(node->x.next)
		 )
		)
		;
}

static void dostuff(t_regex *re) {
	simplerep(re->start, NULL);

	if (!re->start || anchorbegp(re->start)) {
		re->flags |= FL_ANCHOR_START;
	}
}

t_regex *re_compile(const String *s) {
	size_t p;
	struct parse_context dcon;
	t_regex *re;
	void *pre;

	if (h_get(&rcache, s, &pre) == H_OK) {
		re = pre;
		++re->refs;
		return re;
	}

	re = xmalloc(1, sizeof *re);
	re->flags = FL_NONE;
	re->refs = 0;
	re->repets = 0;
	bl_init(&re->alloc);
	re->cap_ptr = NULL;
	re->captures = 0;
	dcon.base = re;
	dcon.block = &re->alloc;
	dcon.s = s;
	p = St_len(s) - 1;
	dcon.p = &p;

	re->start = alternation(&dcon, NULL, NULL);

	if (re->captures) {
		re->cap_ptr = xmalloc(re->captures, sizeof *re->cap_ptr);
		reinit(re);
	}

	dostuff(re);
	re->minlen = minlen(re->start);
	re->repbuf = xmalloc(re->repets, sizeof *re->repbuf);

	if (cachfill < sizeof cached / sizeof cached[0]) {
		St_init(&cached[cachfill]);
		St_cpy(&cached[cachfill], s);
		++re->refs;
		h_put(&rcache, &cached[cachfill], re, 1);
		++cachfill;
	} else {
		h_del(&rcache, &cached[cachlast]);
		St_cpy(&cached[cachlast], s);
		++re->refs;
		h_put(&rcache, &cached[cachlast], re, 1);
		cachlast = (cachlast + 1u) % (sizeof cached / sizeof cached[0]);
	}

	if (Opt.debug & DBG_REGEX) {
		io_write_s(Err, "Compiled ");
		re_dump(re, io_fp(Err));
	}

	return re;
}

t_regex *re_dup(t_regex *re) {
	++re->refs;
	return re;
}

void re_free(t_regex *re) {
	if (re->refs) {
		--re->refs;
		return;
	}
	bl_free(&re->alloc);
	xfree(re->repbuf);
	xfree(re->cap_ptr);
	xfree(re);
}

ATTR_PURE
static int match_class(unsigned char c, const unsigned char *vec) {
	return vec[c / CHAR_BIT] & 1u << c % CHAR_BIT;
}

static void *mem_dup(const void *p, size_t n, size_t m) {
	void *q = xmalloc(n, m);
	memcpy(q, p, n * m);
	return q;
}

#define NO_MATCH ((size_t)-1)

#define Mmatch0(Xmatch, Xindex) \
	size_t tmp; \
 \
	if (!node) { \
		return o; \
	} \
 \
	switch (node->x.type) { \
		case RE_ALTER: \
			if ((tmp = Xmatch(base, node->alter.arg, s, o)) == NO_MATCH) { \
				return Xmatch(base, node->alter.next, s, o); \
			} \
			return tmp; \
 \
		case RE_ANYCH: \
			if (Xindex(s, o) != EOF) { \
				return Xmatch(base, node->x.next, s, o + 1u); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_BOL: \
			if (o == 0 || Xindex(s, o - 1u) == '\n') { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_BOS: \
			if (o == 0) { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_EOL: \
			if ( \
					Xindex(s, o) == '\n' || \
					( \
					 o && Xindex(s, o) == EOF && Xindex(s, o - 1u) != '\n' \
					) \
			   ) { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_EOS: \
			if (Xindex(s, o) == EOF) { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_MATCH: { \
			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
			tmp = Xmatch(base, node->indep.arg, s, o); \
			if (tmp != NO_MATCH) { \
				tmp = Xmatch(base, node->indep.next, s, o); \
				if (tmp == NO_MATCH) { \
					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
				} \
			} \
			xfree(prev); \
			return tmp; \
		} \
 \
		case RE_AT_NOMATCH: { \
			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
			tmp = Xmatch(base, node->indep.arg, s, o); \
			if (tmp == NO_MATCH) { \
				tmp = Xmatch(base, node->indep.next, s, o); \
			} else { \
				memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
				tmp = NO_MATCH; \
			} \
			xfree(prev); \
			return tmp; \
		} \
 \
		case RE_AT_NWBOUND: \
			if (siword(Xindex(s, o - 1u)) == siword(Xindex(s, o))) { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_AT_WBOUND: \
			if (siword(Xindex(s, o - 1u)) != siword(Xindex(s, o))) { \
				return Xmatch(base, node->x.next, s, o); \
			} \
			return NO_MATCH; \
 \
		case RE_BACKCHECK: \
			if ( \
					node->backref.n < base->captures && \
					base->cap_ptr[node->backref.n].start + 1u && \
					base->cap_ptr[node->backref.n].end + 1u && \
					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start \
			   ) { \
				return Xmatch(base, node->backref.next, s, o); \
			} \
			return NO_MATCH


#define Mmatch1(Xmatch, Xindex) \
		case RE_BEGIN_CAPTURE: { \
			const size_t prev = base->cap_ptr[node->capture.n].pending; \
			base->cap_ptr[node->capture.n].pending = o; \
			if ((tmp = Xmatch(base, node->capture.next, s, o)) == NO_MATCH) { \
				base->cap_ptr[node->capture.n].pending = prev; \
			} \
			return tmp; \
		} \
 \
		case RE_CLASS: \
			if ( \
				Xindex(s, o) != EOF && \
				match_class(Xindex(s, o), node->class.vec) \
			) { \
				return Xmatch(base, node->class.next, s, o + 1u); \
			} \
			return NO_MATCH; \
 \
		case RE_DEFCLASS: \
			if ( \
				Xindex(s, o) != EOF && \
				match_class(Xindex(s, o), node->defclass.vec) \
			) { \
				return Xmatch(base, node->defclass.next, s, o + 1u); \
			} \
			return NO_MATCH; \
 \
		case RE_END_CAPTURE: { \
			const size_t prevbeg = base->cap_ptr[node->capture.n].start; \
			const size_t prevend = base->cap_ptr[node->capture.n].end; \
			if (base->cap_ptr[node->capture.n].pending + 1u == 0u) { \
				return NO_MATCH; \
			} \
 \
			base->cap_ptr[node->capture.n].end = o; \
			base->cap_ptr[node->capture.n].start = base->cap_ptr[node->capture.n].pending; \
			if ((tmp = Xmatch(base, node->capture.next, s, o)) == NO_MATCH) { \
				base->cap_ptr[node->capture.n].end = prevend; \
				base->cap_ptr[node->capture.n].start = prevbeg; \
			} \
			return tmp; \
		} \
 \
		case RE_INDEP: { \
			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
			if ((tmp = Xmatch(base, node->indep.arg, s, o)) != NO_MATCH) { \
				tmp = Xmatch(base, node->indep.next, s, tmp); \
				if (tmp == NO_MATCH) { \
					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
				} \
			} \
			xfree(prev); \
			return tmp; \
		} \
 \
		case RE_N_CLASS: \
			if ( \
				Xindex(s, o) != EOF && \
				!match_class(Xindex(s, o), node->class.vec) \
			) { \
				return Xmatch(base, node->class.next, s, o + 1u); \
			} \
			return NO_MATCH; \
 \
		case RE_N_DEFCLASS: \
			if ( \
				Xindex(s, o) != EOF && \
				!match_class(Xindex(s, o), node->defclass.vec) \
			) { \
				return Xmatch(base, node->defclass.next, s, o + 1u); \
			} \
			return NO_MATCH; \
 \
		case RE_PARTIAL: { \
			size_t n; \
			for (n = 0; n < node->literal.len; ++n) { \
				if (node->literal.buf[n] != Xindex(s, o + n)) { \
					break; \
				} \
			} \
			for (; n; --n) { \
				if ((tmp = Xmatch(base, node->literal.next, s, o + n)) != NO_MATCH) { \
					return tmp; \
				} \
			} \
			return Xmatch(base, node->literal.next, s, o); \
		} \
 \
		case RE_SELECT: { \
			struct cap_state *prev = mem_dup(base->cap_ptr, base->captures, sizeof *base->cap_ptr); \
			if ((tmp = Xmatch(base, node->select.cond, s, o)) != NO_MATCH) { \
				tmp = Xmatch(base, node->select.arg, s, tmp); \
				if (tmp == NO_MATCH) { \
					memcpy(base->cap_ptr, prev, base->captures * sizeof *base->cap_ptr); \
				} \
			} else { \
				tmp = Xmatch(base, node->select.next, s, o); \
			} \
			xfree(prev); \
			return tmp; \
		} \
 \
		case RE_REP_FRUGAL: \
			if (base->repbuf[node->rep.n] >= node->rep.max) { \
				const size_t n = base->repbuf[node->rep.n]; \
 \
				base->repbuf[node->rep.n] = 0; \
				tmp = Xmatch(base, node->rep.next, s, o); \
				base->repbuf[node->rep.n] = n; \
				return tmp; \
			} else if (base->repbuf[node->rep.n] >= node->rep.min) { \
				const size_t n = base->repbuf[node->rep.n]; \
 \
				base->repbuf[node->rep.n] = 0; \
				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
					base->repbuf[node->rep.n] = n; \
					return tmp; \
				} \
				base->repbuf[node->rep.n] = n + 1u; \
				tmp = Xmatch(base, node->rep.arg, s, o); \
				--base->repbuf[node->rep.n]; \
				return tmp; \
			} else { \
				++base->repbuf[node->rep.n]; \
				tmp = Xmatch(base, node->rep.arg, s, o); \
				--base->repbuf[node->rep.n]; \
				return tmp; \
			} \
 \
		case RE_REP_GREEDY: \
			if (base->repbuf[node->rep.n] >= node->rep.max) { \
				const size_t n = base->repbuf[node->rep.n]; \
 \
				base->repbuf[node->rep.n] = 0; \
				tmp = Xmatch(base, node->rep.next, s, o); \
				base->repbuf[node->rep.n] = n; \
				return tmp; \
			} else if (base->repbuf[node->rep.n] >= node->rep.min) { \
				const size_t n = base->repbuf[node->rep.n]; \
 \
				++base->repbuf[node->rep.n]; \
				if ((tmp = Xmatch(base, node->rep.arg, s, o)) != NO_MATCH) { \
					--base->repbuf[node->rep.n]; \
					return tmp; \
				} \
				base->repbuf[node->rep.n] = 0; \
				tmp = Xmatch(base, node->rep.next, s, o); \
				base->repbuf[node->rep.n] = n; \
				return tmp; \
			} else { \
				++base->repbuf[node->rep.n]; \
				tmp = Xmatch(base, node->rep.arg, s, o); \
				--base->repbuf[node->rep.n]; \
				return tmp; \
			} \
 \
		case RE_XREP_FRUGAL: { \
			size_t n; \
 \
			for (n = 0; n < node->rep.min; ++n) { \
				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
					return NO_MATCH; \
				} \
				o = tmp; \
			} \
 \
			for (; n < node->rep.max; ++n) { \
				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
					return tmp; \
				} \
				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
					return NO_MATCH; \
				} \
				o = tmp; \
			} \
			return Xmatch(base, node->rep.next, s, o); \
		} \
 \
		case RE_XREP_GREEDY: { \
			size_t n; \
 \
			for (n = 0; n < node->rep.min; ++n) { \
				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
					return NO_MATCH; \
				} \
				o = tmp; \
			} \
 \
			for (; n < node->rep.max; ++n) { \
				if ((tmp = Xmatch(base, node->rep.arg, s, o)) == NO_MATCH) { \
					break; \
				} \
				o = tmp; \
			} \
 \
			for (; n > node->rep.min; --n) { \
				if ((tmp = Xmatch(base, node->rep.next, s, o)) != NO_MATCH) { \
					return tmp; \
				} \
				o -= node->rep.n; \
			} \
 \
			return Xmatch(base, node->rep.next, s, o); \
		} \
	} \
 \
	NOTREACHED


static size_t match(const t_regex *base, const re_node *node, const String *s, size_t o) {

	Mmatch0(match, ST_INDEX);

		case RE_BACKREF:
			if (
					node->backref.n < base->captures &&
					base->cap_ptr[node->backref.n].start + 1u &&
					base->cap_ptr[node->backref.n].end + 1u &&
					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start &&
					(
					 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start <= St_len(s) - o &&
					 memcmp(
						 St_ptr(s) + base->cap_ptr[node->backref.n].start,
						 St_ptr(s) + o,
						 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start
						 ) == 0
					)
			   ) {
				return match(base, node->backref.next, s, o + base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start);
			}
			return NO_MATCH;


		case RE_LITERAL:
			if (
					node->literal.len == 0 ||
					(
					 node->literal.len <= St_len(s) - o &&
					 memcmp(node->literal.buf, St_ptr(s) + o, node->literal.len) == 0
					)
			   ) {
				return match(base, node->literal.next, s, o + node->literal.len);
			}
			return NO_MATCH;

		case RE_STUFF_FRUGAL:
			if (!node->x.next) {
				return o;
			}

			if (node->x.next->x.type == RE_LITERAL && node->x.next->literal.len) {
				for (
						o = St_stro_m(s, o, node->x.next->literal.buf, node->x.next->literal.len);
						o + 1u;
						o = St_stro_m(s, o + 1u, node->x.next->literal.buf, node->x.next->literal.len)
						) {
					if ((tmp = match(base, node->x.next->literal.next, s, o + node->x.next->literal.len)) != NO_MATCH) {
						return tmp;
					}
				}
				return NO_MATCH;
			}

			for (; o <= St_len(s); ++o) {
				if ((tmp = match(base, node->x.next, s, o)) != NO_MATCH) {
					return tmp;
				}
			}
			return NO_MATCH;

		case RE_STUFF_GREEDY: {
			size_t n;

			if (!node->x.next) {
				return St_len(s);
			}

			if (node->x.next->x.type == RE_LITERAL && node->x.next->literal.len) {
				for (
						n = St_rstr_m(s, node->x.next->literal.buf, node->x.next->literal.len);
						n > o && n + 1u;
						n = St_rstro_m(s, n - 1u, node->x.next->literal.buf, node->x.next->literal.len)
						) {
					if ((tmp = match(base, node->x.next->literal.next, s, n + node->x.next->literal.len)) != NO_MATCH) {
						return tmp;
					}
					if (!n) {
						break;
					}
				}
				if (n == o) {
					return match(base, node->x.next->literal.next, s, n + node->x.next->literal.len);
				}
				return NO_MATCH;
			}

			for (n = St_len(s); n > o; --n) {
				if ((tmp = match(base, node->x.next, s, n)) != NO_MATCH) {
					return tmp;
				}
			}
			return match(base, node->x.next, s, o);
		}

	Mmatch1(match, ST_INDEX);
}

static size_t iomatch(const t_regex *base, const re_node *node, IO *s, size_t o) {

	Mmatch0(iomatch, io_peek);

		case RE_BACKREF:
			if (
					node->backref.n < base->captures &&
					base->cap_ptr[node->backref.n].start + 1u &&
					base->cap_ptr[node->backref.n].end + 1u &&
					base->cap_ptr[node->backref.n].end >= base->cap_ptr[node->backref.n].start &&
					(
					 io_xcmp(
						 s,
						 base->cap_ptr[node->backref.n].start,
						 o,
						 base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start
						 ) == 0
					)
			   ) {
				return iomatch(base, node->backref.next, s, o + base->cap_ptr[node->backref.n].end - base->cap_ptr[node->backref.n].start);
			}
			return NO_MATCH;


		case RE_LITERAL:
			if (
					node->literal.len == 0 ||
					(
					 io_cmppeek(s, o, node->literal.buf, node->literal.len) == 0
					)
			   ) {
				return iomatch(base, node->literal.next, s, o + node->literal.len);
			}
			return NO_MATCH;

		case RE_STUFF_FRUGAL:
			if (!node->x.next) {
				return o;
			}

			for (; io_peek(s, o) != EOF; ++o) {
				if ((tmp = iomatch(base, node->x.next, s, o)) != NO_MATCH) {
					return tmp;
				}
			}
			return iomatch(base, node->x.next, s, o);

		case RE_STUFF_GREEDY: {
			size_t n;

			for (n = o; io_peek(s, n) != EOF; ++n)
				;

			if (!node->x.next) {
				return n;
			}

			for (; n > o; --n) {
				if ((tmp = iomatch(base, node->x.next, s, n)) != NO_MATCH) {
					return tmp;
				}
			}
			return iomatch(base, node->x.next, s, o);
		}

	Mmatch1(iomatch, io_peek);
}

#define DEFRBUF ((void)0)
#define MRETURN(x) return (x)
#define INITRBUF(re) memset((re)->repbuf, '\0', (re)->repets * sizeof *(re)->repbuf)

int re_iomatch(t_regex *re, IO *io, size_t *ms, size_t *me) {
	size_t i;
	DEFRBUF;
	INITRBUF(re);

	assert(re != NULL);
	assert(io != NULL);
	assert(io_bufred(io));
	assert(ms != NULL);
	assert(me != NULL);

	reinit(re);

	if (re->flags & FL_ANCHOR_START) {
		if ((*me = iomatch(re, re->start, io, 0)) == NO_MATCH) {
			MRETURN(0);
		}
		*ms = 0;
		MRETURN(1);
	}

	for (i = 0; io_peek(io, i) != EOF; ++i) {
		if ((*me = iomatch(re, re->start, io, i)) != NO_MATCH) {
			*ms = i;
			MRETURN(1);
		}
	}

	if ((*me = iomatch(re, re->start, io, i)) != NO_MATCH) {
		*ms = i;
		MRETURN(1);
	}
	MRETURN(0);
}

int re_match(t_regex *re, const String *s, size_t *ms, size_t *me) {
	size_t i;
	DEFRBUF;
	INITRBUF(re);

	assert(re != NULL);
	assert(s != NULL);
	assert(ms != NULL);
	assert(me != NULL);

	if (re->minlen > St_len(s)) {
		MRETURN(0);
	}

	reinit(re);

	if (re->flags & FL_ANCHOR_START) {
		if ((*me = match(re, re->start, s, 0)) == NO_MATCH) {
			MRETURN(0);
		}
		*ms = 0;
		MRETURN(1);
	}

	if (re->start->x.type == RE_LITERAL && re->start->literal.len) {
		for (
				i = St_str_m(s, re->start->literal.buf, re->start->literal.len);
				i + 1u;
				i = St_stro_m(s, i + 1u, re->start->literal.buf, re->start->literal.len)
			) {
			if ((*me = match(re, re->start->literal.next, s, i + re->start->literal.len)) != NO_MATCH) {
				*ms = i;
				MRETURN(1);
			}
		}
		MRETURN(0);
	}

	for (i = 0; i <= St_len(s) - re->minlen; ++i) {
		if ((*me = match(re, re->start, s, i)) != NO_MATCH) {
			*ms = i;
			MRETURN(1);
		}
	}

	MRETURN(0);
}

size_t re_cabra(const t_regex *re) {
	return re->captures;
}

int re_backref(const t_regex *re, size_t i, size_t *a, size_t *z) {
	if (i >= re->captures) {
		return -1;
	}
	if (!(re->cap_ptr[i].start + 1u && re->cap_ptr[i].end + 1u)) {
		return 1;
	}
	*a = re->cap_ptr[i].start;
	*z = re->cap_ptr[i].end;
	return 0;
}

ATTR_PURE
static const re_node *branchend(const re_node *a, const re_node *b) {
	for (; a; a = a->x.next) {
		const re_node *q = b;
		for (; q; q = q->x.next) {
			if (a == q) {
				return q;
			}
		}
	}
	return NULL;
}

static void decom_class(String *s, const unsigned char vec[HAVE_C99(static CLASS_SIZE)]) {
	unsigned char i;
	int flag = 0;
	char big_enough[100];

	for (i = 0; i < UCHAR_MAX; ++i) {
		if (vec[i / CHAR_BIT] & 1u << i % CHAR_BIT) {
			if (vec[(i + 1u) / CHAR_BIT] & 1u << (i + 1u) % CHAR_BIT) {
				if (!flag) {
					flag = 1;
					goto mess;
				}
				flag = 1;
			} else {
				if (flag) {
					if (i > 1u && vec[(i - 2u) / CHAR_BIT] & 1u << (i - 2u) % CHAR_BIT) {
						St_cat_c(s, '-');
					}
					flag = 0;
				}
mess:
				if (!isprint(i)) {
					St_cat_c(s, '\\');
					switch (i) {
						case '\a': St_cat_c(s, 'a'); break;
						case '\b': St_cat_c(s, 'b'); break;
						case '\f': St_cat_c(s, 'f'); break;
						case '\n': St_cat_c(s, 'n'); break;
						case '\r': St_cat_c(s, 'r'); break;
						case '\t': St_cat_c(s, 't'); break;
						case '\v': St_cat_c(s, 'v'); break;
						default:
								   sprintf(big_enough, "%03o", (unsigned)i);
								   St_cat_s(s, big_enough);
								   break;
					}
				} else {
					if (strchr("\\\"", (char)i)) {
						St_cat_c(s, '\\');
					}
					St_cat_c(s, i);
					if (strchr("!'-", (char)i)) {
						St_cat_c(s, ESCAPE);
					}
				}
			}
		}
	}
	if (vec[i / CHAR_BIT] & 1u << i % CHAR_BIT) {
		if (flag) {
			if (vec[(i - 2u) / CHAR_BIT] & 1u << (i - 2u) % CHAR_BIT) {
				St_cat_c(s, '-');
			}
			flag = 0;
		}
		if (!isprint(i)) {
			St_cat_c(s, '\\');
			switch (i) {
				case '\a': St_cat_c(s, 'a'); break;
				case '\b': St_cat_c(s, 'b'); break;
				case '\f': St_cat_c(s, 'f'); break;
				case '\n': St_cat_c(s, 'n'); break;
				case '\r': St_cat_c(s, 'r'); break;
				case '\t': St_cat_c(s, 't'); break;
				case '\v': St_cat_c(s, 'v'); break;
				default:
						   sprintf(big_enough, "%03o", (unsigned)i);
						   St_cat_s(s, big_enough);
						   break;
			}
		} else {
			if (strchr("\\\"", (char)i)) {
				St_cat_c(s, '\\');
			}
			St_cat_c(s, i);
			if (strchr("!'-", (char)i)) {
				St_cat_c(s, ESCAPE);
			}
		}
	}
}

static void dumpclass(FILE *fp, const unsigned char vec[HAVE_C99(static CLASS_SIZE)]) {
	String tmp;
	St_init(&tmp);
	decom_class(&tmp, vec);
	ST_WRITE(&tmp, fp);
	St_clear(&tmp);
}

ATTR_CONST
static int cdefclass(const unsigned char *ptr) {
	if (ptr == dc_word)   { return 'w'; }
	if (ptr == dc_alpha)  { return 'q'; }
	if (ptr == dc_cntrl)  { return 'c'; }
	if (ptr == dc_digit)  { return 'd'; }
	if (ptr == dc_lower)  { return 'l'; }
	if (ptr == dc_print)  { return 'p'; }
	if (ptr == dc_space)  { return 's'; }
	if (ptr == dc_upper)  { return 'u'; }
	if (ptr == dc_xdigit) { return 'x'; }
	NOTREACHED;
}

static void decom_liter(String *s, const re_node *node) {
	size_t i;
	assert(node->x.type == RE_LITERAL || node->x.type == RE_PARTIAL);
	for (i = 0; i < node->literal.len; ++i) {
		if (!isprint((unsigned char)node->literal.buf[i])) {
			St_cat_c(s, '\\');
			switch (node->literal.buf[i]) {
				case '\a': St_cat_c(s, 'a'); break;
				case '\b': St_cat_c(s, 'b'); break;
				case '\f': St_cat_c(s, 'f'); break;
				case '\n': St_cat_c(s, 'n'); break;
				case '\r': St_cat_c(s, 'r'); break;
				case '\t': St_cat_c(s, 't'); break;
				case '\v': St_cat_c(s, 'v'); break;
				default: {
					char big_enough[100];
					sprintf(big_enough, "%03o", (unsigned)(unsigned char)node->literal.buf[i]);
					St_cat_s(s, big_enough);
					break;
				}
			}
		} else {
			if (node->x.type == RE_LITERAL && strchr("\"\\", node->literal.buf[i])) {
				St_cat_c(s, '\\');
			}
			St_cat_c(s, node->literal.buf[i]);
			if (MEATCHARP(node->literal.buf[i])) {
				St_cat_c(s, ESCAPE);
			}
		}
	}
}

static void dumpliter(FILE *fp, const re_node *node) {
	String tmp;
	St_init(&tmp);
	decom_liter(&tmp, node);
	ST_WRITE(&tmp, fp);
	St_clear(&tmp);
}

static void do_indent(FILE *fp, size_t n) {
	for (; n; --n) {
		fputs("  ", fp);
	}
}

static void dumpnode(FILE *fp, const re_node *node, const re_node *end, const size_t indent) {
	if (node == end) {
		return;
	}

	assert(node != NULL);
	fprintf(fp, "[%p] ", (const void *)node);
	do_indent(fp, indent);

	switch (node->x.type) {
		case RE_ALTER: {
			const re_node *const be = branchend(node->alter.arg, node->alter.next);
			fputs("branch {\n", fp);
			dumpnode(fp, node->alter.arg, be, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} or {\n", fp);
			dumpnode(fp, node->alter.next, be, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /branch\n", fp);
			dumpnode(fp, be, end, indent);
			return;
		}

		case RE_ANYCH:
			fputs("anychar\n", fp);
			break;

		case RE_AT_BOL:
			fputs("at beginning of line\n", fp);
			break;

		case RE_AT_BOS:
			fputs("at beginning of string\n", fp);
			break;

		case RE_AT_EOL:
			fputs("at end of line\n", fp);
			break;

		case RE_AT_EOS:
			fputs("at end of string\n", fp);
			break;

		case RE_AT_MATCH:
			fputs("at match {\n", fp);
			dumpnode(fp, node->indep.arg, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /at match\n", fp);
			break;

		case RE_AT_NOMATCH:
			fputs("not at match {\n", fp);
			dumpnode(fp, node->indep.arg, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /not at match\n", fp);
			break;

		case RE_AT_NWBOUND:
			fputs("not at word boundary\n", fp);
			break;

		case RE_AT_WBOUND:
			fputs("at word boundary\n", fp);
			break;

		case RE_BACKCHECK:
			fprintf(fp, "check capture (%lu)\n", (unsigned long)node->backref.n);
			break;

		case RE_BACKREF:
			fprintf(fp, "backref (%lu)\n", (unsigned long)node->backref.n);
			break;

		case RE_BEGIN_CAPTURE:
			fprintf(fp, "begin capture (%lu)\n", (unsigned long)node->backref.n);
			break;

		case RE_CLASS:
			fputs("character class '", fp);
			dumpclass(fp, node->class.vec);
			fputs("'\n", fp);
			break;

		case RE_DEFCLASS:
			fprintf(fp, "character class %c!\n", cdefclass(node->defclass.vec));
			break;

		case RE_END_CAPTURE:
			fprintf(fp, "end capture (%lu)\n", (unsigned long)node->backref.n);
			break;

		case RE_INDEP:
			fputs("independent submatch {\n", fp);
			dumpnode(fp, node->indep.arg, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /independent submatch\n", fp);
			break;

		case RE_LITERAL:
			fputs("literal \"", fp);
			dumpliter(fp, node);
			fputs("\"\n", fp);
			break;

		case RE_N_CLASS:
			fputs("negated character class '", fp);
			dumpclass(fp, node->class.vec);
			fputs("'\n", fp);
			break;

		case RE_N_DEFCLASS:
			fprintf(fp, "character class %c!\n", toupper(cdefclass(node->defclass.vec)));
			break;

		case RE_PARTIAL:
			fputs("abbreviation `", fp);
			dumpliter(fp, node);
			fputs("`\n", fp);
			break;

		case RE_REP_FRUGAL:
			fputs("repetition ", fp);
			if (node->rep.min == node->rep.max) {
				fprintf(fp, ":%lu:?", (unsigned long)node->rep.min);
			} else if (node->rep.max != (size_t)-1) {
				fprintf(fp, ":%lu,%lu:?", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
			} else {
				switch (node->rep.min) {
					case 0: fputs("*?", fp); break;
					case 1: fputs("+?", fp); break;
					default: fprintf(fp, ":%lu,:?", (unsigned long)node->rep.min); break;
				}
			}
			fputs(" {\n", fp);
			dumpnode(fp, node->rep.arg, node, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /repetition\n", fp);
			break;

		case RE_REP_GREEDY:
			fputs("repetition ", fp);
			if (node->rep.min == node->rep.max) {
				fprintf(fp, ":%lu:", (unsigned long)node->rep.min);
			} else if (node->rep.max != (size_t)-1) {
				fprintf(fp, ":%lu,%lu:", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
			} else {
				switch (node->rep.min) {
					case 0: fputs("*", fp); break;
					case 1: fputs("+", fp); break;
					default: fprintf(fp, ":%lu,:", (unsigned long)node->rep.min); break;
				}
			}
			fputs(" {\n", fp);
			dumpnode(fp, node->rep.arg, node, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /repetition\n", fp);
			break;

		case RE_SELECT: {
			const re_node *const be = branchend(node->select.arg, node->select.next);
			fputs("if {\n", fp);
			dumpnode(fp, node->select.cond, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} then {\n", fp);
			dumpnode(fp, node->select.arg, be, indent + 1);
			if (node->select.next != be) {
				fprintf(fp, "[%p] ", (const void *)node);
				do_indent(fp, indent);
				fputs("} else {\n", fp);
				dumpnode(fp, node->select.next, be, indent + 1);
			}
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /if\n", fp);
			dumpnode(fp, be, end, indent);
			return;
		}

		case RE_STUFF_FRUGAL:
			fputs(".*?\n", fp);
			break;

		case RE_STUFF_GREEDY:
			fputs(".*\n", fp);
			break;

		case RE_XREP_FRUGAL:
			fputs("simple repetition ", fp);
			if (node->rep.min == node->rep.max) {
				fprintf(fp, ":%lu:?", (unsigned long)node->rep.min);
			} else if (node->rep.max != (size_t)-1) {
				fprintf(fp, ":%lu,%lu:?", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
			} else {
				switch (node->rep.min) {
					case 0: fputs("*?", fp); break;
					case 1: fputs("+?", fp); break;
					default: fprintf(fp, ":%lu,:?", (unsigned long)node->rep.min); break;
				}
			}
			fputs(" {\n", fp);
			dumpnode(fp, node->rep.arg, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /simple repetition\n", fp);
			break;

		case RE_XREP_GREEDY:
			fputs("simple repetition ", fp);
			if (node->rep.min == node->rep.max) {
				fprintf(fp, ":%lu:", (unsigned long)node->rep.min);
			} else if (node->rep.max != (size_t)-1) {
				fprintf(fp, ":%lu,%lu:", (unsigned long)node->rep.min, (unsigned long)node->rep.max);
			} else {
				switch (node->rep.min) {
					case 0: fputs("*", fp); break;
					case 1: fputs("+", fp); break;
					default: fprintf(fp, ":%lu,:", (unsigned long)node->rep.min); break;
				}
			}
			fputs(" {\n", fp);
			dumpnode(fp, node->rep.arg, NULL, indent + 1);
			fprintf(fp, "[%p] ", (const void *)node);
			do_indent(fp, indent);
			fputs("} /simple repetition\n", fp);
			break;
	}
	dumpnode(fp, node->x.next, end, indent);
}

void re_dump(const t_regex *re, FILE *fp) {
	fprintf(fp, "RE: minlen=%lu, anchor=%s\n", (unsigned long)re->minlen, re->flags & FL_ANCHOR_START ? "start" : "none");
	dumpnode(fp, re->start, NULL, 0);
}

static void cat_n(String *s, unsigned long n) {
	char big_enough[100];
	sprintf(big_enough, "%lu", n);
	St_cat_s(s, big_enough);
}

static void decom_repeat(String *s, const re_node *node) {
	assert(
		node->x.type == RE_REP_FRUGAL ||
		node->x.type == RE_REP_GREEDY ||
		node->x.type == RE_XREP_FRUGAL ||
		node->x.type == RE_XREP_GREEDY
	);

	if (node->rep.min == 0 && node->rep.max == (size_t)-1) {
		St_cat_c(s, '*');
	} else if (node->rep.min == 0 && node->rep.max == 1) {
		St_cat_c(s, '?');
	} else if (node->rep.min == 1 && node->rep.max == (size_t)-1) {
		St_cat_c(s, '+');
	} else if (node->rep.min == node->rep.max) {
		St_cat_c(s, ':');
		cat_n(s, node->rep.min);
		St_cat_c(s, ':');
	} else if (node->rep.max == (size_t)-1) {
		St_cat_c(s, ':');
		cat_n(s, node->rep.min);
		St_cat_m(s, ",:", 2);
	} else {
		St_cat_c(s, ':');
		cat_n(s, node->rep.min);
		St_cat_c(s, ',');
		cat_n(s, node->rep.max);
		St_cat_c(s, ':');
	}

	if (node->rep.type == RE_REP_FRUGAL || node->rep.type == RE_XREP_FRUGAL) {
		St_cat_c(s, '?');
	}
}

static void decom_node(String *s, const re_node *node, const re_node *end) {
	if (node == end) {
		return;
	}

	assert(node != NULL);
	switch (node->x.type) {
		case RE_ALTER: {
			const re_node *const be = branchend(node->alter.arg, node->alter.next);
			St_cat_c(s, '(');
			decom_node(s, node->alter.arg, be);
			St_cat_c(s, '|');
			decom_node(s, node->alter.next, be);
			St_cat_c(s, ')');
			decom_node(s, be, end);
			break;
		}

		case RE_ANYCH:
			St_cat_c(s, '.');
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_BOL:
			St_cat_m(s, "A!", 2);
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_BOS:
			St_cat_c(s, '^');
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_EOL:
			St_cat_m(s, "Z!", 2);
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_EOS:
			St_cat_c(s, '$');
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_MATCH:
			St_cat_c(s, '[');
			decom_node(s, node->indep.arg, NULL);
			St_cat_m(s, "&]", 2);
			decom_node(s, node->indep.next, end);
			break;

		case RE_AT_NOMATCH:
			St_cat_c(s, '[');
			decom_node(s, node->indep.arg, NULL);
			St_cat_m(s, "^]", 2);
			decom_node(s, node->indep.next, end);
			break;

		case RE_AT_NWBOUND:
			St_cat_m(s, "B!", 2);
			decom_node(s, node->x.next, end);
			break;

		case RE_AT_WBOUND:
			St_cat_m(s, "b!", 2);
			decom_node(s, node->x.next, end);
			break;

		case RE_BACKREF:
		case RE_BACKCHECK: {
			String tmp;
			if (node->x.type == RE_BACKCHECK) {
				St_cat_c(s, '[');
			}
			St_init(&tmp);
#if HAVE_VSNPRINTF_P
			St_xprintf(&tmp, "%lu", (unsigned long)node->backref.n);
#else
			St_num(&tmp, node->backref.n);
#endif
			St_cat(s, &tmp);
			St_clear(&tmp);
			if (node->x.type == RE_BACKCHECK) {
				St_cat_c(s, ']');
			} else {
				St_cat_c(s, ESCAPE);
			}
			decom_node(s, node->backref.next, end);
			break;
		}

		case RE_BEGIN_CAPTURE:
			St_cat_c(s, '{');
			decom_node(s, node->x.next, end);
			break;

		case RE_CLASS:
			St_cat_c(s, '\'');
			decom_class(s, node->class.vec);
			St_cat_c(s, '\'');
			decom_node(s, node->class.next, end);
			break;

		case RE_DEFCLASS:
			St_cat_c(s, cdefclass(node->defclass.vec));
			St_cat_c(s, ESCAPE);
			decom_node(s, node->defclass.next, end);
			break;

		case RE_END_CAPTURE:
			St_cat_c(s, '}');
			decom_node(s, node->x.next, end);
			break;

		case RE_INDEP:
			St_cat_c(s, '<');
			decom_node(s, node->indep.arg, NULL);
			St_cat_c(s, '>');
			decom_node(s, node->indep.next, end);
			break;

		case RE_LITERAL:
			decom_liter(s, node);
			decom_node(s, node->literal.next, end);
			break;

		case RE_N_CLASS:
			St_cat_m(s, "'^", 2);
			decom_class(s, node->class.vec);
			St_cat_c(s, '\'');
			decom_node(s, node->class.next, end);
			break;

		case RE_N_DEFCLASS:
			St_cat_c(s, toupper(cdefclass(node->defclass.vec)));
			St_cat_c(s, ESCAPE);
			decom_node(s, node->defclass.next, end);
			break;

		case RE_PARTIAL:
			St_cat_c(s, '`');
			decom_liter(s, node);
			St_cat_c(s, '`');
			break;

		case RE_REP_FRUGAL:
		case RE_REP_GREEDY:
			St_cat_c(s, '(');
			decom_node(s, node->rep.arg, node);
			St_cat_c(s, ')');
			decom_repeat(s, node);
			decom_node(s, node->rep.next, end);
			break;

		case RE_SELECT: {
			const re_node *const be = branchend(node->select.arg, node->select.next);
			St_cat_c(s, '[');
			if (be != node->select.next) {
				decom_node(s, node->select.next, be);
				St_cat_c(s, '|');
			}
			decom_node(s, node->select.arg, be);
			St_cat_c(s, '(');
			decom_node(s, node->select.cond, NULL);
			St_cat_c(s, ')');
			St_cat_m(s, "?]", 2);
			decom_node(s, be, end);
			break;
		}

		case RE_STUFF_FRUGAL:
			St_cat_m(s, ".*?", 3);
			decom_node(s, node->x.next, end);
			break;

		case RE_STUFF_GREEDY:
			St_cat_m(s, ".*", 2);
			decom_node(s, node->x.next, end);
			break;

		case RE_XREP_FRUGAL:
		case RE_XREP_GREEDY:
			St_cat_c(s, '(');
			decom_node(s, node->rep.arg, NULL);
			St_cat_c(s, ')');
			decom_repeat(s, node);
			decom_node(s, node->rep.next, end);
			break;
	}
}

void re_decompile(const t_regex *re, String *s) {
	St_zero(s);
	decom_node(s, re->start, NULL);
}