view interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/blib/lib/Language/INTERCAL/Reggrim.pm @ 9071:581584df6d82

<fizzie> revert 942e964c81c1
author HackBot
date Sun, 25 Sep 2016 20:17:31 +0000
parents 859f9b4339e6
children
line wrap: on
line source

package Language::INTERCAL::Reggrim;

# Regular grimages - INTERCAL's answer to regular expressions

# This file is part of CLC-INTERCAL

# Copyright (c) 2007-2008 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

use strict;
use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/Reggrim.pm 1.-94.-2") =~ /\s(\S+)$/;

use Carp;
use Language::INTERCAL::Exporter '1.-94.-2';
use Language::INTERCAL::Splats '1.-94.-2', qw(:SP);

my %backslash = (
    '<' => ['s', 'func', qr/^\w/],
    '=' => ['c', 'func', qr/^\w/],
    '>' => ['e', 'func', qr/^\w/],
    'b' => ['r', 'list', "\b"],
    'd' => ['r', 'func', qr/^\d/],
    'D' => ['r', 'func', qr/^\D/],
    'e' => ['r', 'list', "\e"],
    'f' => ['r', 'list', "\f"],
    'n' => ['r', 'list', "\012"],
    'r' => ['r', 'list', "\015"],
    's' => ['r', 'func', qr/^\s/],
    'S' => ['r', 'func', qr/^\S/],
    't' => ['r', 'list', "\t"],
    'w' => ['r', 'func', qr/^\w/],
    'W' => ['r', 'func', qr/^\W/],
);

sub compile {
    @_ == 2 or croak "Usage: Language::INTERCAL::Reggrim->compile(TEXT)";
    my ($class, $text) = @_;
    my $code = _compile($text, 0);
    my $cnv = _convert($code);
    bless $cnv, $class;
}

sub restore {
    @_ == 2 or croak "Usage: Language::INTERCAL::Reggrim->restore(DATA)";
    my ($class, $data) = @_;
    faint(SP_TODO, "reglar grimaces"); # XXX
}

sub save {
    @_ == 1 or croak "Usage: REGGRIM->save";
    my ($rg) = @_;
    faint(SP_TODO, "reglar grimaces"); # XXX
}

sub match {
    @_ == 3 or croak "Usage: REGGRIM->match(STRING, PLACE)";
    my ($rg, $string, $place) = @_;
    faint(SP_TODO, "reglar grimaces"); # XXX
}

sub can_start {
    @_ == 1 or croak "Usage: REGGRIM->can_start";
    my ($rg) = @_;
    faint(SP_TODO, "reglar grimaces"); # XXX
}

sub can_empty {
    @_ == 1 or croak "Usage: REGGRIM->can_empty";
    my ($rg) = @_;
    faint(SP_TODO, "reglar grimaces"); # XXX
}

# private methods follow

use constant _RANGE => 256;

sub _compile {
    my ($text, $in_group) = @_;
    my $m = undef;
    my $alt = undef;
    while ($text ne '') {
	my $c = substr($text, 0, 1, '');
	if ($c eq '.') {
	    my $r = _full_range();
	    my $u = _make_range($r);
	    $u = _test_repeat($u, \$text);
	    $m = _sequence($m, $u);
	    next;
	}
	if ($c eq '\\') {
	    my ($m, $r, $x) = _backslash(\$text);
	    my $u;
	    if ($m eq 'r') {
		$u = _make_range($r);
	    } elsif ($m eq 's') {
		$u = _start_of($r);
	    } elsif ($m eq 'e') {
		$u = _end_of($r);
	    } elsif ($m eq 'c') {
		$u = _change($r);
	    } elsif ($m eq 'i') {
		$u = _inside($r);
	    } elsif ($m eq 'o') {
		$u = _inside(~ $r);
	    } else {
		faint(SP_INTERNAL, "Invalid range type $m");
	    }
	    $u = _test_repeat($u, \$text);
	    $m = _sequence($m, $u);
	    next;
	}
	if ($c eq '[') {
	    my $r = empty_range();
	    my $neg = 0;
	    if ($$text =~ s/^\^//) {
		$neg = 1;
	    }
	    if ($$text =~ s/^-//) {
		vec($r, ord('-'), 1) = 1;
	    }
	    if ($$text =~ s/^\]//) {
		vec($r, ord(']'), 1) = 1;
	    }
	    while (1) {
		$$text eq '' and faint(SP_REGGRIM, "Invalid range");
		$c = substr($$text, 0, 1, '');
		if ($c eq ']') {
		    last;
		}
		if ($c eq '\\') {
		    my ($m, $b, $x) = _backslash(\$text);
		    if ($m ne 'r') {
			faint(SP_INTERNAL, "Invalid escape in range");
		    }
		    if (! defined $x) {
			$r |= $b;
			next;
		    }
		}
		if ($$text =~ s/^-//) {
		    $$text eq '' and faint(SP_REGGRIM, "Invalid range");
		    my $d = substr($$text, 0, 1, '');
		    if ($d eq '\\') {
			my ($m, $b, $x) = _backslash(\$text);
			if ($m ne 'r' || ! defined $x) {
			    faint(SP_INTERNAL, "Invalid escape in range");
			}
			$d = $x;
		    }
		    $c = ord($c);
		    $d = ord($d);
		    $c <= $d or faint(SP_REGGRIM, "Time goes backwards");
		    while ($c <= $d) {
			vec($r, ord(lc($c)), 1) = 1;
			vec($r, ord(uc($c)), 1) = 1;
			$c++;
		    }
		} else {
		    vec($r, ord(lc($c)), 1) = 1;
		    vec($r, ord(uc($c)), 1) = 1;
		}
	    }
	    $r = ~ $r if $neg;
	    my $u = _make_range($r);
	    $u = _test_repeat($u, \$text);
	    $m = _sequence($m, $u);
	    next;
	}
	if ($c eq '(') {
	    my $u = _compile($text, 1);
	    $u = _test_repeat($u, \$text);
	    $m = _sequence($m, $u);
	    next;
	}
	if ($c eq ')') {
	    return if $in_group;
	    faint(SP_REGGRIM, "Invalid group");
	}
	if ($c eq '|') {
	    $alt = _alternative($alt, $m);
	    $m = undef;
	    next;
	}
	if ($c eq '?' || $c eq '*' || $c eq '+' || $c eq '{') {
	    faint(SP_REGGRIM, "Misplaced $c");
	}
	my $r = _empty_range();
	vec($r, ord(lc($c)), 1) = 1;
	vec($r, ord(uc($c)), 1) = 1;
	my $u = _make_range($r);
	$u = _test_repeat($u, \$text);
	$m = _sequence($m, $u);
	next;
    }
}

sub _empty_range {
    my $r = '';
    vec($r, _RANGE - 1, 1) = 0;
    $r;
}

sub _full_range {
    ~ _empty_range();
}

sub _make_range {
    my ($r) = @_;
    return {
	initial => [[0, _full_range()]],
	final   => [[1, _full_range()]],
	trans   => [[1, $r], []],
    };
}

sub _start_of {
    my ($r) = @_;
    return {
	initial => [[0, ~ $r]],
	final   => [[0, $r]],
	trans   => [[]],
    };
}

sub _end_of {
    my ($r) = @_;
    return {
	initial => [[0, $r]],
	final   => [[0, ~ $r]],
	trans   => [[]],
    };
}

sub _change {
    my ($r) = @_;
    return {
	initial => [[0, ~ $r], [1, $r]],
	final   => [[0, $r], [1, ~ $r]],
	trans   => [[], []],
    };
}

sub _inside {
    my ($r) = @_;
    return {
	initial => [[0, $r]],
	final   => [[0, $r]],
	trans   => [[]],
    };
}

sub _test_repeat {
    my ($u, $text) = @_;
    my $c = substr($$text, 0, 1, '');
    $c eq '?' and return _repeat($u, 0, 1);
    $c eq '*' and return _repeat($u, 0, undef);
    $c eq '+' and return _repeat($u, 1, undef);
    if ($c ne '}') { $$text = $c . $$text; return $u }
    my ($min, $max) = (0, undef);
    $$text =~ s/^(\d+)// and $min = $1;
    $$text =~ s/^,(\d+)// and $max = $1;
    $$text =~ s/^\}// or faint(SP_REGGRIM, "Missing }");
    _repeat($u, $min, $max);
}

sub _sequence {
    my ($m1, $m2) = @_;
    defined $m1 or return $m2;
    defined $m2 or return $m1;
    _sequence_or_star($m1, $m2);
}

sub _star {
    my ($m) = @_;
    defined $m or return $m;
    _sequence_or_star($m, undef);
}

sub _repeat {
    my ($m, $min, $max) = @_;
    defined $max && $min > $max and faint(SP_REGGRIM, "Time goes backwards");
    my $m1;
    if ($min > 0) {
	$m1 = _repeat_exactly($m, $min);
	$max -= $min if defined $max;
    } else {
	$m1 = $m;
    }
    if (! defined $max) {
	return _sequence($m1, _star($m));
    }
    if ($max == 0) {
	return $m1;
    }
    my $m2 = _alternative($m, undef); # zero or one times
    while ($max > 0) {
	$m1 = _sequence($m1, $m2);
	$max--;
    }
    $m1;
}

sub _repeat_exactly {
    my ($m, $num) = @_;
    $num < 1 and return undef;
    defined $m or return $m;
    $num == 1 and return $m;
    my $num2 = int($num / 2);
    my $m1 = _repeat_exactly($m, $num2);
    my $m2 = _repeat_exactly($m, $num - $num2);
    _sequence($m1, $m2);
}

sub _alternative {
    my ($m1, $m2) = @_;
    defined $m1 or return $m2;
    defined $m2 or return _repeat($m1, 0, 1); # m1|(empty)
    my @i = @{$m1->{initial}};
    my @f = @{$m1->{final}};
    my @t = @{$m1->{trans}};
    my $diff = @t;
    for my $i (@{$m2->{initial}}) {
	my ($to, $range) = @$i;
	push @i, [$to + $diff, $range];
    }
    for my $f (@{$m2->{final}}) {
	my ($from, $range) = @$f;
	push @f, [$from + $diff, $range];
    }
    for my $t (@{$m2->{trans}}) {
	my @state = ();
	for my $s (@$t) {
	    my ($from, $to, $range) = @$s;
	    push @state, [$from + $diff, $to + $diff, $range];
	}
	push @t, \@state;
    }
    return {
	initial => \@i,
	final => \@f,
	trans => \@t,
    };
}

sub _backslash {
    my ($text) = @_;
    $$text eq '' and faint(SP_REGGRIM, "There is no escape!");
    my $c = substr($$text, 0, 1, '');
    if (exists $backslash{$c}) {
	my ($m, $t, $f, $z) = @{$backslash{$c}};
	if ($t eq 'func') {
	    if (! defined $z) {
		$z = '';
		for (my $i = 0; $i < _RANGE; $i++) {
		    my $d = chr($i);
		    $z .= $d if $d =~ $f;
		}
		$backslash{$c}[3] = $z;
	    }
	    $f = $z;
	} elsif ($t ne 'list') {
	    faint(SP_INTERNAL, "Invalid escape in reggrim tables");
	}
	my $x = length($f) == 1 ? $f : undef;
	my $r = _empty_range();
	while ($f ne '') {
	    my $d = substr($f, 0, 1, '');
	    vec($r, ord(lc($d)), 1) = 1;
	    vec($r, ord(uc($d)), 1) = 1;
	}
	return ($m, $r, $x);
    } else {
	my $r = _empty_range();
	vec($r, ord(lc($c)), 1) = 1;
	vec($r, ord(uc($c)), 1) = 1;
	return ('r', $r, $c);
    }
}

# XXX sub _sequence_or_star
# XXX sub _convert

1;