Mercurial > repo
view interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/INTERCAL/Reggrim.pm @ 3553:a2c0fbb7c2b1
<Roujo> revert
author | HackBot |
---|---|
date | Thu, 29 Aug 2013 20:30:48 +0000 |
parents | 859f9b4339e6 |
children |
line wrap: on
line source
package Language::INTERCAL::Reggrim; # Regular grimages - INTERCAL's answer to regular expressions # This file is part of CLC-INTERCAL # Copyright (c) 2007-2008 Claudio Calvelli, all rights reserved. # CLC-INTERCAL is copyrighted software. However, permission to use, modify, # and distribute it is granted provided that the conditions set out in the # licence agreement are met. See files README and COPYING in the distribution. use strict; use vars qw($VERSION $PERVERSION); ($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/Reggrim.pm 1.-94.-2") =~ /\s(\S+)$/; use Carp; use Language::INTERCAL::Exporter '1.-94.-2'; use Language::INTERCAL::Splats '1.-94.-2', qw(:SP); my %backslash = ( '<' => ['s', 'func', qr/^\w/], '=' => ['c', 'func', qr/^\w/], '>' => ['e', 'func', qr/^\w/], 'b' => ['r', 'list', "\b"], 'd' => ['r', 'func', qr/^\d/], 'D' => ['r', 'func', qr/^\D/], 'e' => ['r', 'list', "\e"], 'f' => ['r', 'list', "\f"], 'n' => ['r', 'list', "\012"], 'r' => ['r', 'list', "\015"], 's' => ['r', 'func', qr/^\s/], 'S' => ['r', 'func', qr/^\S/], 't' => ['r', 'list', "\t"], 'w' => ['r', 'func', qr/^\w/], 'W' => ['r', 'func', qr/^\W/], ); sub compile { @_ == 2 or croak "Usage: Language::INTERCAL::Reggrim->compile(TEXT)"; my ($class, $text) = @_; my $code = _compile($text, 0); my $cnv = _convert($code); bless $cnv, $class; } sub restore { @_ == 2 or croak "Usage: Language::INTERCAL::Reggrim->restore(DATA)"; my ($class, $data) = @_; faint(SP_TODO, "reglar grimaces"); # XXX } sub save { @_ == 1 or croak "Usage: REGGRIM->save"; my ($rg) = @_; faint(SP_TODO, "reglar grimaces"); # XXX } sub match { @_ == 3 or croak "Usage: REGGRIM->match(STRING, PLACE)"; my ($rg, $string, $place) = @_; faint(SP_TODO, "reglar grimaces"); # XXX } sub can_start { @_ == 1 or croak "Usage: REGGRIM->can_start"; my ($rg) = @_; faint(SP_TODO, "reglar grimaces"); # XXX } sub can_empty { @_ == 1 or croak "Usage: REGGRIM->can_empty"; my ($rg) = @_; faint(SP_TODO, "reglar grimaces"); # XXX } # private methods follow use constant _RANGE => 256; sub _compile { my ($text, $in_group) = @_; my $m = undef; my $alt = undef; while ($text ne '') { my $c = substr($text, 0, 1, ''); if ($c eq '.') { my $r = _full_range(); my $u = _make_range($r); $u = _test_repeat($u, \$text); $m = _sequence($m, $u); next; } if ($c eq '\\') { my ($m, $r, $x) = _backslash(\$text); my $u; if ($m eq 'r') { $u = _make_range($r); } elsif ($m eq 's') { $u = _start_of($r); } elsif ($m eq 'e') { $u = _end_of($r); } elsif ($m eq 'c') { $u = _change($r); } elsif ($m eq 'i') { $u = _inside($r); } elsif ($m eq 'o') { $u = _inside(~ $r); } else { faint(SP_INTERNAL, "Invalid range type $m"); } $u = _test_repeat($u, \$text); $m = _sequence($m, $u); next; } if ($c eq '[') { my $r = empty_range(); my $neg = 0; if ($$text =~ s/^\^//) { $neg = 1; } if ($$text =~ s/^-//) { vec($r, ord('-'), 1) = 1; } if ($$text =~ s/^\]//) { vec($r, ord(']'), 1) = 1; } while (1) { $$text eq '' and faint(SP_REGGRIM, "Invalid range"); $c = substr($$text, 0, 1, ''); if ($c eq ']') { last; } if ($c eq '\\') { my ($m, $b, $x) = _backslash(\$text); if ($m ne 'r') { faint(SP_INTERNAL, "Invalid escape in range"); } if (! defined $x) { $r |= $b; next; } } if ($$text =~ s/^-//) { $$text eq '' and faint(SP_REGGRIM, "Invalid range"); my $d = substr($$text, 0, 1, ''); if ($d eq '\\') { my ($m, $b, $x) = _backslash(\$text); if ($m ne 'r' || ! defined $x) { faint(SP_INTERNAL, "Invalid escape in range"); } $d = $x; } $c = ord($c); $d = ord($d); $c <= $d or faint(SP_REGGRIM, "Time goes backwards"); while ($c <= $d) { vec($r, ord(lc($c)), 1) = 1; vec($r, ord(uc($c)), 1) = 1; $c++; } } else { vec($r, ord(lc($c)), 1) = 1; vec($r, ord(uc($c)), 1) = 1; } } $r = ~ $r if $neg; my $u = _make_range($r); $u = _test_repeat($u, \$text); $m = _sequence($m, $u); next; } if ($c eq '(') { my $u = _compile($text, 1); $u = _test_repeat($u, \$text); $m = _sequence($m, $u); next; } if ($c eq ')') { return if $in_group; faint(SP_REGGRIM, "Invalid group"); } if ($c eq '|') { $alt = _alternative($alt, $m); $m = undef; next; } if ($c eq '?' || $c eq '*' || $c eq '+' || $c eq '{') { faint(SP_REGGRIM, "Misplaced $c"); } my $r = _empty_range(); vec($r, ord(lc($c)), 1) = 1; vec($r, ord(uc($c)), 1) = 1; my $u = _make_range($r); $u = _test_repeat($u, \$text); $m = _sequence($m, $u); next; } } sub _empty_range { my $r = ''; vec($r, _RANGE - 1, 1) = 0; $r; } sub _full_range { ~ _empty_range(); } sub _make_range { my ($r) = @_; return { initial => [[0, _full_range()]], final => [[1, _full_range()]], trans => [[1, $r], []], }; } sub _start_of { my ($r) = @_; return { initial => [[0, ~ $r]], final => [[0, $r]], trans => [[]], }; } sub _end_of { my ($r) = @_; return { initial => [[0, $r]], final => [[0, ~ $r]], trans => [[]], }; } sub _change { my ($r) = @_; return { initial => [[0, ~ $r], [1, $r]], final => [[0, $r], [1, ~ $r]], trans => [[], []], }; } sub _inside { my ($r) = @_; return { initial => [[0, $r]], final => [[0, $r]], trans => [[]], }; } sub _test_repeat { my ($u, $text) = @_; my $c = substr($$text, 0, 1, ''); $c eq '?' and return _repeat($u, 0, 1); $c eq '*' and return _repeat($u, 0, undef); $c eq '+' and return _repeat($u, 1, undef); if ($c ne '}') { $$text = $c . $$text; return $u } my ($min, $max) = (0, undef); $$text =~ s/^(\d+)// and $min = $1; $$text =~ s/^,(\d+)// and $max = $1; $$text =~ s/^\}// or faint(SP_REGGRIM, "Missing }"); _repeat($u, $min, $max); } sub _sequence { my ($m1, $m2) = @_; defined $m1 or return $m2; defined $m2 or return $m1; _sequence_or_star($m1, $m2); } sub _star { my ($m) = @_; defined $m or return $m; _sequence_or_star($m, undef); } sub _repeat { my ($m, $min, $max) = @_; defined $max && $min > $max and faint(SP_REGGRIM, "Time goes backwards"); my $m1; if ($min > 0) { $m1 = _repeat_exactly($m, $min); $max -= $min if defined $max; } else { $m1 = $m; } if (! defined $max) { return _sequence($m1, _star($m)); } if ($max == 0) { return $m1; } my $m2 = _alternative($m, undef); # zero or one times while ($max > 0) { $m1 = _sequence($m1, $m2); $max--; } $m1; } sub _repeat_exactly { my ($m, $num) = @_; $num < 1 and return undef; defined $m or return $m; $num == 1 and return $m; my $num2 = int($num / 2); my $m1 = _repeat_exactly($m, $num2); my $m2 = _repeat_exactly($m, $num - $num2); _sequence($m1, $m2); } sub _alternative { my ($m1, $m2) = @_; defined $m1 or return $m2; defined $m2 or return _repeat($m1, 0, 1); # m1|(empty) my @i = @{$m1->{initial}}; my @f = @{$m1->{final}}; my @t = @{$m1->{trans}}; my $diff = @t; for my $i (@{$m2->{initial}}) { my ($to, $range) = @$i; push @i, [$to + $diff, $range]; } for my $f (@{$m2->{final}}) { my ($from, $range) = @$f; push @f, [$from + $diff, $range]; } for my $t (@{$m2->{trans}}) { my @state = (); for my $s (@$t) { my ($from, $to, $range) = @$s; push @state, [$from + $diff, $to + $diff, $range]; } push @t, \@state; } return { initial => \@i, final => \@f, trans => \@t, }; } sub _backslash { my ($text) = @_; $$text eq '' and faint(SP_REGGRIM, "There is no escape!"); my $c = substr($$text, 0, 1, ''); if (exists $backslash{$c}) { my ($m, $t, $f, $z) = @{$backslash{$c}}; if ($t eq 'func') { if (! defined $z) { $z = ''; for (my $i = 0; $i < _RANGE; $i++) { my $d = chr($i); $z .= $d if $d =~ $f; } $backslash{$c}[3] = $z; } $f = $z; } elsif ($t ne 'list') { faint(SP_INTERNAL, "Invalid escape in reggrim tables"); } my $x = length($f) == 1 ? $f : undef; my $r = _empty_range(); while ($f ne '') { my $d = substr($f, 0, 1, ''); vec($r, ord(lc($d)), 1) = 1; vec($r, ord(uc($d)), 1) = 1; } return ($m, $r, $x); } else { my $r = _empty_range(); vec($r, ord(lc($c)), 1) = 1; vec($r, ord(uc($c)), 1) = 1; return ('r', $r, $c); } } # XXX sub _sequence_or_star # XXX sub _convert 1;