view interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/INTERCAL/ByteCode.pm @ 9071:581584df6d82

<fizzie> revert 942e964c81c1
author HackBot
date Sun, 25 Sep 2016 20:17:31 +0000
parents 859f9b4339e6
children
line wrap: on
line source

package Language::INTERCAL::ByteCode;

# Definitions of bytecode symbols etc

# This file is part of CLC-INTERCAL

# Copyright (c) 2006-2008 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

@@DATA ByteCode@@

use strict;
use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/ByteCode.pm 1.-94.-2") =~ /\s(\S+)$/;

use Carp;
use Language::INTERCAL::Exporter '1.-94.-2';
use Language::INTERCAL::Splats '1.-94.-2', qw(:SP);
use Language::INTERCAL::Numbers '1.-94.-2';
use Language::INTERCAL::DoubleOhSeven '1.-94.-2';
use Language::INTERCAL::SharkFin '1.-94.-2';
use Language::INTERCAL::Arrays '1.-94.-2';
use Language::INTERCAL::Whirlpool '1.-94.-2';
use Language::INTERCAL::CrawlingHorror '1.-94.-2';
use Language::INTERCAL::GenericIO '1.-94.-2',
	qw($stdwrite $stdread $stdsplat $devnull);

use constant BYTE_SIZE     => 8;      # number of bits per byte (must be == 8)
use constant NUM_OPCODES   => 0x80;   # number of virtual opcodes
use constant OPCODE_RANGE  => 1 << BYTE_SIZE;
use constant BC_MASK       => OPCODE_RANGE - 1;
use constant BIGNUM_SHIFT  => BYTE_SIZE - 1;
use constant BIGNUM_RANGE  => 1 << BIGNUM_SHIFT;
use constant BIGNUM_MASK   => (BIGNUM_RANGE - 1) << 1;
use constant BYTE_SHIFT    => OPCODE_RANGE - NUM_OPCODES;

use vars qw(@EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT_OK = qw(
    bytecode bytedecode bc_list BC BCget bc_bytype bc_match BC_MASK
    BC_constants is_constant is_multibyte bc_xtype bc_skip bc_forall
    @@FILL OPCODES BC_ NAME '' 76 ' '@@
    reg_list reg_name reg_create reg_codetype reg_decode
    reg_code
);

%EXPORT_TAGS = (
    BC => [qw(
	BC BCget BC_MASK bytecode bytedecode
	@@FILL OPCODES BC_ NAME '' 76 ' '@@
    )],
);

my %bytecodes = (
    @@ALL OPCODES NAME@@ => ['@@'DESCR'@@', '@@TYPE@@', '@@NUMBER@@', '@@ARGS@@', @@CONST@@, @@ASSIGNABLE@@],
);

my %bytedecode = (
    @@ALL OPCODES NUMBER@@ => '@@'NAME'@@',
);

my @bc_list = qw(
    @@FILL OPCODES '' NAME '' 76 ' '@@
);

sub BC_@@ALL OPCODES NAME@@ () { @@NUMBER@@; }

my @reg_list = qw(
    @@FILL SPECIAL '' NAME '' 76 ' '@@
);

my %reg_list = (
    @@ALL DOUBLE_OH_SEVEN NAME@@ => ['@@'CODE'@@', @@DEFAULT@@, BC_DOS, '%', @@NUMBER@@],
    @@ALL SHARK_FIN NAME@@ => ['@@'CODE'@@', @@DEFAULT@@, BC_SHF, '^', @@NUMBER@@],
    @@ALL WHIRLPOOL NAME@@ => ['@@'CODE'@@', @@DEFAULT@@, BC_WHP, '@', @@NUMBER@@],
);

my %reg_names = (
    '%@@ALL DOUBLE_OH_SEVEN NUMBER@@' => '@@NAME@@',
    '^@@ALL SHARK_FIN NUMBER@@' => '@@NAME@@',
    '@@@ALL WHIRLPOOL NUMBER@@' => '@@NAME@@',
);

my %mulmap = map { ( $_ => 1 ) } BC_MUL, BC_STR;

sub bc_list () {
    @bc_list;
}

sub BC {
    @_ == 1 || croak "Usage: BC(value)";
    my ($val) = @_;
    croak "Invalid undefined value" unless defined $val;
    my $orig = $val;
    $val < BYTE_SHIFT
	and return ($val + NUM_OPCODES);
    $val < OPCODE_RANGE
	and return (BC_HSN, $val);
    my $div = int($val / OPCODE_RANGE);
    $div < OPCODE_RANGE
	and return (BC_OSN, $div, $val % OPCODE_RANGE);
    croak "Invalid value $orig: does not fit in one spot";
}

sub bytecode ($) {
    my ($name) = @_;
    $name =~ /^\d+$/ && $name < BYTE_SHIFT ? ($name + NUM_OPCODES)
					   : $bytecodes{$name}[2];
}

sub bytedecode ($) {
    my ($b) = @_;
    if ($b >= NUM_OPCODES) {
	my $n = $b - NUM_OPCODES;
	return () if $n >= BYTE_SHIFT;
	return "#$n" unless wantarray;
	return ("#$n", 'Constant', '#', $b, '', 1, 1);
    } else {
	return () unless exists $bytedecode{$b};
	return $bytedecode{$b} unless wantarray;
	return ($bytedecode{$b}, @{$bytecodes{$bytedecode{$b}}});
    }
}

sub BCget {
    @_ == 3 or croak "Usage: BCget(CODE, \\POSITION, END)";
    my ($code, $cp, $ep) = @_;
    $$cp >= $ep and faint(SP_INVALID, "end of code", "BCget");
    my $byte = ord(substr($code, $$cp, 1));
    $$cp++;
    if ($byte >= NUM_OPCODES) {
	return $byte - NUM_OPCODES;
    }
    if ($byte == BC_HSN) {
	$$cp >= $ep and faint(SP_INVALID, "end of code", "BCget/HSN");
	return ord(substr($code, $$cp++, 1));
    }
    if ($byte == BC_OSN) {
	$$cp + 1 >= $ep and faint(SP_INVALID, "end of code", "BCget/OSN");
	my $nx = unpack('n', substr($code, $$cp, 2));
	$$cp += 2;
	return $nx;
    }
    faint(SP_INVALID, sprintf("0x%02x", $byte), "BCget")
}

sub BC_constants () {
    (NUM_OPCODES..BC_MASK);
}

sub is_constant ($) {
    my ($byte) = @_;
    return 1 if $byte >= NUM_OPCODES ||
		$byte == BC_HSN ||
		$byte == BC_OSN;
    return 0;
}

sub is_multibyte ($) {
    my ($byte) = @_;
    return 1 if $byte == BC_HSN;
    return 2 if $byte == BC_OSN;
    0;
}

sub bc_bytype {
    @_ or croak "Usage: bc_bytype(TYPES)";
    my %types = ();
    for my $type (@_) {
	if ($type eq 'R' || $type eq 'S') {
	    $types{$type} = 0;
	    next;
	}
	if ($type =~ /^[CEP<>L\[\]]$/) {
	    $types{E} = $types{R} = $types{'#'} = 0;
	    next;
	}
	if ($type eq 'V') {
	    $types{R} = $types{V} = 0;
	    next;
	}
	if ($type eq 'O') {
	    $types{S} = 0;
	    next;
	}
    }
    my %values = exists $types{V} ? %mulmap : ();
    map {
	my ($desc, $type, $value, $args, $function) = @{$bytecodes{$_}};
	if (exists $types{$type} || exists $values{$value}) {
	    $value;
	} else {
	    ();
	}
    } keys %bytecodes;
}

sub bc_match {
    @_ >= 2 && @_ <= 4
	or croak "Usage: bc_match(PATTERN, CODE [,START [,END]])";
    my ($pattern, $code, $start, $end) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    _match($pattern, $code, $start, $end, undef);
}

sub bc_skip {
    @_ >= 1 && @_ <= 3
	or croak "Usage: bc_skip(CODE [,START [,END]])";
    my ($code, $start, $end) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    return undef if $start >= $end || $start < 0;
    my $byte = ord(substr($code, $start, 1));
    return 1 if $byte >= NUM_OPCODES;
    return undef if ! exists $bytedecode{$byte};
    my $name = $bytedecode{$byte};
    my $pattern = $bytecodes{$name}[1];
    _match($pattern, $code, $start, $end, undef);
}

sub bc_forall {
    @_ == 5
	or croak "Usage: bc_forall(PATTERN, CODE, START, END, CLOSURE)";
    my ($pattern, $code, $start, $end, $closure) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    return undef if $start >= $end || $start < 0;
    my $np = '';
    while ($pattern =~ s/^(.*?)C\(/(/) {
	my $a = $1;
	$a =~ s/(.)/$1\x01/g;
	$np .= $a . 'C';
	$np .= '(' . _args('forall', \$pattern) . ')';
	$np .= "\01";
    }
    $pattern =~ s/(.)/$1\x01/g;
    $pattern = "\x01" if $pattern eq '';
    $np .= $pattern;
    _match($np, $code, $start, $end, $closure);
}

sub bc_xtype {
    @_ == 1 or croak "Usage: bc_xtype(\\PATTERN)";
    my ($pattern) = @_;
    _args('xtype', $pattern);
}

my %typemap = (
    'S' => { 'S' => 0 },
    'O' => { 'S' => 0 },
    'E' => { 'E' => 0, 'R' => 0, '#' => 0 },
    'A' => { 'E' => 0, 'R' => 0, '#' => 0 },
    'R' => { 'R' => 0 },
    'V' => { 'R' => 0, 'V' => 0 },
    '#' => { '#' => 0 },
    'C' => { '#' => 0 },
    'Z' => { 'S' => 0, 'E' => 0, 'R' => 0, '#' => 0 },
    '*' => { 'S' => 0, 'E' => 0, 'R' => 0, '#' => 0 },
);

sub _args {
    my ($name, $pattern) = @_;
    faint(SP_BCMATCH, $name, 'Missing (') if $$pattern !~ s/^\(//;
    my $count = 1;
    my $result = '';
    while ($count > 0) {
	$$pattern =~ s/^([^\(\)]*)([\(\)])//
	    or faint(SP_BCMATCH, $name, 'Missing )');
	$count++ if $2 eq '(';
	$count-- if $2 eq ')';
	$result .= $1 . ($count ? $2 : '');
    }
    $result;
}

sub _match {
    my ($pattern, $code, $sc, $ep, $closure) = @_;
    my $osc = $sc;
    MATCH: while ($pattern ne '') {
	my $e = substr($pattern, 0, 1, '');
	if ($e eq "\x00") {
	    $closure->(undef, '>') if $closure;
	    next MATCH;
	}
	if ($e eq "\x01") {
	    $closure->($sc, undef) if $closure;
	    next MATCH;
	}
	faint(SP_INVALID, 'end of code', '_match') if $sc >= $ep;
	my $v = ord(substr($code, $sc, 1));
	if (exists $typemap{$e}) {
	    # check next opcode is correct type
	    my ($op, $desc, $type, $value, $args, $const) = bytedecode($v);
	    faint(SP_INVALID, $v, "_match: $e")
		unless defined $type;
	    faint(SP_INVALID, $type, "_match: $e")
		unless exists $typemap{$e}{$type} ||
		       (exists $mulmap{$v} && exists $typemap{$e}{V});
	    if ($e eq 'O' && $const) {
		BCget($code, \$sc, $ep);
	    } elsif ($type eq '#' && $e ne '*') {
		my $num = BCget($code, \$sc, $ep);
		$closure->($v, "#$num") if $closure;
		if ($e eq 'C') {
		    $args = _args('count', \$pattern) x $num;
		    $args .= "\x00";
		    $closure->(undef, '<') if $closure;
		} else {
		    $args = '';
		}
	    } else {
		$sc++;
		$args = '' if $e eq 'O' || $e eq '*';
		$closure->($v, $op) if $closure;
	    }
	    $pattern = $args . $pattern;
	    next MATCH;
	} elsif ($e eq 'N') {
	    # any nonzero number
	    return undef if $v == 0;
	    $closure->($v, "N$v") if $closure;
	    $sc++;
	} elsif ($e eq '<') {
	    # left grammar element
	    my $count = BCget($code, \$sc, $ep);
	    my $num = BCget($code, \$sc, $ep);
	    if ($num == 0) {
		$closure->(undef, '?<') if $closure;
	    } elsif ($num == 1 || $num == 2) {
		$closure->(undef, ',<') if $closure;
	    } else {
		$closure->(undef, ',!<') if $closure;
	    }
	    if ($count && $closure) {
		$closure->(undef, $count == 65535 ? '*' : $count);
	    }
	    $pattern = "E\x00" . $pattern;
	    next MATCH;
	} elsif ($e eq '>') {
	    # right grammar element
	    my $num = BCget($code, \$sc, $ep);
	    if ($num == 0 || $num == 6) {
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$closure->($v, $num ? '!<' : '?<') if $closure;
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 1 || $num == 2) {
		$closure->($v, ',<') if $closure;
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 3 || $num == 7) {
		$closure->($v, ',!<') if $closure;
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 4) {
		$num = BCget($code, \$sc, $ep);
		my $se = $sc + $num;
		$se <= $ep
		    or faint(SP_INVALID, '???', '_match: >');
		if ($closure) {
		    $closure->(undef, '=<');
		    while ($sc < $se) {
			$sc += _match('*', $code, $sc, $se, $closure);
		    }
		    $closure->(undef, '>');
		} else {
		    $sc = $se;
		}
		next MATCH;
	    }
	    if ($num == 15) {
		$closure->($v, '*') if $closure;
		next MATCH;
	    }
	    faint(SP_INVALID, $num, "_match: >");
	} elsif ($e eq '[') {
	    # XXX left optimise element
	    faint(SP_TODO, 'match on [');
	} elsif ($e eq ']') {
	    # XXX right optimise element
	    faint(SP_TODO, 'match on ]');
	} else {
	    faint(SP_BCMATCH, 'type', $e);
	}
    }
    $sc - $osc;
}

sub reg_list () {
    @reg_list;
}

sub reg_create {
    @_ == 2 || @_ == 3
	or croak "Usage: reg_create(REGISTER, OBJECT [, VALUE])";
    my ($rn, $object, @value) = @_;
    $rn = $reg_names{$rn} if exists $reg_names{$rn};
    if (exists $reg_list{$rn}) {
	@value = $reg_list{$rn}[1] if ! @value;
	my $rt = $reg_list{$rn}[3];
	my $dt = $reg_list{$rn}[0];
	return Language::INTERCAL::DoubleOhSeven->new($dt, $object, @value)
	    if $rt eq '%';
	return Language::INTERCAL::SharkFin->new($dt, $object, @value)
	    if $rt eq '^';
	return Language::INTERCAL::Whirlpool->new(@value)
	    if $rt eq '@';
    }
    $rn =~ /^\./
	and return Language::INTERCAL::Numbers::Spot->new(@value || 0);
    $rn =~ /^:/
	and return Language::INTERCAL::Numbers::Twospot->new(@value || 0);
    $rn =~ /^,/
	and return Language::INTERCAL::Arrays::Tail->new(@value || []);
    $rn =~ /^;/
	and return Language::INTERCAL::Arrays::Hybrid->new(@value || []);
    $rn =~ /^\@/
	and return Language::INTERCAL::Whirlpool->new();
    $rn =~ /^\_[12]$/
	and return Language::INTERCAL::CrawlingHorror->new();
    faint(SP_SPECIAL, $rn);
}

sub reg_codetype {
    @_ == 1 or croak "Usage: reg_codetype(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn} and return $reg_list{$rn}[0];
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return $reg_list{$rn}[0];
    }
    $rn =~ /^\./ and return 'spot';
    $rn =~ /^:/ and return 'twospot';
    $rn =~ /^,/ and return 'tail';
    $rn =~ /^;/ and return 'hybrid';
    $rn =~ /^\@/ and return 'whirlpool';
    faint(SP_SPECIAL, $rn);
}

sub reg_name {
    @_ == 1 or croak "Usage: reg_name(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn}
	and return $reg_list{$rn}[3] . $reg_list{$rn}[4];
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return $reg_list{$rn}[3] . $reg_list{$rn}[4];
    }
    $rn =~ /^([%^\@])(.*)$/ && exists $reg_list{$2} && $reg_list{$2}[3] eq $1
	and return $reg_list{$2}[3] . $reg_list{$2}[4];
    $rn =~ s/^([\.:,;\@^%])0*(\d+)$/$1$2/ and return $rn;
    undef;
}

sub reg_code {
    @_ == 1 or croak "Usage: reg_code(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn}
	and return ($reg_list{$rn}[2], BC($reg_list{$rn}[4]));
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return ($reg_list{$rn}[2], BC($reg_list{$rn}[4]));
    }
    $rn =~ /^([%^\@])(.*)$/ && exists $reg_list{$2} && $reg_list{$2}[3] eq $1
	and return ($reg_list{$2}[2], BC($reg_list{$2}[4]));
    $rn =~ /^\.(\d+)$/ and return (BC_SPO, BC($1));
    $rn =~ /^:(\d+)$/ and return (BC_TSP, BC($1));
    $rn =~ /^,(\d+)$/ and return (BC_TAI, BC($1));
    $rn =~ /^;(\d+)$/ and return (BC_HYB, BC($1));
    $rn =~ /^\@(\d+)$/ and return (BC_WHP, BC($1));
    $rn =~ /^\%(\d+)$/ and return (BC_DOS, BC($1));
    $rn =~ /^\^(\d+)$/ and return (BC_SHF, BC($1));
    undef;
}

sub reg_decode {
    @_ == 1 or croak "Usage: reg_name(REGISTER)";
    my ($rn) = @_;
    return $rn if $rn =~ /^[.,:;\@_]/;
    if ($rn =~ /^[%^]\d+$/) {
	return undef unless exists $reg_names{$rn};
	$rn = $reg_names{$rn};
    } elsif ($rn =~ s/^([%^])//) {
	return undef unless exists $reg_list{$rn};
	return undef if $1 ne $reg_list{$rn}[3];
    } else {
	return undef unless exists $reg_list{$rn};
    }
    $reg_list{$rn}[3] . $rn;
}

1;

__END__

=pod

=head1 TITLE

Language::INTERCAL::Bytecode - intermediate language

=head1 DESCRIPTION

The CLC-INTERCAL compiler works by producing bytecode from the
program source; this bytecode can be interpreted to execute the
program immediately; alternatively, a backend can produce something
else from the bytecode, for example C or Perl source code which can
then be compiled to your computer's native object format.

The compiler itself is just some more bytecode. Thus, to produce the
compiler you need a compiler compiler, and to produce that you need
a compiler compiler compiler; to produce the latter you would need
a compiler compiler compiler compiler, and so on to infinity. To
simplify the programmer's life (eh?), the compiler compiler is able
to compile itself, and is therefore identical to the compiler compiler
compiler (etcetera).

The programmer can start the process because a pre-compiled compiler
compiler, in the form of bytecode, is provided with the CLC-INTERCAL
distribution; this compiler compiler then is able to compile all
other compilers, as well as to rebuild itself if need be.

See the online manual or the HTML documentation included with the
distribution for more information about this.

=head1 SEE ALSO

A qualified psychiatrist

=head1 AUTHOR

Claudio Calvelli - intercal (whirlpool) sdf.lonestar.org
(Please include the word INTERLEAVING in the subject when emailing that
address, or the email may be ignored)