view interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/blib/lib/Language/INTERCAL/ByteCode.pm @ 9071:581584df6d82

<fizzie> revert 942e964c81c1
author HackBot
date Sun, 25 Sep 2016 20:17:31 +0000
parents 859f9b4339e6
children
line wrap: on
line source

package Language::INTERCAL::ByteCode;

# Definitions of bytecode symbols etc

# This file is part of CLC-INTERCAL

# Copyright (c) 2006-2008 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.


use strict;
use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/ByteCode.pm 1.-94.-2") =~ /\s(\S+)$/;

use Carp;
use Language::INTERCAL::Exporter '1.-94.-2';
use Language::INTERCAL::Splats '1.-94.-2', qw(:SP);
use Language::INTERCAL::Numbers '1.-94.-2';
use Language::INTERCAL::DoubleOhSeven '1.-94.-2';
use Language::INTERCAL::SharkFin '1.-94.-2';
use Language::INTERCAL::Arrays '1.-94.-2';
use Language::INTERCAL::Whirlpool '1.-94.-2';
use Language::INTERCAL::CrawlingHorror '1.-94.-2';
use Language::INTERCAL::GenericIO '1.-94.-2',
	qw($stdwrite $stdread $stdsplat $devnull);

use constant BYTE_SIZE     => 8;      # number of bits per byte (must be == 8)
use constant NUM_OPCODES   => 0x80;   # number of virtual opcodes
use constant OPCODE_RANGE  => 1 << BYTE_SIZE;
use constant BC_MASK       => OPCODE_RANGE - 1;
use constant BIGNUM_SHIFT  => BYTE_SIZE - 1;
use constant BIGNUM_RANGE  => 1 << BIGNUM_SHIFT;
use constant BIGNUM_MASK   => (BIGNUM_RANGE - 1) << 1;
use constant BYTE_SHIFT    => OPCODE_RANGE - NUM_OPCODES;

use vars qw(@EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT_OK = qw(
    bytecode bytedecode bc_list BC BCget bc_bytype bc_match BC_MASK
    BC_constants is_constant is_multibyte bc_xtype bc_skip bc_forall
    BC_ABG BC_ABL BC_AWC BC_BAW BC_BBT BC_BSW BC_BUG BC_BUT BC_BWC BC_CFG
    BC_CFL BC_CHO BC_CON BC_CRE BC_CSE BC_CWB BC_DES BC_DOS BC_DSX BC_EBC
    BC_ECB BC_ENR BC_ENS BC_FIN BC_FLA BC_FOR BC_FRE BC_FRZ BC_GRA BC_GUP
    BC_HSN BC_HYB BC_IGN BC_INT BC_LAB BC_LEA BC_MKG BC_MSP BC_MUL BC_NOT
    BC_NUM BC_NXG BC_NXL BC_NXT BC_OPT BC_OSN BC_OVM BC_OVR BC_OWN BC_QUA
    BC_REG BC_REL BC_REM BC_RES BC_RET BC_RIN BC_ROM BC_ROR BC_ROU BC_RSE
    BC_SEL BC_SHF BC_SMU BC_SPL BC_SPO BC_STA BC_STE BC_STO BC_STR BC_STS
    BC_STU BC_SUB BC_SWA BC_SWB BC_SYS BC_TAI BC_TSP BC_TYP BC_UDV BC_UNE
    BC_UNS BC_USG BC_WHP BC_WIN
    reg_list reg_name reg_create reg_codetype reg_decode
    reg_code
);

%EXPORT_TAGS = (
    BC => [qw(
	BC BCget BC_MASK bytecode bytedecode
	BC_ABG BC_ABL BC_AWC BC_BAW BC_BBT BC_BSW BC_BUG BC_BUT BC_BWC
	BC_CFG BC_CFL BC_CHO BC_CON BC_CRE BC_CSE BC_CWB BC_DES BC_DOS
	BC_DSX BC_EBC BC_ECB BC_ENR BC_ENS BC_FIN BC_FLA BC_FOR BC_FRE
	BC_FRZ BC_GRA BC_GUP BC_HSN BC_HYB BC_IGN BC_INT BC_LAB BC_LEA
	BC_MKG BC_MSP BC_MUL BC_NOT BC_NUM BC_NXG BC_NXL BC_NXT BC_OPT
	BC_OSN BC_OVM BC_OVR BC_OWN BC_QUA BC_REG BC_REL BC_REM BC_RES
	BC_RET BC_RIN BC_ROM BC_ROR BC_ROU BC_RSE BC_SEL BC_SHF BC_SMU
	BC_SPL BC_SPO BC_STA BC_STE BC_STO BC_STR BC_STS BC_STU BC_SUB
	BC_SWA BC_SWB BC_SYS BC_TAI BC_TSP BC_TYP BC_UDV BC_UNE BC_UNS
	BC_USG BC_WHP BC_WIN
    )],
);

my %bytecodes = (
    ABG => ['ABstain from Gerund', 'S', '15', 'C(O)', 0, 0],
    ABL => ['ABstain from Label', 'S', '14', 'E', 0, 0],
    AWC => ['unary Add Without Carry', 'E', '102', 'E', 0, 1],
    BAW => ['binary Add Without Carry', 'E', '103', 'EE', 0, 1],
    BBT => ['binary BUT', 'E', '99', '#EE', 0, 1],
    BSW => ['binary Subtract Without Borrow', 'E', '101', 'EE', 0, 1],
    BUG => ['compiler BUG', 'S', '38', '#', 0, 0],
    BUT => ['unary BUT', 'E', '98', '#E', 0, 1],
    BWC => ['loop: Body While Condition', 'S', '26', 'SS', 0, 0],
    CFG => ['Come From Gerund', 'S', '23', 'C(O)', 0, 0],
    CFL => ['Come From Label', 'S', '22', 'E', 0, 0],
    CHO => ['Crawling HOrror', 'R', '71', 'E', 0, 1],
    CON => ['CONvert', 'S', '36', 'OO', 0, 0],
    CRE => ['CREate', 'S', '2', 'EVC(<)C(>)', 0, 0],
    CSE => ['CaSE', 'S', '47', 'EC(ES)', 0, 0],
    CWB => ['loop: Condition While Body', 'S', '25', 'SS', 0, 0],
    DES => ['DEStroy', 'S', '3', 'EVC(<)', 0, 0],
    DOS => ['Double-Oh-Seven', 'R', '69', 'E', 0, 1],
    DSX => ['Double-oh-Seven eXecution', 'S', '5', 'ES', 0, 0],
    EBC => ['Event: Body while Condition', 'S', '40', 'ES', 0, 0],
    ECB => ['Event: Condition while Body', 'S', '41', 'ES', 0, 0],
    ENR => ['ENRol', 'S', '30', 'C(E)R', 0, 0],
    ENS => ['ENSlave', 'S', '27', 'RR', 0, 0],
    FIN => ['FINish lecture', 'S', '32', '', 0, 0],
    FLA => ['set object FLAg', 'S', '63', '', 0, 0],
    FOR => ['FORget', 'S', '9', 'E', 0, 0],
    FRE => ['FREe', 'S', '28', 'RR', 0, 0],
    FRZ => ['FReeZe', 'S', '42', '', 0, 0],
    GRA => ['GRAduate', 'S', '33', 'R', 0, 0],
    GUP => ['Give UP', 'S', '18', '', 0, 0],
    HSN => ['Half Spot Number', '#', '126', 'N', 1, 1],
    HYB => ['HYBrid', 'R', '67', 'E', 0, 1],
    IGN => ['IGNore', 'S', '12', 'C(R)', 0, 0],
    INT => ['INTerleave', 'E', '105', 'EE', 0, 1],
    LAB => ['LABel', 'S', '21', 'ES', 0, 0],
    LEA => ['LEArns', 'S', '31', 'ER', 0, 0],
    MKG => ['MaKe Gerund', 'S', '61', 'EE', 0, 0],
    MSP => ['Make SPlat', 'S', '4', 'EC(V)', 0, 0],
    MUL => ['MULtiple number', 'E', '96', 'C(E)', 0, 0],
    NOT => ['NOT', 'S', '6', 'S', 0, 0],
    NUM => ['NUMber', 'E', '106', 'R', 0, 1],
    NXG => ['Next From Gerund', 'S', '35', 'C(O)', 0, 0],
    NXL => ['Next From Label', 'S', '34', 'E', 0, 0],
    NXT => ['NeXT', 'S', '7', 'E', 0, 0],
    OPT => ['OPTimise', 'S', '39', 'C([)C(])', 0, 0],
    OSN => ['One Spot Number', '#', '127', 'NN', 1, 1],
    OVM => ['OVerload Many', 'E', '107', 'EE', 0, 1],
    OVR => ['OVerload Register', 'R', '80', 'ER', 0, 1],
    OWN => ['OWNer', 'R', '82', 'ER', 0, 1],
    QUA => ['QUAntum statement', 'S', '24', 'S', 0, 0],
    REG => ['REinstate from Gerund', 'S', '17', 'C(O)', 0, 0],
    REL => ['REinstate from Label', 'S', '16', 'E', 0, 0],
    REM => ['REMember', 'S', '13', 'C(R)', 0, 0],
    RES => ['RESume', 'S', '8', 'E', 0, 0],
    RET => ['RETrieve', 'S', '11', 'C(R)', 0, 0],
    RIN => ['Reverse INterleave', 'E', '112', 'EE', 0, 1],
    ROM => ['Remove Overload Many', 'E', '108', 'E', 0, 1],
    ROR => ['Remove Overload Register', 'R', '81', 'R', 0, 1],
    ROU => ['Read OUt', 'S', '20', 'C(E)', 0, 0],
    RSE => ['Reverse SElect', 'E', '111', 'EE', 0, 1],
    SEL => ['SELect', 'E', '104', 'EE', 0, 1],
    SHF => ['SHark Fin', 'R', '70', 'E', 0, 1],
    SMU => ['SMUggle', 'S', '46', 'C(E)C(E)C(R)', 0, 0],
    SPL => ['SPLat', 'E', '109', '', 0, 1],
    SPO => ['SPOt', 'R', '64', 'E', 0, 1],
    STA => ['STAsh', 'S', '10', 'C(R)', 0, 0],
    STE => ['STEal', 'S', '45', 'C(E)C(E)C(R)', 0, 0],
    STO => ['STOre', 'S', '1', 'EA', 0, 0],
    STR => ['STRing', 'E', '97', 'C(N)', 0, 0],
    STS => ['STArt of STAtement', 'S', '0', '###C(#)S', 0, 0],
    STU => ['STUdy', 'S', '29', 'EER', 0, 0],
    SUB => ['SUBscript', 'R', '83', 'ER', 0, 1],
    SWA => ['SWAp', 'S', '37', 'OO', 0, 0],
    SWB => ['unary Subtract Without Borrow', 'E', '100', 'E', 0, 1],
    SYS => ['SYStem call', 'S', '43', 'EC(S)', 0, 0],
    TAI => ['TAIl', 'R', '66', 'E', 0, 1],
    TSP => ['Two SPot', 'R', '65', 'E', 0, 1],
    TYP => ['TYPe', 'R', '79', 'RE', 0, 1],
    UDV => ['Unary DiVide', 'E', '110', 'E', 0, 1],
    UNE => ['UNdocumented Expression', 'E', '113', 'EEC(E)', 0, 0],
    UNS => ['UNdocumented Statement', 'S', '44', 'EEC(E)', 0, 0],
    USG => ['USe Gerund', 'S', '62', 'E', 0, 0],
    WHP => ['WHirlPool', 'R', '68', 'E', 0, 1],
    WIN => ['Write IN', 'S', '19', 'C(A)', 0, 0],
);

my %bytedecode = (
    0 => 'STS',
    1 => 'STO',
    2 => 'CRE',
    3 => 'DES',
    4 => 'MSP',
    5 => 'DSX',
    6 => 'NOT',
    7 => 'NXT',
    8 => 'RES',
    9 => 'FOR',
    10 => 'STA',
    11 => 'RET',
    12 => 'IGN',
    13 => 'REM',
    14 => 'ABL',
    15 => 'ABG',
    16 => 'REL',
    17 => 'REG',
    18 => 'GUP',
    19 => 'WIN',
    20 => 'ROU',
    21 => 'LAB',
    22 => 'CFL',
    23 => 'CFG',
    24 => 'QUA',
    25 => 'CWB',
    26 => 'BWC',
    27 => 'ENS',
    28 => 'FRE',
    29 => 'STU',
    30 => 'ENR',
    31 => 'LEA',
    32 => 'FIN',
    33 => 'GRA',
    34 => 'NXL',
    35 => 'NXG',
    36 => 'CON',
    37 => 'SWA',
    38 => 'BUG',
    39 => 'OPT',
    40 => 'EBC',
    41 => 'ECB',
    42 => 'FRZ',
    43 => 'SYS',
    44 => 'UNS',
    45 => 'STE',
    46 => 'SMU',
    47 => 'CSE',
    61 => 'MKG',
    62 => 'USG',
    63 => 'FLA',
    64 => 'SPO',
    65 => 'TSP',
    66 => 'TAI',
    67 => 'HYB',
    68 => 'WHP',
    69 => 'DOS',
    70 => 'SHF',
    71 => 'CHO',
    79 => 'TYP',
    80 => 'OVR',
    81 => 'ROR',
    82 => 'OWN',
    83 => 'SUB',
    96 => 'MUL',
    97 => 'STR',
    98 => 'BUT',
    99 => 'BBT',
    100 => 'SWB',
    101 => 'BSW',
    102 => 'AWC',
    103 => 'BAW',
    104 => 'SEL',
    105 => 'INT',
    106 => 'NUM',
    107 => 'OVM',
    108 => 'ROM',
    109 => 'SPL',
    110 => 'UDV',
    111 => 'RSE',
    112 => 'RIN',
    113 => 'UNE',
    126 => 'HSN',
    127 => 'OSN',
);

my @bc_list = qw(
    ABG ABL AWC BAW BBT BSW BUG BUT BWC CFG CFL CHO CON CRE CSE CWB DES DOS
    DSX EBC ECB ENR ENS FIN FLA FOR FRE FRZ GRA GUP HSN HYB IGN INT LAB LEA
    MKG MSP MUL NOT NUM NXG NXL NXT OPT OSN OVM OVR OWN QUA REG REL REM RES
    RET RIN ROM ROR ROU RSE SEL SHF SMU SPL SPO STA STE STO STR STS STU SUB
    SWA SWB SYS TAI TSP TYP UDV UNE UNS USG WHP WIN
);

sub BC_ABG () { 15; }
sub BC_ABL () { 14; }
sub BC_AWC () { 102; }
sub BC_BAW () { 103; }
sub BC_BBT () { 99; }
sub BC_BSW () { 101; }
sub BC_BUG () { 38; }
sub BC_BUT () { 98; }
sub BC_BWC () { 26; }
sub BC_CFG () { 23; }
sub BC_CFL () { 22; }
sub BC_CHO () { 71; }
sub BC_CON () { 36; }
sub BC_CRE () { 2; }
sub BC_CSE () { 47; }
sub BC_CWB () { 25; }
sub BC_DES () { 3; }
sub BC_DOS () { 69; }
sub BC_DSX () { 5; }
sub BC_EBC () { 40; }
sub BC_ECB () { 41; }
sub BC_ENR () { 30; }
sub BC_ENS () { 27; }
sub BC_FIN () { 32; }
sub BC_FLA () { 63; }
sub BC_FOR () { 9; }
sub BC_FRE () { 28; }
sub BC_FRZ () { 42; }
sub BC_GRA () { 33; }
sub BC_GUP () { 18; }
sub BC_HSN () { 126; }
sub BC_HYB () { 67; }
sub BC_IGN () { 12; }
sub BC_INT () { 105; }
sub BC_LAB () { 21; }
sub BC_LEA () { 31; }
sub BC_MKG () { 61; }
sub BC_MSP () { 4; }
sub BC_MUL () { 96; }
sub BC_NOT () { 6; }
sub BC_NUM () { 106; }
sub BC_NXG () { 35; }
sub BC_NXL () { 34; }
sub BC_NXT () { 7; }
sub BC_OPT () { 39; }
sub BC_OSN () { 127; }
sub BC_OVM () { 107; }
sub BC_OVR () { 80; }
sub BC_OWN () { 82; }
sub BC_QUA () { 24; }
sub BC_REG () { 17; }
sub BC_REL () { 16; }
sub BC_REM () { 13; }
sub BC_RES () { 8; }
sub BC_RET () { 11; }
sub BC_RIN () { 112; }
sub BC_ROM () { 108; }
sub BC_ROR () { 81; }
sub BC_ROU () { 20; }
sub BC_RSE () { 111; }
sub BC_SEL () { 104; }
sub BC_SHF () { 70; }
sub BC_SMU () { 46; }
sub BC_SPL () { 109; }
sub BC_SPO () { 64; }
sub BC_STA () { 10; }
sub BC_STE () { 45; }
sub BC_STO () { 1; }
sub BC_STR () { 97; }
sub BC_STS () { 0; }
sub BC_STU () { 29; }
sub BC_SUB () { 83; }
sub BC_SWA () { 37; }
sub BC_SWB () { 100; }
sub BC_SYS () { 43; }
sub BC_TAI () { 66; }
sub BC_TSP () { 65; }
sub BC_TYP () { 79; }
sub BC_UDV () { 110; }
sub BC_UNE () { 113; }
sub BC_UNS () { 44; }
sub BC_USG () { 62; }
sub BC_WHP () { 68; }
sub BC_WIN () { 19; }

my @reg_list = qw(
    AR AV AW BA CF CR CW DM ES EV FS IO IS JS OR ORFH OS OSFH OWFH PS RM RT
    SNFH SP SS TH TM TRFH WT
);

my %reg_list = (
    AR => ['spot', 0, BC_DOS, '%', 10],
    AW => ['spot', 0, BC_DOS, '%', 11],
    BA => ['base', 2, BC_DOS, '%', 4],
    CF => ['comefrom', 0, BC_DOS, '%', 5],
    CR => ['charset', 0, BC_DOS, '%', 6],
    CW => ['charset', 0, BC_DOS, '%', 7],
    DM => ['zeroone', 0, BC_DOS, '%', 18],
    ES => ['symbol', 'CALC_EXPR', BC_DOS, '%', 16],
    FS => ['symbol', 'CALC_FULL', BC_DOS, '%', 15],
    IO => ['iotype', 0, BC_DOS, '%', 3],
    IS => ['symbol', 0, BC_DOS, '%', 17],
    JS => ['symbol', 'END_JUNK', BC_DOS, '%', 12],
    OS => ['spot', 0, BC_DOS, '%', 8],
    PS => ['symbol', 'PROGRAM', BC_DOS, '%', 14],
    RM => ['zeroone', 0, BC_DOS, '%', 21],
    RT => ['roman', 0, BC_DOS, '%', 2],
    SP => ['splat', 1000, BC_DOS, '%', 19],
    SS => ['symbol', 'SPACE', BC_DOS, '%', 13],
    TH => ['zeroone', 0, BC_DOS, '%', 20],
    TM => ['zeroone', 0, BC_DOS, '%', 9],
    WT => ['zeroone', 0, BC_DOS, '%', 1],
    AV => ['vector', [], BC_SHF, '^', 1],
    EV => ['vector', [], BC_SHF, '^', 2],
    OR => ['whirlpool', undef, BC_WHP, '@', 0],
    ORFH => ['whirlpool', $stdread, BC_WHP, '@', 2],
    OSFH => ['whirlpool', $stdsplat, BC_WHP, '@', 3],
    OWFH => ['whirlpool', $stdwrite, BC_WHP, '@', 1],
    SNFH => ['whirlpool', $devnull, BC_WHP, '@', 7],
    TRFH => ['whirlpool', $stdsplat, BC_WHP, '@', 9],
);

my %reg_names = (
    '%1' => 'WT',
    '%2' => 'RT',
    '%3' => 'IO',
    '%4' => 'BA',
    '%5' => 'CF',
    '%6' => 'CR',
    '%7' => 'CW',
    '%8' => 'OS',
    '%9' => 'TM',
    '%10' => 'AR',
    '%11' => 'AW',
    '%12' => 'JS',
    '%13' => 'SS',
    '%14' => 'PS',
    '%15' => 'FS',
    '%16' => 'ES',
    '%17' => 'IS',
    '%18' => 'DM',
    '%19' => 'SP',
    '%20' => 'TH',
    '%21' => 'RM',
    '^1' => 'AV',
    '^2' => 'EV',
    '@0' => 'OR',
    '@1' => 'OWFH',
    '@2' => 'ORFH',
    '@3' => 'OSFH',
    '@7' => 'SNFH',
    '@9' => 'TRFH',
);

my %mulmap = map { ( $_ => 1 ) } BC_MUL, BC_STR;

sub bc_list () {
    @bc_list;
}

sub BC {
    @_ == 1 || croak "Usage: BC(value)";
    my ($val) = @_;
    croak "Invalid undefined value" unless defined $val;
    my $orig = $val;
    $val < BYTE_SHIFT
	and return ($val + NUM_OPCODES);
    $val < OPCODE_RANGE
	and return (BC_HSN, $val);
    my $div = int($val / OPCODE_RANGE);
    $div < OPCODE_RANGE
	and return (BC_OSN, $div, $val % OPCODE_RANGE);
    croak "Invalid value $orig: does not fit in one spot";
}

sub bytecode ($) {
    my ($name) = @_;
    $name =~ /^\d+$/ && $name < BYTE_SHIFT ? ($name + NUM_OPCODES)
					   : $bytecodes{$name}[2];
}

sub bytedecode ($) {
    my ($b) = @_;
    if ($b >= NUM_OPCODES) {
	my $n = $b - NUM_OPCODES;
	return () if $n >= BYTE_SHIFT;
	return "#$n" unless wantarray;
	return ("#$n", 'Constant', '#', $b, '', 1, 1);
    } else {
	return () unless exists $bytedecode{$b};
	return $bytedecode{$b} unless wantarray;
	return ($bytedecode{$b}, @{$bytecodes{$bytedecode{$b}}});
    }
}

sub BCget {
    @_ == 3 or croak "Usage: BCget(CODE, \\POSITION, END)";
    my ($code, $cp, $ep) = @_;
    $$cp >= $ep and faint(SP_INVALID, "end of code", "BCget");
    my $byte = ord(substr($code, $$cp, 1));
    $$cp++;
    if ($byte >= NUM_OPCODES) {
	return $byte - NUM_OPCODES;
    }
    if ($byte == BC_HSN) {
	$$cp >= $ep and faint(SP_INVALID, "end of code", "BCget/HSN");
	return ord(substr($code, $$cp++, 1));
    }
    if ($byte == BC_OSN) {
	$$cp + 1 >= $ep and faint(SP_INVALID, "end of code", "BCget/OSN");
	my $nx = unpack('n', substr($code, $$cp, 2));
	$$cp += 2;
	return $nx;
    }
    faint(SP_INVALID, sprintf("0x%02x", $byte), "BCget")
}

sub BC_constants () {
    (NUM_OPCODES..BC_MASK);
}

sub is_constant ($) {
    my ($byte) = @_;
    return 1 if $byte >= NUM_OPCODES ||
		$byte == BC_HSN ||
		$byte == BC_OSN;
    return 0;
}

sub is_multibyte ($) {
    my ($byte) = @_;
    return 1 if $byte == BC_HSN;
    return 2 if $byte == BC_OSN;
    0;
}

sub bc_bytype {
    @_ or croak "Usage: bc_bytype(TYPES)";
    my %types = ();
    for my $type (@_) {
	if ($type eq 'R' || $type eq 'S') {
	    $types{$type} = 0;
	    next;
	}
	if ($type =~ /^[CEP<>L\[\]]$/) {
	    $types{E} = $types{R} = $types{'#'} = 0;
	    next;
	}
	if ($type eq 'V') {
	    $types{R} = $types{V} = 0;
	    next;
	}
	if ($type eq 'O') {
	    $types{S} = 0;
	    next;
	}
    }
    my %values = exists $types{V} ? %mulmap : ();
    map {
	my ($desc, $type, $value, $args, $function) = @{$bytecodes{$_}};
	if (exists $types{$type} || exists $values{$value}) {
	    $value;
	} else {
	    ();
	}
    } keys %bytecodes;
}

sub bc_match {
    @_ >= 2 && @_ <= 4
	or croak "Usage: bc_match(PATTERN, CODE [,START [,END]])";
    my ($pattern, $code, $start, $end) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    _match($pattern, $code, $start, $end, undef);
}

sub bc_skip {
    @_ >= 1 && @_ <= 3
	or croak "Usage: bc_skip(CODE [,START [,END]])";
    my ($code, $start, $end) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    return undef if $start >= $end || $start < 0;
    my $byte = ord(substr($code, $start, 1));
    return 1 if $byte >= NUM_OPCODES;
    return undef if ! exists $bytedecode{$byte};
    my $name = $bytedecode{$byte};
    my $pattern = $bytecodes{$name}[1];
    _match($pattern, $code, $start, $end, undef);
}

sub bc_forall {
    @_ == 5
	or croak "Usage: bc_forall(PATTERN, CODE, START, END, CLOSURE)";
    my ($pattern, $code, $start, $end, $closure) = @_;
    $start ||= 0;
    $end = length($code) if not defined $end;
    return undef if $start >= $end || $start < 0;
    my $np = '';
    while ($pattern =~ s/^(.*?)C\(/(/) {
	my $a = $1;
	$a =~ s/(.)/$1\x01/g;
	$np .= $a . 'C';
	$np .= '(' . _args('forall', \$pattern) . ')';
	$np .= "\01";
    }
    $pattern =~ s/(.)/$1\x01/g;
    $pattern = "\x01" if $pattern eq '';
    $np .= $pattern;
    _match($np, $code, $start, $end, $closure);
}

sub bc_xtype {
    @_ == 1 or croak "Usage: bc_xtype(\\PATTERN)";
    my ($pattern) = @_;
    _args('xtype', $pattern);
}

my %typemap = (
    'S' => { 'S' => 0 },
    'O' => { 'S' => 0 },
    'E' => { 'E' => 0, 'R' => 0, '#' => 0 },
    'A' => { 'E' => 0, 'R' => 0, '#' => 0 },
    'R' => { 'R' => 0 },
    'V' => { 'R' => 0, 'V' => 0 },
    '#' => { '#' => 0 },
    'C' => { '#' => 0 },
    'Z' => { 'S' => 0, 'E' => 0, 'R' => 0, '#' => 0 },
    '*' => { 'S' => 0, 'E' => 0, 'R' => 0, '#' => 0 },
);

sub _args {
    my ($name, $pattern) = @_;
    faint(SP_BCMATCH, $name, 'Missing (') if $$pattern !~ s/^\(//;
    my $count = 1;
    my $result = '';
    while ($count > 0) {
	$$pattern =~ s/^([^\(\)]*)([\(\)])//
	    or faint(SP_BCMATCH, $name, 'Missing )');
	$count++ if $2 eq '(';
	$count-- if $2 eq ')';
	$result .= $1 . ($count ? $2 : '');
    }
    $result;
}

sub _match {
    my ($pattern, $code, $sc, $ep, $closure) = @_;
    my $osc = $sc;
    MATCH: while ($pattern ne '') {
	my $e = substr($pattern, 0, 1, '');
	if ($e eq "\x00") {
	    $closure->(undef, '>') if $closure;
	    next MATCH;
	}
	if ($e eq "\x01") {
	    $closure->($sc, undef) if $closure;
	    next MATCH;
	}
	faint(SP_INVALID, 'end of code', '_match') if $sc >= $ep;
	my $v = ord(substr($code, $sc, 1));
	if (exists $typemap{$e}) {
	    # check next opcode is correct type
	    my ($op, $desc, $type, $value, $args, $const) = bytedecode($v);
	    faint(SP_INVALID, $v, "_match: $e")
		unless defined $type;
	    faint(SP_INVALID, $type, "_match: $e")
		unless exists $typemap{$e}{$type} ||
		       (exists $mulmap{$v} && exists $typemap{$e}{V});
	    if ($e eq 'O' && $const) {
		BCget($code, \$sc, $ep);
	    } elsif ($type eq '#' && $e ne '*') {
		my $num = BCget($code, \$sc, $ep);
		$closure->($v, "#$num") if $closure;
		if ($e eq 'C') {
		    $args = _args('count', \$pattern) x $num;
		    $args .= "\x00";
		    $closure->(undef, '<') if $closure;
		} else {
		    $args = '';
		}
	    } else {
		$sc++;
		$args = '' if $e eq 'O' || $e eq '*';
		$closure->($v, $op) if $closure;
	    }
	    $pattern = $args . $pattern;
	    next MATCH;
	} elsif ($e eq 'N') {
	    # any nonzero number
	    return undef if $v == 0;
	    $closure->($v, "N$v") if $closure;
	    $sc++;
	} elsif ($e eq '<') {
	    # left grammar element
	    my $count = BCget($code, \$sc, $ep);
	    my $num = BCget($code, \$sc, $ep);
	    if ($num == 0) {
		$closure->(undef, '?<') if $closure;
	    } elsif ($num == 1 || $num == 2) {
		$closure->(undef, ',<') if $closure;
	    } else {
		$closure->(undef, ',!<') if $closure;
	    }
	    if ($count && $closure) {
		$closure->(undef, $count == 65535 ? '*' : $count);
	    }
	    $pattern = "E\x00" . $pattern;
	    next MATCH;
	} elsif ($e eq '>') {
	    # right grammar element
	    my $num = BCget($code, \$sc, $ep);
	    if ($num == 0 || $num == 6) {
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$closure->($v, $num ? '!<' : '?<') if $closure;
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 1 || $num == 2) {
		$closure->($v, ',<') if $closure;
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 3 || $num == 7) {
		$closure->($v, ',!<') if $closure;
		my $count = BCget($code, \$sc, $ep);
		if ($count && $closure) {
		    $closure->(undef, $count);
		}
		$pattern = "E\x00" . $pattern;
		next MATCH;
	    }
	    if ($num == 4) {
		$num = BCget($code, \$sc, $ep);
		my $se = $sc + $num;
		$se <= $ep
		    or faint(SP_INVALID, '???', '_match: >');
		if ($closure) {
		    $closure->(undef, '=<');
		    while ($sc < $se) {
			$sc += _match('*', $code, $sc, $se, $closure);
		    }
		    $closure->(undef, '>');
		} else {
		    $sc = $se;
		}
		next MATCH;
	    }
	    if ($num == 15) {
		$closure->($v, '*') if $closure;
		next MATCH;
	    }
	    faint(SP_INVALID, $num, "_match: >");
	} elsif ($e eq '[') {
	    # XXX left optimise element
	    faint(SP_TODO, 'match on [');
	} elsif ($e eq ']') {
	    # XXX right optimise element
	    faint(SP_TODO, 'match on ]');
	} else {
	    faint(SP_BCMATCH, 'type', $e);
	}
    }
    $sc - $osc;
}

sub reg_list () {
    @reg_list;
}

sub reg_create {
    @_ == 2 || @_ == 3
	or croak "Usage: reg_create(REGISTER, OBJECT [, VALUE])";
    my ($rn, $object, @value) = @_;
    $rn = $reg_names{$rn} if exists $reg_names{$rn};
    if (exists $reg_list{$rn}) {
	@value = $reg_list{$rn}[1] if ! @value;
	my $rt = $reg_list{$rn}[3];
	my $dt = $reg_list{$rn}[0];
	return Language::INTERCAL::DoubleOhSeven->new($dt, $object, @value)
	    if $rt eq '%';
	return Language::INTERCAL::SharkFin->new($dt, $object, @value)
	    if $rt eq '^';
	return Language::INTERCAL::Whirlpool->new(@value)
	    if $rt eq '@';
    }
    $rn =~ /^\./
	and return Language::INTERCAL::Numbers::Spot->new(@value || 0);
    $rn =~ /^:/
	and return Language::INTERCAL::Numbers::Twospot->new(@value || 0);
    $rn =~ /^,/
	and return Language::INTERCAL::Arrays::Tail->new(@value || []);
    $rn =~ /^;/
	and return Language::INTERCAL::Arrays::Hybrid->new(@value || []);
    $rn =~ /^\@/
	and return Language::INTERCAL::Whirlpool->new();
    $rn =~ /^\_[12]$/
	and return Language::INTERCAL::CrawlingHorror->new();
    faint(SP_SPECIAL, $rn);
}

sub reg_codetype {
    @_ == 1 or croak "Usage: reg_codetype(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn} and return $reg_list{$rn}[0];
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return $reg_list{$rn}[0];
    }
    $rn =~ /^\./ and return 'spot';
    $rn =~ /^:/ and return 'twospot';
    $rn =~ /^,/ and return 'tail';
    $rn =~ /^;/ and return 'hybrid';
    $rn =~ /^\@/ and return 'whirlpool';
    faint(SP_SPECIAL, $rn);
}

sub reg_name {
    @_ == 1 or croak "Usage: reg_name(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn}
	and return $reg_list{$rn}[3] . $reg_list{$rn}[4];
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return $reg_list{$rn}[3] . $reg_list{$rn}[4];
    }
    $rn =~ /^([%^\@])(.*)$/ && exists $reg_list{$2} && $reg_list{$2}[3] eq $1
	and return $reg_list{$2}[3] . $reg_list{$2}[4];
    $rn =~ s/^([\.:,;\@^%])0*(\d+)$/$1$2/ and return $rn;
    undef;
}

sub reg_code {
    @_ == 1 or croak "Usage: reg_code(REGISTER)";
    my ($rn) = @_;
    exists $reg_list{$rn}
	and return ($reg_list{$rn}[2], BC($reg_list{$rn}[4]));
    if (exists $reg_names{$rn}) {
	$rn = $reg_names{$rn};
	return ($reg_list{$rn}[2], BC($reg_list{$rn}[4]));
    }
    $rn =~ /^([%^\@])(.*)$/ && exists $reg_list{$2} && $reg_list{$2}[3] eq $1
	and return ($reg_list{$2}[2], BC($reg_list{$2}[4]));
    $rn =~ /^\.(\d+)$/ and return (BC_SPO, BC($1));
    $rn =~ /^:(\d+)$/ and return (BC_TSP, BC($1));
    $rn =~ /^,(\d+)$/ and return (BC_TAI, BC($1));
    $rn =~ /^;(\d+)$/ and return (BC_HYB, BC($1));
    $rn =~ /^\@(\d+)$/ and return (BC_WHP, BC($1));
    $rn =~ /^\%(\d+)$/ and return (BC_DOS, BC($1));
    $rn =~ /^\^(\d+)$/ and return (BC_SHF, BC($1));
    undef;
}

sub reg_decode {
    @_ == 1 or croak "Usage: reg_name(REGISTER)";
    my ($rn) = @_;
    return $rn if $rn =~ /^[.,:;\@_]/;
    if ($rn =~ /^[%^]\d+$/) {
	return undef unless exists $reg_names{$rn};
	$rn = $reg_names{$rn};
    } elsif ($rn =~ s/^([%^])//) {
	return undef unless exists $reg_list{$rn};
	return undef if $1 ne $reg_list{$rn}[3];
    } else {
	return undef unless exists $reg_list{$rn};
    }
    $reg_list{$rn}[3] . $rn;
}

1;

__END__

=pod

=head1 TITLE

Language::INTERCAL::Bytecode - intermediate language

=head1 DESCRIPTION

The CLC-INTERCAL compiler works by producing bytecode from the
program source; this bytecode can be interpreted to execute the
program immediately; alternatively, a backend can produce something
else from the bytecode, for example C or Perl source code which can
then be compiled to your computer's native object format.

The compiler itself is just some more bytecode. Thus, to produce the
compiler you need a compiler compiler, and to produce that you need
a compiler compiler compiler; to produce the latter you would need
a compiler compiler compiler compiler, and so on to infinity. To
simplify the programmer's life (eh?), the compiler compiler is able
to compile itself, and is therefore identical to the compiler compiler
compiler (etcetera).

The programmer can start the process because a pre-compiled compiler
compiler, in the form of bytecode, is provided with the CLC-INTERCAL
distribution; this compiler compiler then is able to compile all
other compilers, as well as to rebuild itself if need be.

See the online manual or the HTML documentation included with the
distribution for more information about this.

=head1 SEE ALSO

A qualified psychiatrist

=head1 AUTHOR

Claudio Calvelli - intercal (whirlpool) sdf.lonestar.org
(Please include the word INTERLEAVING in the subject when emailing that
address, or the email may be ignored)