Mercurial > repo
comparison interps/7/7.pl @ 11884:d0cc5812f4bb draft
<ais523> ` mv 7.pl interps/7
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Sat, 20 Jul 2019 01:30:35 +0000 |
parents | 7.pl@766bdea3dbe9 |
children |
comparison
equal
deleted
inserted
replaced
11883:766bdea3dbe9 | 11884:d0cc5812f4bb |
---|---|
1 #!/usr/bin/env perl | |
2 use utf8; | |
3 q{ | |
4 | |
5 =pod | |
6 | |
7 =encoding UTF8 | |
8 | |
9 =head1 NAME | |
10 | |
11 7 - an esoteric programming language | |
12 | |
13 =head1 SYNOPSIS | |
14 | |
15 ./7 program.7 # run | |
16 ./7 -d program.7 # debug | |
17 | |
18 =head1 DESCRIPTION | |
19 | |
20 7 is an esoteric programming language, inspired by Underload (although | |
21 it has diverged somewhat), but aiming for small source code | |
22 representation. | |
23 | |
24 The language operates using two main pieces of internal state: the | |
25 I<frame>, which is a vaguely stack-like construct that holds data; and | |
26 the I<command list>. The frame consists of a sequence of commands | |
27 separated by I<bars>; the command list is also a sequence of commands. | |
28 | |
29 The program operates by repeatedly removing the first command from the | |
30 command list, then performing the action specified by that command on | |
31 the frame. Once the command list is empty, the portion of the frame | |
32 to the right of the last bar is copied to the start of the command | |
33 list; this operation is called "cycling". (Cycling exits the program | |
34 if the frame is completely empty, and is an error if the frame is | |
35 nonempty but has no bars; also, as a special case, it deletes the | |
36 rightmost bar if nothing is to its right, in order to avoid a trivial | |
37 infinite loop.) These are the only ways in which the command list can | |
38 be modified. | |
39 | |
40 The frame is a "working space" for the program, and so can be modified | |
41 in rather more interesting ways than the command list. However, | |
42 typically only the section to the right of the rightmost bar is | |
43 modified, so operations tend to be fairly local and self-contained. | |
44 | |
45 =head2 Frame operations | |
46 | |
47 There is a range of basic operations that can be performed on the | |
48 frame: | |
49 | |
50 =over 4 | |
51 | |
52 =item * | |
53 | |
54 You can I<append> something to the frame, either a bar or a command. | |
55 This adds the bar or command in question to the right of the frame. | |
56 | |
57 =item * | |
58 | |
59 You can I<disbar> the frame, removing the rightmost bar ("closing the | |
60 gap" where the bar was in the process). | |
61 | |
62 =item * | |
63 | |
64 You can I<copy> a section of the frame to the end. A section is the | |
65 region of the frame between two bars, or from a bar to an end of the | |
66 frame. When copying a section, a bar is placed to separate the copy | |
67 from the rest of the frame. | |
68 | |
69 =item * | |
70 | |
71 You can I<move> a section of the frame to the end. This is like | |
72 copying, except that original section (together with one of the bars | |
73 between it and the adjacent sections) is removed. | |
74 | |
75 =item * | |
76 | |
77 You can I<delete> the end of the frame, again from the rightmost bar | |
78 onwards. This will delete the bar in the process. | |
79 | |
80 =item * | |
81 | |
82 You can I<output> the end of the frame to the user. This outputs the | |
83 area strictly beyond the rightmost bar. (Bars themselves are never | |
84 output, because they have no representation as a sequence of bits.) | |
85 | |
86 =item * | |
87 | |
88 You can I<pacify> the end of the frame, beyond the rightmost bar; this | |
89 is the most complex operation. To understand this, the first thing to | |
90 note is that the commands come in pairs; each I<active> command is | |
91 paired with a I<passive> command. Additionally, some commands have | |
92 names, and some do not have names, We can then describe the process as | |
93 follows: | |
94 | |
95 =over 4 | |
96 | |
97 =item 1. | |
98 | |
99 The "passive regions" of the section are identified. These are | |
100 regions in which all the commands are named, and for which C<7> and | |
101 C<6> commands are correctly matched, as though they were parentheses; | |
102 additionally, the substring C<76> cannot appear in a passive region. | |
103 Passive regions are made as long as possible while respecting these | |
104 rules. | |
105 | |
106 =item 2. | |
107 | |
108 Each passive region is enclosed between a C<7> and C<6> command. | |
109 | |
110 =item 3. | |
111 | |
112 Each command outside any passive region (which must by definition be | |
113 active, as all anonymous commands are active, as are C<6> and C<7>) is | |
114 converted to the corresponding passive command. | |
115 | |
116 =back | |
117 | |
118 =back | |
119 | |
120 =head2 Commands | |
121 | |
122 Now that we've defined the basic frame operations, it's possible to | |
123 define the commands. | |
124 | |
125 Eight of the twelve commands have numerical names, used in the source | |
126 code representation: the six passive commands, and two active | |
127 commands. The other four active commands have no numerical name (and | |
128 cannot appear in the source), and are given names in English to | |
129 identify them. | |
130 | |
131 =over 4 | |
132 | |
133 =item C<6> (active), C<0> (passive): Pacify then disbar | |
134 | |
135 The C<6> command pacifies the rightmost section of the frame, and then | |
136 the frame is disbarred. The passive equivalent C<0> appends a C<6> | |
137 command to the frame. | |
138 | |
139 =item C<7> (active), C<1> (passive): Append a bar | |
140 | |
141 The C<7> command appends a bar to the frame. The passive equivalent | |
142 C<1> appends a C<7> command to the frame. | |
143 | |
144 =item "copy" (active), C<2> (passive): Copy the rightmost section | |
145 | |
146 The "copy" command copies the rightmost section of the frame to the | |
147 end of the frame. The passive equivalent C<2> appends a "copy" | |
148 command to the frame. | |
149 | |
150 =item "recycle" (active) C<5> (passive): Cycle early, then delete | |
151 | |
152 The "recycle" command cycles a copy of the rightmost section of the | |
153 frame onto the command list, even if the command list isn't empty. | |
154 After doing that, it deletes the rightmost section of the frame | |
155 (including the bar that separates it from the rest of the frame). The | |
156 passive equivalent C<5> appends a "recycle" command to the frame. | |
157 | |
158 =item "grab" (active), C<4> (passive): Swap, with extra bars | |
159 | |
160 The "grab" command appends an extra bar to the frame then moves the | |
161 new third-rightmost section (the original second-rightmost section) of | |
162 the frame to the end of the frame. The passive equivalent C<4> | |
163 appends a "grab" command to the frame. | |
164 | |
165 =item "eject" (active), C<3> (passive): Output, then delete twice | |
166 | |
167 The "eject" command outputs the end of the frame (beyond the rightmost | |
168 bar) to the user, then deletes the end of the frame from the | |
169 second-rightmost bar onwards (i.e. deletes the end of the frame | |
170 twice). Exception: if the section of the frame that would be output | |
171 contains any unnamed commands, it is pacified first (pacification | |
172 leaves a region with only named commands), and a C<7> is prepended to | |
173 the output. The passive equivalent C<3> appends an "eject" command to | |
174 the frame. | |
175 | |
176 This command is also capable of producing input in some circumstances | |
177 (via outputting special codes which mean "read input"). Although the | |
178 input can be in more than one form, it's always provided to the | |
179 program as a nonnegative integer, and affects the program like this: | |
180 after the deletions from the frame, the new rightmost section of the | |
181 frame (not including the bar to its left) has its content repeated a | |
182 number of times equal to the input (e.g. 0 would remove everything to | |
183 the right of the rightmost bar, 1 would do nothing, 2 would add an | |
184 extra copy of the text to the rightmost bar, and so on.) When reading | |
185 in integers from the user, any leading whitespace and commas are | |
186 skipped, then a decimal integer is read, with any stray characters | |
187 "after" the integer being left for future input commands; if there's | |
188 no integer available, 0 will be returned (this is also a valid | |
189 integer, so you may want to alternate between character and integer | |
190 reading to detect invalid input and EOF). When reading in characters, | |
191 they're converted to a character code, and 1 is added to them (EOF | |
192 counts as character code -1, and thus will produce 0 copies). | |
193 | |
194 The very first command output via this command does not produce | |
195 output, but rather specifies the format of future output: | |
196 | |
197 =over 4 | |
198 | |
199 =item Numerical (C<0>) | |
200 | |
201 In each command list output, a value is calculated like this: the | |
202 total number of C<7> and C<1> commands, minus the total number of C<6> | |
203 and C<0> commands. Exception: if the output was automatically | |
204 pacified (and thus had a C<7> prepended), that C<7> does not count in | |
205 the total count of C<7> commands. This integer is converted to | |
206 decimal and output. | |
207 | |
208 Formatting can be controlled via outputting commands other than | |
209 strings of C<0>, C<1>, C<6>, C<7>. The output will be preceded by a | |
210 space, unless it's the first output on the line. A C<2> at the end of | |
211 the string will output a trailing newline. A C<3> at the end of the | |
212 string will append a comma to it. C<4> and C<5> can be placed at | |
213 either end of the string, and will turn into opening and closing | |
214 square brackets respectively. | |
215 | |
216 Input can be obtained via attempting to output C<23> (numerical input) | |
217 or C<24> (character input). Outputting C<25> allows selection of a | |
218 new output format (basically by "resetting" the format, meaning that | |
219 the next command to be output will select a format again). | |
220 | |
221 =item Byte-per-command (C<1>) | |
222 | |
223 Each command is output as an individual byte (thus the only bytes | |
224 output will be the digits C<0> to C<7> inclusive, as those are the | |
225 only bytes that name commands). This format does not support input. | |
226 | |
227 =item Character code as number (C<2>) | |
228 | |
229 This is similar to the numerical output C<0> explained above, except | |
230 that 1 is subracted from the number, and it's then used as a character | |
231 code (rather than being output in decimal). By default, this is in | |
232 binary (i.e. octets mod 256), but this can be changed by attempting to | |
233 output a negative number; C<-1> selects binary, C<-2> selects UTF-8, | |
234 C<-3> inputs a number, C<-4> inputs a character, and C<-5> allows | |
235 selection of a new output format. Commands other than C<0>, C<1>, | |
236 C<6>, and C<7> aren't recognised here. | |
237 | |
238 =item Byte-per-three-commands (C<3>) | |
239 | |
240 Any C<6> and C<7> commands in the output are ignored. The others are | |
241 taken as triples and interpreted as base-6 numbers (as usual, in a | |
242 big-endian way), giving a value from 0 to 215 (decimal), 0 to 555 | |
243 (base 6). This is is encoded as follows: if the value is 127 | |
244 (decimal) or lower (that's 331 in base 6), it's output as an octet | |
245 directly; if the value is in the range 128 to 191 (decimal) inclusive | |
246 (that's 332 to 515 in base 6), then 64 (decimal), which is 144 (base | |
247 6), might or might not be added to it. Values above 192 (decimal) or | |
248 520 (base 6) control this addition; outputting 522 (the default) | |
249 requests the addition to happen in a way that makes the output look as | |
250 much like UTF-8 as possible; 520 specifies never adding (until this | |
251 condition is changed); 521 specifies always adding. You can also | |
252 request input, with 523 requesting an integer and 524 requesting a | |
253 character; 525 allows selection of a new output format. Other values | |
254 are currently reserved for future expansion and should not be used. | |
255 | |
256 =item Truncated octal (C<4>) | |
257 | |
258 The output is encoded using the octal source encoding. However, if it | |
259 does not come to a whole number of bytes, any spare bits are discarded | |
260 (as opposed to padding them out, as would for example happen with the | |
261 source format C<7> when the source were in binary). This format does | |
262 not support input. | |
263 | |
264 =item US-TTY (C<5>) | |
265 | |
266 Any C<6> and C<7> commands in the output are ignored. The others are | |
267 taken in pairs and interpreted as base-6 numbers (as usual, in a | |
268 big-endian way), giving a value from 0 to 35 (decimal), 0 to 55 (base | |
269 6). These are then in turn interpreted as being in an expanded | |
270 version of the US-TTY character set. This is a 5-bit character set | |
271 (thus using values from 0 to 31 (decimal), 0 to 51 (base 6)); the | |
272 remaining four codes are reserved for giving instructions to the 7 | |
273 interpreter itself. It can encode more than just 31 characters via | |
274 the use of shift states; it has five different states, notated here as | |
275 Ł, ł, Ŧ, Ø, ø, with Ł being the default. (Ł and Ŧ are identical, | |
276 except that immediately after outputting any character in state Ŧ, the | |
277 shift state becomes ł; other shift states are stable until changed | |
278 explicitly. State ø is an expansion based on CLC-INTERCAL's version of | |
279 Baudot, a very similar character set to US-TTY; US-TTY reserves the | |
280 state for use by expansions but does not specify what form those might | |
281 take. Note that the tables below are not I<quite> the same as | |
282 CLC-INTERCAL's, although they are very close; however, the only | |
283 differences are in the meanings of some shift codes, and in the fact | |
284 that C<#> appears in two places in order to improve compatibility with | |
285 existing US-TTY and CLC-INTERCAL-encoded text.) The character (or | |
286 control sequence) encoded depends on the shift state and the sequence | |
287 seen. | |
288 | |
289 Here's how the character set looks (shift state down the left side, | |
290 character encoding (first then second command) at the top, Ŧ encodes | |
291 the same characters as Ł): | |
292 | |
293 00000011111122222233333344444455 | |
294 01234501234501234501234501234501 | |
295 | |
296 Ł E A SIU DRJNFCKTZLWHYPQOBGØMXVł | |
297 ł e a siu drjnfcktzlwhypqobgØmxvŦ | |
298 Ø 3 - 87 $4',!:(5")2#6019?&ø./;Ł | |
299 ø ¢ + \#= *{~∀|^<[}>] @ £¬ Ø%_ Ł | |
300 | |
301 C<00> encodes NUL, C<02> encodes newline, C<04> encodes space, C<12> | |
302 encodes carriage return, C<05> in state Ø encodes BEL, C<32> in state | |
303 ø encodes backspace, C<41> in state ø encodes DEL, other blank squares | |
304 are currently undefined and should not be used. | |
305 | |
306 As for the remaining codes, C<53> requests input of an integer, C<54> | |
307 requests input of a character, and C<55> allows selection of a new | |
308 output format. C<52> is currently undefined and should not be used. | |
309 | |
310 =item Source (C<7>) | |
311 | |
312 The output is encoded the same way that the source was (and, if | |
313 necessary, will be padded with C<1> bits; note that this padding only | |
314 occurs if the program ends "naturally", not if it ends abruptly due to | |
315 ejecting beyond the bottom of the stack). | |
316 | |
317 =item Other values | |
318 | |
319 Other values are currently undefined and reserved for future extension. | |
320 | |
321 =back | |
322 | |
323 If an attempt is made to delete more sections than exist on the frame | |
324 via use of this command, the program exits immediately (although any | |
325 output will still be produced). (A program also exits if the frame | |
326 and command list are both empty.) | |
327 | |
328 =back | |
329 | |
330 =head2 Source Encoding | |
331 | |
332 A 7 source file is written via the use of the eight named commands; | |
333 the four anonymous commands cannot appear in the original | |
334 program. These form the initial command list; the initial frame | |
335 consists of two bars (with no commands before, after, or between | |
336 them). Additionally, it is not legal for a program to end with a C<7> | |
337 command. A file can either be written in ASCII, using one byte per | |
338 command, or in octal, using three bits per command. In the latter | |
339 case, when storing the program in a file, the commands are written in | |
340 such a way that the bit sequence of the commands and of the file are | |
341 the same when viewed in a big-endian way. If there's a need to pad | |
342 the end of the program, e.g. to make it round up to a multiple of 8 | |
343 bits, the program can be padded by appending any number of C<1> bits | |
344 (the fact that programs may not end with a C<7> command makes this | |
345 unambiguous). Similarly, any number of consecutive C<1> bits at the | |
346 end of the file can be deleted (they're implied into the file when a | |
347 partial command is seen). | |
348 | |
349 =cut | |
350 | |
351 } or exit; | |
352 | |
353 use warnings; | |
354 use strict; | |
355 use feature qw/state/; | |
356 use Encode qw/encode/; | |
357 use 5.012; | |
358 | |
359 my $debug; | |
360 | |
361 while (@ARGV && $ARGV[0] =~ /^-/) { | |
362 my $option = shift @ARGV; | |
363 $option eq '-d' ? $debug = 1 : | |
364 $option eq '-' ? last : | |
365 die "Unknown option $option"; | |
366 } | |
367 | |
368 my %usttytable = ( | |
369 # 0 0 0000 11 11 1122222 233 333344 444455 | |
370 # 0 1 2345 01 23 4501234 501 234501 234501 | |
371 'Ł' => "\0"."E\nA S" ."IU\rD". "RJNFCKT".'ZLW'. "HYPQOB". "GØMXVł", | |
372 'ł' => "\0"."e\na s" ."iu\rd". "rjnfckt".'zlw'. "hypqob". "gØmxvŦ", | |
373 'Ø' => "\0"."3\n- \a"."87\r\$"."4',!:(5".'")2'. "#6019?". "&ø./;Ł", | |
374 'ø' => "\0"."¢\n+ \\"."#=\r*". "{~∀|^<[".'}>]'."\b@—£¬\x7F"."—Ø%_—Ł"); | |
375 $usttytable{'Ŧ'} = $usttytable{'Ł'}; | |
376 | |
377 $| = 1; undef $/; | |
378 binmode STDIN; | |
379 # Note: we leave STDOUT in text mode until the user selects an output format | |
380 # that clearly requires binary. | |
381 my $program = <>; | |
382 my $program_was_binary = 0; | |
383 my $bits_per_byte = length unpack "B*", "\0"; | |
384 | |
385 if ($program =~ y/01234567 \n\r\t\f//c) { | |
386 # If the program contains anything other than digits, spaces, and | |
387 # \n,\r,\t,\f, then unpack it from binary. | |
388 my $binary = unpack "B*", $program; | |
389 $binary =~ s/1+$//; | |
390 $binary .= 1 while (length $binary) % 3; | |
391 $program = ""; | |
392 $program .= oct "0b$&" while $binary =~ s/^...//; | |
393 $program_was_binary = 1; | |
394 } else { | |
395 $program =~ s/\s//g; | |
396 } | |
397 | |
398 # Technically we should start with the program on the command list, but | |
399 # it's easier to start it on the frame and do an explicit-cycle in order | |
400 # to move it into place. | |
401 my $frame = "|||$program"; | |
402 my $delayed_frame_copy = 1; | |
403 my @commandlist = (); | |
404 | |
405 my $outformat = undef; | |
406 | |
407 sub pacify { | |
408 local $_ = shift; | |
409 s/((7(?2)+6|[0-5])+)/"7${1}6"=~y:67:89:r/ge; | |
410 y/67cegr89/0-7/; | |
411 $_; | |
412 } | |
413 | |
414 my $pending_octal = ""; | |
415 my $getc_buffer = undef; | |
416 | |
417 sub getin { | |
418 if ($getc_buffer) { | |
419 my $c = $getc_buffer; | |
420 undef $getc_buffer; | |
421 $debug and printf STDERR "\tRereading character code $c\n"; | |
422 return $c; | |
423 } else { | |
424 my $c = getc; | |
425 $c = defined $c ? (ord $c) + 1 : 0; | |
426 return $c; | |
427 } | |
428 } | |
429 | |
430 sub input_number { | |
431 my $c; | |
432 my $n = 0; | |
433 while (($c = getin) && (chr $c - 1) =~ /\s/) {} | |
434 if (!$c) { | |
435 $debug and print STDERR "\tRead EOF, expecting number\n"; | |
436 $delayed_frame_copy = 0; | |
437 return; | |
438 } | |
439 while ((chr $c - 1) =~ /(\d)/) { | |
440 $n *= 10; | |
441 $n += $1; | |
442 $c = getin; | |
443 !$c and last; | |
444 } | |
445 $getc_buffer = $c if $c; | |
446 $debug and print STDERR "\tRead integer value $n\n"; | |
447 $delayed_frame_copy = $n; | |
448 } | |
449 | |
450 sub input_char { | |
451 my $c = getin; | |
452 $debug and print STDERR (!$c ? "\tRead EOF, expecting character\n" : | |
453 "Read character code ".($c - 1)); | |
454 $delayed_frame_copy = $c; | |
455 } | |
456 | |
457 sub oprint { | |
458 local $_ = shift; | |
459 my $autopacify = /\D/; | |
460 state $after_newline = 1; | |
461 $autopacify and $_ = "7" . pacify $_; | |
462 length $_ or return; | |
463 | |
464 unless (defined $outformat) { | |
465 s/(.)//; | |
466 $outformat = $1; | |
467 $outformat == 7 and !$program_was_binary and $outformat = 1; | |
468 $debug and print STDERR "\tSelected output format $outformat\n"; | |
469 } | |
470 [ | |
471 sub { # 0: numerical | |
472 $_ eq '23' and input_number, return; | |
473 $_ eq '24' and input_char, return; | |
474 $_ eq '25' and (undef $outformat), return; | |
475 my $result = y/71//; | |
476 $result -= y/60//; | |
477 $result-- if $autopacify; | |
478 print " " unless $after_newline; | |
479 $after_newline = !!/2$/; | |
480 s/^[45]+// and print ($& =~ y/45/[]/r); | |
481 print $result; | |
482 s/[2345]+$// and print ($& =~ y/2345/\n,[]/r); | |
483 }, | |
484 sub { # 1: byte per command | |
485 print; | |
486 $after_newline = 0; | |
487 }, | |
488 sub { # 2: read numbers, write character codes | |
489 binmode STDOUT; | |
490 state $utf8mode = 0; | |
491 my $result = y/71//; | |
492 $result -= y/60//; | |
493 $result-- if $autopacify; | |
494 if ($result > 0) { | |
495 print $utf8mode ? encode("UTF-8", chr($result - 1)) : | |
496 encode("ISO-8859-1", chr ($result - 1)); | |
497 } else { | |
498 $result == -1 and ($utf8mode = 0), return; | |
499 $result == -2 and ($utf8mode = 1), return; | |
500 $result == -3 and input_number, return; | |
501 $result == -4 and input_char, return; | |
502 $result == -5 and (undef $outformat), return; | |
503 $result == 0 and die "Attempted to output EOF"; | |
504 die "Attempted to output negative character code $result"; | |
505 } | |
506 }, | |
507 sub { # 3: byte per three commands | |
508 state $partial = ""; | |
509 state $add64 = 194; | |
510 state $utf8bytes = 0; | |
511 binmode STDOUT; | |
512 s/[67]//g; | |
513 $partial .= $_; | |
514 while ($partial =~ s/^(.)(.)(.)//) { | |
515 my $v = $1*36 + $2 * 6 + $3; | |
516 if ($v < 128) { | |
517 print chr $v; | |
518 $utf8bytes = 0; | |
519 } elsif ($v < 192) { | |
520 if ($add64 == 192 || ($add64 == 194 && $utf8bytes)) { | |
521 print chr $v; | |
522 $utf8bytes and $utf8bytes--; | |
523 } elsif ($add64 == 193 || ($add64 == 194 && !$utf8bytes)) { | |
524 $v += 64; | |
525 print chr $v; | |
526 $utf8bytes = 1; # always at least 1 trailing byte | |
527 $v >= 0xE0 and ++$utf8bytes; | |
528 $v >= 0xF0 and ++$utf8bytes; | |
529 $v >= 0xF8 and ++$utf8bytes; | |
530 $v >= 0xFC and ++$utf8bytes; | |
531 } | |
532 } elsif ($v < 195) { | |
533 $add64 = $v; | |
534 } elsif ($v == 195) { | |
535 input_number; | |
536 } elsif ($v == 196) { | |
537 input_char; | |
538 } elsif ($v == 197) { | |
539 undef $outformat; | |
540 } else { | |
541 die "Unknown byte-per-three-commands value $v"; | |
542 } | |
543 } | |
544 }, | |
545 sub { # 4: octal | |
546 state $partial = ""; | |
547 binmode STDOUT; | |
548 $partial .= sprintf "%03b", $& while s/%.//; | |
549 print chr oct "0b$&" while $partial =~ s/^.{$bits_per_byte}//; | |
550 }, | |
551 sub { # 5: US-TTY | |
552 s/[67]//g; | |
553 state $partial = ""; | |
554 state $shiftstate = 'Ł'; | |
555 $partial .= $_; | |
556 while ($partial =~ s/^(.)(.)//) { | |
557 my $v = $1 * 6 + $2; | |
558 $v == 33 and input_number, return; | |
559 $v == 34 and input_char, return; | |
560 $v == 35 and (undef $outformat), return; | |
561 my $c = substr $usttytable{$shiftstate}, $v, 1; | |
562 if ($usttytable{$c}) { | |
563 $shiftstate = $c; | |
564 } elsif ($c eq '—') { # represents an invalid character | |
565 ...; | |
566 } else { | |
567 # TODO: Adapt to locale encoding? | |
568 print encode("UTF-8",$c); | |
569 $shiftstate eq 'Ŧ' and $shiftstate = 'ł'; | |
570 } | |
571 } | |
572 }, | |
573 sub { # 6: Unimplemented | |
574 die "Output format 6 is currently undefined"; | |
575 }, | |
576 sub { # 7: Like source; if we get here, it's octal encoding | |
577 binmode STDOUT; | |
578 $pending_octal .= sprintf "%03b", $& while s/^.//; | |
579 print chr oct "0b$&" while $pending_octal =~ s/^.{$bits_per_byte}//; | |
580 }, | |
581 ]->[$outformat]->(); | |
582 $debug and print STDERR "\n"; | |
583 } | |
584 | |
585 my $explicit = 1; | |
586 CYCLE: while ($frame ne '') { | |
587 $frame =~ /[|](\w*)$/ | |
588 or die "when cycling, frame is nonempty but has no bar"; | |
589 my $commandlist = $1; | |
590 $commandlist eq '' and $explicit = 1; | |
591 $explicit and $frame =~ s/[|](\w*)$//; | |
592 $explicit = 0; | |
593 unshift @commandlist, split /([0-57]*)/, $commandlist; | |
594 while (@commandlist) { | |
595 if ($delayed_frame_copy != 1) { | |
596 $frame =~ s/[|]\K(\w*)$/$1 x $delayed_frame_copy/e | |
597 || die "Attempt to read input with no bar in the frame"; | |
598 $delayed_frame_copy = 1; | |
599 } | |
600 $debug and print STDERR "\t$frame ", @commandlist, "\n"; | |
601 my $command = shift @commandlist; | |
602 length $command or next; | |
603 for ($command) { | |
604 /[0-57]/ and $frame .= $command =~ y/0-57/67cegr|/r; | |
605 /6/ and $frame =~ s/[|](\w*)$/pacify $1/e | |
606 || die "6 command run with no bar in the frame"; | |
607 /c/ and $frame =~ s/[|](\w*)$/|$1|$1/ | |
608 || die "'copy' command run with no bar in the frame"; | |
609 /e/ and $frame =~ s/[|]\w*[|](\w*)$/(oprint $1), ""/e | |
610 || $frame =~ s/(\w*)$/ (oprint $1), exit/e; | |
611 /g/ and $frame =~ s/(\w*)[|](\w*)$/$2||$1/ | |
612 || die "'grab' command run with no bar in the frame"; | |
613 /r/ and $explicit = 1 and next CYCLE; | |
614 } | |
615 } | |
616 } | |
617 | |
618 $pending_octal =~ s/1+$//; | |
619 if (length $pending_octal) { | |
620 $pending_octal .= 1 until length $pending_octal == $bits_per_byte; | |
621 print chr oct "0b$pending_octal"; | |
622 } |