comparison interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/INTERCAL/Generate.pm @ 996:859f9b4339e6

<Gregor> tar xf egobot.tar.xz
author HackBot
date Sun, 09 Dec 2012 19:30:08 +0000
parents
children
comparison
equal deleted inserted replaced
995:6883f5911eb7 996:859f9b4339e6
1 package Language::INTERCAL::Generate;
2
3 # Creates automatically generated files (ByteCode, Splats) from descriptions
4
5 # This file is part of CLC-INTERCAL
6
7 # Copyright (c) 2007-2008 Claudio Calvelli, all rights reserved.
8
9 # CLC-INTERCAL is copyrighted software. However, permission to use, modify,
10 # and distribute it is granted provided that the conditions set out in the
11 # licence agreement are met. See files README and COPYING in the distribution.
12
13 # Usage: perl -MLanguage::INTERCAL::Generate -e 'Generate()' [INPUT [OUTPUT]]
14
15 # If a file contains the string @@SKIPME@@ this module just copies it unchanged,
16 # this allows it to process itself.
17
18 # INPUT (or standard input) can contain the following commands to generate
19 # data-dependent lines:
20
21 # @@DATA filename@@
22 # loads filename as a data SPEC (see below)
23
24 # (prefix)@@FILL GROUP PRE FIELD POST SIZE SEP@@(suffix)
25 # fills a line with as many elements from GROUP as possible, then repeats
26 # with another line until all elements of GROUP have been listed; each
27 # element will be taken from the given FIELD and the line lenght will
28 # not exceed SIZE. (prefix) and (suffix) are added at the start and
29 # the end of each line generated; PRE and POST are added before and
30 # after each element; SEP is added between elements in the same line.
31 # The data is sorted by the given FIELD. For example:
32 # [@@FILL SPLATS 'SP_' NAME '' 76 '/'@@]
33 # may generate:
34 # [SP_BCMATCH/SP_CHARSET/SP_CIRCULAR/SP_COMMENT/SP_CREATION/SP_DIGITS]
35 # [SP_INVALID/SP_IOTYPE/SP_JUNK/SP_NONUMBER/SP_NOSUCHCHAR/SP_ROMAN/SP_SPOTS]
36 # [SP_THREESPOT/SP_TODO]
37
38 # (prefix)@@ALL GROUP FIELD@@(suffix)
39 # generates as many lines as there are elements of GROUP; each line is
40 # generated by replacing any @@FIELD@@ in (prefix) and (suffix) with
41 # the corresponding data, and replacing the @@ALL...@@ with the
42 # value of FIELD. The data is sorted by the FIELD. For example:
43 # [@@NUMBER@@ SP_@@ALL SPLATS NAME@@]
44 # may generate:
45 # [578 SP_BCMATCH]
46 # ...
47 # [1 SP_TODO]
48 # to insert a literal whirlpool where this can cause confusion use
49 # @@WHIRLPOOL@@. Note that if your GROUP has a field named WHIRLPOOL
50 # this will not be accessible.
51
52 # @@MULTI GROUP FIELD@@
53 # (content)
54 # @@MULTI@@
55 # is a multiline version of @@ALL...@@: produces a block for each
56 # element of group, sorted by FIELD, in which each line of (content)
57 # is subject to the same substitution rules as @@ALL@@. Does not
58 # automatically insert the FIELD in the output, use @@FIELD@@ for
59 # that. A special syntax @@FIELD SIZE@@ allows to "fold" FIELD:
60 # for a multiline field containing blank lines, each block is
61 # folded separately.
62
63 # SPEC contains data specification in the form:
64 # @GROUP NAME FIELD...
65 # DATA
66 # @END [NAME]
67
68 # Each FIELD definition has the form NAME=TYPE where TYPE is m (multiline),
69 # 'd' (digits), 's' (string), 'w' (word) or '@TYPE' (array - cannot be
70 # used for multiline).
71
72 # Each line of DATA is one record followed by the contents of a multiline
73 # field, if present; alternatively the special line @SOURCE GROUP will
74 # include the whole of another group. The contents of the multiline field
75 # must be more indented than the record they refer to and than the record
76 # that follows, for example:
77 # DATA
78 # multiline 1
79 # multiline 2
80 # muitiline 3
81 # NEXT RECORD
82 # if a line in a multiline field starts with # it will be interpreted as
83 # a comment and ignored; if it starts with @ it will be interpreted as
84 # an escape (e.g. @END). These can be escaped with a backslash, which
85 # will be removed from the beginning of line. Note that backslashes
86 # anywhere else in the multiline fields are not touched.
87 # All lines in a multiline field will be joined together, separated by
88 # a single space (the above sequence produces "multiline 1 multiline 2
89 # multiline 3"), except a blank line which produces a double newline
90 # in the field.
91
92 use strict;
93
94 use Carp;
95 use File::Spec;
96
97 use vars qw($VERSION $PERVERSION);
98 ($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/Generate.pm 1.-94.-2") =~ /\s(\S+)$/;
99
100 my $data_suffix = '.Data';
101 my %groups;
102
103 sub Generate {
104 @ARGV >= 0 && @ARGV <= 2 or croak "Usage: Generate [INPUT [OUTPUT]]";
105 my ($input, $output) = @ARGV;
106
107 %groups = ();
108
109 # translate INPUT into OUTPUT
110 @ARGV = defined $input ? ($input) : ();
111 if (defined $output) {
112 open(STDOUT, '>', $output)
113 or die "$output: $!";
114 }
115 my $skipme = 0;
116 while (<>) {
117 my $orig = $_;
118 if (/\@\@SKIPME\@\@/) {
119 $skipme = 1;
120 }
121 if ($skipme) {
122 print;
123 next;
124 }
125 if (/^\s*\@\@DATA\s+(.*?)\@\@$/) {
126 load_spec($1 . $data_suffix);
127 next;
128 }
129 if (s/^(.*)\@\@FILL\s*//) {
130 my $line_pre = $1;
131 my $group = get_field($orig, \$_, 'w');
132 exists $groups{$group}
133 or die "Unknown group $group";
134 my $gp = $groups{$group};
135 my $item_pre = get_field($orig, \$_, 's');
136 my $item_name = get_field($orig, \$_, 'w');
137 exists $gp->{fpos}{$item_name}
138 or die "Unknown field $item_name in group $group";
139 my $item_pos = $gp->{fpos}{$item_name};
140 my $item_post = get_field($orig, \$_, 's');
141 my $line_size = get_field($orig, \$_, 'd');
142 my $item_sep = get_field($orig, \$_, 's');
143 s/^\@\@// or die "Missing \@\@ after \@\@FILL";
144 my $line_post = $_;
145 my @il = map { $_->[$item_pos] } @{$gp->{data}};
146 @il = sort_items(@il);
147 my $line = $line_pre;
148 for my $item (@il) {
149 my $nl = $line;
150 $nl .= $item_sep if $nl ne $line_pre;
151 $nl .= $item_pre . $item . $item_post;
152 if (sizeof($nl . $line_post) > $line_size) {
153 print $line, $line_post if $line ne $line_pre;
154 $nl = $line_pre . $item_pre . $item . $item_post;
155 }
156 $line = $nl;
157 }
158 print $line, $line_post if $line ne $line_pre;
159 next;
160 }
161 if (s/^(.*)\@\@ALL\s*//) {
162 my $line_pre = $1;
163 my $group = get_field($orig, \$_, 'w');
164 exists $groups{$group}
165 or die "Unknown group $group";
166 my $gp = $groups{$group};
167 my $item_name = get_field($orig, \$_, 'w');
168 exists $gp->{fpos}{$item_name}
169 or die "Unknown field $item_name in group $group";
170 my $item_pos = $gp->{fpos}{$item_name};
171 s/^\@\@// or die "Missing \@\@ after \@\@ALL";
172 my $line_post = $_;
173 my @il = map { $_->[$item_pos] } @{$gp->{data}};
174 @il = sort_items(@il);
175 my $p = $gp->{fpos};
176 check_escapes($gp, $p, $line_pre);
177 check_escapes($gp, $p, $line_post);
178 for my $il (@il) {
179 for my $item (@{$gp->{data}}) {
180 next if $item->[$item_pos] ne $il;
181 my @line = ();
182 for my $ol ($line_pre, $line_post) {
183 my $line = $ol;
184 my $trans = '';
185 while ($line =~ s/^(.*?)\@\@//) {
186 $trans .= $1;
187 $line =~ s/^(.*?)\@\@//
188 or die "Missing \@\@ closing $line";
189 my $gn = $1;
190 my $quote = $gn =~ s/^(['"]?)(\w+)\1$/$2/ ? $1 : '';
191 my $f;
192 if ($gn eq 'WHIRLPOOL') {
193 $f = '@';
194 } elsif ($gn =~ /^(.*?):(\w+)$/) {
195 $f = $item->[$p->{$1}];
196 my @a = @{$item->[$p->{$2}]};
197 $f =~ s/%/shift @a || '???'/ge;
198 } else {
199 $f = $item->[$p->{$gn}];
200 }
201 $f =~ s/([\\$quote])/\\$1/g if $quote ne '';
202 $trans .= $f;
203 }
204 push @line, $trans . $line;
205 }
206 print $line[0], $il, $line[1];
207 }
208 }
209 next;
210 }
211 if (s/^\s*\@\@MULTI\s*//) {
212 my $group = get_field($orig, \$_, 'w');
213 exists $groups{$group}
214 or die "Unknown group $group";
215 my $gp = $groups{$group};
216 my $item_name = get_field($orig, \$_, 'w');
217 exists $gp->{fpos}{$item_name}
218 or die "Unknown field $item_name in group $group";
219 my $item_pos = $gp->{fpos}{$item_name};
220 s/^\@\@\s*$// or die "Missing \@\@ after \@\@MULTI";
221 my @il = map { $_->[$item_pos] } @{$gp->{data}};
222 @il = sort_items(@il);
223 my $p = $gp->{fpos};
224 my @line = ();
225 my $found = 0;
226 while (<>) {
227 if (/^\s*\@\@MULTI\@\@\s*$/) {
228 $found = 1;
229 last;
230 }
231 push @line, $_;
232 check_escapes($gp, $p, $_);
233 }
234 $found or die "Missing \@\@MULTI\@\@";
235 for my $il (@il) {
236 for my $item (@{$gp->{data}}) {
237 next if $item->[$item_pos] ne $il;
238 print translate_escapes($gp, $p, $item, $_) for @line;
239 }
240 }
241 next;
242 }
243 if (/\@\@/) {
244 chomp;
245 die "Invalid \@\@-escape: $_";
246 }
247 print;
248 }
249 }
250
251 sub get_field {
252 my ($orig, $line, $type) = @_;
253 if ($type =~ s/^\@//) {
254 $$line =~ s/^\[\s*//
255 or die "Invalid array: missing [";
256 my @data = ();
257 while ($$line ne '' && $$line !~ s/^\]\s*//) {
258 push @data, get_field($orig, $line, $type);
259 }
260 return \@data;
261 }
262 if ($type eq 'd') {
263 $$line =~ s/^0x([[:xdigit:]]+)\s*//
264 and return hex($1);
265 $$line =~ s/^(\d+)\s*//
266 and return $1;
267 die "Invalid number: $_";
268 }
269 if ($type eq 'w') {
270 $$line =~ s/^(\w+)\s*//
271 or die "Invalid symbol: $_";
272 return $1;
273 }
274 if ($type eq 's') {
275 if ($$line =~ s/^(['"])//) {
276 # quoted string
277 my $quote = $1;
278 my $data = '';
279 while ($$line =~ s/^(.*?)([$quote\\])//) {
280 $data .= $1;
281 last if $2 eq $quote;
282 die "Invalid data: \\ at end of line" if $$line eq '';
283 $data .= substr($$line, 0, 1, '');
284 }
285 $$line =~ s/^\s+//;
286 return $data;
287 } else {
288 # bareword
289 $$line =~ s/^(\S+)\s*//
290 or die "Invalid string: $_";
291 return $1;
292 }
293 }
294 die "Internal error: type is '$type'";
295 }
296
297 sub sizeof {
298 my ($s) = @_;
299 my $l = 0;
300 while ($s ne '') {
301 my $x = substr($s, 0, 1, '');
302 if ($x eq "\t") {
303 $l = 8 * (1 + int($l / 8));
304 } else {
305 $l++;
306 }
307 }
308 $l;
309 }
310
311 sub sort_items {
312 sort {
313 return $a <=> $b if $a =~ /^\d+$/ && $b =~ /^\d+$/;
314 return -1 if $a =~ /^\d+$/;
315 return 1 if $b =~ /^\d+$/;
316 return $a cmp $b;
317 } @_;
318 }
319
320 sub field_map {
321 my ($a, $b) = @_;
322 # we are trying to append $b's data to $a...
323 my @map = ();
324 for my $n (@{$b->{fnames}}) {
325 # $a must have this field
326 return () if ! exists $a->{fpos}{$n};
327 # the fields must have the same type
328 return () if $a->{ftypes}{$n} ne $b->{ftypes}{$n};
329 my $p = $a->{fpos}{$n};
330 push @map, $p;
331 }
332 @map;
333 }
334
335 sub check_escapes {
336 my ($gp, $p, $line) = @_;
337 while ($line =~ s/^.*?\@\@//) {
338 $line =~ s/^(.*?)\@\@//
339 or die "Missing \@\@ closing $line";
340 my $gn = $1;
341 $gn =~ s/\s+HTML$//i;
342 $gn =~ s/\s+\d+$//;
343 next if $gn eq 'WHIRLPOOL';
344 my $ogn = $gn;
345 if ($gn =~ s/^(\w+):(\w+)\s*//) {
346 my $next = $gn;
347 $gn = $1;
348 exists $p->{$2}
349 or die "Invalid field name $2";
350 substr($gp->{ftypes}{$2}, 0, 1) eq '@'
351 or die "Field $2 is not an array";
352 my $mapfrom = get_field($ogn, \$next, 's');
353 my $prefix = get_field($ogn, \$next, 's');
354 my $suffix = get_field($ogn, \$next, 's');
355 }
356 $gn =~ s/^(['"])(.*)\1$/$2/;
357 exists $p->{$gn}
358 or die "Invalid field name $gn";
359 }
360 }
361
362 sub translate_escapes {
363 my ($gp, $p, $item, $line) = @_;
364 my $trans = '';
365 while ($line =~ s/^(.*?)\@\@//) {
366 $trans .= $1;
367 $line =~ s/^(.*?)\@\@//;
368 my $gn = $1;
369 if ($gn eq 'WHIRLPOOL') {
370 $trans .= '@';
371 next;
372 }
373 my $html = $gn =~ s/\s+HTML$//i;
374 my $fold = $gn =~ s/\s+(\d+)$// ? $1 : undef;
375 my ($mapfrom, $prefix, $suffix, $mapto);
376 my $ogn = $gn;
377 if ($gn =~ s/^(\w+):(\w+)\s*//) {
378 my $next = $gn;
379 $gn = $1;
380 $mapto = $2;
381 $mapfrom = get_field($ogn, \$next, 's');
382 $prefix = get_field($ogn, \$next, 's');
383 $suffix = get_field($ogn, \$next, 's');
384 }
385 my $quote = $gn =~ s/^(['"]?)(\w+)\1$/$2/ ? $1 : '';
386 my $f = $item->[$p->{$gn}];
387 if (defined $mapto) {
388 my @a = @{$item->[$p->{$mapto}]};
389 $f =~ s/$mapfrom/$prefix . (shift @a || '???'). $suffix/ge;
390 }
391 if ($html) {
392 $f =~ s/&/&amp;/gi;
393 $f =~ s/</&lt;/gi;
394 $f =~ s/>/&gt;/gi;
395 $f =~ s/"/&quot;/gi;
396 $f =~ s/I&lt;(.*?)&gt;/<I>$1<\/I>/gi;
397 $f =~ s/L&lt;Language::INTERCAL::Charset&gt;/<A HREF="charset.html">the chapter on character sets<\/A>/gi;
398 $f =~ s/L&lt;Language::INTERCAL::(?:ArrayIO|ReadNumber|WriteNumber)&gt;/<A HREF="input_output.html">the chapter on Input\/Output<\/A>/gi;
399 $f =~ s/L&lt;(.*?)&gt;/<CODE>$1<\/CODE>/gi;
400 $f =~ s/C&lt;(.*?)&gt;/<CODE>$1<\/CODE>/gi;
401 $f =~ s/\n\n+/<BR>/g;
402 }
403 if (defined $fold) {
404 my $u = $f;
405 $f = '';
406 for my $o (split(/\n\n/, $u)) {
407 while (sizeof($o) > $fold) {
408 my $g = '';
409 while ($o =~ s/^(\S*)(\s+)//) {
410 my ($n, $s) = ($1, $2);
411 if (sizeof($g . $n) > $fold) {
412 $o = $n . $s . $o;
413 last;
414 }
415 $g .= $n . $s;
416 }
417 $g =~ s/\s+$//;
418 $f .= $g . "\n";
419 }
420 $f .= $o . "\n\n";
421 }
422 $f =~ s/\n\n$//;
423 }
424 $f =~ s/([\\$quote])/\\$1/g if $quote ne '';
425 $trans .= $f;
426 }
427 $trans .= $line;
428 $trans;
429 }
430
431 sub load_spec {
432 my ($dataname) = @_;
433 my @gpath = $ENV{CLC_INTERCAL_PATH} ? ($ENV{CLC_INTERCAL_PATH}) : ();
434 my $dataspec = File::Spec->catfile(@gpath, qw(INTERCAL Generate), $dataname);
435 unless (open(DATASPEC, '<', $dataspec)) {
436 $dataspec = undef;
437 for my $path (@INC) {
438 my $d = File::Spec->catfile($path, qw(Language INTERCAL Generate), $dataname);
439 open(DATASPEC, '<', $d) or next;
440 $dataspec = $d;
441 last;
442 }
443 defined $dataspec
444 or die "$0: $dataname: $!";
445 }
446 print STDERR " ($dataspec)\n";
447 my $in_group = undef;
448 my $item_indent = undef;
449 my $last_multi = undef;
450 my $blank_line = 0;
451 while (<DATASPEC>) {
452 chomp;
453 last if /^\s*\@\__END__/;
454 if (/^\s*#|^\s*$/) {
455 $blank_line = 1;
456 next;
457 }
458 my $bl = $blank_line;
459 $blank_line = 0;
460 if (defined $in_group) {
461 if (s/^\s*\@END\s*//) {
462 die "group $in_group->{name} ended by \@END $_"
463 if $in_group->{name} ne $_;
464 if ($in_group->{has_m}) {
465 $_->[-1] = ${$_->[-1]} for @{$in_group->{data}};
466 }
467 $in_group = undef;
468 next;
469 }
470 if (s/^\s*\@SOURCE\s+//) {
471 push @{$in_group->{sources}}, $_;
472 next;
473 }
474 die "$0: Invalid \@ escape ($_)" if /^\s*\@/;
475 my $indent = s/^([ \t]+)// ? sizeof($1) : 0;
476 if ($in_group->{has_m} &&
477 defined $item_indent &&
478 $item_indent < $indent)
479 {
480 s/^\\//;
481 if ($bl) {
482 $$last_multi .= "\n\n" if $bl;
483 } elsif ($$last_multi ne '') {
484 $$last_multi .= ' ';
485 }
486 $$last_multi .= $_;
487 } else {
488 $item_indent = $indent;
489 # process group line
490 my @line = ();
491 for my $fname (@{$in_group->{fnames}}) {
492 my $ftype = $in_group->{ftypes}{$fname};
493 next if $ftype eq 'm';
494 push @line, get_field($_, \$_, $ftype);
495 }
496 die "Extra data at end of line ($_)" if $_ ne '';
497 if ($in_group->{has_m}) {
498 my $x = '';
499 $last_multi = \$x;
500 push @line, $last_multi;
501 }
502 push @{$in_group->{data}}, \@line;
503 }
504 } elsif (s/^\s*\@GROUP\s+//) {
505 my ($group, @fspec) = split;
506 die "$0: duplicate group $group" if exists $groups{$group};
507 die "$0: group $group has no fields!" unless @fspec;
508 my @fnames = ();
509 my %ftypes = ();
510 my %fpos = ();
511 my $has_m = 0;
512 for my $fs (@fspec) {
513 $fs =~ /^(\w+)=(.*)$/ or die "Invalid field definition ($fs)";
514 my ($name, $type) = ($1, lc($2));
515 exists $ftypes{$type} and die "Duplicate field name ($name)";
516 $type =~ /^(?:\@*[dws]|m)$/ or die "Invalid field type ($fs)";
517 die "Sorry, multiline fields must be last" if $has_m;
518 $has_m = 1 if $type eq 'm';
519 $fpos{$name} = scalar @fnames;
520 push @fnames, $name;
521 $ftypes{$name} = $type;
522 }
523 $in_group = {
524 fnames => \@fnames,
525 ftypes => \%ftypes,
526 fpos => \%fpos,
527 data => [],
528 sources => [],
529 name => $group,
530 has_m => $has_m,
531 };
532 $groups{$group} = $in_group;
533 } else {
534 die "Invalid line ($_)";
535 }
536 }
537 close DATASPEC;
538
539 # process SOURCE
540 for my $g (values %groups) {
541 for my $s (@{$g->{sources}}) {
542 $s ne $g && exists $groups{$s}
543 or die "Invalid source $s for $g->{name}";
544 my $d = $groups{$s};
545 @{$d->{sources}}
546 and die "Sourcing from a group containing sources ($s) not implemented";
547 my @map = field_map($g, $d)
548 or die "$g->{name} cannot source from $s: incompatible fields";
549 for my $d (@{$d->{data}}) {
550 push @{$g->{data}}, [map { $d->[$_] } @map];
551 }
552 }
553 }
554 }
555
556 1;