Mercurial > repo
comparison interps/clc-intercal/CLC-INTERCAL-Base-1.-94.-2/INTERCAL/Generate.pm @ 996:859f9b4339e6
<Gregor> tar xf egobot.tar.xz
author | HackBot |
---|---|
date | Sun, 09 Dec 2012 19:30:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
995:6883f5911eb7 | 996:859f9b4339e6 |
---|---|
1 package Language::INTERCAL::Generate; | |
2 | |
3 # Creates automatically generated files (ByteCode, Splats) from descriptions | |
4 | |
5 # This file is part of CLC-INTERCAL | |
6 | |
7 # Copyright (c) 2007-2008 Claudio Calvelli, all rights reserved. | |
8 | |
9 # CLC-INTERCAL is copyrighted software. However, permission to use, modify, | |
10 # and distribute it is granted provided that the conditions set out in the | |
11 # licence agreement are met. See files README and COPYING in the distribution. | |
12 | |
13 # Usage: perl -MLanguage::INTERCAL::Generate -e 'Generate()' [INPUT [OUTPUT]] | |
14 | |
15 # If a file contains the string @@SKIPME@@ this module just copies it unchanged, | |
16 # this allows it to process itself. | |
17 | |
18 # INPUT (or standard input) can contain the following commands to generate | |
19 # data-dependent lines: | |
20 | |
21 # @@DATA filename@@ | |
22 # loads filename as a data SPEC (see below) | |
23 | |
24 # (prefix)@@FILL GROUP PRE FIELD POST SIZE SEP@@(suffix) | |
25 # fills a line with as many elements from GROUP as possible, then repeats | |
26 # with another line until all elements of GROUP have been listed; each | |
27 # element will be taken from the given FIELD and the line lenght will | |
28 # not exceed SIZE. (prefix) and (suffix) are added at the start and | |
29 # the end of each line generated; PRE and POST are added before and | |
30 # after each element; SEP is added between elements in the same line. | |
31 # The data is sorted by the given FIELD. For example: | |
32 # [@@FILL SPLATS 'SP_' NAME '' 76 '/'@@] | |
33 # may generate: | |
34 # [SP_BCMATCH/SP_CHARSET/SP_CIRCULAR/SP_COMMENT/SP_CREATION/SP_DIGITS] | |
35 # [SP_INVALID/SP_IOTYPE/SP_JUNK/SP_NONUMBER/SP_NOSUCHCHAR/SP_ROMAN/SP_SPOTS] | |
36 # [SP_THREESPOT/SP_TODO] | |
37 | |
38 # (prefix)@@ALL GROUP FIELD@@(suffix) | |
39 # generates as many lines as there are elements of GROUP; each line is | |
40 # generated by replacing any @@FIELD@@ in (prefix) and (suffix) with | |
41 # the corresponding data, and replacing the @@ALL...@@ with the | |
42 # value of FIELD. The data is sorted by the FIELD. For example: | |
43 # [@@NUMBER@@ SP_@@ALL SPLATS NAME@@] | |
44 # may generate: | |
45 # [578 SP_BCMATCH] | |
46 # ... | |
47 # [1 SP_TODO] | |
48 # to insert a literal whirlpool where this can cause confusion use | |
49 # @@WHIRLPOOL@@. Note that if your GROUP has a field named WHIRLPOOL | |
50 # this will not be accessible. | |
51 | |
52 # @@MULTI GROUP FIELD@@ | |
53 # (content) | |
54 # @@MULTI@@ | |
55 # is a multiline version of @@ALL...@@: produces a block for each | |
56 # element of group, sorted by FIELD, in which each line of (content) | |
57 # is subject to the same substitution rules as @@ALL@@. Does not | |
58 # automatically insert the FIELD in the output, use @@FIELD@@ for | |
59 # that. A special syntax @@FIELD SIZE@@ allows to "fold" FIELD: | |
60 # for a multiline field containing blank lines, each block is | |
61 # folded separately. | |
62 | |
63 # SPEC contains data specification in the form: | |
64 # @GROUP NAME FIELD... | |
65 # DATA | |
66 # @END [NAME] | |
67 | |
68 # Each FIELD definition has the form NAME=TYPE where TYPE is m (multiline), | |
69 # 'd' (digits), 's' (string), 'w' (word) or '@TYPE' (array - cannot be | |
70 # used for multiline). | |
71 | |
72 # Each line of DATA is one record followed by the contents of a multiline | |
73 # field, if present; alternatively the special line @SOURCE GROUP will | |
74 # include the whole of another group. The contents of the multiline field | |
75 # must be more indented than the record they refer to and than the record | |
76 # that follows, for example: | |
77 # DATA | |
78 # multiline 1 | |
79 # multiline 2 | |
80 # muitiline 3 | |
81 # NEXT RECORD | |
82 # if a line in a multiline field starts with # it will be interpreted as | |
83 # a comment and ignored; if it starts with @ it will be interpreted as | |
84 # an escape (e.g. @END). These can be escaped with a backslash, which | |
85 # will be removed from the beginning of line. Note that backslashes | |
86 # anywhere else in the multiline fields are not touched. | |
87 # All lines in a multiline field will be joined together, separated by | |
88 # a single space (the above sequence produces "multiline 1 multiline 2 | |
89 # multiline 3"), except a blank line which produces a double newline | |
90 # in the field. | |
91 | |
92 use strict; | |
93 | |
94 use Carp; | |
95 use File::Spec; | |
96 | |
97 use vars qw($VERSION $PERVERSION); | |
98 ($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/Generate.pm 1.-94.-2") =~ /\s(\S+)$/; | |
99 | |
100 my $data_suffix = '.Data'; | |
101 my %groups; | |
102 | |
103 sub Generate { | |
104 @ARGV >= 0 && @ARGV <= 2 or croak "Usage: Generate [INPUT [OUTPUT]]"; | |
105 my ($input, $output) = @ARGV; | |
106 | |
107 %groups = (); | |
108 | |
109 # translate INPUT into OUTPUT | |
110 @ARGV = defined $input ? ($input) : (); | |
111 if (defined $output) { | |
112 open(STDOUT, '>', $output) | |
113 or die "$output: $!"; | |
114 } | |
115 my $skipme = 0; | |
116 while (<>) { | |
117 my $orig = $_; | |
118 if (/\@\@SKIPME\@\@/) { | |
119 $skipme = 1; | |
120 } | |
121 if ($skipme) { | |
122 print; | |
123 next; | |
124 } | |
125 if (/^\s*\@\@DATA\s+(.*?)\@\@$/) { | |
126 load_spec($1 . $data_suffix); | |
127 next; | |
128 } | |
129 if (s/^(.*)\@\@FILL\s*//) { | |
130 my $line_pre = $1; | |
131 my $group = get_field($orig, \$_, 'w'); | |
132 exists $groups{$group} | |
133 or die "Unknown group $group"; | |
134 my $gp = $groups{$group}; | |
135 my $item_pre = get_field($orig, \$_, 's'); | |
136 my $item_name = get_field($orig, \$_, 'w'); | |
137 exists $gp->{fpos}{$item_name} | |
138 or die "Unknown field $item_name in group $group"; | |
139 my $item_pos = $gp->{fpos}{$item_name}; | |
140 my $item_post = get_field($orig, \$_, 's'); | |
141 my $line_size = get_field($orig, \$_, 'd'); | |
142 my $item_sep = get_field($orig, \$_, 's'); | |
143 s/^\@\@// or die "Missing \@\@ after \@\@FILL"; | |
144 my $line_post = $_; | |
145 my @il = map { $_->[$item_pos] } @{$gp->{data}}; | |
146 @il = sort_items(@il); | |
147 my $line = $line_pre; | |
148 for my $item (@il) { | |
149 my $nl = $line; | |
150 $nl .= $item_sep if $nl ne $line_pre; | |
151 $nl .= $item_pre . $item . $item_post; | |
152 if (sizeof($nl . $line_post) > $line_size) { | |
153 print $line, $line_post if $line ne $line_pre; | |
154 $nl = $line_pre . $item_pre . $item . $item_post; | |
155 } | |
156 $line = $nl; | |
157 } | |
158 print $line, $line_post if $line ne $line_pre; | |
159 next; | |
160 } | |
161 if (s/^(.*)\@\@ALL\s*//) { | |
162 my $line_pre = $1; | |
163 my $group = get_field($orig, \$_, 'w'); | |
164 exists $groups{$group} | |
165 or die "Unknown group $group"; | |
166 my $gp = $groups{$group}; | |
167 my $item_name = get_field($orig, \$_, 'w'); | |
168 exists $gp->{fpos}{$item_name} | |
169 or die "Unknown field $item_name in group $group"; | |
170 my $item_pos = $gp->{fpos}{$item_name}; | |
171 s/^\@\@// or die "Missing \@\@ after \@\@ALL"; | |
172 my $line_post = $_; | |
173 my @il = map { $_->[$item_pos] } @{$gp->{data}}; | |
174 @il = sort_items(@il); | |
175 my $p = $gp->{fpos}; | |
176 check_escapes($gp, $p, $line_pre); | |
177 check_escapes($gp, $p, $line_post); | |
178 for my $il (@il) { | |
179 for my $item (@{$gp->{data}}) { | |
180 next if $item->[$item_pos] ne $il; | |
181 my @line = (); | |
182 for my $ol ($line_pre, $line_post) { | |
183 my $line = $ol; | |
184 my $trans = ''; | |
185 while ($line =~ s/^(.*?)\@\@//) { | |
186 $trans .= $1; | |
187 $line =~ s/^(.*?)\@\@// | |
188 or die "Missing \@\@ closing $line"; | |
189 my $gn = $1; | |
190 my $quote = $gn =~ s/^(['"]?)(\w+)\1$/$2/ ? $1 : ''; | |
191 my $f; | |
192 if ($gn eq 'WHIRLPOOL') { | |
193 $f = '@'; | |
194 } elsif ($gn =~ /^(.*?):(\w+)$/) { | |
195 $f = $item->[$p->{$1}]; | |
196 my @a = @{$item->[$p->{$2}]}; | |
197 $f =~ s/%/shift @a || '???'/ge; | |
198 } else { | |
199 $f = $item->[$p->{$gn}]; | |
200 } | |
201 $f =~ s/([\\$quote])/\\$1/g if $quote ne ''; | |
202 $trans .= $f; | |
203 } | |
204 push @line, $trans . $line; | |
205 } | |
206 print $line[0], $il, $line[1]; | |
207 } | |
208 } | |
209 next; | |
210 } | |
211 if (s/^\s*\@\@MULTI\s*//) { | |
212 my $group = get_field($orig, \$_, 'w'); | |
213 exists $groups{$group} | |
214 or die "Unknown group $group"; | |
215 my $gp = $groups{$group}; | |
216 my $item_name = get_field($orig, \$_, 'w'); | |
217 exists $gp->{fpos}{$item_name} | |
218 or die "Unknown field $item_name in group $group"; | |
219 my $item_pos = $gp->{fpos}{$item_name}; | |
220 s/^\@\@\s*$// or die "Missing \@\@ after \@\@MULTI"; | |
221 my @il = map { $_->[$item_pos] } @{$gp->{data}}; | |
222 @il = sort_items(@il); | |
223 my $p = $gp->{fpos}; | |
224 my @line = (); | |
225 my $found = 0; | |
226 while (<>) { | |
227 if (/^\s*\@\@MULTI\@\@\s*$/) { | |
228 $found = 1; | |
229 last; | |
230 } | |
231 push @line, $_; | |
232 check_escapes($gp, $p, $_); | |
233 } | |
234 $found or die "Missing \@\@MULTI\@\@"; | |
235 for my $il (@il) { | |
236 for my $item (@{$gp->{data}}) { | |
237 next if $item->[$item_pos] ne $il; | |
238 print translate_escapes($gp, $p, $item, $_) for @line; | |
239 } | |
240 } | |
241 next; | |
242 } | |
243 if (/\@\@/) { | |
244 chomp; | |
245 die "Invalid \@\@-escape: $_"; | |
246 } | |
247 print; | |
248 } | |
249 } | |
250 | |
251 sub get_field { | |
252 my ($orig, $line, $type) = @_; | |
253 if ($type =~ s/^\@//) { | |
254 $$line =~ s/^\[\s*// | |
255 or die "Invalid array: missing ["; | |
256 my @data = (); | |
257 while ($$line ne '' && $$line !~ s/^\]\s*//) { | |
258 push @data, get_field($orig, $line, $type); | |
259 } | |
260 return \@data; | |
261 } | |
262 if ($type eq 'd') { | |
263 $$line =~ s/^0x([[:xdigit:]]+)\s*// | |
264 and return hex($1); | |
265 $$line =~ s/^(\d+)\s*// | |
266 and return $1; | |
267 die "Invalid number: $_"; | |
268 } | |
269 if ($type eq 'w') { | |
270 $$line =~ s/^(\w+)\s*// | |
271 or die "Invalid symbol: $_"; | |
272 return $1; | |
273 } | |
274 if ($type eq 's') { | |
275 if ($$line =~ s/^(['"])//) { | |
276 # quoted string | |
277 my $quote = $1; | |
278 my $data = ''; | |
279 while ($$line =~ s/^(.*?)([$quote\\])//) { | |
280 $data .= $1; | |
281 last if $2 eq $quote; | |
282 die "Invalid data: \\ at end of line" if $$line eq ''; | |
283 $data .= substr($$line, 0, 1, ''); | |
284 } | |
285 $$line =~ s/^\s+//; | |
286 return $data; | |
287 } else { | |
288 # bareword | |
289 $$line =~ s/^(\S+)\s*// | |
290 or die "Invalid string: $_"; | |
291 return $1; | |
292 } | |
293 } | |
294 die "Internal error: type is '$type'"; | |
295 } | |
296 | |
297 sub sizeof { | |
298 my ($s) = @_; | |
299 my $l = 0; | |
300 while ($s ne '') { | |
301 my $x = substr($s, 0, 1, ''); | |
302 if ($x eq "\t") { | |
303 $l = 8 * (1 + int($l / 8)); | |
304 } else { | |
305 $l++; | |
306 } | |
307 } | |
308 $l; | |
309 } | |
310 | |
311 sub sort_items { | |
312 sort { | |
313 return $a <=> $b if $a =~ /^\d+$/ && $b =~ /^\d+$/; | |
314 return -1 if $a =~ /^\d+$/; | |
315 return 1 if $b =~ /^\d+$/; | |
316 return $a cmp $b; | |
317 } @_; | |
318 } | |
319 | |
320 sub field_map { | |
321 my ($a, $b) = @_; | |
322 # we are trying to append $b's data to $a... | |
323 my @map = (); | |
324 for my $n (@{$b->{fnames}}) { | |
325 # $a must have this field | |
326 return () if ! exists $a->{fpos}{$n}; | |
327 # the fields must have the same type | |
328 return () if $a->{ftypes}{$n} ne $b->{ftypes}{$n}; | |
329 my $p = $a->{fpos}{$n}; | |
330 push @map, $p; | |
331 } | |
332 @map; | |
333 } | |
334 | |
335 sub check_escapes { | |
336 my ($gp, $p, $line) = @_; | |
337 while ($line =~ s/^.*?\@\@//) { | |
338 $line =~ s/^(.*?)\@\@// | |
339 or die "Missing \@\@ closing $line"; | |
340 my $gn = $1; | |
341 $gn =~ s/\s+HTML$//i; | |
342 $gn =~ s/\s+\d+$//; | |
343 next if $gn eq 'WHIRLPOOL'; | |
344 my $ogn = $gn; | |
345 if ($gn =~ s/^(\w+):(\w+)\s*//) { | |
346 my $next = $gn; | |
347 $gn = $1; | |
348 exists $p->{$2} | |
349 or die "Invalid field name $2"; | |
350 substr($gp->{ftypes}{$2}, 0, 1) eq '@' | |
351 or die "Field $2 is not an array"; | |
352 my $mapfrom = get_field($ogn, \$next, 's'); | |
353 my $prefix = get_field($ogn, \$next, 's'); | |
354 my $suffix = get_field($ogn, \$next, 's'); | |
355 } | |
356 $gn =~ s/^(['"])(.*)\1$/$2/; | |
357 exists $p->{$gn} | |
358 or die "Invalid field name $gn"; | |
359 } | |
360 } | |
361 | |
362 sub translate_escapes { | |
363 my ($gp, $p, $item, $line) = @_; | |
364 my $trans = ''; | |
365 while ($line =~ s/^(.*?)\@\@//) { | |
366 $trans .= $1; | |
367 $line =~ s/^(.*?)\@\@//; | |
368 my $gn = $1; | |
369 if ($gn eq 'WHIRLPOOL') { | |
370 $trans .= '@'; | |
371 next; | |
372 } | |
373 my $html = $gn =~ s/\s+HTML$//i; | |
374 my $fold = $gn =~ s/\s+(\d+)$// ? $1 : undef; | |
375 my ($mapfrom, $prefix, $suffix, $mapto); | |
376 my $ogn = $gn; | |
377 if ($gn =~ s/^(\w+):(\w+)\s*//) { | |
378 my $next = $gn; | |
379 $gn = $1; | |
380 $mapto = $2; | |
381 $mapfrom = get_field($ogn, \$next, 's'); | |
382 $prefix = get_field($ogn, \$next, 's'); | |
383 $suffix = get_field($ogn, \$next, 's'); | |
384 } | |
385 my $quote = $gn =~ s/^(['"]?)(\w+)\1$/$2/ ? $1 : ''; | |
386 my $f = $item->[$p->{$gn}]; | |
387 if (defined $mapto) { | |
388 my @a = @{$item->[$p->{$mapto}]}; | |
389 $f =~ s/$mapfrom/$prefix . (shift @a || '???'). $suffix/ge; | |
390 } | |
391 if ($html) { | |
392 $f =~ s/&/&/gi; | |
393 $f =~ s/</</gi; | |
394 $f =~ s/>/>/gi; | |
395 $f =~ s/"/"/gi; | |
396 $f =~ s/I<(.*?)>/<I>$1<\/I>/gi; | |
397 $f =~ s/L<Language::INTERCAL::Charset>/<A HREF="charset.html">the chapter on character sets<\/A>/gi; | |
398 $f =~ s/L<Language::INTERCAL::(?:ArrayIO|ReadNumber|WriteNumber)>/<A HREF="input_output.html">the chapter on Input\/Output<\/A>/gi; | |
399 $f =~ s/L<(.*?)>/<CODE>$1<\/CODE>/gi; | |
400 $f =~ s/C<(.*?)>/<CODE>$1<\/CODE>/gi; | |
401 $f =~ s/\n\n+/<BR>/g; | |
402 } | |
403 if (defined $fold) { | |
404 my $u = $f; | |
405 $f = ''; | |
406 for my $o (split(/\n\n/, $u)) { | |
407 while (sizeof($o) > $fold) { | |
408 my $g = ''; | |
409 while ($o =~ s/^(\S*)(\s+)//) { | |
410 my ($n, $s) = ($1, $2); | |
411 if (sizeof($g . $n) > $fold) { | |
412 $o = $n . $s . $o; | |
413 last; | |
414 } | |
415 $g .= $n . $s; | |
416 } | |
417 $g =~ s/\s+$//; | |
418 $f .= $g . "\n"; | |
419 } | |
420 $f .= $o . "\n\n"; | |
421 } | |
422 $f =~ s/\n\n$//; | |
423 } | |
424 $f =~ s/([\\$quote])/\\$1/g if $quote ne ''; | |
425 $trans .= $f; | |
426 } | |
427 $trans .= $line; | |
428 $trans; | |
429 } | |
430 | |
431 sub load_spec { | |
432 my ($dataname) = @_; | |
433 my @gpath = $ENV{CLC_INTERCAL_PATH} ? ($ENV{CLC_INTERCAL_PATH}) : (); | |
434 my $dataspec = File::Spec->catfile(@gpath, qw(INTERCAL Generate), $dataname); | |
435 unless (open(DATASPEC, '<', $dataspec)) { | |
436 $dataspec = undef; | |
437 for my $path (@INC) { | |
438 my $d = File::Spec->catfile($path, qw(Language INTERCAL Generate), $dataname); | |
439 open(DATASPEC, '<', $d) or next; | |
440 $dataspec = $d; | |
441 last; | |
442 } | |
443 defined $dataspec | |
444 or die "$0: $dataname: $!"; | |
445 } | |
446 print STDERR " ($dataspec)\n"; | |
447 my $in_group = undef; | |
448 my $item_indent = undef; | |
449 my $last_multi = undef; | |
450 my $blank_line = 0; | |
451 while (<DATASPEC>) { | |
452 chomp; | |
453 last if /^\s*\@\__END__/; | |
454 if (/^\s*#|^\s*$/) { | |
455 $blank_line = 1; | |
456 next; | |
457 } | |
458 my $bl = $blank_line; | |
459 $blank_line = 0; | |
460 if (defined $in_group) { | |
461 if (s/^\s*\@END\s*//) { | |
462 die "group $in_group->{name} ended by \@END $_" | |
463 if $in_group->{name} ne $_; | |
464 if ($in_group->{has_m}) { | |
465 $_->[-1] = ${$_->[-1]} for @{$in_group->{data}}; | |
466 } | |
467 $in_group = undef; | |
468 next; | |
469 } | |
470 if (s/^\s*\@SOURCE\s+//) { | |
471 push @{$in_group->{sources}}, $_; | |
472 next; | |
473 } | |
474 die "$0: Invalid \@ escape ($_)" if /^\s*\@/; | |
475 my $indent = s/^([ \t]+)// ? sizeof($1) : 0; | |
476 if ($in_group->{has_m} && | |
477 defined $item_indent && | |
478 $item_indent < $indent) | |
479 { | |
480 s/^\\//; | |
481 if ($bl) { | |
482 $$last_multi .= "\n\n" if $bl; | |
483 } elsif ($$last_multi ne '') { | |
484 $$last_multi .= ' '; | |
485 } | |
486 $$last_multi .= $_; | |
487 } else { | |
488 $item_indent = $indent; | |
489 # process group line | |
490 my @line = (); | |
491 for my $fname (@{$in_group->{fnames}}) { | |
492 my $ftype = $in_group->{ftypes}{$fname}; | |
493 next if $ftype eq 'm'; | |
494 push @line, get_field($_, \$_, $ftype); | |
495 } | |
496 die "Extra data at end of line ($_)" if $_ ne ''; | |
497 if ($in_group->{has_m}) { | |
498 my $x = ''; | |
499 $last_multi = \$x; | |
500 push @line, $last_multi; | |
501 } | |
502 push @{$in_group->{data}}, \@line; | |
503 } | |
504 } elsif (s/^\s*\@GROUP\s+//) { | |
505 my ($group, @fspec) = split; | |
506 die "$0: duplicate group $group" if exists $groups{$group}; | |
507 die "$0: group $group has no fields!" unless @fspec; | |
508 my @fnames = (); | |
509 my %ftypes = (); | |
510 my %fpos = (); | |
511 my $has_m = 0; | |
512 for my $fs (@fspec) { | |
513 $fs =~ /^(\w+)=(.*)$/ or die "Invalid field definition ($fs)"; | |
514 my ($name, $type) = ($1, lc($2)); | |
515 exists $ftypes{$type} and die "Duplicate field name ($name)"; | |
516 $type =~ /^(?:\@*[dws]|m)$/ or die "Invalid field type ($fs)"; | |
517 die "Sorry, multiline fields must be last" if $has_m; | |
518 $has_m = 1 if $type eq 'm'; | |
519 $fpos{$name} = scalar @fnames; | |
520 push @fnames, $name; | |
521 $ftypes{$name} = $type; | |
522 } | |
523 $in_group = { | |
524 fnames => \@fnames, | |
525 ftypes => \%ftypes, | |
526 fpos => \%fpos, | |
527 data => [], | |
528 sources => [], | |
529 name => $group, | |
530 has_m => $has_m, | |
531 }; | |
532 $groups{$group} = $in_group; | |
533 } else { | |
534 die "Invalid line ($_)"; | |
535 } | |
536 } | |
537 close DATASPEC; | |
538 | |
539 # process SOURCE | |
540 for my $g (values %groups) { | |
541 for my $s (@{$g->{sources}}) { | |
542 $s ne $g && exists $groups{$s} | |
543 or die "Invalid source $s for $g->{name}"; | |
544 my $d = $groups{$s}; | |
545 @{$d->{sources}} | |
546 and die "Sourcing from a group containing sources ($s) not implemented"; | |
547 my @map = field_map($g, $d) | |
548 or die "$g->{name} cannot source from $s: incompatible fields"; | |
549 for my $d (@{$d->{data}}) { | |
550 push @{$g->{data}}, [map { $d->[$_] } @map]; | |
551 } | |
552 } | |
553 } | |
554 } | |
555 | |
556 1; |