Mercurial > repo
annotate perl-5.22.2/regen/mk_PL_charclass.pl @ 8045:a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
author | HackBot |
---|---|
date | Sat, 14 May 2016 14:54:38 +0000 |
parents | |
children |
rev | line source |
---|---|
8045
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
1 #!perl -w |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
2 use v5.15.8; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
3 use strict; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
4 use warnings; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
5 require 'regen/regen_lib.pl'; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
6 require 'regen/charset_translations.pl'; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
7 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
8 # This program outputs l1_charclass_tab.h, which defines the guts of the |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
9 # PL_charclass table. Each line is a bit map of properties that the Unicode |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
10 # code point at the corresponding position in the table array has. The first |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
11 # line corresponds to code point U+0000, NULL, the last line to U+00FF. For |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
12 # an application to see if the code point "i" has a particular property, it |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
13 # just does |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
14 # 'PL_charclass[i] & BIT' |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
15 # The bit names are of the form '_CC_property_suffix', where 'CC' stands for |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
16 # character class, and 'property' is the corresponding property, and 'suffix' |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
17 # is one of '_A' to mean the property is true only if the corresponding code |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
18 # point is ASCII, and '_L1' means that the range includes any Latin1 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
19 # character (ISO-8859-1 including the C0 and C1 controls). A property without |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
20 # these suffixes does not have different forms for both ranges. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
21 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
22 # This program need be run only when adding new properties to it, or upon a |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
23 # new Unicode release, to make sure things haven't been changed by it. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
24 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
25 my @properties = qw( |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
26 NONLATIN1_SIMPLE_FOLD |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
27 NONLATIN1_FOLD |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
28 ALPHANUMERIC |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
29 ALPHA |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
30 ASCII |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
31 BLANK |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
32 CASED |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
33 CHARNAME_CONT |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
34 CNTRL |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
35 DIGIT |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
36 GRAPH |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
37 IDFIRST |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
38 LOWER |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
39 NON_FINAL_FOLD |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
40 PRINT |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
41 PUNCT |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
42 QUOTEMETA |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
43 SPACE |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
44 UPPER |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
45 WORDCHAR |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
46 XDIGIT |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
47 VERTSPACE |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
48 IS_IN_SOME_FOLD |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
49 MNEMONIC_CNTRL |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
50 ); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
51 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
52 # Read in the case fold mappings. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
53 my %folded_closure; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
54 my @hex_non_final_folds; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
55 my @non_latin1_simple_folds; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
56 my @folds; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
57 use Unicode::UCD; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
58 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
59 BEGIN { # Have to do this at compile time because using user-defined \p{property} |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
60 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
61 # Use the Unicode data file if we are on an ASCII platform (which its data |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
62 # is for), and it is in the modern format (starting in Unicode 3.1.0) and |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
63 # it is available. This avoids being affected by potential bugs |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
64 # introduced by other layers of Perl |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
65 my $file="lib/unicore/CaseFolding.txt"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
66 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
67 if (ord('A') == 65 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
68 && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
69 && open my $fh, "<", $file) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
70 { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
71 @folds = <$fh>; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
72 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
73 else { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
74 my ($invlist_ref, $invmap_ref, undef, $default) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
75 = Unicode::UCD::prop_invmap('Case_Folding'); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
76 for my $i (0 .. @$invlist_ref - 1 - 1) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
77 next if $invmap_ref->[$i] == $default; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
78 my $adjust = -1; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
79 for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
80 $adjust++; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
81 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
82 # Single-code point maps go to a 'C' type |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
83 if (! ref $invmap_ref->[$i]) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
84 push @folds, sprintf("%04X; C; %04X\n", |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
85 $j, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
86 $invmap_ref->[$i] + $adjust); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
87 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
88 else { # Multi-code point maps go to 'F'. prop_invmap() |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
89 # guarantees that no adjustment is needed for these, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
90 # as the range will contain just one element |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
91 push @folds, sprintf("%04X; F; %s\n", |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
92 $j, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
93 join " ", map { sprintf "%04X", $_ } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
94 @{$invmap_ref->[$i]}); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
95 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
96 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
97 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
98 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
99 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
100 for (@folds) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
101 chomp; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
102 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
103 # Lines look like (without the initial '#' |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
104 #0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
105 # Get rid of comments, ignore blank or comment-only lines |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
106 my $line = $_ =~ s/ (?: \s* \# .* )? $ //rx; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
107 next unless length $line; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
108 my ($hex_from, $fold_type, @folded) = split /[\s;]+/, $line; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
109 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
110 my $from = hex $hex_from; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
111 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
112 # Perl only deals with S, C, and F folds |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
113 next if $fold_type ne 'C' and $fold_type ne 'F' and $fold_type ne 'S'; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
114 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
115 # Get each code point in the range that participates in this line's fold. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
116 # The hash has keys of each code point in the range, and values of what it |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
117 # folds to and what folds to it |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
118 for my $i (0 .. @folded - 1) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
119 my $hex_fold = $folded[$i]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
120 my $fold = hex $hex_fold; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
121 push @{$folded_closure{$fold}}, $from if $fold < 256; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
122 push @{$folded_closure{$from}}, $fold if $from < 256; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
123 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
124 if (($fold_type eq 'C' || $fold_type eq 'S') |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
125 && ($fold < 256 != $from < 256)) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
126 { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
127 # Fold is simple (hence can't be a non-final fold, so the 'if' |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
128 # above is mutualy exclusive from the 'if below) and crosses |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
129 # 255/256 boundary. We keep track of the Latin1 code points |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
130 # in such folds. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
131 push @non_latin1_simple_folds, ($fold < 256) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
132 ? $fold |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
133 : $from; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
134 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
135 elsif ($i < @folded-1 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
136 && $fold < 256 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
137 && ! grep { $_ eq $hex_fold } @hex_non_final_folds) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
138 { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
139 push @hex_non_final_folds, $hex_fold; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
140 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
141 # Also add the upper case, which in the latin1 range folds to |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
142 # $fold |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
143 push @hex_non_final_folds, sprintf "%04X", ord uc chr $fold; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
144 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
145 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
146 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
147 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
148 # Now having read all the lines, combine them into the full closure of each |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
149 # code point in the range by adding lists together that share a common |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
150 # element |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
151 foreach my $folded (keys %folded_closure) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
152 foreach my $from (grep { $_ < 256 } @{$folded_closure{$folded}}) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
153 push @{$folded_closure{$from}}, @{$folded_closure{$folded}}; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
154 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
155 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
156 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
157 # We have the single-character folds that cross the 255/256, like KELVIN |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
158 # SIGN => 'k', but we need the closure, so add like 'K' to it |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
159 foreach my $folded (@non_latin1_simple_folds) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
160 foreach my $fold (@{$folded_closure{$folded}}) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
161 if ($fold < 256 && ! grep { $fold == $_ } @non_latin1_simple_folds) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
162 push @non_latin1_simple_folds, $fold; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
163 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
164 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
165 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
166 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
167 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
168 sub Is_Non_Latin1_Fold { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
169 my @return; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
170 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
171 foreach my $folded (keys %folded_closure) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
172 push @return, sprintf("%X", $folded), if grep { $_ > 255 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
173 @{$folded_closure{$folded}}; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
174 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
175 return join("\n", @return) . "\n"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
176 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
177 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
178 sub Is_Non_Latin1_Simple_Fold { # Latin1 code points that are folded to by |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
179 # non-Latin1 code points as single character |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
180 # folds |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
181 return join("\n", map { sprintf "%X", $_ } @non_latin1_simple_folds) . "\n"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
182 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
183 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
184 sub Is_Non_Final_Fold { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
185 return join("\n", @hex_non_final_folds) . "\n"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
186 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
187 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
188 my @bits; # Bit map for each code point |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
189 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
190 # For each character, calculate which properties it matches. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
191 for my $ord (0..255) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
192 my $char = chr($ord); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
193 utf8::upgrade($char); # Important to use Unicode rules! |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
194 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
195 # Look at all the properties we care about here. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
196 for my $property (@properties) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
197 my $name = $property; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
198 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
199 # Remove the suffix to get the actual property name. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
200 # Currently the suffixes are '_L1', '_A', and none. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
201 # If is a latin1 version, no further checking is needed. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
202 if (! ($name =~ s/_L1$//)) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
203 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
204 # Here, isn't an _L1. If its _A, it's automatically false for |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
205 # non-ascii. The only current ones (besides ASCII) without a |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
206 # suffix are valid over the whole range. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
207 next if $name =~ s/_A$// && $char !~ /\p{ASCII}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
208 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
209 my $re; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
210 if ($name eq 'PUNCT') {; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
211 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
212 # Sadly, this is inconsistent: \pP and \pS for the ascii range, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
213 # just \pP outside it. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
214 $re = qr/\p{Punct}|[^\P{Symbol}\P{ASCII}]/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
215 } elsif ($name eq 'CHARNAME_CONT') {; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
216 $re = qr/\p{_Perl_Charname_Continue}/, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
217 } elsif ($name eq 'SPACE') {; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
218 $re = qr/\p{XPerlSpace}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
219 } elsif ($name eq 'IDFIRST') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
220 $re = qr/[_\p{Alpha}]/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
221 } elsif ($name eq 'WORDCHAR') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
222 $re = qr/\p{XPosixWord}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
223 } elsif ($name eq 'ALPHANUMERIC') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
224 # Like \w, but no underscore |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
225 $re = qr/\p{Alnum}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
226 } elsif ($name eq 'QUOTEMETA') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
227 $re = qr/\p{_Perl_Quotemeta}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
228 } elsif ($name eq 'NONLATIN1_FOLD') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
229 $re = qr/\p{Is_Non_Latin1_Fold}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
230 } elsif ($name eq 'NONLATIN1_SIMPLE_FOLD') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
231 $re = qr/\p{Is_Non_Latin1_Simple_Fold}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
232 } elsif ($name eq 'NON_FINAL_FOLD') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
233 $re = qr/\p{Is_Non_Final_Fold}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
234 } elsif ($name eq 'IS_IN_SOME_FOLD') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
235 $re = qr/\p{_Perl_Any_Folds}/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
236 } elsif ($name eq 'MNEMONIC_CNTRL') { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
237 # These are the control characters that there are mnemonics for |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
238 $re = qr/[\a\b\e\f\n\r\t]/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
239 } else { # The remainder have the same name and values as Unicode |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
240 $re = eval "qr/\\p{$name}/"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
241 use Carp; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
242 carp $@ if ! defined $re; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
243 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
244 #print "$ord, $name $property, $re\n"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
245 if ($char =~ $re) { # Add this property if matches |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
246 $bits[$ord] .= '|' if $bits[$ord]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
247 $bits[$ord] .= "(1U<<_CC_$property)"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
248 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
249 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
250 #print __LINE__, " $ord $char $bits[$ord]\n"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
251 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
252 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
253 my $out_fh = open_new('l1_char_class_tab.h', '>', |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
254 {style => '*', by => $0, |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
255 from => "property definitions"}); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
256 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
257 print $out_fh <<END; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
258 /* For code points whose position is not the same as Unicode, both are shown |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
259 * in the comment*/ |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
260 END |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
261 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
262 # Output the table using fairly short names for each char. |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
263 foreach my $charset (get_supported_code_pages()) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
264 my @a2n = @{get_a2n($charset)}; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
265 my @out; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
266 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
267 print $out_fh "\n" . get_conditional_compile_line_start($charset); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
268 for my $ord (0..255) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
269 my $name; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
270 my $char = chr $ord; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
271 if ($char =~ /\p{PosixGraph}/) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
272 my $quote = $char eq "'" ? '"' : "'"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
273 $name = $quote . chr($ord) . $quote; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
274 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
275 elsif ($char =~ /\p{XPosixGraph}/) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
276 use charnames(); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
277 $name = charnames::viacode($ord); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
278 $name =~ s/LATIN CAPITAL LETTER // |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
279 or $name =~ s/LATIN SMALL LETTER (.*)/\L$1/ |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
280 or $name =~ s/ SIGN\b// |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
281 or $name =~ s/EXCLAMATION MARK/'!'/ |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
282 or $name =~ s/QUESTION MARK/'?'/ |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
283 or $name =~ s/QUOTATION MARK/QUOTE/ |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
284 or $name =~ s/ INDICATOR//; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
285 $name =~ s/\bWITH\b/\L$&/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
286 $name =~ s/\bONE\b/1/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
287 $name =~ s/\b(TWO|HALF)\b/2/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
288 $name =~ s/\bTHREE\b/3/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
289 $name =~ s/\b QUARTER S? \b/4/x; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
290 $name =~ s/VULGAR FRACTION (.) (.)/$1\/$2/; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
291 $name =~ s/\bTILDE\b/'~'/i |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
292 or $name =~ s/\bCIRCUMFLEX\b/'^'/i |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
293 or $name =~ s/\bSTROKE\b/'\/'/i |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
294 or $name =~ s/ ABOVE\b//i; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
295 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
296 else { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
297 use Unicode::UCD qw(prop_invmap); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
298 my ($list_ref, $map_ref, $format) = prop_invmap("Name_Alias"); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
299 if ($format !~ /^s/) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
300 use Carp; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
301 carp "Unexpected format '$format' for 'Name_Alias"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
302 last; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
303 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
304 my $which = Unicode::UCD::search_invlist($list_ref, $ord); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
305 if (! defined $which) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
306 use Carp; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
307 carp "No name found for code pont $ord"; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
308 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
309 else { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
310 my $map = $map_ref->[$which]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
311 if (! ref $map) { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
312 $name = $map; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
313 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
314 else { |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
315 # Just pick the first abbreviation if more than one |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
316 my @names = grep { $_ =~ /abbreviation/ } @$map; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
317 $name = $names[0]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
318 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
319 $name =~ s/:.*//; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
320 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
321 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
322 my $index = $a2n[$ord]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
323 $out[$index] = ($ord == $index) |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
324 ? sprintf "/* U+%02X %s */ %s,\n", $ord, $name, $bits[$ord] |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
325 : sprintf "/* 0x%02X U+%02X %s */ %s,\n", $index, $ord, $name, $bits[$ord]; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
326 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
327 print $out_fh join "", @out; |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
328 print $out_fh "\n" . get_conditional_compile_line_end(); |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
329 } |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
330 |
a16537d2fe07
<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
HackBot
parents:
diff
changeset
|
331 read_only_bottom_close_and_rename($out_fh) |