Mercurial > repo
comparison bin/len.pl @ 9075:c989a1669243
<fizzie> revert 58b9ee8f97a7
author | HackBot |
---|---|
date | Sun, 25 Sep 2016 20:31:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9074:560a73f4f0a4 | 9075:c989a1669243 |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 use 5.010; | |
5 use Encode qw/decode encode FB_CROAK LEAVE_SRC/; | |
6 | |
# Print one "<count> <item>" line on the currently selected output handle.
#
#   $count - number of units; compared numerically against 1
#   $item  - singular name of the unit (e.g. 'codepoint')
#
# The item name is pluralised by appending a plain "s" whenever the count
# is not 1, so 0 is reported in the plural as well.
sub put {
    my ($count, $item) = @_;
    my $report = $count == 1 ? "1 $item" : "$count ${item}s";
    say $report;
}
15 | |
# The text to measure is the whole command line, with the arguments
# interpolated into a single string (joined by the list separator, a
# space by default).
my $line = "@ARGV";

# Strict decode: FB_CROAK makes decode die on malformed input, and
# LEAVE_SRC keeps $line unmodified so its byte length can still be
# reported afterwards.
my $unicode = eval { decode('UTF-8', $line, FB_CROAK | LEAVE_SRC) };

if ($@) {
    # Not valid UTF-8: the byte count is the only figure that makes sense.
    my $bytes  = length $line;
    my $plural = $bytes == 1 ? '' : 's';
    say "$bytes byte$plural (UTF-8 not valid)";
} else {
    my $bytes      = length $line;
    my $codepoints = length $unicode;

    # \X matches one extended grapheme cluster; "() =" counts the matches.
    my $graphemes = () = $unicode =~ /\X/g;

    # Codepoints above U+FFFF are counted twice for the "Java character"
    # figure (they take two UTF-16 code units).
    my $supplementary = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $ucs2chars     = $supplementary + $codepoints;

    # The codepoint count is always printed; the other figures appear
    # only when they differ from it.
    put($graphemes, 'grapheme') if $graphemes != $codepoints;
    put($codepoints, 'codepoint');
    put($ucs2chars, 'Java character') if $ucs2chars != $codepoints;
    put($bytes, 'UTF-8 byte') if $codepoints != $bytes;
}