Mercurial > repo
comparison bin/len.pl @ 8041:7ae5d5b9baca
<xfix> mv len.pl bin && chmod +x bin/len
author | HackBot |
---|---|
date | Sat, 14 May 2016 14:03:52 +0000 |
parents | len.pl@3021689c6749 |
children |
comparison
equal
deleted
inserted
replaced
8040:3021689c6749 | 8041:7ae5d5b9baca |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 use 5.010; | |
5 use Encode qw/decode encode FB_CROAK LEAVE_SRC/; | |
6 | |
# Print "<count> <item>" on a line of its own, appending "s" to the item
# name unless the count is numerically equal to 1.
sub put {
    my ($n, $noun) = @_;
    my $label = $n == 1 ? "1 $noun" : "$n ${noun}s";
    say $label;
}
15 | |
# The text to measure is every command-line argument joined with the list
# separator (a space unless $" has been changed).  No decoding has been
# applied yet, so length($line) is used below as the byte count.
my $line = "@ARGV";

# Attempt a strict UTF-8 decode.  FB_CROAK makes decode() die on malformed
# input, and LEAVE_SRC keeps $line intact so its length can still be
# reported afterwards.  The block ends in a true value so that success is
# read from eval's return value rather than from $@, which can be reset
# before it is inspected.
my $unicode;
my $decoded_ok = eval {
    $unicode = decode('UTF-8', $line, FB_CROAK | LEAVE_SRC);
    1;
};

if (!$decoded_ok) {
    # Not valid UTF-8: the byte count is the only measurement available.
    my $byte_count = length $line;
    my $modifier   = $byte_count == 1 ? '' : 's';
    say "$byte_count byte$modifier (UTF-8 not valid)";
} else {
    my $codepoint_count = length $unicode;
    my $byte_count      = length $line;

    # Assigning the match list to () and then to a scalar yields the number
    # of matches without keeping the matched substrings around.
    my $grapheme_count = () = $unicode =~ /\X/g;

    # Each code point above U+FFFF adds one to the "Java character" total
    # on top of the plain code point count.
    my $astral_count = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $ucs2chars    = $codepoint_count + $astral_count;

    # The code point count is always printed; every other measurement is
    # printed only when it differs from that count.
    put($grapheme_count, 'grapheme') if $grapheme_count != $codepoint_count;
    put($codepoint_count, 'codepoint');
    put($ucs2chars, 'Java character') if $ucs2chars != $codepoint_count;
    put($byte_count, 'UTF-8 byte') if $byte_count != $codepoint_count;
}