comparison bin/len.pl @ 9075:c989a1669243

<fizzie> revert 58b9ee8f97a7
author HackBot
date Sun, 25 Sep 2016 20:31:46 +0000
parents
children
comparison
equal deleted inserted replaced
9074:560a73f4f0a4 9075:c989a1669243
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use 5.010;
5 use Encode qw/decode encode FB_CROAK LEAVE_SRC/;
6
# Print one measurement line: "<count> <item>", with a trailing "s" on the
# item name unless the count is numerically equal to 1.
#   $count - the number to report
#   $item  - singular name of the unit being counted
sub put {
    my ($count, $item) = @_;
    my $text = $count == 1 ? "1 $item" : "$count ${item}s";
    say $text;
}
15
# The text to measure is the whole command line, joined with single spaces.
# Assumes @ARGV holds undecoded bytes (no -C switch / PERL_UNICODE), so that
# length($line) is a byte count.
my $line = "@ARGV";
my $byte_count = length $line;

# Strict UTF-8 decode. FB_CROAK makes decode() die on malformed input, and
# LEAVE_SRC stops decode() from modifying $line while it checks the data.
# The eval block ends in a true value so that failure is detected from the
# eval's return value rather than from $@, which can be clobbered before it
# is inspected on older perls.
my $unicode;
my $decoded_ok = eval {
    $unicode = decode("UTF-8", $line, FB_CROAK | LEAVE_SRC);
    1;
};

if (!$decoded_ok) {
    # Not valid UTF-8: the byte count is the only measure that can be reported.
    my $modifier = $byte_count == 1 ? '' : 's';
    say "$byte_count byte$modifier (UTF-8 not valid)";
} else {
    my $codepoint_count = length $unicode;

    # \X matches one extended grapheme cluster.
    my @graphemes = $unicode =~ /\X/g;

    # Code points above U+FFFF take two 16-bit units (a surrogate pair) in
    # UTF-16, so each one adds one extra unit on top of the code point count.
    my @astral = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $utf16_units = $codepoint_count + @astral;

    # The code point count is always printed; the other measures are printed
    # only when they differ from it.
    put(scalar @graphemes, 'grapheme') if @graphemes != $codepoint_count;
    put($codepoint_count, 'codepoint');
    put($utf16_units, 'Java character') if $utf16_units != $codepoint_count;
    put($byte_count, 'UTF-8 byte') if $codepoint_count != $byte_count;
}