Mercurial > repo
view bin/len.pl @ 9367:1d9cae768f53
<shachaf> learn A is a village in Norway. The BBC invented it by not understanding things on top of letters.
author | HackBot |
---|---|
date | Thu, 20 Oct 2016 01:18:49 +0000 |
parents | c989a1669243 |
children |
line wrap: on
line source
#!/usr/bin/perl
# len.pl - report the length of the command-line arguments in several units.
#
# Usage: len.pl TEXT...
#
# The arguments are joined with single spaces and treated as a byte string.
# If those bytes decode as UTF-8, the codepoint count is always printed; the
# grapheme count, the "Java character" count (one extra per codepoint at or
# above U+10000, i.e. UTF-16 code units) and the UTF-8 byte count are printed
# only when they differ from the codepoint count.  If the bytes do not decode,
# the byte count is printed followed by "(UTF-8 not valid)".
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# Return "$count $item", appending "s" to $item unless $count is exactly 1.
sub _plural {
    my ($count, $item) = @_;
    return $count == 1 ? "1 $item" : "$count ${item}s";
}

# Print one "<count> <unit>" line on standard output, pluralised by _plural.
sub put {
    my ($count, $item) = @_;
    say _plural($count, $item);
}

# Measure $bytes (a byte string) and return the report lines, without
# trailing newlines, in the order in which they are to be printed.
sub _length_report {
    my ($bytes) = @_;

    # FB_CROAK makes decode() die on malformed input instead of substituting
    # characters; LEAVE_SRC keeps decode() from modifying $bytes in place, so
    # its length can still be reported afterwards.
    my $unicode;
    eval {
        $unicode = decode 'UTF-8', $bytes, FB_CROAK | LEAVE_SRC;
    };

    # Not valid UTF-8: all we can report is the raw byte count.
    if ($@) {
        return _plural(length($bytes), 'byte') . ' (UTF-8 not valid)';
    }

    my $codepoints = length $unicode;
    my $byte_count = length $bytes;

    # "() = ..." forces list context so the scalar receives the match count.
    my $graphemes = () = $unicode =~ /\X/g;
    my $astral    = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;

    # Each codepoint outside the BMP is counted twice (a surrogate pair).
    my $java_chars = $codepoints + $astral;

    my @report;
    push @report, _plural($graphemes,  'grapheme')       if $graphemes  != $codepoints;
    push @report, _plural($codepoints, 'codepoint');
    push @report, _plural($java_chars, 'Java character') if $java_chars != $codepoints;
    push @report, _plural($byte_count, 'UTF-8 byte')     if $byte_count != $codepoints;
    return @report;
}

say for _length_report("@ARGV");