Mercurial > repo
diff bin/len.pl @ 8041:7ae5d5b9baca
<xfix> mv len.pl bin && chmod +x bin/len
author | HackBot |
---|---|
date | Sat, 14 May 2016 14:03:52 +0000 |
parents | len.pl@3021689c6749 |
children |
line wrap: on
line diff
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# put($count, $item)
#
# Print one "<count> <item>" line to the currently selected output handle.
# The item name is pluralised with a trailing "s" unless the count is
# numerically equal to 1.
sub put {
    my ($count, $item) = @_;
    say $count == 1 ? "1 $item" : "$count ${item}s";
}

# The text to measure is every command-line argument joined by single spaces.
my $line = "@ARGV";

# Strict UTF-8 decode.  FB_CROAK makes decode() die on malformed input, and
# LEAVE_SRC keeps $line untouched so its byte length can still be reported.
my $unicode = eval { decode('UTF-8', $line, FB_CROAK | LEAVE_SRC) };

if ($@) {
    # Not valid UTF-8: the raw byte count is the only thing worth reporting.
    my $bytes  = length $line;
    my $suffix = $bytes == 1 ? '' : 's';
    say "$bytes byte$suffix (UTF-8 not valid)";
} else {
    my $bytes      = length $line;
    my $codepoints = length $unicode;

    # Assigning a list-context match to "()" and then to a scalar yields the
    # number of matches without keeping the matched substrings around.
    my $graphemes = () = $unicode =~ /\X/g;
    my $astral    = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;

    # Code points above U+FFFF are counted twice here, matching the two
    # UTF-16 code units (Java chars) each of them would occupy.
    my $java_chars = $astral + $codepoints;

    # Only the code point count is printed unconditionally; every other
    # figure is shown just when it differs from the code point count.
    put($graphemes, 'grapheme') if $graphemes != $codepoints;
    put($codepoints, 'codepoint');
    put($java_chars, 'Java character') if $java_chars != $codepoints;
    put($bytes, 'UTF-8 byte') if $codepoints != $bytes;
}