# HG changeset patch # User HackBot # Date 1463234632 0 # Node ID 7ae5d5b9baca567f995555e537552c3666fcaeb0 # Parent 3021689c67497612f8f92432dae1dea7ce89570c mv len.pl bin && chmod +x bin/len diff -r 3021689c6749 -r 7ae5d5b9baca bin/len.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/len.pl Sat May 14 14:03:52 2016 +0000 @@ -0,0 +1,41 @@ +#!/usr/bin/perl +use strict; +use warnings; +use 5.010; +use Encode qw/decode encode FB_CROAK LEAVE_SRC/; + +sub put { + my ($count, $item) = @_; + if ($count == 1) { + say "1 $item"; + } else { + say "$count ${item}s"; + } +} + +my $line = "@ARGV"; + +my $unicode; +eval { + $unicode = decode "UTF-8", $line, FB_CROAK | LEAVE_SRC; +}; +# Not valid UTF-8 +if ($@) { + my $modifier = length($line) == 1 ? '' : 's'; + say length($line)." byte$modifier (UTF-8 not valid)"; +} else { + my @output; + my @graphemes = $unicode =~ /\X/g; + my @ucs2 = $unicode =~ /[\x{10000}-\x{10FFFF}]/g; + my $ucs2chars = @ucs2 + length $unicode; + if (@graphemes != length $unicode) { + put scalar @graphemes, 'grapheme'; + } + put length $unicode, 'codepoint'; + if ($ucs2chars != length $unicode) { + put $ucs2chars, 'Java character'; + } + if (length $unicode != length $line) { + put length $line, 'UTF-8 byte'; + } +} \ No newline at end of file diff -r 3021689c6749 -r 7ae5d5b9baca len.pl --- a/len.pl Sat May 14 14:03:32 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -use 5.010; -use Encode qw/decode encode FB_CROAK LEAVE_SRC/; - -sub put { - my ($count, $item) = @_; - if ($count == 1) { - say "1 $item"; - } else { - say "$count ${item}s"; - } -} - -my $line = "@ARGV"; - -my $unicode; -eval { - $unicode = decode "UTF-8", $line, FB_CROAK | LEAVE_SRC; -}; -# Not valid UTF-8 -if ($@) { - my $modifier = length($line) == 1 ? '' : 's'; - say length($line)." byte$modifier (UTF-8 not valid)"; -} else { - my @output; - my @graphemes = $unicode =~ /\X/g; - my @ucs2 = $unicode =~ /[\x{10000}-\x{10FFFF}]/g; - my $ucs2chars = @ucs2 + length $unicode; - if (@graphemes != length $unicode) { - put scalar @graphemes, 'grapheme'; - } - put length $unicode, 'codepoint'; - if ($ucs2chars != length $unicode) { - put $ucs2chars, 'Java character'; - } - if (length $unicode != length $line) { - put length $line, 'UTF-8 byte'; - } -} \ No newline at end of file