Mercurial > repo
view bin/len @ 10880:a2e82a8e7f4f
<wob_jonas> slashlearn b_jonas can\'t spell//b_jonas can\'t spell these words: weird, hygiene, etymology, myopia, hibernate, carbohydrate, appearance, maintenance, appropriate, privilege, obsolete, heard, homogeneous, jealous; and confuses these sets of words: drought, draught/draft; couch, coach; depreciate, deprecate; dilate, dilute; contiguous, continuous; d
author | HackBot |
---|---|
date | Wed, 10 May 2017 23:48:17 +0000 |
parents | c989a1669243 |
children |
line wrap: on
line source
#!/usr/bin/perl
# len - report the length of the command-line arguments in several units.
#
# The arguments are joined into one string and treated as raw bytes.  When
# those bytes are valid UTF-8 the script always prints the codepoint count
# and, only where they differ from it, the number of grapheme clusters,
# UTF-16 code units ("Java characters") and UTF-8 bytes.  When the bytes are
# not valid UTF-8 only the byte count is printed, flagged as such.
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# put($count, $item [, $suffix])
#
# Print "$count $item" on a line of its own, pluralising $item with a
# trailing "s" unless $count is exactly 1.  The optional $suffix is appended
# verbatim after the (possibly pluralised) noun and defaults to ''.
sub put {
    my ($count, $item, $suffix) = @_;
    $suffix //= '';
    my $plural = $count == 1 ? '' : 's';
    say "$count $item$plural$suffix";
}

# All arguments joined with the list separator (a single space by default).
my $line = "@ARGV";

# Strict decode: FB_CROAK makes malformed input die rather than be patched
# up, and LEAVE_SRC asks decode() not to modify $line, whose byte length is
# still needed afterwards.  The trailing "1" gives eval a true value on
# success, so failure is detected without relying on $@, which can be
# clobbered during unwinding on the older perls this script still allows.
my $unicode;
my $decoded_ok = eval {
    $unicode = decode('UTF-8', $line, FB_CROAK | LEAVE_SRC);
    1;
};

if (!$decoded_ok) {
    # Not valid UTF-8: bytes are the only unit that can be reported.
    put(length($line), 'byte', ' (UTF-8 not valid)');
}
else {
    my $codepoints = length $unicode;
    my $bytes      = length $line;

    # \X matches one extended grapheme cluster; "= () =" counts the matches.
    my $graphemes = () = $unicode =~ /\X/g;

    # Each codepoint above U+FFFF needs two UTF-16 code units (a surrogate
    # pair), so it adds one extra unit on top of the codepoint count.
    my $astral      = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $utf16_units = $codepoints + $astral;

    # Codepoints are always shown; the other units only when they differ.
    put($graphemes, 'grapheme') if $graphemes != $codepoints;
    put($codepoints, 'codepoint');
    put($utf16_units, 'Java character') if $utf16_units != $codepoints;
    put($bytes, 'UTF-8 byte') if $bytes != $codepoints;
}