Mercurial > repo
view bin/len.pl @ 12220:7eaf71f23fbe draft
<oerjan> learn ξπαλαιολόγος is kspalaiologos's Ancient Greek twin. He was banned from the theater for making up invalid characters.
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Sun, 01 Dec 2019 02:35:07 +0000 |
parents | c989a1669243 |
children |
line wrap: on
line source
#!/usr/bin/perl
# len.pl - report the length of the command-line text in several units.
#
# The arguments are joined with single spaces and treated as a byte string.
# If the bytes are valid UTF-8 the script prints the number of code points,
# plus grapheme, "Java character" and UTF-8 byte counts whenever those differ
# from the code point count. Otherwise it prints only the byte count and
# notes that the input is not valid UTF-8.
use strict;
use warnings;
use 5.010;

use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# Print "<count> <item>" on its own line, appending "s" to the item unless
# the count is exactly 1.
sub put {
    my ($count, $item) = @_;
    if ($count == 1) {
        say "1 $item";
    }
    else {
        say "$count ${item}s";
    }
    return;
}

# Measure a byte string that is expected to hold UTF-8.
#
# Returns a list of [count, unit] pairs in output order, where unit is the
# singular label handed to put(). The 'codepoint' pair is always present for
# valid input; the other pairs appear only when their count differs from the
# code point count. Returns the empty list when the bytes are not valid UTF-8.
sub count_units {
    my ($line) = @_;
    my $bytes = length $line;

    # FB_CROAK makes decode() die on malformed input; LEAVE_SRC stops it from
    # modifying $line. The trailing 1 lets success be judged by eval's return
    # value rather than by inspecting $@ afterwards.
    my $unicode;
    my $decoded_ok = eval {
        $unicode = decode('UTF-8', $line, FB_CROAK | LEAVE_SRC);
        1;
    };
    return if !$decoded_ok;

    my $codepoints = length $unicode;

    # \X matches one extended grapheme cluster; the empty-list assignment
    # yields the number of matches without keeping them.
    my $graphemes = () = $unicode =~ /\X/g;

    # Code points above U+FFFF are counted twice here (they need a surrogate
    # pair in UTF-16). tr/// with an empty replacement only counts.
    my $astral      = ($unicode =~ tr/\x{10000}-\x{10FFFF}//);
    my $utf16_units = $codepoints + $astral;

    my @counts;
    push @counts, [$graphemes, 'grapheme'] if $graphemes != $codepoints;
    push @counts, [$codepoints, 'codepoint'];
    push @counts, [$utf16_units, 'Java character'] if $utf16_units != $codepoints;
    push @counts, [$bytes, 'UTF-8 byte'] if $codepoints != $bytes;
    return @counts;
}

my $line   = "@ARGV";
my @counts = count_units($line);

if (@counts) {
    put @{$_} for @counts;
}
else {
    # Not valid UTF-8
    my $modifier = length($line) == 1 ? '' : 's';
    say length($line) . " byte$modifier (UTF-8 not valid)";
}