view bin/len.pl @ 10176:00ca58f2763c

<oerjan> learn The `words dictionary framework was designed by Hålgar Oslekk, Bick Noffrey, Guiston Degraîme, Myyntti Raatalla, Gölrika Rosenskild, Waslomir Siwovich, Győrvan Sárbik, and Pastronella Gattrovezzi.
author HackBot
date Sat, 28 Jan 2017 18:46:09 +0000
parents c989a1669243
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# Print "<count> <item>" on its own line, appending a plain "s" to the item
# name whenever the count is numerically different from 1.
#   $count - number of units (compared numerically against 1)
#   $item  - singular name of the unit, e.g. 'codepoint'
sub put {
    my ($count, $item) = @_;
    my $text = $count == 1 ? "1 $item" : "$count ${item}s";
    say $text;
}

# Main script: treat the command-line arguments, joined with single spaces,
# as one byte string and report its length in several units.
my $line       = "@ARGV";
my $byte_count = length $line;

# Strict UTF-8 decode. FB_CROAK makes decode() die on malformed input, and
# LEAVE_SRC stops decode() from modifying $line in place.
my $unicode;
eval {
    $unicode = decode "UTF-8", $line, FB_CROAK | LEAVE_SRC;
};
if ($@) {
    # Not valid UTF-8: only the raw byte count is meaningful.
    my $modifier = $byte_count == 1 ? '' : 's';
    say "$byte_count byte$modifier (UTF-8 not valid)";
} else {
    my $codepoint_count = length $unicode;

    # \X matches one extended grapheme cluster; the "() =" assignment counts
    # the matches without keeping them around.
    my $grapheme_count = () = $unicode =~ /\X/g;

    # Code points above U+FFFF take a surrogate pair (two 16-bit units) in
    # UTF-16, so each of them counts as two Java characters.
    my $astral_count = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $utf16_count  = $codepoint_count + $astral_count;

    # The codepoint count is always printed; every other unit is shown only
    # when it differs from the codepoint count.
    put($grapheme_count, 'grapheme') if $grapheme_count != $codepoint_count;
    put($codepoint_count, 'codepoint');
    put($utf16_count, 'Java character') if $utf16_count != $codepoint_count;
    put($byte_count, 'UTF-8 byte') if $codepoint_count != $byte_count;
}