view bin/len @ 11086:df94eceb14f9

<oerjan> learn Cognac is named for its strong cognitive effects. Taneb invented it, then somehow managed to keep it off the illegal drugs list.
author HackBot
date Wed, 12 Jul 2017 00:00:42 +0000
parents c989a1669243
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

# Print one measurement line of the form "<count> <item>", adding a
# plural "s" to the item name unless the count is numerically 1.
#   $count - the number to report
#   $item  - singular name of the thing being counted
sub put {
    my ($count, $item) = @_;
    # The singular form always shows a literal 1, whatever spelling of
    # the number was passed in.
    my $text = $count == 1 ? "1 $item" : "$count ${item}s";
    say $text;
}

# The text to measure is every command-line argument joined by single
# spaces, kept as the raw bytes the program received.
my $line = join ' ', @ARGV;

# Strict decoding: FB_CROAK makes decode die on malformed input, and
# LEAVE_SRC stops decode from consuming $line while it checks it.
# On failure the eval yields undef and leaves the error in $@.
my $unicode = eval { decode('UTF-8', $line, FB_CROAK | LEAVE_SRC) };

if ($@) {
    # Not valid UTF-8: the raw byte count is the only thing reported.
    my $byte_total = length $line;
    my $modifier   = $byte_total == 1 ? '' : 's';
    say $byte_total . " byte$modifier (UTF-8 not valid)";
} else {
    my $byte_total = length $line;
    my $codepoints = length $unicode;

    # Assigning the matches to an empty list and reading that assignment
    # in scalar context gives the number of matches.
    # \X matches one extended grapheme cluster at a time.
    my $grapheme_total = () = $unicode =~ /\X/g;

    # Code points at or above U+10000 are counted a second time for the
    # "Java character" figure -- presumably because each one occupies
    # two UTF-16 code units in a Java string.
    my $astral_total = () = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $java_total   = $astral_total + $codepoints;

    # The codepoint count is always shown; every other measurement is
    # shown only when it differs from the codepoint count.
    put($grapheme_total, 'grapheme') if $grapheme_total != $codepoints;
    put($codepoints, 'codepoint');
    put($java_total, 'Java character') if $java_total != $codepoints;
    put($byte_total, 'UTF-8 byte') if $codepoints != $byte_total;
}