view bin/len @ 11293:a7899ef2d7b6

<wob_jonas> learn Aristotle said that every illness can be cured by balancing the four vitreous humors, and everyone believed him for two thousand years, even though people still died of illnesses. It wasn\'t until the 20th century that Szent-Gy\xc3\xb6rgyi Albert realized that Aristotle didn\'t find fifth kind of vitreous humor, vitamin C, because the Greek alphabet
author HackBot
date Mon, 01 Jan 2018 17:57:43 +0000
parents c989a1669243
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Encode qw/decode encode FB_CROAK LEAVE_SRC/;

sub put {
    my ($count, $item) = @_;
    if ($count == 1) {
        say "1 $item";
    } else {
        say "$count ${item}s";
    }
}

my $line = "@ARGV";

my $unicode;
eval {
    $unicode = decode "UTF-8", $line, FB_CROAK | LEAVE_SRC;
};
# Not valid UTF-8
if ($@) {
    my $modifier = length($line) == 1 ? '' : 's';
    say length($line)." byte$modifier (UTF-8 not valid)";
} else {
    my @output;
    my @graphemes = $unicode =~ /\X/g;
    my @ucs2 = $unicode =~ /[\x{10000}-\x{10FFFF}]/g;
    my $ucs2chars = @ucs2 + length $unicode;
    if (@graphemes != length $unicode) {
        put scalar @graphemes, 'grapheme';
    }
    put length $unicode, 'codepoint';
    if ($ucs2chars != length $unicode) {
        put $ucs2chars, 'Java character';
    }
    if (length $unicode != length $line) {
        put length $line, 'UTF-8 byte';
    }
}