Mercurial > repo
view paste/paste.17694 @ 7910:7d5f1c5e44b1
<moon__> mkx bin/hfs//erro You have discovered an eerie caven. The air above the dark stone floor is alive with vorices of purple light and dark, boiling clouds. Seemingly bottomless glowing pit mark the surface.
author | HackBot |
---|---|
date | Sat, 07 May 2016 00:41:47 +0000 |
parents | d949e6cf6a79 |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use warnings; use v5.10; use open qw( :encoding(UTF-8) :std); use File::Basename 'dirname'; use Storable 'retrieve'; use List::Util qw(sum min); use Getopt::Long qw(:config gnu_getopt); BEGIN { eval { require Math::Random::MT::Perl; Math::Random::MT::Perl->import('rand'); }; #warn "Optional module Math::Random::MT::Perl not found.\n" if $@; } #constants my @options = qw(eng-1M eng-all eng-fiction eng-gb eng-us french german hebrew russian spanish irish german-medical bulgarian catalan swedish brazilian canadian-english-insane manx italian ogerman portuguese polish gaelic finnish norwegian esolangs); my $n = 4; my $default_opt = "--eng-1M"; (my $default_dataset = $default_opt) =~ s/(^|-+)([^-])/\u$2/g; #help info my $help_text = <<END Usage: words [-dhNo] [DATASETS...] [NUMBER_OF_WORDS] options: -l, --list list valid datasets -d, --debug debugging output -N, --dont-normalize don't normalize frequencies when combining multiple Markov models; this has the effect of making larger datasets more influential -o, --target-offset change the target length offset used in the word generation algorithm; use negative integers for best results END ; my $list_text = <<END valid datasets: --@{[join ' --', @options]} default: $default_opt END ; #data from loaded files my @loaded_data; #data after normalizing and combining datasets my $grams; my $freqs; #some command line options my $debug_mode; my $target_offset = -4; #needs testing; my $dont_normalize; sub pick(%) { my ($f) = @_; my @c = keys %$f; my @w = map { $f->{$_} } @c; my $r = rand(sum(@w)); for(0..$#c) { return $c[$_] if $r < $w[$_]; $r -= $w[$_]; } print "end of pick loop reached. returned $c[$#w]\n" if $debug_mode; return $c[$#w]; } sub get_gram { my ($key) = @_; ##Lazily interpolate the gram table on the fly ##then cache the results unless (defined $grams->{$key}) { for(@loaded_data) { my $data = $_->[0]; my $g = $data->{$key} or next; my $sum = $dont_normalize || sum(values %$g); while( my ($c, $v) = each %$g ) { $grams->{$key}->{$c} += $v/$sum; } } } return $grams->{$key}; } sub generate { my $target = pick($freqs) + $target_offset; my $word = ' ' x ($n-1); my $c; do { my $len = (length $word) - ($n-1); my %ftable = %{get_gram substr($word, -$n+1, $n-1)}; ($ftable{' '} //= 0) *= 2**($len-$target); $c = pick \%ftable; $word .= $c; } while $c ne ' '; $word =~ s/\s//g; $word = "$word (L-T: @{[length($word) - $target]})" if $debug_mode; return $word; } sub load_dataset { my ($mod) = @_; push @loaded_data, retrieve ("share/WordData/$mod") or die "Unable to load $mod"; } sub main { #if (my $d = dirname $0) { chdir $d } ##Option handling my ($help_mode, $list_mode); @ARGV = split /\s+/, $ARGV[0] if @ARGV == 1; GetOptions ( 'd|debug' => \$debug_mode, 'h|help' => \$help_mode, 'l|list' => \$list_mode, 'N|dont-normalize' => \$dont_normalize, 'o|target-offset=s' => \$target_offset, map { my $mod=$_; $mod =~ s/(^|-)(.)/\u$2/g; $_, sub { load_dataset $mod }; } @options ) or exit 1; return print $help_text if $help_mode; return print $list_text if $list_mode; ##Use the default dataset if no others were specified load_dataset $default_dataset unless @loaded_data; ##In the case of 1 dataset, skip normalization by copying everything ##into the tables if (@loaded_data == 1) { ($grams, $freqs) = @{$loaded_data[0]}; } ##Otherwise, normalize and combine the length histograms. ##The gram tables will be normalized lazily as needed (see: get_gram) else { for (@loaded_data) { my $fdata = $_->[1]; my $sum = $dont_normalize || sum(values %$fdata); while ( my ($len, $f) = each %$fdata ) { $freqs->{$len} += $f/$sum; } } } ##Run word generator and print results { local $\ = ' '; print generate for 1..min(25, int($ARGV[0]||1)); } print "\n"; return 0; } exit main unless caller; 1;