annotate paste/paste.17694 @ 12257:1924fe176291 draft

<fizzie> ` sed -e \'s|wisdom|bin|\' < ../bin/cwlprits > ../bin/cblprits; chmod a+x ../bin/cblprits
author HackEso <hackeso@esolangs.org>
date Sat, 07 Dec 2019 23:36:53 +0000
parents d949e6cf6a79
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1320
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
1 #!/usr/bin/perl
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
2 use strict; use warnings;
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
3 use v5.10;
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
4 use open qw( :encoding(UTF-8) :std);
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
5 use File::Basename 'dirname';
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
6 use Storable 'retrieve';
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
7 use List::Util qw(sum min);
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
8 use Getopt::Long qw(:config gnu_getopt);
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
9 BEGIN {
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
10 eval {
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
11 require Math::Random::MT::Perl; Math::Random::MT::Perl->import('rand');
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
12 };
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
13 #warn "Optional module Math::Random::MT::Perl not found.\n" if $@;
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
14 }
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
15
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
#constants

# Names of the selectable datasets; each one becomes a --<name> command
# line switch in main().
my @options = qw(eng-1M eng-all eng-fiction eng-gb eng-us french german hebrew russian spanish irish german-medical bulgarian catalan swedish brazilian canadian-english-insane manx italian ogerman portuguese polish gaelic finnish norwegian esolangs);

# Order of the n-gram model: generate() uses the last $n-1 characters of
# the word as the lookup context.
my $n = 4;

# Switch shown as the default in the --list output.
my $default_opt = "--eng-1M";

# Dataset name derived from the default switch by dropping the dashes and
# upper-casing the character that follows each run ("--eng-1M" -> "Eng1M").
(my $default_dataset = $default_opt) =~ s/(^|-+)([^-])/\u$2/g;

#help info
my $help_text = <<"END";
Usage: words [-dhlNo] [DATASETS...] [NUMBER_OF_WORDS]

options:
    -h, --help              show this help text
    -l, --list              list valid datasets
    -d, --debug             debugging output
    -N, --dont-normalize    don't normalize frequencies when combining
                            multiple Markov models; this has the effect
                            of making larger datasets more influential
    -o, --target-offset     change the target length offset used in the
                            word generation algorithm; use negative integers
                            for best results
END

my $list_text = <<"END";
valid datasets: --@{[join ' --', @options]}
default: $default_opt
END

#data from loaded files
# One entry per loaded dataset, as pushed by load_dataset().  Element [0]
# is read as the gram table and element [1] as the word-length histogram.
my @loaded_data;

#data after normalizing and combining datasets
my $grams;    # context => { next character => weight }
my $freqs;    # word length => weight

#some command line options
my $debug_mode;
my $target_offset = -4; #needs testing;
my $dont_normalize;
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
55
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# pick($table)
#
# Draw one key at random from the hash reference $table, where each value
# is the (non-negative) weight of its key.  A key is returned with
# probability proportional to its weight.
#
# Returns nothing (undef in scalar context) when the table is empty.
#
# Note: the sub takes a hash *reference*, so it is declared without a
# prototype; the former "(%)" prototype did not describe how it is called.
sub pick {
    my ($f) = @_;
    my @c = keys %$f;

    # Nothing to choose from: avoid rand(undef) and indexing an empty list.
    return unless @c;

    my $r = rand(sum(values %$f));
    for my $choice (@c) {
        my $weight = $f->{$choice};
        return $choice if $r < $weight;
        $r -= $weight;
    }

    # Only reachable when no weight exceeded the remaining random value
    # (e.g. through floating-point rounding or an all-zero table); fall
    # back to the last key.
    print "end of pick loop reached. returned $c[-1]\n" if $debug_mode;
    return $c[-1];
}
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
68
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# get_gram($key)
#
# Return the frequency table (hash reference: next character => weight)
# for the context string $key.
#
# The combined table is built lazily: the first time a context is asked
# for, the matching table of every loaded dataset is merged into
# $grams->{$key}, and later calls reuse that cached entry.  Unless
# $dont_normalize is set, each dataset's weights are divided by that
# dataset's total for the context so every dataset contributes equally.
#
# A context that no dataset knows yields (and caches) an empty table
# instead of undef, so callers can always dereference the result.
sub get_gram {
    my ($key) = @_;

    unless (defined $grams->{$key}) {
        my %merged;
        for my $dataset (@loaded_data) {
            my $data = $dataset->[0];
            my $g = $data->{$key} or next;

            # With --dont-normalize the divisor is the flag's true value
            # (1), i.e. the raw counts are added up unchanged.
            my $sum = $dont_normalize || sum(values %$g);

            # An all-zero table carries no information and would
            # otherwise cause a division by zero.
            next unless $sum;

            $merged{$_} += $g->{$_} / $sum for keys %$g;
        }
        $grams->{$key} = \%merged;
    }

    return $grams->{$key};
}
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
85
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# generate()
#
# Build and return one random word.
#
# A target length is drawn from the length histogram $freqs and shifted by
# $target_offset.  The word starts as $n-1 spaces of padding; characters
# are then appended one at a time, each drawn from the table of the current
# context (the last $n-1 characters).  Drawing a space ends the word.
# In debug mode the returned string also carries the difference between
# the final length and the target.
sub generate {
    my $target  = pick($freqs) + $target_offset;
    my $padding = $n - 1;
    my $word    = ' ' x $padding;

    while (1) {
        # Number of real (non-padding) characters produced so far.
        my $len = length($word) - $padding;

        # Work on a copy so the cached table is not modified below.
        my %ftable = %{ get_gram(substr($word, -$n + 1, $n - 1)) };

        # Scale the weight of "end of word": the factor is below 1 while
        # the word is shorter than the target and grows past it.
        $ftable{' '} //= 0;
        $ftable{' '} *= 2 ** ($len - $target);

        my $next = pick(\%ftable);
        $word .= $next;
        last if $next eq ' ';
    }

    # Drop the leading padding and the terminating space.
    $word =~ s/\s//g;

    if ($debug_mode) {
        $word = $word . ' (L-T: ' . (length($word) - $target) . ')';
    }
    return $word;
}
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
101
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# load_dataset($mod)
#
# Read the Storable file "share/WordData/$mod" (relative to the current
# directory) and append its contents to @loaded_data.
#
# Dies with "Unable to load $mod" when retrieve() returns nothing; errors
# raised by retrieve() itself propagate unchanged.
# Returns the new number of loaded datasets.
sub load_dataset {
    my ($mod) = @_;

    # The check has to apply to retrieve()'s result.  Written as
    # "push ..., retrieve(...) or die", the "or" binds to push, which is
    # always true, so a failed load would be pushed as undef unnoticed.
    my $data = retrieve("share/WordData/$mod")
        or die "Unable to load $mod";

    return push @loaded_data, $data;
}
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
106
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# main()
#
# Command line entry point: parse the options, load the requested datasets
# (or the default one), combine their word-length histograms and print up
# to 25 generated words on one line.
#
# Returns the process exit status: 0 on success, including after printing
# the --help or --list text.  Exits directly with status 1 when option
# parsing fails.
sub main {
    #if (my $d = dirname $0) { chdir $d }

    ## Option handling
    my ($help_mode, $list_mode);

    # A single argument is treated as a whole command line and split into
    # words.  split ' ' (unlike split /\s+/) ignores leading whitespace, so
    # no empty first argument is produced.
    @ARGV = split ' ', $ARGV[0] if @ARGV == 1;

    GetOptions(
        'd|debug'           => \$debug_mode,
        'h|help'            => \$help_mode,
        'l|list'            => \$list_mode,
        'N|dont-normalize'  => \$dont_normalize,
        'o|target-offset=s' => \$target_offset,
        # One switch per dataset; the file name is the switch name with the
        # dashes removed and the following character upper-cased.
        map {
            my $mod = $_;
            $mod =~ s/(^|-)(.)/\u$2/g;
            ($_ => sub { load_dataset($mod) });
        } @options
    ) or exit 1;

    # Return 0 explicitly: returning print's (true) result would make the
    # script exit with status 1 after showing help.
    if ($help_mode) {
        print $help_text;
        return 0;
    }
    if ($list_mode) {
        print $list_text;
        return 0;
    }

    ##Use the default dataset if no others were specified
    load_dataset($default_dataset) unless @loaded_data;

    ##In the case of 1 dataset, skip normalization by copying everything
    ##into the tables
    if (@loaded_data == 1) {
        ($grams, $freqs) = @{ $loaded_data[0] };
    }
    ##Otherwise, normalize and combine the length histograms.
    ##The gram tables will be normalized lazily as needed (see: get_gram)
    else {
        for my $dataset (@loaded_data) {
            my $fdata = $dataset->[1];
            my $sum = $dont_normalize || sum(values %$fdata);
            for my $len (keys %$fdata) {
                $freqs->{$len} += $fdata->{$len} / $sum;
            }
        }
    }

    ##Run word generator and print results
    {
        # Each print is followed by a space, so the words are separated.
        local $\ = ' ';
        # First remaining argument is the word count (default 1, max 25).
        print generate() for 1 .. min(25, int($ARGV[0] || 1));
    }
    print "\n";
    return 0;
}
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
153
d949e6cf6a79 <GreyKnight> cat $(which words) | paste
HackBot
parents:
diff changeset
# Run main() and exit with its return value only when this file is executed
# directly; when it is loaded from other code (caller is true) nothing runs.
unless (caller) {
    exit main();
}

# True value for code that loads this file with require.
1;