annotate paste/paste.20211 @ 12493:885661512b17 draft

<int-e> le//rn schwartzian//In 1987, Yogurt introduced a better way to rank Schwartz users: Rather than holding an annual tournament, users would take a series of standardized tests administered by official Schwartz centers, and would then be ranked according to the results. This led to the Schwartzian transform because it allowed many more users to be ranked.
author HackEso <hackeso@esolangs.org>
date Fri, 12 Jan 2024 07:24:55 +0000
parents e037173e0012
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e037173e0012 Initial import.
HackBot
parents:
diff changeset
1 #!/usr/bin/perl
e037173e0012 Initial import.
HackBot
parents:
diff changeset
2 use strict; use warnings;
e037173e0012 Initial import.
HackBot
parents:
diff changeset
3 use v5.10;
e037173e0012 Initial import.
HackBot
parents:
diff changeset
4 use open qw( :encoding(UTF-8) :std);
e037173e0012 Initial import.
HackBot
parents:
diff changeset
5 use Storable 'retrieve';
e037173e0012 Initial import.
HackBot
parents:
diff changeset
6 use List::Util 'sum';
e037173e0012 Initial import.
HackBot
parents:
diff changeset
7 use Getopt::Long qw(:config gnu_getopt);
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Optionally replace the built-in rand() with the one exported by
# Math::Random::MT::Perl. The module is not required: any failure while
# loading or importing it is swallowed by the eval, leaving the built-in
# rand() in effect.
BEGIN {
    eval {
        require Math::Random::MT::Perl;
        Math::Random::MT::Perl->import('rand');
    };
    # Enable the next line to be told when the optional module is missing:
    # warn "Optional module Math::Random::MT::Perl not found.\n" if $@;
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
14
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Constants.

# Dataset names; main() registers each one as a long command-line option.
my @options = qw(
    eng-1M eng-all eng-fiction eng-gb eng-us french german hebrew russian
    spanish irish german-medical bulgarian catalan swedish brazilian
    canadian-english-insane manx italian ogerman portuguese polish gaelic
    finnish norwegian
);

# generate() keys the gram table on the previous $n-1 characters.
my $n = 4;

# Option spelling of the dataset used when none is requested.
my $default_opt = "--eng-1M";

# Name of the default dataset file: the option spelling with every run of
# dashes removed and the character following it upper-cased
# ("--eng-1M" becomes "Eng1M").
my $default_dataset = $default_opt;
$default_dataset =~ s/(^|-+)([^-])/\u$2/g;
e037173e0012 Initial import.
HackBot
parents:
diff changeset
20
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Usage message printed by main() for -h/--help. The list of valid datasets
# and the default are interpolated from @options and $default_opt.
my $help_text = <<"END";
Usage: words [-dhNo] [DATASETS...] [NUMBER_OF_WORDS]

valid datasets: --@{[join ' --', @options]}
default: $default_opt

options:
-h, --help this help text
-d, --debug debugging output
-N, --dont-normalize don't normalize frequencies when combining
multiple Markov models; this has the effect
of making larger datasets more influential
-o, --target-offset change the target length offset used in the
word generation algorithm; use negative integers
for best results
END
e037173e0012 Initial import.
HackBot
parents:
diff changeset
39
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Datasets loaded from disk, in load order. Each element is used as an array
# reference whose slot 0 is a gram table and whose slot 1 is a word-length
# histogram (see get_gram and main).
my @loaded_data;

# Working tables after the datasets have been combined: $grams maps a context
# string to a table of next-character weights, $freqs maps a word length to
# its weight.
my ($grams, $freqs);

# Settings controlled from the command line.
my ($debug_mode, $dont_normalize);
my $target_offset = -4;    # needs testing
e037173e0012 Initial import.
HackBot
parents:
diff changeset
51
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Choose one key of a weight table at random, with probability proportional
# to its weight.
#
#   $f - hash reference mapping candidate => numeric weight
#
# Returns the chosen key, or undef (empty list in list context) when the
# table has no keys at all.
#
# The former "(%)" prototype was dropped: every caller passes a hash
# reference, so the prototype only misdescribed the interface.
sub pick {
    my ($f) = @_;
    my @c = keys %$f;
    return unless @c;    # nothing to choose from; avoids rand(undef) warning
    my @w = @{$f}{@c};
    my $r = rand(sum(@w));
    my $fallback;
    for my $i (0 .. $#c) {
        return $c[$i] if $r < $w[$i];
        $r -= $w[$i];
        # Remember the last candidate that could legitimately be chosen.
        $fallback = $c[$i] if $w[$i] > 0;
    }
    # Only reached when floating-point rounding leaves $r just past the final
    # weight, or when no weight is positive. Prefer a key with positive
    # weight so a zero-weight candidate is not returned by accident.
    $fallback = $c[-1] unless defined $fallback;
    print "end of pick loop reached. returned $fallback\n" if $debug_mode;
    return $fallback;
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
64
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Return the table of next-character weights for the context string $key.
#
# Tables are built lazily: the first request for a context adds up that
# context's table from every loaded dataset that has one and stores the
# result in $grams, so later requests are served from the cache. Each
# dataset's weights are divided by that table's total, unless
# $dont_normalize is set, in which case the divisor is $dont_normalize
# itself. Returns undef when no loaded dataset has the context.
sub get_gram {
    my ($key) = @_;

    my $cached = $grams->{$key};
    return $cached if defined $cached;

    for my $dataset (@loaded_data) {
        my $tables = $dataset->[0];
        my $table  = $tables->{$key};
        next unless $table;

        my $divisor = $dont_normalize || sum(values %$table);
        for my $char (keys %$table) {
            $grams->{$key}{$char} += $table->{$char} / $divisor;
        }
    }

    return $grams->{$key};
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
81
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Generate one random word.
#
# A target length is drawn from $freqs and shifted by $target_offset. The
# word starts as $n-1 spaces of padding; characters are then drawn one at a
# time from the table for the last $n-1 characters until a space is drawn.
# Before each draw the weight of the space is multiplied by
# 2**(current length - target), so ending the word becomes more likely as it
# grows past the target. Returns the word with all whitespace removed; in
# debug mode the difference between its length and the target is appended.
sub generate {
    my $target  = pick($freqs) + $target_offset;
    my $context = $n - 1;
    my $word    = ' ' x $context;

    while (1) {
        my $len    = length($word) - $context;
        my %ftable = %{ get_gram(substr($word, -$context, $context)) };
        $ftable{' '} //= 0;
        $ftable{' '} *= 2**($len - $target);
        my $c = pick(\%ftable);
        $word .= $c;
        last if $c eq ' ';
    }

    $word =~ s/\s//g;
    return $debug_mode
        ? "$word (L-T: @{[length($word) - $target]})"
        : $word;
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
97
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Load the stored dataset named $mod from share/WordData/ and append it to
# @loaded_data.
#
#   $mod - dataset file name, e.g. "Eng1M"
#
# Dies with "Unable to load $mod: <reason>" when the file cannot be
# retrieved. The previous form, "push @loaded_data, retrieve(...) or die",
# applied "or die" to push's return value, which is always true, so the check
# never fired and an undef result would have been pushed silently.
sub load_dataset {
    my ($mod) = @_;
    my $path = "share/WordData/$mod";
    # Storable documents that retrieve can fail either by dying or by
    # returning undef; the eval funnels both into the same error message.
    my $data = eval { retrieve($path) };
    unless (defined $data) {
        my $why = $@ || $! || 'unknown error';
        die "Unable to load $mod: $why";
    }
    push @loaded_data, $data;
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
102
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# Program entry point: parse the command line, load and combine the requested
# datasets, then print the requested number of generated words on one line.
#
# Reads @ARGV. Returns 0, which the script uses as its exit status; exits
# with status 1 directly when option parsing fails.
sub main {
    ##Option handling
    # A lone argument is treated as a whole command line and split into
    # words. split ' ' (unlike split /\s+/) ignores leading whitespace, so
    # an argument such as " 5" no longer produces an empty first word that
    # would hide the word count.
    @ARGV = split ' ', $ARGV[0] if @ARGV == 1;
    my $help_mode;
    GetOptions(
        'd|debug'           => \$debug_mode,
        'h|help'            => \$help_mode,
        'N|dont-normalize'  => \$dont_normalize,
        'o|target-offset=s' => \$target_offset,
        # One option per dataset; giving it loads that dataset. The file
        # name is the option name with dashes removed and the following
        # character upper-cased (eng-1M => Eng1M).
        map {
            my $mod = $_;
            $mod =~ s/(^|-)(.)/\u$2/g;
            ($_ => sub { load_dataset($mod) });
        } @options
    ) or exit 1;

    # Showing the help is a success: return 0 explicitly instead of print's
    # return value, which made "--help" exit with status 1.
    if ($help_mode) {
        print $help_text;
        return 0;
    }

    ##Use the default dataset if no others were specified
    load_dataset($default_dataset) unless @loaded_data;

    if (@loaded_data == 1) {
        ##In the case of 1 dataset, skip normalization by copying everything
        ##into the tables
        ($grams, $freqs) = @{ $loaded_data[0] };
    }
    else {
        ##Otherwise, normalize and combine the length histograms.
        ##The gram tables will be normalized lazily as needed (see: get_gram)
        for my $dataset (@loaded_data) {
            my $fdata = $dataset->[1];
            my $sum = $dont_normalize || sum(values %$fdata);
            for my $len (keys %$fdata) {
                $freqs->{$len} += $fdata->{$len} / $sum;
            }
        }
    }

    ##Run word generator and print results
    # The first remaining argument is the number of words; default is 1.
    my $count = int($ARGV[0] || 1);
    local $, = ' ';
    print map { generate() } 1 .. $count;
    print "\n";
    return 0;
}
e037173e0012 Initial import.
HackBot
parents:
diff changeset
145
e037173e0012 Initial import.
HackBot
parents:
diff changeset
# When run as a script (no caller), execute main() and use its return value
# as the exit status. When loaded by other code, skip that and end the file
# with a true value.
exit(main()) unless caller;
1;