# hepburn7.pl
# Attempts to rearrange order of rules until it finds a consistent one
# Number of independent random searches to run; the final report averages
# the iteration counts over this many trials.
$number_of_trials = 100;

# All three files are opened read-only with the three-argument form of open,
# so the mode is stated explicitly and can never be taken from the file name.

# Forms to be converted (read line by line further down).
$input_file = "Japanese-ToConvert.txt";
open (INFILE, '<', $input_file) or die "Warning! Can't open input file: $!\n";

# Reference conversions that the rule ordering has to reproduce.
$check_file = "Japanese-Converted.txt";
open (CHECKFILE, '<', $check_file) or die "Warning! Can't open check file: $!\n";

# Rewrite rules whose order is being searched for.
$rules_file = "JapaneseRules.txt";
open (RULESFILE, '<', $rules_file) or die "Warning! Can't open rule file: $!\n";
# Read in the rules file and store each line in the @rules array of arrays.
# Every line is expected to hold two tab-separated fields: the pattern to
# look for and the text to put in its place.  File order is preserved.
while (my $line = <RULESFILE>) {
    chomp($line);
    my ($kunrei, $hepburn) = split("\t", $line);
    # A line with nothing before the first tab (a blank line, for instance)
    # has no pattern.  In Perl an empty s/// pattern does not mean "match
    # nothing": it re-uses the last pattern that matched successfully.  Such
    # lines are therefore skipped instead of being stored as rules.
    next unless defined $kunrei && length $kunrei;
    # split drops a trailing empty field, so a rule that simply deletes its
    # match arrives without a replacement; store an explicit empty string.
    $hepburn = '' unless defined $hepburn;
    # Now, place this pair onto the end of the @rules array
    push(@rules, [ $kunrei, $hepburn ]);
}
# Everything needed from the rules file is in memory now.
close(RULESFILE);
# Now read in the input forms, and store them so we can learn from them.
# The check file is read in lock step: line N of the check file is the
# expected conversion of line N of the input file, so $inputs[$i] and
# $answers[$i] always belong together.
while (my $line = <INFILE>) {
    chomp($line);
    my $check_line = <CHECKFILE>;
    # An input form without a reference answer could never be counted as
    # correct, which would keep the search below running forever.  Stop here
    # with a clear message instead.
    if (!defined $check_line) {
        my $line_number = scalar(@inputs) + 1;
        die "Warning! Check file has no line matching input line $line_number\n";
    }
    chomp($check_line);
    push (@inputs, $line);
    push (@answers, $check_line);
}
# Both files have been read as far as they are needed.
close(INFILE);
close(CHECKFILE);
# The hypothesis space of rule orders is the number of possible permutations
# One way to explore them would be to go about it systematically (trying every
# possible permutation).
# Another possibility is to try permutations out randomly until you hit on one
# that works
# (Neither is optimal, of course-- but given no better options, when would the
# first be sensible, and when might you prefer the second?)
# Here, we'll do the random stabs in the dark approach:
# We want to keep a copy of the start state, so we can keep going back to it.
# Only the outer list is copied; each entry is still a reference to the same
# [ pattern, replacement ] pair held in @rules.
# Note: the element is read as $rules[$i].  The older spelling @rules->[$i]
# (using an array as if it were a reference) is a fatal compile-time error
# from Perl 5.22 onwards.
for my $i (0 .. $#rules) {
    print "keeping original copy of rule $i\n";
    push (@original_rules, $rules[$i]);
}
# Apply every rule, in the order given, to one form and return the result.
#   $form      - the string to transform (the caller's copy is not modified)
#   $rule_list - reference to an array of [ pattern, replacement ] pairs;
#                each pattern is used as a regular expression and replaced
#                everywhere it matches
sub apply_rules_in_order {
    my ($form, $rule_list) = @_;
    for my $rule (@$rule_list) {
        my ($pattern, $replacement) = @$rule;
        # An empty pattern would make s/// silently re-use the last pattern
        # that matched, so a rule without a pattern is ignored.
        next unless defined $pattern && length $pattern;
        $replacement = '' unless defined $replacement;
        $form =~ s/$pattern/$replacement/g;
    }
    return $form;
}

# Run the random search $number_of_trials times.  Each trial keeps swapping
# two randomly chosen rules until the current order converts every input
# form into its expected answer, and adds the number of swaps it needed to
# $total_iterations.
# NOTE: a trial only ends when a fully consistent order is found; if no such
# order exists for the given rules and data, the search does not terminate.
for (my $t = 1; $t <= $number_of_trials; $t++) {
    # For each trial, we start at the start state and try solving it again.
    # (Assigning the saved list resets @rules exactly; "@rules = undef" would
    # instead leave a one-element list holding undef, i.e. a bogus extra rule.)
    @rules = @original_rules;
    my $iterations = 0;
    my $number_correct = 0;
    while ($number_correct != scalar(@inputs)) {
        $number_correct = 0;
        $iterations++;
        # Try flipping two rules.  With fewer than two rules there is nothing
        # to reorder, and indexing an empty list would create an undef entry.
        if (@rules > 1) {
            # rand returns a fraction, so truncate it to a whole index.
            my $r1 = int(rand(scalar(@rules)));
            my $r2 = int(rand(scalar(@rules)));
            # The following contains an extra fancy bit of code to round off the number when it's printed.
            # Instead of the variables $r1 and $r2, we put a placeholder "%.3f" meaning a floating point
            # (decimal) number with three decimal places. Then, after the string, we list the variables
            # that should go in those spots (in order)
            # printf "Flipping rules %.3f ($rules[$r1][0] -> $rules[$r1][1]) and %.3f ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2;
            @rules[$r1, $r2] = @rules[$r2, $r1];
        }
        for my $i (0 .. $#inputs) {
            # We'll start with the current input, and transform it
            my $output = apply_rules_in_order($inputs[$i], \@rules);
            # Now check answer against the "real" answer in the checkfile
            if ($output eq $answers[$i]) {
                $number_correct++;
            }
        }
    }
    $total_iterations += $iterations;
    print "Trial $t took $iterations iterations\n";
}
# Now that we're done, the average iterations is the total over the number of trials.
# With no trials at all there is nothing to average, and dividing by zero
# would abort the script, so that case gets its own message.
if ($number_of_trials) {
    $average_iterations = $total_iterations / $number_of_trials;
    printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;
}
else {
    print "\nNo trials were run, so there is no average solution time to report\n";
}