# hepburn7.pl
# Attempts to rearrange order of rules until it finds a consistent one.
#
# Reads a list of rewrite rules (one "kunrei<TAB>hepburn" pair per line), a
# file of input forms, and a parallel file holding the expected output for
# each input form.  It then repeatedly swaps two randomly chosen rules until
# applying the rules in their current order converts every input form into
# its expected output, and reports how many swaps that took, averaged over
# $number_of_trials independent trials.

use strict;
use warnings;

my $number_of_trials = 100;
my $input_file       = "Japanese-ToConvert.txt";
my $check_file       = "Japanese-Converted.txt";
my $rules_file       = "JapaneseRules.txt";

# Run the experiment only when executed as a script, so the helper subs below
# can be loaded (e.g. with require) and exercised without touching any files.
main() unless caller;

# main()
# Loads the data files, runs the random search $number_of_trials times, and
# prints the per-trial and average iteration counts to STDOUT.
sub main {
    # Files are opened in the order input, check, rules, so that the first
    # "Can't open" error reported is the same one as before.
    my ($inputs, $answers) = read_forms($input_file, $check_file);
    my @original_rules = read_rules($rules_file);

    # With no rules there is nothing to reorder; the swap below would only
    # create an undefined rule.
    die "Warning! No rules found in $rules_file\n" unless @original_rules;

    # We want to keep a copy of the start state, so we can keep going back
    # to it.
    for my $i (0 .. $#original_rules) {
        print "keeping original copy of rule $i\n";
    }

    # The hypothesis space of rule orders is the number of possible
    # permutations.  One way to explore them would be to go about it
    # systematically (trying every possible permutation).  Another
    # possibility is to try permutations out randomly until you hit on one
    # that works.
    # (Neither is optimal, of course -- but given no better options, when
    # would the first be sensible, and when might you prefer the second?)
    # Here, we'll do the random stabs in the dark approach.
    my $total_iterations = 0;
    for my $t (1 .. $number_of_trials) {
        # For each trial, we start at the start state and try solving it
        # again.  A plain list assignment replaces the old contents; note
        # that "@rules = undef" would instead leave one undefined element.
        my @rules = @original_rules;

        my $iterations     = 0;
        my $number_correct = 0;

        # NOTE: the current order is only tested after a swap, so every
        # trial with at least one input form takes at least one iteration.
        # If no ordering of the rules is consistent with the answers, this
        # loop never terminates.
        while ($number_correct != scalar @$inputs) {
            $iterations++;

            # Try flipping two rules.  rand() returns a fraction, so
            # truncate it to get a valid array index.  The two indices may
            # coincide, in which case the swap changes nothing.
            my $r1 = int(rand(scalar @rules));
            my $r2 = int(rand(scalar @rules));

            # The following contains an extra fancy bit of code to round off
            # the number when it's printed.  Instead of the variables $r1
            # and $r2, we put a placeholder "%.3f" meaning a floating point
            # (decimal) number with three decimal places.  Then, after the
            # string, we list the variables that should go in those spots
            # (in order).
            # printf "Flipping rules %.3f ($rules[$r1][0] -> $rules[$r1][1]) and %.3f ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2;
            @rules[$r1, $r2] = @rules[$r2, $r1];

            $number_correct = count_correct(\@rules, $inputs, $answers);
        }

        $total_iterations += $iterations;
        print "Trial $t took $iterations iterations\n";
    }

    # Now that we're done, the average iterations is the total over the
    # number of trials.
    my $average_iterations = $total_iterations / $number_of_trials;
    printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;

    return;
}

# read_rules($path)
# Reads the rule file and returns a list of [ $kunrei, $hepburn ] pairs, one
# per non-blank line, in file order.  Dies if the file cannot be opened.
sub read_rules {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Warning! Can't open rule file: $!\n";

    my @rules;
    while (my $line = <$fh>) {
        chomp($line);
        my ($kunrei, $hepburn) = split("\t", $line);

        # Skip lines with nothing to match: an empty pattern in s/// does
        # not mean "match nothing", it re-uses the last successful pattern.
        next unless defined $kunrei && length $kunrei;

        # split drops trailing empty fields, so a rule that rewrites to
        # nothing arrives here with an undefined right-hand side.
        $hepburn = '' unless defined $hepburn;

        # Now, place this pair onto the end of the rules array.
        push(@rules, [ $kunrei, $hepburn ]);
    }
    close($fh);

    return @rules;
}

# read_forms($input_path, $check_path)
# Reads the input forms and, in parallel, the expected answer for each one.
# Returns two array references: (\@inputs, \@answers).  Dies if either file
# cannot be opened or if the check file runs out of lines first.
sub read_forms {
    my ($input_path, $check_path) = @_;

    open(my $in, '<', $input_path)
        or die "Warning! Can't open input file: $!\n";
    open(my $check, '<', $check_path)
        or die "Warning! Can't open check file: $!\n";

    my (@inputs, @answers);
    while (my $line = <$in>) {
        chomp($line);

        # Each input line must have a matching line in the check file;
        # otherwise that form could never be counted as correct.
        my $check_line = <$check>;
        die "Warning! Check file $check_path has fewer lines than input file $input_path\n"
            unless defined $check_line;
        chomp($check_line);

        push(@inputs,  $line);
        push(@answers, $check_line);
    }
    close($in);
    close($check);

    return (\@inputs, \@answers);
}

# apply_rules($rules, $form)
# Applies every rule in @$rules, in order, to a copy of $form and returns the
# result.  Each rule's left-hand side is used as a regular expression (not a
# literal string) and is replaced globally by its right-hand side.
sub apply_rules {
    my ($rules, $form) = @_;

    for my $rule (@$rules) {
        my ($from, $to) = @$rule;
        $form =~ s/$from/$to/g;
    }

    return $form;
}

# count_correct($rules, $inputs, $answers)
# Returns how many of @$inputs are converted into the corresponding entry of
# @$answers by applying @$rules in their current order.
sub count_correct {
    my ($rules, $inputs, $answers) = @_;

    my $number_correct = 0;
    for my $i (0 .. $#{$inputs}) {
        # Check the answer against the "real" answer from the check file.
        my $output = apply_rules($rules, $inputs->[$i]);
        $number_correct++ if $output eq $answers->[$i];
    }

    return $number_correct;
}