# hepburn7.pl
# Attempts to rearrange order of rules until it finds a consistent one
#
# Reads a list of tab-separated rewrite rules (pattern <TAB> replacement),
# a file of input forms, and a parallel file of expected outputs. For each
# trial it starts from the original rule order and repeatedly swaps two
# randomly chosen rules until applying the rules, in order, to every input
# reproduces every expected output. It reports the number of iterations each
# trial needed and the average over all trials.

use strict;
use warnings;

my $number_of_trials = 100;

my $input_file = "Japanese-ToConvert.txt";
open(my $in_fh, '<', $input_file) or die "Warning! Can't open input file: $!\n";

my $check_file = "Japanese-Converted.txt";
open(my $check_fh, '<', $check_file) or die "Warning! Can't open check file: $!\n";

my $rules_file = "JapaneseRules.txt";
open(my $rules_fh, '<', $rules_file) or die "Warning! Can't open rule file: $!\n";

# Read in the rules file and store each line in the rules array of arrays
my @rules;
while (my $line = <$rules_fh>) {
    chomp($line);
    my ($kunrei, $hepburn) = split("\t", $line);

    # Skip blank lines (or lines with an empty left-hand side): an empty
    # pattern in s/// does not mean "match nothing" -- Perl silently reuses
    # the last successfully matched pattern instead.
    next unless defined $kunrei && length $kunrei;

    # A rule with no right-hand side rewrites to the empty string
    $hepburn = '' unless defined $hepburn;

    # Now, place this pair onto the end of the @rules array
    push(@rules, [ $kunrei, $hepburn ]);
}
close($rules_fh);

# Now read in the input forms, and store them so we can learn from them.
# The check file is read in lock step: line N of the check file is the
# expected output for line N of the input file.
my @inputs;
my @answers;
while (my $line = <$in_fh>) {
    chomp($line);

    my $check_line = <$check_fh>;
    # Without an answer for every input the search below could never
    # succeed, so stop here rather than looping forever.
    defined $check_line
        or die "Warning! Check file $check_file has fewer lines than input file $input_file\n";
    chomp($check_line);

    push(@inputs,  $line);
    push(@answers, $check_line);
}
close($in_fh);
close($check_fh);

# The hypothesis space of rule orders is the number of possible permutations.
# One way to explore them would be to go about it systematically (trying every
# possible permutation).
# Another possibility is to try permutations out randomly until you hit on one
# that works.
# (Neither is optimal, of course -- but given no better options, when would the
# first be sensible, and when might you prefer the second?)
# Here, we'll do the random stabs in the dark approach:

# We want to keep a copy of the start state, so we can keep going back to it
my @original_rules;
for my $i (0 .. $#rules) {
    print "keeping original copy of rule $i\n";
    push(@original_rules, $rules[$i]);
}

my $total_iterations = 0;

for my $t (1 .. $number_of_trials) {
    # For each trial, we start at the start state and try solving it again.
    # (A plain list copy; "@rules = undef" would leave a stray undef element.)
    @rules = @original_rules;

    my $iterations     = 0;
    my $number_correct = 0;

    # NOTE: this loop only ends when every input is converted correctly, so
    # it does not terminate if no ordering of the rules is consistent with
    # the check file.
    while ($number_correct != scalar @inputs) {
        $number_correct = 0;
        $iterations++;

        # Try flipping two rules (indices are truncated to whole numbers)
        my $r1 = int(rand(scalar @rules));
        my $r2 = int(rand(scalar @rules));

        # Debugging aid, normally disabled:
        # printf "Flipping rules %d ($rules[$r1][0] -> $rules[$r1][1]) and %d ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2;

        @rules[$r1, $r2] = @rules[$r2, $r1];

        for my $i (0 .. $#inputs) {
            # We'll start with the current input, and transform it by
            # applying every rule, in the current order
            my $output = $inputs[$i];
            for my $rule (@rules) {
                my ($pattern, $replacement) = @{$rule};
                $output =~ s/$pattern/$replacement/g;
            }

            # Now check answer against the "real" answer in the checkfile
            if ($output eq $answers[$i]) {
                $number_correct++;
            }
        }
    }

    $total_iterations += $iterations;
    print "Trial $t took $iterations iterations\n";
}

# Now that we're done, the average iterations is the total over the number of trials.
# "%.2f" is a placeholder for a floating point number printed with two decimal
# places; the value to fill it in is listed after the format string.
my $average_iterations = $total_iterations / $number_of_trials;
printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;