# italian.pl # Same as hepburn8.pl, but for italian files # (Modified a bit to handle more complex changes involving backreferences) $number_of_trials = 1; $input_file = "Italian-ToConvert.txt"; open (INFILE, $input_file) or die "Warning! Can't open input file: $!\n"; $check_file = "Italian-Converted.txt"; open (CHECKFILE, $check_file) or die "Warning! Can't open check file: $!\n"; $rules_file = "ItalianRules.txt"; open (RULESFILE, $rules_file) or die "Warning! Can't open rule file: $!\n"; # Read in the file and store each line in the rules array of arrays while ($line = ) { chomp($line); unless ($line =~ /^\s*$/) { # skip lines that are just whitespace ($orthographic, $phonemic) = split("\t", $line); # Now, place this pair onto the end of the @rules array push(@rules, [ $orthographic, $phonemic ]); } } # Now read in the input forms, and store them so we can learn from them while ($line = ) { chomp($line); push (@inputs, $line); $check_line = ; chomp($check_line); push (@answers, $check_line); } # The hypothesis space of rule orders is the number of possible permutations # One way to explore them would be to go about it systematically (trying every # possible permutation). # Another possibility is to try permutations out randomly until you hit on one # that works # (Neither is optimal, of course-- but given no better options, when would the # first be sensible, and when might you prefer the second?) # Here, we'll do the random stabs in the dark approach: # We want to keep a copy of the start state, so we can keep going back to it for (my $i = 0; $i <= $#rules; $i++) { push (@original_rules, @rules->[$i]); } for ($t = 1; $t <= $number_of_trials; $t++) { # For each trial, we start at the start state and try solving it again @rules = undef; for (my $i = 0; $i <= $#original_rules; $i++) { push (@rules, @original_rules->[$i]); } $iterations = 0; $number_correct = 0; while ($number_correct != ($#inputs + 1)) { $number_correct = 0; $iterations++; # Try flipping two rules $r1 = rand($#rules + 1); $r2 = rand($#rules + 1); printf "Iteration $iterations: flipping rules %.3f ($rules[$r1][0] -> $rules[$r1][1]) and %.3f ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2; @rules[$r1, $r2] = @rules[$r2, $r1]; for ($i = 0; $i <= $#inputs; $i++) { # We'll start with the current input, and transform it $output = $inputs[$i]; for ($r = 0; $r <= $#rules; $r++) { # Nab the backreferences if ($output =~ /$rules[$r][0]/) { @backrefs = ($output =~ /$rules[$r][0]/); } # Now do the replacement, putting in the "dummy" backref $1, etc. $output =~ s/$rules[$r][0]/$rules[$r][1]/g; while ($output =~ /\$(\d+)/) { $replacement = $backrefs['$1']; $output =~ s/\$$1/$replacement/; } } # Now check answer against the "real" answer in the checkfile if ($output eq $answers[$i]) { $number_correct++; } else { print "still incorrect on form $i\n"; } } } $total_iterations += $iterations; print "Trial $t took $iterations iterations\n"; } # Now that we're done, the average iterations is the total over the number of trials $average_iterations = $total_iterations / $number_of_trials; printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;