# italian2.pl # Same as hepburn8.pl, but for italian files # (Modified a bit to handle more complex changes involving backreferences) $number_of_trials = 100; $input_file = "Italian-ToConvert.txt"; open (INFILE, $input_file) or die "Warning! Can't open input file: $!\n"; $check_file = "Italian-Converted.txt"; open (CHECKFILE, $check_file) or die "Warning! Can't open check file: $!\n"; $rules_file = "ItalianRules.txt"; open (RULESFILE, $rules_file) or die "Warning! Can't open rule file: $!\n"; open (LOGFILE, ">italian2.log") or die "Warning! Can't create log file: $!\n"; # Read in the file and store each line in the rules array of arrays while ($line = ) { chomp($line); unless ($line =~ /^\s*$/) { # skip lines that are just whitespace ($orthographic, $phonemic) = split("\t", $line); # Now, place this pair onto the end of the @rules array push (@rules, [ $orthographic, $phonemic ]); } } # Now read in the input forms, and store them so we can learn from them while ($line = ) { chomp($line); push (@inputs, $line); $check_line = ; chomp($check_line); push (@answers, $check_line); } # The hypothesis space of rule orders is the number of possible permutations # One way to explore them would be to go about it systematically (trying every # possible permutation). # Another possibility is to try permutations out randomly until you hit on one # that works # (Neither is optimal, of course-- but given no better options, when would the # first be sensible, and when might you prefer the second?) # Here, we'll do the random stabs in the dark approach: # We want to keep a copy of the start state, so we can keep going back to it for (my $i = 0; $i <= $#rules; $i++) { push (@original_rules, @rules->[$i]); } for ($t = 1; $t <= $number_of_trials; $t++) { # For each trial, we start at the start state and try solving it again @rules = (); for (my $i = 0; $i <= $#original_rules; $i++) { push (@rules, @original_rules->[$i]); } # print "Starting rules:\n"; # for ($i=0; $i <= $#rules; $i++) { # print "\t$i\t$rules[$i][0] -> $rules[$i][1]\n"; # } $iterations = 0; $number_correct = 0; while ($number_correct != ($#inputs + 1)) { $number_correct = 0; $iterations++; # Try flipping two rules $r1 = rand($#rules ); $r2 = rand($#rules ); printf "Iteration $iterations: flipping rules %.3f ($rules[$r1][0] -> $rules[$r1][1]) and %.3f ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2; @rules[$r1, $r2] = @rules[$r2, $r1]; enforce_elsewhere_condition(); for ($i = 0; $i <= $#inputs; $i++) { # We'll start with the current input, and transform it $output = $inputs[$i]; for ($r = 0; $r <= $#rules; $r++) { # Nab the backreferences if ($output =~ /$rules[$r][0]/) { @backrefs = ($output =~ /$rules[$r][0]/); } # Now do the replacement, putting in the "dummy" backref $1, etc. $output =~ s/$rules[$r][0]/$rules[$r][1]/g; while ($output =~ /\$(\d+)/) { $replacement = @backrefs['$1']; $output =~ s/\$$1/$replacement/; } } # Now check answer against the "real" answer in the checkfile if ($output eq $answers[$i]) { $number_correct++; } else { # print "still incorrect on form $i\n"; } } } $total_iterations += $iterations; print "Trial $t took $iterations iterations\n"; printf LOGFILE "Trial $t took $iterations iterations\n"; } # Now that we're done, the average iterations is the total over the number of trials $average_iterations = $total_iterations / $number_of_trials; printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations; sub enforce_elsewhere_condition { # The idea here is to make sure that more specific rules always come # before more general rules (Kiparsky 1982's Elsewhere Condition) # Let's enforce this by starting at the end, and whenever a constraint # at the end is found to be more specific than a preceding constraint, # we move it up (so it "budges" before its more general counterpart) for ($r1 = $#rules; $r1 >= 0; $r1--) { # The structural description we're interested in is $rules[$r][0] for ($r2 = ($r1 - 1); $r2 > 0; $r2--) { #print "[$r1, $r2] "; # There is a specific/general relation if $r1's SD contains $r2's if ($rules[$r1][0] ne "" and $rules[$r2][0] ne "" and $rules[$r1][0] =~ /\Q$rules[$r2][0]\E/) { # We need to move r1 to just before r2 print "Putting $r1: $rules[$r1][0] -> $rules[$r1][1] before $r2: $rules[$r2][0] -> $rules[$r2][1]\n"; @new_rules = @rules[0..$r2-1,$r1,$r2..$r1-1,$r1+1..$#rules]; @rules = @new_rules; # Finally, get rid of the old copy of r1 # Now everything has shifted up one, so increment $r1 and $r2 $r1++; $r2++; # print "Result of elsewhere condition enforcement:\n"; # for ($i=0; $i <= $#rules; $i++) { # print "\t$i\t$rules[$i][0] -> $rules[$i][1]\n"; # } } } } }