# italian.pl
# Same as hepburn8.pl, but for italian files
# (Modified a bit to handle more complex changes involving backreferences)
$number_of_trials = 1;
$input_file = "Italian-ToConvert.txt";
open (INFILE, $input_file) or die "Warning! Can't open input file: $!\n";
$check_file = "Italian-Converted.txt";
open (CHECKFILE, $check_file) or die "Warning! Can't open check file: $!\n";
$rules_file = "ItalianRules.txt";
open (RULESFILE, $rules_file) or die "Warning! Can't open rule file: $!\n";
open (LOGFILE, ">italian3.log") or die "Warning! Can't create log file: $!\n";
# Read in the file and store each line in the rules array of arrays
while ($line = ) {
chomp($line);
unless ($line =~ /^\s*$/) { # skip lines that are just whitespace
($orthographic, $phonemic, @rest) = split("\t", $line);
# Now, place this pair onto the end of the @rules array
push(@rules, [ $orthographic, $phonemic ]);
}
}
$number_of_rules = $#rules;
# Now read in the input forms, and store them so we can learn from them
while ($line = ) {
chomp($line);
push (@inputs, $line);
$check_line = ;
chomp($check_line);
push (@answers, $check_line);
}
for ($t = 1; $t <= $number_of_trials; $t++) {
# For each trial, we start at the start state and try solving it again
# Rather than manipulating the rules array directly, we'll just keep track of
# the currently hypothesized order, using an "indexing array"
# We start by assuming that we'll use the rules in the given order
# (that is, the initial ordering is 1, 2, 3, 4, ...)
for (my $i = 0; $i <= $number_of_rules; $i++) {
$ordering[$i] = $i;
}
$known_orderings = undef;
$explored_orderings = undef;
# Start by checking the accuracy of the grammar using the initial ordering:
$current_number_correct = check_accuracy(@ordering);
$iterations = 0;
while ($current_number_correct != ($#inputs + 1)) {
$iterations++;
# We try to improve performance by swapping rules that might feed or bleed each other
# There are various ways to explore the space of possible orderings,
# but one way is to start at the end of the grammar and try to "promote" rules
# by moving them up in the grammar
$swap1 = -1;
$swap2 = -1;
FINDSWAPPAIR:
for (my $r1 = $#rules; $r1 >= 1; $r1--) {
for (my $r2 = $r1 - 1; $r2 >= 0; $r2--) {
# Check if we already know that r2 crucially precedes r1
next if ($known_orderings =~ /$ordering[$r2]<$ordering[$r1]/ or
$explored_orderings =~ /$ordering[$r2]<$ordering[$r1]/ or
$explored_orderings =~ /$ordering[$r1]<$ordering[$r2]/
);
# Now check if moving r1 before r2 could possibly change things
# There are three possibilities:
# (1) r1 and r2 have overlapping structural descriptions (bleeding/counterbleeding)
# (2) struc desc of r1 overlaps with output of r2 (currently feeding, try counterfeeding)
# (3) struc desc of r2 overlaps with output of r1 (currently counterfeeding, try feeding)
# The struc descs are: $rules[$ordering[$r1]][0] and $rules[$ordering[$r2]][0]
# "Overlapping" means that at least one of their "terms" could match the same segment
# (a "term" in this simplified conception is an expression that refers to a single segment)
if (overlap($rules[$ordering[$r1]][0], $rules[$ordering[$r2]][0])) {
print "Rules $ordering[$r1] and $ordering[$r2] have overlapping struc descs (potential for bleeding/counterbleeding)\n";
print "\t\t[$rules[$ordering[$r1]][0]] and [$rules[$ordering[$r2]][0]]\n";
($swap1, $swap2) = ($r1, $r2);
} elsif (overlap($rules[$ordering[$r2]][0], $rules[$ordering[$r1]][1])) {
print "Rule $ordering[$r2] operates on the output of $ordering[$r1] (currently counterfeeding; trying feeding)\n";
print "\t\t[$rules[$ordering[$r2]][0]] and [$rules[$ordering[$r1]][1]]\n";
($swap1, $swap2) = ($r1, $r2);
} elsif (overlap($rules[$ordering[$r1]][0], $rules[$ordering[$r2]][1])) {
print "Rule $ordering[$r1] operates on the output of $ordering[$r2] (currently feeding; trying counterfeeding)\n";
print "\t\t[$rules[$ordering[$r1]][0]] and [$rules[$ordering[$r2]][1]]\n";
($swap1, $swap2) = ($r1, $r2);
}
if ($swap1 >= 0) {
last FINDSWAPPAIR;
}
}
}
# If we got through the FINDSWAPPAIR block and didn't find any eligible swaps, then we're stuck
if ($swap1 == -1) {
print "****It appears that there is no ordering of these rules that will correctly derive the data****\n";
check_accuracy(@ordering);
exit;
} else { # Try this swap
print "\tAttempting to move rule $ordering[$swap1] before $ordering[$swap2]\n";
@proposed_ordering = (@ordering[0..$swap2-1],$ordering[$swap1],@ordering[$swap2..$swap1-1],@ordering[$swap1+1..$number_of_rules]);
print "Proposed ordering: @proposed_ordering\n";
$new_number_correct = check_accuracy(@proposed_ordering);
if ($new_number_correct > $current_number_correct) {
# Aha, this helped! A new crucial ordering
# Save the new order
@ordering = @proposed_ordering;
$current_number_correct = $new_number_correct;
$known_orderings .= "$ordering[$swap1]<$ordering[$swap2] ";
$explored_orderings = undef;
print "\tNew order is better: rule $proposed_ordering[$swap1] must come before rule $proposed_ordering[$swap2]\n";
print "\t\t$rules[$ordering[$swap1]][0] -> $rules[$ordering[$swap1]][1] PRECEDES ";
print "$rules[$ordering[$swap2]][0] -> $rules[$ordering[$swap2]][1]\n";
print "\tKnown orderings: $known_orderings\n";
print "\tCurrent order: @ordering\n";
} elsif ($new_number_correct < $current_number_correct) {
# A detrimental move; make sure we don't do it again (at least until something else has changed)
$explored_orderings .= "$ordering[$swap2]<$ordering[$swap1] ";
print "\tNew order is worse: rule $ordering[$swap2] probably comes before rule $ordering[$swap1]\n";
}
# It's also possible that the move made no difference, and we shouldn't try it again.
# However, I think it may also be possible that something which seems not to make a difference at
# the moment might turn out to be crucial; So, we should keep track of the irrelevant_orderings at the moment,
# but keep in mind that if we change any other ordering, then we might as well re-check orderings
# that were previously irrelevant (by forgetting that they were irrelevant)
else {
print "\tNew order makes no difference; leaving things as they were\n\n";
$explored_orderings .= "$ordering[$swap2]<$ordering[$swap1] ";
}
}
}
$total_iterations += $iterations;
print "Trial $t took $iterations iterations\n";
printf LOGFILE "Trial $t took $iterations iterations\n";
}
# Now that we're done, the average iterations is the total over the number of trials
$average_iterations = $total_iterations / $number_of_trials;
printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;
sub check_accuracy {
my @check_ordering = @_;
my $correct = 0;
for ($i = 0; $i <= $#inputs; $i++) {
# We'll start with the current input, and transform it
$output = $inputs[$i];
for ($r = 0; $r <= $number_of_rules; $r++) {
# Nab the backreferences
if ($output =~ /$rules[$check_ordering[$r]][0]/) {
@backrefs = ($output =~ /$rules[$check_ordering[$r]][0]/);
}
# Now do the replacement, putting in the "dummy" backref $1, etc.
$output =~ s/$rules[$check_ordering[$r]][0]/$rules[$check_ordering[$r]][1]/g;
while ($output =~ /\$(\d+)/) {
$replacement = $backrefs['$1'];
$output =~ s/\$$1/$replacement/;
}
}
# Now check answer against the "real" answer in the checkfile
if ($output eq $answers[$i]) {
$correct++;
} else {
print "\tIncorrect on form $i ($inputs[$i]: deriving [$output] instead of [$answers[$i]])\n";
}
}
return $correct;
}
sub overlap {
my $string1 = @_[0];
my $string2 = @_[1];
if (length($string1) > length($string2) ) {
return ($string1 =~ /\Q$string2\E/);
} else {
return ($string2 =~ /\Q$string1\E/);
}
}