#!/usr/athena/bin/perl -w
#
# Compare the output of a part-of-speech tagger against a gold-standard
# tagged file.  Depending on the options given, print a confusion matrix
# of the tagging errors and/or the kappa agreement score.
#
# Usage: see print_usage() at the bottom of this file.

use strict;
use warnings;
use Getopt::Std;

# Run as a script, but stay loadable (via require/do) without side effects.
main() unless caller;

# Entry point: parse the command line, tally tagger-vs-gold tag pairs for
# every aligned token, then print whichever reports were requested.
# Reads options and file names from @ARGV; exits via print_usage() when
# the command line is incomplete and dies when a file cannot be opened.
sub main {
    my %opts;
    getopts( 'bhmk', \%opts );

    # -b selects Brill-style "word/tag" tokens, -h the LTG HMM format.
    my $brill;
    if ( defined $opts{'b'} ) {
        $brill = 1;
    }
    elsif ( defined $opts{'h'} ) {
        $brill = 0;
    }
    else {
        print_usage();
    }

    my $print_matrix  = defined $opts{'m'};
    my $compute_kappa = defined $opts{'k'};
    print_usage() if !$print_matrix && !$compute_kappa;

    # Get the parameters
    print_usage() if @ARGV < 2;
    my ( $tagger_output, $gold_standard ) = @ARGV;

    open my $tagger_fh, '<', $tagger_output
        or die "Couldn't open file $tagger_output: $!\n";
    open my $gold_fh, '<', $gold_standard
        or die "Couldn't open file $gold_standard: $!\n";

    my %confusion;    # $confusion{gold tag}{tagger tag} = occurrence count
    my %tags;         # every tag seen in either file
    my $num_errors  = 0;
    my $num_correct = 0;

    # The HMM tagger separates word and tag with '_', Brill with '/'.
    my $tagger_separator = $brill ? '/' : '_';

    while (1) {

        # Read the next meaningful line from both files; stop as soon as
        # either file runs out.
        my $tagger_line = _next_tagger_line($tagger_fh);
        my $gold_line   = _next_gold_line($gold_fh);
        last unless defined $tagger_line && defined $gold_line;

        # Strip chunking brackets from the gold standard text
        $gold_line =~ tr/[]//d;

        $tagger_line = _normalize_hmm_line($tagger_line) if !$brill;

        # Tokenize them both
        my @tagger_words = split ' ', $tagger_line;
        my @gold_words   = split ' ', $gold_line;

        # ... should be the same length; if not, only the common prefix
        # can be compared, so warn about it.
        if ( @tagger_words != @gold_words ) {
            print STDERR "WARNING: different number of words on lines.\n";
            print STDERR $tagger_line;
            print STDERR $gold_line;
        }

        # Compare every token up to and including the last index that
        # exists in both lines.
        my $last_index =
            $#tagger_words < $#gold_words ? $#tagger_words : $#gold_words;

        for my $i ( 0 .. $last_index ) {
            my ( undef, $tt ) =
                _split_token( $tagger_words[$i], $tagger_separator );
            my ( undef, $gt ) = _split_token( $gold_words[$i], '/' );

            $confusion{$gt}{$tt}++;
            $tags{$gt} = 1;
            $tags{$tt} = 1;

            if ( $gt ne $tt ) {
                $num_errors++;
            }
            else {
                $num_correct++;
            }
        }
    }

    close $tagger_fh;
    close $gold_fh;

    # If they want it, print the confusion matrix
    _print_matrix( \%confusion, \%tags, $num_errors ) if $print_matrix;

    # If they want it, compute and print the kappa score
    if ($compute_kappa) {
        print "\n" if $print_matrix;
        _print_kappa( \%confusion, \%tags, $num_correct, $num_errors );
    }

    return;
}

# Return the next line of the tagger output that is not blank, or undef
# at end of file.
sub _next_tagger_line {
    my ($fh) = @_;

    while ( defined( my $line = <$fh> ) ) {
        return $line if $line !~ /^\s*$/;
    }
    return;
}

# Return the next line of the gold-standard file that is not blank, does
# not contain a run of '=' (divider) and does not contain '*x*' (header),
# or undef at end of file.
sub _next_gold_line {
    my ($fh) = @_;

    while ( defined( my $line = <$fh> ) ) {
        next if $line =~ /^\s*$/;
        next if $line =~ /=+/;
        next if $line =~ /\*x\*/;
        return $line;
    }
    return;
}

# Turn one line of HMM tagger output into plain "word_tag" tokens:
# remove <S>/</S> markers, square brackets and doubled parentheses, and
# rewrite <W ...>word</W> elements as word_tag.
sub _normalize_hmm_line {
    my ($line) = @_;

    $line =~ s/<\/?S>//g;
    $line =~ tr/[]//d;
    $line =~ s/\(\(//g;
    $line =~ s/\)\)//g;

    # NOTE(review): the opening part of this pattern was lost from the
    # file; it is reconstructed on the assumption that the tag is carried
    # in a quoted C attribute (<W C='NN'>word</W>) -- confirm against
    # real tagger output.
    $line =~ s{<W\s+C=(['"])([^'"]*)\1[^>]*>([^<]*)</W>}{${3}_${2}}g;

    return $line;
}

# Split a token into (word, tag) at the LAST occurrence of $separator, so
# that words which themselves contain the separator (e.g. "1\/2/CD") keep
# their real tag.  A token without any separator yields an empty tag.
sub _split_token {
    my ( $token, $separator ) = @_;

    my $cut = rindex $token, $separator;
    return ( $token, '' ) if $cut < 0;
    return ( substr( $token, 0, $cut ), substr( $token, $cut + 1 ) );
}

# Print the confusion matrix: one row per gold tag, one column per tagger
# tag, tags sorted alphabetically.  Each off-diagonal cell is that
# confusion's share of all errors, in percent; the diagonal shows '-'.
# Every cell is padded to four characters so the columns line up.
sub _print_matrix {
    my ( $confusion, $tags, $num_errors ) = @_;

    my @sorted_tags = sort keys %{$tags};

    printf "%-4s|", '';
    printf "%-4s|", $_ for @sorted_tags;
    print "\n";

    for my $gold_tag (@sorted_tags) {
        printf "%-4s|", $gold_tag;
        for my $tagger_tag (@sorted_tags) {
            my $count = $confusion->{$gold_tag}{$tagger_tag};
            if ( $gold_tag eq $tagger_tag ) {
                printf "%-4s|", '-';
            }
            elsif ( defined $count && $num_errors > 0 ) {
                printf "%4.1f|", ( 100 * $count / $num_errors );
            }
            else {
                printf "%-4s|", '';
            }
        }
        print "\n";
    }

    return;
}

# Compute and print observed agreement P(A), expected agreement P(E) and
# the kappa score (P(A) - P(E)) / (1 - P(E)).
#
# For each tag
#  - Sum values across that row -- how many times first tagger chose it
#  - Sum values across that column -- # times second tagger chose it
#  - Divide each by total # trials to get % of times each tagger chose
#    that tag
#  - Multiply the values together to get expected agreement on that tag.
# Then total expected agreement is the sum of all these individual
# agreements.
sub _print_kappa {
    my ( $confusion, $tags, $num_correct, $num_errors ) = @_;

    my $total_trials = $num_correct + $num_errors;
    if ( $total_trials == 0 ) {

        # Nothing was compared, so every ratio below would divide by zero.
        print STDERR "WARNING: no tokens were compared; cannot compute kappa.\n";
        return;
    }

    # Percent agreement
    my $p_a = $num_correct / $total_trials;
    printf "P(A): %.4f\n", $p_a;

    my $p_e = 0;
    for my $tag ( keys %{$tags} ) {
        my $row_sum    = 0;
        my $column_sum = 0;
        for my $other ( keys %{$tags} ) {
            $row_sum    += ( $confusion->{$tag}{$other} || 0 );
            $column_sum += ( $confusion->{$other}{$tag} || 0 );
        }
        $p_e += ( $row_sum / $total_trials ) * ( $column_sum / $total_trials );
    }
    printf "P(E): %.4f\n", $p_e;

    # Kappa is undefined when chance agreement is already total (for
    # example when only a single tag ever occurs).
    if ( 1 - $p_e == 0 ) {
        print "Kappa: undefined (expected agreement is 1)\n";
        return;
    }

    my $kappa_val = ( $p_a - $p_e ) / ( 1 - $p_e );
    printf "Kappa: %.4f\n", $kappa_val;

    return;
}

# Print the command-line help text and terminate the program.
sub print_usage {
    print "\nUsage: $0 (-b|-h) [-m] [-k] tagger-output gold-standard\n\n";
    print "\ttagger-output: The file contaning the output of the tagger\n";
    print "\tgold-standard: The \"gold standard\" tagged file\n";
    print "\tYou must specify both of these files\n\n";
    print "\t-b: Process the output of the Brill tagger\n";
    print "\t-h: Process the output of the LTG HMM tagger\n";
    print "\tYou must specify either -b or -h\n\n";
    print "\t-m: Print the confusion matrix\n";
    print "\t-k: Compute kappa\n";
    print "\tYou must specify at least one of -m and -k\n\n";
    exit;
}

# True value so the file can also be loaded with require.
1;