### Script for Approximate Randomization Test of F-measure ### Copyright (C) 2008-2010 Hiroya Takamura, All rights reserved. ### usage: echo "4 13 14 6 40 8 3" | perl randtest_fm.pl # input format # First, calculate the number t[i] of instances in each case i. # "yes" means "relevant to the target class". # "no" means "irrelevant to the target class". # e.g., t[2] indicates the number of instances for which the baseline # says "irrelevant", the proposed method says "relevant" # and the correct answer is "relevant". #-------------------------------------------- #case number & baseline & proposed & answer #-------------------------------------------- #0 & yes & no & yes #1 & yes & no & no #2 & no & yes & yes #3 & no & yes & no #4 & yes & yes & yes #5 & yes & yes & no #6 & no & no & yes #7 & no & no & no # # The input format is: t[0] t[1] t[2] t[3] t[4] t[5] t[6] # e.g., echo "4 13 14 6 40 8 3" | perl randtest_fm.pl # # Note that t[7] is not used. # # Note also that we use the binomial distribution for random sampling, # but we have no computational advantage from it, because our binomial # sampling routine is very naive. By replacing it with a faster method, # we can have computational advantage. # $max_iter = 10000; my @t = split(" ",); $p1 = ($t[0]+$t[4])/($t[0]+$t[1]+$t[4]+$t[5]); $r1 = ($t[0]+$t[4])/($t[0]+$t[2]+$t[4]+$t[6]); $p2 = ($t[2]+$t[4])/($t[2]+$t[3]+$t[4]+$t[5]); $r2 = ($t[2]+$t[4])/($t[0]+$t[2]+$t[4]+$t[6]); $f1 = 2*$p1*$r1/($p1+$r1); $f2 = 2*$p2*$r2/($p2+$r2); $delta0 = abs($f1-$f2); print "The null hypothesis is that the baseline and the proposed method do not differ in F-measure.\n"; print "F-measure for baseline: ".$f1."\n"; print "F-measure for proposed: ".$f2."\n"; print "The number of shuffles: ".$max_iter."\n"; for($iter=0;$iter<$max_iter;$iter++){ $k0 = &binomial(0.5,$t[0]); $k1 = &binomial(0.5,$t[1]); $k2 = &binomial(0.5,$t[2]); $k3 = &binomial(0.5,$t[3]); $p1 = ($t[0]+$t[4]-$k0+$k2)/($t[0]+$t[1]+$t[4]+$t[5]-$k0-$k1+$k2+$k3); $r1 = ($t[0]+$t[4]-$k0+$k2)/($t[0]+$t[2]+$t[4]+$t[6]); $p2 = ($t[2]+$t[4]-$k2+$k0)/($t[2]+$t[3]+$t[4]+$t[5]-$k2-$k3+$k0+$k1); $r2 = ($t[2]+$t[4]-$k2+$k0)/($t[0]+$t[2]+$t[4]+$t[6]); $f1 = 2*$p1*$r1/($p1+$r1); $f2 = 2*$p2*$r2/($p2+$r2); my $delta = abs($f1-$f2); if($delta>=$delta0){$count++;} } $pvalue = ($count+1)/($max_iter+1); print "p-value: ".$pvalue."\n"; if($pvalue <= 0.05){ print "The difference is statistically significant with the significance level 0.05.\n"; } if($pvalue <= 0.03){ print "The difference is statistically significant with the significance level 0.03.\n"; } if($pvalue <= 0.01){ print "The difference is statistically significant with the significance level 0.01.\n"; } sub binomial{ my ($p,$N) = @_; my $n = 0; for(my $i=0;$i<$N;$i++){ my $u = rand(); if($u<$p){$n++;} } return $n; }