#!/usr/bin/perl

# This tool takes a profile with a set of operations, and displays areas of
# profiles with similar numbers of operations.

sub usage {
	print "usage: compare_peaks.pl -f <file> -v <var> -i <ignore> -verbose -h\n";
	print "-f:\tfile containing the set of profiles\n";
	print "-v:\tpercent variation to allow in comparisons\n";
	print "-i:\tignore groups with fewer than this many elements\n";
	print "-verbose: print verbose information\n";
	print "-h: print this help menu and exit\n";
	exit 0;
}

sub max {
	my $m = shift @_;
	foreach (@_) {
		if ($_ > $m) {
			$m = $_;
		}
	}
	return $m;
}

sub min {
	my $m = shift @_;
	foreach (@_) {
		if ($_ < $m) {
			$m = $_;
		}
	}
	return $m;
}

sub getgroups {
	my $i, $numgroups, $start, $end, $peak, $increase, $grouptot;

	$start = 0;
	$numgroups = 0;
	for ($i = 3; $i <= $#_; $i++) {
		if (!$start) {
			if ($_[$i]) {
				$start = $i - 2;
				$peak = $_[$i];
				$increase = 1;
				$grouptot = $_[$i];
			}
		} else {
			if (!$_[$i]) {		#if we just finished a group (no ops in bucket)
				$numgroups++;
				$end = $i - 3;
				$resarray[$numops][0] = $numgroups;
				$resarray[$numops][$numgroups] = {};
				$resarray[$numops][$numgroups]->{START} = $start;
				$resarray[$numops][$numgroups]->{END} = $end;
				$resarray[$numops][$numgroups]->{GROUPTOT} = $grouptot;
				if ($verbose) {
					print "group: $start -- $end (peak: $peak, tot: $grouptot)\n";
				}
				$start = 0;
			} elsif ($increase) {	#check if we are still increasing
				if ($_[$i] > $peak) {
					$peak = $_[$i];
				} elsif ((length($_[$i]) > 1) && (length($_[$i]) < length($peak))) {
					$increase = 0;
				}
				$grouptot += $_[$i];
			} else {		#we are decreasing and might need to end the group
				if ((length($_[$i]) > length($_[$i - 1])) && ((length($_[$i - 1]) > 1) || (length($_[$i] > 2)))) {
					$increase = 1;
					$end = $i - 3;
					$numgroups++;
					$resarray[$numops][0] = $numgroups;
					$resarray[$numops][$numgroups] = {};
					$resarray[$numops][$numgroups]->{START} = $start;
					$resarray[$numops][$numgroups]->{END} = $end;
					$resarray[$numops][$numgroups]->{GROUPTOT} = $grouptot;
					if ($verbose) {
						print "group: $start -- $end (peak: $peak, tot: $grouptot)\n";
					}
					$start = $i - 2;
					$peak = $_[$i];
					$grouptot = $_[$i];
				} else {
					$grouptot += $_[$i];
				}
			}
		}
	}

	#we may have to end a group at the end of the line
	if ($start) {
		$end = $i - 3;
		$numgroups++;
		$resarray[$numops][0] = $numgroups;
		$resarray[$numops][$numgroups]->{START} = $start;
		$resarray[$numops][$numgroups]->{END} = $end;
		$resarray[$numops][$numgroups]->{GROUPTOT} = $grouptot;
		if ($verbose) {
			print "group: $start -- $end (peak: $peak, tot: $grouptot)\n";
		}
	}

	#now lets expand add more groups which are combinations of adjacent groups
	#i = number of groups to add
	#j = group that starts the combo
	for ($i = 1; $i < $numgroups; $i++)
	{
		for ($j = 1; $j <= ($numgroups - $i); $j++)
		{
			$resarray[$numops][0]++;
			$resarray[$numops][$resarray[$numops][0]]->{START} = $resarray[$numops][$j]->{START};
			$resarray[$numops][$resarray[$numops][0]]->{END} = $resarray[$numops][$j+$i]->{END};
			$resarray[$numops][$resarray[$numops][0]]->{GROUPTOT} = 0;
			for ($k = 0; $k <= $i; $k++)
			{
				$resarray[$numops][$resarray[$numops][0]]->{GROUPTOT} += $resarray[$numops][$j+$k]->{GROUPTOT};
			}
		}
	}
}

##############################################################################
# main
##############################################################################

use Switch;

$vset = 0;
for ($argnum = 0; $argnum <= $#ARGV; $argnum++) { 
	switch ($ARGV[$argnum]) {
		case "-f"	{ $filename=$ARGV[++$argnum]; }
		case "-v"	{ $variation=$ARGV[++$argnum]; $vset = 1; }
		case "-verbose"	{ $verbose = 1; }
		case "-h"	{ &usage; }
		else		{ print "unknown arg: $ARGV[$argnum]\n"; &usage; }
	}
}

if ((!$filename) || (!$vset)) {
	&usage;
}

if ($verbose) {
	print "file: $filename\n";
}

open INFILE, "<$filename" or die "Cannot open file: $!";

$numops = 0;
while (<INFILE>) {
	chomp();
	if (/(^OP_[\w]+) /) {
		$op = $1;
		@data = split / /, $_;

		if ($verbose) {
			print "\ndata: @data\n";
		}

		@resarray[$numops] = ();
		&getgroups(@data);
		$oparray[$numops] = $op;
		$numops++;
	}
}
close INFILE;

if ($verbose)
{
	print "\ngroups:\n";
	for ($i = 0 ; $i < $numops ; $i++)
	{
		print "$oparray[$i], $resarray[$i][0]\n";
		for ($j = 1; $j <= $resarray[$i][0]; $j++)
		{
			print "group $j: [$resarray[$i][$j]->{START}, $resarray[$i][$j]->{END}, $resarray[$i][$j]->{GROUPTOT}]\n";
		}
	}
	print "\n";
}

#i: first op to compare
#j: second op to compare
#k: group of first op
#l: group of second op
for ($i = 0; $i < ($numops - 1); $i++)
{
	for ($j = $i + 1; $j < $numops; $j++)
	{
		for ($k = 1; $k <= $resarray[$i][0]; $k++)
		{
			$bestmatch = {};
			$bestperc = 0;
			for ($l = 1; $l <= $resarray[$j][0]; $l++)
			{
				$tot1 = $resarray[$i][$k]->{GROUPTOT};
				$tot2 = $resarray[$j][$l]->{GROUPTOT};
				if (min($tot1,$tot2) < $MINGRPSIZE)
				{
					next;
				}
				$percentage = $tot2 * ($variation/100);
				$lowerbound = $tot2 - $percentage;
				$upperbound = $tot2 + $percentage;
				if (($tot1 >= $lowerbound) && ($tot1 <= $upperbound))
				{
					$currperc = (min($tot1,$tot2) / max($tot1,$tot2));
					if ($currperc > $bestperc) {
						$bestmatch->{START1} = $resarray[$i][$k]->{START};
						$bestmatch->{END1}   = $resarray[$i][$k]->{END};
						$bestmatch->{START2} = $resarray[$j][$l]->{START};
						$bestmatch->{END2}   = $resarray[$j][$l]->{END};
						$besttot1 = $tot1;
						$besttot2 = $tot2;
						$bestperc = $currperc;
					}
				}
			}
			if ($bestperc > 0)
			{
				$bestperc *= 100;
				print "Buckets $bestmatch->{START1}--$bestmatch->{END1} of $oparray[$i] match buckets $bestmatch->{START2}--$bestmatch->{END2} of $oparray[$j] with $bestperc% ($besttot1,$besttot2 operations)\n";
			}
		}
	}
}

