#!/usr/bin/perl -w 

#inherits, via cut and paste, from log_analyzer.pl...

# Name of the dataset whose log entries are analysed; "Test run at" headers
# naming any other dataset are reported and skipped.
$desired = "census.bin";

# synccheck
#
# Examines the current line ($_) for a "Test run at ..." header.
#
# Globals read:    $_, $desired, $state, $proj
# Globals written: $state, $proj
#
# Returns 1 when the header names the desired dataset (then $proj holds the
# -groupbits value and $state becomes 2).  Returns 0 otherwise: either the
# line is not a header at all ($state untouched), or it belongs to another
# dataset ($state is reset to 1 and a warning goes to STDERR).
#
# Whenever a header shows up while $state is not 1, the run that was being
# parsed never reached its end, so a note naming it is printed first.
sub synccheck {
    my ($dataset, $groupbits) =
        /Test run at .*OwenData\/(.*) -groupbits (.*) -memorylimit.*/;

    # Not a header line: nothing to do.
    return 0 unless defined $dataset;

    print "[Note] projection $proj ended early, in state $state\n"
        unless $state == 1;

    if ($dataset eq $desired) {
        $proj  = $groupbits;
        $state = 2;    # got sync, want data!
        return 1;
    }

    print STDERR "[Warning] See entry for wrong dataset, $dataset\n";
    $state = 1;
    return 0;
}



# Log parser: a line-driven state machine over the input named on the command
# line (or STDIN).  The current state lives in the global $state:
#
#   1  seeking sync: skip everything until synccheck() accepts a
#      "Test run at" header for the desired dataset.
#   2  got sync: wait for "Your data cube will have dimensions"; the line
#      after it must carry "[info] <dims>", stored in $cubeshape{$proj}.
#   4  wait for either a "Using sqrt" / "Using regular chunks of size N"
#      line (-> state 5, with $size = 0 or N), or the "Independence sum"
#      line, which records $IS{$proj} and finishes the run (-> state 1).
#   5  read a table of result lines into $A{"$size;$proj;<label>"} until a
#      blank line (-> state 4).
#
# take care, timeouts can occur anywhere.  For that reason synccheck() is
# consulted on every line, including the lines of a state-5 table: a header
# arriving mid-table means the previous run was cut short, and the parser
# must resynchronise rather than attribute the new run's figures to the old
# projection.
#
# Unrecoverable input problems are reported on STDERR and end the script
# with a non-zero exit status.

$state = 1;  #seeking sync


while (<>) {
    chomp;

    synccheck();
    next if $state == 1;

    if ($state == 2) {
        if (/Your data cube will have dimensions/) {
            # The dimensions themselves are on the following line.
            $_ = <>; chomp;
            if (/\[info\] (.*)/) {
                $cubeshape{$proj} = $1;  #space separated
            }
            else {
                print STDERR "problem with cube dimensions\n"; exit 1;
            }
            $state = 4;
        }
        next;
    }

    if ($state == 4) {
        if ( /^Using sqrt/) {
            $state = 5; $size = 0; next;}
        if ( /^Using regular chunks of size (\d+)/) {
            $state = 5; $size = $1; next;}
        if ( /Independence sum = (\S+) /) {
            $IS{"$proj"} = $1;
            $state = 1;
            next;
        }
        next;
    }

    if ($state == 5) {
        # The line that brought us here (the one right after the "Using ..."
        # line) is deliberately not parsed; the table starts on the next one.
        while (<>) {
            chomp;

            # A new run header inside the table: resynchronise.  synccheck()
            # moves $state to 2 (desired dataset) or 1 (other dataset).
            synccheck();
            last if $state != 5;

            if (/^\s*$/) {
                $state = 4; #seeking sqrt or 4 or indep sum
                last;
            }
            if ( /\d+=\s*(\d+).*- (.*)/ ) {
                $A{"$size;$proj;$2"} = $1;
            }
            else {print STDERR "unexpected line in state 5 = $_\n";}
        }
        next if $state != 5;

        # Still in state 5: the inner loop ran out of input mid-table.
        my $last_line = defined $_ ? $_ : '';
        print STDERR "problem in state 5 line '$last_line'\n";  #eof??
        exit 1;
    }
}


# stddev(LIST)
#
# Returns the sample standard deviation (n-1 denominator) of the numbers in
# LIST.  With fewer than two values the statistic is meaningless and 0 is
# returned.
#
# The deviation is accumulated around the mean in a second pass instead of
# using sumsq - sum**2/n: with near-identical inputs that difference can
# round to a small negative number, and Perl's sqrt() dies on a negative
# argument.
sub stddev {
    my @values = @_;
    my $count  = scalar @values;
    return 0 if $count < 2;    #nonsense

    my $sum = 0;
    foreach my $value (@values) { $sum += $value; }
    my $mean = $sum / $count;

    my $squared_dev = 0;
    foreach my $value (@values) { $squared_dev += ($value - $mean)**2; }

    return sqrt($squared_dev / ($count - 1));
}


# avg(LIST)
#
# Returns the arithmetic mean of the numbers in LIST.  An empty LIST yields 0
# (the same convention stddev uses for degenerate input) instead of dying
# with "Illegal division by zero".
sub avg {
    my @values = @_;
    return 0 unless @values;

    my $sum = 0;
    foreach my $value (@values) { $sum += $value; }
    return $sum / scalar(@values);
}

# Labels of the two result-table rows used below (third component of the
# %A keys, which have the form "size;projection;label").
$FS = "Slice Sorting (Less-than ordering)";
$DF = "default normalization";

# analysing keys %IS good: see only things that actually completed.

# analysis FS's stddev and mean (and number samples)
# for various blocksize.  Relative to min(ROLAP,MOLAP)
#
# For every block size (0 stands for sqrt chunking) and every completed
# projection, the chunked ("HOLAP") cost is the FS figure for that block size
# plus one unit per chunk.  It is compared with the cheaper of
#   pure MOLAP = cube volume
#   pure ROLAP = FS figure at block size 1 * (int(#dims / 2) + 1)
# A block size is abandoned as soon as one projection has no FS figure for it.
#
# NOTE(review): the size-1 FS figure and the size-2 DF figure are used without
# a defined-check; a missing or zero value ends in a warning followed by
# "Illegal division by zero" or a log() failure -- confirm the logs always
# contain them.

print "$desired\n";

BLOX:
foreach $blsz (0,2,3,4,8,16) {
    @ratios = (); @goodratios = (); @onechunkOption = ();
    @densities = (); @gooddensities = ();
    @defChvsROLAP = ();  #answer referee query

    foreach $is (keys %IS) {    # as done in log_analyzer.pl also
        next BLOX if not defined $A{"$blsz;$is;$FS"};

        @cbdims = split(' ',$cubeshape{$is});
        $vol = 1;
        foreach $factor (@cbdims) { $vol *= $factor;}

#       print "cube volume $vol\n";

        $num_chunks = 1;
        #compute true chunkshape, number of chunks in dimension

        for($i=0; $i < @cbdims; $i++) {
            if ($blsz == 0) { #code for sqrt
                # ceiling of the square root of the dimension
                $chkshp[$i] = int(0.999999+sqrt($cbdims[$i]));
            }
            else { $chkshp[$i] = $blsz;}
            # a chunk cannot be wider than the dimension itself
            $chkshp[$i] = $cbdims[$i] if $chkshp[$i] > $cbdims[$i];

            # ceiling of dimension / chunk width
            $num_chunks *=  int (0.999999 + $cbdims[$i]/$chkshp[$i]);
        }

#       print "$num_chunks chunks\n chunking really @chkshp";

        $pureMOLAP =  $vol;
        $pureROLAP = $A{"1;$is;$FS"} * (int( @cbdims / 2) + 1);
        if ($pureMOLAP < $pureROLAP) {
#           print "M";
            $pureMin = $pureMOLAP;
        } else {
#           print "R";
            $pureMin = $pureROLAP;
        }

        #Aug 2003, respond to DOLAP referees

        $chCost = $A{"2;$is;$DF"};
        print "blocksize 2 ; default chunking cost $chCost\n";
        push @defChvsROLAP,$chCost / $pureROLAP;

        $HOLAP = $A{"$blsz;$is;$FS"} + $num_chunks;

#       print "HOLAP $HOLAP vs $pureMin\n";

        push @ratios, $HOLAP / $pureMin ;
        push @densities, log($A{"1;$is;$FS"}/$vol);

        if ($HOLAP <= $pureMin) {
            push @goodratios, $HOLAP / $pureMin;
            push @onechunkOption, $HOLAP / $pureMin;
            push @gooddensities, log($A{"1;$is;$FS"}/$vol);
        }
        else {
            push @onechunkOption, 1.0; #no sense expanding, so code as 1 block!
        }
    }


    if (@defChvsROLAP){
        print "just 2-regular chunking (no overhead added) vs ROLAP " ,
        scalar(@defChvsROLAP)," datapts mean = ", avg(@defChvsROLAP),
        " and stddev = ", stddev(@defChvsROLAP),"\n";
    }

    # The stddev value and the logdensity figures share one output line; the
    # second print starts with a space so the two do not run together.
    if (@ratios) {
        print "For $blsz ALL datapts = ",scalar(@ratios)," mean = ",avg(@ratios)," and stddev = ", stddev(@ratios);
        print " logdensity mean = ",avg(@densities)," unLOGIFY mean ",exp(avg(@densities)),"\n";
    }
    if (@goodratios) {
        print "For $blsz COMPRESS datapts = ",scalar(@goodratios)," mean = ",avg(@goodratios)," and stddev = ", stddev(@goodratios);
        print " logdensity mean = ",avg(@gooddensities)," unLOGIFY mean ",exp(avg(@gooddensities)),"\n";
    }
    if (@onechunkOption) {
        print "For $blsz ONE_FALLBACK datapts = ",scalar(@onechunkOption)," mean = ",avg(@onechunkOption)," and stddev = ", stddev(@onechunkOption),"\n";
    }

    print "\n";
}

