/**
* (c) National Research Council of Canada, 2002-2003 by Daniel Lemire, Ph.D.
* Email lemire at ondelette dot com for support and details.
*/
/**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package cofi.benchmark;

import gnu.trove.*;
import java.io.*;
import java.util.*;
import cofi.algorithms.*;
import cofi.algorithms.basic.*;
import cofi.algorithms.stin.*;
import cofi.algorithms.memorybased.*;
import cofi.algorithms.linear.*;
import cofi.algorithms.jester.*;
import cofi.algorithms.util.*;
import cofi.data.*;

/**
 *  A class used for benchmarking (performance evaluation) of collaborative filtering systems (CFS).
 *
 * $Id: Benchmark.java,v 1.22 2003/11/09 23:48:34 lemired Exp $
 * $Date: 2003/11/09 23:48:34 $
 * $Author: lemired $
 * $Revision: 1.22 $
 * $Log: Benchmark.java,v $
 * Revision 1.22  2003/11/09 23:48:34  lemired
 * progres with Anna
 *
 * Revision 1.21  2003/11/05 17:26:46  lemired
 * Put gamma to 0.02
 *
 * Revision 1.20  2003/11/03 23:41:57  lemired
 * Latest changes: should almost conclude paper with Anna.
 *
 * Revision 1.19  2003/10/31 00:47:06  lemired
 * Still got a bug in TIOptimalWeight... it should be better than average... arghh!
 *
 * Revision 1.18  2003/10/28 01:43:08  lemired
 * Lots of refactoring.
 *
 * Revision 1.17  2003/10/07 13:28:32  lemired
 * Did some tweaking...
 *
 * Revision 1.16  2003/09/29 14:37:33  lemired
 * Fixed the optimalweight scheme.
 *
 * Revision 1.15  2003/09/25 23:18:27  lemired
 * Added Anna.
 *
 * Revision 1.14  2003/09/24 14:58:40  lemired
 * Worked hard on eigenmatch.
 *
 * Revision 1.13  2003/09/22 18:47:59  lemired
 * Got first-order exactness right!
 *
 * Revision 1.12  2003/09/18 12:41:42  lemired
 * Still doing lots of boring research.
 *
 * Revision 1.11  2003/08/28 16:41:51  lemired
 * Added Harold's bibtex changes, added a howto and my recent changes to algos.
 *
 * Revision 1.10  2003/08/21 20:46:30  lemired
 * Finally got STIOptimalWeight right!
 *
 * Revision 1.9  2003/08/21 18:49:38  lemired
 * It should now compile nicely.
 *
 * Revision 1.8  2003/08/21 18:04:29  lemired
 * Added toString method plus added necessary activation.jar for convenience.
 *
 * Revision 1.7  2003/08/19 23:13:57  lemired
 * More work on OptimalWeight. Don't think it works well afterall.
 *
 * Revision 1.6  2003/08/19 17:51:21  lemired
 * I've been improving OptimalWeight.
 *
 * Revision 1.5  2003/08/12 11:52:11  lemired
 * Added more regression testing.
 *
 * Revision 1.4  2003/08/07 00:37:42  lemired
 * Mostly, I updated the javadoc.
 *
 *
 *@author       Daniel Lemire, Ph.D.
 *@since     September 2002
 */
public class Benchmark {

  /** When true (option -fast), main() leaves the Pearson-based schemes out of the run. */
  static boolean Fast = false;
  /**
   * Stride used by benchmark() when walking the rated items of a user (option -factor).
   * 1 tests every rating; larger values test only every factor-th rating.
   */
  static int factor = 1;// hardcoded cheat factor
  //
  // Run modes. Only DELTA, DUMP, JESTER and INFO can be selected through -method;
  // STEEPEST is declared but not referenced in this class.
  final static short STEEPEST = 0;
  final static short DELTA = 1;
  final static short DUMP = 2;
  final static short JESTER = 3;
  final static short INFO = 4;
  /** Default for -max: large enough to act as "no limit" on what is read. */
  final static int INFINITE = 99999999;



  /**
   *  Runs an all-but-one evaluation of a collaborative filtering system and prints
   *  the resulting error figures on standard output.
   *
   *  For every visited user, each tested rating is removed in turn, the user is
   *  completed again by the system, and the prediction for the removed item is
   *  compared with the removed value. The mean absolute error and the square root
   *  of the mean squared error over all tested ratings are printed (both are NaN
   *  when no rating was tested), followed by "match_quality", the accumulated
   *  UtilMath.l2diff between each user and its completion divided by origuset.size().
   *
   *@param  cfs            the system under test
   *@param  origuset       the users to test against
   *@param  RatingsToTest  maximal number of users to visit; a value &lt;= 0 visits all of them
   *@param  minvalue       forwarded to cfs.completeUser (presumably the lowest legal rating)
   *@param  maxvalue       forwarded to cfs.completeUser (presumably the highest legal rating)
   */
  public static void benchmark( CollaborativeFilteringSystem cfs, EvaluationSet origuset, int RatingsToTest, float minvalue, float maxvalue ) {
    System.out.println( "[waiting] Completing..." );
    // A non-positive stride would never advance (or would index backwards),
    // so fall back to testing every rating.
    final int step = factor > 0 ? factor : 1;
    float absErrorSum = 0.0f;
    float squaredErrorSum = 0.0f;
    int testedRatings = 0;
    float matchQuality = 0.0f;
    int usersVisited = 0;
    TIntObjectIterator iter = origuset.iterator();
    while ( iter.hasNext() ) {
      ++usersVisited;
      if ( ( RatingsToTest > 0 ) && ( usersVisited > RatingsToTest ) ) {
        break;
      }
      iter.advance();
      TIntFloatHashMap user = (TIntFloatHashMap) iter.value();
      float[] fullCompletion = cfs.completeUser( user, minvalue, maxvalue );
      // NOTE(review): the divisor is the size of the whole set even when
      // RatingsToTest stops the loop early -- confirm this is intended.
      matchQuality += UtilMath.l2diff( user, fullCompletion ) / origuset.size();
      int[] itemids = user.keys();
      for ( int i = 0; i < itemids.length; i += step ) {
        final int removedItem = itemids[i];
        if ( isStandardItem( cfs, removedItem ) ) {
          continue;
        }
        ++testedRatings;
        // Work on a copy so the user stored in the evaluation set stays intact.
        TIntFloatHashMap hackedUser = (TIntFloatHashMap) user.clone();
        final float removedValue = hackedUser.remove( removedItem );
        float[] complete = cfs.completeUser( hackedUser, minvalue, maxvalue );
        final float diff = complete[removedItem] - removedValue;
        absErrorSum += Math.abs( diff );
        squaredErrorSum += diff * diff;
      }
    }
    System.out.println( "[info] AllBut1  NMAE = " + ( absErrorSum / testedRatings ) );
    // Root of the mean *squared* error; the sum of absolute errors must not be used here.
    System.out.println( "[info]  NMAE_2 = " + Math.sqrt( squaredErrorSum / testedRatings ) );
    System.out.println( "[info] match_quality = "+matchQuality);
  }

  /**
   *  Tells whether an item must be left out of the all-but-one test: true only when
   *  the system is a JesterClassical and the item is listed by its getStandard().
   *  (Presumably these are items that scheme needs as input -- verify against JesterClassical.)
   */
  private static boolean isStandardItem( CollaborativeFilteringSystem cfs, int itemId ) {
    if ( !( cfs instanceof JesterClassical ) ) {
      return false;
    }
    int[] standard = ((JesterClassical) cfs).getStandard();
    for ( int k = 0; k < standard.length; ++k ) {
      if ( standard[k] == itemId ) {
        return true;
      }
    }
    return false;
  }

  /**
   *  Returns the token following the option at position k, or null, after printing
   *  an error, when the option is the last command line argument.
   */
  private static String optionValue( String[] arg, int k ) {
    if ( k + 1 >= arg.length ) {
      System.out.println( "[Error] option " + arg[k] + " expects a value." );
      return null;
    }
    return arg[k + 1];
  }

  /**
   *  Parses the value of the option at position k as an integer. Returns null,
   *  after printing an error, when the value is missing or malformed.
   */
  private static Integer integerOption( String[] arg, int k ) {
    String value = optionValue( arg, k );
    if ( value == null ) {
      return null;
    }
    try {
      return Integer.valueOf( value );
    } catch ( NumberFormatException e ) {
      System.out.println( "[Error] couldn't parse " + value + " as an integer." );
      return null;
    }
  }

  /**
   *  Parses the value of the option at position k as a float. Returns null,
   *  after printing an error, when the value is missing or malformed.
   */
  private static Float floatOption( String[] arg, int k ) {
    String value = optionValue( arg, k );
    if ( value == null ) {
      return null;
    }
    try {
      return Float.valueOf( value );
    } catch ( NumberFormatException e ) {
      System.out.println( "[Error] couldn't parse " + value + " as a float." );
      return null;
    }
  }


  /**
   *  The main program for the Benchmark class.
   *
   *  Recognized options: -max, -skip, -test, -testskip, -factor (integers),
   *  -minvalue, -maxvalue (floats), -input (data file), -method
   *  (delta | dump | jester | info) and the flags -fast, -testweight, -optimal.
   *  A value that cannot be parsed is reported and not consumed, so it is
   *  examined again as an option of its own. Benchmarks are only run for the
   *  "dump" (default) and "jester" methods; "info" prints statistics about the
   *  data file and exits.
   *
   *@param  arg              The command line arguments
   *@exception  IOException  declared for the file reading done by EvaluationSet.readRatings
   */
  public static void main( String[] arg ) throws IOException {
    System.out.println( "(c) 2002-2003, NRC by Daniel Lemire, Ph.D." );
    System.out.println( "Usage: java Benchmark -max MaxNumberOfUsers" );
    System.out.println();
    String DataFileName = System.getProperty( "user.home" ) + "/CFData/vote.bin";
    float minvalue = 0.0f;
    float maxvalue = 1.0f;
    int maxratings = INFINITE;
    int skip = 0;
    int test = -1;
    int testskip = 0;
    boolean testweight = false;
    boolean optimal = false;
    short currentmethod = DUMP;
    for ( int k = 0; k < arg.length; ++k ) {
      final String option = arg[k];
      if ( option.equals( "-max" ) ) {
        Integer parsed = integerOption( arg, k );
        if ( parsed != null ) {
          maxratings = parsed.intValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-maxvalue" ) ) {
        Float parsed = floatOption( arg, k );
        if ( parsed != null ) {
          maxvalue = parsed.floatValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-minvalue" ) ) {
        Float parsed = floatOption( arg, k );
        if ( parsed != null ) {
          minvalue = parsed.floatValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-input" ) ) {
        String value = optionValue( arg, k );
        if ( value != null ) {
          DataFileName = value;
          ++k;
        }
        continue;
      }
      if ( option.equals( "-skip" ) ) {
        Integer parsed = integerOption( arg, k );
        if ( parsed != null ) {
          skip = parsed.intValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-test" ) ) {
        Integer parsed = integerOption( arg, k );
        if ( parsed != null ) {
          test = parsed.intValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-testskip" ) ) {
        Integer parsed = integerOption( arg, k );
        if ( parsed != null ) {
          testskip = parsed.intValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-factor" ) ) {
        Integer parsed = integerOption( arg, k );
        if ( parsed != null ) {
          factor = parsed.intValue();
          ++k;
        }
        continue;
      }
      if ( option.equals( "-fast" ) ) {
        Fast = true;
        continue;
      }
      if ( option.equals( "-testweight" ) ) {
        testweight = true;
        continue;
      }
      if ( option.equals( "-optimal" ) ) {
        optimal = true;
        continue;
      }
      if ( option.equals( "-method" ) ) {
        String method = optionValue( arg, k );
        if ( method == null ) {
          continue;
        }
        if ( method.equals( "delta" ) ) {
          currentmethod = DELTA;
          System.out.println( "[info] Switching to delta method" );
        }
        else if ( method.equals( "dump" ) ) {
          currentmethod = DUMP;
          System.out.println( "[info] Switching to dump method" );
        }
        else if ( method.equals( "jester" ) ) {
          currentmethod = JESTER;
          System.out.println( "[info] Switching to jest method" );
        }
        else if ( method.equals( "info" ) ) {
          currentmethod = INFO;
          System.out.println( "[info] Switching to info method" );
        }
        else {
          System.out.println( "[Error] unknown method " + method + ", keeping the current one." );
        }
        // The method name is consumed whether or not it was recognized.
        ++k;
        continue;
      }
      System.out.println("Unrecognized option : "+option);
    }
    if ( factor < 1 ) {
      // benchmark() steps through the ratings by this amount; it has to move forward.
      System.out.println( "[Error] factor must be at least 1, got " + factor + "; using 1." );
      factor = 1;
    }
    // Without -test, as many users are tested as are read for training.
    if ( test == -1 ) test = maxratings;
    if ( currentmethod == INFO ) {
      System.out.println("[info] Reading file "+DataFileName);
      EvaluationSet origuset = EvaluationSet.readRatings( new File( DataFileName ), maxratings, 0, 20 );
      System.out.println("[info] Number of users with at least 20 ratings = "+origuset.getNumberOfUsers() );
      int NumberOfRatings = 0;
      TIntObjectIterator i = origuset.iterator();
      while(i.hasNext()) {
        i.advance();
        NumberOfRatings += ((TIntFloatHashMap) i.value()).size();
      }
      System.out.println("[info] Number of ratings in users with at least 20 ratings = "+NumberOfRatings );
      System.out.println("[info] Ratings per users with at least 20 ratings = "+NumberOfRatings / (float) origuset.getNumberOfUsers() );
      System.out.println("[info] Detected MaxItemID = "+origuset.getMaxItemID());
      return;
    }
    System.out.println( "[info] maxratings = " + maxratings + " skip = " + skip + " users to test against = " + test + " skip test = " + testskip +" factor = "+factor +" fast = "+Fast+ " testweight = "+testweight);
    //
    System.out.println( "[waiting] Parsing..." );
    //
    EvaluationSet origuset = EvaluationSet.readRatings( new File( DataFileName ), maxratings, skip, 20 );
    EvaluationSet testset = EvaluationSet.readRatings( new File( DataFileName ), test, testskip, 20 );
    int origMaxItemID = origuset.computeApparentMaxItemID();
    int testMaxItemID = testset.computeApparentMaxItemID();
    System.out.println( "[info] origset size = " + origuset.size() + " max id=" + origMaxItemID );
    System.out.println( "[info] testset size = " + testset.size() + " max id=" + testMaxItemID );
    // Both sets are given the same (largest) item id.
    int MaxItemID = origMaxItemID > testMaxItemID ? origMaxItemID : testMaxItemID;
    origuset.setMaxItemID(MaxItemID);
    testset.setMaxItemID(MaxItemID);
    System.out.println("[info] Setting max item id to : "+MaxItemID);
    System.out.println( "[info] Loaded " + origuset.size() + " users" );
    //
    // The systems are benchmarked in the order in which they are added here.
    Vector CFSVector = new Vector();
    if(testweight) {
      //int seven_percent = (int)Math.round(0.06*MaxItemID);
      //int number_of_vectors = seven_percent > 30 ? 30 : seven_percent;
      //CFSVector.add(new EigenMatch( origuset,1,false,false));
      //CFSVector.add(new EigenMatch( origuset,1,true,false));
      //CFSVector.add(new EigenMatch( origuset,1,false,true));
      //CFSVector.add(new EigenMatch( origuset,1,true,true));
      CFSVector.add(new RuleOf5( origuset));
      CFSVector.add(new ConstantBias( origuset));
      CFSVector.add(new BiConstantBias( origuset));
      if(optimal) {
        CFSVector.add(new TIOptimalWeight( origuset, 0.02f ));
        CFSVector.add(new OptimalConstantWeight( origuset, 0.02f ));
        CFSVector.add(new TIOptimalConstantWeight( origuset,0.02f ));
      }
      //CFSVector.add(new EigenMatch( origuset,2));
      //CFSVector.add(new EigenMatch( origuset,5));
    }
    if ( ! Fast ) {
      CFSVector.add(new Pearson( origuset ));
      CFSVector.add(new STIPearson( origuset, 2.0f ));
      CFSVector.add(new MeanSTIPearson( origuset, 2.0f ));
    }
    CFSVector.add(new STINonPersonalized( origuset, 2.0f ));
    // Disabled experiment, kept so it can be switched back on.
    if(false) {
      CFSVector.add(new MeanSTINonPersonalized( origuset, 2.0f ));
      CFSVector.add(new MeanSTINonPersonalized2steps( origuset, 2.0f ));
    }
    CFSVector.add(new STINonPersonalized2steps( origuset, 2.0f ));
    // Disabled experiment, kept so it can be switched back on.
    if(false) {
      CFSVector.add(new STINonPersonalizedNsteps( origuset, 2.0f, 1 ));
      CFSVector.add(new STINonPersonalizedNsteps( origuset, 2.0f, 2 ));
      CFSVector.add(new STINonPersonalizedNsteps( origuset, 2.0f, 3 ));
    }
    CFSVector.add(new NonPersonalized( origuset ));
    CFSVector.add(new Average(  origuset));
    CFSVector.add(new PerItemAverage( origuset ));

    //
    if ( currentmethod == JESTER ) {
        CFSVector.add(new JesterClassical(origuset, false));// true == new normalization
        CFSVector.add(new JesterSTI(origuset));
    }
    if((currentmethod == JESTER) || (currentmethod == DUMP)) {
      for (Enumeration e = CFSVector.elements() ; e.hasMoreElements() ;) {
        CollaborativeFilteringSystem cfs = (CollaborativeFilteringSystem) e.nextElement();
        System.out.println( "[name] "+cfs.toString());
        benchmark( cfs, testset, test, minvalue, maxvalue );
      }
    }
  }

}

