/* -*- mode: c++ -*- 
*/
/* 

    GIFT, a flexible content based image retrieval system.
    Copyright (C) 1998, 1999, 2000 CUI, University of Geneva

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/**  -*- mode: c++ -*- */
/**
 *
 * CWEIGHTINGFUNCTION
 *
*
*
* modification history:
*
* HM 090399 created some documentation
*
* @author Wolfgang Müller
*
* compiler defines used:
*
*
*/
#ifndef _CWEIGHTINGFUNCTION
#define _CWEIGHTINGFUNCTION

#include <math.h>
#include "TID.h"
#include "CDocumentFrequencyElement.h"
#include "CAcInvertedFile.h"

class CQueryNormalizer;

/** a universally usable class for weighting. 
    
    The rationale is, that we want to implement very flexible 
    feature weighting, so flexible that it does not necessarily 
    have anything to do with the initial concept of feature
    weighting.

    The scenario: We have query/feedback from the user. That is:
    We have a list of images with either positive or negative
    relevance for the user. Each of these images contains a list
    of features. 

    Now we want to combine the different features. What we
    usually need to know is: how many images where in
    the positive feedback, and how many images where
    in the negative feedback.

*/
class CWeightingFunction{
protected:
  /** This value depends on all the elements of the query, which
      have a positive user-assigned relevance.

      i.e. not necessarily only on the elements which contain 
      this feature, so it has to be explicitly set using
      the function setRelevanceSum from the outside.
      
      If relevance=+-1 then this is the number of positive images in the query
  */
  double mPositiveRelevanceSum;
  /** This value depends on all the elements of the query, which
      have a negative user-assigned relevance.

      i.e. not necessarily only on the elements which contain 
      this feature, so it has to be explicitly set using
      the function setRelevanceSum from the outside.
      
      If relevance=+-1 then this is the number of negative images in the query
  */
  double mNegativeRelevanceSum;

  /* A pseudo term frequency calculated from the input
     at present by calculating the mean.

     mPositiveTermFrequency/mPositiveRelevanceSum is one term in a 
     a pseudo term frequency used for calculating query weights */
  double mPositiveTermFrequency;
  /* A pseudo term frequency calculated from the input
     at present by calculating the mean.

     mNegativeTermFrequency/mNegativeRelevanceSum is the other term in a 
     a pseudo term frequency used for calculating query weights */
  double mNegativeTermFrequency;

  /**  
       At present the feature description is a simple integer which expresses 
       if the feature is a histogram feature or not and if it is a texture feature
       or not. We probably will use more elaborate descriptions in the future.
   */
  int mFeatureDescription;

  /**  */
  TID mID;

  /** The accessor for the file on which the query is performed */
  const CAcInvertedFile* mAccessor;

  /** FIXME THERE IS A GOOD DESCRIPTION MISSING FOR THE NEXT
      TWO VARIABLES */
  mutable CQueryNormalizer* mQueryNormalizer;

  /** A normalizer for apply on self 
   */ 
  mutable CQueryNormalizer* mThisNormalizer;

  /**
     The query usually will separate into a part
     which is equal for all the query and a part which is to be 
     calculated for each document. The part which stays constant
  */
  double mQueryFactor;
  /**  */
  double mDocumentFactor;

public:

  /**
   *
   *  The query factor contains all informatio which
   *  depends only on the query. (for efficiency)
   *
   */
  double getQueryFactor()const;

  /**
   *
   * Calculate the QueryFactor.
   *
   */
  virtual void preCalculate();

  /**
   *
   * Constructor: 
   * @param inAccessor: the accessor containing this \
   *                     (for getting information about single documents)
   * @param inQueryNormalizer: FIXME
   * @param inThisNormalizer: FIXME
   */
  CWeightingFunction(const CAcInvertedFile* inAccessor=0,
		     CQueryNormalizer* inQueryNormalizer=0,
		     CQueryNormalizer* inThisNormalizer=0);

  /**
   *
   * Set the accessor (should not be needed) 
   *
   */
  void setAccessor(const CAcInvertedFile*);

  /**
   *
   *  Set the query normalizer (should not be needed);  
   *
   */
  void setNormalizers(CQueryNormalizer* inQueryNormalizer,
		      CQueryNormalizer* inThisNormalizer);

  /**
   *
   * Set the ID of this feature
   *
   */
  void setID(TID);

  /**
   *
   * get the ID of this feature 
   *
   */
  TID getID()const;

  /**
   *
   * Setting the relevance sum, 
   * that is the sum of the absolute values of all 
   * relevance levels of all documents of the query
   *
   * for rocchio we need this sum to be once calculated
   * for positive, once for negative feedback images
   * 
   */
  void setRelevanceSum(double inPositiveRelevanceSum,
		       double inNegativeRelevanceSum);

  /**
   *
   * Add a feature. A feature will be a document frequency element
   * out of the feature file.
   *
   */
  virtual void addQueryFeature(double inRelevanceLevel,
			       const CDocumentFrequencyElement& 
			       inQueryFeature);

  /**
   *
   * Giving a pseudo term frequency for normalization
   *
   */
  virtual double getTermFrequency()const;

  /**
   *
   * Apply this on a document frequency
   *
   */
  virtual double subApply(const double inDocumentFrequency,
			  const double inNormalizingFactor)const;

  /**
   *
   * Preparing the right parameters for subApply: Apply this on another feature
   *
   */
  virtual double apply(const CDocumentFrequencyElement& inResultFeature)const;

  /**
   *
   * Preparing the right parameters for subApply: Apply this on itself 
   *
   */
 
  double applyOnThis()const;

  /**For the "prototype pattern": 
     Kinda virtual copy constructor: 
     Gamma/Helm/Johnson/Vlissides p. 107*/
  virtual CWeightingFunction* constructNew(TID inID)const;

  /**For the "prototype pattern": 
     Kinda virtual copy constructor: 
     Gamma/Helm/Johnson/Vlissides p. 107

     This is a new version of constructNew which
     manages to live without the setID function
  */
  virtual CWeightingFunction* clone()const;
  
  friend class CSortByDFTimesLogICF_WF;
};

/**
 *
 * 
 *
 */
bool operator<(const CWeightingFunction&,
	       const CWeightingFunction&);

/** CWeightingFunction: Sort by FeatureID  */
class CSortByFeatureID_WF:
  public binary_function
<CWeightingFunction,CWeightingFunction,bool>{
  /**  */
  inline bool operator()(const CWeightingFunction& l,
			 const CWeightingFunction& t){
    return l.getID()<t.getID();
  }
};

/**
 *
 * CWeightingFunction: Sort by DocumentFrequency/CollectionFrequency 
 *
 */ 
 
class CSortByDFTimesLogICF_WF:
  public binary_function
<CWeightingFunction,CWeightingFunction,bool>{
  /**  */
  inline bool operator()(const CWeightingFunction& l,
			 const CWeightingFunction& t){
    return 
      l.getTermFrequency()
      *
      fabs(log(l.mAccessor->FeatureToCollectionFrequency(l.getID())))
      <
      t.getTermFrequency()
      *
      fabs(log(t.mAccessor->FeatureToCollectionFrequency(t.getID())));
  };
};

/**
*
* CWeightingFunction: Sort by DocumentFrequency/CollectionFrequency 
* 
*/
class CSortByQueryFactor_WF:
  public binary_function
<CWeightingFunction,CWeightingFunction,bool>{
public:
  /**  */
  inline bool operator()(const CWeightingFunction& l,
			 const CWeightingFunction& t){
    return 
      l.getQueryFactor()
      <
      t.getQueryFactor();
  };
};
/**
   CWeightingFunction: Sort by the absolute value
   of the query factor. This is useful for pruning
   when the feedback is negative.
*/

class CSortByAbsQueryFactor_WF:
  public binary_function
<CWeightingFunction,CWeightingFunction,bool>{
public:
  /**  */
  inline bool operator()(const CWeightingFunction& l,
			 const CWeightingFunction& t){
    return 
      fabs(l.getQueryFactor())
      <
      fabs(t.getQueryFactor());
  };
};

/**
*
* 
*
*/
template<class CSortOp>
class CSortPointers_WF:
  binary_function<CWeightingFunction*,CWeightingFunction*,bool>{
protected:
  /**  */
  CSortOp mSorter;
public:
  /**  */
  inline CSortPointers_WF(){};
  /**  */
  inline bool operator()(const CWeightingFunction* l,
			 const CWeightingFunction* t){
    return mSorter(*l,*t);
  };
};

#include "CQueryNormalizer.h"

#endif



Documentation generated by muellerw@pc7170 on Son Okt 8 16:04:40 CEST 2000