// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef SLICESORTINGNORMALIZATION_H
#define SLICESORTINGNORMALIZATION_H

#include "normalizations.h"



/**
 * Generic slice-sorting normalization.
 *
 * For every dimension of the cube, each slice receives a score from
 * histogram(); the (score, slice index) pairs are sorted with a comparator
 * of type _CT and the resulting order becomes the new normalization for
 * that dimension. Subclasses change the scoring by overriding histogram().
 *
 * This class should allow us to implement all slice sorting, including
 * FrequencySort.
 *
 * Template parameters: _DT and _LDT are forwarded to DataCube and
 * Normalization; _CT is the comparator type and must provide a static
 * getDescription() used to build the text name.
 */
template <class _DT, class _LDT, class _CT = LessComparator<pair<double,int> > >
class SliceSort : public Normalization<_DT,_LDT> {
public:
    /// Default-constructs the comparator used to order the slices.
    SliceSort() : mComparator() {}

    /// Returns a copy of this object allocated with new.
    virtual SliceSort* clone() const {
        return new SliceSort(*this);
    }

    virtual ~SliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Slice Sorting (" + _CT::getDescription() + ")";
    }

    /// Builds a new normalization by sorting the slices of every dimension;
    /// slice scores are computed relative to the normalization `initial`.
    virtual norm_type computeNormalFrom(DataCube<_DT,_LDT>& DC,
                                        const norm_type & initial);

    /// Compile-time switch for the diagnostic output of computeNormalFrom().
    enum {verbose = false};

protected:
    /// Returns the (score, slice index) pairs of `dimension`, sorted with mComparator.
    virtual deque<pair<double,int> > sortedHistogram(DataCube<_DT,_LDT>& DC,
                                                     const norm_type & initial,
                                                     const uint dimension);

    /// Returns one score per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);

    /// Comparator instance handed to sort() in sortedHistogram().
    _CT mComparator;
};

//
// Implementations follow...
///////////////////////////////

/**
 * Scores every slice of `dimension` with histogram() and returns the
 * (score, slice index) pairs ordered by mComparator.
 */
template <class _DT, class _LDT, class _CT>
deque<pair<double,int> > SliceSort<_DT,_LDT,_CT>::sortedHistogram(DataCube<_DT,_LDT>& DC,
    const norm_type & initial, const uint dimension) {
    // Computing the histogram is the costly step; the rest is cheap bookkeeping.
    const vector<double> scores = histogram(DC, initial, dimension);

    // Tag each score with the slice it belongs to so the order survives sorting.
    deque<pair<double,int> > ranked;
    int slice = 0;
    for(vector<double>::const_iterator score = scores.begin();
        score != scores.end(); ++score, ++slice) {
        ranked.push_back(pair<double,int>(*score, slice));
    }

    sort(ranked.begin(), ranked.end(), mComparator);
    return ranked;
}


/**
 * Default slice score: the number of non-zero cells in each slice.
 *
 * Returns a vector with one entry per index of `dimension`. Cell
 * coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read, so the counts are relative to the
 * normalization `initial`.
 */
template <class _DT, class _LDT,  class _CT>
vector<double> SliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // This visits every cell of the cube once, so it can be relatively expensive.
    const vector<int> shape = DC.getShape();
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    for(int value = 0; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> lower(shape.size(), 0);
        vector<int> upper(shape);
        lower[dimension] = value;
        upper[dimension] = value + 1;

        double nonzero = 0.0;
        vector<int> cell(lower);
        do {
            if(DC.get(PermutationUtil::permute(cell, initial)) != 0) {
                nonzero += 1.0;
            }
        } while(MathUtil::increment(cell, lower, upper));
        answer[value] = nonzero;
    }
    return answer;
}


/**
 * Computes a new normalization by sorting the slices of every dimension.
 *
 * For each dimension, the slices are ranked with sortedHistogram() and the
 * entry of `initial` belonging to the slice at each rank is written at that
 * rank in the new normalization. Returns one index vector per dimension.
 */
template <class _DT, class _LDT,  class _CT>
norm_type SliceSort<_DT,_LDT,_CT>::
computeNormalFrom( DataCube<_DT,_LDT>& DC,const norm_type & initial) {
    const vector<int> shape = DC.getShape();
    norm_type answer;
    for(uint dim = 0; dim < shape.size(); ++dim ) {
        if(verbose) cout << "dim = "<< dim << endl;
        //
        // Storage indices and normalized indices are related through
        // `initial`: given a normalized index x, initial[dim][x] is the
        // storage index.
        //
        // The key point is that, for slice sorting in general, the
        // histograms depend on the normalization `initial`.
        const deque<pair<double,int> > ranked = sortedHistogram(DC, initial, dim);
        vector<int> normalization(shape[dim],0);
        for(uint rank = 0; rank < ranked.size(); ++rank) {
            const int slice = ranked[rank].second;
            if(verbose) cout << "pos["<< rank<<"] = "<< slice << endl;
            normalization[rank] = initial[dim][slice];
        }
        answer.push_back(normalization);
    }
    return answer;
}


/////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Follows some alternative slice sorting
//
// Now, a slice sorting is orthogonal if and only if the function f used is invariant
// under permutations. This means that it cannot depend on the order of the values,
// only on the values themselves.
//
// An example is f(x,y,z) = x + y + z
//
// Alternatives are f(x,y,z) = x*x + y*y + z*z (amounts to the same thing because the data is binary)
//
// All of those amount to counting the number of non zero components.
//
// This gets into symmetric functions. A function f(x1,x2,...,xn) is symmetric if
// f(x1,x2,...,xn) = f(x2,x1,...,xn-1,xn) and so on for all permutations.
//
// In our case, x**n = x because we have binary data.
//
// Now, there are alternatives such as
//
// sum_i_j ( x_i x_j ) where the sum is over all i,j
//
// which amounts to count**2 and, once sorted, this amounts to the same algorithm.
//
// Conjecture (almost Theorem): all such symmetric functions can be written as a function
// of "count(number of 1s)".
//
//
//////////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////////////////
//Sloped was my first idea of a SliceSort
//////////////////////////////////////////////////////////////////////////////////////////

/**
 * Slice sorting whose slice score weights each non-zero cell by its
 * position (plus one) along a second, "alternate" dimension; see
 * histogram(). Everything else is inherited from SliceSort.
 */
template <class _DT, class _LDT, class _CT= LessComparator<pair<double,int> > >
class SlopedSliceSort : public SliceSort<_DT,_LDT,_CT > {
public:
    SlopedSliceSort() : SliceSort<_DT,_LDT,_CT>() {}

    /// Returns a copy of this object allocated with new.
    virtual SlopedSliceSort* clone() const {
        return new SlopedSliceSort(*this);
    }

    virtual ~SlopedSliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Sloped Slice Sorting (" + _CT::getDescription() + ")";
    }

protected:
    /// Returns one linearly weighted score per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);
};

/**
 * Linearly weighted slice score.
 *
 * For each index `value` of `dimension`, sums (indices[alternatedim] + 1)
 * over the non-zero cells of that slice, where `alternatedim` is another
 * dimension of the cube: the largest one other than `dimension` (the
 * starting guess wins ties unless a strictly larger one is found).
 * Cell coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read.
 *
 * Precondition (asserted): dimension < DC.getShape().size().
 */
template <class _DT, class _LDT,  class _CT>
vector<double> SlopedSliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // this can be relatively expensive
    vector<int> shape = DC.getShape();
    // Check the precondition before shape[dimension] is read, not after.
    assert(dimension < shape.size());
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    // Starting guess is arbitrary: the previous dimension, wrapping around.
    uint alternatedim = dimension > 0 ? dimension - 1 : shape.size() - 1;
    assert(alternatedim < shape.size());
    // Prefer the largest dimension other than the one being sorted.
    for(uint k = 0; k < shape.size(); ++k ) {
        if(k == dimension) continue;
        if(shape[k] > shape[alternatedim]) alternatedim = k;
    }

    for(int value = 0 ; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> Bounds(shape);
        vector<int> Start(Bounds.size(),0);
        Start[dimension] = value;
        Bounds[dimension] = value + 1;
        vector<int> indices(Start);
        do {
            const vector<int> permuted = PermutationUtil::permute(indices,initial);
            if( DC.get( permuted ) != 0 ) {
                // The weight grows linearly along alternatedim: a linear slope.
                answer[value] += indices[alternatedim] + 1;
            }
        } while(MathUtil::increment( indices, Start, Bounds));
    }
    return answer;
}

/////////////////////////////////////////////////////////////////////
// This is an alternative to Sloped
// /////////////////////////////////////////////////////////////////

/**
 * Variant of the sloped slice sort: the weight of a non-zero cell is its
 * position along the alternate dimension taken modulo half that
 * dimension's extent, plus one; see histogram(). Everything else is
 * inherited from SliceSort.
 */
template <class _DT, class _LDT, class _CT= LessComparator<pair<double,int> > >
class AlternateSlopedSliceSort : public SliceSort<_DT,_LDT,_CT> {
public:
    AlternateSlopedSliceSort() : SliceSort<_DT,_LDT,_CT>() {}

    /// Returns a copy of this object allocated with new.
    virtual AlternateSlopedSliceSort* clone() const {
        return new AlternateSlopedSliceSort(*this);
    }

    virtual ~AlternateSlopedSliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Alternate Sloped Slice Sorting (" + _CT::getDescription() + ")";
    }

protected:
    /// Returns one periodically weighted score per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);
};

/**
 * Periodically weighted slice score.
 *
 * For each index `value` of `dimension`, sums
 * (indices[alternatedim] % period) + 1 over the non-zero cells of that
 * slice, where `alternatedim` is the largest dimension other than
 * `dimension` and `period` is half its extent. When the alternate
 * dimension has extent below 2 the half-extent is 0; the period is then
 * clamped to 1 (every non-zero cell weighs 1) instead of dividing by zero.
 * Cell coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read.
 *
 * Precondition (asserted): dimension < DC.getShape().size().
 */
template <class _DT, class _LDT,  class _CT>
vector<double> AlternateSlopedSliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // this can be relatively expensive
    vector<int> shape = DC.getShape();
    // Check the precondition before shape[dimension] is read, not after.
    assert(dimension < shape.size());
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    // Starting guess is arbitrary: the previous dimension, wrapping around.
    uint alternatedim = dimension > 0 ? dimension - 1 : shape.size() - 1;
    assert(alternatedim < shape.size());
    // Prefer the largest dimension other than the one being sorted.
    for(uint k = 0; k < shape.size(); ++k ) {
        if(k == dimension) continue;
        if(shape[k] > shape[alternatedim]) alternatedim = k;
    }

    // shape[alternatedim]/2 is 0 for an extent of 0 or 1, and `x % 0` is
    // undefined behaviour, so never let the period drop below 1.
    const int half = shape[alternatedim] / 2;
    const int period = half > 0 ? half : 1;

    for(int value = 0 ; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> Bounds(shape);
        vector<int> Start(Bounds.size(),0);
        Start[dimension] = value;
        Bounds[dimension] = value + 1;
        vector<int> indices(Start);
        do {
            const vector<int> permuted = PermutationUtil::permute(indices,initial);
            if( DC.get( permuted ) != 0 ) {
                answer[value] += (indices[alternatedim] % period) + 1;
            }
        } while(MathUtil::increment( indices, Start, Bounds));
    }
    return answer;
}

//////////////////////////////////////////////////////////////////////////////////////////
// AlternatingSliceSort is some kind of two chunk scheme. It works badly for some 
// reason. Maybe I did something wrong.
//////////////////////////////////////////////////////////////////////////////////////////
/**
 * Two-chunk slice sorting: a non-zero cell adds one to its slice's score
 * when it lies in the lower half of the alternate dimension and subtracts
 * one otherwise; see histogram(). Everything else is inherited from
 * SliceSort.
 */
template <class _DT, class _LDT, class _CT= LessComparator<pair<double,int> > >
class AlternatingSliceSort : public SliceSort<_DT,_LDT,_CT> {
public:
    AlternatingSliceSort() : SliceSort<_DT,_LDT,_CT>() {}

    /// Returns a copy of this object allocated with new.
    virtual AlternatingSliceSort* clone() const {
        return new AlternatingSliceSort(*this);
    }

    virtual ~AlternatingSliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Alternating Slice Sorting (" + _CT::getDescription() + ")";
    }

protected:
    /// Returns one signed score per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);
};

/**
 * Signed slice score.
 *
 * For each index `value` of `dimension`, every non-zero cell of the slice
 * contributes +1 when its position along the alternate dimension is below
 * half of that dimension's extent, and -1 otherwise. The alternate
 * dimension is the previous one, wrapping around to the last.
 * Cell coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read.
 */
template <class _DT, class _LDT,  class _CT>
vector<double> AlternatingSliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // This visits every cell of the cube once, so it can be relatively expensive.
    const vector<int> shape = DC.getShape();
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    // Arbitrary choice of the second dimension.
    int alternatedim;
    if(dimension > 0) {
        alternatedim = dimension - 1;
    } else {
        alternatedim = shape.size() - 1;
    }
    const int half = shape[alternatedim] / 2;

    for(int value = 0; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> lower(shape.size(), 0);
        vector<int> upper(shape);
        lower[dimension] = value;
        upper[dimension] = value + 1;

        vector<int> cell(lower);
        do {
            const vector<int> permuted = PermutationUtil::permute(cell, initial);
            if(DC.get(permuted) != 0) {
                // Lower half counts up, upper half counts down.
                answer[value] += (cell[alternatedim] < half) ? 1.0 : -1.0;
            }
        } while(MathUtil::increment(cell, lower, upper));
    }
    return answer;
}

///////////////////////////////////////////////////////////////////////////////////////////
// BitSliceSort is something right out of Owen's dreams.
// ////////////////////////////////////////////////////////////////////////////////////////

/**
 * Slice sorting whose slice score weights each non-zero cell with a
 * shift-based value derived from its position along the alternate
 * dimension; see histogram(). Everything else is inherited from SliceSort.
 */
template <class _DT, class _LDT, class _CT= LessComparator<pair<double,int> > >
class BitSliceSort : public SliceSort<_DT,_LDT,_CT> {
public:
    BitSliceSort() : SliceSort<_DT,_LDT,_CT>() {}

    /// Returns a copy of this object allocated with new.
    virtual BitSliceSort* clone() const {
        return new BitSliceSort(*this);
    }

    virtual ~BitSliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Bit Slice Sorting (" + _CT::getDescription() + ")";
    }

protected:
    /// Returns one shift-weighted score per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);
};

/**
 * Shift-weighted slice score.
 *
 * For each index `value` of `dimension`, every non-zero cell of the slice
 * contributes 1 >> (indices[alternatedim] % 32). The alternate dimension
 * is the previous one, wrapping around to the last.
 * Cell coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read.
 *
 * NOTE(review): `1 >> k` equals 1 for k == 0 and 0 for every other k, so
 * as written only cells whose alternate-dimension index is a multiple of
 * 32 are counted. If power-of-two weights were intended this would need a
 * left shift; the original author was also unsure this is sensible.
 * Confirm the intent before changing it.
 */
template <class _DT, class _LDT,  class _CT>
vector<double> BitSliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // This visits every cell of the cube once, so it can be relatively expensive.
    const vector<int> shape = DC.getShape();
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    // Arbitrary choice of the second dimension.
    int alternatedim;
    if(dimension > 0) {
        alternatedim = dimension - 1;
    } else {
        alternatedim = shape.size() - 1;
    }

    for(int value = 0; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> lower(shape.size(), 0);
        vector<int> upper(shape);
        lower[dimension] = value;
        upper[dimension] = value + 1;

        vector<int> cell(lower);
        do {
            const vector<int> permuted = PermutationUtil::permute(cell, initial);
            if(DC.get(permuted) != 0) {
                const int shift = cell[alternatedim] % 32;
                answer[value] += (1 >> shift);
            }
        } while(MathUtil::increment(cell, lower, upper));
    }
    return answer;
}

//////////////////////////////////////// 
// Threshold Slice Sort is a more radical version
// of the Sloped and Bit Slice Sort
///////////////////////////////////////


/**
 * Slice sorting that counts only the non-zero cells lying close enough to
 * the upper end of every other dimension, as decided by a per-dimension
 * threshold; see histogram(). Everything else is inherited from SliceSort.
 */
template <class _DT, class _LDT, class _CT= LessComparator<pair<double,int> > >
class ThresholdSliceSort : public SliceSort<_DT,_LDT,_CT> {
public:
    /// `threshold` is indexed by dimension in histogram().
    ThresholdSliceSort(vector<int> threshold)
        : SliceSort<_DT,_LDT,_CT>(), mThreshold(threshold) {}

    /// Returns a copy of this object allocated with new.
    virtual ThresholdSliceSort* clone() const {
        return new ThresholdSliceSort(*this);
    }

    virtual ~ThresholdSliceSort() {}

    /// Human-readable name, including the comparator's description.
    virtual const string getTextName() const {
        return "Threshold Slice Sorting (" + _CT::getDescription() + ")";
    }

protected:
    /// Returns one thresholded count per slice of `dimension`.
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC,
                                     const norm_type & initial,
                                     const uint dimension);

    /// Per-dimension thresholds supplied at construction.
    vector<int> mThreshold;
};

/**
 * Thresholded slice score.
 *
 * For each index `value` of `dimension`, counts the non-zero cells of the
 * slice that satisfy, for every other dimension d,
 * shape[d] - indices[d] <= mThreshold[d]. Cells failing the test in any
 * other dimension are ignored.
 * Cell coordinates are passed through PermutationUtil::permute(..., initial)
 * before the cube is read.
 */
template <class _DT, class _LDT,  class _CT>
vector<double> ThresholdSliceSort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,  const uint dimension) {
    // This visits every cell of the cube once, so it can be relatively expensive.
    const vector<int> shape = DC.getShape();
    const int range = shape[dimension];
    vector<double> answer(range,0.0);

    for(int value = 0; value < range; ++value) {
        // Restrict the walk to the single slice `dimension == value`.
        vector<int> lower(shape.size(), 0);
        vector<int> upper(shape);
        lower[dimension] = value;
        upper[dimension] = value + 1;

        vector<int> cell(lower);
        do {
            const vector<int> permuted = PermutationUtil::permute(cell, initial);
            if(DC.get(permuted) != 0) {
                // The cell counts only if it passes the threshold test in
                // every dimension other than the one being sorted.
                bool ok = true;
                for(uint dim = 0; ok && dim < shape.size(); ++dim) {
                    if(dim == dimension) continue;
                    ok = (shape[dim] - cell[dim] <= mThreshold[dim]);
                }
                if(ok) {
                    answer[value] += 1.0;
                }
            }
        } while(MathUtil::increment(cell, lower, upper));
    }
    return answer;
}

#endif

