// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include "normalizationscommon.h"

#include "normalutil.h"
#include "normalizations.h"



/**
 * Writes the entries cube.get(row, col) for 0 <= row, col < 16 to standard
 * output: one output line per row, every value followed by a single space.
 *
 * The extent of 16 is hard-coded, so the cube passed in is assumed to be
 * two-dimensional and at least 16 wide in each dimension.
 */
void print(DataCube<short,int64>& cube) {
    // todo: write something general
    const int side = 16;
    int row = 0;
    while (row < side) {
        int col = 0;
        while (col < side) {
            cout << cube.get(row, col) << " ";
            ++col;
        }
        cout << endl;
        ++row;
    }
}

void sanity() {
    vector<int> DCShape(2,16);
    DataCube<short,int64>	DC (DCShape);
    DataCube<short,int64> Copy(DCShape);
    DC.open("sanity.bin");
    Copy.open("sanity_copy.bin");
    DC.fillWithZeroes();
    Copy.fillWithZeroes();
    for(int k = 0 ; k < 16 ; ++k) {
        DC.put(2,k,k);
        DC.put(1,k,2);
        DC.put(3,6,k);
        DC.put(7,9,k);
        DC.put(5,k,10);
    }
    Normalization<short,int64> n1;
    FrequencySort<short,int64,LessComparator<pair<uint64,int> > > n2;
    FrequencySort<short,int64,GreaterComparator<pair<uint64,int> > > n3;

    vector<vector<int> > normal1 = n1.computeNormal(DC);
    vector<vector<int> > normal2 = n2.computeNormal(DC);
    vector<vector<int> > normal3 = n3.computeNormal(DC);

    int m = 2;
    vector<int> ChunkShape(DC.getShape().size(), m);
    cout << "Using regular chunks of size " << m << endl;
    cout << " HOLAP with canonical normalization = " << \
    HOLAPUtil<short,int64>::cost(DC,ChunkShape,normal1,true)<< endl;
    NormalUtil<short,int64>::copyTo(DC,Copy,normal1);
    print(Copy);
    Copy.fillWithZeroes();
    cout << " HOLAP with frequency sort (ascending) normalization = " << \
    HOLAPUtil<short,int64>::cost(DC,ChunkShape,normal2,true) << endl;
    NormalUtil<short,int64>::copyTo(DC,Copy,normal2);
    print(Copy);
    Copy.fillWithZeroes();
    cout << " HOLAP with frequency sort (descending) normalization = " << \
    HOLAPUtil<short,int64>::cost(DC,ChunkShape,normal3,true) << endl;
    NormalUtil<short,int64>::copyTo(DC,Copy,normal3);
    print(Copy);
    // We must clean up!
    DC.close();
    Copy.close();
    //
}

int main(int argc, char * argv[]) {

    // couldn't the getopt() libraries be used for this? (I tend to parse args myself
    //  but I'm always aware that I shouldn't.)  I think it's POSIX. -OFK
    //
    //  I agree. However this main.cpp file started out for a very
    //  simple test case. It should be refacted now. I'm focusing on
    //  more important cases, assuming that main.cpp can be rewritten
    //  completly once the rest is solid.
    //
    //  Plus, I must say I've never seen anyone use getopt.
    //
    //  -DL
    //

    cout <<" Normalization tool. (c) 2003 NRC/CNRC Daniel Lemire "<<endl;
    cout <<" Usage: "<< argv[0] <<" BinaryFileName -groupby n1,n2,n3 "<<endl;
    cout <<" or  "<<argv[0] << " BinaryFileName -dumpinfo "<<endl;
    // need to update about normfile option
    if(argc < 2) return 0;
    vector<int> GroupBys;
    char * BinaryFileName = NULL;
    char * normFileName = NULL;
    //
    // next follow our ugly parsing of the command line, to be replaced by getopt
    //
    if(argv[1][0] != '-')	BinaryFileName = argv[1];
    for(int k = 1; k < argc ; ++k) {
        if(strcmp(argv[k],"-groupby")==0) {
            char * groupbys = argv[++k];
            string number;
            for( int i = 0; groupbys[i] != 0; ++i) {
                if(groupbys[i] == ',') {
                    GroupBys.push_back(atoi(number.c_str()));
                    number.clear();
                } else {
                    number += groupbys[i];
                }
            }
            GroupBys.push_back(atoi(number.c_str()));
        }
        else if (strcmp(argv[k],"-dumpinfo")==0){
            dump(BinaryFileName);
        }
        else if (strcmp(argv[k],"-normfile")==0) {
            normFileName = argv[++k];
        }
        else if (strcmp(argv[k],"-sanity")==0) {
            sanity();
        }
        else if (argv[k][0] == 'v') {
            cout << "Option not recognized = "<< argv[k] << endl;
        }
    }
    if(GroupBys.size() == 0) {
        cout << "No groupby specified. Exiting. "<< endl;
        return 1;
    }
    if(BinaryFileName == NULL) {
        cout << "No file specified. Exiting. " << endl;
        return 2;
    }
    //
    // end of the parsing
    //
    cout << "[info] Group by : ";
    stringstream strs;
    for(vector<int>::iterator i = GroupBys.begin(); i != GroupBys.end(); ++i) {
        strs << *i << "_"; // this is for later, see temp file construction
        cout << *i << " ";
    }
    cout <<endl;
    string temp;
    strs >> temp;
    temp = "DCTemp_"+temp+".bin"; // we will use this for our data cube
    vector<int> DCShape(GroupBys.size());
    // parsing the data
    OwenParser<short,int64> parser(BinaryFileName);
    cout << "[info] Header is : \"" << parser.getHeader() << "\". "<<endl;
    vector<int> shape = parser.getShape();
    cout << "[info] Your data cube will have dimensions... "<<endl;
    cout << "[info]  ";
    for(uint i = 0; i < GroupBys.size(); ++i) {
        DCShape[i] = shape[GroupBys[i]];
        cout << DCShape[i]<< " ";
    }
    cout << endl;
    // done parsing the data
    /*vector<int> LastIndex = DCShape;
    for (uint k = 0 ; k < LastIndex.size(); ++k) --LastIndex[k];*/ // don't know why this was needed
    clock_t start, finish;
    double NombreDeSecondes = 0.0;// that's NumberOfSeconds in French to test your bilinguism
    // next we construct our data cube
    RAMCube<short,int64>	DC (DCShape);
    cout << "[info] creating data cube in file " << temp << endl;
    DC.open(temp.c_str());
    DC.fillWithZeroes();
    //
    // Ok, now we have a data cube with zeroes in it
    //


    //
    //
    //
    cout << "[info] filling data cube."<<endl;
    start = clock();
    parser.fill(DC,GroupBys);
    finish = clock();
    NombreDeSecondes =  (double)(finish - start) / CLOCKS_PER_SEC;
    cout << "[info] done filling data cube." <<endl;
    cout << "[time] It took " << NombreDeSecondes << " s."<<endl;
    //
    // Data cube should be alright now.
    //

    //
    // Ok, enough fooling around!
    //
    //First we define the normalizations we will use
    deque<Normalization<short, int64> *> n;
    n.push_back( new Normalization<short,int64> ());
    if(normFileName != 0) n.push_back( new NormalizationReader<short,int64> (normFileName));
    n.push_back( new FrequencySort<short,int64,LessComparator<pair<uint64,int> > > ());
    n.push_back(new IteratedSliceCluster<short,int64>(2));
    if (*min_element(DCShape.begin(),DCShape.end()) >= 4) n.push_back(new IteratedSliceCluster<short,int64>(4));
    n.push_back(new GreedyIterSort<short,int64,LessComparator<pair<uint64,int> > > ());
    n.push_back(new GreedyIterSort<short,int64,GreaterComparator<pair<uint64,int> > > ());
    n.push_back(( IteratedSliceCluster<short,int64> (2) *
                  FrequencySort<short,int64,LessComparator<pair<uint64,int> >  >()
                ).clone());
    // next we actually compute the normalizations
    vector<vector<int> > normal[n.size()];
    for (uint i = 0; i < n.size(); ++i) {
        cout << "Normalization " + n[i]->getTextName() << endl;
        start = clock();
        normal[i] = n[i]->computeNormal(DC);
        finish = clock();
        NombreDeSecondes =  (double)(finish - start) / CLOCKS_PER_SEC;
        cout << "[time] It took " << NombreDeSecondes << " s."<<endl;
        NormalUtil<short,int64>::printSmall2d(DC,normal[i],2);
    }
    // last, we display the results
    int maxm = min(8,*min_element(DCShape.begin(),DCShape.end())); 
    for(int m = 2; m <= maxm ; ++m) {
        vector<int> ChunkShape(DC.getShape().size(), m);
        cout << "------------------------" << endl;
        cout << "Using regular chunks of size " << m << endl;
        for (uint i = 0; i < n.size(); ++i) {
            cout << " HOLAP with " << n[i]->getTextName() << "\t= " <<
            HOLAPUtil<short,int64>::cost(DC,ChunkShape,normal[i],true)<< endl;
        }
    }
    //
    // We must clean up!
    //
    DC.close();
    //
    for (uint i = 0; i < n.size(); ++i) delete n[i];
}

