/*
 * average.cpp
 *
 *  Created on: Apr 15, 2010
 *      Author: heber
 */

using namespace std;

#include <iostream>
#include <iomanip>
#include <math.h>
#include <string>
#include <sstream>

#include "average.hpp"

/** Advances through a row until the column with index \a target has been read.
 * \param row stream over a single row of white-space separated numbers
 * \param current index of the column extracted last (-1 if none yet), incremented for every
 *        entry that is extracted
 * \param target zero-based index of the wanted column
 * \param value receives the entry of column \a target on success
 * \return true if column \a target has been extracted, false if the row ends or an entry
 *         cannot be parsed as a number before that column is reached
 */
static bool ReadUpToColumn(std::istream &row, int &current, const int target, double &value)
{
  while (current < target) {
    // Test the extraction itself instead of eof(): trailing white-space, empty rows and
    // non-numeric tokens must not be mistaken for a value.
    if (!(row >> value))
      return false;
    ++current;
  }
  return current == target;
}

/** Takes the average over all entries in a list of columns per column.
 *
 * The stream is swept twice from its current position: first to sum up and count the entries
 * of every requested column, then to accumulate the squared deviations from the mean.
 * Afterwards the stream is put back to the position it had on entry.
 *
 * A row that lacks a requested column (or holds an unparsable entry before it) is reported on
 * stderr and contributes nothing to that column and to all higher ones; other rows still count.
 * Columns for which no entry was found at all (this includes negative indices) are reported on
 * stderr and removed from the result.
 *
 * The second member of each result is sqrt(sum of squared deviations)/N, i.e. the population
 * standard deviation divided by sqrt(N), not the plain standard deviation.
 *
 * If the stream cannot report or restore its position, the second sweep finds no data and all
 * errors remain zero.
 *
 * \param data input stream with entries, columns are white-space separated, rows by new-line.
 * \param Columns set of indices (zero-based, iterated in ascending order) for which columns
 *        to compute the average.
 * \return pointer to a newly allocated map with column index as key and the pair (mean, error)
 *         as value; the caller owns the map and has to delete it.
 */
MeanErrorMap *AverageColumns(std::istream &data, IndexSet Columns)
{
  MeanErrorMap *Values = new MeanErrorMap;
  std::map<int, int> CountMap;
  std::string zeile;
  std::istringstream line;
  double tmp = 0.;

  // store position of the stream; pos_type keeps a failed tellg() (-1) intact
  const std::istream::pos_type position = data.tellg();

  // set initial values in maps to zero
  for (IndexSet::const_iterator ColRunner = Columns.begin(); ColRunner != Columns.end(); ++ColRunner) {
    Values->insert( std::pair<int, std::pair<double, double> > (*ColRunner, std::pair<double, double> (0., 0.) ) );
    CountMap.insert( std::pair<int, int> (*ColRunner, 0) );
  }

  /// First sweep: go through each line, scan the desired columns, add up the entries
  /// and keep count of the number of summands per column.
  int lines = 0;
  while (std::getline(data, zeile)) {
    line.clear();
    line.str(zeile);
    ++lines;
    int cols = -1;
    for (IndexSet::const_iterator ColRunner = Columns.begin(); ColRunner != Columns.end(); ++ColRunner) {
      if (*ColRunner < 0) // invalid index, is dropped below as it never receives an entry
        continue;
      if (!ReadUpToColumn(line, cols, *ColRunner, tmp)) {
        std::cerr << "Not enough columns in line " << lines << "." << std::endl;
        break; // all remaining indices are larger and hence missing as well
      }
      (*Values)[*ColRunner].first += tmp;
      ++CountMap[*ColRunner];
    }
  }

  // turn sums into means and drop columns that never received an entry
  for (MeanErrorMap::iterator Runner = Values->begin(); Runner != Values->end(); ) {
    const int count = CountMap[Runner->first];
    if (count != 0) {
      Runner->second.first /= count;
      ++Runner;
    } else {
      std::cerr << "For column " << Runner->first << " no entries have been found." << std::endl;
      // post-increment hands the old position to erase() while Runner already moved on
      Values->erase(Runner++);
    }
  }

  // go to the beginning again for the second sweep
  data.clear();
  data.seekg(position);

  /// Second sweep: accumulate the squared deviation from the mean for exactly those entries
  /// that entered the mean. Short rows have been reported already and are skipped silently.
  while (std::getline(data, zeile)) {
    line.clear();
    line.str(zeile);
    int cols = -1;
    for (IndexSet::const_iterator ColRunner = Columns.begin(); ColRunner != Columns.end(); ++ColRunner) {
      if (*ColRunner < 0)
        continue;
      if (!ReadUpToColumn(line, cols, *ColRunner, tmp))
        break;
      // find() instead of operator[] so that a removed column is never re-created
      MeanErrorMap::iterator Entry = Values->find(*ColRunner);
      if (Entry == Values->end())
        continue;
      Entry->second.second += (tmp - Entry->second.first)*(tmp - Entry->second.first);
    }
  }

  // convert the accumulated squared deviations into the error of each column;
  // every remaining column has a non-zero count
  for (MeanErrorMap::iterator Runner = Values->begin(); Runner != Values->end(); ++Runner) {
    tmp = Runner->second.second;
    Runner->second.second = sqrt(tmp)/CountMap[Runner->first];
  }

  // go back to initial position of data
  data.clear();
  data.seekg(position);

  return Values;
}
