/*
 * TrainingData.hpp
 *
 *  Created on: 15.10.2012
 *      Author: heber
 */

#ifndef TRAININGDATA_HPP_
#define TRAININGDATA_HPP_

// include config.h
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <cstddef>
#include <iosfwd>
#include <utility>
#include <vector>

#include <boost/function.hpp>

#include "Fragmentation/Homology/HomologyContainer.hpp"
#include "FunctionApproximation/FunctionApproximation.hpp"

class Fragment;

/** This class encapsulates the training data for a given potential function
 * to learn.
 *
 * The data is added piece-wise by calling operator() with a range of
 * homologous fragments taken from a HomologyContainer. The gathered inputs
 * and outputs are accessible via getTrainingInputs() and
 * getTrainingOutputs().
 */
class TrainingData
{
public:
  //!> typedef for a range (pair of const_iterators) within the HomologyContainer at which fragments to look at
  typedef std::pair<
      HomologyContainer::const_iterator,
      HomologyContainer::const_iterator> range_t;
  //!> Type of the training inputs, as defined by FunctionApproximation
  typedef FunctionApproximation::inputs_t InputVector_t;
  //!> Type of the training outputs, as defined by FunctionApproximation
  typedef FunctionApproximation::outputs_t OutputVector_t;
  /** Typedef for a function containing how to extract required information
   * from a Fragment.
   *
   * The functor receives the Fragment and an additional std::size_t argument
   * (its semantics are defined by the supplied functor and are not visible
   * in this header) and returns FunctionModel::arguments_t.
   */
  typedef boost::function< FunctionModel::arguments_t (const Fragment &, const std::size_t)> extractor_t;
  //!> Typedef for a table with columns of all distances and the energy
  typedef std::vector< std::vector<double> > DistanceEnergyTable_t;

public:
  /** Constructor for class TrainingData.
   *
   * \param _extractor functor for extracting training input data from a
   *        Fragment; a copy is stored in the internal (const) member
   */
  explicit TrainingData(const extractor_t &_extractor) :
      extractor(_extractor)
  {}
  /** Destructor for class TrainingData.
   *
   * Nothing to do explicitly, the body is empty.
   */
  ~TrainingData()
  {}

  /** We go through the given \a range of homologous fragments and call
   * TrainingData::extractor on them in order to gather the distance and
   * the energy value, stored internally.
   *
   * \param range given range within a HomologyContainer of homologous fragments
   */
  void operator()(const range_t &range);

  /** Getter for const access to internal training data inputs.
   *
   * \return const ref to the internally stored training inputs
   */
  const InputVector_t& getTrainingInputs() const {
    return DistanceVector;
  }

  /** Getter for const access to internal training data outputs.
   *
   * \return const ref to the internally stored training outputs
   */
  const OutputVector_t& getTrainingOutputs() const {
    return EnergyVector;
  }

  /** Returns the average of each component over all OutputVectors.
   *
   * This is useful for initializing the offset of the potential.
   *
   * @return average output vector
   */
  const FunctionModel::results_t getTrainingOutputAverage() const;

  /** Calculate the L2 error of a given \a model against the stored training data.
   *
   * \param model model whose L2 error to calculate
   * \return sum of squared differences at training tuples
   */
  const double getL2Error(const FunctionModel &model) const;

  /** Calculate the Lmax error of a given \a model against the stored training data.
   *
   * \param model model whose Lmax error to calculate
   * \return maximum difference over all training tuples
   */
  const double getLMaxError(const FunctionModel &model) const;

  /** Creates a table of columns with all distances and the energy.
   *
   * \return array with first columns containing distances, last column energy
   */
  const DistanceEnergyTable_t getDistanceEnergyTable() const;

private:
  // prohibit use of default constructor, as we always require extraction functor.
  // Declared private; no definition is given in this header.
  TrainingData();

private:
  //!> private training data inputs, exposed read-only via getTrainingInputs()
  InputVector_t DistanceVector;
  //!> private training data outputs, exposed read-only via getTrainingOutputs()
  OutputVector_t EnergyVector;
  //!> function to be used for training input data extraction from a fragment
  const extractor_t extractor;
};

/** Prints the training data in \a data to the stream \a out for debugging.
 *
 * \param out output stream to print to
 * \param data training data to print
 * \return reference to an output stream (presumably \a out, to allow
 *         chaining -- the definition is not visible in this header)
 */
std::ostream &operator<<(std::ostream &out, const TrainingData &data);

#endif /* TRAININGDATA_HPP_ */
