/*
 * Project: MoleCuilder
 * Description: creates and alters molecular systems
 * Copyright (C)  2010-2012 University of Bonn. All rights reserved.
 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
 */

/*
 * TremoloParser.cpp
 *
 *  Created on: Mar 2, 2010
 *      Author: metzler
 */

// include config.h
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "CodePatterns/MemDebug.hpp"

#include "CodePatterns/Assert.hpp"
#include "CodePatterns/Log.hpp"
#include "CodePatterns/toString.hpp"
#include "CodePatterns/Verbose.hpp"

#include "TremoloParser.hpp"

#include "Atom/atom.hpp"
#include "Bond/bond.hpp"
#include "Descriptors/AtomIdDescriptor.hpp"
#include "Element/element.hpp"
#include "Element/periodentafel.hpp"
#include "molecule.hpp"
#include "MoleculeListClass.hpp"
#include "World.hpp"
#include "WorldTime.hpp"


#include <algorithm>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <iomanip>
#include <map>
#include <sstream>
#include <vector>

// declare specialized static variables
const std::string FormatParserTrait<tremolo>::name = "tremolo";
const std::string FormatParserTrait<tremolo>::suffix = "data";
const ParserTypes FormatParserTrait<tremolo>::type = tremolo;

/**
 * Constructor.
 */
FormatParser< tremolo >::FormatParser()  :
  FormatParser_common(NULL)
{
  knownKeys["x"] = TremoloKey::x;
  knownKeys["u"] = TremoloKey::u;
  knownKeys["F"] = TremoloKey::F;
  knownKeys["stress"] = TremoloKey::stress;
  knownKeys["Id"] = TremoloKey::Id;
  knownKeys["neighbors"] = TremoloKey::neighbors;
  knownKeys["imprData"] = TremoloKey::imprData;
  knownKeys["GroupMeasureTypeNo"] = TremoloKey::GroupMeasureTypeNo;
  knownKeys["type"] = TremoloKey::type;
  knownKeys["extType"] = TremoloKey::extType;
  knownKeys["name"] = TremoloKey::name;
  knownKeys["resName"] = TremoloKey::resName;
  knownKeys["chainID"] = TremoloKey::chainID;
  knownKeys["resSeq"] = TremoloKey::resSeq;
  knownKeys["occupancy"] = TremoloKey::occupancy;
  knownKeys["tempFactor"] = TremoloKey::tempFactor;
  knownKeys["segID"] = TremoloKey::segID;
  knownKeys["Charge"] = TremoloKey::Charge;
  knownKeys["charge"] = TremoloKey::charge;
  knownKeys["GrpTypeNo"] = TremoloKey::GrpTypeNo;
  knownKeys["torsion"] = TremoloKey::torsion;

  createKnownTypesByIdentity();

  // default behavior: use all possible keys on output
  for (std::map<std::string, TremoloKey::atomDataKey>::iterator iter = knownKeys.begin(); iter != knownKeys.end(); ++iter)
    usedFields.push_back(iter->first);

  // and noKey afterwards(!) such that it is not used in usedFields
  knownKeys[" "] = TremoloKey::noKey; // with this we can detect invalid keys

  // invert knownKeys for debug output
  for (std::map<std::string, TremoloKey::atomDataKey>::iterator iter = knownKeys.begin(); iter != knownKeys.end(); ++iter)
    knownKeyNames.insert( make_pair( iter->second, iter->first) );

  additionalAtomData.clear();
}

/**
 * Destructor.
 */
FormatParser< tremolo >::~FormatParser() 
{
  LOG(1, "INFO: Clearing usedFields.");
  usedFields.clear();
  additionalAtomData.clear();
  knownKeys.clear();
}

/**
 * Loads atoms from a tremolo-formatted file.
 *
 * \param tremolo file
 */
void FormatParser< tremolo >::load(istream* file) {
  std::string line;
  std::string::size_type location;

  // reset the id maps
  resetIdAssociations();

  LOG(1, "INFO: Clearing usedFields.");
  usedFields.clear();

  molecule *newmol = World::getInstance().createMolecule();
  newmol->ActiveFlag = true;
  // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include
  World::getInstance().getMolecules()->insert(newmol);
  while (file->good()) {
    std::getline(*file, line, '\n');
    if (usedFields.empty()) {
      location = line.find("ATOMDATA", 0);
      if (location != string::npos) {
       parseAtomDataKeysLine(line, location + 8);
      }
    }
    if (line.length() > 0 && line.at(0) != '#') {
      readAtomDataLine(line, newmol);
    }
  }
  LOG(3, "usedFields after load contains: " << usedFields);

  // refresh atom::nr and atom::name
  std::vector<atomId_t> atoms(newmol->getAtomCount());
  std::transform(newmol->begin(), newmol->end(), atoms.begin(),
      boost::bind(&atom::getId, _1));
  processNeighborInformation(atoms);
  adaptImprData();
  adaptTorsion();
}

/**
 * Saves the \a atoms into as a tremolo file.
 *
 * \param file where to save the state
 * \param atoms atoms to store
 */
void FormatParser< tremolo >::save(ostream* file, const std::vector<atom *> &AtomList) {
  LOG(0, "Saving changes to tremolo.");

  std::vector<atom*>::const_iterator atomIt;
  /*vector<string>::iterator it;*/
  std::vector<std::string>::iterator it = unique(usedFields.begin(), usedFields.end()); // skips all duplicates in the vector

  LOG(3, "INFO: usedFields before save contains: " << usedFields);
  //LOG(3, "INFO: additionalAtomData contains: " << additionalAtomData);

  // distribute continuous indices
  resetIdAssociations();
  atomId_t lastid = 0;
  for (atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
    associateLocaltoGlobalId(++lastid, (*atomIt)->getId());
  }

  // store
  *file << "# ATOMDATA";
  for (it=usedFields.begin(); it < usedFields.end(); it++) {
    *file << "\t" << *it;
  }
  *file << endl;
  for (atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
    saveLine(file, *atomIt);
  }
}

/** Add default info, when new atom is added to World.
 *
 * @param id of atom
 */
void FormatParser< tremolo >::AtomInserted(atomId_t id)
{
  std::map<const atomId_t, TremoloAtomInfoContainer>::iterator iter = additionalAtomData.find(id);
  ASSERT(iter == additionalAtomData.end(),
      "FormatParser< tremolo >::AtomInserted() - additionalAtomData already present for newly added atom "
      +toString(id)+".");
  // don't add entry, as this gives a default resSeq of 0 not the molecule id
  // additionalAtomData.insert( std::make_pair(id, TremoloAtomInfoContainer()) );
}

/** Remove additional AtomData info, when atom has been removed from World.
 *
 * @param id of atom
 */
void FormatParser< tremolo >::AtomRemoved(atomId_t id)
{
  std::map<const atomId_t, TremoloAtomInfoContainer>::iterator iter = additionalAtomData.find(id);
  // as we do not insert AtomData on AtomInserted, we cannot be assured of its presence
//  ASSERT(iter != additionalAtomData.end(),
//      "FormatParser< tremolo >::AtomRemoved() - additionalAtomData is not present for atom "
//      +toString(id)+" to remove.");
  if (iter != additionalAtomData.end())
    additionalAtomData.erase(iter);
}

/**
 * Sets the keys for which data should be written to the stream when save is
 * called.
 *
 * \param string of field names with the same syntax as for an ATOMDATA line
 *        but without the prexix "ATOMDATA"
 */
void FormatParser< tremolo >::setFieldsForSave(std::string atomDataLine) {
  parseAtomDataKeysLine(atomDataLine, 0);
}


/**
 * Writes one line of tremolo-formatted data to the provided stream.
 *
 * \param stream where to write the line to
 * \param reference to the atom of which information should be written
 */
void FormatParser< tremolo >::saveLine(ostream* file, atom* currentAtom) {
  std::vector<string>::iterator it;

  TremoloKey::atomDataKey currentField;

  LOG(4, "INFO: Saving atom " << *currentAtom << ", its father id is " << currentAtom->GetTrueFather()->getId());

  for (it = usedFields.begin(); it != usedFields.end(); it++) {
    currentField = knownKeys[it->substr(0, it->find("="))];
    switch (currentField) {
      case TremoloKey::x :
        // for the moment, assume there are always three dimensions
        LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getPosition());
        *file << currentAtom->at(0) << "\t";
        *file << currentAtom->at(1) << "\t";
        *file << currentAtom->at(2) << "\t";
        break;
      case TremoloKey::u :
        // for the moment, assume there are always three dimensions
        LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getAtomicVelocity());
        *file << currentAtom->getAtomicVelocity()[0] << "\t";
        *file << currentAtom->getAtomicVelocity()[1] << "\t";
        *file << currentAtom->getAtomicVelocity()[2] << "\t";
        break;
      case TremoloKey::type :
        if (additionalAtomData.count(currentAtom->getId())) {
          if (additionalAtomData[currentAtom->getId()].get(currentField) != "-") {
            LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField));
            *file << additionalAtomData[currentAtom->getId()].get(currentField) << "\t";
          } else {
            LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value: " << currentAtom->getType()->getSymbol());
            *file << currentAtom->getType()->getSymbol() << "\t";
          }
        } else if (additionalAtomData.count(currentAtom->GetTrueFather()->getId())) {
          if (additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField) != "-") {
            LOG(3, "Writing for type " << knownKeyNames[currentField] << " stuff from father: " << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField));
            *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField) << "\t";
          } else {
            LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value from father: " << currentAtom->GetTrueFather()->getType()->getSymbol());
            *file << currentAtom->GetTrueFather()->getType()->getSymbol() << "\t";
          }
        } else {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << " its default value: " << currentAtom->getType()->getSymbol());
          *file << currentAtom->getType()->getSymbol() << "\t";
        }
        break;
      case TremoloKey::Id :
        LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getId()+1);
        *file << getLocalId(currentAtom->getId()) << "\t";
        break;
      case TremoloKey::neighbors :
        LOG(3, "Writing type " << knownKeyNames[currentField]);
        writeNeighbors(file, atoi(it->substr(it->find("=") + 1, 1).c_str()), currentAtom);
        break;
      case TremoloKey::resSeq :
        if (additionalAtomData.count(currentAtom->getId())) {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField));
          *file << additionalAtomData[currentAtom->getId()].get(currentField);
        } else if (currentAtom->getMolecule() != NULL) {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << " its own id: " << currentAtom->getMolecule()->getId()+1);
          *file << setw(4) << currentAtom->getMolecule()->getId()+1;
        } else {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value: " << defaultAdditionalData.get(currentField));
          *file << defaultAdditionalData.get(currentField);
        }
        *file << "\t";
        break;
      default :
        if (additionalAtomData.count(currentAtom->getId())) {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField));
          *file << additionalAtomData[currentAtom->getId()].get(currentField);
        } else if (additionalAtomData.count(currentAtom->GetTrueFather()->getId())) {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << " stuff from father: " << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField));
          *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField);
        } else {
          LOG(3, "Writing for type " << knownKeyNames[currentField] << " the default: " << defaultAdditionalData.get(currentField));
          *file << defaultAdditionalData.get(currentField);
        }
        *file << "\t";
        break;
    }
  }

  *file << endl;
}

/**
 * Writes the neighbor information of one atom to the provided stream.
 *
 * Note that ListOfBonds of WorldTime::CurrentTime is used.
 *
 * \param stream where to write neighbor information to
 * \param number of neighbors
 * \param reference to the atom of which to take the neighbor information
 */
void FormatParser< tremolo >::writeNeighbors(ostream* file, int numberOfNeighbors, atom* currentAtom) {
  const BondList& ListOfBonds = currentAtom->getListOfBonds();
  // sort bonded indices
  typedef std::set<atomId_t> sortedIndices;
  sortedIndices sortedBonds;
  for (BondList::const_iterator iter = ListOfBonds.begin();
      iter != ListOfBonds.end(); ++iter)
    sortedBonds.insert(getLocalId((*iter)->GetOtherAtom(currentAtom)->getId()));
  // print indices
  sortedIndices::const_iterator currentBond = sortedBonds.begin();
  for (int i = 0; i < numberOfNeighbors; i++) {
    *file << (currentBond != sortedBonds.end() ? (*currentBond) : 0) << "\t";
    if (currentBond != sortedBonds.end())
      ++currentBond;
  }
}

/**
 * Stores keys from the ATOMDATA line.
 *
 * \param line to parse the keys from
 * \param with which offset the keys begin within the line
 */
void FormatParser< tremolo >::parseAtomDataKeysLine(std::string line, int offset) {
  std::string keyword;
  std::stringstream lineStream;

  lineStream << line.substr(offset);
  LOG(1, "INFO: Clearing usedFields in parseAtomDataKeysLine.");
  usedFields.clear();
  while (lineStream.good()) {
    lineStream >> keyword;
    if (knownKeys[keyword.substr(0, keyword.find("="))] == TremoloKey::noKey) {
      // TODO: throw exception about unknown key
      cout << "Unknown key: " << keyword << " is not part of the tremolo format specification." << endl;
      break;
    }
    usedFields.push_back(keyword);
  }
  //LOG(1, "INFO: " << usedFields);
}

/** Sets the properties per atom to print to .data file by parsing line from
 *  \a atomdata_string.
 *
 *  We just call \sa  FormatParser< tremolo >::parseAtomDataKeysLine() which is left
 *  private.,
 *
 * @param atomdata_string line to parse with space-separated values
 */
void FormatParser< tremolo >::setAtomData(const std::string &atomdata_string)
{
  parseAtomDataKeysLine(atomdata_string, 0);
}


/**
 * Reads one data line of a tremolo file and interprets it according to the keys
 * obtained from the ATOMDATA line.
 *
 * \param line to parse as an atom
 * \param *newmol molecule to add atom to
 */
void FormatParser< tremolo >::readAtomDataLine(std::string line, molecule *newmol = NULL) {
  std::vector<string>::iterator it;
  std::stringstream lineStream;
  atom* newAtom = World::getInstance().createAtom();
  const atomId_t atomid = newAtom->getId();
  additionalAtomData[atomid] = TremoloAtomInfoContainer(); // fill with default values
  TremoloAtomInfoContainer *atomInfo = &additionalAtomData[atomid];
  TremoloKey::atomDataKey currentField;
  ConvertTo<double> toDouble;
  ConvertTo<int> toInt;
  Vector tempVector;

  // setup tokenizer, splitting up white-spaced entries
  typedef boost::tokenizer<boost::char_separator<char> >
      tokenizer;
  boost::char_separator<char> whitespacesep(" \t");
  tokenizer tokens(line, whitespacesep);
  ASSERT(tokens.begin() != tokens.end(),
      "FormatParser< tremolo >::readAtomDataLine - empty string, need at least ' '!");
  tokenizer::iterator tok_iter = tokens.begin();
  // then associate each token to each file
  for (it = usedFields.begin(); it < usedFields.end(); it++) {
    const std::string keyName = it->substr(0, it->find("="));
    currentField = knownKeys[keyName];
    const std::string word = *tok_iter;
    LOG(4, "INFO: Parsing key " << keyName << " with remaining data " << word);
    switch (currentField) {
      case TremoloKey::x :
        // for the moment, assume there are always three dimensions
        for (int i=0;i<NDIM;i++) {
          ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for x["+toString(i)+"]!");
          LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
          newAtom->set(i, toDouble(*tok_iter));
          tok_iter++;
        }
        break;
      case TremoloKey::u :
        // for the moment, assume there are always three dimensions
        for (int i=0;i<NDIM;i++) {
          ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for u["+toString(i)+"]!");
          LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
          tempVector[i] = toDouble(*tok_iter);
          tok_iter++;
        }
        newAtom->setAtomicVelocity(tempVector);
        break;
      case TremoloKey::type :
      {
        ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!");
        LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
        std::string element;
        try {
          element = knownTypes.getType(*tok_iter);
        } catch(IllegalParserKeyException) {
          // clean up
          World::getInstance().destroyAtom(newAtom);
          // give an error
          ELOG(0, "TremoloParser: I do not understand the element token " << *tok_iter << ".");
        }
        // put type name into container for later use
        atomInfo->set(currentField, *tok_iter);
        LOG(4, "INFO: Parsing element " << (*tok_iter) << " as " << element << " according to KnownTypes.");
        tok_iter++;
        newAtom->setType(World::getInstance().getPeriode()->FindElement(element));
        ASSERT(newAtom->getType(), "Type was not set for this atom");
        break;
      }
      case TremoloKey::Id :
        ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!");
        LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
        associateLocaltoGlobalId(toInt(*tok_iter), atomid);
        tok_iter++;
        break;
      case TremoloKey::neighbors :
        for (int i=0;i<atoi(it->substr(it->find("=") + 1, 1).c_str());i++) {
          ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!");
          LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
          lineStream << *tok_iter << "\t";
          tok_iter++;
        }
        readNeighbors(&lineStream,
            atoi(it->substr(it->find("=") + 1, 1).c_str()), atomid);
        break;
      default :
        ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!");
        LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter);
        atomInfo->set(currentField, *tok_iter);
        tok_iter++;
        break;
    }
  }
  LOG(3, "INFO: Parsed atom " << atomid << ".");
  if (newmol != NULL)
    newmol->AddAtom(newAtom);
}

/**
 * Reads neighbor information for one atom from the input.
 *
 * \param line stream where to read the information from
 * \param numberOfNeighbors number of neighbors to read
 * \param atomid world id of the atom the information belongs to
 */
void FormatParser< tremolo >::readNeighbors(std::stringstream* line, int numberOfNeighbors, int atomId) {
  int neighborId = 0;
  for (int i = 0; i < numberOfNeighbors; i++) {
    *line >> neighborId;
    // 0 is used to fill empty neighbor positions in the tremolo file.
    if (neighborId > 0) {
      LOG(4, "INFO: Atom with global id " << atomId
          << " has neighbour with serial " << neighborId);
      additionalAtomData[atomId].neighbors.push_back(neighborId);
    }
  }
}

/**
 * Checks whether the provided name is within the list of used fields.
 *
 * \param field name to check
 *
 * \return true if the field name is used
 */
bool FormatParser< tremolo >::isUsedField(std::string fieldName) {
  bool fieldNameExists = false;
  for (std::vector<std::string>::iterator usedField = usedFields.begin(); usedField != usedFields.end(); usedField++) {
    if (usedField->substr(0, usedField->find("=")) == fieldName)
      fieldNameExists = true;
  }

  return fieldNameExists;
}


/**
 * Adds the collected neighbor information to the atoms in the world. The atoms
 * are found by their current ID and mapped to the corresponding atoms with the
 * Id found in the parsed file.
 *
 * @param atoms vector with all newly added (global) atomic ids
 */
void FormatParser< tremolo >::processNeighborInformation(const std::vector<atomId_t> &atoms) {
  if (!isUsedField("neighbors")) {
    return;
  }

  for (std::vector<atomId_t>::const_iterator iter = atoms.begin(); iter != atoms.end(); ++iter) {
    ASSERT(additionalAtomData.count(*iter) != 0,
        "FormatParser< tremolo >::processNeighborInformation() - global id "
        +toString(*iter)+" unknown in additionalAtomData.");
    TremoloAtomInfoContainer &currentInfo = additionalAtomData[*iter];
    ASSERT (!currentInfo.neighbors_processed,
        "FormatParser< tremolo >::processNeighborInformation() - neighbors of new atom "
        +toString(*iter)+" are already processed.");
    for(std::vector<int>::const_iterator neighbor = currentInfo.neighbors.begin();
      neighbor != currentInfo.neighbors.end(); neighbor++
    ) {
      LOG(3, "INFO: Creating bond between ("
          << *iter
          << ") and ("
          << getGlobalId(*neighbor) << "|" << *neighbor << ")");
      ASSERT(getGlobalId(*neighbor) != -1,
          "FormatParser< tremolo >::processNeighborInformation() - global id to local id "
          +toString(*neighbor)+" is unknown.");
      World::getInstance().getAtom(AtomById(*iter))
          ->addBond(WorldTime::getTime(), World::getInstance().getAtom(AtomById(getGlobalId(*neighbor))));
    }
    currentInfo.neighbors_processed = true;
  }
}

/**
 * Replaces atom IDs read from the file by the corresponding world IDs. All IDs
 * IDs of the input string will be replaced; expected separating characters are
 * "-" and ",".
 *
 * \param string in which atom IDs should be adapted
 *
 * \return input string with modified atom IDs
 */
std::string FormatParser< tremolo >::adaptIdDependentDataString(std::string data) {
  // there might be no IDs
  if (data == "-") {
    return "-";
  }

  char separator;
  int id;
  std::stringstream line, result;
  line << data;

  line >> id;
  result << getGlobalId(id);
  while (line.good()) {
    line >> separator >> id;
    result << separator << getGlobalId(id);
  }

  return result.str();
}

/**
 * Corrects the atom IDs in each imprData entry to the corresponding world IDs
 * as they might differ from the originally read IDs.
 */
void FormatParser< tremolo >::adaptImprData() {
  if (!isUsedField("imprData")) {
    return;
  }

  for(std::map<const atomId_t, TremoloAtomInfoContainer>::iterator currentInfo = additionalAtomData.begin();
    currentInfo != additionalAtomData.end(); currentInfo++
  ) {
    currentInfo->second.imprData = adaptIdDependentDataString(currentInfo->second.imprData);
  }
}

/**
 * Corrects the atom IDs in each torsion entry to the corresponding world IDs
 * as they might differ from the originally read IDs.
 */
void FormatParser< tremolo >::adaptTorsion() {
  if (!isUsedField("torsion")) {
    return;
  }

  for(std::map<const atomId_t, TremoloAtomInfoContainer>::iterator currentInfo = additionalAtomData.begin();
    currentInfo != additionalAtomData.end(); currentInfo++
  ) {
    currentInfo->second.torsion = adaptIdDependentDataString(currentInfo->second.torsion);
  }
}

