| 1 | /*
 | 
|---|
| 2 |  * Project: MoleCuilder
 | 
|---|
| 3 |  * Description: creates and alters molecular systems
 | 
|---|
| 4 |  * Copyright (C)  2012 University of Bonn. All rights reserved.
 | 
|---|
| 5 |  * Please see the COPYING file or "Copyright notice" in builder.cpp for details.
 | 
|---|
| 6 |  * 
 | 
|---|
| 7 |  *
 | 
|---|
| 8 |  *   This file is part of MoleCuilder.
 | 
|---|
| 9 |  *
 | 
|---|
| 10 |  *    MoleCuilder is free software: you can redistribute it and/or modify
 | 
|---|
| 11 |  *    it under the terms of the GNU General Public License as published by
 | 
|---|
| 12 |  *    the Free Software Foundation, either version 2 of the License, or
 | 
|---|
| 13 |  *    (at your option) any later version.
 | 
|---|
| 14 |  *
 | 
|---|
| 15 |  *    MoleCuilder is distributed in the hope that it will be useful,
 | 
|---|
| 16 |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 17 |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 18 |  *    GNU General Public License for more details.
 | 
|---|
| 19 |  *
 | 
|---|
| 20 |  *    You should have received a copy of the GNU General Public License
 | 
|---|
| 21 |  *    along with MoleCuilder.  If not, see <http://www.gnu.org/licenses/>. 
 | 
|---|
| 22 |  */
 | 
|---|
| 23 | 
 | 
|---|
| 24 | /*
 | 
|---|
| 25 |  * LevMartester.cpp
 | 
|---|
| 26 |  *
 | 
|---|
| 27 |  *  Created on: Sep 27, 2012
 | 
|---|
| 28 |  *      Author: heber
 | 
|---|
| 29 |  */
 | 
|---|
| 30 | 
 | 
|---|
| 31 | 
 | 
|---|
| 32 | // include config.h
 | 
|---|
| 33 | #ifdef HAVE_CONFIG_H
 | 
|---|
| 34 | #include <config.h>
 | 
|---|
| 35 | #endif
 | 
|---|
| 36 | 
 | 
|---|
| 37 | #include <boost/archive/text_iarchive.hpp>
 | 
|---|
| 38 | 
 | 
|---|
| 39 | #include "CodePatterns/MemDebug.hpp"
 | 
|---|
| 40 | 
 | 
|---|
| 41 | #include <boost/assign.hpp>
 | 
|---|
| 42 | #include <boost/assign/list_of.hpp>
 | 
|---|
| 43 | #include <boost/bind.hpp>
 | 
|---|
| 44 | #include <boost/filesystem.hpp>
 | 
|---|
| 45 | #include <boost/foreach.hpp>
 | 
|---|
| 46 | #include <boost/function.hpp>
 | 
|---|
| 47 | #include <boost/program_options.hpp>
 | 
|---|
| 48 | 
 | 
|---|
| 49 | #include <cstdlib>
 | 
|---|
| 50 | #include <ctime>
 | 
|---|
| 51 | #include <fstream>
 | 
|---|
| 52 | #include <iostream>
 | 
|---|
| 53 | #include <iterator>
 | 
|---|
| 54 | #include <list>
 | 
|---|
| 55 | #include <vector>
 | 
|---|
| 56 | 
 | 
|---|
| 57 | #include <levmar.h>
 | 
|---|
| 58 | 
 | 
|---|
| 59 | #include "CodePatterns/Assert.hpp"
 | 
|---|
| 60 | #include "CodePatterns/Log.hpp"
 | 
|---|
| 61 | 
 | 
|---|
| 62 | #include "LinearAlgebra/Vector.hpp"
 | 
|---|
| 63 | 
 | 
|---|
| 64 | #include "Fragmentation/Homology/HomologyContainer.hpp"
 | 
|---|
| 65 | #include "Fragmentation/SetValues/Fragment.hpp"
 | 
|---|
| 66 | #include "FunctionApproximation/Extractors.hpp"
 | 
|---|
| 67 | #include "FunctionApproximation/FunctionApproximation.hpp"
 | 
|---|
| 68 | #include "FunctionApproximation/FunctionModel.hpp"
 | 
|---|
| 69 | #include "FunctionApproximation/TrainingData.hpp"
 | 
|---|
| 70 | #include "FunctionApproximation/writeDistanceEnergyTable.hpp"
 | 
|---|
| 71 | #include "Helpers/defs.hpp"
 | 
|---|
| 72 | #include "Potentials/PotentialFactory.hpp"
 | 
|---|
| 73 | #include "Potentials/PotentialRegistry.hpp"
 | 
|---|
| 74 | #include "Potentials/Specifics/PairPotential_Morse.hpp"
 | 
|---|
| 75 | #include "Potentials/Specifics/PairPotential_Angle.hpp"
 | 
|---|
| 76 | #include "Potentials/Specifics/SaturationPotential.hpp"
 | 
|---|
| 77 | #include "types.hpp"
 | 
|---|
| 78 | 
 | 
|---|
| 79 | namespace po = boost::program_options;
 | 
|---|
| 80 | 
 | 
|---|
| 81 | using namespace boost::assign;
 | 
|---|
| 82 | 
 | 
|---|
| 83 | HomologyGraph getFirstGraphwithTimesSpecificElement(
 | 
|---|
| 84 |     const HomologyContainer &homologies,
 | 
|---|
| 85 |     const size_t _number,
 | 
|---|
| 86 |     const size_t _times)
 | 
|---|
| 87 | {
 | 
|---|
| 88 |   for (HomologyContainer::container_t::const_iterator iter =
 | 
|---|
| 89 |       homologies.begin(); iter != homologies.end(); ++iter) {
 | 
|---|
| 90 |     if (iter->first.hasTimesAtomicNumber(_number,_times))
 | 
|---|
| 91 |       return iter->first;
 | 
|---|
| 92 |   }
 | 
|---|
| 93 |   return HomologyGraph();
 | 
|---|
| 94 | }
 | 
|---|
| 95 | 
 | 
|---|
| 96 | /** This function returns the elements of the sum over index "k" for an
 | 
|---|
| 97 |  * argument containing indices "i" and "j"
 | 
|---|
| 98 |  * @param inputs vector of all configuration (containing each a vector of all arguments)
 | 
|---|
| 99 |  * @param arg argument containing indices "i" and "j"
 | 
|---|
| 100 |  * @param cutoff cutoff criterion for sum over k
 | 
|---|
| 101 |  * @return vector of argument pairs (a vector) of ik and jk for at least all k
 | 
|---|
| 102 |  *        within distance of \a cutoff to i
 | 
|---|
| 103 |  */
 | 
|---|
| 104 | std::vector<FunctionModel::arguments_t>
 | 
|---|
| 105 | getTripleFromArgument(const FunctionApproximation::inputs_t &inputs, const argument_t &arg, const double cutoff)
 | 
|---|
| 106 | {
 | 
|---|
| 107 |   typedef std::list<argument_t> arg_list_t;
 | 
|---|
| 108 |   typedef std::map<size_t, arg_list_t > k_args_map_t;
 | 
|---|
| 109 |   k_args_map_t tempresult;
 | 
|---|
| 110 |   ASSERT( inputs.size() > arg.globalid,
 | 
|---|
| 111 |       "getTripleFromArgument() - globalid "+toString(arg.globalid)
 | 
|---|
| 112 |       +" is greater than all inputs "+toString(inputs.size())+".");
 | 
|---|
| 113 |   const FunctionModel::arguments_t &listofargs = inputs[arg.globalid];
 | 
|---|
| 114 |   for (FunctionModel::arguments_t::const_iterator argiter = listofargs.begin();
 | 
|---|
| 115 |       argiter != listofargs.end();
 | 
|---|
| 116 |       ++argiter) {
 | 
|---|
| 117 |     // first index must be either i or j but second index not
 | 
|---|
| 118 |     if (((argiter->indices.first == arg.indices.first)
 | 
|---|
| 119 |         || (argiter->indices.first == arg.indices.second))
 | 
|---|
| 120 |       && ((argiter->indices.second != arg.indices.first)
 | 
|---|
| 121 |           && (argiter->indices.second != arg.indices.second))) {
 | 
|---|
| 122 |       // we need arguments ik and jk
 | 
|---|
| 123 |       std::pair< k_args_map_t::iterator, bool> inserter =
 | 
|---|
| 124 |           tempresult.insert( std::make_pair( argiter->indices.second, arg_list_t(1,*argiter)));
 | 
|---|
| 125 |       if (!inserter.second) {
 | 
|---|
| 126 |         // is present one ik or jk, if ik insert jk at back
 | 
|---|
| 127 |         if (inserter.first->second.begin()->indices.first == arg.indices.first)
 | 
|---|
| 128 |           inserter.first->second.push_back(*argiter);
 | 
|---|
| 129 |         else // if jk, insert ik at front
 | 
|---|
| 130 |           inserter.first->second.push_front(*argiter);
 | 
|---|
| 131 |       }
 | 
|---|
| 132 |     }
 | 
|---|
| 133 | //    // or second index must be either i or j but first index not
 | 
|---|
| 134 | //    else if (((argiter->indices.first != arg.indices.first)
 | 
|---|
| 135 | //              && (argiter->indices.first != arg.indices.second))
 | 
|---|
| 136 | //            && ((argiter->indices.second == arg.indices.first)
 | 
|---|
| 137 | //                || (argiter->indices.second == arg.indices.second))) {
 | 
|---|
| 138 | //      // we need arguments ki and kj
 | 
|---|
| 139 | //      std::pair< k_args_map_t::iterator, bool> inserter =
 | 
|---|
| 140 | //          tempresult.insert( std::make_pair( argiter->indices.first, arg_list_t(1,*argiter)));
 | 
|---|
| 141 | //      if (!inserter.second) {
 | 
|---|
| 142 | //        // is present one ki or kj, if ki insert kj at back
 | 
|---|
| 143 | //        if (inserter.first->second.begin()->indices.second == arg.indices.first)
 | 
|---|
| 144 | //          inserter.first->second.push_back(*argiter);
 | 
|---|
| 145 | //        else // if kj, insert ki at front
 | 
|---|
| 146 | //          inserter.first->second.push_front(*argiter);
 | 
|---|
| 147 | //      }
 | 
|---|
| 148 | //    }
 | 
|---|
| 149 |   }
 | 
|---|
| 150 |   // check that i,j are NOT contained
 | 
|---|
| 151 |   ASSERT( tempresult.count(arg.indices.first) == 0,
 | 
|---|
| 152 |       "getTripleFromArgument() - first index of argument present in k_args_map?");
 | 
|---|
| 153 |   ASSERT( tempresult.count(arg.indices.second) == 0,
 | 
|---|
| 154 |       "getTripleFromArgument() - first index of argument present in k_args_map?");
 | 
|---|
| 155 | 
 | 
|---|
| 156 |   // convert
 | 
|---|
| 157 |   std::vector<FunctionModel::arguments_t> result;
 | 
|---|
| 158 |   for (k_args_map_t::const_iterator iter = tempresult.begin();
 | 
|---|
| 159 |       iter != tempresult.end();
 | 
|---|
| 160 |       ++iter) {
 | 
|---|
| 161 |     ASSERT( iter->second.size() == 2,
 | 
|---|
| 162 |         "getTripleFromArgument() - for index "+toString(iter->first)+" we did not find both ik and jk.");
 | 
|---|
| 163 |     result.push_back( FunctionModel::arguments_t(iter->second.begin(), iter->second.end()) );
 | 
|---|
| 164 |   }
 | 
|---|
| 165 |   return result;
 | 
|---|
| 166 | }
 | 
|---|
| 167 | 
 | 
|---|
| 168 | int main(int argc, char **argv)
 | 
|---|
| 169 | {
 | 
|---|
| 170 |   std::cout << "Hello to the World from LevMar!" << std::endl;
 | 
|---|
| 171 | 
 | 
|---|
| 172 |   // load homology file
 | 
|---|
| 173 |   po::options_description desc("Allowed options");
 | 
|---|
| 174 |   desc.add_options()
 | 
|---|
| 175 |       ("help", "produce help message")
 | 
|---|
| 176 |       ("homology-file", po::value< boost::filesystem::path >(), "homology file to parse")
 | 
|---|
| 177 |       ("fit-potential", po::value< std::string >(), "potential type to fit")
 | 
|---|
| 178 |       ("charges", po::value< FunctionModel::charges_t >()->multitoken(), "charges specifying the fragment")
 | 
|---|
| 179 |   ;
 | 
|---|
| 180 | 
 | 
|---|
| 181 |   po::variables_map vm;
 | 
|---|
| 182 |   po::store(po::parse_command_line(argc, argv, desc), vm);
 | 
|---|
| 183 |   po::notify(vm);
 | 
|---|
| 184 | 
 | 
|---|
| 185 |   if (vm.count("help")) {
 | 
|---|
| 186 |       std::cout << desc << "\n";
 | 
|---|
| 187 |       return 1;
 | 
|---|
| 188 |   }
 | 
|---|
| 189 | 
 | 
|---|
| 190 |   // homology-file
 | 
|---|
| 191 |   boost::filesystem::path homology_file;
 | 
|---|
| 192 |   if (vm.count("homology-file")) {
 | 
|---|
| 193 |     homology_file = vm["homology-file"].as<boost::filesystem::path>();
 | 
|---|
| 194 |     LOG(1, "INFO: Parsing " << homology_file.string() << ".");
 | 
|---|
| 195 |   } else {
 | 
|---|
| 196 |     ELOG(0, "homology file (homology-file) was not set.");
 | 
|---|
| 197 |     return 1;
 | 
|---|
| 198 |   }
 | 
|---|
| 199 | 
 | 
|---|
| 200 |   // type of potential to fit
 | 
|---|
| 201 |   std::string potentialtype;
 | 
|---|
| 202 |   if (vm.count("fit-potential")) {
 | 
|---|
| 203 |     potentialtype = vm["fit-potential"].as<std::string>();
 | 
|---|
| 204 |   } else {
 | 
|---|
| 205 |     ELOG(0, "potential type to fit (fit-potential) was not set.");
 | 
|---|
| 206 |     return 1;
 | 
|---|
| 207 |   }
 | 
|---|
| 208 | 
 | 
|---|
| 209 |   // charges
 | 
|---|
| 210 |   FunctionModel::charges_t charges;
 | 
|---|
| 211 |   if (vm.count("charges")) {
 | 
|---|
| 212 |     charges = vm["charges"].as< FunctionModel::charges_t >();
 | 
|---|
| 213 |   } else {
 | 
|---|
| 214 |     ELOG(0, "Vector of charges specifying the fragment (charges) was not set.");
 | 
|---|
| 215 |     return 1;
 | 
|---|
| 216 |   }
 | 
|---|
| 217 | 
 | 
|---|
| 218 |   // parse homologies into container
 | 
|---|
| 219 |   HomologyContainer homologies;
 | 
|---|
| 220 |   if (boost::filesystem::exists(homology_file)) {
 | 
|---|
| 221 |     std::ifstream returnstream(homology_file.string().c_str());
 | 
|---|
| 222 |     if (returnstream.good()) {
 | 
|---|
| 223 |       boost::archive::text_iarchive ia(returnstream);
 | 
|---|
| 224 |       ia >> homologies;
 | 
|---|
| 225 |     } else {
 | 
|---|
| 226 |       ELOG(0, "Failed to parse from " << homology_file.string() << ".");
 | 
|---|
| 227 |       return 1;
 | 
|---|
| 228 |     }
 | 
|---|
| 229 |     returnstream.close();
 | 
|---|
| 230 |   } else {
 | 
|---|
| 231 |     ELOG(0, homology_file << " does not exist.");
 | 
|---|
| 232 |     return 1;
 | 
|---|
| 233 |   }
 | 
|---|
| 234 | 
 | 
|---|
| 235 |   // first we try to look into the HomologyContainer
 | 
|---|
| 236 |   LOG(1, "INFO: Listing all present homologies ...");
 | 
|---|
| 237 |   for (HomologyContainer::container_t::const_iterator iter =
 | 
|---|
| 238 |       homologies.begin(); iter != homologies.end(); ++iter) {
 | 
|---|
| 239 |     LOG(1, "INFO: graph " << iter->first << " has Fragment "
 | 
|---|
| 240 |         << iter->second.first << " and associated energy " << iter->second.second << ".");
 | 
|---|
| 241 |   }
 | 
|---|
| 242 | 
 | 
|---|
| 243 |   LOG(0, "STATUS: I'm training now a " << potentialtype << " potential on charges " << charges << ".");
 | 
|---|
| 244 | 
 | 
|---|
| 245 |   /******************** TRAINING ********************/
 | 
|---|
| 246 |   // fit potential
 | 
|---|
| 247 |   FunctionModel *model =
 | 
|---|
| 248 |       PotentialFactory::getInstance().createInstance(
 | 
|---|
| 249 |           potentialtype,
 | 
|---|
| 250 |           charges);
 | 
|---|
| 251 |   ASSERT( model != NULL,
 | 
|---|
| 252 |       "main() - model returned from PotentialFactory is NULL.");
 | 
|---|
| 253 |   FunctionModel::parameters_t params(model->getParameterDimension(), 0.);
 | 
|---|
| 254 |   {
 | 
|---|
| 255 |     // then we ought to pick the right HomologyGraph ...
 | 
|---|
| 256 |     const HomologyGraph graph = getFirstGraphwithTimesSpecificElement(homologies,8,1);
 | 
|---|
| 257 |     if (graph != HomologyGraph()) {
 | 
|---|
| 258 |       LOG(1, "First representative graph containing charges is " << graph << ".");
 | 
|---|
| 259 | 
 | 
|---|
| 260 |       // Afterwards we go through all of this type and gather the distance and the energy value
 | 
|---|
| 261 |       TrainingData data(model->getFragmentSpecificExtractor(charges));
 | 
|---|
| 262 |       data(homologies.getHomologousGraphs(graph));
 | 
|---|
| 263 |       LOG(1, "INFO: I gathered the following training data:\n" <<
 | 
|---|
| 264 |           _detail::writeDistanceEnergyTable(data.getDistanceEnergyTable()));
 | 
|---|
| 265 |       // NOTICE that distance are in bohrradi as they come from MPQC!
 | 
|---|
| 266 | 
 | 
|---|
| 267 |       // now perform the function approximation by optimizing the model function
 | 
|---|
| 268 |       FunctionApproximation approximator(data, *model);
 | 
|---|
| 269 |       if (model->isBoxConstraint() && approximator.checkParameterDerivatives()) {
 | 
|---|
| 270 |         // we set parameters here because we want to test with default ones
 | 
|---|
| 271 |         srand((unsigned)time(0)); // seed with current time
 | 
|---|
| 272 |         model->setParametersToRandomInitialValues(data);
 | 
|---|
| 273 |         LOG(0, "INFO: Initial parameters are " << model->getParameters() << ".");
 | 
|---|
| 274 |         approximator(FunctionApproximation::ParameterDerivative);
 | 
|---|
| 275 |       } else {
 | 
|---|
| 276 |         ELOG(0, "We require parameter derivatives for a box constraint minimization.");
 | 
|---|
| 277 |         return 1;
 | 
|---|
| 278 |       }
 | 
|---|
| 279 | 
 | 
|---|
| 280 |       // create a map of each fragment with error.
 | 
|---|
| 281 |       typedef std::multimap< double, size_t > WorseFragmentMap_t;
 | 
|---|
| 282 |       WorseFragmentMap_t WorseFragmentMap;
 | 
|---|
| 283 |       HomologyContainer::range_t fragmentrange = homologies.getHomologousGraphs(graph);
 | 
|---|
| 284 |       // fragments make it into the container in reversed order, hence count from top down
 | 
|---|
| 285 |       size_t index= std::distance(fragmentrange.first, fragmentrange.second)-1;
 | 
|---|
| 286 |       for (HomologyContainer::const_iterator iter = fragmentrange.first;
 | 
|---|
| 287 |           iter != fragmentrange.second;
 | 
|---|
| 288 |           ++iter) {
 | 
|---|
| 289 |         const Fragment& fragment = iter->second.first;
 | 
|---|
| 290 |         const double &energy = iter->second.second;
 | 
|---|
| 291 | 
 | 
|---|
| 292 |         // create arguments from the fragment
 | 
|---|
| 293 |         FunctionModel::extractor_t extractor = model->getFragmentSpecificExtractor(charges);
 | 
|---|
| 294 |         FunctionModel::arguments_t args = extractor(fragment, 1);
 | 
|---|
| 295 | 
 | 
|---|
| 296 |         // calculate value from potential
 | 
|---|
| 297 |         const double fitvalue = (*model)(args)[0];
 | 
|---|
| 298 | 
 | 
|---|
| 299 |         // insert difference into map
 | 
|---|
| 300 |         const double error = fabs(energy - fitvalue);
 | 
|---|
| 301 |         WorseFragmentMap.insert( std::make_pair( error, index-- ) );
 | 
|---|
| 302 | 
 | 
|---|
| 303 |         {
 | 
|---|
| 304 |           // give only the distances in the debugging text
 | 
|---|
| 305 |           std::stringstream streamargs;
 | 
|---|
| 306 |           BOOST_FOREACH (argument_t arg, args) {
 | 
|---|
| 307 |             streamargs << " " << arg.distance*AtomicLengthToAngstroem;
 | 
|---|
| 308 |           }
 | 
|---|
| 309 |           LOG(2, "DEBUG: frag.#" << index+1 << "'s error is |" << energy << " - " << fitvalue
 | 
|---|
| 310 |               << "| = " << error << " for args " << streamargs.str() << ".");
 | 
|---|
| 311 |         }
 | 
|---|
| 312 |       }
 | 
|---|
| 313 |       LOG(0, "RESULT: WorstFragmentMap " << WorseFragmentMap << ".");
 | 
|---|
| 314 | 
 | 
|---|
| 315 |       params = model->getParameters();
 | 
|---|
| 316 | 
 | 
|---|
| 317 |     }
 | 
|---|
| 318 |   }
 | 
|---|
| 319 |   delete model;
 | 
|---|
| 320 |   // remove static instances
 | 
|---|
| 321 |   PotentialFactory::purgeInstance();
 | 
|---|
| 322 | 
 | 
|---|
| 323 |   return 0;
 | 
|---|
| 324 | }
 | 
|---|
| 325 | 
 | 
|---|