source: src/molecule_fragmentation.cpp@ 15911c

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 Candidate_v1.7.0 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest 
IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since 15911c was ad011c, checked in by Frederik Heber <heber@…>, 15 years ago

CodePatterns places all includes now in subfolder CodePatterns/.

  • change all includes accordingly.
  • this was necessary as Helpers and Patterns are not very distinctive names for include folders. Already now, we had a conflict between Helpers from CodePatterns and Helpers from this project.
  • changed compilation test in ax_codepatterns.m4 when changing CodePatterns includes.
  • Property mode set to 100644
File size: 80.6 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2010 University of Bonn. All rights reserved.
5 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
6 */
7
8/*
9 * molecule_fragmentation.cpp
10 *
11 * Created on: Oct 5, 2009
12 * Author: heber
13 */
14
// include config.h
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "CodePatterns/MemDebug.hpp"

#include <cstring>
#include <limits>

#include "World.hpp"
#include "atom.hpp"
#include "bond.hpp"
#include "config.hpp"
#include "element.hpp"
#include "Helpers/helpers.hpp"
#include "lists.hpp"
#include "CodePatterns/Verbose.hpp"
#include "CodePatterns/Log.hpp"
#include "molecule.hpp"
#include "periodentafel.hpp"
#include "World.hpp"
#include "LinearAlgebra/RealSpaceMatrix.hpp"
#include "Box.hpp"
38
39/************************************* Functions for class molecule *********************************/
40
41
42/** Estimates by educated guessing (using upper limit) the expected number of fragments.
43 * The upper limit is
44 * \f[
45 * n = N \cdot C^k
46 * \f]
47 * where \f$C=2^c\f$ and c is the maximum bond degree over N number of atoms.
48 * \param *out output stream for debugging
49 * \param order bond order k
50 * \return number n of fragments
51 */
52int molecule::GuesstimateFragmentCount(int order)
53{
54 size_t c = 0;
55 int FragmentCount;
56 // get maximum bond degree
57 for (molecule::const_iterator iter = begin(); iter != end(); ++iter) {
58 c = ((*iter)->ListOfBonds.size() > c) ? (*iter)->ListOfBonds.size() : c;
59 }
60 FragmentCount = NoNonHydrogen*(1 << (c*order));
61 DoLog(1) && (Log() << Verbose(1) << "Upper limit for this subgraph is " << FragmentCount << " for " << NoNonHydrogen << " non-H atoms with maximum bond degree of " << c << "." << endl);
62 return FragmentCount;
63};
64
65/** Scans a single line for number and puts them into \a KeySet.
66 * \param *out output stream for debugging
67 * \param *buffer buffer to scan
68 * \param &CurrentSet filled KeySet on return
69 * \return true - at least one valid atom id parsed, false - CurrentSet is empty
70 */
71bool ScanBufferIntoKeySet(char *buffer, KeySet &CurrentSet)
72{
73 stringstream line;
74 int AtomNr;
75 int status = 0;
76
77 line.str(buffer);
78 while (!line.eof()) {
79 line >> AtomNr;
80 if (AtomNr >= 0) {
81 CurrentSet.insert(AtomNr); // insert at end, hence in same order as in file!
82 status++;
83 } // else it's "-1" or else and thus must not be added
84 }
85 DoLog(1) && (Log() << Verbose(1) << "The scanned KeySet is ");
86 for(KeySet::iterator runner = CurrentSet.begin(); runner != CurrentSet.end(); runner++) {
87 DoLog(0) && (Log() << Verbose(0) << (*runner) << "\t");
88 }
89 DoLog(0) && (Log() << Verbose(0) << endl);
90 return (status != 0);
91};
92
93/** Parses the KeySet file and fills \a *FragmentList from the known molecule structure.
94 * Does two-pass scanning:
95 * -# Scans the keyset file and initialises a temporary graph
96 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
97 * Finally, the temporary graph is inserted into the given \a FragmentList for return.
98 * \param &path path to file
99 * \param *FragmentList empty, filled on return
100 * \return true - parsing successfully, false - failure on parsing (FragmentList will be NULL)
101 */
102bool ParseKeySetFile(std::string &path, Graph *&FragmentList)
103{
104 bool status = true;
105 ifstream InputFile;
106 stringstream line;
107 GraphTestPair testGraphInsert;
108 int NumberOfFragments = 0;
109 string filename;
110
111 if (FragmentList == NULL) { // check list pointer
112 FragmentList = new Graph;
113 }
114
115 // 1st pass: open file and read
116 DoLog(1) && (Log() << Verbose(1) << "Parsing the KeySet file ... " << endl);
117 filename = path + KEYSETFILE;
118 InputFile.open(filename.c_str());
119 if (InputFile.good()) {
120 // each line represents a new fragment
121 char buffer[MAXSTRINGSIZE];
122 // 1. parse keysets and insert into temp. graph
123 while (!InputFile.eof()) {
124 InputFile.getline(buffer, MAXSTRINGSIZE);
125 KeySet CurrentSet;
126 if ((strlen(buffer) > 0) && (ScanBufferIntoKeySet(buffer, CurrentSet))) { // if at least one valid atom was added, write config
127 testGraphInsert = FragmentList->insert(GraphPair (CurrentSet,pair<int,double>(NumberOfFragments++,1))); // store fragment number and current factor
128 if (!testGraphInsert.second) {
129 DoeLog(0) && (eLog()<< Verbose(0) << "KeySet file must be corrupt as there are two equal key sets therein!" << endl);
130 performCriticalExit();
131 }
132 }
133 }
134 // 2. Free and done
135 InputFile.close();
136 InputFile.clear();
137 DoLog(1) && (Log() << Verbose(1) << "\t ... done." << endl);
138 } else {
139 DoLog(1) && (Log() << Verbose(1) << "\t ... File " << filename << " not found." << endl);
140 status = false;
141 }
142
143 return status;
144};
145
146/** Parses the TE factors file and fills \a *FragmentList from the known molecule structure.
147 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
148 * \param *out output stream for debugging
149 * \param *path path to file
150 * \param *FragmentList graph whose nodes's TE factors are set on return
151 * \return true - parsing successfully, false - failure on parsing
152 */
153bool ParseTEFactorsFile(char *path, Graph *FragmentList)
154{
155 bool status = true;
156 ifstream InputFile;
157 stringstream line;
158 GraphTestPair testGraphInsert;
159 int NumberOfFragments = 0;
160 double TEFactor;
161 char filename[MAXSTRINGSIZE];
162
163 if (FragmentList == NULL) { // check list pointer
164 FragmentList = new Graph;
165 }
166
167 // 2nd pass: open TEFactors file and read
168 DoLog(1) && (Log() << Verbose(1) << "Parsing the TEFactors file ... " << endl);
169 sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, TEFACTORSFILE);
170 InputFile.open(filename);
171 if (InputFile != NULL) {
172 // 3. add found TEFactors to each keyset
173 NumberOfFragments = 0;
174 for(Graph::iterator runner = FragmentList->begin();runner != FragmentList->end(); runner++) {
175 if (!InputFile.eof()) {
176 InputFile >> TEFactor;
177 (*runner).second.second = TEFactor;
178 DoLog(2) && (Log() << Verbose(2) << "Setting " << ++NumberOfFragments << " fragment's TEFactor to " << (*runner).second.second << "." << endl);
179 } else {
180 status = false;
181 break;
182 }
183 }
184 // 4. Free and done
185 InputFile.close();
186 DoLog(1) && (Log() << Verbose(1) << "done." << endl);
187 } else {
188 DoLog(1) && (Log() << Verbose(1) << "File " << filename << " not found." << endl);
189 status = false;
190 }
191
192 return status;
193};
194
195/** Stores key sets to file.
196 * \param KeySetList Graph with Keysets
197 * \param &path path to file
198 * \return true - file written successfully, false - writing failed
199 */
200bool StoreKeySetFile(Graph &KeySetList, std::string &path)
201{
202 bool status = true;
203 string line = path + KEYSETFILE;
204 ofstream output(line.c_str());
205
206 // open KeySet file
207 DoLog(1) && (Log() << Verbose(1) << "Saving key sets of the total graph ... ");
208 if(output.good()) {
209 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++) {
210 for (KeySet::iterator sprinter = (*runner).first.begin();sprinter != (*runner).first.end(); sprinter++) {
211 if (sprinter != (*runner).first.begin())
212 output << "\t";
213 output << *sprinter;
214 }
215 output << endl;
216 }
217 DoLog(0) && (Log() << Verbose(0) << "done." << endl);
218 } else {
219 DoeLog(0) && (eLog()<< Verbose(0) << "Unable to open " << line << " for writing keysets!" << endl);
220 performCriticalExit();
221 status = false;
222 }
223 output.close();
224 output.clear();
225
226 return status;
227};
228
229
230/** Stores TEFactors to file.
231 * \param *out output stream for debugging
232 * \param KeySetList Graph with factors
233 * \param *path path to file
234 * \return true - file written successfully, false - writing failed
235 */
236bool StoreTEFactorsFile(Graph &KeySetList, char *path)
237{
238 ofstream output;
239 bool status = true;
240 string line;
241
242 // open TEFactors file
243 line = path;
244 line.append("/");
245 line += FRAGMENTPREFIX;
246 line += TEFACTORSFILE;
247 output.open(line.c_str(), ios::out);
248 DoLog(1) && (Log() << Verbose(1) << "Saving TEFactors of the total graph ... ");
249 if(output != NULL) {
250 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++)
251 output << (*runner).second.second << endl;
252 DoLog(1) && (Log() << Verbose(1) << "done." << endl);
253 } else {
254 DoLog(1) && (Log() << Verbose(1) << "failed to open " << line << "." << endl);
255 status = false;
256 }
257 output.close();
258
259 return status;
260};
261
262/** For a given graph, sorts KeySets into a (index, keyset) map.
263 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
264 * \return map from index to keyset
265 */
266map<int,KeySet> * GraphToIndexedKeySet(Graph *GlobalKeySetList)
267{
268 map<int,KeySet> *IndexKeySetList = new map<int,KeySet>;
269 for(Graph::iterator runner = GlobalKeySetList->begin(); runner != GlobalKeySetList->end(); runner++) {
270 IndexKeySetList->insert( pair<int,KeySet>(runner->second.first,runner->first) );
271 }
272 return IndexKeySetList;
273};
274
275/** Inserts a (\a No, \a value) pair into the list, overwriting present one.
276 * Note if values are equal, No will decided on which is first
277 * \param *out output stream for debugging
278 * \param &AdaptiveCriteriaList list to insert into
279 * \param &IndexedKeySetList list to find key set for a given index \a No
280 * \param FragOrder current bond order of fragment
281 * \param No index of keyset
282 * \param value energy value
283 */
284void InsertIntoAdaptiveCriteriaList(map<int, pair<double,int> > *AdaptiveCriteriaList, map<int,KeySet> &IndexKeySetList, int FragOrder, int No, double Value)
285{
286 map<int,KeySet>::iterator marker = IndexKeySetList.find(No); // find keyset to Frag No.
287 if (marker != IndexKeySetList.end()) { // if found
288 Value *= 1 + MYEPSILON*(*((*marker).second.begin())); // in case of equal energies this makes them not equal without changing anything actually
289 // as the smallest number in each set has always been the root (we use global id to keep the doubles away), seek smallest and insert into AtomMask
290 pair <map<int, pair<double,int> >::iterator, bool> InsertedElement = AdaptiveCriteriaList->insert( make_pair(*((*marker).second.begin()), pair<double,int>( fabs(Value), FragOrder) ));
291 map<int, pair<double,int> >::iterator PresentItem = InsertedElement.first;
292 if (!InsertedElement.second) { // this root is already present
293 if ((*PresentItem).second.second < FragOrder) // if order there is lower, update entry with higher-order term
294 //if ((*PresentItem).second.first < (*runner).first) // as higher-order terms are not always better, we skip this part (which would always include this site into adaptive increase)
295 { // if value is smaller, update value and order
296 (*PresentItem).second.first = fabs(Value);
297 (*PresentItem).second.second = FragOrder;
298 DoLog(2) && (Log() << Verbose(2) << "Updated element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl);
299 } else {
300 DoLog(2) && (Log() << Verbose(2) << "Did not update element " << (*PresentItem).first << " as " << FragOrder << " is less than or equal to " << (*PresentItem).second.second << "." << endl);
301 }
302 } else {
303 DoLog(2) && (Log() << Verbose(2) << "Inserted element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl);
304 }
305 } else {
306 DoLog(1) && (Log() << Verbose(1) << "No Fragment under No. " << No << "found." << endl);
307 }
308};
309
/** Counts the lines in a file following two leading comment lines.
 * Note we are scanning lines from current position, not from beginning. The first two lines
 * read are skipped and not counted. Afterwards the stream state is cleared and the read
 * position is restored to where it was on entry.
 * \param InputFile file to be scanned (passed by reference)
 * \return number of lines after the two skipped ones, 0 if the stream is not readable
 */
int CountLinesinFile(std::ifstream &InputFile)
{
  int lines = 0;
  std::string dummy;

  if (!InputFile.good())
    return lines;
  const std::streampos PositionMarker = InputFile.tellg();

  // skip the two comment lines, then count every line that could actually be read
  if (std::getline(InputFile, dummy) && std::getline(InputFile, dummy)) {
    while (std::getline(InputFile, dummy))
      ++lines;
  }

  // running into the end of the file sets eofbit/failbit and seekg() does nothing on a failed
  // stream, hence the state has to be cleared before rewinding
  InputFile.clear();
  InputFile.seekg(PositionMarker);
  return lines;
};
331
332
333/** Scans the adaptive order file and insert (index, value) into map.
334 * \param &path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
335 * \param &IndexedKeySetList list to find key set for a given index \a No
336 * \return adaptive criteria list from file
337 */
338map<int, pair<double,int> > * ScanAdaptiveFileIntoMap(std::string &path, map<int,KeySet> &IndexKeySetList)
339{
340 map<int, pair<double,int> > *AdaptiveCriteriaList = new map<int, pair<double,int> >;
341 int No = 0, FragOrder = 0;
342 double Value = 0.;
343 char buffer[MAXSTRINGSIZE];
344 string filename = path + ENERGYPERFRAGMENT;
345 ifstream InputFile(filename.c_str());
346
347 if (InputFile.fail()) {
348 DoeLog(1) && (eLog() << Verbose(1) << "Cannot find file " << filename << "." << endl);
349 return AdaptiveCriteriaList;
350 }
351
352 if (CountLinesinFile(InputFile) > 0) {
353 // each line represents a fragment root (Atom::nr) id and its energy contribution
354 InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
355 InputFile.getline(buffer, MAXSTRINGSIZE);
356 while(!InputFile.eof()) {
357 InputFile.getline(buffer, MAXSTRINGSIZE);
358 if (strlen(buffer) > 2) {
359 //Log() << Verbose(2) << "Scanning: " << buffer << endl;
360 stringstream line(buffer);
361 line >> FragOrder;
362 line >> ws >> No;
363 line >> ws >> Value; // skip time entry
364 line >> ws >> Value;
365 No -= 1; // indices start at 1 in file, not 0
366 //Log() << Verbose(2) << " - yields (" << No << "," << Value << ", " << FragOrder << ")" << endl;
367
368 // clean the list of those entries that have been superceded by higher order terms already
369 InsertIntoAdaptiveCriteriaList(AdaptiveCriteriaList, IndexKeySetList, FragOrder, No, Value);
370 }
371 }
372 // close and done
373 InputFile.close();
374 InputFile.clear();
375 }
376
377 return AdaptiveCriteriaList;
378};
379
380/** Maps adaptive criteria list back onto (Value, (Root Nr., Order))
381 * (i.e. sorted by value to pick the highest ones)
382 * \param *out output stream for debugging
383 * \param &AdaptiveCriteriaList list to insert into
384 * \param *mol molecule with atoms
385 * \return remapped list
386 */
387map<double, pair<int,int> > * ReMapAdaptiveCriteriaListToValue(map<int, pair<double,int> > *AdaptiveCriteriaList, molecule *mol)
388{
389 atom *Walker = NULL;
390 map<double, pair<int,int> > *FinalRootCandidates = new map<double, pair<int,int> > ;
391 DoLog(1) && (Log() << Verbose(1) << "Root candidate list is: " << endl);
392 for(map<int, pair<double,int> >::iterator runner = AdaptiveCriteriaList->begin(); runner != AdaptiveCriteriaList->end(); runner++) {
393 Walker = mol->FindAtom((*runner).first);
394 if (Walker != NULL) {
395 //if ((*runner).second.second >= Walker->AdaptiveOrder) { // only insert if this is an "active" root site for the current order
396 if (!Walker->MaxOrder) {
397 DoLog(2) && (Log() << Verbose(2) << "(" << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "])" << endl);
398 FinalRootCandidates->insert( make_pair( (*runner).second.first, pair<int,int>((*runner).first, (*runner).second.second) ) );
399 } else {
400 DoLog(2) && (Log() << Verbose(2) << "Excluding (" << *Walker << ", " << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "]), as it has reached its maximum order." << endl);
401 }
402 } else {
403 DoeLog(0) && (eLog()<< Verbose(0) << "Atom No. " << (*runner).second.first << " was not found in this molecule." << endl);
404 performCriticalExit();
405 }
406 }
407 return FinalRootCandidates;
408};
409
410/** Marks all candidate sites for update if below adaptive threshold.
411 * Picks a given number of highest values and set *AtomMask to true.
412 * \param *out output stream for debugging
413 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
414 * \param FinalRootCandidates list candidates to check
415 * \param Order desired order
416 * \param *mol molecule with atoms
417 * \return true - if update is necessary, false - not
418 */
419bool MarkUpdateCandidates(bool *AtomMask, map<double, pair<int,int> > &FinalRootCandidates, int Order, molecule *mol)
420{
421 atom *Walker = NULL;
422 int No = -1;
423 bool status = false;
424 for(map<double, pair<int,int> >::iterator runner = FinalRootCandidates.upper_bound(pow(10.,Order)); runner != FinalRootCandidates.end(); runner++) {
425 No = (*runner).second.first;
426 Walker = mol->FindAtom(No);
427 //if (Walker->AdaptiveOrder < MinimumRingSize[Walker->nr]) {
428 DoLog(2) && (Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", setting entry " << No << " of Atom mask to true." << endl);
429 AtomMask[No] = true;
430 status = true;
431 //} else
432 //Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", however MinimumRingSize of " << MinimumRingSize[Walker->nr] << " does not allow further adaptive increase." << endl;
433 }
434 return status;
435};
436
437/** print atom mask for debugging.
438 * \param *out output stream for debugging
439 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
440 * \param AtomCount number of entries in \a *AtomMask
441 */
442void PrintAtomMask(bool *AtomMask, int AtomCount)
443{
444 DoLog(2) && (Log() << Verbose(2) << " ");
445 for(int i=0;i<AtomCount;i++)
446 DoLog(0) && (Log() << Verbose(0) << (i % 10));
447 DoLog(0) && (Log() << Verbose(0) << endl);
448 DoLog(2) && (Log() << Verbose(2) << "Atom mask is: ");
449 for(int i=0;i<AtomCount;i++)
450 DoLog(0) && (Log() << Verbose(0) << (AtomMask[i] ? "t" : "f"));
451 DoLog(0) && (Log() << Verbose(0) << endl);
452};
453
454/** Checks whether the OrderAtSite is still below \a Order at some site.
455 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
456 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
457 * \param Order desired Order if positive, desired exponent in threshold criteria if negative (0 is single-step)
458 * \param *MinimumRingSize array of max. possible order to avoid loops
459 * \param path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
460 * \return true - needs further fragmentation, false - does not need fragmentation
461 */
462bool molecule::CheckOrderAtSite(bool *AtomMask, Graph *GlobalKeySetList, int Order, int *MinimumRingSize, std::string path)
463{
464 bool status = false;
465
466 // initialize mask list
467 for(int i=getAtomCount();i--;)
468 AtomMask[i] = false;
469
470 if (Order < 0) { // adaptive increase of BondOrder per site
471 if (AtomMask[getAtomCount()] == true) // break after one step
472 return false;
473
474 // transmorph graph keyset list into indexed KeySetList
475 if (GlobalKeySetList == NULL) {
476 DoeLog(1) && (eLog()<< Verbose(1) << "Given global key set list (graph) is NULL!" << endl);
477 return false;
478 }
479 map<int,KeySet> *IndexKeySetList = GraphToIndexedKeySet(GlobalKeySetList);
480
481 // parse the EnergyPerFragment file
482 map<int, pair<double,int> > *AdaptiveCriteriaList = ScanAdaptiveFileIntoMap(path, *IndexKeySetList); // (Root No., (Value, Order)) !
483 if (AdaptiveCriteriaList->empty()) {
484 DoeLog(2) && (eLog()<< Verbose(2) << "Unable to parse file, incrementing all." << endl);
485 for (molecule::const_iterator iter = begin(); iter != end(); ++iter) {
486 #ifdef ADDHYDROGEN
487 if ((*iter)->getType()->getAtomicNumber() != 1) // skip hydrogen
488 #endif
489 {
490 AtomMask[(*iter)->nr] = true; // include all (non-hydrogen) atoms
491 status = true;
492 }
493 }
494 }
495 // then map back onto (Value, (Root Nr., Order)) (i.e. sorted by value to pick the highest ones)
496 map<double, pair<int,int> > *FinalRootCandidates = ReMapAdaptiveCriteriaListToValue(AdaptiveCriteriaList, this);
497
498 // pick the ones still below threshold and mark as to be adaptively updated
499 MarkUpdateCandidates(AtomMask, *FinalRootCandidates, Order, this);
500
501 delete[](IndexKeySetList);
502 delete[](AdaptiveCriteriaList);
503 delete[](FinalRootCandidates);
504 } else { // global increase of Bond Order
505 for(molecule::const_iterator iter = begin(); iter != end(); ++iter) {
506 #ifdef ADDHYDROGEN
507 if ((*iter)->getType()->getAtomicNumber() != 1) // skip hydrogen
508 #endif
509 {
510 AtomMask[(*iter)->nr] = true; // include all (non-hydrogen) atoms
511 if ((Order != 0) && ((*iter)->AdaptiveOrder < Order)) // && ((*iter)->AdaptiveOrder < MinimumRingSize[(*iter)->nr]))
512 status = true;
513 }
514 }
515 if ((!Order) && (!AtomMask[getAtomCount()])) // single stepping, just check
516 status = true;
517
518 if (!status) {
519 if (Order == 0)
520 DoLog(1) && (Log() << Verbose(1) << "Single stepping done." << endl);
521 else
522 DoLog(1) && (Log() << Verbose(1) << "Order at every site is already equal or above desired order " << Order << "." << endl);
523 }
524 }
525
526 PrintAtomMask(AtomMask, getAtomCount()); // for debugging
527
528 return status;
529};
530
531/** Create a SortIndex to map from atomic labels to the sequence in which the atoms are given in the config file.
532 * \param *out output stream for debugging
533 * \param *&SortIndex Mapping array of size molecule::AtomCount
534 * \return true - success, false - failure of SortIndex alloc
535 */
536bool molecule::CreateMappingLabelsToConfigSequence(int *&SortIndex)
537{
538 if (SortIndex != NULL) {
539 DoLog(1) && (Log() << Verbose(1) << "SortIndex is " << SortIndex << " and not NULL as expected." << endl);
540 return false;
541 }
542 SortIndex = new int[getAtomCount()];
543 for(int i=getAtomCount();i--;)
544 SortIndex[i] = -1;
545
546 int AtomNo = 0;
547 for(internal_iterator iter=atoms.begin();iter!=atoms.end();++iter){
548 ASSERT(SortIndex[(*iter)->nr]==-1,"Same SortIndex set twice");
549 SortIndex[(*iter)->nr] = AtomNo++;
550 }
551
552 return true;
553};
554
555
556
557/** Creates a lookup table for true father's Atom::Nr -> atom ptr.
558 * \param *start begin of list (STL iterator, i.e. first item)
559 * \paran *end end of list (STL iterator, i.e. one past last item)
560 * \param **Lookuptable pointer to return allocated lookup table (should be NULL on start)
561 * \param count optional predetermined size for table (otherwise we set the count to highest true father id)
562 * \return true - success, false - failure
563 */
564bool molecule::CreateFatherLookupTable(atom **&LookupTable, int count)
565{
566 bool status = true;
567 int AtomNo;
568
569 if (LookupTable != NULL) {
570 Log() << Verbose(0) << "Pointer for Lookup table is not NULL! Aborting ..." <<endl;
571 return false;
572 }
573
574 // count them
575 if (count == 0) {
576 for (molecule::iterator iter = begin(); iter != end(); ++iter) { // create a lookup table (Atom::nr -> atom) used as a marker table lateron
577 count = (count < (*iter)->GetTrueFather()->nr) ? (*iter)->GetTrueFather()->nr : count;
578 }
579 }
580 if (count <= 0) {
581 Log() << Verbose(0) << "Count of lookup list is 0 or less." << endl;
582 return false;
583 }
584
585 // allocate and fill
586 LookupTable = new atom *[count];
587 if (LookupTable == NULL) {
588 eLog() << Verbose(0) << "LookupTable memory allocation failed!" << endl;
589 performCriticalExit();
590 status = false;
591 } else {
592 for (int i=0;i<count;i++)
593 LookupTable[i] = NULL;
594 for (molecule::iterator iter = begin(); iter != end(); ++iter) {
595 AtomNo = (*iter)->GetTrueFather()->nr;
596 if ((AtomNo >= 0) && (AtomNo < count)) {
597 //*out << "Setting LookupTable[" << AtomNo << "] to " << *(*iter) << endl;
598 LookupTable[AtomNo] = (*iter);
599 } else {
600 Log() << Verbose(0) << "Walker " << *(*iter) << " exceeded range of nuclear ids [0, " << count << ")." << endl;
601 status = false;
602 break;
603 }
604 }
605 }
606
607 return status;
608};
609
610/** Performs a many-body bond order analysis for a given bond order.
611 * -# parses adjacency, keysets and orderatsite files
612 * -# performs DFS to find connected subgraphs (to leave this in was a design decision: might be useful later)
613 * -# RootStack is created for every subgraph (here, later we implement the "update 10 sites with highest energ
614y contribution", and that's why this consciously not done in the following loop)
615 * -# in a loop over all subgraphs
616 * -# calls FragmentBOSSANOVA with this RootStack and within the subgraph molecule structure
617 * -# creates molecule (fragment)s from the returned keysets (StoreFragmentFromKeySet)
618 * -# combines the generated molecule lists from all subgraphs
619 * -# saves to disk: fragment configs, adjacency, orderatsite, keyset files
620 * Note that as we split "this" molecule up into a list of subgraphs, i.e. a MoleculeListClass, we have two sets
621 * of vertex indices: Global always means the index in "this" molecule, whereas local refers to the molecule or
622 * subgraph in the MoleculeListClass.
623 * \param Order up to how many neighbouring bonds a fragment contains in BondOrderScheme::BottumUp scheme
624 * \param &prefix path and prefix of the bond order configs to be written
625 * \return 1 - continue, 2 - stop (no fragmentation occured)
626 */
627int molecule::FragmentMolecule(int Order, std::string &prefix)
628{
629 MoleculeListClass *BondFragments = NULL;
630 int *MinimumRingSize = new int[getAtomCount()];
631 int FragmentCounter;
632 MoleculeLeafClass *MolecularWalker = NULL;
633 MoleculeLeafClass *Subgraphs = NULL; // list of subgraphs from DFS analysis
634 fstream File;
635 bool FragmentationToDo = true;
636 std::deque<bond *> *BackEdgeStack = NULL, *LocalBackEdgeStack = NULL;
637 bool CheckOrder = false;
638 Graph **FragmentList = NULL;
639 Graph *ParsedFragmentList = NULL;
640 Graph TotalGraph; // graph with all keysets however local numbers
641 int TotalNumberOfKeySets = 0;
642 atom **ListOfAtoms = NULL;
643 atom ***ListOfLocalAtoms = NULL;
644 bool *AtomMask = NULL;
645
646 DoLog(0) && (Log() << Verbose(0) << endl);
647#ifdef ADDHYDROGEN
648 DoLog(0) && (Log() << Verbose(0) << "I will treat hydrogen special and saturate dangling bonds with it." << endl);
649#else
650 DoLog(0) && (Log() << Verbose(0) << "Hydrogen is treated just like the rest of the lot." << endl);
651#endif
652
653 // ++++++++++++++++++++++++++++ INITIAL STUFF: Bond structure analysis, file parsing, ... ++++++++++++++++++++++++++++++++++++++++++
654
655 // ===== 1. Check whether bond structure is same as stored in files ====
656
657 // create lookup table for Atom::nr
658 FragmentationToDo = FragmentationToDo && CreateFatherLookupTable(ListOfAtoms, getAtomCount());
659
660 // === compare it with adjacency file ===
661 FragmentationToDo = FragmentationToDo && CheckAdjacencyFileAgainstMolecule(prefix, ListOfAtoms);
662 delete[](ListOfAtoms);
663
664 // ===== 2. perform a DFS analysis to gather info on cyclic structure and a list of disconnected subgraphs =====
665 Subgraphs = DepthFirstSearchAnalysis(BackEdgeStack);
666
667 // analysis of the cycles (print rings, get minimum cycle length) for each subgraph
668 for(int i=getAtomCount();i--;)
669 MinimumRingSize[i] = getAtomCount();
670 MolecularWalker = Subgraphs;
671 const int LeafCount = Subgraphs->next->Count();
672 FragmentCounter = 0;
673 while (MolecularWalker->next != NULL) {
674 MolecularWalker = MolecularWalker->next;
675 // fill the bond structure of the individually stored subgraphs
676 ListOfAtoms = NULL;
677 MolecularWalker->FillBondStructureFromReference(this, ListOfAtoms, false); // we want to keep the created ListOfLocalAtoms
678 DoLog(0) && (Log() << Verbose(0) << "Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl);
679 LocalBackEdgeStack = new std::deque<bond *>; // (MolecularWalker->Leaf->BondCount);
680// // check the list of local atoms for debugging
681// Log() << Verbose(0) << "ListOfLocalAtoms for this subgraph is:" << endl;
682// for (int i=0;i<getAtomCount();i++)
683// if (ListOfLocalAtoms[FragmentCounter][i] == NULL)
684// Log() << Verbose(0) << "\tNULL";
685// else
686// Log() << Verbose(0) << "\t" << ListOfLocalAtoms[FragmentCounter][i]->Name;
687 DoLog(0) && (Log() << Verbose(0) << "Gathering local back edges for subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl);
688 MolecularWalker->Leaf->PickLocalBackEdges(ListOfAtoms, BackEdgeStack, LocalBackEdgeStack);
689 DoLog(0) && (Log() << Verbose(0) << "Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl);
690 MolecularWalker->Leaf->CyclicStructureAnalysis(LocalBackEdgeStack, MinimumRingSize);
691 DoLog(0) && (Log() << Verbose(0) << "Done with Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl);
692 delete(LocalBackEdgeStack);
693 delete(ListOfAtoms);
694 FragmentCounter++;
695 }
696 delete(BackEdgeStack);
697
698 // ===== 3. if structure still valid, parse key set file and others =====
699 FragmentationToDo = FragmentationToDo && ParseKeySetFile(prefix, ParsedFragmentList);
700
701 // ===== 4. check globally whether there's something to do actually (first adaptivity check)
702 FragmentationToDo = FragmentationToDo && ParseOrderAtSiteFromFile(prefix);
703
704 // =================================== Begin of FRAGMENTATION ===============================
705 // ===== 6a. assign each keyset to its respective subgraph =====
706 ListOfLocalAtoms = new atom **[LeafCount];
707 for (int i=0;i<LeafCount;i++)
708 ListOfLocalAtoms[i] = NULL;
709 FragmentCounter = 0;
710 Subgraphs->next->AssignKeySetsToFragment(this, ParsedFragmentList, ListOfLocalAtoms, FragmentList, FragmentCounter, true);
711 delete[](ListOfLocalAtoms);
712
713 // ===== 6b. prepare and go into the adaptive (Order<0), single-step (Order==0) or incremental (Order>0) cycle
714 KeyStack *RootStack = new KeyStack[Subgraphs->next->Count()];
715 AtomMask = new bool[getAtomCount()+1];
716 AtomMask[getAtomCount()] = false;
717 FragmentationToDo = false; // if CheckOrderAtSite just ones recommends fragmentation, we will save fragments afterwards
718 while ((CheckOrder = CheckOrderAtSite(AtomMask, ParsedFragmentList, Order, MinimumRingSize, prefix))) {
719 FragmentationToDo = FragmentationToDo || CheckOrder;
720 AtomMask[getAtomCount()] = true; // last plus one entry is used as marker that we have been through this loop once already in CheckOrderAtSite()
721 // ===== 6b. fill RootStack for each subgraph (second adaptivity check) =====
722 Subgraphs->next->FillRootStackForSubgraphs(RootStack, AtomMask, (FragmentCounter = 0));
723
724 // ===== 7. fill the bond fragment list =====
725 FragmentCounter = 0;
726 MolecularWalker = Subgraphs;
727 while (MolecularWalker->next != NULL) {
728 MolecularWalker = MolecularWalker->next;
729 DoLog(1) && (Log() << Verbose(1) << "Fragmenting subgraph " << MolecularWalker << "." << endl);
730 //MolecularWalker->Leaf->OutputListOfBonds(out); // output atom::ListOfBonds for debugging
731 if (MolecularWalker->Leaf->hasBondStructure()) {
732 // call BOSSANOVA method
733 DoLog(0) && (Log() << Verbose(0) << endl << " ========== BOND ENERGY of subgraph " << FragmentCounter << " ========================= " << endl);
734 MolecularWalker->Leaf->FragmentBOSSANOVA(FragmentList[FragmentCounter], RootStack[FragmentCounter], MinimumRingSize);
735 } else {
736 DoeLog(1) && (eLog()<< Verbose(1) << "Subgraph " << MolecularWalker << " has no atoms!" << endl);
737 }
738 FragmentCounter++; // next fragment list
739 }
740 }
741 DoLog(2) && (Log() << Verbose(2) << "CheckOrder is " << CheckOrder << "." << endl);
742 delete[](RootStack);
743 delete[](AtomMask);
744 delete(ParsedFragmentList);
745 delete[](MinimumRingSize);
746
747 // ==================================== End of FRAGMENTATION ============================================
748
749 // ===== 8a. translate list into global numbers (i.e. ones that are valid in "this" molecule, not in MolecularWalker->Leaf)
750 Subgraphs->next->TranslateIndicesToGlobalIDs(FragmentList, (FragmentCounter = 0), TotalNumberOfKeySets, TotalGraph);
751
752 // free subgraph memory again
753 FragmentCounter = 0;
754 if (Subgraphs != NULL) {
755 while (Subgraphs->next != NULL) {
756 Subgraphs = Subgraphs->next;
757 delete(FragmentList[FragmentCounter++]);
758 delete(Subgraphs->previous);
759 }
760 delete(Subgraphs);
761 }
762 delete[](FragmentList);
763
764 // ===== 8b. gather keyset lists (graphs) from all subgraphs and transform into MoleculeListClass =====
765 //if (FragmentationToDo) { // we should always store the fragments again as coordination might have changed slightly without changing bond structure
766 // allocate memory for the pointer array and transmorph graphs into full molecular fragments
767 BondFragments = new MoleculeListClass(World::getPointer());
768 int k=0;
769 for(Graph::iterator runner = TotalGraph.begin(); runner != TotalGraph.end(); runner++) {
770 KeySet test = (*runner).first;
771 DoLog(0) && (Log() << Verbose(0) << "Fragment No." << (*runner).second.first << " with TEFactor " << (*runner).second.second << "." << endl);
772 BondFragments->insert(StoreFragmentFromKeySet(test, World::getInstance().getConfig()));
773 k++;
774 }
775 DoLog(0) && (Log() << Verbose(0) << k << "/" << BondFragments->ListOfMolecules.size() << " fragments generated from the keysets." << endl);
776
777 // ===== 9. Save fragments' configuration and keyset files et al to disk ===
778 if (BondFragments->ListOfMolecules.size() != 0) {
779 // create the SortIndex from BFS labels to order in the config file
780 int *SortIndex = NULL;
781 CreateMappingLabelsToConfigSequence(SortIndex);
782
783 DoLog(1) && (Log() << Verbose(1) << "Writing " << BondFragments->ListOfMolecules.size() << " possible bond fragmentation configs" << endl);
784 if (BondFragments->OutputConfigForListOfFragments(prefix, SortIndex))
785 DoLog(1) && (Log() << Verbose(1) << "All configs written." << endl);
786 else
787 DoLog(1) && (Log() << Verbose(1) << "Some config writing failed." << endl);
788
789 // store force index reference file
790 BondFragments->StoreForcesFile(prefix, SortIndex);
791
792 // store keysets file
793 StoreKeySetFile(TotalGraph, prefix);
794
795 {
796 // store Adjacency file
797 std::string filename = prefix + ADJACENCYFILE;
798 StoreAdjacencyToFile(filename);
799 }
800
801 // store Hydrogen saturation correction file
802 BondFragments->AddHydrogenCorrection(prefix);
803
804 // store adaptive orders into file
805 StoreOrderAtSiteFile(prefix);
806
807 // restore orbital and Stop values
808 //CalculateOrbitals(*configuration);
809
810 // free memory for bond part
811 DoLog(1) && (Log() << Verbose(1) << "Freeing bond memory" << endl);
812 delete[](SortIndex);
813 } else {
814 DoLog(1) && (Log() << Verbose(1) << "FragmentList is zero on return, splitting failed." << endl);
815 }
816 delete(BondFragments);
817 DoLog(0) && (Log() << Verbose(0) << "End of bond fragmentation." << endl);
818
819 return ((int)(!FragmentationToDo)+1); // 1 - continue, 2 - stop (no fragmentation occured)
820};
821
822
823/** Stores pairs (Atom::nr, Atom::AdaptiveOrder) into file.
824 * Atoms not present in the file get "-1".
825 * \param &path path to file ORDERATSITEFILE
826 * \return true - file writable, false - not writable
827 */
828bool molecule::StoreOrderAtSiteFile(std::string &path)
829{
830 string line;
831 ofstream file;
832
833 line = path + ORDERATSITEFILE;
834 file.open(line.c_str());
835 DoLog(1) && (Log() << Verbose(1) << "Writing OrderAtSite " << ORDERATSITEFILE << " ... " << endl);
836 if (file.good()) {
837 for_each(atoms.begin(),atoms.end(),bind2nd(mem_fun(&atom::OutputOrder), &file));
838 file.close();
839 DoLog(1) && (Log() << Verbose(1) << "done." << endl);
840 return true;
841 } else {
842 DoLog(1) && (Log() << Verbose(1) << "failed to open file " << line << "." << endl);
843 return false;
844 }
845};
846
847/** Parses pairs(Atom::nr, Atom::AdaptiveOrder) from file and stores in molecule's Atom's.
848 * Atoms not present in the file get "0".
849 * \param &path path to file ORDERATSITEFILEe
850 * \return true - file found and scanned, false - file not found
851 * \sa ParseKeySetFile() and CheckAdjacencyFileAgainstMolecule() as this is meant to be used in conjunction with the two
852 */
853bool molecule::ParseOrderAtSiteFromFile(std::string &path)
854{
855 unsigned char *OrderArray = new unsigned char[getAtomCount()];
856 bool *MaxArray = new bool[getAtomCount()];
857 bool status;
858 int AtomNr, value;
859 string line;
860 ifstream file;
861
862 for(int i=0;i<getAtomCount();i++) {
863 OrderArray[i] = 0;
864 MaxArray[i] = false;
865 }
866
867 DoLog(1) && (Log() << Verbose(1) << "Begin of ParseOrderAtSiteFromFile" << endl);
868 line = path + ORDERATSITEFILE;
869 file.open(line.c_str());
870 if (file.good()) {
871 while (!file.eof()) { // parse from file
872 AtomNr = -1;
873 file >> AtomNr;
874 if (AtomNr != -1) { // test whether we really parsed something (this is necessary, otherwise last atom is set twice and to 0 on second time)
875 file >> value;
876 OrderArray[AtomNr] = value;
877 file >> value;
878 MaxArray[AtomNr] = value;
879 //Log() << Verbose(2) << "AtomNr " << AtomNr << " with order " << (int)OrderArray[AtomNr] << " and max order set to " << (int)MaxArray[AtomNr] << "." << endl;
880 }
881 }
882 file.close();
883
884 // set atom values
885 for(internal_iterator iter=atoms.begin();iter!=atoms.end();++iter){
886 (*iter)->AdaptiveOrder = OrderArray[(*iter)->nr];
887 (*iter)->MaxOrder = MaxArray[(*iter)->nr];
888 }
889 //SetAtomValueToIndexedArray( OrderArray, &atom::nr, &atom::AdaptiveOrder );
890 //SetAtomValueToIndexedArray( MaxArray, &atom::nr, &atom::MaxOrder );
891
892 DoLog(1) && (Log() << Verbose(1) << "\t ... done." << endl);
893 status = true;
894 } else {
895 DoLog(1) && (Log() << Verbose(1) << "\t ... failed to open file " << line << "." << endl);
896 status = false;
897 }
898 delete[](OrderArray);
899 delete[](MaxArray);
900
901 DoLog(1) && (Log() << Verbose(1) << "End of ParseOrderAtSiteFromFile" << endl);
902 return status;
903};
904
905
906
907/** Looks through a std::deque<atom *> and returns the likeliest removal candiate.
908 * \param *out output stream for debugging messages
909 * \param *&Leaf KeySet to look through
910 * \param *&ShortestPathList list of the shortest path to decide which atom to suggest as removal candidate in the end
911 * \param index of the atom suggested for removal
912 */
913int molecule::LookForRemovalCandidate(KeySet *&Leaf, int *&ShortestPathList)
914{
915 atom *Runner = NULL;
916 int SP, Removal;
917
918 DoLog(2) && (Log() << Verbose(2) << "Looking for removal candidate." << endl);
919 SP = -1; //0; // not -1, so that Root is never removed
920 Removal = -1;
921 for (KeySet::iterator runner = Leaf->begin(); runner != Leaf->end(); runner++) {
922 Runner = FindAtom((*runner));
923 if (Runner->getType()->getAtomicNumber() != 1) { // skip all those added hydrogens when re-filling snake stack
924 if (ShortestPathList[(*runner)] > SP) { // remove the oldest one with longest shortest path
925 SP = ShortestPathList[(*runner)];
926 Removal = (*runner);
927 }
928 }
929 }
930 return Removal;
931};
932
933/** Initializes some value for putting fragment of \a *mol into \a *Leaf.
934 * \param *mol total molecule
935 * \param *Leaf fragment molecule
936 * \param &Leaflet pointer to KeySet structure
937 * \param **SonList calloc'd list which atom of \a *Leaf is a son of which atom in \a *mol
938 * \return number of atoms in fragment
939 */
940int StoreFragmentFromKeySet_Init(molecule *mol, molecule *Leaf, KeySet &Leaflet, atom **SonList)
941{
942 atom *FatherOfRunner = NULL;
943
944 Leaf->BondDistance = mol->BondDistance;
945
946 // first create the minimal set of atoms from the KeySet
947 int size = 0;
948 for(KeySet::iterator runner = Leaflet.begin(); runner != Leaflet.end(); runner++) {
949 FatherOfRunner = mol->FindAtom((*runner)); // find the id
950 SonList[FatherOfRunner->nr] = Leaf->AddCopyAtom(FatherOfRunner);
951 size++;
952 }
953 return size;
954};
955
956/** Creates an induced subgraph out of a fragmental key set, adding bonds and hydrogens (if treated specially).
957 * \param *out output stream for debugging messages
958 * \param *mol total molecule
959 * \param *Leaf fragment molecule
960 * \param IsAngstroem whether we have Ansgtroem or bohrradius
961 * \param **SonList list which atom of \a *Leaf is a son of which atom in \a *mol
962 */
963void CreateInducedSubgraphOfFragment(molecule *mol, molecule *Leaf, atom **SonList, bool IsAngstroem)
964{
965 bool LonelyFlag = false;
966 atom *OtherFather = NULL;
967 atom *FatherOfRunner = NULL;
968
969#ifdef ADDHYDROGEN
970 molecule::const_iterator runner;
971#endif
972 // we increment the iter just before skipping the hydrogen
973 for (molecule::const_iterator iter = Leaf->begin(); iter != Leaf->end();) {
974 LonelyFlag = true;
975 FatherOfRunner = (*iter)->father;
976 ASSERT(FatherOfRunner,"Atom without father found");
977 if (SonList[FatherOfRunner->nr] != NULL) { // check if this, our father, is present in list
978 // create all bonds
979 for (BondList::const_iterator BondRunner = FatherOfRunner->ListOfBonds.begin(); BondRunner != FatherOfRunner->ListOfBonds.end(); (++BondRunner)) {
980 OtherFather = (*BondRunner)->GetOtherAtom(FatherOfRunner);
981// Log() << Verbose(2) << "Father " << *FatherOfRunner << " of son " << *SonList[FatherOfRunner->nr] << " is bound to " << *OtherFather;
982 if (SonList[OtherFather->nr] != NULL) {
983// Log() << Verbose(0) << ", whose son is " << *SonList[OtherFather->nr] << "." << endl;
984 if (OtherFather->nr > FatherOfRunner->nr) { // add bond (nr check is for adding only one of both variants: ab, ba)
985// Log() << Verbose(3) << "Adding Bond: ";
986// Log() << Verbose(0) <<
987 Leaf->AddBond((*iter), SonList[OtherFather->nr], (*BondRunner)->BondDegree);
988// Log() << Verbose(0) << "." << endl;
989 //NumBonds[(*iter)->nr]++;
990 } else {
991// Log() << Verbose(3) << "Not adding bond, labels in wrong order." << endl;
992 }
993 LonelyFlag = false;
994 } else {
995// Log() << Verbose(0) << ", who has no son in this fragment molecule." << endl;
996#ifdef ADDHYDROGEN
997 //Log() << Verbose(3) << "Adding Hydrogen to " << (*iter)->Name << " and a bond in between." << endl;
998 if(!Leaf->AddHydrogenReplacementAtom((*BondRunner), (*iter), FatherOfRunner, OtherFather, IsAngstroem))
999 exit(1);
1000#endif
1001 //NumBonds[(*iter)->nr] += Binder->BondDegree;
1002 }
1003 }
1004 } else {
1005 DoeLog(1) && (eLog()<< Verbose(1) << "Son " << (*iter)->getName() << " has father " << FatherOfRunner->getName() << " but its entry in SonList is " << SonList[FatherOfRunner->nr] << "!" << endl);
1006 }
1007 if ((LonelyFlag) && (Leaf->getAtomCount() > 1)) {
1008 DoLog(0) && (Log() << Verbose(0) << **iter << "has got bonds only to hydrogens!" << endl);
1009 }
1010 ++iter;
1011#ifdef ADDHYDROGEN
1012 while ((iter != Leaf->end()) && ((*iter)->getType()->getAtomicNumber() == 1)){ // skip added hydrogen
1013 iter++;
1014 }
1015#endif
1016 }
1017};
1018
1019/** Stores a fragment from \a KeySet into \a molecule.
1020 * First creates the minimal set of atoms from the KeySet, then creates the bond structure from the complete
1021 * molecule and adds missing hydrogen where bonds were cut.
1022 * \param *out output stream for debugging messages
1023 * \param &Leaflet pointer to KeySet structure
1024 * \param IsAngstroem whether we have Ansgtroem or bohrradius
1025 * \return pointer to constructed molecule
1026 */
1027molecule * molecule::StoreFragmentFromKeySet(KeySet &Leaflet, bool IsAngstroem)
1028{
1029 atom **SonList = new atom*[getAtomCount()];
1030 molecule *Leaf = World::getInstance().createMolecule();
1031
1032 for(int i=0;i<getAtomCount();i++)
1033 SonList[i] = NULL;
1034
1035// Log() << Verbose(1) << "Begin of StoreFragmentFromKeyset." << endl;
1036 StoreFragmentFromKeySet_Init(this, Leaf, Leaflet, SonList);
1037 // create the bonds between all: Make it an induced subgraph and add hydrogen
1038// Log() << Verbose(2) << "Creating bonds from father graph (i.e. induced subgraph creation)." << endl;
1039 CreateInducedSubgraphOfFragment(this, Leaf, SonList, IsAngstroem);
1040
1041 //Leaflet->Leaf->ScanForPeriodicCorrection(out);
1042 delete[](SonList);
1043// Log() << Verbose(1) << "End of StoreFragmentFromKeyset." << endl;
1044 return Leaf;
1045};
1046
1047
1048/** Clears the touched list
1049 * \param *out output stream for debugging
1050 * \param verbosity verbosity level
1051 * \param *&TouchedList touched list
1052 * \param SubOrder current suborder
1053 * \param TouchedIndex currently touched
1054 */
1055void SPFragmentGenerator_ClearingTouched(int verbosity, int *&TouchedList, int SubOrder, int &TouchedIndex)
1056{
1057 Log() << Verbose(1+verbosity) << "Clearing touched list." << endl;
1058 for (TouchedIndex=SubOrder+1;TouchedIndex--;) // empty touched list
1059 TouchedList[TouchedIndex] = -1;
1060 TouchedIndex = 0;
1061
1062}
1063
1064/** Adds the current combination of the power set to the snake stack.
1065 * \param *out output stream for debugging
1066 * \param verbosity verbosity level
1067 * \param CurrentCombination
1068 * \param SetDimension maximum number of bits in power set
1069 * \param *FragmentSet snake stack to remove from
1070 * \param *&TouchedList touched list
1071 * \param TouchedIndex currently touched
1072 * \return number of set bits
1073 */
1074int AddPowersetToSnakeStack(int verbosity, int CurrentCombination, int SetDimension, KeySet *FragmentSet, bond **BondsSet, int *&TouchedList, int &TouchedIndex)
1075{
1076 atom *OtherWalker = NULL;
1077 bool bit = false;
1078 KeySetTestPair TestKeySetInsert;
1079
1080 int Added = 0;
1081 for (int j=0;j<SetDimension;j++) { // pull out every bit by shifting
1082 bit = ((CurrentCombination & (1 << j)) != 0); // mask the bit for the j-th bond
1083 if (bit) { // if bit is set, we add this bond partner
1084 OtherWalker = BondsSet[j]->rightatom; // rightatom is always the one more distant, i.e. the one to add
1085 //Log() << Verbose(1+verbosity) << "Current Bond is " << BondsSet[j] << ", checking on " << *OtherWalker << "." << endl;
1086 Log() << Verbose(2+verbosity) << "Adding " << *OtherWalker << " with nr " << OtherWalker->nr << "." << endl;
1087 TestKeySetInsert = FragmentSet->insert(OtherWalker->nr);
1088 if (TestKeySetInsert.second) {
1089 TouchedList[TouchedIndex++] = OtherWalker->nr; // note as added
1090 Added++;
1091 } else {
1092 Log() << Verbose(2+verbosity) << "This was item was already present in the keyset." << endl;
1093 }
1094 } else {
1095 Log() << Verbose(2+verbosity) << "Not adding." << endl;
1096 }
1097 }
1098 return Added;
1099};
1100
1101/** Counts the number of elements in a power set.
1102 * \param *SetFirst
1103 * \param *SetLast
1104 * \param *&TouchedList touched list
1105 * \param TouchedIndex currently touched
1106 * \return number of elements
1107 */
1108int CountSetMembers(bond *SetFirst, bond *SetLast, int *&TouchedList, int TouchedIndex)
1109{
1110 int SetDimension = 0;
1111 bond *Binder = SetFirst; // start node for this level
1112 while (Binder->next != SetLast) { // compare to end node of this level
1113 Binder = Binder->next;
1114 for (int k=TouchedIndex;k--;) {
1115 if (Binder->Contains(TouchedList[k])) // if we added this very endpiece
1116 SetDimension++;
1117 }
1118 }
1119 return SetDimension;
1120};
1121
1122/** Counts the number of elements in a power set.
1123 * \param *BondsList bonds list to fill
1124 * \param *SetFirst
1125 * \param *SetLast
1126 * \param *&TouchedList touched list
1127 * \param TouchedIndex currently touched
1128 * \return number of elements
1129 */
1130int FillBondsList(bond **BondsList, bond *SetFirst, bond *SetLast, int *&TouchedList, int TouchedIndex)
1131{
1132 int SetDimension = 0;
1133 bond *Binder = SetFirst; // start node for this level
1134 while (Binder->next != SetLast) { // compare to end node of this level
1135 Binder = Binder->next;
1136 for (int k=0;k<TouchedIndex;k++) {
1137 if (Binder->leftatom->nr == TouchedList[k]) // leftatom is always the close one
1138 BondsList[SetDimension++] = Binder;
1139 }
1140 }
1141 return SetDimension;
1142};
1143
1144/** Remove all items that were added on this SP level.
1145 * \param *out output stream for debugging
1146 * \param verbosity verbosity level
1147 * \param *FragmentSet snake stack to remove from
1148 * \param *&TouchedList touched list
1149 * \param TouchedIndex currently touched
1150 */
1151void RemoveAllTouchedFromSnakeStack(int verbosity, KeySet *FragmentSet, int *&TouchedList, int &TouchedIndex)
1152{
1153 int Removal = 0;
1154 for(int j=0;j<TouchedIndex;j++) {
1155 Removal = TouchedList[j];
1156 Log() << Verbose(2+verbosity) << "Removing item nr. " << Removal << " from snake stack." << endl;
1157 FragmentSet->erase(Removal);
1158 TouchedList[j] = -1;
1159 }
1160 DoLog(2) && (Log() << Verbose(2) << "Remaining local nr.s on snake stack are: ");
1161 for(KeySet::iterator runner = FragmentSet->begin(); runner != FragmentSet->end(); runner++)
1162 DoLog(0) && (Log() << Verbose(0) << (*runner) << " ");
1163 DoLog(0) && (Log() << Verbose(0) << endl);
1164 TouchedIndex = 0; // set Index to 0 for list of atoms added on this level
1165};
1166
1167/** From a given set of Bond sorted by Shortest Path distance, create all possible fragments of size \a SetDimension.
1168 * -# loops over every possible combination (2^dimension of edge set)
1169 * -# inserts current set, if there's still space left
1170 * -# yes: calls SPFragmentGenerator with structure, created new edge list and size respective to root dist
1171ance+1
1172 * -# no: stores fragment into keyset list by calling InsertFragmentIntoGraph
1173 * -# removes all items added into the snake stack (in UniqueFragments structure) added during level (root
1174distance) and current set
1175 * \param *out output stream for debugging
1176 * \param FragmentSearch UniqueFragments structure with all values needed
1177 * \param RootDistance current shortest path level, whose set of edges is represented by **BondsSet
1178 * \param SetDimension Number of possible bonds on this level (i.e. size of the array BondsSet[])
1179 * \param SubOrder remaining number of allowed vertices to add
1180 */
1181void molecule::SPFragmentGenerator(struct UniqueFragments *FragmentSearch, int RootDistance, bond **BondsSet, int SetDimension, int SubOrder)
1182{
1183 int verbosity = 0; //FragmentSearch->ANOVAOrder-SubOrder;
1184 int NumCombinations;
1185 int bits, TouchedIndex, SubSetDimension, SP, Added;
1186 int SpaceLeft;
1187 int *TouchedList = new int[SubOrder + 1];
1188 KeySetTestPair TestKeySetInsert;
1189
1190 NumCombinations = 1 << SetDimension;
1191
1192 // here for all bonds of Walker all combinations of end pieces (from the bonds)
1193 // have to be added and for the remaining ANOVA order GraphCrawler be called
1194 // recursively for the next level
1195
1196 Log() << Verbose(1+verbosity) << "Begin of SPFragmentGenerator." << endl;
1197 Log() << Verbose(1+verbosity) << "We are " << RootDistance << " away from Root, which is " << *FragmentSearch->Root << ", SubOrder is " << SubOrder << ", SetDimension is " << SetDimension << " and this means " << NumCombinations-1 << " combination(s)." << endl;
1198
1199 // initialised touched list (stores added atoms on this level)
1200 SPFragmentGenerator_ClearingTouched(verbosity, TouchedList, SubOrder, TouchedIndex);
1201
1202 // create every possible combination of the endpieces
1203 Log() << Verbose(1+verbosity) << "Going through all combinations of the power set." << endl;
1204 for (int i=1;i<NumCombinations;i++) { // sweep through all power set combinations (skip empty set!)
1205 // count the set bit of i
1206 bits = 0;
1207 for (int j=SetDimension;j--;)
1208 bits += (i & (1 << j)) >> j;
1209
1210 Log() << Verbose(1+verbosity) << "Current set is " << Binary(i | (1 << SetDimension)) << ", number of bits is " << bits << "." << endl;
1211 if (bits <= SubOrder) { // if not greater than additional atoms allowed on stack, continue
1212 // --1-- add this set of the power set of bond partners to the snake stack
1213 Added = AddPowersetToSnakeStack(verbosity, i, SetDimension, FragmentSearch->FragmentSet, BondsSet, TouchedList, TouchedIndex);
1214
1215 SpaceLeft = SubOrder - Added ;// SubOrder - bits; // due to item's maybe being already present, this does not work anymore
1216 if (SpaceLeft > 0) {
1217 Log() << Verbose(1+verbosity) << "There's still some space left on stack: " << SpaceLeft << "." << endl;
1218 if (SubOrder > 1) { // Due to Added above we have to check extra whether we're not already reaching beyond the desired Order
1219 // --2-- look at all added end pieces of this combination, construct bond subsets and sweep through a power set of these by recursion
1220 SP = RootDistance+1; // this is the next level
1221
1222 // first count the members in the subset
1223 SubSetDimension = CountSetMembers(FragmentSearch->BondsPerSPList[2*SP], FragmentSearch->BondsPerSPList[2*SP+1], TouchedList, TouchedIndex);
1224
1225 // then allocate and fill the list
1226 bond *BondsList[SubSetDimension];
1227 SubSetDimension = FillBondsList(BondsList, FragmentSearch->BondsPerSPList[2*SP], FragmentSearch->BondsPerSPList[2*SP+1], TouchedList, TouchedIndex);
1228
1229 // then iterate
1230 Log() << Verbose(2+verbosity) << "Calling subset generator " << SP << " away from root " << *FragmentSearch->Root << " with sub set dimension " << SubSetDimension << "." << endl;
1231 SPFragmentGenerator(FragmentSearch, SP, BondsList, SubSetDimension, SubOrder-bits);
1232 }
1233 } else {
1234 // --2-- otherwise store the complete fragment
1235 Log() << Verbose(1+verbosity) << "Enough items on stack for a fragment!" << endl;
1236 // store fragment as a KeySet
1237 DoLog(2) && (Log() << Verbose(2) << "Found a new fragment[" << FragmentSearch->FragmentCounter << "], local nr.s are: ");
1238 for(KeySet::iterator runner = FragmentSearch->FragmentSet->begin(); runner != FragmentSearch->FragmentSet->end(); runner++)
1239 DoLog(0) && (Log() << Verbose(0) << (*runner) << " ");
1240 DoLog(0) && (Log() << Verbose(0) << endl);
1241 //if (!CheckForConnectedSubgraph(FragmentSearch->FragmentSet))
1242 //DoeLog(1) && (eLog()<< Verbose(1) << "The found fragment is not a connected subgraph!" << endl);
1243 InsertFragmentIntoGraph(FragmentSearch);
1244 }
1245
1246 // --3-- remove all added items in this level from snake stack
1247 Log() << Verbose(1+verbosity) << "Removing all items that were added on this SP level " << RootDistance << "." << endl;
1248 RemoveAllTouchedFromSnakeStack(verbosity, FragmentSearch->FragmentSet, TouchedList, TouchedIndex);
1249 } else {
1250 Log() << Verbose(2+verbosity) << "More atoms to add for this set (" << bits << ") than space left on stack " << SubOrder << ", skipping this set." << endl;
1251 }
1252 }
1253 delete[](TouchedList);
1254 Log() << Verbose(1+verbosity) << "End of SPFragmentGenerator, " << RootDistance << " away from Root " << *FragmentSearch->Root << " and SubOrder is " << SubOrder << "." << endl;
1255};
1256
1257/** Allocates memory for UniqueFragments::BondsPerSPList.
1258 * \param *out output stream
1259 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1260 * \param FragmentSearch UniqueFragments
1261 * \sa FreeSPList()
1262 */
1263void InitialiseSPList(int Order, struct UniqueFragments &FragmentSearch)
1264{
1265 FragmentSearch.BondsPerSPList = new bond* [Order * 2];
1266 FragmentSearch.BondsPerSPCount = new int[Order];
1267 for (int i=Order;i--;) {
1268 FragmentSearch.BondsPerSPList[2*i] = new bond(); // start node
1269 FragmentSearch.BondsPerSPList[2*i+1] = new bond(); // end node
1270 FragmentSearch.BondsPerSPList[2*i]->next = FragmentSearch.BondsPerSPList[2*i+1]; // intertwine these two
1271 FragmentSearch.BondsPerSPList[2*i+1]->previous = FragmentSearch.BondsPerSPList[2*i];
1272 FragmentSearch.BondsPerSPCount[i] = 0;
1273 }
1274};
1275
1276/** Free's memory for for UniqueFragments::BondsPerSPList.
1277 * \param *out output stream
1278 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1279 * \param FragmentSearch UniqueFragments\
1280 * \sa InitialiseSPList()
1281 */
1282void FreeSPList(int Order, struct UniqueFragments &FragmentSearch)
1283{
1284 delete[](FragmentSearch.BondsPerSPCount);
1285 for (int i=Order;i--;) {
1286 delete(FragmentSearch.BondsPerSPList[2*i]);
1287 delete(FragmentSearch.BondsPerSPList[2*i+1]);
1288 }
1289 delete[](FragmentSearch.BondsPerSPList);
1290};
1291
1292/** Sets FragmenSearch to initial value.
1293 * Sets UniqueFragments::ShortestPathList entries to zero, UniqueFragments::BondsPerSPCount to zero (except zero level to 1) and
1294 * adds initial bond UniqueFragments::Root to UniqueFragments::Root to UniqueFragments::BondsPerSPList
1295 * \param *out output stream
1296 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1297 * \param FragmentSearch UniqueFragments
1298 * \sa FreeSPList()
1299 */
1300void SetSPList(int Order, struct UniqueFragments &FragmentSearch)
1301{
1302 // prepare Label and SP arrays of the BFS search
1303 FragmentSearch.ShortestPathList[FragmentSearch.Root->nr] = 0;
1304
1305 // prepare root level (SP = 0) and a loop bond denoting Root
1306 for (int i=Order;i--;)
1307 FragmentSearch.BondsPerSPCount[i] = 0;
1308 FragmentSearch.BondsPerSPCount[0] = 1;
1309 bond *Binder = new bond(FragmentSearch.Root, FragmentSearch.Root);
1310 add(Binder, FragmentSearch.BondsPerSPList[1]);
1311};
1312
1313/** Resets UniqueFragments::ShortestPathList and cleans bonds from UniqueFragments::BondsPerSPList.
1314 * \param *out output stream
1315 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1316 * \param FragmentSearch UniqueFragments
1317 * \sa InitialiseSPList()
1318 */
1319void ResetSPList(int Order, struct UniqueFragments &FragmentSearch)
1320{
1321 bond *Binder = NULL;
1322 DoLog(0) && (Log() << Verbose(0) << "Free'ing all found lists. and resetting index lists" << endl);
1323 for(int i=Order;i--;) {
1324 DoLog(1) && (Log() << Verbose(1) << "Current SP level is " << i << ": ");
1325 Binder = FragmentSearch.BondsPerSPList[2*i];
1326 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1327 Binder = Binder->next;
1328 // Log() << Verbose(0) << "Removing atom " << Binder->leftatom->nr << " and " << Binder->rightatom->nr << "." << endl; // make sure numbers are local
1329 FragmentSearch.ShortestPathList[Binder->leftatom->nr] = -1;
1330 FragmentSearch.ShortestPathList[Binder->rightatom->nr] = -1;
1331 }
1332 // delete added bonds
1333 cleanup(FragmentSearch.BondsPerSPList[2*i], FragmentSearch.BondsPerSPList[2*i+1]);
1334 // also start and end node
1335 DoLog(0) && (Log() << Verbose(0) << "cleaned." << endl);
1336 }
1337};
1338
1339
1340/** Fills the Bonds per Shortest Path List and set the vertex labels.
1341 * \param *out output stream
1342 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1343 * \param FragmentSearch UniqueFragments
1344 * \param *mol molecule with atoms and bonds
1345 * \param RestrictedKeySet Restricted vertex set to use in context of molecule
1346 */
1347void FillSPListandLabelVertices(int Order, struct UniqueFragments &FragmentSearch, molecule *mol, KeySet RestrictedKeySet)
1348{
1349 // Actually, we should construct a spanning tree vom the root atom and select all edges therefrom and put them into
1350 // according shortest path lists. However, we don't. Rather we fill these lists right away, as they do form a spanning
1351 // tree already sorted into various SP levels. That's why we just do loops over the depth (CurrentSP) and breadth
1352 // (EdgeinSPLevel) of this tree ...
1353 // In another picture, the bonds always contain a direction by rightatom being the one more distant from root and hence
1354 // naturally leftatom forming its predecessor, preventing the BFS"seeker" from continuing in the wrong direction.
1355 int AtomKeyNr = -1;
1356 atom *Walker = NULL;
1357 atom *OtherWalker = NULL;
1358 atom *Predecessor = NULL;
1359 bond *CurrentEdge = NULL;
1360 bond *Binder = NULL;
1361 int RootKeyNr = FragmentSearch.Root->GetTrueFather()->nr;
1362 int RemainingWalkers = -1;
1363 int SP = -1;
1364
1365 DoLog(0) && (Log() << Verbose(0) << "Starting BFS analysis ..." << endl);
1366 for (SP = 0; SP < (Order-1); SP++) {
1367 DoLog(1) && (Log() << Verbose(1) << "New SP level reached: " << SP << ", creating new SP list with " << FragmentSearch.BondsPerSPCount[SP] << " item(s)");
1368 if (SP > 0) {
1369 DoLog(0) && (Log() << Verbose(0) << ", old level closed with " << FragmentSearch.BondsPerSPCount[SP-1] << " item(s)." << endl);
1370 FragmentSearch.BondsPerSPCount[SP] = 0;
1371 } else
1372 DoLog(0) && (Log() << Verbose(0) << "." << endl);
1373
1374 RemainingWalkers = FragmentSearch.BondsPerSPCount[SP];
1375 CurrentEdge = FragmentSearch.BondsPerSPList[2*SP]; /// start of this SP level's list
1376 while (CurrentEdge->next != FragmentSearch.BondsPerSPList[2*SP+1]) { /// end of this SP level's list
1377 CurrentEdge = CurrentEdge->next;
1378 RemainingWalkers--;
1379 Walker = CurrentEdge->rightatom; // rightatom is always the one more distant
1380 Predecessor = CurrentEdge->leftatom; // ... and leftatom is predecessor
1381 AtomKeyNr = Walker->nr;
1382 DoLog(0) && (Log() << Verbose(0) << "Current Walker is: " << *Walker << " with nr " << Walker->nr << " and SP of " << SP << ", with " << RemainingWalkers << " remaining walkers on this level." << endl);
1383 // check for new sp level
1384 // go through all its bonds
1385 DoLog(1) && (Log() << Verbose(1) << "Going through all bonds of Walker." << endl);
1386 for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
1387 OtherWalker = (*Runner)->GetOtherAtom(Walker);
1388 if ((RestrictedKeySet.find(OtherWalker->nr) != RestrictedKeySet.end())
1389 #ifdef ADDHYDROGEN
1390 && (OtherWalker->getType()->getAtomicNumber() != 1)
1391 #endif
1392 ) { // skip hydrogens and restrict to fragment
1393 DoLog(2) && (Log() << Verbose(2) << "Current partner is " << *OtherWalker << " with nr " << OtherWalker->nr << " in bond " << *(*Runner) << "." << endl);
1394 // set the label if not set (and push on root stack as well)
1395 if ((OtherWalker != Predecessor) && (OtherWalker->GetTrueFather()->nr > RootKeyNr)) { // only pass through those with label bigger than Root's
1396 FragmentSearch.ShortestPathList[OtherWalker->nr] = SP+1;
1397 DoLog(3) && (Log() << Verbose(3) << "Set Shortest Path to " << FragmentSearch.ShortestPathList[OtherWalker->nr] << "." << endl);
1398 // add the bond in between to the SP list
1399 Binder = new bond(Walker, OtherWalker); // create a new bond in such a manner, that bond::rightatom is always the one more distant
1400 add(Binder, FragmentSearch.BondsPerSPList[2*(SP+1)+1]);
1401 FragmentSearch.BondsPerSPCount[SP+1]++;
1402 DoLog(3) && (Log() << Verbose(3) << "Added its bond to SP list, having now " << FragmentSearch.BondsPerSPCount[SP+1] << " item(s)." << endl);
1403 } else {
1404 if (OtherWalker != Predecessor)
1405 DoLog(3) && (Log() << Verbose(3) << "Not passing on, as index of " << *OtherWalker << " " << OtherWalker->GetTrueFather()->nr << " is smaller than that of Root " << RootKeyNr << "." << endl);
1406 else
1407 DoLog(3) && (Log() << Verbose(3) << "This is my predecessor " << *Predecessor << "." << endl);
1408 }
1409 } else Log() << Verbose(2) << "Is not in the restricted keyset or skipping hydrogen " << *OtherWalker << "." << endl;
1410 }
1411 }
1412 }
1413};
1414
1415/** prints the Bonds per Shortest Path list in UniqueFragments.
1416 * \param *out output stream
1417 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1418 * \param FragmentSearch UniqueFragments
1419 */
1420void OutputSPList(int Order, struct UniqueFragments &FragmentSearch)
1421{
1422 bond *Binder = NULL;
1423 DoLog(0) && (Log() << Verbose(0) << "Printing all found lists." << endl);
1424 for(int i=1;i<Order;i++) { // skip the root edge in the printing
1425 Binder = FragmentSearch.BondsPerSPList[2*i];
1426 DoLog(1) && (Log() << Verbose(1) << "Current SP level is " << i << "." << endl);
1427 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1428 Binder = Binder->next;
1429 DoLog(2) && (Log() << Verbose(2) << *Binder << endl);
1430 }
1431 }
1432};
1433
1434/** Simply counts all bonds in all UniqueFragments::BondsPerSPList lists.
1435 * \param *out output stream
1436 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1437 * \param FragmentSearch UniqueFragments
1438 */
1439int CountNumbersInBondsList(int Order, struct UniqueFragments &FragmentSearch)
1440{
1441 bond *Binder = NULL;
1442 int SP = -1; // the Root <-> Root edge must be subtracted!
1443 for(int i=Order;i--;) { // sum up all found edges
1444 Binder = FragmentSearch.BondsPerSPList[2*i];
1445 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1446 Binder = Binder->next;
1447 SP++;
1448 }
1449 }
1450 return SP;
1451};
1452
1453/** Creates a list of all unique fragments of certain vertex size from a given graph \a Fragment for a given root vertex in the context of \a this molecule.
1454 * -# initialises UniqueFragments structure
1455 * -# fills edge list via BFS
1456 * -# creates the fragment by calling recursive function SPFragmentGenerator with UniqueFragments structure, 0 as
1457 root distance, the edge set, its dimension and the current suborder
1458 * -# Free'ing structure
1459 * Note that we may use the fact that the atoms are SP-ordered on the atomstack. I.e. when popping always the last, we first get all
1460 * with SP of 2, then those with SP of 3, then those with SP of 4 and so on.
1461 * \param *out output stream for debugging
1462 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1463 * \param FragmentSearch UniqueFragments structure containing TEFactor, root atom and so on
1464 * \param RestrictedKeySet Restricted vertex set to use in context of molecule
1465 * \return number of inserted fragments
1466 * \note ShortestPathList in FragmentSearch structure is probably due to NumberOfAtomsSPLevel and SP not needed anymore
1467 */
1468int molecule::PowerSetGenerator(int Order, struct UniqueFragments &FragmentSearch, KeySet RestrictedKeySet)
1469{
1470 int Counter = FragmentSearch.FragmentCounter; // mark current value of counter
1471
1472 DoLog(0) && (Log() << Verbose(0) << endl);
1473 DoLog(0) && (Log() << Verbose(0) << "Begin of PowerSetGenerator with order " << Order << " at Root " << *FragmentSearch.Root << "." << endl);
1474
1475 SetSPList(Order, FragmentSearch);
1476
1477 // do a BFS search to fill the SP lists and label the found vertices
1478 FillSPListandLabelVertices(Order, FragmentSearch, this, RestrictedKeySet);
1479
1480 // outputting all list for debugging
1481 OutputSPList(Order, FragmentSearch);
1482
1483 // creating fragments with the found edge sets (may be done in reverse order, faster)
1484 int SP = CountNumbersInBondsList(Order, FragmentSearch);
1485 DoLog(0) && (Log() << Verbose(0) << "Total number of edges is " << SP << "." << endl);
1486 if (SP >= (Order-1)) {
1487 // start with root (push on fragment stack)
1488 DoLog(0) && (Log() << Verbose(0) << "Starting fragment generation with " << *FragmentSearch.Root << ", local nr is " << FragmentSearch.Root->nr << "." << endl);
1489 FragmentSearch.FragmentSet->clear();
1490 DoLog(0) && (Log() << Verbose(0) << "Preparing subset for this root and calling generator." << endl);
1491
1492 // prepare the subset and call the generator
1493 bond* BondsList[FragmentSearch.BondsPerSPCount[0]];
1494 for(int i=0;i<FragmentSearch.BondsPerSPCount[0];i++)
1495 BondsList[i] = NULL;
1496 BondsList[0] = FragmentSearch.BondsPerSPList[0]->next; // on SP level 0 there's only the root bond
1497
1498 SPFragmentGenerator(&FragmentSearch, 0, BondsList, FragmentSearch.BondsPerSPCount[0], Order);
1499 } else {
1500 DoLog(0) && (Log() << Verbose(0) << "Not enough total number of edges to build " << Order << "-body fragments." << endl);
1501 }
1502
1503 // as FragmentSearch structure is used only once, we don't have to clean it anymore
1504 // remove root from stack
1505 DoLog(0) && (Log() << Verbose(0) << "Removing root again from stack." << endl);
1506 FragmentSearch.FragmentSet->erase(FragmentSearch.Root->nr);
1507
1508 // free'ing the bonds lists
1509 ResetSPList(Order, FragmentSearch);
1510
1511 // return list
1512 DoLog(0) && (Log() << Verbose(0) << "End of PowerSetGenerator." << endl);
1513 return (FragmentSearch.FragmentCounter - Counter);
1514};
1515
1516bool KeyCompare::operator() (const KeySet SubgraphA, const KeySet SubgraphB) const
1517{
1518 //Log() << Verbose(0) << "my check is used." << endl;
1519 if (SubgraphA.size() < SubgraphB.size()) {
1520 return true;
1521 } else {
1522 if (SubgraphA.size() > SubgraphB.size()) {
1523 return false;
1524 } else {
1525 KeySet::iterator IteratorA = SubgraphA.begin();
1526 KeySet::iterator IteratorB = SubgraphB.begin();
1527 while ((IteratorA != SubgraphA.end()) && (IteratorB != SubgraphB.end())) {
1528 if ((*IteratorA) < (*IteratorB))
1529 return true;
1530 else if ((*IteratorA) > (*IteratorB)) {
1531 return false;
1532 } // else, go on to next index
1533 IteratorA++;
1534 IteratorB++;
1535 } // end of while loop
1536 }// end of check in case of equal sizes
1537 }
1538 return false; // if we reach this point, they are equal
1539};
1540
1541
1542/** Combines all KeySets from all orders into single ones (with just unique entries).
1543 * \param *out output stream for debugging
1544 * \param *&FragmentList list to fill
1545 * \param ***FragmentLowerOrdersList
1546 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1547 * \param *mol molecule with atoms and bonds
1548 */
1549int CombineAllOrderListIntoOne(Graph *&FragmentList, Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
1550{
1551 int RootNr = 0;
1552 int RootKeyNr = 0;
1553 int StartNr = 0;
1554 int counter = 0;
1555 int NumLevels = 0;
1556 atom *Walker = NULL;
1557
1558 DoLog(0) && (Log() << Verbose(0) << "Combining the lists of all orders per order and finally into a single one." << endl);
1559 if (FragmentList == NULL) {
1560 FragmentList = new Graph;
1561 counter = 0;
1562 } else {
1563 counter = FragmentList->size();
1564 }
1565
1566 StartNr = RootStack.back();
1567 do {
1568 RootKeyNr = RootStack.front();
1569 RootStack.pop_front();
1570 Walker = mol->FindAtom(RootKeyNr);
1571 NumLevels = 1 << (Walker->AdaptiveOrder - 1);
1572 for(int i=0;i<NumLevels;i++) {
1573 if (FragmentLowerOrdersList[RootNr][i] != NULL) {
1574 InsertGraphIntoGraph(*FragmentList, (*FragmentLowerOrdersList[RootNr][i]), &counter);
1575 }
1576 }
1577 RootStack.push_back(Walker->nr);
1578 RootNr++;
1579 } while (RootKeyNr != StartNr);
1580 return counter;
1581};
1582
1583/** Free's memory allocated for all KeySets from all orders.
1584 * \param *out output stream for debugging
1585 * \param ***FragmentLowerOrdersList
1586 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1587 * \param *mol molecule with atoms and bonds
1588 */
1589void FreeAllOrdersList(Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
1590{
1591 DoLog(1) && (Log() << Verbose(1) << "Free'ing the lists of all orders per order." << endl);
1592 int RootNr = 0;
1593 int RootKeyNr = 0;
1594 int NumLevels = 0;
1595 atom *Walker = NULL;
1596 while (!RootStack.empty()) {
1597 RootKeyNr = RootStack.front();
1598 RootStack.pop_front();
1599 Walker = mol->FindAtom(RootKeyNr);
1600 NumLevels = 1 << (Walker->AdaptiveOrder - 1);
1601 for(int i=0;i<NumLevels;i++) {
1602 if (FragmentLowerOrdersList[RootNr][i] != NULL) {
1603 delete(FragmentLowerOrdersList[RootNr][i]);
1604 }
1605 }
1606 delete[](FragmentLowerOrdersList[RootNr]);
1607 RootNr++;
1608 }
1609 delete[](FragmentLowerOrdersList);
1610};
1611
1612
1613/** Performs BOSSANOVA decomposition at selected sites, increasing the cutoff by one at these sites.
1614 * -# constructs a complete keyset of the molecule
1615 * -# In a loop over all possible roots from the given rootstack
1616 * -# increases order of root site
1617 * -# calls PowerSetGenerator with this order, the complete keyset and the rootkeynr
1618 * -# for all consecutive lower levels PowerSetGenerator is called with the suborder, the higher order keyset
1619as the restricted one and each site in the set as the root)
1620 * -# these are merged into a fragment list of keysets
1621 * -# All fragment lists (for all orders, i.e. from all destination fields) are merged into one list for return
1622 * Important only is that we create all fragments, it is not important if we create them more than once
1623 * as these copies are filtered out via use of the hash table (KeySet).
1624 * \param *out output stream for debugging
1625 * \param Fragment&*List list of already present keystacks (adaptive scheme) or empty list
1626 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1627 * \param *MinimumRingSize minimum ring size for each atom (molecule::Atomcount)
1628 * \return pointer to Graph list
1629 */
1630void molecule::FragmentBOSSANOVA(Graph *&FragmentList, KeyStack &RootStack, int *MinimumRingSize)
1631{
1632 Graph ***FragmentLowerOrdersList = NULL;
1633 int NumLevels = 0;
1634 int NumMolecules = 0;
1635 int TotalNumMolecules = 0;
1636 int *NumMoleculesOfOrder = NULL;
1637 int Order = 0;
1638 int UpgradeCount = RootStack.size();
1639 KeyStack FragmentRootStack;
1640 int RootKeyNr = 0;
1641 int RootNr = 0;
1642 struct UniqueFragments FragmentSearch;
1643
1644 DoLog(0) && (Log() << Verbose(0) << "Begin of FragmentBOSSANOVA." << endl);
1645
1646 // FragmentLowerOrdersList is a 2D-array of pointer to MoleculeListClass objects, one dimension represents the ANOVA expansion of a single order (i.e. 5)
1647 // with all needed lower orders that are subtracted, the other dimension is the BondOrder (i.e. from 1 to 5)
1648 NumMoleculesOfOrder = new int[UpgradeCount];
1649 FragmentLowerOrdersList = new Graph**[UpgradeCount];
1650
1651 for(int i=0;i<UpgradeCount;i++) {
1652 NumMoleculesOfOrder[i] = 0;
1653 FragmentLowerOrdersList[i] = NULL;
1654 }
1655
1656 // initialise the fragments structure
1657 FragmentSearch.FragmentCounter = 0;
1658 FragmentSearch.FragmentSet = new KeySet;
1659 FragmentSearch.Root = FindAtom(RootKeyNr);
1660 FragmentSearch.ShortestPathList = new int[getAtomCount()];
1661 for (int i=getAtomCount();i--;) {
1662 FragmentSearch.ShortestPathList[i] = -1;
1663 }
1664
1665 // Construct the complete KeySet which we need for topmost level only (but for all Roots)
1666 KeySet CompleteMolecule;
1667 for (molecule::const_iterator iter = begin(); iter != end(); ++iter) {
1668 CompleteMolecule.insert((*iter)->GetTrueFather()->nr);
1669 }
1670
1671 // this can easily be seen: if Order is 5, then the number of levels for each lower order is the total sum of the number of levels above, as
1672 // each has to be split up. E.g. for the second level we have one from 5th, one from 4th, two from 3th (which in turn is one from 5th, one from 4th),
1673 // hence we have overall four 2th order levels for splitting. This also allows for putting all into a single array (FragmentLowerOrdersList[])
1674 // with the order along the cells as this: 5433222211111111 for BondOrder 5 needing 16=pow(2,5-1) cells (only we use bit-shifting which is faster)
1675 RootNr = 0; // counts through the roots in RootStack
1676 while ((RootNr < UpgradeCount) && (!RootStack.empty())) {
1677 RootKeyNr = RootStack.front();
1678 RootStack.pop_front();
1679 atom *Walker = FindAtom(RootKeyNr);
1680 // check cyclic lengths
1681 //if ((MinimumRingSize[Walker->GetTrueFather()->nr] != -1) && (Walker->GetTrueFather()->AdaptiveOrder+1 > MinimumRingSize[Walker->GetTrueFather()->nr])) {
1682 // Log() << Verbose(0) << "Bond order " << Walker->GetTrueFather()->AdaptiveOrder << " of Root " << *Walker << " greater than or equal to Minimum Ring size of " << MinimumRingSize << " found is not allowed." << endl;
1683 //} else
1684 {
1685 // increase adaptive order by one
1686 Walker->GetTrueFather()->AdaptiveOrder++;
1687 Order = Walker->AdaptiveOrder = Walker->GetTrueFather()->AdaptiveOrder;
1688
1689 // initialise Order-dependent entries of UniqueFragments structure
1690 InitialiseSPList(Order, FragmentSearch);
1691
1692 // allocate memory for all lower level orders in this 1D-array of ptrs
1693 NumLevels = 1 << (Order-1); // (int)pow(2,Order);
1694 FragmentLowerOrdersList[RootNr] = new Graph*[NumLevels];
1695 for (int i=0;i<NumLevels;i++)
1696 FragmentLowerOrdersList[RootNr][i] = NULL;
1697
1698 // create top order where nothing is reduced
1699 DoLog(0) && (Log() << Verbose(0) << "==============================================================================================================" << endl);
1700 DoLog(0) && (Log() << Verbose(0) << "Creating KeySets of Bond Order " << Order << " for " << *Walker << ", " << (RootStack.size()-RootNr) << " Roots remaining." << endl); // , NumLevels is " << NumLevels << "
1701
1702 // Create list of Graphs of current Bond Order (i.e. F_{ij})
1703 FragmentLowerOrdersList[RootNr][0] = new Graph;
1704 FragmentSearch.TEFactor = 1.;
1705 FragmentSearch.Leaflet = FragmentLowerOrdersList[RootNr][0]; // set to insertion graph
1706 FragmentSearch.Root = Walker;
1707 NumMoleculesOfOrder[RootNr] = PowerSetGenerator(Walker->AdaptiveOrder, FragmentSearch, CompleteMolecule);
1708
1709 // output resulting number
1710 DoLog(1) && (Log() << Verbose(1) << "Number of resulting KeySets is: " << NumMoleculesOfOrder[RootNr] << "." << endl);
1711 if (NumMoleculesOfOrder[RootNr] != 0) {
1712 NumMolecules = 0;
1713 } else {
1714 Walker->GetTrueFather()->MaxOrder = true;
1715 }
1716 // now, we have completely filled each cell of FragmentLowerOrdersList[] for the current Walker->AdaptiveOrder
1717 //NumMoleculesOfOrder[Walker->AdaptiveOrder-1] = NumMolecules;
1718 TotalNumMolecules += NumMoleculesOfOrder[RootNr];
1719// Log() << Verbose(1) << "Number of resulting molecules for Order " << (int)Walker->GetTrueFather()->AdaptiveOrder << " is: " << NumMoleculesOfOrder[RootNr] << "." << endl;
1720 RootStack.push_back(RootKeyNr); // put back on stack
1721 RootNr++;
1722
1723 // free Order-dependent entries of UniqueFragments structure for next loop cycle
1724 FreeSPList(Order, FragmentSearch);
1725 }
1726 }
1727 DoLog(0) && (Log() << Verbose(0) << "==============================================================================================================" << endl);
1728 DoLog(1) && (Log() << Verbose(1) << "Total number of resulting molecules is: " << TotalNumMolecules << "." << endl);
1729 DoLog(0) && (Log() << Verbose(0) << "==============================================================================================================" << endl);
1730
1731 // cleanup FragmentSearch structure
1732 delete[](FragmentSearch.ShortestPathList);
1733 delete(FragmentSearch.FragmentSet);
1734
1735 // now, FragmentLowerOrdersList is complete, it looks - for BondOrder 5 - as this (number is the ANOVA Order of the terms therein)
1736 // 5433222211111111
1737 // 43221111
1738 // 3211
1739 // 21
1740 // 1
1741
1742 // Subsequently, we combine all into a single list (FragmentList)
1743 CombineAllOrderListIntoOne(FragmentList, FragmentLowerOrdersList, RootStack, this);
1744 FreeAllOrdersList(FragmentLowerOrdersList, RootStack, this);
1745 delete[](NumMoleculesOfOrder);
1746
1747 DoLog(0) && (Log() << Verbose(0) << "End of FragmentBOSSANOVA." << endl);
1748};
1749
1750/** Corrects the nuclei position if the fragment was created over the cell borders.
1751 * Scans all bonds, checks the distance, if greater than typical, we have a candidate for the correction.
1752 * We remove the bond whereafter the graph probably separates. Then, we translate the one component periodically
1753 * and re-add the bond. Looping on the distance check.
1754 * \param *out ofstream for debugging messages
1755 */
1756void molecule::ScanForPeriodicCorrection()
1757{
1758 bond *Binder = NULL;
1759 bond *OtherBinder = NULL;
1760 atom *Walker = NULL;
1761 atom *OtherWalker = NULL;
1762 RealSpaceMatrix matrix = World::getInstance().getDomain().getM();
1763 enum Shading *ColorList = NULL;
1764 double tmp;
1765 Vector Translationvector;
1766 //std::deque<atom *> *CompStack = NULL;
1767 std::deque<atom *> *AtomStack = new std::deque<atom *>; // (getAtomCount());
1768 bool flag = true;
1769
1770 DoLog(2) && (Log() << Verbose(2) << "Begin of ScanForPeriodicCorrection." << endl);
1771
1772 ColorList = new enum Shading[getAtomCount()];
1773 for (int i=0;i<getAtomCount();i++)
1774 ColorList[i] = (enum Shading)0;
1775 while (flag) {
1776 // remove bonds that are beyond bonddistance
1777 Translationvector.Zero();
1778 // scan all bonds
1779 flag = false;
1780 for(molecule::iterator AtomRunner = begin(); (!flag) && (AtomRunner != end()); ++AtomRunner)
1781 for(BondList::iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); (!flag) && (BondRunner != (*AtomRunner)->ListOfBonds.end()); ++BondRunner) {
1782 Binder = (*BondRunner);
1783 for (int i=NDIM;i--;) {
1784 tmp = fabs(Binder->leftatom->at(i) - Binder->rightatom->at(i));
1785 //Log() << Verbose(3) << "Checking " << i << "th distance of " << *Binder->leftatom << " to " << *Binder->rightatom << ": " << tmp << "." << endl;
1786 if (tmp > BondDistance) {
1787 OtherBinder = Binder->next; // note down binding partner for later re-insertion
1788 unlink(Binder); // unlink bond
1789 DoLog(2) && (Log() << Verbose(2) << "Correcting at bond " << *Binder << "." << endl);
1790 flag = true;
1791 break;
1792 }
1793 }
1794 }
1795 if (flag) {
1796 // create translation vector from their periodically modified distance
1797 for (int i=NDIM;i--;) {
1798 tmp = Binder->leftatom->at(i) - Binder->rightatom->at(i);
1799 if (fabs(tmp) > BondDistance)
1800 Translationvector[i] = (tmp < 0) ? +1. : -1.;
1801 }
1802 Translationvector *= matrix;
1803 //Log() << Verbose(3) << "Translation vector is ";
1804 Log() << Verbose(0) << Translationvector << endl;
1805 // apply to all atoms of first component via BFS
1806 for (int i=getAtomCount();i--;)
1807 ColorList[i] = white;
1808 AtomStack->push_front(Binder->leftatom);
1809 while (!AtomStack->empty()) {
1810 Walker = AtomStack->front();
1811 AtomStack->pop_front();
1812 //Log() << Verbose (3) << "Current Walker is: " << *Walker << "." << endl;
1813 ColorList[Walker->nr] = black; // mark as explored
1814 *Walker += Translationvector; // translate
1815 for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
1816 if ((*Runner) != Binder) {
1817 OtherWalker = (*Runner)->GetOtherAtom(Walker);
1818 if (ColorList[OtherWalker->nr] == white) {
1819 AtomStack->push_front(OtherWalker); // push if yet unexplored
1820 }
1821 }
1822 }
1823 }
1824 // re-add bond
1825 link(Binder, OtherBinder);
1826 } else {
1827 DoLog(3) && (Log() << Verbose(3) << "No corrections for this fragment." << endl);
1828 }
1829 //delete(CompStack);
1830 }
1831 // free allocated space from ReturnFullMatrixforSymmetric()
1832 delete(AtomStack);
1833 delete[](ColorList);
1834 DoLog(2) && (Log() << Verbose(2) << "End of ScanForPeriodicCorrection." << endl);
1835};
Note: See TracBrowser for help on using the repository browser.