| 1 | // | 
|---|
| 2 | // memory.h | 
|---|
| 3 | // | 
|---|
| 4 | // Copyright (C) 1996 Limit Point Systems, Inc. | 
|---|
| 5 | // | 
|---|
| 6 | // Author: Curtis Janssen <cljanss@limitpt.com> | 
|---|
| 7 | // Maintainer: LPS | 
|---|
| 8 | // | 
|---|
| 9 | // This file is part of the SC Toolkit. | 
|---|
| 10 | // | 
|---|
| 11 | // The SC Toolkit is free software; you can redistribute it and/or modify | 
|---|
| 12 | // it under the terms of the GNU Library General Public License as published by | 
|---|
| 13 | // the Free Software Foundation; either version 2, or (at your option) | 
|---|
| 14 | // any later version. | 
|---|
| 15 | // | 
|---|
| 16 | // The SC Toolkit is distributed in the hope that it will be useful, | 
|---|
| 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 19 | // GNU Library General Public License for more details. | 
|---|
| 20 | // | 
|---|
| 21 | // You should have received a copy of the GNU Library General Public License | 
|---|
| 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to | 
|---|
| 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
| 24 | // | 
|---|
| 25 | // The U.S. Government is granted a limited license as per AL 91-7. | 
|---|
| 26 | // | 
|---|
| 27 |  | 
|---|
| 28 | #ifdef __GNUC__ | 
|---|
| 29 | #pragma interface | 
|---|
| 30 | #endif | 
|---|
| 31 |  | 
|---|
| 32 | #ifndef _util_group_memory_h | 
|---|
| 33 | #define _util_group_memory_h | 
|---|
| 34 |  | 
|---|
| 35 | #include <iostream> | 
|---|
| 36 |  | 
|---|
| 37 | #include <scconfig.h> | 
|---|
| 38 | #include <util/class/class.h> | 
|---|
| 39 | #include <util/group/thread.h> | 
|---|
| 40 |  | 
|---|
| 41 | namespace sc { | 
|---|
| 42 |  | 
|---|
| 43 | #if 0 // this can be used to catch accidental conversions to int | 
|---|
/** Debugging stand-in for the distsize_t typedef.  It wraps an
    unsigned long long and offers no conversion back to a built-in
    integer type, so code that accidentally converts a distributed size
    to int fails to compile.  The only way to get the raw value out is
    distsize_to_size(). */
class distsize_t {
    friend size_t distsize_to_size(const distsize_t &a);
    friend distsize_t operator *(const int &a,const distsize_t &b);
    friend distsize_t operator +(const int &a,const distsize_t &b);
    friend distsize_t operator -(const int &a,const distsize_t &b);
    friend distsize_t operator /(const int &a,const distsize_t &b);
    friend distsize_t operator %(const int &a,const distsize_t &b);
    // std:: qualification is required: this header has no using-directive.
    friend std::ostream& operator <<(std::ostream& o, const distsize_t &s);
  private:
    /// The wrapped value.
    unsigned long long s;
  public:
    /// Default construction stores a conspicuous value (presumably so
    /// that use of an unset size stands out; the intent is not stated).
    distsize_t(): s(999999999999999LL) {}
    // Converting constructors from the built-in integer types.  A
    // negative int wraps around when converted to unsigned long long.
    distsize_t(int a): s(a) {}
    distsize_t(unsigned int a): s(a) {}
    distsize_t(unsigned long long a): s(a) {}

    distsize_t &operator =(const distsize_t &a)
        { s=a.s; return *this; }
    distsize_t &operator +=(const distsize_t &a)
        { s+=a.s; return *this; }

    // Arithmetic with another distsize_t (unsigned long long arithmetic).
    distsize_t operator *(const distsize_t &a) const
        { return s*a.s; }
    distsize_t operator +(const distsize_t &a) const
        { return s+a.s; }
    distsize_t operator -(const distsize_t &a) const
        { return s-a.s; }
    distsize_t operator /(const distsize_t &a) const
        { return s/a.s; }
    distsize_t operator %(const distsize_t &a) const
        { return s%a.s; }

    // Comparisons.
    bool operator <(const distsize_t &a) const
        { return s<a.s; }
    bool operator <=(const distsize_t &a) const
        { return s<=a.s; }
    bool operator >(const distsize_t &a) const
        { return s>a.s; }
    bool operator >=(const distsize_t &a) const
        { return s>=a.s; }
    bool operator ==(const distsize_t &a) const
        { return s==a.s; }

    // Arithmetic with an int on the right-hand side.  The int operand
    // is converted to unsigned long long before the operation.
    distsize_t operator *(const int &a) const
        { return s*a; }
    distsize_t operator +(const int &a) const
        { return s+a; }
    distsize_t operator -(const int &a) const
        { return s-a; }
    distsize_t operator /(const int &a) const
        { return s/a; }
    distsize_t operator %(const int &a) const
        { return s%a; }
};

// Arithmetic with an int on the left-hand side.
inline distsize_t operator *(const int &a,const distsize_t &b)
{ return a*b.s; }
inline distsize_t operator +(const int &a,const distsize_t &b)
{ return a+b.s; }
inline distsize_t operator -(const int &a,const distsize_t &b)
{ return a-b.s; }
inline distsize_t operator /(const int &a,const distsize_t &b)
{ return a/b.s; }
inline distsize_t operator %(const int &a,const distsize_t &b)
{ return a%b.s; }

/// Writes the wrapped value to the stream.
inline std::ostream& operator <<(std::ostream& o, const distsize_t &s)
{ return o<<s.s; }

/// Explicitly narrows a distsize_t to size_t.  The value is truncated
/// if size_t is narrower than unsigned long long.
inline size_t distsize_to_size(const distsize_t &a) {return a.s;}
|---|
| 106 | #elif defined(HAVE_LONG_LONG) | 
|---|
/// Unsigned type for offsets and sizes that span all nodes; unsigned
/// long long is used when the configuration provides it.
typedef unsigned long long distsize_t;
/// Signed counterpart of distsize_t.
typedef long long distssize_t;
/// Converts a distributed size to size_t.  The value is truncated if
/// size_t is narrower than distsize_t.
inline size_t distsize_to_size(const distsize_t &a)
{
  return static_cast<size_t>(a);
}
|---|
| 110 | #else | 
|---|
/// Unsigned type for offsets and sizes that span all nodes; this is
/// the fallback used when long long is not available.
typedef unsigned long distsize_t;
/// Signed counterpart of distsize_t.
typedef long distssize_t;
/// Converts a distributed size to size_t.  The value is truncated if
/// size_t is narrower than distsize_t.
inline size_t distsize_to_size(const distsize_t &a)
{
  return static_cast<size_t>(a);
}
|---|
| 114 | #endif | 
|---|
| 115 |  | 
|---|
| 116 | /** The MemoryGrp abstract class provides a way of accessing distributed | 
|---|
| 117 | memory in a parallel machine.  Several specializations are available.  For | 
|---|
| 118 | one processor, ProcMemoryGrp provides a simple stub implementation. | 
|---|
| 119 | Parallel specializations include ShmMemoryGrp, MTMPIMemoryGrp, and | 
|---|
| 120 | ARMCIMemoryGrp.  The particular specializations that work depend highly on | 
|---|
| 121 | the target hardware and software environment. | 
|---|
| 122 |  | 
|---|
| 123 | */ | 
|---|
| 124 | class MemoryGrp: public DescribedClass { | 
|---|
| 125 | private: | 
|---|
| 126 | Ref<ThreadLock> *locks_; | 
|---|
| 127 | int nlock_; | 
|---|
| 128 |  | 
|---|
| 129 | void init_locks(); | 
|---|
| 130 |  | 
|---|
| 131 |  | 
|---|
| 132 | protected: | 
|---|
| 133 | // derived classes must fill in all these | 
|---|
| 134 | // ~MemoryGrp deletes the arrays | 
|---|
| 135 | int me_; | 
|---|
| 136 | int n_; | 
|---|
| 137 | distsize_t *offsets_; // offsets_[n_] is the fence for all data | 
|---|
| 138 |  | 
|---|
| 139 | // set to nonzero for debugging information | 
|---|
| 140 | int debug_; | 
|---|
| 141 |  | 
|---|
| 142 | void obtain_local_lock(size_t start, size_t fence); | 
|---|
| 143 | void release_local_lock(size_t start, size_t fence); | 
|---|
| 144 | public: | 
|---|
| 145 | MemoryGrp(); | 
|---|
| 146 | MemoryGrp(const Ref<KeyVal>&); | 
|---|
| 147 | virtual ~MemoryGrp(); | 
|---|
| 148 |  | 
|---|
| 149 | /// Returns who I am. | 
|---|
| 150 | int me() const { return me_; } | 
|---|
| 151 | /// Returns how many nodes there are. | 
|---|
| 152 | int n() const { return n_; } | 
|---|
| 153 |  | 
|---|
| 154 | /** Set the size of locally held memory. | 
|---|
| 155 | When memory is accessed using a global offset counting | 
|---|
| 156 | starts at node 0 and proceeds up to node n() - 1. */ | 
|---|
| 157 | virtual void set_localsize(size_t) = 0; | 
|---|
| 158 | /// Returns the amount of memory residing locally on me(). | 
|---|
| 159 | size_t localsize() { return distsize_to_size(offsets_[me_+1]-offsets_[me_]); } | 
|---|
| 160 | /// Returns a pointer to the local data. | 
|---|
| 161 | virtual void *localdata() = 0; | 
|---|
| 162 | /// Returns the global offset to this node's memory. | 
|---|
| 163 | distsize_t localoffset() { return offsets_[me_]; } | 
|---|
| 164 | /// Returns the amount of memory residing on node. | 
|---|
| 165 | int size(int node) | 
|---|
| 166 | { return distsize_to_size(offsets_[node+1] - offsets_[node]); } | 
|---|
| 167 | /// Returns the global offset to node's memory. | 
|---|
| 168 | distsize_t offset(int node) { return offsets_[node]; } | 
|---|
| 169 | /// Returns the sum of all memory allocated on all nodes. | 
|---|
| 170 | distsize_t totalsize() { return offsets_[n_]; } | 
|---|
| 171 |  | 
|---|
| 172 | /// Activate is called before the memory is to be used. | 
|---|
| 173 | virtual void activate(); | 
|---|
| 174 | /// Deactivate is called after the memory has been used. | 
|---|
| 175 | virtual void deactivate(); | 
|---|
| 176 |  | 
|---|
| 177 | /// This gives write access to the memory location.  No locking is done. | 
|---|
| 178 | virtual void *obtain_writeonly(distsize_t offset, int size) = 0; | 
|---|
| 179 | /** Only one thread can have an unreleased obtain_readwrite at a time. | 
|---|
| 180 | The actual memory region locked can be larger than that requested. | 
|---|
| 181 | If the memory region is already locked this will block.  For this | 
|---|
| 182 | reason, data should be held as read/write for as short a time as | 
|---|
| 183 | possible. */ | 
|---|
| 184 | virtual void *obtain_readwrite(distsize_t offset, int size) = 0; | 
|---|
| 185 | /// This gives read access to the memory location.  No locking is done. | 
|---|
| 186 | virtual void *obtain_readonly(distsize_t offset, int size) = 0; | 
|---|
| 187 | /// This is called when read access is no longer needed. | 
|---|
| 188 | virtual void release_readonly(void *data, distsize_t offset, int size) = 0; | 
|---|
| 189 | /// This is called when write access is no longer needed. | 
|---|
| 190 | virtual void release_writeonly(void *data, distsize_t offset, int size)=0; | 
|---|
| 191 | /** This is called when read/write access is no longer needed. | 
|---|
| 192 | The memory will be unlocked. */ | 
|---|
| 193 | virtual void release_readwrite(void *data, distsize_t offset, int size)=0; | 
|---|
| 194 |  | 
|---|
| 195 | virtual void sum_reduction(double *data, distsize_t doffset, int dsize); | 
|---|
| 196 | virtual void sum_reduction_on_node(double *data, size_t doffset, int dsize, | 
|---|
| 197 | int node = -1); | 
|---|
| 198 |  | 
|---|
| 199 | /** Synchronizes all the nodes.  This is useful after remote memory | 
|---|
| 200 | writes to be certain that all of the writes have completed and the | 
|---|
| 201 | data can be accessed locally, for example. */ | 
|---|
| 202 | virtual void sync() = 0; | 
|---|
| 203 |  | 
|---|
| 204 | /** Allocate data that will be accessed locally only.  Using this | 
|---|
| 205 | for data that will be used for global operation can improve | 
|---|
| 206 | efficiency.  Data allocated in this way must be freed with | 
|---|
| 207 | free_local_double.  */ | 
|---|
| 208 | virtual void* malloc_local(size_t nbyte); | 
|---|
| 209 | virtual double* malloc_local_double(size_t ndouble); | 
|---|
| 210 |  | 
|---|
| 211 | /** Free data that was allocated with malloc_local_double. */ | 
|---|
| 212 | virtual void free_local(void *data); | 
|---|
| 213 | virtual void free_local_double(double *data); | 
|---|
| 214 |  | 
|---|
| 215 | /** Processes outstanding requests. Some memory group implementations | 
|---|
| 216 | don't have access to real shared memory or even active messages. | 
|---|
| 217 | Instead, requests are processed whenever certain memory group | 
|---|
| 218 | routines are called.  This can cause large latencies and buffer | 
|---|
| 219 | overflows.  If this is a problem, then the catchup member can be | 
|---|
| 220 | called to process all outstanding requests. */ | 
|---|
| 221 | virtual void catchup(); | 
|---|
| 222 |  | 
|---|
| 223 | /// Prints out information about the object. | 
|---|
| 224 | virtual void print(std::ostream &o = ExEnv::out0()) const; | 
|---|
| 225 |  | 
|---|
| 226 | /** Create a memory group.  This routine looks for a -memorygrp | 
|---|
| 227 | argument, and then the environmental variable MEMORYGRP to decide | 
|---|
| 228 | which specialization of MemoryGrp would be appropriate.  The | 
|---|
| 229 | argument to -memorygrp or the value of the environmental variable | 
|---|
| 230 | should be either string for a ParsedKeyVal constructor or a | 
|---|
| 231 | classname.  The default ThreadGrp and MessageGrp objects should be | 
|---|
| 232 | initialized before this is called. */ | 
|---|
| 233 | static MemoryGrp* initial_memorygrp(int &argc, char** argv); | 
|---|
| 234 | static MemoryGrp* initial_memorygrp(); | 
|---|
| 235 | /** The default memory group contains the primary memory group to | 
|---|
| 236 | be used by an application. */ | 
|---|
| 237 | static void set_default_memorygrp(const Ref<MemoryGrp>&); | 
|---|
| 238 | /** Returns the default memory group.  If the default memory | 
|---|
| 239 | group has not yet been set, then one is created. | 
|---|
| 240 | The particular specialization used is determined by | 
|---|
| 241 | configuration options and which specializations are being | 
|---|
| 242 | used for MessageGrp and ThreadGrp. */ | 
|---|
| 243 | static MemoryGrp* get_default_memorygrp(); | 
|---|
| 244 | }; | 
|---|
| 245 |  | 
|---|
| 246 |  | 
|---|
| 247 | /** The MemoryGrpBuf class provides access to pieces of the | 
|---|
| 248 | global shared memory that have been obtained with MemoryGrp. | 
|---|
| 249 | MemoryGrpBuf is a template class that is parameterized on | 
|---|
| 250 | data_t.  All lengths and offsets of given in terms | 
|---|
| 251 | of sizeof(data_t). */ | 
|---|
| 252 | template <class data_t> | 
|---|
| 253 | class MemoryGrpBuf { | 
|---|
| 254 | Ref<MemoryGrp> grp_; | 
|---|
| 255 | enum AccessType { None, Read, Write, ReadWrite }; | 
|---|
| 256 | AccessType accesstype_; | 
|---|
| 257 | data_t *data_; | 
|---|
| 258 | distsize_t offset_; | 
|---|
| 259 | int length_; | 
|---|
| 260 | public: | 
|---|
| 261 | /** Creates a new MemoryGrpBuf given a MemoryGrp | 
|---|
| 262 | reference.  This is a template class parameterized on | 
|---|
| 263 | data_t. */ | 
|---|
| 264 | MemoryGrpBuf(const Ref<MemoryGrp> &); | 
|---|
| 265 | /** Request write only access to global memory at the global address | 
|---|
| 266 | offset and with size length.  Writing the same bit of memory twice | 
|---|
| 267 | without an intervening sync of the MemoryGrp will have undefined | 
|---|
| 268 | results. */ | 
|---|
| 269 | data_t *writeonly(distsize_t offset, int length); | 
|---|
| 270 | /** Request read write access to global memory at the global address | 
|---|
| 271 | offset and with size length.  This will lock the memory it uses | 
|---|
| 272 | until release is called unless locking has been turned off in the | 
|---|
| 273 | MemoryGrp object. */ | 
|---|
| 274 | data_t *readwrite(distsize_t offset, int length); | 
|---|
| 275 | /** Request read only access to global memory at the global address | 
|---|
| 276 | offset and with size length.  Writing to the | 
|---|
| 277 | specified region without an intervening sync of the MemoryGrp | 
|---|
| 278 | will have undefined results. */ | 
|---|
| 279 | const data_t *readonly(distsize_t offset, int length); | 
|---|
| 280 | /** These behave like writeonly, readwrite, and readonly, except the | 
|---|
| 281 | offset is local to the node specified by node.  If node = -1, then | 
|---|
| 282 | the local node is used. */ | 
|---|
| 283 | data_t *writeonly_on_node(size_t offset, int length, int node = -1); | 
|---|
| 284 | data_t *readwrite_on_node(size_t offset, int length, int node = -1); | 
|---|
| 285 | const data_t *readonly_on_node(size_t offset, int length, int node = -1); | 
|---|
| 286 | /** Release the access to the chunk of global memory that was obtained | 
|---|
| 287 | with writeonly, readwrite, readonly, writeonly_on_node, | 
|---|
| 288 | readwrite_on_node, and readonly_on_node. */ | 
|---|
| 289 | void release(); | 
|---|
| 290 | /// The length of the current bit of memory. | 
|---|
| 291 | int length() const { return length_; } | 
|---|
| 292 | }; | 
|---|
| 293 |  | 
|---|
| 294 | ////////////////////////////////////////////////////////////////////// | 
|---|
| 295 | // MemoryGrpBuf members | 
|---|
| 296 |  | 
|---|
| 297 | template <class data_t> | 
|---|
| 298 | MemoryGrpBuf<data_t>::MemoryGrpBuf(const Ref<MemoryGrp> & grp) | 
|---|
| 299 | { | 
|---|
| 300 | grp_ = grp; | 
|---|
| 301 | accesstype_ = None; | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | template <class data_t> | 
|---|
| 305 | data_t * | 
|---|
| 306 | MemoryGrpBuf<data_t>::writeonly(distsize_t offset, int length) | 
|---|
| 307 | { | 
|---|
| 308 | if (accesstype_ != None) release(); | 
|---|
| 309 | data_ = (data_t *) grp_->obtain_writeonly(sizeof(data_t)*offset, | 
|---|
| 310 | sizeof(data_t)*length); | 
|---|
| 311 | offset_ = offset; | 
|---|
| 312 | length_ = length; | 
|---|
| 313 | accesstype_ = Write; | 
|---|
| 314 | return data_; | 
|---|
| 315 | } | 
|---|
| 316 |  | 
|---|
| 317 | template <class data_t> | 
|---|
| 318 | data_t * | 
|---|
| 319 | MemoryGrpBuf<data_t>::readwrite(distsize_t offset, int length) | 
|---|
| 320 | { | 
|---|
| 321 | if (accesstype_ != None) release(); | 
|---|
| 322 | data_ = (data_t *) grp_->obtain_readwrite(sizeof(data_t)*offset, | 
|---|
| 323 | sizeof(data_t)*length); | 
|---|
| 324 | offset_ = offset; | 
|---|
| 325 | length_ = length; | 
|---|
| 326 | accesstype_ = ReadWrite; | 
|---|
| 327 | return data_; | 
|---|
| 328 | } | 
|---|
| 329 |  | 
|---|
| 330 | template <class data_t> | 
|---|
| 331 | const data_t * | 
|---|
| 332 | MemoryGrpBuf<data_t>::readonly(distsize_t offset, int length) | 
|---|
| 333 | { | 
|---|
| 334 | if (accesstype_ != None) release(); | 
|---|
| 335 | data_ = (data_t *) grp_->obtain_readonly(sizeof(data_t)*offset, | 
|---|
| 336 | sizeof(data_t)*length); | 
|---|
| 337 | offset_ = offset; | 
|---|
| 338 | length_ = length; | 
|---|
| 339 | accesstype_ = Read; | 
|---|
| 340 | return data_; | 
|---|
| 341 | } | 
|---|
| 342 |  | 
|---|
| 343 | template <class data_t> | 
|---|
| 344 | data_t * | 
|---|
| 345 | MemoryGrpBuf<data_t>::writeonly_on_node(size_t offset, int length, int node) | 
|---|
| 346 | { | 
|---|
| 347 | if (node == -1) node = grp_->me(); | 
|---|
| 348 | return writeonly(offset + grp_->offset(node)/sizeof(data_t), length); | 
|---|
| 349 | } | 
|---|
| 350 |  | 
|---|
| 351 | template <class data_t> | 
|---|
| 352 | data_t * | 
|---|
| 353 | MemoryGrpBuf<data_t>::readwrite_on_node(size_t offset, int length, int node) | 
|---|
| 354 | { | 
|---|
| 355 | if (node == -1) node = grp_->me(); | 
|---|
| 356 | return readwrite(offset + grp_->offset(node)/sizeof(data_t), length); | 
|---|
| 357 | } | 
|---|
| 358 |  | 
|---|
| 359 | template <class data_t> | 
|---|
| 360 | const data_t * | 
|---|
| 361 | MemoryGrpBuf<data_t>::readonly_on_node(size_t offset, int length, int node) | 
|---|
| 362 | { | 
|---|
| 363 | if (node == -1) node = grp_->me(); | 
|---|
| 364 | return readonly(offset + grp_->offset(node)/sizeof(data_t), length); | 
|---|
| 365 | } | 
|---|
| 366 |  | 
|---|
| 367 | template <class data_t> | 
|---|
| 368 | void | 
|---|
| 369 | MemoryGrpBuf<data_t>::release() | 
|---|
| 370 | { | 
|---|
| 371 | if (accesstype_ == Write) | 
|---|
| 372 | grp_->release_writeonly((data_t *)data_, | 
|---|
| 373 | sizeof(data_t)*offset_, sizeof(data_t)*length_); | 
|---|
| 374 | if (accesstype_ == Read) | 
|---|
| 375 | grp_->release_readonly(data_, sizeof(data_t)*offset_, | 
|---|
| 376 | sizeof(data_t)*length_); | 
|---|
| 377 | if (accesstype_ == ReadWrite) | 
|---|
| 378 | grp_->release_readwrite(data_, sizeof(data_t)*offset_, | 
|---|
| 379 | sizeof(data_t)*length_); | 
|---|
| 380 |  | 
|---|
| 381 | accesstype_ = None; | 
|---|
| 382 | } | 
|---|
| 383 |  | 
|---|
| 384 | } | 
|---|
| 385 |  | 
|---|
| 386 | #endif | 
|---|
| 387 |  | 
|---|
| 388 | // Local Variables: | 
|---|
| 389 | // mode: c++ | 
|---|
| 390 | // c-file-style: "CLJ" | 
|---|
| 391 | // End: | 
|---|