1 | //
|
---|
2 | // memarmci.cc
|
---|
3 | // based on memshm.cc
|
---|
4 | //
|
---|
5 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
6 | //
|
---|
7 | // Author: Curtis Janssen <cljanss@ca.sandia.gov>
|
---|
8 | // Maintainer: SNL
|
---|
9 | //
|
---|
10 | // This file is part of the SC Toolkit.
|
---|
11 | //
|
---|
12 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
13 | // it under the terms of the GNU Library General Public License as published by
|
---|
14 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
15 | // any later version.
|
---|
16 | //
|
---|
17 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
18 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | // GNU Library General Public License for more details.
|
---|
21 | //
|
---|
22 | // You should have received a copy of the GNU Library General Public License
|
---|
23 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
24 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | //
|
---|
26 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
27 | //
|
---|
28 |
|
---|
29 | #ifndef _util_group_memarmci_cc
|
---|
30 | #define _util_group_memarmci_cc
|
---|
31 |
|
---|
32 | #ifdef __GNUC__
|
---|
33 | #pragma implementation
|
---|
34 | #endif
|
---|
35 |
|
---|
36 | extern "C" {
|
---|
37 | #include <armci.h>
|
---|
38 | }
|
---|
39 |
|
---|
40 | #include <stdexcept>
|
---|
41 |
|
---|
42 | #include <util/misc/formio.h>
|
---|
43 | #include <util/class/scexception.h>
|
---|
44 | #include <util/group/memarmci.h>
|
---|
45 |
|
---|
46 | using namespace sc;
|
---|
47 |
|
---|
48 | static ClassDesc ARMCIMemoryGrp_cd(
|
---|
49 | typeid(ARMCIMemoryGrp),"ARMCIMemoryGrp",1,"public RDMAMemoryGrp",
|
---|
50 | 0, create<ARMCIMemoryGrp>, 0);
|
---|
51 |
|
---|
52 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<MessageGrp>& msg):
|
---|
53 | RDMAMemoryGrp(msg)
|
---|
54 | {
|
---|
55 | init();
|
---|
56 | }
|
---|
57 |
|
---|
58 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<KeyVal>& keyval):
|
---|
59 | RDMAMemoryGrp(keyval)
|
---|
60 | {
|
---|
61 | init();
|
---|
62 | }
|
---|
63 |
|
---|
64 | void
|
---|
65 | ARMCIMemoryGrp::init()
|
---|
66 | {
|
---|
67 | armci_lock_ = ThreadGrp::get_default_threadgrp()->new_lock();
|
---|
68 | //debug_ = 1;
|
---|
69 | all_data_ = 0;
|
---|
70 | ARMCI_Init();
|
---|
71 | }
|
---|
72 |
|
---|
73 | void
|
---|
74 | ARMCIMemoryGrp::finalize()
|
---|
75 | {
|
---|
76 | set_localsize(0);
|
---|
77 | ARMCI_Finalize();
|
---|
78 | }
|
---|
79 |
|
---|
80 | void
|
---|
81 | ARMCIMemoryGrp::set_localsize(size_t localsize)
|
---|
82 | {
|
---|
83 | ARMCI_AllFence();
|
---|
84 |
|
---|
85 | // this will initialize the offsets_ array
|
---|
86 | RDMAMemoryGrp::set_localsize(localsize);
|
---|
87 |
|
---|
88 | if (all_data_) {
|
---|
89 | ARMCI_Free(data_);
|
---|
90 | delete[] all_data_;
|
---|
91 | all_data_ = 0;
|
---|
92 | data_ = 0;
|
---|
93 | ARMCI_Destroy_mutexes();
|
---|
94 | }
|
---|
95 |
|
---|
96 | if (localsize == 0) return;
|
---|
97 |
|
---|
98 | all_data_ = new void*[n()];
|
---|
99 | int r;
|
---|
100 | r = ARMCI_Malloc(all_data_, localsize);
|
---|
101 | data_ = reinterpret_cast<char*>(all_data_[me()]);
|
---|
102 |
|
---|
103 | if (debug_) {
|
---|
104 | for (int i=0; i<n(); i++) {
|
---|
105 | std::cout << me() << ": all_data[" << i
|
---|
106 | << "] = " << all_data_[i] << std::endl;
|
---|
107 | }
|
---|
108 | }
|
---|
109 |
|
---|
110 | ARMCI_Create_mutexes(1);
|
---|
111 | }
|
---|
112 |
|
---|
113 | void
|
---|
114 | ARMCIMemoryGrp::retrieve_data(void *data, int node, int offset,
|
---|
115 | int size, int lock)
|
---|
116 | {
|
---|
117 | if (armci_lock_.nonnull()) armci_lock_->lock();
|
---|
118 | if (lock) ARMCI_Lock(0, node);
|
---|
119 | ARMCI_Get(reinterpret_cast<char*>(all_data_[node])+offset, data, size, node);
|
---|
120 | if (armci_lock_.nonnull()) armci_lock_->unlock();
|
---|
121 | }
|
---|
122 |
|
---|
123 | void
|
---|
124 | ARMCIMemoryGrp::replace_data(void *data, int node, int offset,
|
---|
125 | int size, int unlock)
|
---|
126 | {
|
---|
127 | if (armci_lock_.nonnull()) armci_lock_->lock();
|
---|
128 | ARMCI_Put(data, reinterpret_cast<char*>(all_data_[node])+offset, size, node);
|
---|
129 | if (unlock) {
|
---|
130 | ARMCI_Fence(node);
|
---|
131 | ARMCI_Unlock(0, node);
|
---|
132 | }
|
---|
133 | if (armci_lock_.nonnull()) armci_lock_->unlock();
|
---|
134 | }
|
---|
135 |
|
---|
136 | void
|
---|
137 | ARMCIMemoryGrp::sum_data(double *data, int node, int offset, int size)
|
---|
138 | {
|
---|
139 | int doffset = offset/sizeof(double);
|
---|
140 | int dsize = size/sizeof(double);
|
---|
141 |
|
---|
142 | void *src = data;
|
---|
143 | void *dst = reinterpret_cast<double*>(all_data_[node])+doffset;
|
---|
144 |
|
---|
145 | armci_giov_t acc_dat;
|
---|
146 | acc_dat.src_ptr_array = &src;
|
---|
147 | acc_dat.dst_ptr_array = &dst;
|
---|
148 | acc_dat.bytes = dsize * sizeof(double);
|
---|
149 | acc_dat.ptr_array_len = 1;
|
---|
150 | double scale = 1.0;
|
---|
151 |
|
---|
152 | if (debug_) {
|
---|
153 | std::cout << me() << ": summing " << dsize
|
---|
154 | << " doubles from "
|
---|
155 | << (void*)src
|
---|
156 | << " to "
|
---|
157 | << (void*)dst
|
---|
158 | << " on " << node
|
---|
159 | << " (base dest=" << (void*)all_data_[node] << ")"
|
---|
160 | << std::endl;
|
---|
161 | for (int i=0; i<dsize; i++) {
|
---|
162 | std::cout << me() << ": src[" << i << "] = "
|
---|
163 | << data[i] << std::endl;
|
---|
164 | }
|
---|
165 | // for (int i=0; i<dsize; i++) {
|
---|
166 | // std::cout << me() << ": dst[" << i << "] = "
|
---|
167 | // << ((double*)(all_data_[node]))[doffset+i]
|
---|
168 | // << std::endl;
|
---|
169 | // }
|
---|
170 | }
|
---|
171 |
|
---|
172 | if (armci_lock_.nonnull()) armci_lock_->lock();
|
---|
173 | // Original code sending all data at once:
|
---|
174 | // ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node);
|
---|
175 | // Hack to send smaller chunks to not overflow buffers in ARMCI:
|
---|
176 | int incr = 32768;
|
---|
177 | for (int i=0; i<size; i+=incr) {
|
---|
178 | void *tsrc = (&(((char*)src)[i]));
|
---|
179 | void *tdst = (&(((char*)dst)[i]));
|
---|
180 | acc_dat.src_ptr_array = &tsrc;
|
---|
181 | acc_dat.dst_ptr_array = &tdst;
|
---|
182 | if (size - i > incr) acc_dat.bytes = incr;
|
---|
183 | else acc_dat.bytes = (size-i);
|
---|
184 | acc_dat.ptr_array_len = 1;
|
---|
185 | ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node);
|
---|
186 | }
|
---|
187 | // Send data all at once using the contiguous routine (which does not exist):
|
---|
188 | // ARMCI_Acc(ARMCI_ACC_DBL, &scale, src, dst, size, node);
|
---|
189 | if (armci_lock_.nonnull()) armci_lock_->unlock();
|
---|
190 | }
|
---|
191 |
|
---|
192 | void
|
---|
193 | ARMCIMemoryGrp::sync()
|
---|
194 | {
|
---|
195 | ARMCI_Barrier();
|
---|
196 | }
|
---|
197 |
|
---|
198 | void
|
---|
199 | ARMCIMemoryGrp::deactivate()
|
---|
200 | {
|
---|
201 | // Really, this is still active after deactivate is called.
|
---|
202 | // However, we'll at least make sure that all outstanding
|
---|
203 | // requests are finished.
|
---|
204 | ARMCI_AllFence();
|
---|
205 | }
|
---|
206 |
|
---|
207 | void*
|
---|
208 | ARMCIMemoryGrp::malloc_local(size_t nbyte)
|
---|
209 | {
|
---|
210 | void* buf = ARMCI_Malloc_local(nbyte);
|
---|
211 | if (buf == NULL)
|
---|
212 | throw MemAllocFailed("malloc_local -- failed to allocate memory",
|
---|
213 | __FILE__, __LINE__, nbyte, this->class_desc());
|
---|
214 | return buf;
|
---|
215 | }
|
---|
216 |
|
---|
217 | void
|
---|
218 | ARMCIMemoryGrp::free_local(void *data)
|
---|
219 | {
|
---|
220 | ARMCI_Free_local(data);
|
---|
221 | }
|
---|
222 |
|
---|
223 | ARMCIMemoryGrp::~ARMCIMemoryGrp()
|
---|
224 | {
|
---|
225 | finalize();
|
---|
226 | }
|
---|
227 |
|
---|
228 | void
|
---|
229 | ARMCIMemoryGrp::print(std::ostream &o) const
|
---|
230 | {
|
---|
231 | RDMAMemoryGrp::print(o);
|
---|
232 | }
|
---|
233 |
|
---|
234 | #endif
|
---|
235 |
|
---|
236 | /////////////////////////////////////////////////////////////////////////////
|
---|
237 |
|
---|
238 | // Local Variables:
|
---|
239 | // mode: c++
|
---|
240 | // c-file-style: "CLJ"
|
---|
241 | // End:
|
---|