-
Notifications
You must be signed in to change notification settings - Fork 0
/
hmpi_mic_port_10_14.patch
325 lines (282 loc) · 10.7 KB
/
hmpi_mic_port_10_14.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
Index: hmpi.h
===================================================================
--- hmpi.h (revision 810)
+++ hmpi.h (working copy)
@@ -369,6 +369,7 @@
static int HMPI_Comm_group(HMPI_Comm comm, HMPI_Group* group)
{
*group = NULL;
+ printf("stub Comm_ group \n");
return MPI_SUCCESS;
}
@@ -473,6 +474,9 @@
#define MPI_Comm_free HMPI_Comm_free
#define MPI_Comm_split HMPI_Comm_split
+#define MPI_Comm_group(c, group) \
+ MPI_Comm_group((c)->comm, group)
+
#define MPI_Cart_coords(comm, rank, maxdims, coords) \
MPI_Cart_coords((comm)->comm, rank, maxdims, coords)
@@ -539,7 +543,7 @@
//TODO NOT IMPLEMENTED YET
// Added to catch apps that call these routines.
-#define MPI_Comm_group HMPI_Comm_group
+//#define MPI_Comm_group HMPI_Comm_group
#endif //HMPI_INTERNAL
Index: sm_malloc.c
===================================================================
--- sm_malloc.c (revision 810)
+++ sm_malloc.c (working copy)
@@ -143,15 +143,20 @@
#else
//Total shared memory space to mmap.
-#define DEFAULT_TOTAL_SIZE ((1024L*1024L*1024L * 512))
+//#define DEFAULT_TOTAL_SIZE ((1024L*1024L*1024L * 8))
+//#define DEFAULT_TOTAL_SIZE ((1024L*1024L*64))
+#define DEFAULT_TOTAL_SIZE ((1024L*1024L*1024L*6))
+//#define DEFAULT_TOTAL_SIZE ((1024L*1024L*1024L*24))
//How many pieces the available SM memory should be divided into.
// Each rank/process will get one piece.
-#define DEFAULT_RANK_DIVIDER (16)
+#define DEFAULT_RANK_DIVIDER (20)
+//#define DEFAULT_RANK_DIVIDER (16)
#endif
-static char* sm_filename = "hmpismfile";
+static char* sm_filename = "/hmpismfile.mic";
+//static char* sm_filename = "/hmpismfile.cpu";
static void __sm_destroy(void)
@@ -309,6 +314,11 @@
//SM_RANKS and DEFAULT_RANK_DIVIDER indicate how many regions to break the
//SM region into -- one region per rank/process.
tmp = getenv("SM_RANKS");
+ if (tmp == NULL){
+#ifdef __MIC__
+ tmp = getenv("MIC_PPN");
+#endif
+ }
if(tmp == NULL) {
rank_divider = DEFAULT_RANK_DIVIDER;
} else {
@@ -368,6 +378,7 @@
//Create my own mspace.
size_t local_size = total_size / rank_divider;
+ printf("params: %ld %ld\n", total_size, rank_divider);
//void* base = sm_morecore(local_size);
void* base = (void*)__sync_fetch_and_add(&sm_region->brk, local_size);
if(base < sm_lower || base >= sm_upper) {
Index: hmpi_p2p.c
===================================================================
--- hmpi_p2p.c (revision 810)
+++ hmpi_p2p.c (working copy)
@@ -1756,6 +1756,7 @@
HMPI_Comm_node_rank(comm, dest, &dest_node_rank);
if(dest_node_rank != MPI_UNDEFINED) {
+ //printf("reached local hmpi_send() path target : %d tag : %d count : %d \n", dest, tag, count);
HMPI_Request req = acquire_req();
HMPI_Local_isend(buf, count, datatype, dest_node_rank, tag, comm, req);
@@ -1766,13 +1767,14 @@
do {
HMPI_Progress(recv_reqs_head, local_list, shared_list);
+ //printf("hmpi_progress()... \n");
} while(HMPI_Progress_send(req) != HMPI_REQ_COMPLETE);
release_req(req);
} else {
MPI_Request req;
int flag = 0;
-
+ //printf("reached MPI_send() path target : %d tag : %d count : %d \n", dest, tag, count);
//Can't use MPI_Send here :(
//Deadlocks are possible if local progress isn't made.
MPI_Isend(buf, count, datatype, dest, tag, comm->comm, &req);
@@ -1929,7 +1931,7 @@
if(src_node_rank != MPI_UNDEFINED) {
HMPI_Request req = acquire_req();
-
+ //printf("reached local hmpi_recv() path source : %d tag : %d\n", source, tag);
//Yes, Local_irecv uses source, not src_node_rank.
HMPI_Local_irecv(buf, count, datatype, source, tag, comm, req);
//HMPI_Wait(&req, status);
@@ -1940,6 +1942,7 @@
do {
HMPI_Progress(recv_reqs_head, local_list, shared_list);
+ //printf("hmpi_progress()... \n");
} while(get_reqstat(req) != HMPI_REQ_COMPLETE);
if(status != HMPI_STATUS_IGNORE) {
@@ -1954,7 +1957,7 @@
} else {
MPI_Request req;
int flag = 0;
-
+ //printf("reached MPI_recv() path source : %d tag : %d\n", source, tag);
MPI_Irecv(buf, count, datatype, source, tag, comm->comm, &req);
HMPI_Item* recv_reqs_head = &g_recv_reqs_head;
@@ -2042,15 +2045,18 @@
HMPI_Comm comm, HMPI_Status *status)
{
HMPI_Request req;
-
+ printf("HMPI_SENDRECV() source: %d \n",source);
//Irecv/Send/Wait is chosen intentionally: this creates the possibility
// for sender-side acceleration in the synergistic protocol. Doing
// Isend/Recv/Wait would be less likely to do so since it'll only poll
// the recv until that completes, then the send. Irecv/Send polls both.
HMPI_Irecv(recvbuf, recvcount, recvtype, source, recvtag, comm, &req);
+ printf("HMPI_SENDRECV() after Irecv() \n");
HMPI_Send(sendbuf, sendcount, sendtype, dest, sendtag, comm);
+ printf("HMPI_SENDRECV() after Send() \n");
HMPI_Wait(&req, status);
+ printf("HMPI_SENDRECV() exit() \n");
return MPI_SUCCESS;
}
Index: Makefile
===================================================================
--- Makefile (revision 810)
+++ Makefile (working copy)
@@ -29,7 +29,7 @@
CC=mpicc -std=gnu99
WARN=-Wall -Wuninitialized -Winline #-Wno-unused-function
-CFLAGS+=$(WARN) -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+CFLAGS+=$(WARN) -xhost -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
#CFLAGS=$(WARN) -O3 -mcpu=power7 -fomit-frame-pointer
#CFLAGS=$(WARN) $(INCLUDE) -O3 -march=native -fomit-frame-pointer
#CFLAGS=$(WARN) -O0 -g
@@ -52,10 +52,10 @@
HDRS=hmpi.h barrier.h lock.h profile2.h
-all: INCS+=-DUSE_NUMA=1 -DUSE_MCS=1
+all: INCS+=-DUSE_MCS=1 #USE_NUMA left undefined on purpose: hmpi.c tests it with #ifdef, so =0 would still enable it
all: SRCS+=sm_malloc.c
all: $(SRCS:%.c=%.o) sm_malloc.o
- ar sr libhmpi.a $(SRCS:%.c=%.o)
+ xiar crs libhmpi.a $(SRCS:%.c=%.o)
rm $(SRCS:%.c=%.o)
#bgq: CFLAGS=-O3 -qhot=novector -qsimd=auto $(INCLUDE) -qinline=auto:level=5 -qassert=refalign -qlibansi -qlibmpi -qipa -qhot -qprefetch=aggressive
@@ -88,7 +88,7 @@
debug: CFLAGS = $(WARN) -g -O0 -rdynamic $(INCLUDE)
debug: SRCS+=sm_malloc.c
debug: $(SRCS:%.c=%.o) sm_malloc.o
- ar sr libhmpi.a $(SRCS:%.c=%.o)
+ xiar crs libhmpi.a $(SRCS:%.c=%.o)
rm $(SRCS:%.c=%.o)
opi: all example_opi.c
@@ -102,4 +102,3 @@
bgq_clean:
rm -f *.o libhmpi-bgq.a
-
Index: hmpi.c
===================================================================
--- hmpi.c (revision 810)
+++ hmpi.c (working copy)
@@ -80,7 +80,6 @@
#endif
#ifdef USE_NUMA
-#include <numa.h>
#endif
@@ -406,8 +405,16 @@
//MPI communicator. All other values will be filled in based on the MPI comm.
void init_communicator(HMPI_Comm comm)
{
+ if(comm->comm == MPI_COMM_NULL){
+ printf("DEBUG_ HELLO INIT_COMM comm NULL! \n");
+ } else {
+ printf("DEBUG_ HELLO INIT_COMM comm good \n");
+ }
+
//Fill in the cached comm variables.
+ printf("DEBUG_HMPIComm_ init_communcitor(c) [inside] \n");
MPI_Comm_rank(comm->comm, &comm->comm_rank);
+ printf("DEBUG_HMPIComm_ init_communcitor(c) [after] rank:%d \n",comm->comm_rank);
//MPI_Comm_size(comm, &comm->comm_size);
@@ -446,7 +453,14 @@
&comm->node_comm);
}
+ printf("DEBUG_HMPIComm_ node_rank init() rank:%d \n",comm->comm_rank);
+ if(comm->node_comm == MPI_COMM_NULL){
+ printf("DEBUG_ INIT_NODE_COMM Node_comm NULL! rank:%d \n",comm->comm_rank);
+ }else {
+ printf("DEBUG_ INIT_NODE_COMM Node_comm is good... rank:%d \n",comm->comm_rank);
+ }
MPI_Comm_rank(comm->node_comm, &comm->node_rank);
+ printf("DEBUG_HMPIComm_ node_rank init() [after] rank:%d \n",comm->comm_rank);
MPI_Comm_size(comm->node_comm, &comm->node_size);
//Translate rank 0 in the node comm into its rank in the main comm.
@@ -762,15 +776,45 @@
int HMPI_Comm_create(HMPI_Comm comm, MPI_Group group, HMPI_Comm* newcomm)
{
//Allocate a new HMPI communicator.
- HMPI_Comm c = MALLOC(HMPI_Comm_info, 1);
+ HMPI_Comm c = (HMPI_Comm_info*)MALLOC(HMPI_Comm_info, 1);
+ printf("DEBUG_HMPIComm_create \n");
//Create an MPI comm from the group.
MPI_Comm_create(comm->comm, group, &c->comm);
+
+ //dummy test
+ MPI_Comm_rank(comm->comm, &comm->comm_rank);
+ if(comm->comm == MPI_COMM_NULL){
+ printf("DEBUG_ HMPI_COMM_CREATE comm NULL! rank:%d \n",comm->comm_rank);
+ } else {
+ printf("DEBUG_ HMPI_COMM_CREATE comm good rank:%d \n",comm->comm_rank);
+ }
+ if(c->comm == MPI_COMM_NULL){
+ printf("DEBUG_ NEW_HMPI_COMM_CREATE comm NULL! return from HMPI_Comm_create() \n");
+ //NOTE(review): MPI_Comm_create hands MPI_COMM_NULL to ranks that are not
+ //in 'group'; that is a normal outcome, not a failure. Returning MPI_ERR_COMM
+ //with a *newcomm that never went through init_communicator() looks unintended -- confirm with callers.
+ *newcomm = c ;
+ return MPI_ERR_COMM;
+ } else {
+ printf("DEBUG_ NEW_HMPI_COMM_CREATE comm good \n");
+ printf("DEBUG_ NEW_HMPI_COMM_CREATE comm good MY_rank:");
+ MPI_Comm_rank(c->comm, &c->comm_rank);
+ printf(" :%d \n",c->comm_rank);
+ }
+
+
//Initialize the rest of the HMPI comm.
+ printf("DEBUG_HMPIComm_create init_communicator() [after] \n");
init_communicator(c);
+ //HMPI_Comm cm = (HMPI_Comm_info*)MALLOC(HMPI_Comm_info, 0);
+ //cm->comm = MPI_COMM_WORLD;
+ //init_communicator(cm);
+
*newcomm = c;
+ //*newcomm = cm;
return MPI_SUCCESS;
}
@@ -794,9 +838,23 @@
int HMPI_Comm_free(HMPI_Comm* comm)
{
HMPI_Comm c = *comm;
-
+ printf("HMPI comm_free() \n");
//Free malloc'd resources on the comm.
+ if(c->net_comm != MPI_COMM_NULL){
+ printf("HMPI comm net_comm NOT NULL \n");
+ //Debug probes only: they compare the handle values and free nothing here.
+ }
+
+ if(c->node_comm != MPI_COMM_NULL){
+ printf("HMPI comm node_comm NOT NULL \n");
+ //MPI_Comm_free(&c->node_comm);
+ }
+
+ if(c->comm != MPI_COMM_NULL){
+ printf("HMPI comm comm NOT NULL \n");
+ //MPI_Comm_free(&c->comm);
+ }
//Free all the MPI communicators (main, node, net, numa).
MPI_Comm_free(&c->net_comm);
MPI_Comm_free(&c->node_comm);
Index: lock.h
===================================================================
--- lock.h (revision 810)
+++ lock.h (working copy)
@@ -334,7 +334,10 @@
#endif
-#ifdef __x86_64__ //Better x86 versions
+#if defined(__MIC__) && defined(__x86_64__) //MIC build: compiler-only barriers, no fence instruction emitted
+#define STORE_FENCE() __asm__ volatile ("":::"memory")
+#define LOAD_FENCE() __asm__ volatile ("":::"memory")
+#elif defined(__x86_64__) //Better x86 versions
#define STORE_FENCE() __asm__ volatile ("sfence")
#define LOAD_FENCE() __asm__ volatile ("lfence")
#else //Default GCC builtins