blob: 3d661748e1cc7b59ef8e954173b0ef2301499619 [file] [log] [blame]
<html>
<head>
<title>shmem_collect64</title>
</head>
<h2 id="top">shmem_collect64</h2>
<h4>Purpose</h4>
<p>Concatenates blocks of data from multiple PEs into an array on every PE in the active set.
</p>
<h4>C syntax</h4>
<pre>
#include &lt;shmem.h&gt;
void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
</pre>
<h4>Parameters</h4>
<dl>
<dt class="bold">INPUT</dt>
<dd>
</dd>
<dt class="bold ">target</dt>
<dd>A symmetric data object array which will receive the collected data. For shmem_collect, shmem_collect64, shmem_fcollect and shmem_fcollect64, the target can be any 64-bit data object. For shmem_collect32 and shmem_fcollect32 the target can be any 32-bit data object.
</dd>
<dt class="bold ">source</dt>
<dd>A symmetric data object array which is the same type as the target.
</dd>
<dt class="bold ">nlong</dt>
<dd>The number of elements in the source array. For shmem_collect, shmem_collect64, shmem_fcollect and shmem_fcollect64, it is the number of 64-bit data items. For shmem_collect32 and shmem_fcollect32, it is the number of 32-bit data items.
</dd>
<dt class="bold ">PE_start</dt>
<dd>The lowest virtual PE number of the active set of PEs.
</dd>
<dt class="bold ">logPE_stride</dt>
<dd>The log (base 2) of the stride between consecutive virtual PE numbers in the active set.
</dd>
<dt class="bold ">PE_size</dt>
<dd>The number of PEs in the active set.
</dd>
<dt class="bold ">pSync</dt>
<dd>A symmetric work array. In C/C++, pSync must be of type long and size _SHMEM_COLLECT_SYNC_SIZE. Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE before any of the PEs in the active set enters the collect routine for the first time.
</dd>
</dl>
<h4>Description</h4>
<div class="ledi">
<p>The shmem_collect64 routine concatenates nlong 64- or 32-bit data items from the source arrays into the target arrays over the active set that is defined by PE_start, logPE_stride, and PE_size. The resultant target array contains the contribution from PE PE_start first, then the contribution from PE PE_start + (2^logPE_stride) second, and so on.</p>
<p>This function is a collective function, and must be called by all PEs in the active set defined by PE_start, logPE_stride, and PE_size. On all PEs in the active set, the values of PE_start, logPE_stride, and PE_size must be equal. The same source, target, and pSync objects must also be used on all PEs in the active set.
The value of nlong must be the same in all participating PEs for fcollect functions, while collect functions allow nlong to vary from PE to PE. The performance of fcollect functions is much better than collect functions, since the same nlong is used on all participating PEs and there is no need to calculate the position of each contribution in the target array.</p>
<p>Before any PE calls the collective functions, users have to ensure the following conditions exist:</p>
<p>The pSync arrays on all PEs in the active set are not still in use from a prior call to a collective function.</p>
<p>The target array on all PEs in the active set is ready to accept the collected data.</p>
<p>Upon returning from the collective function, the following conditions are true:</p>
<p>The target data array is updated.</p>
<p>The values in the pSync array are restored to the original values.</p>
<p>Each of these functions assumes that only PEs in the active set call the function. If a PE not in the active set calls the collective function, the behavior is undefined.</p>
<h4>C examples</h4>
<pre>
#include &lt;stdlib.h&gt;
#include &lt;stdio.h&gt;
#include &lt;assert.h&gt;
#include &lt;unistd.h&gt;
#define PSYNC_NUM 100
#define ARRAY_SIZE 100
#define COLLECT_VAL 1234
#include "shmem.h"
int main (int argc, char* argv[])
{
int total_tasks = -1;
int my_task = -1;
start_pes(0);
total_tasks = _num_pes();
if (total_tasks <= 0) {
printf("FAILED\n");
exit(1);
} else {
printf("number of pes is %d\n", total_tasks);
}
if (total_tasks < 2 || total_tasks % 2) {
printf("FAILED: The number of pes should be an even number. (at least 2)\n");
exit(1);
}
my_task = _my_pe();
if (my_task < 0){
printf("FAILED\n");
exit(1);
} else {
printf("my pe id is %d\n", my_task);
}
printf("my pid is %d\n", getpid());
long *srcArray = (long *)shmalloc(sizeof(long)*ARRAY_SIZE);
// initialize source array
for (int i=0; i&lt;ARRAY_SIZE; i++) {
srcArray[i] = COLLECT_VAL;
}
// initialize destination array
long *destArray = (long *)shmalloc(sizeof(long)*ARRAY_SIZE*total_tasks);
// initialize destination array
for (int i=0; i&lt;ARRAY_SIZE*total_tasks; i++) {
destArray[i] = -1;
}
long *syncList = (long *)shmalloc(sizeof(long)*PSYNC_NUM*_SHMEM_BCAST_SYNC_SIZE);
long *pSync1, *pSync2;
// Every element of this array must be initialized with the
// value _SHMEM_SYNC_VALUE (in C/C++) or SHMEM_SYNC_VALUE (in
// Fortran) before any of the PEs in the active set enter
// shmem_barrier().
pSync1 = &syncList[0];
pSync2 = &syncList[_SHMEM_COLLECT_SYNC_SIZE];
for (int i=0; i &lt; _SHMEM_COLLECT_SYNC_SIZE; i++) {
pSync1[i] = _SHMEM_SYNC_VALUE;
pSync2[i] = _SHMEM_SYNC_VALUE;
}
shmem_barrier_all(); /* Wait for all PEs to initialize pSync */
if (sizeof(long) == 4) {
shmem_collect32(destArray, srcArray, ARRAY_SIZE, 0, 0, total_tasks, pSync1);
} else if (sizeof(long) == 8) {
shmem_collect64(destArray, srcArray, ARRAY_SIZE, 0, 0, total_tasks, pSync1);
} else {
assert(!"Should not be here");
}
for (int i=0; i&lt;ARRAY_SIZE*total_tasks; i++) {
if (destArray[i] != COLLECT_VAL) {
printf("Collect value does not match.\n");
exit(1);
}
}
// reinitialize destination array
for (int i=0; i&lt;ARRAY_SIZE*total_tasks; i++) {
destArray[i] = -1;
}
printf("World geometry collect test finished.\n");
shmem_barrier_all();
int PE_start = my_task%2;
if (sizeof(long) == 4) {
shmem_collect32(destArray, srcArray, ARRAY_SIZE, PE_start, 1, total_tasks/2, pSync2);
} else if (sizeof(long) == 8) {
shmem_collect64(destArray, srcArray, ARRAY_SIZE, PE_start, 1, total_tasks/2, pSync2);
} else {
assert(!"Should not be here");
}
for (int i=0; i&lt;ARRAY_SIZE*total_tasks/2; i++) {
if (destArray[i] != COLLECT_VAL) {
printf("Collect value does not match.\n");
exit(1);
}
}
printf("Sub geometry collect test finished.\n");
shmem_barrier_all();
printf("PASSED\n");
return 0;
}
</pre>
<h4>Related information</h4>
<p>Subroutines: shmem_broadcast, shmem_and, shmem_barrier, shmem_max, shmem_min, shmem_or, shmem_prod, shmem_sum, shmem_xor
</p>
<hr><a href="apiIndex.html">OpenSHMEM API Index</a>
</html>