blob: 3daaa85bb56ab71ef367a7f61b18b757e092dbf3 [file] [log] [blame]
<html>
<head>
<title>shmem_complexf_sum_to_all</title>
</head>
<h2 id="top">shmem_complexf_sum_to_all</h2>
<h4>Purpose</h4>
<p>Performs a sum reduction function across a set of processing elements (PEs).
</p>
<h4>C syntax</h4>
<pre>
#include &lt;shmem.h&gt;
void shmem_complexf_sum_to_all(float complex *target, float complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync);
</pre>
<h4>Parameters</h4>
<dl>
<dt class="bold">INPUT</dt>
<dd>
</dd>
<dt class="bold ">target</dt>
<dd>A symmetric data object array which will receive the result of the reduction operation.
</dd>
<dt class="bold ">source</dt>
<dd>A symmetric data object array of length nreduce elements which is the same type as the target.
</dd>
<dt class="bold ">nreduce</dt>
<dd>Number of elements in the target and source arrays.
</dd>
<dt class="bold ">PE_start</dt>
<dd>The lowest virtual PE number of the active set of PEs.
</dd>
<dt class="bold ">logPE_stride</dt>
<dd>The log (base 2) of the stride between consecutive virtual PE numbers in the active set.
</dd>
<dt class="bold ">PE_size</dt>
<dd>The number of PEs in the active set.
</dd>
<dt class="bold ">pWrk</dt>
<dd>A symmetric work array. The pWrk argument must have the same data type as target. The size of the array must be max(nreduce/2+1,_SHMEM_REDUCE_MIN_WRKDATA_SIZE).
</dd>
<dt class="bold ">pSync</dt>
<dd>A symmetric work array. In C/C++, pSync must be of type long and size _SHMEM_REDUCE_SYNC_SIZE. Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE before any of the PEs in the active set enters the reduction routine the first time.
</dd>
</dl>
<h4>Description</h4>
<div class="ledi">
<p>The reduction routines compute one or more reductions across symmetric arrays on multiple processing elements (PEs). A reduction performs an associative binary operation across a set of values. The nreduce argument determines the number of separate reductions to perform. The source array on all PEs in the active set provides one element for each reduction. The results of the reductions are placed in the target array on all PEs in the active set that is defined by the PE_start, logPE_stride, PE_size.</p>
<p>The source and target arrays may be the same array, but they may not be overlapping arrays.
Each of these functions assumes that only PEs in the active set call the function. If a PE not in the active set calls the collective function, the behavior is undefined.</p>
<p>The function shmem_complexf_sum_to_all performs a reduction applying the
sum function to float complex values distributed across the PEs.</p>
<p>Before any PE calls the reduction functions, users have to ensure the following conditions exist:</p>
<p>The pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a reduction function.</p>
<p>The target array on all PEs in the active set is ready to accept the results of the
reduction.</p>
<p>Upon returning from the collective function, the following conditions are true:</p>
<p>The target data array is updated.</p>
<p>The values in the pSync array are restored to the original values.</p>
<p>Each of these functions assumes that only PEs in the active set call the function. If a PE not in the active set calls the collective function, the behavior is undefined.</p>
<h4>C examples</h4>
<pre>
#include &lt;stdlib.h&gt;
#include &lt;stdio.h&gt;
#include &lt;assert.h&gt;
#include &lt;unistd.h&gt;
#include &lt;math.h&gt;
#include &lt;shmem.h&gt;
// Number of elements reduced by each test.
#define ARRAY_SIZE 10
// Per-PE contribution and comparison tolerance for the double complex test.
const double DOUBLE_COMPLEX_SUM_REAL_INIT_VAL=8.888888;
const double DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL=5.555555;
const double DOUBLE_PRECISION_OFFSET=0.000001;
// Per-PE contribution and comparison tolerance for the float complex test.
const float FLOAT_COMPLEX_SUM_REAL_INIT_VAL=6.66f;
const float FLOAT_COMPLEX_SUM_IMAG_INIT_VAL=2.22f;
const float FLOAT_PRECISION_OFFSET=0.0001f;
// Fallback only: keep the library's value when the header already defines it.
#ifndef _SHMEM_REDUCE_MIN_WRKDATA_SIZE
#define _SHMEM_REDUCE_MIN_WRKDATA_SIZE 1
#endif
// Initial fill value for the pWrk work array.
#define _SHMEM_PWRK_VALUE 1
// Larger of X and Y. Arguments and the whole expansion are parenthesized so the
// macro is safe inside bigger expressions; each argument may be evaluated twice.
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
// Example driver: runs shmem_complexd_sum_to_all and shmem_complexf_sum_to_all
// over all PEs. Every PE contributes the same constant to each element, so each
// reduced element must equal PE_size times that constant.
// Prints PASSED and returns 0 on success; prints FAILED and exits with 1 otherwise.
int main (int argc, char* argv[])
{
    int total_tasks = -1;
    int PE_size = -1;
    int my_task = -1;
    int i;

    start_pes(0);

    total_tasks = _num_pes();
    if (total_tasks &lt;= 0) {
        printf("FAILED\n");
        exit(1);
    } else {
        printf("number of pes is %d\n", total_tasks);
    }
    if (total_tasks &lt; 2 || total_tasks % 2) {
        printf("FAILED: The number of pes should be an even number. (at least 2)\n");
        exit(1);
    }

    my_task = _my_pe();
    if (my_task &lt; 0) {
        printf("FAILED\n");
        exit(1);
    } else {
        printf("my pe id is %d\n", my_task);
    }
    printf("my pid is %d\n", (int)getpid());

    // pWrk is shared by both tests, so it is sized for the larger element type
    // (double complex); the reduction requires it to hold
    // max(nreduce/2+1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements of the target type.
    int size = (MAX((ARRAY_SIZE/2+1), _SHMEM_REDUCE_MIN_WRKDATA_SIZE));

    long *pSync = (long *)shmalloc(sizeof(long)*_SHMEM_REDUCE_SYNC_SIZE);
    double complex *pWrk = (double complex *)shmalloc(sizeof(double complex)*size);
    double complex *source = (double complex *)shmalloc(sizeof(double complex)*ARRAY_SIZE);
    double complex *target = (double complex *)shmalloc(sizeof(double complex)*ARRAY_SIZE);
    float complex *src = (float complex *)shmalloc(sizeof(float complex)*ARRAY_SIZE);
    float complex *tgt = (float complex *)shmalloc(sizeof(float complex)*ARRAY_SIZE);
    if (pSync == NULL || pWrk == NULL || source == NULL || target == NULL ||
        src == NULL || tgt == NULL) {
        printf("FAILED: shmalloc returned NULL\n");
        exit(1);
    }

    // Initialize exactly the elements that were allocated for pSync.
    for (i=0; i &lt; _SHMEM_REDUCE_SYNC_SIZE; i++) {
        pSync[i] = _SHMEM_SYNC_VALUE;
    }
    for (i=0; i &lt; size; i++) {
        pWrk[i] = _SHMEM_PWRK_VALUE;
    }
    shmem_barrier_all(); // Wait for all PEs to initialize pSync and pWrk

    //Firstly, test double complex sum
    printf("double complex--->SUM test begin.\n");
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        source[i] = DOUBLE_COMPLEX_SUM_REAL_INIT_VAL + DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL * I;
        target[i] = 0.0 + 0.0*I;
    }
    PE_size = total_tasks;
    shmem_complexd_sum_to_all(target, source, ARRAY_SIZE, 0, 0,
                              PE_size, pWrk, pSync);

    //verify the correctness of the result: fail when either component is off
    //by more than the tolerance, in either direction
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        if (fabs(creal(target[i])-PE_size*DOUBLE_COMPLEX_SUM_REAL_INIT_VAL) > DOUBLE_PRECISION_OFFSET ||
            fabs(cimag(target[i])-PE_size*DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL) > DOUBLE_PRECISION_OFFSET) {
            printf("FAILED, target[%d] should be %f+%f*I instead of %f+%f*I\n", i,
                   PE_size*DOUBLE_COMPLEX_SUM_REAL_INIT_VAL, PE_size*DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL,
                   creal(target[i]), cimag(target[i]));
            exit(1);
        }
    }
    //printf the correct result
    printf("The following data is the computing result of complexd--->SUM.\n");
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        printf("target[%d]---> %lf+%lf*I\n", i, creal(target[i]), cimag(target[i]));
    }
    printf("double complex--->SUM test finished.\n");

    // pWrk and pSync are reused below; make sure no PE is still using them.
    shmem_barrier_all();

    //Secondly, test float complex sum
    printf("float complex--->SUM test begin.\n");
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        src[i] = FLOAT_COMPLEX_SUM_REAL_INIT_VAL + FLOAT_COMPLEX_SUM_IMAG_INIT_VAL * I;
        tgt[i] = 0.0 + 0.0*I;
    }
    PE_size = total_tasks;
    shmem_complexf_sum_to_all(tgt, src, ARRAY_SIZE, 0, 0,
                              PE_size, (float complex *)pWrk, pSync);

    //verify the correctness of the result; the tolerance grows with PE_size
    //because single-precision rounding accumulates with every addition
    double float_tolerance = (double)FLOAT_PRECISION_OFFSET * PE_size;
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        if (fabs(creal(tgt[i])-PE_size*FLOAT_COMPLEX_SUM_REAL_INIT_VAL) > float_tolerance ||
            fabs(cimag(tgt[i])-PE_size*FLOAT_COMPLEX_SUM_IMAG_INIT_VAL) > float_tolerance) {
            printf("FAILED, target[%d] should be %f+%f*I instead of %f+%f*I\n", i,
                   PE_size*FLOAT_COMPLEX_SUM_REAL_INIT_VAL, PE_size*FLOAT_COMPLEX_SUM_IMAG_INIT_VAL,
                   creal(tgt[i]), cimag(tgt[i]));
            exit(1);
        }
    }
    //printf the correct result
    printf("The following data is the computing result of complexf--->SUM.\n");
    for (i=0; i &lt; ARRAY_SIZE; i++) {
        printf("target[%d]---> %f+%f*I\n", i, creal(tgt[i]), cimag(tgt[i]));
    }
    printf("float complex--->SUM test finished.\n");
    shmem_barrier_all();

    // Release the symmetric allocations once every PE is done with them.
    shfree(tgt);
    shfree(src);
    shfree(target);
    shfree(source);
    shfree(pWrk);
    shfree(pSync);

    printf("PASSED\n");
    return 0;
}
</pre>
<h4>Related information</h4>
<p>Subroutines: shmem_and, shmem_barrier, shmem_broadcast, shmem_collect, shmem_max, shmem_min, shmem_or, shmem_prod, shmem_sum, shmem_xor
</p>
<hr><a href="apiIndex.html">OpenSHMEM API Index</a>
</html>