46 const char TransA,
const char TransB,
62 if (!
A.ConstantStride() ||
63 !
B.ConstantStride() ||
65 !
C.ConstantStride())
return(-1);
77 int A_nrows =
A.MyLength();
78 int A_ncols =
A.NumVectors();
79 int B_nrows =
B.MyLength();
80 int B_ncols =
B.NumVectors();
81 int C_nrows =
C.MyLength();
82 int C_ncols =
C.NumVectors();
86 int C_GEMM_Stride = 0;
96 int opA_ncols = (TransA==
'N') ?
A.NumVectors() :
A.MyLength();
97 int opB_nrows = (TransB==
'N') ?
B.MyLength() :
B.NumVectors();
99 int C_global_inner_dim = (TransA==
'N') ?
A.NumVectors() :
A.GlobalLength();
102 bool A_is_local = (!
A.DistributedGlobal());
103 bool B_is_local = (!
B.DistributedGlobal());
104 bool C_is_local = (!
C.DistributedGlobal());
106 int A_IndexBase =
A.Map().IndexBase();
107 int B_IndexBase =
B.Map().IndexBase();
113 int* A_MyGlobalElements =
new int[A_nrows];
115 int* B_MyGlobalElements =
new int[B_nrows];
116 B_Map->MyGlobalElements(B_MyGlobalElements);
120 if (
C.MyLength() != C_nrows ||
121 opA_ncols != opB_nrows ||
122 C.NumVectors() != C_ncols ||
127 delete [] A_MyGlobalElements;
128 delete [] B_MyGlobalElements;
132 bool Case1 = ( A_is_local && B_is_local && C_is_local);
133 bool Case2 = (!A_is_local && !B_is_local && C_is_local && TransA==
'T' );
134 bool Case3 = (!A_is_local && B_is_local && !C_is_local && TransA==
'N');
138 if (!(Case1 || Case2 || Case3)) {
141 delete [] A_MyGlobalElements;
142 delete [] B_MyGlobalElements;
178 double sf = C_global_inner_dim;
179 double sfinv = 1.0/sf;
185 for (j = 0; j <A_ncols ; j++)
186 for (i = 0; i<A_nrows; i++)
190 Ap[i + A_Stride*j] = (fi*sfinv)*fj;
195 for (j = 0; j <A_ncols ; j++)
196 for (i = 0; i<A_nrows; i++)
198 fi = A_MyGlobalElements[i]+1;
200 Ap[i + A_Stride*j] = (fi*sfinv)*fj;
208 for (j = 0; j <B_ncols ; j++)
209 for (i = 0; i<B_nrows; i++)
213 Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj);
218 for (j = 0; j <B_ncols ; j++)
219 for (i = 0; i<B_nrows; i++)
221 fi = B_MyGlobalElements[i]+1;
223 Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj);
233 for (j = 0; j <C_ncols ; j++)
234 for (i = 0; i<C_nrows; i++)
237 fi = (i+1)*C_global_inner_dim;
239 C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
240 + beta * Cp[i + C_Stride*j];
245 for (j = 0; j <C_ncols ; j++)
246 for (i = 0; i<C_nrows; i++)
249 fi = (i+1)*C_global_inner_dim;
251 C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
252 + beta * Cp[i + C_Stride*j];
257 for (j = 0; j <C_ncols ; j++)
258 for (i = 0; i<C_nrows; i++)
261 fi = (A_MyGlobalElements[i]+1)*C_global_inner_dim;
263 C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
264 + beta * Cp[i + C_Stride*j];
269 delete []A_MyGlobalElements;
270 delete []B_MyGlobalElements;
281 double*
const dotvec_AB,
282 double*
const norm1_A,
283 double*
const norm2_sqrtA,
284 double*
const norminf_A,
285 double*
const normw_A,
287 double*
const minval_A,
288 double*
const maxval_A,
289 double*
const meanval_A ) {
305 int A_nrows =
A.MyLength();
306 int A_ncols =
A.NumVectors();
309 int B_nrows =
B.MyLength();
310 int B_ncols =
B.NumVectors();
316 double *C_alphaAp = 0;
317 double *C_alphaAplusBp = 0;
318 double *C_plusBp = 0;
319 double *Weightsp = 0;
330 bool A_is_local = (
A.MyLength() ==
A.GlobalLength());
331 bool B_is_local = (
B.MyLength() ==
B.GlobalLength());
332 bool C_is_local = (
C.MyLength() ==
C.GlobalLength());
334 int A_IndexBase =
A.Map().IndexBase();
335 int B_IndexBase =
B.Map().IndexBase();
341 int* A_MyGlobalElements =
new int[A_nrows];
343 int* B_MyGlobalElements =
new int[B_nrows];
344 B_Map->MyGlobalElements(B_MyGlobalElements);
348 if (
C.MyLength() != A_nrows ||
349 A_nrows != B_nrows ||
350 C.NumVectors() != A_ncols ||
351 A_ncols != B_ncols ||
352 sqrtA_nrows != A_nrows ||
353 sqrtA_ncols != A_ncols ||
356 C.MyLength() != C_alphaAplusB.
MyLength() ||
359 C.NumVectors() != C_plusB.
NumVectors() )
return(-2);
362 bool Case1 = ( A_is_local && B_is_local && C_is_local);
363 bool Case2 = (!A_is_local && !B_is_local && !C_is_local);
367 if (!(Case1 || Case2))
return(-3);
389 double sf =
A.GlobalLength();
390 double sfinv = 1.0/sf;
395 for (i = 0; i<A_nrows; i++) {
398 sqrtAp[i] = std::sqrt(Ap[i]);
402 for (i = 0; i<A_nrows; i++) {
403 fi = A_MyGlobalElements[i]+1;
405 sqrtAp[i] = std::sqrt(Ap[i]);
412 for (i = 0; i<B_nrows; i++) {
414 Bp[i] = 1.0/((fi*sfinv));
418 for (i = 0; i<B_nrows; i++) {
419 fi = B_MyGlobalElements[i]+1;
420 Bp[i] = 1.0/((fi*sfinv));
426 for (i = 0; i<A_nrows; i++) C_alphaAp[i] = alpha * Ap[i];
430 for (i = 0; i<A_nrows; i++) C_alphaAplusBp[i] = alpha * Ap[i] + Bp[i];
434 for (i = 0; i<A_nrows; i++) C_plusBp[i] = Cp[i] + Bp[i];
438 for (i=0; i<
A.NumVectors(); i++) dotvec_AB[i] =
C.GlobalLength();
444 double result =
C.GlobalLength();
447 result *= (double)(
C.GlobalLength()+1);
451 for (i=0; i<
A.NumVectors(); i++)
453 norm1_A[i] = result * ((
double) (i+1));
457 for (i=0; i<
A.NumVectors(); i++)
459 norm2_sqrtA[i] = std::sqrt(result * ((
double) (i+1)));
463 for (i=0; i<
A.NumVectors(); i++)
465 norminf_A[i] = (double) (i+1);
466 minval_A[i] = (double) (i+1)/ (double)
A.GlobalLength();
467 maxval_A[i] = (double) (i+1);
468 meanval_A[i] = norm1_A[i]/((double) (
A.GlobalLength()));
473 for (j=0; j<A_nrows; j++) Weightsp[j] = Ap[j];
477 delete [] A_MyGlobalElements;
478 delete [] B_MyGlobalElements;
int MyGlobalElements(int *MyGlobalElementList) const
Puts list of global elements on this processor into the user-provided array.
Epetra_Map: A class for partitioning vectors and matrices.
bool ConstantStride() const
Returns true if this multi-vector has constant stride between vectors.
int ExtractView(double **V) const
Set user-provided address of V.
int MyLength() const
Returns the local vector length on the calling processor of vectors in the multi-vector.
Epetra_Vector: A class for constructing and using dense vectors on a parallel computer.
int NumVectors() const
Returns the number of vectors in the multi-vector.
int BuildVectorTests(Epetra_Vector &C, const double alpha, Epetra_Vector &A, Epetra_Vector &sqrtA, Epetra_Vector &B, Epetra_Vector &C_alphaA, Epetra_Vector &C_alphaAplusB, Epetra_Vector &C_plusB, double *const dotvec_AB, double *const norm1_A, double *const norm2_sqrtA, double *const norminf_A, double *const normw_A, Epetra_Vector &Weights, double *const minval_A, double *const maxval_A, double *const meanval_A)
int BuildMatrixTests(Epetra_Vector &C, const char TransA, const char TransB, const double alpha, Epetra_Vector &A, Epetra_Vector &B, const double beta, Epetra_Vector &C_GEMM)