IFPACK  Development
Ifpack_PointRelaxation.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include <iomanip>
45 #include <cmath>
46 #include "Epetra_Operator.h"
47 #include "Epetra_CrsMatrix.h"
48 #include "Epetra_VbrMatrix.h"
49 #include "Epetra_Comm.h"
50 #include "Epetra_Map.h"
51 #include "Epetra_MultiVector.h"
52 #include "Ifpack_Preconditioner.h"
53 #include "Ifpack_PointRelaxation.h"
54 #include "Ifpack_Utils.h"
55 #include "Ifpack_Condest.h"
56 
57 static const int IFPACK_JACOBI = 0;
58 static const int IFPACK_GS = 1;
59 static const int IFPACK_SGS = 2;
60 
61 //==============================================================================
64  IsInitialized_(false),
65  IsComputed_(false),
66  NumInitialize_(0),
67  NumCompute_(0),
68  NumApplyInverse_(0),
69  InitializeTime_(0.0),
70  ComputeTime_(0.0),
71  ApplyInverseTime_(0.0),
72  ComputeFlops_(0.0),
73  ApplyInverseFlops_(0.0),
74  NumSweeps_(1),
75  DampingFactor_(1.0),
76  UseTranspose_(false),
77  Condest_(-1.0),
78  /* ComputeCondest_(false), (unused; commented out to avoid build warnings) */
79  PrecType_(IFPACK_JACOBI),
80  MinDiagonalValue_(0.0),
81  NumMyRows_(0),
82  NumMyNonzeros_(0),
83  NumGlobalRows_(0),
84  NumGlobalNonzeros_(0),
85  Matrix_(Teuchos::rcp(Matrix_in,false)),
86  IsParallel_(false),
87  ZeroStartingSolution_(true),
88  DoBackwardGS_(false),
89  DoL1Method_(false),
90  L1Eta_(1.5),
91  NumLocalSmoothingIndices_(Matrix_in->NumMyRows()),
92  LocalSmoothingIndices_(0)
93 {
94 }
95 
96 //==============================================================================
97 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
98 {
99  using std::cout;
100  using std::endl;
101 
102  std::string PT;
103  if (PrecType_ == IFPACK_JACOBI)
104  PT = "Jacobi";
105  else if (PrecType_ == IFPACK_GS)
106  PT = "Gauss-Seidel";
107  else if (PrecType_ == IFPACK_SGS)
108  PT = "symmetric Gauss-Seidel";
109 
110  PT = List.get("relaxation: type", PT);
111 
112  if (PT == "Jacobi")
113  PrecType_ = IFPACK_JACOBI;
114  else if (PT == "Gauss-Seidel")
115  PrecType_ = IFPACK_GS;
116  else if (PT == "symmetric Gauss-Seidel")
117  PrecType_ = IFPACK_SGS;
118  else {
119  IFPACK_CHK_ERR(-2);
120  }
121 
122  NumSweeps_ = List.get("relaxation: sweeps",NumSweeps_);
123  DampingFactor_ = List.get("relaxation: damping factor",
124  DampingFactor_);
125  MinDiagonalValue_ = List.get("relaxation: min diagonal value",
126  MinDiagonalValue_);
127  ZeroStartingSolution_ = List.get("relaxation: zero starting solution",
128  ZeroStartingSolution_);
129 
130  DoBackwardGS_ = List.get("relaxation: backward mode",DoBackwardGS_);
131 
132  DoL1Method_ = List.get("relaxation: use l1",DoL1Method_);
133 
134  L1Eta_ = List.get("relaxation: l1 eta",L1Eta_);
135 
136 
137  // This (partial) reordering would require a very different implementation of Jacobi than we have no, so we're not
138  // going to do it.
139  NumLocalSmoothingIndices_= List.get("relaxation: number of local smoothing indices",NumLocalSmoothingIndices_);
140  LocalSmoothingIndices_ = List.get("relaxation: local smoothing indices",LocalSmoothingIndices_);
141  if(PrecType_ == IFPACK_JACOBI && LocalSmoothingIndices_) {
142  NumLocalSmoothingIndices_=NumMyRows_;
143  LocalSmoothingIndices_=0;
144  if(!Comm().MyPID()) cout<<"Ifpack_PointRelaxation: WARNING: Reordered/Local Smoothing not implemented for Jacobi. Defaulting to regular Jacobi"<<endl;
145  }
146 
147  SetLabel();
148 
149  return(0);
150 }
151 
152 //==============================================================================
154 {
155  return(Matrix_->Comm());
156 }
157 
158 //==============================================================================
160 {
161  return(Matrix_->OperatorDomainMap());
162 }
163 
164 //==============================================================================
166 {
167  return(Matrix_->OperatorRangeMap());
168 }
169 
170 //==============================================================================
172 {
173  IsInitialized_ = false;
174 
175  if (Matrix_ == Teuchos::null)
176  IFPACK_CHK_ERR(-2);
177 
178  if (Time_ == Teuchos::null)
179  Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
180 
181  if (Matrix().NumGlobalRows64() != Matrix().NumGlobalCols64())
182  IFPACK_CHK_ERR(-2); // only square matrices
183 
184  NumMyRows_ = Matrix_->NumMyRows();
185  NumMyNonzeros_ = Matrix_->NumMyNonzeros();
186  NumGlobalRows_ = Matrix_->NumGlobalRows64();
187  NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros64();
188 
189  if (Comm().NumProc() != 1)
190  IsParallel_ = true;
191  else
192  IsParallel_ = false;
193 
194  ++NumInitialize_;
195  InitializeTime_ += Time_->ElapsedTime();
196  IsInitialized_ = true;
197  return(0);
198 }
199 
200 //==============================================================================
202 {
203  int ierr = 0;
204  if (!IsInitialized())
205  IFPACK_CHK_ERR(Initialize());
206 
207  Time_->ResetStartTime();
208 
209  // reset values
210  IsComputed_ = false;
211  Condest_ = -1.0;
212 
213  if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
214  if (NumSweeps_ < 0)
215  IFPACK_CHK_ERR(-2); // at least one application
216 
217  // NOTE: RowMatrixRowMap doesn't work correctly for Epetra_VbrMatrix
218  const Epetra_VbrMatrix * VbrMat = dynamic_cast<const Epetra_VbrMatrix*>(&Matrix());
219  if(VbrMat) Diagonal_ = Teuchos::rcp( new Epetra_Vector(VbrMat->RowMap()) );
220  else Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
221 
222  if (Diagonal_ == Teuchos::null)
223  IFPACK_CHK_ERR(-5);
224 
225  IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
226 
227  // Setup for L1 Methods.
228  // Here we add half the value of the off-processor entries in the row,
229  // but only if diagonal isn't sufficiently large.
230  //
231  // Note: This is only done in the slower-but-more-general "RowMatrix" mode.
232  //
233  // This follows from Equation (6.5) in:
234  // Baker, Falgout, Kolev and Yang. Multigrid Smoothers for Ultraparallel Computing.
235  // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
236  if(DoL1Method_ && IsParallel_) {
237  int maxLength = Matrix().MaxNumEntries();
238  std::vector<int> Indices(maxLength);
239  std::vector<double> Values(maxLength);
240  int NumEntries;
241 
242  for (int i = 0 ; i < NumMyRows_ ; ++i) {
243  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
244  &Values[0], &Indices[0]));
245  double diagonal_boost=0.0;
246  for (int k = 0 ; k < NumEntries ; ++k)
247  if(Indices[k] > NumMyRows_)
248  diagonal_boost+=std::abs(Values[k]/2.0);
249  if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
250  (*Diagonal_)[i]+=diagonal_boost;
251  }
252  }
253 
254  // check diagonal elements, store the inverses, and verify that
255  // no zeros are around. If an element is zero, then by default
256  // its inverse is zero as well (that is, the row is ignored).
257  for (int i = 0 ; i < NumMyRows_ ; ++i) {
258  double& diag = (*Diagonal_)[i];
259  if (IFPACK_ABS(diag) < MinDiagonalValue_)
260  diag = MinDiagonalValue_;
261  if (diag != 0.0)
262  diag = 1.0 / diag;
263  }
264 #ifdef IFPACK_FLOPCOUNTERS
265  ComputeFlops_ += NumMyRows_;
266 #endif
267 
268 #if 0
269  // some methods require the inverse of the diagonal, compute it
270  // now and store it in `Diagonal_'
271  if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
272  Diagonal_->Reciprocal(*Diagonal_);
273  // update flops
274  ComputeFlops_ += NumMyRows_;
275  }
276 #endif
277 
278  // We need to import data from external processors. Here I create an
279  // Epetra_Import object because I cannot assume that Matrix_ has one.
280  // This is a bit of waste of resources (but the code is more robust).
281  // Note that I am doing some strange stuff to set the components of Y
282  // from Y2 (to save some time).
283  //
284  if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
285  Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
286  Matrix().RowMatrixRowMap()) );
287  if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
288  }
289 
290  ++NumCompute_;
291  ComputeTime_ += Time_->ElapsedTime();
292  IsComputed_ = true;
293 
294  IFPACK_CHK_ERR(ierr);
295  return(0);
296 }
297 
298 //==============================================================================
299 std::ostream& Ifpack_PointRelaxation::Print(std::ostream & os) const
300 {
301  using std::endl;
302 
303  double MyMinVal, MyMaxVal;
304  double MinVal, MaxVal;
305 
306  if (IsComputed_) {
307  Diagonal_->MinValue(&MyMinVal);
308  Diagonal_->MaxValue(&MyMaxVal);
309  Comm().MinAll(&MyMinVal,&MinVal,1);
310  Comm().MinAll(&MyMaxVal,&MaxVal,1);
311  }
312 
313  if (!Comm().MyPID()) {
314  os << endl;
315  os << "================================================================================" << endl;
316  os << "Ifpack_PointRelaxation" << endl;
317  os << "Sweeps = " << NumSweeps_ << endl;
318  os << "damping factor = " << DampingFactor_ << endl;
319  if (PrecType_ == IFPACK_JACOBI)
320  os << "Type = Jacobi" << endl;
321  else if (PrecType_ == IFPACK_GS)
322  os << "Type = Gauss-Seidel" << endl;
323  else if (PrecType_ == IFPACK_SGS)
324  os << "Type = symmetric Gauss-Seidel" << endl;
325  if (DoBackwardGS_)
326  os << "Using backward mode (GS only)" << endl;
327  if (ZeroStartingSolution_)
328  os << "Using zero starting solution" << endl;
329  else
330  os << "Using input starting solution" << endl;
331  os << "Condition number estimate = " << Condest() << endl;
332  os << "Global number of rows = " << Matrix_->NumGlobalRows64() << endl;
333  if (IsComputed_) {
334  os << "Minimum value on stored diagonal = " << MinVal << endl;
335  os << "Maximum value on stored diagonal = " << MaxVal << endl;
336  }
337  os << endl;
338  os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl;
339  os << "----- ------- -------------- ------------ --------" << endl;
340  os << "Initialize() " << std::setw(5) << NumInitialize_
341  << " " << std::setw(15) << InitializeTime_
342  << " 0.0 0.0" << endl;
343  os << "Compute() " << std::setw(5) << NumCompute_
344  << " " << std::setw(15) << ComputeTime_
345  << " " << std::setw(15) << 1.0e-6 * ComputeFlops_;
346  if (ComputeTime_ != 0.0)
347  os << " " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
348  else
349  os << " " << std::setw(15) << 0.0 << endl;
350  os << "ApplyInverse() " << std::setw(5) << NumApplyInverse_
351  << " " << std::setw(15) << ApplyInverseTime_
352  << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
353  if (ApplyInverseTime_ != 0.0)
354  os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
355  else
356  os << " " << std::setw(15) << 0.0 << endl;
357  os << "================================================================================" << endl;
358  os << endl;
359  }
360 
361  return(os);
362 }
363 
364 //==============================================================================
366 Condest(const Ifpack_CondestType CT,
367  const int MaxIters, const double Tol,
368  Epetra_RowMatrix* Matrix_in)
369 {
370  if (!IsComputed()) // cannot compute right now
371  return(-1.0);
372 
373  // always computes it. Call Condest() with no parameters to get
374  // the previous estimate.
375  Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
376 
377  return(Condest_);
378 }
379 
380 //==============================================================================
381 void Ifpack_PointRelaxation::SetLabel()
382 {
383  std::string PT;
384  if (PrecType_ == IFPACK_JACOBI)
385  PT = "Jacobi";
386  else if (PrecType_ == IFPACK_GS){
387  PT = "GS";
388  if(DoBackwardGS_)
389  PT = "Backward " + PT;
390  }
391  else if (PrecType_ == IFPACK_SGS)
392  PT = "SGS";
393 
394  if(NumLocalSmoothingIndices_!=NumMyRows_) PT="Local " + PT;
395  else if(LocalSmoothingIndices_) PT="Reordered " + PT;
396 
397  Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
398  + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
399 }
400 
401 //==============================================================================
402 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
403 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
404 // way the matrix-vector produce is performed).
405 //
406 // Another ML-related observation is that the starting solution (in Y)
407 // is NOT supposed to be zero. This may slow down the application of just
408 // one sweep of Jacobi.
409 //
412 {
413  if (!IsComputed())
414  IFPACK_CHK_ERR(-3);
415 
416  if (X.NumVectors() != Y.NumVectors())
417  IFPACK_CHK_ERR(-2);
418 
419  Time_->ResetStartTime();
420 
421  // AztecOO gives X and Y pointing to the same memory location,
422  // need to create an auxiliary vector, Xcopy
423  Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
424  if (X.Pointers()[0] == Y.Pointers()[0])
425  Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
426  else
427  Xcopy = Teuchos::rcp( &X, false );
428 
429  // Flops are updated in each of the following.
430  switch (PrecType_) {
431  case IFPACK_JACOBI:
432  IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
433  break;
434  case IFPACK_GS:
435  IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
436  break;
437  case IFPACK_SGS:
438  IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
439  break;
440  default:
441  IFPACK_CHK_ERR(-1); // something wrong
442  }
443 
444  ++NumApplyInverse_;
445  ApplyInverseTime_ += Time_->ElapsedTime();
446  return(0);
447 }
448 
449 //==============================================================================
450 // This preconditioner can be much slower than AztecOO and ML versions
451 // if the matrix-vector product requires all ExtractMyRowCopy()
452 // (as done through Ifpack_AdditiveSchwarz).
453 int Ifpack_PointRelaxation::
454 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
455 {
456  int NumVectors = LHS.NumVectors();
457 
458  int startIter = 0;
459  if (NumSweeps_ > 0 && ZeroStartingSolution_) {
460  // When we have a zero initial guess, we can skip the first
461  // matrix apply call and zero initialization
462  for (int v = 0; v < NumVectors; v++)
463  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(RHS(v)), *Diagonal_, 0.0));
464 
465  startIter = 1;
466  }
467 
468  bool zeroOut = false;
469  Epetra_MultiVector A_times_LHS(LHS.Map(), NumVectors, zeroOut);
470  for (int j = startIter; j < NumSweeps_ ; j++) {
471  IFPACK_CHK_ERR(Apply(LHS, A_times_LHS));
472  IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
473  for (int v = 0 ; v < NumVectors ; ++v)
474  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)),
475  *Diagonal_, 1.0));
476 
477  }
478 
479  // Flops:
480  // - matrix vector (2 * NumGlobalNonzeros_)
481  // - update (2 * NumGlobalRows_)
482  // - Multiply:
483  // - DampingFactor (NumGlobalRows_)
484  // - Diagonal (NumGlobalRows_)
485  // - A + B (NumGlobalRows_)
486  // - 1.0 (NumGlobalRows_)
487 #ifdef IFPACK_FLOPCOUNTERS
488  ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
489 #endif
490 
491  return(0);
492 }
493 
494 //==============================================================================
495 int Ifpack_PointRelaxation::
496 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
497 {
498  if (ZeroStartingSolution_)
499  Y.PutScalar(0.0);
500 
501  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
502  // try to pick the best option; performances may be improved
503  // if several sweeps are used.
504  if (CrsMatrix != 0)
505  {
506  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
507  return(ApplyInverseGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
508  else if (CrsMatrix->StorageOptimized())
509  return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
510  else
511  return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
512  }
513  else
514  return(ApplyInverseGS_RowMatrix(X, Y));
515 } //ApplyInverseGS()
516 
517 
518 
519 //==============================================================================
520 int Ifpack_PointRelaxation::
521 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
522 {
523  int NumVectors = X.NumVectors();
524 
525  int Length = Matrix().MaxNumEntries();
526  std::vector<int> Indices(Length);
527  std::vector<double> Values(Length);
528 
529  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
530  if (IsParallel_)
531  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
532  else
533  Y2 = Teuchos::rcp( &Y, false );
534 
535  // extract views (for nicer and faster code)
536  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
537  X.ExtractView(&x_ptr);
538  Y.ExtractView(&y_ptr);
539  Y2->ExtractView(&y2_ptr);
540  Diagonal_->ExtractView(&d_ptr);
541 
542  for (int j = 0; j < NumSweeps_ ; j++) {
543 
544  // data exchange is here, once per sweep
545  if (IsParallel_)
546  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
547 
548  // FIXME: do I really need this code below?
549  if (NumVectors == 1) {
550 
551  double* y0_ptr = y_ptr[0];
552  double* y20_ptr = y2_ptr[0];
553  double* x0_ptr = x_ptr[0];
554 
555  if(!DoBackwardGS_){
556  /* Forward Mode */
557  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
558  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
559 
560  int NumEntries;
561  int col;
562  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
563  &Values[0], &Indices[0]));
564 
565  double dtemp = 0.0;
566  for (int k = 0 ; k < NumEntries ; ++k) {
567 
568  col = Indices[k];
569  dtemp += Values[k] * y20_ptr[col];
570  }
571 
572  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
573  }
574  }
575  else {
576  /* Backward Mode */
577  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
578  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
579 
580  int NumEntries;
581  int col;
582  (void) col; // Forestall compiler warning for unused variable.
583  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
584  &Values[0], &Indices[0]));
585  double dtemp = 0.0;
586  for (int k = 0 ; k < NumEntries ; ++k) {
587 
588  col = Indices[k];
589  dtemp += Values[k] * y20_ptr[i];
590  }
591 
592  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
593  }
594  }
595 
596  // using Export() sounded quite expensive
597  if (IsParallel_)
598  for (int i = 0 ; i < NumMyRows_ ; ++i)
599  y0_ptr[i] = y20_ptr[i];
600 
601  }
602  else {
603  if(!DoBackwardGS_){
604  /* Forward Mode */
605  for (int i = 0 ; i < NumMyRows_ ; ++i) {
606 
607  int NumEntries;
608  int col;
609  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
610  &Values[0], &Indices[0]));
611 
612  for (int m = 0 ; m < NumVectors ; ++m) {
613 
614  double dtemp = 0.0;
615  for (int k = 0 ; k < NumEntries ; ++k) {
616 
617  col = Indices[k];
618  dtemp += Values[k] * y2_ptr[m][col];
619  }
620 
621  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
622  }
623  }
624  }
625  else {
626  /* Backward Mode */
627  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
628  int NumEntries;
629  int col;
630  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
631  &Values[0], &Indices[0]));
632 
633  for (int m = 0 ; m < NumVectors ; ++m) {
634 
635  double dtemp = 0.0;
636  for (int k = 0 ; k < NumEntries ; ++k) {
637 
638  col = Indices[k];
639  dtemp += Values[k] * y2_ptr[m][col];
640  }
641 
642  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
643 
644  }
645  }
646  }
647 
648  // using Export() sounded quite expensive
649  if (IsParallel_)
650  for (int m = 0 ; m < NumVectors ; ++m)
651  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
652  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
653  y_ptr[m][i] = y2_ptr[m][i];
654  }
655  }
656  }
657 
658 #ifdef IFPACK_FLOPCOUNTERS
659  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
660 #endif
661 
662  return(0);
663 } //ApplyInverseGS_RowMatrix()
664 
665 //==============================================================================
666 int Ifpack_PointRelaxation::
667 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
668 {
669  int NumVectors = X.NumVectors();
670 
671  int* Indices;
672  double* Values;
673 
674  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
675  if (IsParallel_) {
676  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
677  }
678  else
679  Y2 = Teuchos::rcp( &Y, false );
680 
681  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
682  X.ExtractView(&x_ptr);
683  Y.ExtractView(&y_ptr);
684  Y2->ExtractView(&y2_ptr);
685  Diagonal_->ExtractView(&d_ptr);
686 
687  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
688 
689  // only one data exchange per sweep
690  if (IsParallel_)
691  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
692 
693  if(!DoBackwardGS_){
694  /* Forward Mode */
695  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
696  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
697 
698  int NumEntries;
699  int col;
700  double diag = d_ptr[i];
701 
702  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
703 
704  for (int m = 0 ; m < NumVectors ; ++m) {
705 
706  double dtemp = 0.0;
707 
708  for (int k = 0; k < NumEntries; ++k) {
709 
710  col = Indices[k];
711  dtemp += Values[k] * y2_ptr[m][col];
712  }
713 
714  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
715  }
716  }
717  }
718  else {
719  /* Backward Mode */
720  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
721  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
722 
723  int NumEntries;
724  int col;
725  double diag = d_ptr[i];
726 
727  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
728 
729  for (int m = 0 ; m < NumVectors ; ++m) {
730 
731  double dtemp = 0.0;
732  for (int k = 0; k < NumEntries; ++k) {
733 
734  col = Indices[k];
735  dtemp += Values[k] * y2_ptr[m][col];
736  }
737 
738  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
739 
740  }
741  }
742  }
743 
744  if (IsParallel_)
745  for (int m = 0 ; m < NumVectors ; ++m)
746  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
747  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
748  y_ptr[m][i] = y2_ptr[m][i];
749  }
750  }
751 
752 #ifdef IFPACK_FLOPCOUNTERS
753  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
754 #endif
755  return(0);
756 } //ApplyInverseGS_CrsMatrix()
757 
758 //
759 //==============================================================================
760 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
761 
762 int Ifpack_PointRelaxation::
763 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
764 {
765  int* IndexOffset;
766  int* Indices;
767  double* Values;
768  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
769 
770  int NumVectors = X.NumVectors();
771 
772  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
773  if (IsParallel_) {
774  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
775  }
776  else
777  Y2 = Teuchos::rcp( &Y, false );
778 
779  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
780  X.ExtractView(&x_ptr);
781  Y.ExtractView(&y_ptr);
782  Y2->ExtractView(&y2_ptr);
783  Diagonal_->ExtractView(&d_ptr);
784 
785  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
786 
787  // only one data exchange per sweep
788  if (IsParallel_)
789  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
790 
791  if(!DoBackwardGS_){
792  /* Forward Mode */
793  for (int i = 0 ; i < NumMyRows_ ; ++i) {
794 
795  int col;
796  double diag = d_ptr[i];
797 
798  for (int m = 0 ; m < NumVectors ; ++m) {
799 
800  double dtemp = 0.0;
801 
802  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
803 
804  col = Indices[k];
805  dtemp += Values[k] * y2_ptr[m][col];
806  }
807 
808  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
809  }
810  }
811  }
812  else {
813  /* Backward Mode */
814  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
815 
816  int col;
817  double diag = d_ptr[i];
818 
819  for (int m = 0 ; m < NumVectors ; ++m) {
820 
821  double dtemp = 0.0;
822  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
823 
824  col = Indices[k];
825  dtemp += Values[k] * y2_ptr[m][col];
826  }
827 
828  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
829 
830  }
831  }
832  }
833 
834 
835  if (IsParallel_)
836  for (int m = 0 ; m < NumVectors ; ++m)
837  for (int i = 0 ; i < NumMyRows_ ; ++i)
838  y_ptr[m][i] = y2_ptr[m][i];
839  }
840 
841 #ifdef IFPACK_FLOPCOUNTERS
842  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
843 #endif
844  return(0);
845 } //ApplyInverseGS_FastCrsMatrix()
846 
847 
848 //
849 //==============================================================================
850 // ApplyInverseGS_LocalFastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices
851 
852 int Ifpack_PointRelaxation::
853 ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
854 {
855  int* IndexOffset;
856  int* Indices;
857  double* Values;
858  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
859 
860  int NumVectors = X.NumVectors();
861 
862  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
863  if (IsParallel_) {
864  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
865  }
866  else
867  Y2 = Teuchos::rcp( &Y, false );
868 
869  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
870  X.ExtractView(&x_ptr);
871  Y.ExtractView(&y_ptr);
872  Y2->ExtractView(&y2_ptr);
873  Diagonal_->ExtractView(&d_ptr);
874 
875  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
876 
877  // only one data exchange per sweep
878  if (IsParallel_)
879  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
880 
881  if(!DoBackwardGS_){
882  /* Forward Mode */
883  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
884  int i=LocalSmoothingIndices_[ii];
885 
886  int col;
887  double diag = d_ptr[i];
888 
889  for (int m = 0 ; m < NumVectors ; ++m) {
890 
891  double dtemp = 0.0;
892 
893  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
894 
895  col = Indices[k];
896  dtemp += Values[k] * y2_ptr[m][col];
897  }
898 
899  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
900  }
901  }
902  }
903  else {
904  /* Backward Mode */
905  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
906  int i=LocalSmoothingIndices_[ii];
907 
908  int col;
909  double diag = d_ptr[i];
910 
911  for (int m = 0 ; m < NumVectors ; ++m) {
912 
913  double dtemp = 0.0;
914  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
915 
916  col = Indices[k];
917  dtemp += Values[k] * y2_ptr[m][col];
918  }
919 
920  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
921 
922  }
923  }
924  }
925 
926 
927  if (IsParallel_)
928  for (int m = 0 ; m < NumVectors ; ++m)
929  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
930  int i=LocalSmoothingIndices_[ii];
931  y_ptr[m][i] = y2_ptr[m][i];
932  }
933  }
934 
935 #ifdef IFPACK_FLOPCOUNTERS
936  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
937 #endif
938  return(0);
939 } //ApplyInverseGS_LocalFastCrsMatrix()
940 
941 
942 //==============================================================================
943 int Ifpack_PointRelaxation::
944 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
945 {
946  if (ZeroStartingSolution_)
947  Y.PutScalar(0.0);
948 
949  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
950  // try to pick the best option; performances may be improved
951  // if several sweeps are used.
952  if (CrsMatrix != 0)
953  {
954  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
955  return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
956  else if (CrsMatrix->StorageOptimized())
957  return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
958  else
959  return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
960  }
961  else
962  return(ApplyInverseSGS_RowMatrix(X, Y));
963 }
964 
965 //==============================================================================
966 int Ifpack_PointRelaxation::
967 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
968 {
969  int NumVectors = X.NumVectors();
970  int Length = Matrix().MaxNumEntries();
971  std::vector<int> Indices(Length);
972  std::vector<double> Values(Length);
973 
974  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
975  if (IsParallel_) {
976  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
977  }
978  else
979  Y2 = Teuchos::rcp( &Y, false );
980 
981  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
982  X.ExtractView(&x_ptr);
983  Y.ExtractView(&y_ptr);
984  Y2->ExtractView(&y2_ptr);
985  Diagonal_->ExtractView(&d_ptr);
986 
987  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
988 
989  // only one data exchange per sweep
990  if (IsParallel_)
991  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
992 
993  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
994  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
995 
996  int NumEntries;
997  int col;
998  double diag = d_ptr[i];
999 
1000  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
1001  &Values[0], &Indices[0]));
1002 
1003  for (int m = 0 ; m < NumVectors ; ++m) {
1004 
1005  double dtemp = 0.0;
1006 
1007  for (int k = 0 ; k < NumEntries ; ++k) {
1008 
1009  col = Indices[k];
1010  dtemp += Values[k] * y2_ptr[m][col];
1011  }
1012 
1013  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1014  }
1015  }
1016  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1017  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1018 
1019  int NumEntries;
1020  int col;
1021  double diag = d_ptr[i];
1022 
1023  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
1024  &Values[0], &Indices[0]));
1025 
1026  for (int m = 0 ; m < NumVectors ; ++m) {
1027 
1028  double dtemp = 0.0;
1029  for (int k = 0 ; k < NumEntries ; ++k) {
1030 
1031  col = Indices[k];
1032  dtemp += Values[k] * y2_ptr[m][col];
1033  }
1034 
1035  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1036 
1037  }
1038  }
1039 
1040  if (IsParallel_)
1041  for (int m = 0 ; m < NumVectors ; ++m)
1042  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1043  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1044  y_ptr[m][i] = y2_ptr[m][i];
1045  }
1046  }
1047 
1048 #ifdef IFPACK_FLOPCOUNTERS
1049  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1050 #endif
1051  return(0);
1052 }
1053 
1054 //==============================================================================
1055 int Ifpack_PointRelaxation::
1056 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1057 {
1058  int NumVectors = X.NumVectors();
1059 
1060  int* Indices;
1061  double* Values;
1062 
1063  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1064  if (IsParallel_) {
1065  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1066  }
1067  else
1068  Y2 = Teuchos::rcp( &Y, false );
1069 
1070  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1071  X.ExtractView(&x_ptr);
1072  Y.ExtractView(&y_ptr);
1073  Y2->ExtractView(&y2_ptr);
1074  Diagonal_->ExtractView(&d_ptr);
1075 
1076  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1077 
1078  // only one data exchange per sweep
1079  if (IsParallel_)
1080  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1081 
1082  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1083  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1084 
1085  int NumEntries;
1086  int col;
1087  double diag = d_ptr[i];
1088 
1089  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1090 
1091  for (int m = 0 ; m < NumVectors ; ++m) {
1092 
1093  double dtemp = 0.0;
1094 
1095  for (int k = 0; k < NumEntries; ++k) {
1096 
1097  col = Indices[k];
1098  dtemp += Values[k] * y2_ptr[m][col];
1099  }
1100 
1101  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1102  }
1103  }
1104  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1105  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1106 
1107  int NumEntries;
1108  int col;
1109  double diag = d_ptr[i];
1110 
1111  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1112 
1113  for (int m = 0 ; m < NumVectors ; ++m) {
1114 
1115  double dtemp = 0.0;
1116  for (int k = 0; k < NumEntries; ++k) {
1117 
1118  col = Indices[k];
1119  dtemp += Values[k] * y2_ptr[m][col];
1120  }
1121 
1122  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1123 
1124  }
1125  }
1126 
1127  if (IsParallel_)
1128  for (int m = 0 ; m < NumVectors ; ++m)
1129  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1130  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1131  y_ptr[m][i] = y2_ptr[m][i];
1132  }
1133  }
1134 
1135 #ifdef IFPACK_FLOPCOUNTERS
1136  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1137 #endif
1138  return(0);
1139 }
1140 
1141 //==============================================================================
1142 // this requires Epetra_CrsMatrix + OptimizeStorage()
1143 int Ifpack_PointRelaxation::
1144 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1145 {
1146  int* IndexOffset;
1147  int* Indices;
1148  double* Values;
1149  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1150 
1151  int NumVectors = X.NumVectors();
1152 
1153  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1154  if (IsParallel_) {
1155  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1156  }
1157  else
1158  Y2 = Teuchos::rcp( &Y, false );
1159 
1160  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1161  X.ExtractView(&x_ptr);
1162  Y.ExtractView(&y_ptr);
1163  Y2->ExtractView(&y2_ptr);
1164  Diagonal_->ExtractView(&d_ptr);
1165 
1166  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1167 
1168  // only one data exchange per sweep
1169  if (IsParallel_)
1170  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1171 
1172  for (int i = 0 ; i < NumMyRows_ ; ++i) {
1173 
1174  int col;
1175  double diag = d_ptr[i];
1176 
1177  // no need to extract row i
1178  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1179 
1180  for (int m = 0 ; m < NumVectors ; ++m) {
1181 
1182  double dtemp = 0.0;
1183 
1184  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1185 
1186  col = Indices[k];
1187  dtemp += Values[k] * y2_ptr[m][col];
1188  }
1189 
1190  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1191  }
1192  }
1193 
1194  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
1195 
1196  int col;
1197  double diag = d_ptr[i];
1198 
1199  // no need to extract row i
1200  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1201 
1202  for (int m = 0 ; m < NumVectors ; ++m) {
1203 
1204  double dtemp = 0.0;
1205  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1206 
1207  col = Indices[k];
1208  dtemp += Values[k] * y2_ptr[m][col];
1209  }
1210 
1211  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1212 
1213  }
1214  }
1215 
1216  if (IsParallel_)
1217  for (int m = 0 ; m < NumVectors ; ++m)
1218  for (int i = 0 ; i < NumMyRows_ ; ++i)
1219  y_ptr[m][i] = y2_ptr[m][i];
1220  }
1221 
1222 #ifdef IFPACK_FLOPCOUNTERS
1223  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1224 #endif
1225  return(0);
1226 }
1227 
1228 
1229 //==============================================================================
1230 // this requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices_
1231 int Ifpack_PointRelaxation::
1232 ApplyInverseSGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1233 {
1234  int* IndexOffset;
1235  int* Indices;
1236  double* Values;
1237  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1238 
1239  int NumVectors = X.NumVectors();
1240 
1241  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1242  if (IsParallel_) {
1243  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1244  }
1245  else
1246  Y2 = Teuchos::rcp( &Y, false );
1247 
1248  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1249  X.ExtractView(&x_ptr);
1250  Y.ExtractView(&y_ptr);
1251  Y2->ExtractView(&y2_ptr);
1252  Diagonal_->ExtractView(&d_ptr);
1253 
1254  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1255 
1256  // only one data exchange per sweep
1257  if (IsParallel_)
1258  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1259 
1260  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1261  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1262 
1263  int col;
1264  double diag = d_ptr[i];
1265 
1266  // no need to extract row i
1267  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1268 
1269  for (int m = 0 ; m < NumVectors ; ++m) {
1270 
1271  double dtemp = 0.0;
1272 
1273  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1274 
1275  col = Indices[k];
1276  dtemp += Values[k] * y2_ptr[m][col];
1277  }
1278 
1279  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1280  }
1281  }
1282 
1283  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1284  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1285 
1286  int col;
1287  double diag = d_ptr[i];
1288 
1289  // no need to extract row i
1290  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1291 
1292  for (int m = 0 ; m < NumVectors ; ++m) {
1293 
1294  double dtemp = 0.0;
1295  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1296 
1297  col = Indices[k];
1298  dtemp += Values[k] * y2_ptr[m][col];
1299  }
1300 
1301  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1302 
1303  }
1304  }
1305 
1306  if (IsParallel_)
1307  for (int m = 0 ; m < NumVectors ; ++m)
1308  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1309  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1310  y_ptr[m][i] = y2_ptr[m][i];
1311  }
1312  }
1313 
1314 #ifdef IFPACK_FLOPCOUNTERS
1315  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1316 #endif
1317  return(0);
1318 }
bool StorageOptimized() const
const Epetra_BlockMap & Map() const
const Epetra_BlockMap & RowMap() const
virtual const Epetra_Map & OperatorRangeMap() const
Returns the Epetra_Map object associated with the range of this operator.
int ExtractCrsDataPointers(int *&IndexOffset, int *&Indices, double *&Values_in) const
virtual double Condest() const
Returns the condition number estimate, or -1.0 if not computed.
int PutScalar(double ScalarConstant)
virtual int ApplyInverse(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the preconditioner to X, returns the result in Y.
Ifpack_PointRelaxation(const Epetra_RowMatrix *Matrix)
Ifpack_PointRelaxation constructor with given Epetra_RowMatrix.
virtual int MinAll(double *PartialMins, double *GlobalMins, int Count) const=0
int ExtractMyRowView(int MyRow, int &NumEntries, double *&Values, int *&Indices) const
virtual int SetParameters(Teuchos::ParameterList &List)
Sets all the parameters for the preconditioner.
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the matrix to an Epetra_MultiVector.
virtual int MaxNumEntries() const=0
virtual const Epetra_RowMatrix & Matrix() const
Returns a pointer to the matrix to be preconditioned.
virtual const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this operator.
int NumVectors() const
virtual bool IsInitialized() const
Returns true if the preconditioner has been successfully initialized, false otherwise.
virtual int Compute()
Computes the preconditioners.
virtual int Initialize()
Computes all it is necessary to initialize the preconditioner.
int Multiply(char TransA, char TransB, double ScalarAB, const Epetra_MultiVector &A, const Epetra_MultiVector &B, double ScalarThis)
virtual std::ostream & Print(std::ostream &os) const
Prints object to an output stream.
virtual const Epetra_Map & OperatorDomainMap() const
Returns the Epetra_Map object associated with the domain of this operator.
int ExtractView(double **A, int *MyLDA) const
virtual bool IsComputed() const
Returns true if the preconditioner has been successfully computed.
std::string Ifpack_toString(const int &x)
Converts an integer to std::string.
double ** Pointers() const