MueLu  Version of the Day
MueLu_SaPFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #ifdef out
52 #include "KokkosKernels_Handle.hpp"
53 #include "KokkosSparse_spgemm.hpp"
54 #include "KokkosSparse_spmv.hpp"
55 #endif
57 
58 #include <Xpetra_Matrix.hpp>
59 #include <Xpetra_IteratorOps.hpp>
60 
62 #include "MueLu_Level.hpp"
63 #include "MueLu_MasterList.hpp"
64 #include "MueLu_Monitor.hpp"
65 #include "MueLu_PerfUtils.hpp"
67 #include "MueLu_TentativePFactory.hpp"
68 #include "MueLu_Utilities_kokkos.hpp"
69 
70 #include <sstream>
71 
72 namespace MueLu {
73 
74  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
75  RCP<const ParameterList> SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList() const {
76  RCP<ParameterList> validParamList = rcp(new ParameterList());
77 
78 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
79  SET_VALID_ENTRY("sa: damping factor");
80  SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
81  SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
82  SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
83  SET_VALID_ENTRY("sa: enforce constraints");
84  SET_VALID_ENTRY("tentative: calculate qr");
85  SET_VALID_ENTRY("sa: max eigenvalue");
86 #undef SET_VALID_ENTRY
87 
88  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
89  validParamList->set< RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
90 
91  // Make sure we don't recursively validate options for the matrixmatrix kernels
92  ParameterList norecurse;
93  norecurse.disableRecursiveValidation();
94  validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
95 
96 
97  return validParamList;
98  }
99 
100  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
101  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level& fineLevel, Level& coarseLevel) const {
102  Input(fineLevel, "A");
103 
104  // Get default tentative prolongator factory
105  // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
106  RCP<const FactoryBase> initialPFact = GetFactory("P");
107  if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
108  coarseLevel.DeclareInput("P", initialPFact.get(), this);
109  }
110 
111  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
112  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& fineLevel, Level& coarseLevel) const {
113  return BuildP(fineLevel, coarseLevel);
114  }
115 
116  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
117  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel) const {
118  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
119 
120  // Add debugging information
121  DeviceType::execution_space::print_configuration(GetOStream(Runtime1));
122 
123  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
124 
125  // Get default tentative prolongator factory
126  // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
127  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
128  RCP<const FactoryBase> initialPFact = GetFactory("P");
129  if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
130  const ParameterList& pL = GetParameterList();
131 
132  // Level Get
133  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
134  RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());
135 
136  if(restrictionMode_) {
137  SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
138 
139  A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly
140  }
141 
142  //Build final prolongator
143  RCP<Matrix> finalP; // output
144 
145  // Reuse pattern if available
146  RCP<ParameterList> APparams;
147  if(pL.isSublist("matrixmatrix: kernel params"))
148  APparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
149  else
150  APparams= rcp(new ParameterList);
151  if (coarseLevel.IsAvailable("AP reuse data", this)) {
152  GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
153 
154  APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this);
155 
156  if (APparams->isParameter("graph"))
157  finalP = APparams->get< RCP<Matrix> >("graph");
158  }
159  // By default, we don't need global constants for SaP
160  APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false));
161  APparams->set("compute global constants",APparams->get("compute global constants",false));
162 
163  const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
164  const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
165  const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
166  const bool useAbsValueRowSum = pL.get<bool> ("sa: use rowsumabs diagonal scaling");
167  const bool doQRStep = pL.get<bool>("tentative: calculate qr");
168  const bool enforceConstraints= pL.get<bool>("sa: enforce constraints");
169  const SC userDefinedMaxEigen = as<SC>(pL.get<double>("sa: max eigenvalue"));
170 
171  // Sanity checking
172  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
173  "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
174 
175  if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
176 
177  SC lambdaMax;
178  RCP<Vector> invDiag;
179  if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
180  {
181  SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
182  lambdaMax = A->GetMaxEigenvalueEstimate();
183  if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
184  GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
185  ( (useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
186  Magnitude stopTol = 1e-4;
187  invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
188  if (useAbsValueRowSum)
189  lambdaMax = Utilities_kokkos::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
190  else
191  lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol);
192  A->SetMaxEigenvalueEstimate(lambdaMax);
193  } else {
194  GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
195  }
196  }
197  else {
198  lambdaMax = userDefinedMaxEigen;
199  A->SetMaxEigenvalueEstimate(lambdaMax);
200  }
201  GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
202 
203  {
204  SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
205  {
206  SubFactoryMonitor m3(*this, "Diagonal Extraction", coarseLevel);
207  if (useAbsValueRowSum)
208  GetOStream(Runtime0) << "Using rowSumAbs diagonal" << std::endl;
209  if (invDiag == Teuchos::null)
210  invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
211  }
212  SC omega = dampingFactor / lambdaMax;
213  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
214 
215  {
216  SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
217  // finalP = Ptent + (I - \omega D^{-1}A) Ptent
218  finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
219  if (enforceConstraints) SatisfyPConstraints( A, finalP);
220  }
221  }
222 
223  } else {
224  finalP = Ptent;
225  }
226 
227  // Level Set
228  if (!restrictionMode_) {
229  // prolongation factory is in prolongation mode
230  if(!finalP.is_null()) {std::ostringstream oss; oss << "P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
231  Set(coarseLevel, "P", finalP);
232 
233  // NOTE: EXPERIMENTAL
234  if (Ptent->IsView("stridedMaps"))
235  finalP->CreateView("stridedMaps", Ptent);
236 
237  } else {
238  // prolongation factory is in restriction mode
239  RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true);
240  Set(coarseLevel, "R", R);
241  if(!R.is_null()) {std::ostringstream oss; oss << "R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
242 
243  // NOTE: EXPERIMENTAL
244  if (Ptent->IsView("stridedMaps"))
245  R->CreateView("stridedMaps", Ptent, true);
246  }
247 
248  if (IsPrint(Statistics2)) {
249  RCP<ParameterList> params = rcp(new ParameterList());
250  params->set("printLoadBalancingInfo", true);
251  params->set("printCommInfo", true);
252  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
253  }
254 
255  } //Build()
256 
257  // Analyze the grid transfer produced by smoothed aggregation and make
258  // modifications if it does not look right. In particular, if there are
259  // negative entries or entries larger than 1, modify P's rows.
260  //
261  // Note: this kind of evaluation probably only makes sense if not doing QR
262  // when constructing tentative P.
263  //
264  // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
265  // these entries to the constraint value and modify the rest of the row
266  // so that the row sum remains the same as before by adding an equal
267  // amount to each remaining entry. However, if the original row sum value
268  // violates the constraints, we set the row sum back to 1 (the row sum of
269  // tentative P). After doing the modification to a row, we need to check
270  // again the entire row to make sure that the modified row does not violate
271  // the constraints.
272 
273 template<typename local_matrix_type>
274 struct constraintKernel {
275 
276  using Scalar= typename local_matrix_type::non_const_value_type;
277  using SC=Scalar;
278  using LO=typename local_matrix_type::non_const_ordinal_type;
279  using Device= typename local_matrix_type::device_type;
280  const Scalar zero = Kokkos::ArithTraits<SC>::zero();
281  const Scalar one = Kokkos::ArithTraits<SC>::one();
282  LO nPDEs;
283  local_matrix_type localP;
284  Kokkos::View<SC**, Device> ConstraintViolationSum;
287 
288  constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
289  {
290  ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
291  Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
292  nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
293  }
294 
295  KOKKOS_INLINE_FUNCTION
296  void operator() (const size_t rowIdx) const {
297 
298  auto rowPtr = localP.graph.row_map;
299  auto values = localP.values;
300 
301  bool checkRow = true;
302 
303  if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
304 
305 
306  while (checkRow) {
307 
308  // check constraints and compute the row sum
309 
310  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
311  Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
312  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
313 
314  ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
315  values(entryIdx) = zero;
316  }
317  else {
318  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
319  nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
320 
321  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
322  ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
323  values(entryIdx) = one;
324  }
325  }
326  }
327 
328  checkRow = false;
329 
330  // take into account any row sum that violates the contraints
331 
332  for (size_t k=0; k < (size_t) nPDEs; k++) {
333 
334  if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
335  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
336  }
337  else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
338  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k)); // rstumin
339  }
340  }
341 
342  // check if row need modification
343  for (size_t k=0; k < (size_t) nPDEs; k++) {
344  if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
345  checkRow = true;
346  }
347  // modify row
348  if (checkRow) {
349  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
350  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
351  values(entryIdx) = values(entryIdx) +
352  (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ? Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
353  }
354  }
355  for (size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
356  }
357  for (size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
358  for (size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
359  } // while (checkRow) ...
360 
361  }
362 };
363 
364 
365  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
366  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::SatisfyPConstraints(const RCP<Matrix> A, RCP<Matrix>& P) const {
367 
368  using Device = typename Matrix::local_matrix_type::device_type;
369  LO nPDEs = A->GetFixedBlockSize();
370 
371  using local_mat_type = typename Matrix::local_matrix_type;
372  constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
373  Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getNodeNumElements() ),
374  myKernel );
375 
376  } //SatsifyPConstraints()
377 
378 } //namespace MueLu
379 
380 #endif // HAVE_MUELU_KOKKOS_REFACTOR
381 #endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP
382 
383 //TODO: restrictionMode_ should use the parameter list.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
Print more statistics.
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Print statistics that do not involve significant additional computation.
MueLu::DefaultScalar Scalar
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Description of what is happening (more verbose)
#define SET_VALID_ENTRY(name)