MueLu  Version of the Day
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
49 #include <Xpetra_CrsGraphFactory.hpp>
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_ExportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
62 #include <Xpetra_IO.hpp>
63 
65 
66 #include "MueLu_AmalgamationFactory.hpp"
67 #include "MueLu_AmalgamationInfo.hpp"
68 #include "MueLu_Exceptions.hpp"
69 #include "MueLu_GraphBase.hpp"
70 #include "MueLu_Graph.hpp"
71 #include "MueLu_Level.hpp"
72 #include "MueLu_LWGraph.hpp"
73 #include "MueLu_MasterList.hpp"
74 #include "MueLu_Monitor.hpp"
75 #include "MueLu_PreDropFunctionBaseClass.hpp"
76 #include "MueLu_PreDropFunctionConstVal.hpp"
77 #include "MueLu_Utilities.hpp"
78 
79 #ifdef HAVE_XPETRA_TPETRA
80 #include "Tpetra_CrsGraphTransposer.hpp"
81 #endif
82 
83 #include <algorithm>
84 #include <cstdlib>
85 #include <string>
86 
87 // If defined, read environment variables.
88 // Should be removed once we are confident that this works.
89 //#define DJS_READ_ENV_VARIABLES
90 
91 
92 namespace MueLu {
93 
94  namespace Details {
95  template<class real_type, class LO>
96  struct DropTol {
97 
98  DropTol() = default;
99  DropTol(DropTol const&) = default;
100  DropTol(DropTol &&) = default;
101 
102  DropTol& operator=(DropTol const&) = default;
103  DropTol& operator=(DropTol &&) = default;
104 
105  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
106  : val{val_}, diag{diag_}, col{col_}, drop{drop_}
107  {}
108 
109  real_type val {Teuchos::ScalarTraits<real_type>::zero()};
110  real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
111  LO col {Teuchos::OrdinalTraits<LO>::invalid()};
112  bool drop {true};
113 
114  // CMS: Auxillary information for debugging info
115  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
116  };
117  }
118 
119 
120  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
122  RCP<ParameterList> validParamList = rcp(new ParameterList());
123 
124 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
125  SET_VALID_ENTRY("aggregation: drop tol");
126  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
127  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
128  SET_VALID_ENTRY("aggregation: row sum drop tol");
129  SET_VALID_ENTRY("aggregation: drop scheme");
130  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
131  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
132 
133  {
134  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
135  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme")));
136 
137  }
138  SET_VALID_ENTRY("aggregation: distance laplacian algo");
139  SET_VALID_ENTRY("aggregation: classical algo");
140  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
141 #undef SET_VALID_ENTRY
142  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
143 
144  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
145  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
146  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
147  validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
148 
149  return validParamList;
150  }
151 
152  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
154 
155  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
157  Input(currentLevel, "A");
158  Input(currentLevel, "UnAmalgamationInfo");
159 
160  const ParameterList& pL = GetParameterList();
161  if (pL.get<bool>("lightweight wrap") == true) {
162  std::string algo = pL.get<std::string>("aggregation: drop scheme");
163  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
164  Input(currentLevel, "Coordinates");
165  }
166  if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
167  Input(currentLevel, "BlockNumber");
168  }
169  }
170 
171  }
172 
173  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
175 
176  FactoryMonitor m(*this, "Build", currentLevel);
177 
178  typedef Teuchos::ScalarTraits<SC> STS;
179  typedef typename STS::magnitudeType real_type;
180  typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
181  typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
182 
183  if (predrop_ != Teuchos::null)
184  GetOStream(Parameters0) << predrop_->description();
185 
186  RCP<Matrix> realA = Get< RCP<Matrix> >(currentLevel, "A");
187  RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
188  const ParameterList & pL = GetParameterList();
189  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
190 
191  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
192  std::string algo = pL.get<std::string>("aggregation: drop scheme");
193 
194  RCP<RealValuedMultiVector> Coords;
195  RCP<Matrix> A;
196 
197  bool use_block_algorithm=false;
198  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
199  bool useSignedClassical = false;
200  bool generateColoringGraph = false;
201 
202  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
203  // in the block diagonalizaiton). So we'll clobber the rowSumTol with -1.0 in this case
204  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
205  RCP<LocalOrdinalVector> ghostedBlockNumber;
206  ArrayRCP<const LO> g_block_id;
207 
208  if(algo == "distance laplacian" ) {
209  // Grab the coordinates for distance laplacian
210  Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
211  A = realA;
212  }
213  else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
214  useSignedClassical = true;
215  // if(realA->GetFixedBlockSize() > 1) {
216  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
217  // Ghost the column block numbers if we need to
218  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
219  if(!importer.is_null()) {
220  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
221  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
222  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
223  }
224  else {
225  ghostedBlockNumber = BlockNumber;
226  }
227  g_block_id = ghostedBlockNumber->getData(0);
228  // }
229  if(algo == "block diagonal colored signed classical")
230  generateColoringGraph=true;
231  algo = "classical";
232  A = realA;
233 
234  }
235  else if(algo == "block diagonal") {
236  // Handle the "block diagonal" filtering and then leave
237  BlockDiagonalize(currentLevel,realA,false);
238  return;
239  }
240  else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
241  // Handle the "block diagonal" filtering, and then continue onward
242  use_block_algorithm = true;
243  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel,realA,true);
244  if(algo == "block diagonal distance laplacian") {
245  // We now need to expand the coordinates by the interleaved blocksize
246  RCP<RealValuedMultiVector> OldCoords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
247  if (OldCoords->getLocalLength() != realA->getNodeNumRows()) {
248  LO dim = (LO) OldCoords->getNumVectors();
249  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim);
250  for(LO k=0; k<dim; k++){
251  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
252  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
253  for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) {
254  LO new_base = i*dim;
255  for(LO j=0; j<interleaved_blocksize; j++)
256  new_vec[new_base + j] = old_vec[i];
257  }
258  }
259  }
260  else {
261  Coords = OldCoords;
262  }
263  algo = "distance laplacian";
264  }
265  else if(algo == "block diagonal classical") {
266  algo = "classical";
267  }
268  // All cases
269  A = filteredMatrix;
270  rowSumTol = -1.0;
271  }
272  else {
273  A = realA;
274  }
275 
276  // Distance Laplacian weights
277  Array<double> dlap_weights = pL.get<Array<double> >("aggregation: distance laplacian directional weights");
278  enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS};
279  int use_dlap_weights = NO_WEIGHTS;
280  if(algo == "distance laplacian") {
281  LO dim = (LO) Coords->getNumVectors();
282  // If anything isn't 1.0 we need to turn on the weighting
283  bool non_unity = false;
284  for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) {
285  if(dlap_weights[i] != 1.0) {
286  non_unity = true;
287  }
288  }
289  if(non_unity) {
290  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
291  if((LO)dlap_weights.size() == dim)
292  use_dlap_weights = SINGLE_WEIGHTS;
293  else if((LO)dlap_weights.size() == blocksize * dim)
294  use_dlap_weights = BLOCK_WEIGHTS;
295  else {
296  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
297  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
298  }
299  if (GetVerbLevel() & Statistics1)
300  GetOStream(Statistics1) << "Using distance laplacian weights: "<<dlap_weights<<std::endl;
301  }
302  }
303 
304  // decide wether to use the fast-track code path for standard maps or the somewhat slower
305  // code path for non-standard maps
306  /*bool bNonStandardMaps = false;
307  if (A->IsView("stridedMaps") == true) {
308  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
309  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
310  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
311  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
312  bNonStandardMaps = true;
313  }*/
314 
315  if (doExperimentalWrap) {
316  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
317  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
318 
319  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
320  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
321  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
322  real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
323 
325  // Remove this bit once we are confident that cut-based dropping works.
326 #ifdef HAVE_MUELU_DEBUG
327  int distanceLaplacianCutVerbose = 0;
328 #endif
329 #ifdef DJS_READ_ENV_VARIABLES
330  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
331  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
332  }
333 
334  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
335  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
336  realThreshold = 1e-4*tmp;
337  }
338 
339 # ifdef HAVE_MUELU_DEBUG
340  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
341  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
342  }
343 # endif
344 #endif
345 
347  enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric};
348 
349  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
350  decisionAlgoType classicalAlgo = defaultAlgo;
351  if (algo == "distance laplacian") {
352  if (distanceLaplacianAlgoStr == "default")
353  distanceLaplacianAlgo = defaultAlgo;
354  else if (distanceLaplacianAlgoStr == "unscaled cut")
355  distanceLaplacianAlgo = unscaled_cut;
356  else if (distanceLaplacianAlgoStr == "scaled cut")
357  distanceLaplacianAlgo = scaled_cut;
358  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
359  distanceLaplacianAlgo = scaled_cut_symmetric;
360  else
361  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
362  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
363  } else if (algo == "classical") {
364  if (classicalAlgoStr == "default")
365  classicalAlgo = defaultAlgo;
366  else if (classicalAlgoStr == "unscaled cut")
367  classicalAlgo = unscaled_cut;
368  else if (classicalAlgoStr == "scaled cut")
369  classicalAlgo = scaled_cut;
370  else
371  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
372  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
373 
374  } else
375  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
376  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
377 
378  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
379 
380 
381  // NOTE: We don't support signed classical with cut drop at present
382  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassical && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
383 
384 
385  GO numDropped = 0, numTotal = 0;
386  std::string graphType = "unamalgamated"; //for description purposes only
387  if (algo == "classical") {
388  if (predrop_ == null) {
389  // ap: this is a hack: had to declare predrop_ as mutable
390  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
391  }
392 
393  if (predrop_ != null) {
394  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
395  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
396  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
397  // If a user provided a predrop function, it overwrites the XML threshold parameter
398  SC newt = predropConstVal->GetThreshold();
399  if (newt != threshold) {
400  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
401  threshold = newt;
402  }
403  }
404  // At this points we either have
405  // (predrop_ != null)
406  // Therefore, it is sufficient to check only threshold
407  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !useSignedClassical && A->hasCrsGraph()) {
408  // Case 1: scalar problem, no dropping => just use matrix graph
409  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
410  // Detect and record rows that correspond to Dirichlet boundary conditions
411  ArrayRCP<bool > boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
412  if (rowSumTol > 0.)
413  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
414 
415  graph->SetBoundaryNodeMap(boundaryNodes);
416  numTotal = A->getNodeNumEntries();
417 
418  if (GetVerbLevel() & Statistics1) {
419  GO numLocalBoundaryNodes = 0;
420  GO numGlobalBoundaryNodes = 0;
421  for (LO i = 0; i < boundaryNodes.size(); ++i)
422  if (boundaryNodes[i])
423  numLocalBoundaryNodes++;
424  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
425  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
426  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
427  }
428 
429  Set(currentLevel, "DofsPerNode", 1);
430  Set(currentLevel, "Graph", graph);
431 
432  } else if ( (A->GetFixedBlockSize() == 1 && threshold != STS::zero()) ||
433  (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
434  (A->GetFixedBlockSize() == 1 && useSignedClassical) ) {
435  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
436  // graph's map information, e.g., whether index is local
437  // OR a matrix without a CrsGraph
438 
439  // allocate space for the local graph
440  ArrayRCP<LO> rows (A->getNodeNumRows()+1);
441  ArrayRCP<LO> columns(A->getNodeNumEntries());
442 
443  using MT = typename STS::magnitudeType;
444  RCP<Vector> ghostedDiag;
445  ArrayRCP<const SC> ghostedDiagVals;
446  ArrayRCP<const MT> negMaxOffDiagonal;
447  if(useSignedClassical) {
448  if(ghostedBlockNumber.is_null()) {
450  if (GetVerbLevel() & Statistics1)
451  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
452  }
453  else {
454  negMaxOffDiagonal = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber);
455  if (GetVerbLevel() & Statistics1)
456  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
457  }
458  }
459  else {
461  ghostedDiagVals = ghostedDiag->getData(0);
462  }
463  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
464  if (rowSumTol > 0.) {
465  if(ghostedBlockNumber.is_null()) {
466  if (GetVerbLevel() & Statistics1)
467  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
468  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
469  } else {
470  if (GetVerbLevel() & Statistics1)
471  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
472  Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
473  }
474  }
475 
476  LO realnnz = 0;
477  rows[0] = 0;
478  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
479  size_t nnz = A->getNumEntriesInLocalRow(row);
480  bool rowIsDirichlet = boundaryNodes[row];
481  ArrayView<const LO> indices;
482  ArrayView<const SC> vals;
483  A->getLocalRowView(row, indices, vals);
484 
485  if(classicalAlgo == defaultAlgo) {
486  //FIXME the current predrop function uses the following
487  //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
488  //FIXME but the threshold doesn't take into account the rows' diagonal entries
489  //FIXME For now, hardwiring the dropping in here
490 
491  LO rownnz = 0;
492  if(useSignedClassical) {
493  // Signed classical
494  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
495  LO col = indices[colID];
496  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
497  MT neg_aij = - STS::real(vals[colID]);
498  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
499  g_block_id.is_null() ? -1 : g_block_id[row],
500  g_block_id.is_null() ? -1 : g_block_id[col],
501  neg_aij, max_neg_aik);*/
502  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
503  columns[realnnz++] = col;
504  rownnz++;
505  } else
506  numDropped++;
507  }
508  rows[row+1] = realnnz;
509  }
510  else {
511  // Standard abs classical
512  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
513  LO col = indices[colID];
514  MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
515  MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
516 
517  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
518  columns[realnnz++] = col;
519  rownnz++;
520  } else
521  numDropped++;
522  }
523  rows[row+1] = realnnz;
524  }
525  }
526  else {
527  /* Cut Algorithm */
528  //CMS
529  using DropTol = Details::DropTol<real_type,LO>;
530  std::vector<DropTol> drop_vec;
531  drop_vec.reserve(nnz);
532  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
533  const real_type one = Teuchos::ScalarTraits<real_type>::one();
534  LO rownnz = 0;
535  // NOTE: This probably needs to be fixed for rowsum
536 
537  // find magnitudes
538  for (LO colID = 0; colID < (LO)nnz; colID++) {
539  LO col = indices[colID];
540  if (row == col) {
541  drop_vec.emplace_back( zero, one, colID, false);
542  continue;
543  }
544 
545  // Don't aggregate boundaries
546  if(boundaryNodes[colID]) continue;
547  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
548  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
549  drop_vec.emplace_back(aij, aiiajj, colID, false);
550  }
551 
552  const size_t n = drop_vec.size();
553 
554  if (classicalAlgo == unscaled_cut) {
555  std::sort( drop_vec.begin(), drop_vec.end()
556  , [](DropTol const& a, DropTol const& b) {
557  return a.val > b.val;
558  });
559 
560  bool drop = false;
561  for (size_t i=1; i<n; ++i) {
562  if (!drop) {
563  auto const& x = drop_vec[i-1];
564  auto const& y = drop_vec[i];
565  auto a = x.val;
566  auto b = y.val;
567  if (a > realThreshold*b) {
568  drop = true;
569 #ifdef HAVE_MUELU_DEBUG
570  if (distanceLaplacianCutVerbose) {
571  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
572  }
573 #endif
574  }
575  }
576  drop_vec[i].drop = drop;
577  }
578  } else if (classicalAlgo == scaled_cut) {
579  std::sort( drop_vec.begin(), drop_vec.end()
580  , [](DropTol const& a, DropTol const& b) {
581  return a.val/a.diag > b.val/b.diag;
582  });
583  bool drop = false;
584  // printf("[%d] Scaled Cut: ",(int)row);
585  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
586  for (size_t i=1; i<n; ++i) {
587  if (!drop) {
588  auto const& x = drop_vec[i-1];
589  auto const& y = drop_vec[i];
590  auto a = x.val/x.diag;
591  auto b = y.val/y.diag;
592  if (a > realThreshold*b) {
593  drop = true;
594 
595 #ifdef HAVE_MUELU_DEBUG
596  if (distanceLaplacianCutVerbose) {
597  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
598  }
599 #endif
600  }
601  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
602 
603  }
604  drop_vec[i].drop = drop;
605  }
606  // printf("\n");
607  }
608  std::sort( drop_vec.begin(), drop_vec.end()
609  , [](DropTol const& a, DropTol const& b) {
610  return a.col < b.col;
611  }
612  );
613 
614  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
615  LO col = indices[drop_vec[idxID].col];
616  // don't drop diagonal
617  if (row == col) {
618  columns[realnnz++] = col;
619  rownnz++;
620  continue;
621  }
622 
623  if (!drop_vec[idxID].drop) {
624  columns[realnnz++] = col;
625  rownnz++;
626  } else {
627  numDropped++;
628  }
629  }
630  // CMS
631  rows[row+1] = realnnz;
632 
633  }
634  }//end for row
635 
636  columns.resize(realnnz);
637  numTotal = A->getNodeNumEntries();
638  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
639  graph->SetBoundaryNodeMap(boundaryNodes);
640  if (GetVerbLevel() & Statistics1) {
641  GO numLocalBoundaryNodes = 0;
642  GO numGlobalBoundaryNodes = 0;
643  for (LO i = 0; i < boundaryNodes.size(); ++i)
644  if (boundaryNodes[i])
645  numLocalBoundaryNodes++;
646  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
647  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
648  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
649  }
650  Set(currentLevel, "Graph", graph);
651  Set(currentLevel, "DofsPerNode", 1);
652 
653  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
654  if(generateColoringGraph) {
655  RCP<GraphBase> colorGraph;
656  RCP<const Import> importer = A->getCrsGraph()->getImporter();
657  BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer);
658  Set(currentLevel, "Coloring Graph",colorGraph);
659  // #define CMS_DUMP
660 #ifdef CMS_DUMP
661  {
662  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
663  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
664  // int rank = graph->GetDomainMap()->getComm()->getRank();
665  // {
666  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
667  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
668  // colorGraph->print(*fancy,Debug);
669  // }
670  // {
671  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
672  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
673  // graph->print(*fancy,Debug);
674  // }
675 
676  }
677 #endif
678  }//end generateColoringGraph
679  } else if (A->GetFixedBlockSize() > 1 && threshold == STS::zero()) {
680  // Case 3: Multiple DOF/node problem without dropping
681  const RCP<const Map> rowMap = A->getRowMap();
682  const RCP<const Map> colMap = A->getColMap();
683 
684  graphType = "amalgamated";
685 
686  // build node row map (uniqueMap) and node column map (nonUniqueMap)
687  // the arrays rowTranslation and colTranslation contain the local node id
688  // given a local dof id. The data is calculated by the AmalgamationFactory and
689  // stored in the variable container "UnAmalgamationInfo"
690  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
691  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
692  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
693  Array<LO> colTranslation = *(amalInfo->getColTranslation());
694 
695  // get number of local nodes
696  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
697 
698  // Allocate space for the local graph
699  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
700  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
701 
702  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
703 
704  // Detect and record rows that correspond to Dirichlet boundary conditions
705  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
706  // TODO the array one bigger than the number of local rows, and the last entry can
707  // TODO hold the actual number of boundary nodes. Clever, huh?
708  ArrayRCP<bool > pointBoundaryNodes;
709  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
710  if (rowSumTol > 0.)
711  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
712 
713 
714  // extract striding information
715  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
716  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
717  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
718  if (A->IsView("stridedMaps") == true) {
719  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
720  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
721  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
722  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
723  blkId = strMap->getStridedBlockId();
724  if (blkId > -1)
725  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
726  }
727 
728  // loop over all local nodes
729  LO realnnz = 0;
730  rows[0] = 0;
731  Array<LO> indicesExtra;
732  for (LO row = 0; row < numRows; row++) {
733  ArrayView<const LO> indices;
734  indicesExtra.resize(0);
735 
736  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
737  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
738  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
739  // with local ids.
740  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
741  // node.
742  bool isBoundary = false;
743  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
744  for (LO j = 0; j < blkPartSize; j++) {
745  if (pointBoundaryNodes[row*blkPartSize+j]) {
746  isBoundary = true;
747  break;
748  }
749  }
750  } else {
751  isBoundary = true;
752  for (LO j = 0; j < blkPartSize; j++) {
753  if (!pointBoundaryNodes[row*blkPartSize+j]) {
754  isBoundary = false;
755  break;
756  }
757  }
758  }
759 
760  // Merge rows of A
761  // The array indicesExtra contains local column node ids for the current local node "row"
762  if (!isBoundary)
763  MergeRows(*A, row, indicesExtra, colTranslation);
764  else
765  indicesExtra.push_back(row);
766  indices = indicesExtra;
767  numTotal += indices.size();
768 
769  // add the local column node ids to the full columns array which
770  // contains the local column node ids for all local node rows
771  LO nnz = indices.size(), rownnz = 0;
772  for (LO colID = 0; colID < nnz; colID++) {
773  LO col = indices[colID];
774  columns[realnnz++] = col;
775  rownnz++;
776  }
777 
778  if (rownnz == 1) {
779  // If the only element remaining after filtering is diagonal, mark node as boundary
780  // FIXME: this should really be replaced by the following
781  // if (indices.size() == 1 && indices[0] == row)
782  // boundaryNodes[row] = true;
783  // We do not do it this way now because there is no framework for distinguishing isolated
784  // and boundary nodes in the aggregation algorithms
785  amalgBoundaryNodes[row] = true;
786  }
787  rows[row+1] = realnnz;
788  } //for (LO row = 0; row < numRows; row++)
789  columns.resize(realnnz);
790 
791  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
792  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
793 
794  if (GetVerbLevel() & Statistics1) {
795  GO numLocalBoundaryNodes = 0;
796  GO numGlobalBoundaryNodes = 0;
797 
798  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
799  if (amalgBoundaryNodes[i])
800  numLocalBoundaryNodes++;
801 
802  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
803  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
804  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
805  << " agglomerated Dirichlet nodes" << std::endl;
806  }
807 
808  Set(currentLevel, "Graph", graph);
809  Set(currentLevel, "DofsPerNode", blkSize); // full block size
810 
811  } else if (A->GetFixedBlockSize() > 1 && threshold != STS::zero()) {
812  // Case 4: Multiple DOF/node problem with dropping
813  const RCP<const Map> rowMap = A->getRowMap();
814  const RCP<const Map> colMap = A->getColMap();
815  graphType = "amalgamated";
816 
817  // build node row map (uniqueMap) and node column map (nonUniqueMap)
818  // the arrays rowTranslation and colTranslation contain the local node id
819  // given a local dof id. The data is calculated by the AmalgamationFactory and
820  // stored in the variable container "UnAmalgamationInfo"
821  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
822  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
823  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
824  Array<LO> colTranslation = *(amalInfo->getColTranslation());
825 
826  // get number of local nodes
827  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
828 
829  // Allocate space for the local graph
830  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
831  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
832 
833  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
834 
835  // Detect and record rows that correspond to Dirichlet boundary conditions
836  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
837  // TODO the array one bigger than the number of local rows, and the last entry can
838  // TODO hold the actual number of boundary nodes. Clever, huh?
839  ArrayRCP<bool > pointBoundaryNodes;
840  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
841  if (rowSumTol > 0.)
842  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
843 
844 
845  // extract striding information
846  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
847  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
848  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
849  if (A->IsView("stridedMaps") == true) {
850  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
851  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
852  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
853  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
854  blkId = strMap->getStridedBlockId();
855  if (blkId > -1)
856  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
857  }
858 
859  // extract diagonal data for dropping strategy
861  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
862 
863  // loop over all local nodes
864  LO realnnz = 0;
865  rows[0] = 0;
866  Array<LO> indicesExtra;
867  for (LO row = 0; row < numRows; row++) {
868  ArrayView<const LO> indices;
869  indicesExtra.resize(0);
870 
871  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
872  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
873  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
874  // with local ids.
875  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
876  // node.
877  bool isBoundary = false;
878  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
879  for (LO j = 0; j < blkPartSize; j++) {
880  if (pointBoundaryNodes[row*blkPartSize+j]) {
881  isBoundary = true;
882  break;
883  }
884  }
885  } else {
886  isBoundary = true;
887  for (LO j = 0; j < blkPartSize; j++) {
888  if (!pointBoundaryNodes[row*blkPartSize+j]) {
889  isBoundary = false;
890  break;
891  }
892  }
893  }
894 
895  // Merge rows of A
896  // The array indicesExtra contains local column node ids for the current local node "row"
897  if (!isBoundary)
898  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
899  else
900  indicesExtra.push_back(row);
901  indices = indicesExtra;
902  numTotal += indices.size();
903 
904  // add the local column node ids to the full columns array which
905  // contains the local column node ids for all local node rows
906  LO nnz = indices.size(), rownnz = 0;
907  for (LO colID = 0; colID < nnz; colID++) {
908  LO col = indices[colID];
909  columns[realnnz++] = col;
910  rownnz++;
911  }
912 
913  if (rownnz == 1) {
914  // If the only element remaining after filtering is diagonal, mark node as boundary
915  // FIXME: this should really be replaced by the following
916  // if (indices.size() == 1 && indices[0] == row)
917  // boundaryNodes[row] = true;
918  // We do not do it this way now because there is no framework for distinguishing isolated
919  // and boundary nodes in the aggregation algorithms
920  amalgBoundaryNodes[row] = true;
921  }
922  rows[row+1] = realnnz;
923  } //for (LO row = 0; row < numRows; row++)
924  columns.resize(realnnz);
925 
926  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
927  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
928 
929  if (GetVerbLevel() & Statistics1) {
930  GO numLocalBoundaryNodes = 0;
931  GO numGlobalBoundaryNodes = 0;
932 
933  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
934  if (amalgBoundaryNodes[i])
935  numLocalBoundaryNodes++;
936 
937  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
938  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
939  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
940  << " agglomerated Dirichlet nodes" << std::endl;
941  }
942 
943  Set(currentLevel, "Graph", graph);
944  Set(currentLevel, "DofsPerNode", blkSize); // full block size
945  }
946 
947  } else if (algo == "distance laplacian") {
948  LO blkSize = A->GetFixedBlockSize();
949  GO indexBase = A->getRowMap()->getIndexBase();
950  // [*0*] : FIXME
951  // ap: somehow, if I move this line to [*1*], Belos throws an error
952  // I'm not sure what's going on. Do we always have to Get data, if we did
953  // DeclareInput for it?
954  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
955 
956  // Detect and record rows that correspond to Dirichlet boundary conditions
957  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
958  // TODO the array one bigger than the number of local rows, and the last entry can
959  // TODO hold the actual number of boundary nodes. Clever, huh?
960  ArrayRCP<bool > pointBoundaryNodes;
961  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
962  if (rowSumTol > 0.)
963  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
964 
965  if ( (blkSize == 1) && (threshold == STS::zero()) ) {
966  // Trivial case: scalar problem, no dropping. Can return original graph
967  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
968  graph->SetBoundaryNodeMap(pointBoundaryNodes);
969  graphType="unamalgamated";
970  numTotal = A->getNodeNumEntries();
971 
972  if (GetVerbLevel() & Statistics1) {
973  GO numLocalBoundaryNodes = 0;
974  GO numGlobalBoundaryNodes = 0;
975  for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
976  if (pointBoundaryNodes[i])
977  numLocalBoundaryNodes++;
978  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
979  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
980  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
981  }
982 
983  Set(currentLevel, "DofsPerNode", blkSize);
984  Set(currentLevel, "Graph", graph);
985 
986  } else {
987  // ap: We make quite a few assumptions here; general case may be a lot different,
988  // but much much harder to implement. We assume that:
989  // 1) all maps are standard maps, not strided maps
990  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
991  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
992  //
993  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
994  // but as I totally don't understand that code, here is my solution
995 
996  // [*1*]: see [*0*]
997 
998  // Check that the number of local coordinates is consistent with the #rows in A
999  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1000  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getNodeNumElements() << ") by modulo block size ("<< blkSize <<").");
1001 
1002  const RCP<const Map> colMap = A->getColMap();
1003  RCP<const Map> uniqueMap, nonUniqueMap;
1004  Array<LO> colTranslation;
1005  if (blkSize == 1) {
1006  uniqueMap = A->getRowMap();
1007  nonUniqueMap = A->getColMap();
1008  graphType="unamalgamated";
1009 
1010  } else {
1011  uniqueMap = Coords->getMap();
1012  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1013  "Different index bases for matrix and coordinates");
1014 
1015  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1016 
1017  graphType = "amalgamated";
1018  }
1019  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
1020 
1021  RCP<RealValuedMultiVector> ghostedCoords;
1022  RCP<Vector> ghostedLaplDiag;
1023  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1024  if (threshold != STS::zero()) {
1025  // Get ghost coordinates
1026  RCP<const Import> importer;
1027  {
1028  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1029  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1030  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1031  importer = realA->getCrsGraph()->getImporter();
1032  } else {
1033  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1034  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1035  }
1036  } //subtimer
1037  ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
1038  {
1039  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1040  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1041  } //subtimer
1042 
1043  // Construct Distance Laplacian diagonal
1044  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1045  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1046  Array<LO> indicesExtra;
1047  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1048  if (threshold != STS::zero()) {
1049  const size_t numVectors = ghostedCoords->getNumVectors();
1050  coordData.reserve(numVectors);
1051  for (size_t j = 0; j < numVectors; j++) {
1052  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1053  coordData.push_back(tmpData);
1054  }
1055  }
1056  {
1057  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1058  for (LO row = 0; row < numRows; row++) {
1059  ArrayView<const LO> indices;
1060 
1061  if (blkSize == 1) {
1062  ArrayView<const SC> vals;
1063  A->getLocalRowView(row, indices, vals);
1064 
1065  } else {
1066  // Merge rows of A
1067  indicesExtra.resize(0);
1068  MergeRows(*A, row, indicesExtra, colTranslation);
1069  indices = indicesExtra;
1070  }
1071 
1072  LO nnz = indices.size();
1073  bool haveAddedToDiag = false;
1074  for (LO colID = 0; colID < nnz; colID++) {
1075  const LO col = indices[colID];
1076 
1077  if (row != col) {
1078  if(use_dlap_weights == SINGLE_WEIGHTS) {
1079  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1080  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1081  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1082  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1083  }
1084  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1085  int block_id = row % interleaved_blocksize;
1086  int block_start = block_id * interleaved_blocksize;
1087  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1088  }
1089  else {
1090  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1091  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1092  }
1093  haveAddedToDiag = true;
1094  }
1095  }
1096  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1097  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1098  if (!haveAddedToDiag)
1099  localLaplDiagData[row] = STS::rmax();
1100  }
1101  } //subtimer
1102  {
1103  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1104  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1105  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1106  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1107  } //subtimer
1108 
1109  } else {
1110  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1111  }
1112 
1113  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1114 
1115  // allocate space for the local graph
1116  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
1117  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
1118 
1119 #ifdef HAVE_MUELU_DEBUG
1120  // DEBUGGING
1121  for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666;
1122 #endif
1123 
1124  // Extra array for if we're allowing symmetrization with cutting
1125  ArrayRCP<LO> rows_stop;
1126  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1127  if(use_stop_array)
1128  rows_stop.resize(numRows);
1129 
1130  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
1131 
1132  LO realnnz = 0;
1133  rows[0] = 0;
1134 
1135  Array<LO> indicesExtra;
1136  {
1137  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1138  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1139  if (threshold != STS::zero()) {
1140  const size_t numVectors = ghostedCoords->getNumVectors();
1141  coordData.reserve(numVectors);
1142  for (size_t j = 0; j < numVectors; j++) {
1143  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1144  coordData.push_back(tmpData);
1145  }
1146  }
1147 
1148  ArrayView<const SC> vals;//CMS hackery
1149  for (LO row = 0; row < numRows; row++) {
1150  ArrayView<const LO> indices;
1151  indicesExtra.resize(0);
1152  bool isBoundary = false;
1153 
1154  if (blkSize == 1) {
1155  // ArrayView<const SC> vals;//CMS uncomment
1156  A->getLocalRowView(row, indices, vals);
1157  isBoundary = pointBoundaryNodes[row];
1158  } else {
1159  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1160  for (LO j = 0; j < blkSize; j++) {
1161  if (!pointBoundaryNodes[row*blkSize+j]) {
1162  isBoundary = false;
1163  break;
1164  }
1165  }
1166 
1167  // Merge rows of A
1168  if (!isBoundary)
1169  MergeRows(*A, row, indicesExtra, colTranslation);
1170  else
1171  indicesExtra.push_back(row);
1172  indices = indicesExtra;
1173  }
1174  numTotal += indices.size();
1175 
1176  LO nnz = indices.size(), rownnz = 0;
1177 
1178  if(use_stop_array) {
1179  rows[row+1] = rows[row]+nnz;
1180  realnnz = rows[row];
1181  }
1182 
1183  if (threshold != STS::zero()) {
1184  // default
1185  if (distanceLaplacianAlgo == defaultAlgo) {
1186  /* Standard Distance Laplacian */
1187  for (LO colID = 0; colID < nnz; colID++) {
1188 
1189  LO col = indices[colID];
1190 
1191  if (row == col) {
1192  columns[realnnz++] = col;
1193  rownnz++;
1194  continue;
1195  }
1196 
1197  // We do not want the distance Laplacian aggregating boundary nodes
1198  if(isBoundary) continue;
1199 
1200  SC laplVal;
1201  if(use_dlap_weights == SINGLE_WEIGHTS) {
1202  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1203  }
1204  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1205  int block_id = row % interleaved_blocksize;
1206  int block_start = block_id * interleaved_blocksize;
1207  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1208  }
1209  else {
1210  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1211  }
1212  real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1213  real_type aij = STS::magnitude(laplVal*laplVal);
1214 
1215  if (aij > aiiajj) {
1216  columns[realnnz++] = col;
1217  rownnz++;
1218  } else {
1219  numDropped++;
1220  }
1221  }
1222  } else {
1223  /* Cut Algorithm */
1224  using DropTol = Details::DropTol<real_type,LO>;
1225  std::vector<DropTol> drop_vec;
1226  drop_vec.reserve(nnz);
1227  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1228  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1229 
1230  // find magnitudes
1231  for (LO colID = 0; colID < nnz; colID++) {
1232 
1233  LO col = indices[colID];
1234 
1235  if (row == col) {
1236  drop_vec.emplace_back( zero, one, colID, false);
1237  continue;
1238  }
1239  // We do not want the distance Laplacian aggregating boundary nodes
1240  if(isBoundary) continue;
1241 
1242  SC laplVal;
1243  if(use_dlap_weights == SINGLE_WEIGHTS) {
1244  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1245  }
1246  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1247  int block_id = row % interleaved_blocksize;
1248  int block_start = block_id * interleaved_blocksize;
1249  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1250  }
1251  else {
1252  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1253  }
1254 
1255  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1256  real_type aij = STS::magnitude(laplVal*laplVal);
1257 
1258  drop_vec.emplace_back(aij, aiiajj, colID, false);
1259  }
1260 
1261  const size_t n = drop_vec.size();
1262 
1263  if (distanceLaplacianAlgo == unscaled_cut) {
1264 
1265  std::sort( drop_vec.begin(), drop_vec.end()
1266  , [](DropTol const& a, DropTol const& b) {
1267  return a.val > b.val;
1268  }
1269  );
1270 
1271  bool drop = false;
1272  for (size_t i=1; i<n; ++i) {
1273  if (!drop) {
1274  auto const& x = drop_vec[i-1];
1275  auto const& y = drop_vec[i];
1276  auto a = x.val;
1277  auto b = y.val;
1278  if (a > realThreshold*b) {
1279  drop = true;
1280 #ifdef HAVE_MUELU_DEBUG
1281  if (distanceLaplacianCutVerbose) {
1282  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1283  }
1284 #endif
1285  }
1286  }
1287  drop_vec[i].drop = drop;
1288  }
1289  }
1290  else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1291 
1292  std::sort( drop_vec.begin(), drop_vec.end()
1293  , [](DropTol const& a, DropTol const& b) {
1294  return a.val/a.diag > b.val/b.diag;
1295  }
1296  );
1297 
1298  bool drop = false;
1299  for (size_t i=1; i<n; ++i) {
1300  if (!drop) {
1301  auto const& x = drop_vec[i-1];
1302  auto const& y = drop_vec[i];
1303  auto a = x.val/x.diag;
1304  auto b = y.val/y.diag;
1305  if (a > realThreshold*b) {
1306  drop = true;
1307 #ifdef HAVE_MUELU_DEBUG
1308  if (distanceLaplacianCutVerbose) {
1309  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1310  }
1311 #endif
1312  }
1313  }
1314  drop_vec[i].drop = drop;
1315  }
1316  }
1317 
1318  std::sort( drop_vec.begin(), drop_vec.end()
1319  , [](DropTol const& a, DropTol const& b) {
1320  return a.col < b.col;
1321  }
1322  );
1323 
1324  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1325  LO col = indices[drop_vec[idxID].col];
1326 
1327 
1328  // don't drop diagonal
1329  if (row == col) {
1330  columns[realnnz++] = col;
1331  rownnz++;
1332  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1333  continue;
1334  }
1335 
1336  if (!drop_vec[idxID].drop) {
1337  columns[realnnz++] = col;
1338  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1339  rownnz++;
1340  } else {
1341  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1342  numDropped++;
1343  }
1344  }
1345  }
1346  } else {
1347  // Skip laplace calculation and threshold comparison for zero threshold
1348  for (LO colID = 0; colID < nnz; colID++) {
1349  LO col = indices[colID];
1350  columns[realnnz++] = col;
1351  rownnz++;
1352  }
1353  }
1354 
1355  if ( rownnz == 1) {
1356  // If the only element remaining after filtering is diagonal, mark node as boundary
1357  // FIXME: this should really be replaced by the following
1358  // if (indices.size() == 1 && indices[0] == row)
1359  // boundaryNodes[row] = true;
1360  // We do not do it this way now because there is no framework for distinguishing isolated
1361  // and boundary nodes in the aggregation algorithms
1362  amalgBoundaryNodes[row] = true;
1363  }
1364 
1365  if(use_stop_array)
1366  rows_stop[row] = rownnz + rows[row];
1367  else
1368  rows[row+1] = realnnz;
1369  } //for (LO row = 0; row < numRows; row++)
1370 
1371  } //subtimer
1372 
1373  if (use_stop_array) {
1374  // Do symmetrization of the cut matrix
1375  // NOTE: We assume nested row/column maps here
1376  for (LO row = 0; row < numRows; row++) {
1377  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1378  LO col = columns[colidx];
1379  if(col >= numRows) continue;
1380 
1381  bool found = false;
1382  for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) {
1383  if (columns[t_col] == row)
1384  found = true;
1385  }
1386  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1387  // into a Dirichlet unknown. In that case don't.
1388  if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) {
1389  LO new_idx = rows_stop[col];
1390  // printf("(%d,%d) SYMADD entry\n",col,row);
1391  columns[new_idx] = row;
1392  rows_stop[col]++;
1393  numDropped--;
1394  }
1395  }
1396  }
1397 
1398  // Condense everything down
1399  LO current_start=0;
1400  for (LO row = 0; row < numRows; row++) {
1401  LO old_start = current_start;
1402  for (LO col = rows[row]; col < rows_stop[row]; col++) {
1403  if(current_start != col) {
1404  columns[current_start] = columns[col];
1405  }
1406  current_start++;
1407  }
1408  rows[row] = old_start;
1409  }
1410  rows[numRows] = realnnz = current_start;
1411 
1412  }
1413 
1414  columns.resize(realnnz);
1415 
1416  RCP<GraphBase> graph;
1417  {
1418  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1419  graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1420  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1421  } //subtimer
1422 
1423  if (GetVerbLevel() & Statistics1) {
1424  GO numLocalBoundaryNodes = 0;
1425  GO numGlobalBoundaryNodes = 0;
1426 
1427  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1428  if (amalgBoundaryNodes[i])
1429  numLocalBoundaryNodes++;
1430 
1431  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1432  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1433  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1434  << " using threshold " << dirichletThreshold << std::endl;
1435  }
1436 
1437  Set(currentLevel, "Graph", graph);
1438  Set(currentLevel, "DofsPerNode", blkSize);
1439  }
1440  }
1441 
1442  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1443  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1444  GO numGlobalTotal, numGlobalDropped;
1445  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1446  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1447  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1448  if (numGlobalTotal != 0)
1449  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1450  GetOStream(Statistics1) << std::endl;
1451  }
1452 
1453  } else {
1454  //what Tobias has implemented
1455 
1456  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1457  //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1458  GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1459  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1460 
1461  RCP<const Map> rowMap = A->getRowMap();
1462  RCP<const Map> colMap = A->getColMap();
1463 
1464  LO blockdim = 1; // block dim for fixed size blocks
1465  GO indexBase = rowMap->getIndexBase(); // index base of maps
1466  GO offset = 0;
1467 
1468  // 1) check for blocking/striding information
1469  if(A->IsView("stridedMaps") &&
1470  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1471  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1472  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1473  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1474  blockdim = strMap->getFixedBlockSize();
1475  offset = strMap->getOffset();
1476  oldView = A->SwitchToView(oldView);
1477  GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1478  } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1479 
1480  // 2) get row map for amalgamated matrix (graph of A)
1481  // with same distribution over all procs as row map of A
1482  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1483  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getNodeNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1484 
1485  // 3) create graph of amalgamated matrix
1486  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getNodeMaxNumRowEntries()*blockdim);
1487 
1488  LO numRows = A->getRowMap()->getNodeNumElements();
1489  LO numNodes = nodeMap->getNodeNumElements();
1490  const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1491  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1492  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1493 
1494  // 4) do amalgamation. generate graph of amalgamated matrix
1495  // Note, this code is much more inefficient than the leightwight implementation
1496  // Most of the work has already been done in the AmalgamationFactory
1497  for(LO row=0; row<numRows; row++) {
1498  // get global DOF id
1499  GO grid = rowMap->getGlobalElement(row);
1500 
1501  // reinitialize boolean helper variable
1502  bIsDiagonalEntry = false;
1503 
1504  // translate grid to nodeid
1505  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1506 
1507  size_t nnz = A->getNumEntriesInLocalRow(row);
1508  Teuchos::ArrayView<const LO> indices;
1509  Teuchos::ArrayView<const SC> vals;
1510  A->getLocalRowView(row, indices, vals);
1511 
1512  RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1513  LO realnnz = 0;
1514  for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1515  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1516 
1517  if(vals[col]!=STS::zero()) {
1518  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1519  cnodeIds->push_back(cnodeId);
1520  realnnz++; // increment number of nnz in matrix row
1521  if (grid == gcid) bIsDiagonalEntry = true;
1522  }
1523  }
1524 
1525  if(realnnz == 1 && bIsDiagonalEntry == true) {
1526  LO lNodeId = nodeMap->getLocalElement(nodeId);
1527  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1528  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1529  amalgBoundaryNodes[lNodeId] = true;
1530  }
1531 
1532  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1533 
1534  if(arr_cnodeIds.size() > 0 )
1535  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1536  }
1537  // fill matrix graph
1538  crsGraph->fillComplete(nodeMap,nodeMap);
1539 
1540  // 5) create MueLu Graph object
1541  RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1542 
1543  // Detect and record rows that correspond to Dirichlet boundary conditions
1544  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1545 
1546  if (GetVerbLevel() & Statistics1) {
1547  GO numLocalBoundaryNodes = 0;
1548  GO numGlobalBoundaryNodes = 0;
1549  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1550  if (amalgBoundaryNodes[i])
1551  numLocalBoundaryNodes++;
1552  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1553  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1554  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1555  }
1556 
1557  // 6) store results in Level
1558  //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1559  Set(currentLevel, "DofsPerNode", blockdim);
1560  Set(currentLevel, "Graph", graph);
1561 
1562  } //if (doExperimentalWrap) ... else ...
1563 
1564 
1565  } //Build
1566 
1567  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1568  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1569  typedef typename ArrayView<const LO>::size_type size_type;
1570 
1571  // extract striding information
1572  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1573  if (A.IsView("stridedMaps") == true) {
1574  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1575  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1576  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1577  if (strMap->getStridedBlockId() > -1)
1578  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1579  }
1580 
1581  // count nonzero entries in all dof rows associated with node row
1582  size_t nnz = 0, pos = 0;
1583  for (LO j = 0; j < blkSize; j++)
1584  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1585 
1586  if (nnz == 0) {
1587  cols.resize(0);
1588  return;
1589  }
1590 
1591  cols.resize(nnz);
1592 
1593  // loop over all local dof rows associated with local node "row"
1594  ArrayView<const LO> inds;
1595  ArrayView<const SC> vals;
1596  for (LO j = 0; j < blkSize; j++) {
1597  A.getLocalRowView(row*blkSize+j, inds, vals);
1598  size_type numIndices = inds.size();
1599 
1600  if (numIndices == 0) // skip empty dof rows
1601  continue;
1602 
1603  // cols: stores all local node ids for current local node id "row"
1604  cols[pos++] = translation[inds[0]];
1605  for (size_type k = 1; k < numIndices; k++) {
1606  LO nodeID = translation[inds[k]];
1607  // Here we try to speed up the process by reducing the size of an array
1608  // to sort. This works if the column nonzeros belonging to the same
1609  // node are stored consequently.
1610  if (nodeID != cols[pos-1])
1611  cols[pos++] = nodeID;
1612  }
1613  }
1614  cols.resize(pos);
1615  nnz = pos;
1616 
1617  // Sort and remove duplicates
1618  std::sort(cols.begin(), cols.end());
1619  pos = 0;
1620  for (size_t j = 1; j < nnz; j++)
1621  if (cols[j] != cols[pos])
1622  cols[++pos] = cols[j];
1623  cols.resize(pos+1);
1624  }
1625 
1626  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1627  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1628  typedef typename ArrayView<const LO>::size_type size_type;
1629  typedef Teuchos::ScalarTraits<SC> STS;
1630 
1631  // extract striding information
1632  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1633  if (A.IsView("stridedMaps") == true) {
1634  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1635  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1636  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1637  if (strMap->getStridedBlockId() > -1)
1638  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1639  }
1640 
1641  // count nonzero entries in all dof rows associated with node row
1642  size_t nnz = 0, pos = 0;
1643  for (LO j = 0; j < blkSize; j++)
1644  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1645 
1646  if (nnz == 0) {
1647  cols.resize(0);
1648  return;
1649  }
1650 
1651  cols.resize(nnz);
1652 
1653  // loop over all local dof rows associated with local node "row"
1654  ArrayView<const LO> inds;
1655  ArrayView<const SC> vals;
1656  for (LO j = 0; j < blkSize; j++) {
1657  A.getLocalRowView(row*blkSize+j, inds, vals);
1658  size_type numIndices = inds.size();
1659 
1660  if (numIndices == 0) // skip empty dof rows
1661  continue;
1662 
1663  // cols: stores all local node ids for current local node id "row"
1664  LO prevNodeID = -1;
1665  for (size_type k = 0; k < numIndices; k++) {
1666  LO dofID = inds[k];
1667  LO nodeID = translation[inds[k]];
1668 
1669  // we avoid a square root by using squared values
1670  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1671  typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1672 
1673  // check dropping criterion
1674  if (aij > aiiajj || (row*blkSize+j == dofID)) {
1675  // accept entry in graph
1676 
1677  // Here we try to speed up the process by reducing the size of an array
1678  // to sort. This works if the column nonzeros belonging to the same
1679  // node are stored consequently.
1680  if (nodeID != prevNodeID) {
1681  cols[pos++] = nodeID;
1682  prevNodeID = nodeID;
1683  }
1684  }
1685  }
1686  }
1687  cols.resize(pos);
1688  nnz = pos;
1689 
1690  // Sort and remove duplicates
1691  std::sort(cols.begin(), cols.end());
1692  pos = 0;
1693  for (size_t j = 1; j < nnz; j++)
1694  if (cols[j] != cols[pos])
1695  cols[++pos] = cols[j];
1696  cols.resize(pos+1);
1697 
1698  return;
1699  }
1700 
1701 
1702 
1703  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1704  Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level & currentLevel,const RCP<Matrix>& A,bool generate_matrix) const {
1705  typedef Teuchos::ScalarTraits<SC> STS;
1706 
1707  const ParameterList & pL = GetParameterList();
1708  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1709  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1710 
1711  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
1712  RCP<LocalOrdinalVector> ghostedBlockNumber;
1713  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<<std::endl;
1714 
1715  // Ghost the column block numbers if we need to
1716  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1717  if(!importer.is_null()) {
1718  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1719  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
1720  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1721  }
1722  else {
1723  ghostedBlockNumber = BlockNumber;
1724  }
1725 
1726  // Accessors for block numbers
1727  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1728  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1729 
1730  // allocate space for the local graph
1731  ArrayRCP<size_t> rows_mat;
1732  ArrayRCP<LO> rows_graph,columns;
1733  ArrayRCP<SC> values;
1734  RCP<CrsMatrixWrap> crs_matrix_wrap;
1735 
1736  if(generate_matrix) {
1737  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1738  crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getNodeNumEntries(), rows_mat, columns, values);
1739  }
1740  else {
1741  rows_graph.resize(A->getNodeNumRows()+1);
1742  columns.resize(A->getNodeNumEntries());
1743  values.resize(A->getNodeNumEntries());
1744  }
1745 
1746  LO realnnz = 0;
1747  GO numDropped = 0, numTotal = 0;
1748  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
1749  LO row_block = row_block_number[row];
1750  size_t nnz = A->getNumEntriesInLocalRow(row);
1751  ArrayView<const LO> indices;
1752  ArrayView<const SC> vals;
1753  A->getLocalRowView(row, indices, vals);
1754 
1755  LO rownnz = 0;
1756  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1757  LO col = indices[colID];
1758  LO col_block = col_block_number[col];
1759 
1760  if(row_block == col_block) {
1761  if(generate_matrix) values[realnnz] = vals[colID];
1762  columns[realnnz++] = col;
1763  rownnz++;
1764  } else
1765  numDropped++;
1766  }
1767  if(generate_matrix) rows_mat[row+1] = realnnz;
1768  else rows_graph[row+1] = realnnz;
1769  }
1770 
1771  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1772  if (rowSumTol > 0.)
1773  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
1774 
1775 
1776  if(!generate_matrix) {
1777  // We can't resize an Arrayrcp and pass the checks for setAllValues
1778  values.resize(realnnz);
1779  columns.resize(realnnz);
1780  }
1781  numTotal = A->getNodeNumEntries();
1782 
1783  if (GetVerbLevel() & Statistics1) {
1784  GO numLocalBoundaryNodes = 0;
1785  GO numGlobalBoundaryNodes = 0;
1786  for (LO i = 0; i < boundaryNodes.size(); ++i)
1787  if (boundaryNodes[i])
1788  numLocalBoundaryNodes++;
1789  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1790  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1791  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1792 
1793  GO numGlobalTotal, numGlobalDropped;
1794  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1795  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1796  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1797  if (numGlobalTotal != 0)
1798  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1799  GetOStream(Statistics1) << std::endl;
1800  }
1801 
1802  Set(currentLevel, "Filtering", true);
1803 
1804  if(generate_matrix) {
1805  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1806  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1807  // here, which is legit, because we never use them anyway.
1808  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values);
1809  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1810  }
1811  else {
1812  RCP<GraphBase> graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1813  graph->SetBoundaryNodeMap(boundaryNodes);
1814  Set(currentLevel, "Graph", graph);
1815  }
1816 
1817 
1818  Set(currentLevel, "DofsPerNode", 1);
1819  return crs_matrix_wrap;
1820  }
1821 
1822 
1823  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1824  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<GraphBase> & inputGraph, const RCP<LocalOrdinalVector> & ghostedBlockNumber, RCP<GraphBase> & outputGraph, RCP<const Import> & importer) const {
1825  typedef Teuchos::ScalarTraits<SC> STS;
1826 
1827  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1828  const ParameterList & pL = GetParameterList();
1829 
1830  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1831 
1832  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1833  if (localizeColoringGraph)
1834  GetOStream(Statistics1) << ", with localization" <<std::endl;
1835  else
1836  GetOStream(Statistics1) << ", without localization" <<std::endl;
1837 
1838  // Accessors for block numbers
1839  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1840  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1841 
1842  // allocate space for the local graph
1843  ArrayRCP<size_t> rows_mat;
1844  ArrayRCP<LO> rows_graph,columns;
1845 
1846  rows_graph.resize(inputGraph->GetNodeNumVertices()+1);
1847  columns.resize(inputGraph->GetNodeNumEdges());
1848 
1849  LO realnnz = 0;
1850  GO numDropped = 0, numTotal = 0;
1851  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getNodeNumElements());
1852  if (localizeColoringGraph) {
1853 
1854  for (LO row = 0; row < numRows; ++row) {
1855  LO row_block = row_block_number[row];
1856  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1857 
1858  LO rownnz = 0;
1859  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1860  LO col = indices[colID];
1861  LO col_block = col_block_number[col];
1862 
1863  if((row_block == col_block) && (col < numRows)) {
1864  columns[realnnz++] = col;
1865  rownnz++;
1866  } else
1867  numDropped++;
1868  }
1869  rows_graph[row+1] = realnnz;
1870  }
1871  } else {
1872  // ghosting of boundary node map
1873  Teuchos::ArrayRCP<const bool> boundaryNodes = inputGraph->GetBoundaryNodeMap();
1874  auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetDomainMap());
1875  for (size_t i=0; i<inputGraph->GetNodeNumVertices(); i++)
1876  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1877  // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1878  auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetImportMap());
1879  boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT);
1880  auto boundaryColumn = boundaryColumnVector->getData(0);
1881 
1882  for (LO row = 0; row < numRows; ++row) {
1883  LO row_block = row_block_number[row];
1884  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1885 
1886  LO rownnz = 0;
1887  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1888  LO col = indices[colID];
1889  LO col_block = col_block_number[col];
1890 
1891  if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1892  columns[realnnz++] = col;
1893  rownnz++;
1894  } else
1895  numDropped++;
1896  }
1897  rows_graph[row+1] = realnnz;
1898  }
1899  }
1900 
1901  columns.resize(realnnz);
1902  numTotal = inputGraph->GetNodeNumEdges();
1903 
1904  if (GetVerbLevel() & Statistics1) {
1905  RCP<const Teuchos::Comm<int> > comm = inputGraph->GetDomainMap()->getComm();
1906  GO numGlobalTotal, numGlobalDropped;
1907  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1908  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1909  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1910  if (numGlobalTotal != 0)
1911  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1912  GetOStream(Statistics1) << std::endl;
1913  }
1914 
1915  if (localizeColoringGraph) {
1916  outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1917  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1918  } else {
1919  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1920 #ifdef HAVE_XPETRA_TPETRA
1921  auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1922 
1923  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1924  auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal,GlobalOrdinal,Node>(tpGraph));
1925  auto tpGraphSym = sym->symmetrize();
1926 
1927  auto rowsSym = tpGraphSym->getNodeRowPtrs();
1928  ArrayRCP<LO> rows_graphSym;
1929  rows_graphSym.resize(rowsSym.size());
1930  for (LO row = 0; row < rowsSym.size(); row++)
1931  rows_graphSym[row] = rowsSym[row];
1932  outputGraph = rcp(new LWGraph(rows_graphSym, tpGraphSym->getNodePackedIndices(), inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
1933  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1934 #endif
1935  }
1936 
1937  }
1938 
1939 
1940 
1941 } //namespace MueLu
1942 
1943 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
Exception indicating invalid cast attempted.
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
MueLu representation of a compressed row storage graph.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
DropTol & operator=(DropTol const &)=default
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Exception throws to report incompatible objects (like maps).
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Additional warnings.
void DeclareInput(Level &currentLevel) const
Input.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
void Build(Level &currentLevel) const
Build an object with this factory.
Print class parameters.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
void BlockDiagonalizeGraph(const RCP< GraphBase > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< GraphBase > &outputGraph, RCP< const Import > &importer) const