MueLu  Version of the Day
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Teuchos_LAPACK.hpp>
52 
53 #include <Xpetra_CrsMatrixWrap.hpp>
54 #include <Xpetra_ImportFactory.hpp>
55 #include <Xpetra_Matrix.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 
61 
62 #include "MueLu_MasterList.hpp"
63 #include "MueLu_Monitor.hpp"
64 
65 namespace MueLu {
66 
67  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("semicoarsen: coarsen rate");
73 #undef SET_VALID_ENTRY
74  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
75  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
76  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
77 
78  validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection information");
79  validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection information");
80  validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for LineDetection information");
81 
82  return validParamList;
83  }
84 
85  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
87  Input(fineLevel, "A");
88  Input(fineLevel, "Nullspace");
89 
90  Input(fineLevel, "LineDetection_VertLineIds");
91  Input(fineLevel, "LineDetection_Layers");
92  Input(fineLevel, "CoarseNumZLayers");
93 
94  // check whether fine level coordinate information is available.
95  // If yes, request the fine level coordinates and generate coarse coordinates
96  // during the Build call
97  if (fineLevel.GetLevelID() == 0) {
98  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
99  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
100  bTransferCoordinates_ = true;
101  }
102  } else if (bTransferCoordinates_ == true){
103  // on coarser levels we check the default factory providing "Coordinates"
104  // or the factory declared to provide "Coordinates"
105  // first, check which factory is providing coordinate information
106  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
107  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
108  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
109  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
110  bTransferCoordinates_ = true;
111  }
112  }
113  }
114 
115  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117  return BuildP(fineLevel, coarseLevel);
118  }
119 
120  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
122  FactoryMonitor m(*this, "Build", coarseLevel);
123 
124  // obtain general variables
125  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
126  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
127 
128  // get user-provided coarsening rate parameter (constant over all levels)
129  const ParameterList& pL = GetParameterList();
130  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
131 
132  // collect general input data
133  LO BlkSize = A->GetFixedBlockSize();
134  RCP<const Map> rowMap = A->getRowMap();
135  LO Ndofs = rowMap->getNodeNumElements();
136  LO Nnodes = Ndofs/BlkSize;
137 
138  // collect line detection information generated by the LineDetectionFactory instance
139  LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
140  LO CoarseNumZLayers = FineNumZLayers;
141  Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
142  Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
143  LO* VertLineId = TVertLineId.getRawPtr();
144  LO* LayerId = TLayerId.getRawPtr();
145 
146  // generate transfer operator with semicoarsening
147  RCP<const Map> theCoarseMap;
148  RCP<Matrix> P;
149  GO Ncoarse = MakeSemiCoarsenP(Nnodes,CoarseNumZLayers,CoarsenRate,LayerId,VertLineId,
150  BlkSize, A, P, theCoarseMap);
151 
152  // set StridingInformation of P
153  if (A->IsView("stridedMaps") == true)
154  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
155  else
156  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
157 
158  // Store number of coarse z-layers on the coarse level container
159  // This information is used by the LineDetectionAlgorithm
160  // TODO get rid of the NoFactory
161  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers * Ncoarse/Ndofs), MueLu::NoFactory::get());
162 
163  // store semicoarsening transfer on coarse level
164  Set(coarseLevel, "P", P);
165 
166  // rst: null space might get scaled here ... do we care. We could just inject at the cpoints, but I don't
167  // feel that this is needed.
168  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(P->getDomainMap(), fineNullspace->getNumVectors());
169  P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
170  Set(coarseLevel, "Nullspace", coarseNullspace);
171 
172  // transfer coordinates
173  if(bTransferCoordinates_) {
174  //Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
176  RCP<xdMV> fineCoords = Teuchos::null;
177  if (fineLevel.GetLevelID() == 0 &&
178  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
180  } else {
181  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
183  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
184  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
185  }
186  }
187 
188  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
189 
190  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
191  ArrayRCP<double> x = fineCoords->getDataNonConst(0);
192  ArrayRCP<double> y = fineCoords->getDataNonConst(1);
193  ArrayRCP<double> z = fineCoords->getDataNonConst(2);
194 
195  // determine the maximum and minimum z coordinate value on the current processor.
196  double zval_max = -Teuchos::ScalarTraits<double>::one() / Teuchos::ScalarTraits<double>::sfmin();
197  double zval_min = Teuchos::ScalarTraits<double>::one() / Teuchos::ScalarTraits<double>::sfmin();
198  for ( ArrayRCP<double>::iterator it = z.begin(); it != z.end(); ++it) {
199  if(*it > zval_max) zval_max = *it;
200  if(*it < zval_min) zval_min = *it;
201  }
202 
203  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers * Ncoarse/Ndofs);
204 
205  ArrayRCP<double> myZLayerCoords = Teuchos::arcp<double>(myCoarseZLayers);
206  if(myCoarseZLayers == 1) {
207  myZLayerCoords[0] = zval_min;
208  } else {
209  double dz = (zval_max-zval_min)/(myCoarseZLayers-1);
210  for(LO k = 0; k<myCoarseZLayers; ++k) {
211  myZLayerCoords[k] = k*dz;
212  }
213  }
214 
215  // Note, that the coarse level node coordinates have to be in vertical ordering according
216  // to the numbering of the vertical lines
217 
218  // number of vertical lines on current node:
219  LO numVertLines = Nnodes / FineNumZLayers;
220  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
221 
222  //std::cout << "rowMap elements: " << rowMap->getNodeNumElements() << std::endl;
223  //std::cout << "fineCoords: " << fineCoords->getNodeNumElements() << std::endl;
224  //std::cout << "TVertLineId.size(): " << TVertLineId.size() << std::endl;
225  //std::cout << "numVertLines=" << numVertLines << std::endl;
226  //std::cout << "numLocalCoarseNodes=" << numLocalCoarseNodes << std::endl;
227 
228  RCP<const Map> coarseCoordMap =
229  MapFactory::Build (fineCoords->getMap()->lib(),
230  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
231  Teuchos::as<size_t>(numLocalCoarseNodes),
232  fineCoords->getMap()->getIndexBase(),
233  fineCoords->getMap()->getComm());
234  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<double,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
235  coarseCoords->putScalar(-1.0);
236  ArrayRCP<double> cx = coarseCoords->getDataNonConst(0);
237  ArrayRCP<double> cy = coarseCoords->getDataNonConst(1);
238  ArrayRCP<double> cz = coarseCoords->getDataNonConst(2);
239 
240  // loop over all vert line indices (stop as soon as possible)
241  LO cntCoarseNodes = 0;
242  for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
243  //vertical line id in *vt
244  LO curVertLineId = TVertLineId[vt];
245 
246  if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
247  cy[curVertLineId * myCoarseZLayers] == -1.0) {
248  // loop over all local myCoarseZLayers
249  for (LO n=0; n<myCoarseZLayers; ++n) {
250  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
251  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
252  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
253  }
254  cntCoarseNodes += myCoarseZLayers;
255  }
256 
257  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
258  if(cntCoarseNodes == numLocalCoarseNodes) break;
259  }
260 
261  // set coarse level coordinates
262  Set(coarseLevel, "Coordinates", coarseCoords);
263  } /* end bool bTransferCoordinates */
264 
265  }
266 
267  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
268  LocalOrdinal SemiCoarsenPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindCpts(LocalOrdinal const PtsPerLine, LocalOrdinal const CoarsenRate, LocalOrdinal const Thin, LocalOrdinal **LayerCpts) const {
269  /*
270  * Given the number of points in the z direction (PtsPerLine) and a
271  * coarsening rate (CoarsenRate), determine which z-points will serve
272  * as Cpts and return the total number of Cpts.
273  *
274  * Input
275  * PtsPerLine: Number of fine level points in the z direction
276  *
277  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
278  *
279  * Thin: Must be either 0 or 1. Thin decides what to do when
280  * (PtsPerLine+1)/CoarsenRate is not an integer.
281  *
282  * Thin == 0 ==> ceil() the above fraction
283  * Thin == 1 ==> floor() the above fraction
284  *
285  * Output
286  * LayerCpts Array where LayerCpts[i] indicates that the
287  * LayerCpts[i]th fine level layer is a Cpt Layer.
288  * Note: fine level layers are assumed to be numbered starting
289  * a one.
290  */
291  double temp, RestStride, di;
292  LO NCpts, i;
293  LO NCLayers = -1;
294  LO FirstStride;
295 
296  temp = ((double) (PtsPerLine+1))/((double) (CoarsenRate)) - 1.0;
297  if (Thin == 1) NCpts = (LO) ceil(temp);
298  else NCpts = (LO) floor(temp);
299 
300  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
301 
302  if (NCpts < 1) NCpts = 1;
303 
304  FirstStride= (LO) ceil( ((double) PtsPerLine+1)/( (double) (NCpts+1)));
305  RestStride = ((double) (PtsPerLine-FirstStride+1))/((double) NCpts);
306 
307  NCLayers = (LO) floor((((double) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
308 
309  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
310 
311  di = (double) FirstStride;
312  for (i = 1; i <= NCpts; i++) {
313  (*LayerCpts)[i] = (LO) floor(di);
314  di += RestStride;
315  }
316 
317  return(NCLayers);
318  }
319 
320 #define MaxHorNeighborNodes 75
321 
322  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
323  LocalOrdinal SemiCoarsenPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MakeSemiCoarsenP(LocalOrdinal const Ntotal, LocalOrdinal const nz, LocalOrdinal const CoarsenRate, LocalOrdinal const LayerId[],
324  LocalOrdinal const VertLineId[], LocalOrdinal const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P, RCP<const Map>& coarseMap) const {
325 
326  /*
327  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
328  * describing the z-layer and vertical line (LayerId and VertLineId)
329  * of each matrix block row, a coarsening rate, and dofs/node information,
330  * construct a prolongator that coarsening to semicoarsening in the z-direction
331  * using something like an operator-dependent grid transfer. In particular,
332  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
333  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
334  * Cpts are replaced by identity matrices. This tridiagonal is solved
335  * to determine each interpolation basis functions. Each Blk Rhs corresponds
336  * to all zeros except at the corresponding C-pt which has an identity
337  *
338  * On termination, return the number of local prolongator columns owned by
339  * this processor.
340  *
341  * Note: This code was adapted from a matlab code where offsets/arrays
342  * start from 1. In most parts of the code, this 1 offset is kept
343  * (in some cases wasting the first element of the array). The
344  * input and output matrices of this function has been changed to
345  * have offsets/rows/columns which start from 0. LayerId[] and
346  * VertLineId[] currently start from 1.
347  *
348  * Input
349  * =====
350  * Ntotal Number of fine level Blk Rows owned by this processor
351  *
352  * nz Number of vertical layers. Note: partitioning must be done
353  * so that each processor owns an entire vertical line. This
354  * means that nz is the global number of layers, which should
355  * be equal to the local number of layers.
356  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
357  * would correspond to CoarsenRate = 3.
358  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
359  * layer number associated with the dofs within BlkRow.
360  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
361  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
362  * BlkRows associated with nodes along the same vertical line
363  * in the mesh should have the same LineId.
364  * DofsPerNode Number of degrees-of-freedom per mesh node.
365  *
366  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
367  * OrigAcols,
368  * OrigAvals
369  *
370  * Output
371  * =====
372  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
373  * ParamPcols,
374  * ParamsPvals
375  */
376  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
377  int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
378  int BlkRow, dof_i, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
379  int i, j, iii, col, count, index, loc, PtRow, PtCol;
380  SC *BandSol=NULL, *BandMat=NULL, TheSum;
381  int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
382  int *Pcols;
383  size_t *Pptr;
384  SC *Pvals;
385  int MaxStencilSize, MaxNnzPerRow;
386  LO *LayDiff=NULL;
387  int CurRow, LastGuy = -1, NewPtr;
388  int Ndofs;
389  int Nghost;
390  LO *Layerdofs = NULL, *Col2Dof = NULL;
391 
392  Teuchos::LAPACK<LO,SC> lapack;
393 
394  char notrans[3];
395  notrans[0] = 'N';
396  notrans[1] = 'N';
397 
398 
399  MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
400  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
401 
402  Nghost = Amat->getColMap()->getNodeNumElements() - Amat->getDomainMap()->getNodeNumElements();
403  if (Nghost < 0) Nghost = 0;
404  Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
405  Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
406 
407  RCP<Xpetra::Vector<LO,LO,GO,NO> > localdtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getDomainMap());
408  RCP<Xpetra::Vector<LO,LO,GO,NO> > dtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getColMap());
409  ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
410 
411  for (i = 0; i < Ntotal*DofsPerNode; i++)
412  valptr[i]= LayerId[i/DofsPerNode];
413 
414  RCP< const Import> importer;
415  importer = Amat->getCrsGraph()->getImporter();
416  if (importer == Teuchos::null) {
417  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
418  }
419  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
420 
421  valptr= dtemp->getDataNonConst(0);
422  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
423  valptr= localdtemp->getDataNonConst(0);
424  for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
425  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
426  valptr= dtemp->getDataNonConst(0);
427  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
428 
429  if (Ntotal != 0) {
430  NLayers = LayerId[0];
431  NVertLines= VertLineId[0];
432  }
433  else { NLayers = -1; NVertLines = -1; }
434 
435  for (i = 1; i < Ntotal; i++) {
436  if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
437  if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
438  }
439  NLayers++;
440  NVertLines++;
441 
442  /*
443  * Make an inverse map so that we can quickly find the dof
444  * associated with a particular vertical line and layer.
445  */
446 
447  Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
448  for (i=0; i < Ntotal; i++) {
449  InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
450  }
451 
452  /*
453  * Determine coarse layers where injection will be applied.
454  */
455 
456  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
457  NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
458 
459  /*
460  * Compute the largest possible interpolation stencil width based
461  * on the location of the Clayers. This stencil width is actually
462  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
463  * one needs to multiply this by DofsPerNode.
464  */
465 
466  if (NCLayers < 2) MaxStencilSize = nz;
467  else MaxStencilSize = CptLayers[2];
468 
469  for (i = 3; i <= NCLayers; i++) {
470  if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
471  MaxStencilSize = CptLayers[i]- CptLayers[i-2];
472  }
473  if (NCLayers > 1) {
474  if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
475  MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
476  }
477 
478  /*
479  * Allocate storage associated with solving a banded sub-matrix needed to
480  * determine the interpolation stencil. Note: we compute interpolation stencils
481  * for all dofs within a node at the same time, and so the banded solution
482  * must be large enough to hold all DofsPerNode simultaneously.
483  */
484 
485  Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
486  Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
487  /*
488  * Lapack variables. See comments for dgbsv().
489  */
490  KL = 2*DofsPerNode-1;
491  KU = 2*DofsPerNode-1;
492  KLU = KL+KU;
493  LDAB = 2*KL+KU+1;
494  NRHS = DofsPerNode;
495  Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
496  Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
497 
498  /*
499  * Allocate storage for the final interpolation matrix. Note: each prolongator
500  * row might have entries corresponding to at most two nodes.
501  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
502  * nnz per prolongator row is DofsPerNode*2.
503  */
504 
505  Ndofs = DofsPerNode*Ntotal;
506  MaxNnz = 2*DofsPerNode*Ndofs;
507 
508  RCP<const Map> rowMap = Amat->getRowMap();
509  int GNdofs= rowMap->getGlobalNumElements();
510 
511  std::vector<size_t> stridingInfo_;
512  stridingInfo_.push_back(DofsPerNode);
513 
514  coarseMap = StridedMapFactory::Build(rowMap->lib(),
515  (NCLayers*GNdofs)/nz,
516  NCLayers*NVertLines*DofsPerNode,
517  0, /* index base */
518  stridingInfo_,
519  rowMap->getComm(),
520  -1, /* strided block id */
521  0, /* domain gid offset */
522  rowMap->getNode());
523 
524 
525  //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
526 
527 
528  P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0, Xpetra::StaticProfile));
529 
530 
531  Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
532  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
533  Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
534 
535  Pptr[0] = 0; Pptr[1] = 0;
536 
537  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
538 
539  /*
540  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
541  * This will be useful while filling up P, and then later we will squeeze out
542  * the unused nonzeros locations.
543  */
544 
545  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
546  count = 1;
547  for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
548  Pptr[i] = count;
549  count += (2*DofsPerNode);
550  }
551 
552  /*
553  * Build P column by column. The 1st block column corresponds to the 1st coarse
554  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
555  * layer and the first line. The NCLayers+1 block column corresponds to the
556  * 1st coarse layer and the 2nd line, etc.
557  */
558 
559 
560  col = 0;
561  for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
562  for (iii=1; iii <= NCLayers; iii+= 1) {
563  col = col+1;
564  MyLayer = CptLayers[iii];
565 
566  /*
567  * StartLayer gives the layer number of the lowest layer that
568  * is nonzero in the interpolation stencil that is currently
569  * being computed. Normally, if we are not near a boundary this
570  * is simply CptsLayers[iii-1]+1.
571  *
572  * NStencilNodes indicates the number of nonzero nodes in the
573  * interpolation stencil that is currently being computed. Normally,
574  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
575  */
576 
577  if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
578  else StartLayer = 1;
579 
580  if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
581  else NStencilNodes= NLayers - StartLayer + 1;
582 
583 
584  N = NStencilNodes*DofsPerNode;
585 
586  /*
587  * dgbsv() does not require that the first KL rows be initialized,
588  * so we could avoid zeroing out some entries?
589  */
590 
591  for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++)
592  BandSol[ i] = 0.0;
593  for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
594 
595  /*
596  * Fill BandMat and BandSol (which is initially the rhs) for each
597  * node in the interpolation stencil that is being computed.
598  */
599 
600  for (node_k=1; node_k <= NStencilNodes ; node_k++) {
601 
602  /* Map a Line and Layer number to a BlkRow in the fine level matrix
603  * and record the mapping from the sub-system to the BlkRow of the
604  * fine level matrix.
605  */
606  BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
607  Sub2FullMap[node_k] = BlkRow;
608 
609  /* Two cases:
610  * 1) the current layer is not a Cpoint layer. In this case we
611  * want to basically stick the matrix couplings to other
612  * nonzero stencil rows into the band matrix. One way to do
613  * this is to include couplings associated with only MyLine
614  * and ignore all the other couplings. However, what we do
615  * instead is to sum all the coupling at each layer participating
616  * in this interpolation stencil and stick this sum into BandMat.
617  * 2) the current layer is a Cpoint layer and so we
618  * stick an identity block in BandMat and rhs.
619  */
620  if (StartLayer+node_k-1 != MyLayer) {
621  for (dof_i=0; dof_i < DofsPerNode; dof_i++) {
622 
623  j = (BlkRow-1)*DofsPerNode+dof_i;
624  ArrayView<const LO> AAcols;
625  ArrayView<const SC> AAvals;
626  Amat->getLocalRowView(j, AAcols, AAvals);
627  const int *Acols = AAcols.getRawPtr();
628  const SC *Avals = AAvals.getRawPtr();
629  RowLeng = AAvals.size();
630 
631  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
632 
633  for (i = 0; i < RowLeng; i++) {
634  LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
635 
636  /* This is the main spot where there might be off- */
637  /* processor communication. That is, when we */
638  /* average the stencil in the horizontal direction,*/
639  /* we need to know the layer id of some */
640  /* neighbors that might reside off-processor. */
641  }
642  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
643  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
644  PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
645  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
646  /* see dgbsv() comments for matrix format. */
647  TheSum = 0.0;
648  for (i = 0; i < RowLeng; i++) {
649  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
650  TheSum += Avals[i];
651  }
652  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
653  BandMat[index] = TheSum;
654  if (node_k != NStencilNodes) {
655  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
656  /* see dgbsv() comments for matrix format. */
657  TheSum = 0.0;
658  for (i = 0; i < RowLeng; i++) {
659  if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
660  TheSum += Avals[i];
661  }
662  j = PtCol+DofsPerNode;
663  index=LDAB*(j-1)+KLU+PtRow-j;
664  BandMat[index] = TheSum;
665  }
666  if (node_k != 1) {
667  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
668  /* see dgbsv() comments for matrix format. */
669  TheSum = 0.0;
670  for (i = 0; i < RowLeng; i++) {
671  if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
672  TheSum += Avals[i];
673  }
674  j = PtCol-DofsPerNode;
675  index=LDAB*(j-1)+KLU+PtRow-j;
676  BandMat[index] = TheSum;
677  }
678  }
679  }
680  }
681  else {
682  for (dof_i = 0; dof_i < DofsPerNode; dof_i++) {
683  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
684  /* see dgbsv() comments for matrix format. */
685  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
686  index = LDAB*(PtRow-1)+KLU;
687  BandMat[index] = 1.0;
688  BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
689  }
690  }
691  }
692 
693  /* Solve banded system and then stick result in Pmatrix arrays */
694 
695  lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
696 
697  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
698 
699  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
700  BandSol, N, &INFO );
701 
702  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
703 
704  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
705  for (dof_i=0; dof_i < DofsPerNode; dof_i++) {
706  for (i =1; i <= NStencilNodes ; i++) {
707  index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
708  loc = Pptr[index];
709  Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
710  Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
711  (i-1)*DofsPerNode + dof_i];
712  Pptr[index]= Pptr[index] + 1;
713  }
714  }
715  }
716  }
717  }
718 
719  /*
720  * Squeeze the -1's out of the columns. At the same time convert Pcols
721  * so that now the first column is numbered '0' as opposed to '1'.
722  * Also, the arrays Pcols and Pvals should now use the zeroth element
723  * as opposed to just starting with the first element. Pptr will be
724  * fixed in the for loop below so that Pptr[0] = 0, etc.
725  */
726  CurRow = 1;
727  NewPtr = 1;
728  for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
729  if (ii == Pptr[CurRow]) {
730  Pptr[CurRow] = LastGuy;
731  CurRow++;
732  while (ii > Pptr[CurRow]) {
733  Pptr[CurRow] = LastGuy;
734  CurRow++;
735  }
736  }
737  if (Pcols[ii] != -1) {
738  Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
739  Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
740  LastGuy = NewPtr;
741  NewPtr++;
742  }
743  }
744  for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
745 
746  /* Now move the pointers so that they now point to the beginning of each
747  * row as opposed to the end of each row
748  */
749  for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
750  Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
751  }
752  Pptr[0] = 0;
753 
754  ArrayRCP<size_t> rcpRowPtr;
755  ArrayRCP<LO> rcpColumns;
756  ArrayRCP<SC> rcpValues;
757 
758  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
759  LO nnz = Pptr[Ndofs];
760  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
761 
762  ArrayView<size_t> rowPtr = rcpRowPtr();
763  ArrayView<LO> columns = rcpColumns();
764  ArrayView<SC> values = rcpValues();
765 
766  // copy data over
767 
768  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
769  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
770  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
771  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
772  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
773 
774 
775  return NCLayers*NVertLines*DofsPerNode;
776  }
777 } //namespace MueLu
778 
779 #define MUELU_SEMICOARSENPFACTORY_SHORT
780 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
GlobalOrdinal GO
Timer to be used in factories. Similar to Monitor but with additional timers.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
LocalOrdinal LO
Namespace for MueLu classes and methods.
static const NoFactory * get()
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap) const
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
#define SET_VALID_ENTRY(name)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
#define MaxHorNeighborNodes
Scalar SC
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, size_t NumVectors, bool zeroOut=true)
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
Exception throws to report errors in the internal logical of the program.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.