// NOTE(review): this listing carries the original file's line numbers inside the text and
// skips some of them (e.g. 72 and 78 below), so the declaration of `aggStat`, the `else`
// keyword and the closing braces are not visible here.
//
// The next line holds the include guard, the HAVE_MUELU_KOKKOS_REFACTOR guard, the includes,
// and the opening of the template header of BuildAggregates.
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP 47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 51 #include <Teuchos_Comm.hpp> 52 #include <Teuchos_CommHelpers.hpp> 54 #include <Xpetra_Vector.hpp> 58 #include "MueLu_Aggregates_kokkos.hpp" 60 #include "MueLu_LWGraph_kokkos.hpp" 63 #include "Kokkos_Sort.hpp" 67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// BuildAggregates: reads the boolean parameter "aggregation: deterministic" and forwards all
// of its arguments to BuildAggregatesDeterministic or BuildAggregatesRandom.
//   params                - parameter list queried for the dispatch flag, then passed through
//   graph                 - graph handed unchanged to the selected implementation
//   aggregates            - aggregates object handed unchanged to the selected implementation
//   numNonAggregatedNodes - non-const reference; both implementations add their (non-positive)
//                           reduction result to it
68 void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
// Deterministic path, selected when the flag is true.
75 if(params.get<
bool>(
"aggregation: deterministic")) {
// Monitor object labelled with the name of the routine about to run.
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// NOTE(review): presumably the `else` branch; the line carrying `else` (78) is not visible.
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// BuildAggregatesRandom: colour-by-colour creation of new aggregates in which the new
// aggregate index is taken from an atomic counter, so the index a given root receives depends
// on the order in which the atomic increments happen.
//   params                - supplies "aggregation: min agg size", "aggregation: max agg size"
//                           and "aggregation: phase2a include root"
//   graph                 - queried for vertex count, communicator rank and neighbour lists
//   aggregates            - source of vertex2AggId / procWinner / colours; receives the
//                           updated aggregate count at the end
//   numNonAggregatedNodes - in/out counter, lowered for every vertex assigned here
// NOTE(review): the `aggStat` parameter is used below but its declaration (original line 90)
// is not visible in this listing.
85 template <
class LO,
class GO,
class Node>
86 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
// Size limits and root-inclusion flag read from the parameter list.
const 93 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
94 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
95 bool includeRootInAgg = params.get<
bool>(
"aggregation: phase2a include root");
97 const LO numRows = graph.GetNodeNumVertices();
98 const int myRank = graph.GetComm()->getRank();
// Local views requested with ReadWrite access; the kernel below writes into both.
100 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
101 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
102 auto colors = aggregates.GetGraphColors();
103 const LO numColors = aggregates.GetGraphNumColors();
105 LO numLocalNodes = numRows;
106 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (numLocalAggregated / (numLocalNodes + 1)) ^ aggFactor; the +1 keeps the
// denominator non-zero when there are no local nodes.
108 const double aggFactor = 0.5;
109 double factor =
static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110 factor = pow(factor, aggFactor);
// Seed the aggregate counter with the current number of aggregates and copy it into
// numLocalAggregates.
// NOTE(review): the declarations of numLocalAggregates and h_numLocalAggregates (original
// lines 111-117) are not visible; presumably h_numLocalAggregates receives this mirror view.
118 Kokkos::create_mirror_view(numLocalAggregates);
119 h_numLocalAggregates() = aggregates.GetNumAggregates();
120 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
// Colours 2 .. numColors (inclusive) are visited, one reduction per colour.
// NOTE(review): colour 1 is skipped here, whereas BuildAggregatesDeterministic starts at 1 —
// confirm the difference is intended.
124 for(
int color = 2; color < numColors + 1; ++color) {
125 LO tmpNumNonAggregatedNodes = 0;
// NOTE(review): the execution-policy argument (original line 127) is not visible.
126 Kokkos::parallel_reduce(
"Aggregation Phase 2a: loop over each individual color",
128 KOKKOS_LAMBDA (
const LO rootCandidate, LO& lNumNonAggregatedNodes) {
// Only vertices that are still READY and carry the current colour are root candidates.
129 if(aggStat(rootCandidate) ==
READY &&
130 colors(rootCandidate) == color) {
// NOTE(review): the declaration of aggSize and the body of this `if` are not visible.
133 if (includeRootInAgg)
138 auto neighbors = graph.getNeighborVertices(rootCandidate);
// First pass over the neighbours: tests which of them are local, READY and still fit under
// maxNodesPerAggregate. NOTE(review): the loop body (original lines 149-156, presumably the
// aggSize / numNeighbors bookkeeping) is not visible.
143 for(
int j = 0; j < neighbors.length; ++j) {
144 LO neigh = neighbors(j);
145 if(neigh != rootCandidate) {
146 if(graph.isLocalNeighborVertex(neigh) &&
147 (aggStat(neigh) ==
READY) &&
148 (aggSize < maxNodesPerAggregate)) {
// Acceptance test: aggSize must exceed the minimum and exceed factor*numNeighbors; when the
// root is included, aggSize-1 is compared against factor*numNeighbors instead.
157 if(aggSize > minNodesPerAggregate &&
158 ((includeRootInAgg && aggSize-1 > factor*numNeighbors) ||
159 (!includeRootInAgg && aggSize > factor*numNeighbors))) {
// Reserve a new aggregate index; atomic_fetch_add returns the counter value before the
// increment, so concurrently accepted roots obtain distinct indices.
162 LO aggIndex = Kokkos::
163 atomic_fetch_add(&numLocalAggregates(), 1);
165 LO numAggregated = 0;
// Root joins its own aggregate only when requested.
167 if (includeRootInAgg) {
170 vertex2AggId(rootCandidate, 0) = aggIndex;
171 procWinner(rootCandidate, 0) = myRank;
173 --lNumNonAggregatedNodes;
// Second pass over the neighbours: assign eligible ones to the new aggregate while
// numAggregated < aggSize, lowering the per-thread count for each assignment.
176 for(
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177 LO neigh = neighbors(neighIdx);
178 if(neigh != rootCandidate) {
179 if(graph.isLocalNeighborVertex(neigh) &&
180 (aggStat(neigh) ==
READY) &&
181 (numAggregated < aggSize)) {
183 vertex2AggId(neigh, 0) = aggIndex;
184 procWinner(neigh, 0) = myRank;
187 --lNumNonAggregatedNodes;
// The reduction result only receives decrements above, so adding it lowers the caller's count.
193 }, tmpNumNonAggregatedNodes);
194 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Copy the final counter back and publish it on the aggregates object.
198 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
199 aggregates.SetNumAggregates(h_numLocalAggregates());
// BuildAggregatesDeterministic: colour-by-colour creation of new aggregates in two steps per
// colour: (1) collect accepted root candidates into newRoots, (2) sort them and derive each
// aggregate index from the root's position in the sorted list, so the index does not depend
// on the order in which roots were collected.
//   params                - supplies "aggregation: min agg size" and "aggregation: max agg size"
//   graph                 - queried for vertex count, communicator rank and neighbour lists
//   aggregates            - source of vertex2AggId / procWinner / colours; receives the
//                           updated aggregate count at the end
//   numNonAggregatedNodes - in/out counter, lowered by the size of every aggregate built here
// NOTE(review): the `aggStat` parameter is used below but its declaration (original line 207)
// is not visible in this listing.
202 template <
class LO,
class GO,
class Node>
203 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
204 BuildAggregatesDeterministic(
const ParameterList& params,
205 const LWGraph_kokkos& graph,
206 Aggregates_kokkos& aggregates,
208 LO& numNonAggregatedNodes)
// Size limits read from the parameter list ("aggregation: phase2a include root" is not read
// in the visible part of this routine).
const 210 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
211 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
// Local views requested with ReadWrite access; the second kernel below writes into both.
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
219 const LO numColors = aggregates.GetGraphNumColors();
// Node count taken from procWinner.size() here (BuildAggregatesRandom uses numRows instead).
221 LO numLocalNodes = procWinner.size();
222 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (numLocalAggregated / (numLocalNodes + 1)) ^ aggFactor.
224 const double aggFactor = 0.5;
225 double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
226 factor = pow(factor, aggFactor);
// Seed the aggregate counter with the current number of aggregates and copy it into
// numLocalAggregates.
// NOTE(review): the declarations of numLocalAggregates and h_numLocalAggregates are not
// visible; presumably h_numLocalAggregates receives this mirror view.
230 Kokkos::create_mirror_view(numLocalAggregates);
231 h_numLocalAggregates() = aggregates.GetNumAggregates();
232 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
// NOTE(review): the declarations of newRoots and numNewRoots (original lines 233-248) are
// not visible.
249 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Colours 1 .. numColors (inclusive) are visited.
250 for(
int color = 1; color < numColors + 1; ++color) {
// Push the host-side root count into numNewRoots before collecting roots for this colour.
// NOTE(review): original line 251 is not visible; presumably it resets h_numNewRoots first.
252 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
// Step 1: find accepted root candidates of the current colour.
// NOTE(review): the execution-policy argument (original line 254) is not visible.
253 Kokkos::parallel_for(
"Aggregation Phase 2a: determining new roots of current color",
255 KOKKOS_LAMBDA(
const LO rootCandidate) {
256 if(aggStat(rootCandidate) ==
READY &&
257 colors(rootCandidate) == color) {
259 auto neighbors = graph.getNeighborVertices(rootCandidate);
// Walk the neighbours, testing for local, READY vertices while aggSize stays under the
// maximum. NOTE(review): the declarations of aggSize / numNeighbors and the statements
// guarded by these conditions are not visible.
263 for(
int j = 0; j < neighbors.length; ++j) {
264 LO neigh = neighbors(j);
265 if(neigh != rootCandidate)
267 if(graph.isLocalNeighborVertex(neigh) &&
268 aggStat(neigh) ==
READY &&
269 aggSize < maxNodesPerAggregate)
// Accepted roots are appended to newRoots; the slot comes from an atomic counter, so the
// order of entries is arbitrary at this point.
278 if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
279 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
280 newRoots(newRootIndex) = rootCandidate;
// Bring the number of collected roots back to the host.
284 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
286 if(h_numNewRoots() > 0) {
// Sort the collected roots (range [0, h_numNewRoots())) so that positions, and therefore
// aggregate indices, are independent of the collection order.
288 Kokkos::sort(newRoots, 0, h_numNewRoots());
290 LO tmpNumNonAggregatedNodes = 0;
// Step 2: build one aggregate per sorted root.
292 Kokkos::parallel_reduce(
"Aggregation Phase 2a: create new aggregates",
294 KOKKOS_LAMBDA (
const LO newRootIndex, LO& lNumNonAggregatedNodes) {
295 LO root = newRoots(newRootIndex);
// Aggregate index = aggregates that existed before this colour + position in sorted list.
296 LO newAggID = numLocalAggregates() + newRootIndex;
297 auto neighbors = graph.getNeighborVertices(root);
// NOTE(review): no procWinner(root, 0) assignment is visible for the root, unlike the
// neighbours below — confirm against the full source.
300 vertex2AggId(root, 0) = newAggID;
// Assign eligible neighbours to the new aggregate while aggSize stays under the maximum.
302 for(
int j = 0; j < neighbors.length; ++j) {
303 LO neigh = neighbors(j);
305 if(graph.isLocalNeighborVertex(neigh) &&
306 aggStat(neigh) ==
READY &&
307 aggSize < maxNodesPerAggregate) {
309 vertex2AggId(neigh, 0) = newAggID;
310 procWinner(neigh, 0) = myRank;
// Each new aggregate lowers the non-aggregated count by its size.
315 lNumNonAggregatedNodes -= aggSize;
316 }, tmpNumNonAggregatedNodes);
317 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Advance the aggregate count by the number of roots used and refresh numLocalAggregates,
// which the next colour reads as the base for its indices.
318 h_numLocalAggregates() += h_numNewRoots();
319 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
// Publish the final aggregate count on the aggregates object.
322 aggregates.SetNumAggregates(h_numLocalAggregates());
327 #endif // HAVE_MUELU_KOKKOS_REFACTOR 328 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
// Namespace for MueLu classes and methods.