46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 51 #include <Teuchos_Comm.hpp> 52 #include <Teuchos_CommHelpers.hpp> 54 #include <Xpetra_Vector.hpp> 58 #include "MueLu_Aggregates_kokkos.hpp" 60 #include "MueLu_LWGraph_kokkos.hpp" 67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// BuildAggregates: entry point that selects one of the two implementations
// defined in this file based on the boolean parameter
// "aggregation: deterministic" read from `params`. All arguments are forwarded
// unchanged to the selected implementation.
//
// params                 - parameter list; must contain the bool entry
//                          "aggregation: deterministic".
// graph, aggregates      - forwarded as-is.
// numNonAggregatedNodes  - in/out counter, forwarded by reference.
68 void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
// NOTE(review): `aggStat` is forwarded below but its parameter declaration is
// not present in the visible text (the embedded numbering skips 72) - confirm
// against the complete file.
73 LO& numNonAggregatedNodes)
const {
// Deterministic path: a Monitor labelled with the implementation name is
// created before the call.
75 if(params.get<
bool>(
"aggregation: deterministic")) {
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// NOTE(review): the embedded numbering skips 78; presumably an `else` separates
// the two paths, since both declare a Monitor named `m` - confirm.
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
85 template <
class LO,
class GO,
class Node>
// BuildAggregatesRandom: for each color in turn, visits every vertex of that
// color whose status is not READY-excluded by the guard below, scores the
// aggregates of its neighbours, and attaches the vertex to the best-scoring
// aggregate. Weight accumulation, scoring and the penalty increment all happen
// inside one parallel_reduce kernel, with atomic_add used for the shared
// aggWeight and aggPenalties entries.
//
// params                 - read for the int entry "aggregation: max agg size".
// graph                  - supplies vertex count, communicator rank and
//                          per-vertex neighbour lists.
// aggregates             - supplies vertex2AggId / procWinner views (opened
//                          ReadWrite), the color array, the number of colors
//                          and the number of aggregates.
// numNonAggregatedNodes  - decremented by the count reduced from each kernel.
//
// NOTE(review): several identifiers used below (aggStat, aggWeight,
// connectWeight, aggPenalties, maxIters, bestAggId) have no declaration in the
// visible text; the embedded numbering shows skipped lines. Confirm against
// the complete file before relying on these comments.
86 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
const {
93 const LO numRows = graph.GetNodeNumVertices();
// Rank of this process; written into procWinner for every vertex attached here.
94 const int myRank = graph.GetComm()->getRank();
// Device-side local views, requested with read/write access because the kernel
// both reads neighbours' entries and writes the entry of the attached vertex.
96 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
97 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
98 auto colors = aggregates.GetGraphColors();
99 const LO numColors = aggregates.GetGraphNumColors();
100 const LO numLocalAggregates = aggregates.GetNumAggregates();
// Every vertex starts with weight 100; a vertex attached in this function gets
// the best neighbouring weight minus 10 (see the end of the kernel).
102 const LO defaultConnectWeight = 100;
103 const LO penaltyConnectWeight = 10;
109 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
119 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
// When no maximum aggregate size was set (the parameter equals INT_MAX) a
// single sweep over the colors is performed.
120 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121 for (
int iter = 0; iter < maxIters; ++iter) {
// Colors are visited from 1 to numColors inclusive.
122 for(LO color = 1; color <= numColors; ++color) {
// Aggregate weights are recomputed from scratch for each color.
123 Kokkos::deep_copy(aggWeight, 0);
// Receives the reduction result; subtracted from numNonAggregatedNodes after
// the kernel.
127 LO numAggregated = 0;
128 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
130 KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated) {
// Guard on vertices that are not READY or not of the current color.
// NOTE(review): the guarded statement is not visible (presumably an early
// return) - confirm.
131 if (aggStat(i) !=
READY || colors(i) != color)
134 auto neighOfINode = graph.getNeighborVertices(i);
// First pass over the neighbours: add each qualifying neighbour's connect
// weight to the weight of the aggregate that neighbour belongs to. atomic_add
// is used because aggWeight is shared by all iterations of the kernel.
135 for (
int j = 0; j < neighOfINode.length; j++) {
136 LO neigh = neighOfINode(j);
// NOTE(review): the second operand of this condition is not visible.
140 if (graph.isLocalNeighborVertex(neigh) &&
142 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143 connectWeight(neigh));
// Running best candidate; the initial score of -100000 is below the
// `bestScore >= 0` acceptance threshold used after the loop.
146 int bestScore = -100000;
148 int bestConnect = -1;
// Second pass over the neighbours: score each neighbouring aggregate as
// accumulated weight minus its penalty and keep the highest score.
150 for (
int j = 0; j < neighOfINode.length; j++) {
151 LO neigh = neighOfINode(j);
153 if (graph.isLocalNeighborVertex(neigh) &&
155 auto aggId = vertex2AggId(neigh, 0);
156 int score = aggWeight(aggId) - aggPenalties(aggId);
// Strictly better score: take this neighbour's connect weight.
158 if (score > bestScore) {
161 bestConnect = connectWeight(neigh);
// Same aggregate as the current best: keep the largest connect weight among
// its members adjacent to i.
163 }
else if (aggId == bestAggId &&
164 connectWeight(neigh) > bestConnect) {
165 bestConnect = connectWeight(neigh);
// A non-negative best score means an acceptable aggregate was found: record
// the aggregate id and owning rank for vertex i.
169 if (bestScore >= 0) {
171 vertex2AggId(i, 0) = bestAggId;
172 procWinner(i, 0) = myRank;
// Each attachment raises the chosen aggregate's penalty by one, lowering its
// score for later evaluations. The update is applied inside the same kernel
// that reads aggPenalties.
174 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
// The newly attached vertex gets a connect weight 10 below the best
// neighbouring weight it attached through.
175 connectWeight(i) = bestConnect - penaltyConnectWeight;
179 numNonAggregatedNodes -= numAggregated;
187 template <
class LO,
class GO,
class Node>
// BuildAggregatesDeterministic: same per-color attachment step as
// BuildAggregatesRandom, but split into three separate kernels per color:
//   1. a parallel_for that accumulates aggregate weights,
//   2. a parallel_reduce that scores neighbouring aggregates, attaches the
//      vertex, and records penalty increments in aggPenaltyUpdates,
//   3. a parallel_for that adds aggPenaltyUpdates into aggPenalties and
//      clears the update array.
// The scoring kernel therefore reads aggPenalties while writing only
// aggPenaltyUpdates.
//
// params                 - read for the int entry "aggregation: max agg size".
// graph                  - supplies vertex count, communicator rank and
//                          per-vertex neighbour lists.
// aggregates             - supplies vertex2AggId / procWinner views (opened
//                          ReadWrite), the color array, the number of colors
//                          and the number of aggregates.
// numNonAggregatedNodes  - decremented by the count reduced from kernel 2.
//
// NOTE(review): several identifiers used below (aggStat, aggWeight,
// connectWeight, aggPenalties, aggPenaltyUpdates, maxIters, bestAggId) have no
// declaration in the visible text; the embedded numbering shows skipped lines.
// Confirm against the complete file before relying on these comments.
188 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
189 BuildAggregatesDeterministic(
const ParameterList& params,
190 const LWGraph_kokkos& graph,
191 Aggregates_kokkos& aggregates,
193 LO& numNonAggregatedNodes)
const {
195 const LO numRows = graph.GetNodeNumVertices();
// Rank of this process; written into procWinner for every vertex attached here.
196 const int myRank = graph.GetComm()->getRank();
// Device-side local views, requested with read/write access.
198 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
199 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
200 auto colors = aggregates.GetGraphColors();
201 const LO numColors = aggregates.GetGraphNumColors();
202 LO numLocalAggregates = aggregates.GetNumAggregates();
// Every vertex starts with weight 100; a vertex attached in this function gets
// the best neighbouring weight minus 10.
204 const int defaultConnectWeight = 100;
205 const int penaltyConnectWeight = 10;
212 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
221 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
// When no maximum aggregate size was set (the parameter equals INT_MAX) a
// single sweep over the colors is performed.
222 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
223 for (
int iter = 0; iter < maxIters; ++iter) {
// Colors are visited from 1 to numColors inclusive.
224 for(LO color = 1; color <= numColors; color++) {
// Aggregate weights are recomputed from scratch for each color.
225 Kokkos::deep_copy(aggWeight, 0);
// Receives the reduction result of the expansion kernel.
229 LO numAggregated = 0;
// Kernel 1: accumulate, for every aggregate, the connect weights of its
// members that neighbour a candidate vertex of the current color.
230 Kokkos::parallel_for(
"Aggregation Phase 2b: updating agg weights",
232 KOKKOS_LAMBDA (
const LO i)
// Guard on vertices that are not READY or not of the current color.
// NOTE(review): the guarded statement is not visible (presumably an early
// return) - confirm.
234 if (aggStat(i) !=
READY || colors(i) != color)
236 auto neighOfINode = graph.getNeighborVertices(i);
237 for (
int j = 0; j < neighOfINode.length; j++) {
238 LO neigh = neighOfINode(j);
// NOTE(review): the second operand of this condition is not visible.
// atomic_add is used because aggWeight is shared by all iterations.
241 if (graph.isLocalNeighborVertex(neigh) &&
243 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
244 connectWeight(neigh));
// Kernel 2: with aggWeight complete, pick the best neighbouring aggregate for
// each candidate vertex and count the attachments.
248 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
250 KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated)
252 if (aggStat(i) !=
READY || colors(i) != color)
// Running best candidate; the initial score of -100000 is below the
// `bestScore >= 0` acceptance threshold used after the loop.
254 int bestScore = -100000;
256 int bestConnect = -1;
258 auto neighOfINode = graph.getNeighborVertices(i);
259 for (
int j = 0; j < neighOfINode.length; j++) {
260 LO neigh = neighOfINode(j);
262 if (graph.isLocalNeighborVertex(neigh) &&
264 auto aggId = vertex2AggId(neigh, 0);
// Score is accumulated weight minus the aggregate's current penalty.
265 int score = aggWeight(aggId) - aggPenalties(aggId);
// Strictly better score: take this neighbour's connect weight.
267 if (score > bestScore) {
270 bestConnect = connectWeight(neigh);
// Same aggregate as the current best: keep the largest connect weight among
// its members adjacent to i.
272 }
else if (aggId == bestAggId &&
273 connectWeight(neigh) > bestConnect) {
274 bestConnect = connectWeight(neigh);
// A non-negative best score means an acceptable aggregate was found: record
// the aggregate id and owning rank for vertex i.
278 if (bestScore >= 0) {
280 vertex2AggId(i, 0) = bestAggId;
281 procWinner(i, 0) = myRank;
// The penalty increment goes to the separate update array rather than to
// aggPenalties, which this kernel reads when scoring.
283 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
// The newly attached vertex gets a connect weight 10 below the best
// neighbouring weight it attached through.
284 connectWeight(i) = bestConnect - penaltyConnectWeight;
// Kernel 3: apply the penalty increments gathered by kernel 2 and reset the
// update array for the next color.
289 Kokkos::parallel_for(
"Aggregation Phase 2b: updating agg penalties",
291 KOKKOS_LAMBDA (
const LO agg)
293 aggPenalties(agg) += aggPenaltyUpdates(agg);
294 aggPenaltyUpdates(agg) = 0;
296 numNonAggregatedNodes -= numAggregated;
302 #endif // HAVE_MUELU_KOKKOS_REFACTOR 303 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
Namespace for MueLu classes and methods.