Compadre  1.3.3
Compadre_GMLS.cpp
Go to the documentation of this file.
1 #include "Compadre_GMLS.hpp"
6 #include "Compadre_Functors.hpp"
8 
9 #include "KokkosBatched_Gemm_TeamVector_Internal.hpp"
10 
11 namespace Compadre {
12 
13 void GMLS::generatePolynomialCoefficients(const int number_of_batches, const bool keep_coefficients) {
14 
15  compadre_assert_release( (keep_coefficients==false || number_of_batches==1)
16  && "keep_coefficients is set to true, but number of batches exceeds 1.");
17 
18  /*
19  * Generate Quadrature
20  */
22 
23  /*
24  * Operations to Device
25  */
26 
27  // copy over operations
28  _operations = decltype(_operations)("operations", _lro.size());
29  _host_operations = Kokkos::create_mirror_view(_operations);
30 
31  // make sure at least one target operation specified
32  compadre_assert_release((_lro.size() > 0)
33  && "No target operations added to GMLS class before calling generatePolynomialCoefficients().");
34 
35  // loop through list of linear reconstruction operations to be performed and set them on the host
36  for (size_t i=0; i<_lro.size(); ++i) _host_operations(i) = _lro[i];
37 
38  // get copy of operations on the device
39  Kokkos::deep_copy(_operations, _host_operations);
40 
41  // check that if any target sites added, that neighbors_lists has equal rows
43  && "Neighbor lists not set in GMLS class before calling generatePolynomialCoefficients.");
44 
45  // check that if any target sites are greater than zero (could be zero), then there are more than zero source sites
47  && "Source coordinates not set in GMLS class before calling generatePolynomialCoefficients.");
48 
49  /*
50  * Initialize Alphas and Prestencil Weights
51  */
52 
53  // throw an assertion for QR solver incompatibility
54  // TODO: this is a temporary location for this check, in the future the
55  // constraint type could be an object that can check when given a dense_solver_type
58  && "Cannot solve GMLS problems with the NEUMANN_GRAD_SCALAR constraint using QR Factorization.");
59 
60  // calculate the additional size for different constraint problems
63 
64  // initialize all alpha values to be used for taking the dot product with data to get a reconstruction
65  try {
67  int total_added_alphas = _target_coordinates.extent(0)*_added_alpha_size;
68  _alphas = decltype(_alphas)("alphas", (total_neighbors + TO_GLOBAL(total_added_alphas))
70  // this deep copy writes to all theoretically allocated memory,
71  // ensuring that allocation attempted was successful
72  compadre_assert_debug((deep_copy(_alphas, 0.0), true));
73  } catch(std::exception &e) {
74  printf("Insufficient memory to store alphas: \n\n%s", e.what());
75  throw e;
76  }
77 
78  // initialize the prestencil weights that are applied to sampling data to put it into a form
79  // that the GMLS operator will be able to operate on
80  auto sro = _data_sampling_functional;
81  try {
82  _prestencil_weights = decltype(_prestencil_weights)("Prestencil weights",
83  std::pow(2,sro.use_target_site_weights),
84  (sro.transform_type==DifferentEachTarget
85  || sro.transform_type==DifferentEachNeighbor) ?
87  (sro.transform_type==DifferentEachNeighbor) ?
89  (sro.output_rank>0) ?
91  (sro.input_rank>0) ?
92  _global_dimensions : 1);
93  } catch(std::exception &e) {
94  printf("Insufficient memory to store prestencil weights: \n\n%s", e.what());
95  throw e;
96  }
97  Kokkos::fence();
98 
99  /*
100  * Determine if Nontrivial Null Space in Solution
101  */
102 
103  // check whether the sampling function acting on the basis will induce a nontrivial nullspace
104  // an example would be reconstructing from gradient information, which would annihilate constants
106 
107  /*
108  * Determine if Nonstandard Sampling Dimension or Basis Component Dimension
109  */
110 
111 
112  // calculate the dimension of the basis (a vector space on a manifold requires two components, for example)
114 
115  // calculate sampling dimension
117 
118  // effective number of components in the basis
120 
121  // special case for using a higher order for sampling from a polynomial space that are gradients of a scalar polynomial
123  // if the reconstruction is being made with a gradient of a basis, then we want that basis to be one order higher so that
124  // the gradient is consistent with the convergence order expected.
125  _poly_order += 1;
127  }
128 
129  /*
130  * Dimensions
131  */
132 
133  // for tallying scratch space needed for device kernel calls
134  int team_scratch_size_a = 0;
135 
136  // TEMPORARY, take to zero after conversion
137  int team_scratch_size_b = 0;
138  int thread_scratch_size_a = 0;
139  int thread_scratch_size_b = 0;
140 
141  // dimensions that are relevant for each subproblem
142  int max_num_rows = _sampling_multiplier*_max_num_neighbors;
143  int this_num_cols = _basis_multiplier*_NP;
144  int manifold_NP = 0;
145 
147  // these dimensions already calculated differ in the case of manifolds
150  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
151  this_num_cols = _basis_multiplier*max_manifold_NP;
152  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
153  const int max_P_row_size = ((_dimensions-1)*manifold_NP > max_manifold_NP*_total_alpha_values*_basis_multiplier) ? (_dimensions-1)*manifold_NP : max_manifold_NP*_total_alpha_values*_basis_multiplier*_max_evaluation_sites_per_target;
154 
155  /*
156  * Calculate Scratch Space Allocations
157  */
158 
159  team_scratch_size_b += scratch_matrix_right_type::shmem_size(_dimensions-1, _dimensions-1); // G
160  team_scratch_size_b += scratch_matrix_right_type::shmem_size(_dimensions, _dimensions); // PTP matrix
161  team_scratch_size_b += scratch_vector_type::shmem_size( (_dimensions-1)*_max_num_neighbors ); // manifold_gradient
162 
163  team_scratch_size_b += scratch_vector_type::shmem_size(_max_num_neighbors*std::max(_sampling_multiplier,_basis_multiplier)); // t1 work vector for qr
164  team_scratch_size_b += scratch_vector_type::shmem_size(_max_num_neighbors*std::max(_sampling_multiplier,_basis_multiplier)); // t2 work vector for qr
165 
166  team_scratch_size_b += scratch_vector_type::shmem_size(max_P_row_size); // row of P matrix, one for each operator
167  thread_scratch_size_b += scratch_vector_type::shmem_size(max_manifold_NP*_basis_multiplier); // delta, used for each thread
168  thread_scratch_size_b += scratch_vector_type::shmem_size((max_poly_order+1)*_global_dimensions); // temporary space for powers in basis
170  thread_scratch_size_b += scratch_vector_type::shmem_size(_dimensions*_dimensions); // temporary tangent calculations, used for each thread
171  }
172 
173 
174  // allocate data on the device (initialized to zero)
175  _T = Kokkos::View<double*>("tangent approximation",_target_coordinates.extent(0)*_dimensions*_dimensions);
176  _manifold_metric_tensor_inverse = Kokkos::View<double*>("manifold metric tensor inverse",_target_coordinates.extent(0)*(_dimensions-1)*(_dimensions-1));
177  _manifold_curvature_coefficients = Kokkos::View<double*>("manifold curvature coefficients",_target_coordinates.extent(0)*manifold_NP);
178  _manifold_curvature_gradient = Kokkos::View<double*>("manifold curvature gradient",_target_coordinates.extent(0)*(_dimensions-1));
179 
180  } else { // Standard GMLS
181 
182  /*
183  * Calculate Scratch Space Allocations
184  */
185 
186  team_scratch_size_a += scratch_vector_type::shmem_size(max_num_rows); // t1 work vector for qr
187  team_scratch_size_a += scratch_vector_type::shmem_size(max_num_rows); // t2 work vector for qr
188 
189  // row of P matrix, one for each operator
190  // +1 is for the original target site which always gets evaluated
191  team_scratch_size_b += scratch_vector_type::shmem_size(this_num_cols*_total_alpha_values*_max_evaluation_sites_per_target);
192 
193  thread_scratch_size_b += scratch_vector_type::shmem_size(this_num_cols); // delta, used for each thread
194  thread_scratch_size_b += scratch_vector_type::shmem_size((_poly_order+1)*_global_dimensions); // temporary space for powers in basis
195  }
196  _pm.setTeamScratchSize(0, team_scratch_size_a);
197  _pm.setTeamScratchSize(1, team_scratch_size_b);
198  _pm.setThreadScratchSize(0, thread_scratch_size_a);
199  _pm.setThreadScratchSize(1, thread_scratch_size_b);
200 
201  /*
202  * Calculate the size for matrix P and RHS
203  */
204 
205  int RHS_dim_0, RHS_dim_1;
206  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
207 
208  int P_dim_0, P_dim_1;
209  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
210 
211  /*
212  * Allocate Global Device Storage of Data Needed Over Multiple Calls
213  */
214 
215  global_index_type max_batch_size = (_target_coordinates.extent(0) + TO_GLOBAL(number_of_batches) - 1) / TO_GLOBAL(number_of_batches);
216  try {
217  _RHS = Kokkos::View<double*>("RHS", max_batch_size*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1));
218  _P = Kokkos::View<double*>("P", max_batch_size*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1));
219  _w = Kokkos::View<double*>("w", max_batch_size*TO_GLOBAL(max_num_rows));
220  } catch (std::exception &e) {
221  printf("Failed to allocate space for RHS, P, and w. Consider increasing number_of_batches: \n\n%s", e.what());
222  throw e;
223  }
224  Kokkos::fence();
225 
226  /*
227  * Calculate Optimal Threads Based On Levels of Parallelism
228  */
229 
232  && "Normal vectors are required for solving GMLS problems with the NEUMANN_GRAD_SCALAR constraint.");
233  }
234 
235 
237  for (int batch_num=0; batch_num<number_of_batches; ++batch_num) {
238 
239  auto this_batch_size = std::min(_target_coordinates.extent(0)-_initial_index_for_batch, max_batch_size);
240  Kokkos::deep_copy(_RHS, 0.0);
241  Kokkos::deep_copy(_P, 0.0);
242  Kokkos::deep_copy(_w, 0.0);
243 
245 
246  /*
247  * MANIFOLD Problems
248  */
249 
250  if (!_orthonormal_tangent_space_provided) { // user did not specify orthonormal tangent directions, so we approximate them first
251  // coarse tangent plane approximation construction of P^T*P
253 
254  // if the user provided the reference outward normal direction, then orient the computed or user provided
255  // outward normal directions in the tangent bundle
257  // use the reference outward normal direction provided by the user to orient
258  // the tangent bundle
260  }
261 
262  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
263  _pm.CallFunctorWithTeamThreads<AssembleCurvaturePsqrtW>(*this, this_batch_size);
264 
266  // solves P^T*P against P^T*W with LU, stored in P
267  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
268  // batchLU expects layout_left matrix tiles for B
269  // by giving it layout_right matrix tiles with reverse ordered ldb and ndb
270  // it effects a transpose of _P in layout_left
271  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, _max_num_neighbors, this_batch_size);
272  Kokkos::Profiling::popRegion();
273  } else {
274  // solves P*sqrt(weights) against sqrt(weights)*Identity with QR, stored in RHS
275  Kokkos::Profiling::pushRegion("Curvature QR+Pivoting Factorization");
276  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, _max_num_neighbors, manifold_NP, _max_num_neighbors, this_batch_size);
277  Kokkos::Profiling::popRegion();
278  }
279 
280  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
282 
283  // Due to converting layout, entries that are assumed zeros may become non-zeros.
284  Kokkos::deep_copy(_P, 0.0);
285 
286  if (batch_num==number_of_batches-1) {
287  // copy tangent bundle from device back to host
288  _host_T = Kokkos::create_mirror_view(_T);
289  Kokkos::deep_copy(_host_T, _T);
290  }
291  }
292 
293  // this time assembling curvature PsqrtW matrix is using a highly accurate approximation of the tangent, previously calculated
294  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
295  _pm.CallFunctorWithTeamThreads<AssembleCurvaturePsqrtW>(*this, this_batch_size);
296 
298  // solves P^T*P against P^T*W with LU, stored in P
299  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
300  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, _max_num_neighbors, this_batch_size);
301  Kokkos::Profiling::popRegion();
302  } else {
303  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
304  Kokkos::Profiling::pushRegion("Curvature QR+Pivoting Factorization");
305  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, _max_num_neighbors, manifold_NP, _max_num_neighbors, this_batch_size);
306  Kokkos::Profiling::popRegion();
307  }
308 
309  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
310  _pm.CallFunctorWithTeamThreads<ApplyCurvatureTargets>(*this, this_batch_size);
311  Kokkos::fence();
312 
313  // prestencil weights calculated here. appropriate because:
314  // precedes polynomial reconstruction from data (replaces contents of _RHS)
315  // follows reconstruction of geometry
316  // calculate prestencil weights
318 
319  // Due to converting layout, entried that are assumed zeros may become non-zeros.
320  Kokkos::deep_copy(_P, 0.0);
321 
322  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
323  _pm.CallFunctorWithTeamThreads<AssembleManifoldPsqrtW>(*this, this_batch_size);
324 
325  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
327  Kokkos::Profiling::pushRegion("Manifold LU Factorization");
328  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols, this_num_cols, max_num_rows, this_batch_size);
329  Kokkos::Profiling::popRegion();
330  } else {
331  Kokkos::Profiling::pushRegion("Manifold QR+Pivoting Factorization");
332  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size);
333  Kokkos::Profiling::popRegion();
334  }
335  Kokkos::fence();
336 
337  } else {
338 
339  /*
340  * STANDARD GMLS Problems
341  */
342 
343  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
344  _pm.CallFunctorWithTeamThreads<AssembleStandardPsqrtW>(*this, this_batch_size);
345  //_pm.CallFunctorWithTeamThreads<AssembleStandardPsqrtW>(this_batch_size, *this);
346  Kokkos::fence();
347 
348  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
350  Kokkos::Profiling::pushRegion("LU Factorization");
351  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _added_alpha_size, this_batch_size);
352  Kokkos::Profiling::popRegion();
353  } else {
354  Kokkos::Profiling::pushRegion("QR+Pivoting Factorization");
356  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _added_alpha_size, this_batch_size);
357  } else {
358  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size);
359  }
360  Kokkos::Profiling::popRegion();
361  }
362 
364  Kokkos::fence();
365  }
366 
367  /*
368  * Calculate Optimal Threads Based On Levels of Parallelism
369  */
370 
371 
373 
374  /*
375  * MANIFOLD Problems
376  */
377 
378  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
379  _pm.CallFunctorWithTeamThreads<ApplyManifoldTargets>(*this, this_batch_size);
380 
381  } else {
382 
383  /*
384  * STANDARD GMLS Problems
385  */
386 
387  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
389 
390  }
391  Kokkos::fence();
392  _initial_index_for_batch += this_batch_size;
393  if ((size_t)_initial_index_for_batch == _target_coordinates.extent(0)) break;
394  } // end of batch loops
395 
396  // deallocate _P and _w
397  _w = Kokkos::View<double*>("w",0);
398  if (number_of_batches > 1) { // no reason to keep coefficients if they aren't all in memory
399  _RHS = Kokkos::View<double*>("RHS",0);
400  _P = Kokkos::View<double*>("P",0);
402  } else {
404  _RHS = Kokkos::View<double*>("RHS", 0);
405  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
406  } else {
408  _P = Kokkos::View<double*>("P", 0);
409  if (!keep_coefficients) _RHS = Kokkos::View<double*>("RHS", 0);
410  } else {
411  _RHS = Kokkos::View<double*>("RHS", 0);
412  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
413  }
414  }
415  if (keep_coefficients) _store_PTWP_inv_PTW = true;
416  }
417 
418  /*
419  * Device to Host Copy Of Solution
420  */
421 
422  // copy computed alphas back to the host
423  _host_alphas = Kokkos::create_mirror_view(_alphas);
425  _host_prestencil_weights = Kokkos::create_mirror_view(_prestencil_weights);
426  Kokkos::deep_copy(_host_prestencil_weights, _prestencil_weights);
427  }
428  Kokkos::deep_copy(_host_alphas, _alphas);
429  Kokkos::fence();
430 
431 
432 }
433 
434 void GMLS::generateAlphas(const int number_of_batches, const bool keep_coefficients) {
435 
436  this->generatePolynomialCoefficients(number_of_batches, keep_coefficients);
437 
438 }
439 
440 
441 KOKKOS_INLINE_FUNCTION
442 void GMLS::operator()(const AssembleStandardPsqrtW&, const member_type& teamMember) const {
443 
444  /*
445  * Dimensions
446  */
447 
448  const int target_index = _initial_index_for_batch + teamMember.league_rank();
449  const int local_index = teamMember.league_rank();
450 
451  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
452  const int this_num_rows = _sampling_multiplier*this->getNNeighbors(target_index);
453  const int this_num_cols = _basis_multiplier*_NP;
454 
455  int RHS_dim_0, RHS_dim_1;
456  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
457  int P_dim_0, P_dim_1;
458  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
459 
460  /*
461  * Data
462  */
463 
464  scratch_matrix_right_type PsqrtW(_P.data()
465  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
466  scratch_matrix_right_type RHS(_RHS.data()
467  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
468  scratch_vector_type w(_w.data()
469  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
470 
471  // delta, used for each thread
472  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), this_num_cols);
473  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (_poly_order+1)*_global_dimensions);
474 
475  /*
476  * Assemble P*sqrt(W) and sqrt(w)*Identity
477  */
478 
479  // creates the matrix sqrt(W)*P
480  this->createWeightsAndP(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions, _poly_order, true /*weight_p*/, NULL /*&V*/, _reconstruction_space, _polynomial_sampling_functional);
481 
483  // fill in RHS with Identity * sqrt(weights)
484  double * rhs_data = RHS.data();
485  Kokkos::parallel_for(Kokkos::TeamVectorRange(teamMember,this_num_rows), [&] (const int i) {
486  rhs_data[i] = std::sqrt(w(i));
487  });
488  } else {
489  // create global memory for matrix M = PsqrtW^T*PsqrtW
490  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
492  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
493  KokkosBatched::TeamVectorGemmInternal<KokkosBatched::Algo::Gemm::Unblocked>
494  ::invoke(teamMember,
495  this_num_cols, this_num_cols, this_num_rows,
496  1.0,
497  PsqrtW.data(), PsqrtW.stride(1), PsqrtW.stride(0),
498  PsqrtW.data(), PsqrtW.stride(0), PsqrtW.stride(1),
499  0.0,
500  M.data(), M.stride(0), M.stride(1));
501  teamMember.team_barrier();
502 
503  // Multiply PsqrtW with sqrt(W) to get PW
504  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, max_num_rows), [&] (const int i) {
505  for (int j=0; j < this_num_cols; j++) {
506  PsqrtW(i, j) = PsqrtW(i, j)*std::sqrt(w(i));
507  }
508  });
509  teamMember.team_barrier();
510 
511  // conditionally fill in rows determined by constraint type
513  // normal vector is contained in last row of T
516 
517  // Get the number of neighbors for target index
518  int num_neigh_target = this->getNNeighbors(target_index);
519  double cutoff_p = _epsilons(target_index);
520 
521  evaluateConstraints(M, PsqrtW, _constraint_type, _reconstruction_space, _NP, cutoff_p, _dimensions, num_neigh_target, &T);
522  }
523  }
524 }
525 
526 
527 KOKKOS_INLINE_FUNCTION
528 void GMLS::operator()(const ApplyStandardTargets&, const member_type& teamMember) const {
529 
530  /*
531  * Dimensions
532  */
533 
534  const int local_index = teamMember.league_rank();
535 
536  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
537  const int this_num_cols = _basis_multiplier*_NP;
538 
539  int RHS_dim_0, RHS_dim_1;
540  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
541  int P_dim_0, P_dim_1;
542  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
543 
544  /*
545  * Data
546  */
547 
548  // Coefficients for polynomial basis have overwritten _RHS
550  // if (_dense_solver_type != DenseSolverType::LU) {
552  Coeffs = scratch_matrix_right_type(_RHS.data()
553  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
554  } else {
555  Coeffs = scratch_matrix_right_type(_P.data()
556  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
557  }
558  scratch_vector_type w(_w.data()
559  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
560 
561  scratch_vector_type t1(teamMember.team_scratch(_pm.getTeamScratchLevel(0)), max_num_rows);
562  scratch_vector_type t2(teamMember.team_scratch(_pm.getTeamScratchLevel(0)), max_num_rows);
563  scratch_matrix_right_type P_target_row(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _total_alpha_values*_max_evaluation_sites_per_target, this_num_cols);
564 
565  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), this_num_cols);
566  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (_poly_order+1)*_global_dimensions);
567 
568  /*
569  * Apply Standard Target Evaluations to Polynomial Coefficients
570  */
571 
572  // get evaluation of target functionals
573  this->computeTargetFunctionals(teamMember, delta, thread_workspace, P_target_row);
574  teamMember.team_barrier();
575 
576  this->applyTargetsToCoefficients(teamMember, t1, t2, Coeffs, w, P_target_row, _NP);
577  teamMember.team_barrier();
578 }
579 
580 
581 KOKKOS_INLINE_FUNCTION
582 void GMLS::operator()(const ComputeCoarseTangentPlane&, const member_type& teamMember) const {
583 
584  /*
585  * Dimensions
586  */
587 
588  const int target_index = _initial_index_for_batch + teamMember.league_rank();
589  const int local_index = teamMember.league_rank();
590 
591  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
593  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
594  const int this_num_cols = _basis_multiplier*max_manifold_NP;
595  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
596 
597  int P_dim_0, P_dim_1;
598  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
599 
600  /*
601  * Data
602  */
603 
604  scratch_matrix_right_type PsqrtW(_P.data()
605  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
606  scratch_vector_type w(_w.data()
607  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
608  scratch_matrix_right_type T(_T.data()
610 
611  scratch_matrix_right_type PTP(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _dimensions, _dimensions);
612 
613  // delta, used for each thread
614  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
615  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
616 
617  /*
618  * Determine Coarse Approximation of Manifold Tangent Plane
619  */
620 
621  // getting x y and z from which to derive a manifold
622  this->createWeightsAndPForCurvature(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions, true /* only specific order */);
623 
624  // create PsqrtW^T*PsqrtW
625  KokkosBatched::TeamVectorGemmInternal<KokkosBatched::Algo::Gemm::Unblocked>
626  ::invoke(teamMember,
627  _dimensions, _dimensions, this->getNNeighbors(target_index),
628  1.0,
629  PsqrtW.data(), PsqrtW.stride(1), PsqrtW.stride(0),
630  PsqrtW.data(), PsqrtW.stride(0), PsqrtW.stride(1),
631  0.0,
632  PTP.data(), PTP.stride(0), PTP.stride(1));
633  teamMember.team_barrier();
634 
635  // create coarse approximation of tangent plane in first two rows of T, with normal direction in third column
637  const_cast<pool_type&>(_random_number_pool));
638 
639  teamMember.team_barrier();
640 
641 }
642 
643 KOKKOS_INLINE_FUNCTION
644 void GMLS::operator()(const AssembleCurvaturePsqrtW&, const member_type& teamMember) const {
645 
646  /*
647  * Dimensions
648  */
649 
650  const int target_index = _initial_index_for_batch + teamMember.league_rank();
651  const int local_index = teamMember.league_rank();
652 
653  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
655  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
656  const int this_num_neighbors = this->getNNeighbors(target_index);
657  const int this_num_cols = _basis_multiplier*max_manifold_NP;
658  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
659 
660  int RHS_dim_0, RHS_dim_1;
661  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
662  int P_dim_0, P_dim_1;
663  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
664 
665  /*
666  * Data
667  */
668 
669  scratch_matrix_right_type CurvaturePsqrtW(_P.data()
670  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
671  scratch_matrix_right_type RHS(_RHS.data()
672  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
673  scratch_vector_type w(_w.data()
674  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
675  scratch_matrix_right_type T(_T.data()
677 
678  // delta, used for each thread
679  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
680  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
681 
682 
683  //
684  // RECONSTRUCT ON THE TANGENT PLANE USING LOCAL COORDINATES
685  //
686 
687  // creates the matrix sqrt(W)*P
688  this->createWeightsAndPForCurvature(teamMember, delta, thread_workspace, CurvaturePsqrtW, w, _dimensions-1, false /* only specific order */, &T);
689  teamMember.team_barrier();
690 
691  // CurvaturePsqrtW is sized according to max_num_rows x this_num_cols of which in this case
692  // we are only using this_num_neighbors x manifold_NP
693 
695  // fill in RHS with Identity * sqrt(weights)
696  double * rhs_data = RHS.data();
697  Kokkos::parallel_for(Kokkos::TeamVectorRange(teamMember,this_num_neighbors), [&] (const int i) {
698  rhs_data[i] = std::sqrt(w(i));
699  });
700  } else {
701  // create global memory for matrix M = PsqrtW^T*PsqrtW
702  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
704  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
705  // Assemble matrix M
706  KokkosBatched::TeamVectorGemmInternal<KokkosBatched::Algo::Gemm::Unblocked>
707  ::invoke(teamMember,
708  manifold_NP, manifold_NP, this_num_neighbors,
709  1.0,
710  CurvaturePsqrtW.data(), CurvaturePsqrtW.stride(1), CurvaturePsqrtW.stride(0),
711  CurvaturePsqrtW.data(), CurvaturePsqrtW.stride(0), CurvaturePsqrtW.stride(1),
712  0.0,
713  M.data(), M.stride(0), M.stride(1));
714  teamMember.team_barrier();
715 
716  // Multiply PsqrtW with sqrt(W) to get PW
717  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, this_num_neighbors), [=] (const int i) {
718  for (int j=0; j < manifold_NP; j++) {
719  CurvaturePsqrtW(i, j) = CurvaturePsqrtW(i, j)*std::sqrt(w(i));
720  }
721  });
722  }
723  teamMember.team_barrier();
724 }
725 
726 KOKKOS_INLINE_FUNCTION
727 void GMLS::operator()(const GetAccurateTangentDirections&, const member_type& teamMember) const {
728 
729  /*
730  * Dimensions
731  */
732 
733  const int target_index = _initial_index_for_batch + teamMember.league_rank();
734  const int local_index = teamMember.league_rank();
735 
736  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
738  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
739  const int this_num_cols = _basis_multiplier*max_manifold_NP;
740  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
741 
742  int RHS_dim_0, RHS_dim_1;
743  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
744  int P_dim_0, P_dim_1;
745  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
746 
747  /*
748  * Data
749  */
752  // Solution from QR comes from RHS
754  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
755  } else {
756  // Solution from LU comes from P
757  Q = scratch_matrix_right_type(_P.data()
758  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
759  }
760 
761  scratch_matrix_right_type T(_T.data()
763 
764  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
766 
767  // delta, used for each thread
768  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
769  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
770 
771  /*
772  * Manifold
773  */
774 
775 
776  //
777  // GET TARGET COEFFICIENTS RELATED TO GRADIENT TERMS
778  //
779  // reconstruct grad_xi1 and grad_xi2, not used for manifold_coeffs
780  this->computeCurvatureFunctionals(teamMember, delta, thread_workspace, P_target_row, &T);
781  teamMember.team_barrier();
782 
783  double grad_xi1 = 0, grad_xi2 = 0;
784  for (int i=0; i<this->getNNeighbors(target_index); ++i) {
785  for (int k=0; k<_dimensions-1; ++k) {
786  double alpha_ij = 0;
787  int offset = getTargetOffsetIndexDevice(0, 0, k, 0);
788  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
789  manifold_NP), [=] (const int l, double &talpha_ij) {
790  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
791  talpha_ij += P_target_row(offset,l)*Q(l,i);
792  });
793  }, alpha_ij);
794  teamMember.team_barrier();
795  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
796  manifold_gradient(i*(_dimensions-1) + k) = alpha_ij; // stored staggered, grad_xi1, grad_xi2, grad_xi1, grad_xi2, ....
797  });
798  }
799  teamMember.team_barrier();
800 
801  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
802  double normal_coordinate = rel_coord[_dimensions-1];
803 
804  // apply coefficients to sample data
805  grad_xi1 += manifold_gradient(i*(_dimensions-1)) * normal_coordinate;
806  if (_dimensions>2) grad_xi2 += manifold_gradient(i*(_dimensions-1)+1) * normal_coordinate;
807  teamMember.team_barrier();
808  }
809 
810  // Constructs high order orthonormal tangent space T and inverse of metric tensor
811  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
812 
813  double grad_xi[2] = {grad_xi1, grad_xi2};
814  double T_row[3];
815 
816  // Construct T (high order approximation of orthonormal tangent vectors)
817  for (int i=0; i<_dimensions-1; ++i) {
818  for (int j=0; j<_dimensions; ++j) {
819  T_row[j] = T(i,j);
820  }
821  // build
822  for (int j=0; j<_dimensions; ++j) {
823  T(i,j) = grad_xi[i]*T(_dimensions-1,j);
824  T(i,j) += T_row[j];
825  }
826  }
827 
828  // calculate norm
829  double norm = 0;
830  for (int j=0; j<_dimensions; ++j) {
831  norm += T(0,j)*T(0,j);
832  }
833 
834  // normalize first vector
835  norm = std::sqrt(norm);
836  for (int j=0; j<_dimensions; ++j) {
837  T(0,j) /= norm;
838  }
839 
840  // orthonormalize next vector
841  if (_dimensions-1 == 2) { // 2d manifold
842  double dot_product = T(0,0)*T(1,0) + T(0,1)*T(1,1) + T(0,2)*T(1,2);
843  for (int j=0; j<_dimensions; ++j) {
844  T(1,j) -= dot_product*T(0,j);
845  }
846  // normalize second vector
847  norm = 0;
848  for (int j=0; j<_dimensions; ++j) {
849  norm += T(1,j)*T(1,j);
850  }
851  norm = std::sqrt(norm);
852  for (int j=0; j<_dimensions; ++j) {
853  T(1,j) /= norm;
854  }
855  }
856 
857  // get normal vector to first two rows of T
858  double norm_t_normal = 0;
859  if (_dimensions>2) {
860  T(_dimensions-1,0) = T(0,1)*T(1,2) - T(1,1)*T(0,2);
861  norm_t_normal += T(_dimensions-1,0)*T(_dimensions-1,0);
862  T(_dimensions-1,1) = -(T(0,0)*T(1,2) - T(1,0)*T(0,2));
863  norm_t_normal += T(_dimensions-1,1)*T(_dimensions-1,1);
864  T(_dimensions-1,2) = T(0,0)*T(1,1) - T(1,0)*T(0,1);
865  norm_t_normal += T(_dimensions-1,2)*T(_dimensions-1,2);
866  } else {
867  T(_dimensions-1,0) = T(1,1) - T(0,1);
868  norm_t_normal += T(_dimensions-1,0)*T(_dimensions-1,0);
869  T(_dimensions-1,1) = T(0,0) - T(1,0);
870  norm_t_normal += T(_dimensions-1,1)*T(_dimensions-1,1);
871  }
872  norm_t_normal = std::sqrt(norm_t_normal);
873  for (int i=0; i<_dimensions-1; ++i) {
874  T(_dimensions-1,i) /= norm_t_normal;
875  }
876  });
877  teamMember.team_barrier();
878 }
879 
880 KOKKOS_INLINE_FUNCTION
881 void GMLS::operator()(const FixTangentDirectionOrdering&, const member_type& teamMember) const {
882 
883  /*
884  * Dimensions
885  */
886 
887  const int target_index = _initial_index_for_batch + teamMember.league_rank();
888 
889  /*
890  * Data
891  */
892 
894  scratch_vector_type N(_ref_N.data() + target_index*_dimensions, _dimensions);
895 
896  // take the dot product of the calculated normal in the tangent bundle with a given reference outward normal
897  // direction provided by the user. if the dot product is negative, flip the tangent vector ordering
898  // and flip the sign on the normal direction.
899  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
900  compadre_kernel_assert_debug(_dimensions > 1 && "FixTangentDirectionOrder called on manifold with a dimension of 0.");
901  double dot_product = 0;
902  for (int i=0; i<_dimensions; ++i) {
903  dot_product += T(_dimensions-1,i) * N(i);
904 
905  }
906  if (dot_product < 0) {
907  if (_dimensions==3) {
908  for (int i=0; i<_dimensions; ++i) {
909  // swap tangent directions
910  double tmp = T(0,i);
911  T(0,i) = T(1,i);
912  T(1,i) = tmp;
913  }
914  }
915  for (int i=0; i<_dimensions; ++i) {
916  // flip the sign of the normal vector
917  T(_dimensions-1,i) *= -1;
918 
919  }
920  }
921  });
922  teamMember.team_barrier();
923 }
924 
925 KOKKOS_INLINE_FUNCTION
926 void GMLS::operator()(const ApplyCurvatureTargets&, const member_type& teamMember) const {
927 
928  /*
929  * Dimensions
930  */
931 
932  const int target_index = _initial_index_for_batch + teamMember.league_rank();
933  const int local_index = teamMember.league_rank();
934 
935  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
937  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
938  const int this_num_cols = _basis_multiplier*max_manifold_NP;
939  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
940 
941  int RHS_dim_0, RHS_dim_1;
942  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
943  int P_dim_0, P_dim_1;
944  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
945 
946  /*
947  * Data
948  */
951  // Solution from QR comes from RHS
953  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
954  } else {
955  // Solution from LU comes from P
956  Q = scratch_matrix_right_type(_P.data()
957  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
958  }
959 
960  scratch_matrix_right_type T(_T.data()
962 
965  scratch_vector_type manifold_coeffs(_manifold_curvature_coefficients.data() + target_index*manifold_NP, manifold_NP);
966  scratch_vector_type manifold_gradient_coeffs(_manifold_curvature_gradient.data() + target_index*(_dimensions-1), (_dimensions-1));
967 
968  scratch_matrix_right_type G(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _dimensions-1, _dimensions-1);
969  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
971 
972  // delta, used for each thread
973  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
974  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
975 
976  /*
977  * Manifold
978  */
979 
980 
981  //
982  // GET TARGET COEFFICIENTS RELATED TO GRADIENT TERMS
983  //
984  // reconstruct grad_xi1 and grad_xi2, not used for manifold_coeffs
985  this->computeCurvatureFunctionals(teamMember, delta, thread_workspace, P_target_row, &T);
986  teamMember.team_barrier();
987 
988  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
989  for (int j=0; j<manifold_NP; ++j) { // set to zero
990  manifold_coeffs(j) = 0;
991  }
992  });
993  teamMember.team_barrier();
994 
995  double grad_xi1 = 0, grad_xi2 = 0;
996  for (int i=0; i<this->getNNeighbors(target_index); ++i) {
997  for (int k=0; k<_dimensions-1; ++k) {
998  double alpha_ij = 0;
999  int offset = getTargetOffsetIndexDevice(0, 0, k, 0);
1000  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
1001  manifold_NP), [=] (const int l, double &talpha_ij) {
1002  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1003  talpha_ij += P_target_row(offset,l)*Q(l,i);
1004  });
1005  }, alpha_ij);
1006  teamMember.team_barrier();
1007  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1008  manifold_gradient(i*(_dimensions-1) + k) = alpha_ij; // stored staggered, grad_xi1, grad_xi2, grad_xi1, grad_xi2, ....
1009  });
1010  }
1011  teamMember.team_barrier();
1012 
1013  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1014  double normal_coordinate = rel_coord[_dimensions-1];
1015 
1016  // apply coefficients to sample data
1017  grad_xi1 += manifold_gradient(i*(_dimensions-1)) * normal_coordinate;
1018  if (_dimensions>2) grad_xi2 += manifold_gradient(i*(_dimensions-1)+1) * normal_coordinate;
1019 
1020  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1021  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1022  // coefficients without a target premultiplied
1023  for (int j=0; j<manifold_NP; ++j) {
1024  manifold_coeffs(j) += Q(j,i) * normal_coordinate;
1025  }
1026  });
1027  });
1028  teamMember.team_barrier();
1029  }
1030 
1031  // Constructs high order orthonormal tangent space T and inverse of metric tensor
1032  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1033 
1034  manifold_gradient_coeffs(0) = grad_xi1;
1035  if (_dimensions>2) manifold_gradient_coeffs(1) = grad_xi2;
1036 
1037  // need to get 2x2 matrix of metric tensor
1038  G(0,0) = 1 + grad_xi1*grad_xi1;
1039 
1040  if (_dimensions>2) {
1041  G(0,1) = grad_xi1*grad_xi2;
1042  G(1,0) = grad_xi2*grad_xi1;
1043  G(1,1) = 1 + grad_xi2*grad_xi2;
1044  }
1045 
1046  double G_determinant;
1047  if (_dimensions==2) {
1048  G_determinant = G(0,0);
1049  compadre_kernel_assert_debug((G_determinant!=0) && "Determinant is zero.");
1050  G_inv(0,0) = 1/G_determinant;
1051  } else {
1052  G_determinant = G(0,0)*G(1,1) - G(0,1)*G(1,0); //std::sqrt(G_inv(0,0)*G_inv(1,1) - G_inv(0,1)*G_inv(1,0));
1053  compadre_kernel_assert_debug((G_determinant!=0) && "Determinant is zero.");
1054  {
1055  // inverse of 2x2
1056  G_inv(0,0) = G(1,1)/G_determinant;
1057  G_inv(1,1) = G(0,0)/G_determinant;
1058  G_inv(0,1) = -G(0,1)/G_determinant;
1059  G_inv(1,0) = -G(1,0)/G_determinant;
1060  }
1061  }
1062 
1063  });
1064  teamMember.team_barrier();
1065  //
1066  // END OF MANIFOLD METRIC CALCULATIONS
1067  //
1068 }
1069 
1070 KOKKOS_INLINE_FUNCTION
1071 void GMLS::operator()(const AssembleManifoldPsqrtW&, const member_type& teamMember) const {
1072 
1073  /*
1074  * Dimensions
1075  */
1076 
1077  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1078  const int local_index = teamMember.league_rank();
1079 
1080  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1082  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1083  const int this_num_rows = _sampling_multiplier*this->getNNeighbors(target_index);
1084  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1085  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1086 
1087  int RHS_dim_0, RHS_dim_1;
1088  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
1089  int P_dim_0, P_dim_1;
1090  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1091 
1092  /*
1093  * Data
1094  */
1095 
1096  scratch_matrix_right_type PsqrtW(_P.data()
1097  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
1099  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
1100  scratch_vector_type w(_w.data()
1101  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
1102  scratch_matrix_right_type T(_T.data()
1104 
1105  // delta, used for each thread
1106  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
1107  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1108 
1109 
1110  /*
1111  * Manifold
1112  */
1113 
1114  this->createWeightsAndP(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions-1, _poly_order, true /* weight with W*/, &T, _reconstruction_space, _polynomial_sampling_functional);
1115  teamMember.team_barrier();
1116 
1118  // fill in RHS with Identity * sqrt(weights)
1119  double * Q_data = Q.data();
1120  Kokkos::parallel_for(Kokkos::TeamVectorRange(teamMember,this_num_rows), [&] (const int i) {
1121  Q_data[i] = std::sqrt(w(i));
1122  });
1123  } else {
1124  // create global memory for matrix M = PsqrtW^T*PsqrtW
1125  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
1127  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
1128  // Assemble matrix M
1129  KokkosBatched::TeamVectorGemmInternal<KokkosBatched::Algo::Gemm::Unblocked>
1130  ::invoke(teamMember,
1131  this_num_cols, this_num_cols, this_num_rows,
1132  1.0,
1133  PsqrtW.data(), PsqrtW.stride(1), PsqrtW.stride(0),
1134  PsqrtW.data(), PsqrtW.stride(0), PsqrtW.stride(1),
1135  0.0,
1136  M.data(), M.stride(0), M.stride(1));
1137  teamMember.team_barrier();
1138 
1139 
1140  // Multiply PsqrtW with sqrt(W) to get PW
1141  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, max_num_rows), [&] (const int i) {
1142  for (int j=0; j < this_num_cols; j++) {
1143  PsqrtW(i, j) = PsqrtW(i, j)*std::sqrt(w(i));
1144  }
1145  });
1146  }
1147  teamMember.team_barrier();
1148 }
1149 
1150 KOKKOS_INLINE_FUNCTION
1151 void GMLS::operator()(const ApplyManifoldTargets&, const member_type& teamMember) const {
1152 
1153  /*
1154  * Dimensions
1155  */
1156 
1157  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1158  const int local_index = teamMember.league_rank();
1159 
1160  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1162  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1163  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1164  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1165 
1166  int RHS_dim_0, RHS_dim_1;
1167  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
1168  int P_dim_0, P_dim_1;
1169  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1170 
1171  /*
1172  * Data
1173  */
1174 
1177  Coeffs = scratch_matrix_right_type(_RHS.data()
1178  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
1179  } else {
1180  Coeffs = scratch_matrix_right_type(_P.data()
1181  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
1182  }
1183  scratch_vector_type w(_w.data()
1184  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
1185  scratch_matrix_right_type T(_T.data()
1187 
1189  + TO_GLOBAL(target_index)*TO_GLOBAL((_dimensions-1))*TO_GLOBAL((_dimensions-1)), _dimensions-1, _dimensions-1);
1190  scratch_vector_type manifold_coeffs(_manifold_curvature_coefficients.data() + target_index*manifold_NP, manifold_NP);
1191  scratch_vector_type manifold_gradient_coeffs(_manifold_curvature_gradient.data() + target_index*(_dimensions-1), (_dimensions-1));
1192 
1196 
1197  // delta, used for each thread
1198  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
1199  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1200 
1201  /*
1202  * Apply Standard Target Evaluations to Polynomial Coefficients
1203  */
1204 
1205  this->computeTargetFunctionalsOnManifold(teamMember, delta, thread_workspace, P_target_row, T, G_inv, manifold_coeffs, manifold_gradient_coeffs);
1206  teamMember.team_barrier();
1207 
1208  this->applyTargetsToCoefficients(teamMember, t1, t2, Coeffs, w, P_target_row, _NP);
1209 
1210  teamMember.team_barrier();
1211 }
1212 
1213 
1214 KOKKOS_INLINE_FUNCTION
1215 void GMLS::operator()(const ComputePrestencilWeights&, const member_type& teamMember) const {
1216 
1217  /*
1218  * Dimensions
1219  */
1220 
1221  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1222  const int local_index = teamMember.league_rank();
1223 
1224  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1226  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1227  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1228  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1229 
1230  int RHS_dim_0, RHS_dim_1;
1231  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
1232  int P_dim_0, P_dim_1;
1233  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1234 
1235  /*
1236  * Data
1237  */
1238 
1239 
1242 
1243  // holds polynomial coefficients for curvature reconstruction
1246  // Solution from QR comes from RHS
1247  Q = scratch_matrix_right_type(_RHS.data()
1248  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1), RHS_dim_0, RHS_dim_1);
1249  } else {
1250  // Solution from LU comes from P
1251  Q = scratch_matrix_right_type(_P.data()
1252  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
1253  }
1254 
1255  scratch_matrix_right_type T(_T.data()
1257 
1258  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
1260 
1261  /*
1262  * Prestencil Weight Calculations
1263  */
1264 
1266  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1267  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1268  _prestencil_weights(0,0,0,0,0) = -1;
1269  _prestencil_weights(1,0,0,0,0) = 1;
1270  });
1271  });
1273  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1274  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1275  for (int j=0; j<_dimensions; ++j) {
1276  for (int k=0; k<_dimensions-1; ++k) {
1277  _prestencil_weights(0,target_index,0,k,j) = T(k,j);
1278  }
1279  }
1280  });
1281  });
1283  compadre_kernel_assert_debug(_problem_type==ProblemType::MANIFOLD && "StaggeredEdgeIntegralSample prestencil weight only written for manifolds.");
1284  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [=] (const int m) {
1285  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1286  for (int quadrature = 0; quadrature<_qm.getNumberOfQuadraturePoints(); ++quadrature) {
1287  XYZ tangent_quadrature_coord_2d;
1288  for (int j=0; j<_dimensions-1; ++j) {
1289  tangent_quadrature_coord_2d[j] = getTargetCoordinate(target_index, j, &T);
1290  tangent_quadrature_coord_2d[j] -= getNeighborCoordinate(target_index, m, j, &T);
1291  }
1292  double tangent_vector[3];
1293  tangent_vector[0] = tangent_quadrature_coord_2d[0]*T(0,0) + tangent_quadrature_coord_2d[1]*T(1,0);
1294  tangent_vector[1] = tangent_quadrature_coord_2d[0]*T(0,1) + tangent_quadrature_coord_2d[1]*T(1,1);
1295  tangent_vector[2] = tangent_quadrature_coord_2d[0]*T(0,2) + tangent_quadrature_coord_2d[1]*T(1,2);
1296 
1297  for (int j=0; j<_dimensions; ++j) {
1298  _prestencil_weights(0,target_index,m,0,j) += (1-_qm.getSite(quadrature,0))*tangent_vector[j]*_qm.getWeight(quadrature);
1299  _prestencil_weights(1,target_index,m,0,j) += _qm.getSite(quadrature,0)*tangent_vector[j]*_qm.getWeight(quadrature);
1300  }
1301  }
1302  });
1303  });
1305 
1306  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), manifold_NP);
1307  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1308 
1309  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [&] (const int m) {
1310 
1311  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1312  this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, m, 0 /*alpha*/, 0 /*partial_direction*/, _dimensions-1, _curvature_poly_order, false /*specific order only*/, &T, ReconstructionSpace::ScalarTaylorPolynomial, PointSample);
1313  });
1314  // reconstructs gradient at local neighbor index m
1315  double grad_xi1 = 0, grad_xi2 = 0;
1316  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,this->getNNeighbors(target_index)), [=] (const int i, double &t_grad_xi1) {
1317  double alpha_ij = 0;
1318  for (int l=0; l<manifold_NP; ++l) {
1319  alpha_ij += delta(l)*Q(l,i);
1320  }
1321  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1322  double normal_coordinate = rel_coord[_dimensions-1];
1323 
1324  // apply coefficients to sample data
1325  t_grad_xi1 += alpha_ij * normal_coordinate;
1326  }, grad_xi1);
1327  t1(m) = grad_xi1;
1328 
1329  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1330  this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, m, 0 /*alpha*/, 1 /*partial_direction*/, _dimensions-1, _curvature_poly_order, false /*specific order only*/, &T, ReconstructionSpace::ScalarTaylorPolynomial, PointSample);
1331  });
1332  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,this->getNNeighbors(target_index)), [=] (const int i, double &t_grad_xi2) {
1333  double alpha_ij = 0;
1334  for (int l=0; l<manifold_NP; ++l) {
1335  alpha_ij += delta(l)*Q(l,i);
1336  }
1337  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1338  double normal_coordinate = rel_coord[_dimensions-1];
1339 
1340  // apply coefficients to sample data
1341  if (_dimensions>2) t_grad_xi2 += alpha_ij * normal_coordinate;
1342  }, grad_xi2);
1343  t2(m) = grad_xi2;
1344  });
1345 
1346  teamMember.team_barrier();
1347 
1348  scratch_matrix_right_type tangent(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), _dimensions-1, _dimensions);
1349 
1350  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [=] (const int m) {
1351  // constructs tangent vector at neighbor site
1352  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1353  for (int j=0; j<_dimensions; ++j) {
1354  tangent(0,j) = t1(m)*T(_dimensions-1,j) + T(0,j);
1355  tangent(1,j) = t2(m)*T(_dimensions-1,j) + T(1,j);
1356  }
1357 
1358  // calculate norm
1359  double norm = 0;
1360  for (int j=0; j<_dimensions; ++j) {
1361  norm += tangent(0,j)*tangent(0,j);
1362  }
1363 
1364  // normalize first vector
1365  norm = std::sqrt(norm);
1366  for (int j=0; j<_dimensions; ++j) {
1367  tangent(0,j) /= norm;
1368  }
1369 
1370  // orthonormalize next vector
1371  if (_dimensions-1 == 2) { // 2d manifold
1372  double dot_product = tangent(0,0)*tangent(1,0) + tangent(0,1)*tangent(1,1) + tangent(0,2)*tangent(1,2);
1373  for (int j=0; j<_dimensions; ++j) {
1374  tangent(1,j) -= dot_product*tangent(0,j);
1375  }
1376  // normalize second vector
1377  norm = 0;
1378  for (int j=0; j<_dimensions; ++j) {
1379  norm += tangent(1,j)*tangent(1,j);
1380  }
1381  norm = std::sqrt(norm);
1382  for (int j=0; j<_dimensions; ++j) {
1383  tangent(1,j) /= norm;
1384  }
1385  }
1386 
1387  // stores matrix of tangent and normal directions as a prestencil weight
1388  for (int j=0; j<_dimensions; ++j) {
1389  for (int k=0; k<_dimensions-1; ++k) {
1390  _prestencil_weights(0,target_index,m,k,j) = tangent(k,j);
1391  }
1392  }
1393  });
1394  });
1395  }
1396  teamMember.team_barrier();
1397 }
1398 
1399 } // Compadre
Kokkos::View< double * > _manifold_metric_tensor_inverse
metric tensor inverse for all problems
Kokkos::View< double *, layout_right > _alphas
generated alpha coefficients (device)
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getAdditionalAlphaSizeFromConstraint(DenseSolverType dense_solver_type, ConstraintType constraint_type)
Kokkos::View< const double *****, layout_right >::HostMirror _host_prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
Kokkos::View< TargetOperation * > _operations
vector containing target functionals to be applied for reconstruction problem (device) ...
bool _entire_batch_computed_at_once
whether entire calculation was computed at once the alternative is that it was broken up over many sm...
KOKKOS_INLINE_FUNCTION double getNeighborCoordinate(const int target_index, const int neighbor_list_num, const int dim, const scratch_matrix_right_type *V=NULL) const
Returns one component of the neighbor coordinate for a particular target.
int _max_evaluation_sites_per_target
maximum number of evaluation sites for each target (includes target site)
Kokkos::View< double * > _manifold_curvature_coefficients
curvature polynomial coefficients for all problems
KOKKOS_INLINE_FUNCTION void createWeightsAndP(const member_type &teamMember, scratch_vector_type delta, scratch_vector_type thread_workspace, scratch_matrix_right_type P, scratch_vector_type w, const int dimension, int polynomial_order, bool weight_p=false, scratch_matrix_right_type *V=NULL, const ReconstructionSpace reconstruction_space=ReconstructionSpace::ScalarTaylorPolynomial, const SamplingFunctional sampling_strategy=PointSample) const
Fills the _P matrix with either P or P*sqrt(w)
void generatePolynomialCoefficients(const int number_of_batches=1, const bool keep_coefficients=false)
Generates polynomial coefficients by setting up and solving least squares problems ! Sets up the batc...
Tag for functor to evaluate curvature targets and apply to coefficients of curvature reconstruction...
Kokkos::View< double * > _ref_N
Rank 2 tensor for high order approximation of tangent vectors for all problems.
KOKKOS_INLINE_FUNCTION void calcGradientPij(const member_type &teamMember, double *delta, double *thread_workspace, const int target_index, int neighbor_index, const double alpha, const int partial_direction, const int dimension, const int poly_order, bool specific_order_only, const scratch_matrix_right_type *V, const ReconstructionSpace reconstruction_space, const SamplingFunctional sampling_strategy, const int additional_evaluation_local_index=0) const
Evaluates the gradient of a polynomial basis under the Dirac Delta (pointwise) sampling function...
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
constexpr SamplingFunctional ManifoldVectorPointSample
Point evaluations of the entire vector source function (but on a manifold, so it includes a transform...
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int)
bool _reference_outward_normal_direction_provided
whether or not the reference outward normal directions were provided by the user. ...
KOKKOS_INLINE_FUNCTION double getSite(const int index, const int component) const
team_policy::member_type member_type
KOKKOS_INLINE_FUNCTION int getAdditionalCoeffSizeFromConstraintAndSpace(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension)
Neumann Gradient Scalar Type.
int _NP
dimension of basis for polynomial reconstruction
int _total_alpha_values
used for sizing P_target_row and the _alphas view
Kokkos::View< double * > _epsilons
h supports determined through neighbor search (device)
bool _orthonormal_tangent_space_provided
whether or not the orthonormal tangent directions were provided by the user.
KOKKOS_INLINE_FUNCTION void largestTwoEigenvectorsThreeByThreeSymmetric(const member_type &teamMember, scratch_matrix_right_type V, scratch_matrix_right_type PtP, const int dimensions, pool_type &random_number_pool)
Calculates two eigenvectors corresponding to two dominant eigenvalues.
bool _use_reference_outward_normal_direction_provided_to_orient_surface
whether or not to use reference outward normal directions to orient the surface in a manifold problem...
Kokkos::View< double * > _RHS
sqrt(w)*Identity matrix for all problems, later holds polynomial coefficients for all problems ...
Kokkos::View< double * > _w
contains weights for all problems
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
int _sampling_multiplier
actual dimension of the sampling functional e.g.
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
int _curvature_poly_order
order of basis for curvature reconstruction
constexpr SamplingFunctional VaryingManifoldVectorPointSample
For integrating polynomial dotted with normal over an edge.
Kokkos::View< double **, layout_right > _target_coordinates
coordinates for target sites for reconstruction (device)
Each target applies a different data transform, but the same to each neighbor.
int _data_sampling_multiplier
effective dimension of the data sampling functional e.g.
Each target applies a different transform for each neighbor.
NeighborLists< Kokkos::View< int * > > _neighbor_lists
Accessor to get neighbor list data, offset data, and number of neighbors per target.
Quadrature _qm
manages and calculates quadrature
Kokkos::View< double * >::HostMirror _host_T
tangent vectors information (host)
KOKKOS_INLINE_FUNCTION double getTargetCoordinate(const int target_index, const int dim, const scratch_matrix_right_type *V=NULL) const
Returns one component of the target coordinate for a particular target.
std::vector< TargetOperation > _lro
vector of user requested target operations
constexpr SamplingFunctional StaggeredEdgeAnalyticGradientIntegralSample
Analytical integral of a gradient source vector is just a difference of the scalar source at neighbor...
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const
Scalar polynomial basis centered at the target site and scaled by sum of basis powers e...
Solve GMLS problem on a manifold (will use QR or SVD to solve the resultant GMLS problem dependent on...
int _max_num_neighbors
maximum number of neighbors over all target sites
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int)
const SamplingFunctional _data_sampling_functional
generally the same as _polynomial_sampling_functional, but can differ if specified at GMLS class inst...
Tag for functor to evaluate targets, apply target evaluation to polynomial coefficients to store in _...
KOKKOS_INLINE_FUNCTION int getNNeighbors(const int target_index) const
Returns number of neighbors for a particular target.
int calculateBasisMultiplier(const ReconstructionSpace rs) const
Calculate basis_multiplier.
DenseSolverType _dense_solver_type
solver type for GMLS problem - can be QR, SVD or LU
QR+Pivoting factorization performed on P*sqrt(w) matrix.
Tag for functor to create a coarse tangent approximation from a given neighborhood of points...
pool_type _random_number_pool
Kokkos::View< double * > _manifold_curvature_gradient
_dimension-1 gradient values for curvature for all problems
Kokkos::View< double *****, layout_right > _prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
#define TO_GLOBAL(variable)
int _initial_index_for_batch
initial index for current batch
KOKKOS_INLINE_FUNCTION int getTargetOffsetIndexDevice(const int lro_num, const int input_component, const int output_component, const int additional_evaluation_local_index=0) const
Handles offset from operation input/output + extra evaluation sites.
static KOKKOS_INLINE_FUNCTION int getNP(const int m, const int dimension=3, const ReconstructionSpace r_space=ReconstructionSpace::ScalarTaylorPolynomial)
Returns size of the basis for a given polynomial order and dimension General to dimension 1...
constexpr SamplingFunctional StaggeredEdgeIntegralSample
Samples consist of the result of integrals of a vector dotted with the tangent along edges between ne...
const SamplingFunctional _polynomial_sampling_functional
polynomial sampling functional used to construct P matrix, set at GMLS class instantiation ...
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
Tag for functor to calculate prestencil weights to apply to data to transform into a format expected ...
KOKKOS_INLINE_FUNCTION void getPDims(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N, int &out_row, int &out_col)
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
KOKKOS_INLINE_FUNCTION int getNumberOfQuadraturePoints() const
int getOutputDimensionOfSampling(SamplingFunctional sro) const
Dimensions ^ output rank for sampling operation (always in local chart if on a manifold, never ambient space)
Kokkos::View< double **, layout_right > _source_coordinates
all coordinates for the source for which _neighbor_lists refers (device)
void generateAlphas(const int number_of_batches=1, const bool keep_coefficients=false)
Meant to calculate target operations and apply the evaluations to the previously ! constructed polyno...
int _dimension_of_quadrature_points
dimension of quadrature rule
ParallelManager _pm
determines scratch level spaces and is used to call kernels
KOKKOS_INLINE_FUNCTION XYZ getRelativeCoord(const int target_index, const int neighbor_list_num, const int dimension, const scratch_matrix_right_type *V=NULL) const
Returns the relative coordinate as a vector between the target site and the neighbor site...
int _added_alpha_size
additional alpha coefficients due to constraints
ProblemType _problem_type
problem type for GMLS problem, can also be set to STANDARD for normal or MANIFOLD for manifold proble...
int _basis_multiplier
dimension of the reconstructed function e.g.
int _global_dimensions
spatial dimension of the points, set at class instantiation only
Kokkos::View< double * > _T
Rank 3 tensor for high order approximation of tangent vectors for all problems.
KOKKOS_INLINE_FUNCTION void computeTargetFunctionals(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row) const
Evaluates a polynomial basis with a target functional applied to each member of the basis...
global_index_type getTotalNeighborsOverAllListsHost() const
Get the sum of the number of neighbors of all targets&#39; neighborhoods (host)
void setTeamScratchSize(const int level, const int value)
LU factorization performed on P^T*W*P matrix.
bool _store_PTWP_inv_PTW
whether polynomial coefficients were requested to be stored (in a state not yet applied to data) ...
void setThreadScratchSize(const int level, const int value)
ReconstructionSpace _reconstruction_space
reconstruction space for GMLS problems, set at GMLS class instantiation
Kokkos::View< const double *, layout_right >::HostMirror _host_alphas
generated alpha coefficients (host)
Kokkos::View< double * > _P
P*sqrt(w) matrix for all problems.
Tag for functor to evaluate targets, apply target evaluation to polynomial coefficients to store in _...
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
KOKKOS_INLINE_FUNCTION void getRHSDims(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N, int &RHS_row, int &RHS_col)
constexpr SamplingFunctional PointSample
Available sampling functionals.
int _dimensions
dimension of the problem, set at class instantiation only
std::string _quadrature_type
quadrature rule type
Kokkos::View< TargetOperation * >::HostMirror _host_operations
vector containing target functionals to be applied for reconstruction problem (host) ...
bool _nontrivial_nullspace
whether or not operator to be inverted for GMLS problem has a nontrivial nullspace (requiring SVD) ...
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type Q, scratch_vector_type w, scratch_matrix_right_type P_target_row, const int target_NP) const
Helper function for applying the evaluations from a target functional to the polynomial coefficients...
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity for curva...
bool nontrivial_nullspace
Whether the SamplingFunctional + ReconstructionSpace results in a nontrivial nullspace.
#define compadre_assert_debug(condition)
compadre_assert_debug is used for assertions that are checked in loops, as these significantly impact...
KOKKOS_INLINE_FUNCTION void operator()(const AssembleStandardPsqrtW &, const member_type &teamMember) const
Functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
int calculateSamplingMultiplier(const ReconstructionSpace rs, const SamplingFunctional sro) const
Calculate sampling_multiplier.
ConstraintType _constraint_type
constraint type for GMLS problem
KOKKOS_INLINE_FUNCTION void computeCurvatureFunctionals(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row, const scratch_matrix_right_type *V, const local_index_type local_neighbor_index=-1) const
Evaluates a polynomial basis for the curvature with a gradient target functional applied.
int _local_dimensions
dimension of the problem, set at class instantiation only. For manifolds, generally _global_dimension...
int _order_of_quadrature_points
order of exact polynomial integration for quadrature rule
KOKKOS_INLINE_FUNCTION void evaluateConstraints(scratch_matrix_right_type M, scratch_matrix_right_type PsqrtW, const ConstraintType constraint_type, const ReconstructionSpace reconstruction_space, const int NP, const double cutoff_p, const int dimension, const int num_neighbors=0, scratch_matrix_right_type *T=NULL)
#define compadre_assert_release(condition)
compadre_assert_release is used for assertions that should always be checked, but generally are not e...
Tag for functor to determine if tangent directions need reordered, and to reorder them if needed...
Tag for functor to evaluate curvature targets and construct accurate tangent direction approximation ...
KOKKOS_INLINE_FUNCTION void computeTargetFunctionalsOnManifold(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row, scratch_matrix_right_type V, scratch_matrix_right_type G_inv, scratch_vector_type curvature_coefficients, scratch_vector_type curvature_gradients) const
Evaluates a polynomial basis with a target functional applied, using information from the manifold cu...
KOKKOS_INLINE_FUNCTION double getWeight(const int index) const
int _poly_order
order of basis for polynomial reconstruction
KOKKOS_INLINE_FUNCTION int getNumberOfTargets() const
Get number of total targets having neighborhoods (host/device).
#define compadre_kernel_assert_debug(condition)
KOKKOS_INLINE_FUNCTION void createWeightsAndPForCurvature(const member_type &teamMember, scratch_vector_type delta, scratch_vector_type thread_workspace, scratch_matrix_right_type P, scratch_vector_type w, const int dimension, bool only_specific_order, scratch_matrix_right_type *V=NULL) const
Fills the _P matrix with P*sqrt(w) for use in solving for curvature.