Intrepid2
Intrepid2_TensorBasis.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ************************************************************************
3 //
4 // Intrepid2 Package
5 // Copyright (2007) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Kyungjoo Kim (kyukim@sandia.gov),
38 // Mauro Perego (mperego@sandia.gov), or
39 // Nate Roberts (nvrober@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
49 #ifndef Intrepid2_TensorBasis_h
50 #define Intrepid2_TensorBasis_h
51 
52 #include <Kokkos_View.hpp>
53 #include <Kokkos_DynRankView.hpp>
54 
55 #include <Teuchos_RCP.hpp>
56 
57 #include <Intrepid2_config.h>
58 
59 #include <map>
60 #include <set>
61 #include <vector>
62 
63 #include "Intrepid2_Basis.hpp"
67 #include "Intrepid2_Utils.hpp" // defines FAD_VECTOR_SIZE, VECTOR_SIZE
68 
69 namespace Intrepid2
70 {
71  template<ordinal_type spaceDim>
72  KOKKOS_INLINE_FUNCTION
73  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder);
74 
75  template<>
76  KOKKOS_INLINE_FUNCTION
77  void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
78  {
79  entries[0] = operatorOrder;
80  }
81 
82  template<>
83  KOKKOS_INLINE_FUNCTION
84  void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
85  {
86  entries[0] = operatorOrder - dkEnum;
87  entries[1] = dkEnum;
88  }
89 
90  template<>
91  KOKKOS_INLINE_FUNCTION
92  void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
93  {
94  // formula is zMult + (yMult+zMult)*(yMult+zMult+1)/2; where xMult+yMult+zMult = operatorOrder
95  // it seems complicated to work out a formula that will invert this. For the present we just take a brute force approach,
96  // using getDkEnumeration() to check each possibility
97  for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
98  {
99  for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
100  {
101  const ordinal_type xMult = operatorOrder-(zMult+yMult);
102  if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
103  {
104  entries[0] = xMult;
105  entries[1] = yMult;
106  entries[2] = zMult;
107  }
108  }
109  }
110  }
111 
112  template<ordinal_type spaceDim>
113  ordinal_type getDkEnumeration(Kokkos::Array<int,spaceDim> &entries);
114 
115  template<>
116  inline ordinal_type getDkEnumeration<1>(Kokkos::Array<int,1> &entries)
117  {
118  return getDkEnumeration<1>(entries[0]);
119  }
120 
121  template<>
122  inline ordinal_type getDkEnumeration<2>(Kokkos::Array<int,2> &entries)
123  {
124  return getDkEnumeration<2>(entries[0],entries[1]);
125  }
126 
127  template<>
128  inline ordinal_type getDkEnumeration<3>(Kokkos::Array<int,3> &entries)
129  {
130  return getDkEnumeration<3>(entries[0],entries[1],entries[2]);
131  }
132 
133  template<ordinal_type spaceDim1, ordinal_type spaceDim2>
134  inline ordinal_type getDkTensorIndex(const ordinal_type dkEnum1, const ordinal_type operatorOrder1,
135  const ordinal_type dkEnum2, const ordinal_type operatorOrder2)
136  {
137  Kokkos::Array<int,spaceDim1> entries1;
138  getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);
139 
140  Kokkos::Array<int,spaceDim2> entries2;
141  getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);
142 
143  const int spaceDim = spaceDim1 + spaceDim2;
144  Kokkos::Array<int,spaceDim> entries;
145 
146  for (ordinal_type d=0; d<spaceDim1; d++)
147  {
148  entries[d] = entries1[d];
149  }
150 
151  for (ordinal_type d=0; d<spaceDim2; d++)
152  {
153  entries[d+spaceDim1] = entries2[d];
154  }
155 
156  return getDkEnumeration<spaceDim>(entries);
157  }
158 
159 template<typename BasisBase>
161 
166 {
167  // if we want to make this usable on device, we could switch to Kokkos::Array instead of std::vector. But this is not our immediate use case.
168  std::vector< std::vector<EOperator> > ops; // outer index: vector entry ordinal; inner index: basis component ordinal. (scalar-valued operators have a single entry in outer vector)
169  std::vector<double> weights; // weights for each vector entry
170  ordinal_type numBasisComponents_;
171 
172  OperatorTensorDecomposition(const std::vector<EOperator> &opsBasis1, const std::vector<EOperator> &opsBasis2, const std::vector<double> vectorComponentWeights)
173  :
174  weights(vectorComponentWeights),
175  numBasisComponents_(2)
176  {
177  const ordinal_type size = opsBasis1.size();
178  const ordinal_type opsBasis2Size = opsBasis2.size();
179  const ordinal_type weightsSize = weights.size();
180  INTREPID2_TEST_FOR_EXCEPTION(size != opsBasis2Size, std::invalid_argument, "opsBasis1.size() != opsBasis2.size()");
181  INTREPID2_TEST_FOR_EXCEPTION(size != weightsSize, std::invalid_argument, "opsBasis1.size() != weights.size()");
182 
183  for (ordinal_type i=0; i<size; i++)
184  {
185  ops.push_back(std::vector<EOperator>{opsBasis1[i],opsBasis2[i]});
186  }
187  }
188 
189  OperatorTensorDecomposition(const std::vector< std::vector<EOperator> > &vectorEntryOps, const std::vector<double> &vectorComponentWeights)
190  :
191  ops(vectorEntryOps),
192  weights(vectorComponentWeights)
193  {
194  const ordinal_type numVectorComponents = ops.size();
195  const ordinal_type weightsSize = weights.size();
196  INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents != weightsSize, std::invalid_argument, "opsBasis1.size() != weights.size()");
197 
198  INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents == 0, std::invalid_argument, "must have at least one entry!");
199 
200  ordinal_type numBases = 0;
201  for (ordinal_type i=0; i<numVectorComponents; i++)
202  {
203  if (numBases == 0)
204  {
205  numBases = ops[i].size();
206  }
207  else if (ops[i].size() != 0)
208  {
209  const ordinal_type opsiSize = ops[i].size();
210  INTREPID2_TEST_FOR_EXCEPTION(numBases != opsiSize, std::invalid_argument, "must have one operator for each basis in each nontrivial entry in vectorEntryOps");
211  }
212  }
213  INTREPID2_TEST_FOR_EXCEPTION(numBases == 0, std::invalid_argument, "at least one vectorEntryOps entry must be non-trivial");
214  numBasisComponents_ = numBases;
215  }
216 
217  OperatorTensorDecomposition(const std::vector<EOperator> &basisOps, const double weight = 1.0)
218  :
219  ops({basisOps}),
220  weights({weight}),
221  numBasisComponents_(basisOps.size())
222  {}
223 
224  OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, double weight = 1.0)
225  :
226  ops({ std::vector<EOperator>{opBasis1, opBasis2} }),
227  weights({weight}),
228  numBasisComponents_(2)
229  {}
230 
231  OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, const EOperator &opBasis3, double weight = 1.0)
232  :
233  ops({ std::vector<EOperator>{opBasis1, opBasis2, opBasis3} }),
234  weights({weight}),
235  numBasisComponents_(3)
236  {}
237 
238  ordinal_type numVectorComponents() const
239  {
240  return ops.size(); // will match weights.size()
241  }
242 
243  ordinal_type numBasisComponents() const
244  {
245  return numBasisComponents_;
246  }
247 
248  double weight(const ordinal_type &vectorComponentOrdinal) const
249  {
250  return weights[vectorComponentOrdinal];
251  }
252 
253  bool identicallyZeroComponent(const ordinal_type &vectorComponentOrdinal) const
254  {
255  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
256  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
257  return ops[vectorComponentOrdinal].size() == 0;
258  }
259 
260  EOperator op(const ordinal_type &vectorComponentOrdinal, const ordinal_type &basisOrdinal) const
261  {
262  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
263  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
264  if (identicallyZeroComponent(vectorComponentOrdinal))
265  {
266  return OPERATOR_MAX; // by convention: zero in this component
267  }
268  else
269  {
270  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal < 0, std::invalid_argument, "basisOrdinal is out of bounds");
271  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal >= numBasisComponents_, std::invalid_argument, "basisOrdinal is out of bounds");
272  return ops[vectorComponentOrdinal][basisOrdinal];
273  }
274  }
275 
277  template<typename DeviceType, typename OutputValueType, class PointValueType>
279  {
280  const ordinal_type basesSize = bases.size();
281  INTREPID2_TEST_FOR_EXCEPTION(basesSize != numBasisComponents_, std::invalid_argument, "The number of bases provided must match the number of basis components in this decomposition");
282 
283  ordinal_type numExpandedBasisComponents = 0;
285  using TensorBasis = Basis_TensorBasis<BasisBase>;
286  std::vector<TensorBasis*> basesAsTensorBasis(numBasisComponents_);
287  for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
288  {
289  TensorBasis* basisAsTensorBasis = dynamic_cast<TensorBasis*>(bases[basisComponentOrdinal].get());
290  basesAsTensorBasis[basisComponentOrdinal] = basisAsTensorBasis;
291  if (basisAsTensorBasis)
292  {
293  numExpandedBasisComponents += basisAsTensorBasis->getTensorBasisComponents().size();
294  }
295  else
296  {
297  numExpandedBasisComponents += 1;
298  }
299  }
300 
301  std::vector< std::vector<EOperator> > expandedOps; // outer index: vector entry ordinal; inner index: basis component ordinal.
302  std::vector<double> expandedWeights;
303  const ordinal_type opsSize = ops.size();
304  for (ordinal_type simpleVectorEntryOrdinal=0; simpleVectorEntryOrdinal<opsSize; simpleVectorEntryOrdinal++)
305  {
306  if (identicallyZeroComponent(simpleVectorEntryOrdinal))
307  {
308  expandedOps.push_back(std::vector<EOperator>{});
309  expandedWeights.push_back(0.0);
310  continue;
311  }
312 
313  std::vector< std::vector<EOperator> > expandedBasisOpsForSimpleVectorEntry(1); // start out with one outer entry; expands if a component is vector-valued
314 
315  // this lambda appends an op to each of the vector components
316  auto addExpandedOp = [&expandedBasisOpsForSimpleVectorEntry](const EOperator &op)
317  {
318  const ordinal_type size = expandedBasisOpsForSimpleVectorEntry.size();
319  for (ordinal_type i=0; i<size; i++)
320  {
321  expandedBasisOpsForSimpleVectorEntry[i].push_back(op);
322  }
323  };
324 
325  // this lambda takes a scalar-valued (single outer entry) expandedBasisOps and expands it
326  // according to the number of vector entries coming from the vector-valued component basis
327  auto vectorizeExpandedOps = [&expandedBasisOpsForSimpleVectorEntry](const int &numSubVectors)
328  {
329  // we require that this only gets called once per simpleVectorEntryOrdinal -- i.e., only one basis component gets to be vector-valued.
330  INTREPID2_TEST_FOR_EXCEPTION(expandedBasisOpsForSimpleVectorEntry.size() != 1, std::invalid_argument, "multiple basis components may not be vector-valued!");
331  for (ordinal_type i=1; i<numSubVectors; i++)
332  {
333  expandedBasisOpsForSimpleVectorEntry.push_back(expandedBasisOpsForSimpleVectorEntry[0]);
334  }
335  };
336 
337  std::vector<EOperator> subVectorOps; // only used if one of the components is vector-valued
338  std::vector<double> subVectorWeights {weights[simpleVectorEntryOrdinal]};
339  for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
340  {
341  const auto &op = ops[simpleVectorEntryOrdinal][basisComponentOrdinal];
342 
343  if (! basesAsTensorBasis[basisComponentOrdinal])
344  {
345  addExpandedOp(op);
346  }
347  else
348  {
349  OperatorTensorDecomposition basisOpDecomposition = basesAsTensorBasis[basisComponentOrdinal]->getOperatorDecomposition(op);
350  if (basisOpDecomposition.numVectorComponents() > 1)
351  {
352  // We don't currently support a use case where we have multiple component bases that are vector-valued:
353  INTREPID2_TEST_FOR_EXCEPTION(subVectorWeights.size() > 1, std::invalid_argument, "Unhandled case: multiple component bases are vector-valued");
354  // We do support a single vector-valued case, though; this splits the current simpleVectorEntryOrdinal into an appropriate number of components that come in order in the expanded vector
355  ordinal_type numSubVectors = basisOpDecomposition.numVectorComponents();
356  vectorizeExpandedOps(numSubVectors);
357 
358  double weightSoFar = subVectorWeights[0];
359  for (ordinal_type subVectorEntryOrdinal=1; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
360  {
361  subVectorWeights.push_back(weightSoFar * basisOpDecomposition.weight(subVectorEntryOrdinal));
362  }
363  subVectorWeights[0] *= basisOpDecomposition.weight(0);
364  for (ordinal_type subVectorEntryOrdinal=0; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
365  {
366  for (ordinal_type subComponentBasis=0; subComponentBasis<basisOpDecomposition.numBasisComponents(); subComponentBasis++)
367  {
368  const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
369  expandedBasisOpsForSimpleVectorEntry[subVectorEntryOrdinal].push_back(basisOp);
370  }
371  }
372  }
373  else
374  {
375  double componentWeight = basisOpDecomposition.weight(0);
376  const ordinal_type size = subVectorWeights.size();
377  for (ordinal_type i=0; i<size; i++)
378  {
379  subVectorWeights[i] *= componentWeight;
380  }
381  ordinal_type subVectorEntryOrdinal = 0;
382  const ordinal_type numBasisComponents = basisOpDecomposition.numBasisComponents();
383  for (ordinal_type subComponentBasis=0; subComponentBasis<numBasisComponents; subComponentBasis++)
384  {
385  const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, basisComponentOrdinal);
386  addExpandedOp( basisOp );
387  }
388  }
389  }
390  }
391 
392  // sanity check on the new expandedOps entries:
393  for (ordinal_type i=0; i<static_cast<ordinal_type>(expandedBasisOpsForSimpleVectorEntry.size()); i++)
394  {
395  const ordinal_type size = expandedBasisOpsForSimpleVectorEntry[i].size();
396  INTREPID2_TEST_FOR_EXCEPTION(size != numExpandedBasisComponents, std::logic_error, "each vector in expandedBasisOpsForSimpleVectorEntry should have as many entries as there are expanded basis components");
397  }
398 
399  expandedOps.insert(expandedOps.end(), expandedBasisOpsForSimpleVectorEntry.begin(), expandedBasisOpsForSimpleVectorEntry.end());
400  expandedWeights.insert(expandedWeights.end(), subVectorWeights.begin(), subVectorWeights.end());
401  }
402  // check that vector lengths agree:
403  INTREPID2_TEST_FOR_EXCEPTION(expandedOps.size() != expandedWeights.size(), std::logic_error, "expandedWeights and expandedOps do not agree on the number of vector components");
404 
405  return OperatorTensorDecomposition(expandedOps, expandedWeights);
406  }
407 };
408 
414  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
416  {
417  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
418  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
419 
420  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
421  using TeamMember = typename TeamPolicy::member_type;
422 
424  using RankCombinationType = typename TensorViewIteratorType::RankCombinationType;
425 
426  OutputFieldType output_; // F,P[,D…]
427  OutputFieldType input1_; // F1,P[,D…] or F1,P1[,D…]
428  OutputFieldType input2_; // F2,P[,D…] or F2,P2[,D…]
429 
430  int numFields_, numPoints_;
431  int numFields1_, numPoints1_;
432  int numFields2_, numPoints2_;
433 
434  bool tensorPoints_; // if true, input1 and input2 refer to values at decomposed points, and P = P1 * P2. If false, then the two inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
435 
436  using RankCombinationViewType = typename TensorViewIteratorType::RankCombinationViewType;
437  RankCombinationViewType rank_combinations_;// indicates the policy by which the input views will be combined in output view
438 
439  double weight_;
440 
441  public:
442 
443  TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
444  bool tensorPoints, double weight)
445  : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
446  {
447  numFields_ = output.extent_int(0);
448  numPoints_ = output.extent_int(1);
449 
450  numFields1_ = inputValues1.extent_int(0);
451  numPoints1_ = inputValues1.extent_int(1);
452 
453  numFields2_ = inputValues2.extent_int(0);
454  numPoints2_ = inputValues2.extent_int(1);
455 
456  if (!tensorPoints_)
457  {
458  // then the point counts should all match
459  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
460  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
461  }
462  else
463  {
464  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument, "incompatible point counts");
465  }
466 
467  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument, "incompatible field sizes");
468 
469  const ordinal_type max_rank = std::max(inputValues1.rank(),inputValues2.rank());
470  // at present, no supported case will result in an output rank greater than both input ranks
471 
472  const ordinal_type outputRank = output.rank();
473  INTREPID2_TEST_FOR_EXCEPTION(outputRank > max_rank, std::invalid_argument, "Unsupported view combination.");
474  rank_combinations_ = RankCombinationViewType("Rank_combinations_", max_rank);
475  auto rank_combinations_host = Kokkos::create_mirror_view(rank_combinations_);
476 
477  rank_combinations_host[0] = TensorViewIteratorType::TENSOR_PRODUCT; // field combination is always tensor product
478  rank_combinations_host[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH; // tensorPoints controls interpretation of the point dimension
479  for (ordinal_type d=2; d<max_rank; d++)
480  {
481  // d >= 2 have the interpretation of spatial dimensions (gradients, etc.)
482  // we let the extents of the containers determine what we're doing here
483  if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
484  {
485  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
486  }
487  else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
488  || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
489  )
490  {
491  // this looks like multiplication of a vector by a scalar, resulting in a vector
492  // this can be understood as a tensor product
493  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
494  }
495  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
496  {
497  // this is actually a generalization of the above case: a tensor product, something like a vector outer product
498  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
499  }
500  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
501  {
502  // it's a bit weird (I'm not aware of the use case, in the present context), but we can handle this case by adopting DIMENSION_MATCH here
503  // this is something like MATLAB's .* and .+ operators, which operate entry-wise
504  rank_combinations_host[d] = TensorViewIteratorType::DIMENSION_MATCH;
505  }
506  else
507  {
508  std::cout << "inputValues1.extent_int(" << d << ") = " << inputValues1.extent_int(d) << std::endl;
509  std::cout << "inputValues2.extent_int(" << d << ") = " << inputValues2.extent_int(d) << std::endl;
510  std::cout << "output.extent_int(" << d << ") = " << output.extent_int(d) << std::endl;
511  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unable to find an interpretation for this combination of views");
512  }
513  }
514  Kokkos::deep_copy(rank_combinations_,rank_combinations_host);
515  }
516 
517  KOKKOS_INLINE_FUNCTION
518  void operator()( const TeamMember & teamMember ) const
519  {
520  auto fieldOrdinal1 = teamMember.league_rank();
521 
522  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
523  TensorViewIteratorType it(output_,input1_,input2_,rank_combinations_);
524  const int FIELD_ORDINAL_DIMENSION = 0;
525  it.setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
526  int next_increment_rank = FIELD_ORDINAL_DIMENSION; // used to initialize prev_increment_rank at the start of the do/while loop. Notionally, we last incremented in the field ordinal rank to get to the {fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0} location.
527  OutputScalar accumulator = 0;
528 
529  do
530  {
531  accumulator += weight_ * it.getView1Entry() * it.getView2Entry();
532  next_increment_rank = it.nextIncrementRank();
533 
534  if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
535  {
536  // then we've finished the accumulation and should set the value
537  it.set(accumulator);
538  // reset the accumulator:
539  accumulator = 0;
540  }
541  } while (it.increment() > FIELD_ORDINAL_DIMENSION);
542  });
543  }
544  };
545 
559  template<typename BasisBaseClass = void>
560  class Basis_TensorBasis
561  :
562  public BasisBaseClass
563  {
564  public:
565  using BasisBase = BasisBaseClass;
566  using BasisPtr = Teuchos::RCP<BasisBase>;
567 
568  protected:
569  BasisPtr basis1_;
570  BasisPtr basis2_;
571 
572  std::vector<BasisPtr> tensorComponents_;
573 
574  std::string name_; // name of the basis
575  public:
576  using DeviceType = typename BasisBase::DeviceType;
577  using ExecutionSpace = typename BasisBase::ExecutionSpace;
578  using OutputValueType = typename BasisBase::OutputValueType;
579  using PointValueType = typename BasisBase::PointValueType;
580 
581  using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost;
582  using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost;
583  using OutputViewType = typename BasisBase::OutputViewType;
584  using PointViewType = typename BasisBase::PointViewType;
585  using TensorBasis = Basis_TensorBasis<BasisBaseClass>;
586  public:
591  Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace = FUNCTION_SPACE_MAX)
592  :
593  basis1_(basis1),basis2_(basis2)
594  {
595  this->functionSpace_ = functionSpace;
596 
597  Basis_TensorBasis* basis1AsTensor = dynamic_cast<Basis_TensorBasis*>(basis1_.get());
598  if (basis1AsTensor)
599  {
600  auto basis1Components = basis1AsTensor->getTensorBasisComponents();
601  tensorComponents_.insert(tensorComponents_.end(), basis1Components.begin(), basis1Components.end());
602  }
603  else
604  {
605  tensorComponents_.push_back(basis1_);
606  }
607 
608  Basis_TensorBasis* basis2AsTensor = dynamic_cast<Basis_TensorBasis*>(basis2_.get());
609  if (basis2AsTensor)
610  {
611  auto basis2Components = basis2AsTensor->getTensorBasisComponents();
612  tensorComponents_.insert(tensorComponents_.end(), basis2Components.begin(), basis2Components.end());
613  }
614  else
615  {
616  tensorComponents_.push_back(basis2_);
617  }
618 
619  this->basisCardinality_ = basis1->getCardinality() * basis2->getCardinality();
620  this->basisDegree_ = std::max(basis1->getDegree(), basis2->getDegree());
621 
622  {
623  std::ostringstream basisName;
624  basisName << basis1->getName() << " x " << basis2->getName();
625  name_ = basisName.str();
626  }
627 
628  // set cell topology
629  shards::CellTopology cellTopo1 = basis1->getBaseCellTopology();
630  shards::CellTopology cellTopo2 = basis2->getBaseCellTopology();
631 
632  auto cellKey1 = basis1->getBaseCellTopology().getKey();
633  auto cellKey2 = basis2->getBaseCellTopology().getKey();
634  if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key))
635  {
636  this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() );
637  }
638  else if (((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
639  || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key)))
640  {
641  this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() );
642  }
643  else
644  {
645  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Cell topology combination not yet supported");
646  }
647 
648  this->basisType_ = basis1->getBasisType();
649  this->basisCoordinates_ = COORDINATES_CARTESIAN;
650 
651  // initialize tags
652  {
653  const auto & cardinality = this->basisCardinality_;
654 
655  // Basis-dependent initializations
656  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
657  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
658  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
659  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
660 
661  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
662 
663  shards::CellTopology cellTopo = this->basisCellTopology_;
664 
665  ordinal_type tensorSpaceDim = cellTopo.getDimension();
666  ordinal_type spaceDim1 = cellTopo1.getDimension();
667  ordinal_type spaceDim2 = cellTopo2.getDimension();
668 
669  if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
670  {
671  int degreeSize = basis1_->getPolynomialDegreeLength() + basis2_->getPolynomialDegreeLength();
672  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
673  }
674 
675  TensorTopologyMap topoMap(cellTopo1, cellTopo2);
676 
677  for (ordinal_type d=0; d<=tensorSpaceDim; d++) // d: tensorial dimension
678  {
679  ordinal_type d2_max = std::min(spaceDim2,d);
680  int subcellOffset = 0; // for this dimension of tensor subcells, how many subcells have we already counted with other d2/d1 combos?
681  for (ordinal_type d2=0; d2<=d2_max; d2++)
682  {
683  ordinal_type d1 = d-d2;
684  if (d1 > spaceDim1) continue;
685 
686  ordinal_type subcellCount2 = cellTopo2.getSubcellCount(d2);
687  ordinal_type subcellCount1 = cellTopo1.getSubcellCount(d1);
688  for (ordinal_type subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
689  {
690  ordinal_type subcellDofCount2 = basis2_->getDofCount(d2, subcellOrdinal2);
691  for (ordinal_type subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
692  {
693  ordinal_type subcellDofCount1 = basis1_->getDofCount(d1, subcellOrdinal1);
694  ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
695  for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
696  {
697  ordinal_type fieldOrdinal2 = basis2_->getDofOrdinal(d2, subcellOrdinal2, localDofID2);
698  OrdinalTypeArray1DHost degreesField2;
699  if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
700  for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
701  {
702  ordinal_type fieldOrdinal1 = basis1_->getDofOrdinal(d1, subcellOrdinal1, localDofID1);
703  ordinal_type tensorLocalDofID = localDofID2 * subcellDofCount1 + localDofID1;
704  ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
705  tagView(tensorFieldOrdinal*tagSize+0) = d; // subcell dimension
706  tagView(tensorFieldOrdinal*tagSize+1) = topoMap.getCompositeSubcellOrdinal(d1, subcellOrdinal1, d2, subcellOrdinal2);
707  tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
708  tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
709 
710  if (this->basisType_ == BASIS_FEM_HIERARCHICAL)
711  {
712  // fill in degree lookup:
713  OrdinalTypeArray1DHost degreesField1 = basis1_->getPolynomialDegreeOfField(fieldOrdinal1);
714 
715  int degreeLengthField1 = degreesField1.extent_int(0);
716  int degreeLengthField2 = degreesField2.extent_int(0);
717  for (int d3=0; d3<degreeLengthField1; d3++)
718  {
719  this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3) = degreesField1(d3);
720  }
721  for (int d3=0; d3<degreeLengthField2; d3++)
722  {
723  this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
724  }
725  }
726  } // localDofID1
727  } // localDofID2
728  } // subcellOrdinal1
729  } // subcellOrdinal2
730  subcellOffset += subcellCount1 * subcellCount2;
731  }
732  }
733 
734  // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
735  // // tags are constructed on host
736  this->setOrdinalTagData(this->tagToOrdinal_,
737  this->ordinalToTag_,
738  tagView,
739  this->basisCardinality_,
740  tagSize,
741  posScDim,
742  posScOrd,
743  posDfOrd);
744  }
745  }
746 
752  {
753  INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "subclasses must override either getSimpleOperatorDecomposition() or getOperatorDecomposition()");
754  // (TensorBasis3 overrides getOperatorDecomposition()…)
755  }
756 
760  {
761  OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
762  std::vector<BasisPtr> componentBases {basis1_, basis2_};
763  return opSimpleDecomposition.expandedDecomposition(componentBases);
764  }
765 
770  virtual BasisValues<OutputValueType,DeviceType> allocateBasisValues( TensorPoints<PointValueType,DeviceType> points, const EOperator operatorType = OPERATOR_VALUE) const override
771  {
772  const bool operatorSupported = (operatorType == OPERATOR_VALUE) || (operatorType == OPERATOR_GRAD) || (operatorType == OPERATOR_CURL) || (operatorType == OPERATOR_DIV);
773  INTREPID2_TEST_FOR_EXCEPTION(!operatorSupported, std::invalid_argument, "operator is not supported by allocateBasisValues");
774 
775  ordinal_type numBasisComponents = tensorComponents_.size();
776  OperatorTensorDecomposition opDecomposition = getOperatorDecomposition(operatorType);
777 
778  const ordinal_type numVectorComponents = opDecomposition.numVectorComponents();
779  const bool useVectorData = numVectorComponents > 1;
780 
781  if (useVectorData)
782  {
783  const int numFamilies = 1;
784  std::vector< std::vector<TensorData<OutputValueType,DeviceType> > > vectorComponents(numFamilies, std::vector<TensorData<OutputValueType,DeviceType> >(numVectorComponents));
785 
786  const int familyOrdinal = 0;
787  for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
788  {
789  if (!opDecomposition.identicallyZeroComponent(vectorComponentOrdinal))
790  {
791  std::vector< Data<OutputValueType,DeviceType> > componentData;
792  for (ordinal_type r=0; r<numBasisComponents; r++)
793  {
794  const int numComponentPoints = points.componentPointCount(r);
795  const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
796  auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
797  componentData.push_back(Data<OutputValueType,DeviceType>(componentView));
798  }
799  vectorComponents[familyOrdinal][vectorComponentOrdinal] = TensorData<OutputValueType,DeviceType>(componentData);
800  }
801  }
802  VectorData<OutputValueType,DeviceType> vectorData(vectorComponents);
803  return BasisValues<OutputValueType,DeviceType>(vectorData);
804  }
805  else
806  {
807  // TensorData: single tensor product
808  std::vector< Data<OutputValueType,DeviceType> > componentData;
809 
810  const ordinal_type vectorComponentOrdinal = 0;
811  for (ordinal_type r=0; r<numBasisComponents; r++)
812  {
813  const int numComponentPoints = points.componentPointCount(r);
814  const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
815  auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
816 
817  const int rank = 2; // (F,P) -- TensorData-only BasisValues are always scalar-valued. Use VectorData for vector-valued.
818  // (we need to be explicit about the rank argument because GRAD, even in 1D, elevates to rank 3), so e.g. DIV of HDIV uses a componentView that is rank 3;
819  // we want Data to insulate us from that fact)
820  const Kokkos::Array<int,7> extents {componentView.extent_int(0), componentView.extent_int(1), 1,1,1,1,1};
821  Kokkos::Array<DataVariationType,7> variationType {GENERAL, GENERAL, CONSTANT, CONSTANT, CONSTANT, CONSTANT, CONSTANT };
822  componentData.push_back(Data<OutputValueType,DeviceType>(componentView, rank, extents, variationType));
823  }
824 
825  TensorData<OutputValueType,DeviceType> tensorData(componentData);
826 
827  std::vector< TensorData<OutputValueType,DeviceType> > tensorDataEntries {tensorData};
828  return BasisValues<OutputValueType,DeviceType>(tensorDataEntries);
829  }
830  }
831 
832  // since the getValues() below only overrides the FEM variant, we specify that
833  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
834  // (It's an error to use the FVD variant on this basis.)
835  using BasisBase::getValues;
836 
848  void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition,
849  PointViewType & inputPoints1, PointViewType & inputPoints2, bool &tensorDecompositionSucceeded) const
850  {
851  INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument, "tensor decomposition not yet supported");
852 
853  // for inputPoints that are actually tensor-product of component quadrature points (say),
854  // having just the one input (which will have a lot of redundant point data) is suboptimal
855  // The general case can have unique x/y/z coordinates at every point, though, so we have to support that
856  // when this interface is used. But we may try detecting that the data is tensor-product and compressing
857  // from there... Ultimately, we should also add a getValues() variant that takes multiple input point containers,
858  // one for each tensorial dimension.
859 
860  // this initial implementation is intended to simplify development of 2D and 3D bases, while also opening
861  // the possibility of higher-dimensional bases. It is not necessarily optimized for speed/memory. There
862  // are things we can do in this regard, which may become important for matrix-free computations wherein
863  // basis values don't get stored but are computed dynamically.
864 
865  int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
866  int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
867 
868  int totalSpaceDim = inputPoints.extent_int(1);
869 
870  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
871 
872  // first pass: just take subviews to get input points -- this will result in redundant computations when points are themselves tensor product (i.e., inputPoints itself contains redundant data)
873 
874  inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
875  inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
876 
877  // std::cout << "inputPoints : " << inputPoints.extent(0) << " x " << inputPoints.extent(1) << std::endl;
878  // std::cout << "inputPoints1 : " << inputPoints1.extent(0) << " x " << inputPoints1.extent(1) << std::endl;
879  // std::cout << "inputPoints2 : " << inputPoints2.extent(0) << " x " << inputPoints2.extent(1) << std::endl;
880 
881  tensorDecompositionSucceeded = false;
882  }
883 
892  virtual void getDofCoords( typename BasisBase::ScalarViewType dofCoords ) const override
893  {
894  int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
895  int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
896 
897  using ValueType = typename BasisBase::ScalarViewType::value_type;
898  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
899  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
900 
901  const ordinal_type basisCardinality1 = basis1_->getCardinality();
902  const ordinal_type basisCardinality2 = basis2_->getCardinality();
903 
904  ViewType dofCoords1("dofCoords1",basisCardinality1,spaceDim1);
905  ViewType dofCoords2("dofCoords2",basisCardinality2,spaceDim2);
906 
907  basis1_->getDofCoords(dofCoords1);
908  basis2_->getDofCoords(dofCoords2);
909 
910  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
911  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
912  {
913  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
914  {
915  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
916  for (int d1=0; d1<spaceDim1; d1++)
917  {
918  dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
919  }
920  for (int d2=0; d2<spaceDim2; d2++)
921  {
922  dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
923  }
924  }
925  });
926  }
927 
928 
936  virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
937  {
938  using ValueType = typename BasisBase::ScalarViewType::value_type;
939  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
940  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
941 
942  ViewType dofCoeffs1("dofCoeffs1",basis1_->getCardinality());
943  ViewType dofCoeffs2("dofCoeffs2",basis2_->getCardinality());
944 
945  basis1_->getDofCoeffs(dofCoeffs1);
946  basis2_->getDofCoeffs(dofCoeffs2);
947 
948  const ordinal_type basisCardinality1 = basis1_->getCardinality();
949  const ordinal_type basisCardinality2 = basis2_->getCardinality();
950 
951  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
952  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
953  {
954  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
955  {
956  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
957  dofCoeffs(fieldOrdinal) = dofCoeffs1(fieldOrdinal1);
958  dofCoeffs(fieldOrdinal) = dofCoeffs2(fieldOrdinal2);
959  }
960  });
961  }
962 
967  virtual
968  const char*
969  getName() const override {
970  return name_.c_str();
971  }
972 
981  ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1,
982  ordinal_type dkEnum2, ordinal_type operatorOrder2) const
983  {
984  ordinal_type spaceDim1 = basis1_->getBaseCellTopology().getDimension();
985  ordinal_type spaceDim2 = basis2_->getBaseCellTopology().getDimension();
986 
987  // for now, we only support total spaceDim <= 3. It would not be too hard to extend to support higher dimensions,
988  // but the support needs to be built out in e.g. shards::CellTopology for this, as well as our DkEnumeration, etc.
989  switch (spaceDim1)
990  {
991  case 1:
992  switch (spaceDim2)
993  {
994  case 1:
995  return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
996  case 2:
997  return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
998  default:
999  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1000  }
1001  case 2:
1002  switch (spaceDim2)
1003  {
1004  case 1:
1005  return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1006  default:
1007  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1008  }
1009  // case 3:
1010  // switch (spaceDim2)
1011  // {
1012  // case 1:
1013  // return getDkTensorIndex<3, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1014  // case 2:
1015  // return getDkTensorIndex<3, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1016  // case 3:
1017  // return getDkTensorIndex<3, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1018  // default:
1019  // INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1020  // }
1021  default:
1022  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1023  }
1024  }
1025 
1026  std::vector<BasisPtr> getTensorBasisComponents() const
1027  {
1028  return tensorComponents_;
1029  }
1030 
1042  virtual
1043  void
1045  const TensorPoints<PointValueType,DeviceType> inputPoints,
1046  const EOperator operatorType = OPERATOR_VALUE ) const override
1047  {
1048  OperatorTensorDecomposition operatorDecomposition = getOperatorDecomposition(operatorType);
1049 
1050  const ordinal_type numVectorComponents = operatorDecomposition.numVectorComponents();
1051  const bool useVectorData = numVectorComponents > 1;
1052  const ordinal_type numBasisComponents = operatorDecomposition.numBasisComponents();
1053 
1054  for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1055  {
1056  const double weight = operatorDecomposition.weight(vectorComponentOrdinal);
1057  for (ordinal_type basisOrdinal=0; basisOrdinal<numBasisComponents; basisOrdinal++)
1058  {
1059  const EOperator op = operatorDecomposition.op(vectorComponentOrdinal, basisOrdinal);
1060  // by convention, op == OPERATOR_MAX signals a zero component; skip
1061  if (op != OPERATOR_MAX)
1062  {
1063  const int vectorFamily = 0; // TensorBasis always has just a single family; multiple families arise in DirectSumBasis
1064  auto tensorData = useVectorData ? outputValues.vectorData().getComponent(vectorFamily,vectorComponentOrdinal) : outputValues.tensorData();
1065  INTREPID2_TEST_FOR_EXCEPTION( ! tensorData.getTensorComponent(basisOrdinal).isValid(), std::invalid_argument, "Invalid output component encountered");
1066 
1067  PointViewType pointView = inputPoints.getTensorComponent(basisOrdinal);
1068 
1069  // Data stores things in fixed-rank Kokkos::View, but Basis requires DynRankView. We allocate a temporary DynRankView, then copy back to Data.
1070  const Data<OutputValueType,DeviceType> & outputData = tensorData.getTensorComponent(basisOrdinal);
1071 
1072  auto basisValueView = outputData.getUnderlyingView();
1073  tensorComponents_[basisOrdinal]->getValues(basisValueView, pointView, op);
1074 
1075  // if weight is non-trivial (not 1.0), then we need to multiply one of the component views by weight.
1076  // we do that for the first basisOrdinal's values
1077  if ((weight != 1.0) && (basisOrdinal == 0))
1078  {
1079  if (basisValueView.rank() == 2)
1080  {
1081  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1082  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1083  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1084  basisValueView(fieldOrdinal,pointOrdinal) *= weight;
1085  });
1086  }
1087  else if (basisValueView.rank() == 3)
1088  {
1089  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1),basisValueView.extent_int(2)});
1090  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1091  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal, const int &d) {
1092  basisValueView(fieldOrdinal,pointOrdinal,d) *= weight;
1093  });
1094  }
1095  else
1096  {
1097  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported rank for basisValueView");
1098  }
1099  }
1100  }
1101  }
1102  }
1103  }
1104 
1123  void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1124  const EOperator operatorType = OPERATOR_VALUE ) const override
1125  {
1126  bool tensorPoints; // true would mean that we take the tensor product of inputPoints1 and inputPoints2 (and that this would be equivalent to inputPoints as given -- i.e., inputPoints1 and inputPoints2 would be a tensor decomposition of inputPoints)
1127  bool attemptTensorDecomposition = false; // support for this not yet implemented
1128  PointViewType inputPoints1, inputPoints2;
1129  getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
1130 
1131  switch (operatorType)
1132  {
1133  case OPERATOR_D1:
1134  case OPERATOR_D2:
1135  case OPERATOR_D3:
1136  case OPERATOR_D4:
1137  case OPERATOR_D5:
1138  case OPERATOR_D6:
1139  case OPERATOR_D7:
1140  case OPERATOR_D8:
1141  case OPERATOR_D9:
1142  case OPERATOR_D10:
1143  {
1144  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1145  // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
1146  // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
1147  for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1148  {
1149  int derivativeCountComp1=opOrder-derivativeCountComp2;
1150  EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1151  EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1152 
1153  int spaceDim1 = inputPoints1.extent_int(1);
1154  int spaceDim2 = inputPoints2.extent_int(1);
1155 
1156  int dkCardinality1 = (op1 != OPERATOR_VALUE) ? getDkCardinality(op1, spaceDim1) : 1;
1157  int dkCardinality2 = (op2 != OPERATOR_VALUE) ? getDkCardinality(op2, spaceDim2) : 1;
1158 
1159  int basisCardinality1 = basis1_->getCardinality();
1160  int basisCardinality2 = basis2_->getCardinality();
1161 
1162  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1163 
1164  int pointCount1, pointCount2;
1165  if (tensorPoints)
1166  {
1167  pointCount1 = inputPoints1.extent_int(0);
1168  pointCount2 = inputPoints2.extent_int(0);
1169  }
1170  else
1171  {
1172  pointCount1 = totalPointCount;
1173  pointCount2 = totalPointCount;
1174  }
1175 
1176  OutputViewType outputValues1, outputValues2;
1177  if (op1 == OPERATOR_VALUE)
1178  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1);
1179  else
1180  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
1181 
1182  if (op2 == OPERATOR_VALUE)
1183  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2);
1184  else
1185  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
1186 
1187  basis1_->getValues(outputValues1,inputPoints1,op1);
1188  basis2_->getValues(outputValues2,inputPoints2,op2);
1189 
1190  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1191  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1192  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1193 
1194  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1195 
1196  double weight = 1.0;
1198 
1199  for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1200  {
1201  auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
1202  : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
1203  for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1204  {
1205  auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
1206  : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
1207 
1208  ordinal_type dkTensorIndex = getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1209  auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
1210  // Note that there may be performance optimizations available here:
1211  // - could eliminate interior for loop in favor of having a vector-valued outputValues1_dk
1212  // - could add support to TensorViewFunctor (and probably TensorViewIterator) for this kind of tensor Dk type of traversal
1213  // (this would allow us to eliminate both for loops here)
1214  // At the moment, we defer such optimizations on the idea that this may not ever become a performance bottleneck.
1215  FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
1216  Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
1217  }
1218  }
1219  }
1220  }
1221  break;
1222  default: // non-OPERATOR_Dn case must be handled by subclass.
1223  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1224  }
1225  }
1226 
1252  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1253  const PointViewType inputPoints1, const PointViewType inputPoints2,
1254  bool tensorPoints) const
1255  {
1256  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
1257  }
1258 
1282  void getValues( OutputViewType outputValues,
1283  const PointViewType inputPoints1, const EOperator operatorType1,
1284  const PointViewType inputPoints2, const EOperator operatorType2,
1285  bool tensorPoints, double weight=1.0) const
1286  {
1287  int basisCardinality1 = basis1_->getCardinality();
1288  int basisCardinality2 = basis2_->getCardinality();
1289 
1290  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1291 
1292  int pointCount1, pointCount2;
1293  if (tensorPoints)
1294  {
1295  pointCount1 = inputPoints1.extent_int(0);
1296  pointCount2 = inputPoints2.extent_int(0);
1297  }
1298  else
1299  {
1300  pointCount1 = totalPointCount;
1301  pointCount2 = totalPointCount;
1302  }
1303 
1304  int spaceDim1 = inputPoints1.extent_int(1);
1305  int spaceDim2 = inputPoints2.extent_int(1);
1306 
1307  INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
1308  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1309 
1310  int opRank1 = getOperatorRank(basis1_->getFunctionSpace(), operatorType1, spaceDim1);
1311  int opRank2 = getOperatorRank(basis2_->getFunctionSpace(), operatorType2, spaceDim2);
1312 
1313  OutputViewType outputValues1, outputValues2;
1314  if (opRank1 == 0)
1315  {
1316  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1317  }
1318  else if (opRank1 == 1)
1319  {
1320  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1321  }
1322  else
1323  {
1324  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank1");
1325  }
1326 
1327  if (opRank2 == 0)
1328  {
1329  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1330  }
1331  else if (opRank2 == 1)
1332  {
1333  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1334  }
1335  else
1336  {
1337  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank2");
1338  }
1339 
1340  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1341  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1342 
1343  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1344  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1345  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1346 
1347  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1348 
1350 
1351  FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
1352  Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
1353  }
1354 
1360  getHostBasis() const override {
1361  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis subclasses must override getHostBasis");
1362  }
1363  }; // Basis_TensorBasis
1364 
1372  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
1374  {
1375  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
1376  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1377 
1378  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
1379  using TeamMember = typename TeamPolicy::member_type;
1380 
1381  OutputFieldType output_; // F,P
1382  OutputFieldType input1_; // F1,P[,D] or F1,P1[,D]
1383  OutputFieldType input2_; // F2,P[,D] or F2,P2[,D]
1384  OutputFieldType input3_; // F2,P[,D] or F2,P2[,D]
1385 
1386  int numFields_, numPoints_;
1387  int numFields1_, numPoints1_;
1388  int numFields2_, numPoints2_;
1389  int numFields3_, numPoints3_;
1390 
1391  bool tensorPoints_; // if true, input1, input2, input3 refer to values at decomposed points, and P = P1 * P2 * P3. If false, then the three inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
1392 
1393  double weight_;
1394 
1395  TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
1396  bool tensorPoints, double weight)
1397  : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
1398  {
1399  numFields_ = output.extent_int(0);
1400  numPoints_ = output.extent_int(1);
1401 
1402  numFields1_ = inputValues1.extent_int(0);
1403  numPoints1_ = inputValues1.extent_int(1);
1404 
1405  numFields2_ = inputValues2.extent_int(0);
1406  numPoints2_ = inputValues2.extent_int(1);
1407 
1408  numFields3_ = inputValues3.extent_int(0);
1409  numPoints3_ = inputValues3.extent_int(1);
1410  /*
1411  We don't yet support tensor-valued bases here (only vector and scalar). The main design question is how the layouts
1412  of the input containers relates to the layout of the output container. The work we've done in TensorViewIterator basically
1413  shows the choices that can be made. It does appear that in most cases (at least (most of?) those supported by TensorViewIterator),
1414  we can infer from the dimensions of input/output containers what choice should be made in each dimension.
1415  */
1416  INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1417  INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1418  INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1419  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1420  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1421  INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1422 
1423  if (!tensorPoints_)
1424  {
1425  // then the point counts should all match
1426  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
1427  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
1428  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument, "incompatible point counts");
1429  }
1430  else
1431  {
1432  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument, "incompatible point counts");
1433  }
1434 
1435  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument, "incompatible field sizes");
1436  }
1437 
1438  KOKKOS_INLINE_FUNCTION
1439  void operator()( const TeamMember & teamMember ) const
1440  {
1441  auto fieldOrdinal1 = teamMember.league_rank();
1442 
1443  if (!tensorPoints_)
1444  {
1445  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
1446  {
1447  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1448  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1449  {
1450  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1451  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1452  {
1453  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1454  }
1455  }
1456  });
1457  }
1458  else if (input1_.rank() == 3)
1459  {
1460  int spaceDim = input1_.extent_int(2);
1461  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1462  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1463  {
1464  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1465  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1466  {
1467  for (int d=0; d<spaceDim; d++)
1468  {
1469  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1470  }
1471  }
1472  }
1473  });
1474  }
1475  else if (input2_.rank() == 3)
1476  {
1477  int spaceDim = input2_.extent_int(2);
1478  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1479  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1480  {
1481  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1482  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1483  {
1484  for (int d=0; d<spaceDim; d++)
1485  {
1486  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
1487  }
1488  }
1489  }
1490  });
1491  }
1492  else if (input3_.rank() == 3)
1493  {
1494  int spaceDim = input3_.extent_int(2);
1495  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1496  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1497  {
1498  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1499  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1500  {
1501  for (int d=0; d<spaceDim; d++)
1502  {
1503  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
1504  }
1505  }
1506  }
1507  });
1508  }
1509  else
1510  {
1511  // unsupported rank combination -- enforced in constructor
1512  }
1513  }
1514  else
1515  {
1516  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
1517  {
1518  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1519  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1520  {
1521  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1522  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1523  {
1524  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1525  {
1526  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1527  {
1528  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1529  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1530  }
1531  }
1532  }
1533  }
1534  });
1535  }
1536  else if (input1_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1537  {
1538  int spaceDim = input1_.extent_int(2);
1539  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1540  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1541  {
1542  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1543  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1544  {
1545  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1546  {
1547  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1548  {
1549  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1550  for (int d=0; d<spaceDim; d++)
1551  {
1552  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1553  }
1554  }
1555  }
1556  }
1557  }
1558  });
1559  }
1560  else if (input2_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1561  {
1562  int spaceDim = input2_.extent_int(2);
1563  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1564  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1565  {
1566  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1567  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1568  {
1569  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1570  {
1571  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1572  {
1573  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1574  for (int d=0; d<spaceDim; d++)
1575  {
1576  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
1577  }
1578  }
1579  }
1580  }
1581  }
1582  });
1583  }
1584  else if (input3_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1585  {
1586  int spaceDim = input3_.extent_int(2);
1587  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1588  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1589  {
1590  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1591  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1592  {
1593  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1594  {
1595  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1596  {
1597  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1598  for (int d=0; d<spaceDim; d++)
1599  {
1600  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
1601  }
1602  }
1603  }
1604  }
1605  }
1606  });
1607  }
1608  else
1609  {
1610  // unsupported rank combination -- enforced in constructor
1611  }
1612  }
1613  }
1614  }; // TensorBasis3_Functor
1615 
1616 
1617  template<typename BasisBaseClass = void>
1619  : public Basis_TensorBasis<BasisBaseClass>
1620  {
1621  using BasisBase = BasisBaseClass;
1623  public:
1624  using OutputViewType = typename BasisBase::OutputViewType;
1625  using PointViewType = typename BasisBase::PointViewType;
1626  using ScalarViewType = typename BasisBase::ScalarViewType;
1627 
1628  using OutputValueType = typename BasisBase::OutputValueType;
1629  using PointValueType = typename BasisBase::PointValueType;
1630 
1631  using BasisPtr = Teuchos::RCP<BasisBase>;
1632  protected:
1633  BasisPtr basis1_;
1634  BasisPtr basis2_;
1635  BasisPtr basis3_;
1636  public:
1637  Basis_TensorBasis3(BasisPtr basis1, BasisPtr basis2, BasisPtr basis3)
1638  :
1639  TensorBasis(Teuchos::rcp( new TensorBasis(basis1,basis2)),basis3),
1640  basis1_(basis1),
1641  basis2_(basis2),
1642  basis3_(basis3)
1643  {}
1644 
1651  virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
1652  {
1653  OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
1654  std::vector<BasisPtr> componentBases {basis1_, basis2_, basis3_};
1655  return opSimpleDecomposition.expandedDecomposition(componentBases);
1656  }
1657 
1658  using TensorBasis::getValues;
1659 
1684  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1685  const PointViewType inputPoints12, const PointViewType inputPoints3,
1686  bool tensorPoints) const override
1687  {
1688  // TODO: rework this to use superclass's getComponentPoints.
1689 
1690  int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
1691  int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
1692 
1693  int totalSpaceDim12 = inputPoints12.extent_int(1);
1694 
1695  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
1696 
1697  if (!tensorPoints)
1698  {
1699  auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1700  auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
1701 
1702  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
1703  }
1704  else
1705  {
1706  // superclass doesn't (yet) have a clever way to detect tensor points in a single container
1707  // we'd need something along those lines here to detect them in inputPoints12.
1708  // if we do add such a mechanism to superclass, it should be simple enough to call that from here
1709  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "This method does not yet handle tensorPoints=true");
1710  }
1711  }
1712 
1740  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1741  const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3,
1742  bool tensorPoints) const = 0;
1743 
1771  void getValues( OutputViewType outputValues,
1772  const PointViewType inputPoints1, const EOperator operatorType1,
1773  const PointViewType inputPoints2, const EOperator operatorType2,
1774  const PointViewType inputPoints3, const EOperator operatorType3,
1775  bool tensorPoints, double weight=1.0) const
1776  {
1777  int basisCardinality1 = basis1_->getCardinality();
1778  int basisCardinality2 = basis2_->getCardinality();
1779  int basisCardinality3 = basis3_->getCardinality();
1780 
1781  int spaceDim1 = inputPoints1.extent_int(1);
1782  int spaceDim2 = inputPoints2.extent_int(1);
1783  int spaceDim3 = inputPoints3.extent_int(1);
1784 
1785  int totalPointCount;
1786  int pointCount1, pointCount2, pointCount3;
1787  if (tensorPoints)
1788  {
1789  pointCount1 = inputPoints1.extent_int(0);
1790  pointCount2 = inputPoints2.extent_int(0);
1791  pointCount3 = inputPoints3.extent_int(0);
1792  totalPointCount = pointCount1 * pointCount2 * pointCount3;
1793  }
1794  else
1795  {
1796  totalPointCount = inputPoints1.extent_int(0);
1797  pointCount1 = totalPointCount;
1798  pointCount2 = totalPointCount;
1799  pointCount3 = totalPointCount;
1800 
1801  INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
1802  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1803  }
1804 
1805  // structure of this implementation:
1806  /*
1807  - allocate output1, output2, output3 containers
1808  - either:
1809  1. split off the tensor functor call into its own method in TensorBasis, and
1810  - call it once with output1, output2, placing these in another newly allocated output12, then
1811  - call it again with output12, output3
1812  OR
1813  2. create a 3-argument tensor functor and call it with output1,output2,output3
1814 
1815  At the moment, the 3-argument functor seems like a better approach. It's likely more code, but somewhat
1816  more efficient and easier to understand/debug. And the code is fairly straightforward to produce.
1817  */
1818 
1819  // copied from the 2-argument TensorBasis implementation:
1820 
1821  OutputViewType outputValues1, outputValues2, outputValues3;
1822 
1823  //Note: the gradient of HGRAD basis on a line has an output vector of rank 3, the last dimension being of size 1.
1824  // in particular this holds even when computing the divergence of an HDIV basis, which is scalar and has rank 2.
1825  if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
1826  {
1827  // use a rank 2 container for basis1
1828  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1829  }
1830  else
1831  {
1832  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1833  }
1834  if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
1835  {
1836  // use a rank 2 container for basis2
1837  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1838  }
1839  else
1840  {
1841  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1842  }
1843  if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
1844  {
1845  // use a rank 2 container for basis2
1846  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3);
1847  }
1848  else
1849  {
1850  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
1851  }
1852 
1853  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1854  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1855  basis3_->getValues(outputValues3,inputPoints3,operatorType3);
1856 
1857  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1858  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1859  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1860 
1861  using ExecutionSpace = typename BasisBase::ExecutionSpace;
1862 
1863  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1864 
1866  FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
1867  Kokkos::parallel_for( policy , functor, "TensorBasis3_Functor");
1868  }
1869 
1875  getHostBasis() const override {
1876  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis3 subclasses must override getHostBasis");
1877  }
1878  };
1879 } // end namespace Intrepid2
1880 
1881 #endif /* Intrepid2_TensorBasis_h */
KOKKOS_INLINE_FUNCTION ScalarType getView1Entry()
Teuchos::RCP< Basis< DeviceType, OutputType, PointType > > BasisPtr
Basis Pointer.
ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1, ordinal_type dkEnum2, ordinal_type operatorOrder2) const
Given "Dk" enumeration indices for the component bases, returns a Dk enumeration index for the compos...
Functor for computing values for the TensorBasis class.
arbitrary variation
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorRank(const EFunctionSpace spaceType, const EOperator operatorType, const ordinal_type spaceDim)
Returns rank of an operator.
View-like interface to tensor points; point components are stored separately; the appropriate coordin...
virtual void getDofCoords(typename BasisBase::ScalarViewType dofCoords) const override
Fills in spatial locations (coordinates) of degrees of freedom (nodes) on the reference cell...
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition, PointViewType &inputPoints1, PointViewType &inputPoints2, bool &tensorDecompositionSucceeded) const
Method to extract component points from composite points.
Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace=FUNCTION_SPACE_MAX)
Constructor.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1...
For a multi-component tensor basis, specifies the operators to be applied to the components to produc...
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints12, const PointViewType inputPoints3, bool tensorPoints) const override
Evaluation of a tensor FEM basis on a reference cell.
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
For two cell topologies whose tensor product is a third, this class establishes a mapping from subcel...
Header function for Intrepid2::Util class and other utility functions.
Implementation of an assert that can safely be called from device code.
BasisPtr< typename Kokkos::HostSpace::device_type, OutputType, PointType > HostBasisPtr
Pointer to a Basis whose device type is on the host (Kokkos::HostSpace::device_type), allowing host access to input and output views, and ensuring host execution of basis evaluation.
virtual void getDofCoeffs(typename BasisBase::ScalarViewType dofCoeffs) const override
Fills in coefficients of degrees of freedom on the reference cell.
unsigned getCompositeSubcellOrdinal(unsigned subcell1Dim, unsigned subcell1Ordinal, unsigned subcell2Dim, unsigned subcell2Ordinal)
Map from component subcell ordinals to the corresponding composite subcell ordinal.
KOKKOS_INLINE_FUNCTION ordinal_type getDkCardinality(const EOperator operatorType, const ordinal_type spaceDim)
Returns multiplicities of dx, dy, and dz based on the enumeration of the partial derivative, its order and the space dimension. Inverse of the getDkEnumeration() method.
The data containers in Intrepid2 that support sum factorization and other reduced-data optimizations ...
KOKKOS_INLINE_FUNCTION ordinal_type getDkEnumeration(const ordinal_type xMult, const ordinal_type yMult=-1, const ordinal_type zMult=-1)
Returns the ordinal of a partial derivative of order k based on the multiplicities of the partials dx...
ordinal_type componentPointCount(const ordinal_type &tensorComponentOrdinal) const
Returns the number of points in the indicated component.
EOperator
Enumeration of primitive operators available in Intrepid. Primitive operators act on reconstructed fu...
KOKKOS_INLINE_FUNCTION void set(ScalarType value)
KOKKOS_INLINE_FUNCTION void setLocation(const Kokkos::Array< int, 7 > location)
KOKKOS_INLINE_FUNCTION ScalarType getView2Entry()
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
virtual const char * getName() const override
Returns basis name.
virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator operatorType) const
Returns a simple decomposition of the specified operator: what operator(s) should be applied to basis...
TensorDataType & tensorData()
TensorData accessor for single-family scalar data.
KOKKOS_INLINE_FUNCTION ScalarView< PointScalar, DeviceType > getTensorComponent(const ordinal_type &r) const
Returns the requested tensor component.
Implementation of support for traversing component views alongside a view that represents a combinati...
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorOrder(const EOperator operatorType)
Returns order of an operator.
KOKKOS_INLINE_FUNCTION int nextIncrementRank()
OperatorTensorDecomposition expandedDecomposition(std::vector< Teuchos::RCP< Basis< DeviceType, OutputValueType, PointValueType > > > &bases)
takes as argument bases that are components in this decomposition, and decomposes them further if the...
Class that defines mappings from component cell topologies to their tensor product topologies...
virtual void getValues(BasisValues< OutputValueType, DeviceType > outputValues, const TensorPoints< PointValueType, DeviceType > inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell, using point and output value containers that allow pre...
View-like interface to tensor data; tensor components are stored separately and multiplied together a...
const VectorDataType & vectorData() const
VectorData accessor.
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, const PointViewType inputPoints3, const EOperator operatorType3, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
A helper class that allows iteration over three Kokkos Views simultaneously, according to tensor comb...
virtual BasisValues< OutputValueType, DeviceType > allocateBasisValues(TensorPoints< PointValueType, DeviceType > points, const EOperator operatorType=OPERATOR_VALUE) const override
Allocate BasisValues container suitable for passing to the getValues() variant that takes a TensorPoi...
Basis defined as the tensor product of two component bases.
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
Reference-space field values for a basis, designed to support typical vector-valued bases...
Functor for computing values for the TensorBasis3 class.
Header file for the abstract base class Intrepid2::Basis.