Viskores  1.0
DeviceAdapterAlgorithmGeneral.h
Go to the documentation of this file.
1 //============================================================================
2 // The contents of this file are covered by the Viskores license. See
3 // LICENSE.txt for details.
4 //
5 // By contributing to this file, all contributors agree to the Developer
6 // Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
7 //============================================================================
8 
9 //============================================================================
10 // Copyright (c) Kitware, Inc.
11 // All rights reserved.
12 // See LICENSE.txt for details.
13 //
14 // This software is distributed WITHOUT ANY WARRANTY; without even
15 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 // PURPOSE. See the above copyright notice for more information.
17 //============================================================================
18 
19 #ifndef viskores_cont_internal_DeviceAdapterAlgorithmGeneral_h
20 #define viskores_cont_internal_DeviceAdapterAlgorithmGeneral_h
21 
28 #include <viskores/cont/BitField.h>
29 #include <viskores/cont/Logging.h>
32 
35 
37 #include <viskores/TypeTraits.h>
38 
40 
41 #include <type_traits>
42 
43 namespace viskores
44 {
45 namespace cont
46 {
47 namespace internal
48 {
49 
108 template <class DerivedAlgorithm, class DeviceAdapterTag>
109 struct DeviceAdapterAlgorithmGeneral
110 {
111  //--------------------------------------------------------------------------
112  // Get Execution Value
113  // This method is used internally to get a single element from the execution
114  // array. Normally you would just use ArrayGetValue, but that functionality
115  // relies on the device adapter algorithm and would create a circular
116  // dependency.
117 private:
// Returns the element of `input` at position `index`. The value is fetched by
// scheduling a single-instance copy on the device into a one-element scratch
// array and then reading element 0 of that array through ReadPortal().
// NOTE(review): `output` is used below, but its declaration (original line 122)
// is missing from this listing -- confirm its type against the real header.
118  template <typename T, class CIn>
119  VISKORES_CONT static T GetExecutionValue(const viskores::cont::ArrayHandle<T, CIn>& input,
120  viskores::Id index)
121  {
123 
124  {
     // The token lives only inside this inner scope, so it is destroyed before
     // output.ReadPortal() is called below.
125  viskores::cont::Token token;
126 
127  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
     // The scratch output is sized to hold exactly one value.
128  auto outputPortal = output.PrepareForOutput(1, DeviceAdapterTag(), token);
129 
     // CopyKernel is defined elsewhere; `index` is presumably the input offset
     // to copy from -- TODO confirm.
130  CopyKernel<decltype(inputPortal), decltype(outputPortal)> kernel(
131  inputPortal, outputPortal, index);
132 
133  DerivedAlgorithm::Schedule(kernel, 1);
134  }
135 
136  return output.ReadPortal().Get(0);
137  }
138 
139 public:
140  //--------------------------------------------------------------------------
141  // BitFieldToUnorderedSet
// Runs BitFieldToUnorderedSetFunctor over `bits`, writing into `indices`, then
// resizes `indices` to the count accumulated in `popCount` and returns that count.
// NOTE(review): the `indices` parameter declaration (original line 145) is
// missing from this listing.
142  template <typename IndicesStorage>
143  VISKORES_CONT static viskores::Id BitFieldToUnorderedSet(
144  const viskores::cont::BitField& bits,
146  {
148 
149  viskores::Id numBits = bits.GetNumberOfBits();
150 
151  viskores::cont::Token token;
152 
153  auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{}, token);
     // Output is first sized for the worst case of one index per bit.
154  auto indicesPortal = indices.PrepareForOutput(numBits, DeviceAdapterTag{}, token);
155 
     // Host-side counter handed to the functor; it is read back after the
     // Synchronize() call below.
156  std::atomic<viskores::UInt64> popCount;
157  popCount.store(0, std::memory_order_seq_cst);
158 
159  using Functor = BitFieldToUnorderedSetFunctor<decltype(bitsPortal), decltype(indicesPortal)>;
160  Functor functor{ bitsPortal, indicesPortal, popCount };
161 
162  DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances());
163  DerivedAlgorithm::Synchronize();
164 
     // Detach the token before `indices` is reallocated below.
165  token.DetachFromAll();
166 
     // numBits is reused to hold the number of indices reported by the functor.
167  numBits = static_cast<viskores::Id>(popCount.load(std::memory_order_seq_cst));
168 
     // CopyFlag::On presumably preserves the entries already written -- TODO confirm.
169  indices.Allocate(numBits, viskores::CopyFlag::On);
170  return numBits;
171  }
172 
173  //--------------------------------------------------------------------------
174  // Copy
// Sizes `output` to the number of values in `input` and schedules one
// CopyKernel instance per input value.
// NOTE(review): the `output` parameter declaration (original line 177) is
// missing from this listing.
175  template <typename T, typename U, class CIn, class COut>
176  VISKORES_CONT static void Copy(const viskores::cont::ArrayHandle<T, CIn>& input,
178  {
180 
181  viskores::cont::Token token;
182 
183  const viskores::Id inSize = input.GetNumberOfValues();
184  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
185  auto outputPortal = output.PrepareForOutput(inSize, DeviceAdapterTag(), token);
186 
187  CopyKernel<decltype(inputPortal), decltype(outputPortal)> kernel(inputPortal, outputPortal);
188  DerivedAlgorithm::Schedule(kernel, inSize);
189  }
190 
191  //--------------------------------------------------------------------------
192  // CopyIf
// Stencil-driven copy performed in three steps visible below:
//   1. StencilToIndexFlagKernel fills `indices` from `stencil` and `unary_predicate`.
//   2. An in-place ScanExclusive over `indices` yields the output length.
//   3. CopyIfKernel is scheduled over the stencil length to write into `output`.
// NOTE(review): the `stencil` and `output` parameter declarations and the
// definition of IndexArrayType are on lines missing from this listing.
193  template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
194  VISKORES_CONT static void CopyIf(const viskores::cont::ArrayHandle<T, CIn>& input,
197  UnaryPredicate unary_predicate)
198  {
200 
     // All scheduling below is sized by the stencil, not by `input`.
202  viskores::Id arrayLength = stencil.GetNumberOfValues();
203 
204  using IndexArrayType =
206  IndexArrayType indices;
207 
208  {
209  viskores::cont::Token token;
210 
211  auto stencilPortal = stencil.PrepareForInput(DeviceAdapterTag(), token);
212  auto indexPortal = indices.PrepareForOutput(arrayLength, DeviceAdapterTag(), token);
213 
214  StencilToIndexFlagKernel<decltype(stencilPortal), decltype(indexPortal), UnaryPredicate>
215  indexKernel(stencilPortal, indexPortal, unary_predicate);
216 
217  DerivedAlgorithm::Schedule(indexKernel, arrayLength);
218  }
219 
     // Input and output of the scan are the same array; the return value is
     // used as the length of `output`.
220  viskores::Id outArrayLength = DerivedAlgorithm::ScanExclusive(indices, indices);
221 
222  {
223  viskores::cont::Token token;
224 
225  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
226  auto stencilPortal = stencil.PrepareForInput(DeviceAdapterTag(), token);
     // NOTE(review): `indices` was just filled by the scan and is passed to the
     // kernel as its index portal, yet it is prepared with PrepareForOutput
     // rather than PrepareForInput -- confirm this keeps the scanned values.
227  auto indexPortal = indices.PrepareForOutput(arrayLength, DeviceAdapterTag(), token);
228  auto outputPortal = output.PrepareForOutput(outArrayLength, DeviceAdapterTag(), token);
229 
230  CopyIfKernel<decltype(inputPortal),
231  decltype(stencilPortal),
232  decltype(indexPortal),
233  decltype(outputPortal),
234  UnaryPredicate>
235  copyKernel(inputPortal, stencilPortal, indexPortal, outputPortal, unary_predicate);
236  DerivedAlgorithm::Schedule(copyKernel, arrayLength);
237  }
238  }
239 
// Convenience overload: forwards to the predicate overload of
// DerivedAlgorithm::CopyIf using a default-constructed
// ::viskores::NotZeroInitialized as the predicate.
// NOTE(review): the `stencil` and `output` parameter declarations are on lines
// missing from this listing.
240  template <typename T, typename U, class CIn, class CStencil, class COut>
241  VISKORES_CONT static void CopyIf(const viskores::cont::ArrayHandle<T, CIn>& input,
244  {
246 
247  ::viskores::NotZeroInitialized unary_predicate;
248  DerivedAlgorithm::CopyIf(input, stencil, output, unary_predicate);
249  }
250 
251  //--------------------------------------------------------------------------
252  // CopySubRange
// Copies up to `numberOfElementsToCopy` values of `input`, starting at
// `inputStartIndex`, into `output` starting at `outputIndex`.
//
// Returns false without copying when `input == output` and the two ranges
// overlap, when any index/count argument is negative, or when
// `inputStartIndex` is not inside `input`. Otherwise returns true.
// The element count is clamped to what `input` holds past `inputStartIndex`,
// and `output` is grown (keeping its current contents) when it is too short.
// NOTE(review): the `output` parameter declaration (original line 257) and the
// declaration of `temp` (original line 297) are missing from this listing.
253  template <typename T, typename U, class CIn, class COut>
254  VISKORES_CONT static bool CopySubRange(const viskores::cont::ArrayHandle<T, CIn>& input,
255  viskores::Id inputStartIndex,
256  viskores::Id numberOfElementsToCopy,
258  viskores::Id outputIndex = 0)
259  {
261 
262  const viskores::Id inSize = input.GetNumberOfValues();
263 
264  // Check if the ranges overlap and fail if they do.
     // The overlap test uses the requested count, before it is clamped below.
265  if (input == output &&
266  ((outputIndex >= inputStartIndex &&
267  outputIndex < inputStartIndex + numberOfElementsToCopy) ||
268  (inputStartIndex >= outputIndex &&
269  inputStartIndex < outputIndex + numberOfElementsToCopy)))
270  {
271  return false;
272  }
273 
274  if (inputStartIndex < 0 || numberOfElementsToCopy < 0 || outputIndex < 0 ||
275  inputStartIndex >= inSize)
276  { //invalid parameters
277  return false;
278  }
279 
280  //determine if the numberOfElementsToCopy needs to be reduced
281  if (inSize < (inputStartIndex + numberOfElementsToCopy))
282  { //adjust the size
283  numberOfElementsToCopy = (inSize - inputStartIndex);
284  }
285 
286  const viskores::Id outSize = output.GetNumberOfValues();
287  const viskores::Id copyOutEnd = outputIndex + numberOfElementsToCopy;
288  if (outSize < copyOutEnd)
289  { //output is not large enough
290  if (outSize == 0)
291  { //since output has nothing, just need to allocate to correct length
292  output.Allocate(copyOutEnd);
293  }
294  else
295  { //we currently have data in this array, so preserve it in the new
296  //resized array
298  temp.Allocate(copyOutEnd);
     // Recursive call copies the existing outSize values into the larger array.
299  DerivedAlgorithm::CopySubRange(output, 0, outSize, temp);
300  output = temp;
301  }
302  }
303 
304  viskores::cont::Token token;
305 
306  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
     // In-place access so values outside the copied range are left as they are.
307  auto outputPortal = output.PrepareForInPlace(DeviceAdapterTag(), token);
308 
309  CopyKernel<decltype(inputPortal), decltype(outputPortal)> kernel(
310  inputPortal, outputPortal, inputStartIndex, outputIndex);
311  DerivedAlgorithm::Schedule(kernel, numberOfElementsToCopy);
312  return true;
313  }
314 
315  //--------------------------------------------------------------------------
316  // Count Set Bits
318  {
320 
321  viskores::cont::Token token;
322 
323  auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{}, token);
324 
325  std::atomic<viskores::UInt64> popCount;
326  popCount.store(0, std::memory_order_relaxed);
327 
328  using Functor = CountSetBitsFunctor<decltype(bitsPortal)>;
329  Functor functor{ bitsPortal, popCount };
330 
331  DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances());
332  DerivedAlgorithm::Synchronize();
333 
334  return static_cast<viskores::Id>(popCount.load(std::memory_order_seq_cst));
335  }
336 
337  //--------------------------------------------------------------------------
338  // Fill Bit Field (bool, resize)
// Resizes `bits` to `numBits` bits and fills it with an all-ones word pattern
// when `value` is true, or an all-zeros pattern when it is false.
// A request for zero bits only allocates `bits` to size 0.
339  VISKORES_CONT static void Fill(viskores::cont::BitField& bits, bool value, viskores::Id numBits)
340  {
342 
343  if (numBits == 0)
344  {
345  bits.Allocate(0);
346  return;
347  }
348 
349  viskores::cont::Token token;
350 
351  auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}, token);
352 
     // Word type the BitField reports as preferred for this device.
353  using WordType = typename viskores::cont::BitField::template ExecutionTypes<
354  DeviceAdapterTag>::WordTypePreferred;
355 
356  using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
357  Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
358 
     // One scheduled instance per word of the portal.
359  const viskores::Id numWords = portal.template GetNumberOfWords<WordType>();
360  DerivedAlgorithm::Schedule(functor, numWords);
361  }
362 
363  //--------------------------------------------------------------------------
364  // Fill Bit Field (bool)
// Fills `bits` at its current size with an all-ones word pattern when `value`
// is true, or an all-zeros pattern when it is false. Does nothing when `bits`
// has zero bits.
365  VISKORES_CONT static void Fill(viskores::cont::BitField& bits, bool value)
366  {
368 
369  const viskores::Id numBits = bits.GetNumberOfBits();
370  if (numBits == 0)
371  {
372  return;
373  }
374 
375  viskores::cont::Token token;
376 
     // Prepared for output with the size it already has.
377  auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}, token);
378 
     // Word type the BitField reports as preferred for this device.
379  using WordType = typename viskores::cont::BitField::template ExecutionTypes<
380  DeviceAdapterTag>::WordTypePreferred;
381 
382  using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
383  Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
384 
     // One scheduled instance per word of the portal.
385  const viskores::Id numWords = portal.template GetNumberOfWords<WordType>();
386  DerivedAlgorithm::Schedule(functor, numWords);
387  }
388 
389  //--------------------------------------------------------------------------
390  // Fill Bit Field (mask, resize)
// Resizes `bits` to `numBits` bits and fills it with the repeating pattern
// `word`. A request for zero bits only allocates `bits` to size 0.
// NOTE(review): the line that opens the "Invalid word type." static assertion
// (original line 396) is missing from this listing.
391  template <typename WordType>
392  VISKORES_CONT static void Fill(viskores::cont::BitField& bits,
393  WordType word,
394  viskores::Id numBits)
395  {
397  "Invalid word type.");
398 
400 
401  if (numBits == 0)
402  {
403  bits.Allocate(0);
404  return;
405  }
406 
407  viskores::cont::Token token;
408 
409  auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}, token);
410 
411  // If less than 32 bits, repeat the word until we get a 32 bit pattern.
412  // Using this for the pattern prevents races while writing small numbers
413  // to adjacent memory locations.
414  auto repWord = RepeatTo32BitsIfNeeded(word);
415  using RepWordType = decltype(repWord);
416 
417  using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
418  Functor functor{ portal, repWord };
419 
     // One scheduled instance per (possibly widened) word of the portal.
420  const viskores::Id numWords = portal.template GetNumberOfWords<RepWordType>();
421  DerivedAlgorithm::Schedule(functor, numWords);
422  }
423 
424  //--------------------------------------------------------------------------
425  // Fill Bit Field (mask)
// Fills `bits` at its current size with the repeating pattern `word`.
// Does nothing when `bits` has zero bits.
// NOTE(review): the line that opens the "Invalid word type." static assertion
// (original line 429) is missing from this listing.
426  template <typename WordType>
427  VISKORES_CONT static void Fill(viskores::cont::BitField& bits, WordType word)
428  {
430  "Invalid word type.");
432 
433  const viskores::Id numBits = bits.GetNumberOfBits();
434  if (numBits == 0)
435  {
436  return;
437  }
438 
439  viskores::cont::Token token;
440 
     // Prepared for output with the size it already has.
441  auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}, token);
442 
443  // If less than 32 bits, repeat the word until we get a 32 bit pattern.
444  // Using this for the pattern prevents races while writing small numbers
445  // to adjacent memory locations.
446  auto repWord = RepeatTo32BitsIfNeeded(word);
447  using RepWordType = decltype(repWord);
448 
449  using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
450  Functor functor{ portal, repWord };
451 
     // One scheduled instance per (possibly widened) word of the portal.
452  const viskores::Id numWords = portal.template GetNumberOfWords<RepWordType>();
453  DerivedAlgorithm::Schedule(functor, numWords);
454  }
455 
456  //--------------------------------------------------------------------------
457  // Fill ArrayHandle
// Fills `handle` at its current size using `value`, scheduling one
// FillArrayHandleFunctor instance per element. Does nothing when the array is
// empty.
458  template <typename T, typename S>
459  VISKORES_CONT static void Fill(viskores::cont::ArrayHandle<T, S>& handle, const T& value)
460  {
462 
463  const viskores::Id numValues = handle.GetNumberOfValues();
464  if (numValues == 0)
465  {
466  return;
467  }
468 
469  viskores::cont::Token token;
470 
     // Prepared for output with the size it already has.
471  auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{}, token);
472  FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
473  DerivedAlgorithm::Schedule(functor, numValues);
474  }
475 
476  //--------------------------------------------------------------------------
477  // Fill ArrayHandle (resize)
// Resizes `handle` to `numValues` elements and fills it using `value`,
// scheduling one FillArrayHandleFunctor instance per element.
// A request for zero values releases the handle's resources instead.
478  template <typename T, typename S>
479  VISKORES_CONT static void Fill(viskores::cont::ArrayHandle<T, S>& handle,
480  const T& value,
481  const viskores::Id numValues)
482  {
484  if (numValues == 0)
485  {
486  handle.ReleaseResources();
487  return;
488  }
489 
490  viskores::cont::Token token;
491 
492  auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{}, token);
493  FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
494  DerivedAlgorithm::Schedule(functor, numValues);
495  }
496 
497  //--------------------------------------------------------------------------
498  // Lower Bounds
// Schedules LowerBoundsKernel with one instance per entry of `values`;
// `output` is sized to the number of entries in `values`.
// NOTE(review): the `values` and `output` parameter declarations (original
// lines 501-502) are missing from this listing.
499  template <typename T, class CIn, class CVal, class COut>
500  VISKORES_CONT static void LowerBounds(const viskores::cont::ArrayHandle<T, CIn>& input,
503  {
505 
     // The amount of work is set by `values`, not by `input`.
506  viskores::Id arraySize = values.GetNumberOfValues();
507 
508  viskores::cont::Token token;
509 
510  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
511  auto valuesPortal = values.PrepareForInput(DeviceAdapterTag(), token);
512  auto outputPortal = output.PrepareForOutput(arraySize, DeviceAdapterTag(), token);
513 
514  LowerBoundsKernel<decltype(inputPortal), decltype(valuesPortal), decltype(outputPortal)> kernel(
515  inputPortal, valuesPortal, outputPortal);
516 
517  DerivedAlgorithm::Schedule(kernel, arraySize);
518  }
519 
// Same as the overload without a comparator, except that
// LowerBoundsComparisonKernel is used and is given `binary_compare`.
// One instance is scheduled per entry of `values`.
// NOTE(review): the `values` and `output` parameter declarations (original
// lines 522-523) are missing from this listing.
520  template <typename T, class CIn, class CVal, class COut, class BinaryCompare>
521  VISKORES_CONT static void LowerBounds(const viskores::cont::ArrayHandle<T, CIn>& input,
524  BinaryCompare binary_compare)
525  {
527 
     // The amount of work is set by `values`, not by `input`.
528  viskores::Id arraySize = values.GetNumberOfValues();
529 
530  viskores::cont::Token token;
531 
532  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
533  auto valuesPortal = values.PrepareForInput(DeviceAdapterTag(), token);
534  auto outputPortal = output.PrepareForOutput(arraySize, DeviceAdapterTag(), token);
535 
536  LowerBoundsComparisonKernel<decltype(inputPortal),
537  decltype(valuesPortal),
538  decltype(outputPortal),
539  BinaryCompare>
540  kernel(inputPortal, valuesPortal, outputPortal, binary_compare);
541 
542  DerivedAlgorithm::Schedule(kernel, arraySize);
543  }
544 
// In-place variant: `values_output` is passed as both the values to search for
// and the array that receives the results.
// NOTE(review): both parameter declarations (original lines 547-548) are
// missing from this listing.
545  template <class CIn, class COut>
546  VISKORES_CONT static void LowerBounds(
549  {
551 
     // Calls this class's own LowerBounds rather than going through DerivedAlgorithm.
552  DeviceAdapterAlgorithmGeneral<DerivedAlgorithm, DeviceAdapterTag>::LowerBounds(
553  input, values_output, values_output);
554  }
555 
556  //--------------------------------------------------------------------------
557  // Reduce
558 #ifndef VISKORES_CUDA
559  // nvcc doesn't like the private class declaration so disable under CUDA
560 private:
561 #endif
// Small value type that stores an initial value and a binary operator and,
// given a portal, builds the matching ReduceKernel. Reduce() below passes an
// instance of this class when it constructs its `reduced` array.
// NOTE(review): original line 568 (directly above the two-argument
// constructor) is missing from this listing.
562  template <typename T, typename BinaryFunctor>
563  class ReduceDecoratorImpl
564  {
565  public:
566  VISKORES_CONT ReduceDecoratorImpl() = default;
567 
     // Stores copies of the initial value and the operator.
569  ReduceDecoratorImpl(const T& initialValue, const BinaryFunctor& binaryFunctor)
570  : InitialValue(initialValue)
571  , ReduceOperator(binaryFunctor)
572  {
573  }
574 
     // Builds a ReduceKernel over `portal` carrying the stored value and operator.
575  template <typename Portal>
576  VISKORES_CONT ReduceKernel<Portal, T, BinaryFunctor> CreateFunctor(const Portal& portal) const
577  {
578  return ReduceKernel<Portal, T, BinaryFunctor>(
579  portal, this->InitialValue, this->ReduceOperator);
580  }
581 
582  private:
583  T InitialValue;
584  BinaryFunctor ReduceOperator;
585  };
586 
587 public:
// Convenience overload: reduces `input` with viskores::Add as the operator,
// starting from `initialValue`, by forwarding to DerivedAlgorithm::Reduce.
588  template <typename T, typename U, class CIn>
589  VISKORES_CONT static U Reduce(const viskores::cont::ArrayHandle<T, CIn>& input, U initialValue)
590  {
592 
593  return DerivedAlgorithm::Reduce(input, initialValue, viskores::Add());
594  }
595 
// Reduces `input` with `binary_functor`, starting from `initialValue`, and
// returns the result of the final inclusive scan.
// NOTE(review): the declarations of `reduced` (original line 611) and
// `inclusiveScanStorage` (original line 614) are missing from this listing.
596  template <typename T, typename U, class CIn, class BinaryFunctor>
597  VISKORES_CONT static U Reduce(const viskores::cont::ArrayHandle<T, CIn>& input,
598  U initialValue,
599  BinaryFunctor binary_functor)
600  {
602 
603  //Crazy Idea:
604  //We perform the reduction in two levels. The first level is performed by
605  //an `ArrayHandleDecorator` which reduces 16 input values and maps them to
606  //one value. The decorator array is then 1/16 the length of the input array,
607  //and we can use inclusive scan as the second level to compute the final
608  //result.
     // length = number of 16-value groups, rounded up for a partial last group.
609  viskores::Id length = (input.GetNumberOfValues() / 16);
610  length += (input.GetNumberOfValues() % 16 == 0) ? 0 : 1;
612  length, ReduceDecoratorImpl<U, BinaryFunctor>(initialValue, binary_functor), input);
613 
     // ScanInclusive returns the last scanned value, which is the full reduction.
615  const U scanResult =
616  DerivedAlgorithm::ScanInclusive(reduced, inclusiveScanStorage, binary_functor);
617  return scanResult;
618  }
619 
620  //--------------------------------------------------------------------------
621  // Reduce By Key
// Reduces `values` per key with `binary_functor`, writing the reduced values to
// `values_output` and the unique keys to `keys_output`.
// With zero or one key the inputs are simply copied to the outputs.
// NOTE(review): several lines are missing from this listing: the `values`,
// `keys_output` and `values_output` parameters (original lines 630-632), the
// declarations of `keystate` (652) and `stencil` (670), and the condition that
// guards the final keys_output block (691).
622  template <typename T,
623  typename U,
624  class KIn,
625  class VIn,
626  class KOut,
627  class VOut,
628  class BinaryFunctor>
629  VISKORES_CONT static void ReduceByKey(const viskores::cont::ArrayHandle<T, KIn>& keys,
633  BinaryFunctor binary_functor)
634  {
636 
637  using KeysOutputType = viskores::cont::ArrayHandle<U, KOut>;
638 
640  const viskores::Id numberOfKeys = keys.GetNumberOfValues();
641 
642  if (numberOfKeys <= 1)
643  { //we only have a single key/value so that is our output
644  DerivedAlgorithm::Copy(keys, keys_output);
645  DerivedAlgorithm::Copy(values, values_output);
646  return;
647  }
648 
649  //we need to determine based on the keys what is the keystate for
650  //each key. The states are start, middle, end of a series and the special
651  //state start and end of a series
653 
654  {
655  viskores::cont::Token token;
656  auto inputPortal = keys.PrepareForInput(DeviceAdapterTag(), token);
657  auto keyStatePortal = keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag(), token);
658  ReduceStencilGeneration<decltype(inputPortal), decltype(keyStatePortal)> kernel(
659  inputPortal, keyStatePortal);
660  DerivedAlgorithm::Schedule(kernel, numberOfKeys);
661  }
662 
663  //next step is we need to reduce the values for each key. This is done
664  //by running an inclusive scan over the values array using the stencil.
665  //
666  // this inclusive scan will write out two values, the first being
667  // the value summed currently, the second being 0 or 1, with 1 being used
668  // when this is a value of a key we need to write ( END or START_AND_END)
669  {
671  viskores::cont::ArrayHandle<U> reducedValues;
672 
     // Zipped views let one scan read (value, keystate) pairs and write
     // (reduced value, stencil) pairs.
673  auto scanInput = viskores::cont::make_ArrayHandleZip(values, keystate);
674  auto scanOutput = viskores::cont::make_ArrayHandleZip(reducedValues, stencil);
675 
676  DerivedAlgorithm::ScanInclusive(
677  scanInput, scanOutput, ReduceByKeyAdd<BinaryFunctor>(binary_functor));
678 
679  //at this point we are done with keystate, so free the memory
680  keystate.ReleaseResources();
681 
682  // all we need now is an efficient way of doing the write back to the
683  // reduced global memory. this is done by using CopyIf with the
684  // stencil and values we just created with the inclusive scan
685  DerivedAlgorithm::CopyIf(reducedValues, stencil, values_output, ReduceByKeyUnaryStencilOp());
686 
687  } //release all temporary memory
688 
689  // Don't bother with the keys_output if it's an ArrayHandleDiscard -- there
690  // will be a runtime exception in Unique() otherwise:
692  {
693  //find all the unique keys
694  DerivedAlgorithm::Copy(keys, keys_output);
695  DerivedAlgorithm::Unique(keys_output);
696  }
697  }
698 
699  //--------------------------------------------------------------------------
700  // Scan Exclusive
// Exclusive scan built on top of ScanInclusive: the inclusive result is
// computed into a temporary and then converted by InclusiveToExclusiveKernel.
// Returns binaryFunctor(initialValue, <inclusive total>). For empty input,
// `output` is released and `initialValue` is returned.
// NOTE(review): the `output` parameter (original line 703) and the declaration
// of `inclusiveScan` (original line 716) are missing from this listing.
701  template <typename T, class CIn, class COut, class BinaryFunctor>
702  VISKORES_CONT static T ScanExclusive(const viskores::cont::ArrayHandle<T, CIn>& input,
704  BinaryFunctor binaryFunctor,
705  const T& initialValue)
706  {
708 
709  viskores::Id numValues = input.GetNumberOfValues();
710  if (numValues <= 0)
711  {
712  output.ReleaseResources();
713  return initialValue;
714  }
715 
     // `result` is the value returned by the inclusive scan over the whole input.
717  T result = DerivedAlgorithm::ScanInclusive(input, inclusiveScan, binaryFunctor);
718 
719  viskores::cont::Token token;
720 
721  auto inputPortal = inclusiveScan.PrepareForInput(DeviceAdapterTag(), token);
722  auto outputPortal = output.PrepareForOutput(numValues, DeviceAdapterTag(), token);
723 
724  InclusiveToExclusiveKernel<decltype(inputPortal), decltype(outputPortal), BinaryFunctor>
725  inclusiveToExclusive(inputPortal, outputPortal, binaryFunctor, initialValue);
726 
727  DerivedAlgorithm::Schedule(inclusiveToExclusive, numValues);
728 
     // The initial value is folded into the total that is handed back.
729  return binaryFunctor(initialValue, result);
730  }
731 
// Convenience overload that forwards to DerivedAlgorithm::ScanExclusive and
// returns its result.
// NOTE(review): the `output` parameter (original line 734) and the argument
// list of the forwarded call (original line 739) are missing from this
// listing, so the operator and initial value used cannot be read from here.
732  template <typename T, class CIn, class COut>
733  VISKORES_CONT static T ScanExclusive(const viskores::cont::ArrayHandle<T, CIn>& input,
735  {
737 
738  return DerivedAlgorithm::ScanExclusive(
740  }
741 
742  //--------------------------------------------------------------------------
743  // Scan Exclusive Extend
// Extended scan: `output` receives numValues + 1 entries, produced by
// InclusiveToExtendedKernel from an inclusive scan of `input`, the initial
// value, and binaryFunctor(initialValue, <inclusive total>).
// For empty input, `output` becomes a single element holding `initialValue`.
// NOTE(review): the `output` parameter (original line 746) and the declaration
// of `inclusiveScan` (original line 760) are missing from this listing.
744  template <typename T, class CIn, class COut, class BinaryFunctor>
745  VISKORES_CONT static void ScanExtended(const viskores::cont::ArrayHandle<T, CIn>& input,
747  BinaryFunctor binaryFunctor,
748  const T& initialValue)
749  {
751 
752  viskores::Id numValues = input.GetNumberOfValues();
753  if (numValues <= 0)
754  {
755  output.Allocate(1);
756  output.WritePortal().Set(0, initialValue);
757  return;
758  }
759 
761  T result = DerivedAlgorithm::ScanInclusive(input, inclusiveScan, binaryFunctor);
762 
763  viskores::cont::Token token;
764 
765  auto inputPortal = inclusiveScan.PrepareForInput(DeviceAdapterTag(), token);
     // One more output slot than there are input values.
766  auto outputPortal = output.PrepareForOutput(numValues + 1, DeviceAdapterTag(), token);
767 
768  InclusiveToExtendedKernel<decltype(inputPortal), decltype(outputPortal), BinaryFunctor>
769  inclusiveToExtended(inputPortal,
770  outputPortal,
771  binaryFunctor,
772  initialValue,
773  binaryFunctor(initialValue, result));
774 
775  DerivedAlgorithm::Schedule(inclusiveToExtended, numValues + 1);
776  }
777 
// Convenience overload that forwards to DerivedAlgorithm::ScanExtended.
// NOTE(review): the `output` parameter (original line 780) and the argument
// list of the forwarded call (original line 785) are missing from this
// listing, so the operator and initial value used cannot be read from here.
778  template <typename T, class CIn, class COut>
779  VISKORES_CONT static void ScanExtended(const viskores::cont::ArrayHandle<T, CIn>& input,
781  {
783 
784  DerivedAlgorithm::ScanExtended(
786  }
787 
788  //--------------------------------------------------------------------------
789  // Scan Exclusive By Key
// Exclusive scan restarted per key, implemented in the three numbered steps
// below: build the key-state array, build a shifted copy of `values` seeded
// with `initialValue`, then run ScanInclusiveByKey on that copy.
// NOTE(review): several lines are missing from this listing: the `keys`,
// `values` and `output` parameters (original lines 797-799) and the
// declarations of `keystate` (825) and `temp` (837).
// NOTE(review): for zero keys the function returns without touching `output`,
// so any previous contents of `output` are left as they were -- confirm this
// is intended.
790  template <typename KeyT,
791  typename ValueT,
792  typename KIn,
793  typename VIn,
794  typename VOut,
795  class BinaryFunctor>
796  VISKORES_CONT static void ScanExclusiveByKey(
800  const ValueT& initialValue,
801  BinaryFunctor binaryFunctor)
802  {
804 
806 
807  // 0. Special case for 0 and 1 element input
808  viskores::Id numberOfKeys = keys.GetNumberOfValues();
809 
810  if (numberOfKeys == 0)
811  {
812  return;
813  }
814  else if (numberOfKeys == 1)
815  {
     // A single entry's exclusive scan is just the initial value.
816  output.Allocate(1);
817  output.WritePortal().Set(0, initialValue);
818  return;
819  }
820 
821  // 1. Create head flags
822  //we need to determine based on the keys what is the keystate for
823  //each key. The states are start, middle, end of a series and the special
824  //state start and end of a series
826 
827  {
828  viskores::cont::Token token;
829  auto inputPortal = keys.PrepareForInput(DeviceAdapterTag(), token);
830  auto keyStatePortal = keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag(), token);
831  ReduceStencilGeneration<decltype(inputPortal), decltype(keyStatePortal)> kernel(
832  inputPortal, keyStatePortal);
833  DerivedAlgorithm::Schedule(kernel, numberOfKeys);
834  }
835 
836  // 2. Shift input and initialize elements at head flags position to initValue
838  {
839  viskores::cont::Token token;
840  auto inputPortal = values.PrepareForInput(DeviceAdapterTag(), token);
841  auto keyStatePortal = keystate.PrepareForInput(DeviceAdapterTag(), token);
842  auto tempPortal = temp.PrepareForOutput(numberOfKeys, DeviceAdapterTag(), token);
843 
844  ShiftCopyAndInit<ValueT,
845  decltype(inputPortal),
846  decltype(keyStatePortal),
847  decltype(tempPortal)>
848  kernel(inputPortal, keyStatePortal, tempPortal, initialValue);
849  DerivedAlgorithm::Schedule(kernel, numberOfKeys);
850  }
851  // 3. Perform a ScanInclusiveByKey
852  DerivedAlgorithm::ScanInclusiveByKey(keys, temp, output, binaryFunctor);
853  }
854 
// Convenience overload that forwards to DerivedAlgorithm::ScanExclusiveByKey.
// NOTE(review): the parameter declarations (original lines 857-859) and the
// argument list of the forwarded call (original line 864) are missing from
// this listing, so the operator and initial value used cannot be read from here.
855  template <typename KeyT, typename ValueT, class KIn, typename VIn, typename VOut>
856  VISKORES_CONT static void ScanExclusiveByKey(
860  {
862 
863  DerivedAlgorithm::ScanExclusiveByKey(
865  }
866 
867  //--------------------------------------------------------------------------
868  // Scan Inclusive
// Convenience overload: inclusive scan of `input` into `output` with
// viskores::Add as the operator; returns what the operator overload returns.
// NOTE(review): the `output` parameter declaration (original line 871) is
// missing from this listing.
869  template <typename T, class CIn, class COut>
870  VISKORES_CONT static T ScanInclusive(const viskores::cont::ArrayHandle<T, CIn>& input,
872  {
874 
875  return DerivedAlgorithm::ScanInclusive(input, output, viskores::Add());
876  }
877 
878 private:
// Overload selected when the two array handles have different value or storage
// types: such handles are always reported as not the same.
// NOTE(review): the second parameter declaration (original line 881) is
// missing from this listing.
879  template <typename T1, typename S1, typename T2, typename S2>
880  VISKORES_CONT static bool ArrayHandlesAreSame(const viskores::cont::ArrayHandle<T1, S1>&,
882  {
883  return false;
884  }
885 
// Overload for two handles of identical type: returns the result of
// comparing them with operator==.
// NOTE(review): the declaration of `a2` (original line 888) is missing from
// this listing.
886  template <typename T, typename S>
887  VISKORES_CONT static bool ArrayHandlesAreSame(const viskores::cont::ArrayHandle<T, S>& a1,
889  {
890  return a1 == a2;
891  }
892 
893 public:
// Inclusive scan with a caller-supplied operator. `input` is first copied to
// `output` (unless they are the same handle) and the scan then runs in place
// on `output` through repeated ScanKernel launches. Returns the last element
// of `output` after the scan.
// NOTE(review): the `output` parameter (original line 896) and the statement
// inside the `numValues < 1` branch (original line 909) are missing from this
// listing, so the empty-input return value cannot be read from here.
894  template <typename T, class CIn, class COut, class BinaryFunctor>
895  VISKORES_CONT static T ScanInclusive(const viskores::cont::ArrayHandle<T, CIn>& input,
897  BinaryFunctor binary_functor)
898  {
900 
     // Skip the copy when the caller passed the same array for both arguments.
901  if (!ArrayHandlesAreSame(input, output))
902  {
903  DerivedAlgorithm::Copy(input, output);
904  }
905 
906  viskores::Id numValues = output.GetNumberOfValues();
907  if (numValues < 1)
908  {
910  }
911 
912  {
913  viskores::cont::Token token;
914 
915  auto portal = output.PrepareForInPlace(DeviceAdapterTag(), token);
916  using ScanKernelType = ScanKernel<decltype(portal), BinaryFunctor>;
917 
918 
     // First pass: stride doubles each iteration (2, 4, 8, ...) while
     // stride - 1 < numValues; numValues / stride instances per launch.
919  viskores::Id stride;
920  for (stride = 2; stride - 1 < numValues; stride *= 2)
921  {
922  ScanKernelType kernel(portal, binary_functor, stride, stride / 2 - 1);
923  DerivedAlgorithm::Schedule(kernel, numValues / stride);
924  }
925 
926  // Do reverse operation on odd indices. Start at stride we were just at.
     // Second pass: stride halves each iteration, stopping before stride 1.
927  for (stride /= 2; stride > 1; stride /= 2)
928  {
929  ScanKernelType kernel(portal, binary_functor, stride, stride - 1);
930  DerivedAlgorithm::Schedule(kernel, numValues / stride);
931  }
932  }
933 
     // Read back the final element on the host side via the private helper.
934  return GetExecutionValue(output, numValues - 1);
935  }
936 
// Convenience overload: per-key inclusive scan with viskores::Add as the
// operator, forwarded to DerivedAlgorithm::ScanInclusiveByKey.
// NOTE(review): the parameter declarations (original lines 939-941) are
// missing from this listing.
937  template <typename KeyT, typename ValueT, class KIn, class VIn, class VOut>
938  VISKORES_CONT static void ScanInclusiveByKey(
942  {
944 
945  return DerivedAlgorithm::ScanInclusiveByKey(keys, values, values_output, viskores::Add());
946  }
947 
// Per-key inclusive scan with a caller-supplied operator. A key-state array is
// generated from `keys`, a single ScanInclusive is run over zipped
// (value, keystate) pairs with ReduceByKeyAdd, and the scanned values are
// copied to `values_output`. With zero or one key, `values` is copied as-is.
// NOTE(review): several lines are missing from this listing: the `keys`,
// `values` and `values_output` parameters (original lines 950-952) and the
// declarations of `keystate` (969), `reducedValues` and `stencil` (987-988).
948  template <typename KeyT, typename ValueT, class KIn, class VIn, class VOut, class BinaryFunctor>
949  VISKORES_CONT static void ScanInclusiveByKey(
953  BinaryFunctor binary_functor)
954  {
956 
958  const viskores::Id numberOfKeys = keys.GetNumberOfValues();
959 
960  if (numberOfKeys <= 1)
961  { //we only have a single key/value so that is our output
962  DerivedAlgorithm::Copy(values, values_output);
963  return;
964  }
965 
966  //we need to determine based on the keys what is the keystate for
967  //each key. The states are start, middle, end of a series and the special
968  //state start and end of a series
970 
971  {
972  viskores::cont::Token token;
973  auto inputPortal = keys.PrepareForInput(DeviceAdapterTag(), token);
974  auto keyStatePortal = keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag(), token);
975  ReduceStencilGeneration<decltype(inputPortal), decltype(keyStatePortal)> kernel(
976  inputPortal, keyStatePortal);
977  DerivedAlgorithm::Schedule(kernel, numberOfKeys);
978  }
979 
980  //next step is we need to reduce the values for each key. This is done
981  //by running an inclusive scan over the values array using the stencil.
982  //
983  // this inclusive scan will write out two values, the first being
984  // the value summed currently, the second being 0 or 1, with 1 being used
985  // when this is a value of a key we need to write ( END or START_AND_END)
986  {
     // Zipped views let one scan read (value, keystate) pairs and write
     // (scanned value, stencil) pairs.
989  auto scanInput = viskores::cont::make_ArrayHandleZip(values, keystate);
990  auto scanOutput = viskores::cont::make_ArrayHandleZip(reducedValues, stencil);
991 
992  DerivedAlgorithm::ScanInclusive(
993  scanInput, scanOutput, ReduceByKeyAdd<BinaryFunctor>(binary_functor));
994  //at this point we are done with keystate, so free the memory
995  keystate.ReleaseResources();
     // Only the scanned values are copied out here; `stencil` is not read again
     // in this function.
996  DerivedAlgorithm::Copy(reducedValues, values_output);
997  }
998  }
999 
1000  //--------------------------------------------------------------------------
1001  // Sort
// Sorts `values` in place with `binary_compare` by repeatedly scheduling
// BitonicSortCrossoverKernel and BitonicSortMergeKernel (both defined
// elsewhere). Arrays with fewer than two values are left untouched.
1002  template <typename T, class Storage, class BinaryCompare>
1003  VISKORES_CONT static void Sort(viskores::cont::ArrayHandle<T, Storage>& values,
1004  BinaryCompare binary_compare)
1005  {
1007 
1008  viskores::Id numValues = values.GetNumberOfValues();
1009  if (numValues < 2)
1010  {
1011  return;
1012  }
      // numThreads ends up as half of the smallest power of two >= numValues
      // (e.g. numValues 5 -> 8 -> 4). Every launch below uses this count.
1013  viskores::Id numThreads = 1;
1014  while (numThreads < numValues)
1015  {
1016  numThreads *= 2;
1017  }
1018  numThreads /= 2;
1019 
1020  viskores::cont::Token token;
1021 
1022  auto portal = values.PrepareForInPlace(DeviceAdapterTag(), token);
1023  using MergeKernel = BitonicSortMergeKernel<decltype(portal), BinaryCompare>;
1024  using CrossoverKernel = BitonicSortCrossoverKernel<decltype(portal), BinaryCompare>;
1025 
      // Outer loop: crossoverSize doubles (1, 2, 4, ...) while below numValues.
      // Each crossover launch is followed by merge launches at sizes
      // crossoverSize/2, crossoverSize/4, ..., 1.
1026  for (viskores::Id crossoverSize = 1; crossoverSize < numValues; crossoverSize *= 2)
1027  {
1028  DerivedAlgorithm::Schedule(CrossoverKernel(portal, binary_compare, crossoverSize),
1029  numThreads);
1030  for (viskores::Id mergeSize = crossoverSize / 2; mergeSize > 0; mergeSize /= 2)
1031  {
1032  DerivedAlgorithm::Schedule(MergeKernel(portal, binary_compare, mergeSize), numThreads);
1033  }
1034  }
1035  }
1036 
// Convenience overload: sorts `values` in place using a default-constructed
// DefaultCompareFunctor, forwarded to DerivedAlgorithm::Sort.
1037  template <typename T, class Storage>
1038  VISKORES_CONT static void Sort(viskores::cont::ArrayHandle<T, Storage>& values)
1039  {
1041 
1042  DerivedAlgorithm::Sort(values, DefaultCompareFunctor());
1043  }
1044 
1045  //--------------------------------------------------------------------------
1046  // Sort by Key
// Sorts `keys` and `values` together by zipping them into one array of pairs
// and sorting that array with internal::KeyCompare<T, U>.
// NOTE(review): the `values` parameter declaration (original line 1049) is
// missing from this listing.
1047  template <typename T, typename U, class StorageT, class StorageU>
1048  VISKORES_CONT static void SortByKey(viskores::cont::ArrayHandle<T, StorageT>& keys,
1050  {
1052 
1053  //combine the keys and values into a ZipArrayHandle
1054  //we then need to specify a custom compare function wrapper
1055  //that only checks for key side of the pair, using a custom compare functor.
1056  auto zipHandle = viskores::cont::make_ArrayHandleZip(keys, values);
1057  DerivedAlgorithm::Sort(zipHandle, internal::KeyCompare<T, U>());
1058  }
1059 
// Sorts `keys` and the accompanying `values` array together by zipping them
// and sorting the zipped array with internal::KeyCompare<T, U, BinaryCompare>
// constructed from the caller's `binary_compare`.
//
// NOTE(review): listing line 1062 is not shown in this rendering -- presumably
// the declaration of the `values` parameter used below; confirm against the
// header.
1060  template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
1061  VISKORES_CONT static void SortByKey(viskores::cont::ArrayHandle<T, StorageT>& keys,
1063  BinaryCompare binary_compare)
1064  {
1066 
1067  //combine the keys and values into a ZipArrayHandle
1068  //we then need to specify a custom compare function wrapper
1069  //that only checks for key side of the pair, using the custom compare
1070  //functor that the user passed in
1071  auto zipHandle = viskores::cont::make_ArrayHandleZip(keys, values);
1072  DerivedAlgorithm::Sort(zipHandle, internal::KeyCompare<T, U, BinaryCompare>(binary_compare));
1073  }
1074 
// Binary transform: schedules a BinaryTransformKernel built from two input
// portals, one output portal and `binaryFunctor`.
//
// The number of entries processed is the smaller of the two input lengths.
// When that is zero the function returns before `output` is prepared, so
// `output` is left exactly as the caller passed it.
//
// NOTE(review): listing lines 1082-1084 (the function name and the array
// parameters, referred to below as input1, input2 and output) are not shown in
// this rendering; confirm the signature against the header.
1075  template <typename T,
1076  typename U,
1077  typename V,
1078  typename StorageT,
1079  typename StorageU,
1080  typename StorageV,
1081  typename BinaryFunctor>
1085  BinaryFunctor binaryFunctor)
1086  {
1088 
// Only the range covered by both inputs is processed.
1089  viskores::Id numValues = viskores::Min(input1.GetNumberOfValues(), input2.GetNumberOfValues());
1090  if (numValues <= 0)
1091  {
1092  return;
1093  }
1094 
1095  viskores::cont::Token token;
1096 
1097  auto input1Portal = input1.PrepareForInput(DeviceAdapterTag(), token);
1098  auto input2Portal = input2.PrepareForInput(DeviceAdapterTag(), token);
// output is (re)allocated here to hold exactly numValues entries.
1099  auto outputPortal = output.PrepareForOutput(numValues, DeviceAdapterTag(), token);
1100 
1101  BinaryTransformKernel<decltype(input1Portal),
1102  decltype(input2Portal),
1103  decltype(outputPortal),
1104  BinaryFunctor>
1105  binaryKernel(input1Portal, input2Portal, outputPortal, binaryFunctor);
1106  DerivedAlgorithm::Schedule(binaryKernel, numValues);
1107  }
1108 
1109  //};
1110  //--------------------------------------------------------------------------
1111  // Unique
// Convenience overload: forwards to DerivedAlgorithm::Unique using
// viskores::Equal as the comparison, modifying `values` in place.
1112  template <typename T, class Storage>
1113  VISKORES_CONT static void Unique(viskores::cont::ArrayHandle<T, Storage>& values)
1114  {
1116 
1117  DerivedAlgorithm::Unique(values, viskores::Equal());
1118  }
1119 
// Compacts `values` in place using `binary_compare` to classify entries.
//
// Steps:
//   1. Schedule ClassifyUniqueComparisonKernel over every entry of `values`,
//      writing one stencil entry per value.
//   2. CopyIf(values, stencilArray, outputArray) gathers the selected entries.
//   3. Resize `values` to the gathered count and copy the result back.
//
// NOTE(review): the classify kernel is defined elsewhere; presumably it marks
// an entry as kept when it differs from its predecessor under binary_compare
// (so only adjacent duplicates collapse) -- confirm.
// NOTE(review): listing lines 1126 and 1144 are not shown in this rendering --
// presumably the declarations of stencilArray and outputArray; confirm against
// the header.
1120  template <typename T, class Storage, class BinaryCompare>
1121  VISKORES_CONT static void Unique(viskores::cont::ArrayHandle<T, Storage>& values,
1122  BinaryCompare binary_compare)
1123  {
1125 
1127  viskores::Id inputSize = values.GetNumberOfValues();
1128 
// The user's comparison is wrapped in WrappedBinaryOperator<bool, ...> before
// being handed to the kernel.
1129  using WrappedBOpType = internal::WrappedBinaryOperator<bool, BinaryCompare>;
1130  WrappedBOpType wrappedCompare(binary_compare);
1131 
// Inner scope: the Token (and the portals obtained with it) goes out of scope
// before CopyIf and Copy touch the same arrays.
1132  {
1133  viskores::cont::Token token;
1134  auto valuesPortal = values.PrepareForInput(DeviceAdapterTag(), token);
1135  auto stencilPortal = stencilArray.PrepareForOutput(inputSize, DeviceAdapterTag(), token);
1136  ClassifyUniqueComparisonKernel<decltype(valuesPortal),
1137  decltype(stencilPortal),
1138  WrappedBOpType>
1139  classifyKernel(valuesPortal, stencilPortal, wrappedCompare);
1140 
1141  DerivedAlgorithm::Schedule(classifyKernel, inputSize);
1142  }
1143 
1145 
1146  DerivedAlgorithm::CopyIf(values, stencilArray, outputArray);
1147 
// Resize values to the compacted length, then copy the compacted data back.
1148  values.Allocate(outputArray.GetNumberOfValues());
1149  DerivedAlgorithm::Copy(outputArray, values);
1150  }
1151 
1152  //--------------------------------------------------------------------------
1153  // Upper bounds
// Schedules an UpperBoundsKernel over every entry of `values`, reading from
// `input` and `values` and writing one result per entry of `values` into
// `output` (which is allocated to values.GetNumberOfValues() entries).
//
// NOTE(review): the kernel is defined elsewhere; presumably `input` must be
// sorted and each result is the upper-bound index of the corresponding value
// in `input` -- confirm.
// NOTE(review): listing lines 1156-1157 (the `values` and `output` parameters
// used below) are not shown in this rendering; confirm against the header.
1154  template <typename T, class CIn, class CVal, class COut>
1155  VISKORES_CONT static void UpperBounds(const viskores::cont::ArrayHandle<T, CIn>& input,
1158  {
1160 
// One kernel instance, and one output entry, per value being looked up.
1161  viskores::Id arraySize = values.GetNumberOfValues();
1162 
1163  viskores::cont::Token token;
1164 
1165  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
1166  auto valuesPortal = values.PrepareForInput(DeviceAdapterTag(), token);
1167  auto outputPortal = output.PrepareForOutput(arraySize, DeviceAdapterTag(), token);
1168 
1169  UpperBoundsKernel<decltype(inputPortal), decltype(valuesPortal), decltype(outputPortal)> kernel(
1170  inputPortal, valuesPortal, outputPortal);
1171  DerivedAlgorithm::Schedule(kernel, arraySize);
1172  }
1173 
// Same flow as the UpperBounds overload without a comparator, except that the
// scheduled kernel is UpperBoundsKernelComparisonKernel and it additionally
// receives the caller's `binary_compare`.
//
// `output` is allocated to values.GetNumberOfValues() entries and the kernel
// is scheduled once per entry of `values`.
//
// NOTE(review): listing lines 1176-1177 (the `values` and `output` parameters
// used below) are not shown in this rendering; confirm against the header.
1174  template <typename T, class CIn, class CVal, class COut, class BinaryCompare>
1175  VISKORES_CONT static void UpperBounds(const viskores::cont::ArrayHandle<T, CIn>& input,
1178  BinaryCompare binary_compare)
1179  {
1181 
// One kernel instance, and one output entry, per value being looked up.
1182  viskores::Id arraySize = values.GetNumberOfValues();
1183 
1184  viskores::cont::Token token;
1185 
1186  auto inputPortal = input.PrepareForInput(DeviceAdapterTag(), token);
1187  auto valuesPortal = values.PrepareForInput(DeviceAdapterTag(), token);
1188  auto outputPortal = output.PrepareForOutput(arraySize, DeviceAdapterTag(), token);
1189 
1190  UpperBoundsKernelComparisonKernel<decltype(inputPortal),
1191  decltype(valuesPortal),
1192  decltype(outputPortal),
1193  BinaryCompare>
1194  kernel(inputPortal, valuesPortal, outputPortal, binary_compare);
1195 
1196  DerivedAlgorithm::Schedule(kernel, arraySize);
1197  }
1198 
// In-place variant: forwards to the three-array UpperBounds of
// DeviceAdapterAlgorithmGeneral, passing `values_output` as both the values to
// look up and the array that receives the results.
//
// NOTE(review): listing lines 1201-1202 (the `input` and `values_output`
// parameters used below) are not shown in this rendering; confirm their types
// against the header.
1199  template <class CIn, class COut>
1200  VISKORES_CONT static void UpperBounds(
1203  {
1205 
1206  DeviceAdapterAlgorithmGeneral<DerivedAlgorithm, DeviceAdapterTag>::UpperBounds(
1207  input, values_output, values_output);
1208  }
1209 };
1210 
1211 } // namespace internal
1212 
// Factory for the task objects built from a worklet and its invocation.
//
// Both MakeTask overloads construct a
// viskores::exec::internal::TaskSingular<WorkletType, InvocationType> from the
// worklet, the invocation and globalIndexOffset. They differ only in the type
// of the third, unnamed argument (viskores::Id or viskores::Id3), which is
// accepted but not used. DeviceTag is not referenced inside this class body.
1223 template <typename DeviceTag>
1224 class DeviceTaskTypes
1225 {
1226 public:
// Overload taking a viskores::Id as the (ignored) third argument.
// globalIndexOffset defaults to 0 and is forwarded to the task constructor.
1227  template <typename WorkletType, typename InvocationType>
1228  static viskores::exec::internal::TaskSingular<WorkletType, InvocationType> MakeTask(
1229  WorkletType& worklet,
1230  InvocationType& invocation,
1231  viskores::Id,
1232  viskores::Id globalIndexOffset = 0)
1233  {
1234  using Task = viskores::exec::internal::TaskSingular<WorkletType, InvocationType>;
1235  return Task(worklet, invocation, globalIndexOffset);
1236  }
1237 
// Overload taking a viskores::Id3 as the (ignored) third argument; otherwise
// identical to the overload above.
1238  template <typename WorkletType, typename InvocationType>
1239  static viskores::exec::internal::TaskSingular<WorkletType, InvocationType> MakeTask(
1240  WorkletType& worklet,
1241  InvocationType& invocation,
1242  viskores::Id3,
1243  viskores::Id globalIndexOffset = 0)
1244  {
1245  using Task = viskores::exec::internal::TaskSingular<WorkletType, InvocationType>;
1246  return Task(worklet, invocation, globalIndexOffset);
1247  }
1248 };
1249 }
1250 } // namespace viskores::cont
1251 
1252 #endif //viskores_cont_internal_DeviceAdapterAlgorithmGeneral_h
ArrayHandle.h
viskores::cont::IsArrayHandleDiscard
Helper to determine if an ArrayHandle type is an ArrayHandleDiscard.
Definition: ArrayHandleDiscard.h:194
FunctorsGeneral.h
viskores::cont::ArrayHandle::ReadPortal
ReadPortalType ReadPortal() const
Get an array portal that can be used in the control environment.
Definition: ArrayHandle.h:447
viskores::Transform
auto Transform(const TupleType &&tuple, Function &&f) -> decltype(Apply(tuple, detail::TupleTransformFunctor(), std::forward< Function >(f)))
Construct a new viskores::Tuple by applying a function to each value.
Definition: Tuple.h:221
viskores::cont::ArrayHandle::PrepareForInput
ReadPortalType PrepareForInput(viskores::cont::DeviceAdapterId device, viskores::cont::Token &token) const
Prepares this array to be used as an input to an operation in the execution environment.
Definition: ArrayHandle.h:615
viskores::cont::make_ArrayHandleZip
viskores::cont::ArrayHandleZip< FirstHandleType, SecondHandleType > make_ArrayHandleZip(const FirstHandleType &first, const SecondHandleType &second)
A convenience function for creating an ArrayHandleZip.
Definition: ArrayHandleZip.h:300
viskores::cont::DeviceTaskTypes::MakeTask
static viskores::exec::internal::TaskSingular< WorkletType, InvocationType > MakeTask(WorkletType &worklet, InvocationType &invocation, viskores::Id3, viskores::Id globalIndexOffset=0)
Definition: DeviceAdapterAlgorithmGeneral.h:1239
viskores::TypeTraits::ZeroInitialization
static T ZeroInitialization()
A static function that returns 0 (or the closest equivalent to it) for the given type.
Definition: TypeTraits.h:85
BitField.h
TaskSingular.h
viskores::cont::BitField::GetNumberOfBits
viskores::Id GetNumberOfBits() const
Return the number of bits stored by this BitField.
viskores::cont::ArrayHandle
Manages an array-worth of data.
Definition: ArrayHandle.h:313
viskores::cont::BitField::Allocate
void Allocate(viskores::Id numberOfBits, viskores::CopyFlag preserve, viskores::cont::Token &token) const
Allocate the requested number of bits.
ArrayHandleView.h
ArrayHandleDecorator.h
Hints.h
viskores::Add
Definition: Types.h:268
viskores::cont::BitField::PrepareForInput
ReadPortalType PrepareForInput(viskores::cont::DeviceAdapterId device, viskores::cont::Token &token) const
Prepares this BitField to be used as an input to an operation in the execution environment.
ArrayHandleZip.h
TypeTraits.h
viskores::TypeTraits
The TypeTraits class provides helpful compile-time information about the basic types used in Viskores...
Definition: TypeTraits.h:69
viskores::Id
viskores::Int64 Id
Base type to use to index arrays.
Definition: Types.h:235
viskores::cont::ArrayHandle::ReleaseResources
void ReleaseResources() const
Releases all resources in both the control and execution environments.
Definition: ArrayHandle.h:600
VISKORES_CONT
#define VISKORES_CONT
Definition: ExportMacros.h:65
viskores::cont::BitField
Definition: BitField.h:507
viskores
Groups connected points that have the same field value.
Definition: Atomic.h:27
viskores::NotZeroInitialized
Predicate that takes a single argument x, and returns True if it isn't the identity of the Type T.
Definition: UnaryPredicates.h:40
viskores::cont::Token::DetachFromAll
void DetachFromAll()
Detaches this Token from all resources to allow them to be used elsewhere or deleted.
viskores::cont::ArrayHandle::PrepareForInPlace
WritePortalType PrepareForInPlace(viskores::cont::DeviceAdapterId device, viskores::cont::Token &token) const
Prepares this array to be used in an in-place operation (both as input and output) in the execution e...
Definition: ArrayHandle.h:634
viskores::CopyFlag::On
@ On
ArrayHandleIndex.h
ErrorMessageBuffer.h
viskores::CountSetBits
viskores::Int32 CountSetBits(viskores::UInt32 word)
Count the total number of bits set in word.
Definition: Math.h:2948
viskores::cont::ArrayHandle::Allocate
void Allocate(viskores::Id numberOfValues, viskores::CopyFlag preserve, viskores::cont::Token &token) const
Allocates an array large enough to hold the given number of values.
Definition: ArrayHandle.h:504
viskores::cont::ArrayHandle::GetNumberOfValues
viskores::Id GetNumberOfValues() const
Returns the number of entries in the array.
Definition: ArrayHandle.h:482
viskores::cont::ArrayHandle::PrepareForOutput
WritePortalType PrepareForOutput(viskores::Id numberOfValues, viskores::cont::DeviceAdapterId device, viskores::cont::Token &token) const
Prepares (allocates) this array to be used as an output from an operation in the execution environmen...
Definition: ArrayHandle.h:654
viskores::cont::DeviceTaskTypes::MakeTask
static viskores::exec::internal::TaskSingular< WorkletType, InvocationType > MakeTask(WorkletType &worklet, InvocationType &invocation, viskores::Id, viskores::Id globalIndexOffset=0)
Definition: DeviceAdapterAlgorithmGeneral.h:1228
VISKORES_ASSERT
#define VISKORES_ASSERT(condition)
Definition: Assert.h:51
viskores::cont::ArrayHandle::WritePortal
WritePortalType WritePortal() const
Get an array portal that can be used in the control environment.
Definition: ArrayHandle.h:468
VISKORES_STATIC_ASSERT_MSG
#define VISKORES_STATIC_ASSERT_MSG(condition, message)
Definition: StaticAssert.h:26
viskores::Sum
Binary Predicate that takes two arguments, x and y, and returns the sum (addition) of the two val...
Definition: BinaryOperators.h:41
viskores::cont::make_ArrayHandleDecorator
ArrayHandleDecorator< typename std::decay< DecoratorImplT >::type, typename std::decay< ArrayTs >::type... > make_ArrayHandleDecorator(viskores::Id numValues, DecoratorImplT &&f, ArrayTs &&... arrays)
Create an ArrayHandleDecorator with the specified number of values that uses the provided DecoratorIm...
Definition: ArrayHandleDecorator.h:713
viskores::cont::BitField::IsValidWordType
detail::BitFieldTraits::IsValidWordType< WordType > IsValidWordType
Check whether a word type is valid for non-atomic operations.
Definition: BitField.h:535
VISKORES_LOG_SCOPE_FUNCTION
#define VISKORES_LOG_SCOPE_FUNCTION(level)
Definition: Logging.h:225
BinaryPredicates.h
ArrayHandleDiscard.h
viskores::cont::LogLevel::Perf
@ Perf
General timing data and algorithm flow information, such as filter execution, worklet dispatches,...
viskores::Equal
Binary Predicate that takes two arguments, x and y, and returns True if and only if x is equa...
Definition: BinaryPredicates.h:41
Logging.h
Logging utilities.
viskores::cont::BitField::PrepareForOutput
WritePortalType PrepareForOutput(viskores::Id numBits, viskores::cont::DeviceAdapterId device, viskores::cont::Token &token) const
Prepares (allocates) this BitField to be used as an output from an operation in the execution environ...
viskores::Vec< viskores::Id, 3 >
viskores::cont::Token
A token to hold the scope of an ArrayHandle or other object.
Definition: Token.h:43
Windows.h