Viskores  1.0
ParallelQuickSortOpenMP.h
Go to the documentation of this file.
1 //============================================================================
2 // The contents of this file are covered by the Viskores license. See
3 // LICENSE.txt for details.
4 //
5 // By contributing to this file, all contributors agree to the Developer
6 // Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
7 //============================================================================
8 
9 //============================================================================
10 // Copyright (c) Kitware, Inc.
11 // All rights reserved.
12 // See LICENSE.txt for details.
13 //
14 // This software is distributed WITHOUT ANY WARRANTY; without even
15 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 // PURPOSE. See the above copyright notice for more information.
17 //============================================================================
18 
21 
23 
24 #include <viskores/Types.h>
26 
27 #include <omp.h>
28 
29 #include <iterator>
30 
31 namespace viskores
32 {
33 namespace cont
34 {
35 namespace openmp
36 {
37 namespace sort
38 {
39 namespace quick
40 {
41 
42 template <typename IterType, typename RawBinaryCompare>
44 {
45  using BinaryCompare = viskores::cont::internal::WrappedBinaryOperator<bool, RawBinaryCompare>;
46  using ValueType = typename std::iterator_traits<IterType>::value_type;
47 
48  IterType Data;
51 
52  QuickSorter(IterType iter, RawBinaryCompare comp)
53  : Data(iter)
54  , Compare(comp)
55  , SerialSize(0)
56  {
57  }
58 
59  void Execute(const viskores::Id2 range)
60  {
61  VISKORES_OPENMP_DIRECTIVE(parallel default(shared))
62  {
64  {
65  this->Prepare(range);
66  this->Sort(range);
67  }
68  }
69  }
70 
71 private:
72  void Prepare(const viskores::Id2 /*range*/)
73  {
74  // Rough benchmarking on an 4-core+4HT processor shows that this sort is
75  // most efficient (within 5% of TBB sort) when we switch to a serial
76  // implementation once a partition is less than 32K keys
77  this->SerialSize = 32768;
78  }
79 
84  {
85  if (this->Compare(v1.second, v2.second))
86  { // v1 < v2
87  if (this->Compare(v1.second, v3.second))
88  { // v1 < v3
89  if (this->Compare(v2.second, v3.second))
90  { // v1 < v2 < v3
91  return v2;
92  }
93  else // v3 < v2
94  { // v1 < v3 < v2
95  return v3;
96  }
97  }
98  else // v3 < v1
99  { // v3 < v1 < v2
100  return v1;
101  }
102  }
103  else
104  { // v2 < v1
105  if (this->Compare(v2.second, v3.second))
106  { // v2 < v3
107  if (this->Compare(v1.second, v3.second))
108  { // v2 < v1 < v3
109  return v1;
110  }
111  else
112  { // v2 < v3 < v1
113  return v3;
114  }
115  }
116  else
117  { // v3 < v2 < v1
118  return v2;
119  }
120  }
121  }
122 
124  {
125  return this->MedianOf3(viskores::Pair<viskores::Id, ValueType>(ids[0], this->Data[ids[0]]),
126  viskores::Pair<viskores::Id, ValueType>(ids[1], this->Data[ids[1]]),
127  viskores::Pair<viskores::Id, ValueType>(ids[2], this->Data[ids[2]]));
128  }
129 
131  {
132  return this->MedianOf3(
133  this->MedianOf3(ids), this->MedianOf3(ids + 3), this->MedianOf3(ids + 6));
134  }
135 
136  // Approximate the median of the range and return its index.
138  {
139  const viskores::Id numVals = range[1] - range[0];
140  assert(numVals >= 9);
141 
142  // Pseudorandomize the pivot locations to avoid issues with periodic data
143  // (evenly sampling inputs with periodic values tends to cause the same
144  // value to be obtained for all samples)
145  const viskores::Id seed = range[0] * 3 / 2 + range[1] * 11 / 3 + numVals * 10 / 7;
146  const viskores::Id delta = (numVals / 9) * 4 / 3;
147 
148  viskores::Id sampleLocations[9] = {
149  range[0] + ((seed + 0 * delta) % numVals), range[0] + ((seed + 1 * delta) % numVals),
150  range[0] + ((seed + 2 * delta) % numVals), range[0] + ((seed + 3 * delta) % numVals),
151  range[0] + ((seed + 4 * delta) % numVals), range[0] + ((seed + 5 * delta) % numVals),
152  range[0] + ((seed + 6 * delta) % numVals), range[0] + ((seed + 7 * delta) % numVals),
153  range[0] + ((seed + 8 * delta) % numVals)
154  };
155 
156  return this->PseudoMedianOf9(sampleLocations);
157  }
158 
159  // Select a pivot and partition data with it, returning the final location of
160  // the pivot element(s). We use Bentley-McIlroy three-way partitioning to
161  // improve handling of duplicate keys, so the pivot "location" is actually
162  // a range of identical keys, hence the viskores::Id2 return type, which mark
163  // the [begin, end) of the pivot range.
165  {
166  using namespace std; // For ADL swap
167 
168  const viskores::Pair<viskores::Id, ValueType> pivotData = this->SelectPivot(range);
169  const viskores::Id& origPivotIdx = pivotData.first;
170  const ValueType& pivotVal = pivotData.second;
171 
172  // Move the pivot to the end of the block while we partition the rest:
173  swap(this->Data[origPivotIdx], this->Data[range[1] - 1]);
174 
175  // Indices of the last partitioned keys:
176  viskores::Id2 dataCursors(range[0] - 1, range[1] - 1);
177 
178  // Indices of the start/end of the keys equal to the pivot:
179  viskores::Id2 pivotCursors(dataCursors);
180 
181  for (;;)
182  {
183  // Advance the data cursors past all keys that are already partitioned:
184  while (this->Compare(this->Data[++dataCursors[0]], pivotVal))
185  ;
186  while (this->Compare(pivotVal, this->Data[--dataCursors[1]]) && dataCursors[1] > range[0])
187  ;
188 
189  // Range is partitioned the cursors have crossed:
190  if (dataCursors[0] >= dataCursors[1])
191  {
192  break;
193  }
194 
195  // Both dataCursors are pointing at incorrectly partitioned keys. Swap
196  // them to place them in the proper partitions:
197  swap(this->Data[dataCursors[0]], this->Data[dataCursors[1]]);
198 
199  // If the elements we just swapped are actually equivalent to the pivot
200  // value, move them to the pivot storage locations:
201  if (!this->Compare(this->Data[dataCursors[0]], pivotVal))
202  {
203  ++pivotCursors[0];
204  swap(this->Data[pivotCursors[0]], this->Data[dataCursors[0]]);
205  }
206  if (!this->Compare(pivotVal, this->Data[dataCursors[1]]))
207  {
208  --pivotCursors[1];
209  swap(this->Data[pivotCursors[1]], this->Data[dataCursors[1]]);
210  }
211  }
212 
213  // Data is now partitioned as:
214  // | Equal | Less | Greater | Equal |
215  // Move the equal keys to the middle for the final partitioning:
216  // | Less | Equal | Greater |
217  // First the original pivot value at the end:
218  swap(this->Data[range[1] - 1], this->Data[dataCursors[0]]);
219 
220  // Update the cursors to either side of the pivot:
221  dataCursors = viskores::Id2(dataCursors[0] - 1, dataCursors[0] + 1);
222 
223  for (viskores::Id i = range[0]; i < pivotCursors[0]; ++i, --dataCursors[0])
224  {
225  swap(this->Data[i], this->Data[dataCursors[0]]);
226  }
227 
228  for (viskores::Id i = range[1] - 2; i > pivotCursors[1]; --i, ++dataCursors[1])
229  {
230  swap(this->Data[i], this->Data[dataCursors[1]]);
231  }
232 
233  // Adjust the cursor so we can use them to construct the regions for the
234  // recursive call:
235  ++dataCursors[0];
236 
237  return dataCursors;
238  }
239 
240  void Sort(const viskores::Id2 range)
241  {
242  const viskores::Id numVals = range[1] - range[0];
243  if (numVals <= this->SerialSize)
244  {
245  std::sort(this->Data + range[0], this->Data + range[1], this->Compare);
246  return;
247  }
248 
249  const viskores::Id2 pivots = this->PartitionData(range);
250  const viskores::Id2 lhRange = viskores::Id2(range[0], pivots[0]);
251  const viskores::Id2 rhRange = viskores::Id2(pivots[1], range[1]);
252 
253  // Intel compilers seem to have trouble following the 'this' pointer
254  // when launching tasks, resulting in a corrupt task environment.
255  // Explicitly copying the pointer into a local variable seems to fix this.
256  auto explicitThis = this;
257 
258  VISKORES_OPENMP_DIRECTIVE(task default(none) firstprivate(rhRange, explicitThis))
259  {
260  explicitThis->Sort(rhRange);
261  }
262 
263  this->Sort(lhRange);
264  }
265 };
266 }
267 } // end namespace sort::quick
268 }
269 }
270 } // end namespace viskores::cont::openmp
viskores::cont::openmp::sort::quick::QuickSorter::MedianOf3
viskores::Pair< viskores::Id, ValueType > MedianOf3(const viskores::Id ids[3]) const
Definition: ParallelQuickSortOpenMP.h:123
viskores::Id2
viskores::Vec< viskores::Id, 2 > Id2
Id2 corresponds to a 2-dimensional index.
Definition: Types.h:935
ArrayHandle.h
viskores::cont::openmp::sort::quick::QuickSorter::SelectPivot
viskores::Pair< viskores::Id, ValueType > SelectPivot(const viskores::Id2 range) const
Definition: ParallelQuickSortOpenMP.h:137
FunctorsGeneral.h
viskores::cont::openmp::sort::quick::QuickSorter::Sort
void Sort(const viskores::Id2 range)
Definition: ParallelQuickSortOpenMP.h:240
Types.h
viskores::cont::openmp::sort::quick::QuickSorter::SerialSize
viskores::Id SerialSize
Definition: ParallelQuickSortOpenMP.h:50
viskores::cont::openmp::sort::quick::QuickSorter::QuickSorter
QuickSorter(IterType iter, RawBinaryCompare comp)
Definition: ParallelQuickSortOpenMP.h:52
viskores::cont::openmp::sort::quick::QuickSorter::Prepare
void Prepare(const viskores::Id2)
Definition: ParallelQuickSortOpenMP.h:72
FunctorsOpenMP.h
viskores::cont::openmp::sort::quick::QuickSorter::BinaryCompare
viskores::cont::internal::WrappedBinaryOperator< bool, RawBinaryCompare > BinaryCompare
Definition: ParallelQuickSortOpenMP.h:45
VISKORES_OPENMP_DIRECTIVE
#define VISKORES_OPENMP_DIRECTIVE(directive)
Definition: FunctorsOpenMP.h:45
DeviceAdapterTagOpenMP.h
viskores::Id
viskores::Int64 Id
Base type to use to index arrays.
Definition: Types.h:235
viskores
Groups connected points that have the same field value.
Definition: Atomic.h:27
viskores::cont::openmp::sort::quick::QuickSorter::MedianOf3
viskores::Pair< viskores::Id, ValueType > MedianOf3(const viskores::Pair< viskores::Id, ValueType > &v1, const viskores::Pair< viskores::Id, ValueType > &v2, const viskores::Pair< viskores::Id, ValueType > &v3) const
Definition: ParallelQuickSortOpenMP.h:80
viskores::Pair::first
FirstType first
The pair's first object.
Definition: Pair.h:58
viskores::cont::openmp::sort::quick::QuickSorter
Definition: ParallelQuickSortOpenMP.h:43
viskores::cont::openmp::sort::quick::QuickSorter::Execute
void Execute(const viskores::Id2 range)
Definition: ParallelQuickSortOpenMP.h:59
viskores::Pair
A viskores::Pair is essentially the same as an STL pair object except that the methods (constructors ...
Definition: Pair.h:37
viskores::cont::openmp::sort::quick::QuickSorter::PartitionData
viskores::Id2 PartitionData(const viskores::Id2 range)
Definition: ParallelQuickSortOpenMP.h:164
viskores::cont::openmp::sort::quick::QuickSorter::PseudoMedianOf9
viskores::Pair< viskores::Id, ValueType > PseudoMedianOf9(const viskores::Id ids[9]) const
Definition: ParallelQuickSortOpenMP.h:130
viskores::cont::openmp::sort::quick::QuickSorter::Compare
BinaryCompare Compare
Definition: ParallelQuickSortOpenMP.h:49
viskores::Pair::second
SecondType second
The pair's second object.
Definition: Pair.h:63
viskores::cont::openmp::sort::quick::QuickSorter::ValueType
typename std::iterator_traits< IterType >::value_type ValueType
Definition: ParallelQuickSortOpenMP.h:46
viskores::Vec< viskores::Id, 2 >
viskores::cont::openmp::sort::quick::QuickSorter::Data
IterType Data
Definition: ParallelQuickSortOpenMP.h:48