Viskores  1.0
ArrayPortalFromThrust.h
Go to the documentation of this file.
1 //============================================================================
2 // The contents of this file are covered by the Viskores license. See
3 // LICENSE.txt for details.
4 //
5 // By contributing to this file, all contributors agree to the Developer
6 // Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
7 //============================================================================
8 
9 //============================================================================
10 // Copyright (c) Kitware, Inc.
11 // All rights reserved.
12 // See LICENSE.txt for details.
13 //
14 // This software is distributed WITHOUT ANY WARRANTY; without even
15 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 // PURPOSE. See the above copyright notice for more information.
17 //============================================================================
18 #ifndef viskores_exec_cuda_internal_ArrayPortalFromThrust_h
19 #define viskores_exec_cuda_internal_ArrayPortalFromThrust_h
20 
21 #include <viskores/Types.h>
23 
24 #include <iterator>
25 #include <type_traits>
26 
29 #include <thrust/system/cuda/memory.h>
31 
32 namespace viskores
33 {
34 namespace exec
35 {
36 namespace cuda
37 {
38 namespace internal
39 {
40 
41 // The clang-format rules want to put the curly braces on separate lines. Since
42 // these declarations are a type-level truth table, minimize the amount of
43 // space it takes up.
44 // clang-format off
45 template <typename T> struct UseScalarTextureLoad : public std::false_type {};
46 template <typename T> struct UseVecTextureLoads : public std::false_type {};
47 template <typename T> struct UseMultipleScalarTextureLoads : public std::false_type {};
48 
49 //currently CUDA doesn't support texture loading of signed char's so that is why
50 //you don't see viskores::Int8 in any of the lists.
51 template <> struct UseScalarTextureLoad<const viskores::UInt8> : std::true_type {};
52 template <> struct UseScalarTextureLoad<const viskores::Int16> : std::true_type {};
53 template <> struct UseScalarTextureLoad<const viskores::UInt16> : std::true_type {};
54 template <> struct UseScalarTextureLoad<const viskores::Int32> : std::true_type {};
55 template <> struct UseScalarTextureLoad<const viskores::UInt32> : std::true_type {};
56 template <> struct UseScalarTextureLoad<const viskores::Float32> : std::true_type {};
57 template <> struct UseScalarTextureLoad<const viskores::Float64> : std::true_type {};
58 
59 //CUDA needs vec types converted to CUDA types ( float2, uint2), so we have a special
60 //case for these vec texture loads.
61 template <> struct UseVecTextureLoads<const viskores::Vec2i_32> : std::true_type {};
62 template <> struct UseVecTextureLoads<const viskores::Vec2ui_32> : std::true_type {};
63 template <> struct UseVecTextureLoads<const viskores::Vec2f_32> : std::true_type {};
64 template <> struct UseVecTextureLoads<const viskores::Vec2f_64> : std::true_type {};
65 
66 template <> struct UseVecTextureLoads<const viskores::Vec4i_32> : std::true_type {};
67 template <> struct UseVecTextureLoads<const viskores::Vec4ui_32> : std::true_type {};
68 template <> struct UseVecTextureLoads<const viskores::Vec4f_32> : std::true_type {};
69 
70 //CUDA doesn't support loading 3 wide values through a texture unit by default,
71 //so instead we fetch through texture three times and store the result
72 //currently CUDA doesn't support texture loading of signed char's so that is why
73 //you don't see viskores::Int8 in any of the lists.
74 
75 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec2ui_8> : std::true_type {};
76 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec2i_16> : std::true_type {};
77 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec2ui_16> : std::true_type {};
78 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec2i_64> : std::true_type {};
79 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec2ui_64> : std::true_type {};
80 
81 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3ui_8> : std::true_type {};
82 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3i_16> : std::true_type {};
83 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3ui_16> : std::true_type {};
84 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3i_32> : std::true_type {};
85 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3ui_32> : std::true_type {};
86 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3f_32> : std::true_type {};
87 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec3f_64> : std::true_type {};
88 
89 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4ui_8> : std::true_type {};
90 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4i_16> : std::true_type {};
91 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4ui_16> : std::true_type {};
92 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4i_64> : std::true_type {};
93 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4ui_64> : std::true_type {};
94 template <> struct UseMultipleScalarTextureLoads<const viskores::Vec4f_64> : std::true_type {};
95 // clang-format on
96 
97 //this T type is not one that is valid to be loaded through texture memory
98 template <typename T, typename Enable = void>
99 struct load_through_texture
100 {
101  static constexpr viskores::IdComponent WillUseTexture = 0;
102 
103  __device__ static T get(const T* const data) { return *data; }
104 };
105 
106 //only load through a texture if we have sm 35 support
107 
108 // this T type is valid to be loaded through a single texture memory fetch
109 template <typename T>
110 struct load_through_texture<T, typename std::enable_if<UseScalarTextureLoad<const T>::value>::type>
111 {
112 
113  static constexpr viskores::IdComponent WillUseTexture = 1;
114 
115  __device__ static T get(const T* const data)
116  {
117 #if __CUDA_ARCH__ >= 350
118  // printf("__CUDA_ARCH__ UseScalarTextureLoad");
119  return __ldg(data);
120 #else
121  return *data;
122 #endif
123  }
124 };
125 
126 // this T type is valid to be loaded through a single vec texture memory fetch
127 template <typename T>
128 struct load_through_texture<T, typename std::enable_if<UseVecTextureLoads<const T>::value>::type>
129 {
130  static constexpr viskores::IdComponent WillUseTexture = 1;
131 
132  __device__ static T get(const T* const data)
133  {
134 #if __CUDA_ARCH__ >= 350
135  // printf("__CUDA_ARCH__ UseVecTextureLoads");
136  return getAs(data);
137 #else
138  return *data;
139 #endif
140  }
141 
142  __device__ static viskores::Vec2i_32 getAs(const viskores::Vec2i_32* const data)
143  {
144  const int2 temp = __ldg((const int2*)data);
145  return viskores::Vec2i_32(temp.x, temp.y);
146  }
147 
148  __device__ static viskores::Vec2ui_32 getAs(const viskores::Vec2ui_32* const data)
149  {
150  const uint2 temp = __ldg((const uint2*)data);
151  return viskores::Vec2ui_32(temp.x, temp.y);
152  }
153 
154  __device__ static viskores::Vec4i_32 getAs(const viskores::Vec4i_32* const data)
155  {
156  const int4 temp = __ldg((const int4*)data);
157  return viskores::Vec4i_32(temp.x, temp.y, temp.z, temp.w);
158  }
159 
160  __device__ static viskores::Vec4ui_32 getAs(const viskores::Vec4ui_32* const data)
161  {
162  const uint4 temp = __ldg((const uint4*)data);
163  return viskores::Vec4ui_32(temp.x, temp.y, temp.z, temp.w);
164  }
165 
166  __device__ static viskores::Vec2f_32 getAs(const viskores::Vec2f_32* const data)
167  {
168  const float2 temp = __ldg((const float2*)data);
169  return viskores::Vec2f_32(temp.x, temp.y);
170  }
171 
172  __device__ static viskores::Vec4f_32 getAs(const viskores::Vec4f_32* const data)
173  {
174  const float4 temp = __ldg((const float4*)data);
175  return viskores::Vec4f_32(temp.x, temp.y, temp.z, temp.w);
176  }
177 
178  __device__ static viskores::Vec2f_64 getAs(const viskores::Vec2f_64* const data)
179  {
180  const double2 temp = __ldg((const double2*)data);
181  return viskores::Vec2f_64(temp.x, temp.y);
182  }
183 };
184 
185 //this T type is valid to be loaded through multiple texture memory fetches
186 template <typename T>
187 struct load_through_texture<
188  T,
189  typename std::enable_if<UseMultipleScalarTextureLoads<const T>::value>::type>
190 {
191  static constexpr viskores::IdComponent WillUseTexture = 1;
192 
193  using NonConstT = typename std::remove_const<T>::type;
194 
195  __device__ static T get(const T* const data)
196  {
197 #if __CUDA_ARCH__ >= 350
198  // printf("__CUDA_ARCH__ UseMultipleScalarTextureLoads");
199  return getAs(data);
200 #else
201  return *data;
202 #endif
203  }
204 
205  __device__ static T getAs(const T* const data)
206  {
207  //we need to fetch each component individually
208  const viskores::IdComponent NUM_COMPONENTS = T::NUM_COMPONENTS;
209  using ComponentType = typename T::ComponentType;
210  const ComponentType* recasted_data = (const ComponentType*)(data);
211  NonConstT result;
212 #pragma unroll
213  for (viskores::IdComponent i = 0; i < NUM_COMPONENTS; ++i)
214  {
215  result[i] = __ldg(recasted_data + i);
216  }
217  return result;
218  }
219 };
220 
221 class ArrayPortalFromThrustBase
222 {
223 };
224 
228 template <typename T>
229 class ArrayPortalFromThrust : public ArrayPortalFromThrustBase
230 {
231 public:
232  using ValueType = T;
233  using IteratorType = T*;
234  using difference_type = std::ptrdiff_t;
235 
236  VISKORES_EXEC_CONT ArrayPortalFromThrust() {}
237 
239  ArrayPortalFromThrust(IteratorType begin, IteratorType end)
240  : BeginIterator(begin)
241  , EndIterator(end)
242  {
243  }
244 
249  template <typename OtherT>
250  VISKORES_EXEC_CONT ArrayPortalFromThrust(const ArrayPortalFromThrust<OtherT>& src)
251  : BeginIterator(src.GetIteratorBegin())
252  , EndIterator(src.GetIteratorEnd())
253  {
254  }
255 
257  viskores::Id GetNumberOfValues() const
258  {
259  // Not using std::distance because on CUDA it cannot be used on a device.
260  return static_cast<viskores::Id>((this->EndIterator - this->BeginIterator));
261  }
262 
264  ValueType Get(viskores::Id index) const
265  {
266  return *(this->BeginIterator + static_cast<difference_type>(index));
267  }
268 
270  void Set(viskores::Id index, ValueType value) const
271  {
272  *(this->BeginIterator + static_cast<difference_type>(index)) = value;
273  }
274 
276  IteratorType GetIteratorBegin() const { return this->BeginIterator; }
277 
279  IteratorType GetIteratorEnd() const { return this->EndIterator; }
280 
281 private:
282  IteratorType BeginIterator;
283  IteratorType EndIterator;
284 };
285 
286 template <typename T>
287 class ConstArrayPortalFromThrust : public ArrayPortalFromThrustBase
288 {
289 public:
290  using ValueType = T;
291  using IteratorType = const T*;
292  using difference_type = std::ptrdiff_t;
293 
294  VISKORES_EXEC_CONT ConstArrayPortalFromThrust()
295  : BeginIterator(nullptr)
296  , EndIterator(nullptr)
297  {
298  }
299 
301  ConstArrayPortalFromThrust(IteratorType begin, IteratorType end)
302  : BeginIterator(begin)
303  , EndIterator(end)
304  {
305  // printf("ConstArrayPortalFromThrust() %s \n", __PRETTY_FUNCTION__ );
306  }
307 
312  // template<typename OtherT>
314  ConstArrayPortalFromThrust(const ArrayPortalFromThrust<T>& src)
315  : BeginIterator(src.GetIteratorBegin())
316  , EndIterator(src.GetIteratorEnd())
317  {
318  }
319 
321  viskores::Id GetNumberOfValues() const
322  {
323  // Not using std::distance because on CUDA it cannot be used on a device.
324  return static_cast<viskores::Id>((this->EndIterator - this->BeginIterator));
325  }
326 
327 //The VISKORES_CUDA_DEVICE_PASS define makes sure that the device only signature
328 //only shows up for the device compilation. This allows the nvcc compiler
329 //to have separate host and device code paths for the same method. This
330 //solves the problem of trying to call a device only method from a
331 //device/host method
332 #ifdef VISKORES_CUDA_DEVICE_PASS
333  __device__ ValueType Get(viskores::Id index) const
334  {
336  this->BeginIterator + index);
337  }
338 
339  __device__ void Set(viskores::Id viskoresNotUsed(index), ValueType viskoresNotUsed(value)) const
340  {
341  }
342 
343 #else
344  ValueType Get(viskores::Id viskoresNotUsed(index)) const { return ValueType(); }
345 
346  void Set(viskores::Id viskoresNotUsed(index), ValueType viskoresNotUsed(value)) const
347  {
348 #if !(defined(VISKORES_MSVC) && defined(VISKORES_CUDA))
349  VISKORES_ASSERT(true && "Cannot set to const array.");
350 #endif
351  }
352 #endif
353 
355  IteratorType GetIteratorBegin() const { return this->BeginIterator; }
356 
358  IteratorType GetIteratorEnd() const { return this->EndIterator; }
359 
360 private:
361  IteratorType BeginIterator;
362  IteratorType EndIterator;
363 };
364 }
365 }
366 }
367 } // namespace viskores::exec::cuda::internal
368 
369 #endif //viskores_exec_cuda_internal_ArrayPortalFromThrust_h
viskores::Int16
int16_t Int16
Base type to use for 16-bit signed integer numbers.
Definition: Types.h:181
Types.h
VISKORES_THIRDPARTY_POST_INCLUDE
#define VISKORES_THIRDPARTY_POST_INCLUDE
Definition: Configure.h:200
ArrayPortalToIterators.h
viskores::Vec2f_32
viskores::Vec< viskores::Float32, 2 > Vec2f_32
Vec2f_32 corresponds to a 2-dimensional vector of 32-bit floating point values.
Definition: Types.h:952
viskoresNotUsed
#define viskoresNotUsed(parameter_name)
Simple macro to identify a parameter as unused.
Definition: ExportMacros.h:136
viskores::Vec4i_32
viskores::Vec< viskores::Int32, 4 > Vec4i_32
Vec4i_32 corresponds to a 4-dimensional vector of 32-bit integer values.
Definition: Types.h:1216
viskores::get
auto get(const viskores::Tuple< Ts... > &tuple) -> decltype(viskores::Get< static_cast< viskores::IdComponent >(Index)>(tuple))
Compatible with std::get for viskores::Tuple.
Definition: Tuple.h:113
viskores::UInt16
uint16_t UInt16
Base type to use for 16-bit unsigned integer numbers.
Definition: Types.h:185
viskores::IdComponent
viskores::Int32 IdComponent
Base type to use to index small lists.
Definition: Types.h:202
VISKORES_EXEC_CONT
#define VISKORES_EXEC_CONT
Definition: ExportMacros.h:60
viskores::Vec2ui_32
viskores::Vec< viskores::UInt32, 2 > Vec2ui_32
Vec2ui_32 corresponds to a 2-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1016
ThrustPatches.h
viskores::Vec2f_64
viskores::Vec< viskores::Float64, 2 > Vec2f_64
Vec2f_64 corresponds to a 2-dimensional vector of 64-bit floating point values.
Definition: Types.h:958
viskores::Id
viskores::Int64 Id
Base type to use to index arrays.
Definition: Types.h:235
VISKORES_CONT
#define VISKORES_CONT
Definition: ExportMacros.h:65
viskores
Groups connected points that have the same field value.
Definition: Atomic.h:27
viskores::Float32
float Float32
Base type to use for 32-bit floating-point numbers.
Definition: Types.h:165
viskores::Vec4f_32
viskores::Vec< viskores::Float32, 4 > Vec4f_32
Vec4f_32 corresponds to a 4-dimensional vector of 32-bit floating point values.
Definition: Types.h:1186
viskores::Vec4ui_32
viskores::Vec< viskores::UInt32, 4 > Vec4ui_32
Vec4ui_32 corresponds to a 4-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1250
VISKORES_ASSERT
#define VISKORES_ASSERT(condition)
Definition: Assert.h:51
viskores::UInt8
uint8_t UInt8
Base type to use for 8-bit unsigned integer numbers.
Definition: Types.h:177
viskores::Int32
int32_t Int32
Base type to use for 32-bit signed integer numbers.
Definition: Types.h:189
viskores::Vec2i_32
viskores::Vec< viskores::Int32, 2 > Vec2i_32
Vec2i_32 corresponds to a 2-dimensional vector of 32-bit integer values.
Definition: Types.h:982
viskores::Get
auto Get(const viskores::Tuple< Ts... > &tuple)
Retrieve the object from a viskores::Tuple at the given index.
Definition: Tuple.h:89
viskores::Float64
double Float64
Base type to use for 64-bit floating-point numbers.
Definition: Types.h:169
VISKORES_THIRDPARTY_PRE_INCLUDE
#define VISKORES_THIRDPARTY_PRE_INCLUDE
Definition: Configure.h:199
viskores::Vec
A short fixed-length array.
Definition: Types.h:365
viskores::UInt32
uint32_t UInt32
Base type to use for 32-bit unsigned integer numbers.
Definition: Types.h:193