Viskores  1.0
ArrayPortalBasicCuda.h
Go to the documentation of this file.
1 //============================================================================
2 // The contents of this file are covered by the Viskores license. See
3 // LICENSE.txt for details.
4 //
5 // By contributing to this file, all contributors agree to the Developer
6 // Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
7 //============================================================================
8 
9 //============================================================================
10 // Copyright (c) Kitware, Inc.
11 // All rights reserved.
12 // See LICENSE.txt for details.
13 //
14 // This software is distributed WITHOUT ANY WARRANTY; without even
15 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 // PURPOSE. See the above copyright notice for more information.
17 //============================================================================
18 #ifndef viskores_exec_cuda_internal_ArrayPortalBasicCuda_h
19 #define viskores_exec_cuda_internal_ArrayPortalBasicCuda_h
20 
21 // This file provides overloads of ArrayPortalBasicReadGet (forward-declared below) that use
22 // texture loading intrinsics to load data faster from read-only arrays. These intrinsics
23 // are only available with compute capabilities >= 3.5, so only compile this code if
24 // we are compiling for that.
25 #if __CUDA_ARCH__ >= 350
26 
27 #include <viskores/Types.h>
28 
29 namespace viskores
30 {
31 namespace internal
32 {
33 namespace detail
34 {
35 // Each ArrayPortalBasicReadGet overload below returns, by value, the element that `data` points to.
36 // Forward declaration of the generic version (declared in viskores/internal/ArrayPortalBasic.h).
37 template <typename T>
38 VISKORES_EXEC_CONT static inline T ArrayPortalBasicReadGet(const T* const data);
39 
40 // Use the __ldg intrinsic to load read-only arrays through texture reads.
41 // Currently CUDA doesn't support texture loading of signed chars, which is why
42 // you don't see viskores::Int8 in any of the lists.
43 
44 VISKORES_EXEC_CONT static inline viskores::UInt8 ArrayPortalBasicReadGet(
45  const viskores::UInt8* const data)
46 {
47  return __ldg(data);
48 }
49 VISKORES_EXEC_CONT static inline viskores::Int16 ArrayPortalBasicReadGet(
50  const viskores::Int16* const data)
51 {
52  return __ldg(data);
53 }
54 VISKORES_EXEC_CONT static inline viskores::UInt16 ArrayPortalBasicReadGet(
55  const viskores::UInt16* const data)
56 {
57  return __ldg(data);
58 }
59 VISKORES_EXEC_CONT static inline viskores::Int32 ArrayPortalBasicReadGet(
60  const viskores::Int32* const data)
61 {
62  return __ldg(data);
63 }
64 VISKORES_EXEC_CONT static inline viskores::UInt32 ArrayPortalBasicReadGet(
65  const viskores::UInt32* const data)
66 {
67  return __ldg(data);
68 }
69 VISKORES_EXEC_CONT static inline viskores::Float32 ArrayPortalBasicReadGet(
70  const viskores::Float32* const data)
71 {
72  return __ldg(data);
73 }
74 VISKORES_EXEC_CONT static inline viskores::Float64 ArrayPortalBasicReadGet(
75  const viskores::Float64* const data)
76 {
77  return __ldg(data);
78 }
79 
80 // CUDA can do some vector texture loads, but only for its own vector types (int2, float4, ...), so we
81 // reinterpret the pointer as the matching CUDA type, load it with one __ldg call, and rebuild the Vec.
82 
83 VISKORES_EXEC_CONT static inline viskores::Vec2i_32 ArrayPortalBasicReadGet(
84  const viskores::Vec2i_32* const data)
85 {
86  const int2 temp = __ldg(reinterpret_cast<const int2*>(data)); // NOTE(review): presumably needs int2-aligned data; same for the casts below - confirm
87  return viskores::Vec2i_32(temp.x, temp.y);
88 }
89 VISKORES_EXEC_CONT static inline viskores::Vec2ui_32 ArrayPortalBasicReadGet(
90  const viskores::Vec2ui_32* const data)
91 {
92  const uint2 temp = __ldg(reinterpret_cast<const uint2*>(data));
93  return viskores::Vec2ui_32(temp.x, temp.y);
94 }
95 VISKORES_EXEC_CONT static inline viskores::Vec2f_32 ArrayPortalBasicReadGet(
96  const viskores::Vec2f_32* const data)
97 {
98  const float2 temp = __ldg(reinterpret_cast<const float2*>(data));
99  return viskores::Vec2f_32(temp.x, temp.y);
100 }
101 VISKORES_EXEC_CONT static inline viskores::Vec2f_64 ArrayPortalBasicReadGet(
102  const viskores::Vec2f_64* const data)
103 {
104  const double2 temp = __ldg(reinterpret_cast<const double2*>(data));
105  return viskores::Vec2f_64(temp.x, temp.y);
106 }
107 
108 VISKORES_EXEC_CONT static inline viskores::Vec4i_32 ArrayPortalBasicReadGet(
109  const viskores::Vec4i_32* const data)
110 {
111  const int4 temp = __ldg(reinterpret_cast<const int4*>(data));
112  return viskores::Vec4i_32(temp.x, temp.y, temp.z, temp.w);
113 }
114 VISKORES_EXEC_CONT static inline viskores::Vec4ui_32 ArrayPortalBasicReadGet(
115  const viskores::Vec4ui_32* const data)
116 {
117  const uint4 temp = __ldg(reinterpret_cast<const uint4*>(data));
118  return viskores::Vec4ui_32(temp.x, temp.y, temp.z, temp.w);
119 }
120 VISKORES_EXEC_CONT static inline viskores::Vec4f_32 ArrayPortalBasicReadGet(
121  const viskores::Vec4f_32* const data)
122 {
123  const float4 temp = __ldg(reinterpret_cast<const float4*>(data));
124  return viskores::Vec4f_32(temp.x, temp.y, temp.z, temp.w);
125 }
126 
127 // CUDA does not support loading many of the vector types we use, including 3-wide vectors.
128 // Support these with one ArrayPortalBasicReadGet call per component (N scalar loads per Vec).
129 
130 template <typename T, viskores::IdComponent N>
131 VISKORES_EXEC_CONT static inline viskores::Vec<T, N> ArrayPortalBasicReadGet(
132  const viskores::Vec<T, N>* const data)
133 {
134  const T* recastedData = reinterpret_cast<const T*>(data); // treat the Vec as N consecutive T values
135  viskores::Vec<T, N> result;
136 #pragma unroll
137  for (viskores::IdComponent i = 0; i < N; ++i)
138  {
139  result[i] = ArrayPortalBasicReadGet(recastedData + i); // overload is chosen by the component type T
140  }
141  return result;
142 }
143 }
144 }
145 } // namespace viskores::internal::detail
146 
147 #endif // __CUDA_ARCH__ >= 350
148 
149 #endif //viskores_exec_cuda_internal_ArrayPortalBasicCuda_h
viskores::Int16
int16_t Int16
Base type to use for 16-bit signed integer numbers.
Definition: Types.h:181
Types.h
viskores::Vec2f_32
viskores::Vec< viskores::Float32, 2 > Vec2f_32
Vec2f_32 corresponds to a 2-dimensional vector of 32-bit floating point values.
Definition: Types.h:952
viskores::Vec4i_32
viskores::Vec< viskores::Int32, 4 > Vec4i_32
Vec4i_32 corresponds to a 4-dimensional vector of 32-bit integer values.
Definition: Types.h:1216
viskores::UInt16
uint16_t UInt16
Base type to use for 16-bit unsigned integer numbers.
Definition: Types.h:185
viskores::IdComponent
viskores::Int32 IdComponent
Base type to use to index small lists.
Definition: Types.h:202
VISKORES_EXEC_CONT
#define VISKORES_EXEC_CONT
Definition: ExportMacros.h:60
viskores::Vec2ui_32
viskores::Vec< viskores::UInt32, 2 > Vec2ui_32
Vec2ui_32 corresponds to a 2-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1016
viskores::Vec2f_64
viskores::Vec< viskores::Float64, 2 > Vec2f_64
Vec2f_64 corresponds to a 2-dimensional vector of 64-bit floating point values.
Definition: Types.h:958
viskores
Groups connected points that have the same field value.
Definition: Atomic.h:27
viskores::Float32
float Float32
Base type to use for 32-bit floating-point numbers.
Definition: Types.h:165
viskores::Vec4f_32
viskores::Vec< viskores::Float32, 4 > Vec4f_32
Vec4f_32 corresponds to a 4-dimensional vector of 32-bit floating point values.
Definition: Types.h:1186
viskores::Vec4ui_32
viskores::Vec< viskores::UInt32, 4 > Vec4ui_32
Vec4ui_32 corresponds to a 4-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1250
viskores::UInt8
uint8_t UInt8
Base type to use for 8-bit unsigned integer numbers.
Definition: Types.h:177
viskores::Int32
int32_t Int32
Base type to use for 32-bit signed integer numbers.
Definition: Types.h:189
viskores::Vec2i_32
viskores::Vec< viskores::Int32, 2 > Vec2i_32
Vec2i_32 corresponds to a 2-dimensional vector of 32-bit integer values.
Definition: Types.h:982
viskores::Float64
double Float64
Base type to use for 64-bit floating-point numbers.
Definition: Types.h:169
viskores::Vec
A short fixed-length array.
Definition: Types.h:365
viskores::UInt32
uint32_t UInt32
Base type to use for 32-bit unsigned integer numbers.
Definition: Types.h:193