#ifndef viskores_exec_cuda_internal_ThrustPatches_h
#define viskores_exec_cuda_internal_ThrustPatches_h
#include <viskores/Types.h>

#ifdef VISKORES_ENABLE_CUDA

// Needed so we can conditionally include components based on the Thrust version.
#include <thrust/version.h>
#if THRUST_VERSION >= 100900 && THRUST_VERSION < 100906
// Thrust 1.9.0 - 1.9.5 ships an aligned_reinterpret_cast that is not marked
// __host__ __device__. Work around that by providing overloads with the correct
// markup for the pointer types Viskores passes to it.
namespace thrust
{
namespace detail
{
// Declare the primary template so the full specializations below have something
// to specialize.
template <typename T, typename U>
T aligned_reinterpret_cast(U u);
#define ALIGN_RE_T(RT)                                                          \
  template <>                                                                   \
  inline __host__ __device__ RT* aligned_reinterpret_cast(void* u)              \
  {                                                                             \
    return reinterpret_cast<RT*>(reinterpret_cast<void*>(u));                   \
  }                                                                             \
  template <>                                                                   \
  inline __host__ __device__ RT* aligned_reinterpret_cast(viskores::UInt8* u)   \
  {                                                                             \
    return reinterpret_cast<RT*>(reinterpret_cast<void*>(u));                   \
  }                                                                             \
  struct SwallowSemicolon
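
// Illustration only (not part of the patch): ALIGN_RE_T(viskores::Float32) expands to
// roughly the following pair of full specializations, plus a dummy struct declaration:
//
//   template <>
//   inline __host__ __device__ viskores::Float32* aligned_reinterpret_cast(void* u)
//   { return reinterpret_cast<viskores::Float32*>(reinterpret_cast<void*>(u)); }
//   ...
//   struct SwallowSemicolon;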
#define ALIGN_RE_VEC(RT)                                                                          \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 2>* aligned_reinterpret_cast(void* u)              \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 2>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 3>* aligned_reinterpret_cast(void* u)              \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 3>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 4>* aligned_reinterpret_cast(void* u)              \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 4>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 3>, 2>* aligned_reinterpret_cast(    \
    void* u)                                                                                      \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 3>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 9>, 2>* aligned_reinterpret_cast(    \
    void* u)                                                                                      \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 9>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 2>* aligned_reinterpret_cast(viskores::UInt8* u)   \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 2>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 3>* aligned_reinterpret_cast(viskores::UInt8* u)   \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 3>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<RT, 4>* aligned_reinterpret_cast(viskores::UInt8* u)   \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<RT, 4>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 2>, 2>* aligned_reinterpret_cast(    \
    viskores::UInt8* u)                                                                           \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 2>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 3>, 2>* aligned_reinterpret_cast(    \
    viskores::UInt8* u)                                                                           \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 3>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 4>, 2>* aligned_reinterpret_cast(    \
    viskores::UInt8* u)                                                                           \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 4>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Vec<viskores::Vec<RT, 9>, 2>* aligned_reinterpret_cast(    \
    viskores::UInt8* u)                                                                           \
  {                                                                                               \
    return reinterpret_cast<viskores::Vec<viskores::Vec<RT, 9>, 2>*>(reinterpret_cast<void*>(u)); \
  }                                                                                               \
  struct SwallowSemicolon
#define ALIGN_RE_PAIR(T, U)                                                                       \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Pair<T, U>* aligned_reinterpret_cast(void* u)              \
  {                                                                                               \
    return reinterpret_cast<viskores::Pair<T, U>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  template <>                                                                                     \
  inline __host__ __device__ viskores::Pair<T, U>* aligned_reinterpret_cast(viskores::UInt8* u)   \
  {                                                                                               \
    return reinterpret_cast<viskores::Pair<T, U>*>(reinterpret_cast<void*>(u));                   \
  }                                                                                               \
  struct SwallowSemicolon
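
// Illustration only: the macros above are intended to be invoked at namespace scope and
// terminated with a semicolon, e.g.
//
//   ALIGN_RE_T(char);
//   ALIGN_RE_VEC(viskores::Float64);
//
// The trailing "struct SwallowSemicolon" in each macro exists solely so that the
// semicolon after the invocation has something harmless to attach to.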
#ifndef VISKORES_DONT_FIX_THRUST
ALIGN_RE_T(unsigned long);
ALIGN_RE_VEC(unsigned long);
#endif //VISKORES_DONT_FIX_THRUST

#undef ALIGN_RE_T
#undef ALIGN_RE_VEC
#undef ALIGN_RE_PAIR

} //namespace detail
} //namespace thrust

#endif //THRUST_VERSION >= 100900 && THRUST_VERSION < 100906
#if (THRUST_VERSION >= 100904) && (THRUST_VERSION < 100909)
// Thrust 1.9.4 - 1.9.8 ships a stateless_resource_allocator whose constructors lack
// __host__ __device__ markup, which triggers diagnostics when Thrust uses the allocator
// from device-side code. Provide a partial specialization for the CUDA memory resource
// with the correct markup.
#include <thrust/mr/allocator.h>
#include <thrust/system/cuda/memory_resource.h>

namespace thrust
{
namespace mr
{
template <typename T>
class stateless_resource_allocator<T, ::thrust::system::cuda::memory_resource>
  : public thrust::mr::allocator<T, ::thrust::system::cuda::memory_resource>
{
  typedef ::thrust::system::cuda::memory_resource Upstream;
  typedef thrust::mr::allocator<T, Upstream> base;

public:
  /*! The \p rebind metafunction provides the type of a \p stateless_resource_allocator
   *  instantiated with another type.
   */
  template <typename U>
  struct rebind
  {
    typedef stateless_resource_allocator<U, Upstream> other;
  };
  /*! Default constructor. Obtains the global instance of \p Upstream via
   *  \p get_global_resource and initializes the \p allocator base with it.
   */
  __thrust_exec_check_disable__
  __host__
  stateless_resource_allocator()
    : base(get_global_resource<Upstream>())
  {
  }
  /*! Copy constructor. Copies the memory resource pointer. */
  __host__ __device__ stateless_resource_allocator(const stateless_resource_allocator& other)
    : base(other)
  {
  }
  /*! Conversion constructor from an allocator of a different element type.
   *  Copies the memory resource pointer.
   */
  template <typename U>
  __host__ __device__
  stateless_resource_allocator(const stateless_resource_allocator<U, Upstream>& other)
    : base(other)
  {
  }
  /*! Destructor. */
  __host__ __device__ ~stateless_resource_allocator() {}
};

} //namespace mr
} //namespace thrust
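
// Illustration only (not compiled as part of this header): with the specialization above,
// an allocator such as
//
//   thrust::mr::stateless_resource_allocator<float, thrust::system::cuda::memory_resource> a;
//   auto copy = a;  // copy/convert constructors now carry __host__ __device__ markup
//
// can be copied inside Thrust's device-side code paths without the host/device call
// diagnostics that the unpatched allocator's unmarked constructors produced.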
#endif //(THRUST_VERSION >= 100904) && (THRUST_VERSION < 100909)
#if THRUST_VERSION < 100900
// In pre-1.9 Thrust (the 1.8 series), inclusive_scan can produce incorrect results when
// the supplied binary operator is not commutative. Work around this by overloading the
// internal accumulation helper for Viskores' WrappedBinaryOperator so the operator is
// always applied in element order.
namespace viskores
{
namespace exec
{
namespace cuda
{
namespace internal
{
template <typename T, typename F>
class WrappedBinaryOperator;
}
}
}
}
namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace bulk_
{
namespace detail
{
namespace accumulate_detail
{
template <typename ConcurrentGroup,
          typename RandomAccessIterator,
          typename Size,
          typename T,
          typename F>
__device__ T
destructive_accumulate_n(ConcurrentGroup& g,
                         RandomAccessIterator first,
                         Size n,
                         T init,
                         viskores::exec::cuda::internal::WrappedBinaryOperator<T, F> binary_op)
{
  using size_type = typename ConcurrentGroup::size_type;

  size_type tid = g.this_exec.index();

  // Each thread loads its element; threads past the end keep init as a placeholder.
  T x = init;
  if (tid < n)
  {
    x = first[tid];
  }

  g.wait();
  // Hillis-Steele style in-place inclusive scan across the thread group.
  for (size_type offset = 1; offset < g.size(); offset += offset)
  {
    if (tid >= offset && tid - offset < n)
    {
      x = binary_op(first[tid - offset], x);
    }

    g.wait();

    if (tid < n)
    {
      first[tid] = x;
    }

    g.wait();
  }
  // Fold the caller's initial value into the total of the scanned range.
  T result = binary_op(init, first[n - 1]);

  g.wait();

  return result;
}

} //namespace accumulate_detail
} //namespace detail
} //namespace bulk_
} //namespace detail
} //namespace cuda
} //namespace system
} //namespace thrust
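
// Worked example (illustration only, assuming binary_op performs addition):
// with first = {1, 2, 3, 4}, n = 4, and init = 5, the in-place scan above leaves
// first = {1, 3, 6, 10}; the returned value is binary_op(init, first[n - 1]) = 5 + 10 = 15.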
#endif //THRUST_VERSION < 100900

#endif //CUDA enabled

#endif //viskores_exec_cuda_internal_ThrustPatches_h