#ifndef viskores_Atomic_h
#define viskores_Atomic_h
    case viskores::MemoryOrder::Relaxed:
      return std::memory_order_relaxed;
    case viskores::MemoryOrder::Acquire:
      return std::memory_order_acquire;
    case viskores::MemoryOrder::Release:
      return std::memory_order_release;
    case viskores::MemoryOrder::AcquireAndRelease:
      return std::memory_order_acq_rel;
    case viskores::MemoryOrder::SequentiallyConsistent:
      return std::memory_order_seq_cst;
  // Should not be reachable, but avoids a compiler warning about a missing return.
  return std::memory_order_seq_cst;
#if defined(VISKORES_CUDA_DEVICE_PASS)

template <typename T>
volatile T* const vaddr = addr; // volatile to bypass cache
const T value = *vaddr;
// fence to ensure that dependent reads are correctly ordered
AtomicLoadFence(order);

template <typename T>
volatile T* vaddr = addr; // volatile to bypass cache
// fence to ensure that previous non-atomic stores are visible to other threads
AtomicStoreFence(order);
template <typename T>
AtomicStoreFence(order);
auto result = atomicAdd(addr, arg);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
auto result = atomicAnd(addr, mask);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
auto result = atomicOr(addr, mask);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
auto result = atomicXor(addr, mask);
AtomicLoadFence(order);

template <typename T>
return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
template <typename T>
AtomicStoreFence(order);
auto result = atomicCAS(addr, *expected, desired);
AtomicLoadFence(order);
if (result == *expected)
{
  return true;
}
// CAS failed: report the value actually observed so the caller can retry.
*expected = result;
return false;
#if __CUDA_ARCH__ < 200
// Pre-sm_20 devices lack atomicAdd for Float32; emulate it with a CAS loop on the bit pattern.
AtomicStoreFence(order);
                __float_as_int(__int_as_float(assumed) + value));
} while (assumed != old);
AtomicLoadFence(order);
return __int_as_float(old);

#if __CUDA_ARCH__ < 600
// Pre-sm_60 devices lack atomicAdd for Float64; emulate it with a CAS loop on the bit pattern.
AtomicStoreFence(order);
                __double_as_longlong(__longlong_as_double(assumed) + value));
} while (assumed != old);
AtomicLoadFence(order);
return __longlong_as_double(old);
#elif defined(VISKORES_ENABLE_KOKKOS)

#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VISKORES_CUDA)
#undef KOKKOS_ENABLE_CUDA

// Newer Kokkos versions guard their CUDA setup header separately; define its
// include guard so the device-specific setup is skipped as well.
#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_
#endif

#endif

#if defined(KOKKOS_ENABLE_HIP) && !defined(VISKORES_HIP)
#undef KOKKOS_ENABLE_HIP
#endif

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
Kokkos::memory_fence();
Kokkos::memory_fence();

#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
// Serial-only Kokkos builds need only caller-scope atomics.
#define VISKORES_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
#else
#define VISKORES_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
#endif
template <typename T>
return desul::atomic_load(addr, desul::MemoryOrderRelaxed(), VISKORES_DESUL_MEM_SCOPE);
return desul::atomic_load(addr, desul::MemoryOrderAcquire(), VISKORES_DESUL_MEM_SCOPE);
return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);

// Should not be reachable, but avoids a compiler warning about a missing return.
return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);
template <typename T>
desul::atomic_store(addr, value, desul::MemoryOrderRelaxed(), VISKORES_DESUL_MEM_SCOPE);
desul::atomic_store(addr, value, desul::MemoryOrderRelease(), VISKORES_DESUL_MEM_SCOPE);
desul::atomic_store(addr, value, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);

template <typename T>
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_add(addr, arg);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_and(addr, mask);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_or(addr, mask);
AtomicLoadFence(order);

template <typename T>
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_xor(addr, mask);
AtomicLoadFence(order);

template <typename T>
return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
template <typename T>
AtomicStoreFence(order);
T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
AtomicLoadFence(order);
if (oldValue == *expected)
{
  return true;
}
// CAS failed: report the value actually observed so the caller can retry.
*expected = oldValue;
return false;
#elif defined(VISKORES_MSVC)

// BitCast reinterprets the bit pattern of `src` as another type via memcpy (the
// memcpy is optimized away); it lets the integer-only _Interlocked* intrinsics
// operate on other types of the same width.
template <typename To, typename From>
std::memcpy(&dst, &src, sizeof(From));

// Pass-through overload for casts to the same type.
template <typename T>
return std::forward<T>(src);
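// Illustrative sketch (not from the original header): BitCast is what lets the MSVC
// intrinsics below, which only accept integer types such as LONG, operate on
// floating-point values. For example:
//
//   viskores::Float32 f = 1.0f;
//   LONG bits = BitCast<LONG>(f);                               // reinterpret the float's bits
//   viskores::Float32 back = BitCast<viskores::Float32>(bits);  // round-trips to 1.0f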
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

// The Windows API has no dedicated atomic store intrinsic for these widths, so the
// store is performed as an exchange.
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));

_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));

std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
#define VISKORES_ATOMIC_OP(viskoresName, winName, viskoresType, winType, suffix) \
  VISKORES_EXEC_CONT inline viskoresType viskoresName( \
    viskoresType* addr, viskoresType arg, viskores::MemoryOrder order) \
  { \
    return BitCast<viskoresType>( \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
  }

#define VISKORES_ATOMIC_OPS_FOR_TYPE(viskoresType, winType, suffix) \
  VISKORES_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, viskoresType, winType, suffix) \
  VISKORES_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, viskoresType, winType, suffix) \
  VISKORES_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, viskoresType, winType, suffix) \
  VISKORES_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, viskoresType, winType, suffix) \
  VISKORES_EXEC_CONT inline viskoresType AtomicNotImpl(viskoresType* addr, \
                                                       viskores::MemoryOrder order) \
  { \
    return AtomicXorImpl(addr, static_cast<viskoresType>(~viskoresType{ 0u }), order); \
  } \
  VISKORES_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
    viskoresType* addr, \
    viskoresType* expected, \
    viskoresType desired, \
    viskores::MemoryOrder viskoresNotUsed(order)) \
  { \
    viskoresType result = BitCast<viskoresType>( \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
                                          BitCast<winType>(desired), \
                                          BitCast<winType>(*expected))); \
    if (result == *expected) \
    { \
      return true; \
    } \
    /* CAS failed: report the value actually observed so the caller can retry. */ \
    *expected = result; \
    return false; \
  }

#undef VISKORES_ATOMIC_OPS_FOR_TYPE
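// Note (assumption, not shown in this excerpt): VISKORES_ATOMIC_OPS_FOR_TYPE is
// presumably instantiated once per supported integer width before being #undef'd,
// along the lines of
//
//   VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt32, LONG, )
//   VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt64, LONG64, 64)
//
// where the suffix selects the matching _Interlocked* intrinsic family.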
LONG old = BitCast<LONG>(*address);
// CAS loop: emulate the Float32 add with an integer compare-exchange on the value's bit pattern.
old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                  BitCast<LONG>(BitCast<viskores::Float32>(assumed) + value),
                                  assumed);
} while (assumed != old);
return BitCast<viskores::Float32>(old);

LONG64 old = BitCast<LONG64>(*address);
// Same CAS-loop emulation for Float64.
old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                    BitCast<LONG64>(BitCast<viskores::Float64>(assumed) + value),
                                    assumed);
} while (assumed != old);
return BitCast<viskores::Float64>(old);
#else // gcc/clang for CPU
    case viskores::MemoryOrder::Relaxed:
      return __ATOMIC_RELAXED;
    case viskores::MemoryOrder::Acquire:
      return __ATOMIC_ACQUIRE;
    case viskores::MemoryOrder::Release:
      return __ATOMIC_RELEASE;
    case viskores::MemoryOrder::AcquireAndRelease:
      return __ATOMIC_ACQ_REL;
    case viskores::MemoryOrder::SequentiallyConsistent:
      return __ATOMIC_SEQ_CST;
  // Should not be reachable, but avoids a compiler warning about a missing return.
  return __ATOMIC_SEQ_CST;
template <typename T>
return __atomic_load_n(addr, GccAtomicMemOrder(order));

template <typename T>
return __atomic_store_n(addr, value, GccAtomicMemOrder(order));

template <typename T>
return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));

#include <viskoresstd/bit_cast.h>
desired = viskoresstd::bit_cast<viskores::UInt32>(
  viskoresstd::bit_cast<viskores::Float32>(expected) + arg);
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
// `expected` now holds the value that was in memory before the add.
return viskoresstd::bit_cast<viskores::Float32>(expected);

desired = viskoresstd::bit_cast<viskores::UInt64>(
  viskoresstd::bit_cast<viskores::Float64>(expected) + arg);
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
// `expected` now holds the value that was in memory before the add.
return viskoresstd::bit_cast<viskores::Float64>(expected);
template <typename T>
return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));

template <typename T>
return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));

template <typename T>
return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));

template <typename T>
return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);

template <typename T>
return __atomic_compare_exchange_n(
  addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
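// Note: __atomic_compare_exchange_n already writes the observed value back into
// *expected when the exchange fails, which matches the caller-retry contract the
// other backends implement by hand (e.g. "*expected = result" above).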
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;
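// Illustrative note (not from the original header): OppositeSign<T> flips the
// signedness of an integer type, so the overloads below can accept an operand whose
// signedness differs from the atomic value's. For example:
//
//   static_assert(std::is_same<OppositeSign<viskores::Int32>, viskores::UInt32>::value,
//                 "signed Int32 maps to unsigned UInt32");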
template <typename T>
return detail::AtomicLoadImpl(pointer, order);

template <typename T>
detail::AtomicStoreImpl(pointer, value, order);

template <typename T>
detail::OppositeSign<T> value,
detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
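// Illustrative sketch (not from the original header): a typical release/acquire
// handshake built from AtomicStore and AtomicLoad; `flag` is a hypothetical pointer
// into memory shared between threads.
//
//   // Writer: publish data, then set the flag with release semantics.
//   viskores::AtomicStore(flag, viskores::UInt32{ 1 }, viskores::MemoryOrder::Release);
//
//   // Reader: spin until the flag is observed with acquire semantics.
//   while (viskores::AtomicLoad(flag, viskores::MemoryOrder::Acquire) == 0)
//   {
//   }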
template <typename T>
return detail::AtomicAddImpl(pointer, operand, order);

template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
detail::OppositeSign<T> operand,
return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
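// Illustrative sketch (not from the original header): AtomicAdd is the usual way to
// build a shared counter; `counter` is a hypothetical pointer into shared memory.
//
//   viskores::UInt32 previous = viskores::AtomicAdd(counter, viskores::UInt32{ 1 });
//   // `previous` holds the counter value from before this thread's increment.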
template <typename T>
return detail::AtomicAndImpl(pointer, operand, order);

template <typename T>
detail::OppositeSign<T> operand,
return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);

template <typename T>
return detail::AtomicOrImpl(pointer, operand, order);

template <typename T>
detail::OppositeSign<T> operand,
return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);

template <typename T>
return detail::AtomicXorImpl(pointer, operand, order);

template <typename T>
detail::OppositeSign<T> operand,
return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);

template <typename T>
return detail::AtomicNotImpl(pointer, order);

template <typename T>
return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
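// Illustrative sketch (not from the original header): AtomicCompareExchange is
// normally driven in a retry loop. On failure it stores the value actually observed
// into `expected`, so each iteration recomputes from fresh data; `sharedValue` is a
// hypothetical pointer into shared memory.
//
//   viskores::UInt32 expected = viskores::AtomicLoad(sharedValue);
//   viskores::UInt32 desired;
//   do
//   {
//     desired = expected * 2; // recompute using the freshly observed value
//   } while (!viskores::AtomicCompareExchange(sharedValue, &expected, desired));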
#endif //viskores_Atomic_h