//============================================================================
// The contents of this file are covered by the Viskores license. See
// LICENSE.txt for details.
//
// By contributing to this file, all contributors agree to the Developer
// Certificate of Origin Version 1.1 (DCO 1.1) as stated in DCO.txt.
//============================================================================

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef viskores_Atomic_h
#define viskores_Atomic_h

#include <viskores/List.h>
#include <viskores/internal/Windows.h>

#include <atomic>

namespace viskores
{

/// \brief Specifies memory order semantics for atomic operations.
///
enum class MemoryOrder
{
  /// An atomic operation with Relaxed memory order enforces no synchronization or ordering
  /// constraints on surrounding reads and writes.
  Relaxed,

  /// A load operation with Acquire memory order prevents subsequent reads and writes from
  /// being reordered before it.
  Acquire,

  /// A store operation with Release memory order prevents preceding reads and writes from
  /// being reordered after it.
  Release,

  /// A read-modify-write operation with AcquireAndRelease memory order combines the
  /// guarantees of Acquire and Release.
  AcquireAndRelease,

  /// An atomic operation with SequentiallyConsistent memory order enforces the appropriate
  /// Acquire/Release semantics and additionally participates in a single total order of all
  /// sequentially consistent operations.
  SequentiallyConsistent
};

namespace internal
{

VISKORES_EXEC_CONT inline std::memory_order StdAtomicMemOrder(viskores::MemoryOrder order)
{
  switch (order)
  {
    case viskores::MemoryOrder::Relaxed:
      return std::memory_order_relaxed;
    case viskores::MemoryOrder::Acquire:
      return std::memory_order_acquire;
    case viskores::MemoryOrder::Release:
      return std::memory_order_release;
    case viskores::MemoryOrder::AcquireAndRelease:
      return std::memory_order_acq_rel;
    case viskores::MemoryOrder::SequentiallyConsistent:
      return std::memory_order_seq_cst;
  }

  // Should never reach here, but avoid compiler warnings
  return std::memory_order_seq_cst;
}

} // namespace internal

} // namespace viskores


#if defined(VISKORES_CUDA_DEVICE_PASS)

namespace viskores
{
namespace detail
{

// Fence to ensure that previous non-atomic stores are visible to other threads.
VISKORES_EXEC_CONT inline void AtomicStoreFence(viskores::MemoryOrder order)
{
  if ((order == viskores::MemoryOrder::Release) ||
      (order == viskores::MemoryOrder::AcquireAndRelease) ||
      (order == viskores::MemoryOrder::SequentiallyConsistent))
  {
    __threadfence();
  }
}

// Fence to ensure that stores from other threads are visible before subsequent non-atomic loads.
VISKORES_EXEC_CONT inline void AtomicLoadFence(viskores::MemoryOrder order)
{
  if ((order == viskores::MemoryOrder::Acquire) ||
      (order == viskores::MemoryOrder::AcquireAndRelease) ||
      (order == viskores::MemoryOrder::SequentiallyConsistent))
  {
    __threadfence();
  }
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicLoadImpl(T* const addr, viskores::MemoryOrder order)
{
  volatile T* const vaddr = addr; /* volatile to bypass cache */
  if (order == viskores::MemoryOrder::SequentiallyConsistent)
  {
    __threadfence();
  }
  const T value = *vaddr;
  /* fence to ensure that dependent reads are correctly ordered */
  AtomicLoadFence(order);
  return value;
}

template <typename T>
VISKORES_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, viskores::MemoryOrder order)
{
  volatile T* vaddr = addr; /* volatile to bypass cache */
  /* fence to ensure that previous non-atomic stores are visible to other threads */
  AtomicStoreFence(order);
  *vaddr = value;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAdd(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAnd(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicOr(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicXor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicNotImpl(T* addr, viskores::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VISKORES_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                         T* expected,
                                                         T desired,
                                                         viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicCAS(addr, *expected, desired);
  AtomicLoadFence(order);
  if (result == *expected)
  {
    return true;
  }
  else
  {
    *expected = result;
    return false;
  }
}
#if __CUDA_ARCH__ < 200
VISKORES_EXEC_CONT inline viskores::Float32 AtomicAddImpl(viskores::Float32* address,
                                                          viskores::Float32 value,
                                                          viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  viskores::UInt32 assumed;
  viskores::UInt32 old = __float_as_int(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<viskores::UInt32*>(address),
                    assumed,
                    __float_as_int(__int_as_float(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __int_as_float(old);
}
#endif
#if __CUDA_ARCH__ < 600
VISKORES_EXEC_CONT inline viskores::Float64 AtomicAddImpl(viskores::Float64* address,
                                                          viskores::Float64 value,
                                                          viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  viskores::UInt64 assumed;
  viskores::UInt64 old = __double_as_longlong(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<viskores::UInt64*>(address),
                    assumed,
                    __double_as_longlong(__longlong_as_double(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __longlong_as_double(old);
}
#endif
}
} // namespace viskores::detail

#elif defined(VISKORES_ENABLE_KOKKOS)

VISKORES_THIRDPARTY_PRE_INCLUDE
// Superhack! Kokkos_Macros.hpp defines macros to include modifiers like __device__.
// However, we don't want to actually use those if compiling this with a standard
// C++ compiler (because this particular code does not run on a device). Thus,
// we want to disable that behavior when not using the device compiler. To do that,
// we are going to have to load the KokkosCore_config.h file (which you are not
// supposed to do), then undefine the device enables if necessary, then load
// Kokkos_Macros.hpp to finish the state.
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VISKORES_CUDA)
#undef KOKKOS_ENABLE_CUDA

// In later versions we need to directly deactivate Kokkos_Setup_Cuda.hpp
#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_
#endif
#endif

#if defined(KOKKOS_ENABLE_HIP) && !defined(VISKORES_HIP)
#undef KOKKOS_ENABLE_HIP
#endif

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
VISKORES_THIRDPARTY_POST_INCLUDE

namespace viskores
{
namespace detail
{

// Fence to ensure that previous non-atomic stores are visible to other threads.
VISKORES_EXEC_CONT inline void AtomicStoreFence(viskores::MemoryOrder order)
{
  if ((order == viskores::MemoryOrder::Release) ||
      (order == viskores::MemoryOrder::AcquireAndRelease) ||
      (order == viskores::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}

// Fence to ensure that stores from other threads are visible before subsequent non-atomic loads.
VISKORES_EXEC_CONT inline void AtomicLoadFence(viskores::MemoryOrder order)
{
  if ((order == viskores::MemoryOrder::Acquire) ||
      (order == viskores::MemoryOrder::AcquireAndRelease) ||
      (order == viskores::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}
#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
#define VISKORES_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
#else
#define VISKORES_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
#endif

template <typename T>
VISKORES_EXEC_CONT inline T AtomicLoadImpl(T* const addr, viskores::MemoryOrder order)
{
  switch (order)
  {
    case viskores::MemoryOrder::Relaxed:
      return desul::atomic_load(addr, desul::MemoryOrderRelaxed(), VISKORES_DESUL_MEM_SCOPE);
    case viskores::MemoryOrder::Acquire:
    case viskores::MemoryOrder::Release:           // Release doesn't make sense. Use Acquire.
    case viskores::MemoryOrder::AcquireAndRelease: // Release doesn't make sense. Use Acquire.
      return desul::atomic_load(addr, desul::MemoryOrderAcquire(), VISKORES_DESUL_MEM_SCOPE);
    case viskores::MemoryOrder::SequentiallyConsistent:
      return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);
  }

  // Should never reach here, but avoid compiler warnings
  return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);
}

template <typename T>
VISKORES_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, viskores::MemoryOrder order)
{
  switch (order)
  {
    case viskores::MemoryOrder::Relaxed:
      desul::atomic_store(addr, value, desul::MemoryOrderRelaxed(), VISKORES_DESUL_MEM_SCOPE);
      break;
    case viskores::MemoryOrder::Acquire:           // Acquire doesn't make sense. Use Release.
    case viskores::MemoryOrder::Release:
    case viskores::MemoryOrder::AcquireAndRelease: // Acquire doesn't make sense. Use Release.
      desul::atomic_store(addr, value, desul::MemoryOrderRelease(), VISKORES_DESUL_MEM_SCOPE);
      break;
    case viskores::MemoryOrder::SequentiallyConsistent:
      desul::atomic_store(addr, value, desul::MemoryOrderSeqCst(), VISKORES_DESUL_MEM_SCOPE);
      break;
  }
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_add(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_and(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_or(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_xor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicNotImpl(T* addr, viskores::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VISKORES_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                         T* expected,
                                                         T desired,
                                                         viskores::MemoryOrder order)
{
  AtomicStoreFence(order);
  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
  AtomicLoadFence(order);
  if (oldValue == *expected)
  {
    return true;
  }
  else
  {
    *expected = oldValue;
    return false;
  }
}
}
} // namespace viskores::detail

#elif defined(VISKORES_MSVC)

// Supports viskores::UInt8, viskores::UInt16, viskores::UInt32, viskores::UInt64

#include <cstdint>
#include <cstring>
#include <intrin.h> // For MSVC atomics

namespace viskores
{
namespace detail
{

template <typename To, typename From>
VISKORES_EXEC_CONT inline To BitCast(const From& src)
{
  // The memcpy should be removed by the compiler when possible, but this
  // works around a host of issues with bitcasting using reinterpret_cast.
  VISKORES_STATIC_ASSERT(sizeof(From) == sizeof(To));
  To dst;
  std::memcpy(&dst, &src, sizeof(From));
  return dst;
}

template <typename T>
VISKORES_EXEC_CONT inline T BitCast(T&& src)
{
  return std::forward<T>(src);
}

// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access

VISKORES_EXEC_CONT inline viskores::UInt8 AtomicLoadImpl(viskores::UInt8* const addr,
                                                         viskores::MemoryOrder order)
{
  // This assumes that the memory interface is smart enough to load a 32-bit
  // word atomically and a properly aligned 8-bit word from it.
  // We could build address masks and do shifts to perform this manually if
  // this assumption is incorrect.
  auto result = *static_cast<volatile viskores::UInt8* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VISKORES_EXEC_CONT inline viskores::UInt16 AtomicLoadImpl(viskores::UInt16* const addr,
                                                          viskores::MemoryOrder order)
{
  // This assumes that the memory interface is smart enough to load a 32-bit
  // word atomically and a properly aligned 16-bit word from it.
  // We could build address masks and do shifts to perform this manually if
  // this assumption is incorrect.
  auto result = *static_cast<volatile viskores::UInt16* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VISKORES_EXEC_CONT inline viskores::UInt32 AtomicLoadImpl(viskores::UInt32* const addr,
                                                          viskores::MemoryOrder order)
{
  auto result = *static_cast<volatile viskores::UInt32* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VISKORES_EXEC_CONT inline viskores::UInt64 AtomicLoadImpl(viskores::UInt64* const addr,
                                                          viskores::MemoryOrder order)
{
  auto result = *static_cast<volatile viskores::UInt64* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}

VISKORES_EXEC_CONT inline void AtomicStoreImpl(viskores::UInt8* addr,
                                               viskores::UInt8 val,
                                               viskores::MemoryOrder viskoresNotUsed(order))
{
  // There doesn't seem to be an atomic store instruction in the windows
  // API, so just exchange and discard the result.
  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VISKORES_EXEC_CONT inline void AtomicStoreImpl(viskores::UInt16* addr,
                                               viskores::UInt16 val,
                                               viskores::MemoryOrder viskoresNotUsed(order))
{
  // There doesn't seem to be an atomic store instruction in the windows
  // API, so just exchange and discard the result.
  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
VISKORES_EXEC_CONT inline void AtomicStoreImpl(viskores::UInt32* addr,
                                               viskores::UInt32 val,
                                               viskores::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}
VISKORES_EXEC_CONT inline void AtomicStoreImpl(viskores::UInt64* addr,
                                               viskores::UInt64 val,
                                               viskores::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}

#define VISKORES_ATOMIC_OP(viskoresName, winName, viskoresType, winType, suffix)            \
  VISKORES_EXEC_CONT inline viskoresType viskoresName(                                      \
    viskoresType* addr, viskoresType arg, viskores::MemoryOrder order)                      \
  {                                                                                         \
    return BitCast<viskoresType>(                                                           \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg)));   \
  }

#define VISKORES_ATOMIC_OPS_FOR_TYPE(viskoresType, winType, suffix)                         \
  VISKORES_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, viskoresType, winType, suffix) \
  VISKORES_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, viskoresType, winType, suffix)         \
  VISKORES_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, viskoresType, winType, suffix)           \
  VISKORES_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, viskoresType, winType, suffix)         \
  VISKORES_EXEC_CONT inline viskoresType AtomicNotImpl(viskoresType* addr,                  \
                                                       viskores::MemoryOrder order)         \
  {                                                                                         \
    return AtomicXorImpl(addr, static_cast<viskoresType>(~viskoresType{ 0u }), order);      \
  }                                                                                         \
  VISKORES_EXEC_CONT inline bool AtomicCompareExchangeImpl(                                 \
    viskoresType* addr,                                                                     \
    viskoresType* expected,                                                                 \
    viskoresType desired,                                                                   \
    viskores::MemoryOrder viskoresNotUsed(order))                                           \
  {                                                                                         \
    viskoresType result = BitCast<viskoresType>(                                            \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr),        \
                                          BitCast<winType>(desired),                        \
                                          BitCast<winType>(*expected)));                    \
    if (result == *expected)                                                                \
    {                                                                                       \
      return true;                                                                          \
    }                                                                                       \
    else                                                                                    \
    {                                                                                       \
      *expected = result;                                                                   \
      return false;                                                                         \
    }                                                                                       \
  }

VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt8, CHAR, 8)
VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt16, SHORT, 16)
VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt32, LONG, )
VISKORES_ATOMIC_OPS_FOR_TYPE(viskores::UInt64, LONG64, 64)

#undef VISKORES_ATOMIC_OPS_FOR_TYPE

VISKORES_EXEC_CONT inline viskores::Float32 AtomicAddImpl(
  viskores::Float32* address,
  viskores::Float32 value,
  viskores::MemoryOrder viskoresNotUsed(order))
{
  LONG assumed;
  LONG old = BitCast<LONG>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                      BitCast<LONG>(BitCast<viskores::Float32>(assumed) + value),
                                      assumed);
  } while (assumed != old);
  return BitCast<viskores::Float32>(old);
}

VISKORES_EXEC_CONT inline viskores::Float64 AtomicAddImpl(
  viskores::Float64* address,
  viskores::Float64 value,
  viskores::MemoryOrder viskoresNotUsed(order))
{
  LONG64 assumed;
  LONG64 old = BitCast<LONG64>(*address);
  do
  {
    assumed = old;
    old =
      _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                    BitCast<LONG64>(BitCast<viskores::Float64>(assumed) + value),
                                    assumed);
  } while (assumed != old);
  return BitCast<viskores::Float64>(old);
}

}
} // namespace viskores::detail

#else // gcc/clang for CPU

// Supports viskores::UInt8, viskores::UInt16, viskores::UInt32, viskores::UInt64

#include <cstdint>
#include <cstring>

namespace viskores
{
namespace detail
{

VISKORES_EXEC_CONT inline int GccAtomicMemOrder(viskores::MemoryOrder order)
{
  switch (order)
  {
    case viskores::MemoryOrder::Relaxed:
      return __ATOMIC_RELAXED;
    case viskores::MemoryOrder::Acquire:
      return __ATOMIC_ACQUIRE;
    case viskores::MemoryOrder::Release:
      return __ATOMIC_RELEASE;
    case viskores::MemoryOrder::AcquireAndRelease:
      return __ATOMIC_ACQ_REL;
    case viskores::MemoryOrder::SequentiallyConsistent:
      return __ATOMIC_SEQ_CST;
  }

  // Should never reach here, but avoid compiler warnings
  return __ATOMIC_SEQ_CST;
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicLoadImpl(T* const addr, viskores::MemoryOrder order)
{
  return __atomic_load_n(addr, GccAtomicMemOrder(order));
}

template <typename T>
VISKORES_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, viskores::MemoryOrder order)
{
  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, viskores::MemoryOrder order)
{
  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
}

#include <viskoresstd/bit_cast.h>

// TODO: Use enable_if to write one version for both Float32 and Float64.
VISKORES_EXEC_CONT inline viskores::Float32 AtomicAddImpl(viskores::Float32* addr,
                                                          viskores::Float32 arg,
                                                          viskores::MemoryOrder order)
{
  viskores::UInt32 expected = viskoresstd::bit_cast<viskores::UInt32>(*addr);
  viskores::UInt32 desired;

  do
  {
    desired = viskoresstd::bit_cast<viskores::UInt32>(
      viskoresstd::bit_cast<viskores::Float32>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<viskores::UInt32*>(addr),
                                 &expected, // reloads expected with *addr prior to the operation
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  // return the "old" value that was in the memory.
  return viskoresstd::bit_cast<viskores::Float32>(expected);
}

// TODO: Use enable_if to write one version for both Float32 and Float64.
VISKORES_EXEC_CONT inline viskores::Float64 AtomicAddImpl(viskores::Float64* addr,
                                                          viskores::Float64 arg,
                                                          viskores::MemoryOrder order)
{
  viskores::UInt64 expected = viskoresstd::bit_cast<viskores::UInt64>(*addr);
  viskores::UInt64 desired;

  do
  {
    desired = viskoresstd::bit_cast<viskores::UInt64>(
      viskoresstd::bit_cast<viskores::Float64>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<viskores::UInt64*>(addr),
                                 &expected, // reloads expected with *addr prior to the operation
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  // return the "old" value that was in the memory.
  return viskoresstd::bit_cast<viskores::Float64>(expected);
}
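
// A possible way to address the TODO above: one SFINAE-constrained overload that covers both
// floating-point widths. This is only an illustrative sketch (it assumes <type_traits> is
// available and is not an overload currently defined by this header):
//
//   template <typename T,
//             typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
//   VISKORES_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, viskores::MemoryOrder order)
//   {
//     using BitsType = typename std::conditional<sizeof(T) == sizeof(viskores::UInt32),
//                                                viskores::UInt32,
//                                                viskores::UInt64>::type;
//     BitsType expected = viskoresstd::bit_cast<BitsType>(*addr);
//     BitsType desired;
//     do
//     {
//       desired = viskoresstd::bit_cast<BitsType>(viskoresstd::bit_cast<T>(expected) + arg);
//     } while (!__atomic_compare_exchange_n(reinterpret_cast<BitsType*>(addr),
//                                           &expected, // refreshed with *addr on failure
//                                           desired,
//                                           false,
//                                           GccAtomicMemOrder(order),
//                                           GccAtomicMemOrder(order)));
//     return viskoresstd::bit_cast<T>(expected); // value before the add
//   }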

template <typename T>
VISKORES_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, viskores::MemoryOrder order)
{
  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VISKORES_EXEC_CONT inline T AtomicNotImpl(T* addr, viskores::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VISKORES_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                         T* expected,
                                                         T desired,
                                                         viskores::MemoryOrder order)
{
  return __atomic_compare_exchange_n(
    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
}
}
} // namespace viskores::detail

#endif // gcc/clang

namespace viskores
{

namespace detail
{

// Provides the integer type with the opposite signedness of T. This is used to provide
// overloads of the atomic functions that accept, for example, a signed operand for an
// unsigned atomic value.
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;

} // namespace detail

/// \brief The preferred type to use for atomic operations.
///
using AtomicTypePreferred = viskores::UInt32;

/// \brief A list of types that can be used with atomic operations.
///
using AtomicTypesSupported = viskores::List<viskores::UInt32, viskores::UInt64>;

/// \brief Atomic function to load a value from a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicLoad(T* const pointer,
                                       viskores::MemoryOrder order = viskores::MemoryOrder::Acquire)
{
  return detail::AtomicLoadImpl(pointer, order);
}

/// \brief Atomic function to save a value to a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline void
AtomicStore(T* pointer, T value, viskores::MemoryOrder order = viskores::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, value, order);
}
template <typename T>
VISKORES_EXEC_CONT inline void AtomicStore(
  T* pointer,
  detail::OppositeSign<T> value,
  viskores::MemoryOrder order = viskores::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
}
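
// A common pattern with AtomicStore/AtomicLoad above is release/acquire publication: one
// thread fills a non-atomic payload and then sets a flag with MemoryOrder::Release, while a
// consumer spins on the flag with MemoryOrder::Acquire before reading the payload. A minimal
// sketch (the names payload, readyFlag, ComputeValue, and UseValue are illustrative and not
// part of this header):
//
//   // Producer:
//   payload = ComputeValue(); // plain, non-atomic store
//   viskores::AtomicStore(&readyFlag, viskores::UInt32{ 1 }, viskores::MemoryOrder::Release);
//
//   // Consumer:
//   while (viskores::AtomicLoad(&readyFlag, viskores::MemoryOrder::Acquire) == 0)
//   {
//   }
//   UseValue(payload); // guaranteed to observe the producer's write to payload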

/// \brief Atomic function to add a value to a shared memory location.
///
/// Returns the value that was stored at the memory location before the addition.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  T operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, operand, order);
}
template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
VISKORES_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  detail::OppositeSign<T> operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
}

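// For example, AtomicAdd above can maintain a shared counter that is safely incremented from
// many threads; the return value is the counter value before this thread's increment. A
// minimal sketch (the counter variable is illustrative, not part of this header):
//
//   viskores::UInt32 before = viskores::AtomicAdd(&counter, viskores::UInt32{ 1 });
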
/// \brief Atomic function to AND bits to a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  T operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, operand, order);
}
template <typename T>
VISKORES_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  detail::OppositeSign<T> operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to OR bits to a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicOr(
  T* pointer,
  T operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, operand, order);
}
template <typename T>
VISKORES_EXEC_CONT inline T AtomicOr(
  T* pointer,
  detail::OppositeSign<T> operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to XOR bits to a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicXor(
  T* pointer,
  T operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, operand, order);
}
template <typename T>
VISKORES_EXEC_CONT inline T AtomicXor(
  T* pointer,
  detail::OppositeSign<T> operand,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to NOT the bits of a shared memory location.
///
template <typename T>
VISKORES_EXEC_CONT inline T AtomicNot(
  T* pointer,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicNotImpl(pointer, order);
}

/// \brief Atomic function that replaces a value given a condition.
///
/// If the value at `shared` equals `*expected`, the value at `shared` is replaced with
/// `desired` and `true` is returned. Otherwise, `*expected` is set to the value currently at
/// `shared` and `false` is returned.
///
template <typename T>
VISKORES_EXEC_CONT inline bool AtomicCompareExchange(
  T* shared,
  T* expected,
  T desired,
  viskores::MemoryOrder order = viskores::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
}
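
// A typical use of AtomicCompareExchange above is a read-modify-write loop: load the current
// value, compute a replacement, and retry until the exchange succeeds (on failure, `expected`
// is refreshed with the value currently in memory). The following sketch is illustrative only
// and not a function defined in this header; it implements an atomic minimum:
//
//   template <typename T>
//   VISKORES_EXEC_CONT void AtomicMin(T* pointer, T value)
//   {
//     T current = viskores::AtomicLoad(pointer);
//     while (value < current)
//     {
//       if (viskores::AtomicCompareExchange(pointer, &current, value))
//       {
//         break;
//       }
//     }
//   }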

} // namespace viskores

#endif //viskores_Atomic_h