Diffstat (limited to 'src/nxt_atomic.h')
-rw-r--r--  src/nxt_atomic.h  268
1 file changed, 268 insertions, 0 deletions
diff --git a/src/nxt_atomic.h b/src/nxt_atomic.h
new file mode 100644
index 00000000..b3a1e95a
--- /dev/null
+++ b/src/nxt_atomic.h
@@ -0,0 +1,268 @@

/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */

#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

#define \
nxt_atomic_cmp_set(lock, cmp, set) \
    __sync_bool_compare_and_swap(lock, cmp, set)


#define \
nxt_atomic_xchg(lock, set) \
    __sync_lock_test_and_set(lock, set)


#define \
nxt_atomic_fetch_add(value, add) \
    __sync_fetch_and_add(value, add)


#define \
nxt_atomic_try_lock(lock) \
    nxt_atomic_cmp_set(lock, 0, 1)


#define \
nxt_atomic_release(lock) \
    __sync_lock_release(lock)


#if (__i386__ || __i386 || __amd64__ || __amd64)
#define \
nxt_cpu_pause() \
    __asm__ ("pause")

#else
#define \
nxt_cpu_pause()
#endif


#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


#define \
nxt_atomic_cmp_set(lock, cmp, set) \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)


#define \
nxt_atomic_xchg(lock, set) \
    atomic_add_swap(lock, set)


#define \
nxt_atomic_fetch_add(value, add) \
    (atomic_add_long_nv(value, add) - add)

/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by a
 *    #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore
 * barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and these are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: all load
 * instructions in a critical section are followed by an implicit #LoadStore
 * barrier and all store instructions are followed by an implicit
 * #StoreStore barrier.
 */

#define \
nxt_atomic_try_lock(lock) \
    nxt_atomic_cmp_set(lock, 0, 1)


#define \
nxt_atomic_release(lock) \
    *lock = 0;


/*
 * "rep; nop" is used instead of "pause" to avoid the "[ PAUSE ]" hardware
 * capability that the linker would otherwise add, since Solaris ld.so.1
 * does not know about it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000 [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define \
nxt_cpu_pause() \
    __asm__ ("rep; nop")

#else
#define \
nxt_cpu_pause()
#endif


/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in MacOSX 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier()
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems: the LOCK prefix is
 *    omitted on single-CPU systems, but nowadays MacOSX systems are at
 *    least dual-core.  Thus these indirect calls just add overhead
 *    compared with the inlined atomic operations supported by GCC and
 *    Clang on modern MacOSX systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define \
nxt_atomic_xchg(lock, set) \
    __fetch_and_swaplp(lock, set)


#define \
nxt_atomic_fetch_add(value, add) \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define \
nxt_atomic_xchg(lock, set) \
    __fetch_and_swap(lock, set)


#define \
nxt_atomic_fetch_add(value, add) \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is a "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * A "lwsync" does not set a #StoreLoad barrier, so it cannot be used after
 * this pair: a subsequent load inside the critical section can be performed
 * after the "ldarx" instruction but before the "stdcx" instruction.
 * However, that load will fetch correct data, because otherwise the
 * "ldarx/stdcx" pair will fail and the data will be discarded.
 * Nevertheless, an "isync" instruction is used to be safe.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also the faster __isync(), an "isync" instruction.  This instruction
 * is not a memory barrier but an instruction barrier.  An "isync"
 * instruction causes the processor to complete execution of all previous
 * instructions and then to discard instructions (which may have begun
 * execution) following the "isync".  After the "isync" is executed, the
 * following instructions then begin execution.  The "isync" is used to
 * ensure that the loads following entry into a critical section are not
 * performed (because of aggressive out-of-order or speculative execution
 * in the processor) until the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


#define \
nxt_atomic_release(lock) \
    do { __lwsync(); *lock = 0; } while (0)


#define \
nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */


#endif /* _NXT_ATOMIC_H_INCLUDED_ */
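The three-barrier contract stated at the top of the file exists to support spin locks.  As an editorial sketch (not code from this commit), here is that pattern written directly against the GCC __sync builtins that the first branch wraps, so it compiles standalone; the names spinlock_t, spin_lock(), spin_unlock(), and cpu_pause() are illustrative only.

#include <stdint.h>

typedef volatile uintptr_t  spinlock_t;    /* same shape as nxt_atomic_t */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define cpu_pause()  __asm__ ("pause")     /* mirrors nxt_cpu_pause() */
#else
#define cpu_pause()
#endif

static void
spin_lock(spinlock_t *lock)
{
    for ( ;; ) {
        /* Acquire: the CAS is a full barrier, so loads and stores in the
         * critical section cannot be reordered above the lock. */
        if (__sync_bool_compare_and_swap(lock, 0, 1)) {
            return;
        }

        /* Spin on plain loads until the lock looks free, then retry the
         * CAS; pausing keeps the spin polite to a sibling hyper-thread. */
        do {
            cpu_pause();
        } while (*lock != 0);
    }
}

static void
spin_unlock(spinlock_t *lock)
{
    /* Release: stores in the critical section cannot sink below this. */
    __sync_lock_release(lock);
}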
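One subtlety across the branches: nxt_atomic_fetch_add() returns the old value, which __sync_fetch_and_add() does natively, while Solaris' atomic_add_long_nv() returns the new value, hence the "- add" correction in that branch.  A tiny hypothetical test program illustrating the convention with the GCC builtin:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    volatile uintptr_t  n = 5;

    /* __sync_fetch_and_add() returns the value before the addition. */
    uintptr_t  old = __sync_fetch_and_add(&n, 3);

    assert(old == 5);   /* old value, as nxt_atomic_fetch_add() promises */
    assert(n == 8);     /* new value lives in the target */

    /* The Solaris branch computes atomic_add_long_nv(value, add) - add:
     * that call returns the NEW value, and subtracting the addend
     * recovers the old one. */
    return 0;
}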
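For readers who think in C11 <stdatomic.h> terms, the acquire/release contract maps onto explicit memory orders roughly as follows.  This is an editorial correspondence sketch, not code from this commit; the c11_* names are hypothetical.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef atomic_uintptr_t  c11_lock_t;

/* nxt_atomic_try_lock(): a CAS with acquire semantics on success. */
static bool
c11_try_lock(c11_lock_t *lock)
{
    uintptr_t  expected = 0;

    return atomic_compare_exchange_strong_explicit(lock, &expected, 1,
                                                   memory_order_acquire,
                                                   memory_order_relaxed);
}

/* nxt_atomic_release(): a plain store with release semantics, matching
 * the "no explicit barrier needed on TSO" reasoning in the Solaris
 * comment. */
static void
c11_release(c11_lock_t *lock)
{
    atomic_store_explicit(lock, 0, memory_order_release);
}

/* nxt_atomic_xchg(): an exchange with (at least) acquire semantics. */
static uintptr_t
c11_xchg(c11_lock_t *lock, uintptr_t set)
{
    return atomic_exchange_explicit(lock, set, memory_order_acquire);
}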