/*-
* Copyright (c) 2014-present MongoDB, Inc.
* Copyright (c) 2008-2014 WiredTiger, Inc.
* All rights reserved.
*
* See the file LICENSE for redistribution information.
*/
/*
 * This macro doesn't do anything and is used for annotation only. We use it to highlight that a
 * variable is used in lock-less inter-thread communication - via mechanisms like memory barriers
 * and compare_and_swap - and requires caution when handled. It is designed to look like a type
 * qualifier.
 *
 * Example usage:
 *     wt_shared volatile bool blkcache_exiting;
 */
#define wt_shared
/*
* Publish a value to a shared location. All previous stores must complete before the value is made
* public.
*/
#define WT_PUBLISH(v, val)  \
    do {                    \
        WT_WRITE_BARRIER(); \
        (v) = (val);        \
    } while (0)
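/*
 * Example usage (an illustrative sketch, not from this header; "payload" and "ready" are
 * hypothetical wt_shared variables):
 *
 *     payload = compute_payload();
 *     WT_PUBLISH(ready, true);
 *
 * A reader pairs this with WT_ORDERED_READ on "ready": once it observes ready == true, the
 * earlier payload store is guaranteed to be visible as well.
 */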
/*
* WT_READ_ONCE --
*
* Ensure a single read from memory in the source code produces a single read from memory in the
* compiled output.
*
* The compiler is allowed to optimize loads from memory in multiple ways such as "load fusing",
* where the compiler takes multiple reads from a memory location and merges them into a single read
* instruction, and "invented loads" where the compiler takes a single load from memory and converts
* it into multiple read instructions.
*
* WiredTiger has many algorithms where threads are allowed to concurrently access and modify the
* same memory location, but to do this safely we need to precisely control how reads to memory are
* performed. This macro gives us control over this.
*
* GCC and Clang have a __typeof__ compiler builtin which allows us to temporarily cast the value
* being read as a volatile and achieve volatile semantics. For other compilers we'll fall back on
* inserting a read barrier after the read (our pre-existing implementation) which prevents invented
* and fused loads for this variable in the code following the expression.
*
 * FIXME-WT-11718 - Once Windows build machines that support C11 _Generic are available, this
 * macro will be updated to use _Generic on all platforms.
*/
#if defined(__GNUC__) || defined(__clang__)
#define WT_READ_ONCE(v, val) (v) = (*(volatile __typeof__(val) *)&(val))
#else
#define WT_READ_ONCE(v, val) WT_ORDERED_READ(v, val)
#endif
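/*
 * Example usage (an illustrative sketch; "run" is a hypothetical wt_shared flag):
 *
 *     bool my_run;
 *     WT_READ_ONCE(my_run, run);
 *
 * Inside a polling loop this matters: without the volatile cast (or barrier), the compiler could
 * fuse repeated reads of "run" into a single load hoisted out of the loop, never observing a
 * concurrent writer clearing the flag.
 */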
/*
* Read a shared location and guarantee that subsequent reads do not see any earlier state.
*/
#define WT_ORDERED_READ(v, val) \
    do {                        \
        (v) = (val);            \
        WT_READ_BARRIER();      \
    } while (0)
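/*
 * Example usage (an illustrative sketch, the reader side of the WT_PUBLISH example above):
 *
 *     bool my_ready;
 *     WT_ORDERED_READ(my_ready, ready);
 *     if (my_ready)
 *         consume(payload);
 *
 * The read barrier keeps the read of "payload" from being reordered before the read of "ready".
 */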
/*
 * On some architectures with weak memory ordering, the CPU can reorder reads across full barriers
 * issued by other threads. This macro guarantees that subsequent reads do not see any earlier
 * state on those architectures.
 *
 * !!! This is a temporary solution to avoid a performance regression on x86. Do not use this
 * macro; it will be revisited later.
 */
#define WT_ORDERED_READ_WEAK_MEMORDER(v, val) \
    do {                                      \
        (v) = (val);                          \
        WT_READ_BARRIER_WEAK_MEMORDER();      \
    } while (0)
/*
* Atomic versions of the flag set/clear macros.
*/
#define FLD_ISSET_ATOMIC_16(field, mask) ((field) & (uint16_t)(mask))
#define FLD_SET_ATOMIC_16(field, mask)                                             \
    do {                                                                           \
        uint16_t __orig;                                                           \
        if (FLD_ISSET_ATOMIC_16((field), (mask)))                                  \
            break;                                                                 \
        do {                                                                       \
            __orig = (field);                                                      \
        } while (!__wt_atomic_cas16(&(field), __orig, __orig | (uint16_t)(mask))); \
    } while (0)
#define FLD_CLR_ATOMIC_16(field, mask)                                                \
    do {                                                                              \
        uint16_t __orig;                                                              \
        if (!FLD_ISSET_ATOMIC_16((field), (mask)))                                    \
            break;                                                                    \
        do {                                                                          \
            __orig = (field);                                                         \
        } while (!__wt_atomic_cas16(&(field), __orig, __orig & (uint16_t)(~(mask)))); \
    } while (0)
#define F_ISSET_ATOMIC_16(p, mask) FLD_ISSET_ATOMIC_16((p)->flags_atomic, mask)
#define F_CLR_ATOMIC_16(p, mask) FLD_CLR_ATOMIC_16((p)->flags_atomic, mask)
#define F_SET_ATOMIC_16(p, mask) FLD_SET_ATOMIC_16((p)->flags_atomic, mask)
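/*
 * Example usage (an illustrative sketch; the structure and WT_FOO_DIRTY flag are hypothetical):
 *
 *     struct __wt_foo {
 *         wt_shared uint16_t flags_atomic;
 *     } *foo;
 *
 *     F_SET_ATOMIC_16(foo, WT_FOO_DIRTY);
 *     if (F_ISSET_ATOMIC_16(foo, WT_FOO_DIRTY))
 *         F_CLR_ATOMIC_16(foo, WT_FOO_DIRTY);
 *
 * The set/clear macros loop on a 16-bit compare-and-swap, so concurrent updates to other bits in
 * the same flags_atomic field are not lost.
 */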
/*
* Cache line alignment.
*/
#if defined(__PPC64__) || defined(PPC64)
#define WT_CACHE_LINE_ALIGNMENT 128
#elif defined(__s390x__)
#define WT_CACHE_LINE_ALIGNMENT 256
#else
#define WT_CACHE_LINE_ALIGNMENT 64
#endif
/*
 * Pad a structure so that the elements of an array of structures land on separate cache lines.
 *
 * Note that we avoid compiler structure alignment because that requires allocating aligned blocks
 * of memory, and alignment pollutes any other type that contains an aligned field. It is possible
 * that a hot field positioned before this one will share its cache line, but not if that field is
 * also padded.
 *
 * This padding has a small impact on portability as well: it relies on an anonymous union, which
 * is supported under C11, earlier versions of the GNU standard, and MSVC versions as early as
 * 2003.
 */
#define WT_CACHE_LINE_PAD_BEGIN \
    union {                     \
        struct {
#define WT_CACHE_LINE_PAD_END                \
    }                                        \
    ;                                        \
    char __padding[WT_CACHE_LINE_ALIGNMENT]; \
    }                                        \
    ;
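/*
 * Example usage (an illustrative sketch; the per-thread counter structure is hypothetical):
 *
 *     struct __wt_counter {
 *         WT_CACHE_LINE_PAD_BEGIN
 *         wt_shared uint64_t value;
 *         WT_CACHE_LINE_PAD_END
 *     };
 *
 * Each element of an array of __wt_counter structures now occupies at least
 * WT_CACHE_LINE_ALIGNMENT bytes, so per-thread counters do not false-share a cache line.
 */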