Skip to content

Commit 49d7236

Browse files
encukoufthainvstinnerglaubitz
authored
gh-127545: Add _Py_ALIGNED_DEF(N, T) and use it for PyObject (GH-135209)
* Replace _Py_ALIGN_AS(V) by _Py_ALIGNED_DEF(N, T) This is now a common façade for the various `_Alignas` alternatives, which behave in interesting ways -- see the source comment. The new macro (and MSVC's `__declspec(align)`) should not be used on a variable/member declaration that includes a struct declaraton. A workaround is to separate the struct definition. Do that for `PyASCIIObject.state`. * Specify minimum PyGC_Head and PyObject alignment As documented in InternalDocs/garbage_collector.md, the garbage collector stores flags in the least significant two bits of the _gc_prev pointer in struct PyGC_Head. Consequently, this pointer is only capable of storing a location that's aligned to a 4-byte boundary. Encode this requirement using _Py_ALIGNED_DEF. This patch fixes a segfault in m68k, which was previously investigated by Adrian Glaubitz here: https://lists.debian.org/debian-68k/2024/11/msg00020.html https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1087600 Original patch (using the GCC-only Py_ALIGNED) by Finn Thain. Co-authored-by: Finn Thain <fthain@linux-m68k.org> Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
1 parent 2b8b477 commit 49d7236

File tree

6 files changed

+122
-106
lines changed

6 files changed

+122
-106
lines changed

Include/Python.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,6 @@
5959
# include <intrin.h> // __readgsqword()
6060
#endif
6161

62-
// Suppress known warnings in Python header files.
63-
#if defined(_MSC_VER)
64-
// Warning that alignas behaviour has changed. Doesn't affect us, because we
65-
// never relied on the old behaviour.
66-
#pragma warning(push)
67-
#pragma warning(disable: 5274)
68-
#endif
69-
7062
// Include Python header files
7163
#include "pyport.h"
7264
#include "pymacro.h"
@@ -146,9 +138,4 @@
146138
#include "cpython/pyfpe.h"
147139
#include "cpython/tracemalloc.h"
148140

149-
// Restore warning filter
150-
#ifdef _MSC_VER
151-
#pragma warning(pop)
152-
#endif
153-
154141
#endif /* !Py_PYTHON_H */

Include/cpython/unicodeobject.h

Lines changed: 59 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
4747

4848
/* --- Unicode Type ------------------------------------------------------- */
4949

50+
struct _PyUnicodeObject_state {
51+
/* If interned is non-zero, the two references from the
52+
dictionary to this object are *not* counted in ob_refcnt.
53+
The possible values here are:
54+
0: Not Interned
55+
1: Interned
56+
2: Interned and Immortal
57+
3: Interned, Immortal, and Static
58+
This categorization allows the runtime to determine the right
59+
cleanup mechanism at runtime shutdown. */
60+
#ifdef Py_GIL_DISABLED
61+
// Needs to be accessed atomically, so can't be a bit field.
62+
unsigned char interned;
63+
#else
64+
unsigned int interned:2;
65+
#endif
66+
/* Character size:
67+
68+
- PyUnicode_1BYTE_KIND (1):
69+
70+
* character type = Py_UCS1 (8 bits, unsigned)
71+
* all characters are in the range U+0000-U+00FF (latin1)
72+
* if ascii is set, all characters are in the range U+0000-U+007F
73+
(ASCII), otherwise at least one character is in the range
74+
U+0080-U+00FF
75+
76+
- PyUnicode_2BYTE_KIND (2):
77+
78+
* character type = Py_UCS2 (16 bits, unsigned)
79+
* all characters are in the range U+0000-U+FFFF (BMP)
80+
* at least one character is in the range U+0100-U+FFFF
81+
82+
- PyUnicode_4BYTE_KIND (4):
83+
84+
* character type = Py_UCS4 (32 bits, unsigned)
85+
* all characters are in the range U+0000-U+10FFFF
86+
* at least one character is in the range U+10000-U+10FFFF
87+
*/
88+
unsigned int kind:3;
89+
/* Compact is with respect to the allocation scheme. Compact unicode
90+
objects only require one memory block while non-compact objects use
91+
one block for the PyUnicodeObject struct and another for its data
92+
buffer. */
93+
unsigned int compact:1;
94+
/* The string only contains characters in the range U+0000-U+007F (ASCII)
95+
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
96+
set, use the PyASCIIObject structure. */
97+
unsigned int ascii:1;
98+
/* The object is statically allocated. */
99+
unsigned int statically_allocated:1;
100+
#ifndef Py_GIL_DISABLED
101+
/* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
102+
4 bytes (see issue gh-63736 on m68k) */
103+
unsigned int :24;
104+
#endif
105+
};
106+
50107
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
51108
structure. state.ascii and state.compact are set, and the data
52109
immediately follow the structure. utf8_length can be found
@@ -99,67 +156,8 @@ typedef struct {
99156
PyObject_HEAD
100157
Py_ssize_t length; /* Number of code points in the string */
101158
Py_hash_t hash; /* Hash value; -1 if not set */
102-
#ifdef Py_GIL_DISABLED
103-
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
104-
In the non-free-threaded build, we'll use explicit padding instead */
105-
_Py_ALIGN_AS(4)
106-
#endif
107-
struct {
108-
/* If interned is non-zero, the two references from the
109-
dictionary to this object are *not* counted in ob_refcnt.
110-
The possible values here are:
111-
0: Not Interned
112-
1: Interned
113-
2: Interned and Immortal
114-
3: Interned, Immortal, and Static
115-
This categorization allows the runtime to determine the right
116-
cleanup mechanism at runtime shutdown. */
117-
#ifdef Py_GIL_DISABLED
118-
// Needs to be accessed atomically, so can't be a bit field.
119-
unsigned char interned;
120-
#else
121-
unsigned int interned:2;
122-
#endif
123-
/* Character size:
124-
125-
- PyUnicode_1BYTE_KIND (1):
126-
127-
* character type = Py_UCS1 (8 bits, unsigned)
128-
* all characters are in the range U+0000-U+00FF (latin1)
129-
* if ascii is set, all characters are in the range U+0000-U+007F
130-
(ASCII), otherwise at least one character is in the range
131-
U+0080-U+00FF
132-
133-
- PyUnicode_2BYTE_KIND (2):
134-
135-
* character type = Py_UCS2 (16 bits, unsigned)
136-
* all characters are in the range U+0000-U+FFFF (BMP)
137-
* at least one character is in the range U+0100-U+FFFF
138-
139-
- PyUnicode_4BYTE_KIND (4):
140-
141-
* character type = Py_UCS4 (32 bits, unsigned)
142-
* all characters are in the range U+0000-U+10FFFF
143-
* at least one character is in the range U+10000-U+10FFFF
144-
*/
145-
unsigned int kind:3;
146-
/* Compact is with respect to the allocation scheme. Compact unicode
147-
objects only require one memory block while non-compact objects use
148-
one block for the PyUnicodeObject struct and another for its data
149-
buffer. */
150-
unsigned int compact:1;
151-
/* The string only contains characters in the range U+0000-U+007F (ASCII)
152-
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
153-
set, use the PyASCIIObject structure. */
154-
unsigned int ascii:1;
155-
/* The object is statically allocated. */
156-
unsigned int statically_allocated:1;
157-
#ifndef Py_GIL_DISABLED
158-
/* Padding to ensure that PyUnicode_DATA() is always aligned to
159-
4 bytes (see issue gh-63736 on m68k) */
160-
unsigned int :24;
161-
#endif
162-
} state;
159+
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
160+
_Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
163161
} PyASCIIObject;
164162

165163
/* Non-ASCII strings allocated through PyUnicode_New use the

Include/internal/pycore_interp_structs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,11 @@ struct atexit_state {
159159
typedef struct {
160160
// Tagged pointer to next object in the list.
161161
// 0 means the object is not tracked
162-
uintptr_t _gc_next;
162+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
163163

164164
// Tagged pointer to previous object in the list.
165165
// Lowest two bits are used for flags documented later.
166+
// Those bits are made available by the struct's minimum alignment.
166167
uintptr_t _gc_prev;
167168
} PyGC_Head;
168169

Include/object.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
101101
#define PyObject_VAR_HEAD PyVarObject ob_base;
102102
#define Py_INVALID_SIZE (Py_ssize_t)-1
103103

104+
/* PyObjects are given a minimum alignment so that the least significant bits
105+
* of an object pointer become available for other purposes.
106+
* This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
107+
*/
108+
#define _PyObject_MIN_ALIGNMENT 4
109+
104110
/* Nothing is actually declared to be a PyObject, but every pointer to
105111
* a Python object can be cast to a PyObject*. This is inheritance built
106112
* by hand. Similarly every pointer to a variable-size Python object can,
@@ -136,6 +142,7 @@ struct _object {
136142
#else
137143
Py_ssize_t ob_refcnt;
138144
#endif
145+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
139146
};
140147
#ifdef _MSC_VER
141148
__pragma(warning(pop))
@@ -153,7 +160,7 @@ struct _object {
153160
// ob_tid stores the thread id (or zero). It is also used by the GC and the
154161
// trashcan mechanism as a linked list pointer and by the GC to store the
155162
// computed "gc_refs" refcount.
156-
uintptr_t ob_tid;
163+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
157164
uint16_t ob_flags;
158165
PyMutex ob_mutex; // per-object lock
159166
uint8_t ob_gc_bits; // gc-related state

Include/pymacro.h

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,44 +24,66 @@
2424
#endif
2525

2626

27-
// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
28-
// We currently use alignas for free-threaded builds only; additional compat
29-
// checking would be great before we add it to the default build.
30-
// Standards/compiler support:
27+
// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
28+
//
29+
// `N`: the desired minimum alignment, an integer literal, number of bytes
30+
// `T`: the type of the defined variable
31+
// (or a type with at least the defined variable's alignment)
32+
//
33+
// May not be used on a struct definition.
34+
//
35+
// Standards/compiler support for `alignas` alternatives:
3136
// - `alignas` is a keyword in C23 and C++11.
3237
// - `_Alignas` is a keyword in C11
3338
// - GCC & clang has __attribute__((aligned))
3439
// (use that for older standards in pedantic mode)
3540
// - MSVC has __declspec(align)
3641
// - `_Alignas` is common C compiler extension
37-
// Older compilers may name it differently; to allow compilation on such
38-
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
42+
// Older compilers may name `alignas` differently; to allow compilation on such
43+
// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
3944
// defined. Note that defining it wrong (including defining it to nothing) will
4045
// cause ABI incompatibilities.
41-
#ifdef Py_GIL_DISABLED
42-
# ifndef _Py_ALIGN_AS
43-
# ifdef __cplusplus
44-
# if __cplusplus >= 201103L
45-
# define _Py_ALIGN_AS(V) alignas(V)
46-
# elif defined(__GNUC__) || defined(__clang__)
47-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
48-
# elif defined(_MSC_VER)
49-
# define _Py_ALIGN_AS(V) __declspec(align(V))
50-
# else
51-
# define _Py_ALIGN_AS(V) alignas(V)
52-
# endif
53-
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
54-
# define _Py_ALIGN_AS(V) alignas(V)
55-
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
56-
# define _Py_ALIGN_AS(V) _Alignas(V)
57-
# elif (defined(__GNUC__) || defined(__clang__))
58-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
59-
# elif defined(_MSC_VER)
60-
# define _Py_ALIGN_AS(V) __declspec(align(V))
61-
# else
62-
# define _Py_ALIGN_AS(V) _Alignas(V)
63-
# endif
64-
# endif
46+
//
47+
// Behavior of `alignas` alternatives:
48+
// - `alignas` & `_Alignas`:
49+
// - Can be used multiple times; the greatest alignment applies.
50+
// - It is an *error* if the combined effect of all `alignas` modifiers would
51+
// decrease the alignment.
52+
// - Takes types or numbers.
53+
// - May not be used on a struct definition, unless also defining a variable.
54+
// - `__declspec(align)`:
55+
// - Has no effect if it would decrease alignment.
56+
// - Only takes an integer literal.
57+
// - May be used on struct or variable definitions.
58+
// However, when defining both the struct and the variable at once,
59+
// `declspec(aligned)` causes compiler warning 5274 and possible ABI
60+
// incompatibility.
61+
// - ` __attribute__((aligned))`:
62+
// - Has no effect if it would decrease alignment.
63+
// - Takes types or numbers
64+
// - May be used on struct or variable definitions.
65+
#ifndef _Py_ALIGNED_DEF
66+
# ifdef __cplusplus
67+
# if __cplusplus >= 201103L
68+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
69+
# elif defined(__GNUC__) || defined(__clang__)
70+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
71+
# elif defined(_MSC_VER)
72+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
73+
# else
74+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
75+
# endif
76+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
77+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
78+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
79+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
80+
# elif (defined(__GNUC__) || defined(__clang__))
81+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
82+
# elif defined(_MSC_VER)
83+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
84+
# else
85+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
86+
# endif
6587
#endif
6688

6789
/* Minimum value between x and y */
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix crash when building on Linux/m68k.

0 commit comments

Comments
 (0)