-
Notifications
You must be signed in to change notification settings - Fork 0
/
malloc.go
1480 lines (1367 loc) · 57 KB
/
malloc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Memory allocator.
// 内存分配器
//
// This was originally based on tcmalloc, but has diverged quite a bit.
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
// 这最初是基于tcmalloc,但已经分歧了很多。
// The main allocator works in runs of pages.
// Small allocation sizes (up to and including 32 kB) are
// rounded to one of about 70 size classes, each of which
// has its own free set of objects of exactly that size.
// Any free page of memory can be split into a set of objects
// of one size class, which are then managed using a free bitmap.
// 内存分配器主要是基于页分配。小的分配(<=32kB)被舍入到大约70个大小类中的一个,每个类都有自己的大小相同的空闲对象集。
// 任何空闲内存页都可以拆分为一个大小类的对象集,然后使用可用bitmap管理这些对象。
//
// The allocator's data structures are:
//
// fixalloc: a free-list allocator for fixed-size off-heap objects,
// used to manage storage used by the allocator.
// mheap: the malloc heap, managed at page (8192-byte) granularity.
// mspan: a run of pages managed by the mheap.
// mcentral: collects all spans of a given size class.
// mcache: a per-P cache of mspans with free space.
// mstats: allocation statistics.
// 分配器的数据结构如下:
// fixalloc: 固定大小的堆外对象的自由列表分配器,用于管理分配器使用的存储。
// mheap: 堆分配器,以内存页(8192byte)粒度进行管理。全局,负责从os申请、释放内存。
// mspan: 由mheap管理的一系列页面。将一个或多个内存页形成一种mspan,一种mspan只负责分配固定size的内存。
// mcentral: 所有给定大小类的mspan集合。全局,将内存按mspan划分,统一管理。访问需加锁。
// mcache: 每个P中的mspans。每个P持有自己的mcache,于是获取内存的时候,无锁访问mcache。
//
// Allocating a small object proceeds up a hierarchy of caches:
// 分配小的对象,按照caches的等级来:
//
// 1. Round the size up to one of the small size classes
// and look in the corresponding mspan in this P's mcache.
// Scan the mspan's free bitmap to find a free slot.
// If there is a free slot, allocate it.
// This can all be done without acquiring a lock.
// 1.直接访问P的cache,查看mspan空闲bitmap,如果有空闲的slot则分配,无需加锁,mache是线程所拥有。
//
// 2. If the mspan has no free slots, obtain a new mspan
// from the mcentral's list of mspans of the required size
// class that have free space.
// Obtaining a whole span amortizes the cost of locking
// the mcentral.
// 2.mspan没有空闲的slot,将从mcentral列表获得需要尺寸的类,需要加锁。
//
// 3. If the mcentral's mspan list is empty, obtain a run
// of pages from the mheap to use for the mspan.
// 3.mcentral的mspan列表空的话,就要从mheap获取页。
//
// 4. If the mheap is empty or has no page runs large enough,
// allocate a new group of pages (at least 1MB) from the
// operating system. Allocating a large run of pages
// amortizes the cost of talking to the operating system.
// 4.如果mheap空或者没有足够的页大小可用,就要从操作系统分配页,至少1MB。
//
// Sweeping an mspan and freeing objects on it proceeds up a similar
// hierarchy:
// 清除mspan和释放对象也是按照相似的等级:
//
// 1. If the mspan is being swept in response to allocation, it
// is returned to the mcache to satisfy the allocation.
// 1.如果mspan在响应分配时被扫描,则返回到mcache以满足分配。
//
// 2. Otherwise, if the mspan still has allocated objects in it,
// it is placed on the mcentral free list for the mspan's size
// class.
// 2.否则,如果mspan仍然在其中分配了对象,则将其放置在mspan的大小类的mcentral空闲列表中。
//
// 3. Otherwise, if all objects in the mspan are free, the mspan
// is now "idle", so it is returned to the mheap and no longer
// has a size class.
// This may coalesce it with adjacent idle mspans.
// 3.否则,如果mspan中的所有对象都是空闲的,则mspan现在处于“空闲”状态,因此它将返回到mheap并且不再具有大小类。这可能将其与相邻的空闲mspans合并。
//
// 4. If an mspan remains idle for long enough, return its pages
// to the operating system.
// 4.如果mspan保持空闲的时间足够长,则将其页面返回到操作系统。
//
// Allocating and freeing a large object uses the mheap
// directly, bypassing the mcache and mcentral.
// 分配和释放大对象直接使用mheap,绕过mcache和mcentral。
//
// Free object slots in an mspan are zeroed only if mspan.needzero is
// false. If needzero is true, objects are zeroed as they are
// allocated. There are various benefits to delaying zeroing this way:
// 仅当mspan.needzero为false时,mspan中的自由对象槽才会归零。如果needzero为true,则对象在分配时归零。以这种方式延迟归零有很多好处:
//
// 1. Stack frame allocation can avoid zeroing altogether.
// 1.堆栈帧分配可以完全避免归零。
//
// 2. It exhibits better temporal locality, since the program is
// probably about to write to the memory.
// 2.它表现出更好的时间局部性,因为程序可能要写入内存。
//
// 3. We don't zero pages that never get reused.
// 3.我们不会置零永远不会被重用的页面。
// Virtual memory layout
// 虚拟内存布局
//
// The heap consists of a set of arenas, which are 64MB on 64-bit and
// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also
// aligned to the arena size.
// 堆由一组arena组成,64位为64MB,32位为4MB。每个arena的起始地址和arena大小对齐。
//
// Each arena has an associated heapArena object that stores the
// metadata for that arena: the heap bitmap for all words in the arena
// and the span map for all pages in the arena. heapArena objects are
// themselves allocated off-heap.
// 每个arena都有一个关联的heapArena对象,用于存储该arena的元数据:
// heap bitmap记录arena中所有字(word)的信息,span map记录arena中所有页的信息。
//
// Since arenas are aligned, the address space can be viewed as a
// series of arena frames. The arena map (mheap_.arenas) maps from
// arena frame number to *heapArena, or nil for parts of the address
// space not backed by the Go heap. The arena map is structured as a
// two-level array consisting of a "L1" arena map and many "L2" arena
// maps; however, since arenas are large, on many architectures, the
// arena map consists of a single, large L2 map.
// 由于arenas是对齐的,地址空间可以被视为一系列arena帧。
// arena map(mheap_.arenas)从arena帧号映射到*heapArena,或者对于没有Go堆支持的地址空间的部分映射为nil。
// arena map结构为两层数组,由“L1”arena map和许多“L2”arena map组成;然而,由于arena很大,在许多架构上,arena map由单个大型L2 map组成。
//
// The arena map covers the entire possible address space, allowing
// the Go heap to use any part of the address space. The allocator
// attempts to keep arenas contiguous so that large spans (and hence
// large objects) can cross arenas.
// arena map覆盖整个可能的地址空间,允许Go堆使用地址空间的任何部分。分配器试图保持arena连续,以便大的span(以及因此大对象)可以跨越arena。
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/math"
"runtime/internal/sys"
"unsafe"
)
const (
debugMalloc = false // 是否debug内存分配
maxTinySize = _TinySize // 微小型数据的最大字节数(16B)
tinySizeClass = _TinySizeClass // 微小型数据对应的类型ID
maxSmallSize = _MaxSmallSize // 小型数据的最大字节数32KB
pageShift = _PageShift // 页面大小左移位数 13
pageSize = _PageSize // 页面大小8KB
pageMask = _PageMask // 页面大小Mask值
// By construction, single page spans of the smallest object class
// have the most objects per span.
maxObjsPerSpan = pageSize / 8 // span中最多有多少数据
concurrentSweep = _ConcurrentSweep // 是否并发扫描
_PageSize = 1 << _PageShift
_PageMask = _PageSize - 1
// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
_64bit = 1 << (^uintptr(0) >> 63) / 2 // 64位系统为1,32为0
// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
// 微小分配器大小,大小为16, 类型为2, 见sizeclasses.go
_TinySize = 16
_TinySizeClass = int8(2)
_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc // 块分配大小16KB, 见mfixalloc.go
// Per-P, per order stack segment cache size.
// 每个P,每个堆栈缓存大小 32K
_StackCacheSize = 32 * 1024
// Number of orders that get caching. Order 0 is FixedStack
// and each successive order is twice as large.
// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
// will be allocated directly.
// Since FixedStack is different on different systems, we
// must vary NumStackOrders to keep the same maximum cached size.
// 需要缓存的有序堆栈的数目。0是固定堆栈,每一个连续的缓存堆栈是两倍关系。
// 我们需要缓存2KB, 4KB, 8KB 和 16KB 的堆栈,更大的则直接分配。因为在不
// 同的系统上,固定堆栈大小不一样,因此我们必须改变NumStackOrders来保持
// 相同的最大缓存大小。
// OS | FixedStack | NumStackOrders
// -----------------+------------+---------------
// linux/darwin/bsd | 2KB | 4 2KB 4KB 8KB 16KB
// windows/32 | 4KB | 3 4KB 8KB 16KB
// windows/64 | 8KB | 2 8KB 16KB
// plan9 | 4KB | 3 4KB 8KB 16KB
_NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
// heapAddrBits is the number of bits in a heap address. On
// amd64, addresses are sign-extended beyond heapAddrBits. On
// other arches, they are zero-extended.
//
// heapAddrBits是堆地址中的位数。在amd64上,地址被符号扩展到heapAddrBits之外。在其他系统上,它们是零扩展的。
//
// On most 64-bit platforms, we limit this to 48 bits based on a
// combination of hardware and OS limitations.
//
// 在大多数64位系统上,结合硬件和操作系统限制,我们将此限制为48位。
//
// amd64 hardware limits addresses to 48 bits, sign-extended
// to 64 bits. Addresses where the top 16 bits are not either
// all 0 or all 1 are "non-canonical" and invalid. Because of
// these "negative" addresses, we offset addresses by 1<<47
// (arenaBaseOffset) on amd64 before computing indexes into
// the heap arenas index. In 2017, amd64 hardware added
// support for 57 bit addresses; however, currently only Linux
// supports this extension and the kernel will never choose an
// address above 1<<47 unless mmap is called with a hint
// address above 1<<47 (which we never do).
//
// amd64硬件将地址限制为48位,符号扩展为64位。前16位不全为0,也或全为1的地址是“非规范的”且无效。
// 由于存在这些“负”地址,因此在计算索引到堆区索引,在amd64上将地址偏移1 << 47(arenaBaseOffset)。
// 2017年,amd64硬件增加了对57位地址的支持; 但是,当前只有Linux支持此扩展,内核将永远不会选择大于1 << 47的地址,除非调用mmap的提示地址大于1 << 47(我们从未这样做)。
//
// arm64 hardware (as of ARMv8) limits user addresses to 48
// bits, in the range [0, 1<<48).
//
// arm64硬件(自ARMv8起)将用户地址限制为48位,范围为[0,1 << 48)。256TB
//
// ppc64, mips64, and s390x support arbitrary 64 bit addresses
// in hardware. On Linux, Go leans on stricter OS limits. Based
// on Linux's processor.h, the user address space is limited as
// follows on 64-bit architectures:
//
// ppc64,mips64和s390x在硬件中支持任意64位地址。在Linux上,Go依靠更严格的OS限制。
// 基于Linux的processor.h,在64位体系结构上,用户地址空间受到如下限制
//
// Architecture Name Maximum Value (exclusive)
// ---------------------------------------------------------------------
// amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses)
// arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses)
// ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses)
// mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses)
// s390x TASK_SIZE 1<<64 (64 bit addresses)
//
// These limits may increase over time, but are currently at
// most 48 bits except on s390x. On all architectures, Linux
// starts placing mmap'd regions at addresses that are
// significantly below 48 bits, so even if it's possible to
// exceed Go's 48 bit limit, it's extremely unlikely in
// practice.
//
// 这些限制可能会随时间增加,但目前最多为48位(除了s390x)。在所有体系结构上,
// Linux都开始将mmap的区域放置在明显低于48位的地址上,因此即使有可能超过Go的48位限制,在实践中也极不可能。
//
// On aix/ppc64, the limits is increased to 1<<60 to accept addresses
// returned by mmap syscall. These are in range:
// 0x0a00000000000000 - 0x0afffffffffffff
//
// 在aix / ppc64上,将限制增加到1 << 60,以接受mmap syscall返回的地址。 在这些范围内:0x0a00000000000000 - 0x0afffffffffffff
//
// On 32-bit platforms, we accept the full 32-bit address
// space because doing so is cheap.
// mips32 only has access to the low 2GB of virtual memory, so
// we further limit it to 31 bits.
//
// 在32位平台上,我们接受完整的32位地址空间,因为这样做便宜。 mips32只可以访问2GB的低虚拟内存,因此我们进一步将其限制为31位。
//
// WebAssembly currently has a limit of 4GB linear memory.
// WebAssembly当前限制为4GB线性内存。
heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosAix))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 60*sys.GoosAix
// maxAlloc is the maximum size of an allocation. On 64-bit,
// it's theoretically possible to allocate 1<<heapAddrBits bytes. On
// 32-bit, however, this is one less than 1<<32 because the
// number of bytes in the address space doesn't actually fit
// in a uintptr.
// maxAlloc是分配的最大大小。在64位上,理论上可以分配1 << heapAddrBits字节。
// 但是,在32位上,这比1 << 32小1,因为地址空间中的字节数实际上不适合uintptr。
maxAlloc = (1 << heapAddrBits) - (1-_64bit)*1
// The number of bits in a heap address, the size of heap
// arenas, and the L1 and L2 arena map sizes are related by
//
// 堆地址中位数目,堆区大小,L1和L2 arena map大小关系如下:
//
// (1 << addr bits) = arena size * L1 entries * L2 entries
//
// Currently, we balance these as follows:
//
// Platform Addr bits Arena size L1 entries L2 entries
// -------------- --------- ---------- ---------- -----------
// */64-bit 48 64MB 1 4M (32MB) // 2^48 = 2^26 * 2^0 * 2^22
// aix/64-bit 60 256MB 4096 4M (32MB) // 2^60 = 2^28 * 2^12 * 2^22 不相等??, 1.13这里删除了,也是48bit
// windows/64-bit 48 4MB 64 1M (8MB) // 2^48 = 2^22 * 2^6 * 2^20
// */32-bit 32 4MB 1 1024 (4KB) // 2^32 = 2^22 * 2^0 * 2^10
// */mips(le) 31 4MB 1 512 (2KB) // 2^31 = 2^22 * 2^0 * 2^9
// heapArenaBytes is the size of a heap arena. The heap
// consists of mappings of size heapArenaBytes, aligned to
// heapArenaBytes. The initial heap mapping is one arena.
//
// heapArenaBytes是堆区的大小。堆由大小为heapArenaBytes的映射组成,并与heapArenaBytes对齐。最初的堆映射是一个arena。
//
// This is currently 64MB on 64-bit non-Windows and 4MB on
// 32-bit and on Windows. We use smaller arenas on Windows
// because all committed memory is charged to the process,
// even if it's not touched. Hence, for processes with small
// heaps, the mapped arena space needs to be commensurate.
// This is particularly important with the race detector,
// since it significantly amplifies the cost of committed
// memory.
//
// 当前在64位非Windows上为64MB,在32位以及Windows上为4MB。我们在Windows上使用较小的堆区,因为所有已提交的内存都由进程负责,即使未涉及也是如此。
// 因此,对于具有小堆的进程,映射的堆区空间需要相对应。这对于竞争检测器尤为重要,因为它会极大地增加提交内存的成本。
heapArenaBytes = 1 << logHeapArenaBytes // amd64上是:64MB
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
//
// logHeapArenaBytes是heapArenaBytes的log_2。为了清楚起见,最好在可能的地方使用heapArenaBytes(我们需要使用常量来计算其他常量)。
// amd64上是:26
logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoosAix)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (8+20)*sys.GoosAix
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
//
// heapArenaBitmapBytes是堆区对应的位图大小。每2个bit记录一个指针大小(8byte)的内存信息。
heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2) // amd64上是:2MB
// 一个堆区中有几个页面,64M/8k = 8192
pagesPerArena = heapArenaBytes / pageSize // amd64上是:8192 (64M/8k)
// arenaL1Bits is the number of bits of the arena number
// covered by the first level arena map.
//
// This number should be small, since the first level arena
// map requires PtrSize*(1<<arenaL1Bits) of space in the
// binary's BSS. It can be zero, in which case the first level
// index is effectively unused. There is a performance benefit
// to this, since the generated code can be more efficient,
// but comes at the cost of having a large L2 mapping.
//
// We use the L1 map on 64-bit Windows because the arena size
// is small, but the address space is still 48 bits, and
// there's a high cost to having a large L2.
//
// We use the L1 map on aix/ppc64 to keep the same L2 value
// as on Linux.
//
// arenaL1Bits是L1堆区映射的位数。(1 << arenaL1Bits) == L1 entries
// 这个数字应该很小,因为L1堆区映射在二进制的BSS中需要PtrSize *(1 << arenaL1Bits)空间。
// 它可以为零,在这种情况下,L1索引实际上未被使用。 这会带来性能上的好处,因为生成的代码可以更高效,但是以拥有较大的L2映射为代价。
arenaL1Bits = 6*(_64bit*sys.GoosWindows) + 12*sys.GoosAix // amd64上是:0
// arenaL2Bits is the number of bits of the arena number
// covered by the second level arena index.
//
// The size of each arena map allocation is proportional to
// 1<<arenaL2Bits, so it's important that this not be too
// large. 48 bits leads to 32MB arena index allocations, which
// is about the practical threshold.
//
// arenaL1Bits是L2堆区映射的位数。(1 << arenaL2Bits) == L2 entries
// 每一个堆区映射的大小跟1<<arenaL2Bits成反比因此,不要太大也很重要。
// 48位导致32MB堆区索引分配,这大约是实际的阈值。
arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits // amd64上是:22
// arenaL1Shift is the number of bits to shift an arena frame
// number by to compute an index into the first level arena map.
//
// arenaL1Shift是将堆区边框移位以计算进入L1堆区映射的索引的位数。
arenaL1Shift = arenaL2Bits // amd64上是:22
// arenaBits is the total bits in a combined arena map index.
// This is split between the index into the L1 arena map and
// the L2 arena map.
//
// arenaBits是堆区映射索引的总位数。这是划分L1堆区映射的索引和L2堆区映射的索引。
arenaBits = arenaL1Bits + arenaL2Bits // amd64上是:22
// arenaBaseOffset is the pointer value that corresponds to
// index 0 in the heap arena map.
//
// On amd64, the address space is 48 bits, sign extended to 64
// bits. This offset lets us handle "negative" addresses (or
// high addresses if viewed as unsigned).
//
// On other platforms, the user address space is contiguous
// and starts at 0, so no offset is necessary.
// arenaBaseOffset是与堆区映射中的索引0对应的指针值。在amd64上,地址空间为48位,符号扩展为64位。
// 该偏移量使我们可以处理“负”地址(如果视为无符号,则为高地址)。在其他平台上,用户地址空间是连续的,
// 并且从0开始,因此不需要偏移量。
arenaBaseOffset uintptr = sys.GoarchAmd64 * (1 << 47) // amd64上是:1 << 47
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
// collector scales well to 32 cpus.
// 运行垃圾回收的最大线程数。2、3和4都是合理的最大值,具体取决于机器的硬件细节。垃圾收集器可以很好地扩展到32 cpus。
_MaxGcproc = 32
// minLegalPointer is the smallest possible legal pointer.
// This is the smallest possible architectural page size,
// since we assume that the first page is never mapped.
//
// This should agree with minZeroPage in the compiler.
// minLegalPointer是最小的合法指针。这是可能的最小体系结构页面大小,因为我们假设第一页从未映射过。 这应该与编译器中的minZeroPage一致。
minLegalPointer uintptr = 4096
)
// physPageSize is the size in bytes of the OS's physical pages.
// Mapping and unmapping operations must be done at multiples of
// physPageSize.
//
// This must be set by the OS init code (typically in osinit) before
// mallocinit.
// physPageSize是操作系统物理页面的大小(以字节为单位)。映射和取消映射操作必须以physPageSize的倍数完成。
// 必须在mallocinit之前通过OS初始化代码(通常在osinit中)进行设置。
var physPageSize uintptr // amd64上是:4KB
// OS-defined helpers:
//
// sysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
// or a megabyte.
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// sysAlloc从操作系统中获取大量的零位内存,通常大约为100kb或1mb。注意:sysAlloc返回OS对齐的内存,
// 但是堆分配器可能使用更大的对齐方式,因此调用方必须小心地重新对齐sysAlloc获得的内存。
//
// sysUnused notifies the operating system that the contents
// of the memory region are no longer needed and can be reused
// for other purposes.
// sysUsed notifies the operating system that the contents
// of the memory region are needed again.
//
// sysUnused通知操作系统,不再需要内存区域的内容,并且可以将其重新用于其他目的。sysUsed通知操作系统,再次需要内存区域的内容。
//
// sysFree returns it unconditionally; this is only used if
// an out-of-memory error has been detected midway through
// an allocation. It is okay if sysFree is a no-op.
//
// sysFree无条件返回它;仅当在分配中途检测到内存不足错误时才使用此选项。sysFree是无操作的也可以。
//
// sysReserve reserves address space without allocating memory.
// If the pointer passed to it is non-nil, the caller wants the
// reservation there, but sysReserve can still choose another
// location if that one is unavailable.
// NOTE: sysReserve returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// sysReserve保留地址空间而不分配内存。如果传递给它的指针为非nil,则调用方希望在那里保留,
// 但是sysReserve仍然可以选择另一个位置(如果该位置不可用)。 注意:sysReserve返回OS对齐的内存,
// 但是堆分配器可能使用更大的对齐方式,因此调用者必须小心地重新对齐sysAlloc获得的内存。
//
// sysMap maps previously reserved address space for use.
//
// sysMap映射以前保留的地址空间以供使用。
//
// sysFault marks a (already sysAlloc'd) region to fault
// if accessed. Used only for debugging the runtime.
//
// sysFault标记(已被sysAlloc'd)访问的区域将发生故障。仅用于调试运行时。
func mallocinit() {
// 确保_TinySizeClass就是_TinySize
if class_to_size[_TinySizeClass] != _TinySize {
throw("bad TinySizeClass")
}
testdefersizes()
// 检测heapArenaBitmapBytes是2的幂(2的N次方)
if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
throw("heapArenaBitmapBytes not a power of 2")
}
// Copy class sizes out for statistics table.
// 将各种大小类的字节数复制到统计表中。
for i := range class_to_size {
memstats.by_size[i].size = uint32(class_to_size[i])
}
// Check physPageSize.
// 检测物理页大小,如果等于0,则无法获取到物理页大小
if physPageSize == 0 {
// The OS init code failed to fetch the physical page size.
throw("failed to get system page size")
}
// 检测是否小于minPhysPageSize(4K)
if physPageSize < minPhysPageSize {
print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n")
throw("bad system page size")
}
// 检测physPageSize是2的幂(2的N次方)
if physPageSize&(physPageSize-1) != 0 {
print("system page size (", physPageSize, ") must be a power of 2\n")
throw("bad system page size")
}
// Initialize the heap.
// 初始化堆, 分配mcache
mheap_.init()
_g_ := getg()
_g_.m.mcache = allocmcache()
// Create initial arena growth hints.
// arenaHints是尝试添加更多堆区的地址列表。最初使用一组常规提示地址进行填充,然后使用实际堆区范围的边界进行扩展。
// 初始化mheap_.arenaHints列表
if sys.PtrSize == 8 && GOARCH != "wasm" {
// On a 64-bit machine, we pick the following hints
// because:
//
// 在64位计算机上,我们选择以下提示,因为:
//
// 1. Starting from the middle of the address space
// makes it easier to grow out a contiguous range
// without running in to some other mapping.
//
// 1.从地址空间的中间开始,可以轻松扩展到连续范围,而无需运行其他映射。
//
// 2. This makes Go heap addresses more easily
// recognizable when debugging.
//
// 2.这使得Go堆地址在调试时更容易识别。
//
// 3. Stack scanning in gccgo is still conservative,
// so it's important that addresses be distinguishable
// from other data.
//
// 3. gccgo中的堆栈扫描仍然很保守,因此将地址与其他数据区分开很重要。
//
// Starting at 0x00c0 means that the valid memory addresses
// will begin 0x00c0, 0x00c1, ...
// In little-endian, that's c0 00, c1 00, ... None of those are valid
// UTF-8 sequences, and they are otherwise as far away from
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
// on OS X during thread allocations. 0x00c0 causes conflicts with
// AddressSanitizer which reserves all memory up to 0x0100.
// These choices reduce the odds of a conservative garbage collector
// not collecting memory because some non-pointer block of memory
// had a bit pattern that matched a memory address.
//
// 从0x00c0开始意味着有效的内存地址将从0x00c0、0x00c1 ...开始。在little-endian中,这是c0 00,c1 00,...
// 这些都不是有效的UTF-8序列,否则它们与ff(可能是一个公共字节)尽可能远。如果失败,我们尝试其他0xXXc0地址。
// 较早的尝试使用0x11f8导致线程分配期间OS X上的内存不足错误。0x00c0导致与AddressSanitizer发生冲突,后者保留了
// 最多0x0100的所有内存。这些选择降低了保守的垃圾收集器不收集内存的可能性,因为某些非指针内存块具有与内存地址匹配的位模式。
//
// However, on arm64, we ignore all this advice above and slam the
// allocation at 0x40 << 32 because when using 4k pages with 3-level
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
// On AIX, mmaps starts at 0x0A00000000000000 for 64-bit.
// processes.
//
// 但是,在arm64上,我们忽略了上面的所有建议,并在0x40 << 32处分配,因为当使用具有3级转换缓冲区的4k页面时,
// 用户地址空间被限制为39位。在darwin / arm64上,地址空间甚至更小。 在AIX上,对于64位,mmaps从0x0A00000000000000开始。
// 流程如下。
for i := 0x7f; i >= 0; i-- {
var p uintptr
switch {
case GOARCH == "arm64" && GOOS == "darwin":
p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
case GOARCH == "arm64":
p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
case GOOS == "aix":
if i == 0 {
// We don't use addresses directly after 0x0A00000000000000
// to avoid collisions with others mmaps done by non-go programs.
// 我们不会在0x0A00000000000000之后直接使用地址,以免与非执行程序造成的其他mmap冲突。
continue
}
p = uintptr(i)<<40 | uintptrMask&(0xa0<<52)
case raceenabled:
// The TSAN runtime requires the heap
// to be in the range [0x00c000000000,
// 0x00e000000000).
// TSAN运行时要求堆的范围为[0x00c000000000,0x00e000000000)。
p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32)
if p >= uintptrMask&0x00e000000000 {
continue
}
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
}
} else {
// On a 32-bit machine, we're much more concerned
// about keeping the usable heap contiguous.
// Hence:
//
// 在32位计算机上,我们更加关注保持可用堆是连续的。 因此:
//
// 1. We reserve space for all heapArenas up front so
// they don't get interleaved with the heap. They're
// ~258MB, so this isn't too bad. (We could reserve a
// smaller amount of space up front if this is a
// problem.)
//
// 1.我们为所有heapArena保留空间,这样它们就不会与heap交错。它们约为258MB,所以还算不错。(如果出现问题,我们可以在前面预留较小的空间。)
//
// 2. We hint the heap to start right above the end of
// the binary so we have the best chance of keeping it
// contiguous.
//
// 2.我们建议堆从二进制文件的末尾开始,因此我们有最大的机会保持其连续性。
//
// 3. We try to stake out a reasonably large initial
// heap reservation.
//
// 3.我们尝试明确一个相当大的初始堆保留。
const arenaMetaSize = (1 << arenaBits) * unsafe.Sizeof(heapArena{})
meta := uintptr(sysReserve(nil, arenaMetaSize))
if meta != 0 {
mheap_.heapArenaAlloc.init(meta, arenaMetaSize)
}
// We want to start the arena low, but if we're linked
// against C code, it's possible global constructors
// have called malloc and adjusted the process' brk.
// Query the brk so we can avoid trying to map the
// region over it (which will cause the kernel to put
// the region somewhere else, likely at a high
// address).
//
// 我们想从低地址开始,但是如果我们与C代码链接,则全局构造函数可能已调用malloc并调整了进程的brk。
// 查询brk,这样我们就可以避免尝试在其上映射区域(这将导致内核将区域放置在其他地方,可能位于高地址)
procBrk := sbrk0() // 获取程序当前的brk
// If we ask for the end of the data segment but the
// operating system requires a little more space
// before we can start allocating, it will give out a
// slightly higher pointer. Except QEMU, which is
// buggy, as usual: it won't adjust the pointer
// upward. So adjust it upward a little bit ourselves:
// 1/4 MB to get away from the running binary image.
//
// 如果我们要求结束数据段,但是操作系统在开始分配之前需要更多空间,它将给出稍高的指针。 像往常一样,除了QEMU之外,
// 它还是有问题的:它不会向上调整指针。 因此,我们自己向上调整一点:1/4 MB以远离正在运行的二进制映像。
p := firstmoduledata.end
if p < procBrk {
p = procBrk
}
if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
p = mheap_.heapArenaAlloc.end
}
p = round(p+(256<<10), heapArenaBytes) // 1/4 MB
// Because we're worried about fragmentation on
// 32-bit, we try to make a large initial reservation.
// 因为我们担心32位上的碎片,所以我们尝试进行较大的初始保留。
arenaSizes := []uintptr{
512 << 20, // 512MB
256 << 20, // 256MB
128 << 20, // 128MB
}
for _, arenaSize := range arenaSizes {
a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes)
if a != nil {
mheap_.arena.init(uintptr(a), size)
p = uintptr(a) + size // For hint below
break
}
}
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
}
}
// sysAlloc allocates heap arena space for at least n bytes. The
// returned pointer is always heapArenaBytes-aligned and backed by
// h.arenas metadata. The returned size is always a multiple of
// heapArenaBytes. sysAlloc returns nil on failure.
// There is no corresponding free function.
//
// h must be locked.
func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
	n = round(n, heapArenaBytes) // round the request up to a whole number of arenas

	// First, try the arena pre-reservation.
	v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
	if v != nil {
		size = n
		goto mapped
	}

	// Try to grow the heap at a hint address.
	for h.arenaHints != nil {
		hint := h.arenaHints
		p := hint.addr
		if hint.down {
			// This hint grows the heap downward, so its addr is
			// the end of the region; step back to the candidate base.
			p -= n
		}
		if p+n < p {
			// Address space would wrap around.
			// We can't use this, so don't ask.
			v = nil
		} else if arenaIndex(p+n-1) >= 1<<arenaBits {
			// Outside addressable heap. Can't use.
			v = nil
		} else {
			v = sysReserve(unsafe.Pointer(p), n)
		}
		if p == uintptr(v) {
			// Success. Update the hint.
			if !hint.down {
				p += n
			}
			hint.addr = p
			size = n
			break
		}
		// Failed. Discard this hint and try the next.
		//
		// TODO: This would be cleaner if sysReserve could be
		// told to only return the requested address. In
		// particular, this is already how Windows behaves, so
		// it would simplify things there.
		if v != nil {
			// The kernel gave us memory, just not at the address
			// we asked for; release it before moving on.
			sysFree(v, n, nil)
		}
		h.arenaHints = hint.next
		h.arenaHintAlloc.free(unsafe.Pointer(hint))
	}

	if size == 0 {
		// All hints were exhausted without a successful reservation.
		if raceenabled {
			// The race detector assumes the heap lives in
			// [0x00c000000000, 0x00e000000000), but we
			// just ran out of hints in this region. Give
			// a nice failure.
			throw("too many address space collisions for -race mode")
		}

		// All of the hints failed, so we'll take any
		// (sufficiently aligned) address the kernel will give
		// us.
		v, size = sysReserveAligned(nil, n, heapArenaBytes)
		if v == nil {
			return nil, 0
		}

		// Create new hints for extending this region:
		// one growing down from the base and one growing up from the end.
		hint := (*arenaHint)(h.arenaHintAlloc.alloc())
		hint.addr, hint.down = uintptr(v), true
		hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
		hint = (*arenaHint)(h.arenaHintAlloc.alloc())
		hint.addr = uintptr(v) + size
		hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
	}

	// Check for bad pointers or pointers we can't use.
	{
		var bad string
		p := uintptr(v)
		if p+size < p {
			bad = "region exceeds uintptr range"
		} else if arenaIndex(p) >= 1<<arenaBits {
			bad = "base outside usable address space"
		} else if arenaIndex(p+size-1) >= 1<<arenaBits {
			bad = "end outside usable address space"
		}
		if bad != "" {
			// This should be impossible on most architectures,
			// but it would be really confusing to debug.
			print("runtime: memory allocated by OS [", hex(p), ", ", hex(p+size), ") not in usable address space: ", bad, "\n")
			throw("memory reservation exceeds address space limit")
		}
	}

	if uintptr(v)&(heapArenaBytes-1) != 0 {
		throw("misrounded allocation in sysAlloc")
	}

	// Back the reservation with committed memory.
	sysMap(v, size, &memstats.heap_sys)

mapped:
	// Create arena metadata for every arena frame in [v, v+size).
	for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ {
		l2 := h.arenas[ri.l1()]
		if l2 == nil {
			// Allocate an L2 arena map.
			l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil))
			if l2 == nil {
				throw("out of memory allocating heap arena map")
			}
			// Publish the L2 map with an atomic store so concurrent
			// readers of h.arenas never see a partially-written pointer.
			atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2))
		}

		if l2[ri.l2()] != nil {
			throw("arena already initialized")
		}
		var r *heapArena
		// Prefer the pre-reserved metadata arena; fall back to
		// persistentalloc if it is exhausted.
		r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
		if r == nil {
			r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
			if r == nil {
				throw("out of memory allocating heap arena metadata")
			}
		}

		// Add the arena to the arenas list.
		if len(h.allArenas) == cap(h.allArenas) {
			// Grow the backing array (doubling, with a physPageSize
			// minimum for the first allocation).
			size := 2 * uintptr(cap(h.allArenas)) * sys.PtrSize
			if size == 0 {
				size = physPageSize
			}
			newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gc_sys))
			if newArray == nil {
				throw("out of memory allocating allArenas")
			}
			oldSlice := h.allArenas
			*(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / sys.PtrSize)}
			copy(h.allArenas, oldSlice)
			// Do not free the old backing array because
			// there may be concurrent readers. Since we
			// double the array each time, this can lead
			// to at most 2x waste.
		}
		h.allArenas = h.allArenas[:len(h.allArenas)+1]
		h.allArenas[len(h.allArenas)-1] = ri

		// Store atomically just in case an object from the
		// new heap arena becomes visible before the heap lock
		// is released (which shouldn't happen, but there's
		// little downside to this).
		atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r))
	}

	// Tell the race detector about the new heap memory.
	if raceenabled {
		racemapshadow(v, size)
	}

	return
}
// sysReserveAligned is like sysReserve, but the returned pointer is
// aligned to align bytes. It may reserve either size or size+align
// bytes, so it returns the size that was reserved.
func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) {
	// Since the alignment is rather large in uses of this
	// function, we're not likely to get it by chance, so we ask
	// for a larger region and remove the parts we don't need.
	retries := 0
retry:
	p := uintptr(sysReserve(v, size+align))
	switch {
	case p == 0:
		// Reservation failed entirely.
		return nil, 0
	case p&(align-1) == 0:
		// We got lucky and got an aligned region, so we can
		// use the whole thing.
		return unsafe.Pointer(p), size + align
	case GOOS == "windows":
		// On Windows we can't release pieces of a
		// reservation, so we release the whole thing and
		// re-reserve the aligned sub-region. This may race,
		// so we may have to try again.
		sysFree(unsafe.Pointer(p), size+align, nil)
		p = round(p, align)
		p2 := sysReserve(unsafe.Pointer(p), size)
		if p != uintptr(p2) {
			// Must have raced. Try again.
			sysFree(p2, size, nil)
			if retries++; retries == 100 {
				throw("failed to allocate aligned heap memory; too many retries")
			}
			goto retry
		}
		// Success.
		return p2, size
	default:
		// Trim off the unaligned parts: the slack before the
		// aligned base and whatever remains past base+size.
		pAligned := round(p, align)
		sysFree(unsafe.Pointer(p), pAligned-p, nil)
		end := pAligned + size
		endLen := (p + size + align) - end
		if endLen > 0 {
			sysFree(unsafe.Pointer(end), endLen, nil)
		}
		return unsafe.Pointer(pAligned), size
	}
}
// zerobase is the base address for all 0-byte allocations: every
// zero-sized allocation returns a pointer to this single word so that
// such allocations never consume heap space.
var zerobase uintptr
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
	// Position of the lowest set bit in the allocation cache;
	// 64 means no free object is cached.
	bit := sys.Ctz64(s.allocCache)
	if bit >= 64 {
		return 0
	}
	idx := s.freeindex + uintptr(bit)
	if idx >= s.nelems {
		// Cached bit lies past the end of the span.
		return 0
	}
	next := idx + 1
	if next%64 == 0 && next != s.nelems {
		// The cache would need refilling; defer to the slow path.
		return 0
	}
	// Consume the bit and hand out the object.
	s.allocCache >>= uint(bit + 1)
	s.freeindex = next
	s.allocCount++
	return gclinkptr(idx*s.elemsize + s.base())
}
// nextFree returns the next free object from the cached span if one is available.
// Otherwise it refills the cache with a span with an available object and
// returns that object along with a flag indicating that this was a heavy
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) {
	s = c.alloc[spc]
	fi := s.nextFreeIndex()
	if fi == s.nelems {
		// The span is full; sanity-check the count before refilling.
		if uintptr(s.allocCount) != s.nelems {
			println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
			throw("s.allocCount != s.nelems && freeIndex == s.nelems")
		}
		// Refill is the heavy-weight path; report it to the caller.
		c.refill(spc)
		shouldhelpgc = true
		s = c.alloc[spc]
		fi = s.nextFreeIndex()
	}

	if fi >= s.nelems {
		throw("freeIndex is not valid")
	}

	// Compute the object address and account for it.
	v = gclinkptr(fi*s.elemsize + s.base())
	s.allocCount++
	if uintptr(s.allocCount) > s.nelems {
		println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
		throw("s.allocCount > s.nelems")
	}
	return
}
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if gcphase == _GCmarktermination {
throw("mallocgc called with gcphase == _GCmarktermination")
}
if size == 0 {
return unsafe.Pointer(&zerobase)
}
if debug.sbrk != 0 {
align := uintptr(16)
if typ != nil {
align = uintptr(typ.align)
}
return persistentalloc(size, align, &memstats.other_sys)
}
// assistG is the G to charge for this allocation, or nil if
// GC is not currently active.
var assistG *g
if gcBlackenEnabled != 0 {
// Charge the current user G for this allocation.
assistG = getg()
if assistG.m.curg != nil {
assistG = assistG.m.curg
}
// Charge the allocation against the G. We'll account
// for internal fragmentation at the end of mallocgc.
assistG.gcAssistBytes -= int64(size)
if assistG.gcAssistBytes < 0 {