-
Notifications
You must be signed in to change notification settings - Fork 119
/
lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch
3958 lines (3745 loc) · 130 KB
/
lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile
index 0940c49..72d0984 100644
--- a/libraries/liblmdb/Makefile
+++ b/libraries/liblmdb/Makefile
@@ -8,7 +8,7 @@
# platforms; you should not need to change any of these.
# Read their descriptions in mdb.c if you do:
#
-# - MDB_USE_POSIX_SEM
+# - MDB_USE_POSIX_MUTEX, MDB_USE_POSIX_SEM, MDB_USE_SYSV_SEM
# - MDB_DSYNC
# - MDB_FDATASYNC
# - MDB_FDATASYNC_WORKS
@@ -24,8 +24,9 @@ W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized
THREADS = -pthread
OPT = -O2 -g
CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS)
-LDLIBS =
-SOLIBS =
+LDLIBS = # -lntdll # Windows needs ntdll
+SOLIBS = # -lntdll
+SOEXT = .so
prefix = /usr/local
exec_prefix = $(prefix)
bindir = $(exec_prefix)/bin
@@ -37,7 +38,7 @@ mandir = $(datarootdir)/man
########################################################################
IHDRS = lmdb.h
-ILIBS = liblmdb.a liblmdb.so
+ILIBS = liblmdb.a liblmdb$(SOEXT)
IPROGS = mdb_stat mdb_copy mdb_dump mdb_load
IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1
PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5
@@ -63,7 +64,7 @@ test: all
liblmdb.a: mdb.o midl.o
$(AR) rs $@ mdb.o midl.o
-liblmdb.so: mdb.lo midl.lo
+liblmdb$(SOEXT): mdb.lo midl.lo
# $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS)
$(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.lo midl.lo $(SOLIBS)
diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index c4d05d1..30d5862 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -53,14 +53,15 @@
*
* Fix: Check for stale readers periodically, using the
* #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool.
- * Stale writers will be cleared automatically on some systems:
+ * Stale writers will be cleared automatically on most systems:
* - Windows - automatic
+ * - BSD, systems using SysV semaphores - automatic
* - Linux, systems using POSIX mutexes with Robust option - automatic
- * - not on BSD, systems using POSIX semaphores.
* Otherwise just make all programs using the database close it;
* the lockfile is always reset on first open of the environment.
*
- * - On BSD systems or others configured with MDB_USE_POSIX_SEM,
+ * - On BSD systems or others configured with MDB_USE_SYSV_SEM or
+ * MDB_USE_POSIX_SEM,
* startup can fail due to semaphores owned by another userid.
*
* Fix: Open and close the database as the user which owns the
@@ -77,6 +78,11 @@
* access to locks and lock file. Exceptions: On read-only filesystems
* or with the #MDB_NOLOCK flag described under #mdb_env_open().
*
+ * - An LMDB configuration will often reserve considerable \b unused
+ * memory address space and maybe file size for future growth.
+ * This does not use actual memory or disk space, but users may need
+ * to understand the difference so they won't be scared off.
+ *
* - By default, in versions before 0.9.10, unused portions of the data
* file might receive garbage data from memory freed by other code.
* (This does not happen when using the #MDB_WRITEMAP flag.) As of
@@ -160,6 +166,8 @@
#define _LMDB_H_
#include <sys/types.h>
+#include <inttypes.h>
+#include <limits.h>
#ifdef __cplusplus
extern "C" {
@@ -172,6 +180,32 @@ typedef int mdb_mode_t;
typedef mode_t mdb_mode_t;
#endif
+#ifdef _WIN32
+# define MDB_FMT_Z "I"
+#else
+# define MDB_FMT_Z "z" /**< printf/scanf format modifier for size_t */
+#endif
+
+#ifndef MDB_VL32
+/** Unsigned type used for mapsize, entry counts and page/transaction IDs.
+ *
+ * It is normally size_t, hence the name. Defining MDB_VL32 makes it
+ * uint64_t, but do not try this unless you know what you are doing.
+ */
+typedef size_t mdb_size_t;
+# define MDB_SIZE_MAX SIZE_MAX /**< max #mdb_size_t */
+/** #mdb_size_t printf formats, \b t = one of [diouxX] without quotes */
+# define MDB_PRIy(t) MDB_FMT_Z #t
+/** #mdb_size_t scanf formats, \b t = one of [dioux] without quotes */
+# define MDB_SCNy(t) MDB_FMT_Z #t
+#else
+typedef uint64_t mdb_size_t;
+# define MDB_SIZE_MAX UINT64_MAX
+# define MDB_PRIy(t) PRI##t##64
+# define MDB_SCNy(t) SCN##t##64
+# define mdb_env_create mdb_env_create_vl32 /**< Prevent mixing with non-VL32 builds */
+#endif
+
/** An abstraction for a file handle.
* On POSIX systems file handles are small integers. On Windows
* they're opaque pointers.
@@ -194,7 +228,7 @@ typedef int mdb_filehandle_t;
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
-#define MDB_VERSION_PATCH 18
+#define MDB_VERSION_PATCH 70
/** Combine args a,b,c into a single integer for easy version comparisons */
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
@@ -204,7 +238,7 @@ typedef int mdb_filehandle_t;
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
-#define MDB_VERSION_DATE "February 5, 2016"
+#define MDB_VERSION_DATE "December 19, 2015"
/** A stringifier for the version info */
#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")"
@@ -306,7 +340,8 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
#define MDB_REVERSEKEY 0x02
/** use sorted duplicates */
#define MDB_DUPSORT 0x04
- /** numeric keys in native byte order: either unsigned int or size_t.
+ /** numeric keys in native byte order, either unsigned int or #mdb_size_t.
+ * (lmdb expects 32-bit int <= size_t <= 32/64-bit mdb_size_t.)
* The keys must all be of the same size. */
#define MDB_INTEGERKEY 0x08
/** with #MDB_DUPSORT, sorted dup items have fixed size */
@@ -383,7 +418,9 @@ typedef enum MDB_cursor_op {
MDB_PREV_NODUP, /**< Position at last data item of previous key */
MDB_SET, /**< Position at specified key */
MDB_SET_KEY, /**< Position at specified key, return key + data */
- MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */
+ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */
+ MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to
+ a page of duplicate data items. Only for #MDB_DUPFIXED */
} MDB_cursor_op;
/** @defgroup errors Return Codes
@@ -440,8 +477,10 @@ typedef enum MDB_cursor_op {
#define MDB_BAD_VALSIZE (-30781)
/** The specified DBI was changed unexpectedly */
#define MDB_BAD_DBI (-30780)
+ /** Unexpected problem - txn should abort */
+#define MDB_PROBLEM (-30779)
/** The last defined error code */
-#define MDB_LAST_ERRCODE MDB_BAD_DBI
+#define MDB_LAST_ERRCODE MDB_PROBLEM
/** @} */
/** @brief Statistics for a database in the environment */
@@ -449,18 +488,18 @@ typedef struct MDB_stat {
unsigned int ms_psize; /**< Size of a database page.
This is currently the same for all databases. */
unsigned int ms_depth; /**< Depth (height) of the B-tree */
- size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */
- size_t ms_leaf_pages; /**< Number of leaf pages */
- size_t ms_overflow_pages; /**< Number of overflow pages */
- size_t ms_entries; /**< Number of data items */
+ mdb_size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */
+ mdb_size_t ms_leaf_pages; /**< Number of leaf pages */
+ mdb_size_t ms_overflow_pages; /**< Number of overflow pages */
+ mdb_size_t ms_entries; /**< Number of data items */
} MDB_stat;
/** @brief Information about the environment */
typedef struct MDB_envinfo {
void *me_mapaddr; /**< Address of map, if fixed */
- size_t me_mapsize; /**< Size of the data memory map */
- size_t me_last_pgno; /**< ID of the last used page */
- size_t me_last_txnid; /**< ID of the last committed transaction */
+ mdb_size_t me_mapsize; /**< Size of the data memory map */
+ mdb_size_t me_last_pgno; /**< ID of the last used page */
+ mdb_size_t me_last_txnid; /**< ID of the last committed transaction */
unsigned int me_maxreaders; /**< max reader slots in the environment */
unsigned int me_numreaders; /**< max reader slots used in the environment */
} MDB_envinfo;
@@ -672,6 +711,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
* <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free
* pages and sequentially renumber all pages in output. This option
* consumes more CPU and runs more slowly than the default.
+ * Currently it fails if the environment has suffered a page leak.
* </ul>
* @return A non-zero error value on failure and 0 on success.
*/
@@ -829,7 +869,7 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
* an active write transaction.
* </ul>
*/
-int mdb_env_set_mapsize(MDB_env *env, size_t size);
+int mdb_env_set_mapsize(MDB_env *env, mdb_size_t size);
/** @brief Set the maximum number of threads/reader slots for the environment.
*
@@ -942,6 +982,10 @@ int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
* <ul>
* <li>#MDB_RDONLY
* This transaction will not perform any write operations.
+ * <li>#MDB_NOSYNC
+ * Don't flush system buffers to disk when committing this transaction.
+ * <li>#MDB_NOMETASYNC
+ * Flush system buffers but omit metadata flush when committing this transaction.
* </ul>
* @param[out] txn Address where the new #MDB_txn handle will be stored
* @return A non-zero error value on failure and 0 on success. Some possible
@@ -974,7 +1018,7 @@ MDB_env *mdb_txn_env(MDB_txn *txn);
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @return A transaction ID, valid if input is an active transaction.
*/
-size_t mdb_txn_id(MDB_txn *txn);
+mdb_size_t mdb_txn_id(MDB_txn *txn);
/** @brief Commit all the operations of a transaction into the database.
*
@@ -1084,7 +1128,8 @@ int mdb_txn_renew(MDB_txn *txn);
* keys must be unique and may have only a single data item.
* <li>#MDB_INTEGERKEY
* Keys are binary integers in native byte order, either unsigned int
- * or size_t, and will be sorted as such.
+ * or #mdb_size_t, and will be sorted as such.
+ * (lmdb expects 32-bit int <= size_t <= 32/64-bit mdb_size_t.)
* The keys must all be of the same size.
* <li>#MDB_DUPFIXED
* This flag may only be used in combination with #MDB_DUPSORT. This option
@@ -1524,7 +1569,7 @@ int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags);
* <li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
* </ul>
*/
-int mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
+int mdb_cursor_count(MDB_cursor *cursor, mdb_size_t *countp);
/** @brief Compare two data items according to a particular database.
*
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index d2e81e4..1d9e9fa 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -35,9 +35,42 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
+#if defined(MDB_VL32) || defined(__WIN64__)
+#define _FILE_OFFSET_BITS 64
+#endif
#ifdef _WIN32
#include <malloc.h>
#include <windows.h>
+
+/* We use native NT APIs to setup the memory map, so that we can
+ * let the DB file grow incrementally instead of always preallocating
+ * the full size. These APIs are defined in <wdm.h> and <ntifs.h>
+ * but those headers are meant for driver-level development and
+ * conflict with the regular user-level headers, so we explicitly
+ * declare them here. Using these APIs also means we must link to
+ * ntdll.dll, which is not linked by default in user code.
+ */
+NTSTATUS WINAPI
+NtCreateSection(OUT PHANDLE sh, IN ACCESS_MASK acc,
+ IN void * oa OPTIONAL,
+ IN PLARGE_INTEGER ms OPTIONAL,
+ IN ULONG pp, IN ULONG aa, IN HANDLE fh OPTIONAL);
+
+typedef enum _SECTION_INHERIT {
+ ViewShare = 1,
+ ViewUnmap = 2
+} SECTION_INHERIT;
+
+NTSTATUS WINAPI
+NtMapViewOfSection(IN PHANDLE sh, IN HANDLE ph,
+ IN OUT PVOID *addr, IN ULONG_PTR zbits,
+ IN SIZE_T cs, IN OUT PLARGE_INTEGER off OPTIONAL,
+ IN OUT PSIZE_T vs, IN SECTION_INHERIT ih,
+ IN ULONG at, IN ULONG pp);
+
+NTSTATUS WINAPI
+NtClose(HANDLE h);
+
/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
* as int64 which is wrong. MSVC doesn't define it at all, so just
* don't use it.
@@ -56,6 +89,10 @@
# define SSIZE_MAX INT_MAX
# endif
#endif
+/* Windows uses 32-bit off_t to define our own
+ 64 bit offset type
+*/
+#define offset_t __int64
#else
#include <sys/types.h>
#include <sys/stat.h>
@@ -68,6 +105,7 @@
#include <sys/file.h>
#endif
#include <fcntl.h>
+#define offset_t off_t
#endif
#if defined(__mips) && defined(__linux)
@@ -116,7 +154,9 @@ typedef SSIZE_T ssize_t;
#endif
#if defined(__APPLE__) || defined (BSD)
-# define MDB_USE_POSIX_SEM 1
+# if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM))
+# define MDB_USE_SYSV_SEM 1
+# endif
# define MDB_FDATASYNC fsync
#elif defined(ANDROID)
# define MDB_FDATASYNC fsync
@@ -127,12 +167,22 @@ typedef SSIZE_T ssize_t;
#ifdef MDB_USE_POSIX_SEM
# define MDB_USE_HASH 1
#include <semaphore.h>
+#elif defined(MDB_USE_SYSV_SEM)
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short *array;
+};
+#endif /* _SEM_SEMUN_UNDEFINED */
#else
#define MDB_USE_POSIX_MUTEX 1
-#endif
-#endif
+#endif /* MDB_USE_POSIX_SEM */
+#endif /* !_WIN32 */
-#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \
+#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) + defined(MDB_USE_SYSV_SEM) \
+ defined(MDB_USE_POSIX_MUTEX) != 1
# error "Ambiguous shared-lock implementation"
#endif
@@ -183,7 +233,7 @@ typedef SSIZE_T ssize_t;
#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN)
# error "Unknown or unsupported endianness (BYTE_ORDER)"
-#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
+#elif (-6 & 5) || CHAR_BIT!=8 || UINT_MAX!=0xffffffff || MDB_SIZE_MAX%UINT_MAX
# error "Two's complement, reasonably sized integer types, please"
#endif
@@ -234,6 +284,8 @@ typedef SSIZE_T ssize_t;
#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
#ifdef _WIN32
#define MDB_OWNERDEAD ((int) WAIT_ABANDONED)
+#elif defined MDB_USE_SYSV_SEM
+#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11)
#elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD)
#define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */
#endif
@@ -256,16 +308,20 @@ typedef SSIZE_T ssize_t;
# define MDB_USE_ROBUST 0
# else
# define MDB_USE_ROBUST 1
+# endif
+#endif /* !MDB_USE_ROBUST */
+
+#if defined(MDB_USE_POSIX_MUTEX) && (MDB_USE_ROBUST)
/* glibc < 2.12 only provided _np API */
-# if defined(__GLIBC__) && GLIBC_VER < 0x02000c
+# if (defined(__GLIBC__) && GLIBC_VER < 0x02000c) || \
+ (defined(PTHREAD_MUTEX_ROBUST_NP) && !defined(PTHREAD_MUTEX_ROBUST))
# define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP
# define pthread_mutexattr_setrobust(attr, flag) pthread_mutexattr_setrobust_np(attr, flag)
# define pthread_mutex_consistent(mutex) pthread_mutex_consistent_np(mutex)
# endif
-# endif
-#endif /* MDB_USE_ROBUST */
+#endif /* MDB_USE_POSIX_MUTEX && MDB_USE_ROBUST */
-#if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST
+#if defined(MDB_OWNERDEAD) && (MDB_USE_ROBUST)
#define MDB_ROBUST_SUPPORTED 1
#endif
@@ -288,8 +344,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t;
#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE)
#define pthread_cond_signal(x) SetEvent(*x)
#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0)
-#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL)
-#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE)
+#define THREAD_CREATE(thr,start,arg) \
+ (((thr) = CreateThread(NULL, 0, start, arg, 0, NULL)) ? 0 : ErrCode())
+#define THREAD_FINISH(thr) \
+ (WaitForSingleObject(thr, INFINITE) ? ErrCode() : 0)
#define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE)
#define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex)
#define mdb_mutex_consistent(mutex) 0
@@ -305,12 +363,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t;
#else
#define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000
#endif
-#define Z "I"
#else
#define THREAD_RET void *
#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
#define THREAD_FINISH(thr) pthread_join(thr,NULL)
-#define Z "z" /**< printf format modifier for size_t */
/** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
#define MDB_PIDLOCK 1
@@ -329,12 +385,46 @@ mdb_sem_wait(sem_t *sem)
return rc;
}
+#elif defined MDB_USE_SYSV_SEM
+
+typedef struct mdb_mutex {
+ int semid;
+ int semnum;
+ int *locked;
+} mdb_mutex_t[1], *mdb_mutexref_t;
+
+#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex)
+#define UNLOCK_MUTEX(mutex) do { \
+ struct sembuf sb = { 0, 1, SEM_UNDO }; \
+ sb.sem_num = (mutex)->semnum; \
+ *(mutex)->locked = 0; \
+ semop((mutex)->semid, &sb, 1); \
+} while(0)
+
+static int
+mdb_sem_wait(mdb_mutexref_t sem)
+{
+ int rc, *locked = sem->locked;
+ struct sembuf sb = { 0, -1, SEM_UNDO };
+ sb.sem_num = sem->semnum;
+ do {
+ if (!semop(sem->semid, &sb, 1)) {
+ rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS;
+ *locked = 1;
+ break;
+ }
+ } while ((rc = errno) == EINTR);
+ return rc;
+}
+
+#define mdb_mutex_consistent(mutex) 0
+
#else /* MDB_USE_POSIX_MUTEX: */
/** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as
* local variables keep it (mdb_mutexref_t).
*
- * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is
- * not, then it is array[size 1] so it can be assigned to a pointer.
+ * An mdb_mutex_t can be assigned to an mdb_mutexref_t. They can
+ * be the same, or an array[size 1] and a pointer.
* @{
*/
typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t;
@@ -349,7 +439,7 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t;
/** Mark mutex-protected data as repaired, after death of previous owner.
*/
#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex)
-#endif /* MDB_USE_POSIX_SEM */
+#endif /* MDB_USE_POSIX_SEM || MDB_USE_SYSV_SEM */
/** Get the error code for the last failed system function.
*/
@@ -374,12 +464,24 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t;
#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
#endif
+#define Z MDB_FMT_Z /**< printf/scanf format modifier for size_t */
+#define Yu MDB_PRIy(u) /**< printf format for #mdb_size_t */
+#define Yd MDB_PRIy(d) /**< printf format for "signed #mdb_size_t" */
+
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
#define MNAME_LEN 32
+#elif defined(MDB_USE_SYSV_SEM)
+#define MNAME_LEN (sizeof(int))
#else
#define MNAME_LEN (sizeof(pthread_mutex_t))
#endif
+#ifdef MDB_USE_SYSV_SEM
+#define SYSV_SEM_FLAG 1 /**< SysV sems in lockfile format */
+#else
+#define SYSV_SEM_FLAG 0
+#endif
+
/** @} */
#ifdef MDB_ROBUST_SUPPORTED
@@ -521,7 +623,7 @@ static txnid_t mdb_debug_start;
/** The version number for a database's datafile format. */
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
/** The version number for a database's lockfile format. */
-#define MDB_LOCK_VERSION 1
+#define MDB_LOCK_VERSION ((MDB_DEVEL) ? 999 : 1)
/** @brief The max size of a key we can write, or 0 for computed max.
*
@@ -712,6 +814,9 @@ typedef struct MDB_txbody {
uint32_t mtb_format;
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
char mtb_rmname[MNAME_LEN];
+#elif defined(MDB_USE_SYSV_SEM)
+ int mtb_semid;
+ int mtb_rlocked;
#else
/** Mutex protecting access to this table.
* This is the reader table lock used with LOCK_MUTEX().
@@ -740,12 +845,19 @@ typedef struct MDB_txninfo {
#define mti_rmname mt1.mtb.mtb_rmname
#define mti_txnid mt1.mtb.mtb_txnid
#define mti_numreaders mt1.mtb.mtb_numreaders
+#ifdef MDB_USE_SYSV_SEM
+#define mti_semid mt1.mtb.mtb_semid
+#define mti_rlocked mt1.mtb.mtb_rlocked
+#endif
char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
} mt1;
union {
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
char mt2_wmname[MNAME_LEN];
#define mti_wmname mt2.mt2_wmname
+#elif defined MDB_USE_SYSV_SEM
+ int mt2_wlocked;
+#define mti_wlocked mt2.mt2_wlocked
#else
mdb_mutex_t mt2_wmutex;
#define mti_wmutex mt2.mt2_wmutex
@@ -760,12 +872,27 @@ typedef struct MDB_txninfo {
((uint32_t) \
((MDB_LOCK_VERSION) \
/* Flags which describe functionality */ \
+ + (SYSV_SEM_FLAG << 18) \
+ (((MDB_PIDLOCK) != 0) << 16)))
/** @} */
-/** Common header for all page types.
- * Overflow records occupy a number of contiguous pages with no
- * headers on any page after the first.
+/** Common header for all page types. The page type depends on #mp_flags.
+ *
+ * #P_BRANCH and #P_LEAF pages have unsorted '#MDB_node's at the end, with
+ * sorted #mp_ptrs[] entries referring to them. Exception: #P_LEAF2 pages
+ * omit mp_ptrs and pack sorted #MDB_DUPFIXED values after the page header.
+ *
+ * #P_OVERFLOW records occupy one or more contiguous pages where only the
+ * first has a page header. They hold the real data of #F_BIGDATA nodes.
+ *
+ * #P_SUBP sub-pages are small leaf "pages" with duplicate data.
+ * A node with flag #F_DUPDATA but not #F_SUBDATA contains a sub-page.
+ * (Duplicate data can also go in sub-databases, which use normal pages.)
+ *
+ * #P_META pages contain #MDB_meta, the start point of an LMDB snapshot.
+ *
+ * Each non-metapage up to #MDB_meta.%mm_last_pg is reachable exactly once
+ * in the snapshot: Either used by a database or listed in a freeDB record.
*/
typedef struct MDB_page {
#define mp_pgno mp_p.p_pgno
@@ -774,7 +901,7 @@ typedef struct MDB_page {
pgno_t p_pgno; /**< page number */
struct MDB_page *p_next; /**< for in-memory list of freed pages */
} mp_p;
- uint16_t mp_pad;
+ uint16_t mp_pad; /**< key size if this is a LEAF2 page */
/** @defgroup mdb_page Page Flags
* @ingroup internal
* Flags for the page headers.
@@ -841,7 +968,9 @@ typedef struct MDB_page {
/** The number of overflow pages needed to store the given size. */
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
- /** Link in #MDB_txn.%mt_loose_pgs list */
+ /** Link in #MDB_txn.%mt_loose_pgs list.
+ * Kept outside the page header, which is needed when reusing the page.
+ */
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
/** Header for a single key/data pair within a page.
@@ -924,7 +1053,7 @@ typedef struct MDB_node {
#ifdef MISALIGNED_OK
#define COPY_PGNO(dst,src) dst = src
#else
-#if SIZE_MAX > 4294967295UL
+#if MDB_SIZE_MAX > 0xffffffffU
#define COPY_PGNO(dst,src) do { \
unsigned short *s, *d; \
s = (unsigned short *)&(src); \
@@ -965,13 +1094,13 @@ typedef struct MDB_db {
pgno_t md_branch_pages; /**< number of internal pages */
pgno_t md_leaf_pages; /**< number of leaf pages */
pgno_t md_overflow_pages; /**< number of overflow pages */
- size_t md_entries; /**< number of data items */
+ mdb_size_t md_entries; /**< number of data items */
pgno_t md_root; /**< the root page of this tree */
} MDB_db;
- /** mdb_dbi_open flags */
#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */
#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID))
+ /** #mdb_dbi_open() flags */
#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\
MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE)
@@ -995,14 +1124,25 @@ typedef struct MDB_meta {
uint32_t mm_magic;
/** Version number of this file. Must be set to #MDB_DATA_VERSION. */
uint32_t mm_version;
+#ifdef MDB_VL32
+ union { /* always zero since we don't support fixed mapping in MDB_VL32 */
+ MDB_ID mmun_ull;
+ void *mmun_address;
+ } mm_un;
+#define mm_address mm_un.mmun_address
+#else
void *mm_address; /**< address for fixed mapping */
- size_t mm_mapsize; /**< size of mmap region */
+#endif
+ pgno_t mm_mapsize; /**< size of mmap region */
MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */
/** The size of pages used in this DB */
#define mm_psize mm_dbs[FREE_DBI].md_pad
/** Any persistent environment flags. @ref mdb_env */
#define mm_flags mm_dbs[FREE_DBI].md_flags
- pgno_t mm_last_pg; /**< last used page in file */
+ /** Last used page in the datafile.
+ * Actually the file may be shorter if the freeDB lists the final pages.
+ */
+ pgno_t mm_last_pg;
volatile txnid_t mm_txnid; /**< txnid that committed this page */
} MDB_meta;
@@ -1039,6 +1179,9 @@ struct MDB_txn {
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
MDB_txn *mt_child;
pgno_t mt_next_pgno; /**< next unallocated page */
+#ifdef MDB_VL32
+ pgno_t mt_last_pgno; /**< last written page */
+#endif
/** The ID of this transaction. IDs are integers incrementing from 1.
* Only committed write transactions increment the ID. If a transaction
* aborts, the ID may be re-used by the next writer.
@@ -1052,7 +1195,7 @@ struct MDB_txn {
* in this transaction, linked through #NEXT_LOOSE_PAGE(page).
*/
MDB_page *mt_loose_pgs;
- /* #Number of loose pages (#mt_loose_pgs) */
+ /** Number of loose pages (#mt_loose_pgs) */
int mt_loose_count;
/** The sorted list of dirty pages we temporarily wrote to disk
* because the dirty list was full. page numbers in here are
@@ -1085,6 +1228,19 @@ struct MDB_txn {
MDB_cursor **mt_cursors;
/** Array of flags for each DB */
unsigned char *mt_dbflags;
+#ifdef MDB_VL32
+ /** List of read-only pages (actually chunks) */
+ MDB_ID3L mt_rpages;
+ /** We map chunks of 16 pages. Even though Windows uses 4KB pages, all
+ * mappings must begin on 64KB boundaries. So we round off all pgnos to
+ * a chunk boundary. We do the same on Linux for symmetry, and also to
+ * reduce the frequency of mmap/munmap calls.
+ */
+#define MDB_RPAGE_CHUNK 16
+#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */
+#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */
+ unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */
+#endif
/** Number of DB records in use, or 0 when the txn is finished.
* This number only ever increments until the txn finishes; we
* don't decrement it when individual DB handles are closed.
@@ -1096,7 +1252,9 @@ struct MDB_txn {
* @{
*/
/** #mdb_txn_begin() flags */
-#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY
+#define MDB_TXN_BEGIN_FLAGS (MDB_NOMETASYNC|MDB_NOSYNC|MDB_RDONLY)
+#define MDB_TXN_NOMETASYNC MDB_NOMETASYNC /**< don't sync meta for this txn on commit */
+#define MDB_TXN_NOSYNC MDB_NOSYNC /**< don't sync this txn on commit */
#define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */
/* internal txn flags */
#define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */
@@ -1162,10 +1320,24 @@ struct MDB_cursor {
#define C_SUB 0x04 /**< Cursor is a sub-cursor */
#define C_DEL 0x08 /**< last op was a cursor_del */
#define C_UNTRACK 0x40 /**< Un-track cursor when closing */
+#define C_WRITEMAP MDB_TXN_WRITEMAP /**< Copy of txn flag */
+/** Read-only cursor into the txn's original snapshot in the map.
+ * Set for read-only txns, and in #mdb_page_alloc() for #FREE_DBI when
+ * #MDB_DEVEL & 2. Only implements code which is necessary for this.
+ */
+#define C_ORIG_RDONLY MDB_TXN_RDONLY
/** @} */
unsigned int mc_flags; /**< @ref mdb_cursor */
MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */
indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */
+#ifdef MDB_VL32
+ MDB_page *mc_ovpg; /**< a referenced overflow page */
+# define MC_OVPG(mc) ((mc)->mc_ovpg)
+# define MC_SET_OVPG(mc, pg) ((mc)->mc_ovpg = (pg))
+#else
+# define MC_OVPG(mc) ((MDB_page *)0)
+# define MC_SET_OVPG(mc, pg) ((void)0)
+#endif
};
/** Context for sorted-dup records.
@@ -1195,6 +1367,9 @@ struct MDB_env {
HANDLE me_fd; /**< The main data file */
HANDLE me_lfd; /**< The lock file */
HANDLE me_mfd; /**< just for writing the meta pages */
+#if defined(MDB_VL32) && defined(_WIN32)
+ HANDLE me_fmh; /**< File Mapping handle */
+#endif
/** Failed to update the meta page. Probably an I/O error. */
#define MDB_FATAL_ERROR 0x80000000U
/** Some fields are initialized. */
@@ -1219,8 +1394,8 @@ struct MDB_env {
void *me_pbuf; /**< scratch area for DUPSORT put() */
MDB_txn *me_txn; /**< current write transaction */
MDB_txn *me_txn0; /**< prealloc'd write transaction */
- size_t me_mapsize; /**< size of the data memory map */
- off_t me_size; /**< current file size */
+ mdb_size_t me_mapsize; /**< size of the data memory map */
+ offset_t me_size; /**< current file size */
pgno_t me_maxpg; /**< me_mapsize / me_psize */
MDB_dbx *me_dbxs; /**< array of static DB info */
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
@@ -1253,6 +1428,13 @@ struct MDB_env {
mdb_mutex_t me_rmutex;
mdb_mutex_t me_wmutex;
#endif
+#ifdef MDB_VL32
+ MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */
+ pthread_mutex_t me_rpmutex; /**< control access to #me_rpages */
+#define MDB_ERPAGE_SIZE 16384
+#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1)
+ unsigned int me_rpcheck;
+#endif
void *me_userctx; /**< User-settable context */
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
};
@@ -1298,7 +1480,7 @@ enum {
#define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */
static void mdb_txn_end(MDB_txn *txn, unsigned mode);
-static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl);
+static int mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **mp, int *lvl);
static int mdb_page_search_root(MDB_cursor *mc,
MDB_val *key, int modify);
#define MDB_PS_MODIFY 1
@@ -1327,7 +1509,7 @@ static int mdb_node_add(MDB_cursor *mc, indx_t indx,
static void mdb_node_del(MDB_cursor *mc, int ksize);
static void mdb_node_shrink(MDB_page *mp, indx_t indx);
static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft);
-static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
+static int mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data);
static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data);
static size_t mdb_branch_size(MDB_env *env, MDB_val *key);
@@ -1360,13 +1542,18 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead);
static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long;
/** @endcond */
-/** Compare two items pointing at size_t's of unknown alignment. */
+/** Compare two items pointing at '#mdb_size_t's of unknown alignment. */
#ifdef MISALIGNED_OK
# define mdb_cmp_clong mdb_cmp_long
#else
# define mdb_cmp_clong mdb_cmp_cint
#endif
+/** True if we need #mdb_cmp_clong() instead of \b cmp for #MDB_INTEGERDUP */
+#define NEED_CMP_CLONG(cmp, ksize) \
+ (UINT_MAX < MDB_SIZE_MAX && \
+ (cmp) == mdb_cmp_int && (ksize) == sizeof(mdb_size_t))
+
#ifdef _WIN32
static SECURITY_DESCRIPTOR mdb_null_sd;
static SECURITY_ATTRIBUTES mdb_all_sa;
@@ -1407,6 +1594,7 @@ static char *const mdb_errstr[] = {
"MDB_BAD_TXN: Transaction must abort, has a child, or is invalid",
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
+ "MDB_PROBLEM: Unexpected problem - txn should abort",
};
char *
@@ -1417,8 +1605,9 @@ mdb_strerror(int err)
* This works as long as no function between the call to mdb_strerror
* and the actual use of the message uses more than 4K of stack.
*/
- char pad[4096];
- char buf[1024], *ptr = buf;
+#define MSGSIZE 1024
+#define PADSIZE 4096
+ char buf[MSGSIZE+PADSIZE], *ptr = buf;
#endif
int i;
if (!err)
@@ -1450,7 +1639,7 @@ mdb_strerror(int err)
buf[0] = 0;
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, err, 0, ptr, sizeof(buf), (va_list *)pad);
+ NULL, err, 0, ptr, MSGSIZE, (va_list *)buf+MSGSIZE);
return ptr;
#else
return strerror(err);
@@ -1550,20 +1739,20 @@ mdb_page_list(MDB_page *mp)
case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
case P_OVERFLOW:
- fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
+ fprintf(stderr, "Overflow page %"Yu" pages %u%s\n",
pgno, mp->mp_pages, state);
return;
case P_META:
- fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
+ fprintf(stderr, "Meta-page %"Yu" txnid %"Yu"\n",
pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
return;
default:
- fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags);
+ fprintf(stderr, "Bad page %"Yu" flags 0x%X\n", pgno, mp->mp_flags);
return;
}
nkeys = NUMKEYS(mp);
- fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
+ fprintf(stderr, "%s %"Yu" numkeys %d%s\n", type, pgno, nkeys, state);
for (i=0; i<nkeys; i++) {
if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
@@ -1578,7 +1767,7 @@ mdb_page_list(MDB_page *mp)
key.mv_data = node->mn_data;
nsize = NODESIZE + key.mv_size;
if (IS_BRANCH(mp)) {
- fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
+ fprintf(stderr, "key %d: page %"Yu", %s\n", i, NODEPGNO(node),
DKEY(&key));
total += nsize;
} else {
@@ -1674,7 +1863,7 @@ static void mdb_audit(MDB_txn *txn)
}
}
if (freecount + count + NUM_METAS != txn->mt_next_pgno) {
- fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n",
+ fprintf(stderr, "audit: %"Yu" freecount: %"Yu" count: %"Yu" total: %"Yu" next_pgno: %"Yu"\n",
txn->mt_txnid, freecount, count+NUM_METAS,
freecount+count+NUM_METAS, txn->mt_next_pgno);
}
@@ -1691,10 +1880,8 @@ int
mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
{
MDB_cmp_func *dcmp = txn->mt_dbxs[dbi].md_dcmp;
-#if UINT_MAX < SIZE_MAX
- if (dcmp == mdb_cmp_int && a->mv_size == sizeof(size_t))
+ if (NEED_CMP_CLONG(dcmp, a->mv_size))
dcmp = mdb_cmp_clong;
-#endif
return dcmp(a, b);
}
@@ -1774,6 +1961,51 @@ mdb_dlist_free(MDB_txn *txn)
dl[0].mid = 0;
}
+#ifdef MDB_VL32
+static void
+mdb_page_unref(MDB_txn *txn, MDB_page *mp)
+{
+ pgno_t pgno;
+ MDB_ID3L tl = txn->mt_rpages;
+ unsigned x, rem;
+ if (mp->mp_flags & (P_SUBP|P_DIRTY))
+ return;
+ rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1);
+ pgno = mp->mp_pgno ^ rem;
+ x = mdb_mid3l_search(tl, pgno);
+ if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno)
+ x++;
+ if (tl[x].mref)
+ tl[x].mref--;
+}
+#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp)
+
+static void
+mdb_cursor_unref(MDB_cursor *mc)
+{
+ int i;
+ if (!mc->mc_snum || !mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0]))
+ return;
+ for (i=0; i<mc->mc_snum; i++)
+ mdb_page_unref(mc->mc_txn, mc->mc_pg[i]);
+ if (mc->mc_ovpg) {
+ mdb_page_unref(mc->mc_txn, mc->mc_ovpg);
+ mc->mc_ovpg = 0;
+ }
+ mc->mc_snum = mc->mc_top = 0;
+ mc->mc_pg[0] = NULL;
+ mc->mc_flags &= ~C_INITIALIZED;
+}
+#define MDB_CURSOR_UNREF(mc, force) \
+ (((force) || ((mc)->mc_flags & C_INITIALIZED)) \
+ ? mdb_cursor_unref(mc) \
+ : (void)0)
+
+#else
+#define MDB_PAGE_UNREF(txn, mp)
+#define MDB_CURSOR_UNREF(mc, force) ((void)0)
+#endif /* MDB_VL32 */
+
/** Loosen or free a single page.
* Saves single pages to a list for future reuse
* in this same txn. It has been pulled from the freeDB
@@ -1803,7 +2035,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
if (mp != dl[x].mptr) { /* bad cursor? */
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
txn->mt_flags |= MDB_TXN_ERROR;
- return MDB_CORRUPTED;
+ return MDB_PROBLEM;
}
/* ok, it's ours */
loose = 1;
@@ -1815,8 +2047,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
}
}
if (loose) {
- DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
- mp->mp_pgno));
+ DPRINTF(("loosen db %d page %"Yu, DDBI(mc), mp->mp_pgno));
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
txn->mt_loose_pgs = mp;
txn->mt_loose_count++;
@@ -1842,7 +2073,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
{
enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP };
MDB_txn *txn = mc->mc_txn;
- MDB_cursor *m3;
+ MDB_cursor *m3, *m0 = mc;
MDB_xcursor *mx;
MDB_page *dp, *mp;
MDB_node *leaf;
@@ -1885,7 +2116,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
pgno_t pgno = txn->mt_dbs[i].md_root;
if (pgno == P_INVALID)
continue;
- if ((rc = mdb_page_get(txn, pgno, &dp, &level)) != MDB_SUCCESS)
+ if ((rc = mdb_page_get(m0, pgno, &dp, &level)) != MDB_SUCCESS)
break;
if ((dp->mp_flags & Mask) == pflags && level <= 1)
dp->mp_flags ^= P_KEEP;
@@ -2074,6 +2305,8 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
* Do not modify the freedB, just merge freeDB records into me_pghead[]
* and move me_pglast to say which records were consumed. Only this
* function can create me_pghead and move me_pglast/mt_next_pgno.
+ * When #MDB_DEVEL & 2, it is not affected by #mdb_freelist_save(): it
+ * then uses the transaction's original snapshot of the freeDB.
* @param[in] mc cursor A cursor handle identifying the transaction and
* database for which we are allocating.
* @param[in] num the number of pages to allocate.
@@ -2111,8 +2344,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
np = txn->mt_loose_pgs;
txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
txn->mt_loose_count--;
- DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
- np->mp_pgno));
+ DPRINTF(("db %d use loose page %"Yu, DDBI(mc), np->mp_pgno));
*mp = np;
return MDB_SUCCESS;
}
@@ -2149,6 +2381,14 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
last = env->me_pglast;
oldest = env->me_pgoldest;
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
+#if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */
+ /* Use original snapshot. TODO: Should need less care in code
+ * which modifies the database. Maybe we can delete some code?
+ */
+ m2.mc_flags |= C_ORIG_RDONLY;