0 | 0 |
/** @file mdb.c
|
1 | |
* @brief memory-mapped database library
|
|
1 |
* @brief Lightning memory-mapped database library
|
2 | 2 |
*
|
3 | 3 |
* A Btree-based database management library modeled loosely on the
|
4 | 4 |
* BerkeleyDB API, but much simplified.
|
5 | 5 |
*/
|
6 | 6 |
/*
|
7 | |
* Copyright 2011-2013 Howard Chu, Symas Corp.
|
|
7 |
* Copyright 2011-2014 Howard Chu, Symas Corp.
|
8 | 8 |
* All rights reserved.
|
9 | 9 |
*
|
10 | 10 |
* Redistribution and use in source and binary forms, with or without
|
|
34 | 34 |
#ifndef _GNU_SOURCE
|
35 | 35 |
#define _GNU_SOURCE 1
|
36 | 36 |
#endif
|
37 | |
#include <sys/types.h>
|
38 | |
#include <sys/stat.h>
|
39 | 37 |
#ifdef _WIN32
|
|
38 |
#include <malloc.h>
|
40 | 39 |
#include <windows.h>
|
41 | 40 |
/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
|
42 | 41 |
* as int64 which is wrong. MSVC doesn't define it at all, so just
|
43 | 42 |
* don't use it.
|
44 | 43 |
*/
|
45 | 44 |
#define MDB_PID_T int
|
|
45 |
#define MDB_THR_T DWORD
|
|
46 |
#include <sys/types.h>
|
|
47 |
#include <sys/stat.h>
|
46 | 48 |
#ifdef __GNUC__
|
47 | 49 |
# include <sys/param.h>
|
48 | 50 |
#else
|
|
54 | 56 |
# endif
|
55 | 57 |
#endif
|
56 | 58 |
#else
|
|
59 |
#include <sys/types.h>
|
|
60 |
#include <sys/stat.h>
|
57 | 61 |
#define MDB_PID_T pid_t
|
|
62 |
#define MDB_THR_T pthread_t
|
58 | 63 |
#include <sys/param.h>
|
59 | 64 |
#include <sys/uio.h>
|
60 | 65 |
#include <sys/mman.h>
|
|
63 | 68 |
#endif
|
64 | 69 |
#include <fcntl.h>
|
65 | 70 |
#endif
|
|
71 |
|
|
72 |
#if defined(__mips) && defined(__linux)
|
|
73 |
/* MIPS has cache coherency issues, requires explicit cache control */
|
|
74 |
#include <asm/cachectl.h>
|
|
75 |
extern int cacheflush(char *addr, int nbytes, int cache);
|
|
76 |
#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache)
|
|
77 |
#else
|
|
78 |
#define CACHEFLUSH(addr, bytes, cache)
|
|
79 |
#endif
|
|
80 |
|
66 | 81 |
|
67 | 82 |
#include <errno.h>
|
68 | 83 |
#include <limits.h>
|
|
73 | 88 |
#include <string.h>
|
74 | 89 |
#include <time.h>
|
75 | 90 |
#include <unistd.h>
|
|
91 |
|
|
92 |
#if defined(__sun)
|
|
93 |
/* Most platforms have posix_memalign, older may only have memalign */
|
|
94 |
#define HAVE_MEMALIGN 1
|
|
95 |
#include <malloc.h>
|
|
96 |
#endif
|
76 | 97 |
|
77 | 98 |
#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER))
|
78 | 99 |
#include <netinet/in.h>
|
|
144 | 165 |
# error "Two's complement, reasonably sized integer types, please"
|
145 | 166 |
#endif
|
146 | 167 |
|
147 | |
/** @defgroup internal MDB Internals
|
|
168 |
#ifdef __GNUC__
|
|
169 |
/** Put infrequently used env functions in separate section */
|
|
170 |
# ifdef __APPLE__
|
|
171 |
# define ESECT __attribute__ ((section("__TEXT,text_env")))
|
|
172 |
# else
|
|
173 |
# define ESECT __attribute__ ((section("text_env")))
|
|
174 |
# endif
|
|
175 |
#else
|
|
176 |
#define ESECT
|
|
177 |
#endif
|
|
178 |
|
|
179 |
/** @defgroup internal LMDB Internals
|
148 | 180 |
* @{
|
149 | 181 |
*/
|
150 | 182 |
/** @defgroup compat Compatibility Macros
|
|
155 | 187 |
* @{
|
156 | 188 |
*/
|
157 | 189 |
|
|
190 |
/** Features under development */
|
|
191 |
#ifndef MDB_DEVEL
|
|
192 |
#define MDB_DEVEL 0
|
|
193 |
#endif
|
|
194 |
|
158 | 195 |
/** Wrapper around __func__, which is a C99 feature */
|
159 | 196 |
#if __STDC_VERSION__ >= 199901L
|
160 | 197 |
# define mdb_func_ __func__
|
|
168 | 205 |
#ifdef _WIN32
|
169 | 206 |
#define MDB_USE_HASH 1
|
170 | 207 |
#define MDB_PIDLOCK 0
|
171 | |
#define pthread_t DWORD
|
|
208 |
#define THREAD_RET DWORD
|
|
209 |
#define pthread_t HANDLE
|
172 | 210 |
#define pthread_mutex_t HANDLE
|
|
211 |
#define pthread_cond_t HANDLE
|
173 | 212 |
#define pthread_key_t DWORD
|
174 | 213 |
#define pthread_self() GetCurrentThreadId()
|
175 | 214 |
#define pthread_key_create(x,y) \
|
|
177 | 216 |
#define pthread_key_delete(x) TlsFree(x)
|
178 | 217 |
#define pthread_getspecific(x) TlsGetValue(x)
|
179 | 218 |
#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode())
|
180 | |
#define pthread_mutex_unlock(x) ReleaseMutex(x)
|
181 | |
#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
|
182 | |
#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex)
|
183 | |
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex)
|
184 | |
#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex)
|
185 | |
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex)
|
|
219 |
#define pthread_mutex_unlock(x) ReleaseMutex(*x)
|
|
220 |
#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE)
|
|
221 |
#define pthread_cond_signal(x) SetEvent(*x)
|
|
222 |
#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0)
|
|
223 |
#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL)
|
|
224 |
#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE)
|
|
225 |
#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex)
|
|
226 |
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex)
|
|
227 |
#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex)
|
|
228 |
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex)
|
186 | 229 |
#define getpid() GetCurrentProcessId()
|
187 | 230 |
#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
|
188 | 231 |
#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
|
|
197 | 240 |
#endif
|
198 | 241 |
#define Z "I"
|
199 | 242 |
#else
|
200 | |
|
|
243 |
#define THREAD_RET void *
|
|
244 |
#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
|
|
245 |
#define THREAD_FINISH(thr) pthread_join(thr,NULL)
|
201 | 246 |
#define Z "z" /**< printf format modifier for size_t */
|
202 | 247 |
|
203 | 248 |
/** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
|
|
351 | 396 |
|
352 | 397 |
/** @brief The maximum size of a database page.
|
353 | 398 |
*
|
354 | |
* This is 32k, since it must fit in #MDB_page.#mp_upper.
|
|
399 |
* It is 32k or 64k, since value-PAGEBASE must fit in
|
|
400 |
* #MDB_page.%mp_upper.
|
355 | 401 |
*
|
356 | 402 |
* LMDB will use database pages < OS pages if needed.
|
357 | 403 |
* That causes more I/O in write transactions: The OS must
|
|
364 | 410 |
* pressure from other processes is high. So until OSs have
|
365 | 411 |
* actual paging support for Huge pages, they're not viable.
|
366 | 412 |
*/
|
367 | |
#define MAX_PAGESIZE 0x8000
|
|
413 |
#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000)
|
368 | 414 |
|
369 | 415 |
/** The minimum number of keys required in a database page.
|
370 | 416 |
* Setting this to a larger value will place a smaller bound on the
|
|
380 | 426 |
*/
|
381 | 427 |
#define MDB_MINKEYS 2
|
382 | 428 |
|
383 | |
/** A stamp that identifies a file as an MDB file.
|
|
429 |
/** A stamp that identifies a file as an LMDB file.
|
384 | 430 |
* There's nothing special about this value other than that it is easily
|
385 | 431 |
* recognizable, and it will reflect any byte order mismatches.
|
386 | 432 |
*/
|
387 | 433 |
#define MDB_MAGIC 0xBEEFC0DE
|
388 | 434 |
|
389 | 435 |
/** The version number for a database's datafile format. */
|
390 | |
#define MDB_DATA_VERSION 1
|
|
436 |
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
|
391 | 437 |
/** The version number for a database's lockfile format. */
|
392 | 438 |
#define MDB_LOCK_VERSION 1
|
393 | 439 |
|
|
396 | 442 |
* Define this as 0 to compute the max from the page size. 511
|
397 | 443 |
* is default for backwards compat: liblmdb <= 0.9.10 can break
|
398 | 444 |
* when modifying a DB with keys/dupsort data bigger than its max.
|
|
445 |
* #MDB_DEVEL sets the default to 0.
|
399 | 446 |
*
|
400 | 447 |
* Data items in an #MDB_DUPSORT database are also limited to
|
401 | 448 |
* this size, since they're actually keys of a sub-DB. Keys and
|
402 | 449 |
* #MDB_DUPSORT data items must fit on a node in a regular page.
|
403 | 450 |
*/
|
404 | 451 |
#ifndef MDB_MAXKEYSIZE
|
405 | |
#define MDB_MAXKEYSIZE 511
|
|
452 |
#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511)
|
406 | 453 |
#endif
|
407 | 454 |
|
408 | 455 |
/** The maximum size of a key we can write to the environment. */
|
|
536 | 583 |
/** The process ID of the process owning this reader txn. */
|
537 | 584 |
MDB_PID_T mrb_pid;
|
538 | 585 |
/** The thread ID of the thread owning this txn. */
|
539 | |
pthread_t mrb_tid;
|
|
586 |
MDB_THR_T mrb_tid;
|
540 | 587 |
} MDB_rxbody;
|
541 | 588 |
|
542 | 589 |
/** The actual reader record, with cacheline padding. */
|
|
567 | 614 |
* unlikely. If a collision occurs, the results are unpredictable.
|
568 | 615 |
*/
|
569 | 616 |
typedef struct MDB_txbody {
|
570 | |
/** Stamp identifying this as an MDB file. It must be set
|
|
617 |
/** Stamp identifying this as an LMDB file. It must be set
|
571 | 618 |
* to #MDB_MAGIC. */
|
572 | 619 |
uint32_t mtb_magic;
|
573 | 620 |
/** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */
|
|
634 | 681 |
#define mp_next mp_p.p_next
|
635 | 682 |
union {
|
636 | 683 |
pgno_t p_pgno; /**< page number */
|
637 | |
void * p_next; /**< for in-memory list of freed structs */
|
|
684 |
struct MDB_page *p_next; /**< for in-memory list of freed pages */
|
638 | 685 |
} mp_p;
|
639 | 686 |
uint16_t mp_pad;
|
640 | 687 |
/** @defgroup mdb_page Page Flags
|
|
649 | 696 |
#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */
|
650 | 697 |
#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */
|
651 | 698 |
#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */
|
|
699 |
#define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */
|
652 | 700 |
#define P_KEEP 0x8000 /**< leave this page alone during spill */
|
653 | 701 |
/** @} */
|
654 | 702 |
uint16_t mp_flags; /**< @ref mdb_page */
|
|
671 | 719 |
/** Address of first usable data byte in a page, after the header */
|
672 | 720 |
#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
|
673 | 721 |
|
|
722 |
/** ITS#7713, change PAGEBASE to handle 65536 byte pages */
|
|
723 |
#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
|
|
724 |
|
674 | 725 |
/** Number of nodes on a page */
|
675 | |
#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1)
|
|
726 |
#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1)
|
676 | 727 |
|
677 | 728 |
/** The amount of space remaining in the page */
|
678 | 729 |
#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
|
|
698 | 749 |
|
699 | 750 |
/** The number of overflow pages needed to store the given size. */
|
700 | 751 |
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
|
752 |
|
|
753 |
/** Link in #MDB_txn.%mt_loose_pgs list */
|
|
754 |
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
|
701 | 755 |
|
702 | 756 |
/** Header for a single key/data pair within a page.
|
703 | 757 |
* Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
|
|
750 | 804 |
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
|
751 | 805 |
|
752 | 806 |
/** Address of node \b i in page \b p */
|
753 | |
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
|
|
807 |
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE))
|
754 | 808 |
|
755 | 809 |
/** Address of the key for the node */
|
756 | 810 |
#define NODEKEY(node) (void *)((node)->mn_data)
|
|
840 | 894 |
* Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2).
|
841 | 895 |
*/
|
842 | 896 |
typedef struct MDB_meta {
|
843 | |
/** Stamp identifying this as an MDB file. It must be set
|
|
897 |
/** Stamp identifying this as an LMDB file. It must be set
|
844 | 898 |
* to #MDB_MAGIC. */
|
845 | 899 |
uint32_t mm_magic;
|
846 | 900 |
/** Version number of this data file. Must be set to #MDB_DATA_VERSION. */
|
|
897 | 951 |
/** The list of pages that became unused during this transaction.
|
898 | 952 |
*/
|
899 | 953 |
MDB_IDL mt_free_pgs;
|
|
954 |
/** The list of loose pages that became unused and may be reused
|
|
955 |
* in this transaction, linked through #NEXT_LOOSE_PAGE(page).
|
|
956 |
*/
|
|
957 |
MDB_page *mt_loose_pgs;
|
|
958 |
/** Number of loose pages (#mt_loose_pgs) */
|
|
959 |
int mt_loose_count;
|
900 | 960 |
/** The sorted list of dirty pages we temporarily wrote to disk
|
901 | 961 |
* because the dirty list was full. page numbers in here are
|
902 | 962 |
* shifted left by 1, deleted slots have the LSB set.
|
|
912 | 972 |
MDB_dbx *mt_dbxs;
|
913 | 973 |
/** Array of MDB_db records for each known DB */
|
914 | 974 |
MDB_db *mt_dbs;
|
|
975 |
/** Array of sequence numbers for each DB handle */
|
|
976 |
unsigned int *mt_dbiseqs;
|
915 | 977 |
/** @defgroup mt_dbflag Transaction DB Flags
|
916 | 978 |
* @ingroup internal
|
917 | 979 |
* @{
|
|
935 | 997 |
* @{
|
936 | 998 |
*/
|
937 | 999 |
#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
|
938 | |
#define MDB_TXN_ERROR 0x02 /**< an error has occurred */
|
|
1000 |
#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
|
939 | 1001 |
#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
|
940 | 1002 |
#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
|
941 | 1003 |
/** @} */
|
942 | 1004 |
unsigned int mt_flags; /**< @ref mdb_txn */
|
943 | |
/** dirty_list room: Array size - #dirty pages visible to this txn.
|
|
1005 |
/** #dirty_list room: Array size - \#dirty pages visible to this txn.
|
944 | 1006 |
* Includes ancestor txns' dirty pages not hidden by other txns'
|
945 | 1007 |
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
946 | 1008 |
* dirty_list into mt_parent after freeing hidden mt_parent pages.
|
|
1033 | 1095 |
#define MDB_ENV_ACTIVE 0x20000000U
|
1034 | 1096 |
/** me_txkey is set */
|
1035 | 1097 |
#define MDB_ENV_TXKEY 0x10000000U
|
1036 | |
/** Have liveness lock in reader table */
|
1037 | |
#define MDB_LIVE_READER 0x08000000U
|
1038 | 1098 |
uint32_t me_flags; /**< @ref mdb_env */
|
1039 | 1099 |
unsigned int me_psize; /**< DB page size, inited from me_os_psize */
|
1040 | 1100 |
unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */
|
|
1049 | 1109 |
MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
|
1050 | 1110 |
void *me_pbuf; /**< scratch area for DUPSORT put() */
|
1051 | 1111 |
MDB_txn *me_txn; /**< current write transaction */
|
|
1112 |
MDB_txn *me_txn0; /**< prealloc'd write transaction */
|
1052 | 1113 |
size_t me_mapsize; /**< size of the data memory map */
|
1053 | 1114 |
off_t me_size; /**< current file size */
|
1054 | 1115 |
pgno_t me_maxpg; /**< me_mapsize / me_psize */
|
1055 | 1116 |
MDB_dbx *me_dbxs; /**< array of static DB info */
|
1056 | 1117 |
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
|
1118 |
unsigned int *me_dbiseqs; /**< array of dbi sequence numbers */
|
1057 | 1119 |
pthread_key_t me_txkey; /**< thread-key for readers */
|
|
1120 |
txnid_t me_pgoldest; /**< ID of oldest reader last time we looked */
|
1058 | 1121 |
MDB_pgstate me_pgstate; /**< state of old pages from freeDB */
|
1059 | 1122 |
# define me_pglast me_pgstate.mf_pglast
|
1060 | 1123 |
# define me_pghead me_pgstate.mf_pghead
|
|
1070 | 1133 |
#if !(MDB_MAXKEYSIZE)
|
1071 | 1134 |
unsigned int me_maxkey; /**< max size of a key */
|
1072 | 1135 |
#endif
|
|
1136 |
int me_live_reader; /**< have liveness lock in reader table */
|
1073 | 1137 |
#ifdef _WIN32
|
1074 | 1138 |
int me_pidquery; /**< Used in OpenProcess */
|
1075 | 1139 |
HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
|
|
1095 | 1159 |
#define MDB_COMMIT_PAGES IOV_MAX
|
1096 | 1160 |
#endif
|
1097 | 1161 |
|
1098 | |
/* max bytes to write in one call */
|
|
1162 |
/** max bytes to write in one call */
|
1099 | 1163 |
#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4))
|
|
1164 |
|
|
1165 |
/** Check \b txn and \b dbi arguments to a function */
|
|
1166 |
#define TXN_DBI_EXIST(txn, dbi) \
|
|
1167 |
((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID))
|
|
1168 |
|
|
1169 |
/** Check for misused \b dbi handles */
|
|
1170 |
#define TXN_DBI_CHANGED(txn, dbi) \
|
|
1171 |
((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi])
|
1100 | 1172 |
|
1101 | 1173 |
static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
|
1102 | 1174 |
static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
|
|
1141 | 1213 |
static void mdb_cursor_pop(MDB_cursor *mc);
|
1142 | 1214 |
static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
|
1143 | 1215 |
|
1144 | |
static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf);
|
|
1216 |
static int mdb_cursor_del0(MDB_cursor *mc);
|
|
1217 |
static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
|
1145 | 1218 |
static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
|
1146 | 1219 |
static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
|
1147 | 1220 |
static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
|
|
1177 | 1250 |
return MDB_VERSION_STRING;
|
1178 | 1251 |
}
|
1179 | 1252 |
|
1180 | |
/** Table of descriptions for MDB @ref errors */
|
|
1253 |
/** Table of descriptions for LMDB @ref errors */
|
1181 | 1254 |
static char *const mdb_errstr[] = {
|
1182 | 1255 |
"MDB_KEYEXIST: Key/data pair already exists",
|
1183 | 1256 |
"MDB_NOTFOUND: No matching key/data pair found",
|
|
1185 | 1258 |
"MDB_CORRUPTED: Located page was wrong type",
|
1186 | 1259 |
"MDB_PANIC: Update of meta page failed",
|
1187 | 1260 |
"MDB_VERSION_MISMATCH: Database environment version mismatch",
|
1188 | |
"MDB_INVALID: File is not an MDB file",
|
|
1261 |
"MDB_INVALID: File is not an LMDB file",
|
1189 | 1262 |
"MDB_MAP_FULL: Environment mapsize limit reached",
|
1190 | 1263 |
"MDB_DBS_FULL: Environment maxdbs limit reached",
|
1191 | 1264 |
"MDB_READERS_FULL: Environment maxreaders limit reached",
|
|
1197 | 1270 |
"MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed",
|
1198 | 1271 |
"MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
|
1199 | 1272 |
"MDB_BAD_TXN: Transaction cannot recover - it must be aborted",
|
1200 | |
"MDB_BAD_VALSIZE: Too big key/data, key is empty, or wrong DUPFIXED size",
|
|
1273 |
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
|
|
1274 |
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
1201 | 1275 |
};
|
1202 | 1276 |
|
1203 | 1277 |
char *
|
1204 | 1278 |
mdb_strerror(int err)
|
1205 | 1279 |
{
|
|
1280 |
#ifdef _WIN32
|
|
1281 |
/** HACK: pad 4KB on stack over the buf. Return system msgs in buf.
|
|
1282 |
* This works as long as no function between the call to mdb_strerror
|
|
1283 |
* and the actual use of the message uses more than 4K of stack.
|
|
1284 |
*/
|
|
1285 |
char pad[4096];
|
|
1286 |
char buf[1024], *ptr = buf;
|
|
1287 |
#endif
|
1206 | 1288 |
int i;
|
1207 | 1289 |
if (!err)
|
1208 | 1290 |
return ("Successful return: 0");
|
|
1212 | 1294 |
return mdb_errstr[i];
|
1213 | 1295 |
}
|
1214 | 1296 |
|
|
1297 |
#ifdef _WIN32
|
|
1298 |
/* These are the C-runtime error codes we use. The comment indicates
|
|
1299 |
* their numeric value, and the Win32 error they would correspond to
|
|
1300 |
* if the error actually came from a Win32 API. A major mess, we should
|
|
1301 |
* have used LMDB-specific error codes for everything.
|
|
1302 |
*/
|
|
1303 |
switch(err) {
|
|
1304 |
case ENOENT: /* 2, FILE_NOT_FOUND */
|
|
1305 |
case EIO: /* 5, ACCESS_DENIED */
|
|
1306 |
case ENOMEM: /* 12, INVALID_ACCESS */
|
|
1307 |
case EACCES: /* 13, INVALID_DATA */
|
|
1308 |
case EBUSY: /* 16, CURRENT_DIRECTORY */
|
|
1309 |
case EINVAL: /* 22, BAD_COMMAND */
|
|
1310 |
case ENOSPC: /* 28, OUT_OF_PAPER */
|
|
1311 |
return strerror(err);
|
|
1312 |
default:
|
|
1313 |
;
|
|
1314 |
}
|
|
1315 |
buf[0] = 0;
|
|
1316 |
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM |
|
|
1317 |
FORMAT_MESSAGE_IGNORE_INSERTS,
|
|
1318 |
NULL, err, 0, ptr, sizeof(buf), pad);
|
|
1319 |
return ptr;
|
|
1320 |
#else
|
1215 | 1321 |
return strerror(err);
|
|
1322 |
#endif
|
1216 | 1323 |
}
|
1217 | 1324 |
|
1218 | 1325 |
/** assert(3) variant in cursor context */
|
|
1282 | 1389 |
return buf;
|
1283 | 1390 |
}
|
1284 | 1391 |
|
|
1392 |
/** Describe the kind of data a leaf node carries, as a suffix for debug output.
 * @param[in] n the node whose mn_flags are inspected.
 * @return ": overflow page" when #F_BIGDATA is set; otherwise one of
 * "", ": DB", ": sub-page" or ": sub-DB", selected by #F_DUPDATA and #F_SUBDATA.
 * The returned string is a static literal.
 */
static const char *
|
|
1393 |
mdb_leafnode_type(MDB_node *n)
|
|
1394 |
{
|
|
1395 |
/* Lookup table: first index is the F_DUPDATA test, second the F_SUBDATA test */
static char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}};
|
|
1396 |
/* F_BIGDATA takes precedence over the table lookup */
return F_ISSET(n->mn_flags, F_BIGDATA) ? ": overflow page" :
|
|
1397 |
/* NOTE(review): relies on F_ISSET() evaluating to exactly 0 or 1 since it is used as an index - confirm */
tp[F_ISSET(n->mn_flags, F_DUPDATA)][F_ISSET(n->mn_flags, F_SUBDATA)];
|
|
1398 |
}
|
|
1399 |
|
1285 | 1400 |
/** Display all the keys in the page. */
|
1286 | 1401 |
void
|
1287 | 1402 |
mdb_page_list(MDB_page *mp)
|
1288 | 1403 |
{
|
|
1404 |
pgno_t pgno = mdb_dbg_pgno(mp);
|
|
1405 |
const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : "";
|
1289 | 1406 |
MDB_node *node;
|
1290 | 1407 |
unsigned int i, nkeys, nsize, total = 0;
|
1291 | 1408 |
MDB_val key;
|
1292 | 1409 |
DKBUF;
|
1293 | 1410 |
|
|
1411 |
switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
|
|
1412 |
case P_BRANCH: type = "Branch page"; break;
|
|
1413 |
case P_LEAF: type = "Leaf page"; break;
|
|
1414 |
case P_LEAF|P_SUBP: type = "Sub-page"; break;
|
|
1415 |
case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
|
|
1416 |
case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
|
|
1417 |
case P_OVERFLOW:
|
|
1418 |
fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
|
|
1419 |
pgno, mp->mp_pages, state);
|
|
1420 |
return;
|
|
1421 |
case P_META:
|
|
1422 |
fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
|
|
1423 |
pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
|
|
1424 |
return;
|
|
1425 |
default:
|
|
1426 |
/* Unrecognized flag combination: print the raw flags in hex so the value matches its "0x" prefix */
fprintf(stderr, "Bad page %"Z"u flags 0x%X\n", pgno, mp->mp_flags);
|
|
1427 |
return;
|
|
1428 |
}
|
|
1429 |
|
1294 | 1430 |
nkeys = NUMKEYS(mp);
|
1295 | |
fprintf(stderr, "Page %"Z"u numkeys %d\n", mdb_dbg_pgno(mp), nkeys);
|
|
1431 |
fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
|
|
1432 |
|
1296 | 1433 |
for (i=0; i<nkeys; i++) {
|
|
1434 |
if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
|
|
1435 |
key.mv_size = nsize = mp->mp_pad;
|
|
1436 |
key.mv_data = LEAF2KEY(mp, i, nsize);
|
|
1437 |
total += nsize;
|
|
1438 |
fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
|
|
1439 |
continue;
|
|
1440 |
}
|
1297 | 1441 |
node = NODEPTR(mp, i);
|
1298 | 1442 |
key.mv_size = node->mn_ksize;
|
1299 | 1443 |
key.mv_data = node->mn_data;
|
|
1309 | 1453 |
nsize += NODEDSZ(node);
|
1310 | 1454 |
total += nsize;
|
1311 | 1455 |
nsize += sizeof(indx_t);
|
1312 | |
fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
|
|
1456 |
fprintf(stderr, "key %d: nsize %d, %s%s\n",
|
|
1457 |
i, nsize, DKEY(&key), mdb_leafnode_type(node));
|
1313 | 1458 |
}
|
1314 | 1459 |
total = EVEN(total);
|
1315 | 1460 |
}
|
1316 | |
fprintf(stderr, "Total: %d\n", total);
|
|
1461 |
fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
|
|
1462 |
IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp));
|
1317 | 1463 |
}
|
1318 | 1464 |
|
1319 | 1465 |
void
|
|
1339 | 1485 |
/** Count all the pages in each DB and in the freelist
|
1340 | 1486 |
* and make sure it matches the actual number of pages
|
1341 | 1487 |
* being used.
|
|
1488 |
* All named DBs must be open for a correct count.
|
1342 | 1489 |
*/
|
1343 | 1490 |
static void mdb_audit(MDB_txn *txn)
|
1344 | 1491 |
{
|
|
1352 | 1499 |
mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
|
1353 | 1500 |
while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
|
1354 | 1501 |
freecount += *(MDB_ID *)data.mv_data;
|
|
1502 |
mdb_tassert(txn, rc == MDB_NOTFOUND);
|
1355 | 1503 |
|
1356 | 1504 |
count = 0;
|
1357 | 1505 |
for (i = 0; i<txn->mt_numdbs; i++) {
|
1358 | 1506 |
MDB_xcursor mx;
|
|
1507 |
if (!(txn->mt_dbflags[i] & DB_VALID))
|
|
1508 |
continue;
|
1359 | 1509 |
mdb_cursor_init(&mc, txn, i, &mx);
|
1360 | 1510 |
if (txn->mt_dbs[i].md_root == P_INVALID)
|
1361 | 1511 |
continue;
|
|
1363 | 1513 |
txn->mt_dbs[i].md_leaf_pages +
|
1364 | 1514 |
txn->mt_dbs[i].md_overflow_pages;
|
1365 | 1515 |
if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
|
1366 | |
mdb_page_search(&mc, NULL, MDB_PS_FIRST);
|
1367 | |
do {
|
|
1516 |
rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST);
|
|
1517 |
for (; rc == MDB_SUCCESS; rc = mdb_cursor_sibling(&mc, 1)) {
|
1368 | 1518 |
unsigned j;
|
1369 | 1519 |
MDB_page *mp;
|
1370 | 1520 |
mp = mc.mc_pg[mc.mc_top];
|
|
1378 | 1528 |
}
|
1379 | 1529 |
}
|
1380 | 1530 |
}
|
1381 | |
while (mdb_cursor_sibling(&mc, 1) == 0);
|
|
1531 |
mdb_tassert(txn, rc == MDB_NOTFOUND);
|
1382 | 1532 |
}
|
1383 | 1533 |
}
|
1384 | 1534 |
if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
|
|
1437 | 1587 |
}
|
1438 | 1588 |
return ret;
|
1439 | 1589 |
}
|
1440 | |
|
1441 | 1590 |
/** Free a single page.
|
1442 | 1591 |
* Saves single pages to a list, for future reuse.
|
1443 | 1592 |
* (This is not used for multi-page overflow pages.)
|
|
1475 | 1624 |
mdb_dpage_free(env, dl[i].mptr);
|
1476 | 1625 |
}
|
1477 | 1626 |
dl[0].mid = 0;
|
|
1627 |
}
|
|
1628 |
|
|
1629 |
/** Loosen or free a single page.
|
|
1630 |
* Saves single pages to a list for future reuse
|
|
1631 |
* in this same txn. It has been pulled from the freeDB
|
|
1632 |
* and already resides on the dirty list, but has been
|
|
1633 |
* deleted. Use these pages first before pulling again
|
|
1634 |
* from the freeDB.
|
|
1635 |
*
|
|
1636 |
* If the page wasn't dirtied in this txn, just add it
|
|
1637 |
* to this txn's free list.
* @param[in] mc cursor supplying the transaction (mc_txn) and the DB index (mc_dbi).
* @param[in] mp the page to loosen or free.
* @return #MDB_SUCCESS on success. #MDB_CORRUPTED if, in a nested txn, the
* dirty-list entry for this page number points at a different page; the
* cursor is then de-initialized and the txn flagged #MDB_TXN_ERROR.
* Otherwise the nonzero result of mdb_midl_append().
|
|
1638 |
*/
|
|
1639 |
static int
|
|
1640 |
mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
|
|
1641 |
{
|
|
1642 |
int loose = 0;
|
|
1643 |
pgno_t pgno = mp->mp_pgno;
|
|
1644 |
MDB_txn *txn = mc->mc_txn;
|
|
1645 |
|
|
1646 |
/* Only a P_DIRTY page reached through a cursor on a DB other than FREE_DBI may be loosened */
if ((mp->mp_flags & P_DIRTY) && mc->mc_dbi != FREE_DBI) {
|
|
1647 |
if (txn->mt_parent) {
|
|
1648 |
MDB_ID2 *dl = txn->mt_u.dirty_list;
|
|
1649 |
/* If txn has a parent, make sure the page is in our
|
|
1650 |
* dirty list.
|
|
1651 |
*/
|
|
1652 |
if (dl[0].mid) {
|
|
1653 |
unsigned x = mdb_mid2l_search(dl, pgno);
|
|
1654 |
if (x <= dl[0].mid && dl[x].mid == pgno) {
|
|
1655 |
if (mp != dl[x].mptr) { /* bad cursor? */
|
|
1656 |
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
1657 |
txn->mt_flags |= MDB_TXN_ERROR;
|
|
1658 |
return MDB_CORRUPTED;
|
|
1659 |
}
|
|
1660 |
/* ok, it's ours */
|
|
1661 |
loose = 1;
|
|
1662 |
}
|
|
1663 |
}
|
|
1664 |
} else {
|
|
1665 |
/* no parent txn, so it's just ours */
|
|
1666 |
loose = 1;
|
|
1667 |
}
|
|
1668 |
}
|
|
1669 |
if (loose) {
|
|
1670 |
DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
|
|
1671 |
mp->mp_pgno));
|
|
1672 |
/* Push mp onto the head of the txn's loose-page list and mark it P_LOOSE */
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
|
|
1673 |
txn->mt_loose_pgs = mp;
|
|
1674 |
txn->mt_loose_count++;
|
|
1675 |
mp->mp_flags |= P_LOOSE;
|
|
1676 |
} else {
|
|
1677 |
/* Not loosened: record the page number in this txn's free list instead */
int rc = mdb_midl_append(&txn->mt_free_pgs, pgno);
|
|
1678 |
if (rc)
|
|
1679 |
return rc;
|
|
1680 |
}
|
|
1681 |
|
|
1682 |
return MDB_SUCCESS;
|
1478 | 1683 |
}
|
1479 | 1684 |
|
1480 | 1685 |
/** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn.
|
|
1487 | 1692 |
static int
|
1488 | 1693 |
mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
|
1489 | 1694 |
{
|
1490 | |
enum { Mask = P_SUBP|P_DIRTY|P_KEEP };
|
|
1695 |
enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP };
|
1491 | 1696 |
MDB_txn *txn = mc->mc_txn;
|
1492 | 1697 |
MDB_cursor *m3;
|
1493 | 1698 |
MDB_xcursor *mx;
|
|
1638 | 1843 |
for (i=dl[0].mid; i && need; i--) {
|
1639 | 1844 |
MDB_ID pn = dl[i].mid << 1;
|
1640 | 1845 |
dp = dl[i].mptr;
|
1641 | |
if (dp->mp_flags & P_KEEP)
|
|
1846 |
if (dp->mp_flags & (P_LOOSE|P_KEEP))
|
1642 | 1847 |
continue;
|
1643 | 1848 |
/* Can't spill twice, make sure it's not already in a parent's
|
1644 | 1849 |
* spill list.
|
|
1742 | 1947 |
#else
|
1743 | 1948 |
enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
|
1744 | 1949 |
#endif
|
1745 | |
int rc, retry = Max_retries;
|
|
1950 |
int rc, retry = num * 60;
|
1746 | 1951 |
MDB_txn *txn = mc->mc_txn;
|
1747 | 1952 |
MDB_env *env = txn->mt_env;
|
1748 | 1953 |
pgno_t pgno, *mop = env->me_pghead;
|
1749 | |
unsigned i, j, k, mop_len = mop ? mop[0] : 0, n2 = num-1;
|
|
1954 |
unsigned i, j, mop_len = mop ? mop[0] : 0, n2 = num-1;
|
1750 | 1955 |
MDB_page *np;
|
1751 | 1956 |
txnid_t oldest = 0, last;
|
1752 | 1957 |
MDB_cursor_op op;
|
1753 | 1958 |
MDB_cursor m2;
|
|
1959 |
int found_old = 0;
|
|
1960 |
|
|
1961 |
/* If there are any loose pages, just use them */
|
|
1962 |
if (num == 1 && txn->mt_loose_pgs) {
|
|
1963 |
np = txn->mt_loose_pgs;
|
|
1964 |
txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
|
|
1965 |
txn->mt_loose_count--;
|
|
1966 |
DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
|
|
1967 |
np->mp_pgno));
|
|
1968 |
*mp = np;
|
|
1969 |
return MDB_SUCCESS;
|
|
1970 |
}
|
1754 | 1971 |
|
1755 | 1972 |
*mp = NULL;
|
1756 | 1973 |
|
|
1763 | 1980 |
for (op = MDB_FIRST;; op = MDB_NEXT) {
|
1764 | 1981 |
MDB_val key, data;
|
1765 | 1982 |
MDB_node *leaf;
|
1766 | |
pgno_t *idl, old_id, new_id;
|
|
1983 |
pgno_t *idl;
|
1767 | 1984 |
|
1768 | 1985 |
/* Seek a big enough contiguous page range. Prefer
|
1769 | 1986 |
* pages at the tail, just truncating the list.
|
|
1775 | 1992 |
if (mop[i-n2] == pgno+n2)
|
1776 | 1993 |
goto search_done;
|
1777 | 1994 |
} while (--i > n2);
|
1778 | |
if (Max_retries < INT_MAX && --retry < 0)
|
|
1995 |
if (--retry < 0)
|
1779 | 1996 |
break;
|
1780 | 1997 |
}
|
1781 | 1998 |
|
1782 | 1999 |
if (op == MDB_FIRST) { /* 1st iteration */
|
1783 | 2000 |
/* Prepare to fetch more and coalesce */
|
1784 | |
oldest = mdb_find_oldest(txn);
|
1785 | 2001 |
last = env->me_pglast;
|
|
2002 |
oldest = env->me_pgoldest;
|
1786 | 2003 |
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
1787 | 2004 |
if (last) {
|
1788 | 2005 |
op = MDB_SET_RANGE;
|
|
1797 | 2014 |
|
1798 | 2015 |
last++;
|
1799 | 2016 |
/* Do not fetch more if the record will be too recent */
|
1800 | |
if (oldest <= last)
|
1801 | |
break;
|
|
2017 |
if (oldest <= last) {
|
|
2018 |
if (!found_old) {
|
|
2019 |
oldest = mdb_find_oldest(txn);
|
|
2020 |
env->me_pgoldest = oldest;
|
|
2021 |
found_old = 1;
|
|
2022 |
}
|
|
2023 |
if (oldest <= last)
|
|
2024 |
break;
|
|
2025 |
}
|
1802 | 2026 |
rc = mdb_cursor_get(&m2, &key, NULL, op);
|
1803 | 2027 |
if (rc) {
|
1804 | 2028 |
if (rc == MDB_NOTFOUND)
|
|
1806 | 2030 |
goto fail;
|
1807 | 2031 |
}
|
1808 | 2032 |
last = *(txnid_t*)key.mv_data;
|
1809 | |
if (oldest <= last)
|
1810 | |
break;
|
|
2033 |
if (oldest <= last) {
|
|
2034 |
if (!found_old) {
|
|
2035 |
oldest = mdb_find_oldest(txn);
|
|
2036 |
env->me_pgoldest = oldest;
|
|
2037 |
found_old = 1;
|
|
2038 |
}
|
|
2039 |
if (oldest <= last)
|
|
2040 |
break;
|
|
2041 |
}
|
1811 | 2042 |
np = m2.mc_pg[m2.mc_top];
|
1812 | 2043 |
leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
|
1813 | 2044 |
if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS)
|
|
1829 | 2060 |
#if (MDB_DEBUG) > 1
|
1830 | 2061 |
DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
|
1831 | 2062 |
last, txn->mt_dbs[FREE_DBI].md_root, i));
|
1832 | |
for (k = i; k; k--)
|
1833 | |
DPRINTF(("IDL %"Z"u", idl[k]));
|
|
2063 |
for (j = i; j; j--)
|
|
2064 |
DPRINTF(("IDL %"Z"u", idl[j]));
|
1834 | 2065 |
#endif
|
1835 | 2066 |
/* Merge in descending sorted order */
|
1836 | |
j = mop_len;
|
1837 | |
k = mop_len += i;
|
1838 | |
mop[0] = (pgno_t)-1;
|
1839 | |
old_id = mop[j];
|
1840 | |
while (i) {
|
1841 | |
new_id = idl[i--];
|
1842 | |
for (; old_id < new_id; old_id = mop[--j])
|
1843 | |
mop[k--] = old_id;
|
1844 | |
mop[k--] = new_id;
|
1845 | |
}
|
1846 | |
mop[0] = mop_len;
|
|
2067 |
mdb_midl_xmerge(mop, idl);
|
|
2068 |
mop_len = mop[0];
|
1847 | 2069 |
}
|
1848 | 2070 |
|
1849 | 2071 |
/* Use new pages from the map when nothing suitable in the freeDB */
|
|
1898 | 2120 |
* alignment so memcpy may copy words instead of bytes.
|
1899 | 2121 |
*/
|
1900 | 2122 |
if ((unused &= -Align) && !IS_LEAF2(src)) {
|
1901 | |
upper &= -Align;
|
1902 | |
memcpy(dst, src, (lower + (Align-1)) & -Align);
|
|
2123 |
upper = (upper + PAGEBASE) & -Align;
|
|
2124 |
memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align);
|
1903 | 2125 |
memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper),
|
1904 | 2126 |
psize - upper);
|
1905 | 2127 |
} else {
|
|
2058 | 2280 |
if (m2->mc_pg[mc->mc_top] == mp) {
|
2059 | 2281 |
m2->mc_pg[mc->mc_top] = np;
|
2060 | 2282 |
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
|
|
2283 |
IS_LEAF(np) &&
|
2061 | 2284 |
m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top])
|
2062 | 2285 |
{
|
2063 | 2286 |
MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]);
|
|
2265 | 2488 |
return MDB_BAD_RSLOT;
|
2266 | 2489 |
} else {
|
2267 | 2490 |
MDB_PID_T pid = env->me_pid;
|
2268 | |
pthread_t tid = pthread_self();
|
2269 | |
|
2270 | |
if (!(env->me_flags & MDB_LIVE_READER)) {
|
|
2491 |
MDB_THR_T tid = pthread_self();
|
|
2492 |
|
|
2493 |
if (!env->me_live_reader) {
|
2271 | 2494 |
rc = mdb_reader_pid(env, Pidset, pid);
|
2272 | 2495 |
if (rc)
|
2273 | 2496 |
return rc;
|
2274 | |
env->me_flags |= MDB_LIVE_READER;
|
|
2497 |
env->me_live_reader = 1;
|
2275 | 2498 |
}
|
2276 | 2499 |
|
2277 | 2500 |
LOCK_MUTEX_R(env);
|
|
2324 | 2547 |
txn->mt_free_pgs[0] = 0;
|
2325 | 2548 |
txn->mt_spill_pgs = NULL;
|
2326 | 2549 |
env->me_txn = txn;
|
|
2550 |
memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned int));
|
2327 | 2551 |
}
|
2328 | 2552 |
|
2329 | 2553 |
/* Copy the DB info and flags */
|
|
2398 | 2622 |
tsize = sizeof(MDB_ntxn);
|
2399 | 2623 |
}
|
2400 | 2624 |
size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
|
2401 | |
if (!(flags & MDB_RDONLY))
|
|
2625 |
if (!(flags & MDB_RDONLY)) {
|
|
2626 |
if (!parent) {
|
|
2627 |
txn = env->me_txn0;
|
|
2628 |
txn->mt_flags = 0;
|
|
2629 |
goto ok;
|
|
2630 |
}
|
2402 | 2631 |
size += env->me_maxdbs * sizeof(MDB_cursor *);
|
|
2632 |
/* child txns use parent's dbiseqs */
|
|
2633 |
if (!parent)
|
|
2634 |
size += env->me_maxdbs * sizeof(unsigned int);
|
|
2635 |
}
|
2403 | 2636 |
|
2404 | 2637 |
if ((txn = calloc(1, size)) == NULL) {
|
2405 | |
DPRINTF(("calloc: %s", strerror(ErrCode())));
|
|
2638 |
DPRINTF(("calloc: %s", strerror(errno)));
|
2406 | 2639 |
return ENOMEM;
|
2407 | 2640 |
}
|
2408 | 2641 |
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
|
2409 | 2642 |
if (flags & MDB_RDONLY) {
|
2410 | 2643 |
txn->mt_flags |= MDB_TXN_RDONLY;
|
2411 | 2644 |
txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
|
|
2645 |
txn->mt_dbiseqs = env->me_dbiseqs;
|
2412 | 2646 |
} else {
|
2413 | 2647 |
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
2414 | |
txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
|
|
2648 |
if (parent) {
|
|
2649 |
txn->mt_dbiseqs = parent->mt_dbiseqs;
|
|
2650 |
txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
|
|
2651 |
} else {
|
|
2652 |
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
|
2653 |
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
|
2654 |
}
|
2415 | 2655 |
}
|
2416 | 2656 |
txn->mt_env = env;
|
2417 | 2657 |
|
|
2658 |
ok:
|
2418 | 2659 |
if (parent) {
|
2419 | 2660 |
unsigned int i;
|
2420 | 2661 |
txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
|
|
2457 | 2698 |
} else {
|
2458 | 2699 |
rc = mdb_txn_renew0(txn);
|
2459 | 2700 |
}
|
2460 | |
if (rc)
|
2461 | |
free(txn);
|
2462 | |
else {
|
|
2701 |
if (rc) {
|
|
2702 |
if (txn != env->me_txn0)
|
|
2703 |
free(txn);
|
|
2704 |
} else {
|
2463 | 2705 |
*ret = txn;
|
2464 | 2706 |
DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
2465 | 2707 |
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
|
|
2491 | 2733 |
env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
|
2492 | 2734 |
} else {
|
2493 | 2735 |
char *ptr = env->me_dbxs[i].md_name.mv_data;
|
2494 | |
env->me_dbxs[i].md_name.mv_data = NULL;
|
2495 | |
env->me_dbxs[i].md_name.mv_size = 0;
|
2496 | |
env->me_dbflags[i] = 0;
|
2497 | |
free(ptr);
|
|
2736 |
if (ptr) {
|
|
2737 |
env->me_dbxs[i].md_name.mv_data = NULL;
|
|
2738 |
env->me_dbxs[i].md_name.mv_size = 0;
|
|
2739 |
env->me_dbflags[i] = 0;
|
|
2740 |
env->me_dbiseqs[i]++;
|
|
2741 |
free(ptr);
|
|
2742 |
}
|
2498 | 2743 |
}
|
2499 | 2744 |
}
|
2500 | 2745 |
}
|
|
2583 | 2828 |
if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
|
2584 | 2829 |
txn->mt_u.reader->mr_pid = 0;
|
2585 | 2830 |
|
2586 | |
free(txn);
|
|
2831 |
if (txn != txn->mt_env->me_txn0)
|
|
2832 |
free(txn);
|
2587 | 2833 |
}
|
2588 | 2834 |
|
2589 | 2835 |
/** Save the freelist as of this transaction to the freeDB.
|
|
2610 | 2856 |
rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY);
|
2611 | 2857 |
if (rc && rc != MDB_NOTFOUND)
|
2612 | 2858 |
return rc;
|
|
2859 |
}
|
|
2860 |
|
|
2861 |
if (!env->me_pghead && txn->mt_loose_pgs) {
|
|
2862 |
/* Put loose page numbers in mt_free_pgs, since
|
|
2863 |
* we may be unable to return them to me_pghead.
|
|
2864 |
*/
|
|
2865 |
MDB_page *mp = txn->mt_loose_pgs;
|
|
2866 |
if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0)
|
|
2867 |
return rc;
|
|
2868 |
for (; mp; mp = NEXT_LOOSE_PAGE(mp))
|
|
2869 |
mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
|
|
2870 |
txn->mt_loose_pgs = NULL;
|
|
2871 |
txn->mt_loose_count = 0;
|
2613 | 2872 |
}
|
2614 | 2873 |
|
2615 | 2874 |
/* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */
|
|
2673 | 2932 |
}
|
2674 | 2933 |
|
2675 | 2934 |
mop = env->me_pghead;
|
2676 | |
mop_len = mop ? mop[0] : 0;
|
|
2935 |
mop_len = (mop ? mop[0] : 0) + txn->mt_loose_count;
|
2677 | 2936 |
|
2678 | 2937 |
/* Reserve records for me_pghead[]. Split it if multi-page,
|
2679 | 2938 |
* to avoid searching freeDB for a page range. Use keys in
|
|
2713 | 2972 |
total_room += head_room;
|
2714 | 2973 |
}
|
2715 | 2974 |
|
|
2975 |
/* Return loose page numbers to me_pghead, though usually none are
|
|
2976 |
* left at this point. The pages themselves remain in dirty_list.
|
|
2977 |
*/
|
|
2978 |
if (txn->mt_loose_pgs) {
|
|
2979 |
MDB_page *mp = txn->mt_loose_pgs;
|
|
2980 |
unsigned count = txn->mt_loose_count;
|
|
2981 |
MDB_IDL loose;
|
|
2982 |
/* Room for loose pages + temp IDL with same */
|
|
2983 |
if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0)
|
|
2984 |
return rc;
|
|
2985 |
mop = env->me_pghead;
|
|
2986 |
loose = mop + MDB_IDL_ALLOCLEN(mop) - count;
|
|
2987 |
for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp))
|
|
2988 |
loose[ ++count ] = mp->mp_pgno;
|
|
2989 |
loose[0] = count;
|
|
2990 |
mdb_midl_sort(loose);
|
|
2991 |
mdb_midl_xmerge(mop, loose);
|
|
2992 |
txn->mt_loose_pgs = NULL;
|
|
2993 |
txn->mt_loose_count = 0;
|
|
2994 |
mop_len = mop[0];
|
|
2995 |
}
|
|
2996 |
|
2716 | 2997 |
/* Fill in the reserved me_pghead records */
|
2717 | 2998 |
rc = MDB_SUCCESS;
|
2718 | 2999 |
if (mop_len) {
|
|
2721 | 3002 |
mop += mop_len;
|
2722 | 3003 |
rc = mdb_cursor_first(&mc, &key, &data);
|
2723 | 3004 |
for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
|
2724 | |
unsigned flags = MDB_CURRENT;
|
2725 | 3005 |
txnid_t id = *(txnid_t *)key.mv_data;
|
2726 | 3006 |
ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
|
2727 | 3007 |
MDB_ID save;
|
|
2731 | 3011 |
if (len > mop_len) {
|
2732 | 3012 |
len = mop_len;
|
2733 | 3013 |
data.mv_size = (len + 1) * sizeof(MDB_ID);
|
2734 | |
flags = 0;
|
2735 | 3014 |
}
|
2736 | 3015 |
data.mv_data = mop -= len;
|
2737 | 3016 |
save = mop[0];
|
2738 | 3017 |
mop[0] = len;
|
2739 | |
rc = mdb_cursor_put(&mc, &key, &data, flags);
|
|
3018 |
rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
|
2740 | 3019 |
mop[0] = save;
|
2741 | 3020 |
if (rc || !(mop_len -= len))
|
2742 | 3021 |
break;
|
|
2776 | 3055 |
while (++i <= pagecount) {
|
2777 | 3056 |
dp = dl[i].mptr;
|
2778 | 3057 |
/* Don't flush this page yet */
|
2779 | |
if (dp->mp_flags & P_KEEP) {
|
2780 | |
dp->mp_flags ^= P_KEEP;
|
|
3058 |
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
|
3059 |
dp->mp_flags &= ~P_KEEP;
|
2781 | 3060 |
dl[++j] = dl[i];
|
2782 | 3061 |
continue;
|
2783 | 3062 |
}
|
|
2791 | 3070 |
if (++i <= pagecount) {
|
2792 | 3071 |
dp = dl[i].mptr;
|
2793 | 3072 |
/* Don't flush this page yet */
|
2794 | |
if (dp->mp_flags & P_KEEP) {
|
2795 | |
dp->mp_flags ^= P_KEEP;
|
|
3073 |
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
|
3074 |
dp->mp_flags &= ~P_KEEP;
|
2796 | 3075 |
dl[i].mid = 0;
|
2797 | 3076 |
continue;
|
2798 | 3077 |
}
|
|
2867 | 3146 |
#endif /* _WIN32 */
|
2868 | 3147 |
}
|
2869 | 3148 |
|
|
3149 |
/* MIPS has cache coherency issues; this is a no-op everywhere else.
|
|
3150 |
* Note: for any size >= on-chip cache size, entire on-chip cache is
|
|
3151 |
* flushed.
|
|
3152 |
*/
|
|
3153 |
CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
|
|
3154 |
|
2870 | 3155 |
for (i = keep; ++i <= pagecount; ) {
|
2871 | 3156 |
dp = dl[i].mptr;
|
2872 | 3157 |
/* This is a page we skipped above */
|
|
2921 | 3206 |
|
2922 | 3207 |
if (txn->mt_parent) {
|
2923 | 3208 |
MDB_txn *parent = txn->mt_parent;
|
|
3209 |
MDB_page **lp;
|
2924 | 3210 |
MDB_ID2L dst, src;
|
2925 | 3211 |
MDB_IDL pspill;
|
2926 | 3212 |
unsigned x, y, len, ps_len;
|
|
3018 | 3304 |
}
|
3019 | 3305 |
}
|
3020 | 3306 |
|
|
3307 |
/* Append our loose page list to parent's */
|
|
3308 |
for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp))
|
|
3309 |
;
|
|
3310 |
*lp = txn->mt_loose_pgs;
|
|
3311 |
parent->mt_loose_count += txn->mt_loose_count;
|
|
3312 |
|
3021 | 3313 |
parent->mt_child = NULL;
|
3022 | 3314 |
mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
|
3023 | 3315 |
free(txn);
|
|
3049 | 3341 |
mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
|
3050 | 3342 |
for (i = 2; i < txn->mt_numdbs; i++) {
|
3051 | 3343 |
if (txn->mt_dbflags[i] & DB_DIRTY) {
|
|
3344 |
if (TXN_DBI_CHANGED(txn, i)) {
|
|
3345 |
rc = MDB_BAD_DBI;
|
|
3346 |
goto fail;
|
|
3347 |
}
|
3052 | 3348 |
data.mv_data = &txn->mt_dbs[i];
|
3053 | 3349 |
rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
|
3054 | 3350 |
if (rc)
|
|
3075 | 3371 |
(rc = mdb_env_write_meta(txn)))
|
3076 | 3372 |
goto fail;
|
3077 | 3373 |
|
|
3374 |
/* Free P_LOOSE pages left behind in dirty_list */
|
|
3375 |
if (!(env->me_flags & MDB_WRITEMAP))
|
|
3376 |
mdb_dlist_free(txn);
|
|
3377 |
|
3078 | 3378 |
done:
|
3079 | 3379 |
env->me_pglast = 0;
|
3080 | 3380 |
env->me_txn = NULL;
|
|
3082 | 3382 |
|
3083 | 3383 |
if (env->me_txns)
|
3084 | 3384 |
UNLOCK_MUTEX_W(env);
|
3085 | |
free(txn);
|
|
3385 |
if (txn != env->me_txn0)
|
|
3386 |
free(txn);
|
3086 | 3387 |
|
3087 | 3388 |
return MDB_SUCCESS;
|
3088 | 3389 |
|
|
3097 | 3398 |
* @param[out] meta address of where to store the meta information
|
3098 | 3399 |
* @return 0 on success, non-zero on failure.
|
3099 | 3400 |
*/
|
3100 | |
static int
|
|
3401 |
static int ESECT
|
3101 | 3402 |
mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
3102 | 3403 |
{
|
3103 | 3404 |
MDB_metabuf pbuf;
|
|
3155 | 3456 |
return 0;
|
3156 | 3457 |
}
|
3157 | 3458 |
|
|
3459 |
static void ESECT
|
|
3460 |
mdb_env_init_meta0(MDB_env *env, MDB_meta *meta)
|
|
3461 |
{
|
|
3462 |
meta->mm_magic = MDB_MAGIC;
|
|
3463 |
meta->mm_version = MDB_DATA_VERSION;
|
|
3464 |
meta->mm_mapsize = env->me_mapsize;
|
|
3465 |
meta->mm_psize = env->me_psize;
|
|
3466 |
meta->mm_last_pg = 1;
|
|
3467 |
meta->mm_flags = env->me_flags & 0xffff;
|
|
3468 |
meta->mm_flags |= MDB_INTEGERKEY;
|
|
3469 |
meta->mm_dbs[0].md_root = P_INVALID;
|
|
3470 |
meta->mm_dbs[1].md_root = P_INVALID;
|
|
3471 |
}
|
|
3472 |
|
3158 | 3473 |
/** Write the environment parameters of a freshly created DB environment.
|
3159 | 3474 |
* @param[in] env the environment handle
|
3160 | 3475 |
* @param[out] meta address of where to store the meta information
|
3161 | 3476 |
* @return 0 on success, non-zero on failure.
|
3162 | 3477 |
*/
|
3163 | |
static int
|
|
3478 |
static int ESECT
|
3164 | 3479 |
mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
3165 | 3480 |
{
|
3166 | 3481 |
MDB_page *p, *q;
|
|
3184 | 3499 |
|
3185 | 3500 |
psize = env->me_psize;
|
3186 | 3501 |
|
3187 | |
meta->mm_magic = MDB_MAGIC;
|
3188 | |
meta->mm_version = MDB_DATA_VERSION;
|
3189 | |
meta->mm_mapsize = env->me_mapsize;
|
3190 | |
meta->mm_psize = psize;
|
3191 | |
meta->mm_last_pg = 1;
|
3192 | |
meta->mm_flags = env->me_flags & 0xffff;
|
3193 | |
meta->mm_flags |= MDB_INTEGERKEY;
|
3194 | |
meta->mm_dbs[0].md_root = P_INVALID;
|
3195 | |
meta->mm_dbs[1].md_root = P_INVALID;
|
|
3502 |
mdb_env_init_meta0(env, meta);
|
3196 | 3503 |
|
3197 | 3504 |
p = calloc(2, psize);
|
3198 | 3505 |
p->mp_pgno = 0;
|
|
3224 | 3531 |
{
|
3225 | 3532 |
MDB_env *env;
|
3226 | 3533 |
MDB_meta meta, metab, *mp;
|
|
3534 |
size_t mapsize;
|
3227 | 3535 |
off_t off;
|
3228 | 3536 |
int rc, len, toggle;
|
3229 | 3537 |
char *ptr;
|
|
3240 | 3548 |
|
3241 | 3549 |
env = txn->mt_env;
|
3242 | 3550 |
mp = env->me_metas[toggle];
|
|
3551 |
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
|
|
3552 |
/* Persist any increases of mapsize config */
|
|
3553 |
if (mapsize < env->me_mapsize)
|
|
3554 |
mapsize = env->me_mapsize;
|
3243 | 3555 |
|
3244 | 3556 |
if (env->me_flags & MDB_WRITEMAP) {
|
3245 | |
/* Persist any increases of mapsize config */
|
3246 | |
if (env->me_mapsize > mp->mm_mapsize)
|
3247 | |
mp->mm_mapsize = env->me_mapsize;
|
|
3557 |
mp->mm_mapsize = mapsize;
|
3248 | 3558 |
mp->mm_dbs[0] = txn->mt_dbs[0];
|
3249 | 3559 |
mp->mm_dbs[1] = txn->mt_dbs[1];
|
3250 | 3560 |
mp->mm_last_pg = txn->mt_next_pgno - 1;
|
|
3271 | 3581 |
metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
|
3272 | 3582 |
metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
|
3273 | 3583 |
|
3274 | |
ptr = (char *)&meta;
|
3275 | |
if (env->me_mapsize > mp->mm_mapsize) {
|
3276 | |
/* Persist any increases of mapsize config */
|
3277 | |
meta.mm_mapsize = env->me_mapsize;
|
3278 | |
off = offsetof(MDB_meta, mm_mapsize);
|
3279 | |
} else {
|
3280 | |
off = offsetof(MDB_meta, mm_dbs[0].md_depth);
|
3281 | |
}
|
3282 | |
len = sizeof(MDB_meta) - off;
|
3283 | |
|
3284 | |
ptr += off;
|
|
3584 |
meta.mm_mapsize = mapsize;
|
3285 | 3585 |
meta.mm_dbs[0] = txn->mt_dbs[0];
|
3286 | 3586 |
meta.mm_dbs[1] = txn->mt_dbs[1];
|
3287 | 3587 |
meta.mm_last_pg = txn->mt_next_pgno - 1;
|
3288 | 3588 |
meta.mm_txnid = txn->mt_txnid;
|
3289 | 3589 |
|
|
3590 |
off = offsetof(MDB_meta, mm_mapsize);
|
|
3591 |
ptr = (char *)&meta + off;
|
|
3592 |
len = sizeof(MDB_meta) - off;
|
3290 | 3593 |
if (toggle)
|
3291 | 3594 |
off += env->me_psize;
|
3292 | 3595 |
off += PAGEHDRSZ;
|
|
3325 | 3628 |
env->me_flags |= MDB_FATAL_ERROR;
|
3326 | 3629 |
return rc;
|
3327 | 3630 |
}
|
|
3631 |
/* MIPS has cache coherency issues; this is a no-op everywhere else */
|
|
3632 |
CACHEFLUSH(env->me_map + off, len, DCACHE);
|
3328 | 3633 |
done:
|
3329 | 3634 |
/* Memory ordering issues are irrelevant; since the entire writer
|
3330 | 3635 |
* is wrapped by wmutex, all of these changes will become visible
|
|
3348 | 3653 |
return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid);
|
3349 | 3654 |
}
|
3350 | 3655 |
|
3351 | |
int
|
|
3656 |
int ESECT
|
3352 | 3657 |
mdb_env_create(MDB_env **env)
|
3353 | 3658 |
{
|
3354 | 3659 |
MDB_env *e;
|
|
3373 | 3678 |
return MDB_SUCCESS;
|
3374 | 3679 |
}
|
3375 | 3680 |
|
3376 | |
static int
|
3377 | |
mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
3681 |
static int ESECT
|
|
3682 |
mdb_env_map(MDB_env *env, void *addr)
|
3378 | 3683 |
{
|
3379 | 3684 |
MDB_page *p;
|
3380 | 3685 |
unsigned int flags = env->me_flags;
|
|
3382 | 3687 |
int rc;
|
3383 | 3688 |
HANDLE mh;
|
3384 | 3689 |
LONG sizelo, sizehi;
|
3385 | |
sizelo = env->me_mapsize & 0xffffffff;
|
3386 | |
sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */
|
3387 | |
|
3388 | |
/* Windows won't create mappings for zero length files.
|
3389 | |
* Just allocate the maxsize right now.
|
3390 | |
*/
|
3391 | |
if (newsize) {
|
|
3690 |
size_t msize;
|
|
3691 |
|
|
3692 |
if (flags & MDB_RDONLY) {
|
|
3693 |
/* Don't set explicit map size, use whatever exists */
|
|
3694 |
msize = 0;
|
|
3695 |
sizelo = 0;
|
|
3696 |
sizehi = 0;
|
|
3697 |
} else {
|
|
3698 |
msize = env->me_mapsize;
|
|
3699 |
sizelo = msize & 0xffffffff;
|
|
3700 |
sizehi = msize >> 16 >> 16; /* only needed on Win64 */
|
|
3701 |
|
|
3702 |
/* Windows won't create mappings for zero length files
|
|
3703 |
* and won't map more than the file size.
|
|
3704 |
* Just set the maxsize right now.
|
|
3705 |
*/
|
3392 | 3706 |
if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
|
3393 | 3707 |
|| !SetEndOfFile(env->me_fd)
|
3394 | 3708 |
|| SetFilePointer(env->me_fd, 0, NULL, 0) != 0)
|
3395 | 3709 |
return ErrCode();
|
3396 | 3710 |
}
|
|
3711 |
|
3397 | 3712 |
mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
|
3398 | 3713 |
PAGE_READWRITE : PAGE_READONLY,
|
3399 | 3714 |
sizehi, sizelo, NULL);
|
|
3401 | 3716 |
return ErrCode();
|
3402 | 3717 |
env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
|
3403 | 3718 |
FILE_MAP_WRITE : FILE_MAP_READ,
|
3404 | |
0, 0, env->me_mapsize, addr);
|
|
3719 |
0, 0, msize, addr);
|
3405 | 3720 |
rc = env->me_map ? 0 : ErrCode();
|
3406 | 3721 |
CloseHandle(mh);
|
3407 | 3722 |
if (rc)
|
|
3447 | 3762 |
return MDB_SUCCESS;
|
3448 | 3763 |
}
|
3449 | 3764 |
|
3450 | |
int
|
|
3765 |
int ESECT
|
3451 | 3766 |
mdb_env_set_mapsize(MDB_env *env, size_t size)
|
3452 | 3767 |
{
|
3453 | 3768 |
/* If env is already open, caller is responsible for making
|
|
3471 | 3786 |
munmap(env->me_map, env->me_mapsize);
|
3472 | 3787 |
env->me_mapsize = size;
|
3473 | 3788 |
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
|
3474 | |
rc = mdb_env_map(env, old, 1);
|
|
3789 |
rc = mdb_env_map(env, old);
|
3475 | 3790 |
if (rc)
|
3476 | 3791 |
return rc;
|
3477 | 3792 |
}
|
|
3481 | 3796 |
return MDB_SUCCESS;
|
3482 | 3797 |
}
|
3483 | 3798 |
|
3484 | |
int
|
|
3799 |
int ESECT
|
3485 | 3800 |
mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
3486 | 3801 |
{
|
3487 | 3802 |
if (env->me_map)
|
|
3490 | 3805 |
return MDB_SUCCESS;
|
3491 | 3806 |
}
|
3492 | 3807 |
|
3493 | |
int
|
|
3808 |
int ESECT
|
3494 | 3809 |
mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
3495 | 3810 |
{
|
3496 | 3811 |
if (env->me_map || readers < 1)
|
|
3499 | 3814 |
return MDB_SUCCESS;
|
3500 | 3815 |
}
|
3501 | 3816 |
|
3502 | |
int
|
|
3817 |
int ESECT
|
3503 | 3818 |
mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
3504 | 3819 |
{
|
3505 | 3820 |
if (!env || !readers)
|
|
3508 | 3823 |
return MDB_SUCCESS;
|
3509 | 3824 |
}
|
3510 | 3825 |
|
3511 | |
/** Further setup required for opening an MDB environment
|
|
3826 |
/** Further setup required for opening an LMDB environment
|
3512 | 3827 |
*/
|
3513 | |
static int
|
|
3828 |
static int ESECT
|
3514 | 3829 |
mdb_env_open2(MDB_env *env)
|
3515 | 3830 |
{
|
3516 | 3831 |
unsigned int flags = env->me_flags;
|
|
3555 | 3870 |
env->me_mapsize = minsize;
|
3556 | 3871 |
}
|
3557 | 3872 |
|
3558 | |
rc = mdb_env_map(env, meta.mm_address, newenv || env->me_mapsize != meta.mm_mapsize);
|
|
3873 |
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
|
3559 | 3874 |
if (rc)
|
3560 | 3875 |
return rc;
|
3561 | 3876 |
|
|
3630 | 3945 |
case DLL_THREAD_DETACH:
|
3631 | 3946 |
for (i=0; i<mdb_tls_nkeys; i++) {
|
3632 | 3947 |
MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
|
3633 | |
mdb_env_reader_dest(r);
|
|
3948 |
if (r) {
|
|
3949 |
mdb_env_reader_dest(r);
|
|
3950 |
}
|
3634 | 3951 |
}
|
3635 | 3952 |
break;
|
3636 | 3953 |
case DLL_PROCESS_DETACH: break;
|
|
3665 | 3982 |
#endif
|
3666 | 3983 |
|
3667 | 3984 |
/** Downgrade the exclusive lock on the region back to shared */
|
3668 | |
static int
|
|
3985 |
static int ESECT
|
3669 | 3986 |
mdb_env_share_locks(MDB_env *env, int *excl)
|
3670 | 3987 |
{
|
3671 | 3988 |
int rc = 0, toggle = mdb_env_pick_meta(env);
|
|
3707 | 4024 |
/** Try to get exclusive lock, otherwise shared.
|
3708 | 4025 |
* Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
|
3709 | 4026 |
*/
|
3710 | |
static int
|
|
4027 |
static int ESECT
|
3711 | 4028 |
mdb_env_excl_lock(MDB_env *env, int *excl)
|
3712 | 4029 |
{
|
3713 | 4030 |
int rc = 0;
|
|
3842 | 4159 |
#endif
|
3843 | 4160 |
|
3844 | 4161 |
/** Open and/or initialize the lock region for the environment.
|
3845 | |
* @param[in] env The MDB environment.
|
|
4162 |
* @param[in] env The LMDB environment.
|
3846 | 4163 |
* @param[in] lpath The pathname of the file used for the lock region.
|
3847 | 4164 |
* @param[in] mode The Unix permissions for the file, if we create it.
|
3848 | 4165 |
* @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive
|
3849 | 4166 |
* @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive
|
3850 | 4167 |
* @return 0 on success, non-zero on failure.
|
3851 | 4168 |
*/
|
3852 | |
static int
|
|
4169 |
static int ESECT
|
3853 | 4170 |
mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
|
3854 | 4171 |
{
|
3855 | 4172 |
#ifdef _WIN32
|
|
4079 | 4396 |
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
|
4080 | 4397 |
#endif
|
4081 | 4398 |
|
4082 | |
int
|
|
4399 |
int ESECT
|
4083 | 4400 |
mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
|
4084 | 4401 |
{
|
4085 | 4402 |
int oflags, rc, len, excl = -1;
|
|
4124 | 4441 |
env->me_path = strdup(path);
|
4125 | 4442 |
env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
|
4126 | 4443 |
env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
|
4127 | |
if (!(env->me_dbxs && env->me_path && env->me_dbflags)) {
|
|
4444 |
env->me_dbiseqs = calloc(env->me_maxdbs, sizeof(unsigned int));
|
|
4445 |
if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) {
|
4128 | 4446 |
rc = ENOMEM;
|
4129 | 4447 |
goto leave;
|
4130 | 4448 |
}
|
|
4196 | 4514 |
if (!((flags & MDB_RDONLY) ||
|
4197 | 4515 |
(env->me_pbuf = calloc(1, env->me_psize))))
|
4198 | 4516 |
rc = ENOMEM;
|
|
4517 |
if (!(flags & MDB_RDONLY)) {
|
|
4518 |
MDB_txn *txn;
|
|
4519 |
int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs *
|
|
4520 |
(sizeof(MDB_db)+sizeof(MDB_cursor)+sizeof(unsigned int)+1);
|
|
4521 |
txn = calloc(1, size);
|
|
4522 |
if (txn) {
|
|
4523 |
txn->mt_dbs = (MDB_db *)((char *)txn + tsize);
|
|
4524 |
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
|
4525 |
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
|
4526 |
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
|
4527 |
txn->mt_env = env;
|
|
4528 |
env->me_txn0 = txn;
|
|
4529 |
} else {
|
|
4530 |
rc = ENOMEM;
|
|
4531 |
}
|
|
4532 |
}
|
4199 | 4533 |
}
|
4200 | 4534 |
|
4201 | 4535 |
leave:
|
|
4207 | 4541 |
}
|
4208 | 4542 |
|
4209 | 4543 |
/** Destroy resources from mdb_env_open(), clear our readers & DBIs */
|
4210 | |
static void
|
|
4544 |
static void ESECT
|
4211 | 4545 |
mdb_env_close0(MDB_env *env, int excl)
|
4212 | 4546 |
{
|
4213 | 4547 |
int i;
|
|
4220 | 4554 |
free(env->me_dbxs[i].md_name.mv_data);
|
4221 | 4555 |
|
4222 | 4556 |
free(env->me_pbuf);
|
|
4557 |
free(env->me_dbiseqs);
|
4223 | 4558 |
free(env->me_dbflags);
|
4224 | 4559 |
free(env->me_dbxs);
|
4225 | 4560 |
free(env->me_path);
|
4226 | 4561 |
free(env->me_dirty_list);
|
|
4562 |
free(env->me_txn0);
|
4227 | 4563 |
mdb_midl_free(env->me_free_pgs);
|
4228 | 4564 |
|
4229 | 4565 |
if (env->me_flags & MDB_ENV_TXKEY) {
|
|
4295 | 4631 |
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
|
4296 | 4632 |
}
|
4297 | 4633 |
|
4298 | |
int
|
4299 | |
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
4300 | |
{
|
4301 | |
MDB_txn *txn = NULL;
|
4302 | |
int rc;
|
4303 | |
size_t wsize;
|
4304 | |
char *ptr;
|
4305 | |
#ifdef _WIN32
|
4306 | |
DWORD len, w2;
|
4307 | |
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
4308 | |
#else
|
4309 | |
ssize_t len;
|
4310 | |
size_t w2;
|
4311 | |
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
4312 | |
#endif
|
4313 | |
|
4314 | |
/* Do the lock/unlock of the reader mutex before starting the
|
4315 | |
* write txn. Otherwise other read txns could block writers.
|
4316 | |
*/
|
4317 | |
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
4318 | |
if (rc)
|
4319 | |
return rc;
|
4320 | |
|
4321 | |
if (env->me_txns) {
|
4322 | |
/* We must start the actual read txn after blocking writers */
|
4323 | |
mdb_txn_reset0(txn, "reset-stage1");
|
4324 | |
|
4325 | |
/* Temporarily block writers until we snapshot the meta pages */
|
4326 | |
LOCK_MUTEX_W(env);
|
4327 | |
|
4328 | |
rc = mdb_txn_renew0(txn);
|
4329 | |
if (rc) {
|
4330 | |
UNLOCK_MUTEX_W(env);
|
4331 | |
goto leave;
|
4332 | |
}
|
4333 | |
}
|
4334 | |
|
4335 | |
wsize = env->me_psize * 2;
|
4336 | |
ptr = env->me_map;
|
4337 | |
w2 = wsize;
|
4338 | |
while (w2 > 0) {
|
4339 | |
DO_WRITE(rc, fd, ptr, w2, len);
|
4340 | |
if (!rc) {
|
4341 | |
rc = ErrCode();
|
4342 | |
break;
|
4343 | |
} else if (len > 0) {
|
4344 | |
rc = MDB_SUCCESS;
|
4345 | |
ptr += len;
|
4346 | |
w2 -= len;
|
4347 | |
continue;
|
4348 | |
} else {
|
4349 | |
/* Non-blocking or async handles are not supported */
|
4350 | |
rc = EIO;
|
4351 | |
break;
|
4352 | |
}
|
4353 | |
}
|
4354 | |
if (env->me_txns)
|
4355 | |
UNLOCK_MUTEX_W(env);
|
4356 | |
|
4357 | |
if (rc)
|
4358 | |
goto leave;
|
4359 | |
|
4360 | |
wsize = txn->mt_next_pgno * env->me_psize - wsize;
|
4361 | |
while (wsize > 0) {
|
4362 | |
if (wsize > MAX_WRITE)
|
4363 | |
w2 = MAX_WRITE;
|
4364 | |
else
|
4365 | |
w2 = wsize;
|
4366 | |
DO_WRITE(rc, fd, ptr, w2, len);
|
4367 | |
if (!rc) {
|
4368 | |
rc = ErrCode();
|
4369 | |
break;
|
4370 | |
} else if (len > 0) {
|
4371 | |
rc = MDB_SUCCESS;
|
4372 | |
ptr += len;
|
4373 | |
wsize -= len;
|
4374 | |
continue;
|
4375 | |
} else {
|
4376 | |
rc = EIO;
|
4377 | |
break;
|
4378 | |
}
|
4379 | |
}
|
4380 | |
|
4381 | |
leave:
|
4382 | |
mdb_txn_abort(txn);
|
4383 | |
return rc;
|
4384 | |
}
|
4385 | |
|
4386 | |
int
|
4387 | |
mdb_env_copy(MDB_env *env, const char *path)
|
4388 | |
{
|
4389 | |
int rc, len;
|
4390 | |
char *lpath;
|
4391 | |
HANDLE newfd = INVALID_HANDLE_VALUE;
|
4392 | |
|
4393 | |
if (env->me_flags & MDB_NOSUBDIR) {
|
4394 | |
lpath = (char *)path;
|
4395 | |
} else {
|
4396 | |
len = strlen(path);
|
4397 | |
len += sizeof(DATANAME);
|
4398 | |
lpath = malloc(len);
|
4399 | |
if (!lpath)
|
4400 | |
return ENOMEM;
|
4401 | |
sprintf(lpath, "%s" DATANAME, path);
|
4402 | |
}
|
4403 | |
|
4404 | |
/* The destination path must exist, but the destination file must not.
|
4405 | |
* We don't want the OS to cache the writes, since the source data is
|
4406 | |
* already in the OS cache.
|
4407 | |
*/
|
4408 | |
#ifdef _WIN32
|
4409 | |
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
4410 | |
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
4411 | |
#else
|
4412 | |
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
4413 | |
#endif
|
4414 | |
if (newfd == INVALID_HANDLE_VALUE) {
|
4415 | |
rc = ErrCode();
|
4416 | |
goto leave;
|
4417 | |
}
|
4418 | |
|
4419 | |
#ifdef O_DIRECT
|
4420 | |
/* Set O_DIRECT if the file system supports it */
|
4421 | |
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
4422 | |
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
4423 | |
#endif
|
4424 | |
#ifdef F_NOCACHE /* __APPLE__ */
|
4425 | |
rc = fcntl(newfd, F_NOCACHE, 1);
|
4426 | |
if (rc) {
|
4427 | |
rc = ErrCode();
|
4428 | |
goto leave;
|
4429 | |
}
|
4430 | |
#endif
|
4431 | |
|
4432 | |
rc = mdb_env_copyfd(env, newfd);
|
4433 | |
|
4434 | |
leave:
|
4435 | |
if (!(env->me_flags & MDB_NOSUBDIR))
|
4436 | |
free(lpath);
|
4437 | |
if (newfd != INVALID_HANDLE_VALUE)
|
4438 | |
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
4439 | |
rc = ErrCode();
|
4440 | |
|
4441 | |
return rc;
|
4442 | |
}
|
4443 | |
|
4444 | |
void
|
|
4634 |
|
|
4635 |
void ESECT
|
4445 | 4636 |
mdb_env_close(MDB_env *env)
|
4446 | 4637 |
{
|
4447 | 4638 |
MDB_page *dp;
|
|
4493 | 4684 |
} while(!x && u > (unsigned short *)a->mv_data);
|
4494 | 4685 |
return x;
|
4495 | 4686 |
#else
|
4496 | |
return memcmp(a->mv_data, b->mv_data, a->mv_size);
|
4497 | |
#endif
|
4498 | |
}
|
|
4687 |
unsigned short *u, *c, *end;
|
|
4688 |
int x;
|
|
4689 |
|
|
4690 |
end = (unsigned short *) ((char *) a->mv_data + a->mv_size);
|
|
4691 |
u = (unsigned short *)a->mv_data;
|
|
4692 |
c = (unsigned short *)b->mv_data;
|
|
4693 |
do {
|
|
4694 |
x = *u++ - *c++;
|
|
4695 |
} while(!x && u < end);
|
|
4696 |
return x;
|
|
4697 |
#endif
|
|
4698 |
}
|
|
4699 |
|
|
4700 |
/** Compare two items pointing at size_t's of unknown alignment. */
|
|
4701 |
#ifdef MISALIGNED_OK
|
|
4702 |
# define mdb_cmp_clong mdb_cmp_long
|
|
4703 |
#else
|
|
4704 |
# define mdb_cmp_clong mdb_cmp_cint
|
|
4705 |
#endif
|
4499 | 4706 |
|
4500 | 4707 |
/** Compare two items lexically */
|
4501 | 4708 |
static int
|
|
4868 | 5075 |
/* Make sure we're using an up-to-date root */
|
4869 | 5076 |
if (*mc->mc_dbflag & DB_STALE) {
|
4870 | 5077 |
MDB_cursor mc2;
|
|
5078 |
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
|
5079 |
return MDB_BAD_DBI;
|
4871 | 5080 |
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL);
|
4872 | 5081 |
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 0);
|
4873 | 5082 |
if (rc)
|
|
5040 | 5249 |
int exact = 0;
|
5041 | 5250 |
DKBUF;
|
5042 | 5251 |
|
5043 | |
if (key == NULL || data == NULL)
|
5044 | |
return EINVAL;
|
5045 | |
|
5046 | 5252 |
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
|
5047 | 5253 |
|
5048 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
5254 |
if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
5049 | 5255 |
return EINVAL;
|
5050 | 5256 |
|
5051 | 5257 |
if (txn->mt_flags & MDB_TXN_ERROR)
|
|
5211 | 5417 |
if (op == MDB_PREV || op == MDB_PREV_DUP) {
|
5212 | 5418 |
rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
|
5213 | 5419 |
if (op != MDB_PREV || rc != MDB_NOTFOUND) {
|
5214 | |
if (rc == MDB_SUCCESS)
|
|
5420 |
if (rc == MDB_SUCCESS) {
|
5215 | 5421 |
MDB_GET_KEY(leaf, key);
|
|
5422 |
mc->mc_flags &= ~C_EOF;
|
|
5423 |
}
|
5216 | 5424 |
return rc;
|
5217 | 5425 |
}
|
5218 | 5426 |
} else {
|
|
5368 | 5576 |
if (!mc->mc_top) {
|
5369 | 5577 |
/* There are no other pages */
|
5370 | 5578 |
mc->mc_ki[mc->mc_top] = 0;
|
5371 | |
if (op == MDB_SET_RANGE) {
|
|
5579 |
if (op == MDB_SET_RANGE && !exactp) {
|
5372 | 5580 |
rc = 0;
|
5373 | 5581 |
goto set1;
|
5374 | 5582 |
} else
|
|
5404 | 5612 |
mc->mc_flags &= ~C_EOF;
|
5405 | 5613 |
|
5406 | 5614 |
if (IS_LEAF2(mp)) {
|
5407 | |
key->mv_size = mc->mc_db->md_pad;
|
5408 | |
key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
|
|
5615 |
if (op == MDB_SET_RANGE || op == MDB_SET_KEY) {
|
|
5616 |
key->mv_size = mc->mc_db->md_pad;
|
|
5617 |
key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
|
|
5618 |
}
|
5409 | 5619 |
return MDB_SUCCESS;
|
5410 | 5620 |
}
|
5411 | 5621 |
|
|
5687 | 5897 |
rc = MDB_INCOMPATIBLE;
|
5688 | 5898 |
break;
|
5689 | 5899 |
}
|
|
5900 |
{
|
|
5901 |
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
|
5902 |
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
5903 |
MDB_GET_KEY(leaf, key);
|
|
5904 |
rc = mdb_node_read(mc->mc_txn, leaf, data);
|
|
5905 |
break;
|
|
5906 |
}
|
|
5907 |
}
|
5690 | 5908 |
if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) {
|
5691 | 5909 |
rc = EINVAL;
|
5692 | 5910 |
break;
|
|
5723 | 5941 |
if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) {
|
5724 | 5942 |
MDB_cursor mc2;
|
5725 | 5943 |
MDB_xcursor mcx;
|
|
5944 |
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
|
5945 |
return MDB_BAD_DBI;
|
5726 | 5946 |
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx);
|
5727 | 5947 |
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY);
|
5728 | 5948 |
if (rc)
|
|
5753 | 5973 |
uint16_t fp_flags;
|
5754 | 5974 |
MDB_val xdata, *rdata, dkey, olddata;
|
5755 | 5975 |
MDB_db dummy;
|
5756 | |
int do_sub = 0, insert;
|
|
5976 |
int do_sub = 0, insert_key, insert_data;
|
5757 | 5977 |
unsigned int mcount = 0, dcount = 0, nospill;
|
5758 | 5978 |
size_t nsize;
|
5759 | 5979 |
int rc, rc2;
|
|
5781 | 6001 |
if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
|
5782 | 6002 |
return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
5783 | 6003 |
|
5784 | |
if (flags != MDB_CURRENT && key->mv_size-1 >= ENV_MAXKEY(env))
|
|
6004 |
if (key->mv_size-1 >= ENV_MAXKEY(env))
|
5785 | 6005 |
return MDB_BAD_VALSIZE;
|
5786 | 6006 |
|
5787 | 6007 |
#if SIZE_MAX > MAXDATASIZE
|
|
5872 | 6092 |
return rc2;
|
5873 | 6093 |
}
|
5874 | 6094 |
|
5875 | |
insert = rc;
|
5876 | |
if (insert) {
|
|
6095 |
insert_key = insert_data = rc;
|
|
6096 |
if (insert_key) {
|
5877 | 6097 |
/* The key does not exist */
|
5878 | 6098 |
DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
|
5879 | 6099 |
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
|
5880 | 6100 |
LEAFSIZE(key, data) > env->me_nodemax)
|
5881 | 6101 |
{
|
5882 | |
/* Too big for a node, insert in sub-DB */
|
|
6102 |
/* Too big for a node, insert in sub-DB. Set up an empty
|
|
6103 |
* "old sub-page" for prep_subDB to expand to a full page.
|
|
6104 |
*/
|
5883 | 6105 |
fp_flags = P_LEAF|P_DIRTY;
|
5884 | 6106 |
fp = env->me_pbuf;
|
5885 | 6107 |
fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
|
5886 | |
fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ;
|
|
6108 |
fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE);
|
|
6109 |
olddata.mv_size = PAGEHDRSZ;
|
5887 | 6110 |
goto prep_subDB;
|
5888 | 6111 |
}
|
5889 | 6112 |
} else {
|
5890 | 6113 |
/* there's only a key anyway, so this is a no-op */
|
5891 | 6114 |
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
|
|
6115 |
char *ptr;
|
5892 | 6116 |
unsigned int ksize = mc->mc_db->md_pad;
|
5893 | 6117 |
if (key->mv_size != ksize)
|
5894 | 6118 |
return MDB_BAD_VALSIZE;
|
5895 | |
if (flags == MDB_CURRENT) {
|
5896 | |
char *ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
|
5897 | |
memcpy(ptr, key->mv_data, ksize);
|
|
6119 |
ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
|
|
6120 |
memcpy(ptr, key->mv_data, ksize);
|
|
6121 |
fix_parent:
|
|
6122 |
/* if overwriting slot 0 of leaf, need to
|
|
6123 |
* update branch key if there is a parent page
|
|
6124 |
*/
|
|
6125 |
if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
|
|
6126 |
unsigned short top = mc->mc_top;
|
|
6127 |
mc->mc_top--;
|
|
6128 |
/* slot 0 is always an empty key, find real slot */
|
|
6129 |
while (mc->mc_top && !mc->mc_ki[mc->mc_top])
|
|
6130 |
mc->mc_top--;
|
|
6131 |
if (mc->mc_ki[mc->mc_top])
|
|
6132 |
rc2 = mdb_update_key(mc, key);
|
|
6133 |
else
|
|
6134 |
rc2 = MDB_SUCCESS;
|
|
6135 |
mc->mc_top = top;
|
|
6136 |
if (rc2)
|
|
6137 |
return rc2;
|
5898 | 6138 |
}
|
5899 | 6139 |
return MDB_SUCCESS;
|
5900 | 6140 |
}
|
|
5923 | 6163 |
|
5924 | 6164 |
#if UINT_MAX < SIZE_MAX
|
5925 | 6165 |
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
5926 | |
#ifdef MISALIGNED_OK
|
5927 | |
mc->mc_dbx->md_dcmp = mdb_cmp_long;
|
5928 | |
#else
|
5929 | |
mc->mc_dbx->md_dcmp = mdb_cmp_cint;
|
5930 | |
#endif
|
5931 | |
#endif
|
5932 | |
/* if data matches, skip it */
|
|
6166 |
mc->mc_dbx->md_dcmp = mdb_cmp_clong;
|
|
6167 |
#endif
|
|
6168 |
/* does data match? */
|
5933 | 6169 |
if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
|
5934 | 6170 |
if (flags & MDB_NODUPDATA)
|
5935 | |
rc = MDB_KEYEXIST;
|
5936 | |
else if (flags & MDB_MULTIPLE)
|
5937 | |
goto next_mult;
|
5938 | |
else
|
5939 | |
rc = MDB_SUCCESS;
|
5940 | |
return rc;
|
|
6171 |
return MDB_KEYEXIST;
|
|
6172 |
/* overwrite it */
|
|
6173 |
goto current;
|
5941 | 6174 |
}
|
5942 | 6175 |
|
5943 | 6176 |
/* Back up original data item */
|
|
5946 | 6179 |
|
5947 | 6180 |
/* Make sub-page header for the dup items, with dummy body */
|
5948 | 6181 |
fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
|
5949 | |
fp->mp_lower = PAGEHDRSZ;
|
|
6182 |
fp->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
5950 | 6183 |
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
5951 | 6184 |
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
5952 | 6185 |
fp->mp_flags |= P_LEAF2;
|
|
5956 | 6189 |
xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
|
5957 | 6190 |
(dkey.mv_size & 1) + (data->mv_size & 1);
|
5958 | 6191 |
}
|
5959 | |
fp->mp_upper = xdata.mv_size;
|
5960 | |
olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */
|
|
6192 |
fp->mp_upper = xdata.mv_size - PAGEBASE;
|
|
6193 |
olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */
|
5961 | 6194 |
} else if (leaf->mn_flags & F_SUBDATA) {
|
5962 | 6195 |
/* Data is on sub-DB, just store it */
|
5963 | 6196 |
flags |= F_DUPDATA|F_SUBDATA;
|
|
6024 | 6257 |
if (fp_flags & P_LEAF2) {
|
6025 | 6258 |
memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
|
6026 | 6259 |
} else {
|
6027 | |
memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
|
6028 | |
olddata.mv_size - fp->mp_upper);
|
|
6260 |
memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
|
|
6261 |
olddata.mv_size - fp->mp_upper - PAGEBASE);
|
6029 | 6262 |
for (i=0; i<NUMKEYS(fp); i++)
|
6030 | 6263 |
mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
|
6031 | 6264 |
}
|
|
6034 | 6267 |
rdata = &xdata;
|
6035 | 6268 |
flags |= F_DUPDATA;
|
6036 | 6269 |
do_sub = 1;
|
6037 | |
if (!insert)
|
|
6270 |
if (!insert_key)
|
6038 | 6271 |
mdb_node_del(mc, 0);
|
6039 | 6272 |
goto new_sub;
|
6040 | 6273 |
}
|
|
6075 | 6308 |
return ENOMEM;
|
6076 | 6309 |
id2.mid = pg;
|
6077 | 6310 |
id2.mptr = np;
|
6078 | |
rc = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
|
6079 | |
mdb_cassert(mc, rc == 0);
|
|
6311 |
rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
|
|
6312 |
mdb_cassert(mc, rc2 == 0);
|
6080 | 6313 |
if (!(flags & MDB_RESERVE)) {
|
6081 | 6314 |
/* Copy end of page, adjusting alignment so
|
6082 | 6315 |
* compiler may copy words instead of bytes.
|
|
6094 | 6327 |
data->mv_data = METADATA(omp);
|
6095 | 6328 |
else
|
6096 | 6329 |
memcpy(METADATA(omp), data->mv_data, data->mv_size);
|
6097 | |
goto done;
|
|
6330 |
return MDB_SUCCESS;
|
6098 | 6331 |
}
|
6099 | 6332 |
}
|
6100 | 6333 |
if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS)
|
|
6106 | 6339 |
*/
|
6107 | 6340 |
if (F_ISSET(flags, MDB_RESERVE))
|
6108 | 6341 |
data->mv_data = olddata.mv_data;
|
6109 | |
else if (data->mv_size)
|
|
6342 |
else if (!(mc->mc_flags & C_SUB))
|
6110 | 6343 |
memcpy(olddata.mv_data, data->mv_data, data->mv_size);
|
6111 | |
else
|
|
6344 |
else {
|
6112 | 6345 |
memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
|
6113 | |
goto done;
|
|
6346 |
goto fix_parent;
|
|
6347 |
}
|
|
6348 |
return MDB_SUCCESS;
|
6114 | 6349 |
}
|
6115 | 6350 |
mdb_node_del(mc, 0);
|
6116 | |
mc->mc_db->md_entries--;
|
6117 | 6351 |
}
|
6118 | 6352 |
|
6119 | 6353 |
rdata = data;
|
|
6123 | 6357 |
nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata);
|
6124 | 6358 |
if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) {
|
6125 | 6359 |
if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA )
|
6126 | |
nflags &= ~MDB_APPEND;
|
6127 | |
if (!insert)
|
|
6360 |
nflags &= ~MDB_APPEND; /* sub-page may need room to grow */
|
|
6361 |
if (!insert_key)
|
6128 | 6362 |
nflags |= MDB_SPLIT_REPLACE;
|
6129 | 6363 |
rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags);
|
6130 | 6364 |
} else {
|
6131 | 6365 |
/* There is room already in this leaf page. */
|
6132 | 6366 |
rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags);
|
6133 | |
if (rc == 0 && !do_sub && insert) {
|
|
6367 |
if (rc == 0 && insert_key) {
|
6134 | 6368 |
/* Adjust other cursors pointing to mp */
|
6135 | 6369 |
MDB_cursor *m2, *m3;
|
6136 | 6370 |
MDB_dbi dbi = mc->mc_dbi;
|
|
6150 | 6384 |
}
|
6151 | 6385 |
}
|
6152 | 6386 |
|
6153 | |
if (rc != MDB_SUCCESS)
|
6154 | |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
6155 | |
else {
|
|
6387 |
if (rc == MDB_SUCCESS) {
|
6156 | 6388 |
/* Now store the actual data in the child DB. Note that we're
|
6157 | 6389 |
* storing the user data in the keys field, so there are strict
|
6158 | 6390 |
* size limits on dupdata. The actual data fields of the child
|
|
6160 | 6392 |
*/
|
6161 | 6393 |
if (do_sub) {
|
6162 | 6394 |
int xflags;
|
|
6395 |
size_t ecount;
|
6163 | 6396 |
put_sub:
|
6164 | 6397 |
xdata.mv_size = 0;
|
6165 | 6398 |
xdata.mv_data = "";
|
|
6175 | 6408 |
if (dkey.mv_size) {
|
6176 | 6409 |
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
|
6177 | 6410 |
if (rc)
|
6178 | |
return rc;
|
|
6411 |
goto bad_sub;
|
6179 | 6412 |
{
|
6180 | 6413 |
/* Adjust other cursors pointing to mp */
|
6181 | 6414 |
MDB_cursor *m2;
|
|
6193 | 6426 |
/* we've done our job */
|
6194 | 6427 |
dkey.mv_size = 0;
|
6195 | 6428 |
}
|
|
6429 |
ecount = mc->mc_xcursor->mx_db.md_entries;
|
6196 | 6430 |
if (flags & MDB_APPENDDUP)
|
6197 | 6431 |
xflags |= MDB_APPEND;
|
6198 | 6432 |
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
|
|
6200 | 6434 |
void *db = NODEDATA(leaf);
|
6201 | 6435 |
memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
|
6202 | 6436 |
}
|
6203 | |
}
|
6204 | |
/* sub-writes might have failed so check rc again.
|
6205 | |
* Don't increment count if we just replaced an existing item.
|
6206 | |
*/
|
6207 | |
if (!rc && !(flags & MDB_CURRENT))
|
|
6437 |
insert_data = mc->mc_xcursor->mx_db.md_entries - ecount;
|
|
6438 |
}
|
|
6439 |
/* Increment count unless we just replaced an existing item. */
|
|
6440 |
if (insert_data)
|
6208 | 6441 |
mc->mc_db->md_entries++;
|
|
6442 |
if (insert_key) {
|
|
6443 |
/* Invalidate txn if we created an empty sub-DB */
|
|
6444 |
if (rc)
|
|
6445 |
goto bad_sub;
|
|
6446 |
/* If we succeeded and the key didn't exist before,
|
|
6447 |
* make sure the cursor is marked valid.
|
|
6448 |
*/
|
|
6449 |
mc->mc_flags |= C_INITIALIZED;
|
|
6450 |
}
|
6209 | 6451 |
if (flags & MDB_MULTIPLE) {
|
6210 | 6452 |
if (!rc) {
|
6211 | |
next_mult:
|
6212 | 6453 |
mcount++;
|
6213 | 6454 |
/* let caller know how many succeeded, if any */
|
6214 | 6455 |
data[1].mv_size = mcount;
|
6215 | 6456 |
if (mcount < dcount) {
|
6216 | 6457 |
data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size;
|
|
6458 |
insert_key = insert_data = 0;
|
6217 | 6459 |
goto more;
|
6218 | 6460 |
}
|
6219 | 6461 |
}
|
6220 | 6462 |
}
|
6221 | |
}
|
6222 | |
done:
|
6223 | |
/* If we succeeded and the key didn't exist before, make sure
|
6224 | |
* the cursor is marked valid.
|
6225 | |
*/
|
6226 | |
if (!rc && insert)
|
6227 | |
mc->mc_flags |= C_INITIALIZED;
|
|
6463 |
return rc;
|
|
6464 |
bad_sub:
|
|
6465 |
if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */
|
|
6466 |
rc = MDB_CORRUPTED;
|
|
6467 |
}
|
|
6468 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
6228 | 6469 |
return rc;
|
6229 | 6470 |
}
|
6230 | 6471 |
|
|
6252 | 6493 |
return rc;
|
6253 | 6494 |
|
6254 | 6495 |
mp = mc->mc_pg[mc->mc_top];
|
|
6496 |
if (IS_LEAF2(mp))
|
|
6497 |
goto del_key;
|
6255 | 6498 |
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
|
6256 | 6499 |
|
6257 | |
if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
6258 | |
if (!(flags & MDB_NODUPDATA)) {
|
|
6500 |
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
6501 |
if (flags & MDB_NODUPDATA) {
|
|
6502 |
/* mdb_cursor_del0() will subtract the final entry */
|
|
6503 |
mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1;
|
|
6504 |
} else {
|
6259 | 6505 |
if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
|
6260 | 6506 |
mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
|
6261 | 6507 |
}
|
6262 | 6508 |
rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
|
|
6509 |
if (rc)
|
|
6510 |
return rc;
|
6263 | 6511 |
/* If sub-DB still has entries, we're done */
|
6264 | 6512 |
if (mc->mc_xcursor->mx_db.md_entries) {
|
6265 | 6513 |
if (leaf->mn_flags & F_SUBDATA) {
|
|
6290 | 6538 |
if (leaf->mn_flags & F_SUBDATA) {
|
6291 | 6539 |
/* add all the child DB's pages to the free list */
|
6292 | 6540 |
rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
|
6293 | |
if (rc == MDB_SUCCESS) {
|
6294 | |
mc->mc_db->md_entries -=
|
6295 | |
mc->mc_xcursor->mx_db.md_entries;
|
6296 | |
}
|
6297 | |
}
|
6298 | |
}
|
6299 | |
|
6300 | |
return mdb_cursor_del0(mc, leaf);
|
|
6541 |
if (rc)
|
|
6542 |
goto fail;
|
|
6543 |
}
|
|
6544 |
}
|
|
6545 |
|
|
6546 |
/* add overflow pages to free list */
|
|
6547 |
if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
|
|
6548 |
MDB_page *omp;
|
|
6549 |
pgno_t pg;
|
|
6550 |
|
|
6551 |
memcpy(&pg, NODEDATA(leaf), sizeof(pg));
|
|
6552 |
if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
|
|
6553 |
(rc = mdb_ovpage_free(mc, omp)))
|
|
6554 |
goto fail;
|
|
6555 |
}
|
|
6556 |
|
|
6557 |
del_key:
|
|
6558 |
return mdb_cursor_del0(mc);
|
|
6559 |
|
|
6560 |
fail:
|
|
6561 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
|
6562 |
return rc;
|
6301 | 6563 |
}
|
6302 | 6564 |
|
6303 | 6565 |
/** Allocate and initialize new pages for a database.
|
|
6319 | 6581 |
DPRINTF(("allocated new mpage %"Z"u, page size %u",
|
6320 | 6582 |
np->mp_pgno, mc->mc_txn->mt_env->me_psize));
|
6321 | 6583 |
np->mp_flags = flags | P_DIRTY;
|
6322 | |
np->mp_lower = PAGEHDRSZ;
|
6323 | |
np->mp_upper = mc->mc_txn->mt_env->me_psize;
|
|
6584 |
np->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
6585 |
np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE;
|
6324 | 6586 |
|
6325 | 6587 |
if (IS_BRANCH(np))
|
6326 | 6588 |
mc->mc_db->md_branch_pages++;
|
|
6524 | 6786 |
}
|
6525 | 6787 |
|
6526 | 6788 |
/** Delete the specified node from a page.
|
6527 | |
* @param[in] mp The page to operate on.
|
6528 | |
* @param[in] indx The index of the node to delete.
|
|
6789 |
* @param[in] mc Cursor pointing to the node to delete.
|
6529 | 6790 |
* @param[in] ksize The size of a node. Only used if the page is
|
6530 | 6791 |
* part of a #MDB_DUPFIXED database.
|
6531 | 6792 |
*/
|
|
6574 | 6835 |
}
|
6575 | 6836 |
}
|
6576 | 6837 |
|
6577 | |
base = (char *)mp + mp->mp_upper;
|
|
6838 |
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6578 | 6839 |
memmove(base + sz, base, ptr - mp->mp_upper);
|
6579 | 6840 |
|
6580 | 6841 |
mp->mp_lower -= sizeof(indx_t);
|
|
6628 | 6889 |
mp->mp_ptrs[i] += delta;
|
6629 | 6890 |
}
|
6630 | 6891 |
|
6631 | |
base = (char *)mp + mp->mp_upper;
|
|
6892 |
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6632 | 6893 |
memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node));
|
6633 | 6894 |
mp->mp_upper += delta;
|
6634 | 6895 |
}
|
|
6707 | 6968 |
mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */
|
6708 | 6969 |
#if UINT_MAX < SIZE_MAX
|
6709 | 6970 |
if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
|
6710 | |
#ifdef MISALIGNED_OK
|
6711 | |
mx->mx_dbx.md_cmp = mdb_cmp_long;
|
6712 | |
#else
|
6713 | |
mx->mx_dbx.md_cmp = mdb_cmp_cint;
|
6714 | |
#endif
|
|
6971 |
mx->mx_dbx.md_cmp = mdb_cmp_clong;
|
6715 | 6972 |
#endif
|
6716 | 6973 |
}
|
6717 | 6974 |
|
|
6748 | 7005 |
MDB_cursor *mc;
|
6749 | 7006 |
size_t size = sizeof(MDB_cursor);
|
6750 | 7007 |
|
6751 | |
if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
7008 |
if (!ret || !TXN_DBI_EXIST(txn, dbi))
|
6752 | 7009 |
return EINVAL;
|
6753 | 7010 |
|
6754 | 7011 |
if (txn->mt_flags & MDB_TXN_ERROR)
|
|
6780 | 7037 |
int
|
6781 | 7038 |
mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
|
6782 | 7039 |
{
|
6783 | |
if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
|
|
7040 |
if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi))
|
6784 | 7041 |
return EINVAL;
|
6785 | 7042 |
|
6786 | 7043 |
if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
|
6787 | 7044 |
return EINVAL;
|
|
7045 |
|
|
7046 |
if (txn->mt_flags & MDB_TXN_ERROR)
|
|
7047 |
return MDB_BAD_TXN;
|
6788 | 7048 |
|
6789 | 7049 |
mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
|
6790 | 7050 |
return MDB_SUCCESS;
|
|
6801 | 7061 |
|
6802 | 7062 |
if (mc->mc_xcursor == NULL)
|
6803 | 7063 |
return MDB_INCOMPATIBLE;
|
|
7064 |
|
|
7065 |
if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
|
|
7066 |
return MDB_BAD_TXN;
|
|
7067 |
|
|
7068 |
if (!(mc->mc_flags & C_INITIALIZED))
|
|
7069 |
return EINVAL;
|
|
7070 |
|
|
7071 |
if (!mc->mc_snum || (mc->mc_flags & C_EOF))
|
|
7072 |
return MDB_NOTFOUND;
|
6804 | 7073 |
|
6805 | 7074 |
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
6806 | 7075 |
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
6898 | 7167 |
mp->mp_ptrs[i] -= delta;
|
6899 | 7168 |
}
|
6900 | 7169 |
|
6901 | |
base = (char *)mp + mp->mp_upper;
|
|
7170 |
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6902 | 7171 |
len = ptr - mp->mp_upper + NODESIZE;
|
6903 | 7172 |
memmove(base - delta, base, len);
|
6904 | 7173 |
mp->mp_upper -= delta;
|
|
6954 | 7223 |
unsigned int snum = csrc->mc_snum;
|
6955 | 7224 |
MDB_node *s2;
|
6956 | 7225 |
/* must find the lowest key below src */
|
6957 | |
mdb_page_search_lowest(csrc);
|
|
7226 |
rc = mdb_page_search_lowest(csrc);
|
|
7227 |
if (rc)
|
|
7228 |
return rc;
|
6958 | 7229 |
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
|
6959 | 7230 |
key.mv_size = csrc->mc_db->md_pad;
|
6960 | 7231 |
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
|
|
6977 | 7248 |
MDB_node *s2;
|
6978 | 7249 |
MDB_val bkey;
|
6979 | 7250 |
/* must find the lowest key below dst */
|
6980 | |
mdb_page_search_lowest(cdst);
|
6981 | |
if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) {
|
6982 | |
bkey.mv_size = cdst->mc_db->md_pad;
|
6983 | |
bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size);
|
|
7251 |
mdb_cursor_copy(cdst, &mn);
|
|
7252 |
rc = mdb_page_search_lowest(&mn);
|
|
7253 |
if (rc)
|
|
7254 |
return rc;
|
|
7255 |
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
|
7256 |
bkey.mv_size = mn.mc_db->md_pad;
|
|
7257 |
bkey.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, bkey.mv_size);
|
6984 | 7258 |
} else {
|
6985 | |
s2 = NODEPTR(cdst->mc_pg[cdst->mc_top], 0);
|
|
7259 |
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
6986 | 7260 |
bkey.mv_size = NODEKSZ(s2);
|
6987 | 7261 |
bkey.mv_data = NODEKEY(s2);
|
6988 | 7262 |
}
|
6989 | |
cdst->mc_snum = snum--;
|
6990 | |
cdst->mc_top = snum;
|
6991 | |
mdb_cursor_copy(cdst, &mn);
|
|
7263 |
mn.mc_snum = snum--;
|
|
7264 |
mn.mc_top = snum;
|
6992 | 7265 |
mn.mc_ki[snum] = 0;
|
6993 | 7266 |
rc = mdb_update_key(&mn, &bkey);
|
6994 | 7267 |
if (rc)
|
|
7099 | 7372 |
* the \b csrc page will be freed.
|
7100 | 7373 |
* @param[in] csrc Cursor pointing to the source page.
|
7101 | 7374 |
* @param[in] cdst Cursor pointing to the destination page.
|
|
7375 |
* @return 0 on success, non-zero on failure.
|
7102 | 7376 |
*/
|
7103 | 7377 |
static int
|
7104 | 7378 |
mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
7105 | 7379 |
{
|
|
7380 |
MDB_page *psrc, *pdst;
|
|
7381 |
MDB_node *srcnode;
|
|
7382 |
MDB_val key, data;
|
|
7383 |
unsigned nkeys;
|
7106 | 7384 |
int rc;
|
7107 | |
indx_t i, j;
|
7108 | |
MDB_node *srcnode;
|
7109 | |
MDB_val key, data;
|
7110 | |
unsigned nkeys;
|
7111 | |
|
7112 | |
DPRINTF(("merging page %"Z"u into %"Z"u", csrc->mc_pg[csrc->mc_top]->mp_pgno,
|
7113 | |
cdst->mc_pg[cdst->mc_top]->mp_pgno));
|
|
7385 |
indx_t i, j;
|
|
7386 |
|
|
7387 |
psrc = csrc->mc_pg[csrc->mc_top];
|
|
7388 |
pdst = cdst->mc_pg[cdst->mc_top];
|
|
7389 |
|
|
7390 |
DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno));
|
7114 | 7391 |
|
7115 | 7392 |
mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
|
7116 | 7393 |
mdb_cassert(csrc, cdst->mc_snum > 1);
|
|
7121 | 7398 |
|
7122 | 7399 |
/* Move all nodes from src to dst.
|
7123 | 7400 |
*/
|
7124 | |
j = nkeys = NUMKEYS(cdst->mc_pg[cdst->mc_top]);
|
7125 | |
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
|
|
7401 |
j = nkeys = NUMKEYS(pdst);
|
|
7402 |
if (IS_LEAF2(psrc)) {
|
7126 | 7403 |
key.mv_size = csrc->mc_db->md_pad;
|
7127 | |
key.mv_data = METADATA(csrc->mc_pg[csrc->mc_top]);
|
7128 | |
for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
|
|
7404 |
key.mv_data = METADATA(psrc);
|
|
7405 |
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
7129 | 7406 |
rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
|
7130 | 7407 |
if (rc != MDB_SUCCESS)
|
7131 | 7408 |
return rc;
|
7132 | 7409 |
key.mv_data = (char *)key.mv_data + key.mv_size;
|
7133 | 7410 |
}
|
7134 | 7411 |
} else {
|
7135 | |
for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
|
7136 | |
srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], i);
|
7137 | |
if (i == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
|
7138 | |
unsigned int snum = csrc->mc_snum;
|
|
7412 |
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
|
7413 |
srcnode = NODEPTR(psrc, i);
|
|
7414 |
if (i == 0 && IS_BRANCH(psrc)) {
|
|
7415 |
MDB_cursor mn;
|
7139 | 7416 |
MDB_node *s2;
|
|
7417 |
mdb_cursor_copy(csrc, &mn);
|
7140 | 7418 |
/* must find the lowest key below src */
|
7141 | |
mdb_page_search_lowest(csrc);
|
7142 | |
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
|
7143 | |
key.mv_size = csrc->mc_db->md_pad;
|
7144 | |
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
|
|
7419 |
rc = mdb_page_search_lowest(&mn);
|
|
7420 |
if (rc)
|
|
7421 |
return rc;
|
|
7422 |
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
|
7423 |
key.mv_size = mn.mc_db->md_pad;
|
|
7424 |
key.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, key.mv_size);
|
7145 | 7425 |
} else {
|
7146 | |
s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
|
|
7426 |
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
7147 | 7427 |
key.mv_size = NODEKSZ(s2);
|
7148 | 7428 |
key.mv_data = NODEKEY(s2);
|
7149 | 7429 |
}
|
7150 | |
csrc->mc_snum = snum--;
|
7151 | |
csrc->mc_top = snum;
|
7152 | 7430 |
} else {
|
7153 | 7431 |
key.mv_size = srcnode->mn_ksize;
|
7154 | 7432 |
key.mv_data = NODEKEY(srcnode);
|
|
7163 | 7441 |
}
|
7164 | 7442 |
|
7165 | 7443 |
DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)",
|
7166 | |
cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]),
|
7167 | |
(float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10));
|
|
7444 |
pdst->mp_pgno, NUMKEYS(pdst),
|
|
7445 |
(float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10));
|
7168 | 7446 |
|
7169 | 7447 |
/* Unlink the src page from parent and add to free list.
|
7170 | 7448 |
*/
|
|
7180 | 7458 |
}
|
7181 | 7459 |
csrc->mc_top++;
|
7182 | 7460 |
|
7183 | |
rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
|
7184 | |
csrc->mc_pg[csrc->mc_top]->mp_pgno);
|
|
7461 |
psrc = csrc->mc_pg[csrc->mc_top];
|
|
7462 |
/* If not operating on FreeDB, allow this page to be reused
|
|
7463 |
* in this txn. Otherwise just add to free list.
|
|
7464 |
*/
|
|
7465 |
rc = mdb_page_loose(csrc, psrc);
|
7185 | 7466 |
if (rc)
|
7186 | 7467 |
return rc;
|
7187 | |
if (IS_LEAF(csrc->mc_pg[csrc->mc_top]))
|
|
7468 |
if (IS_LEAF(psrc))
|
7188 | 7469 |
csrc->mc_db->md_leaf_pages--;
|
7189 | 7470 |
else
|
7190 | 7471 |
csrc->mc_db->md_branch_pages--;
|
|
7192 | 7473 |
/* Adjust other cursors pointing to mp */
|
7193 | 7474 |
MDB_cursor *m2, *m3;
|
7194 | 7475 |
MDB_dbi dbi = csrc->mc_dbi;
|
7195 | |
MDB_page *mp = cdst->mc_pg[cdst->mc_top];
|
7196 | 7476 |
|
7197 | 7477 |
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
|
7198 | 7478 |
if (csrc->mc_flags & C_SUB)
|
|
7201 | 7481 |
m3 = m2;
|
7202 | 7482 |
if (m3 == csrc) continue;
|
7203 | 7483 |
if (m3->mc_snum < csrc->mc_snum) continue;
|
7204 | |
if (m3->mc_pg[csrc->mc_top] == csrc->mc_pg[csrc->mc_top]) {
|
7205 | |
m3->mc_pg[csrc->mc_top] = mp;
|
|
7484 |
if (m3->mc_pg[csrc->mc_top] == psrc) {
|
|
7485 |
m3->mc_pg[csrc->mc_top] = pdst;
|
7206 | 7486 |
m3->mc_ki[csrc->mc_top] += nkeys;
|
7207 | 7487 |
}
|
7208 | 7488 |
}
|
7209 | 7489 |
}
|
7210 | |
mdb_cursor_pop(csrc);
|
7211 | |
|
7212 | |
return mdb_rebalance(csrc);
|
|
7490 |
{
|
|
7491 |
unsigned int snum = cdst->mc_snum;
|
|
7492 |
uint16_t depth = cdst->mc_db->md_depth;
|
|
7493 |
mdb_cursor_pop(cdst);
|
|
7494 |
rc = mdb_rebalance(cdst);
|
|
7495 |
/* Did the tree shrink? */
|
|
7496 |
if (depth > cdst->mc_db->md_depth)
|
|
7497 |
snum--;
|
|
7498 |
cdst->mc_snum = snum;
|
|
7499 |
cdst->mc_top = snum-1;
|
|
7500 |
}
|
|
7501 |
return rc;
|
7213 | 7502 |
}
|
7214 | 7503 |
|
7215 | 7504 |
/** Copy the contents of a cursor.
|
|
7247 | 7536 |
int rc;
|
7248 | 7537 |
unsigned int ptop, minkeys;
|
7249 | 7538 |
MDB_cursor mn;
|
|
7539 |
indx_t oldki;
|
7250 | 7540 |
|
7251 | 7541 |
minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top]));
|
7252 | 7542 |
DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
|
|
7297 | 7587 |
}
|
7298 | 7588 |
}
|
7299 | 7589 |
} else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
|
|
7590 |
int i;
|
7300 | 7591 |
DPUTS("collapsing root page!");
|
7301 | 7592 |
rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
|
7302 | 7593 |
if (rc)
|
|
7308 | 7599 |
mc->mc_db->md_depth--;
|
7309 | 7600 |
mc->mc_db->md_branch_pages--;
|
7310 | 7601 |
mc->mc_ki[0] = mc->mc_ki[1];
|
|
7602 |
for (i = 1; i<mc->mc_db->md_depth; i++) {
|
|
7603 |
mc->mc_pg[i] = mc->mc_pg[i+1];
|
|
7604 |
mc->mc_ki[i] = mc->mc_ki[i+1];
|
|
7605 |
}
|
7311 | 7606 |
{
|
7312 | 7607 |
/* Adjust other cursors pointing to mp */
|
7313 | 7608 |
MDB_cursor *m2, *m3;
|
|
7320 | 7615 |
m3 = m2;
|
7321 | 7616 |
if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
|
7322 | 7617 |
if (m3->mc_pg[0] == mp) {
|
7323 | |
int i;
|
7324 | 7618 |
m3->mc_snum--;
|
7325 | 7619 |
m3->mc_top--;
|
7326 | 7620 |
for (i=0; i<m3->mc_snum; i++) {
|
|
7351 | 7645 |
mdb_cursor_copy(mc, &mn);
|
7352 | 7646 |
mn.mc_xcursor = NULL;
|
7353 | 7647 |
|
|
7648 |
oldki = mc->mc_ki[mc->mc_top];
|
7354 | 7649 |
if (mc->mc_ki[ptop] == 0) {
|
7355 | 7650 |
/* We're the leftmost leaf in our parent.
|
7356 | 7651 |
*/
|
|
7384 | 7679 |
* (A branch page must never have less than 2 keys.)
|
7385 | 7680 |
*/
|
7386 | 7681 |
minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top]));
|
7387 | |
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys)
|
7388 | |
return mdb_node_move(&mn, mc);
|
7389 | |
else {
|
7390 | |
if (mc->mc_ki[ptop] == 0)
|
|
7682 |
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
|
|
7683 |
rc = mdb_node_move(&mn, mc);
|
|
7684 |
if (mc->mc_ki[ptop]) {
|
|
7685 |
oldki++;
|
|
7686 |
}
|
|
7687 |
} else {
|
|
7688 |
if (mc->mc_ki[ptop] == 0) {
|
7391 | 7689 |
rc = mdb_page_merge(&mn, mc);
|
7392 | |
else {
|
|
7690 |
} else {
|
|
7691 |
oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
|
7393 | 7692 |
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
7394 | 7693 |
rc = mdb_page_merge(mc, &mn);
|
7395 | 7694 |
mdb_cursor_copy(&mn, mc);
|
7396 | 7695 |
}
|
7397 | |
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
7398 | |
}
|
|
7696 |
mc->mc_flags &= ~C_EOF;
|
|
7697 |
}
|
|
7698 |
mc->mc_ki[mc->mc_top] = oldki;
|
7399 | 7699 |
return rc;
|
7400 | 7700 |
}
|
7401 | 7701 |
|
7402 | 7702 |
/** Complete a delete operation started by #mdb_cursor_del(). */
|
7403 | 7703 |
static int
|
7404 | |
mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf)
|
|
7704 |
mdb_cursor_del0(MDB_cursor *mc)
|
7405 | 7705 |
{
|
7406 | 7706 |
int rc;
|
7407 | 7707 |
MDB_page *mp;
|
7408 | 7708 |
indx_t ki;
|
7409 | 7709 |
unsigned int nkeys;
|
7410 | 7710 |
|
7411 | |
mp = mc->mc_pg[mc->mc_top];
|
7412 | 7711 |
ki = mc->mc_ki[mc->mc_top];
|
7413 | |
|
7414 | |
/* add overflow pages to free list */
|
7415 | |
if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_BIGDATA)) {
|
7416 | |
MDB_page *omp;
|
7417 | |
pgno_t pg;
|
7418 | |
|
7419 | |
memcpy(&pg, NODEDATA(leaf), sizeof(pg));
|
7420 | |
if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
|
7421 | |
(rc = mdb_ovpage_free(mc, omp)))
|
7422 | |
return rc;
|
7423 | |
}
|
7424 | 7712 |
mdb_node_del(mc, mc->mc_db->md_pad);
|
7425 | 7713 |
mc->mc_db->md_entries--;
|
7426 | 7714 |
rc = mdb_rebalance(mc);
|
7427 | |
if (rc != MDB_SUCCESS)
|
7428 | |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
7429 | |
else {
|
|
7715 |
|
|
7716 |
if (rc == MDB_SUCCESS) {
|
7430 | 7717 |
MDB_cursor *m2, *m3;
|
7431 | 7718 |
MDB_dbi dbi = mc->mc_dbi;
|
7432 | 7719 |
|
|
7434 | 7721 |
nkeys = NUMKEYS(mp);
|
7435 | 7722 |
|
7436 | 7723 |
/* if mc points past last node in page, find next sibling */
|
7437 | |
if (mc->mc_ki[mc->mc_top] >= nkeys)
|
7438 | |
mdb_cursor_sibling(mc, 1);
|
|
7724 |
if (mc->mc_ki[mc->mc_top] >= nkeys) {
|
|
7725 |
rc = mdb_cursor_sibling(mc, 1);
|
|
7726 |
if (rc == MDB_NOTFOUND) {
|
|
7727 |
mc->mc_flags |= C_EOF;
|
|
7728 |
rc = MDB_SUCCESS;
|
|
7729 |
}
|
|
7730 |
}
|
7439 | 7731 |
|
7440 | 7732 |
/* Adjust other cursors pointing to mp */
|
7441 | |
for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
|
|
7733 |
for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) {
|
7442 | 7734 |
m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
|
7443 | 7735 |
if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
|
7444 | 7736 |
continue;
|
|
7449 | 7741 |
m3->mc_flags |= C_DEL;
|
7450 | 7742 |
if (m3->mc_ki[mc->mc_top] > ki)
|
7451 | 7743 |
m3->mc_ki[mc->mc_top]--;
|
|
7744 |
else if (mc->mc_db->md_flags & MDB_DUPSORT)
|
|
7745 |
m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF;
|
7452 | 7746 |
}
|
7453 | |
if (m3->mc_ki[mc->mc_top] >= nkeys)
|
7454 | |
mdb_cursor_sibling(m3, 1);
|
|
7747 |
if (m3->mc_ki[mc->mc_top] >= nkeys) {
|
|
7748 |
rc = mdb_cursor_sibling(m3, 1);
|
|
7749 |
if (rc == MDB_NOTFOUND) {
|
|
7750 |
m3->mc_flags |= C_EOF;
|
|
7751 |
rc = MDB_SUCCESS;
|
|
7752 |
}
|
|
7753 |
}
|
7455 | 7754 |
}
|
7456 | 7755 |
}
|
7457 | 7756 |
mc->mc_flags |= C_DEL;
|
7458 | 7757 |
}
|
7459 | 7758 |
|
|
7759 |
if (rc)
|
|
7760 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
7460 | 7761 |
return rc;
|
7461 | 7762 |
}
|
7462 | 7763 |
|
7463 | 7764 |
int
|
7464 | 7765 |
mdb_del(MDB_txn *txn, MDB_dbi dbi,
|
7465 | 7766 |
MDB_val *key, MDB_val *data)
|
|
7767 |
{
|
|
7768 |
if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
|
7769 |
return EINVAL;
|
|
7770 |
|
|
7771 |
if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
|
|
7772 |
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
|
7773 |
|
|
7774 |
if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
|
|
7775 |
/* must ignore any data */
|
|
7776 |
data = NULL;
|
|
7777 |
}
|
|
7778 |
|
|
7779 |
return mdb_del0(txn, dbi, key, data, 0);
|
|
7780 |
}
|
|
7781 |
|
|
7782 |
static int
|
|
7783 |
mdb_del0(MDB_txn *txn, MDB_dbi dbi,
|
|
7784 |
MDB_val *key, MDB_val *data, unsigned flags)
|
7466 | 7785 |
{
|
7467 | 7786 |
MDB_cursor mc;
|
7468 | 7787 |
MDB_xcursor mx;
|
7469 | 7788 |
MDB_cursor_op op;
|
7470 | 7789 |
MDB_val rdata, *xdata;
|
7471 | |
int rc, exact;
|
|
7790 |
int rc, exact = 0;
|
7472 | 7791 |
DKBUF;
|
7473 | 7792 |
|
7474 | |
if (key == NULL)
|
7475 | |
return EINVAL;
|
7476 | |
|
7477 | 7793 |
DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
|
7478 | 7794 |
|
7479 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
7480 | |
return EINVAL;
|
7481 | |
|
7482 | |
if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
|
7483 | |
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
7484 | |
|
7485 | 7795 |
mdb_cursor_init(&mc, txn, dbi, &mx);
|
7486 | 7796 |
|
7487 | |
exact = 0;
|
7488 | |
if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
|
7489 | |
/* must ignore any data */
|
7490 | |
data = NULL;
|
7491 | |
}
|
7492 | 7797 |
if (data) {
|
7493 | 7798 |
op = MDB_GET_BOTH;
|
7494 | 7799 |
rdata = *data;
|
|
7496 | 7801 |
} else {
|
7497 | 7802 |
op = MDB_SET;
|
7498 | 7803 |
xdata = NULL;
|
|
7804 |
flags |= MDB_NODUPDATA;
|
7499 | 7805 |
}
|
7500 | 7806 |
rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
|
7501 | 7807 |
if (rc == 0) {
|
|
7510 | 7816 |
mc.mc_flags |= C_UNTRACK;
|
7511 | 7817 |
mc.mc_next = txn->mt_cursors[dbi];
|
7512 | 7818 |
txn->mt_cursors[dbi] = &mc;
|
7513 | |
rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
|
|
7819 |
rc = mdb_cursor_del(&mc, flags);
|
7514 | 7820 |
txn->mt_cursors[dbi] = mc.mc_next;
|
7515 | 7821 |
}
|
7516 | 7822 |
return rc;
|
|
7559 | 7865 |
|
7560 | 7866 |
if (mc->mc_snum < 2) {
|
7561 | 7867 |
if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
|
7562 | |
return rc;
|
|
7868 |
goto done;
|
7563 | 7869 |
/* shift current top to make room for new parent */
|
7564 | 7870 |
mc->mc_pg[1] = mc->mc_pg[0];
|
7565 | 7871 |
mc->mc_ki[1] = mc->mc_ki[0];
|
|
7577 | 7883 |
mc->mc_ki[0] = mc->mc_ki[1];
|
7578 | 7884 |
mc->mc_db->md_root = mp->mp_pgno;
|
7579 | 7885 |
mc->mc_db->md_depth--;
|
7580 | |
return rc;
|
|
7886 |
goto done;
|
7581 | 7887 |
}
|
7582 | 7888 |
mc->mc_snum = 2;
|
7583 | 7889 |
mc->mc_top = 1;
|
|
7606 | 7912 |
int x;
|
7607 | 7913 |
unsigned int lsize, rsize, ksize;
|
7608 | 7914 |
/* Move half of the keys to the right sibling */
|
7609 | |
copy = NULL;
|
7610 | 7915 |
x = mc->mc_ki[mc->mc_top] - split_indx;
|
7611 | 7916 |
ksize = mc->mc_db->md_pad;
|
7612 | 7917 |
split = LEAF2KEY(mp, split_indx, ksize);
|
|
7653 | 7958 |
|
7654 | 7959 |
/* grab a page to hold a temporary copy */
|
7655 | 7960 |
copy = mdb_page_malloc(mc->mc_txn, 1);
|
7656 | |
if (copy == NULL)
|
7657 | |
return ENOMEM;
|
|
7961 |
if (copy == NULL) {
|
|
7962 |
rc = ENOMEM;
|
|
7963 |
goto done;
|
|
7964 |
}
|
7658 | 7965 |
copy->mp_pgno = mp->mp_pgno;
|
7659 | 7966 |
copy->mp_flags = mp->mp_flags;
|
7660 | |
copy->mp_lower = PAGEHDRSZ;
|
7661 | |
copy->mp_upper = env->me_psize;
|
|
7967 |
copy->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
7968 |
copy->mp_upper = env->me_psize - PAGEBASE;
|
7662 | 7969 |
|
7663 | 7970 |
/* prepare to insert */
|
7664 | 7971 |
for (i=0, j=0; i<nkeys; i++) {
|
|
7698 | 8005 |
psize += nsize;
|
7699 | 8006 |
node = NULL;
|
7700 | 8007 |
} else {
|
7701 | |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
|
8008 |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
7702 | 8009 |
psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
|
7703 | 8010 |
if (IS_LEAF(mp)) {
|
7704 | 8011 |
if (F_ISSET(node->mn_flags, F_BIGDATA))
|
|
7718 | 8025 |
sepkey.mv_size = newkey->mv_size;
|
7719 | 8026 |
sepkey.mv_data = newkey->mv_data;
|
7720 | 8027 |
} else {
|
7721 | |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx]);
|
|
8028 |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE);
|
7722 | 8029 |
sepkey.mv_size = node->mn_ksize;
|
7723 | 8030 |
sepkey.mv_data = NODEKEY(node);
|
7724 | 8031 |
}
|
|
7734 | 8041 |
mn.mc_top--;
|
7735 | 8042 |
did_split = 1;
|
7736 | 8043 |
rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
|
|
8044 |
if (rc)
|
|
8045 |
goto done;
|
7737 | 8046 |
|
7738 | 8047 |
/* root split? */
|
7739 | 8048 |
if (mn.mc_snum == mc->mc_snum) {
|
|
7755 | 8064 |
mc->mc_ki[i] = mn.mc_ki[i];
|
7756 | 8065 |
}
|
7757 | 8066 |
mc->mc_pg[ptop] = mn.mc_pg[ptop];
|
7758 | |
mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
|
|
8067 |
if (mn.mc_ki[ptop]) {
|
|
8068 |
mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
|
|
8069 |
} else {
|
|
8070 |
/* find right page's left sibling */
|
|
8071 |
mc->mc_ki[ptop] = mn.mc_ki[ptop];
|
|
8072 |
mdb_cursor_sibling(mc, 0);
|
|
8073 |
}
|
7759 | 8074 |
}
|
7760 | 8075 |
} else {
|
7761 | 8076 |
mn.mc_top--;
|
|
7764 | 8079 |
}
|
7765 | 8080 |
mc->mc_flags ^= C_SPLITTING;
|
7766 | 8081 |
if (rc != MDB_SUCCESS) {
|
7767 | |
return rc;
|
|
8082 |
goto done;
|
7768 | 8083 |
}
|
7769 | 8084 |
if (nflags & MDB_APPEND) {
|
7770 | 8085 |
mc->mc_pg[mc->mc_top] = rp;
|
7771 | 8086 |
mc->mc_ki[mc->mc_top] = 0;
|
7772 | 8087 |
rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
|
7773 | 8088 |
if (rc)
|
7774 | |
return rc;
|
|
8089 |
goto done;
|
7775 | 8090 |
for (i=0; i<mc->mc_top; i++)
|
7776 | 8091 |
mc->mc_ki[i] = mn.mc_ki[i];
|
7777 | 8092 |
} else if (!IS_LEAF2(mp)) {
|
|
7791 | 8106 |
/* Update index for the new key. */
|
7792 | 8107 |
mc->mc_ki[mc->mc_top] = j;
|
7793 | 8108 |
} else {
|
7794 | |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
|
8109 |
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
7795 | 8110 |
rkey.mv_data = NODEKEY(node);
|
7796 | 8111 |
rkey.mv_size = node->mn_ksize;
|
7797 | 8112 |
if (IS_LEAF(mp)) {
|
|
7809 | 8124 |
}
|
7810 | 8125 |
|
7811 | 8126 |
rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
|
7812 | |
if (rc) {
|
7813 | |
/* return tmp page to freelist */
|
7814 | |
mdb_page_free(env, copy);
|
7815 | |
return rc;
|
7816 | |
}
|
|
8127 |
if (rc)
|
|
8128 |
goto done;
|
7817 | 8129 |
if (i == nkeys) {
|
7818 | 8130 |
i = 0;
|
7819 | 8131 |
j = 0;
|
|
7830 | 8142 |
mp->mp_lower = copy->mp_lower;
|
7831 | 8143 |
mp->mp_upper = copy->mp_upper;
|
7832 | 8144 |
memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
|
7833 | |
env->me_psize - copy->mp_upper);
|
|
8145 |
env->me_psize - copy->mp_upper - PAGEBASE);
|
7834 | 8146 |
|
7835 | 8147 |
/* reset back to original page */
|
7836 | 8148 |
if (newindx < split_indx) {
|
|
7847 | 8159 |
*/
|
7848 | 8160 |
if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
|
7849 | 8161 |
mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
|
7850 | |
for (i=0; i<ptop; i++) {
|
|
8162 |
for (i=0; i<=ptop; i++) {
|
7851 | 8163 |
mc->mc_pg[i] = mn.mc_pg[i];
|
7852 | 8164 |
mc->mc_ki[i] = mn.mc_ki[i];
|
7853 | 8165 |
}
|
7854 | |
mc->mc_pg[ptop] = mn.mc_pg[ptop];
|
7855 | |
mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
|
7856 | |
}
|
7857 | |
}
|
7858 | |
/* return tmp page to freelist */
|
7859 | |
mdb_page_free(env, copy);
|
|
8166 |
}
|
|
8167 |
}
|
7860 | 8168 |
}
|
7861 | 8169 |
|
7862 | 8170 |
{
|
|
7907 | 8215 |
}
|
7908 | 8216 |
}
|
7909 | 8217 |
DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp)));
|
|
8218 |
|
|
8219 |
done:
|
|
8220 |
if (copy) /* tmp page */
|
|
8221 |
mdb_page_free(env, copy);
|
|
8222 |
if (rc)
|
|
8223 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
7910 | 8224 |
return rc;
|
7911 | 8225 |
}
|
7912 | 8226 |
|
|
7917 | 8231 |
MDB_cursor mc;
|
7918 | 8232 |
MDB_xcursor mx;
|
7919 | 8233 |
|
7920 | |
if (key == NULL || data == NULL)
|
7921 | |
return EINVAL;
|
7922 | |
|
7923 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
8234 |
if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
7924 | 8235 |
return EINVAL;
|
7925 | 8236 |
|
7926 | 8237 |
if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
|
|
7930 | 8241 |
return mdb_cursor_put(&mc, key, data, flags);
|
7931 | 8242 |
}
|
7932 | 8243 |
|
7933 | |
int
|
|
8244 |
#ifndef MDB_WBUF
|
|
8245 |
#define MDB_WBUF (1024*1024)
|
|
8246 |
#endif
|
|
8247 |
|
|
8248 |
/** State needed for a compacting copy. */
|
|
8249 |
typedef struct mdb_copy {
|
|
8250 |
pthread_mutex_t mc_mutex;
|
|
8251 |
pthread_cond_t mc_cond;
|
|
8252 |
char *mc_wbuf[2];
|
|
8253 |
char *mc_over[2];
|
|
8254 |
MDB_env *mc_env;
|
|
8255 |
MDB_txn *mc_txn;
|
|
8256 |
int mc_wlen[2];
|
|
8257 |
int mc_olen[2];
|
|
8258 |
pgno_t mc_next_pgno;
|
|
8259 |
HANDLE mc_fd;
|
|
8260 |
int mc_status;
|
|
8261 |
volatile int mc_new;
|
|
8262 |
int mc_toggle;
|
|
8263 |
|
|
8264 |
} mdb_copy;
|
|
8265 |
|
|
8266 |
/** Dedicated writer thread for compacting copy. */
|
|
8267 |
static THREAD_RET ESECT
|
|
8268 |
mdb_env_copythr(void *arg)
|
|
8269 |
{
|
|
8270 |
mdb_copy *my = arg;
|
|
8271 |
char *ptr;
|
|
8272 |
int toggle = 0, wsize, rc;
|
|
8273 |
#ifdef _WIN32
|
|
8274 |
DWORD len;
|
|
8275 |
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
8276 |
#else
|
|
8277 |
int len;
|
|
8278 |
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
|
8279 |
#endif
|
|
8280 |
|
|
8281 |
pthread_mutex_lock(&my->mc_mutex);
|
|
8282 |
my->mc_new = 0;
|
|
8283 |
pthread_cond_signal(&my->mc_cond);
|
|
8284 |
for(;;) {
|
|
8285 |
while (!my->mc_new)
|
|
8286 |
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
|
8287 |
if (my->mc_new < 0) {
|
|
8288 |
my->mc_new = 0;
|
|
8289 |
break;
|
|
8290 |
}
|
|
8291 |
my->mc_new = 0;
|
|
8292 |
wsize = my->mc_wlen[toggle];
|
|
8293 |
ptr = my->mc_wbuf[toggle];
|
|
8294 |
again:
|
|
8295 |
while (wsize > 0) {
|
|
8296 |
DO_WRITE(rc, my->mc_fd, ptr, wsize, len);
|
|
8297 |
if (!rc) {
|
|
8298 |
rc = ErrCode();
|
|
8299 |
break;
|
|
8300 |
} else if (len > 0) {
|
|
8301 |
rc = MDB_SUCCESS;
|
|
8302 |
ptr += len;
|
|
8303 |
wsize -= len;
|
|
8304 |
continue;
|
|
8305 |
} else {
|
|
8306 |
rc = EIO;
|
|
8307 |
break;
|
|
8308 |
}
|
|
8309 |
}
|
|
8310 |
if (rc) {
|
|
8311 |
my->mc_status = rc;
|
|
8312 |
break;
|
|
8313 |
}
|
|
8314 |
/* If there's an overflow page tail, write it too */
|
|
8315 |
if (my->mc_olen[toggle]) {
|
|
8316 |
wsize = my->mc_olen[toggle];
|
|
8317 |
ptr = my->mc_over[toggle];
|
|
8318 |
my->mc_olen[toggle] = 0;
|
|
8319 |
goto again;
|
|
8320 |
}
|
|
8321 |
my->mc_wlen[toggle] = 0;
|
|
8322 |
toggle ^= 1;
|
|
8323 |
pthread_cond_signal(&my->mc_cond);
|
|
8324 |
}
|
|
8325 |
pthread_cond_signal(&my->mc_cond);
|
|
8326 |
pthread_mutex_unlock(&my->mc_mutex);
|
|
8327 |
return (THREAD_RET)0;
|
|
8328 |
#undef DO_WRITE
|
|
8329 |
}
|
|
8330 |
|
|
8331 |
/** Tell the writer thread there's a buffer ready to write */
|
|
8332 |
static int ESECT
|
|
8333 |
mdb_env_cthr_toggle(mdb_copy *my, int st)
|
|
8334 |
{
|
|
8335 |
int toggle = my->mc_toggle ^ 1;
|
|
8336 |
pthread_mutex_lock(&my->mc_mutex);
|
|
8337 |
if (my->mc_status) {
|
|
8338 |
pthread_mutex_unlock(&my->mc_mutex);
|
|
8339 |
return my->mc_status;
|
|
8340 |
}
|
|
8341 |
while (my->mc_new == 1)
|
|
8342 |
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
|
8343 |
my->mc_new = st;
|
|
8344 |
my->mc_toggle = toggle;
|
|
8345 |
pthread_cond_signal(&my->mc_cond);
|
|
8346 |
pthread_mutex_unlock(&my->mc_mutex);
|
|
8347 |
return 0;
|
|
8348 |
}
|
|
8349 |
|
|
8350 |
/** Depth-first tree traversal for compacting copy. */
|
|
8351 |
static int ESECT
|
|
8352 |
mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags)
|
|
8353 |
{
|
|
8354 |
MDB_cursor mc;
|
|
8355 |
MDB_txn *txn = my->mc_txn;
|
|
8356 |
MDB_node *ni;
|
|
8357 |
MDB_page *mo, *mp, *leaf;
|
|
8358 |
char *buf, *ptr;
|
|
8359 |
int rc, toggle;
|
|
8360 |
unsigned int i;
|
|
8361 |
|
|
8362 |
/* Empty DB, nothing to do */
|
|
8363 |
if (*pg == P_INVALID)
|
|
8364 |
return MDB_SUCCESS;
|
|
8365 |
|
|
8366 |
mc.mc_snum = 1;
|
|
8367 |
mc.mc_top = 0;
|
|
8368 |
mc.mc_txn = txn;
|
|
8369 |
|
|
8370 |
rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL);
|
|
8371 |
if (rc)
|
|
8372 |
return rc;
|
|
8373 |
rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST);
|
|
8374 |
if (rc)
|
|
8375 |
return rc;
|
|
8376 |
|
|
8377 |
/* Make cursor pages writable */
|
|
8378 |
buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum);
|
|
8379 |
if (buf == NULL)
|
|
8380 |
return ENOMEM;
|
|
8381 |
|
|
8382 |
for (i=0; i<mc.mc_top; i++) {
|
|
8383 |
mdb_page_copy((MDB_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize);
|
|
8384 |
mc.mc_pg[i] = (MDB_page *)ptr;
|
|
8385 |
ptr += my->mc_env->me_psize;
|
|
8386 |
}
|
|
8387 |
|
|
8388 |
/* This is writable space for a leaf page. Usually not needed. */
|
|
8389 |
leaf = (MDB_page *)ptr;
|
|
8390 |
|
|
8391 |
toggle = my->mc_toggle;
|
|
8392 |
while (mc.mc_snum > 0) {
|
|
8393 |
unsigned n;
|
|
8394 |
mp = mc.mc_pg[mc.mc_top];
|
|
8395 |
n = NUMKEYS(mp);
|
|
8396 |
|
|
8397 |
if (IS_LEAF(mp)) {
|
|
8398 |
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
|
|
8399 |
for (i=0; i<n; i++) {
|
|
8400 |
ni = NODEPTR(mp, i);
|
|
8401 |
if (ni->mn_flags & F_BIGDATA) {
|
|
8402 |
MDB_page *omp;
|
|
8403 |
pgno_t pg;
|
|
8404 |
|
|
8405 |
/* Need writable leaf */
|
|
8406 |
if (mp != leaf) {
|
|
8407 |
mc.mc_pg[mc.mc_top] = leaf;
|
|
8408 |
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
|
8409 |
mp = leaf;
|
|
8410 |
ni = NODEPTR(mp, i);
|
|
8411 |
}
|
|
8412 |
|
|
8413 |
memcpy(&pg, NODEDATA(ni), sizeof(pg));
|
|
8414 |
rc = mdb_page_get(txn, pg, &omp, NULL);
|
|
8415 |
if (rc)
|
|
8416 |
goto done;
|
|
8417 |
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
|
8418 |
rc = mdb_env_cthr_toggle(my, 1);
|
|
8419 |
if (rc)
|
|
8420 |
goto done;
|
|
8421 |
toggle = my->mc_toggle;
|
|
8422 |
}
|
|
8423 |
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
|
8424 |
memcpy(mo, omp, my->mc_env->me_psize);
|
|
8425 |
mo->mp_pgno = my->mc_next_pgno;
|
|
8426 |
my->mc_next_pgno += omp->mp_pages;
|
|
8427 |
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
|
8428 |
if (omp->mp_pages > 1) {
|
|
8429 |
my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1);
|
|
8430 |
my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize;
|
|
8431 |
rc = mdb_env_cthr_toggle(my, 1);
|
|
8432 |
if (rc)
|
|
8433 |
goto done;
|
|
8434 |
toggle = my->mc_toggle;
|
|
8435 |
}
|
|
8436 |
memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t));
|
|
8437 |
} else if (ni->mn_flags & F_SUBDATA) {
|
|
8438 |
MDB_db db;
|
|
8439 |
|
|
8440 |
/* Need writable leaf */
|
|
8441 |
if (mp != leaf) {
|
|
8442 |
mc.mc_pg[mc.mc_top] = leaf;
|
|
8443 |
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
|
8444 |
mp = leaf;
|
|
8445 |
ni = NODEPTR(mp, i);
|
|
8446 |
}
|
|
8447 |
|
|
8448 |
memcpy(&db, NODEDATA(ni), sizeof(db));
|
|
8449 |
my->mc_toggle = toggle;
|
|
8450 |
rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA);
|
|
8451 |
if (rc)
|
|
8452 |
goto done;
|
|
8453 |
toggle = my->mc_toggle;
|
|
8454 |
memcpy(NODEDATA(ni), &db, sizeof(db));
|
|
8455 |
}
|
|
8456 |
}
|
|
8457 |
}
|
|
8458 |
} else {
|
|
8459 |
mc.mc_ki[mc.mc_top]++;
|
|
8460 |
if (mc.mc_ki[mc.mc_top] < n) {
|
|
8461 |
pgno_t pg;
|
|
8462 |
again:
|
|
8463 |
ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]);
|
|
8464 |
pg = NODEPGNO(ni);
|
|
8465 |
rc = mdb_page_get(txn, pg, &mp, NULL);
|
|
8466 |
if (rc)
|
|
8467 |
goto done;
|
|
8468 |
mc.mc_top++;
|
|
8469 |
mc.mc_snum++;
|
|
8470 |
mc.mc_ki[mc.mc_top] = 0;
|
|
8471 |
if (IS_BRANCH(mp)) {
|
|
8472 |
/* Whenever we advance to a sibling branch page,
|
|
8473 |
* we must proceed all the way down to its first leaf.
|
|
8474 |
*/
|
|
8475 |
mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize);
|
|
8476 |
goto again;
|
|
8477 |
} else
|
|
8478 |
mc.mc_pg[mc.mc_top] = mp;
|
|
8479 |
continue;
|
|
8480 |
}
|
|
8481 |
}
|
|
8482 |
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
|
8483 |
rc = mdb_env_cthr_toggle(my, 1);
|
|
8484 |
if (rc)
|
|
8485 |
goto done;
|
|
8486 |
toggle = my->mc_toggle;
|
|
8487 |
}
|
|
8488 |
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
|
8489 |
mdb_page_copy(mo, mp, my->mc_env->me_psize);
|
|
8490 |
mo->mp_pgno = my->mc_next_pgno++;
|
|
8491 |
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
|
8492 |
if (mc.mc_top) {
|
|
8493 |
/* Update parent if there is one */
|
|
8494 |
ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]);
|
|
8495 |
SETPGNO(ni, mo->mp_pgno);
|
|
8496 |
mdb_cursor_pop(&mc);
|
|
8497 |
} else {
|
|
8498 |
/* Otherwise we're done */
|
|
8499 |
*pg = mo->mp_pgno;
|
|
8500 |
break;
|
|
8501 |
}
|
|
8502 |
}
|
|
8503 |
done:
|
|
8504 |
free(buf);
|
|
8505 |
return rc;
|
|
8506 |
}
|
|
8507 |
|
|
8508 |
/** Copy environment with compaction. */
|
|
8509 |
static int ESECT
|
|
8510 |
mdb_env_copyfd1(MDB_env *env, HANDLE fd)
|
|
8511 |
{
|
|
8512 |
MDB_meta *mm;
|
|
8513 |
MDB_page *mp;
|
|
8514 |
mdb_copy my;
|
|
8515 |
MDB_txn *txn = NULL;
|
|
8516 |
pthread_t thr;
|
|
8517 |
int rc;
|
|
8518 |
|
|
8519 |
#ifdef _WIN32
|
|
8520 |
my.mc_mutex = CreateMutex(NULL, FALSE, NULL);
|
|
8521 |
my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL);
|
|
8522 |
my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize);
|
|
8523 |
if (my.mc_wbuf[0] == NULL)
|
|
8524 |
return errno;
|
|
8525 |
#else
|
|
8526 |
pthread_mutex_init(&my.mc_mutex, NULL);
|
|
8527 |
pthread_cond_init(&my.mc_cond, NULL);
|
|
8528 |
#ifdef HAVE_MEMALIGN
|
|
8529 |
my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2);
|
|
8530 |
if (my.mc_wbuf[0] == NULL)
|
|
8531 |
return errno;
|
|
8532 |
#else
|
|
8533 |
rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2);
|
|
8534 |
if (rc)
|
|
8535 |
return rc;
|
|
8536 |
#endif
|
|
8537 |
#endif
|
|
8538 |
memset(my.mc_wbuf[0], 0, MDB_WBUF*2);
|
|
8539 |
my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF;
|
|
8540 |
my.mc_wlen[0] = 0;
|
|
8541 |
my.mc_wlen[1] = 0;
|
|
8542 |
my.mc_olen[0] = 0;
|
|
8543 |
my.mc_olen[1] = 0;
|
|
8544 |
my.mc_next_pgno = 2;
|
|
8545 |
my.mc_status = 0;
|
|
8546 |
my.mc_new = 1;
|
|
8547 |
my.mc_toggle = 0;
|
|
8548 |
my.mc_env = env;
|
|
8549 |
my.mc_fd = fd;
|
|
8550 |
THREAD_CREATE(thr, mdb_env_copythr, &my);
|
|
8551 |
|
|
8552 |
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
|
8553 |
if (rc)
|
|
8554 |
return rc;
|
|
8555 |
|
|
8556 |
mp = (MDB_page *)my.mc_wbuf[0];
|
|
8557 |
memset(mp, 0, 2*env->me_psize);
|
|
8558 |
mp->mp_pgno = 0;
|
|
8559 |
mp->mp_flags = P_META;
|
|
8560 |
mm = (MDB_meta *)METADATA(mp);
|
|
8561 |
mdb_env_init_meta0(env, mm);
|
|
8562 |
mm->mm_address = env->me_metas[0]->mm_address;
|
|
8563 |
|
|
8564 |
mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize);
|
|
8565 |
mp->mp_pgno = 1;
|
|
8566 |
mp->mp_flags = P_META;
|
|
8567 |
*(MDB_meta *)METADATA(mp) = *mm;
|
|
8568 |
mm = (MDB_meta *)METADATA(mp);
|
|
8569 |
|
|
8570 |
/* Count the number of free pages, subtract from lastpg to find
|
|
8571 |
* number of active pages
|
|
8572 |
*/
|
|
8573 |
{
|
|
8574 |
MDB_ID freecount = 0;
|
|
8575 |
MDB_cursor mc;
|
|
8576 |
MDB_val key, data;
|
|
8577 |
mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
|
|
8578 |
while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
|
|
8579 |
freecount += *(MDB_ID *)data.mv_data;
|
|
8580 |
freecount += txn->mt_dbs[0].md_branch_pages +
|
|
8581 |
txn->mt_dbs[0].md_leaf_pages +
|
|
8582 |
txn->mt_dbs[0].md_overflow_pages;
|
|
8583 |
|
|
8584 |
/* Set metapage 1 */
|
|
8585 |
mm->mm_last_pg = txn->mt_next_pgno - freecount - 1;
|
|
8586 |
mm->mm_dbs[1] = txn->mt_dbs[1];
|
|
8587 |
mm->mm_dbs[1].md_root = mm->mm_last_pg;
|
|
8588 |
mm->mm_txnid = 1;
|
|
8589 |
}
|
|
8590 |
my.mc_wlen[0] = env->me_psize * 2;
|
|
8591 |
my.mc_txn = txn;
|
|
8592 |
pthread_mutex_lock(&my.mc_mutex);
|
|
8593 |
while(my.mc_new)
|
|
8594 |
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
|
8595 |
pthread_mutex_unlock(&my.mc_mutex);
|
|
8596 |
rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0);
|
|
8597 |
if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle])
|
|
8598 |
rc = mdb_env_cthr_toggle(&my, 1);
|
|
8599 |
mdb_env_cthr_toggle(&my, -1);
|
|
8600 |
pthread_mutex_lock(&my.mc_mutex);
|
|
8601 |
while(my.mc_new)
|
|
8602 |
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
|
8603 |
pthread_mutex_unlock(&my.mc_mutex);
|
|
8604 |
THREAD_FINISH(thr);
|
|
8605 |
|
|
8606 |
mdb_txn_abort(txn);
|
|
8607 |
#ifdef _WIN32
|
|
8608 |
CloseHandle(my.mc_cond);
|
|
8609 |
CloseHandle(my.mc_mutex);
|
|
8610 |
_aligned_free(my.mc_wbuf[0]);
|
|
8611 |
#else
|
|
8612 |
pthread_cond_destroy(&my.mc_cond);
|
|
8613 |
pthread_mutex_destroy(&my.mc_mutex);
|
|
8614 |
free(my.mc_wbuf[0]);
|
|
8615 |
#endif
|
|
8616 |
return rc;
|
|
8617 |
}
|
|
8618 |
|
|
8619 |
/** Copy environment as-is. */
|
|
8620 |
static int ESECT
|
|
8621 |
mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
|
8622 |
{
|
|
8623 |
MDB_txn *txn = NULL;
|
|
8624 |
int rc;
|
|
8625 |
size_t wsize;
|
|
8626 |
char *ptr;
|
|
8627 |
#ifdef _WIN32
|
|
8628 |
DWORD len, w2;
|
|
8629 |
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
8630 |
#else
|
|
8631 |
ssize_t len;
|
|
8632 |
size_t w2;
|
|
8633 |
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
|
8634 |
#endif
|
|
8635 |
|
|
8636 |
/* Do the lock/unlock of the reader mutex before starting the
|
|
8637 |
* write txn. Otherwise other read txns could block writers.
|
|
8638 |
*/
|
|
8639 |
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
|
8640 |
if (rc)
|
|
8641 |
return rc;
|
|
8642 |
|
|
8643 |
if (env->me_txns) {
|
|
8644 |
/* We must start the actual read txn after blocking writers */
|
|
8645 |
mdb_txn_reset0(txn, "reset-stage1");
|
|
8646 |
|
|
8647 |
/* Temporarily block writers until we snapshot the meta pages */
|
|
8648 |
LOCK_MUTEX_W(env);
|
|
8649 |
|
|
8650 |
rc = mdb_txn_renew0(txn);
|
|
8651 |
if (rc) {
|
|
8652 |
UNLOCK_MUTEX_W(env);
|
|
8653 |
goto leave;
|
|
8654 |
}
|
|
8655 |
}
|
|
8656 |
|
|
8657 |
wsize = env->me_psize * 2;
|
|
8658 |
ptr = env->me_map;
|
|
8659 |
w2 = wsize;
|
|
8660 |
while (w2 > 0) {
|
|
8661 |
DO_WRITE(rc, fd, ptr, w2, len);
|
|
8662 |
if (!rc) {
|
|
8663 |
rc = ErrCode();
|
|
8664 |
break;
|
|
8665 |
} else if (len > 0) {
|
|
8666 |
rc = MDB_SUCCESS;
|
|
8667 |
ptr += len;
|
|
8668 |
w2 -= len;
|
|
8669 |
continue;
|
|
8670 |
} else {
|
|
8671 |
/* Non-blocking or async handles are not supported */
|
|
8672 |
rc = EIO;
|
|
8673 |
break;
|
|
8674 |
}
|
|
8675 |
}
|
|
8676 |
if (env->me_txns)
|
|
8677 |
UNLOCK_MUTEX_W(env);
|
|
8678 |
|
|
8679 |
if (rc)
|
|
8680 |
goto leave;
|
|
8681 |
|
|
8682 |
w2 = txn->mt_next_pgno * env->me_psize;
|
|
8683 |
#ifdef WIN32
|
|
8684 |
{
|
|
8685 |
LARGE_INTEGER fsize;
|
|
8686 |
GetFileSizeEx(env->me_fd, &fsize);
|
|
8687 |
if (w2 > fsize.QuadPart)
|
|
8688 |
w2 = fsize.QuadPart;
|
|
8689 |
}
|
|
8690 |
#else
|
|
8691 |
{
|
|
8692 |
struct stat st;
|
|
8693 |
fstat(env->me_fd, &st);
|
|
8694 |
if (w2 > (size_t)st.st_size)
|
|
8695 |
w2 = st.st_size;
|
|
8696 |
}
|
|
8697 |
#endif
|
|
8698 |
wsize = w2 - wsize;
|
|
8699 |
while (wsize > 0) {
|
|
8700 |
if (wsize > MAX_WRITE)
|
|
8701 |
w2 = MAX_WRITE;
|
|
8702 |
else
|
|
8703 |
w2 = wsize;
|
|
8704 |
DO_WRITE(rc, fd, ptr, w2, len);
|
|
8705 |
if (!rc) {
|
|
8706 |
rc = ErrCode();
|
|
8707 |
break;
|
|
8708 |
} else if (len > 0) {
|
|
8709 |
rc = MDB_SUCCESS;
|
|
8710 |
ptr += len;
|
|
8711 |
wsize -= len;
|
|
8712 |
continue;
|
|
8713 |
} else {
|
|
8714 |
rc = EIO;
|
|
8715 |
break;
|
|
8716 |
}
|
|
8717 |
}
|
|
8718 |
|
|
8719 |
leave:
|
|
8720 |
mdb_txn_abort(txn);
|
|
8721 |
return rc;
|
|
8722 |
}
|
|
8723 |
|
|
8724 |
int ESECT
|
|
8725 |
mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags)
|
|
8726 |
{
|
|
8727 |
if (flags & MDB_CP_COMPACT)
|
|
8728 |
return mdb_env_copyfd1(env, fd);
|
|
8729 |
else
|
|
8730 |
return mdb_env_copyfd0(env, fd);
|
|
8731 |
}
|
|
8732 |
|
|
8733 |
int ESECT
|
|
8734 |
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
|
8735 |
{
|
|
8736 |
return mdb_env_copyfd2(env, fd, 0);
|
|
8737 |
}
|
|
8738 |
|
|
8739 |
int ESECT
|
|
8740 |
mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags)
|
|
8741 |
{
|
|
8742 |
int rc, len;
|
|
8743 |
char *lpath;
|
|
8744 |
HANDLE newfd = INVALID_HANDLE_VALUE;
|
|
8745 |
|
|
8746 |
if (env->me_flags & MDB_NOSUBDIR) {
|
|
8747 |
lpath = (char *)path;
|
|
8748 |
} else {
|
|
8749 |
len = strlen(path);
|
|
8750 |
len += sizeof(DATANAME);
|
|
8751 |
lpath = malloc(len);
|
|
8752 |
if (!lpath)
|
|
8753 |
return ENOMEM;
|
|
8754 |
sprintf(lpath, "%s" DATANAME, path);
|
|
8755 |
}
|
|
8756 |
|
|
8757 |
/* The destination path must exist, but the destination file must not.
|
|
8758 |
* We don't want the OS to cache the writes, since the source data is
|
|
8759 |
* already in the OS cache.
|
|
8760 |
*/
|
|
8761 |
#ifdef _WIN32
|
|
8762 |
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
|
8763 |
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
|
8764 |
#else
|
|
8765 |
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
|
8766 |
#endif
|
|
8767 |
if (newfd == INVALID_HANDLE_VALUE) {
|
|
8768 |
rc = ErrCode();
|
|
8769 |
goto leave;
|
|
8770 |
}
|
|
8771 |
|
|
8772 |
if (env->me_psize >= env->me_os_psize) {
|
|
8773 |
#ifdef O_DIRECT
|
|
8774 |
/* Set O_DIRECT if the file system supports it */
|
|
8775 |
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
|
8776 |
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
|
8777 |
#endif
|
|
8778 |
#ifdef F_NOCACHE /* __APPLE__ */
|
|
8779 |
rc = fcntl(newfd, F_NOCACHE, 1);
|
|
8780 |
if (rc) {
|
|
8781 |
rc = ErrCode();
|
|
8782 |
goto leave;
|
|
8783 |
}
|
|
8784 |
#endif
|
|
8785 |
}
|
|
8786 |
|
|
8787 |
rc = mdb_env_copyfd2(env, newfd, flags);
|
|
8788 |
|
|
8789 |
leave:
|
|
8790 |
if (!(env->me_flags & MDB_NOSUBDIR))
|
|
8791 |
free(lpath);
|
|
8792 |
if (newfd != INVALID_HANDLE_VALUE)
|
|
8793 |
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
|
8794 |
rc = ErrCode();
|
|
8795 |
|
|
8796 |
return rc;
|
|
8797 |
}
|
|
8798 |
|
|
8799 |
int ESECT
|
|
8800 |
mdb_env_copy(MDB_env *env, const char *path)
|
|
8801 |
{
|
|
8802 |
return mdb_env_copy2(env, path, 0);
|
|
8803 |
}
|
|
8804 |
|
|
8805 |
int ESECT
|
7934 | 8806 |
mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
7935 | 8807 |
{
|
7936 | 8808 |
if ((flag & CHANGEABLE) != flag)
|
|
7942 | 8814 |
return MDB_SUCCESS;
|
7943 | 8815 |
}
|
7944 | 8816 |
|
7945 | |
int
|
|
8817 |
int ESECT
|
7946 | 8818 |
mdb_env_get_flags(MDB_env *env, unsigned int *arg)
|
7947 | 8819 |
{
|
7948 | 8820 |
if (!env || !arg)
|
|
7952 | 8824 |
return MDB_SUCCESS;
|
7953 | 8825 |
}
|
7954 | 8826 |
|
7955 | |
int
|
|
8827 |
int ESECT
|
7956 | 8828 |
mdb_env_set_userctx(MDB_env *env, void *ctx)
|
7957 | 8829 |
{
|
7958 | 8830 |
if (!env)
|
|
7961 | 8833 |
return MDB_SUCCESS;
|
7962 | 8834 |
}
|
7963 | 8835 |
|
7964 | |
void *
|
|
8836 |
void * ESECT
|
7965 | 8837 |
mdb_env_get_userctx(MDB_env *env)
|
7966 | 8838 |
{
|
7967 | 8839 |
return env ? env->me_userctx : NULL;
|
7968 | 8840 |
}
|
7969 | 8841 |
|
7970 | |
int
|
|
8842 |
int ESECT
|
7971 | 8843 |
mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
7972 | 8844 |
{
|
7973 | 8845 |
if (!env)
|
|
7978 | 8850 |
return MDB_SUCCESS;
|
7979 | 8851 |
}
|
7980 | 8852 |
|
7981 | |
int
|
|
8853 |
int ESECT
|
7982 | 8854 |
mdb_env_get_path(MDB_env *env, const char **arg)
|
7983 | 8855 |
{
|
7984 | 8856 |
if (!env || !arg)
|
|
7988 | 8860 |
return MDB_SUCCESS;
|
7989 | 8861 |
}
|
7990 | 8862 |
|
7991 | |
int
|
|
8863 |
int ESECT
|
7992 | 8864 |
mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg)
|
7993 | 8865 |
{
|
7994 | 8866 |
if (!env || !arg)
|
|
8004 | 8876 |
* @param[out] arg the address of an #MDB_stat structure to receive the stats.
|
8005 | 8877 |
* @return 0, this function always succeeds.
|
8006 | 8878 |
*/
|
8007 | |
static int
|
|
8879 |
static int ESECT
|
8008 | 8880 |
mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
|
8009 | 8881 |
{
|
8010 | 8882 |
arg->ms_psize = env->me_psize;
|
|
8016 | 8888 |
|
8017 | 8889 |
return MDB_SUCCESS;
|
8018 | 8890 |
}
|
8019 | |
int
|
|
8891 |
|
|
8892 |
int ESECT
|
8020 | 8893 |
mdb_env_stat(MDB_env *env, MDB_stat *arg)
|
8021 | 8894 |
{
|
8022 | 8895 |
int toggle;
|
|
8029 | 8902 |
return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg);
|
8030 | 8903 |
}
|
8031 | 8904 |
|
8032 | |
int
|
|
8905 |
int ESECT
|
8033 | 8906 |
mdb_env_info(MDB_env *env, MDB_envinfo *arg)
|
8034 | 8907 |
{
|
8035 | 8908 |
int toggle;
|
|
8038 | 8911 |
return EINVAL;
|
8039 | 8912 |
|
8040 | 8913 |
toggle = mdb_env_pick_meta(env);
|
8041 | |
arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0;
|
|
8914 |
arg->me_mapaddr = env->me_metas[toggle]->mm_address;
|
8042 | 8915 |
arg->me_mapsize = env->me_mapsize;
|
8043 | 8916 |
arg->me_maxreaders = env->me_maxreaders;
|
8044 | 8917 |
|
|
8080 | 8953 |
MDB_val key, data;
|
8081 | 8954 |
MDB_dbi i;
|
8082 | 8955 |
MDB_cursor mc;
|
|
8956 |
MDB_db dummy;
|
8083 | 8957 |
int rc, dbflag, exact;
|
8084 | |
unsigned int unused = 0;
|
|
8958 |
unsigned int unused = 0, seq;
|
8085 | 8959 |
size_t len;
|
8086 | 8960 |
|
8087 | 8961 |
if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
|
|
8149 | 9023 |
return MDB_INCOMPATIBLE;
|
8150 | 9024 |
} else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
|
8151 | 9025 |
/* Create if requested */
|
8152 | |
MDB_db dummy;
|
8153 | 9026 |
data.mv_size = sizeof(MDB_db);
|
8154 | 9027 |
data.mv_data = &dummy;
|
8155 | 9028 |
memset(&dummy, 0, sizeof(dummy));
|
|
8166 | 9039 |
txn->mt_dbxs[slot].md_name.mv_size = len;
|
8167 | 9040 |
txn->mt_dbxs[slot].md_rel = NULL;
|
8168 | 9041 |
txn->mt_dbflags[slot] = dbflag;
|
|
9042 |
/* txn-> and env-> are the same in read txns, use
|
|
9043 |
* tmp variable to avoid undefined assignment
|
|
9044 |
*/
|
|
9045 |
seq = ++txn->mt_env->me_dbiseqs[slot];
|
|
9046 |
txn->mt_dbiseqs[slot] = seq;
|
|
9047 |
|
8169 | 9048 |
memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
|
8170 | 9049 |
*dbi = slot;
|
8171 | 9050 |
mdb_default_cmp(txn, slot);
|
|
8179 | 9058 |
|
8180 | 9059 |
int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
|
8181 | 9060 |
{
|
8182 | |
if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
|
|
9061 |
if (!arg || !TXN_DBI_EXIST(txn, dbi))
|
8183 | 9062 |
return EINVAL;
|
|
9063 |
|
|
9064 |
if (txn->mt_flags & MDB_TXN_ERROR)
|
|
9065 |
return MDB_BAD_TXN;
|
8184 | 9066 |
|
8185 | 9067 |
if (txn->mt_dbflags[dbi] & DB_STALE) {
|
8186 | 9068 |
MDB_cursor mc;
|
|
8197 | 9079 |
if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
|
8198 | 9080 |
return;
|
8199 | 9081 |
ptr = env->me_dbxs[dbi].md_name.mv_data;
|
8200 | |
env->me_dbxs[dbi].md_name.mv_data = NULL;
|
8201 | |
env->me_dbxs[dbi].md_name.mv_size = 0;
|
8202 | |
env->me_dbflags[dbi] = 0;
|
8203 | |
free(ptr);
|
|
9082 |
/* If there was no name, this was already closed */
|
|
9083 |
if (ptr) {
|
|
9084 |
env->me_dbxs[dbi].md_name.mv_data = NULL;
|
|
9085 |
env->me_dbxs[dbi].md_name.mv_size = 0;
|
|
9086 |
env->me_dbflags[dbi] = 0;
|
|
9087 |
env->me_dbiseqs[dbi]++;
|
|
9088 |
free(ptr);
|
|
9089 |
}
|
8204 | 9090 |
}
|
8205 | 9091 |
|
8206 | 9092 |
int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags)
|
8207 | 9093 |
{
|
8208 | 9094 |
/* We could return the flags for the FREE_DBI too but what's the point? */
|
8209 | |
if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
|
|
9095 |
if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8210 | 9096 |
return EINVAL;
|
8211 | 9097 |
*flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
|
8212 | 9098 |
return MDB_SUCCESS;
|
|
8246 | 9132 |
memcpy(&pg, NODEDATA(ni), sizeof(pg));
|
8247 | 9133 |
rc = mdb_page_get(txn, pg, &omp, NULL);
|
8248 | 9134 |
if (rc != 0)
|
8249 | |
return rc;
|
|
9135 |
goto done;
|
8250 | 9136 |
mdb_cassert(mc, IS_OVERFLOW(omp));
|
8251 | 9137 |
rc = mdb_midl_append_range(&txn->mt_free_pgs,
|
8252 | 9138 |
pg, omp->mp_pages);
|
8253 | 9139 |
if (rc)
|
8254 | |
return rc;
|
|
9140 |
goto done;
|
8255 | 9141 |
} else if (subs && (ni->mn_flags & F_SUBDATA)) {
|
8256 | 9142 |
mdb_xcursor_init1(mc, ni);
|
8257 | 9143 |
rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
|
8258 | 9144 |
if (rc)
|
8259 | |
return rc;
|
|
9145 |
goto done;
|
8260 | 9146 |
}
|
8261 | 9147 |
}
|
8262 | 9148 |
} else {
|
8263 | 9149 |
if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0)
|
8264 | |
return rc;
|
|
9150 |
goto done;
|
8265 | 9151 |
for (i=0; i<n; i++) {
|
8266 | 9152 |
pgno_t pg;
|
8267 | 9153 |
ni = NODEPTR(mp, i);
|
|
8275 | 9161 |
mc->mc_ki[mc->mc_top] = i;
|
8276 | 9162 |
rc = mdb_cursor_sibling(mc, 1);
|
8277 | 9163 |
if (rc) {
|
|
9164 |
if (rc != MDB_NOTFOUND)
|
|
9165 |
goto done;
|
8278 | 9166 |
/* no more siblings, go back to beginning
|
8279 | 9167 |
* of previous level.
|
8280 | 9168 |
*/
|
|
8288 | 9176 |
}
|
8289 | 9177 |
/* free it */
|
8290 | 9178 |
rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root);
|
|
9179 |
done:
|
|
9180 |
if (rc)
|
|
9181 |
txn->mt_flags |= MDB_TXN_ERROR;
|
8291 | 9182 |
} else if (rc == MDB_NOTFOUND) {
|
8292 | 9183 |
rc = MDB_SUCCESS;
|
8293 | 9184 |
}
|
|
8299 | 9190 |
MDB_cursor *mc, *m2;
|
8300 | 9191 |
int rc;
|
8301 | 9192 |
|
8302 | |
if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
9193 |
if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8303 | 9194 |
return EINVAL;
|
8304 | 9195 |
|
8305 | 9196 |
if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
|
8306 | 9197 |
return EACCES;
|
|
9198 |
|
|
9199 |
if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi))
|
|
9200 |
return MDB_BAD_DBI;
|
8307 | 9201 |
|
8308 | 9202 |
rc = mdb_cursor_open(txn, dbi, &mc);
|
8309 | 9203 |
if (rc)
|
|
8318 | 9212 |
|
8319 | 9213 |
/* Can't delete the main DB */
|
8320 | 9214 |
if (del && dbi > MAIN_DBI) {
|
8321 | |
rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
|
|
9215 |
rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, 0);
|
8322 | 9216 |
if (!rc) {
|
8323 | 9217 |
txn->mt_dbflags[dbi] = DB_STALE;
|
8324 | 9218 |
mdb_dbi_close(txn->mt_env, dbi);
|
|
9219 |
} else {
|
|
9220 |
txn->mt_flags |= MDB_TXN_ERROR;
|
8325 | 9221 |
}
|
8326 | 9222 |
} else {
|
8327 | 9223 |
/* reset the DB record, mark it dirty */
|
|
8342 | 9238 |
|
8343 | 9239 |
int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
|
8344 | 9240 |
{
|
8345 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
9241 |
if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8346 | 9242 |
return EINVAL;
|
8347 | 9243 |
|
8348 | 9244 |
txn->mt_dbxs[dbi].md_cmp = cmp;
|
|
8351 | 9247 |
|
8352 | 9248 |
int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
|
8353 | 9249 |
{
|
8354 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
9250 |
if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8355 | 9251 |
return EINVAL;
|
8356 | 9252 |
|
8357 | 9253 |
txn->mt_dbxs[dbi].md_dcmp = cmp;
|
|
8360 | 9256 |
|
8361 | 9257 |
int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
|
8362 | 9258 |
{
|
8363 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
9259 |
if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8364 | 9260 |
return EINVAL;
|
8365 | 9261 |
|
8366 | 9262 |
txn->mt_dbxs[dbi].md_rel = rel;
|
|
8369 | 9265 |
|
8370 | 9266 |
int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
|
8371 | 9267 |
{
|
8372 | |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
9268 |
if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
|
8373 | 9269 |
return EINVAL;
|
8374 | 9270 |
|
8375 | 9271 |
txn->mt_dbxs[dbi].md_relctx = ctx;
|
8376 | 9272 |
return MDB_SUCCESS;
|
8377 | 9273 |
}
|
8378 | 9274 |
|
8379 | |
int mdb_env_get_maxkeysize(MDB_env *env)
|
|
9275 |
int ESECT
|
|
9276 |
mdb_env_get_maxkeysize(MDB_env *env)
|
8380 | 9277 |
{
|
8381 | 9278 |
return ENV_MAXKEY(env);
|
8382 | 9279 |
}
|
8383 | 9280 |
|
8384 | |
int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
9281 |
int ESECT
|
|
9282 |
mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
8385 | 9283 |
{
|
8386 | 9284 |
unsigned int i, rdrs;
|
8387 | 9285 |
MDB_reader *mr;
|
|
8421 | 9319 |
/** Insert pid into list if not already present.
|
8422 | 9320 |
* return -1 if already present.
|
8423 | 9321 |
*/
|
8424 | |
static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
|
9322 |
static int ESECT
|
|
9323 |
mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
8425 | 9324 |
{
|
8426 | 9325 |
/* binary search of pid in list */
|
8427 | 9326 |
unsigned base = 0;
|
|
8457 | 9356 |
return 0;
|
8458 | 9357 |
}
|
8459 | 9358 |
|
8460 | |
int mdb_reader_check(MDB_env *env, int *dead)
|
|
9359 |
int ESECT
|
|
9360 |
mdb_reader_check(MDB_env *env, int *dead)
|
8461 | 9361 |
{
|
8462 | 9362 |
unsigned int i, j, rdrs;
|
8463 | 9363 |
MDB_reader *mr;
|