64 | 64 |
#include <fcntl.h>
|
65 | 65 |
#endif
|
66 | 66 |
|
67 | |
#include <assert.h>
|
68 | 67 |
#include <errno.h>
|
69 | 68 |
#include <limits.h>
|
70 | 69 |
#include <stddef.h>
|
|
148 | 147 |
/** @defgroup internal MDB Internals
|
149 | 148 |
* @{
|
150 | 149 |
*/
|
151 | |
/** @defgroup compat Windows Compatibility Macros
|
|
150 |
/** @defgroup compat Compatibility Macros
|
152 | 151 |
* A bunch of macros to minimize the amount of platform-specific ifdefs
|
153 | 152 |
* needed throughout the rest of the code. When the features this library
|
154 | 153 |
* needs are similar enough to POSIX to be hidden in a one-or-two line
|
155 | 154 |
* replacement, this macro approach is used.
|
156 | 155 |
* @{
|
157 | 156 |
*/
|
|
157 |
|
|
158 |
/** Wrapper around __func__, which is a C99 feature */
|
|
159 |
#if __STDC_VERSION__ >= 199901L
|
|
160 |
# define mdb_func_ __func__
|
|
161 |
#elif __GNUC__ >= 2 || _MSC_VER >= 1300
|
|
162 |
# define mdb_func_ __FUNCTION__
|
|
163 |
#else
|
|
164 |
/* If a debug message says <mdb_unknown>(), update the #if statements above */
|
|
165 |
# define mdb_func_ "<mdb_unknown>"
|
|
166 |
#endif
|
|
167 |
|
158 | 168 |
#ifdef _WIN32
|
159 | 169 |
#define MDB_USE_HASH 1
|
160 | 170 |
#define MDB_PIDLOCK 0
|
|
326 | 336 |
*/
|
327 | 337 |
# define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
|
328 | 338 |
# define DPRINTF0(fmt, ...) \
|
329 | |
fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__)
|
|
339 |
fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
|
330 | 340 |
#else
|
331 | 341 |
# define DPRINTF(args) ((void) 0)
|
332 | 342 |
#endif
|
|
381 | 391 |
/** The version number for a database's lockfile format. */
|
382 | 392 |
#define MDB_LOCK_VERSION 1
|
383 | 393 |
|
384 | |
/** @brief The maximum size of a key in the database.
|
|
394 |
/** @brief The max size of a key we can write, or 0 for dynamic max.
|
385 | 395 |
*
|
386 | |
* The library rejects bigger keys, and cannot deal with records
|
387 | |
* with bigger keys stored by a library with bigger max keysize.
|
|
396 |
* Define this as 0 to compute the max from the page size. 511
|
|
397 |
* is default for backwards compat: liblmdb <= 0.9.10 can break
|
|
398 |
* when modifying a DB with keys/dupsort data bigger than its max.
|
388 | 399 |
*
|
389 | |
* We require that keys all fit onto a regular page. This limit
|
390 | |
* could be raised a bit further if needed; to something just
|
391 | |
* under (page size / #MDB_MINKEYS / 3).
|
392 | |
*
|
393 | |
* Note that data items in an #MDB_DUPSORT database are actually keys
|
394 | |
* of a subDB, so they're also limited to this size.
|
|
400 |
* Data items in an #MDB_DUPSORT database are also limited to
|
|
401 |
* this size, since they're actually keys of a sub-DB. Keys and
|
|
402 |
* #MDB_DUPSORT data items must fit on a node in a regular page.
|
395 | 403 |
*/
|
396 | 404 |
#ifndef MDB_MAXKEYSIZE
|
397 | 405 |
#define MDB_MAXKEYSIZE 511
|
|
406 |
#endif
|
|
407 |
|
|
408 |
/** The maximum size of a key we can write to the environment. */
|
|
409 |
#if MDB_MAXKEYSIZE
|
|
410 |
#define ENV_MAXKEY(env) (MDB_MAXKEYSIZE)
|
|
411 |
#else
|
|
412 |
#define ENV_MAXKEY(env) ((env)->me_maxkey)
|
398 | 413 |
#endif
|
399 | 414 |
|
400 | 415 |
/** @brief The maximum size of a data item.
|
|
404 | 419 |
#define MAXDATASIZE 0xffffffffUL
|
405 | 420 |
|
406 | 421 |
#if MDB_DEBUG
|
|
422 |
/** Key size which fits in a #DKBUF.
|
|
423 |
* @ingroup debug
|
|
424 |
*/
|
|
425 |
#define DKBUF_MAXKEYSIZE ((MDB_MAXKEYSIZE) > 0 ? (MDB_MAXKEYSIZE) : 511)
|
407 | 426 |
/** A key buffer.
|
408 | 427 |
* @ingroup debug
|
409 | 428 |
* This is used for printing a hex dump of a key's contents.
|
410 | 429 |
*/
|
411 | |
#define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)]
|
|
430 |
#define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
|
412 | 431 |
/** Display a key in hex.
|
413 | 432 |
* @ingroup debug
|
414 | 433 |
* Invoke a function to display a key in hex.
|
|
426 | 445 |
|
427 | 446 |
/** Test if the flags \b f are set in a flag word \b w. */
|
428 | 447 |
#define F_ISSET(w, f) (((w) & (f)) == (f))
|
|
448 |
|
|
449 |
/** Round \b n up to an even number. */
|
|
450 |
#define EVEN(n) (((n) + 1U) & -2) /* -2 is a signed int, so it is converted (sign-extended) to the type of (n)+1U: every bit except bit 0 stays set, whatever the width of n */
|
429 | 451 |
|
430 | 452 |
/** Used for offsets within a single page.
|
431 | 453 |
* Since memory pages are typically 4 or 8KB in size, 12-13 bits,
|
|
678 | 700 |
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
679 | 701 |
|
680 | 702 |
/** Header for a single key/data pair within a page.
|
681 | |
* We guarantee 2-byte alignment for nodes.
|
|
703 |
* Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
|
|
704 |
* We guarantee 2-byte alignment for 'MDB_node's.
|
682 | 705 |
*/
|
683 | 706 |
typedef struct MDB_node {
|
684 | 707 |
/** lo and hi are used for data size on leaf nodes and for
|
|
687 | 710 |
* They are in host byte order in case that lets some
|
688 | 711 |
* accesses be optimized into a 32-bit word access.
|
689 | 712 |
*/
|
690 | |
#define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN]
|
691 | |
#define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */
|
692 | |
unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */
|
|
713 |
#if BYTE_ORDER == LITTLE_ENDIAN
|
|
714 |
unsigned short mn_lo, mn_hi; /**< part of data size or pgno */
|
|
715 |
#else
|
|
716 |
unsigned short mn_hi, mn_lo;
|
|
717 |
#endif
|
693 | 718 |
/** @defgroup mdb_node Node Flags
|
694 | 719 |
* @ingroup internal
|
695 | 720 |
* Flags for node headers.
|
|
1042 | 1067 |
int me_maxfree_1pg;
|
1043 | 1068 |
/** Max size of a node on a page */
|
1044 | 1069 |
unsigned int me_nodemax;
|
|
1070 |
#if !(MDB_MAXKEYSIZE)
|
|
1071 |
unsigned int me_maxkey; /**< max size of a key */
|
|
1072 |
#endif
|
1045 | 1073 |
#ifdef _WIN32
|
1046 | 1074 |
int me_pidquery; /**< Used in OpenProcess */
|
1047 | 1075 |
HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
|
|
1050 | 1078 |
sem_t *me_rmutex; /* Shared mutexes are not supported */
|
1051 | 1079 |
sem_t *me_wmutex;
|
1052 | 1080 |
#endif
|
|
1081 |
void *me_userctx; /**< User-settable context */
|
|
1082 |
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
|
1053 | 1083 |
};
|
1054 | 1084 |
|
1055 | 1085 |
/** Nested transaction */
|
|
1098 | 1128 |
static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp);
|
1099 | 1129 |
static int mdb_node_add(MDB_cursor *mc, indx_t indx,
|
1100 | 1130 |
MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags);
|
1101 | |
static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize);
|
|
1131 |
static void mdb_node_del(MDB_cursor *mc, int ksize);
|
1102 | 1132 |
static void mdb_node_shrink(MDB_page *mp, indx_t indx);
|
1103 | 1133 |
static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst);
|
1104 | 1134 |
static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
|
|
1185 | 1215 |
return strerror(err);
|
1186 | 1216 |
}
|
1187 | 1217 |
|
|
1218 |
/** assert(3) variant in cursor context */
|
|
1219 |
#define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr)
|
|
1220 |
/** assert(3) variant in transaction context */
|
|
1221 |
/* The first argument is the transaction itself; its mt_env is the environment
 * handed to mdb_assert0(). The parameter must be named as it is used in the
 * expansion, otherwise the argument is ignored and a caller-scope variable
 * called 'txn' is captured instead.
 */
#define mdb_tassert(txn, expr)	mdb_assert0((txn)->mt_env, expr, #expr)
|
|
1222 |
/** assert(3) variant in environment context */
|
|
1223 |
#define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr)
|
|
1224 |
|
|
1225 |
#ifndef NDEBUG
|
|
1226 |
# define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \
|
|
1227 |
mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__))
|
|
1228 |
|
|
1229 |
static void
|
|
1230 |
mdb_assert_fail(MDB_env *env, const char *expr_txt,
|
|
1231 |
const char *func, const char *file, int line)
|
|
1232 |
{
|
|
1233 |
char buf[400];
|
|
1234 |
sprintf(buf, "%.100s:%d: Assertion '%.200s' failed in %.40s()",
|
|
1235 |
file, line, expr_txt, func);
|
|
1236 |
if (env->me_assert_func)
|
|
1237 |
env->me_assert_func(env, buf);
|
|
1238 |
fprintf(stderr, "%s\n", buf);
|
|
1239 |
abort();
|
|
1240 |
}
|
|
1241 |
#else
|
|
1242 |
# define mdb_assert0(env, expr, expr_txt) ((void) 0)
|
|
1243 |
#endif /* NDEBUG */
|
|
1244 |
|
1188 | 1245 |
#if MDB_DEBUG
|
|
1246 |
/** Return the page number of \b mp which may be sub-page, for debug output */
|
|
1247 |
static pgno_t
|
|
1248 |
mdb_dbg_pgno(MDB_page *mp)
|
|
1249 |
{
|
|
1250 |
pgno_t ret;
|
|
1251 |
COPY_PGNO(ret, mp->mp_pgno);
|
|
1252 |
return ret;
|
|
1253 |
}
|
|
1254 |
|
1189 | 1255 |
/** Display a key in hexadecimal and return the address of the result.
|
1190 | 1256 |
* @param[in] key the key to display
|
1191 | 1257 |
* @param[in] buf the buffer to write into. Should always be #DKBUF.
|
|
1201 | 1267 |
if (!key)
|
1202 | 1268 |
return "";
|
1203 | 1269 |
|
1204 | |
if (key->mv_size > MDB_MAXKEYSIZE)
|
|
1270 |
if (key->mv_size > DKBUF_MAXKEYSIZE)
|
1205 | 1271 |
return "MDB_MAXKEYSIZE";
|
1206 | 1272 |
/* may want to make this a dynamic check: if the key is mostly
|
1207 | 1273 |
* printable characters, print it as-is instead of converting to hex.
|
|
1221 | 1287 |
mdb_page_list(MDB_page *mp)
|
1222 | 1288 |
{
|
1223 | 1289 |
MDB_node *node;
|
1224 | |
unsigned int i, nkeys, nsize;
|
|
1290 |
unsigned int i, nkeys, nsize, total = 0;
|
1225 | 1291 |
MDB_val key;
|
1226 | 1292 |
DKBUF;
|
1227 | 1293 |
|
1228 | 1294 |
nkeys = NUMKEYS(mp);
|
1229 | |
fprintf(stderr, "Page %"Z"u numkeys %d\n", mp->mp_pgno, nkeys);
|
|
1295 |
fprintf(stderr, "Page %"Z"u numkeys %d\n", mdb_dbg_pgno(mp), nkeys);
|
1230 | 1296 |
for (i=0; i<nkeys; i++) {
|
1231 | 1297 |
node = NODEPTR(mp, i);
|
1232 | 1298 |
key.mv_size = node->mn_ksize;
|
1233 | 1299 |
key.mv_data = node->mn_data;
|
1234 | |
nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
|
|
1300 |
nsize = NODESIZE + key.mv_size;
|
1235 | 1301 |
if (IS_BRANCH(mp)) {
|
1236 | 1302 |
fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
|
1237 | 1303 |
DKEY(&key));
|
|
1304 |
total += nsize;
|
1238 | 1305 |
} else {
|
1239 | 1306 |
if (F_ISSET(node->mn_flags, F_BIGDATA))
|
1240 | 1307 |
nsize += sizeof(pgno_t);
|
1241 | 1308 |
else
|
1242 | 1309 |
nsize += NODEDSZ(node);
|
|
1310 |
total += nsize;
|
|
1311 |
nsize += sizeof(indx_t);
|
1243 | 1312 |
fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
|
1244 | 1313 |
}
|
1245 | |
}
|
|
1314 |
total = EVEN(total);
|
|
1315 |
}
|
|
1316 |
fprintf(stderr, "Total: %d\n", total);
|
1246 | 1317 |
}
|
1247 | 1318 |
|
1248 | 1319 |
void
|
|
1356 | 1427 |
off = sz - psize;
|
1357 | 1428 |
}
|
1358 | 1429 |
if ((ret = malloc(sz)) != NULL) {
|
|
1430 |
VGMEMP_ALLOC(env, ret, sz);
|
1359 | 1431 |
if (!(env->me_flags & MDB_NOMEMINIT)) {
|
1360 | 1432 |
memset((char *)ret + off, 0, psize);
|
1361 | 1433 |
ret->mp_pad = 0;
|
1362 | 1434 |
}
|
1363 | |
VGMEMP_ALLOC(env, ret, sz);
|
|
1435 |
} else {
|
|
1436 |
txn->mt_flags |= MDB_TXN_ERROR;
|
1364 | 1437 |
}
|
1365 | 1438 |
return ret;
|
1366 | 1439 |
}
|
|
1626 | 1699 |
mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
|
1627 | 1700 |
{
|
1628 | 1701 |
MDB_ID2 mid;
|
1629 | |
int (*insert)(MDB_ID2L, MDB_ID2 *);
|
|
1702 |
int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
|
1630 | 1703 |
|
1631 | 1704 |
if (txn->mt_env->me_flags & MDB_WRITEMAP) {
|
1632 | 1705 |
insert = mdb_mid2l_append;
|
|
1635 | 1708 |
}
|
1636 | 1709 |
mid.mid = mp->mp_pgno;
|
1637 | 1710 |
mid.mptr = mp;
|
1638 | |
insert(txn->mt_u.dirty_list, &mid);
|
|
1711 |
rc = insert(txn->mt_u.dirty_list, &mid);
|
|
1712 |
mdb_tassert(txn, rc == 0);
|
1639 | 1713 |
txn->mt_dirty_room--;
|
1640 | 1714 |
}
|
1641 | 1715 |
|
|
1668 | 1742 |
#else
|
1669 | 1743 |
enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
|
1670 | 1744 |
#endif
|
1671 | |
int rc, n2 = num-1, retry = Max_retries;
|
|
1745 |
int rc, retry = Max_retries;
|
1672 | 1746 |
MDB_txn *txn = mc->mc_txn;
|
1673 | 1747 |
MDB_env *env = txn->mt_env;
|
1674 | 1748 |
pgno_t pgno, *mop = env->me_pghead;
|
1675 | |
unsigned i, j, k, mop_len = mop ? mop[0] : 0;
|
|
1749 |
unsigned i, j, k, mop_len = mop ? mop[0] : 0, n2 = num-1;
|
1676 | 1750 |
MDB_page *np;
|
1677 | 1751 |
txnid_t oldest = 0, last;
|
1678 | 1752 |
MDB_cursor_op op;
|
|
1681 | 1755 |
*mp = NULL;
|
1682 | 1756 |
|
1683 | 1757 |
/* If our dirty list is already full, we can't do anything */
|
1684 | |
if (txn->mt_dirty_room == 0)
|
1685 | |
return MDB_TXN_FULL;
|
|
1758 |
if (txn->mt_dirty_room == 0) {
|
|
1759 |
rc = MDB_TXN_FULL;
|
|
1760 |
goto fail;
|
|
1761 |
}
|
1686 | 1762 |
|
1687 | 1763 |
for (op = MDB_FIRST;; op = MDB_NEXT) {
|
1688 | 1764 |
MDB_val key, data;
|
|
1692 | 1768 |
/* Seek a big enough contiguous page range. Prefer
|
1693 | 1769 |
* pages at the tail, just truncating the list.
|
1694 | 1770 |
*/
|
1695 | |
if (mop_len >= (unsigned)num) {
|
|
1771 |
if (mop_len > n2) {
|
1696 | 1772 |
i = mop_len;
|
1697 | 1773 |
do {
|
1698 | 1774 |
pgno = mop[i];
|
1699 | 1775 |
if (mop[i-n2] == pgno+n2)
|
1700 | 1776 |
goto search_done;
|
1701 | |
} while (--i >= (unsigned)num);
|
|
1777 |
} while (--i > n2);
|
1702 | 1778 |
if (Max_retries < INT_MAX && --retry < 0)
|
1703 | 1779 |
break;
|
1704 | 1780 |
}
|
|
1727 | 1803 |
if (rc) {
|
1728 | 1804 |
if (rc == MDB_NOTFOUND)
|
1729 | 1805 |
break;
|
1730 | |
return rc;
|
|
1806 |
goto fail;
|
1731 | 1807 |
}
|
1732 | 1808 |
last = *(txnid_t*)key.mv_data;
|
1733 | 1809 |
if (oldest <= last)
|
|
1740 | 1816 |
idl = (MDB_ID *) data.mv_data;
|
1741 | 1817 |
i = idl[0];
|
1742 | 1818 |
if (!mop) {
|
1743 | |
if (!(env->me_pghead = mop = mdb_midl_alloc(i)))
|
1744 | |
return ENOMEM;
|
|
1819 |
if (!(env->me_pghead = mop = mdb_midl_alloc(i))) {
|
|
1820 |
rc = ENOMEM;
|
|
1821 |
goto fail;
|
|
1822 |
}
|
1745 | 1823 |
} else {
|
1746 | 1824 |
if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0)
|
1747 | |
return rc;
|
|
1825 |
goto fail;
|
1748 | 1826 |
mop = env->me_pghead;
|
1749 | 1827 |
}
|
1750 | 1828 |
env->me_pglast = last;
|
|
1773 | 1851 |
pgno = txn->mt_next_pgno;
|
1774 | 1852 |
if (pgno + num >= env->me_maxpg) {
|
1775 | 1853 |
DPUTS("DB size maxed out");
|
1776 | |
return MDB_MAP_FULL;
|
|
1854 |
rc = MDB_MAP_FULL;
|
|
1855 |
goto fail;
|
1777 | 1856 |
}
|
1778 | 1857 |
|
1779 | 1858 |
search_done:
|
1780 | 1859 |
if (env->me_flags & MDB_WRITEMAP) {
|
1781 | 1860 |
np = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
1782 | 1861 |
} else {
|
1783 | |
if (!(np = mdb_page_malloc(txn, num)))
|
1784 | |
return ENOMEM;
|
|
1862 |
if (!(np = mdb_page_malloc(txn, num))) {
|
|
1863 |
rc = ENOMEM;
|
|
1864 |
goto fail;
|
|
1865 |
}
|
1785 | 1866 |
}
|
1786 | 1867 |
if (i) {
|
1787 | 1868 |
mop[0] = mop_len -= num;
|
|
1796 | 1877 |
*mp = np;
|
1797 | 1878 |
|
1798 | 1879 |
return MDB_SUCCESS;
|
|
1880 |
|
|
1881 |
fail:
|
|
1882 |
txn->mt_flags |= MDB_TXN_ERROR;
|
|
1883 |
return rc;
|
1799 | 1884 |
}
|
1800 | 1885 |
|
1801 | 1886 |
/** Copy the used portions of a non-overflow page.
|
|
1826 | 1911 |
* If a page being referenced was spilled to disk in this txn, bring
|
1827 | 1912 |
* it back and make it dirty/writable again.
|
1828 | 1913 |
* @param[in] txn the transaction handle.
|
1829 | |
* @param[in] mp the page being referenced.
|
|
1914 |
* @param[in] mp the page being referenced. It must not be dirty.
|
1830 | 1915 |
* @param[out] ret the writable page, if any. ret is unchanged if
|
1831 | 1916 |
* mp wasn't spilled.
|
1832 | 1917 |
*/
|
|
1902 | 1987 |
np = NULL;
|
1903 | 1988 |
rc = mdb_page_unspill(txn, mp, &np);
|
1904 | 1989 |
if (rc)
|
1905 | |
return rc;
|
|
1990 |
goto fail;
|
1906 | 1991 |
if (np)
|
1907 | 1992 |
goto done;
|
1908 | 1993 |
}
|
1909 | 1994 |
if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) ||
|
1910 | 1995 |
(rc = mdb_page_alloc(mc, 1, &np)))
|
1911 | |
return rc;
|
|
1996 |
goto fail;
|
1912 | 1997 |
pgno = np->mp_pgno;
|
1913 | 1998 |
DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc),
|
1914 | 1999 |
mp->mp_pgno, pgno));
|
1915 | |
assert(mp->mp_pgno != pgno);
|
|
2000 |
mdb_cassert(mc, mp->mp_pgno != pgno);
|
1916 | 2001 |
mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
|
1917 | 2002 |
/* Update the parent page, if any, to point to the new page */
|
1918 | 2003 |
if (mc->mc_top) {
|
|
1933 | 2018 |
if (x <= dl[0].mid && dl[x].mid == pgno) {
|
1934 | 2019 |
if (mp != dl[x].mptr) { /* bad cursor? */
|
1935 | 2020 |
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
2021 |
txn->mt_flags |= MDB_TXN_ERROR;
|
1936 | 2022 |
return MDB_CORRUPTED;
|
1937 | 2023 |
}
|
1938 | 2024 |
return 0;
|
1939 | 2025 |
}
|
1940 | 2026 |
}
|
1941 | |
assert(dl[0].mid < MDB_IDL_UM_MAX);
|
|
2027 |
mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX);
|
1942 | 2028 |
/* No - copy it */
|
1943 | 2029 |
np = mdb_page_malloc(txn, 1);
|
1944 | 2030 |
if (!np)
|
1945 | 2031 |
return ENOMEM;
|
1946 | 2032 |
mid.mid = pgno;
|
1947 | 2033 |
mid.mptr = np;
|
1948 | |
mdb_mid2l_insert(dl, &mid);
|
|
2034 |
rc = mdb_mid2l_insert(dl, &mid);
|
|
2035 |
mdb_cassert(mc, rc == 0);
|
1949 | 2036 |
} else {
|
1950 | 2037 |
return 0;
|
1951 | 2038 |
}
|
|
1981 | 2068 |
}
|
1982 | 2069 |
}
|
1983 | 2070 |
return 0;
|
|
2071 |
|
|
2072 |
fail:
|
|
2073 |
txn->mt_flags |= MDB_TXN_ERROR;
|
|
2074 |
return rc;
|
1984 | 2075 |
}
|
1985 | 2076 |
|
1986 | 2077 |
int
|
|
2178 | 2269 |
|
2179 | 2270 |
if (!(env->me_flags & MDB_LIVE_READER)) {
|
2180 | 2271 |
rc = mdb_reader_pid(env, Pidset, pid);
|
2181 | |
if (rc) {
|
2182 | |
UNLOCK_MUTEX_R(env);
|
|
2272 |
if (rc)
|
2183 | 2273 |
return rc;
|
2184 | |
}
|
2185 | 2274 |
env->me_flags |= MDB_LIVE_READER;
|
2186 | 2275 |
}
|
2187 | 2276 |
|
|
2542 | 2631 |
return rc;
|
2543 | 2632 |
pglast = head_id = *(txnid_t *)key.mv_data;
|
2544 | 2633 |
total_room = head_room = 0;
|
2545 | |
assert(pglast <= env->me_pglast);
|
|
2634 |
mdb_tassert(txn, pglast <= env->me_pglast);
|
2546 | 2635 |
rc = mdb_cursor_del(&mc, 0);
|
2547 | 2636 |
if (rc)
|
2548 | 2637 |
return rc;
|
|
2637 | 2726 |
ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
|
2638 | 2727 |
MDB_ID save;
|
2639 | 2728 |
|
2640 | |
assert(len >= 0 && id <= env->me_pglast);
|
|
2729 |
mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
|
2641 | 2730 |
key.mv_data = &id;
|
2642 | 2731 |
if (len > mop_len) {
|
2643 | 2732 |
len = mop_len;
|
|
2803 | 2892 |
unsigned int i;
|
2804 | 2893 |
MDB_env *env;
|
2805 | 2894 |
|
2806 | |
assert(txn != NULL);
|
2807 | |
assert(txn->mt_env != NULL);
|
|
2895 |
if (txn == NULL || txn->mt_env == NULL)
|
|
2896 |
return EINVAL;
|
2808 | 2897 |
|
2809 | 2898 |
if (txn->mt_child) {
|
2810 | 2899 |
rc = mdb_txn_commit(txn->mt_child);
|
|
2912 | 3001 |
if (yp == dst[x].mid)
|
2913 | 3002 |
free(dst[x--].mptr);
|
2914 | 3003 |
}
|
2915 | |
assert(i == x);
|
|
3004 |
mdb_tassert(txn, i == x);
|
2916 | 3005 |
dst[0].mid = len;
|
2917 | 3006 |
free(txn->mt_u.dirty_list);
|
2918 | 3007 |
parent->mt_dirty_room = txn->mt_dirty_room;
|
|
3144 | 3233 |
#else
|
3145 | 3234 |
int r2;
|
3146 | 3235 |
#endif
|
3147 | |
|
3148 | |
assert(txn != NULL);
|
3149 | |
assert(txn->mt_env != NULL);
|
3150 | 3236 |
|
3151 | 3237 |
toggle = txn->mt_txnid & 1;
|
3152 | 3238 |
DPRINTF(("writing meta page %d for root page %"Z"u",
|
|
3469 | 3555 |
env->me_mapsize = minsize;
|
3470 | 3556 |
}
|
3471 | 3557 |
|
3472 | |
rc = mdb_env_map(env, meta.mm_address, newenv);
|
|
3558 |
rc = mdb_env_map(env, meta.mm_address, newenv || env->me_mapsize != meta.mm_mapsize);
|
3473 | 3559 |
if (rc)
|
3474 | 3560 |
return rc;
|
3475 | 3561 |
|
|
3481 | 3567 |
return i;
|
3482 | 3568 |
}
|
3483 | 3569 |
}
|
|
3570 |
|
3484 | 3571 |
env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
|
3485 | |
env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
|
3486 | |
|
|
3572 |
env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2)
|
|
3573 |
- sizeof(indx_t);
|
|
3574 |
#if !(MDB_MAXKEYSIZE)
|
|
3575 |
env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db));
|
|
3576 |
#endif
|
3487 | 3577 |
env->me_maxpg = env->me_mapsize / env->me_psize;
|
|
3578 |
|
3488 | 3579 |
#if MDB_DEBUG
|
3489 | 3580 |
{
|
3490 | 3581 |
int toggle = mdb_env_pick_meta(env);
|
|
3984 | 4075 |
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \
|
3985 | 4076 |
MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
|
3986 | 4077 |
|
|
4078 |
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
|
|
4079 |
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
|
|
4080 |
#endif
|
|
4081 |
|
3987 | 4082 |
int
|
3988 | 4083 |
mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
|
3989 | 4084 |
{
|
|
4468 | 4563 |
|
4469 | 4564 |
nkeys = NUMKEYS(mp);
|
4470 | 4565 |
|
4471 | |
#if MDB_DEBUG
|
4472 | |
{
|
4473 | |
pgno_t pgno;
|
4474 | |
COPY_PGNO(pgno, mp->mp_pgno);
|
4475 | 4566 |
DPRINTF(("searching %u keys in %s %spage %"Z"u",
|
4476 | 4567 |
nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
|
4477 | |
pgno));
|
4478 | |
}
|
4479 | |
#endif
|
4480 | |
|
4481 | |
assert(nkeys > 0);
|
|
4568 |
mdb_dbg_pgno(mp)));
|
4482 | 4569 |
|
4483 | 4570 |
low = IS_LEAF(mp) ? 0 : 1;
|
4484 | 4571 |
high = nkeys - 1;
|
|
4542 | 4629 |
node = NODEPTR(mp, i);
|
4543 | 4630 |
}
|
4544 | 4631 |
if (exactp)
|
4545 | |
*exactp = (rc == 0);
|
|
4632 |
*exactp = (rc == 0 && nkeys > 0);
|
4546 | 4633 |
/* store the key index */
|
4547 | 4634 |
mc->mc_ki[mc->mc_top] = i;
|
4548 | 4635 |
if (i >= nkeys)
|
|
4592 | 4679 |
DDBI(mc), (void *) mc));
|
4593 | 4680 |
|
4594 | 4681 |
if (mc->mc_snum >= CURSOR_STACK) {
|
4595 | |
assert(mc->mc_snum < CURSOR_STACK);
|
|
4682 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
4596 | 4683 |
return MDB_CURSOR_FULL;
|
4597 | 4684 |
}
|
4598 | 4685 |
|
|
4652 | 4739 |
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
4653 | 4740 |
} else {
|
4654 | 4741 |
DPRINTF(("page %"Z"u not found", pgno));
|
4655 | |
assert(p != NULL);
|
|
4742 |
txn->mt_flags |= MDB_TXN_ERROR;
|
4656 | 4743 |
return MDB_PAGE_NOTFOUND;
|
4657 | 4744 |
}
|
4658 | 4745 |
|
|
4678 | 4765 |
indx_t i;
|
4679 | 4766 |
|
4680 | 4767 |
DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp)));
|
4681 | |
assert(NUMKEYS(mp) > 1);
|
|
4768 |
mdb_cassert(mc, NUMKEYS(mp) > 1);
|
4682 | 4769 |
DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0))));
|
4683 | 4770 |
|
4684 | 4771 |
if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
|
|
4693 | 4780 |
else {
|
4694 | 4781 |
i = mc->mc_ki[mc->mc_top];
|
4695 | 4782 |
if (!exact) {
|
4696 | |
assert(i > 0);
|
|
4783 |
mdb_cassert(mc, i > 0);
|
4697 | 4784 |
i--;
|
4698 | 4785 |
}
|
4699 | 4786 |
}
|
4700 | 4787 |
DPRINTF(("following index %u for key [%s]", i, DKEY(key)));
|
4701 | 4788 |
}
|
4702 | 4789 |
|
4703 | |
assert(i < NUMKEYS(mp));
|
|
4790 |
mdb_cassert(mc, i < NUMKEYS(mp));
|
4704 | 4791 |
node = NODEPTR(mp, i);
|
4705 | 4792 |
|
4706 | 4793 |
if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0)
|
|
4720 | 4807 |
if (!IS_LEAF(mp)) {
|
4721 | 4808 |
DPRINTF(("internal error, index points to a %02X page!?",
|
4722 | 4809 |
mp->mp_flags));
|
|
4810 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
4723 | 4811 |
return MDB_CORRUPTED;
|
4724 | 4812 |
}
|
4725 | 4813 |
|
|
4814 | 4902 |
}
|
4815 | 4903 |
}
|
4816 | 4904 |
|
4817 | |
assert(root > 1);
|
|
4905 |
mdb_cassert(mc, root > 1);
|
4818 | 4906 |
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
|
4819 | 4907 |
if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0)
|
4820 | 4908 |
return rc;
|
|
4884 | 4972 |
iy = dl[x];
|
4885 | 4973 |
dl[x] = ix;
|
4886 | 4974 |
} else {
|
4887 | |
assert(x > 1);
|
|
4975 |
mdb_cassert(mc, x > 1);
|
4888 | 4976 |
j = ++(dl[0].mid);
|
4889 | 4977 |
dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */
|
4890 | 4978 |
txn->mt_flags |= MDB_TXN_ERROR;
|
|
4952 | 5040 |
int exact = 0;
|
4953 | 5041 |
DKBUF;
|
4954 | 5042 |
|
4955 | |
assert(key);
|
4956 | |
assert(data);
|
|
5043 |
if (key == NULL || data == NULL)
|
|
5044 |
return EINVAL;
|
|
5045 |
|
4957 | 5046 |
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
|
4958 | 5047 |
|
4959 | 5048 |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
|
4961 | 5050 |
|
4962 | 5051 |
if (txn->mt_flags & MDB_TXN_ERROR)
|
4963 | 5052 |
return MDB_BAD_TXN;
|
4964 | |
|
4965 | |
if (key->mv_size > MDB_MAXKEYSIZE) {
|
4966 | |
return MDB_BAD_VALSIZE;
|
4967 | |
}
|
4968 | 5053 |
|
4969 | 5054 |
mdb_cursor_init(&mc, txn, dbi, &mx);
|
4970 | 5055 |
return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
|
|
5011 | 5096 |
DPRINTF(("just moving to %s index key %u",
|
5012 | 5097 |
move_right ? "right" : "left", mc->mc_ki[mc->mc_top]));
|
5013 | 5098 |
}
|
5014 | |
assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
|
|
5099 |
mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
|
5015 | 5100 |
|
5016 | 5101 |
indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
5017 | 5102 |
if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) {
|
|
5039 | 5124 |
return MDB_NOTFOUND;
|
5040 | 5125 |
}
|
5041 | 5126 |
|
5042 | |
assert(mc->mc_flags & C_INITIALIZED);
|
|
5127 |
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
|
5043 | 5128 |
|
5044 | 5129 |
mp = mc->mc_pg[mc->mc_top];
|
5045 | 5130 |
|
|
5061 | 5146 |
}
|
5062 | 5147 |
}
|
5063 | 5148 |
|
5064 | |
DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
|
|
5149 |
DPRINTF(("cursor_next: top page is %"Z"u in cursor %p",
|
|
5150 |
mdb_dbg_pgno(mp), (void *) mc));
|
5065 | 5151 |
if (mc->mc_flags & C_DEL)
|
5066 | 5152 |
goto skip;
|
5067 | 5153 |
|
|
5078 | 5164 |
|
5079 | 5165 |
skip:
|
5080 | 5166 |
DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
|
5081 | |
mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
|
5167 |
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
5082 | 5168 |
|
5083 | 5169 |
if (IS_LEAF2(mp)) {
|
5084 | 5170 |
key->mv_size = mc->mc_db->md_pad;
|
|
5086 | 5172 |
return MDB_SUCCESS;
|
5087 | 5173 |
}
|
5088 | 5174 |
|
5089 | |
assert(IS_LEAF(mp));
|
|
5175 |
mdb_cassert(mc, IS_LEAF(mp));
|
5090 | 5176 |
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
|
5091 | 5177 |
|
5092 | 5178 |
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
5115 | 5201 |
MDB_node *leaf;
|
5116 | 5202 |
int rc;
|
5117 | 5203 |
|
5118 | |
assert(mc->mc_flags & C_INITIALIZED);
|
|
5204 |
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
|
5119 | 5205 |
|
5120 | 5206 |
mp = mc->mc_pg[mc->mc_top];
|
5121 | 5207 |
|
|
5137 | 5223 |
}
|
5138 | 5224 |
}
|
5139 | 5225 |
|
5140 | |
DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
|
|
5226 |
DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p",
|
|
5227 |
mdb_dbg_pgno(mp), (void *) mc));
|
5141 | 5228 |
|
5142 | 5229 |
if (mc->mc_ki[mc->mc_top] == 0) {
|
5143 | 5230 |
DPUTS("=====> move to prev sibling page");
|
|
5153 | 5240 |
mc->mc_flags &= ~C_EOF;
|
5154 | 5241 |
|
5155 | 5242 |
DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
|
5156 | |
mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
|
5243 |
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
5157 | 5244 |
|
5158 | 5245 |
if (IS_LEAF2(mp)) {
|
5159 | 5246 |
key->mv_size = mc->mc_db->md_pad;
|
|
5161 | 5248 |
return MDB_SUCCESS;
|
5162 | 5249 |
}
|
5163 | 5250 |
|
5164 | |
assert(IS_LEAF(mp));
|
|
5251 |
mdb_cassert(mc, IS_LEAF(mp));
|
5165 | 5252 |
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
|
5166 | 5253 |
|
5167 | 5254 |
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
5192 | 5279 |
MDB_node *leaf = NULL;
|
5193 | 5280 |
DKBUF;
|
5194 | 5281 |
|
5195 | |
assert(mc);
|
5196 | |
assert(key);
|
5197 | 5282 |
if (key->mv_size == 0)
|
5198 | 5283 |
return MDB_BAD_VALSIZE;
|
5199 | 5284 |
|
|
5296 | 5381 |
return rc;
|
5297 | 5382 |
|
5298 | 5383 |
mp = mc->mc_pg[mc->mc_top];
|
5299 | |
assert(IS_LEAF(mp));
|
|
5384 |
mdb_cassert(mc, IS_LEAF(mp));
|
5300 | 5385 |
|
5301 | 5386 |
set2:
|
5302 | 5387 |
leaf = mdb_node_search(mc, key, exactp);
|
|
5310 | 5395 |
if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
|
5311 | 5396 |
return rc; /* no entries matched */
|
5312 | 5397 |
mp = mc->mc_pg[mc->mc_top];
|
5313 | |
assert(IS_LEAF(mp));
|
|
5398 |
mdb_cassert(mc, IS_LEAF(mp));
|
5314 | 5399 |
leaf = NODEPTR(mp, 0);
|
5315 | 5400 |
}
|
5316 | 5401 |
|
|
5352 | 5437 |
if (op == MDB_GET_BOTH || rc > 0)
|
5353 | 5438 |
return MDB_NOTFOUND;
|
5354 | 5439 |
rc = 0;
|
|
5440 |
*data = d2;
|
5355 | 5441 |
}
|
5356 | 5442 |
|
5357 | 5443 |
} else {
|
|
5385 | 5471 |
if (rc != MDB_SUCCESS)
|
5386 | 5472 |
return rc;
|
5387 | 5473 |
}
|
5388 | |
assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
|
|
5474 |
mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
|
5389 | 5475 |
|
5390 | 5476 |
leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0);
|
5391 | 5477 |
mc->mc_flags |= C_INITIALIZED;
|
|
5431 | 5517 |
if (rc != MDB_SUCCESS)
|
5432 | 5518 |
return rc;
|
5433 | 5519 |
}
|
5434 | |
assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
|
|
5520 |
mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
|
5435 | 5521 |
|
5436 | 5522 |
}
|
5437 | 5523 |
mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1;
|
|
5468 | 5554 |
int exact = 0;
|
5469 | 5555 |
int (*mfunc)(MDB_cursor *mc, MDB_val *key, MDB_val *data);
|
5470 | 5556 |
|
5471 | |
assert(mc);
|
|
5557 |
if (mc == NULL)
|
|
5558 |
return EINVAL;
|
5472 | 5559 |
|
5473 | 5560 |
if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
|
5474 | 5561 |
return MDB_BAD_TXN;
|
|
5520 | 5607 |
case MDB_SET_RANGE:
|
5521 | 5608 |
if (key == NULL) {
|
5522 | 5609 |
rc = EINVAL;
|
5523 | |
} else if (key->mv_size > MDB_MAXKEYSIZE) {
|
5524 | |
rc = MDB_BAD_VALSIZE;
|
5525 | |
} else if (op == MDB_SET_RANGE)
|
5526 | |
rc = mdb_cursor_set(mc, key, data, op, NULL);
|
5527 | |
else
|
5528 | |
rc = mdb_cursor_set(mc, key, data, op, &exact);
|
|
5610 |
} else {
|
|
5611 |
rc = mdb_cursor_set(mc, key, data, op,
|
|
5612 |
op == MDB_SET_RANGE ? NULL : &exact);
|
|
5613 |
}
|
5529 | 5614 |
break;
|
5530 | 5615 |
case MDB_GET_MULTIPLE:
|
5531 | 5616 |
if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) {
|
|
5662 | 5747 |
unsigned int flags)
|
5663 | 5748 |
{
|
5664 | 5749 |
enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */
|
5665 | |
MDB_env *env = mc->mc_txn->mt_env;
|
|
5750 |
MDB_env *env;
|
5666 | 5751 |
MDB_node *leaf = NULL;
|
5667 | |
MDB_val xdata, *rdata, dkey;
|
|
5752 |
MDB_page *fp, *mp;
|
|
5753 |
uint16_t fp_flags;
|
|
5754 |
MDB_val xdata, *rdata, dkey, olddata;
|
5668 | 5755 |
MDB_db dummy;
|
5669 | |
int do_sub = 0, insert = 0;
|
|
5756 |
int do_sub = 0, insert;
|
5670 | 5757 |
unsigned int mcount = 0, dcount = 0, nospill;
|
5671 | 5758 |
size_t nsize;
|
5672 | 5759 |
int rc, rc2;
|
5673 | |
char dbuf[MDB_MAXKEYSIZE+1];
|
5674 | 5760 |
unsigned int nflags;
|
5675 | 5761 |
DKBUF;
|
|
5762 |
|
|
5763 |
if (mc == NULL || key == NULL)
|
|
5764 |
return EINVAL;
|
|
5765 |
|
|
5766 |
env = mc->mc_txn->mt_env;
|
5676 | 5767 |
|
5677 | 5768 |
/* Check this first so counter will always be zero on any
|
5678 | 5769 |
* early failures.
|
|
5690 | 5781 |
if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
|
5691 | 5782 |
return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
5692 | 5783 |
|
5693 | |
if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE))
|
|
5784 |
if (flags != MDB_CURRENT && key->mv_size-1 >= ENV_MAXKEY(env))
|
5694 | 5785 |
return MDB_BAD_VALSIZE;
|
5695 | 5786 |
|
5696 | |
if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE)
|
|
5787 |
#if SIZE_MAX > MAXDATASIZE
|
|
5788 |
if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE))
|
5697 | 5789 |
return MDB_BAD_VALSIZE;
|
5698 | |
|
5699 | |
#if SIZE_MAX > MAXDATASIZE
|
5700 | |
if (data->mv_size > MAXDATASIZE)
|
|
5790 |
#else
|
|
5791 |
if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env))
|
5701 | 5792 |
return MDB_BAD_VALSIZE;
|
5702 | 5793 |
#endif
|
5703 | 5794 |
|
|
5781 | 5872 |
return rc2;
|
5782 | 5873 |
}
|
5783 | 5874 |
|
5784 | |
/* The key already exists */
|
5785 | |
if (rc == MDB_SUCCESS) {
|
5786 | |
MDB_page *fp, *mp;
|
5787 | |
MDB_val olddata;
|
5788 | |
|
|
5875 |
insert = rc;
|
|
5876 |
if (insert) {
|
|
5877 |
/* The key does not exist */
|
|
5878 |
DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
|
|
5879 |
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
|
|
5880 |
LEAFSIZE(key, data) > env->me_nodemax)
|
|
5881 |
{
|
|
5882 |
/* Too big for a node, insert in sub-DB */
|
|
5883 |
fp_flags = P_LEAF|P_DIRTY;
|
|
5884 |
fp = env->me_pbuf;
|
|
5885 |
fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
|
|
5886 |
fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ;
|
|
5887 |
goto prep_subDB;
|
|
5888 |
}
|
|
5889 |
} else {
|
5789 | 5890 |
/* there's only a key anyway, so this is a no-op */
|
5790 | 5891 |
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
|
5791 | 5892 |
unsigned int ksize = mc->mc_db->md_pad;
|
|
5805 | 5906 |
|
5806 | 5907 |
/* DB has dups? */
|
5807 | 5908 |
if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) {
|
|
5909 |
/* Prepare (sub-)page/sub-DB to accept the new item,
|
|
5910 |
* if needed. fp: old sub-page or a header faking
|
|
5911 |
* it. mp: new (sub-)page. offset: growth in page
|
|
5912 |
* size. xdata: node data with new page or DB.
|
|
5913 |
*/
|
|
5914 |
unsigned i, offset = 0;
|
5808 | 5915 |
mp = fp = xdata.mv_data = env->me_pbuf;
|
5809 | 5916 |
mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
|
5810 | 5917 |
|
|
5814 | 5921 |
if (flags == MDB_CURRENT)
|
5815 | 5922 |
goto current;
|
5816 | 5923 |
|
5817 | |
dkey = olddata;
|
5818 | 5924 |
#if UINT_MAX < SIZE_MAX
|
5819 | |
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
|
|
5925 |
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
5820 | 5926 |
#ifdef MISALIGNED_OK
|
5821 | 5927 |
mc->mc_dbx->md_dcmp = mdb_cmp_long;
|
5822 | 5928 |
#else
|
|
5824 | 5930 |
#endif
|
5825 | 5931 |
#endif
|
5826 | 5932 |
/* if data matches, skip it */
|
5827 | |
if (!mc->mc_dbx->md_dcmp(data, &dkey)) {
|
|
5933 |
if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
|
5828 | 5934 |
if (flags & MDB_NODUPDATA)
|
5829 | 5935 |
rc = MDB_KEYEXIST;
|
5830 | 5936 |
else if (flags & MDB_MULTIPLE)
|
|
5834 | 5940 |
return rc;
|
5835 | 5941 |
}
|
5836 | 5942 |
|
5837 | |
/* create a fake page for the dup items */
|
5838 | |
memcpy(dbuf, dkey.mv_data, dkey.mv_size);
|
5839 | |
dkey.mv_data = dbuf;
|
|
5943 |
/* Back up original data item */
|
|
5944 |
dkey.mv_size = olddata.mv_size;
|
|
5945 |
dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size);
|
|
5946 |
|
|
5947 |
/* Make sub-page header for the dup items, with dummy body */
|
5840 | 5948 |
fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
|
5841 | 5949 |
fp->mp_lower = PAGEHDRSZ;
|
5842 | 5950 |
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
|
5849 | 5957 |
(dkey.mv_size & 1) + (data->mv_size & 1);
|
5850 | 5958 |
}
|
5851 | 5959 |
fp->mp_upper = xdata.mv_size;
|
|
5960 |
olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */
|
5852 | 5961 |
} else if (leaf->mn_flags & F_SUBDATA) {
|
5853 | 5962 |
/* Data is on sub-DB, just store it */
|
5854 | 5963 |
flags |= F_DUPDATA|F_SUBDATA;
|
5855 | 5964 |
goto put_sub;
|
5856 | 5965 |
} else {
|
5857 | |
/* See if we need to convert from fake page to subDB */
|
5858 | |
unsigned int offset;
|
5859 | |
unsigned int i;
|
5860 | |
uint16_t fp_flags;
|
5861 | |
|
|
5966 |
/* Data is on sub-page */
|
5862 | 5967 |
fp = olddata.mv_data;
|
5863 | 5968 |
switch (flags) {
|
5864 | 5969 |
default:
|
5865 | 5970 |
if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) {
|
5866 | |
offset = NODESIZE + sizeof(indx_t) + data->mv_size;
|
5867 | |
offset += offset & 1;
|
|
5971 |
offset = EVEN(NODESIZE + sizeof(indx_t) +
|
|
5972 |
data->mv_size);
|
5868 | 5973 |
break;
|
5869 | 5974 |
}
|
5870 | 5975 |
offset = fp->mp_pad;
|
|
5880 | 5985 |
flags |= F_DUPDATA;
|
5881 | 5986 |
goto put_sub;
|
5882 | 5987 |
}
|
5883 | |
fp_flags = fp->mp_flags;
|
5884 | 5988 |
xdata.mv_size = olddata.mv_size + offset;
|
5885 | |
if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + xdata.mv_size
|
5886 | |
>= env->me_nodemax) {
|
5887 | |
/* yes, convert it */
|
|
5989 |
}
|
|
5990 |
|
|
5991 |
fp_flags = fp->mp_flags;
|
|
5992 |
if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
|
|
5993 |
/* Too big for a sub-page, convert to sub-DB */
|
|
5994 |
fp_flags &= ~P_SUBP;
|
|
5995 |
prep_subDB:
|
5888 | 5996 |
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
|
5997 |
fp_flags |= P_LEAF2;
|
5889 | 5998 |
dummy.md_pad = fp->mp_pad;
|
5890 | 5999 |
dummy.md_flags = MDB_DUPFIXED;
|
5891 | 6000 |
if (mc->mc_db->md_flags & MDB_INTEGERDUP)
|
|
5906 | 6015 |
offset = env->me_psize - olddata.mv_size;
|
5907 | 6016 |
flags |= F_DUPDATA|F_SUBDATA;
|
5908 | 6017 |
dummy.md_root = mp->mp_pgno;
|
5909 | |
fp_flags &= ~P_SUBP;
|
5910 | |
}
|
|
6018 |
}
|
|
6019 |
if (mp != fp) {
|
5911 | 6020 |
mp->mp_flags = fp_flags | P_DIRTY;
|
5912 | 6021 |
mp->mp_pad = fp->mp_pad;
|
5913 | 6022 |
mp->mp_lower = fp->mp_lower;
|
5914 | 6023 |
mp->mp_upper = fp->mp_upper + offset;
|
5915 | |
if (IS_LEAF2(fp)) {
|
|
6024 |
if (fp_flags & P_LEAF2) {
|
5916 | 6025 |
memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
|
5917 | 6026 |
} else {
|
5918 | 6027 |
memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
|
|
5925 | 6034 |
rdata = &xdata;
|
5926 | 6035 |
flags |= F_DUPDATA;
|
5927 | 6036 |
do_sub = 1;
|
5928 | |
mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
|
|
6037 |
if (!insert)
|
|
6038 |
mdb_node_del(mc, 0);
|
5929 | 6039 |
goto new_sub;
|
5930 | 6040 |
}
|
5931 | 6041 |
current:
|
|
5965 | 6075 |
return ENOMEM;
|
5966 | 6076 |
id2.mid = pg;
|
5967 | 6077 |
id2.mptr = np;
|
5968 | |
mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
|
|
6078 |
rc = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
|
|
6079 |
mdb_cassert(mc, rc == 0);
|
5969 | 6080 |
if (!(flags & MDB_RESERVE)) {
|
5970 | 6081 |
/* Copy end of page, adjusting alignment so
|
5971 | 6082 |
* compiler may copy words instead of bytes.
|
|
6001 | 6112 |
memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
|
6002 | 6113 |
goto done;
|
6003 | 6114 |
}
|
6004 | |
mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
|
|
6115 |
mdb_node_del(mc, 0);
|
6005 | 6116 |
mc->mc_db->md_entries--;
|
6006 | |
} else {
|
6007 | |
DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
|
6008 | |
insert = 1;
|
6009 | 6117 |
}
|
6010 | 6118 |
|
6011 | 6119 |
rdata = data;
|
|
6244 | 6352 |
size_t sz;
|
6245 | 6353 |
|
6246 | 6354 |
sz = LEAFSIZE(key, data);
|
6247 | |
if (sz >= env->me_nodemax) {
|
|
6355 |
if (sz > env->me_nodemax) {
|
6248 | 6356 |
/* put on overflow page */
|
6249 | 6357 |
sz -= data->mv_size - sizeof(pgno_t);
|
6250 | 6358 |
}
|
6251 | |
sz += sz & 1;
|
6252 | |
|
6253 | |
return sz + sizeof(indx_t);
|
|
6359 |
|
|
6360 |
return EVEN(sz + sizeof(indx_t));
|
6254 | 6361 |
}
|
6255 | 6362 |
|
6256 | 6363 |
/** Calculate the size of a branch node.
|
|
6269 | 6376 |
size_t sz;
|
6270 | 6377 |
|
6271 | 6378 |
sz = INDXSIZE(key);
|
6272 | |
if (sz >= env->me_nodemax) {
|
|
6379 |
if (sz > env->me_nodemax) {
|
6273 | 6380 |
/* put on overflow page */
|
6274 | 6381 |
/* not implemented */
|
6275 | 6382 |
/* sz -= key->size - sizeof(pgno_t); */
|
|
6306 | 6413 |
MDB_page *ofp = NULL; /* overflow page */
|
6307 | 6414 |
DKBUF;
|
6308 | 6415 |
|
6309 | |
assert(mp->mp_upper >= mp->mp_lower);
|
|
6416 |
mdb_cassert(mc, mp->mp_upper >= mp->mp_lower);
|
6310 | 6417 |
|
6311 | 6418 |
DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]",
|
6312 | 6419 |
IS_LEAF(mp) ? "leaf" : "branch",
|
6313 | 6420 |
IS_SUBP(mp) ? "sub-" : "",
|
6314 | |
mp->mp_pgno, indx, data ? data->mv_size : 0,
|
|
6421 |
mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0,
|
6315 | 6422 |
key ? key->mv_size : 0, key ? DKEY(key) : "null"));
|
6316 | 6423 |
|
6317 | 6424 |
if (IS_LEAF2(mp)) {
|
|
6334 | 6441 |
if (key != NULL)
|
6335 | 6442 |
node_size += key->mv_size;
|
6336 | 6443 |
if (IS_LEAF(mp)) {
|
6337 | |
assert(data);
|
|
6444 |
mdb_cassert(mc, data);
|
6338 | 6445 |
if (F_ISSET(flags, F_BIGDATA)) {
|
6339 | 6446 |
/* Data already on overflow page. */
|
6340 | 6447 |
node_size += sizeof(pgno_t);
|
6341 | |
} else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) {
|
|
6448 |
} else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) {
|
6342 | 6449 |
int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize);
|
6343 | 6450 |
int rc;
|
6344 | 6451 |
/* Put data on overflow page. */
|
6345 | 6452 |
DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page",
|
6346 | 6453 |
data->mv_size, node_size+data->mv_size));
|
6347 | |
node_size += sizeof(pgno_t) + (node_size & 1);
|
|
6454 |
node_size = EVEN(node_size + sizeof(pgno_t));
|
6348 | 6455 |
if ((ssize_t)node_size > room)
|
6349 | 6456 |
goto full;
|
6350 | 6457 |
if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
|
|
6356 | 6463 |
node_size += data->mv_size;
|
6357 | 6464 |
}
|
6358 | 6465 |
}
|
6359 | |
node_size += node_size & 1;
|
|
6466 |
node_size = EVEN(node_size);
|
6360 | 6467 |
if ((ssize_t)node_size > room)
|
6361 | 6468 |
goto full;
|
6362 | 6469 |
|
|
6367 | 6474 |
|
6368 | 6475 |
/* Adjust free space offsets. */
|
6369 | 6476 |
ofs = mp->mp_upper - node_size;
|
6370 | |
assert(ofs >= mp->mp_lower + sizeof(indx_t));
|
|
6477 |
mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t));
|
6371 | 6478 |
mp->mp_ptrs[indx] = ofs;
|
6372 | 6479 |
mp->mp_upper = ofs;
|
6373 | 6480 |
mp->mp_lower += sizeof(indx_t);
|
|
6385 | 6492 |
memcpy(NODEKEY(node), key->mv_data, key->mv_size);
|
6386 | 6493 |
|
6387 | 6494 |
if (IS_LEAF(mp)) {
|
6388 | |
assert(key);
|
|
6495 |
mdb_cassert(mc, key);
|
6389 | 6496 |
if (ofp == NULL) {
|
6390 | 6497 |
if (F_ISSET(flags, F_BIGDATA))
|
6391 | 6498 |
memcpy(node->mn_data + key->mv_size, data->mv_data,
|
|
6409 | 6516 |
|
6410 | 6517 |
full:
|
6411 | 6518 |
DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
|
6412 | |
mp->mp_pgno, NUMKEYS(mp)));
|
|
6519 |
mdb_dbg_pgno(mp), NUMKEYS(mp)));
|
6413 | 6520 |
DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room));
|
6414 | 6521 |
DPRINTF(("node size = %"Z"u", node_size));
|
|
6522 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
6415 | 6523 |
return MDB_PAGE_FULL;
|
6416 | 6524 |
}
|
6417 | 6525 |
|
|
6422 | 6530 |
* part of a #MDB_DUPFIXED database.
|
6423 | 6531 |
*/
|
6424 | 6532 |
static void
|
6425 | |
mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
|
6426 | |
{
|
|
6533 |
mdb_node_del(MDB_cursor *mc, int ksize)
|
|
6534 |
{
|
|
6535 |
MDB_page *mp = mc->mc_pg[mc->mc_top];
|
|
6536 |
indx_t indx = mc->mc_ki[mc->mc_top];
|
6427 | 6537 |
unsigned int sz;
|
6428 | 6538 |
indx_t i, j, numkeys, ptr;
|
6429 | 6539 |
MDB_node *node;
|
6430 | 6540 |
char *base;
|
6431 | 6541 |
|
6432 | |
#if MDB_DEBUG
|
6433 | |
{
|
6434 | |
pgno_t pgno;
|
6435 | |
COPY_PGNO(pgno, mp->mp_pgno);
|
6436 | 6542 |
DPRINTF(("delete node %u on %s page %"Z"u", indx,
|
6437 | |
IS_LEAF(mp) ? "leaf" : "branch", pgno));
|
6438 | |
}
|
6439 | |
#endif
|
6440 | |
assert(indx < NUMKEYS(mp));
|
|
6543 |
IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp)));
|
|
6544 |
numkeys = NUMKEYS(mp);
|
|
6545 |
mdb_cassert(mc, indx < numkeys);
|
6441 | 6546 |
|
6442 | 6547 |
if (IS_LEAF2(mp)) {
|
6443 | |
int x = NUMKEYS(mp) - 1 - indx;
|
|
6548 |
int x = numkeys - 1 - indx;
|
6444 | 6549 |
base = LEAF2KEY(mp, indx, ksize);
|
6445 | 6550 |
if (x)
|
6446 | 6551 |
memmove(base, base + ksize, x * ksize);
|
|
6457 | 6562 |
else
|
6458 | 6563 |
sz += NODEDSZ(node);
|
6459 | 6564 |
}
|
6460 | |
sz += sz & 1;
|
|
6565 |
sz = EVEN(sz);
|
6461 | 6566 |
|
6462 | 6567 |
ptr = mp->mp_ptrs[indx];
|
6463 | |
numkeys = NUMKEYS(mp);
|
6464 | 6568 |
for (i = j = 0; i < numkeys; i++) {
|
6465 | 6569 |
if (i != indx) {
|
6466 | 6570 |
mp->mp_ptrs[j] = mp->mp_ptrs[i];
|
|
6487 | 6591 |
MDB_node *node;
|
6488 | 6592 |
MDB_page *sp, *xp;
|
6489 | 6593 |
char *base;
|
6490 | |
int osize, nsize;
|
6491 | |
int delta;
|
|
6594 |
int nsize, delta;
|
6492 | 6595 |
indx_t i, numkeys, ptr;
|
6493 | 6596 |
|
6494 | 6597 |
node = NODEPTR(mp, indx);
|
6495 | 6598 |
sp = (MDB_page *)NODEDATA(node);
|
6496 | |
osize = NODEDSZ(node);
|
6497 | |
|
6498 | |
delta = sp->mp_upper - sp->mp_lower;
|
6499 | |
SETDSZ(node, osize - delta);
|
|
6599 |
delta = SIZELEFT(sp);
|
6500 | 6600 |
xp = (MDB_page *)((char *)sp + delta);
|
6501 | 6601 |
|
6502 | 6602 |
/* shift subpage upward */
|
6503 | 6603 |
if (IS_LEAF2(sp)) {
|
6504 | 6604 |
nsize = NUMKEYS(sp) * sp->mp_pad;
|
|
6605 |
if (nsize & 1)
|
|
6606 |
return; /* do not make the node uneven-sized */
|
6505 | 6607 |
memmove(METADATA(xp), METADATA(sp), nsize);
|
6506 | 6608 |
} else {
|
6507 | 6609 |
int i;
|
6508 | |
nsize = osize - sp->mp_upper;
|
6509 | 6610 |
numkeys = NUMKEYS(sp);
|
6510 | 6611 |
for (i=numkeys-1; i>=0; i--)
|
6511 | 6612 |
xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
|
|
6515 | 6616 |
xp->mp_flags = sp->mp_flags;
|
6516 | 6617 |
xp->mp_pad = sp->mp_pad;
|
6517 | 6618 |
COPY_PGNO(xp->mp_pgno, mp->mp_pgno);
|
|
6619 |
|
|
6620 |
nsize = NODEDSZ(node) - delta;
|
|
6621 |
SETDSZ(node, nsize);
|
6518 | 6622 |
|
6519 | 6623 |
/* shift lower nodes upward */
|
6520 | 6624 |
ptr = mp->mp_ptrs[indx];
|
|
6627 | 6731 |
mc->mc_pg[0] = 0;
|
6628 | 6732 |
mc->mc_flags = 0;
|
6629 | 6733 |
if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
|
6630 | |
assert(mx != NULL);
|
|
6734 |
mdb_tassert(txn, mx != NULL);
|
6631 | 6735 |
mc->mc_xcursor = mx;
|
6632 | 6736 |
mdb_xcursor_init0(mc);
|
6633 | 6737 |
} else {
|
|
6735 | 6839 |
MDB_dbi
|
6736 | 6840 |
mdb_cursor_dbi(MDB_cursor *mc)
|
6737 | 6841 |
{
|
6738 | |
assert(mc != NULL);
|
6739 | 6842 |
return mc->mc_dbi;
|
6740 | 6843 |
}
|
6741 | 6844 |
|
6742 | |
/** Replace the key for a node with a new key.
|
|
6845 |
/** Replace the key for a branch node with a new key.
|
6743 | 6846 |
* @param[in] mc Cursor pointing to the node to operate on.
|
6744 | 6847 |
* @param[in] key The new key to use.
|
6745 | 6848 |
* @return 0 on success, non-zero on failure.
|
|
6751 | 6854 |
MDB_node *node;
|
6752 | 6855 |
char *base;
|
6753 | 6856 |
size_t len;
|
6754 | |
int delta, delta0;
|
|
6857 |
int delta, ksize, oksize;
|
6755 | 6858 |
indx_t ptr, i, numkeys, indx;
|
6756 | 6859 |
DKBUF;
|
6757 | 6860 |
|
|
6762 | 6865 |
#if MDB_DEBUG
|
6763 | 6866 |
{
|
6764 | 6867 |
MDB_val k2;
|
6765 | |
char kbuf2[(MDB_MAXKEYSIZE*2+1)];
|
|
6868 |
char kbuf2[DKBUF_MAXKEYSIZE*2+1];
|
6766 | 6869 |
k2.mv_data = NODEKEY(node);
|
6767 | 6870 |
k2.mv_size = node->mn_ksize;
|
6768 | 6871 |
DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u",
|
|
6773 | 6876 |
}
|
6774 | 6877 |
#endif
|
6775 | 6878 |
|
6776 | |
delta0 = delta = key->mv_size - node->mn_ksize;
|
6777 | |
|
6778 | |
/* Must be 2-byte aligned. If new key is
|
6779 | |
* shorter by 1, the shift will be skipped.
|
6780 | |
*/
|
6781 | |
delta += (delta & 1);
|
|
6879 |
/* Sizes must be 2-byte aligned. */
|
|
6880 |
ksize = EVEN(key->mv_size);
|
|
6881 |
oksize = EVEN(node->mn_ksize);
|
|
6882 |
delta = ksize - oksize;
|
|
6883 |
|
|
6884 |
/* Shift node contents if EVEN(key length) changed. */
|
6782 | 6885 |
if (delta) {
|
6783 | 6886 |
if (delta > 0 && SIZELEFT(mp) < delta) {
|
6784 | 6887 |
pgno_t pgno;
|
6785 | 6888 |
/* not enough space left, do a delete and split */
|
6786 | 6889 |
DPRINTF(("Not enough room, delta = %d, splitting...", delta));
|
6787 | 6890 |
pgno = NODEPGNO(node);
|
6788 | |
mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
|
|
6891 |
mdb_node_del(mc, 0);
|
6789 | 6892 |
return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE);
|
6790 | 6893 |
}
|
6791 | 6894 |
|
|
6804 | 6907 |
}
|
6805 | 6908 |
|
6806 | 6909 |
/* But even if no shift was needed, update ksize */
|
6807 | |
if (delta0)
|
|
6910 |
if (node->mn_ksize != key->mv_size)
|
6808 | 6911 |
node->mn_ksize = key->mv_size;
|
6809 | 6912 |
|
6810 | 6913 |
if (key->mv_size)
|
|
6836 | 6939 |
return rc;
|
6837 | 6940 |
|
6838 | 6941 |
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
|
6839 | |
srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */
|
6840 | 6942 |
key.mv_size = csrc->mc_db->md_pad;
|
6841 | 6943 |
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
|
6842 | 6944 |
data.mv_size = 0;
|
|
6845 | 6947 |
flags = 0;
|
6846 | 6948 |
} else {
|
6847 | 6949 |
srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]);
|
6848 | |
assert(!((size_t)srcnode&1));
|
|
6950 |
mdb_cassert(csrc, !((size_t)srcnode & 1));
|
6849 | 6951 |
srcpg = NODEPGNO(srcnode);
|
6850 | 6952 |
flags = srcnode->mn_flags;
|
6851 | 6953 |
if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
|
|
6908 | 7010 |
|
6909 | 7011 |
/* Delete the node from the source page.
|
6910 | 7012 |
*/
|
6911 | |
mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
|
|
7013 |
mdb_node_del(csrc, key.mv_size);
|
6912 | 7014 |
|
6913 | 7015 |
{
|
6914 | 7016 |
/* Adjust other cursors pointing to mp */
|
|
6956 | 7058 |
csrc->mc_ki[csrc->mc_top] = 0;
|
6957 | 7059 |
rc = mdb_update_key(csrc, &nullkey);
|
6958 | 7060 |
csrc->mc_ki[csrc->mc_top] = ix;
|
6959 | |
assert(rc == MDB_SUCCESS);
|
|
7061 |
mdb_cassert(csrc, rc == MDB_SUCCESS);
|
6960 | 7062 |
}
|
6961 | 7063 |
}
|
6962 | 7064 |
|
|
6984 | 7086 |
cdst->mc_ki[cdst->mc_top] = 0;
|
6985 | 7087 |
rc = mdb_update_key(cdst, &nullkey);
|
6986 | 7088 |
cdst->mc_ki[cdst->mc_top] = ix;
|
6987 | |
assert(rc == MDB_SUCCESS);
|
|
7089 |
mdb_cassert(csrc, rc == MDB_SUCCESS);
|
6988 | 7090 |
}
|
6989 | 7091 |
}
|
6990 | 7092 |
|
|
7010 | 7112 |
DPRINTF(("merging page %"Z"u into %"Z"u", csrc->mc_pg[csrc->mc_top]->mp_pgno,
|
7011 | 7113 |
cdst->mc_pg[cdst->mc_top]->mp_pgno));
|
7012 | 7114 |
|
7013 | |
assert(csrc->mc_snum > 1); /* can't merge root page */
|
7014 | |
assert(cdst->mc_snum > 1);
|
|
7115 |
mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
|
|
7116 |
mdb_cassert(csrc, cdst->mc_snum > 1);
|
7015 | 7117 |
|
7016 | 7118 |
/* Mark dst as dirty. */
|
7017 | 7119 |
if ((rc = mdb_page_touch(cdst)))
|
|
7066 | 7168 |
|
7067 | 7169 |
/* Unlink the src page from parent and add to free list.
|
7068 | 7170 |
*/
|
7069 | |
mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
|
7070 | |
if (csrc->mc_ki[csrc->mc_top-1] == 0) {
|
|
7171 |
csrc->mc_top--;
|
|
7172 |
mdb_node_del(csrc, 0);
|
|
7173 |
if (csrc->mc_ki[csrc->mc_top] == 0) {
|
7071 | 7174 |
key.mv_size = 0;
|
7072 | |
csrc->mc_top--;
|
7073 | 7175 |
rc = mdb_update_key(csrc, &key);
|
7074 | |
csrc->mc_top++;
|
7075 | |
if (rc)
|
|
7176 |
if (rc) {
|
|
7177 |
csrc->mc_top++;
|
7076 | 7178 |
return rc;
|
7077 | |
}
|
|
7179 |
}
|
|
7180 |
}
|
|
7181 |
csrc->mc_top++;
|
7078 | 7182 |
|
7079 | 7183 |
rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
|
7080 | 7184 |
csrc->mc_pg[csrc->mc_top]->mp_pgno);
|
|
7145 | 7249 |
MDB_cursor mn;
|
7146 | 7250 |
|
7147 | 7251 |
minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top]));
|
7148 | |
#if MDB_DEBUG
|
7149 | |
{
|
7150 | |
pgno_t pgno;
|
7151 | |
COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
|
7152 | 7252 |
DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
|
7153 | 7253 |
IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
|
7154 | |
pgno, NUMKEYS(mc->mc_pg[mc->mc_top]),
|
|
7254 |
mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]),
|
7155 | 7255 |
(float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10));
|
7156 | |
}
|
7157 | |
#endif
|
7158 | 7256 |
|
7159 | 7257 |
if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD &&
|
7160 | 7258 |
NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
|
7161 | |
#if MDB_DEBUG
|
7162 | |
pgno_t pgno;
|
7163 | |
COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
|
7164 | 7259 |
DPRINTF(("no need to rebalance page %"Z"u, above fill threshold",
|
7165 | |
pgno));
|
7166 | |
#endif
|
|
7260 |
mdb_dbg_pgno(mc->mc_pg[mc->mc_top])));
|
7167 | 7261 |
return MDB_SUCCESS;
|
7168 | 7262 |
}
|
7169 | 7263 |
|
|
7245 | 7339 |
* otherwise the tree is invalid.
|
7246 | 7340 |
*/
|
7247 | 7341 |
ptop = mc->mc_top-1;
|
7248 | |
assert(NUMKEYS(mc->mc_pg[ptop]) > 1);
|
|
7342 |
mdb_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1);
|
7249 | 7343 |
|
7250 | 7344 |
/* Leaf page fill factor is below the threshold.
|
7251 | 7345 |
* Try to move keys from left or right neighbor, or
|
|
7327 | 7421 |
(rc = mdb_ovpage_free(mc, omp)))
|
7328 | 7422 |
return rc;
|
7329 | 7423 |
}
|
7330 | |
mdb_node_del(mp, ki, mc->mc_db->md_pad);
|
|
7424 |
mdb_node_del(mc, mc->mc_db->md_pad);
|
7331 | 7425 |
mc->mc_db->md_entries--;
|
7332 | 7426 |
rc = mdb_rebalance(mc);
|
7333 | 7427 |
if (rc != MDB_SUCCESS)
|
7334 | 7428 |
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
7335 | 7429 |
else {
|
7336 | |
MDB_cursor *m2;
|
|
7430 |
MDB_cursor *m2, *m3;
|
7337 | 7431 |
MDB_dbi dbi = mc->mc_dbi;
|
7338 | 7432 |
|
7339 | 7433 |
mp = mc->mc_pg[mc->mc_top];
|
|
7345 | 7439 |
|
7346 | 7440 |
/* Adjust other cursors pointing to mp */
|
7347 | 7441 |
for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
|
7348 | |
if (m2 == mc || m2->mc_snum < mc->mc_snum)
|
|
7442 |
m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
|
|
7443 |
if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
|
7349 | 7444 |
continue;
|
7350 | |
if (!(m2->mc_flags & C_INITIALIZED))
|
|
7445 |
if (m3 == mc || m3->mc_snum < mc->mc_snum)
|
7351 | 7446 |
continue;
|
7352 | |
if (m2->mc_pg[mc->mc_top] == mp) {
|
7353 | |
if (m2->mc_ki[mc->mc_top] >= ki) {
|
7354 | |
m2->mc_flags |= C_DEL;
|
7355 | |
if (m2->mc_ki[mc->mc_top] > ki)
|
7356 | |
m2->mc_ki[mc->mc_top]--;
|
|
7447 |
if (m3->mc_pg[mc->mc_top] == mp) {
|
|
7448 |
if (m3->mc_ki[mc->mc_top] >= ki) {
|
|
7449 |
m3->mc_flags |= C_DEL;
|
|
7450 |
if (m3->mc_ki[mc->mc_top] > ki)
|
|
7451 |
m3->mc_ki[mc->mc_top]--;
|
7357 | 7452 |
}
|
7358 | |
if (m2->mc_ki[mc->mc_top] >= nkeys)
|
7359 | |
mdb_cursor_sibling(m2, 1);
|
|
7453 |
if (m3->mc_ki[mc->mc_top] >= nkeys)
|
|
7454 |
mdb_cursor_sibling(m3, 1);
|
7360 | 7455 |
}
|
7361 | 7456 |
}
|
7362 | 7457 |
mc->mc_flags |= C_DEL;
|
|
7376 | 7471 |
int rc, exact;
|
7377 | 7472 |
DKBUF;
|
7378 | 7473 |
|
7379 | |
assert(key != NULL);
|
|
7474 |
if (key == NULL)
|
|
7475 |
return EINVAL;
|
7380 | 7476 |
|
7381 | 7477 |
DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
|
7382 | 7478 |
|
|
7385 | 7481 |
|
7386 | 7482 |
if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
|
7387 | 7483 |
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
7388 | |
|
7389 | |
if (key->mv_size > MDB_MAXKEYSIZE) {
|
7390 | |
return MDB_BAD_VALSIZE;
|
7391 | |
}
|
7392 | 7484 |
|
7393 | 7485 |
mdb_cursor_init(&mc, txn, dbi, &mx);
|
7394 | 7486 |
|
|
7557 | 7649 |
nsize = mdb_leaf_size(env, newkey, newdata);
|
7558 | 7650 |
else
|
7559 | 7651 |
nsize = mdb_branch_size(env, newkey);
|
7560 | |
nsize += nsize & 1;
|
|
7652 |
nsize = EVEN(nsize);
|
7561 | 7653 |
|
7562 | 7654 |
/* grab a page to hold a temporary copy */
|
7563 | 7655 |
copy = mdb_page_malloc(mc->mc_txn, 1);
|
|
7614 | 7706 |
else
|
7615 | 7707 |
psize += NODEDSZ(node);
|
7616 | 7708 |
}
|
7617 | |
psize += psize & 1;
|
|
7709 |
psize = EVEN(psize);
|
7618 | 7710 |
}
|
7619 | 7711 |
if (psize > pmax || i == k-j) {
|
7620 | 7712 |
split_indx = i + (j<0);
|
|
7825 | 7917 |
MDB_cursor mc;
|
7826 | 7918 |
MDB_xcursor mx;
|
7827 | 7919 |
|
7828 | |
assert(key != NULL);
|
7829 | |
assert(data != NULL);
|
|
7920 |
if (key == NULL || data == NULL)
|
|
7921 |
return EINVAL;
|
7830 | 7922 |
|
7831 | 7923 |
if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
|
7832 | 7924 |
return EINVAL;
|
|
7857 | 7949 |
return EINVAL;
|
7858 | 7950 |
|
7859 | 7951 |
*arg = env->me_flags;
|
|
7952 |
return MDB_SUCCESS;
|
|
7953 |
}
|
|
7954 |
|
|
7955 |
int
|
|
7956 |
mdb_env_set_userctx(MDB_env *env, void *ctx)
|
|
7957 |
{
|
|
7958 |
if (!env)
|
|
7959 |
return EINVAL;
|
|
7960 |
env->me_userctx = ctx;
|
|
7961 |
return MDB_SUCCESS;
|
|
7962 |
}
|
|
7963 |
|
|
7964 |
void *
|
|
7965 |
mdb_env_get_userctx(MDB_env *env)
|
|
7966 |
{
|
|
7967 |
return env ? env->me_userctx : NULL;
|
|
7968 |
}
|
|
7969 |
|
|
7970 |
int
|
|
7971 |
mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
|
7972 |
{
|
|
7973 |
if (!env)
|
|
7974 |
return EINVAL;
|
|
7975 |
#ifndef NDEBUG
|
|
7976 |
env->me_assert_func = func;
|
|
7977 |
#endif
|
7860 | 7978 |
return MDB_SUCCESS;
|
7861 | 7979 |
}
|
7862 | 7980 |
|
|
8129 | 8247 |
rc = mdb_page_get(txn, pg, &omp, NULL);
|
8130 | 8248 |
if (rc != 0)
|
8131 | 8249 |
return rc;
|
8132 | |
assert(IS_OVERFLOW(omp));
|
|
8250 |
mdb_cassert(mc, IS_OVERFLOW(omp));
|
8133 | 8251 |
rc = mdb_midl_append_range(&txn->mt_free_pgs,
|
8134 | 8252 |
pg, omp->mp_pages);
|
8135 | 8253 |
if (rc)
|
|
8260 | 8378 |
|
8261 | 8379 |
int mdb_env_get_maxkeysize(MDB_env *env)
|
8262 | 8380 |
{
|
8263 | |
return MDB_MAXKEYSIZE;
|
|
8381 |
return ENV_MAXKEY(env);
|
8264 | 8382 |
}
|
8265 | 8383 |
|
8266 | 8384 |
int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
8268 | 8386 |
unsigned int i, rdrs;
|
8269 | 8387 |
MDB_reader *mr;
|
8270 | 8388 |
char buf[64];
|
8271 | |
int first = 1;
|
|
8389 |
int rc = 0, first = 1;
|
8272 | 8390 |
|
8273 | 8391 |
if (!env || !func)
|
8274 | 8392 |
return -1;
|
|
8279 | 8397 |
mr = env->me_txns->mti_readers;
|
8280 | 8398 |
for (i=0; i<rdrs; i++) {
|
8281 | 8399 |
if (mr[i].mr_pid) {
|
8282 | |
size_t tid;
|
8283 | |
int rc;
|
8284 | |
tid = mr[i].mr_tid;
|
8285 | |
if (mr[i].mr_txnid == (txnid_t)-1) {
|
8286 | |
sprintf(buf, "%10d %"Z"x -\n", mr[i].mr_pid, tid);
|
8287 | |
} else {
|
8288 | |
sprintf(buf, "%10d %"Z"x %"Z"u\n", mr[i].mr_pid, tid, mr[i].mr_txnid);
|
8289 | |
}
|
|
8400 |
txnid_t txnid = mr[i].mr_txnid;
|
|
8401 |
sprintf(buf, txnid == (txnid_t)-1 ?
|
|
8402 |
"%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n",
|
|
8403 |
(int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid);
|
8290 | 8404 |
if (first) {
|
8291 | 8405 |
first = 0;
|
8292 | |
func(" pid thread txnid\n", ctx);
|
|
8406 |
rc = func(" pid thread txnid\n", ctx);
|
|
8407 |
if (rc < 0)
|
|
8408 |
break;
|
8293 | 8409 |
}
|
8294 | 8410 |
rc = func(buf, ctx);
|
8295 | 8411 |
if (rc < 0)
|
8296 | |
return rc;
|
|
8412 |
break;
|
8297 | 8413 |
}
|
8298 | 8414 |
}
|
8299 | 8415 |
if (first) {
|
8300 | |
func("(no active readers)\n", ctx);
|
8301 | |
}
|
8302 | |
return 0;
|
|
8416 |
rc = func("(no active readers)\n", ctx);
|
|
8417 |
}
|
|
8418 |
return rc;
|
8303 | 8419 |
}
|
8304 | 8420 |
|
8305 | 8421 |
/** Insert pid into list if not already present.
|
|
8360 | 8476 |
return ENOMEM;
|
8361 | 8477 |
pids[0] = 0;
|
8362 | 8478 |
mr = env->me_txns->mti_readers;
|
8363 | |
j = 0;
|
8364 | 8479 |
for (i=0; i<rdrs; i++) {
|
8365 | 8480 |
if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) {
|
8366 | 8481 |
pid = mr[i].mr_pid;
|