Codebase list lmdb / upstream/0.9.6.20130417
Imported Upstream version 0.9.6.20130417 Ondřej Surý 10 years ago
21 changed file(s) with 12149 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 mtest
1 mtest[23456]
2 testdb
3 mdb_copy
4 mdb_stat
5 *.[ao]
6 *.so
7 *[~#]
8 *.bak
9 *.orig
10 *.rej
11 core
12 core.*
13 valgrind.*
14 man/
15 html/
0 Copyright 2011-2013 Howard Chu, Symas Corp.
1 All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted only as authorized by the OpenLDAP
5 Public License.
6
7 A copy of this license is available in the file LICENSE in the
8 top-level directory of the distribution or, alternatively, at
9 <http://www.OpenLDAP.org/license.html>.
10
11 OpenLDAP is a registered trademark of the OpenLDAP Foundation.
12
13 Individual files and/or contributed packages may be copyright by
14 other parties and/or subject to additional restrictions.
15
16 This work also contains materials derived from public sources.
17
18 Additional information about OpenLDAP can be obtained at
19 <http://www.openldap.org/>.
0 # Doxyfile 1.7.1
1
2 # This file describes the settings to be used by the documentation system
3 # doxygen (www.doxygen.org) for a project
4 #
5 # All text after a hash (#) is considered a comment and will be ignored
6 # The format is:
7 # TAG = value [value, ...]
8 # For lists items can also be appended using:
9 # TAG += value [value, ...]
10 # Values that contain spaces should be placed between quotes (" ")
11
12 #---------------------------------------------------------------------------
13 # Project related configuration options
14 #---------------------------------------------------------------------------
15
16 # This tag specifies the encoding used for all characters in the config file
17 # that follow. The default is UTF-8 which is also the encoding used for all
18 # text before the first occurrence of this tag. Doxygen uses libiconv (or the
19 # iconv built into libc) for the transcoding. See
20 # http://www.gnu.org/software/libiconv for the list of possible encodings.
21
22 DOXYFILE_ENCODING = UTF-8
23
24 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
25 # by quotes) that should identify the project.
26
27 PROJECT_NAME = MDB
28
29 # The PROJECT_NUMBER tag can be used to enter a project or revision number.
30 # This could be handy for archiving the generated documentation or
31 # if some version control system is used.
32
33 PROJECT_NUMBER =
34
35 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
36 # base path where the generated documentation will be put.
37 # If a relative path is entered, it will be relative to the location
38 # where doxygen was started. If left blank the current directory will be used.
39
40 OUTPUT_DIRECTORY =
41
42 # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
43 # 4096 sub-directories (in 2 levels) under the output directory of each output
44 # format and will distribute the generated files over these directories.
45 # Enabling this option can be useful when feeding doxygen a huge amount of
46 # source files, where putting all generated files in the same directory would
47 # otherwise cause performance problems for the file system.
48
49 CREATE_SUBDIRS = NO
50
51 # The OUTPUT_LANGUAGE tag is used to specify the language in which all
52 # documentation generated by doxygen is written. Doxygen will use this
53 # information to generate all constant output in the proper language.
54 # The default language is English, other supported languages are:
55 # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
56 # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
57 # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
58 # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
59 # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
60 # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
61
62 OUTPUT_LANGUAGE = English
63
64 # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
65 # include brief member descriptions after the members that are listed in
66 # the file and class documentation (similar to JavaDoc).
67 # Set to NO to disable this.
68
69 BRIEF_MEMBER_DESC = YES
70
71 # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
72 # the brief description of a member or function before the detailed description.
73 # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
74 # brief descriptions will be completely suppressed.
75
76 REPEAT_BRIEF = YES
77
78 # This tag implements a quasi-intelligent brief description abbreviator
79 # that is used to form the text in various listings. Each string
80 # in this list, if found as the leading text of the brief description, will be
81 # stripped from the text and the result after processing the whole list, is
82 # used as the annotated text. Otherwise, the brief description is used as-is.
83 # If left blank, the following values are used ("$name" is automatically
84 # replaced with the name of the entity): "The $name class" "The $name widget"
85 # "The $name file" "is" "provides" "specifies" "contains"
86 # "represents" "a" "an" "the"
87
88 ABBREVIATE_BRIEF =
89
90 # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
91 # Doxygen will generate a detailed section even if there is only a brief
92 # description.
93
94 ALWAYS_DETAILED_SEC = NO
95
96 # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
97 # inherited members of a class in the documentation of that class as if those
98 # members were ordinary class members. Constructors, destructors and assignment
99 # operators of the base classes will not be shown.
100
101 INLINE_INHERITED_MEMB = NO
102
103 # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
104 # path before file names in the file list and in the header files. If set
105 # to NO the shortest path that makes the file name unique will be used.
106
107 FULL_PATH_NAMES = YES
108
109 # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
110 # can be used to strip a user-defined part of the path. Stripping is
111 # only done if one of the specified strings matches the left-hand part of
112 # the path. The tag can be used to show relative paths in the file list.
113 # If left blank the directory from which doxygen is run is used as the
114 # path to strip.
115
116 STRIP_FROM_PATH =
117
118 # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
119 # the path mentioned in the documentation of a class, which tells
120 # the reader which header file to include in order to use a class.
121 # If left blank only the name of the header file containing the class
122 # definition is used. Otherwise one should specify the include paths that
123 # are normally passed to the compiler using the -I flag.
124
125 STRIP_FROM_INC_PATH =
126
127 # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
128 # (but less readable) file names. This can be useful if your file system
129 # doesn't support long names like on DOS, Mac, or CD-ROM.
130
131 SHORT_NAMES = NO
132
133 # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
134 # will interpret the first line (until the first dot) of a JavaDoc-style
135 # comment as the brief description. If set to NO, the JavaDoc
136 # comments will behave just like regular Qt-style comments
137 # (thus requiring an explicit @brief command for a brief description.)
138
139 JAVADOC_AUTOBRIEF = NO
140
141 # If the QT_AUTOBRIEF tag is set to YES then Doxygen will
142 # interpret the first line (until the first dot) of a Qt-style
143 # comment as the brief description. If set to NO, the comments
144 # will behave just like regular Qt-style comments (thus requiring
145 # an explicit \brief command for a brief description.)
146
147 QT_AUTOBRIEF = NO
148
149 # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
150 # treat a multi-line C++ special comment block (i.e. a block of //! or ///
151 # comments) as a brief description. This used to be the default behaviour.
152 # The new default is to treat a multi-line C++ comment block as a detailed
153 # description. Set this tag to YES if you prefer the old behaviour instead.
154
155 MULTILINE_CPP_IS_BRIEF = NO
156
157 # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
158 # member inherits the documentation from any documented member that it
159 # re-implements.
160
161 INHERIT_DOCS = YES
162
163 # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
164 # a new page for each member. If set to NO, the documentation of a member will
165 # be part of the file/class/namespace that contains it.
166
167 SEPARATE_MEMBER_PAGES = NO
168
169 # The TAB_SIZE tag can be used to set the number of spaces in a tab.
170 # Doxygen uses this value to replace tabs by spaces in code fragments.
171
172 TAB_SIZE = 4
173
174 # This tag can be used to specify a number of aliases that act
175 # as commands in the documentation. An alias has the form "name=value".
176 # For example adding "sideeffect=\par Side Effects:\n" will allow you to
177 # put the command \sideeffect (or @sideeffect) in the documentation, which
178 # will result in a user-defined paragraph with heading "Side Effects:".
179 # You can put \n's in the value part of an alias to insert newlines.
180
181 ALIASES =
182
183 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
184 # sources only. Doxygen will then generate output that is more tailored for C.
185 # For instance, some of the names that are used will be different. The list
186 # of all members will be omitted, etc.
187
188 OPTIMIZE_OUTPUT_FOR_C = YES
189
190 # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
191 # sources only. Doxygen will then generate output that is more tailored for
192 # Java. For instance, namespaces will be presented as packages, qualified
193 # scopes will look different, etc.
194
195 OPTIMIZE_OUTPUT_JAVA = NO
196
197 # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
198 # sources only. Doxygen will then generate output that is more tailored for
199 # Fortran.
200
201 OPTIMIZE_FOR_FORTRAN = NO
202
203 # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
204 # sources. Doxygen will then generate output that is tailored for
205 # VHDL.
206
207 OPTIMIZE_OUTPUT_VHDL = NO
208
209 # Doxygen selects the parser to use depending on the extension of the files it
210 # parses. With this tag you can assign which parser to use for a given extension.
211 # Doxygen has a built-in mapping, but you can override or extend it using this
212 # tag. The format is ext=language, where ext is a file extension, and language
213 # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
214 # C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
215 # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
216 # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
217 # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
218
219 EXTENSION_MAPPING =
220
221 # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
222 # to include (a tag file for) the STL sources as input, then you should
223 # set this tag to YES in order to let doxygen match functions declarations and
224 # definitions whose arguments contain STL classes (e.g. func(std::string); vs.
225 # func(std::string) {}). This also makes the inheritance and collaboration
226 # diagrams that involve STL classes more complete and accurate.
227
228 BUILTIN_STL_SUPPORT = NO
229
230 # If you use Microsoft's C++/CLI language, you should set this option to YES to
231 # enable parsing support.
232
233 CPP_CLI_SUPPORT = NO
234
235 # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
236 # Doxygen will parse them like normal C++ but will assume all classes use public
237 # instead of private inheritance when no explicit protection keyword is present.
238
239 SIP_SUPPORT = NO
240
241 # For Microsoft's IDL there are propget and propput attributes to indicate getter
242 # and setter methods for a property. Setting this option to YES (the default)
243 # will make doxygen replace the get and set methods by a property in the
244 # documentation. This will only work if the methods are indeed getting or
245 # setting a simple type. If this is not the case, or you want to show the
246 # methods anyway, you should set this option to NO.
247
248 IDL_PROPERTY_SUPPORT = YES
249
250 # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
251 # tag is set to YES, then doxygen will reuse the documentation of the first
252 # member in the group (if any) for the other members of the group. By default
253 # all members of a group must be documented explicitly.
254
255 DISTRIBUTE_GROUP_DOC = NO
256
257 # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
258 # the same type (for instance a group of public functions) to be put as a
259 # subgroup of that type (e.g. under the Public Functions section). Set it to
260 # NO to prevent subgrouping. Alternatively, this can be done per class using
261 # the \nosubgrouping command.
262
263 SUBGROUPING = YES
264
265 INLINE_GROUPED_CLASSES = YES
266 # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
267 # is documented as struct, union, or enum with the name of the typedef. So
268 # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
269 # with name TypeT. When disabled the typedef will appear as a member of a file,
270 # namespace, or class. And the struct will be named TypeS. This can typically
271 # be useful for C code in case the coding convention dictates that all compound
272 # types are typedef'ed and only the typedef is referenced, never the tag name.
273
274 TYPEDEF_HIDES_STRUCT = YES
275
276 # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
277 # determine which symbols to keep in memory and which to flush to disk.
278 # When the cache is full, less often used symbols will be written to disk.
279 # For small to medium size projects (<1000 input files) the default value is
280 # probably good enough. For larger projects a too small cache size can cause
281 # doxygen to be busy swapping symbols to and from disk most of the time
282 # causing a significant performance penalty.
283 # If the system has enough physical memory increasing the cache will improve the
284 # performance by keeping more symbols in memory. Note that the value works on
285 # a logarithmic scale so increasing the size by one will roughly double the
286 # memory usage. The cache size is given by this formula:
287 # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
288 # corresponding to a cache size of 2^16 = 65536 symbols
289
290 SYMBOL_CACHE_SIZE = 0
291
292 #---------------------------------------------------------------------------
293 # Build related configuration options
294 #---------------------------------------------------------------------------
295
296 # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
297 # documentation are documented, even if no documentation was available.
298 # Private class members and static file members will be hidden unless
299 # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
300
301 EXTRACT_ALL = NO
302
303 # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
304 # will be included in the documentation.
305
306 EXTRACT_PRIVATE = NO
307
308 # If the EXTRACT_STATIC tag is set to YES all static members of a file
309 # will be included in the documentation.
310
311 EXTRACT_STATIC = YES
312
313 # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
314 # defined locally in source files will be included in the documentation.
315 # If set to NO only classes defined in header files are included.
316
317 EXTRACT_LOCAL_CLASSES = YES
318
319 # This flag is only useful for Objective-C code. When set to YES local
320 # methods, which are defined in the implementation section but not in
321 # the interface are included in the documentation.
322 # If set to NO (the default) only methods in the interface are included.
323
324 EXTRACT_LOCAL_METHODS = NO
325
326 # If this flag is set to YES, the members of anonymous namespaces will be
327 # extracted and appear in the documentation as a namespace called
328 # 'anonymous_namespace{file}', where file will be replaced with the base
329 # name of the file that contains the anonymous namespace. By default
330 # anonymous namespaces are hidden.
331
332 EXTRACT_ANON_NSPACES = NO
333
334 # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
335 # undocumented members of documented classes, files or namespaces.
336 # If set to NO (the default) these members will be included in the
337 # various overviews, but no documentation section is generated.
338 # This option has no effect if EXTRACT_ALL is enabled.
339
340 HIDE_UNDOC_MEMBERS = NO
341
342 # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
343 # undocumented classes that are normally visible in the class hierarchy.
344 # If set to NO (the default) these classes will be included in the various
345 # overviews. This option has no effect if EXTRACT_ALL is enabled.
346
347 HIDE_UNDOC_CLASSES = NO
348
349 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
350 # friend (class|struct|union) declarations.
351 # If set to NO (the default) these declarations will be included in the
352 # documentation.
353
354 HIDE_FRIEND_COMPOUNDS = NO
355
356 # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
357 # documentation blocks found inside the body of a function.
358 # If set to NO (the default) these blocks will be appended to the
359 # function's detailed documentation block.
360
361 HIDE_IN_BODY_DOCS = NO
362
363 # The INTERNAL_DOCS tag determines if documentation
364 # that is typed after a \internal command is included. If the tag is set
365 # to NO (the default) then the documentation will be excluded.
366 # Set it to YES to include the internal documentation.
367
368 INTERNAL_DOCS = NO
369
370 # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
371 # file names in lower-case letters. If set to YES upper-case letters are also
372 # allowed. This is useful if you have classes or files whose names only differ
373 # in case and if your file system supports case sensitive file names. Windows
374 # and Mac users are advised to set this option to NO.
375
376 CASE_SENSE_NAMES = YES
377
378 # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
379 # will show members with their full class and namespace scopes in the
380 # documentation. If set to YES the scope will be hidden.
381
382 HIDE_SCOPE_NAMES = NO
383
384 # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
385 # will put a list of the files that are included by a file in the documentation
386 # of that file.
387
388 SHOW_INCLUDE_FILES = YES
389
390 # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
391 # will list include files with double quotes in the documentation
392 # rather than with sharp brackets.
393
394 FORCE_LOCAL_INCLUDES = NO
395
396 # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
397 # is inserted in the documentation for inline members.
398
399 INLINE_INFO = YES
400
401 # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
402 # will sort the (detailed) documentation of file and class members
403 # alphabetically by member name. If set to NO the members will appear in
404 # declaration order.
405
406 SORT_MEMBER_DOCS = NO
407
408 # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
409 # brief documentation of file, namespace and class members alphabetically
410 # by member name. If set to NO (the default) the members will appear in
411 # declaration order.
412
413 SORT_BRIEF_DOCS = NO
414
415 # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
416 # will sort the (brief and detailed) documentation of class members so that
417 # constructors and destructors are listed first. If set to NO (the default)
418 # the constructors will appear in the respective orders defined by
419 # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
420 # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
421 # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
422
423 SORT_MEMBERS_CTORS_1ST = NO
424
425 # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
426 # hierarchy of group names into alphabetical order. If set to NO (the default)
427 # the group names will appear in their defined order.
428
429 SORT_GROUP_NAMES = NO
430
431 # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
432 # sorted by fully-qualified names, including namespaces. If set to
433 # NO (the default), the class list will be sorted only by class name,
434 # not including the namespace part.
435 # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
436 # Note: This option applies only to the class list, not to the
437 # alphabetical list.
438
439 SORT_BY_SCOPE_NAME = NO
440
441 # The GENERATE_TODOLIST tag can be used to enable (YES) or
442 # disable (NO) the todo list. This list is created by putting \todo
443 # commands in the documentation.
444
445 GENERATE_TODOLIST = YES
446
447 # The GENERATE_TESTLIST tag can be used to enable (YES) or
448 # disable (NO) the test list. This list is created by putting \test
449 # commands in the documentation.
450
451 GENERATE_TESTLIST = YES
452
453 # The GENERATE_BUGLIST tag can be used to enable (YES) or
454 # disable (NO) the bug list. This list is created by putting \bug
455 # commands in the documentation.
456
457 GENERATE_BUGLIST = YES
458
459 # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
460 # disable (NO) the deprecated list. This list is created by putting
461 # \deprecated commands in the documentation.
462
463 GENERATE_DEPRECATEDLIST= YES
464
465 # The ENABLED_SECTIONS tag can be used to enable conditional
466 # documentation sections, marked by \if sectionname ... \endif.
467
468 ENABLED_SECTIONS =
469
470 # The MAX_INITIALIZER_LINES tag determines the maximum number of lines
471 # the initial value of a variable or define consists of for it to appear in
472 # the documentation. If the initializer consists of more lines than specified
473 # here it will be hidden. Use a value of 0 to hide initializers completely.
474 # The appearance of the initializer of individual variables and defines in the
475 # documentation can be controlled using \showinitializer or \hideinitializer
476 # command in the documentation regardless of this setting.
477
478 MAX_INITIALIZER_LINES = 30
479
480 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated
481 # at the bottom of the documentation of classes and structs. If set to YES the
482 # list will mention the files that were used to generate the documentation.
483
484 SHOW_USED_FILES = YES
485
486 # If the sources in your project are distributed over multiple directories
487 # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
488 # in the documentation. The default is NO.
489
490 SHOW_DIRECTORIES = NO
491
492 # Set the SHOW_FILES tag to NO to disable the generation of the Files page.
493 # This will remove the Files entry from the Quick Index and from the
494 # Folder Tree View (if specified). The default is YES.
495
496 SHOW_FILES = YES
497
498 # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
499 # Namespaces page.
500 # This will remove the Namespaces entry from the Quick Index
501 # and from the Folder Tree View (if specified). The default is YES.
502
503 SHOW_NAMESPACES = YES
504
505 # The FILE_VERSION_FILTER tag can be used to specify a program or script that
506 # doxygen should invoke to get the current version for each file (typically from
507 # the version control system). Doxygen will invoke the program by executing (via
508 # popen()) the command <command> <input-file>, where <command> is the value of
509 # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
510 # provided by doxygen. Whatever the program writes to standard output
511 # is used as the file version. See the manual for examples.
512
513 FILE_VERSION_FILTER =
514
515 # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
516 # by doxygen. The layout file controls the global structure of the generated
517 # output files in an output format independent way. To create the layout file
518 # that represents doxygen's defaults, run doxygen with the -l option.
519 # You can optionally specify a file name after the option, if omitted
520 # DoxygenLayout.xml will be used as the name of the layout file.
521
522 LAYOUT_FILE =
523
524 #---------------------------------------------------------------------------
525 # configuration options related to warning and progress messages
526 #---------------------------------------------------------------------------
527
528 # The QUIET tag can be used to turn on/off the messages that are generated
529 # by doxygen. Possible values are YES and NO. If left blank NO is used.
530
531 QUIET = NO
532
533 # The WARNINGS tag can be used to turn on/off the warning messages that are
534 # generated by doxygen. Possible values are YES and NO. If left blank
535 # NO is used.
536
537 WARNINGS = YES
538
539 # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
540 # for undocumented members. If EXTRACT_ALL is set to YES then this flag will
541 # automatically be disabled.
542
543 WARN_IF_UNDOCUMENTED = YES
544
545 # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
546 # potential errors in the documentation, such as not documenting some
547 # parameters in a documented function, or documenting parameters that
548 # don't exist or using markup commands wrongly.
549
550 WARN_IF_DOC_ERROR = YES
551
552 # This WARN_NO_PARAMDOC option can be enabled to get warnings for
553 # functions that are documented, but have no documentation for their parameters
554 # or return value. If set to NO (the default) doxygen will only warn about
555 # wrong or incomplete parameter documentation, but not about the absence of
556 # documentation.
557
558 WARN_NO_PARAMDOC = NO
559
560 # The WARN_FORMAT tag determines the format of the warning messages that
561 # doxygen can produce. The string should contain the $file, $line, and $text
562 # tags, which will be replaced by the file and line number from which the
563 # warning originated and the warning text. Optionally the format may contain
564 # $version, which will be replaced by the version of the file (if it could
565 # be obtained via FILE_VERSION_FILTER)
566
567 WARN_FORMAT = "$file:$line: $text"
568
569 # The WARN_LOGFILE tag can be used to specify a file to which warning
570 # and error messages should be written. If left blank the output is written
571 # to stderr.
572
573 WARN_LOGFILE =
574
575 #---------------------------------------------------------------------------
576 # configuration options related to the input files
577 #---------------------------------------------------------------------------
578
579 # The INPUT tag can be used to specify the files and/or directories that contain
580 # documented source files. You may enter file names like "myfile.cpp" or
581 # directories like "/usr/src/myproject". Separate the files or directories
582 # with spaces.
583
584 INPUT = lmdb.h midl.h mdb.c midl.c
585
586 # This tag can be used to specify the character encoding of the source files
587 # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
588 # also the default input encoding. Doxygen uses libiconv (or the iconv built
589 # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
590 # the list of possible encodings.
591
592 INPUT_ENCODING = UTF-8
593
594 # If the value of the INPUT tag contains directories, you can use the
595 # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
596 # and *.h) to filter out the source-files in the directories. If left
597 # blank the following patterns are tested:
598 # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
599 # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
600
601 FILE_PATTERNS =
602
603 # The RECURSIVE tag can be used to specify whether or not subdirectories
604 # should be searched for input files as well. Possible values are YES and NO.
605 # If left blank NO is used.
606
607 RECURSIVE = NO
608
609 # The EXCLUDE tag can be used to specify files and/or directories that should
610 # be excluded from the INPUT source files. This way you can easily exclude a
611 # subdirectory from a directory tree whose root is specified with the INPUT tag.
612
613 EXCLUDE =
614
615 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
616 # directories that are symbolic links (a Unix filesystem feature) are excluded
617 # from the input.
618
619 EXCLUDE_SYMLINKS = NO
620
621 # If the value of the INPUT tag contains directories, you can use the
622 # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
623 # certain files from those directories. Note that the wildcards are matched
624 # against the file with absolute path, so to exclude all test directories
625 # for example use the pattern */test/*
626
627 EXCLUDE_PATTERNS =
628
629 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
630 # (namespaces, classes, functions, etc.) that should be excluded from the
631 # output. The symbol name can be a fully qualified name, a word, or if the
632 # wildcard * is used, a substring. Examples: ANamespace, AClass,
633 # AClass::ANamespace, ANamespace::*Test
634
635 EXCLUDE_SYMBOLS =
636
637 # The EXAMPLE_PATH tag can be used to specify one or more files or
638 # directories that contain example code fragments that are included (see
639 # the \include command).
640
641 EXAMPLE_PATH =
642
643 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
644 # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
645 # and *.h) to filter out the source-files in the directories. If left
646 # blank all files are included.
647
648 EXAMPLE_PATTERNS =
649
650 # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
651 # searched for input files to be used with the \include or \dontinclude
652 # commands irrespective of the value of the RECURSIVE tag.
653 # Possible values are YES and NO. If left blank NO is used.
654
655 EXAMPLE_RECURSIVE = NO
656
657 # The IMAGE_PATH tag can be used to specify one or more files or
658 # directories that contain images that are included in the documentation (see
659 # the \image command).
660
661 IMAGE_PATH =
662
663 # The INPUT_FILTER tag can be used to specify a program that doxygen should
664 # invoke to filter for each input file. Doxygen will invoke the filter program
665 # by executing (via popen()) the command <filter> <input-file>, where <filter>
666 # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
667 # input file. Doxygen will then use the output that the filter program writes
668 # to standard output.
669 # If FILTER_PATTERNS is specified, this tag will be
670 # ignored.
671
672 INPUT_FILTER =
673
674 # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
675 # basis.
676 # Doxygen will compare the file name with each pattern and apply the
677 # filter if there is a match.
678 # The filters are a list of the form:
679 # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
680 # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
681 # is applied to all files.
682
683 FILTER_PATTERNS =
684
685 # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
686 # INPUT_FILTER) will be used to filter the input files when producing source
687 # files to browse (i.e. when SOURCE_BROWSER is set to YES).
688
689 FILTER_SOURCE_FILES = NO
690
691 #---------------------------------------------------------------------------
692 # configuration options related to source browsing
693 #---------------------------------------------------------------------------
694
695 # If the SOURCE_BROWSER tag is set to YES then a list of source files will
696 # be generated. Documented entities will be cross-referenced with these sources.
697 # Note: To get rid of all source code in the generated output, make sure also
698 # VERBATIM_HEADERS is set to NO.
699
700 SOURCE_BROWSER = NO
701
702 # Setting the INLINE_SOURCES tag to YES will include the body
703 # of functions and classes directly in the documentation.
704
705 INLINE_SOURCES = NO
706
707 # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
708 # doxygen to hide any special comment blocks from generated source code
709 # fragments. Normal C and C++ comments will always remain visible.
710
711 STRIP_CODE_COMMENTS = YES
712
713 # If the REFERENCED_BY_RELATION tag is set to YES
714 # then for each documented function all documented
715 # functions referencing it will be listed.
716
717 REFERENCED_BY_RELATION = NO
718
719 # If the REFERENCES_RELATION tag is set to YES
720 # then for each documented function all documented entities
721 # called/used by that function will be listed.
722
723 REFERENCES_RELATION = NO
724
725 # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
726 # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
727 # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
728 # link to the source code.
729 # Otherwise they will link to the documentation.
730
731 REFERENCES_LINK_SOURCE = YES
732
733 # If the USE_HTAGS tag is set to YES then the references to source code
734 # will point to the HTML generated by the htags(1) tool instead of doxygen
735 # built-in source browser. The htags tool is part of GNU's global source
736 # tagging system (see http://www.gnu.org/software/global/global.html). You
737 # will need version 4.8.6 or higher.
738
739 USE_HTAGS = NO
740
741 # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
742 # will generate a verbatim copy of the header file for each class for
743 # which an include is specified. Set to NO to disable this.
744
745 VERBATIM_HEADERS = YES
746
747 #---------------------------------------------------------------------------
748 # configuration options related to the alphabetical class index
749 #---------------------------------------------------------------------------
750
751 # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
752 # of all compounds will be generated. Enable this if the project
753 # contains a lot of classes, structs, unions or interfaces.
754
755 ALPHABETICAL_INDEX = YES
756
757 # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
758 # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
759 # in which this list will be split (can be a number in the range [1..20])
760
761 COLS_IN_ALPHA_INDEX = 5
762
763 # In case all classes in a project start with a common prefix, all
764 # classes will be put under the same header in the alphabetical index.
765 # The IGNORE_PREFIX tag can be used to specify one or more prefixes that
766 # should be ignored while generating the index headers.
767
768 IGNORE_PREFIX =
769
770 #---------------------------------------------------------------------------
771 # configuration options related to the HTML output
772 #---------------------------------------------------------------------------
773
774 # If the GENERATE_HTML tag is set to YES (the default) Doxygen will
775 # generate HTML output.
776
777 GENERATE_HTML = YES
778
779 # The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
780 # If a relative path is entered the value of OUTPUT_DIRECTORY will be
781 # put in front of it. If left blank `html' will be used as the default path.
782
783 HTML_OUTPUT = html
784
785 # The HTML_FILE_EXTENSION tag can be used to specify the file extension for
786 # each generated HTML page (for example: .htm,.php,.asp). If it is left blank
787 # doxygen will generate files with .html extension.
788
789 HTML_FILE_EXTENSION = .html
790
791 # The HTML_HEADER tag can be used to specify a personal HTML header for
792 # each generated HTML page. If it is left blank doxygen will generate a
793 # standard header.
794
795 HTML_HEADER =
796
797 # The HTML_FOOTER tag can be used to specify a personal HTML footer for
798 # each generated HTML page. If it is left blank doxygen will generate a
799 # standard footer.
800
801 HTML_FOOTER =
802
803 # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
804 # style sheet that is used by each HTML page. It can be used to
805 # fine-tune the look of the HTML output. If the tag is left blank doxygen
806 # will generate a default style sheet. Note that doxygen will try to copy
807 # the style sheet file to the HTML output directory, so don't put your own
808 # stylesheet in the HTML output directory as well, or it will be erased!
809
810 HTML_STYLESHEET =
811
812 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
813 # Doxygen will adjust the colors in the stylesheet and background images
814 # according to this color. Hue is specified as an angle on a colorwheel,
815 # see http://en.wikipedia.org/wiki/Hue for more information.
816 # For instance the value 0 represents red, 60 is yellow, 120 is green,
817 # 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
818 # The allowed range is 0 to 359.
819
820 HTML_COLORSTYLE_HUE = 220
821
822 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
823 # the colors in the HTML output. For a value of 0 the output will use
824 # grayscales only. A value of 255 will produce the most vivid colors.
825
826 HTML_COLORSTYLE_SAT = 100
827
828 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
829 # the luminance component of the colors in the HTML output. Values below
830 # 100 gradually make the output lighter, whereas values above 100 make
831 # the output darker. The value divided by 100 is the actual gamma applied,
832 # so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
833 # and 100 does not change the gamma.
834
835 HTML_COLORSTYLE_GAMMA = 80
836
837 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
838 # page will contain the date and time when the page was generated. Setting
839 # this to NO can help when comparing the output of multiple runs.
840
841 HTML_TIMESTAMP = YES
842
843 # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
844 # files or namespaces will be aligned in HTML using tables. If set to
845 # NO a bullet list will be used.
846
847 HTML_ALIGN_MEMBERS = YES
848
849 # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
850 # documentation will contain sections that can be hidden and shown after the
851 # page has loaded. For this to work a browser that supports
852 # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
853 # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
854
855 HTML_DYNAMIC_SECTIONS = NO
856
857 # If the GENERATE_DOCSET tag is set to YES, additional index files
858 # will be generated that can be used as input for Apple's Xcode 3
859 # integrated development environment, introduced with OSX 10.5 (Leopard).
860 # To create a documentation set, doxygen will generate a Makefile in the
861 # HTML output directory. Running make will produce the docset in that
862 # directory and running "make install" will install the docset in
863 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
864 # it at startup.
865 # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
866 # for more information.
867
868 GENERATE_DOCSET = NO
869
870 # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
871 # feed. A documentation feed provides an umbrella under which multiple
872 # documentation sets from a single provider (such as a company or product suite)
873 # can be grouped.
874
875 DOCSET_FEEDNAME = "Doxygen generated docs"
876
877 # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
878 # should uniquely identify the documentation set bundle. This should be a
879 # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
880 # will append .docset to the name.
881
882 DOCSET_BUNDLE_ID = org.doxygen.Project
883
884 # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
885 # the documentation publisher. This should be a reverse domain-name style
886 # string, e.g. com.mycompany.MyDocSet.documentation.
887
888 DOCSET_PUBLISHER_ID = org.doxygen.Publisher
889
890 # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
891
892 DOCSET_PUBLISHER_NAME = Publisher
893
894 # If the GENERATE_HTMLHELP tag is set to YES, additional index files
895 # will be generated that can be used as input for tools like the
896 # Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
897 # of the generated HTML documentation.
898
899 GENERATE_HTMLHELP = NO
900
901 # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
902 # be used to specify the file name of the resulting .chm file. You
903 # can add a path in front of the file if the result should not be
904 # written to the html output directory.
905
906 CHM_FILE =
907
908 # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
909 # be used to specify the location (absolute path including file name) of
910 # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
911 # the HTML help compiler on the generated index.hhp.
912
913 HHC_LOCATION =
914
915 # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
916 # controls if a separate .chi index file is generated (YES) or that
917 # it should be included in the master .chm file (NO).
918
919 GENERATE_CHI = NO
920
921 # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
922 # is used to encode HtmlHelp index (hhk), content (hhc) and project file
923 # content.
924
925 CHM_INDEX_ENCODING =
926
927 # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
928 # controls whether a binary table of contents is generated (YES) or a
929 # normal table of contents (NO) in the .chm file.
930
931 BINARY_TOC = NO
932
933 # The TOC_EXPAND flag can be set to YES to add extra items for group members
934 # to the contents of the HTML help documentation and to the tree view.
935
936 TOC_EXPAND = NO
937
938 # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
939 # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
940 # that can be used as input for Qt's qhelpgenerator to generate a
941 # Qt Compressed Help (.qch) of the generated HTML documentation.
942
943 GENERATE_QHP = NO
944
945 # If the QHG_LOCATION tag is specified, the QCH_FILE tag can
946 # be used to specify the file name of the resulting .qch file.
947 # The path specified is relative to the HTML output folder.
948
949 QCH_FILE =
950
951 # The QHP_NAMESPACE tag specifies the namespace to use when generating
952 # Qt Help Project output. For more information please see
953 # http://doc.trolltech.com/qthelpproject.html#namespace
954
955 QHP_NAMESPACE = org.doxygen.Project
956
957 # The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
958 # Qt Help Project output. For more information please see
959 # http://doc.trolltech.com/qthelpproject.html#virtual-folders
960
961 QHP_VIRTUAL_FOLDER = doc
962
963 # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
964 # add. For more information please see
965 # http://doc.trolltech.com/qthelpproject.html#custom-filters
966
967 QHP_CUST_FILTER_NAME =
968
969 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
970 # custom filter to add. For more information please see
971 # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
972 # Qt Help Project / Custom Filters</a>.
973
974 QHP_CUST_FILTER_ATTRS =
975
976 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
977 # project's
978 # filter section matches.
979 # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
980 # Qt Help Project / Filter Attributes</a>.
981
982 QHP_SECT_FILTER_ATTRS =
983
984 # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
985 # be used to specify the location of Qt's qhelpgenerator.
986 # If non-empty doxygen will try to run qhelpgenerator on the generated
987 # .qhp file.
988
989 QHG_LOCATION =
990
991 # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
992 # will be generated, which together with the HTML files, form an Eclipse help
993 # plugin. To install this plugin and make it available under the help contents
994 # menu in Eclipse, the contents of the directory containing the HTML and XML
995 # files need to be copied into the plugins directory of eclipse. The name of
996 # the directory within the plugins directory should be the same as
997 # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
998 # the help appears.
999
1000 GENERATE_ECLIPSEHELP = NO
1001
1002 # A unique identifier for the eclipse help plugin. When installing the plugin
1003 # the directory name containing the HTML and XML files should also have
1004 # this name.
1005
1006 ECLIPSE_DOC_ID = org.doxygen.Project
1007
1008 # The DISABLE_INDEX tag can be used to turn on/off the condensed index at
1009 # top of each HTML page. The value NO (the default) enables the index and
1010 # the value YES disables it.
1011
1012 DISABLE_INDEX = NO
1013
1014 # This tag can be used to set the number of enum values (range [1..20])
1015 # that doxygen will group on one line in the generated HTML documentation.
1016
1017 ENUM_VALUES_PER_LINE = 4
1018
1019 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
1020 # structure should be generated to display hierarchical information.
1021 # If the tag value is set to YES, a side panel will be generated
1022 # containing a tree-like index structure (just like the one that
1023 # is generated for HTML Help). For this to work a browser that supports
1024 # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
1025 # Windows users are probably better off using the HTML help feature.
1026
1027 GENERATE_TREEVIEW = NO
1028
1029 # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
1030 # and Class Hierarchy pages using a tree view instead of an ordered list.
1031
1032 USE_INLINE_TREES = NO
1033
1034 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
1035 # used to set the initial width (in pixels) of the frame in which the tree
1036 # is shown.
1037
1038 TREEVIEW_WIDTH = 250
1039
1040 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
1041 # links to external symbols imported via tag files in a separate window.
1042
1043 EXT_LINKS_IN_WINDOW = NO
1044
1045 # Use this tag to change the font size of Latex formulas included
1046 # as images in the HTML documentation. The default is 10. Note that
1047 # when you change the font size after a successful doxygen run you need
1048 # to manually remove any form_*.png images from the HTML output directory
1049 # to force them to be regenerated.
1050
1051 FORMULA_FONTSIZE = 10
1052
1053 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images
1054 # generated for formulas are transparent PNGs. Transparent PNGs are
1055 # not supported properly for IE 6.0, but are supported on all modern browsers.
1056 # Note that when changing this option you need to delete any form_*.png files
1057 # in the HTML output before the changes have effect.
1058
1059 FORMULA_TRANSPARENT = YES
1060
1061 # When the SEARCHENGINE tag is enabled doxygen will generate a search box
1062 # for the HTML output. The underlying search engine uses javascript
1063 # and DHTML and should work on any modern browser. Note that when using
1064 # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
1065 # (GENERATE_DOCSET) there is already a search function so this one should
1066 # typically be disabled. For large projects the javascript based search engine
1067 # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
1068
1069 SEARCHENGINE = YES
1070
1071 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
1072 # implemented using a PHP enabled web server instead of at the web client
1073 # using Javascript. Doxygen will generate the search PHP script and index
1074 # file to put on the web server. The advantage of the server
1075 # based approach is that it scales better to large projects and allows
1076 # full text search. The disadvantages are that it is more difficult to set up
1077 # and does not have live searching capabilities.
1078
1079 SERVER_BASED_SEARCH = NO
1080
1081 #---------------------------------------------------------------------------
1082 # configuration options related to the LaTeX output
1083 #---------------------------------------------------------------------------
1084
1085 # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
1086 # generate Latex output.
1087
1088 GENERATE_LATEX = NO
1089
1090 # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
1091 # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1092 # put in front of it. If left blank `latex' will be used as the default path.
1093
1094 LATEX_OUTPUT = latex
1095
1096 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1097 # invoked. If left blank `latex' will be used as the default command name.
1098 # Note that when enabling USE_PDFLATEX this option is only used for
1099 # generating bitmaps for formulas in the HTML output, but not in the
1100 # Makefile that is written to the output directory.
1101
1102 LATEX_CMD_NAME = latex
1103
1104 # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1105 # generate index for LaTeX. If left blank `makeindex' will be used as the
1106 # default command name.
1107
1108 MAKEINDEX_CMD_NAME = makeindex
1109
1110 # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1111 # LaTeX documents. This may be useful for small projects and may help to
1112 # save some trees in general.
1113
1114 COMPACT_LATEX = NO
1115
1116 # The PAPER_TYPE tag can be used to set the paper type that is used
1117 # by the printer. Possible values are: a4, a4wide, letter, legal and
1118 # executive. If left blank a4wide will be used.
1119
1120 PAPER_TYPE = a4wide
1121
1122 # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1123 # packages that should be included in the LaTeX output.
1124
1125 EXTRA_PACKAGES =
1126
1127 # The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1128 # the generated latex document. The header should contain everything until
1129 # the first chapter. If it is left blank doxygen will generate a
1130 # standard header. Notice: only use this tag if you know what you are doing!
1131
1132 LATEX_HEADER =
1133
1134 # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1135 # is prepared for conversion to pdf (using ps2pdf). The pdf file will
1136 # contain links (just like the HTML output) instead of page references.
1137 # This makes the output suitable for online browsing using a pdf viewer.
1138
1139 PDF_HYPERLINKS = YES
1140
1141 # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1142 # plain latex in the generated Makefile. Set this option to YES to get a
1143 # higher quality PDF documentation.
1144
1145 USE_PDFLATEX = YES
1146
1147 # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
1148 # command to the generated LaTeX files. This will instruct LaTeX to keep
1149 # running if errors occur, instead of asking the user for help.
1150 # This option is also used when generating formulas in HTML.
1151
1152 LATEX_BATCHMODE = NO
1153
1154 # If LATEX_HIDE_INDICES is set to YES then doxygen will not
1155 # include the index chapters (such as File Index, Compound Index, etc.)
1156 # in the output.
1157
1158 LATEX_HIDE_INDICES = NO
1159
1160 # If LATEX_SOURCE_CODE is set to YES then doxygen will include
1161 # source code with syntax highlighting in the LaTeX output.
1162 # Note that which sources are shown also depends on other settings
1163 # such as SOURCE_BROWSER.
1164
1165 LATEX_SOURCE_CODE = NO
1166
1167 #---------------------------------------------------------------------------
1168 # configuration options related to the RTF output
1169 #---------------------------------------------------------------------------
1170
1171 # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
1172 # The RTF output is optimized for Word 97 and may not look very pretty with
1173 # other RTF readers or editors.
1174
1175 GENERATE_RTF = NO
1176
1177 # The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1178 # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1179 # put in front of it. If left blank `rtf' will be used as the default path.
1180
1181 RTF_OUTPUT = rtf
1182
1183 # If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1184 # RTF documents. This may be useful for small projects and may help to
1185 # save some trees in general.
1186
1187 COMPACT_RTF = NO
1188
1189 # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1190 # will contain hyperlink fields. The RTF file will
1191 # contain links (just like the HTML output) instead of page references.
1192 # This makes the output suitable for online browsing using WORD or other
1193 # programs which support those fields.
1194 # Note: wordpad (write) and others do not support links.
1195
1196 RTF_HYPERLINKS = NO
1197
1198 # Load stylesheet definitions from file. Syntax is similar to doxygen's
1199 # config file, i.e. a series of assignments. You only have to provide
1200 # replacements, missing definitions are set to their default value.
1201
1202 RTF_STYLESHEET_FILE =
1203
1204 # Set optional variables used in the generation of an rtf document.
1205 # Syntax is similar to doxygen's config file.
1206
1207 RTF_EXTENSIONS_FILE =
1208
1209 #---------------------------------------------------------------------------
1210 # configuration options related to the man page output
1211 #---------------------------------------------------------------------------
1212
1213 # If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1214 # generate man pages
1215
1216 GENERATE_MAN = YES
1217
1218 # The MAN_OUTPUT tag is used to specify where the man pages will be put.
1219 # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1220 # put in front of it. If left blank `man' will be used as the default path.
1221
1222 MAN_OUTPUT = man
1223
1224 # The MAN_EXTENSION tag determines the extension that is added to
1225 # the generated man pages (default is the subroutine's section .3)
1226
1227 MAN_EXTENSION = .3
1228
1229 # If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1230 # then it will generate one additional man file for each entity
1231 # documented in the real man page(s). These additional files
1232 # only source the real man page, but without them the man command
1233 # would be unable to find the correct page. The default is NO.
1234
1235 MAN_LINKS = NO
1236
1237 #---------------------------------------------------------------------------
1238 # configuration options related to the XML output
1239 #---------------------------------------------------------------------------
1240
1241 # If the GENERATE_XML tag is set to YES Doxygen will
1242 # generate an XML file that captures the structure of
1243 # the code including all documentation.
1244
1245 GENERATE_XML = NO
1246
1247 # The XML_OUTPUT tag is used to specify where the XML pages will be put.
1248 # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1249 # put in front of it. If left blank `xml' will be used as the default path.
1250
1251 XML_OUTPUT = xml
1252
1253 # The XML_SCHEMA tag can be used to specify an XML schema,
1254 # which can be used by a validating XML parser to check the
1255 # syntax of the XML files.
1256
1257 XML_SCHEMA =
1258
1259 # The XML_DTD tag can be used to specify an XML DTD,
1260 # which can be used by a validating XML parser to check the
1261 # syntax of the XML files.
1262
1263 XML_DTD =
1264
1265 # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1266 # dump the program listings (including syntax highlighting
1267 # and cross-referencing information) to the XML output. Note that
1268 # enabling this will significantly increase the size of the XML output.
1269
1270 XML_PROGRAMLISTING = YES
1271
1272 #---------------------------------------------------------------------------
1273 # configuration options for the AutoGen Definitions output
1274 #---------------------------------------------------------------------------
1275
1276 # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1277 # generate an AutoGen Definitions (see autogen.sf.net) file
1278 # that captures the structure of the code including all
1279 # documentation. Note that this feature is still experimental
1280 # and incomplete at the moment.
1281
1282 GENERATE_AUTOGEN_DEF = NO
1283
1284 #---------------------------------------------------------------------------
1285 # configuration options related to the Perl module output
1286 #---------------------------------------------------------------------------
1287
1288 # If the GENERATE_PERLMOD tag is set to YES Doxygen will
1289 # generate a Perl module file that captures the structure of
1290 # the code including all documentation. Note that this
1291 # feature is still experimental and incomplete at the
1292 # moment.
1293
1294 GENERATE_PERLMOD = NO
1295
1296 # If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1297 # the necessary Makefile rules, Perl scripts and LaTeX code to be able
1298 # to generate PDF and DVI output from the Perl module output.
1299
1300 PERLMOD_LATEX = NO
1301
1302 # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1303 # nicely formatted so it can be parsed by a human reader.
1304 # This is useful
1305 # if you want to understand what is going on.
1306 # On the other hand, if this
1307 # tag is set to NO the size of the Perl module output will be much smaller
1308 # and Perl will parse it just the same.
1309
1310 PERLMOD_PRETTY = YES
1311
1312 # The names of the make variables in the generated doxyrules.make file
1313 # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1314 # This is useful so different doxyrules.make files included by the same
1315 # Makefile don't overwrite each other's variables.
1316
1317 PERLMOD_MAKEVAR_PREFIX =
1318
1319 #---------------------------------------------------------------------------
1320 # Configuration options related to the preprocessor
1321 #---------------------------------------------------------------------------
1322
1323 # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1324 # evaluate all C-preprocessor directives found in the sources and include
1325 # files.
1326
1327 ENABLE_PREPROCESSING = YES
1328
1329 # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1330 # names in the source code. If set to NO (the default) only conditional
1331 # compilation will be performed. Macro expansion can be done in a controlled
1332 # way by setting EXPAND_ONLY_PREDEF to YES.
1333
1334 MACRO_EXPANSION = NO
1335
1336 # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1337 # then the macro expansion is limited to the macros specified with the
1338 # PREDEFINED and EXPAND_AS_DEFINED tags.
1339
1340 EXPAND_ONLY_PREDEF = NO
1341
1342 # If the SEARCH_INCLUDES tag is set to YES (the default) the include files
1343 # in the INCLUDE_PATH (see below) will be searched if a #include is found.
1344
1345 SEARCH_INCLUDES = YES
1346
1347 # The INCLUDE_PATH tag can be used to specify one or more directories that
1348 # contain include files that are not input files but should be processed by
1349 # the preprocessor.
1350
1351 INCLUDE_PATH =
1352
1353 # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1354 # patterns (like *.h and *.hpp) to filter out the header-files in the
1355 # directories. If left blank, the patterns specified with FILE_PATTERNS will
1356 # be used.
1357
1358 INCLUDE_FILE_PATTERNS =
1359
1360 # The PREDEFINED tag can be used to specify one or more macro names that
1361 # are defined before the preprocessor is started (similar to the -D option of
1362 # gcc). The argument of the tag is a list of macros of the form: name
1363 # or name=definition (no spaces). If the definition and the = are
1364 # omitted =1 is assumed. To prevent a macro definition from being
1365 # undefined via #undef or recursively expanded use the := operator
1366 # instead of the = operator.
1367
1368 PREDEFINED = DEBUG=2 __GNUC__=1
1369
1370 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1371 # this tag can be used to specify a list of macro names that should be expanded.
1372 # The macro definition that is found in the sources will be used.
1373 # Use the PREDEFINED tag if you want to use a different macro definition.
1374
1375 EXPAND_AS_DEFINED =
1376
1377 # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1378 # doxygen's preprocessor will remove all function-like macros that are alone
1379 # on a line, have an all uppercase name, and do not end with a semicolon. Such
1380 # function macros are typically used for boiler-plate code, and will confuse
1381 # the parser if not removed.
1382
1383 SKIP_FUNCTION_MACROS = YES
1384
1385 #---------------------------------------------------------------------------
1386 # Configuration::additions related to external references
1387 #---------------------------------------------------------------------------
1388
1389 # The TAGFILES option can be used to specify one or more tagfiles.
1390 # Optionally an initial location of the external documentation
1391 # can be added for each tagfile. The format of a tag file without
1392 # this location is as follows:
1393 #
1394 # TAGFILES = file1 file2 ...
1395 # Adding location for the tag files is done as follows:
1396 #
1397 # TAGFILES = file1=loc1 "file2 = loc2" ...
1398 # where "loc1" and "loc2" can be relative or absolute paths or
1399 # URLs. If a location is present for each tag, the installdox tool
1400 # does not have to be run to correct the links.
1401 # Note that each tag file must have a unique name
1402 # (where the name does NOT include the path).
1403 # If a tag file is not located in the directory in which doxygen
1404 # is run, you must also specify the path to the tagfile here.
1405
1406 TAGFILES =
1407
1408 # When a file name is specified after GENERATE_TAGFILE, doxygen will create
1409 # a tag file that is based on the input files it reads.
1410
1411 GENERATE_TAGFILE =
1412
1413 # If the ALLEXTERNALS tag is set to YES all external classes will be listed
1414 # in the class index. If set to NO only the inherited external classes
1415 # will be listed.
1416
1417 ALLEXTERNALS = NO
1418
1419 # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1420 # in the modules index. If set to NO, only the current project's groups will
1421 # be listed.
1422
1423 EXTERNAL_GROUPS = YES
1424
1425 # The PERL_PATH should be the absolute path and name of the perl script
1426 # interpreter (i.e. the result of `which perl').
1427
1428 PERL_PATH = /usr/bin/perl
1429
1430 #---------------------------------------------------------------------------
1431 # Configuration options related to the dot tool
1432 #---------------------------------------------------------------------------
1433
1434 # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1435 # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1436 # or super classes. Setting the tag to NO turns the diagrams off. Note that
1437 # this option is superseded by the HAVE_DOT option below. This is only a
1438 # fallback. It is recommended to install and use dot, since it yields more
1439 # powerful graphs.
1440
1441 CLASS_DIAGRAMS = YES
1442
1443 # You can define message sequence charts within doxygen comments using the \msc
1444 # command. Doxygen will then run the mscgen tool (see
1445 # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1446 # documentation. The MSCGEN_PATH tag allows you to specify the directory where
1447 # the mscgen tool resides. If left empty the tool is assumed to be found in the
1448 # default search path.
1449
1450 MSCGEN_PATH =
1451
1452 # If set to YES, the inheritance and collaboration graphs will hide
1453 # inheritance and usage relations if the target is undocumented
1454 # or is not a class.
1455
1456 HIDE_UNDOC_RELATIONS = YES
1457
1458 # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1459 # available from the path. This tool is part of Graphviz, a graph visualization
1460 # toolkit from AT&T and Lucent Bell Labs. The other options in this section
1461 # have no effect if this option is set to NO (the default)
1462
1463 HAVE_DOT = NO
1464
1465 # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
1466 # allowed to run in parallel. When set to 0 (the default) doxygen will
1467 # base this on the number of processors available in the system. You can set it
1468 # explicitly to a value larger than 0 to get control over the balance
1469 # between CPU load and processing speed.
1470
1471 DOT_NUM_THREADS = 0
1472
1473 # By default doxygen will write a font called FreeSans.ttf to the output
1474 # directory and reference it in all dot files that doxygen generates. This
1475 # font does not include all possible unicode characters however, so when you need
1476 # these (or just want a differently looking font) you can specify the font name
1477 # using DOT_FONTNAME. You need to make sure dot is able to find the font,
1478 # which can be done by putting it in a standard location or by setting the
1479 # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
1480 # containing the font.
1481
1482 DOT_FONTNAME = FreeSans.ttf
1483
1484 # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1485 # The default size is 10pt.
1486
1487 DOT_FONTSIZE = 10
1488
1489 # By default doxygen will tell dot to use the output directory to look for the
1490 # FreeSans.ttf font (which doxygen will put there itself). If you specify a
1491 # different font using DOT_FONTNAME you can set the path where dot
1492 # can find it using this tag.
1493
1494 DOT_FONTPATH =
1495
1496 # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1497 # will generate a graph for each documented class showing the direct and
1498 # indirect inheritance relations. Setting this tag to YES will force the
1499 # CLASS_DIAGRAMS tag to NO.
1500
1501 CLASS_GRAPH = YES
1502
1503 # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1504 # will generate a graph for each documented class showing the direct and
1505 # indirect implementation dependencies (inheritance, containment, and
1506 # class references variables) of the class with other documented classes.
1507
1508 COLLABORATION_GRAPH = YES
1509
1510 # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1511 # will generate a graph for groups, showing the direct groups dependencies
1512
1513 GROUP_GRAPHS = YES
1514
1515 # If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1516 # collaboration diagrams in a style similar to the OMG's Unified Modeling
1517 # Language.
1518
1519 UML_LOOK = NO
1520
1521 # If set to YES, the inheritance and collaboration graphs will show the
1522 # relations between templates and their instances.
1523
1524 TEMPLATE_RELATIONS = NO
1525
1526 # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1527 # tags are set to YES then doxygen will generate a graph for each documented
1528 # file showing the direct and indirect include dependencies of the file with
1529 # other documented files.
1530
1531 INCLUDE_GRAPH = YES
1532
1533 # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1534 # HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1535 # documented header file showing the documented files that directly or
1536 # indirectly include this file.
1537
1538 INCLUDED_BY_GRAPH = YES
1539
1540 # If the CALL_GRAPH and HAVE_DOT options are set to YES then
1541 # doxygen will generate a call dependency graph for every global function
1542 # or class method. Note that enabling this option will significantly increase
1543 # the time of a run. So in most cases it will be better to enable call graphs
1544 # for selected functions only using the \callgraph command.
1545
1546 CALL_GRAPH = NO
1547
1548 # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1549 # doxygen will generate a caller dependency graph for every global function
1550 # or class method. Note that enabling this option will significantly increase
1551 # the time of a run. So in most cases it will be better to enable caller
1552 # graphs for selected functions only using the \callergraph command.
1553
1554 CALLER_GRAPH = NO
1555
1556 # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1557 # will generate a graphical hierarchy of all classes instead of a textual one.
1558
1559 GRAPHICAL_HIERARCHY = YES
1560
1561 # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1562 # then doxygen will show the dependencies a directory has on other directories
1563 # in a graphical way. The dependency relations are determined by the #include
1564 # relations between the files in the directories.
1565
1566 DIRECTORY_GRAPH = YES
1567
1568 # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1569 # generated by dot. Possible values are png, jpg, or gif
1570 # If left blank png will be used.
1571
1572 DOT_IMAGE_FORMAT = png
1573
1574 # The tag DOT_PATH can be used to specify the path where the dot tool can be
1575 # found. If left blank, it is assumed the dot tool can be found in the path.
1576
1577 DOT_PATH =
1578
1579 # The DOTFILE_DIRS tag can be used to specify one or more directories that
1580 # contain dot files that are included in the documentation (see the
1581 # \dotfile command).
1582
1583 DOTFILE_DIRS =
1584
1585 # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1586 # nodes that will be shown in the graph. If the number of nodes in a graph
1587 # becomes larger than this value, doxygen will truncate the graph, which is
1588 # visualized by representing a node as a red box. Note that if the
1589 # number of direct children of the root node in a graph is already larger than
1590 # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1591 # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1592
1593 DOT_GRAPH_MAX_NODES = 50
1594
1595 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1596 # graphs generated by dot. A depth value of 3 means that only nodes reachable
1597 # from the root by following a path via at most 3 edges will be shown. Nodes
1598 # that lie farther from the root node will be omitted. Note that setting this
1599 # option to 1 or 2 may greatly reduce the computation time needed for large
1600 # code bases. Also note that the size of a graph can be further restricted by
1601 # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1602
1603 MAX_DOT_GRAPH_DEPTH = 0
1604
1605 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1606 # background. This is disabled by default, because dot on Windows does not
1607 # seem to support this out of the box. Warning: Depending on the platform used,
1608 # enabling this option may lead to badly anti-aliased labels on the edges of
1609 # a graph (i.e. they become hard to read).
1610
1611 DOT_TRANSPARENT = NO
1612
1613 # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
1614 # files in one run (i.e. multiple -o and -T options on the command line). This
1615 # makes dot run faster, but since only newer versions of dot (>1.8.10)
1616 # support this, this feature is disabled by default.
1617
1618 DOT_MULTI_TARGETS = YES
1619
1620 # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1621 # generate a legend page explaining the meaning of the various boxes and
1622 # arrows in the dot generated graphs.
1623
1624 GENERATE_LEGEND = YES
1625
1626 # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1627 # remove the intermediate dot files that are used to generate
1628 # the various graphs.
1629
1630 DOT_CLEANUP = YES
0 The OpenLDAP Public License
1 Version 2.8, 17 August 2003
2
3 Redistribution and use of this software and associated documentation
4 ("Software"), with or without modification, are permitted provided
5 that the following conditions are met:
6
7 1. Redistributions in source form must retain copyright statements
8 and notices,
9
10 2. Redistributions in binary form must reproduce applicable copyright
11 statements and notices, this list of conditions, and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution, and
14
15 3. Redistributions must contain a verbatim copy of this document.
16
17 The OpenLDAP Foundation may revise this license from time to time.
18 Each revision is distinguished by a version number. You may use
19 this Software under terms of this license revision or under the
20 terms of any subsequent revision of the license.
21
22 THIS SOFTWARE IS PROVIDED BY THE OPENLDAP FOUNDATION AND ITS
23 CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
24 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
25 AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
26 SHALL THE OPENLDAP FOUNDATION, ITS CONTRIBUTORS, OR THE AUTHOR(S)
27 OR OWNER(S) OF THE SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35
36 The names of the authors and copyright holders must not be used in
37 advertising or otherwise to promote the sale, use or other dealing
38 in this Software without specific, written prior permission. Title
39 to copyright in this Software shall at all times remain with copyright
40 holders.
41
42 OpenLDAP is a registered trademark of the OpenLDAP Foundation.
43
44 Copyright 1999-2003 The OpenLDAP Foundation, Redwood City,
45 California, USA. All Rights Reserved. Permission to copy and
46 distribute verbatim copies of this document is granted.
0 CC = gcc
1 W = -W -Wall -Wno-unused-parameter -Wbad-function-cast
2 OPT = -O2 -g
3 CFLAGS = -pthread $(OPT) $(W) $(XCFLAGS)
4 LDLIBS =
5 SOLIBS =
6 prefix = /usr/local
7
8 IHDRS = lmdb.h
9 ILIBS = liblmdb.a liblmdb.so
10 IPROGS = mdb_stat mdb_copy
11 IDOCS = mdb_stat.1 mdb_copy.1
12 PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5
13 all: $(ILIBS) $(PROGS)
14
15 install: $(ILIBS) $(IPROGS) $(IHDRS)
16 cp $(IPROGS) $(DESTDIR)$(prefix)/bin
17 cp $(ILIBS) $(DESTDIR)$(prefix)/lib
18 cp $(IHDRS) $(DESTDIR)$(prefix)/include
19 cp $(IDOCS) $(DESTDIR)$(prefix)/man/man1
20
21 clean:
22 rm -rf $(PROGS) *.[ao] *.so *~ testdb
23
24 test: all
25 mkdir testdb
26 ./mtest && ./mdb_stat testdb
27
28 liblmdb.a: mdb.o midl.o
29 ar rs $@ mdb.o midl.o
30
31 liblmdb.so: mdb.o midl.o
32 $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.o midl.o $(SOLIBS)
33
34 mdb_stat: mdb_stat.o liblmdb.a
35 mdb_copy: mdb_copy.o liblmdb.a
36 mtest: mtest.o liblmdb.a
37 mtest2: mtest2.o liblmdb.a
38 mtest3: mtest3.o liblmdb.a
39 mtest4: mtest4.o liblmdb.a
40 mtest5: mtest5.o liblmdb.a
41 mtest6: mtest6.o liblmdb.a
42
43 mdb.o: mdb.c lmdb.h midl.h
44 $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c mdb.c
45
46 midl.o: midl.c midl.h
47 $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c midl.c
48
49 %: %.o
50 $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
51
52 %.o: %.c lmdb.h
53 $(CC) $(CFLAGS) $(CPPFLAGS) -c $<
0 /** @file lmdb.h
1 * @brief Lightning memory-mapped database library
2 *
3 * @mainpage Lightning Memory-Mapped Database Manager (MDB)
4 *
5 * @section intro_sec Introduction
6 * MDB is a Btree-based database management library modeled loosely on the
7 * BerkeleyDB API, but much simplified. The entire database is exposed
8 * in a memory map, and all data fetches return data directly
9 * from the mapped memory, so no malloc's or memcpy's occur during
10 * data fetches. As such, the library is extremely simple because it
11 * requires no page caching layer of its own, and it is extremely high
12 * performance and memory-efficient. It is also fully transactional with
13 * full ACID semantics, and when the memory map is read-only, the
14 * database integrity cannot be corrupted by stray pointer writes from
15 * application code.
16 *
17 * The library is fully thread-aware and supports concurrent read/write
18 * access from multiple processes and threads. Data pages use a copy-on-
19 * write strategy so no active data pages are ever overwritten, which
20 * also provides resistance to corruption and eliminates the need for any
21 * special recovery procedures after a system crash. Writes are fully
22 * serialized; only one write transaction may be active at a time, which
23 * guarantees that writers can never deadlock. The database structure is
24 * multi-versioned so readers run with no locks; writers cannot block
25 * readers, and readers don't block writers.
26 *
27 * Unlike other well-known database mechanisms which use either write-ahead
28 * transaction logs or append-only data writes, MDB requires no maintenance
29 * during operation. Both write-ahead loggers and append-only databases
30 * require periodic checkpointing and/or compaction of their log or database
31 * files otherwise they grow without bound. MDB tracks free pages within
32 * the database and re-uses them for new write operations, so the database
33 * size does not grow without bound in normal use.
34 *
35 * The memory map can be used as a read-only or read-write map. It is
36 * read-only by default as this provides total immunity to corruption.
37 * Using read-write mode offers much higher write performance, but adds
38 * the possibility for stray application writes thru pointers to silently
39 * corrupt the database. Of course if your application code is known to
40 * be bug-free (...) then this is not an issue.
41 *
42 * @section caveats_sec Caveats
43 * Troubleshooting the lock file, plus semaphores on BSD systems:
44 *
45 * - A broken lockfile can cause sync issues.
46 * Stale reader transactions left behind by an aborted program
47 * cause further writes to grow the database quickly, and
48 * stale locks can block further operation.
49 *
50 * Fix: Terminate all programs using the database, or make
51 * them close it. Next database user will reset the lockfile.
52 *
53 * - On BSD systems or others configured with MDB_USE_POSIX_SEM,
54 * startup can fail due to semaphores owned by another userid.
55 *
56 * Fix: Open and close the database as the user which owns the
57 * semaphores (likely last user) or as root, while no other
58 * process is using the database.
59 *
60 * Restrictions/caveats (in addition to those listed for some functions):
61 *
62 * - Only the database owner should normally use the database on
63 * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
64 * Multiple users can cause startup to fail later, as noted above.
65 *
66 * - A thread can only use one transaction at a time, plus any child
67 * transactions. Each transaction belongs to one thread. See below.
68 *
69 * - Use an MDB_env* in the process which opened it, without fork()ing.
70 *
71 * - Do not have an MDB database open twice in the same process at
72 * the same time. Not even from a plain open() call - close()ing it
73 * breaks flock() advisory locking.
74 *
75 * - Avoid long-lived transactions. Read transactions prevent
76 * reuse of pages freed by newer write transactions, thus the
77 * database can grow quickly. Write transactions prevent
78 * other write transactions, since writes are serialized.
79 *
80 * - Avoid suspending a process with active transactions. These
81 * would then be "long-lived" as above. Also read transactions
82 * suspended when writers commit could sometimes see wrong data.
83 *
84 * ...when several processes can use a database concurrently:
85 *
86 * - Avoid aborting a process with an active transaction.
87 * The transaction becomes "long-lived" as above until the lockfile
88 * is reset, since the process may not remove it from the lockfile.
89 *
90 * - If you do that anyway, close the environment once in a while,
91 * so the lockfile can get reset.
92 *
93 * - Do not use MDB databases on remote filesystems, even between
94 * processes on the same host. This breaks flock() on some OSes,
95 * possibly memory map sync, and certainly sync between programs
96 * on different hosts.
97 *
98 * - Opening a database can fail if another process is opening or
99 * closing it at exactly the same time.
100 *
101 * @author Howard Chu, Symas Corporation.
102 *
103 * @copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved.
104 *
105 * Redistribution and use in source and binary forms, with or without
106 * modification, are permitted only as authorized by the OpenLDAP
107 * Public License.
108 *
109 * A copy of this license is available in the file LICENSE in the
110 * top-level directory of the distribution or, alternatively, at
111 * <http://www.OpenLDAP.org/license.html>.
112 *
113 * @par Derived From:
114 * This code is derived from btree.c written by Martin Hedenfalk.
115 *
116 * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
117 *
118 * Permission to use, copy, modify, and distribute this software for any
119 * purpose with or without fee is hereby granted, provided that the above
120 * copyright notice and this permission notice appear in all copies.
121 *
122 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
123 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
124 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
125 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
126 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
127 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
128 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
129 */
130 #ifndef _LMDB_H_
131 #define _LMDB_H_
132
133 #include <sys/types.h>
134
135 #ifdef __cplusplus
136 extern "C" {
137 #endif
138
139 #ifdef _MSC_VER
140 typedef int mdb_mode_t;
141 #else
142 typedef mode_t mdb_mode_t;
143 #endif
144
145 /** @defgroup mdb MDB API
146 * @{
147 * @brief OpenLDAP Lightning Memory-Mapped Database Manager
148 */
149 /** @defgroup Version Version Macros
150 * @{
151 */
152 /** Library major version */
153 #define MDB_VERSION_MAJOR 0
154 /** Library minor version */
155 #define MDB_VERSION_MINOR 9
156 /** Library patch version */
157 #define MDB_VERSION_PATCH 6
158
159 /** Combine args a,b,c into a single integer for easy version comparisons */
160 #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
161
162 /** The full library version as a single integer */
163 #define MDB_VERSION_FULL \
164 MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
165
166 /** The release date of this library version */
167 #define MDB_VERSION_DATE "January 10, 2013"
168
169 /** A stringifier for the version info */
170 #define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
171
172 /** A helper for the stringifier macro */
173 #define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d)
174
175 /** The full library version as a C string */
176 #define MDB_VERSION_STRING \
177 MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE)
178 /** @} */
179
180 /** @brief Opaque structure for a database environment.
181 *
182 * A DB environment supports multiple databases, all residing in the same
183 * shared-memory map.
184 */
185 typedef struct MDB_env MDB_env;
186
187 /** @brief Opaque structure for a transaction handle.
188 *
189 * All database operations require a transaction handle. Transactions may be
190 * read-only or read-write.
191 */
192 typedef struct MDB_txn MDB_txn;
193
194 /** @brief A handle for an individual database in the DB environment. */
195 typedef unsigned int MDB_dbi;
196
197 /** @brief Opaque structure for navigating through a database */
198 typedef struct MDB_cursor MDB_cursor;
199
200 /** @brief Generic structure used for passing keys and data in and out
201 * of the database.
202 *
203 * Key sizes must be between 1 and the liblmdb build-time constant
204 * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The
205 * same applies to data sizes in databases with the #MDB_DUPSORT flag.
206 * Other data items can in theory be from 0 to 0xffffffff bytes long.
207 */
208 typedef struct MDB_val {
209 size_t mv_size; /**< size of the data item */
210 void *mv_data; /**< address of the data item */
211 } MDB_val;
212
213 /** @brief A callback function used to compare two keys in a database */
214 typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
215
216 /** @brief A callback function used to relocate a position-dependent data item
217 * in a fixed-address database.
218 *
219 * The \b newptr gives the item's desired address in
220 * the memory map, and \b oldptr gives its previous address. The item's actual
221 * data resides at the address in \b item. This callback is expected to walk
222 * through the fields of the record in \b item and modify any
223 * values based at the \b oldptr address to be relative to the \b newptr address.
224 * @param[in,out] item The item that is to be relocated.
225 * @param[in] oldptr The previous address.
226 * @param[in] newptr The new address to relocate to.
227 * @param[in] relctx An application-provided context, set by #mdb_set_relctx().
228 * @todo This feature is currently unimplemented.
229 */
230 typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
231
232 /** @defgroup mdb_env Environment Flags
233 *
234 * Values do not overlap Database Flags.
235 * @{
236 */
237 /** mmap at a fixed address (experimental) */
238 #define MDB_FIXEDMAP 0x01
239 /** no environment directory */
240 #define MDB_NOSUBDIR 0x4000
241 /** don't fsync after commit */
242 #define MDB_NOSYNC 0x10000
243 /** read only */
244 #define MDB_RDONLY 0x20000
245 /** don't fsync metapage after commit */
246 #define MDB_NOMETASYNC 0x40000
247 /** use writable mmap */
248 #define MDB_WRITEMAP 0x80000
249 /** use asynchronous msync when MDB_WRITEMAP is used */
250 #define MDB_MAPASYNC 0x100000
251 /** @} */
252
253 /** @defgroup mdb_dbi_open Database Flags
254 *
255 * Values do not overlap Environment Flags.
256 * @{
257 */
258 /** use reverse string keys */
259 #define MDB_REVERSEKEY 0x02
260 /** use sorted duplicates */
261 #define MDB_DUPSORT 0x04
262 /** numeric keys in native byte order.
263 * The keys must all be of the same size. */
264 #define MDB_INTEGERKEY 0x08
265 /** with #MDB_DUPSORT, sorted dup items have fixed size */
266 #define MDB_DUPFIXED 0x10
267 /** with #MDB_DUPSORT, dups are numeric in native byte order */
268 #define MDB_INTEGERDUP 0x20
269 /** with #MDB_DUPSORT, use reverse string dups */
270 #define MDB_REVERSEDUP 0x40
271 /** create DB if not already existing */
272 #define MDB_CREATE 0x40000
273 /** @} */
274
275 /** @defgroup mdb_put Write Flags
276 * @{
277 */
278 /** For put: Don't write if the key already exists. */
279 #define MDB_NOOVERWRITE 0x10
280 /** Only for #MDB_DUPSORT<br>
281 * For put: don't write if the key and data pair already exist.<br>
282 * For mdb_cursor_del: remove all duplicate data items.
283 */
284 #define MDB_NODUPDATA 0x20
285 /** For mdb_cursor_put: overwrite the current key/data pair */
286 #define MDB_CURRENT 0x40
287 /** For put: Just reserve space for data, don't copy it. Return a
288 * pointer to the reserved space.
289 */
290 #define MDB_RESERVE 0x10000
291 /** Data is being appended, don't split full pages. */
292 #define MDB_APPEND 0x20000
293 /** Duplicate data is being appended, don't split full pages. */
294 #define MDB_APPENDDUP 0x40000
295 /** Store multiple data items in one call. */
296 #define MDB_MULTIPLE 0x80000
297 /** @} */
298
299 /** @brief Cursor Get operations.
300 *
301 * This is the set of all operations for retrieving data
302 * using a cursor.
303 */
304 typedef enum MDB_cursor_op {
305 MDB_FIRST, /**< Position at first key/data item */
306 MDB_FIRST_DUP, /**< Position at first data item of current key.
307 Only for #MDB_DUPSORT */
308 MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */
309 MDB_GET_BOTH_RANGE, /**< Position at key, nearest data. Only for #MDB_DUPSORT */
310 MDB_GET_CURRENT, /**< Return key/data at current cursor position */
311 MDB_GET_MULTIPLE, /**< Return all the duplicate data items at the current
312 cursor position. Only for #MDB_DUPFIXED */
313 MDB_LAST, /**< Position at last key/data item */
314 MDB_LAST_DUP, /**< Position at last data item of current key.
315 Only for #MDB_DUPSORT */
316 MDB_NEXT, /**< Position at next data item */
317 MDB_NEXT_DUP, /**< Position at next data item of current key.
318 Only for #MDB_DUPSORT */
319 MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next
320 cursor position. Only for #MDB_DUPFIXED */
321 MDB_NEXT_NODUP, /**< Position at first data item of next key.
322 Only for #MDB_DUPSORT */
323 MDB_PREV, /**< Position at previous data item */
324 MDB_PREV_DUP, /**< Position at previous data item of current key.
325 Only for #MDB_DUPSORT */
326 MDB_PREV_NODUP, /**< Position at last data item of previous key.
327 Only for #MDB_DUPSORT */
328 MDB_SET, /**< Position at specified key */
329 MDB_SET_KEY, /**< Position at specified key, return key + data */
330 MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */
331 } MDB_cursor_op;
332
333 /** @defgroup errors Return Codes
334 *
335 * BerkeleyDB uses -30800 to -30999, we'll go under them
336 * @{
337 */
338 /** Successful result */
339 #define MDB_SUCCESS 0
340 /** key/data pair already exists */
341 #define MDB_KEYEXIST (-30799)
342 /** key/data pair not found (EOF) */
343 #define MDB_NOTFOUND (-30798)
344 /** Requested page not found - this usually indicates corruption */
345 #define MDB_PAGE_NOTFOUND (-30797)
346 /** Located page was wrong type */
347 #define MDB_CORRUPTED (-30796)
348 /** Update of meta page failed, probably I/O error */
349 #define MDB_PANIC (-30795)
350 /** Environment version mismatch */
351 #define MDB_VERSION_MISMATCH (-30794)
352 /** File is not a valid MDB file */
353 #define MDB_INVALID (-30793)
354 /** Environment mapsize reached */
355 #define MDB_MAP_FULL (-30792)
356 /** Environment maxdbs reached */
357 #define MDB_DBS_FULL (-30791)
358 /** Environment maxreaders reached */
359 #define MDB_READERS_FULL (-30790)
360 /** Too many TLS keys in use - Windows only */
361 #define MDB_TLS_FULL (-30789)
362 /** Txn has too many dirty pages */
363 #define MDB_TXN_FULL (-30788)
364 /** Cursor stack too deep - internal error */
365 #define MDB_CURSOR_FULL (-30787)
366 /** Page has not enough space - internal error */
367 #define MDB_PAGE_FULL (-30786)
368 /** Database contents grew beyond environment mapsize */
369 #define MDB_MAP_RESIZED (-30785)
370 /** Database flags changed or would change */
371 #define MDB_INCOMPATIBLE (-30784)
372 #define MDB_LAST_ERRCODE MDB_INCOMPATIBLE
373 /** @} */
374
375 /** @brief Statistics for a database in the environment */
376 typedef struct MDB_stat {
377 unsigned int ms_psize; /**< Size of a database page.
378 This is currently the same for all databases. */
379 unsigned int ms_depth; /**< Depth (height) of the B-tree */
380 size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */
381 size_t ms_leaf_pages; /**< Number of leaf pages */
382 size_t ms_overflow_pages; /**< Number of overflow pages */
383 size_t ms_entries; /**< Number of data items */
384 } MDB_stat;
385
386 /** @brief Information about the environment */
387 typedef struct MDB_envinfo {
388 void *me_mapaddr; /**< Address of map, if fixed */
389 size_t me_mapsize; /**< Size of the data memory map */
390 size_t me_last_pgno; /**< ID of the last used page */
391 size_t me_last_txnid; /**< ID of the last committed transaction */
392 unsigned int me_maxreaders; /**< maximum number of threads for the environment */
393 unsigned int me_numreaders; /**< maximum number of threads used in the environment */
394 } MDB_envinfo;
395
396 /** @brief Return the mdb library version information.
397 *
398 * @param[out] major if non-NULL, the library major version number is copied here
399 * @param[out] minor if non-NULL, the library minor version number is copied here
400 * @param[out] patch if non-NULL, the library patch version number is copied here
401 * @retval "version string" The library version as a string
402 */
403 char *mdb_version(int *major, int *minor, int *patch);
404
405 /** @brief Return a string describing a given error code.
406 *
407 * This function is a superset of the ANSI X3.159-1989 (ANSI C) strerror(3)
408 * function. If the error code is greater than or equal to 0, then the string
409 * returned by the system function strerror(3) is returned. If the error code
410 * is less than 0, an error string corresponding to the MDB library error is
411 * returned. See @ref errors for a list of MDB-specific error codes.
412 * @param[in] err The error code
413 * @retval "error message" The description of the error
414 */
415 char *mdb_strerror(int err);
416
417 /** @brief Create an MDB environment handle.
418 *
419 * This function allocates memory for a #MDB_env structure. To release
420 * the allocated memory and discard the handle, call #mdb_env_close().
421 * Before the handle may be used, it must be opened using #mdb_env_open().
422 * Various other options may also need to be set before opening the handle,
423 * e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(),
424 * depending on usage requirements.
425 * @param[out] env The address where the new handle will be stored
426 * @return A non-zero error value on failure and 0 on success.
427 */
428 int mdb_env_create(MDB_env **env);
429
430 /** @brief Open an environment handle.
431 *
432 * If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle.
433 * @param[in] env An environment handle returned by #mdb_env_create()
434 * @param[in] path The directory in which the database files reside. This
435 * directory must already exist and be writable.
436 * @param[in] flags Special options for this environment. This parameter
437 * must be set to 0 or by bitwise OR'ing together one or more of the
438 * values described here.
439 * Flags set by mdb_env_set_flags() are also used.
440 * <ul>
441 * <li>#MDB_FIXEDMAP
442 * use a fixed address for the mmap region. This flag must be specified
443 * when creating the environment, and is stored persistently in the environment.
444 * If successful, the memory map will always reside at the same virtual address
445 * and pointers used to reference data items in the database will be constant
446 * across multiple invocations. This option may not always work, depending on
447 * how the operating system has allocated memory to shared libraries and other uses.
448 * The feature is highly experimental.
449 * <li>#MDB_NOSUBDIR
450 * By default, MDB creates its environment in a directory whose
451 * pathname is given in \b path, and creates its data and lock files
452 * under that directory. With this option, \b path is used as-is for
453 * the database main data file. The database lock file is the \b path
454 * with "-lock" appended.
455 * <li>#MDB_RDONLY
456 * Open the environment in read-only mode. No write operations will be
457 * allowed. MDB will still modify the lock file - except on read-only
458 * filesystems, where MDB does not use locks.
459 * <li>#MDB_WRITEMAP
460 * Use a writeable memory map unless MDB_RDONLY is set. This is faster
461 * and uses fewer mallocs, but loses protection from application bugs
462 * like wild pointer writes and other bad updates into the database.
463 * Incompatible with nested transactions.
464 * <li>#MDB_NOMETASYNC
465 * Flush system buffers to disk only once per transaction, omit the
466 * metadata flush. Defer that until the system flushes files to disk,
467 * or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
468 * maintains database integrity, but a system crash may undo the last
469 * committed transaction. I.e. it preserves the ACI (atomicity,
470 * consistency, isolation) but not D (durability) database property.
471 * This flag may be changed at any time using #mdb_env_set_flags().
472 * <li>#MDB_NOSYNC
473 * Don't flush system buffers to disk when committing a transaction.
474 * This optimization means a system crash can corrupt the database or
475 * lose the last transactions if buffers are not yet flushed to disk.
476 * The risk is governed by how often the system flushes dirty buffers
477 * to disk and how often #mdb_env_sync() is called. However, if the
478 * filesystem preserves write order and the #MDB_WRITEMAP flag is not
479 * used, transactions exhibit ACI (atomicity, consistency, isolation)
480 * properties and only lose D (durability). I.e. database integrity
481 * is maintained, but a system crash may undo the final transactions.
482 * Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
483 * hint for when to write transactions to disk, unless #mdb_env_sync()
484 * is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
485 * This flag may be changed at any time using #mdb_env_set_flags().
486 * <li>#MDB_MAPASYNC
487 * When using #MDB_WRITEMAP, use asynchronous flushes to disk.
488 * As with #MDB_NOSYNC, a system crash can then corrupt the
489 * database or lose the last transactions. Calling #mdb_env_sync()
490 * ensures on-disk database integrity until next commit.
491 * This flag may be changed at any time using #mdb_env_set_flags().
492 * </ul>
493 * @param[in] mode The UNIX permissions to set on created files. This parameter
494 * is ignored on Windows.
495 * @return A non-zero error value on failure and 0 on success. Some possible
496 * errors are:
497 * <ul>
498 * <li>#MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the
499 * version that created the database environment.
500 * <li>#MDB_INVALID - the environment file headers are corrupted.
501 * <li>ENOENT - the directory specified by the path parameter doesn't exist.
502 * <li>EACCES - the user didn't have permission to access the environment files.
503 * <li>EAGAIN - the environment was locked by another process.
504 * </ul>
505 */
506 int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
507
508 /** @brief Copy an MDB environment to the specified path.
509 *
510 * This function may be used to make a backup of an existing environment.
511 * @param[in] env An environment handle returned by #mdb_env_create(). It
512 * must have already been opened successfully.
513 * @param[in] path The directory in which the copy will reside. This
514 * directory must already exist and be writable but must otherwise be
515 * empty.
516 * @return A non-zero error value on failure and 0 on success.
517 */
518 int mdb_env_copy(MDB_env *env, const char *path);
519
520 /** @brief Return statistics about the MDB environment.
521 *
522 * @param[in] env An environment handle returned by #mdb_env_create()
523 * @param[out] stat The address of an #MDB_stat structure
524 * where the statistics will be copied
525 */
526 int mdb_env_stat(MDB_env *env, MDB_stat *stat);
527
528 /** @brief Return information about the MDB environment.
529 *
530 * @param[in] env An environment handle returned by #mdb_env_create()
531 * @param[out] stat The address of an #MDB_envinfo structure
532 * where the information will be copied
533 */
534 int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
535
536 /** @brief Flush the data buffers to disk.
537 *
538 * Data is always written to disk when #mdb_txn_commit() is called,
539 * but the operating system may keep it buffered. MDB always flushes
540 * the OS buffers upon commit as well, unless the environment was
541 * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
542 * @param[in] env An environment handle returned by #mdb_env_create()
543 * @param[in] force If non-zero, force a synchronous flush. Otherwise
544 * if the environment has the #MDB_NOSYNC flag set the flushes
545 * will be omitted, and with #MDB_MAPASYNC they will be asynchronous.
546 * @return A non-zero error value on failure and 0 on success. Some possible
547 * errors are:
548 * <ul>
549 * <li>EINVAL - an invalid parameter was specified.
550 * <li>EIO - an error occurred during synchronization.
551 * </ul>
552 */
553 int mdb_env_sync(MDB_env *env, int force);
554
555 /** @brief Close the environment and release the memory map.
556 *
557 * Only a single thread may call this function. All transactions, databases,
558 * and cursors must already be closed before calling this function. Attempts to
559 * use any such handles after calling this function will cause a SIGSEGV.
560 * The environment handle will be freed and must not be used again after this call.
561 * @param[in] env An environment handle returned by #mdb_env_create()
562 */
563 void mdb_env_close(MDB_env *env);
564
565 /** @brief Set environment flags.
566 *
567 * This may be used to set some flags in addition to those from
568 * #mdb_env_open(), or to unset these flags.
569 * @param[in] env An environment handle returned by #mdb_env_create()
570 * @param[in] flags The flags to change, bitwise OR'ed together
571 * @param[in] onoff A non-zero value sets the flags, zero clears them.
572 * @return A non-zero error value on failure and 0 on success. Some possible
573 * errors are:
574 * <ul>
575 * <li>EINVAL - an invalid parameter was specified.
576 * </ul>
577 */
578 int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff);
579
580 /** @brief Get environment flags.
581 *
582 * @param[in] env An environment handle returned by #mdb_env_create()
583 * @param[out] flags The address of an integer to store the flags
584 * @return A non-zero error value on failure and 0 on success. Some possible
585 * errors are:
586 * <ul>
587 * <li>EINVAL - an invalid parameter was specified.
588 * </ul>
589 */
590 int mdb_env_get_flags(MDB_env *env, unsigned int *flags);
591
592 /** @brief Return the path that was used in #mdb_env_open().
593 *
594 * @param[in] env An environment handle returned by #mdb_env_create()
595 * @param[out] path Address of a string pointer to contain the path. This
596 * is the actual string in the environment, not a copy. It should not be
597 * altered in any way.
598 * @return A non-zero error value on failure and 0 on success. Some possible
599 * errors are:
600 * <ul>
601 * <li>EINVAL - an invalid parameter was specified.
602 * </ul>
603 */
604 int mdb_env_get_path(MDB_env *env, const char **path);
605
606 /** @brief Set the size of the memory map to use for this environment.
607 *
608 * The size should be a multiple of the OS page size. The default is
609 * 10485760 bytes. The size of the memory map is also the maximum size
610 * of the database. The value should be chosen as large as possible,
611 * to accommodate future growth of the database.
612 * This function may only be called after #mdb_env_create() and before #mdb_env_open().
613 * The size may be changed by closing and reopening the environment.
614 * Any attempt to set a size smaller than the space already consumed
615 * by the environment will be silently changed to the current size of the used space.
616 * @param[in] env An environment handle returned by #mdb_env_create()
617 * @param[in] size The size in bytes
618 * @return A non-zero error value on failure and 0 on success. Some possible
619 * errors are:
620 * <ul>
621 * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
622 * </ul>
623 */
624 int mdb_env_set_mapsize(MDB_env *env, size_t size);
625
626 /** @brief Set the maximum number of threads for the environment.
627 *
628 * This defines the number of slots in the lock table that is used to track readers in
629 * the environment. The default is 126.
630 * This function may only be called after #mdb_env_create() and before #mdb_env_open().
631 * @param[in] env An environment handle returned by #mdb_env_create()
632 * @param[in] readers The maximum number of threads
633 * @return A non-zero error value on failure and 0 on success. Some possible
634 * errors are:
635 * <ul>
636 * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
637 * </ul>
638 */
639 int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
640
641 /** @brief Get the maximum number of threads for the environment.
642 *
643 * @param[in] env An environment handle returned by #mdb_env_create()
644 * @param[out] readers Address of an integer to store the number of readers
645 * @return A non-zero error value on failure and 0 on success. Some possible
646 * errors are:
647 * <ul>
648 * <li>EINVAL - an invalid parameter was specified.
649 * </ul>
650 */
651 int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
652
653 /** @brief Set the maximum number of named databases for the environment.
654 *
655 * This function is only needed if multiple databases will be used in the
656 * environment. Simpler applications that use the environment as a single
657 * unnamed database can ignore this option.
658 * This function may only be called after #mdb_env_create() and before #mdb_env_open().
659 * @param[in] env An environment handle returned by #mdb_env_create()
660 * @param[in] dbs The maximum number of databases
661 * @return A non-zero error value on failure and 0 on success. Some possible
662 * errors are:
663 * <ul>
664 * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
665 * </ul>
666 */
667 int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
668
669 /** @brief Create a transaction for use with the environment.
670 *
671 * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
672 * @note Transactions may not span threads; a transaction must only be used by a
673 * single thread. Also, a thread may only have a single transaction.
674 * @note Cursors may not span transactions; each cursor must be opened and closed
675 * within a single transaction.
676 * @param[in] env An environment handle returned by #mdb_env_create()
677 * @param[in] parent If this parameter is non-NULL, the new transaction
678 * will be a nested transaction, with the transaction indicated by \b parent
679 * as its parent. Transactions may be nested to any level. A parent
680 * transaction may not issue any other operations besides mdb_txn_begin,
681 * mdb_txn_abort, or mdb_txn_commit while it has active child transactions.
682 * @param[in] flags Special options for this transaction. This parameter
683 * must be set to 0 or by bitwise OR'ing together one or more of the
684 * values described here.
685 * <ul>
686 * <li>#MDB_RDONLY
687 * This transaction will not perform any write operations.
688 * </ul>
689 * @param[out] txn Address where the new #MDB_txn handle will be stored
690 * @return A non-zero error value on failure and 0 on success. Some possible
691 * errors are:
692 * <ul>
693 * <li>#MDB_PANIC - a fatal error occurred earlier and the environment
694 * must be shut down.
695 * <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
696 * mapsize and the environment must be shut down.
697 * <li>#MDB_READERS_FULL - a read-only transaction was requested and
698 * the reader lock table is full. See #mdb_env_set_maxreaders().
699 * <li>ENOMEM - out of memory.
700 * </ul>
701 */
702 int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);
703
704 /** @brief Commit all the operations of a transaction into the database.
705 *
706 * All cursors opened within the transaction will be closed by this call. The cursors
707 * and transaction handle will be freed and must not be used again after this call.
708 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
709 * @return A non-zero error value on failure and 0 on success. Some possible
710 * errors are:
711 * <ul>
712 * <li>EINVAL - an invalid parameter was specified.
713 * <li>ENOSPC - no more disk space.
714 * <li>EIO - a low-level I/O error occurred while writing.
715 * <li>ENOMEM - out of memory.
716 * </ul>
717 */
718 int mdb_txn_commit(MDB_txn *txn);
719
720 /** @brief Abandon all the operations of the transaction instead of saving them.
721 *
722 * All cursors opened within the transaction will be closed by this call. The cursors
723 * and transaction handle will be freed and must not be used again after this call.
724 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
725 */
726 void mdb_txn_abort(MDB_txn *txn);
727
728 /** @brief Reset a read-only transaction.
729 *
730 * This releases the current reader lock but doesn't free the
731 * transaction handle, allowing it to be used again later by #mdb_txn_renew().
732 * It otherwise has the same effect as #mdb_txn_abort() but saves some memory
733 * allocation/deallocation overhead if a thread is going to start a new
734 * read-only transaction again soon.
735 * All cursors opened within the transaction must be closed before the transaction
736 * is reset.
737 * Reader locks generally don't interfere with writers, but they keep old
738 * versions of database pages allocated. Thus they prevent the old pages
739 * from being reused when writers commit new data, and so under heavy load
740 * the database size may grow much more rapidly than otherwise.
741 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
742 */
743 void mdb_txn_reset(MDB_txn *txn);
744
745 /** @brief Renew a read-only transaction.
746 *
747 * This acquires a new reader lock for a transaction handle that had been
748 * released by #mdb_txn_reset(). It must be called before a reset transaction
749 * may be used again.
750 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
751 * @return A non-zero error value on failure and 0 on success. Some possible
752 * errors are:
753 * <ul>
754 * <li>#MDB_PANIC - a fatal error occurred earlier and the environment
755 * must be shut down.
756 * <li>EINVAL - an invalid parameter was specified.
757 * </ul>
758 */
759 int mdb_txn_renew(MDB_txn *txn);
760
761 /** Compat with version <= 0.9.4: expands to #mdb_dbi_open(). Avoids clash with libmdb from MDB Tools project */
762 #define mdb_open(txn,name,flags,dbi) mdb_dbi_open(txn,name,flags,dbi)
763 /** Compat with version <= 0.9.4: expands to #mdb_dbi_close(). Avoids clash with libmdb from MDB Tools project */
764 #define mdb_close(env,dbi) mdb_dbi_close(env,dbi)
765
766 /** @brief Open a database in the environment.
767 *
768 * The database handle may be discarded by calling #mdb_dbi_close().
769 * The old database handle is returned if the database was already open.
770 * The handle must only be closed once.
771 * The database handle will be private to the current transaction until
772 * the transaction is successfully committed. If the transaction is
773 * aborted the handle will be closed automatically.
774 * After a successful commit the
775 * handle will reside in the shared environment, and may be used
776 * by other transactions. This function must not be called from
777 * multiple concurrent transactions. A transaction that uses this function
778 * must finish (either commit or abort) before any other transaction may
779 * use this function.
780 *
781 * To use named databases (with name != NULL), #mdb_env_set_maxdbs()
782 * must be called before opening the environment.
783 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
784 * @param[in] name The name of the database to open. If only a single
785 * database is needed in the environment, this value may be NULL.
786 * @param[in] flags Special options for this database. This parameter
787 * must be set to 0 or by bitwise OR'ing together one or more of the
788 * values described here.
789 * <ul>
790 * <li>#MDB_REVERSEKEY
791 * Keys are strings to be compared in reverse order, from the end
792 * of the strings to the beginning. By default, keys are treated as strings and
793 * compared from beginning to end.
794 * <li>#MDB_DUPSORT
795 * Duplicate keys may be used in the database. (Or, from another perspective,
796 * keys may have multiple data items, stored in sorted order.) By default
797 * keys must be unique and may have only a single data item.
798 * <li>#MDB_INTEGERKEY
799 * Keys are binary integers in native byte order. Setting this option
800 * requires all keys to be the same size, typically sizeof(int)
801 * or sizeof(size_t).
802 * <li>#MDB_DUPFIXED
803 * This flag may only be used in combination with #MDB_DUPSORT. This option
804 * tells the library that the data items for this database are all the same
805 * size, which allows further optimizations in storage and retrieval. When
806 * all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE
807 * cursor operations may be used to retrieve multiple items at once.
808 * <li>#MDB_INTEGERDUP
809 * This option specifies that duplicate data items are also integers, and
810 * should be sorted as such.
811 * <li>#MDB_REVERSEDUP
812 * This option specifies that duplicate data items should be compared as
813 * strings in reverse order.
814 * <li>#MDB_CREATE
815 * Create the named database if it doesn't exist. This option is not
816 * allowed in a read-only transaction or a read-only environment.
817 * </ul>
818 * @param[out] dbi Address where the new #MDB_dbi handle will be stored
819 * @return A non-zero error value on failure and 0 on success. Some possible
820 * errors are:
821 * <ul>
822 * <li>#MDB_NOTFOUND - the specified database doesn't exist in the environment
823 * and #MDB_CREATE was not specified.
824 * <li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs().
825 * </ul>
826 */
827 int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
828
829 /** @brief Retrieve statistics for a database.
830 *
831 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
832 * @param[in] dbi A database handle returned by #mdb_dbi_open()
833 * @param[out] stat The address of an #MDB_stat structure
834 * where the statistics will be copied
835 * @return A non-zero error value on failure and 0 on success. Some possible
836 * errors are:
837 * <ul>
838 * <li>EINVAL - an invalid parameter was specified.
839 * </ul>
840 */
841 int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
842
843 /** @brief Close a database handle.
844 *
845 * This call is not mutex protected. Handles should only be closed by
846 * a single thread, and only if no other threads are going to reference
847 * the database handle or one of its cursors any further. Do not close
848 * a handle if an existing transaction has modified its database.
849 * @param[in] env An environment handle returned by #mdb_env_create()
850 * @param[in] dbi A database handle returned by #mdb_dbi_open()
851 */
852 void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
853
854 /** @brief Delete a database and/or free all its pages.
855 *
856 * If the \b del parameter is 1, the DB handle will be closed
857 * and the DB will be deleted.
858 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
859 * @param[in] dbi A database handle returned by #mdb_dbi_open()
860 * @param[in] del 1 to delete the DB from the environment,
861 * 0 to just free its pages.
862 * @return A non-zero error value on failure and 0 on success.
863 */
864 int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
865
866 /** @brief Set a custom key comparison function for a database.
867 *
868 * The comparison function is called whenever it is necessary to compare a
869 * key specified by the application with a key currently stored in the database.
870 * If no comparison function is specified, and no special key flags were specified
871 * with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating
872 * before longer keys.
873 * @warning This function must be called before any data access functions are used,
874 * otherwise data corruption may occur. The same comparison function must be used by every
875 * program accessing the database, every time the database is used.
876 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
877 * @param[in] dbi A database handle returned by #mdb_dbi_open()
878 * @param[in] cmp A #MDB_cmp_func function
879 * @return A non-zero error value on failure and 0 on success. Some possible
880 * errors are:
881 * <ul>
882 * <li>EINVAL - an invalid parameter was specified.
883 * </ul>
884 */
885 int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
886
887 /** @brief Set a custom data comparison function for a #MDB_DUPSORT database.
888 *
889 * This comparison function is called whenever it is necessary to compare a data
890 * item specified by the application with a data item currently stored in the database.
891 * This function only takes effect if the database was opened with the #MDB_DUPSORT
892 * flag.
893 * If no comparison function is specified, and no special key flags were specified
894 * with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating
895 * before longer items.
896 * @warning This function must be called before any data access functions are used,
897 * otherwise data corruption may occur. The same comparison function must be used by every
898 * program accessing the database, every time the database is used.
899 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
900 * @param[in] dbi A database handle returned by #mdb_dbi_open()
901 * @param[in] cmp A #MDB_cmp_func function
902 * @return A non-zero error value on failure and 0 on success. Some possible
903 * errors are:
904 * <ul>
905 * <li>EINVAL - an invalid parameter was specified.
906 * </ul>
907 */
908 int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
909
910 /** @brief Set a relocation function for a #MDB_FIXEDMAP database.
911 *
912 * @todo The relocation function is called whenever it is necessary to move the data
913 * of an item to a different position in the database (e.g. through tree
914 * balancing operations, shifts as a result of adds or deletes, etc.). It is
915 * intended to allow address/position-dependent data items to be stored in
916 * a database in an environment opened with the #MDB_FIXEDMAP option.
917 * Currently the relocation feature is unimplemented and setting
918 * this function has no effect.
919 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
920 * @param[in] dbi A database handle returned by #mdb_dbi_open()
921 * @param[in] rel A #MDB_rel_func function
922 * @return A non-zero error value on failure and 0 on success. Some possible
923 * errors are:
924 * <ul>
925 * <li>EINVAL - an invalid parameter was specified.
926 * </ul>
927 */
928 int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel);
929
930 /** @brief Set a context pointer for a #MDB_FIXEDMAP database's relocation function.
931 *
932 * See #mdb_set_relfunc and #MDB_rel_func for more details.
933 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
934 * @param[in] dbi A database handle returned by #mdb_dbi_open()
935 * @param[in] ctx An arbitrary pointer for whatever the application needs.
936 * It will be passed to the callback function set by #mdb_set_relfunc
937 * as its \b relctx parameter whenever the callback is invoked.
938 * @return A non-zero error value on failure and 0 on success. Some possible
939 * errors are:
940 * <ul>
941 * <li>EINVAL - an invalid parameter was specified.
942 * </ul>
943 */
944 int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx);
945
946 /** @brief Get items from a database.
947 *
948 * This function retrieves key/data pairs from the database. The address
949 * and length of the data associated with the specified \b key are returned
950 * in the structure to which \b data refers.
951 * If the database supports duplicate keys (#MDB_DUPSORT) then the
952 * first data item for the key will be returned. Retrieval of other
953 * items requires the use of #mdb_cursor_get().
954 *
955 * @note The memory pointed to by the returned values is owned by the
956 * database. The caller need not dispose of the memory, and may not
957 * modify it in any way. For values returned in a read-only transaction
958 * any modification attempts will cause a SIGSEGV.
959 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
960 * @param[in] dbi A database handle returned by #mdb_dbi_open()
961 * @param[in] key The key to search for in the database
962 * @param[out] data The data corresponding to the key
963 * @return A non-zero error value on failure and 0 on success. Some possible
964 * errors are:
965 * <ul>
966 * <li>#MDB_NOTFOUND - the key was not in the database.
967 * <li>EINVAL - an invalid parameter was specified.
968 * </ul>
969 */
970 int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
971
972 /** @brief Store items into a database.
973 *
974 * This function stores key/data pairs in the database. The default behavior
975 * is to enter the new key/data pair, replacing any previously existing key
976 * if duplicates are disallowed, or adding a duplicate data item if
977 * duplicates are allowed (#MDB_DUPSORT).
978 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
979 * @param[in] dbi A database handle returned by #mdb_dbi_open()
980 * @param[in] key The key to store in the database
981 * @param[in,out] data The data to store
982 * @param[in] flags Special options for this operation. This parameter
983 * must be set to 0 or by bitwise OR'ing together one or more of the
984 * values described here.
985 * <ul>
986 * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
987 * already appear in the database. This flag may only be specified
988 * if the database was opened with #MDB_DUPSORT. The function will
989 * return #MDB_KEYEXIST if the key/data pair already appears in the
990 * database.
991 * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
992 * does not already appear in the database. The function will return
993 * #MDB_KEYEXIST if the key already appears in the database, even if
994 * the database supports duplicates (#MDB_DUPSORT). The \b data
995 * parameter will be set to point to the existing item.
996 * <li>#MDB_RESERVE - reserve space for data of the given size, but
997 * don't copy the given data. Instead, return a pointer to the
998 * reserved space, which the caller can fill in later. This saves
999 * an extra memcpy if the data is being generated later.
1000 * <li>#MDB_APPEND - append the given key/data pair to the end of the
1001 * database. No key comparisons are performed. This option allows
1002 * fast bulk loading when keys are already known to be in the
1003 * correct order. Loading unsorted keys with this flag will cause
1004 * data corruption.
1005 * <li>#MDB_APPENDDUP - as above, but for sorted dup data.
1006 * </ul>
1007 * @return A non-zero error value on failure and 0 on success. Some possible
1008 * errors are:
1009 * <ul>
1010 * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
1011 * <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
1012 * <li>EACCES - an attempt was made to write in a read-only transaction.
1013 * <li>EINVAL - an invalid parameter was specified.
1014 * </ul>
1015 */
1016 int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
1017 unsigned int flags);
1018
1019 /** @brief Delete items from a database.
1020 *
1021 * This function removes key/data pairs from the database.
1022 * If the database does not support sorted duplicate data items
1023 * (#MDB_DUPSORT) the data parameter is ignored.
1024 * If the database supports sorted duplicates and the data parameter
1025 * is NULL, all of the duplicate data items for the key will be
1026 * deleted. Otherwise, if the data parameter is non-NULL
1027 * only the matching data item will be deleted.
1028 * This function will return #MDB_NOTFOUND if the specified key/data
1029 * pair is not in the database.
1030 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1031 * @param[in] dbi A database handle returned by #mdb_dbi_open()
1032 * @param[in] key The key to delete from the database
1033 * @param[in] data The data to delete
1034 * @return A non-zero error value on failure and 0 on success. Some possible
1035 * errors are:
1036 * <ul>
1037 * <li>EACCES - an attempt was made to write in a read-only transaction.
1038 * <li>EINVAL - an invalid parameter was specified.
1039 * </ul>
1040 */
1041 int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
1042
1043 /** @brief Create a cursor handle.
1044 *
1045 * Cursors are associated with a specific transaction and database and
1046 * may not span threads.
1047 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1048 * @param[in] dbi A database handle returned by #mdb_dbi_open()
1049 * @param[out] cursor Address where the new #MDB_cursor handle will be stored
1050 * @return A non-zero error value on failure and 0 on success. Some possible
1051 * errors are:
1052 * <ul>
1053 * <li>EINVAL - an invalid parameter was specified.
1054 * </ul>
1055 */
1056 int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor);
1057
1058 /** @brief Close a cursor handle.
1059 *
1060 * The cursor handle will be freed and must not be used again after this call.
1061 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1062 */
1063 void mdb_cursor_close(MDB_cursor *cursor);
1064
1065 /** @brief Renew a cursor handle.
1066 *
1067 * Cursors are associated with a specific transaction and database and
1068 * may not span threads. Cursors that are only used in read-only
1069 * transactions may be re-used, to avoid unnecessary malloc/free overhead.
1070 * The cursor may be associated with a new read-only transaction, while
1071 * still referencing the same database handle as it was created with.
1072 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1073 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1074 * @return A non-zero error value on failure and 0 on success. Some possible
1075 * errors are:
1076 * <ul>
1077 * <li>EINVAL - an invalid parameter was specified.
1078 * </ul>
1079 */
1080 int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor);
1081
1082 /** @brief Return the cursor's transaction handle.
1083 *
1084 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1085 */
1086 MDB_txn *mdb_cursor_txn(MDB_cursor *cursor);
1087
1088 /** @brief Return the cursor's database handle.
1089 *
1090 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1091 */
1092 MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor);
1093
1094 /** @brief Retrieve by cursor.
1095 *
1096 * This function retrieves key/data pairs from the database. The address and length
1097 * of the key are returned in the object to which \b key refers (except for the
1098 * case of the #MDB_SET option, in which the \b key object is unchanged), and
1099 * the address and length of the data are returned in the object to which \b data
1100 * refers.
1101 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1102 * @param[in,out] key The key for a retrieved item
1103 * @param[in,out] data The data of a retrieved item
1104 * @param[in] op A cursor operation #MDB_cursor_op
1105 * @return A non-zero error value on failure and 0 on success. Some possible
1106 * errors are:
1107 * <ul>
1108 * <li>#MDB_NOTFOUND - no matching key found.
1109 * <li>EINVAL - an invalid parameter was specified.
1110 * </ul>
1111 */
1112 int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1113 MDB_cursor_op op);
1114
1115 /** @brief Store by cursor.
1116 *
1117 * This function stores key/data pairs into the database.
1118 * If the function fails for any reason, the state of the cursor will be
1119 * unchanged. If the function succeeds and an item is inserted into the
1120 * database, the cursor is always positioned to refer to the newly inserted item.
1121 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1122 * @param[in] key The key operated on.
1123 * @param[in] data The data operated on.
1124 * @param[in] flags Options for this operation. This parameter
1125 * must be set to 0 or one of the values described here.
1126 * <ul>
1127 * <li>#MDB_CURRENT - overwrite the data of the key/data pair to which
1128 * the cursor refers with the specified data item. The \b key
1129 * parameter is ignored.
1130 * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
1131 * already appear in the database. This flag may only be specified
1132 * if the database was opened with #MDB_DUPSORT. The function will
1133 * return #MDB_KEYEXIST if the key/data pair already appears in the
1134 * database.
1135 * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
1136 * does not already appear in the database. The function will return
1137 * #MDB_KEYEXIST if the key already appears in the database, even if
1138 * the database supports duplicates (#MDB_DUPSORT).
1139 * <li>#MDB_RESERVE - reserve space for data of the given size, but
1140 * don't copy the given data. Instead, return a pointer to the
1141 * reserved space, which the caller can fill in later. This saves
1142 * an extra memcpy if the data is being generated later.
1143 * <li>#MDB_APPEND - append the given key/data pair to the end of the
1144 * database. No key comparisons are performed. This option allows
1145 * fast bulk loading when keys are already known to be in the
1146 * correct order. Loading unsorted keys with this flag will cause
1147 * data corruption.
1148 * <li>#MDB_APPENDDUP - as above, but for sorted dup data.
1149 * </ul>
1150 * @return A non-zero error value on failure and 0 on success. Some possible
1151 * errors are:
1152 * <ul>
1153 * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
1154 * <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
1155 * <li>EACCES - an attempt was made to modify a read-only database.
1156 * <li>EINVAL - an invalid parameter was specified.
1157 * </ul>
1158 */
1159 int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1160 unsigned int flags);
1161
1162 /** @brief Delete current key/data pair
1163 *
1164 * This function deletes the key/data pair to which the cursor refers.
1165 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1166 * @param[in] flags Options for this operation. This parameter
1167 * must be set to 0 or one of the values described here.
1168 * <ul>
1169 * <li>#MDB_NODUPDATA - delete all of the data items for the current key.
1170 * This flag may only be specified if the database was opened with #MDB_DUPSORT.
1171 * </ul>
1172 * @return A non-zero error value on failure and 0 on success. Some possible
1173 * errors are:
1174 * <ul>
1175 * <li>EACCES - an attempt was made to modify a read-only database.
1176 * <li>EINVAL - an invalid parameter was specified.
1177 * </ul>
1178 */
1179 int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags);
1180
1181 /** @brief Return count of duplicates for current key.
1182 *
1183 * This call is only valid on databases that support sorted duplicate
1184 * data items (#MDB_DUPSORT).
1185 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1186 * @param[out] countp Address where the count will be stored
1187 * @return A non-zero error value on failure and 0 on success. Some possible
1188 * errors are:
1189 * <ul>
1190 * <li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
1191 * </ul>
1192 */
1193 int mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
1194
1195 /** @brief Compare two items as if they were keys of a particular database.
1196 *
1197 * This returns a comparison as if the two data items were keys in the
1198 * specified database.
1199 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1200 * @param[in] dbi A database handle returned by #mdb_dbi_open()
1201 * @param[in] a The first item to compare
1202 * @param[in] b The second item to compare
1203 * @return < 0 if a < b, 0 if a == b, > 0 if a > b
1204 */
1205 int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
1206
1207 /** @brief Compare two items as if they were data items of a particular database.
1208 *
1209 * This returns a comparison as if the two items were data items of
1210 * a sorted duplicates #MDB_DUPSORT database.
1211 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1212 * @param[in] dbi A database handle returned by #mdb_dbi_open()
1213 * @param[in] a The first item to compare
1214 * @param[in] b The second item to compare
1215 * @return < 0 if a < b, 0 if a == b, > 0 if a > b
1216 */
1217 int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
1218 /** @} */
1219
1220 #ifdef __cplusplus
1221 }
1222 #endif
1223 #endif /* _LMDB_H_ */
0 /** @file mdb.c
1 * @brief memory-mapped database library
2 *
3 * A Btree-based database management library modeled loosely on the
4 * BerkeleyDB API, but much simplified.
5 */
6 /*
7 * Copyright 2011-2013 Howard Chu, Symas Corp.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted only as authorized by the OpenLDAP
12 * Public License.
13 *
14 * A copy of this license is available in the file LICENSE in the
15 * top-level directory of the distribution or, alternatively, at
16 * <http://www.OpenLDAP.org/license.html>.
17 *
18 * This code is derived from btree.c written by Martin Hedenfalk.
19 *
20 * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
21 *
22 * Permission to use, copy, modify, and distribute this software for any
23 * purpose with or without fee is hereby granted, provided that the above
24 * copyright notice and this permission notice appear in all copies.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
27 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
29 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
30 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
31 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
32 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
33 */
34 #define _GNU_SOURCE 1
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/param.h>
38 #ifdef _WIN32
39 #include <windows.h>
40 #else
41 #include <sys/uio.h>
42 #include <sys/mman.h>
43 #ifdef HAVE_SYS_FILE_H
44 #include <sys/file.h>
45 #endif
46 #include <fcntl.h>
47 #endif
48
49 #include <assert.h>
50 #include <errno.h>
51 #include <limits.h>
52 #include <stddef.h>
53 #include <inttypes.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <time.h>
58 #include <unistd.h>
59
60 #if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER))
61 #include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */
62 #endif
63 /* Apple/BSD: lock with POSIX semaphores and flush file data with fsync(); Android: fsync() only. */
64 #if defined(__APPLE__) || defined (BSD)
65 # define MDB_USE_POSIX_SEM 1
66 # define MDB_FDATASYNC fsync
67 #elif defined(ANDROID)
68 # define MDB_FDATASYNC fsync
69 #endif
70
71 #ifndef _WIN32
72 #include <pthread.h>
73 #ifdef MDB_USE_POSIX_SEM
74 #include <semaphore.h>
75 #endif
76 #endif
77
78 #ifdef USE_VALGRIND
79 #include <valgrind/memcheck.h>
80 #define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z)
81 #define VGMEMP_ALLOC(h,a,s) VALGRIND_MEMPOOL_ALLOC(h,a,s)
82 #define VGMEMP_FREE(h,a) VALGRIND_MEMPOOL_FREE(h,a)
83 #define VGMEMP_DESTROY(h) VALGRIND_DESTROY_MEMPOOL(h)
84 #define VGMEMP_DEFINED(a,s) VALGRIND_MAKE_MEM_DEFINED(a,s)
85 #else
86 #define VGMEMP_CREATE(h,r,z)
87 #define VGMEMP_ALLOC(h,a,s)
88 #define VGMEMP_FREE(h,a)
89 #define VGMEMP_DESTROY(h)
90 #define VGMEMP_DEFINED(a,s)
91 #endif
92
93 #ifndef BYTE_ORDER
94 # if (defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN))
95 /* Solaris just defines one or the other */
96 # define LITTLE_ENDIAN 1234
97 # define BIG_ENDIAN 4321
98 # ifdef _LITTLE_ENDIAN
99 # define BYTE_ORDER LITTLE_ENDIAN
100 # else
101 # define BYTE_ORDER BIG_ENDIAN
102 # endif
103 # else
104 # define BYTE_ORDER __BYTE_ORDER
105 # endif
106 #endif
107
108 #ifndef LITTLE_ENDIAN
109 #define LITTLE_ENDIAN __LITTLE_ENDIAN
110 #endif
111 #ifndef BIG_ENDIAN
112 #define BIG_ENDIAN __BIG_ENDIAN
113 #endif
114 /** Defined for x86 and x86-64 targets only; when defined, #COPY_PGNO is a plain assignment. */
115 #if defined(__i386) || defined(__x86_64)
116 #define MISALIGNED_OK 1
117 #endif
118
119 #include "lmdb.h"
120 #include "midl.h"
121
122 #if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN)
123 # error "Unknown or unsupported endianness (BYTE_ORDER)"
124 #elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
125 # error "Two's complement, reasonably sized integer types, please"
126 #endif
127
128 /** @defgroup internal MDB Internals
129 * @{
130 */
131 /** @defgroup compat Windows Compatibility Macros
132 * A bunch of macros to minimize the amount of platform-specific ifdefs
133 * needed throughout the rest of the code. When the features this library
134 * needs are similar enough to POSIX to be hidden in a one-or-two line
135 * replacement, this macro approach is used.
136 * @{
137 */
138 #ifdef _WIN32
139 #define pthread_t DWORD
140 #define pthread_mutex_t HANDLE
141 #define pthread_key_t DWORD
142 #define pthread_self() GetCurrentThreadId()
143 #define pthread_key_create(x,y) \
144 ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0)
145 #define pthread_key_delete(x) TlsFree(x)
146 #define pthread_getspecific(x) TlsGetValue(x)
147 #define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode())
148 #define pthread_mutex_unlock(x) ReleaseMutex(x)
149 #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
150 #define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex)
151 #define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex)
152 #define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex)
153 #define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex)
154 #define getpid() GetCurrentProcessId()
155 #define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
156 #define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
157 #define ErrCode() GetLastError()
158 #define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;}
159 #define close(fd) CloseHandle(fd)
160 #define munmap(ptr,len) UnmapViewOfFile(ptr)
161 #else
162
163 #ifdef MDB_USE_POSIX_SEM
164
165 #define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex)
166 #define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex)
167 #define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex)
168 #define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex)
169 /** Wait on \b sem, retrying while sem_wait() fails with EINTR. Returns 0 on success, otherwise the errno of the failed call. */
170 static int
171 mdb_sem_wait(sem_t *sem)
172 {
173 int rc;
174 while ((rc = sem_wait(sem)) && (rc = errno) == EINTR) ;
175 return rc;
176 }
177
178 #else
179 /** Lock the reader mutex.
180 */
181 #define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex)
182 /** Unlock the reader mutex.
183 */
184 #define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex)
185
186 /** Lock the writer mutex.
187 * Only a single write transaction is allowed at a time. Other writers
188 * will block waiting for this mutex.
189 */
190 #define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex)
191 /** Unlock the writer mutex.
192 */
193 #define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex)
194 #endif /* MDB_USE_POSIX_SEM */
195
196 /** Get the error code for the last failed system function.
197 */
198 #define ErrCode() errno
199
200 /** An abstraction for a file handle.
201 * On POSIX systems file handles are small integers. On Windows
202 * they're opaque pointers.
203 */
204 #define HANDLE int
205
206 /** A value for an invalid file handle.
207 * Mainly used to initialize file variables and signify that they are
208 * unused.
209 */
210 #define INVALID_HANDLE_VALUE (-1)
211
212 /** Get the size of a memory page for the system.
213 * This is the basic size that the platform's memory manager uses, and is
214 * fundamental to the use of memory-mapped files.
215 */
216 #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
217 #endif
218 /** Bytes reserved per mutex in #MDB_txninfo: a 32-byte object name on Windows / POSIX-semaphore builds, otherwise the size of a pthread_mutex_t. */
219 #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
220 #define MNAME_LEN 32
221 #else
222 #define MNAME_LEN (sizeof(pthread_mutex_t))
223 #endif
224
225 /** @} */
226
227 #ifndef _WIN32
228 /** A flag for opening a file and requesting synchronous data writes.
229 * This is only used when writing a meta page. It's not strictly needed;
230 * we could just do a normal write and then immediately perform a flush.
231 * But if this flag is available it saves us an extra system call.
232 *
233 * @note If O_DSYNC is undefined but exists in /usr/include,
234 * preferably set some compiler flag to get the definition.
235 * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC.
236 */
237 #ifndef MDB_DSYNC
238 # define MDB_DSYNC O_DSYNC
239 #endif
240 #endif
241
242 /** Function for flushing the data of a file. Define this to fsync
243 * if fdatasync() is not supported.
244 */
245 #ifndef MDB_FDATASYNC
246 # define MDB_FDATASYNC fdatasync
247 #endif
248 /** Function for flushing a memory-mapped region. Defaults to msync() unless an earlier platform section already defined it. */
249 #ifndef MDB_MSYNC
250 # define MDB_MSYNC(addr,len,flags) msync(addr,len,flags)
251 #endif
252 /* Fallback values for the msync() flags when no included header defined them. */
253 #ifndef MS_SYNC
254 #define MS_SYNC 1
255 #endif
256
257 #ifndef MS_ASYNC
258 #define MS_ASYNC 0
259 #endif
260
261 /** A page number in the database.
262 * Note that 64 bit page numbers are overkill, since pages themselves
263 * already represent 12-13 bits of addressable memory, and the OS will
264 * always limit applications to a maximum of 63 bits of address space.
265 *
266 * @note In the #MDB_node structure, we only store 48 bits of this value,
267 * which thus limits us to only 60 bits of addressable data.
268 */
269 typedef MDB_ID pgno_t;
270
271 /** A transaction ID.
272 * See struct MDB_txn.mt_txnid for details.
273 */
274 typedef MDB_ID txnid_t;
275
276 /** @defgroup debug Debug Macros
277 * @{
278 */
279 #ifndef MDB_DEBUG
280 /** Enable debug output.
281 * Set this to 1 for copious tracing. Set to 2 to add dumps of all IDLs
282 * read from and written to the database (used for free space management).
283 */
284 #define MDB_DEBUG 0
285 #endif
286
287 #if !(__STDC_VERSION__ >= 199901L || defined(__GNUC__))
288 # undef MDB_DEBUG
289 # define MDB_DEBUG 0
290 # define DPRINTF (void) /* Vararg macros may be unsupported */
291 #elif MDB_DEBUG
292 static int mdb_debug;
293 static txnid_t mdb_debug_start;
294
295 /** Print a debug message with printf formatting. */
296 # define DPRINTF(fmt, ...) /**< Requires 2 or more args */ \
297 ((void) ((mdb_debug) && \
298 fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__)))
299 #else
300 # define DPRINTF(fmt, ...) ((void) 0)
301 # define MDB_DEBUG_SKIP
302 #endif
303 /** Print a debug string.
304 * The string is printed literally, with no format processing.
305 */
306 #define DPUTS(arg) DPRINTF("%s", arg)
307 /** @} */
308
309 /** A default memory page size.
310 * The actual size is platform-dependent, but we use this for
311 * boot-strapping. We probably should not be using this any more.
312 * The #GET_PAGESIZE() macro is used to get the actual size.
313 *
314 * Note that we don't currently support Huge pages. On Linux,
315 * regular data files cannot use Huge pages, and in general
316 * Huge pages aren't actually pageable. We rely on the OS
317 * demand-pager to read our data and page it out when memory
318 * pressure from other processes is high. So until OSs have
319 * actual paging support for Huge pages, they're not viable.
320 */
321 #define MDB_PAGESIZE 4096
322
323 /** The minimum number of keys required in a database page.
324 * Setting this to a larger value will place a smaller bound on the
325 * maximum size of a data item. Data items larger than this size will
326 * be pushed into overflow pages instead of being stored directly in
327 * the B-tree node. This value used to default to 4. With a page size
328 * of 4096 bytes that meant that any item larger than 1024 bytes would
329 * go into an overflow page. That also meant that on average 2-3KB of
330 * each overflow page was wasted space. The value cannot be lower than
331 * 2 because then there would no longer be a tree structure. With this
332 * value, items larger than 2KB will go into overflow pages, and on
333 * average only 1KB will be wasted.
334 */
335 #define MDB_MINKEYS 2
336
337 /** A stamp that identifies a file as an MDB file.
338 * There's nothing special about this value other than that it is easily
339 * recognizable, and it will reflect any byte order mismatches.
340 */
341 #define MDB_MAGIC 0xBEEFC0DE
342
343 /** The version number for a database's file format. */
344 #define MDB_VERSION 1
345
346 /** @brief The maximum size of a key in the database.
347 *
348 * We require that keys all fit onto a regular page. This limit
349 * could be raised a bit further if needed, to something just
350 * under #MDB_PAGESIZE / #MDB_MINKEYS.
351 *
352 * Note that data items in an #MDB_DUPSORT database are actually keys
353 * of a subDB, so they're also limited to this size.
354 */
355 #ifndef MDB_MAXKEYSIZE
356 #define MDB_MAXKEYSIZE 511
357 #endif
358
359 /** @brief The maximum size of a data item.
360 *
361 * We only store a 32 bit value for node sizes.
362 */
363 #define MAXDATASIZE 0xffffffffUL
364
365 #if MDB_DEBUG
366 /** A key buffer.
367 * @ingroup debug
368 * This is used for printing a hex dump of a key's contents.
369 */
370 #define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)]
371 /** Display a key in hex.
372 * @ingroup debug
373 * Invoke a function to display a key in hex.
374 */
375 #define DKEY(x) mdb_dkey(x, kbuf)
376 #else
377 #define DKBUF typedef int dummy_kbuf /* so we can put ';' after */
378 #define DKEY(x) 0
379 #endif
380
381 /** An invalid page number.
382 * Mainly used to denote an empty tree.
383 */
384 #define P_INVALID (~(pgno_t)0)
385
386 /** Test if the flags \b f are set in a flag word \b w. */
387 #define F_ISSET(w, f) (((w) & (f)) == (f))
388
389 /** Used for offsets within a single page.
390 * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
391 * this is plenty.
392 */
393 typedef uint16_t indx_t;
394
395 /** Default size of memory map.
396 * This is certainly too small for any actual applications. Apps should always set
397 * the size explicitly using #mdb_env_set_mapsize().
398 */
399 #define DEFAULT_MAPSIZE 1048576
400
401 /** @defgroup readers Reader Lock Table
402 * Readers don't acquire any locks for their data access. Instead, they
403 * simply record their transaction ID in the reader table. The reader
404 * mutex is needed just to find an empty slot in the reader table. The
405 * slot's address is saved in thread-specific data so that subsequent read
406 * transactions started by the same thread need no further locking to proceed.
407 *
408 * No reader table is used if the database is on a read-only filesystem.
409 *
410 * Since the database uses multi-version concurrency control, readers don't
411 * actually need any locking. This table is used to keep track of which
412 * readers are using data from which old transactions, so that we'll know
413 * when a particular old transaction is no longer in use. Old transactions
414 * that have discarded any data pages can then have those pages reclaimed
415 * for use by a later write transaction.
416 *
417 * The lock table is constructed such that reader slots are aligned with the
418 * processor's cache line size. Any slot is only ever used by one thread.
419 * This alignment guarantees that there will be no contention or cache
420 * thrashing as threads update their own slot info, and also eliminates
421 * any need for locking when accessing a slot.
422 *
423 * A writer thread will scan every slot in the table to determine the oldest
424 * outstanding reader transaction. Any freed pages older than this will be
425 * reclaimed by the writer. The writer doesn't use any locks when scanning
426 * this table. This means that there's no guarantee that the writer will
427 * see the most up-to-date reader info, but that's not required for correct
428 * operation - all we need is to know the upper bound on the oldest reader,
429 * we don't care at all about the newest reader. So the only consequence of
430 * reading stale information here is that old pages might hang around a
431 * while longer before being reclaimed. That's actually good anyway, because
432 * the longer we delay reclaiming old pages, the more likely it is that a
433 * string of contiguous pages can be found after coalescing old pages from
434 * many old transactions together.
435 * @{
436 */
437 /** Number of slots in the reader table.
438 * This value was chosen somewhat arbitrarily. 126 readers plus a
439 * couple of mutexes fit exactly into 8KB on my development machine.
440 * Applications should set the table size using #mdb_env_set_maxreaders().
441 */
442 #define DEFAULT_READERS 126
443
444 /** The size of a CPU cache line in bytes. We want our lock structures
445 * aligned to this size to avoid false cache line sharing in the
446 * lock table.
447 * This value works for most CPUs. For Itanium this should be 128.
448 */
449 #ifndef CACHELINE
450 #define CACHELINE 64
451 #endif
452
453 /** The information we store in a single slot of the reader table.
454 * In addition to a transaction ID, we also record the process and
455 * thread ID that owns a slot, so that we can detect stale information,
456 * e.g. threads or processes that went away without cleaning up.
457 * @note We currently don't check for stale records. We simply re-init
458 * the table when we know that we're the only process opening the
459 * lock file.
460 */
461 typedef struct MDB_rxbody {
462 /** Current Transaction ID when this transaction began, or (txnid_t)-1.
463 * Multiple readers that start at the same time will probably have the
464 * same ID here. Again, it's not important to exclude them from
465 * anything; all we need to know is which version of the DB they
466 * started from so we can avoid overwriting any data used in that
467 * particular version.
468 */
469 txnid_t mrb_txnid;
470 /** The process ID of the process owning this reader txn. */
471 pid_t mrb_pid;
472 /** The thread ID of the thread owning this txn. */
473 pthread_t mrb_tid;
474 } MDB_rxbody;
475
476 /** The actual reader record, with cacheline padding. */
477 typedef struct MDB_reader {
478 union {
479 MDB_rxbody mrx;
480 /** shorthands for mrb_txnid, mrb_pid and mrb_tid, reached through the union */
481 #define mr_txnid mru.mrx.mrb_txnid
482 #define mr_pid mru.mrx.mrb_pid
483 #define mr_tid mru.mrx.mrb_tid
484 /** cache line alignment: sizeof(MDB_rxbody) rounded up to a multiple of #CACHELINE (the mask requires a power of two) */
485 char pad[(sizeof(MDB_rxbody)+CACHELINE-1) & ~(CACHELINE-1)];
486 } mru;
487 } MDB_reader;
488
489 /** The header for the reader table.
490 * The table resides in a memory-mapped file. (This is a different file
491 * than is used for the main database.)
492 *
493 * For POSIX the actual mutexes reside in the shared memory of this
494 * mapped file. On Windows, mutexes are named objects allocated by the
495 * kernel; we store the mutex names in this mapped file so that other
496 * processes can grab them. This same approach is also used on
497 * MacOSX/Darwin (using named semaphores) since MacOSX doesn't support
498 * process-shared POSIX mutexes. For these cases where a named object
499 * is used, the object name is derived from a 64 bit FNV hash of the
500 * environment pathname. As such, naming collisions are extremely
501 * unlikely. If a collision occurs, the results are unpredictable.
502 */
503 typedef struct MDB_txbody {
504 /** Stamp identifying this as an MDB file. It must be set
505 * to #MDB_MAGIC. */
506 uint32_t mtb_magic;
507 /** Version number of this lock file. Must be set to #MDB_VERSION. */
508 uint32_t mtb_version;
509 #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
510 char mtb_rmname[MNAME_LEN];
511 #else
512 /** Mutex protecting access to this table.
513 * This is the reader lock that #LOCK_MUTEX_R acquires.
514 */
515 pthread_mutex_t mtb_mutex;
516 #endif
517 /** The ID of the last transaction committed to the database.
518 * This is recorded here only for convenience; the value can always
519 * be determined by reading the main database meta pages.
520 */
521 txnid_t mtb_txnid;
522 /** The number of slots that have been used in the reader table.
523 * This always records the maximum count, it is not decremented
524 * when readers release their slots.
525 */
526 unsigned mtb_numreaders;
527 } MDB_txbody;
528
529 /** The actual reader table definition: the #MDB_txbody header, then the writer mutex (or its name), each padded to a #CACHELINE multiple, then the reader slots. */
530 typedef struct MDB_txninfo {
531 union {
532 MDB_txbody mtb; /**< table header; reached through the mti_* shorthands below */
533 #define mti_magic mt1.mtb.mtb_magic
534 #define mti_version mt1.mtb.mtb_version
535 #define mti_mutex mt1.mtb.mtb_mutex
536 #define mti_rmname mt1.mtb.mtb_rmname
537 #define mti_txnid mt1.mtb.mtb_txnid
538 #define mti_numreaders mt1.mtb.mtb_numreaders
539 char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; /**< sizeof(MDB_txbody) rounded up to a multiple of #CACHELINE */
540 } mt1;
541 union {
542 #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
543 char mt2_wmname[MNAME_LEN]; /**< presumably the name of the writer mutex object - confirm against the lock setup code */
544 #define mti_wmname mt2.mt2_wmname
545 #else
546 pthread_mutex_t mt2_wmutex; /**< the writer mutex taken by #LOCK_MUTEX_W */
547 #define mti_wmutex mt2.mt2_wmutex
548 #endif
549 char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; /**< #MNAME_LEN rounded up to a multiple of #CACHELINE */
550 } mt2;
551 MDB_reader mti_readers[1]; /**< reader slots; NOTE(review): declared with one element, presumably over-allocated at runtime - confirm */
552 } MDB_txninfo;
553 /** @} */
554
555 /** Common header for all page types.
556 * Overflow records occupy a number of contiguous pages with no
557 * headers on any page after the first.
558 */
559 typedef struct MDB_page {
560 #define mp_pgno mp_p.p_pgno
561 #define mp_next mp_p.p_next
562 union {
563 pgno_t p_pgno; /**< page number */
564 void * p_next; /**< for in-memory list of freed structs */
565 } mp_p;
566 uint16_t mp_pad;
567 /** @defgroup mdb_page Page Flags
568 * @ingroup internal
569 * Flags for the page headers.
570 * @{
571 */
572 #define P_BRANCH 0x01 /**< branch page */
573 #define P_LEAF 0x02 /**< leaf page */
574 #define P_OVERFLOW 0x04 /**< overflow page */
575 #define P_META 0x08 /**< meta page */
576 #define P_DIRTY 0x10 /**< dirty page */
577 #define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */
578 #define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */
579 /** @} */
580 uint16_t mp_flags; /**< @ref mdb_page */
581 #define mp_lower mp_pb.pb.pb_lower
582 #define mp_upper mp_pb.pb.pb_upper
583 #define mp_pages mp_pb.pb_pages
584 union {
585 struct {
586 indx_t pb_lower; /**< lower bound of free space */
587 indx_t pb_upper; /**< upper bound of free space */
588 } pb;
589 uint32_t pb_pages; /**< number of overflow pages */
590 } mp_pb;
591 indx_t mp_ptrs[1]; /**< dynamic size */
592 } MDB_page;
593
594 /** Size of the page header, excluding dynamic data at the end */
595 #define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs))
596
597 /** Address of first usable data byte in a page, after the header */
598 #define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
599
600 /** Number of nodes on a page: the bytes between the page header and mp_lower, divided by two (one 2-byte #indx_t entry in mp_ptrs per node) */
601 #define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1)
602
603 /** The amount of space remaining in the page */
604 #define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
605
606 /** The percentage of space used in the page, in tenths of a percent. */
607 #define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \
608 ((env)->me_psize - PAGEHDRSZ))
609 /** The minimum page fill factor, in tenths of a percent.
610 * Pages emptier than this are candidates for merging.
611 */
612 #define FILL_THRESHOLD 250
613
614 /** Test if a page is a leaf page */
615 #define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF)
616 /** Test if a page is a LEAF2 page */
617 #define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2)
618 /** Test if a page is a branch page */
619 #define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH)
620 /** Test if a page is an overflow page */
621 #define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW)
622 /** Test if a page is a sub page */
623 #define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP)
624
625 /** The number of overflow pages needed to store the given size plus one page header (#PAGEHDRSZ), rounded up to whole pages. */
626 #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
627
628 /** Header for a single key/data pair within a page.
629 * We guarantee 2-byte alignment for nodes.
630 */
631 typedef struct MDB_node {
632 /** lo and hi are used for data size on leaf nodes and for
633 * child pgno on branch nodes. On 64 bit platforms, flags
634 * is also used for pgno. (Branch nodes have no flags).
635 * They are in host byte order in case that lets some
636 * accesses be optimized into a 32-bit word access.
637 */
638 #define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN]
639 #define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */
640 unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */
641 /** @defgroup mdb_node Node Flags
642 * @ingroup internal
643 * Flags for node headers.
644 * @{
645 */
646 #define F_BIGDATA 0x01 /**< data put on overflow page */
647 #define F_SUBDATA 0x02 /**< data is a sub-database */
648 #define F_DUPDATA 0x04 /**< data has duplicates */
649
650 /** valid flags for #mdb_node_add() */
651 #define NODE_ADD_FLAGS (F_DUPDATA|F_SUBDATA|MDB_RESERVE|MDB_APPEND)
652
653 /** @} */
654 unsigned short mn_flags; /**< @ref mdb_node */
655 unsigned short mn_ksize; /**< key size */
656 char mn_data[1]; /**< key and data are appended here */
657 } MDB_node;
658
/** Bytes occupied by the fixed node header, i.e. everything in
 * #MDB_node that precedes the variable-length key/data area.
 */
#define NODESIZE	 offsetof(MDB_node, mn_data)

/** Shift that moves mn_flags into the top word of a page number:
 * 32 when #pgno_t is wider than 32 bits, otherwise 0.
 */
#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0)

/** Size of a branch-page node for key \b k: the node header plus the
 * key bytes (no data is stored). A NULL key contributes no bytes.
 */
#define INDXSIZE(k)	 (NODESIZE + ((k) != NULL ? (k)->mv_size : 0))

/** Size of a leaf-page node for key \b k and data \b d: the node
 * header plus the key bytes plus the data bytes.
 */
#define LEAFSIZE(k, d)	 (NODESIZE + (k)->mv_size + (d)->mv_size)

/** Locate node number \b i of page \b p through the page's mp_ptrs table */
#define NODEPTR(p, i)	 ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))

/** Start of the key bytes of \b node */
#define NODEKEY(node)	 (void *)((node)->mn_data)

/** Start of the data bytes of \b node; they follow the key directly */
#define NODEDATA(node)	 (void *)((node)->mn_data + (node)->mn_ksize)

/** Read the child page number stored in a branch node */
#define NODEPGNO(node) \
	((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \
	 (PGNO_TOPWORD ? ((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0))
/** Store child page number \b pgno in a branch node */
#define SETPGNO(node,pgno)	do { \
	(node)->mn_lo = (pgno) & 0xffff; \
	(node)->mn_hi = (pgno) >> 16; \
	if (PGNO_TOPWORD) \
		(node)->mn_flags = (pgno) >> PGNO_TOPWORD; \
} while(0)

/** Read the data size stored in a leaf node */
#define NODEDSZ(node)	 ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16))
/** Store data size \b size in a leaf node */
#define SETDSZ(node,size)	do { \
	(node)->mn_lo = (size) & 0xffff; \
	(node)->mn_hi = (size) >> 16; \
} while(0)
/** The key size recorded in a node */
#define NODEKSZ(node)	 ((node)->mn_ksize)
700
/** Copy a page number from \b src to \b dst.
 * With #MISALIGNED_OK a plain assignment is used. Otherwise the value
 * is moved in 16-bit pieces: four of them when SIZE_MAX exceeds
 * 32 bits, two of them otherwise.
 */
#ifdef MISALIGNED_OK
#define COPY_PGNO(dst,src)	dst = src
#elif SIZE_MAX > 4294967295UL
#define COPY_PGNO(dst,src)	do { \
	unsigned short *s, *d;	\
	s = (unsigned short *)&(src);	\
	d = (unsigned short *)&(dst);	\
	*d++ = *s++;	\
	*d++ = *s++;	\
	*d++ = *s++;	\
	*d = *s;	\
} while (0)
#else
#define COPY_PGNO(dst,src)	do { \
	unsigned short *s, *d;	\
	s = (unsigned short *)&(src);	\
	d = (unsigned short *)&(dst);	\
	*d++ = *s++;	\
	*d = *s;	\
} while (0)
#endif
/** Address of key number \b i, of size \b ks, in LEAF2 page \b p.
 * LEAF2 pages are used for #MDB_DUPFIXED sorted-duplicate sub-DBs.
 * They carry no node headers; the keys are stored back to back
 * right after the page header.
 */
#define LEAF2KEY(p, i, ks)	((char *)(p) + PAGEHDRSZ + ((i)*(ks)))

/** If \b key is not NULL, point it at the key of \b node. */
#define MDB_GET_KEY(node, key)	{ if ((key) != NULL) { \
	(key)->mv_size = NODEKSZ(node); \
	(key)->mv_data = NODEKEY(node); } }
734
735 /** Information about a single database in the environment. */
736 typedef struct MDB_db {
737 uint32_t md_pad; /**< also ksize for LEAF2 pages */
738 uint16_t md_flags; /**< @ref mdb_dbi_open */
739 uint16_t md_depth; /**< depth of this tree */
740 pgno_t md_branch_pages; /**< number of internal pages */
741 pgno_t md_leaf_pages; /**< number of leaf pages */
742 pgno_t md_overflow_pages; /**< number of overflow pages */
743 size_t md_entries; /**< number of data items */
744 pgno_t md_root; /**< the root page of this tree */
745 } MDB_db;
746
/** mdb_dbi_open flags */
#define PERSISTENT_FLAGS	0x7fff
#define VALID_FLAGS \
	(MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED| \
	 MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE)

/** DB handle of the database that tracks free pages. */
#define FREE_DBI	0
/** DB handle of the default database. */
#define MAIN_DBI	1
756
757 /** Meta page content. */
758 typedef struct MDB_meta {
759 /** Stamp identifying this as an MDB file. It must be set
760 * to #MDB_MAGIC. */
761 uint32_t mm_magic;
762 /** Version number of this lock file. Must be set to #MDB_VERSION. */
763 uint32_t mm_version;
764 void *mm_address; /**< address for fixed mapping */
765 size_t mm_mapsize; /**< size of mmap region */
766 MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */
767 /** The size of pages used in this DB */
768 #define mm_psize mm_dbs[0].md_pad
769 /** Any persistent environment flags. @ref mdb_env */
770 #define mm_flags mm_dbs[0].md_flags
771 pgno_t mm_last_pg; /**< last used page in file */
772 txnid_t mm_txnid; /**< txnid that committed this page */
773 } MDB_meta;
774
775 /** Buffer for a stack-allocated dirty page.
776 * The members define size and alignment, and silence type
777 * aliasing warnings. They are not used directly; that could
778 * mean incorrectly using several union members in parallel.
779 */
780 typedef union MDB_pagebuf {
781 char mb_raw[MDB_PAGESIZE];
782 MDB_page mb_page;
783 struct {
784 char mm_pad[PAGEHDRSZ];
785 MDB_meta mm_meta;
786 } mb_metabuf;
787 } MDB_pagebuf;
788
789 /** Auxiliary DB info.
790 * The information here is mostly static/read-only. There is
791 * only a single copy of this record in the environment.
792 */
793 typedef struct MDB_dbx {
794 MDB_val md_name; /**< name of the database */
795 MDB_cmp_func *md_cmp; /**< function for comparing keys */
796 MDB_cmp_func *md_dcmp; /**< function for comparing data items */
797 MDB_rel_func *md_rel; /**< user relocate function */
798 void *md_relctx; /**< user-provided context for md_rel */
799 } MDB_dbx;
800
801 /** A database transaction.
802 * Every operation requires a transaction handle.
803 */
804 struct MDB_txn {
805 MDB_txn *mt_parent; /**< parent of a nested txn */
806 MDB_txn *mt_child; /**< nested txn under this txn */
807 pgno_t mt_next_pgno; /**< next unallocated page */
808 /** The ID of this transaction. IDs are integers incrementing from 1.
809 * Only committed write transactions increment the ID. If a transaction
810 * aborts, the ID may be re-used by the next writer.
811 */
812 txnid_t mt_txnid;
813 MDB_env *mt_env; /**< the DB environment */
814 /** The list of pages that became unused during this transaction.
815 */
816 MDB_IDL mt_free_pgs;
817 union {
818 MDB_ID2L dirty_list; /**< for write txns: modified pages */
819 MDB_reader *reader; /**< this thread's reader table slot or NULL */
820 } mt_u;
821 /** Array of records for each DB known in the environment. */
822 MDB_dbx *mt_dbxs;
823 /** Array of MDB_db records for each known DB */
824 MDB_db *mt_dbs;
825 /** @defgroup mt_dbflag Transaction DB Flags
826 * @ingroup internal
827 * @{
828 */
829 #define DB_DIRTY 0x01 /**< DB was written in this txn */
830 #define DB_STALE 0x02 /**< DB record is older than txnID */
831 #define DB_NEW 0x04 /**< DB handle opened in this txn */
832 #define DB_VALID 0x08 /**< DB handle is valid */
833 #define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */
834 /** @} */
835 /** In write txns, array of cursors for each DB */
836 MDB_cursor **mt_cursors;
837 /** Array of flags for each DB */
838 unsigned char *mt_dbflags;
839 /** Number of DB records in use. This number only ever increments;
840 * we don't decrement it when individual DB handles are closed.
841 */
842 MDB_dbi mt_numdbs;
843
844 /** @defgroup mdb_txn Transaction Flags
845 * @ingroup internal
846 * @{
847 */
848 #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
849 #define MDB_TXN_ERROR 0x02 /**< an error has occurred */
850 #define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
851 /** @} */
852 unsigned int mt_flags; /**< @ref mdb_txn */
853 /** dirty_list maxsize - #allocated pages including in parent txns */
854 unsigned int mt_dirty_room;
855 /** Tracks which of the two meta pages was used at the start
856 * of this transaction.
857 */
858 unsigned int mt_toggle;
859 };
860
861 /** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty.
862 * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to
863 * raise this on a 64 bit machine.
864 */
865 #define CURSOR_STACK 32
866
867 struct MDB_xcursor;
868
869 /** Cursors are used for all DB operations */
870 struct MDB_cursor {
871 /** Next cursor on this DB in this txn */
872 MDB_cursor *mc_next;
873 /** Original cursor if this is a shadow */
874 MDB_cursor *mc_orig;
875 /** Context used for databases with #MDB_DUPSORT, otherwise NULL */
876 struct MDB_xcursor *mc_xcursor;
877 /** The transaction that owns this cursor */
878 MDB_txn *mc_txn;
879 /** The database handle this cursor operates on */
880 MDB_dbi mc_dbi;
881 /** The database record for this cursor */
882 MDB_db *mc_db;
883 /** The database auxiliary record for this cursor */
884 MDB_dbx *mc_dbx;
885 /** The @ref mt_dbflag for this database */
886 unsigned char *mc_dbflag;
887 unsigned short mc_snum; /**< number of pushed pages */
888 unsigned short mc_top; /**< index of top page, normally mc_snum-1 */
889 /** @defgroup mdb_cursor Cursor Flags
890 * @ingroup internal
891 * Cursor state flags.
892 * @{
893 */
894 #define C_INITIALIZED 0x01 /**< cursor has been initialized and is valid */
895 #define C_EOF 0x02 /**< No more data */
896 #define C_SUB 0x04 /**< Cursor is a sub-cursor */
897 #define C_SHADOW 0x08 /**< Cursor is a dup from a parent txn */
898 #define C_ALLOCD 0x10 /**< Cursor was malloc'd */
899 #define C_SPLITTING 0x20 /**< Cursor is in page_split */
900 /** @} */
901 unsigned int mc_flags; /**< @ref mdb_cursor */
902 MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */
903 indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */
904 };
905
906 /** Context for sorted-dup records.
907 * We could have gone to a fully recursive design, with arbitrarily
908 * deep nesting of sub-databases. But for now we only handle these
909 * levels - main DB, optional sub-DB, sorted-duplicate DB.
910 */
911 typedef struct MDB_xcursor {
912 /** A sub-cursor for traversing the Dup DB */
913 MDB_cursor mx_cursor;
914 /** The database record for this Dup DB */
915 MDB_db mx_db;
916 /** The auxiliary DB record for this Dup DB */
917 MDB_dbx mx_dbx;
918 /** The @ref mt_dbflag for this Dup DB */
919 unsigned char mx_dbflag;
920 } MDB_xcursor;
921
922 /** State of FreeDB old pages, stored in the MDB_env */
923 typedef struct MDB_pgstate {
924 txnid_t mf_pglast; /**< ID of last old page record we used */
925 pgno_t *mf_pghead; /**< old pages reclaimed from freelist */
926 pgno_t *mf_pgfree; /**< memory to free when dropping me_pghead */
927 } MDB_pgstate;
928
929 /** The database environment. */
930 struct MDB_env {
931 HANDLE me_fd; /**< The main data file */
932 HANDLE me_lfd; /**< The lock file */
933 HANDLE me_mfd; /**< just for writing the meta pages */
934 /** Failed to update the meta page. Probably an I/O error. */
935 #define MDB_FATAL_ERROR 0x80000000U
936 /** Read-only Filesystem. Allow read access, no locking. */
937 #define MDB_ROFS 0x40000000U
938 /** Some fields are initialized. */
939 #define MDB_ENV_ACTIVE 0x20000000U
940 uint32_t me_flags; /**< @ref mdb_env */
941 unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */
942 unsigned int me_maxreaders; /**< size of the reader table */
943 unsigned int me_numreaders; /**< max numreaders set by this env */
944 MDB_dbi me_numdbs; /**< number of DBs opened */
945 MDB_dbi me_maxdbs; /**< size of the DB table */
946 pid_t me_pid; /**< process ID of this env */
947 char *me_path; /**< path to the DB files */
948 char *me_map; /**< the memory map of the data file */
949 MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */
950 MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
951 MDB_txn *me_txn; /**< current write transaction */
952 size_t me_mapsize; /**< size of the data memory map */
953 off_t me_size; /**< current file size */
954 pgno_t me_maxpg; /**< me_mapsize / me_psize */
955 MDB_dbx *me_dbxs; /**< array of static DB info */
956 uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
957 pthread_key_t me_txkey; /**< thread-key for readers */
958 MDB_pgstate me_pgstate; /**< state of old pages from freeDB */
959 # define me_pglast me_pgstate.mf_pglast
960 # define me_pghead me_pgstate.mf_pghead
961 # define me_pgfree me_pgstate.mf_pgfree
962 MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */
963 /** IDL of pages that became unused in a write txn */
964 MDB_IDL me_free_pgs;
965 /** ID2L of pages that were written during a write txn */
966 MDB_ID2 me_dirty_list[MDB_IDL_UM_SIZE];
967 /** Max number of freelist items that can fit in a single overflow page */
968 unsigned int me_maxfree_1pg;
969 /** Max size of a node on a page */
970 unsigned int me_nodemax;
971 #ifdef _WIN32
972 HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
973 HANDLE me_wmutex;
974 #elif defined(MDB_USE_POSIX_SEM)
975 sem_t *me_rmutex; /* Shared mutexes are not supported */
976 sem_t *me_wmutex;
977 #endif
978 };
979
980 /** Nested transaction */
981 typedef struct MDB_ntxn {
982 MDB_txn mnt_txn; /* the transaction */
983 MDB_pgstate mnt_pgstate; /* parent transaction's saved freestate */
984 } MDB_ntxn;
985
/** Upper bound on the pages committed by a single writev() call:
 * 64, or IOV_MAX when that is defined and smaller.
 */
#if defined(IOV_MAX) && IOV_MAX < 64
#define MDB_COMMIT_PAGES	IOV_MAX
#else
#define MDB_COMMIT_PAGES	 64
#endif
992
993 static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
994 static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
995 static int mdb_page_touch(MDB_cursor *mc);
996
997 static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp);
998 static int mdb_page_search_root(MDB_cursor *mc,
999 MDB_val *key, int modify);
1000 #define MDB_PS_MODIFY 1
1001 #define MDB_PS_ROOTONLY 2
1002 static int mdb_page_search(MDB_cursor *mc,
1003 MDB_val *key, int flags);
1004 static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst);
1005
1006 #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */
1007 static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata,
1008 pgno_t newpgno, unsigned int nflags);
1009
1010 static int mdb_env_read_header(MDB_env *env, MDB_meta *meta);
1011 static int mdb_env_pick_meta(const MDB_env *env);
1012 static int mdb_env_write_meta(MDB_txn *txn);
1013 static void mdb_env_close0(MDB_env *env, int excl);
1014
1015 static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp);
1016 static int mdb_node_add(MDB_cursor *mc, indx_t indx,
1017 MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags);
1018 static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize);
1019 static void mdb_node_shrink(MDB_page *mp, indx_t indx);
1020 static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst);
1021 static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
1022 static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data);
1023 static size_t mdb_branch_size(MDB_env *env, MDB_val *key);
1024
1025 static int mdb_rebalance(MDB_cursor *mc);
1026 static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
1027
1028 static void mdb_cursor_pop(MDB_cursor *mc);
1029 static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
1030
1031 static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf);
1032 static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
1033 static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
1034 static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
1035 static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op,
1036 int *exactp);
1037 static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data);
1038 static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data);
1039
1040 static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx);
1041 static void mdb_xcursor_init0(MDB_cursor *mc);
1042 static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node);
1043
1044 static int mdb_drop0(MDB_cursor *mc, int subs);
1045 static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi);
1046
1047 /** @cond */
1048 static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long;
1049 /** @endcond */
1050
1051 #ifdef _WIN32
1052 static SECURITY_DESCRIPTOR mdb_null_sd;
1053 static SECURITY_ATTRIBUTES mdb_all_sa;
1054 static int mdb_sec_inited;
1055 #endif
1056
1057 /** Return the library version info. */
1058 char *
1059 mdb_version(int *major, int *minor, int *patch)
1060 {
1061 if (major) *major = MDB_VERSION_MAJOR;
1062 if (minor) *minor = MDB_VERSION_MINOR;
1063 if (patch) *patch = MDB_VERSION_PATCH;
1064 return MDB_VERSION_STRING;
1065 }
1066
/** Descriptions of the MDB @ref errors.
 * #mdb_strerror() indexes this table with the error code's
 * distance from #MDB_KEYEXIST.
 */
static char *const mdb_errstr[] = {
	/*  0 */ "MDB_KEYEXIST: Key/data pair already exists",
	/*  1 */ "MDB_NOTFOUND: No matching key/data pair found",
	/*  2 */ "MDB_PAGE_NOTFOUND: Requested page not found",
	/*  3 */ "MDB_CORRUPTED: Located page was wrong type",
	/*  4 */ "MDB_PANIC: Update of meta page failed",
	/*  5 */ "MDB_VERSION_MISMATCH: Database environment version mismatch",
	/*  6 */ "MDB_INVALID: File is not an MDB file",
	/*  7 */ "MDB_MAP_FULL: Environment mapsize limit reached",
	/*  8 */ "MDB_DBS_FULL: Environment maxdbs limit reached",
	/*  9 */ "MDB_READERS_FULL: Environment maxreaders limit reached",
	/* 10 */ "MDB_TLS_FULL: Thread-local storage keys full - too many environments open",
	/* 11 */ "MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big",
	/* 12 */ "MDB_CURSOR_FULL: Internal error - cursor stack limit reached",
	/* 13 */ "MDB_PAGE_FULL: Internal error - page has no more space",
	/* 14 */ "MDB_MAP_RESIZED: Database contents grew beyond environment mapsize",
	/* 15 */ "MDB_INCOMPATIBLE: Database flags changed or would change",
};
1086
1087 char *
1088 mdb_strerror(int err)
1089 {
1090 int i;
1091 if (!err)
1092 return ("Successful return: 0");
1093
1094 if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) {
1095 i = err - MDB_KEYEXIST;
1096 return mdb_errstr[i];
1097 }
1098
1099 return strerror(err);
1100 }
1101
1102 #if MDB_DEBUG
1103 /** Display a key in hexadecimal and return the address of the result.
1104 * @param[in] key the key to display
1105 * @param[in] buf the buffer to write into. Should always be #DKBUF.
1106 * @return The key in hexadecimal form.
1107 */
1108 char *
1109 mdb_dkey(MDB_val *key, char *buf)
1110 {
1111 char *ptr = buf;
1112 unsigned char *c = key->mv_data;
1113 unsigned int i;
1114
1115 if (!key)
1116 return "";
1117
1118 if (key->mv_size > MDB_MAXKEYSIZE)
1119 return "MDB_MAXKEYSIZE";
1120 /* may want to make this a dynamic check: if the key is mostly
1121 * printable characters, print it as-is instead of converting to hex.
1122 */
1123 #if 1
1124 buf[0] = '\0';
1125 for (i=0; i<key->mv_size; i++)
1126 ptr += sprintf(ptr, "%02x", *c++);
1127 #else
1128 sprintf(buf, "%.*s", key->mv_size, key->mv_data);
1129 #endif
1130 return buf;
1131 }
1132
1133 /** Display all the keys in the page. */
1134 static void
1135 mdb_page_list(MDB_page *mp)
1136 {
1137 MDB_node *node;
1138 unsigned int i, nkeys, nsize;
1139 MDB_val key;
1140 DKBUF;
1141
1142 nkeys = NUMKEYS(mp);
1143 fprintf(stderr, "Page %zu numkeys %d\n", mp->mp_pgno, nkeys);
1144 for (i=0; i<nkeys; i++) {
1145 node = NODEPTR(mp, i);
1146 key.mv_size = node->mn_ksize;
1147 key.mv_data = node->mn_data;
1148 nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
1149 if (IS_BRANCH(mp)) {
1150 fprintf(stderr, "key %d: page %zu, %s\n", i, NODEPGNO(node),
1151 DKEY(&key));
1152 } else {
1153 if (F_ISSET(node->mn_flags, F_BIGDATA))
1154 nsize += sizeof(pgno_t);
1155 else
1156 nsize += NODEDSZ(node);
1157 fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
1158 }
1159 }
1160 }
1161
1162 void
1163 mdb_cursor_chk(MDB_cursor *mc)
1164 {
1165 unsigned int i;
1166 MDB_node *node;
1167 MDB_page *mp;
1168
1169 if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return;
1170 for (i=0; i<mc->mc_top; i++) {
1171 mp = mc->mc_pg[i];
1172 node = NODEPTR(mp, mc->mc_ki[i]);
1173 if (NODEPGNO(node) != mc->mc_pg[i+1]->mp_pgno)
1174 printf("oops!\n");
1175 }
1176 if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i]))
1177 printf("ack!\n");
1178 }
1179 #endif
1180
1181 #if MDB_DEBUG > 2
1182 /** Count all the pages in each DB and in the freelist
1183 * and make sure it matches the actual number of pages
1184 * being used.
1185 */
1186 static void mdb_audit(MDB_txn *txn)
1187 {
1188 MDB_cursor mc;
1189 MDB_val key, data;
1190 MDB_ID freecount, count;
1191 MDB_dbi i;
1192 int rc;
1193
1194 freecount = 0;
1195 mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
1196 while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
1197 freecount += *(MDB_ID *)data.mv_data;
1198
1199 count = 0;
1200 for (i = 0; i<txn->mt_numdbs; i++) {
1201 MDB_xcursor mx, *mxp;
1202 mxp = (txn->mt_dbs[i].md_flags & MDB_DUPSORT) ? &mx : NULL;
1203 mdb_cursor_init(&mc, txn, i, mxp);
1204 if (txn->mt_dbs[i].md_root == P_INVALID)
1205 continue;
1206 count += txn->mt_dbs[i].md_branch_pages +
1207 txn->mt_dbs[i].md_leaf_pages +
1208 txn->mt_dbs[i].md_overflow_pages;
1209 if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
1210 mdb_page_search(&mc, NULL, 0);
1211 do {
1212 unsigned j;
1213 MDB_page *mp;
1214 mp = mc.mc_pg[mc.mc_top];
1215 for (j=0; j<NUMKEYS(mp); j++) {
1216 MDB_node *leaf = NODEPTR(mp, j);
1217 if (leaf->mn_flags & F_SUBDATA) {
1218 MDB_db db;
1219 memcpy(&db, NODEDATA(leaf), sizeof(db));
1220 count += db.md_branch_pages + db.md_leaf_pages +
1221 db.md_overflow_pages;
1222 }
1223 }
1224 }
1225 while (mdb_cursor_sibling(&mc, 1) == 0);
1226 }
1227 }
1228 if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
1229 fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n",
1230 txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno);
1231 }
1232 }
1233 #endif
1234
1235 int
1236 mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
1237 {
1238 return txn->mt_dbxs[dbi].md_cmp(a, b);
1239 }
1240
1241 int
1242 mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
1243 {
1244 if (txn->mt_dbxs[dbi].md_dcmp)
1245 return txn->mt_dbxs[dbi].md_dcmp(a, b);
1246 else
1247 return EINVAL; /* too bad you can't distinguish this from a valid result */
1248 }
1249
1250 /** Allocate a single page.
1251 * Re-use old malloc'd pages first, otherwise just malloc.
1252 */
1253 static MDB_page *
1254 mdb_page_malloc(MDB_cursor *mc) {
1255 MDB_page *ret;
1256 size_t sz = mc->mc_txn->mt_env->me_psize;
1257 if ((ret = mc->mc_txn->mt_env->me_dpages) != NULL) {
1258 VGMEMP_ALLOC(mc->mc_txn->mt_env, ret, sz);
1259 VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
1260 mc->mc_txn->mt_env->me_dpages = ret->mp_next;
1261 } else if ((ret = malloc(sz)) != NULL) {
1262 VGMEMP_ALLOC(mc->mc_txn->mt_env, ret, sz);
1263 }
1264 return ret;
1265 }
1266
1267 static void
1268 mdb_page_free(MDB_env *env, MDB_page *mp)
1269 {
1270 mp->mp_next = env->me_dpages;
1271 VGMEMP_FREE(env, mp);
1272 env->me_dpages = mp;
1273 }
1274
1275 /** Allocate pages for writing.
1276 * If there are free pages available from older transactions, they
1277 * will be re-used first. Otherwise a new page will be allocated.
1278 * @param[in] mc cursor A cursor handle identifying the transaction and
1279 * database for which we are allocating.
1280 * @param[in] num the number of pages to allocate.
1281 * @param[out] mp Address of the allocated page(s). Requests for multiple pages
1282 * will always be satisfied by a single contiguous chunk of memory.
1283 * @return 0 on success, non-zero on failure.
1284 */
1285 static int
1286 mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1287 {
1288 MDB_txn *txn = mc->mc_txn;
1289 MDB_page *np;
1290 pgno_t pgno = P_INVALID;
1291 MDB_ID2 mid;
1292 txnid_t oldest = 0, last;
1293 int rc;
1294
1295 *mp = NULL;
1296
1297 /* If our dirty list is already full, we can't do anything */
1298 if (txn->mt_dirty_room == 0)
1299 return MDB_TXN_FULL;
1300
1301 /* The free list won't have any content at all until txn 2 has
1302 * committed. The pages freed by txn 2 will be unreferenced
1303 * after txn 3 commits, and so will be safe to re-use in txn 4.
1304 */
1305 if (txn->mt_txnid > 3) {
1306 if (!txn->mt_env->me_pghead &&
1307 txn->mt_dbs[FREE_DBI].md_root != P_INVALID) {
1308 /* See if there's anything in the free DB */
1309 MDB_reader *r;
1310 MDB_cursor m2;
1311 MDB_node *leaf;
1312 MDB_val data;
1313 txnid_t *kptr;
1314
1315 mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
1316 if (!txn->mt_env->me_pglast) {
1317 mdb_page_search(&m2, NULL, 0);
1318 leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0);
1319 kptr = (txnid_t *)NODEKEY(leaf);
1320 last = *kptr;
1321 } else {
1322 MDB_val key;
1323 again:
1324 last = txn->mt_env->me_pglast + 1;
1325 leaf = NULL;
1326 key.mv_data = &last;
1327 key.mv_size = sizeof(last);
1328 rc = mdb_cursor_set(&m2, &key, &data, MDB_SET_RANGE, NULL);
1329 if (rc)
1330 goto none;
1331 last = *(txnid_t *)key.mv_data;
1332 }
1333
1334 {
1335 unsigned int i, nr;
1336 txnid_t mr;
1337 oldest = txn->mt_txnid - 1;
1338 nr = txn->mt_env->me_txns->mti_numreaders;
1339 r = txn->mt_env->me_txns->mti_readers;
1340 for (i=0; i<nr; i++) {
1341 if (!r[i].mr_pid) continue;
1342 mr = r[i].mr_txnid;
1343 if (mr < oldest)
1344 oldest = mr;
1345 }
1346 }
1347
1348 if (oldest > last) {
1349 /* It's usable, grab it.
1350 */
1351 pgno_t *idl, *mop;
1352
1353 if (!txn->mt_env->me_pglast) {
1354 mdb_node_read(txn, leaf, &data);
1355 }
1356 idl = (MDB_ID *) data.mv_data;
1357 /* We might have a zero-length IDL due to freelist growth
1358 * during a prior commit
1359 */
1360 if (!idl[0]) {
1361 txn->mt_env->me_pglast = last;
1362 goto again;
1363 }
1364 mop = malloc(MDB_IDL_SIZEOF(idl));
1365 if (!mop)
1366 return ENOMEM;
1367 txn->mt_env->me_pglast = last;
1368 txn->mt_env->me_pghead = txn->mt_env->me_pgfree = mop;
1369 memcpy(mop, idl, MDB_IDL_SIZEOF(idl));
1370
1371 #if MDB_DEBUG > 1
1372 {
1373 unsigned int i;
1374 DPRINTF("IDL read txn %zu root %zu num %zu",
1375 last, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
1376 for (i=0; i<idl[0]; i++) {
1377 DPRINTF("IDL %zu", idl[i+1]);
1378 }
1379 }
1380 #endif
1381 }
1382 }
1383 none:
1384 if (txn->mt_env->me_pghead) {
1385 pgno_t *mop = txn->mt_env->me_pghead;
1386 if (num > 1) {
1387 MDB_cursor m2;
1388 int retry = 1, readit = 0, n2 = num-1;
1389 unsigned int i, j, k;
1390
1391 /* If current list is too short, must fetch more and coalesce */
1392 if (mop[0] < (unsigned)num)
1393 readit = 1;
1394
1395 mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
1396 do {
1397 /* If on freelist, don't try to read more. If what we have
1398 * right now isn't enough just use new pages.
1399 * TODO: get all of this working. Many circular dependencies...
1400 */
1401 if (mc->mc_dbi == FREE_DBI) {
1402 retry = 0;
1403 readit = 0;
1404 }
1405 if (readit) {
1406 MDB_val key, data;
1407 pgno_t *idl, *mop2;
1408
1409 last = txn->mt_env->me_pglast + 1;
1410
1411 /* We haven't hit the readers list yet? */
1412 if (!oldest) {
1413 MDB_reader *r;
1414 unsigned int nr;
1415 txnid_t mr;
1416
1417 oldest = txn->mt_txnid - 1;
1418 nr = txn->mt_env->me_txns->mti_numreaders;
1419 r = txn->mt_env->me_txns->mti_readers;
1420 for (i=0; i<nr; i++) {
1421 if (!r[i].mr_pid) continue;
1422 mr = r[i].mr_txnid;
1423 if (mr < oldest)
1424 oldest = mr;
1425 }
1426 }
1427
1428 /* There's nothing we can use on the freelist */
1429 if (oldest - last < 1)
1430 break;
1431
1432 key.mv_data = &last;
1433 key.mv_size = sizeof(last);
1434 rc = mdb_cursor_set(&m2,&key,&data,MDB_SET_RANGE,NULL);
1435 if (rc) {
1436 if (rc == MDB_NOTFOUND)
1437 break;
1438 return rc;
1439 }
1440 last = *(txnid_t*)key.mv_data;
1441 if (oldest <= last)
1442 break;
1443 idl = (MDB_ID *) data.mv_data;
1444 mop2 = malloc(MDB_IDL_SIZEOF(idl) + MDB_IDL_SIZEOF(mop));
1445 if (!mop2)
1446 return ENOMEM;
1447 /* merge in sorted order */
1448 i = idl[0]; j = mop[0]; mop2[0] = k = i+j;
1449 mop[0] = P_INVALID;
1450 while (i>0 || j>0) {
1451 if (i && idl[i] < mop[j])
1452 mop2[k--] = idl[i--];
1453 else
1454 mop2[k--] = mop[j--];
1455 }
1456 txn->mt_env->me_pglast = last;
1457 free(txn->mt_env->me_pgfree);
1458 txn->mt_env->me_pghead = txn->mt_env->me_pgfree = mop2;
1459 mop = mop2;
1460 /* Keep trying to read until we have enough */
1461 if (mop[0] < (unsigned)num) {
1462 continue;
1463 }
1464 }
1465
1466 /* current list has enough pages, but are they contiguous? */
1467 for (i=mop[0]; i>=(unsigned)num; i--) {
1468 if (mop[i-n2] == mop[i] + n2) {
1469 pgno = mop[i];
1470 i -= n2;
1471 /* move any stragglers down */
1472 for (j=i+num; j<=mop[0]; j++)
1473 mop[i++] = mop[j];
1474 mop[0] -= num;
1475 break;
1476 }
1477 }
1478
1479 /* Stop if we succeeded, or no retries */
1480 if (!retry || pgno != P_INVALID)
1481 break;
1482 readit = 1;
1483
1484 } while (1);
1485 } else {
1486 /* peel pages off tail, so we only have to truncate the list */
1487 pgno = MDB_IDL_LAST(mop);
1488 mop[0]--;
1489 }
1490 if (MDB_IDL_IS_ZERO(mop)) {
1491 free(txn->mt_env->me_pgfree);
1492 txn->mt_env->me_pghead = txn->mt_env->me_pgfree = NULL;
1493 }
1494 }
1495 }
1496
1497 if (pgno == P_INVALID) {
1498 /* DB size is maxed out */
1499 if (txn->mt_next_pgno + num >= txn->mt_env->me_maxpg) {
1500 DPUTS("DB size maxed out");
1501 return MDB_MAP_FULL;
1502 }
1503 }
1504 if (txn->mt_env->me_flags & MDB_WRITEMAP) {
1505 if (pgno == P_INVALID) {
1506 pgno = txn->mt_next_pgno;
1507 txn->mt_next_pgno += num;
1508 }
1509 np = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno);
1510 np->mp_pgno = pgno;
1511 } else {
1512 if (txn->mt_env->me_dpages && num == 1) {
1513 np = txn->mt_env->me_dpages;
1514 VGMEMP_ALLOC(txn->mt_env, np, txn->mt_env->me_psize);
1515 VGMEMP_DEFINED(np, sizeof(np->mp_next));
1516 txn->mt_env->me_dpages = np->mp_next;
1517 } else {
1518 size_t sz = txn->mt_env->me_psize * num;
1519 if ((np = malloc(sz)) == NULL)
1520 return ENOMEM;
1521 VGMEMP_ALLOC(txn->mt_env, np, sz);
1522 }
1523 if (pgno == P_INVALID) {
1524 np->mp_pgno = txn->mt_next_pgno;
1525 txn->mt_next_pgno += num;
1526 } else {
1527 np->mp_pgno = pgno;
1528 }
1529 }
1530 mid.mid = np->mp_pgno;
1531 mid.mptr = np;
1532 if (txn->mt_env->me_flags & MDB_WRITEMAP) {
1533 mdb_mid2l_append(txn->mt_u.dirty_list, &mid);
1534 } else {
1535 mdb_mid2l_insert(txn->mt_u.dirty_list, &mid);
1536 }
1537 txn->mt_dirty_room--;
1538 *mp = np;
1539
1540 return MDB_SUCCESS;
1541 }
1542
1543 /** Copy a page: avoid copying unused portions of the page.
1544 * @param[in] dst page to copy into
1545 * @param[in] src page to copy from
1546 */
1547 static void
1548 mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize)
1549 {
1550 dst->mp_flags = src->mp_flags | P_DIRTY;
1551 dst->mp_pages = src->mp_pages;
1552
1553 if (IS_LEAF2(src)) {
1554 memcpy(dst->mp_ptrs, src->mp_ptrs, psize - PAGEHDRSZ - SIZELEFT(src));
1555 } else {
1556 unsigned int i, nkeys = NUMKEYS(src);
1557 for (i=0; i<nkeys; i++)
1558 dst->mp_ptrs[i] = src->mp_ptrs[i];
1559 memcpy((char *)dst+src->mp_upper, (char *)src+src->mp_upper,
1560 psize - src->mp_upper);
1561 }
1562 }
1563
1564 /** Touch a page: make it dirty and re-insert into tree with updated pgno.
1565 * @param[in] mc cursor pointing to the page to be touched
1566 * @return 0 on success, non-zero on failure.
1567 */
1568 static int
1569 mdb_page_touch(MDB_cursor *mc)
1570 {
1571 MDB_page *mp = mc->mc_pg[mc->mc_top];
1572 pgno_t pgno;
1573 int rc;
1574
1575 if (!F_ISSET(mp->mp_flags, P_DIRTY)) {
1576 MDB_page *np;
1577 if ((rc = mdb_page_alloc(mc, 1, &np)))
1578 return rc;
1579 DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno);
1580 assert(mp->mp_pgno != np->mp_pgno);
1581 mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
1582 if (SIZELEFT(mp)) {
1583 /* If page isn't full, just copy the used portion */
1584 mdb_page_copy(np, mp, mc->mc_txn->mt_env->me_psize);
1585 } else {
1586 pgno = np->mp_pgno;
1587 memcpy(np, mp, mc->mc_txn->mt_env->me_psize);
1588 np->mp_pgno = pgno;
1589 np->mp_flags |= P_DIRTY;
1590 }
1591 mp = np;
1592
1593 finish:
1594 /* Adjust other cursors pointing to mp */
1595 if (mc->mc_flags & C_SUB) {
1596 MDB_cursor *m2, *m3;
1597 MDB_dbi dbi = mc->mc_dbi-1;
1598
1599 for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
1600 if (m2 == mc) continue;
1601 m3 = &m2->mc_xcursor->mx_cursor;
1602 if (m3->mc_snum < mc->mc_snum) continue;
1603 if (m3->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top]) {
1604 m3->mc_pg[mc->mc_top] = mp;
1605 }
1606 }
1607 } else {
1608 MDB_cursor *m2;
1609
1610 for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
1611 if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
1612 if (m2->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top]) {
1613 m2->mc_pg[mc->mc_top] = mp;
1614 }
1615 }
1616 }
1617 mc->mc_pg[mc->mc_top] = mp;
1618 /** If this page has a parent, update the parent to point to
1619 * this new page.
1620 */
1621 if (mc->mc_top)
1622 SETPGNO(NODEPTR(mc->mc_pg[mc->mc_top-1], mc->mc_ki[mc->mc_top-1]), mp->mp_pgno);
1623 else
1624 mc->mc_db->md_root = mp->mp_pgno;
1625 } else if (mc->mc_txn->mt_parent) {
1626 MDB_page *np;
1627 MDB_ID2 mid;
1628 /* If txn has a parent, make sure the page is in our
1629 * dirty list.
1630 */
1631 if (mc->mc_txn->mt_u.dirty_list[0].mid) {
1632 unsigned x = mdb_mid2l_search(mc->mc_txn->mt_u.dirty_list, mp->mp_pgno);
1633 if (x <= mc->mc_txn->mt_u.dirty_list[0].mid &&
1634 mc->mc_txn->mt_u.dirty_list[x].mid == mp->mp_pgno) {
1635 if (mc->mc_txn->mt_u.dirty_list[x].mptr != mp) {
1636 mp = mc->mc_txn->mt_u.dirty_list[x].mptr;
1637 mc->mc_pg[mc->mc_top] = mp;
1638 }
1639 return 0;
1640 }
1641 }
1642 assert(mc->mc_txn->mt_u.dirty_list[0].mid < MDB_IDL_UM_MAX);
1643 /* No - copy it */
1644 np = mdb_page_malloc(mc);
1645 if (!np)
1646 return ENOMEM;
1647 memcpy(np, mp, mc->mc_txn->mt_env->me_psize);
1648 mid.mid = np->mp_pgno;
1649 mid.mptr = np;
1650 mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &mid);
1651 mp = np;
1652 goto finish;
1653 }
1654 return 0;
1655 }
1656
1657 int
1658 mdb_env_sync(MDB_env *env, int force)
1659 {
1660 int rc = 0;
1661 if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
1662 if (env->me_flags & MDB_WRITEMAP) {
1663 int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
1664 ? MS_ASYNC : MS_SYNC;
1665 if (MDB_MSYNC(env->me_map, env->me_mapsize, flags))
1666 rc = ErrCode();
1667 #ifdef _WIN32
1668 else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd))
1669 rc = ErrCode();
1670 #endif
1671 } else {
1672 if (MDB_FDATASYNC(env->me_fd))
1673 rc = ErrCode();
1674 }
1675 }
1676 return rc;
1677 }
1678
1679 /** Make shadow copies of all of parent txn's cursors */
1680 static int
1681 mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
1682 {
1683 MDB_cursor *mc, *m2;
1684 unsigned int i, j, size;
1685
1686 for (i=0;i<src->mt_numdbs; i++) {
1687 if (src->mt_cursors[i]) {
1688 size = sizeof(MDB_cursor);
1689 if (src->mt_cursors[i]->mc_xcursor)
1690 size += sizeof(MDB_xcursor);
1691 for (m2 = src->mt_cursors[i]; m2; m2=m2->mc_next) {
1692 mc = malloc(size);
1693 if (!mc)
1694 return ENOMEM;
1695 mc->mc_orig = m2;
1696 mc->mc_txn = dst;
1697 mc->mc_dbi = i;
1698 mc->mc_db = &dst->mt_dbs[i];
1699 mc->mc_dbx = m2->mc_dbx;
1700 mc->mc_dbflag = &dst->mt_dbflags[i];
1701 mc->mc_snum = m2->mc_snum;
1702 mc->mc_top = m2->mc_top;
1703 mc->mc_flags = m2->mc_flags | C_SHADOW;
1704 for (j=0; j<mc->mc_snum; j++) {
1705 mc->mc_pg[j] = m2->mc_pg[j];
1706 mc->mc_ki[j] = m2->mc_ki[j];
1707 }
1708 if (m2->mc_xcursor) {
1709 MDB_xcursor *mx, *mx2;
1710 mx = (MDB_xcursor *)(mc+1);
1711 mc->mc_xcursor = mx;
1712 mx2 = m2->mc_xcursor;
1713 mx->mx_db = mx2->mx_db;
1714 mx->mx_dbx = mx2->mx_dbx;
1715 mx->mx_dbflag = mx2->mx_dbflag;
1716 mx->mx_cursor.mc_txn = dst;
1717 mx->mx_cursor.mc_dbi = mx2->mx_cursor.mc_dbi;
1718 mx->mx_cursor.mc_db = &mx->mx_db;
1719 mx->mx_cursor.mc_dbx = &mx->mx_dbx;
1720 mx->mx_cursor.mc_dbflag = &mx->mx_dbflag;
1721 mx->mx_cursor.mc_snum = mx2->mx_cursor.mc_snum;
1722 mx->mx_cursor.mc_top = mx2->mx_cursor.mc_top;
1723 mx->mx_cursor.mc_flags = mx2->mx_cursor.mc_flags | C_SHADOW;
1724 for (j=0; j<mx2->mx_cursor.mc_snum; j++) {
1725 mx->mx_cursor.mc_pg[j] = mx2->mx_cursor.mc_pg[j];
1726 mx->mx_cursor.mc_ki[j] = mx2->mx_cursor.mc_ki[j];
1727 }
1728 } else {
1729 mc->mc_xcursor = NULL;
1730 }
1731 mc->mc_next = dst->mt_cursors[i];
1732 dst->mt_cursors[i] = mc;
1733 }
1734 }
1735 }
1736 return MDB_SUCCESS;
1737 }
1738
1739 /** Merge shadow cursors back into parent's */
1740 static void
1741 mdb_cursor_merge(MDB_txn *txn)
1742 {
1743 MDB_dbi i;
1744 for (i=0; i<txn->mt_numdbs; i++) {
1745 if (txn->mt_cursors[i]) {
1746 MDB_cursor *mc;
1747 while ((mc = txn->mt_cursors[i])) {
1748 txn->mt_cursors[i] = mc->mc_next;
1749 if (mc->mc_flags & C_SHADOW) {
1750 MDB_cursor *m2 = mc->mc_orig;
1751 unsigned int j;
1752 m2->mc_snum = mc->mc_snum;
1753 m2->mc_top = mc->mc_top;
1754 for (j=0; j<mc->mc_snum; j++) {
1755 m2->mc_pg[j] = mc->mc_pg[j];
1756 m2->mc_ki[j] = mc->mc_ki[j];
1757 }
1758 }
1759 if (mc->mc_flags & C_ALLOCD)
1760 free(mc);
1761 }
1762 }
1763 }
1764 }
1765
1766 static void
1767 mdb_txn_reset0(MDB_txn *txn);
1768
1769 /** Common code for #mdb_txn_begin() and #mdb_txn_renew().
 * Points the txn at the environment's DB tables, then either binds a
 * read-only txn to a txnid/meta page or calls LOCK_MUTEX_W and prepares a
 * write txn that runs as the next txnid.
1770 * @param[in] txn the transaction handle to initialize
1771 * @return 0 on success, non-zero on failure: MDB_READERS_FULL when no
1772 * reader slot is free, the error returned by pthread_setspecific(), or
1773 * MDB_MAP_RESIZED when the next page number exceeds env->me_maxpg.
1774 */
1775 static int
1776 mdb_txn_renew0(MDB_txn *txn)
1777 {
1778 MDB_env *env = txn->mt_env;
1779 unsigned int i;
1780 uint16_t x;
1781 int rc;
1782
1783 /* Setup db info */
1784 txn->mt_numdbs = env->me_numdbs;
1785 txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */
1786
1787 if (txn->mt_flags & MDB_TXN_RDONLY) {
1788 if (env->me_flags & MDB_ROFS) {
/* MDB_ROFS: no reader slot is used; take the txnid straight from
 * the meta page chosen by mdb_env_pick_meta().
 */
1789 i = mdb_env_pick_meta(env);
1790 txn->mt_txnid = env->me_metas[i]->mm_txnid;
1791 txn->mt_u.reader = NULL;
1792 } else {
/* Reuse the reader slot stored under me_txkey, if there is one */
1793 MDB_reader *r = pthread_getspecific(env->me_txkey);
1794 if (!r) {
1795 pid_t pid = env->me_pid;
1796 pthread_t tid = pthread_self();
1797
/* Claim the first slot whose mr_pid is 0, between
 * LOCK_MUTEX_R and UNLOCK_MUTEX_R.
 */
1798 LOCK_MUTEX_R(env);
1799 for (i=0; i<env->me_txns->mti_numreaders; i++)
1800 if (env->me_txns->mti_readers[i].mr_pid == 0)
1801 break;
1802 if (i == env->me_maxreaders) {
1803 UNLOCK_MUTEX_R(env);
1804 return MDB_READERS_FULL;
1805 }
1806 env->me_txns->mti_readers[i].mr_pid = pid;
1807 env->me_txns->mti_readers[i].mr_tid = tid;
1808 if (i >= env->me_txns->mti_numreaders)
1809 env->me_txns->mti_numreaders = i+1;
1810 /* Save numreaders for un-mutexed mdb_env_close() */
1811 env->me_numreaders = env->me_txns->mti_numreaders;
1812 UNLOCK_MUTEX_R(env);
1813 r = &env->me_txns->mti_readers[i];
1814 if ((rc = pthread_setspecific(env->me_txkey, r)) != 0) {
/* Give the slot back; note this store happens after UNLOCK_MUTEX_R */
1815 env->me_txns->mti_readers[i].mr_pid = 0;
1816 return rc;
1817 }
1818 }
/* Publish the txnid this reader uses in its slot and in the txn */
1819 txn->mt_txnid = r->mr_txnid = env->me_txns->mti_txnid;
1820 txn->mt_u.reader = r;
1821 }
/* The low bit of the txnid selects which of the two meta pages to read */
1822 txn->mt_toggle = txn->mt_txnid & 1;
1823 txn->mt_next_pgno = env->me_metas[txn->mt_toggle]->mm_last_pg+1;
1824 } else {
/* Write txn: the matching UNLOCK_MUTEX_W is in mdb_txn_reset0()
 * and at the end of mdb_txn_commit().
 */
1825 LOCK_MUTEX_W(env);
1826
1827 txn->mt_txnid = env->me_txns->mti_txnid;
1828 txn->mt_toggle = txn->mt_txnid & 1;
1829 txn->mt_next_pgno = env->me_metas[txn->mt_toggle]->mm_last_pg+1;
/* mt_toggle was derived from the current txnid; the write txn itself
 * runs as the next one.
 */
1830 txn->mt_txnid++;
1831 #if MDB_DEBUG
1832 if (txn->mt_txnid == mdb_debug_start)
1833 mdb_debug = 1;
1834 #endif
/* Use the environment's dirty list and free-page list, emptied */
1835 txn->mt_dirty_room = MDB_IDL_UM_MAX;
1836 txn->mt_u.dirty_list = env->me_dirty_list;
1837 txn->mt_u.dirty_list[0].mid = 0;
1838 txn->mt_free_pgs = env->me_free_pgs;
1839 txn->mt_free_pgs[0] = 0;
1840 env->me_txn = txn;
1841 }
1842
1843 /* Copy the DB info and flags */
1844 memcpy(txn->mt_dbs, env->me_metas[txn->mt_toggle]->mm_dbs, 2 * sizeof(MDB_db));
/* DBIs from 2 up: keep only the persistent flags; handles the environment
 * marks MDB_VALID start out as DB_VALID|DB_STALE.
 */
1845 for (i=2; i<txn->mt_numdbs; i++) {
1846 x = env->me_dbflags[i];
1847 txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
1848 txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
1849 }
1850 txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
1851
/* Undo the setup (mdb_txn_reset0) when the next page number is past me_maxpg */
1852 if (env->me_maxpg < txn->mt_next_pgno) {
1853 mdb_txn_reset0(txn);
1854 return MDB_MAP_RESIZED;
1855 }
1856
1857 return MDB_SUCCESS;
1858 }
1859
1860 int
1861 mdb_txn_renew(MDB_txn *txn)
1862 {
1863 int rc;
1864
1865 if (! (txn && (txn->mt_flags & MDB_TXN_RDONLY)))
1866 return EINVAL;
1867
1868 if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
1869 DPUTS("environment had fatal error, must shutdown!");
1870 return MDB_PANIC;
1871 }
1872
1873 rc = mdb_txn_renew0(txn);
1874 if (rc == MDB_SUCCESS) {
1875 DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu",
1876 txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
1877 (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
1878 }
1879 return rc;
1880 }
1881
1882 int
1883 mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
1884 {
1885 MDB_txn *txn;
1886 MDB_ntxn *ntxn;
1887 int rc, size, tsize = sizeof(MDB_txn);
1888
1889 if (env->me_flags & MDB_FATAL_ERROR) {
1890 DPUTS("environment had fatal error, must shutdown!");
1891 return MDB_PANIC;
1892 }
1893 if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY))
1894 return EACCES;
1895 if (parent) {
1896 /* Nested transactions: Max 1 child, write txns only, no writemap */
1897 if (parent->mt_child ||
1898 (flags & MDB_RDONLY) || (parent->mt_flags & MDB_TXN_RDONLY) ||
1899 (env->me_flags & MDB_WRITEMAP))
1900 {
1901 return EINVAL;
1902 }
1903 tsize = sizeof(MDB_ntxn);
1904 }
1905 size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
1906 if (!(flags & MDB_RDONLY))
1907 size += env->me_maxdbs * sizeof(MDB_cursor *);
1908
1909 if ((txn = calloc(1, size)) == NULL) {
1910 DPRINTF("calloc: %s", strerror(ErrCode()));
1911 return ENOMEM;
1912 }
1913 txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
1914 if (flags & MDB_RDONLY) {
1915 txn->mt_flags |= MDB_TXN_RDONLY;
1916 txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
1917 } else {
1918 txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
1919 txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
1920 }
1921 txn->mt_env = env;
1922
1923 if (parent) {
1924 unsigned int i;
1925 txn->mt_free_pgs = mdb_midl_alloc();
1926 if (!txn->mt_free_pgs) {
1927 free(txn);
1928 return ENOMEM;
1929 }
1930 txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
1931 if (!txn->mt_u.dirty_list) {
1932 free(txn->mt_free_pgs);
1933 free(txn);
1934 return ENOMEM;
1935 }
1936 txn->mt_txnid = parent->mt_txnid;
1937 txn->mt_toggle = parent->mt_toggle;
1938 txn->mt_dirty_room = parent->mt_dirty_room;
1939 txn->mt_u.dirty_list[0].mid = 0;
1940 txn->mt_free_pgs[0] = 0;
1941 txn->mt_next_pgno = parent->mt_next_pgno;
1942 parent->mt_child = txn;
1943 txn->mt_parent = parent;
1944 txn->mt_numdbs = parent->mt_numdbs;
1945 txn->mt_dbxs = parent->mt_dbxs;
1946 memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
1947 /* Copy parent's mt_dbflags, but clear DB_NEW */
1948 for (i=0; i<txn->mt_numdbs; i++)
1949 txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~DB_NEW;
1950 rc = 0;
1951 ntxn = (MDB_ntxn *)txn;
1952 ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */
1953 if (env->me_pghead) {
1954 size = MDB_IDL_SIZEOF(env->me_pghead);
1955 env->me_pghead = malloc(size);
1956 if (env->me_pghead)
1957 memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size);
1958 else
1959 rc = ENOMEM;
1960 }
1961 env->me_pgfree = env->me_pghead;
1962 if (!rc)
1963 rc = mdb_cursor_shadow(parent, txn);
1964 if (rc)
1965 mdb_txn_reset0(txn);
1966 } else {
1967 rc = mdb_txn_renew0(txn);
1968 }
1969 if (rc)
1970 free(txn);
1971 else {
1972 *ret = txn;
1973 DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu",
1974 txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
1975 (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
1976 }
1977
1978 return rc;
1979 }
1980
1981 /** Common code for #mdb_txn_reset() and #mdb_txn_abort().
1982 * @param[in] txn the transaction handle to reset
1983 */
1984 static void
1985 mdb_txn_reset0(MDB_txn *txn)
1986 {
1987 MDB_env *env = txn->mt_env;
1988 unsigned int i;
1989
1990 /* Close any DBI handles opened in this txn */
1991 for (i=2; i<txn->mt_numdbs; i++) {
1992 if (txn->mt_dbflags[i] & DB_NEW) {
1993 char *ptr = env->me_dbxs[i].md_name.mv_data;
1994 env->me_dbxs[i].md_name.mv_data = NULL;
1995 env->me_dbxs[i].md_name.mv_size = 0;
1996 free(ptr);
1997 }
1998 }
1999
2000 if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
2001 if (!(env->me_flags & MDB_ROFS))
2002 txn->mt_u.reader->mr_txnid = (txnid_t)-1;
2003 } else {
2004 MDB_page *dp;
2005
2006 /* close(free) all cursors */
2007 for (i=0; i<txn->mt_numdbs; i++) {
2008 if (txn->mt_cursors[i]) {
2009 MDB_cursor *mc;
2010 while ((mc = txn->mt_cursors[i])) {
2011 txn->mt_cursors[i] = mc->mc_next;
2012 if (mc->mc_flags & C_ALLOCD)
2013 free(mc);
2014 }
2015 }
2016 }
2017
2018 if (!(env->me_flags & MDB_WRITEMAP)) {
2019 /* return all dirty pages to dpage list */
2020 for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) {
2021 dp = txn->mt_u.dirty_list[i].mptr;
2022 if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) {
2023 mdb_page_free(txn->mt_env, dp);
2024 } else {
2025 /* large pages just get freed directly */
2026 VGMEMP_FREE(txn->mt_env, dp);
2027 free(dp);
2028 }
2029 }
2030 }
2031
2032 free(env->me_pgfree);
2033
2034 if (txn->mt_parent) {
2035 txn->mt_parent->mt_child = NULL;
2036 env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
2037 mdb_midl_free(txn->mt_free_pgs);
2038 free(txn->mt_u.dirty_list);
2039 return;
2040 } else {
2041 if (mdb_midl_shrink(&txn->mt_free_pgs))
2042 env->me_free_pgs = txn->mt_free_pgs;
2043 }
2044
2045 txn->mt_env->me_pghead = txn->mt_env->me_pgfree = NULL;
2046 txn->mt_env->me_pglast = 0;
2047
2048 env->me_txn = NULL;
2049 /* The writer mutex was locked in mdb_txn_begin. */
2050 UNLOCK_MUTEX_W(env);
2051 }
2052 }
2053
2054 void
2055 mdb_txn_reset(MDB_txn *txn)
2056 {
2057 if (txn == NULL)
2058 return;
2059
2060 DPRINTF("reset txn %zu%c %p on mdbenv %p, root page %zu",
2061 txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
2062 (void *) txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
2063
2064 mdb_txn_reset0(txn);
2065 }
2066
2067 void
2068 mdb_txn_abort(MDB_txn *txn)
2069 {
2070 if (txn == NULL)
2071 return;
2072
2073 DPRINTF("abort txn %zu%c %p on mdbenv %p, root page %zu",
2074 txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
2075 (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
2076
2077 if (txn->mt_child)
2078 mdb_txn_abort(txn->mt_child);
2079
2080 mdb_txn_reset0(txn);
2081 free(txn);
2082 }
2083
2084 int
2085 mdb_txn_commit(MDB_txn *txn)
2086 {
2087 int n, done;
2088 unsigned int i;
2089 ssize_t rc;
2090 off_t size;
2091 MDB_page *dp;
2092 MDB_env *env;
2093 pgno_t next, freecnt;
2094 txnid_t oldpg_txnid, id;
2095 MDB_cursor mc;
2096
2097 assert(txn != NULL);
2098 assert(txn->mt_env != NULL);
2099
2100 if (txn->mt_child) {
2101 mdb_txn_commit(txn->mt_child);
2102 txn->mt_child = NULL;
2103 }
2104
2105 env = txn->mt_env;
2106
2107 if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
2108 /* update the DB flags */
2109 for (i = 2; i<txn->mt_numdbs; i++) {
2110 if (txn->mt_dbflags[i] & DB_NEW)
2111 env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
2112 }
2113 if (txn->mt_numdbs > env->me_numdbs)
2114 env->me_numdbs = txn->mt_numdbs;
2115 txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */
2116 mdb_txn_abort(txn);
2117 return MDB_SUCCESS;
2118 }
2119
2120 if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
2121 DPUTS("error flag is set, can't commit");
2122 if (txn->mt_parent)
2123 txn->mt_parent->mt_flags |= MDB_TXN_ERROR;
2124 mdb_txn_abort(txn);
2125 return EINVAL;
2126 }
2127
2128 if (txn->mt_parent) {
2129 MDB_txn *parent = txn->mt_parent;
2130 unsigned x, y, len;
2131 MDB_ID2L dst, src;
2132
2133 /* Append our free list to parent's */
2134 if (mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs)) {
2135 mdb_txn_abort(txn);
2136 return ENOMEM;
2137 }
2138 mdb_midl_free(txn->mt_free_pgs);
2139
2140 parent->mt_next_pgno = txn->mt_next_pgno;
2141 parent->mt_flags = txn->mt_flags;
2142
2143 /* Merge (and close) our cursors with parent's */
2144 mdb_cursor_merge(txn);
2145
2146 /* Update parent's DB table. */
2147 memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
2148 txn->mt_parent->mt_numdbs = txn->mt_numdbs;
2149 txn->mt_parent->mt_dbflags[0] = txn->mt_dbflags[0];
2150 txn->mt_parent->mt_dbflags[1] = txn->mt_dbflags[1];
2151 for (i=2; i<txn->mt_numdbs; i++) {
2152 /* preserve parent's DB_NEW status */
2153 x = txn->mt_parent->mt_dbflags[i] & DB_NEW;
2154 txn->mt_parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
2155 }
2156
2157 dst = txn->mt_parent->mt_u.dirty_list;
2158 src = txn->mt_u.dirty_list;
2159 /* Find len = length of merging our dirty list with parent's */
2160 x = dst[0].mid;
2161 dst[0].mid = 0; /* simplify loops */
2162 if (parent->mt_parent) {
2163 len = x + src[0].mid;
2164 y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
2165 for (i = x; y && i; y--) {
2166 pgno_t yp = src[y].mid;
2167 while (yp < dst[i].mid)
2168 i--;
2169 if (yp == dst[i].mid) {
2170 i--;
2171 len--;
2172 }
2173 }
2174 } else { /* Simplify the above for single-ancestor case */
2175 len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
2176 }
2177 /* Merge our dirty list with parent's */
2178 y = src[0].mid;
2179 for (i = len; y; dst[i--] = src[y--]) {
2180 pgno_t yp = src[y].mid;
2181 while (yp < dst[x].mid)
2182 dst[i--] = dst[x--];
2183 if (yp == dst[x].mid)
2184 free(dst[x--].mptr);
2185 }
2186 assert(i == x);
2187 dst[0].mid = len;
2188 free(txn->mt_u.dirty_list);
2189 parent->mt_dirty_room = txn->mt_dirty_room;
2190
2191 txn->mt_parent->mt_child = NULL;
2192 free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pgfree);
2193 free(txn);
2194 return MDB_SUCCESS;
2195 }
2196
2197 if (txn != env->me_txn) {
2198 DPUTS("attempt to commit unknown transaction");
2199 mdb_txn_abort(txn);
2200 return EINVAL;
2201 }
2202
2203 if (!txn->mt_u.dirty_list[0].mid && !(txn->mt_flags & MDB_TXN_DIRTY))
2204 goto done;
2205
2206 DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu",
2207 txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root);
2208
2209 /* Update DB root pointers */
2210 if (txn->mt_numdbs > 2) {
2211 MDB_dbi i;
2212 MDB_val data;
2213 data.mv_size = sizeof(MDB_db);
2214
2215 mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
2216 for (i = 2; i < txn->mt_numdbs; i++) {
2217 if (txn->mt_dbflags[i] & DB_DIRTY) {
2218 data.mv_data = &txn->mt_dbs[i];
2219 rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
2220 if (rc)
2221 goto fail;
2222 }
2223 }
2224 }
2225
2226 /* Save the freelist as of this transaction to the freeDB. This
2227 * can change the freelist, so keep trying until it stabilizes.
2228 *
2229 * env->me_pglast and the length of txn->mt_free_pgs cannot decrease,
2230 * except the code below can decrease env->me_pglast to split pghead.
2231 * Page numbers cannot disappear from txn->mt_free_pgs. New pages
2232 * can only appear in env->me_pghead when env->me_pglast increases.
2233 * Until then, the me_pghead pointer won't move but can become NULL.
2234 */
2235
2236 mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
2237 oldpg_txnid = id = 0;
2238 freecnt = 0;
2239
2240 /* should only be one record now */
2241 if (env->me_pghead || env->me_pglast) {
2242 /* make sure first page of freeDB is touched and on freelist */
2243 rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY);
2244 if (rc && rc != MDB_NOTFOUND) {
2245 fail:
2246 mdb_txn_abort(txn);
2247 return rc;
2248 }
2249 }
2250
2251 /* Delete IDLs we used from the free list */
2252 if (env->me_pglast) {
2253 MDB_val key;
2254
2255 do {
2256 free_pgfirst:
2257 rc = mdb_cursor_first(&mc, &key, NULL);
2258 if (rc)
2259 goto fail;
2260 oldpg_txnid = *(txnid_t *)key.mv_data;
2261 again:
2262 assert(oldpg_txnid <= env->me_pglast);
2263 id = 0;
2264 rc = mdb_cursor_del(&mc, 0);
2265 if (rc)
2266 goto fail;
2267 } while (oldpg_txnid < env->me_pglast);
2268 }
2269
2270 /* Save IDL of pages freed by this txn, to freeDB */
2271 free2:
2272 if (freecnt != txn->mt_free_pgs[0]) {
2273 MDB_val key, data;
2274
2275 /* make sure last page of freeDB is touched and on freelist */
2276 key.mv_size = MDB_MAXKEYSIZE+1;
2277 key.mv_data = NULL;
2278 rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY);
2279 if (rc && rc != MDB_NOTFOUND)
2280 goto fail;
2281
2282 #if MDB_DEBUG > 1
2283 {
2284 unsigned int i;
2285 MDB_IDL idl = txn->mt_free_pgs;
2286 mdb_midl_sort(txn->mt_free_pgs);
2287 DPRINTF("IDL write txn %zu root %zu num %zu",
2288 txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
2289 for (i=1; i<=idl[0]; i++) {
2290 DPRINTF("IDL %zu", idl[i]);
2291 }
2292 }
2293 #endif
2294 /* write to last page of freeDB */
2295 key.mv_size = sizeof(pgno_t);
2296 key.mv_data = &txn->mt_txnid;
2297 /* The free list can still grow during this call,
2298 * despite the pre-emptive touches above. So retry
2299 * until the reserved space remains big enough.
2300 */
2301 do {
2302 assert(freecnt < txn->mt_free_pgs[0]);
2303 freecnt = txn->mt_free_pgs[0];
2304 data.mv_size = MDB_IDL_SIZEOF(txn->mt_free_pgs);
2305 rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
2306 if (rc)
2307 goto fail;
2308 } while (freecnt != txn->mt_free_pgs[0]);
2309 mdb_midl_sort(txn->mt_free_pgs);
2310 memcpy(data.mv_data, txn->mt_free_pgs, data.mv_size);
2311 if (oldpg_txnid < env->me_pglast || (!env->me_pghead && id))
2312 goto free_pgfirst; /* used up freeDB[oldpg_txnid] */
2313 }
2314
2315 /* Put back page numbers we took from freeDB but did not use */
2316 if (env->me_pghead) {
2317 for (;;) {
2318 MDB_val key, data;
2319 pgno_t orig, *mop;
2320
2321 mop = env->me_pghead;
2322 id = env->me_pglast;
2323 key.mv_size = sizeof(id);
2324 key.mv_data = &id;
2325 /* These steps may grow the freelist again
2326 * due to freed overflow pages...
2327 */
2328 i = 2;
2329 do {
2330 orig = mop[0];
2331 if (orig > env->me_maxfree_1pg && id > 4)
2332 orig = env->me_maxfree_1pg; /* Do not use more than 1 page */
2333 data.mv_size = (orig + 1) * sizeof(pgno_t);
2334 rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
2335 if (rc)
2336 goto fail;
2337 assert(!env->me_pghead || env->me_pglast);
2338 /* mop could have been used again here */
2339 if (id != env->me_pglast || env->me_pghead == NULL)
2340 goto again; /* was completely used up */
2341 assert(mop == env->me_pghead);
2342 } while (mop[0] < orig && --i);
2343 memcpy(data.mv_data, mop, data.mv_size);
2344 if (mop[0] <= orig)
2345 break;
2346 *(pgno_t *)data.mv_data = orig;
2347 mop[orig] = mop[0] - orig;
2348 env->me_pghead = mop += orig;
2349 /* Save more oldpages at the previous txnid. */
2350 assert(env->me_pglast == id && id == oldpg_txnid);
2351 env->me_pglast = --oldpg_txnid;
2352 }
2353 }
2354
2355 /* Check for growth of freelist again */
2356 if (freecnt != txn->mt_free_pgs[0])
2357 goto free2;
2358
2359 free(env->me_pgfree);
2360 env->me_pghead = env->me_pgfree = NULL;
2361
2362 if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) {
2363 if (mdb_midl_shrink(&txn->mt_free_pgs))
2364 env->me_free_pgs = txn->mt_free_pgs;
2365 }
2366
2367 #if MDB_DEBUG > 2
2368 mdb_audit(txn);
2369 #endif
2370
2371 if (env->me_flags & MDB_WRITEMAP) {
2372 for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) {
2373 dp = txn->mt_u.dirty_list[i].mptr;
2374 /* clear dirty flag */
2375 dp->mp_flags &= ~P_DIRTY;
2376 txn->mt_u.dirty_list[i].mid = 0;
2377 }
2378 txn->mt_u.dirty_list[0].mid = 0;
2379 goto sync;
2380 }
2381
2382 /* Commit up to MDB_COMMIT_PAGES dirty pages to disk until done.
2383 */
2384 next = 0;
2385 i = 1;
2386 do {
2387 #ifdef _WIN32
2388 /* Windows actually supports scatter/gather I/O, but only on
2389 * unbuffered file handles. Since we're relying on the OS page
2390 * cache for all our data, that's self-defeating. So we just
2391 * write pages one at a time. We use the ov structure to set
2392 * the write offset, to at least save the overhead of a Seek
2393 * system call.
2394 */
2395 OVERLAPPED ov;
2396 memset(&ov, 0, sizeof(ov));
2397 for (; i<=txn->mt_u.dirty_list[0].mid; i++) {
2398 size_t wsize;
2399 dp = txn->mt_u.dirty_list[i].mptr;
2400 DPRINTF("committing page %zu", dp->mp_pgno);
2401 size = dp->mp_pgno * env->me_psize;
2402 ov.Offset = size & 0xffffffff;
2403 ov.OffsetHigh = size >> 16;
2404 ov.OffsetHigh >>= 16;
2405 /* clear dirty flag */
2406 dp->mp_flags &= ~P_DIRTY;
2407 wsize = env->me_psize;
2408 if (IS_OVERFLOW(dp)) wsize *= dp->mp_pages;
2409 rc = WriteFile(env->me_fd, dp, wsize, NULL, &ov);
2410 if (!rc) {
2411 n = ErrCode();
2412 DPRINTF("WriteFile: %d", n);
2413 mdb_txn_abort(txn);
2414 return n;
2415 }
2416 }
2417 done = 1;
2418 #else
2419 struct iovec iov[MDB_COMMIT_PAGES];
2420 n = 0;
2421 done = 1;
2422 size = 0;
2423 for (; i<=txn->mt_u.dirty_list[0].mid; i++) {
2424 dp = txn->mt_u.dirty_list[i].mptr;
2425 if (dp->mp_pgno != next) {
2426 if (n) {
2427 rc = writev(env->me_fd, iov, n);
2428 if (rc != size) {
2429 n = ErrCode();
2430 if (rc > 0)
2431 DPUTS("short write, filesystem full?");
2432 else
2433 DPRINTF("writev: %s", strerror(n));
2434 mdb_txn_abort(txn);
2435 return n;
2436 }
2437 n = 0;
2438 size = 0;
2439 }
2440 lseek(env->me_fd, dp->mp_pgno * env->me_psize, SEEK_SET);
2441 next = dp->mp_pgno;
2442 }
2443 DPRINTF("committing page %zu", dp->mp_pgno);
2444 iov[n].iov_len = env->me_psize;
2445 if (IS_OVERFLOW(dp)) iov[n].iov_len *= dp->mp_pages;
2446 iov[n].iov_base = (char *)dp;
2447 size += iov[n].iov_len;
2448 next = dp->mp_pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1);
2449 /* clear dirty flag */
2450 dp->mp_flags &= ~P_DIRTY;
2451 if (++n >= MDB_COMMIT_PAGES) {
2452 done = 0;
2453 i++;
2454 break;
2455 }
2456 }
2457
2458 if (n == 0)
2459 break;
2460
2461 rc = writev(env->me_fd, iov, n);
2462 if (rc != size) {
2463 n = ErrCode();
2464 if (rc > 0)
2465 DPUTS("short write, filesystem full?");
2466 else
2467 DPRINTF("writev: %s", strerror(n));
2468 mdb_txn_abort(txn);
2469 return n;
2470 }
2471 #endif
2472 } while (!done);
2473
2474 /* Drop the dirty pages.
2475 */
2476 for (i=1; i<=txn->mt_u.dirty_list[0].mid; i++) {
2477 dp = txn->mt_u.dirty_list[i].mptr;
2478 if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) {
2479 mdb_page_free(txn->mt_env, dp);
2480 } else {
2481 VGMEMP_FREE(txn->mt_env, dp);
2482 free(dp);
2483 }
2484 txn->mt_u.dirty_list[i].mid = 0;
2485 }
2486 txn->mt_u.dirty_list[0].mid = 0;
2487
2488 sync:
2489 if ((n = mdb_env_sync(env, 0)) != 0 ||
2490 (n = mdb_env_write_meta(txn)) != MDB_SUCCESS) {
2491 mdb_txn_abort(txn);
2492 return n;
2493 }
2494
2495 done:
2496 env->me_pglast = 0;
2497 env->me_txn = NULL;
2498 /* update the DB flags */
2499 for (i = 2; i<txn->mt_numdbs; i++) {
2500 if (txn->mt_dbflags[i] & DB_NEW)
2501 env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
2502 }
2503 if (txn->mt_numdbs > env->me_numdbs)
2504 env->me_numdbs = txn->mt_numdbs;
2505
2506 UNLOCK_MUTEX_W(env);
2507 free(txn);
2508
2509 return MDB_SUCCESS;
2510 }
2511
2512 /** Read the environment parameters of a DB environment before
2513 * mapping it into memory.
2514 * @param[in] env the environment handle
2515 * @param[out] meta address of where to store the meta information
2516 * @return 0 on success, non-zero on failure.
2517 */
2518 static int
2519 mdb_env_read_header(MDB_env *env, MDB_meta *meta)
2520 {
2521 MDB_pagebuf pbuf;
2522 MDB_page *p;
2523 MDB_meta *m;
2524 int i, rc, err;
2525
2526 /* We don't know the page size yet, so use a minimum value.
2527 * Read both meta pages so we can use the latest one.
2528 */
2529
2530 for (i=0; i<2; i++) {
2531 #ifdef _WIN32
2532 if (!ReadFile(env->me_fd, &pbuf, MDB_PAGESIZE, (DWORD *)&rc, NULL) || rc == 0)
2533 #else
2534 if ((rc = read(env->me_fd, &pbuf, MDB_PAGESIZE)) == 0)
2535 #endif
2536 {
2537 return ENOENT;
2538 }
2539 else if (rc != MDB_PAGESIZE) {
2540 err = ErrCode();
2541 if (rc > 0)
2542 err = MDB_INVALID;
2543 DPRINTF("read: %s", strerror(err));
2544 return err;
2545 }
2546
2547 p = (MDB_page *)&pbuf;
2548
2549 if (!F_ISSET(p->mp_flags, P_META)) {
2550 DPRINTF("page %zu not a meta page", p->mp_pgno);
2551 return MDB_INVALID;
2552 }
2553
2554 m = METADATA(p);
2555 if (m->mm_magic != MDB_MAGIC) {
2556 DPUTS("meta has invalid magic");
2557 return MDB_INVALID;
2558 }
2559
2560 if (m->mm_version != MDB_VERSION) {
2561 DPRINTF("database is version %u, expected version %u",
2562 m->mm_version, MDB_VERSION);
2563 return MDB_VERSION_MISMATCH;
2564 }
2565
2566 if (i) {
2567 if (m->mm_txnid > meta->mm_txnid)
2568 memcpy(meta, m, sizeof(*m));
2569 } else {
2570 memcpy(meta, m, sizeof(*m));
2571 #ifdef _WIN32
2572 if (SetFilePointer(env->me_fd, meta->mm_psize, NULL, FILE_BEGIN) != meta->mm_psize)
2573 #else
2574 if (lseek(env->me_fd, meta->mm_psize, SEEK_SET) != meta->mm_psize)
2575 #endif
2576 return ErrCode();
2577 }
2578 }
2579 return 0;
2580 }
2581
2582 /** Write the environment parameters of a freshly created DB environment.
2583 * @param[in] env the environment handle
2584 * @param[out] meta address of where to store the meta information
2585 * @return 0 on success, non-zero on failure.
2586 */
2587 static int
2588 mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
2589 {
2590 MDB_page *p, *q;
2591 MDB_meta *m;
2592 int rc;
2593 unsigned int psize;
2594
2595 DPUTS("writing new meta page");
2596
2597 GET_PAGESIZE(psize);
2598
2599 meta->mm_magic = MDB_MAGIC;
2600 meta->mm_version = MDB_VERSION;
2601 meta->mm_mapsize = env->me_mapsize;
2602 meta->mm_psize = psize;
2603 meta->mm_last_pg = 1;
2604 meta->mm_flags = env->me_flags & 0xffff;
2605 meta->mm_flags |= MDB_INTEGERKEY;
2606 meta->mm_dbs[0].md_root = P_INVALID;
2607 meta->mm_dbs[1].md_root = P_INVALID;
2608
2609 p = calloc(2, psize);
2610 p->mp_pgno = 0;
2611 p->mp_flags = P_META;
2612
2613 m = METADATA(p);
2614 memcpy(m, meta, sizeof(*meta));
2615
2616 q = (MDB_page *)((char *)p + psize);
2617
2618 q->mp_pgno = 1;
2619 q->mp_flags = P_META;
2620
2621 m = METADATA(q);
2622 memcpy(m, meta, sizeof(*meta));
2623
2624 #ifdef _WIN32
2625 {
2626 DWORD len;
2627 SetFilePointer(env->me_fd, 0, NULL, FILE_BEGIN);
2628 rc = WriteFile(env->me_fd, p, psize * 2, &len, NULL);
2629 rc = (len == psize * 2) ? MDB_SUCCESS : ErrCode();
2630 }
2631 #else
2632 lseek(env->me_fd, 0, SEEK_SET);
2633 rc = write(env->me_fd, p, psize * 2);
2634 rc = (rc == (int)psize * 2) ? MDB_SUCCESS : ErrCode();
2635 #endif
2636 free(p);
2637 return rc;
2638 }
2639
2640 /** Update the environment info to commit a transaction.
 * Stores the txn's two core DB records, last page number and txnid in the
 * meta page selected by !txn->mt_toggle. With MDB_WRITEMAP the fields are
 * stored through the map and optionally msync'ed; otherwise the changed tail
 * of the MDB_meta struct is written with one pwrite()/WriteFile().
 * On success mti_txnid is set to the txn's id; on failure the environment
 * is flagged MDB_FATAL_ERROR.
2641 * @param[in] txn the transaction that's being committed
2642 * @return 0 on success, non-zero on failure.
2643 */
2644 static int
2645 mdb_env_write_meta(MDB_txn *txn)
2646 {
2647 MDB_env *env;
2648 MDB_meta meta, metab, *mp;
2649 off_t off;
2650 int rc, len, toggle;
2651 char *ptr;
2652 HANDLE mfd;
2653 #ifdef _WIN32
2654 OVERLAPPED ov;
2655 #endif
2656
2657 assert(txn != NULL);
2658 assert(txn->mt_env != NULL);
2659
/* Write the meta page other than the one selected by txn->mt_toggle */
2660 toggle = !txn->mt_toggle;
2661 DPRINTF("writing meta page %d for root page %zu",
2662 toggle, txn->mt_dbs[MAIN_DBI].md_root);
2663
2664 env = txn->mt_env;
2665 mp = env->me_metas[toggle];
2666
2667 if (env->me_flags & MDB_WRITEMAP) {
2668 /* Persist any increases of mapsize config */
2669 if (env->me_mapsize > mp->mm_mapsize)
2670 mp->mm_mapsize = env->me_mapsize;
2671 mp->mm_dbs[0] = txn->mt_dbs[0];
2672 mp->mm_dbs[1] = txn->mt_dbs[1];
2673 mp->mm_last_pg = txn->mt_next_pgno - 1;
2674 mp->mm_txnid = txn->mt_txnid;
/* Sync just this meta page, unless meta syncs are disabled */
2675 if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
2676 rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
2677 ptr = env->me_map;
2678 if (toggle)
2679 ptr += env->me_psize;
2680 if (MDB_MSYNC(ptr, env->me_psize, rc)) {
2681 rc = ErrCode();
/* Jumps into the error block of the non-WRITEMAP path below */
2682 goto fail;
2683 }
2684 }
2685 goto done;
2686 }
/* Remember the target page's current txnid and last page number, for the
 * recovery write in the error path.
 */
2687 metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
2688 metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
2689
/* Only the tail of the struct is written: from mm_mapsize when the map
 * size grew, otherwise from mm_dbs[0].md_depth.
 */
2690 ptr = (char *)&meta;
2691 if (env->me_mapsize > mp->mm_mapsize) {
2692 /* Persist any increases of mapsize config */
2693 meta.mm_mapsize = env->me_mapsize;
2694 off = offsetof(MDB_meta, mm_mapsize);
2695 } else {
2696 off = offsetof(MDB_meta, mm_dbs[0].md_depth);
2697 }
2698 len = sizeof(MDB_meta) - off;
2699
2700 ptr += off;
2701 meta.mm_dbs[0] = txn->mt_dbs[0];
2702 meta.mm_dbs[1] = txn->mt_dbs[1];
2703 meta.mm_last_pg = txn->mt_next_pgno - 1;
2704 meta.mm_txnid = txn->mt_txnid;
2705
/* Turn the struct offset into a file offset: skip the first meta page
 * when writing the second one, then skip the page header.
 */
2706 if (toggle)
2707 off += env->me_psize;
2708 off += PAGEHDRSZ;
2709
2710 /* Write to the SYNC fd */
2711 mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
2712 env->me_fd : env->me_mfd;
2713 #ifdef _WIN32
2714 {
2715 memset(&ov, 0, sizeof(ov));
2716 ov.Offset = off;
2717 WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov);
2718 }
2719 #else
2720 rc = pwrite(mfd, ptr, len, off);
2721 #endif
2722 if (rc != len) {
/* NOTE(review): r2 is assigned below but never read, and is unused
 * entirely under _WIN32.
 */
2723 int r2;
2724 rc = ErrCode();
2725 DPUTS("write failed, disk error?");
2726 /* On a failure, the pagecache still contains the new data.
2727 * Write some old data back, to prevent it from being used.
2728 * Use the non-SYNC fd; we know it will fail anyway.
2729 */
2730 meta.mm_last_pg = metab.mm_last_pg;
2731 meta.mm_txnid = metab.mm_txnid;
2732 #ifdef _WIN32
2733 WriteFile(env->me_fd, ptr, len, NULL, &ov);
2734 #else
2735 r2 = pwrite(env->me_fd, ptr, len, off);
2736 #endif
2737 fail:
2738 env->me_flags |= MDB_FATAL_ERROR;
2739 return rc;
2740 }
2741 done:
2742 /* Memory ordering issues are irrelevant; since the entire writer
2743 * is wrapped by wmutex, all of these changes will become visible
2744 * after the wmutex is unlocked. Since the DB is multi-version,
2745 * readers will get consistent data regardless of how fresh or
2746 * how stale their view of these values is.
2747 */
2748 txn->mt_env->me_txns->mti_txnid = txn->mt_txnid;
2749
2750 return MDB_SUCCESS;
2751 }
2752
2753 /** Check both meta pages to see which one is newer.
2754 * @param[in] env the environment handle
2755 * @return meta toggle (0 or 1).
2756 */
2757 static int
2758 mdb_env_pick_meta(const MDB_env *env)
2759 {
2760 return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid);
2761 }
2762
2763 int
2764 mdb_env_create(MDB_env **env)
2765 {
2766 MDB_env *e;
2767
2768 e = calloc(1, sizeof(MDB_env));
2769 if (!e)
2770 return ENOMEM;
2771
2772 e->me_free_pgs = mdb_midl_alloc();
2773 if (!e->me_free_pgs) {
2774 free(e);
2775 return ENOMEM;
2776 }
2777 e->me_maxreaders = DEFAULT_READERS;
2778 e->me_maxdbs = 2;
2779 e->me_fd = INVALID_HANDLE_VALUE;
2780 e->me_lfd = INVALID_HANDLE_VALUE;
2781 e->me_mfd = INVALID_HANDLE_VALUE;
2782 #ifdef MDB_USE_POSIX_SEM
2783 e->me_rmutex = SEM_FAILED;
2784 e->me_wmutex = SEM_FAILED;
2785 #endif
2786 e->me_pid = getpid();
2787 VGMEMP_CREATE(e,0,0);
2788 *env = e;
2789 return MDB_SUCCESS;
2790 }
2791
2792 int
2793 mdb_env_set_mapsize(MDB_env *env, size_t size)
2794 {
2795 if (env->me_map)
2796 return EINVAL;
2797 env->me_mapsize = size;
2798 if (env->me_psize)
2799 env->me_maxpg = env->me_mapsize / env->me_psize;
2800 return MDB_SUCCESS;
2801 }
2802
2803 int
2804 mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
2805 {
2806 if (env->me_map)
2807 return EINVAL;
2808 env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */
2809 return MDB_SUCCESS;
2810 }
2811
2812 int
2813 mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
2814 {
2815 if (env->me_map || readers < 1)
2816 return EINVAL;
2817 env->me_maxreaders = readers;
2818 return MDB_SUCCESS;
2819 }
2820
2821 int
2822 mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
2823 {
2824 if (!env || !readers)
2825 return EINVAL;
2826 *readers = env->me_maxreaders;
2827 return MDB_SUCCESS;
2828 }
2829
2830 /** Further setup required for opening an MDB environment
2831 */
2832 static int
2833 mdb_env_open2(MDB_env *env)
2834 {
2835 unsigned int flags = env->me_flags;
2836 int i, newenv = 0, prot;
2837 MDB_meta meta;
2838 MDB_page *p;
2839
2840 memset(&meta, 0, sizeof(meta));
2841
2842 if ((i = mdb_env_read_header(env, &meta)) != 0) {
2843 if (i != ENOENT)
2844 return i;
2845 DPUTS("new mdbenv");
2846 newenv = 1;
2847 }
2848
2849 /* Was a mapsize configured? */
2850 if (!env->me_mapsize) {
2851 /* If this is a new environment, take the default,
2852 * else use the size recorded in the existing env.
2853 */
2854 env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize;
2855 } else if (env->me_mapsize < meta.mm_mapsize) {
2856 /* If the configured size is smaller, make sure it's
2857 * still big enough. Silently round up to minimum if not.
2858 */
2859 size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
2860 if (env->me_mapsize < minsize)
2861 env->me_mapsize = minsize;
2862 }
2863
2864 #ifdef _WIN32
2865 {
2866 HANDLE mh;
2867 LONG sizelo, sizehi;
2868 sizelo = env->me_mapsize & 0xffffffff;
2869 sizehi = env->me_mapsize >> 16; /* pointless on WIN32, only needed on W64 */
2870 sizehi >>= 16;
2871 /* Windows won't create mappings for zero length files.
2872 * Just allocate the maxsize right now.
2873 */
2874 if (newenv) {
2875 SetFilePointer(env->me_fd, sizelo, sizehi ? &sizehi : NULL, 0);
2876 if (!SetEndOfFile(env->me_fd))
2877 return ErrCode();
2878 SetFilePointer(env->me_fd, 0, NULL, 0);
2879 }
2880 mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
2881 PAGE_READWRITE : PAGE_READONLY,
2882 sizehi, sizelo, NULL);
2883 if (!mh)
2884 return ErrCode();
2885 env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
2886 FILE_MAP_WRITE : FILE_MAP_READ,
2887 0, 0, env->me_mapsize, meta.mm_address);
2888 CloseHandle(mh);
2889 if (!env->me_map)
2890 return ErrCode();
2891 }
2892 #else
2893 i = MAP_SHARED;
2894 prot = PROT_READ;
2895 if (flags & MDB_WRITEMAP) {
2896 prot |= PROT_WRITE;
2897 if (ftruncate(env->me_fd, env->me_mapsize) < 0)
2898 return ErrCode();
2899 }
2900 env->me_map = mmap(meta.mm_address, env->me_mapsize, prot, i,
2901 env->me_fd, 0);
2902 if (env->me_map == MAP_FAILED) {
2903 env->me_map = NULL;
2904 return ErrCode();
2905 }
2906 /* Turn off readahead. It's harmful when the DB is larger than RAM. */
2907 #ifdef MADV_RANDOM
2908 madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
2909 #else
2910 #ifdef POSIX_MADV_RANDOM
2911 posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
2912 #endif /* POSIX_MADV_RANDOM */
2913 #endif /* MADV_RANDOM */
2914 #endif /* _WIN32 */
2915
2916 if (newenv) {
2917 if (flags & MDB_FIXEDMAP)
2918 meta.mm_address = env->me_map;
2919 i = mdb_env_init_meta(env, &meta);
2920 if (i != MDB_SUCCESS) {
2921 return i;
2922 }
2923 } else if (meta.mm_address && env->me_map != meta.mm_address) {
2924 /* Can happen because the address argument to mmap() is just a
2925 * hint. mmap() can pick another, e.g. if the range is in use.
2926 * The MAP_FIXED flag would prevent that, but then mmap could
2927 * instead unmap existing pages to make room for the new map.
2928 */
2929 return EBUSY; /* TODO: Make a new MDB_* error code? */
2930 }
2931 env->me_psize = meta.mm_psize;
2932 env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
2933 env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
2934
2935 env->me_maxpg = env->me_mapsize / env->me_psize;
2936
2937 p = (MDB_page *)env->me_map;
2938 env->me_metas[0] = METADATA(p);
2939 env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + meta.mm_psize);
2940
2941 #if MDB_DEBUG
2942 {
2943 int toggle = mdb_env_pick_meta(env);
2944 MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI];
2945
2946 DPRINTF("opened database version %u, pagesize %u",
2947 env->me_metas[0]->mm_version, env->me_psize);
2948 DPRINTF("using meta page %d", toggle);
2949 DPRINTF("depth: %u", db->md_depth);
2950 DPRINTF("entries: %zu", db->md_entries);
2951 DPRINTF("branch pages: %zu", db->md_branch_pages);
2952 DPRINTF("leaf pages: %zu", db->md_leaf_pages);
2953 DPRINTF("overflow pages: %zu", db->md_overflow_pages);
2954 DPRINTF("root: %zu", db->md_root);
2955 }
2956 #endif
2957
2958 return MDB_SUCCESS;
2959 }
2960
2961
2962 /** Release a reader thread's slot in the reader lock table.
2963 * This function is called automatically when a thread exits.
2964 * @param[in] ptr This points to the slot in the reader lock table.
2965 */
2966 static void
2967 mdb_env_reader_dest(void *ptr)
2968 {
2969 MDB_reader *reader = ptr;
2970
2971 reader->mr_pid = 0;
2972 }
2973
#ifdef _WIN32
/** Junk for arranging thread-specific callbacks on Windows. This is
 * necessarily platform and compiler-specific. Windows supports up
 * to 1088 keys. Let's assume nobody opens more than 64 environments
 * in a single process, for now. They can override this if needed.
 */
#ifndef MAX_TLS_KEYS
#define MAX_TLS_KEYS 64
#endif
/** TLS keys of all open environments, scanned when a thread exits */
static pthread_key_t mdb_tls_keys[MAX_TLS_KEYS];
/** Number of entries in use in #mdb_tls_keys */
static int mdb_tls_nkeys;

/** TLS callback: when a thread detaches, release any reader slot it
 * holds in each registered environment.
 */
static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr)
{
	int i;
	switch(reason) {
	case DLL_PROCESS_ATTACH: break;
	case DLL_THREAD_ATTACH: break;
	case DLL_THREAD_DETACH:
		for (i=0; i<mdb_tls_nkeys; i++) {
			MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
			/* A thread that never used this environment has no
			 * slot; its TLS value is NULL and must not be touched.
			 */
			if (r)
				mdb_env_reader_dest(r);
		}
		break;
	case DLL_PROCESS_DETACH: break;
	}
}
#ifdef __GNUC__
#ifdef _WIN64
const PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback;
#else
PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback;
#endif
#else
#ifdef _WIN64
/* Force some symbol references.
 * _tls_used forces the linker to create the TLS directory if not already done
 * mdb_tls_cbp prevents whole-program-optimizer from dropping the symbol.
 */
#pragma comment(linker, "/INCLUDE:_tls_used")
#pragma comment(linker, "/INCLUDE:mdb_tls_cbp")
#pragma const_seg(".CRT$XLB")
extern const PIMAGE_TLS_CALLBACK mdb_tls_callback;
const PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
#pragma const_seg()
#else /* WIN32 */
#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_mdb_tls_cbp")
#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
#pragma data_seg()
#endif /* WIN 32/64 */
#endif /* !__GNUC__ */
#endif
3028
3029 /** Downgrade the exclusive lock on the region back to shared */
3030 static int
3031 mdb_env_share_locks(MDB_env *env, int *excl)
3032 {
3033 int rc = 0, toggle = mdb_env_pick_meta(env);
3034
3035 env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid;
3036
3037 #ifdef _WIN32
3038 {
3039 OVERLAPPED ov;
3040 /* First acquire a shared lock. The Unlock will
3041 * then release the existing exclusive lock.
3042 */
3043 memset(&ov, 0, sizeof(ov));
3044 if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) {
3045 rc = ErrCode();
3046 } else {
3047 UnlockFile(env->me_lfd, 0, 0, 1, 0);
3048 *excl = 0;
3049 }
3050 }
3051 #else
3052 {
3053 struct flock lock_info;
3054 /* The shared lock replaces the existing lock */
3055 memset((void *)&lock_info, 0, sizeof(lock_info));
3056 lock_info.l_type = F_RDLCK;
3057 lock_info.l_whence = SEEK_SET;
3058 lock_info.l_start = 0;
3059 lock_info.l_len = 1;
3060 while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) &&
3061 (rc = ErrCode()) == EINTR) ;
3062 *excl = rc ? -1 : 0; /* error may mean we lost the lock */
3063 }
3064 #endif
3065
3066 return rc;
3067 }
3068
3069 /** Try to get exlusive lock, otherwise shared.
3070 * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
3071 */
3072 static int
3073 mdb_env_excl_lock(MDB_env *env, int *excl)
3074 {
3075 int rc = 0;
3076 #ifdef _WIN32
3077 if (LockFile(env->me_lfd, 0, 0, 1, 0)) {
3078 *excl = 1;
3079 } else {
3080 OVERLAPPED ov;
3081 memset(&ov, 0, sizeof(ov));
3082 if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) {
3083 *excl = 0;
3084 } else {
3085 rc = ErrCode();
3086 }
3087 }
3088 #else
3089 struct flock lock_info;
3090 memset((void *)&lock_info, 0, sizeof(lock_info));
3091 lock_info.l_type = F_WRLCK;
3092 lock_info.l_whence = SEEK_SET;
3093 lock_info.l_start = 0;
3094 lock_info.l_len = 1;
3095 while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) &&
3096 (rc = ErrCode()) == EINTR) ;
3097 if (!rc) {
3098 *excl = 1;
3099 } else
3100 # ifdef MDB_USE_POSIX_SEM
3101 if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */
3102 # endif
3103 {
3104 lock_info.l_type = F_RDLCK;
3105 while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) &&
3106 (rc = ErrCode()) == EINTR) ;
3107 if (rc == 0)
3108 *excl = 0;
3109 }
3110 #endif
3111 return rc;
3112 }
3113
3114 #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
3115 /*
3116 * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code
3117 *
3118 * @(#) $Revision: 5.1 $
3119 * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $
3120 * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $
3121 *
3122 * http://www.isthe.com/chongo/tech/comp/fnv/index.html
3123 *
3124 ***
3125 *
3126 * Please do not copyright this code. This code is in the public domain.
3127 *
3128 * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
3129 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
3130 * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
3131 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
3132 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
3133 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
3134 * PERFORMANCE OF THIS SOFTWARE.
3135 *
3136 * By:
3137 * chongo <Landon Curt Noll> /\oo/\
3138 * http://www.isthe.com/chongo/
3139 *
3140 * Share and Enjoy! :-)
3141 */
3142
3143 typedef unsigned long long mdb_hash_t;
3144 #define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL)
3145
3146 /** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer
3147 * @param[in] str string to hash
3148 * @param[in] hval initial value for hash
3149 * @return 64 bit hash
3150 *
3151 * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the
3152 * hval arg on the first call.
3153 */
3154 static mdb_hash_t
3155 mdb_hash_val(MDB_val *val, mdb_hash_t hval)
3156 {
3157 unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */
3158 unsigned char *end = s + val->mv_size;
3159 /*
3160 * FNV-1a hash each octet of the string
3161 */
3162 while (s < end) {
3163 /* xor the bottom with the current octet */
3164 hval ^= (mdb_hash_t)*s++;
3165
3166 /* multiply by the 64 bit FNV magic prime mod 2^64 */
3167 hval += (hval << 1) + (hval << 4) + (hval << 5) +
3168 (hval << 7) + (hval << 8) + (hval << 40);
3169 }
3170 /* return our new hash value */
3171 return hval;
3172 }
3173
3174 /** Hash the string and output the hash in hex.
3175 * @param[in] str string to hash
3176 * @param[out] hexbuf an array of 17 chars to hold the hash
3177 */
3178 static void
3179 mdb_hash_hex(MDB_val *val, char *hexbuf)
3180 {
3181 int i;
3182 mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT);
3183 for (i=0; i<8; i++) {
3184 hexbuf += sprintf(hexbuf, "%02x", (unsigned int)h & 0xff);
3185 h >>= 8;
3186 }
3187 }
3188 #endif
3189
3190 /** Open and/or initialize the lock region for the environment.
3191 * @param[in] env The MDB environment.
3192 * @param[in] lpath The pathname of the file used for the lock region.
3193 * @param[in] mode The Unix permissions for the file, if we create it.
3194 * @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive
3195 * @return 0 on success, non-zero on failure.
3196 */
3197 static int
3198 mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
3199 {
3200 int rc;
3201 off_t size, rsize;
3202
3203 *excl = -1;
3204
3205 #ifdef _WIN32
3206 if ((env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE,
3207 FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS,
3208 FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
3209 rc = ErrCode();
3210 if (rc == ERROR_WRITE_PROTECT && (env->me_flags & MDB_RDONLY)) {
3211 env->me_flags |= MDB_ROFS;
3212 return MDB_SUCCESS;
3213 }
3214 goto fail_errno;
3215 }
3216 /* Try to get exclusive lock. If we succeed, then
3217 * nobody is using the lock region and we should initialize it.
3218 */
3219 if ((rc = mdb_env_excl_lock(env, excl))) goto fail;
3220 size = GetFileSize(env->me_lfd, NULL);
3221
3222 #else
3223 #if !(O_CLOEXEC)
3224 {
3225 int fdflags;
3226 if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) {
3227 rc = ErrCode();
3228 if (rc == EROFS && (env->me_flags & MDB_RDONLY)) {
3229 env->me_flags |= MDB_ROFS;
3230 return MDB_SUCCESS;
3231 }
3232 goto fail_errno;
3233 }
3234 /* Lose record locks when exec*() */
3235 if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0)
3236 fcntl(env->me_lfd, F_SETFD, fdflags);
3237 }
3238 #else /* O_CLOEXEC on Linux: Open file and set FD_CLOEXEC atomically */
3239 if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT|O_CLOEXEC, mode)) == -1) {
3240 rc = ErrCode();
3241 if (rc == EROFS && (env->me_flags & MDB_RDONLY)) {
3242 env->me_flags |= MDB_ROFS;
3243 return MDB_SUCCESS;
3244 }
3245 goto fail_errno;
3246 }
3247 #endif
3248
3249 /* Try to get exclusive lock. If we succeed, then
3250 * nobody is using the lock region and we should initialize it.
3251 */
3252 if ((rc = mdb_env_excl_lock(env, excl))) goto fail;
3253
3254 size = lseek(env->me_lfd, 0, SEEK_END);
3255 #endif
3256 rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
3257 if (size < rsize && *excl > 0) {
3258 #ifdef _WIN32
3259 SetFilePointer(env->me_lfd, rsize, NULL, 0);
3260 if (!SetEndOfFile(env->me_lfd)) goto fail_errno;
3261 #else
3262 if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno;
3263 #endif
3264 } else {
3265 rsize = size;
3266 size = rsize - sizeof(MDB_txninfo);
3267 env->me_maxreaders = size/sizeof(MDB_reader) + 1;
3268 }
3269 {
3270 #ifdef _WIN32
3271 HANDLE mh;
3272 mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE,
3273 0, 0, NULL);
3274 if (!mh) goto fail_errno;
3275 env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL);
3276 CloseHandle(mh);
3277 if (!env->me_txns) goto fail_errno;
3278 #else
3279 void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED,
3280 env->me_lfd, 0);
3281 if (m == MAP_FAILED) goto fail_errno;
3282 env->me_txns = m;
3283 #endif
3284 }
3285 if (*excl > 0) {
3286 #ifdef _WIN32
3287 BY_HANDLE_FILE_INFORMATION stbuf;
3288 struct {
3289 DWORD volume;
3290 DWORD nhigh;
3291 DWORD nlow;
3292 } idbuf;
3293 MDB_val val;
3294 char hexbuf[17];
3295
3296 if (!mdb_sec_inited) {
3297 InitializeSecurityDescriptor(&mdb_null_sd,
3298 SECURITY_DESCRIPTOR_REVISION);
3299 SetSecurityDescriptorDacl(&mdb_null_sd, TRUE, 0, FALSE);
3300 mdb_all_sa.nLength = sizeof(SECURITY_ATTRIBUTES);
3301 mdb_all_sa.bInheritHandle = FALSE;
3302 mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd;
3303 mdb_sec_inited = 1;
3304 }
3305 if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno;
3306 idbuf.volume = stbuf.dwVolumeSerialNumber;
3307 idbuf.nhigh = stbuf.nFileIndexHigh;
3308 idbuf.nlow = stbuf.nFileIndexLow;
3309 val.mv_data = &idbuf;
3310 val.mv_size = sizeof(idbuf);
3311 mdb_hash_hex(&val, hexbuf);
3312 sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", hexbuf);
3313 sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf);
3314 env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname);
3315 if (!env->me_rmutex) goto fail_errno;
3316 env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname);
3317 if (!env->me_wmutex) goto fail_errno;
3318 #elif defined(MDB_USE_POSIX_SEM)
3319 struct stat stbuf;
3320 struct {
3321 dev_t dev;
3322 ino_t ino;
3323 } idbuf;
3324 MDB_val val;
3325 char hexbuf[17];
3326
3327 if (fstat(env->me_lfd, &stbuf)) goto fail_errno;
3328 idbuf.dev = stbuf.st_dev;
3329 idbuf.ino = stbuf.st_ino;
3330 val.mv_data = &idbuf;
3331 val.mv_size = sizeof(idbuf);
3332 mdb_hash_hex(&val, hexbuf);
3333 sprintf(env->me_txns->mti_rmname, "/MDBr%s", hexbuf);
3334 sprintf(env->me_txns->mti_wmname, "/MDBw%s", hexbuf);
3335 /* Clean up after a previous run, if needed: Try to
3336 * remove both semaphores before doing anything else.
3337 */
3338 sem_unlink(env->me_txns->mti_rmname);
3339 sem_unlink(env->me_txns->mti_wmname);
3340 env->me_rmutex = sem_open(env->me_txns->mti_rmname,
3341 O_CREAT|O_EXCL, mode, 1);
3342 if (env->me_rmutex == SEM_FAILED) goto fail_errno;
3343 env->me_wmutex = sem_open(env->me_txns->mti_wmname,
3344 O_CREAT|O_EXCL, mode, 1);
3345 if (env->me_wmutex == SEM_FAILED) goto fail_errno;
3346 #else /* MDB_USE_POSIX_SEM */
3347 pthread_mutexattr_t mattr;
3348
3349 if ((rc = pthread_mutexattr_init(&mattr))
3350 || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED))
3351 || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr))
3352 || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
3353 goto fail;
3354 pthread_mutexattr_destroy(&mattr);
3355 #endif /* _WIN32 || MDB_USE_POSIX_SEM */
3356
3357 env->me_txns->mti_version = MDB_VERSION;
3358 env->me_txns->mti_magic = MDB_MAGIC;
3359 env->me_txns->mti_txnid = 0;
3360 env->me_txns->mti_numreaders = 0;
3361
3362 } else {
3363 if (env->me_txns->mti_magic != MDB_MAGIC) {
3364 DPUTS("lock region has invalid magic");
3365 rc = MDB_INVALID;
3366 goto fail;
3367 }
3368 if (env->me_txns->mti_version != MDB_VERSION) {
3369 DPRINTF("lock region is version %u, expected version %u",
3370 env->me_txns->mti_version, MDB_VERSION);
3371 rc = MDB_VERSION_MISMATCH;
3372 goto fail;
3373 }
3374 rc = ErrCode();
3375 if (rc != EACCES && rc != EAGAIN) {
3376 goto fail;
3377 }
3378 #ifdef _WIN32
3379 env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname);
3380 if (!env->me_rmutex) goto fail_errno;
3381 env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname);
3382 if (!env->me_wmutex) goto fail_errno;
3383 #elif defined(MDB_USE_POSIX_SEM)
3384 env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0);
3385 if (env->me_rmutex == SEM_FAILED) goto fail_errno;
3386 env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0);
3387 if (env->me_wmutex == SEM_FAILED) goto fail_errno;
3388 #endif
3389 }
3390 return MDB_SUCCESS;
3391
3392 fail_errno:
3393 rc = ErrCode();
3394 fail:
3395 return rc;
3396 }
3397
/** The name of the lock file in the DB environment */
#define LOCKNAME "/lock.mdb"
/** The name of the data file in the DB environment */
#define DATANAME "/data.mdb"
/** The suffix of the lock file when no subdir is used */
#define LOCKSUFF "-lock"
/** Only a subset of the @ref mdb_env flags can be changed
 * at runtime. Changing other flags requires closing the
 * environment and re-opening it with the new flags.
 */
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC)
/** The remaining flags accepted by mdb_env_open(). Any flag outside
 * CHANGEABLE|CHANGELESS makes mdb_env_open() return EINVAL.
 */
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP)
3410
3411 int
3412 mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
3413 {
3414 int oflags, rc, len, excl;
3415 char *lpath, *dpath;
3416
3417 if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
3418 return EINVAL;
3419
3420 len = strlen(path);
3421 if (flags & MDB_NOSUBDIR) {
3422 rc = len + sizeof(LOCKSUFF) + len + 1;
3423 } else {
3424 rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME);
3425 }
3426 lpath = malloc(rc);
3427 if (!lpath)
3428 return ENOMEM;
3429 if (flags & MDB_NOSUBDIR) {
3430 dpath = lpath + len + sizeof(LOCKSUFF);
3431 sprintf(lpath, "%s" LOCKSUFF, path);
3432 strcpy(dpath, path);
3433 } else {
3434 dpath = lpath + len + sizeof(LOCKNAME);
3435 sprintf(lpath, "%s" LOCKNAME, path);
3436 sprintf(dpath, "%s" DATANAME, path);
3437 }
3438
3439 flags |= env->me_flags;
3440 /* silently ignore WRITEMAP if we're only getting read access */
3441 if (F_ISSET(flags, MDB_RDONLY|MDB_WRITEMAP))
3442 flags ^= MDB_WRITEMAP;
3443 env->me_flags = flags |= MDB_ENV_ACTIVE;
3444
3445 rc = mdb_env_setup_locks(env, lpath, mode, &excl);
3446 if (rc)
3447 goto leave;
3448
3449 #ifdef _WIN32
3450 if (F_ISSET(flags, MDB_RDONLY)) {
3451 oflags = GENERIC_READ;
3452 len = OPEN_EXISTING;
3453 } else {
3454 oflags = GENERIC_READ|GENERIC_WRITE;
3455 len = OPEN_ALWAYS;
3456 }
3457 mode = FILE_ATTRIBUTE_NORMAL;
3458 env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE,
3459 NULL, len, mode, NULL);
3460 #else
3461 if (F_ISSET(flags, MDB_RDONLY))
3462 oflags = O_RDONLY;
3463 else
3464 oflags = O_RDWR | O_CREAT;
3465
3466 env->me_fd = open(dpath, oflags, mode);
3467 #endif
3468 if (env->me_fd == INVALID_HANDLE_VALUE) {
3469 rc = ErrCode();
3470 goto leave;
3471 }
3472
3473 if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) {
3474 if (flags & (MDB_RDONLY|MDB_WRITEMAP)) {
3475 env->me_mfd = env->me_fd;
3476 } else {
3477 /* Synchronous fd for meta writes. Needed even with
3478 * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
3479 */
3480 #ifdef _WIN32
3481 env->me_mfd = CreateFile(dpath, oflags,
3482 FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len,
3483 mode | FILE_FLAG_WRITE_THROUGH, NULL);
3484 #else
3485 env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode);
3486 #endif
3487 if (env->me_mfd == INVALID_HANDLE_VALUE) {
3488 rc = ErrCode();
3489 goto leave;
3490 }
3491 }
3492 DPRINTF("opened dbenv %p", (void *) env);
3493 rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest);
3494 if (rc)
3495 goto leave;
3496 env->me_numdbs = 2; /* this notes that me_txkey was set */
3497 #ifdef _WIN32
3498 /* Windows TLS callbacks need help finding their TLS info. */
3499 if (mdb_tls_nkeys < MAX_TLS_KEYS)
3500 mdb_tls_keys[mdb_tls_nkeys++] = env->me_txkey;
3501 else {
3502 rc = MDB_TLS_FULL;
3503 goto leave;
3504 }
3505 #endif
3506 if (excl > 0) {
3507 rc = mdb_env_share_locks(env, &excl);
3508 if (rc)
3509 goto leave;
3510 }
3511 env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
3512 env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
3513 env->me_path = strdup(path);
3514 if (!env->me_dbxs || !env->me_dbflags || !env->me_path)
3515 rc = ENOMEM;
3516 }
3517
3518 leave:
3519 if (rc) {
3520 mdb_env_close0(env, excl);
3521 }
3522 free(lpath);
3523 return rc;
3524 }
3525
3526 /** Destroy resources from mdb_env_open() and clear our readers */
3527 static void
3528 mdb_env_close0(MDB_env *env, int excl)
3529 {
3530 int i;
3531
3532 if (!(env->me_flags & MDB_ENV_ACTIVE))
3533 return;
3534
3535 free(env->me_dbflags);
3536 free(env->me_dbxs);
3537 free(env->me_path);
3538
3539 if (env->me_numdbs) {
3540 pthread_key_delete(env->me_txkey);
3541 #ifdef _WIN32
3542 /* Delete our key from the global list */
3543 for (i=0; i<mdb_tls_nkeys; i++)
3544 if (mdb_tls_keys[i] == env->me_txkey) {
3545 mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1];
3546 mdb_tls_nkeys--;
3547 break;
3548 }
3549 #endif
3550 }
3551
3552 if (env->me_map) {
3553 munmap(env->me_map, env->me_mapsize);
3554 }
3555 if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE)
3556 close(env->me_mfd);
3557 if (env->me_fd != INVALID_HANDLE_VALUE)
3558 close(env->me_fd);
3559 if (env->me_txns) {
3560 pid_t pid = env->me_pid;
3561 /* Clearing readers is done in this function because
3562 * me_txkey with its destructor must be disabled first.
3563 */
3564 for (i = env->me_numreaders; --i >= 0; )
3565 if (env->me_txns->mti_readers[i].mr_pid == pid)
3566 env->me_txns->mti_readers[i].mr_pid = 0;
3567 #ifdef _WIN32
3568 if (env->me_rmutex) {
3569 CloseHandle(env->me_rmutex);
3570 if (env->me_wmutex) CloseHandle(env->me_wmutex);
3571 }
3572 /* Windows automatically destroys the mutexes when
3573 * the last handle closes.
3574 */
3575 #elif defined(MDB_USE_POSIX_SEM)
3576 if (env->me_rmutex != SEM_FAILED) {
3577 sem_close(env->me_rmutex);
3578 if (env->me_wmutex != SEM_FAILED)
3579 sem_close(env->me_wmutex);
3580 /* If we have the filelock: If we are the
3581 * only remaining user, clean up semaphores.
3582 */
3583 if (excl == 0)
3584 mdb_env_excl_lock(env, &excl);
3585 if (excl > 0) {
3586 sem_unlink(env->me_txns->mti_rmname);
3587 sem_unlink(env->me_txns->mti_wmname);
3588 }
3589 }
3590 #endif
3591 munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
3592 }
3593 if (env->me_lfd != INVALID_HANDLE_VALUE) {
3594 #ifdef _WIN32
3595 if (excl >= 0) {
3596 /* Unlock the lockfile. Windows would have unlocked it
3597 * after closing anyway, but not necessarily at once.
3598 */
3599 UnlockFile(env->me_lfd, 0, 0, 1, 0);
3600 }
3601 #endif
3602 close(env->me_lfd);
3603 }
3604
3605 env->me_flags &= ~MDB_ENV_ACTIVE;
3606 }
3607
3608 int
3609 mdb_env_copy(MDB_env *env, const char *path)
3610 {
3611 MDB_txn *txn = NULL;
3612 int rc, len;
3613 size_t wsize;
3614 char *lpath, *ptr;
3615 HANDLE newfd = INVALID_HANDLE_VALUE;
3616
3617 if (env->me_flags & MDB_NOSUBDIR) {
3618 lpath = (char *)path;
3619 } else {
3620 len = strlen(path);
3621 len += sizeof(DATANAME);
3622 lpath = malloc(len);
3623 if (!lpath)
3624 return ENOMEM;
3625 sprintf(lpath, "%s" DATANAME, path);
3626 }
3627
3628 /* The destination path must exist, but the destination file must not.
3629 * We don't want the OS to cache the writes, since the source data is
3630 * already in the OS cache.
3631 */
3632 #ifdef _WIN32
3633 newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
3634 FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
3635 #else
3636 newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL
3637 #ifdef O_DIRECT
3638 |O_DIRECT
3639 #endif
3640 , 0666);
3641 #endif
3642 if (!(env->me_flags & MDB_NOSUBDIR))
3643 free(lpath);
3644 if (newfd == INVALID_HANDLE_VALUE) {
3645 rc = ErrCode();
3646 goto leave;
3647 }
3648
3649 #ifdef F_NOCACHE /* __APPLE__ */
3650 rc = fcntl(newfd, F_NOCACHE, 1);
3651 if (rc) {
3652 rc = ErrCode();
3653 goto leave;
3654 }
3655 #endif
3656
3657 /* Do the lock/unlock of the reader mutex before starting the
3658 * write txn. Otherwise other read txns could block writers.
3659 */
3660 rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
3661 if (rc)
3662 goto leave;
3663
3664 if (!(env->me_flags & MDB_ROFS)) {
3665 /* We must start the actual read txn after blocking writers */
3666 mdb_txn_reset0(txn);
3667
3668 /* Temporarily block writers until we snapshot the meta pages */
3669 LOCK_MUTEX_W(env);
3670
3671 rc = mdb_txn_renew0(txn);
3672 if (rc) {
3673 UNLOCK_MUTEX_W(env);
3674 goto leave;
3675 }
3676 }
3677
3678 wsize = env->me_psize * 2;
3679 #ifdef _WIN32
3680 {
3681 DWORD len;
3682 rc = WriteFile(newfd, env->me_map, wsize, &len, NULL);
3683 rc = (len == wsize) ? MDB_SUCCESS : ErrCode();
3684 }
3685 #else
3686 rc = write(newfd, env->me_map, wsize);
3687 rc = (rc == (int)wsize) ? MDB_SUCCESS : ErrCode();
3688 #endif
3689 if (! (env->me_flags & MDB_ROFS))
3690 UNLOCK_MUTEX_W(env);
3691
3692 if (rc)
3693 goto leave;
3694
3695 ptr = env->me_map + wsize;
3696 wsize = txn->mt_next_pgno * env->me_psize - wsize;
3697 #define MAX_WRITE 2147483648U
3698 #ifdef _WIN32
3699 while (wsize > 0) {
3700 DWORD len, w2;
3701 if (wsize > MAX_WRITE)
3702 w2 = MAX_WRITE;
3703 else
3704 w2 = wsize;
3705 rc = WriteFile(newfd, ptr, w2, &len, NULL);
3706 rc = (len == w2) ? MDB_SUCCESS : ErrCode();
3707 if (rc) break;
3708 wsize -= w2;
3709 ptr += w2;
3710 }
3711 #else
3712 while (wsize > 0) {
3713 size_t w2;
3714 ssize_t wres;
3715 if (wsize > MAX_WRITE)
3716 w2 = MAX_WRITE;
3717 else
3718 w2 = wsize;
3719 wres = write(newfd, ptr, w2);
3720 rc = (wres > 0) ? MDB_SUCCESS : ErrCode();
3721 if (rc) break;
3722 wsize -= wres;
3723 ptr += wres;
3724 }
3725 #endif
3726 mdb_txn_abort(txn);
3727
3728 leave:
3729 if (newfd != INVALID_HANDLE_VALUE)
3730 close(newfd);
3731
3732 return rc;
3733 }
3734
3735 void
3736 mdb_env_close(MDB_env *env)
3737 {
3738 MDB_page *dp;
3739 int i;
3740
3741 if (env == NULL)
3742 return;
3743
3744 for (i = env->me_numdbs; --i > MAIN_DBI; )
3745 free(env->me_dbxs[i].md_name.mv_data);
3746
3747 VGMEMP_DESTROY(env);
3748 while ((dp = env->me_dpages) != NULL) {
3749 VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
3750 env->me_dpages = dp->mp_next;
3751 free(dp);
3752 }
3753
3754 mdb_env_close0(env, 0);
3755 mdb_midl_free(env->me_free_pgs);
3756 free(env);
3757 }
3758
3759 /** Compare two items pointing at aligned size_t's */
3760 static int
3761 mdb_cmp_long(const MDB_val *a, const MDB_val *b)
3762 {
3763 return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
3764 *(size_t *)a->mv_data > *(size_t *)b->mv_data;
3765 }
3766
3767 /** Compare two items pointing at aligned int's */
3768 static int
3769 mdb_cmp_int(const MDB_val *a, const MDB_val *b)
3770 {
3771 return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 :
3772 *(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data;
3773 }
3774
3775 /** Compare two items pointing at ints of unknown alignment.
3776 * Nodes and keys are guaranteed to be 2-byte aligned.
3777 */
3778 static int
3779 mdb_cmp_cint(const MDB_val *a, const MDB_val *b)
3780 {
3781 #if BYTE_ORDER == LITTLE_ENDIAN
3782 unsigned short *u, *c;
3783 int x;
3784
3785 u = (unsigned short *) ((char *) a->mv_data + a->mv_size);
3786 c = (unsigned short *) ((char *) b->mv_data + a->mv_size);
3787 do {
3788 x = *--u - *--c;
3789 } while(!x && u > (unsigned short *)a->mv_data);
3790 return x;
3791 #else
3792 return memcmp(a->mv_data, b->mv_data, a->mv_size);
3793 #endif
3794 }
3795
3796 /** Compare two items lexically */
3797 static int
3798 mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
3799 {
3800 int diff;
3801 ssize_t len_diff;
3802 unsigned int len;
3803
3804 len = a->mv_size;
3805 len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
3806 if (len_diff > 0) {
3807 len = b->mv_size;
3808 len_diff = 1;
3809 }
3810
3811 diff = memcmp(a->mv_data, b->mv_data, len);
3812 return diff ? diff : len_diff<0 ? -1 : len_diff;
3813 }
3814
3815 /** Compare two items in reverse byte order */
3816 static int
3817 mdb_cmp_memnr(const MDB_val *a, const MDB_val *b)
3818 {
3819 const unsigned char *p1, *p2, *p1_lim;
3820 ssize_t len_diff;
3821 int diff;
3822
3823 p1_lim = (const unsigned char *)a->mv_data;
3824 p1 = (const unsigned char *)a->mv_data + a->mv_size;
3825 p2 = (const unsigned char *)b->mv_data + b->mv_size;
3826
3827 len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
3828 if (len_diff > 0) {
3829 p1_lim += len_diff;
3830 len_diff = 1;
3831 }
3832
3833 while (p1 > p1_lim) {
3834 diff = *--p1 - *--p2;
3835 if (diff)
3836 return diff;
3837 }
3838 return len_diff<0 ? -1 : len_diff;
3839 }
3840
3841 /** Search for key within a page, using binary search.
3842 * Returns the smallest entry larger or equal to the key.
3843 * If exactp is non-null, stores whether the found entry was an exact match
3844 * in *exactp (1 or 0).
3845 * Updates the cursor index with the index of the found entry.
3846 * If no entry larger or equal to the key is found, returns NULL.
3847 */
3848 static MDB_node *
3849 mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
3850 {
3851 unsigned int i = 0, nkeys;
3852 int low, high;
3853 int rc = 0;
3854 MDB_page *mp = mc->mc_pg[mc->mc_top];
3855 MDB_node *node = NULL;
3856 MDB_val nodekey;
3857 MDB_cmp_func *cmp;
3858 DKBUF;
3859
3860 nkeys = NUMKEYS(mp);
3861
3862 #if MDB_DEBUG
3863 {
3864 pgno_t pgno;
3865 COPY_PGNO(pgno, mp->mp_pgno);
3866 DPRINTF("searching %u keys in %s %spage %zu",
3867 nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
3868 pgno);
3869 }
3870 #endif
3871
3872 assert(nkeys > 0);
3873
3874 low = IS_LEAF(mp) ? 0 : 1;
3875 high = nkeys - 1;
3876 cmp = mc->mc_dbx->md_cmp;
3877
3878 /* Branch pages have no data, so if using integer keys,
3879 * alignment is guaranteed. Use faster mdb_cmp_int.
3880 */
3881 if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) {
3882 if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t))
3883 cmp = mdb_cmp_long;
3884 else
3885 cmp = mdb_cmp_int;
3886 }
3887
3888 if (IS_LEAF2(mp)) {
3889 nodekey.mv_size = mc->mc_db->md_pad;
3890 node = NODEPTR(mp, 0); /* fake */
3891 while (low <= high) {
3892 i = (low + high) >> 1;
3893 nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size);
3894 rc = cmp(key, &nodekey);
3895 DPRINTF("found leaf index %u [%s], rc = %i",
3896 i, DKEY(&nodekey), rc);
3897 if (rc == 0)
3898 break;
3899 if (rc > 0)
3900 low = i + 1;
3901 else
3902 high = i - 1;
3903 }
3904 } else {
3905 while (low <= high) {
3906 i = (low + high) >> 1;
3907
3908 node = NODEPTR(mp, i);
3909 nodekey.mv_size = NODEKSZ(node);
3910 nodekey.mv_data = NODEKEY(node);
3911
3912 rc = cmp(key, &nodekey);
3913 #if MDB_DEBUG
3914 if (IS_LEAF(mp))
3915 DPRINTF("found leaf index %u [%s], rc = %i",
3916 i, DKEY(&nodekey), rc);
3917 else
3918 DPRINTF("found branch index %u [%s -> %zu], rc = %i",
3919 i, DKEY(&nodekey), NODEPGNO(node), rc);
3920 #endif
3921 if (rc == 0)
3922 break;
3923 if (rc > 0)
3924 low = i + 1;
3925 else
3926 high = i - 1;
3927 }
3928 }
3929
3930 if (rc > 0) { /* Found entry is less than the key. */
3931 i++; /* Skip to get the smallest entry larger than key. */
3932 if (!IS_LEAF2(mp))
3933 node = NODEPTR(mp, i);
3934 }
3935 if (exactp)
3936 *exactp = (rc == 0);
3937 /* store the key index */
3938 mc->mc_ki[mc->mc_top] = i;
3939 if (i >= nkeys)
3940 /* There is no entry larger or equal to the key. */
3941 return NULL;
3942
3943 /* nodeptr is fake for LEAF2 */
3944 return node;
3945 }
3946
3947 #if 0
/* Disabled helper: everything up to the matching #endif is compiled out.
 * As written it walks the cursor list mt_cursors[mc->mc_dbi] of mc's
 * transaction and calls func(mc, m2) for every cursor m2 whose
 * top-of-stack page is the same page as mc's.
 * NOTE(review): the `func` parameter has no declared type, so this would
 * need a proper prototype before it could be re-enabled.
 */
3948 static void
3949 mdb_cursor_adjust(MDB_cursor *mc, func)
3950 {
3951 MDB_cursor *m2;
3952
3953 for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
3954 if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) {
3955 func(mc, m2);
3956 }
3957 }
3958 }
3959 #endif
3960
3961 /** Pop a page off the top of the cursor's stack. */
3962 static void
3963 mdb_cursor_pop(MDB_cursor *mc)
3964 {
3965 if (mc->mc_snum) {
3966 #ifndef MDB_DEBUG_SKIP
3967 MDB_page *top = mc->mc_pg[mc->mc_top];
3968 #endif
3969 mc->mc_snum--;
3970 if (mc->mc_snum)
3971 mc->mc_top--;
3972
3973 DPRINTF("popped page %zu off db %u cursor %p", top->mp_pgno,
3974 mc->mc_dbi, (void *) mc);
3975 }
3976 }
3977
3978 /** Push a page onto the top of the cursor's stack. */
3979 static int
3980 mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
3981 {
3982 DPRINTF("pushing page %zu on db %u cursor %p", mp->mp_pgno,
3983 mc->mc_dbi, (void *) mc);
3984
3985 if (mc->mc_snum >= CURSOR_STACK) {
3986 assert(mc->mc_snum < CURSOR_STACK);
3987 return MDB_CURSOR_FULL;
3988 }
3989
3990 mc->mc_top = mc->mc_snum++;
3991 mc->mc_pg[mc->mc_top] = mp;
3992 mc->mc_ki[mc->mc_top] = 0;
3993
3994 return MDB_SUCCESS;
3995 }
3996
3997 /** Find the address of the page corresponding to a given page number.
3998 * @param[in] txn the transaction for this access.
3999 * @param[in] pgno the page number for the page to retrieve.
4000 * @param[out] ret address of a pointer where the page's address will be stored.
4001 * @return 0 on success, non-zero on failure.
4002 */
4003 static int
4004 mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret)
4005 {
4006 MDB_page *p = NULL;
4007
4008 if (!((txn->mt_flags & MDB_TXN_RDONLY) |
4009 (txn->mt_env->me_flags & MDB_WRITEMAP)))
4010 {
4011 MDB_txn *tx2 = txn;
4012 do {
4013 MDB_ID2L dl = tx2->mt_u.dirty_list;
4014 if (dl[0].mid) {
4015 unsigned x = mdb_mid2l_search(dl, pgno);
4016 if (x <= dl[0].mid && dl[x].mid == pgno) {
4017 p = dl[x].mptr;
4018 goto done;
4019 }
4020 }
4021 } while ((tx2 = tx2->mt_parent) != NULL);
4022 }
4023
4024 if (pgno < txn->mt_next_pgno) {
4025 p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno);
4026 } else {
4027 DPRINTF("page %zu not found", pgno);
4028 assert(p != NULL);
4029 }
4030
4031 done:
4032 *ret = p;
4033 return (p != NULL) ? MDB_SUCCESS : MDB_PAGE_NOTFOUND;
4034 }
4035
4036 /** Search for the page a given key should be in.
4037 * Pushes parent pages on the cursor stack. This function continues a
4038 * search on a cursor that has already been initialized. (Usually by
4039 * #mdb_page_search() but also by #mdb_node_move().)
4040 * @param[in,out] mc the cursor for this operation.
4041 * @param[in] key the key to search for. If NULL, search for the lowest
4042 * page. (This is used by #mdb_cursor_first().)
4043 * @param[in] flags If MDB_PS_MODIFY set, visited pages are updated with new page numbers.
4044 * If MDB_PS_ROOTONLY set, just fetch root node, no further lookups.
4045 * @return 0 on success, non-zero on failure.
4046 */
4047 static int
4048 mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify)
4049 {
4050 MDB_page *mp = mc->mc_pg[mc->mc_top];
4051 DKBUF;
4052 int rc;
4053
4054
4055 while (IS_BRANCH(mp)) {
4056 MDB_node *node;
4057 indx_t i;
4058
4059 DPRINTF("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp));
4060 assert(NUMKEYS(mp) > 1);
4061 DPRINTF("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0)));
4062
4063 if (key == NULL) /* Initialize cursor to first page. */
4064 i = 0;
4065 else if (key->mv_size > MDB_MAXKEYSIZE && key->mv_data == NULL) {
4066 /* cursor to last page */
4067 i = NUMKEYS(mp)-1;
4068 } else {
4069 int exact;
4070 node = mdb_node_search(mc, key, &exact);
4071 if (node == NULL)
4072 i = NUMKEYS(mp) - 1;
4073 else {
4074 i = mc->mc_ki[mc->mc_top];
4075 if (!exact) {
4076 assert(i > 0);
4077 i--;
4078 }
4079 }
4080 }
4081
4082 if (key)
4083 DPRINTF("following index %u for key [%s]",
4084 i, DKEY(key));
4085 assert(i < NUMKEYS(mp));
4086 node = NODEPTR(mp, i);
4087
4088 if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp)))
4089 return rc;
4090
4091 mc->mc_ki[mc->mc_top] = i;
4092 if ((rc = mdb_cursor_push(mc, mp)))
4093 return rc;
4094
4095 if (modify) {
4096 if ((rc = mdb_page_touch(mc)) != 0)
4097 return rc;
4098 mp = mc->mc_pg[mc->mc_top];
4099 }
4100 }
4101
4102 if (!IS_LEAF(mp)) {
4103 DPRINTF("internal error, index points to a %02X page!?",
4104 mp->mp_flags);
4105 return MDB_CORRUPTED;
4106 }
4107
4108 DPRINTF("found leaf page %zu for key [%s]", mp->mp_pgno,
4109 key ? DKEY(key) : NULL);
4110
4111 return MDB_SUCCESS;
4112 }
4113
4114 /** Search for the page a given key should be in.
4115 * Pushes parent pages on the cursor stack. This function just sets up
4116 * the search; it finds the root page for \b mc's database and sets this
4117 * as the root of the cursor's stack. Then #mdb_page_search_root() is
4118 * called to complete the search.
4119 * @param[in,out] mc the cursor for this operation.
4120 * @param[in] key the key to search for. If NULL, search for the lowest
4121 * page. (This is used by #mdb_cursor_first().)
4122 * @param[in] modify If true, visited pages are updated with new page numbers.
4123 * @return 0 on success, non-zero on failure.
4124 */
4125 static int
4126 mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
4127 {
4128 int rc;
4129 pgno_t root;
4130
4131 /* Make sure the txn is still viable, then find the root from
4132 * the txn's db table.
4133 */
4134 if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) {
4135 DPUTS("transaction has failed, must abort");
4136 return EINVAL;
4137 } else {
4138 /* Make sure we're using an up-to-date root */
4139 if (mc->mc_dbi > MAIN_DBI) {
4140 if ((*mc->mc_dbflag & DB_STALE) ||
4141 ((flags & MDB_PS_MODIFY) && !(*mc->mc_dbflag & DB_DIRTY))) {
4142 MDB_cursor mc2;
4143 unsigned char dbflag = 0;
4144 mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL);
4145 rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, flags & MDB_PS_MODIFY);
4146 if (rc)
4147 return rc;
4148 if (*mc->mc_dbflag & DB_STALE) {
4149 MDB_val data;
4150 int exact = 0;
4151 uint16_t flags;
4152 MDB_node *leaf = mdb_node_search(&mc2,
4153 &mc->mc_dbx->md_name, &exact);
4154 if (!exact)
4155 return MDB_NOTFOUND;
4156 mdb_node_read(mc->mc_txn, leaf, &data);
4157 memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)),
4158 sizeof(uint16_t));
4159 /* The txn may not know this DBI, or another process may
4160 * have dropped and recreated the DB with other flags.
4161 */
4162 if ((mc->mc_db->md_flags & PERSISTENT_FLAGS) != flags)
4163 return MDB_INCOMPATIBLE;
4164 memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db));
4165 }
4166 if (flags & MDB_PS_MODIFY)
4167 dbflag = DB_DIRTY;
4168 *mc->mc_dbflag &= ~DB_STALE;
4169 *mc->mc_dbflag |= dbflag;
4170 }
4171 }
4172 root = mc->mc_db->md_root;
4173
4174 if (root == P_INVALID) { /* Tree is empty. */
4175 DPUTS("tree is empty");
4176 return MDB_NOTFOUND;
4177 }
4178 }
4179
4180 assert(root > 1);
4181 if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
4182 if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0])))
4183 return rc;
4184
4185 mc->mc_snum = 1;
4186 mc->mc_top = 0;
4187
4188 DPRINTF("db %u root page %zu has flags 0x%X",
4189 mc->mc_dbi, root, mc->mc_pg[0]->mp_flags);
4190
4191 if (flags & MDB_PS_MODIFY) {
4192 if ((rc = mdb_page_touch(mc)))
4193 return rc;
4194 }
4195
4196 if (flags & MDB_PS_ROOTONLY)
4197 return MDB_SUCCESS;
4198
4199 return mdb_page_search_root(mc, key, flags);
4200 }
4201
4202 /** Return the data associated with a given node.
4203 * @param[in] txn The transaction for this operation.
4204 * @param[in] leaf The node being read.
4205 * @param[out] data Updated to point to the node's data.
4206 * @return 0 on success, non-zero on failure.
4207 */
4208 static int
4209 mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data)
4210 {
4211 MDB_page *omp; /* overflow page */
4212 pgno_t pgno;
4213 int rc;
4214
4215 if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
4216 data->mv_size = NODEDSZ(leaf);
4217 data->mv_data = NODEDATA(leaf);
4218 return MDB_SUCCESS;
4219 }
4220
4221 /* Read overflow data.
4222 */
4223 data->mv_size = NODEDSZ(leaf);
4224 memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
4225 if ((rc = mdb_page_get(txn, pgno, &omp))) {
4226 DPRINTF("read overflow page %zu failed", pgno);
4227 return rc;
4228 }
4229 data->mv_data = METADATA(omp);
4230
4231 return MDB_SUCCESS;
4232 }
4233
4234 int
4235 mdb_get(MDB_txn *txn, MDB_dbi dbi,
4236 MDB_val *key, MDB_val *data)
4237 {
4238 MDB_cursor mc;
4239 MDB_xcursor mx;
4240 int exact = 0;
4241 DKBUF;
4242
4243 assert(key);
4244 assert(data);
4245 DPRINTF("===> get db %u key [%s]", dbi, DKEY(key));
4246
4247 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
4248 return EINVAL;
4249
4250 if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
4251 return EINVAL;
4252 }
4253
4254 mdb_cursor_init(&mc, txn, dbi, &mx);
4255 return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
4256 }
4257
4258 /** Find a sibling for a page.
4259 * Replaces the page at the top of the cursor's stack with the
4260 * specified sibling, if one exists.
4261 * @param[in] mc The cursor for this operation.
4262 * @param[in] move_right Non-zero if the right sibling is requested,
4263 * otherwise the left sibling.
4264 * @return 0 on success, non-zero on failure.
4265 */
4266 static int
4267 mdb_cursor_sibling(MDB_cursor *mc, int move_right)
4268 {
4269 int rc;
4270 MDB_node *indx;
4271 MDB_page *mp;
4272
4273 if (mc->mc_snum < 2) {
4274 return MDB_NOTFOUND; /* root has no siblings */
4275 }
4276
4277 mdb_cursor_pop(mc);
4278 DPRINTF("parent page is page %zu, index %u",
4279 mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]);
4280
4281 if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
4282 : (mc->mc_ki[mc->mc_top] == 0)) {
4283 DPRINTF("no more keys left, moving to %s sibling",
4284 move_right ? "right" : "left");
4285 if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) {
4286 /* undo cursor_pop before returning */
4287 mc->mc_top++;
4288 mc->mc_snum++;
4289 return rc;
4290 }
4291 } else {
4292 if (move_right)
4293 mc->mc_ki[mc->mc_top]++;
4294 else
4295 mc->mc_ki[mc->mc_top]--;
4296 DPRINTF("just moving to %s index key %u",
4297 move_right ? "right" : "left", mc->mc_ki[mc->mc_top]);
4298 }
4299 assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
4300
4301 indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
4302 if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp)))
4303 return rc;
4304
4305 mdb_cursor_push(mc, mp);
4306 if (!move_right)
4307 mc->mc_ki[mc->mc_top] = NUMKEYS(mp)-1;
4308
4309 return MDB_SUCCESS;
4310 }
4311
4312 /** Move the cursor to the next data item. */
4313 static int
4314 mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
4315 {
4316 MDB_page *mp;
4317 MDB_node *leaf;
4318 int rc;
4319
4320 if (mc->mc_flags & C_EOF) {
4321 return MDB_NOTFOUND;
4322 }
4323
4324 assert(mc->mc_flags & C_INITIALIZED);
4325
4326 mp = mc->mc_pg[mc->mc_top];
4327
4328 if (mc->mc_db->md_flags & MDB_DUPSORT) {
4329 leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4330 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4331 if (op == MDB_NEXT || op == MDB_NEXT_DUP) {
4332 rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT);
4333 if (op != MDB_NEXT || rc == MDB_SUCCESS)
4334 return rc;
4335 }
4336 } else {
4337 mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
4338 if (op == MDB_NEXT_DUP)
4339 return MDB_NOTFOUND;
4340 }
4341 }
4342
4343 DPRINTF("cursor_next: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc);
4344
4345 if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) {
4346 DPUTS("=====> move to next sibling page");
4347 if (mdb_cursor_sibling(mc, 1) != MDB_SUCCESS) {
4348 mc->mc_flags |= C_EOF;
4349 mc->mc_flags &= ~C_INITIALIZED;
4350 return MDB_NOTFOUND;
4351 }
4352 mp = mc->mc_pg[mc->mc_top];
4353 DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]);
4354 } else
4355 mc->mc_ki[mc->mc_top]++;
4356
4357 DPRINTF("==> cursor points to page %zu with %u keys, key index %u",
4358 mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]);
4359
4360 if (IS_LEAF2(mp)) {
4361 key->mv_size = mc->mc_db->md_pad;
4362 key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
4363 return MDB_SUCCESS;
4364 }
4365
4366 assert(IS_LEAF(mp));
4367 leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4368
4369 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4370 mdb_xcursor_init1(mc, leaf);
4371 }
4372 if (data) {
4373 if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
4374 return rc;
4375
4376 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4377 rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
4378 if (rc != MDB_SUCCESS)
4379 return rc;
4380 }
4381 }
4382
4383 MDB_GET_KEY(leaf, key);
4384 return MDB_SUCCESS;
4385 }
4386
4387 /** Move the cursor to the previous data item. */
4388 static int
4389 mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
4390 {
4391 MDB_page *mp;
4392 MDB_node *leaf;
4393 int rc;
4394
4395 assert(mc->mc_flags & C_INITIALIZED);
4396
4397 mp = mc->mc_pg[mc->mc_top];
4398
4399 if (mc->mc_db->md_flags & MDB_DUPSORT) {
4400 leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4401 if (op == MDB_PREV || op == MDB_PREV_DUP) {
4402 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4403 rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
4404 if (op != MDB_PREV || rc == MDB_SUCCESS)
4405 return rc;
4406 } else {
4407 mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
4408 if (op == MDB_PREV_DUP)
4409 return MDB_NOTFOUND;
4410 }
4411 }
4412 }
4413
4414 DPRINTF("cursor_prev: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc);
4415
4416 if (mc->mc_ki[mc->mc_top] == 0) {
4417 DPUTS("=====> move to prev sibling page");
4418 if (mdb_cursor_sibling(mc, 0) != MDB_SUCCESS) {
4419 mc->mc_flags &= ~C_INITIALIZED;
4420 return MDB_NOTFOUND;
4421 }
4422 mp = mc->mc_pg[mc->mc_top];
4423 mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1;
4424 DPRINTF("prev page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]);
4425 } else
4426 mc->mc_ki[mc->mc_top]--;
4427
4428 mc->mc_flags &= ~C_EOF;
4429
4430 DPRINTF("==> cursor points to page %zu with %u keys, key index %u",
4431 mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]);
4432
4433 if (IS_LEAF2(mp)) {
4434 key->mv_size = mc->mc_db->md_pad;
4435 key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
4436 return MDB_SUCCESS;
4437 }
4438
4439 assert(IS_LEAF(mp));
4440 leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4441
4442 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4443 mdb_xcursor_init1(mc, leaf);
4444 }
4445 if (data) {
4446 if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
4447 return rc;
4448
4449 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4450 rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
4451 if (rc != MDB_SUCCESS)
4452 return rc;
4453 }
4454 }
4455
4456 MDB_GET_KEY(leaf, key);
4457 return MDB_SUCCESS;
4458 }
4459
4460 /** Set the cursor on a specific data item. */
4461 static int
4462 mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
4463 MDB_cursor_op op, int *exactp)
4464 {
4465 int rc;
4466 MDB_page *mp;
4467 MDB_node *leaf = NULL;
4468 DKBUF;
4469
4470 assert(mc);
4471 assert(key);
4472 assert(key->mv_size > 0);
4473
4474 /* See if we're already on the right page */
4475 if (mc->mc_flags & C_INITIALIZED) {
4476 MDB_val nodekey;
4477
4478 mp = mc->mc_pg[mc->mc_top];
4479 if (!NUMKEYS(mp)) {
4480 mc->mc_ki[mc->mc_top] = 0;
4481 return MDB_NOTFOUND;
4482 }
4483 if (mp->mp_flags & P_LEAF2) {
4484 nodekey.mv_size = mc->mc_db->md_pad;
4485 nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size);
4486 } else {
4487 leaf = NODEPTR(mp, 0);
4488 MDB_GET_KEY(leaf, &nodekey);
4489 }
4490 rc = mc->mc_dbx->md_cmp(key, &nodekey);
4491 if (rc == 0) {
4492 /* Probably happens rarely, but first node on the page
4493 * was the one we wanted.
4494 */
4495 mc->mc_ki[mc->mc_top] = 0;
4496 if (exactp)
4497 *exactp = 1;
4498 goto set1;
4499 }
4500 if (rc > 0) {
4501 unsigned int i;
4502 unsigned int nkeys = NUMKEYS(mp);
4503 if (nkeys > 1) {
4504 if (mp->mp_flags & P_LEAF2) {
4505 nodekey.mv_data = LEAF2KEY(mp,
4506 nkeys-1, nodekey.mv_size);
4507 } else {
4508 leaf = NODEPTR(mp, nkeys-1);
4509 MDB_GET_KEY(leaf, &nodekey);
4510 }
4511 rc = mc->mc_dbx->md_cmp(key, &nodekey);
4512 if (rc == 0) {
4513 /* last node was the one we wanted */
4514 mc->mc_ki[mc->mc_top] = nkeys-1;
4515 if (exactp)
4516 *exactp = 1;
4517 goto set1;
4518 }
4519 if (rc < 0) {
4520 if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
4521 /* This is definitely the right page, skip search_page */
4522 if (mp->mp_flags & P_LEAF2) {
4523 nodekey.mv_data = LEAF2KEY(mp,
4524 mc->mc_ki[mc->mc_top], nodekey.mv_size);
4525 } else {
4526 leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4527 MDB_GET_KEY(leaf, &nodekey);
4528 }
4529 rc = mc->mc_dbx->md_cmp(key, &nodekey);
4530 if (rc == 0) {
4531 /* current node was the one we wanted */
4532 if (exactp)
4533 *exactp = 1;
4534 goto set1;
4535 }
4536 }
4537 rc = 0;
4538 goto set2;
4539 }
4540 }
4541 /* If any parents have right-sibs, search.
4542 * Otherwise, there's nothing further.
4543 */
4544 for (i=0; i<mc->mc_top; i++)
4545 if (mc->mc_ki[i] <
4546 NUMKEYS(mc->mc_pg[i])-1)
4547 break;
4548 if (i == mc->mc_top) {
4549 /* There are no other pages */
4550 mc->mc_ki[mc->mc_top] = nkeys;
4551 return MDB_NOTFOUND;
4552 }
4553 }
4554 if (!mc->mc_top) {
4555 /* There are no other pages */
4556 mc->mc_ki[mc->mc_top] = 0;
4557 return MDB_NOTFOUND;
4558 }
4559 }
4560
4561 rc = mdb_page_search(mc, key, 0);
4562 if (rc != MDB_SUCCESS)
4563 return rc;
4564
4565 mp = mc->mc_pg[mc->mc_top];
4566 assert(IS_LEAF(mp));
4567
4568 set2:
4569 leaf = mdb_node_search(mc, key, exactp);
4570 if (exactp != NULL && !*exactp) {
4571 /* MDB_SET specified and not an exact match. */
4572 return MDB_NOTFOUND;
4573 }
4574
4575 if (leaf == NULL) {
4576 DPUTS("===> inexact leaf not found, goto sibling");
4577 if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
4578 return rc; /* no entries matched */
4579 mp = mc->mc_pg[mc->mc_top];
4580 assert(IS_LEAF(mp));
4581 leaf = NODEPTR(mp, 0);
4582 }
4583
4584 set1:
4585 mc->mc_flags |= C_INITIALIZED;
4586 mc->mc_flags &= ~C_EOF;
4587
4588 if (IS_LEAF2(mp)) {
4589 key->mv_size = mc->mc_db->md_pad;
4590 key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
4591 return MDB_SUCCESS;
4592 }
4593
4594 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4595 mdb_xcursor_init1(mc, leaf);
4596 }
4597 if (data) {
4598 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4599 if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
4600 rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
4601 } else {
4602 int ex2, *ex2p;
4603 if (op == MDB_GET_BOTH) {
4604 ex2p = &ex2;
4605 ex2 = 0;
4606 } else {
4607 ex2p = NULL;
4608 }
4609 rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p);
4610 if (rc != MDB_SUCCESS)
4611 return rc;
4612 }
4613 } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) {
4614 MDB_val d2;
4615 if ((rc = mdb_node_read(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS)
4616 return rc;
4617 rc = mc->mc_dbx->md_dcmp(data, &d2);
4618 if (rc) {
4619 if (op == MDB_GET_BOTH || rc > 0)
4620 return MDB_NOTFOUND;
4621 }
4622
4623 } else {
4624 if (mc->mc_xcursor)
4625 mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
4626 if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
4627 return rc;
4628 }
4629 }
4630
4631 /* The key already matches in all other cases */
4632 if (op == MDB_SET_RANGE || op == MDB_SET_KEY)
4633 MDB_GET_KEY(leaf, key);
4634 DPRINTF("==> cursor placed on key [%s]", DKEY(key));
4635
4636 return rc;
4637 }
4638
4639 /** Move the cursor to the first item in the database. */
4640 static int
4641 mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
4642 {
4643 int rc;
4644 MDB_node *leaf;
4645
4646 if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
4647 rc = mdb_page_search(mc, NULL, 0);
4648 if (rc != MDB_SUCCESS)
4649 return rc;
4650 }
4651 assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
4652
4653 leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0);
4654 mc->mc_flags |= C_INITIALIZED;
4655 mc->mc_flags &= ~C_EOF;
4656
4657 mc->mc_ki[mc->mc_top] = 0;
4658
4659 if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
4660 key->mv_size = mc->mc_db->md_pad;
4661 key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size);
4662 return MDB_SUCCESS;
4663 }
4664
4665 if (data) {
4666 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4667 mdb_xcursor_init1(mc, leaf);
4668 rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
4669 if (rc)
4670 return rc;
4671 } else {
4672 if (mc->mc_xcursor)
4673 mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
4674 if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
4675 return rc;
4676 }
4677 }
4678 MDB_GET_KEY(leaf, key);
4679 return MDB_SUCCESS;
4680 }
4681
4682 /** Move the cursor to the last item in the database. */
4683 static int
4684 mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
4685 {
4686 int rc;
4687 MDB_node *leaf;
4688
4689 if (!(mc->mc_flags & C_EOF)) {
4690
4691 if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
4692 MDB_val lkey;
4693
4694 lkey.mv_size = MDB_MAXKEYSIZE+1;
4695 lkey.mv_data = NULL;
4696 rc = mdb_page_search(mc, &lkey, 0);
4697 if (rc != MDB_SUCCESS)
4698 return rc;
4699 }
4700 assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
4701
4702 mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1;
4703 }
4704 mc->mc_flags |= C_INITIALIZED|C_EOF;
4705 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
4706
4707 if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
4708 key->mv_size = mc->mc_db->md_pad;
4709 key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size);
4710 return MDB_SUCCESS;
4711 }
4712
4713 if (data) {
4714 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4715 mdb_xcursor_init1(mc, leaf);
4716 rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
4717 if (rc)
4718 return rc;
4719 } else {
4720 if (mc->mc_xcursor)
4721 mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
4722 if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
4723 return rc;
4724 }
4725 }
4726
4727 MDB_GET_KEY(leaf, key);
4728 return MDB_SUCCESS;
4729 }
4730
4731 int
4732 mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
4733 MDB_cursor_op op)
4734 {
4735 int rc;
4736 int exact = 0;
4737
4738 assert(mc);
4739
4740 switch (op) {
4741 case MDB_GET_CURRENT:
4742 if (!(mc->mc_flags & C_INITIALIZED)) {
4743 rc = EINVAL;
4744 } else {
4745 MDB_page *mp = mc->mc_pg[mc->mc_top];
4746 if (!NUMKEYS(mp)) {
4747 mc->mc_ki[mc->mc_top] = 0;
4748 rc = MDB_NOTFOUND;
4749 break;
4750 }
4751 rc = MDB_SUCCESS;
4752 if (IS_LEAF2(mp)) {
4753 key->mv_size = mc->mc_db->md_pad;
4754 key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
4755 } else {
4756 MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
4757 MDB_GET_KEY(leaf, key);
4758 if (data) {
4759 if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
4760 rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT);
4761 } else {
4762 rc = mdb_node_read(mc->mc_txn, leaf, data);
4763 }
4764 }
4765 }
4766 }
4767 break;
4768 case MDB_GET_BOTH:
4769 case MDB_GET_BOTH_RANGE:
4770 if (data == NULL || mc->mc_xcursor == NULL) {
4771 rc = EINVAL;
4772 break;
4773 }
4774 /* FALLTHRU */
4775 case MDB_SET:
4776 case MDB_SET_KEY:
4777 case MDB_SET_RANGE:
4778 if (key == NULL || key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
4779 rc = EINVAL;
4780 } else if (op == MDB_SET_RANGE)
4781 rc = mdb_cursor_set(mc, key, data, op, NULL);
4782 else
4783 rc = mdb_cursor_set(mc, key, data, op, &exact);
4784 break;
4785 case MDB_GET_MULTIPLE:
4786 if (data == NULL ||
4787 !(mc->mc_db->md_flags & MDB_DUPFIXED) ||
4788 !(mc->mc_flags & C_INITIALIZED)) {
4789 rc = EINVAL;
4790 break;
4791 }
4792 rc = MDB_SUCCESS;
4793 if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) ||
4794 (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF))
4795 break;
4796 goto fetchm;
4797 case MDB_NEXT_MULTIPLE:
4798 if (data == NULL ||
4799 !(mc->mc_db->md_flags & MDB_DUPFIXED)) {
4800 rc = EINVAL;
4801 break;
4802 }
4803 if (!(mc->mc_flags & C_INITIALIZED))
4804 rc = mdb_cursor_first(mc, key, data);
4805 else
4806 rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP);
4807 if (rc == MDB_SUCCESS) {
4808 if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
4809 MDB_cursor *mx;
4810 fetchm:
4811 mx = &mc->mc_xcursor->mx_cursor;
4812 data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) *
4813 mx->mc_db->md_pad;
4814 data->mv_data = METADATA(mx->mc_pg[mx->mc_top]);
4815 mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1;
4816 } else {
4817 rc = MDB_NOTFOUND;
4818 }
4819 }
4820 break;
4821 case MDB_NEXT:
4822 case MDB_NEXT_DUP:
4823 case MDB_NEXT_NODUP:
4824 if (!(mc->mc_flags & C_INITIALIZED))
4825 rc = mdb_cursor_first(mc, key, data);
4826 else
4827 rc = mdb_cursor_next(mc, key, data, op);
4828 break;
4829 case MDB_PREV:
4830 case MDB_PREV_DUP:
4831 case MDB_PREV_NODUP:
4832 if (!(mc->mc_flags & C_INITIALIZED)) {
4833 rc = mdb_cursor_last(mc, key, data);
4834 mc->mc_flags |= C_INITIALIZED;
4835 mc->mc_ki[mc->mc_top]++;
4836 }
4837 rc = mdb_cursor_prev(mc, key, data, op);
4838 break;
4839 case MDB_FIRST:
4840 rc = mdb_cursor_first(mc, key, data);
4841 break;
4842 case MDB_FIRST_DUP:
4843 if (data == NULL ||
4844 !(mc->mc_db->md_flags & MDB_DUPSORT) ||
4845 !(mc->mc_flags & C_INITIALIZED) ||
4846 !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) {
4847 rc = EINVAL;
4848 break;
4849 }
4850 rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
4851 break;
4852 case MDB_LAST:
4853 rc = mdb_cursor_last(mc, key, data);
4854 break;
4855 case MDB_LAST_DUP:
4856 if (data == NULL ||
4857 !(mc->mc_db->md_flags & MDB_DUPSORT) ||
4858 !(mc->mc_flags & C_INITIALIZED) ||
4859 !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) {
4860 rc = EINVAL;
4861 break;
4862 }
4863 rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
4864 break;
4865 default:
4866 DPRINTF("unhandled/unimplemented cursor operation %u", op);
4867 rc = EINVAL;
4868 break;
4869 }
4870
4871 return rc;
4872 }
4873
4874 /** Touch all the pages in the cursor stack.
4875 * Makes sure all the pages are writable, before attempting a write operation.
4876 * @param[in] mc The cursor to operate on.
4877 */
4878 static int
4879 mdb_cursor_touch(MDB_cursor *mc)
4880 {
4881 int rc;
4882
4883 if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) {
4884 MDB_cursor mc2;
4885 MDB_xcursor mcx;
4886 mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI,
4887 mc->mc_txn->mt_dbs[MAIN_DBI].md_flags & MDB_DUPSORT ? &mcx : NULL);
4888 rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY);
4889 if (rc)
4890 return rc;
4891 *mc->mc_dbflag |= DB_DIRTY;
4892 }
4893 for (mc->mc_top = 0; mc->mc_top < mc->mc_snum; mc->mc_top++) {
4894 rc = mdb_page_touch(mc);
4895 if (rc)
4896 return rc;
4897 }
4898 mc->mc_top = mc->mc_snum-1;
4899 return MDB_SUCCESS;
4900 }
4901
/** Store a key/data pair through a cursor.
 * The cursor is positioned on the key (or on its insertion point), all pages
 * on the cursor stack are made writable, and the item is then written. For
 * #MDB_DUPSORT databases the data item is stored as a key of a sub-page or
 * sub-database attached to the key's leaf node.
 *
 * @param[in] mc The cursor to operate on. On a successful insert of a new
 * key the cursor is flagged #C_INITIALIZED.
 * @param[in] key The key. Unless @b flags is exactly #MDB_CURRENT its size
 * must be between 1 and #MDB_MAXKEYSIZE.
 * @param[in,out] data The data. With #MDB_NOOVERWRITE and an existing key,
 * *data is overwritten with the existing item. With #MDB_RESERVE, mv_data is
 * set to the location where the caller should write the data. With
 * #MDB_MULTIPLE, data[0] describes one item and data[1].mv_size is the
 * number of contiguous items to store.
 * @param[in] flags Bits tested by this function: #MDB_CURRENT,
 * #MDB_NOOVERWRITE, #MDB_NODUPDATA, #MDB_APPEND, #MDB_APPENDDUP,
 * #MDB_RESERVE, #MDB_MULTIPLE.
 * @return 0 on success, non-zero on failure. Errors produced directly here:
 * <ul>
 * <li>EACCES - the transaction is read-only.
 * <li>EINVAL - bad key/data size, or #MDB_CURRENT on an uninitialized cursor.
 * <li>#MDB_KEYEXIST - #MDB_NOOVERWRITE and the key exists, #MDB_NODUPDATA
 * and the identical data item exists, or #MDB_APPEND and the key does not
 * sort after the last key.
 * </ul>
 * Other codes are passed through from the page/node helpers.
 */
4902 int
4903 mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
4904 unsigned int flags)
4905 {
4906 MDB_node *leaf = NULL;
4907 MDB_val xdata, *rdata, dkey;
4908 MDB_page *fp;
4909 MDB_db dummy;
4910 int do_sub = 0, insert = 0;
4911 unsigned int mcount = 0;
4912 size_t nsize;
4913 int rc, rc2;
4914 MDB_pagebuf pbuf;
4915 char dbuf[MDB_MAXKEYSIZE+1];
4916 unsigned int nflags;
4917 DKBUF;
4918
4919 if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY))
4920 return EACCES;
4921
/* Note the equality test: the key size is only skipped when flags is exactly MDB_CURRENT */
4922 if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE))
4923 return EINVAL;
4924
/* Dup data items are stored as keys of the child DB, so they share the key size limit */
4925 if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE)
4926 return EINVAL;
4927
4928 #if SIZE_MAX > MAXDATASIZE
4929 if (data->mv_size > MAXDATASIZE)
4930 return EINVAL;
4931 #endif
4932
4933 DPRINTF("==> put db %u key [%s], size %zu, data size %zu",
4934 mc->mc_dbi, DKEY(key), key ? key->mv_size:0, data->mv_size);
4935
/* A non-zero dkey.mv_size later means "a pre-existing single data item
 * must be written into the new sub-page first" (see put_sub).
 */
4936 dkey.mv_size = 0;
4937
4938 if (flags == MDB_CURRENT) {
4939 if (!(mc->mc_flags & C_INITIALIZED))
4940 return EINVAL;
4941 rc = MDB_SUCCESS;
4942 } else if (mc->mc_db->md_root == P_INVALID) {
4943 MDB_page *np;
4944 /* new database, write a root leaf page */
4945 DPUTS("allocating new root leaf page");
4946 if ((rc = mdb_page_new(mc, P_LEAF, 1, &np))) {
4947 return rc;
4948 }
4949 mc->mc_snum = 0;
4950 mdb_cursor_push(mc, np);
4951 mc->mc_db->md_root = np->mp_pgno;
4952 mc->mc_db->md_depth++;
4953 *mc->mc_dbflag |= DB_DIRTY;
/* DUPFIXED set without DUPSORT: the root becomes a LEAF2 page */
4954 if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED))
4955 == MDB_DUPFIXED)
4956 np->mp_flags |= P_LEAF2;
4957 mc->mc_flags |= C_INITIALIZED;
/* rc == MDB_NOTFOUND selects the "insert" branch at top:, and the jump
 * skips mdb_cursor_touch() since the only page on the stack is brand new.
 */
4958 rc = MDB_NOTFOUND;
4959 goto top;
4960 } else {
4961 int exact = 0;
4962 MDB_val d2;
4963 if (flags & MDB_APPEND) {
4964 MDB_val k2;
/* Position on the last key and require the new key to sort after it */
4965 rc = mdb_cursor_last(mc, &k2, &d2);
4966 if (rc == 0) {
4967 rc = mc->mc_dbx->md_cmp(key, &k2);
4968 if (rc > 0) {
4969 rc = MDB_NOTFOUND;
/* step past the last entry so the new node goes at the end */
4970 mc->mc_ki[mc->mc_top]++;
4971 } else {
4972 /* new key is <= last key */
4973 rc = MDB_KEYEXIST;
4974 }
4975 }
4976 } else {
4977 rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact);
4978 }
4979 if ((flags & MDB_NOOVERWRITE) && rc == 0) {
4980 DPRINTF("duplicate key [%s]", DKEY(key));
/* hand the existing data item back to the caller */
4981 *data = d2;
4982 return MDB_KEYEXIST;
4983 }
/* Anything other than "found" or "not found" (including the MDB_KEYEXIST
 * set by the MDB_APPEND check above) is returned to the caller here.
 */
4984 if (rc && rc != MDB_NOTFOUND)
4985 return rc;
4986 }
4987
4988 /* Cursor is positioned, now make sure all pages are writable */
4989 rc2 = mdb_cursor_touch(mc);
4990 if (rc2)
4991 return rc2;
4992
4993 top:
4994 /* The key already exists */
4995 if (rc == MDB_SUCCESS) {
4996 /* there's only a key anyway, so this is a no-op */
4997 if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
4998 unsigned int ksize = mc->mc_db->md_pad;
4999 if (key->mv_size != ksize)
5000 return EINVAL;
/* MDB_CURRENT on a LEAF2 page rewrites the fixed-size key bytes in place */
5001 if (flags == MDB_CURRENT) {
5002 char *ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
5003 memcpy(ptr, key->mv_data, ksize);
5004 }
5005 return MDB_SUCCESS;
5006 }
5007
5008 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5009
5010 /* DB has dups? */
5011 if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) {
5012 /* Was a single item before, must convert now */
/* more: is also the re-entry point for each further MDB_MULTIPLE item */
5013 more:
5014 if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
5015 /* Just overwrite the current item */
5016 if (flags == MDB_CURRENT)
5017 goto current;
5018
5019 dkey.mv_size = NODEDSZ(leaf);
5020 dkey.mv_data = NODEDATA(leaf);
5021 #if UINT_MAX < SIZE_MAX
5022 if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
5023 #ifdef MISALIGNED_OK
5024 mc->mc_dbx->md_dcmp = mdb_cmp_long;
5025 #else
5026 mc->mc_dbx->md_dcmp = mdb_cmp_cint;
5027 #endif
5028 #endif
5029 /* if data matches, ignore it */
/* NOTE(review): equality test - MDB_NODUPDATA combined with any other
 * flag bit takes the MDB_SUCCESS path here.
 */
5030 if (!mc->mc_dbx->md_dcmp(data, &dkey))
5031 return (flags == MDB_NODUPDATA) ? MDB_KEYEXIST : MDB_SUCCESS;
5032
5033 /* create a fake page for the dup items */
/* The old data item is copied aside first because its node is deleted below */
5034 memcpy(dbuf, dkey.mv_data, dkey.mv_size);
5035 dkey.mv_data = dbuf;
5036 fp = (MDB_page *)&pbuf;
5037 fp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
5038 fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
5039 fp->mp_lower = PAGEHDRSZ;
5040 fp->mp_upper = PAGEHDRSZ + dkey.mv_size + data->mv_size;
5041 if (mc->mc_db->md_flags & MDB_DUPFIXED) {
5042 fp->mp_flags |= P_LEAF2;
5043 fp->mp_pad = data->mv_size;
5044 fp->mp_upper += 2 * data->mv_size; /* leave space for 2 more */
5045 } else {
/* room for two index slots and two node headers, plus padding for odd sizes */
5046 fp->mp_upper += 2 * sizeof(indx_t) + 2 * NODESIZE +
5047 (dkey.mv_size & 1) + (data->mv_size & 1);
5048 }
5049 mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
5050 do_sub = 1;
/* The node's data is now the (empty) sub-page image; its size is mp_upper */
5051 rdata = &xdata;
5052 xdata.mv_size = fp->mp_upper;
5053 xdata.mv_data = fp;
5054 flags |= F_DUPDATA;
5055 goto new_sub;
5056 }
5057 if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
5058 /* See if we need to convert from fake page to subDB */
5059 MDB_page *mp;
5060 unsigned int offset;
5061 unsigned int i;
5062
5063 fp = NODEDATA(leaf);
5064 if (flags == MDB_CURRENT) {
/* reuse: write into the existing sub-page without resizing the node */
5065 reuse:
5066 fp->mp_flags |= P_DIRTY;
5067 COPY_PGNO(fp->mp_pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
5068 mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
5069 flags |= F_DUPDATA;
5070 goto put_sub;
5071 }
/* offset = how many bytes the sub-page has to grow by */
5072 if (mc->mc_db->md_flags & MDB_DUPFIXED) {
5073 offset = fp->mp_pad;
5074 if (SIZELEFT(fp) >= offset)
5075 goto reuse;
5076 offset *= 4; /* space for 4 more */
5077 } else {
5078 offset = NODESIZE + sizeof(indx_t) + data->mv_size;
5079 }
5080 offset += offset & 1;
5081 if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + NODEDSZ(leaf) +
5082 offset >= mc->mc_txn->mt_env->me_nodemax) {
5083 /* yes, convert it */
/* dummy becomes the MDB_db record stored as the node's data */
5084 dummy.md_flags = 0;
5085 if (mc->mc_db->md_flags & MDB_DUPFIXED) {
5086 dummy.md_pad = fp->mp_pad;
5087 dummy.md_flags = MDB_DUPFIXED;
5088 if (mc->mc_db->md_flags & MDB_INTEGERDUP)
5089 dummy.md_flags |= MDB_INTEGERKEY;
5090 }
5091 dummy.md_depth = 1;
5092 dummy.md_branch_pages = 0;
5093 dummy.md_leaf_pages = 1;
5094 dummy.md_overflow_pages = 0;
5095 dummy.md_entries = NUMKEYS(fp);
5096 rdata = &xdata;
5097 xdata.mv_size = sizeof(MDB_db);
5098 xdata.mv_data = &dummy;
5099 if ((rc = mdb_page_alloc(mc, 1, &mp)))
5100 return rc;
/* the sub-page contents move to the end of a full-size page */
5101 offset = mc->mc_txn->mt_env->me_psize - NODEDSZ(leaf);
5102 flags |= F_DUPDATA|F_SUBDATA;
5103 dummy.md_root = mp->mp_pgno;
5104 } else {
5105 /* no, just grow it */
5106 rdata = &xdata;
5107 xdata.mv_size = NODEDSZ(leaf) + offset;
5108 xdata.mv_data = &pbuf;
5109 mp = (MDB_page *)&pbuf;
5110 mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
5111 flags |= F_DUPDATA;
5112 }
/* Copy the old sub-page into mp, leaving `offset` extra free bytes
 * between mp_lower and mp_upper.
 */
5113 mp->mp_flags = fp->mp_flags | P_DIRTY;
5114 mp->mp_pad = fp->mp_pad;
5115 mp->mp_lower = fp->mp_lower;
5116 mp->mp_upper = fp->mp_upper + offset;
5117 if (IS_LEAF2(fp)) {
5118 memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
5119 } else {
5120 nsize = NODEDSZ(leaf) - fp->mp_upper;
5121 memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper, nsize);
5122 for (i=0; i<NUMKEYS(fp); i++)
5123 mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
5124 }
5125 mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
5126 do_sub = 1;
5127 goto new_sub;
5128 }
5129 /* data is on sub-DB, just store it */
5130 flags |= F_DUPDATA|F_SUBDATA;
5131 goto put_sub;
5132 }
5133 current:
5134 /* overflow page overwrites need special handling */
5135 if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
5136 MDB_page *omp;
5137 pgno_t pg;
5138 int ovpages, dpages;
5139
5140 ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize);
5141 dpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize);
5142 memcpy(&pg, NODEDATA(leaf), sizeof(pg));
/* NOTE(review): the return value of mdb_page_get() is not checked here */
5143 mdb_page_get(mc->mc_txn, pg, &omp);
5144 /* Is the ov page writable and large enough? */
5145 if ((omp->mp_flags & P_DIRTY) && ovpages >= dpages) {
5146 /* yes, overwrite it. Note in this case we don't
5147 * bother to try shrinking the node if the new data
5148 * is smaller than the overflow threshold.
5149 */
5150 if (F_ISSET(flags, MDB_RESERVE))
5151 data->mv_data = METADATA(omp);
5152 else
5153 memcpy(METADATA(omp), data->mv_data, data->mv_size);
5154 goto done;
5155 } else {
5156 /* no, free ovpages */
5157 int i;
5158 mc->mc_db->md_overflow_pages -= ovpages;
5159 for (i=0; i<ovpages; i++) {
5160 DPRINTF("freed ov page %zu", pg);
5161 mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg);
5162 pg++;
5163 }
5164 }
5165 } else if (NODEDSZ(leaf) == data->mv_size) {
5166 /* same size, just replace it. Note that we could
5167 * also reuse this node if the new data is smaller,
5168 * but instead we opt to shrink the node in that case.
5169 */
5170 if (F_ISSET(flags, MDB_RESERVE))
5171 data->mv_data = NODEDATA(leaf);
5172 else if (data->mv_size)
5173 memcpy(NODEDATA(leaf), data->mv_data, data->mv_size);
5174 else
/* old and new data are both empty: rewrite the key bytes in place instead */
5175 memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
5176 goto done;
5177 }
/* Replace = delete the old node and fall through to re-add it; md_entries
 * is decremented here and incremented again after the add below.
 */
5178 mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
5179 mc->mc_db->md_entries--;
5180 } else {
5181 DPRINTF("inserting key at index %i", mc->mc_ki[mc->mc_top]);
5182 insert = 1;
5183 }
5184
5185 rdata = data;
5186
/* From here rdata is what gets stored in the leaf node: the caller's data,
 * a sub-page image, or an MDB_db record.
 */
5187 new_sub:
5188 nflags = flags & NODE_ADD_FLAGS;
5189 nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(mc->mc_txn->mt_env, key, rdata);
5190 if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) {
/* F_DUPDATA without F_SUBDATA (a sub-page): MDB_APPEND is not passed to the split */
5191 if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA )
5192 nflags &= ~MDB_APPEND;
5193 if (!insert)
5194 nflags |= MDB_SPLIT_REPLACE;
5195 rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags);
5196 } else {
5197 /* There is room already in this leaf page. */
5198 rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags);
5199 if (rc == 0 && !do_sub && insert) {
5200 /* Adjust other cursors pointing to mp */
5201 MDB_cursor *m2, *m3;
5202 MDB_dbi dbi = mc->mc_dbi;
5203 unsigned i = mc->mc_top;
5204 MDB_page *mp = mc->mc_pg[i];
5205
/* a sub-cursor's mc_dbi is its parent's dbi + 1 (see mdb_xcursor_init0) */
5206 if (mc->mc_flags & C_SUB)
5207 dbi--;
5208
5209 for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
5210 if (mc->mc_flags & C_SUB)
5211 m3 = &m2->mc_xcursor->mx_cursor;
5212 else
5213 m3 = m2;
5214 if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
/* cursors at or after the insertion index move one slot to the right */
5215 if (m3->mc_pg[i] == mp && m3->mc_ki[i] >= mc->mc_ki[i]) {
5216 m3->mc_ki[i]++;
5217 }
5218 }
5219 }
5220 }
5221
5222 if (rc != MDB_SUCCESS)
5223 mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
5224 else {
5225 /* Now store the actual data in the child DB. Note that we're
5226 * storing the user data in the keys field, so there are strict
5227 * size limits on dupdata. The actual data fields of the child
5228 * DB are all zero size.
5229 */
5230 if (do_sub) {
5231 int xflags;
/* put_sub is also jumped to directly (from reuse: and the sub-DB case),
 * bypassing the do_sub test above.
 */
5232 put_sub:
5233 xdata.mv_size = 0;
5234 xdata.mv_data = "";
5235 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5236 if (flags & MDB_CURRENT) {
5237 xflags = MDB_CURRENT;
5238 } else {
5239 mdb_xcursor_init1(mc, leaf);
5240 xflags = (flags & MDB_NODUPDATA) ? MDB_NOOVERWRITE : 0;
5241 }
5242 /* converted, write the original data first */
5243 if (dkey.mv_size) {
5244 rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
/* NOTE(review): this early return does not set MDB_TXN_ERROR, unlike the
 * node add/split failure path above.
 */
5245 if (rc)
5246 return rc;
5247 {
5248 /* Adjust other cursors pointing to mp */
5249 MDB_cursor *m2;
5250 unsigned i = mc->mc_top;
5251 MDB_page *mp = mc->mc_pg[i];
5252
5253 for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
5254 if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
/* cursors on this same node get their sub-cursor re-initialized */
5255 if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) {
5256 mdb_xcursor_init1(m2, leaf);
5257 }
5258 }
5259 }
5260 /* we've done our job */
5261 dkey.mv_size = 0;
5262 }
5263 if (flags & MDB_APPENDDUP)
5264 xflags |= MDB_APPEND;
5265 rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
/* write the updated sub-database record back into the parent node */
5266 if (flags & F_SUBDATA) {
5267 void *db = NODEDATA(leaf);
5268 memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
5269 }
5270 }
5271 /* sub-writes might have failed so check rc again.
5272 * Don't increment count if we just replaced an existing item.
5273 */
5274 if (!rc && !(flags & MDB_CURRENT))
5275 mc->mc_db->md_entries++;
/* MDB_MULTIPLE: data[1].mv_size is the item count and data[0].mv_size the
 * size of one item; advance to the next item and loop back to more:.
 */
5276 if (flags & MDB_MULTIPLE) {
5277 mcount++;
5278 if (mcount < data[1].mv_size) {
5279 data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size;
5280 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5281 goto more;
5282 }
5283 }
5284 }
5285 done:
5286 /* If we succeeded and the key didn't exist before, make sure
5287 * the cursor is marked valid.
5288 */
5289 if (!rc && insert)
5290 mc->mc_flags |= C_INITIALIZED;
5291 return rc;
5292 }
5293
5294 int
5295 mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
5296 {
5297 MDB_node *leaf;
5298 int rc;
5299
5300 if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY))
5301 return EACCES;
5302
5303 if (!(mc->mc_flags & C_INITIALIZED))
5304 return EINVAL;
5305
5306 rc = mdb_cursor_touch(mc);
5307 if (rc)
5308 return rc;
5309
5310 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5311
5312 if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_DUPDATA)) {
5313 if (flags != MDB_NODUPDATA) {
5314 if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
5315 mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
5316 }
5317 rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, 0);
5318 /* If sub-DB still has entries, we're done */
5319 if (mc->mc_xcursor->mx_db.md_entries) {
5320 if (leaf->mn_flags & F_SUBDATA) {
5321 /* update subDB info */
5322 void *db = NODEDATA(leaf);
5323 memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
5324 } else {
5325 /* shrink fake page */
5326 mdb_node_shrink(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5327 }
5328 mc->mc_db->md_entries--;
5329 return rc;
5330 }
5331 /* otherwise fall thru and delete the sub-DB */
5332 }
5333
5334 if (leaf->mn_flags & F_SUBDATA) {
5335 /* add all the child DB's pages to the free list */
5336 rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
5337 if (rc == MDB_SUCCESS) {
5338 mc->mc_db->md_entries -=
5339 mc->mc_xcursor->mx_db.md_entries;
5340 }
5341 }
5342 }
5343
5344 return mdb_cursor_del0(mc, leaf);
5345 }
5346
5347 /** Allocate and initialize new pages for a database.
5348 * @param[in] mc a cursor on the database being added to.
5349 * @param[in] flags flags defining what type of page is being allocated.
5350 * @param[in] num the number of pages to allocate. This is usually 1,
5351 * unless allocating overflow pages for a large record.
5352 * @param[out] mp Address of a page, or NULL on failure.
5353 * @return 0 on success, non-zero on failure.
5354 */
5355 static int
5356 mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp)
5357 {
5358 MDB_page *np;
5359 int rc;
5360
5361 if ((rc = mdb_page_alloc(mc, num, &np)))
5362 return rc;
5363 DPRINTF("allocated new mpage %zu, page size %u",
5364 np->mp_pgno, mc->mc_txn->mt_env->me_psize);
5365 np->mp_flags = flags | P_DIRTY;
5366 np->mp_lower = PAGEHDRSZ;
5367 np->mp_upper = mc->mc_txn->mt_env->me_psize;
5368
5369 if (IS_BRANCH(np))
5370 mc->mc_db->md_branch_pages++;
5371 else if (IS_LEAF(np))
5372 mc->mc_db->md_leaf_pages++;
5373 else if (IS_OVERFLOW(np)) {
5374 mc->mc_db->md_overflow_pages += num;
5375 np->mp_pages = num;
5376 }
5377 *mp = np;
5378
5379 return 0;
5380 }
5381
5382 /** Calculate the size of a leaf node.
5383 * The size depends on the environment's page size; if a data item
5384 * is too large it will be put onto an overflow page and the node
5385 * size will only include the key and not the data. Sizes are always
5386 * rounded up to an even number of bytes, to guarantee 2-byte alignment
5387 * of the #MDB_node headers.
5388 * @param[in] env The environment handle.
5389 * @param[in] key The key for the node.
5390 * @param[in] data The data for the node.
5391 * @return The number of bytes needed to store the node.
5392 */
5393 static size_t
5394 mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data)
5395 {
5396 size_t sz;
5397
5398 sz = LEAFSIZE(key, data);
5399 if (sz >= env->me_nodemax) {
5400 /* put on overflow page */
5401 sz -= data->mv_size - sizeof(pgno_t);
5402 }
5403 sz += sz & 1;
5404
5405 return sz + sizeof(indx_t);
5406 }
5407
5408 /** Calculate the size of a branch node.
5409 * The size should depend on the environment's page size but since
5410 * we currently don't support spilling large keys onto overflow
5411 * pages, it's simply the size of the #MDB_node header plus the
5412 * size of the key. Sizes are always rounded up to an even number
5413 * of bytes, to guarantee 2-byte alignment of the #MDB_node headers.
5414 * @param[in] env The environment handle.
5415 * @param[in] key The key for the node.
5416 * @return The number of bytes needed to store the node.
5417 */
5418 static size_t
5419 mdb_branch_size(MDB_env *env, MDB_val *key)
5420 {
5421 size_t sz;
5422
5423 sz = INDXSIZE(key);
5424 if (sz >= env->me_nodemax) {
5425 /* put on overflow page */
5426 /* not implemented */
5427 /* sz -= key->size - sizeof(pgno_t); */
5428 }
5429
5430 return sz + sizeof(indx_t);
5431 }
5432
5433 /** Add a node to the page pointed to by the cursor.
 * The node is written at the top of the page's free space (just below
 * mp_upper) and its offset is inserted into the pointer array at @b indx.
 * On #P_LEAF2 pages only the fixed-size key is stored, with no node header.
5434 * @param[in] mc The cursor for this operation.
5435 * @param[in] indx The index on the page where the new node should be added.
5436 * @param[in] key The key for the new node.
5437 * @param[in] data The data for the new node, if any.
 * With #MDB_RESERVE, data->mv_data is set to the location reserved for the
 * data instead of the data being copied.
5438 * @param[in] pgno The page number, if adding a branch node.
5439 * @param[in] flags Flags for the node.
5440 * @return 0 on success, non-zero on failure. Possible errors are:
5441 * <ul>
5442 * <li>ENOMEM - failed to allocate overflow pages for the node.
5443 * <li>MDB_PAGE_FULL - there is insufficient room in the page. This error
5444 * should never happen since all callers already calculate the
5445 * page's free space before calling this function.
5446 * </ul>
5447 */
5448 static int
5449 mdb_node_add(MDB_cursor *mc, indx_t indx,
5450 MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags)
5451 {
5452 unsigned int i;
5453 size_t node_size = NODESIZE;
5454 indx_t ofs;
5455 MDB_node *node;
5456 MDB_page *mp = mc->mc_pg[mc->mc_top];
5457 MDB_page *ofp = NULL; /* overflow page */
5458 DKBUF;
5459
5460 assert(mp->mp_upper >= mp->mp_lower);
5461
5462 DPRINTF("add to %s %spage %zu index %i, data size %zu key size %zu [%s]",
5463 IS_LEAF(mp) ? "leaf" : "branch",
5464 IS_SUBP(mp) ? "sub-" : "",
5465 mp->mp_pgno, indx, data ? data->mv_size : 0,
5466 key ? key->mv_size : 0, key ? DKEY(key) : NULL);
5467
/* LEAF2 pages hold md_pad-sized keys back to back; data/pgno/flags are unused */
5468 if (IS_LEAF2(mp)) {
5469 /* Move higher keys up one slot. */
5470 int ksize = mc->mc_db->md_pad, dif;
5471 char *ptr = LEAF2KEY(mp, indx, ksize);
5472 dif = NUMKEYS(mp) - indx;
5473 if (dif > 0)
5474 memmove(ptr+ksize, ptr, dif*ksize);
5475 /* insert new key */
5476 memcpy(ptr, key->mv_data, ksize);
5477
5478 /* Just using these for counting */
/* lower grows by one index slot so NUMKEYS() counts the key; upper shrinks
 * so that the free space (upper - lower) drops by exactly ksize.
 */
5479 mp->mp_lower += sizeof(indx_t);
5480 mp->mp_upper -= ksize - sizeof(indx_t);
5481 return MDB_SUCCESS;
5482 }
5483
5484 if (key != NULL)
5485 node_size += key->mv_size;
5486
5487 if (IS_LEAF(mp)) {
5488 assert(data);
5489 if (F_ISSET(flags, F_BIGDATA)) {
5490 /* Data already on overflow page. */
5491 node_size += sizeof(pgno_t);
5492 } else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) {
5493 int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize);
5494 int rc;
5495 /* Put data on overflow page. */
5496 DPRINTF("data size is %zu, node would be %zu, put data on overflow page",
5497 data->mv_size, node_size+data->mv_size);
5498 node_size += sizeof(pgno_t);
5499 if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
5500 return rc;
5501 DPRINTF("allocated overflow page %zu", ofp->mp_pgno);
5502 flags |= F_BIGDATA;
5503 } else {
5504 node_size += data->mv_size;
5505 }
5506 }
/* round up to an even size to keep node headers 2-byte aligned */
5507 node_size += node_size & 1;
5508
/* NOTE(review): if overflow pages were allocated just above, this error
 * return does not release them.
 */
5509 if (node_size + sizeof(indx_t) > SIZELEFT(mp)) {
5510 DPRINTF("not enough room in page %zu, got %u ptrs",
5511 mp->mp_pgno, NUMKEYS(mp));
5512 DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower,
5513 mp->mp_upper - mp->mp_lower);
5514 DPRINTF("node size = %zu", node_size);
5515 return MDB_PAGE_FULL;
5516 }
5517
5518 /* Move higher pointers up one slot. */
5519 for (i = NUMKEYS(mp); i > indx; i--)
5520 mp->mp_ptrs[i] = mp->mp_ptrs[i - 1];
5521
5522 /* Adjust free space offsets. */
5523 ofs = mp->mp_upper - node_size;
5524 assert(ofs >= mp->mp_lower + sizeof(indx_t));
5525 mp->mp_ptrs[indx] = ofs;
5526 mp->mp_upper = ofs;
5527 mp->mp_lower += sizeof(indx_t);
5528
5529 /* Write the node data. */
5530 node = NODEPTR(mp, indx);
5531 node->mn_ksize = (key == NULL) ? 0 : key->mv_size;
5532 node->mn_flags = flags;
/* leaf nodes record the data size, branch nodes the child page number */
5533 if (IS_LEAF(mp))
5534 SETDSZ(node,data->mv_size);
5535 else
5536 SETPGNO(node,pgno);
5537
5538 if (key)
5539 memcpy(NODEKEY(node), key->mv_data, key->mv_size);
5540
5541 if (IS_LEAF(mp)) {
5542 assert(key);
5543 if (ofp == NULL) {
/* F_BIGDATA passed in by the caller: data->mv_data holds the existing pgno */
5544 if (F_ISSET(flags, F_BIGDATA))
5545 memcpy(node->mn_data + key->mv_size, data->mv_data,
5546 sizeof(pgno_t));
5547 else if (F_ISSET(flags, MDB_RESERVE))
5548 data->mv_data = node->mn_data + key->mv_size;
5549 else
5550 memcpy(node->mn_data + key->mv_size, data->mv_data,
5551 data->mv_size);
5552 } else {
/* overflow page allocated above: the node stores its pgno, the data goes on the page */
5553 memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno,
5554 sizeof(pgno_t));
5555 if (F_ISSET(flags, MDB_RESERVE))
5556 data->mv_data = METADATA(ofp);
5557 else
5558 memcpy(METADATA(ofp), data->mv_data, data->mv_size);
5559 }
5560 }
5561
5562 return MDB_SUCCESS;
5563 }
5564
5565 /** Delete the specified node from a page.
5566 * @param[in] mp The page to operate on.
5567 * @param[in] indx The index of the node to delete.
5568 * @param[in] ksize The size of a node. Only used if the page is
5569 * part of a #MDB_DUPFIXED database.
5570 */
5571 static void
5572 mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
5573 {
5574 unsigned int sz;
5575 indx_t i, j, numkeys, ptr;
5576 MDB_node *node;
5577 char *base;
5578
5579 #if MDB_DEBUG
5580 {
5581 pgno_t pgno;
5582 COPY_PGNO(pgno, mp->mp_pgno);
5583 DPRINTF("delete node %u on %s page %zu", indx,
5584 IS_LEAF(mp) ? "leaf" : "branch", pgno);
5585 }
5586 #endif
5587 assert(indx < NUMKEYS(mp));
5588
5589 if (IS_LEAF2(mp)) {
5590 int x = NUMKEYS(mp) - 1 - indx;
5591 base = LEAF2KEY(mp, indx, ksize);
5592 if (x)
5593 memmove(base, base + ksize, x * ksize);
5594 mp->mp_lower -= sizeof(indx_t);
5595 mp->mp_upper += ksize - sizeof(indx_t);
5596 return;
5597 }
5598
5599 node = NODEPTR(mp, indx);
5600 sz = NODESIZE + node->mn_ksize;
5601 if (IS_LEAF(mp)) {
5602 if (F_ISSET(node->mn_flags, F_BIGDATA))
5603 sz += sizeof(pgno_t);
5604 else
5605 sz += NODEDSZ(node);
5606 }
5607 sz += sz & 1;
5608
5609 ptr = mp->mp_ptrs[indx];
5610 numkeys = NUMKEYS(mp);
5611 for (i = j = 0; i < numkeys; i++) {
5612 if (i != indx) {
5613 mp->mp_ptrs[j] = mp->mp_ptrs[i];
5614 if (mp->mp_ptrs[i] < ptr)
5615 mp->mp_ptrs[j] += sz;
5616 j++;
5617 }
5618 }
5619
5620 base = (char *)mp + mp->mp_upper;
5621 memmove(base + sz, base, ptr - mp->mp_upper);
5622
5623 mp->mp_lower -= sizeof(indx_t);
5624 mp->mp_upper += sz;
5625 }
5626
5627 /** Compact the main page after deleting a node on a subpage.
 * The free space inside the sub-page (mp_upper - mp_lower of the sub-page)
 * is removed: the node's data size is reduced by that amount, the sub-page
 * header and index move up by it, and everything on the main page from
 * mp_upper through this node's header and key is shifted up to match.
5628 * @param[in] mp The main page to operate on.
5629 * @param[in] indx The index of the subpage on the main page.
5630 */
5631 static void
5632 mdb_node_shrink(MDB_page *mp, indx_t indx)
5633 {
5634 MDB_node *node;
5635 MDB_page *sp, *xp;
5636 char *base;
5637 int osize, nsize;
5638 int delta;
5639 indx_t i, numkeys, ptr;
5640
5641 node = NODEPTR(mp, indx);
5642 sp = (MDB_page *)NODEDATA(node);
5643 osize = NODEDSZ(node);
5644
/* delta = unused bytes inside the sub-page; xp = where its header will live */
5645 delta = sp->mp_upper - sp->mp_lower;
5646 SETDSZ(node, osize - delta);
5647 xp = (MDB_page *)((char *)sp + delta);
5648
5649 /* shift subpage upward */
5650 if (IS_LEAF2(sp)) {
5651 nsize = NUMKEYS(sp) * sp->mp_pad;
5652 memmove(METADATA(xp), METADATA(sp), nsize);
5653 } else {
/* this block-local `int i` shadows the function-level `indx_t i` */
5654 int i;
/* NOTE(review): nsize is assigned here but not used afterwards in this branch */
5655 nsize = osize - sp->mp_upper;
5656 numkeys = NUMKEYS(sp);
/* copied from the highest index down because xp lies above sp and the two
 * pointer arrays may overlap
 */
5657 for (i=numkeys-1; i>=0; i--)
5658 xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
5659 }
/* The new header has no free space left: upper == lower.
 * Presumably the header fields are written in this order so that an
 * overlapping sp header is read before it is overwritten - confirm against
 * the MDB_page layout before reordering.
 */
5660 xp->mp_upper = sp->mp_lower;
5661 xp->mp_lower = sp->mp_lower;
5662 xp->mp_flags = sp->mp_flags;
5663 xp->mp_pad = sp->mp_pad;
5664 COPY_PGNO(xp->mp_pgno, mp->mp_pgno);
5665
5666 /* shift lower nodes upward */
/* `<=` so that this node's own pointer is adjusted as well */
5667 ptr = mp->mp_ptrs[indx];
5668 numkeys = NUMKEYS(mp);
5669 for (i = 0; i < numkeys; i++) {
5670 if (mp->mp_ptrs[i] <= ptr)
5671 mp->mp_ptrs[i] += delta;
5672 }
5673
/* move everything from mp_upper up to and including this node's header and key */
5674 base = (char *)mp + mp->mp_upper;
5675 memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node));
5676 mp->mp_upper += delta;
5677 }
5678
5679 /** Initial setup of a sorted-dups cursor.
5680 * Sorted duplicates are implemented as a sub-database for the given key.
5681 * The duplicate data items are actually keys of the sub-database.
5682 * Operations on the duplicate data items are performed using a sub-cursor
5683 * initialized when the sub-database is first accessed. This function does
5684 * the preliminary setup of the sub-cursor, filling in the fields that
5685 * depend only on the parent DB.
5686 * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized.
5687 */
5688 static void
5689 mdb_xcursor_init0(MDB_cursor *mc)
5690 {
5691 MDB_xcursor *mx = mc->mc_xcursor;
5692
5693 mx->mx_cursor.mc_xcursor = NULL;
5694 mx->mx_cursor.mc_txn = mc->mc_txn;
5695 mx->mx_cursor.mc_db = &mx->mx_db;
5696 mx->mx_cursor.mc_dbx = &mx->mx_dbx;
5697 mx->mx_cursor.mc_dbi = mc->mc_dbi+1;
5698 mx->mx_cursor.mc_dbflag = &mx->mx_dbflag;
5699 mx->mx_cursor.mc_snum = 0;
5700 mx->mx_cursor.mc_top = 0;
5701 mx->mx_cursor.mc_flags = C_SUB;
5702 mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp;
5703 mx->mx_dbx.md_dcmp = NULL;
5704 mx->mx_dbx.md_rel = mc->mc_dbx->md_rel;
5705 }
5706
5707 /** Final setup of a sorted-dups cursor.
5708 * Sets up the fields that depend on the data from the main cursor.
5709 * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized.
5710 * @param[in] node The data containing the #MDB_db record for the
5711 * sorted-dup database.
5712 */
5713 static void
5714 mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
5715 {
5716 MDB_xcursor *mx = mc->mc_xcursor;
5717
5718 if (node->mn_flags & F_SUBDATA) {
5719 memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db));
5720 mx->mx_cursor.mc_pg[0] = 0;
5721 mx->mx_cursor.mc_snum = 0;
5722 mx->mx_cursor.mc_flags = C_SUB;
5723 } else {
5724 MDB_page *fp = NODEDATA(node);
5725 mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad;
5726 mx->mx_db.md_flags = 0;
5727 mx->mx_db.md_depth = 1;
5728 mx->mx_db.md_branch_pages = 0;
5729 mx->mx_db.md_leaf_pages = 1;
5730 mx->mx_db.md_overflow_pages = 0;
5731 mx->mx_db.md_entries = NUMKEYS(fp);
5732 COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno);
5733 mx->mx_cursor.mc_snum = 1;
5734 mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB;
5735 mx->mx_cursor.mc_top = 0;
5736 mx->mx_cursor.mc_pg[0] = fp;
5737 mx->mx_cursor.mc_ki[0] = 0;
5738 if (mc->mc_db->md_flags & MDB_DUPFIXED) {
5739 mx->mx_db.md_flags = MDB_DUPFIXED;
5740 mx->mx_db.md_pad = fp->mp_pad;
5741 if (mc->mc_db->md_flags & MDB_INTEGERDUP)
5742 mx->mx_db.md_flags |= MDB_INTEGERKEY;
5743 }
5744 }
5745 DPRINTF("Sub-db %u for db %u root page %zu", mx->mx_cursor.mc_dbi, mc->mc_dbi,
5746 mx->mx_db.md_root);
5747 mx->mx_dbflag = DB_VALID | (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY) ?
5748 DB_DIRTY : 0);
5749 mx->mx_dbx.md_name.mv_data = NODEKEY(node);
5750 mx->mx_dbx.md_name.mv_size = node->mn_ksize;
5751 #if UINT_MAX < SIZE_MAX
5752 if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
5753 #ifdef MISALIGNED_OK
5754 mx->mx_dbx.md_cmp = mdb_cmp_long;
5755 #else
5756 mx->mx_dbx.md_cmp = mdb_cmp_cint;
5757 #endif
5758 #endif
5759 }
5760
5761 /** Initialize a cursor for a given transaction and database. */
5762 static void
5763 mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
5764 {
5765 mc->mc_orig = NULL;
5766 mc->mc_dbi = dbi;
5767 mc->mc_txn = txn;
5768 mc->mc_db = &txn->mt_dbs[dbi];
5769 mc->mc_dbx = &txn->mt_dbxs[dbi];
5770 mc->mc_dbflag = &txn->mt_dbflags[dbi];
5771 mc->mc_snum = 0;
5772 mc->mc_top = 0;
5773 mc->mc_pg[0] = 0;
5774 mc->mc_flags = 0;
5775 if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
5776 assert(mx != NULL);
5777 mc->mc_xcursor = mx;
5778 mdb_xcursor_init0(mc);
5779 } else {
5780 mc->mc_xcursor = NULL;
5781 }
5782 if (*mc->mc_dbflag & DB_STALE) {
5783 mdb_page_search(mc, NULL, MDB_PS_ROOTONLY);
5784 }
5785 }
5786
5787 int
5788 mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
5789 {
5790 MDB_cursor *mc;
5791 MDB_xcursor *mx = NULL;
5792 size_t size = sizeof(MDB_cursor);
5793
5794 if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
5795 return EINVAL;
5796
5797 /* Allow read access to the freelist */
5798 if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
5799 return EINVAL;
5800
5801 if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)
5802 size += sizeof(MDB_xcursor);
5803
5804 if ((mc = malloc(size)) != NULL) {
5805 if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
5806 mx = (MDB_xcursor *)(mc + 1);
5807 }
5808 mdb_cursor_init(mc, txn, dbi, mx);
5809 if (txn->mt_cursors) {
5810 mc->mc_next = txn->mt_cursors[dbi];
5811 txn->mt_cursors[dbi] = mc;
5812 }
5813 mc->mc_flags |= C_ALLOCD;
5814 } else {
5815 return ENOMEM;
5816 }
5817
5818 *ret = mc;
5819
5820 return MDB_SUCCESS;
5821 }
5822
5823 int
5824 mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
5825 {
5826 if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
5827 return EINVAL;
5828
5829 if (txn->mt_cursors)
5830 return EINVAL;
5831
5832 mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
5833 return MDB_SUCCESS;
5834 }
5835
5836 /* Return the count of duplicate data items for the current key */
5837 int
5838 mdb_cursor_count(MDB_cursor *mc, size_t *countp)
5839 {
5840 MDB_node *leaf;
5841
5842 if (mc == NULL || countp == NULL)
5843 return EINVAL;
5844
5845 if (!(mc->mc_db->md_flags & MDB_DUPSORT))
5846 return EINVAL;
5847
5848 leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5849 if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
5850 *countp = 1;
5851 } else {
5852 if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))
5853 return EINVAL;
5854
5855 *countp = mc->mc_xcursor->mx_db.md_entries;
5856 }
5857 return MDB_SUCCESS;
5858 }
5859
5860 void
5861 mdb_cursor_close(MDB_cursor *mc)
5862 {
5863 if (mc != NULL) {
5864 /* remove from txn, if tracked */
5865 if (mc->mc_txn->mt_cursors) {
5866 MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
5867 while (*prev && *prev != mc) prev = &(*prev)->mc_next;
5868 if (*prev == mc)
5869 *prev = mc->mc_next;
5870 }
5871 if (mc->mc_flags & C_ALLOCD)
5872 free(mc);
5873 }
5874 }
5875
5876 MDB_txn *
5877 mdb_cursor_txn(MDB_cursor *mc)
5878 {
5879 if (!mc) return NULL;
5880 return mc->mc_txn;
5881 }
5882
5883 MDB_dbi
5884 mdb_cursor_dbi(MDB_cursor *mc)
5885 {
5886 assert(mc != NULL);
5887 return mc->mc_dbi;
5888 }
5889
5890 /** Replace the key for a node with a new key.
 * The node is the one at the cursor's current position
 * (mc_pg[mc_top], mc_ki[mc_top]). If the key size changes, the node header
 * and everything stored below it on the page are shifted to make or reclaim
 * room. If the page has too little free space for a longer key, the node
 * is deleted and re-added through mdb_page_split().
 * @param[in] mc The cursor pointing at the node to operate on.
 * @param[in] key The new key to use.
 * @return 0 on success, non-zero on failure.
5894 */
5896 static int
5897 mdb_update_key(MDB_cursor *mc, MDB_val *key)
5898 {
5899 MDB_page *mp;
5900 MDB_node *node;
5901 char *base;
5902 size_t len;
5903 int delta, delta0;
5904 indx_t ptr, i, numkeys, indx;
5905 DKBUF;
5906
5907 indx = mc->mc_ki[mc->mc_top];
5908 mp = mc->mc_pg[mc->mc_top];
5909 node = NODEPTR(mp, indx);
5910 ptr = mp->mp_ptrs[indx];
5911 #if MDB_DEBUG
5912 {
5913 MDB_val k2;
5914 char kbuf2[(MDB_MAXKEYSIZE*2+1)];
5915 k2.mv_data = NODEKEY(node);
5916 k2.mv_size = node->mn_ksize;
5917 DPRINTF("update key %u (ofs %u) [%s] to [%s] on page %zu",
5918 indx, ptr,
5919 mdb_dkey(&k2, kbuf2),
5920 DKEY(key),
5921 mp->mp_pgno);
5922 }
5923 #endif
5924
/* delta0 keeps the exact size change; delta is rounded to an even shift */
5925 delta0 = delta = key->mv_size - node->mn_ksize;
5926
5927 /* Must be 2-byte aligned. If new key is
5928 * shorter by 1, the shift will be skipped.
5929 */
5930 delta += (delta & 1);
5931 if (delta) {
5932 if (delta > 0 && SIZELEFT(mp) < delta) {
5933 pgno_t pgno;
5934 /* not enough space left, do a delete and split */
5935 DPRINTF("Not enough room, delta = %d, splitting...", delta);
/* NOTE(review): the node is re-added with its NODEPGNO value and NULL data,
 * so this path presumably expects a branch node - confirm against callers.
 */
5936 pgno = NODEPGNO(node);
5937 mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
5938 return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE);
5939 }
5940
/* every node at or below this node's offset (including itself) moves by delta */
5941 numkeys = NUMKEYS(mp);
5942 for (i = 0; i < numkeys; i++) {
5943 if (mp->mp_ptrs[i] <= ptr)
5944 mp->mp_ptrs[i] -= delta;
5945 }
5946
/* shift the bytes from mp_upper through this node's header; the key bytes
 * themselves are rewritten below
 */
5947 base = (char *)mp + mp->mp_upper;
5948 len = ptr - mp->mp_upper + NODESIZE;
5949 memmove(base - delta, base, len);
5950 mp->mp_upper -= delta;
5951
/* the node header has moved; fetch its new address */
5952 node = NODEPTR(mp, indx);
5953 }
5954
5955 /* But even if no shift was needed, update ksize */
5956 if (delta0)
5957 node->mn_ksize = key->mv_size;
5958
5959 if (key->mv_size)
5960 memcpy(NODEKEY(node), key->mv_data, key->mv_size);
5961
5962 return MDB_SUCCESS;
5963 }
5964
/* Forward declaration: mdb_cursor_copy() is defined further down but is
 * used by mdb_node_move() below.
 */
5965 static void
5966 mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst);
5967
5968 /** Move a node from csrc to cdst.
 * The node at csrc's current index is added to cdst's page at cdst's
 * current index and then deleted from the source page. Other cursors
 * tracked by the transaction that point at the moved node are redirected,
 * and the separator keys in the parent pages are updated.
 * @param[in] csrc Cursor pointing to the node to move.
 * @param[in] cdst Cursor pointing to the page and index to move it to.
 * @return 0 on success, non-zero on failure.
5969 */
5970 static int
5971 mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
5972 {
5973 MDB_node *srcnode;
5974 MDB_val key, data;
5975 pgno_t srcpg;
5976 MDB_cursor mn;
5977 int rc;
5978 unsigned short flags;
5979
5980 DKBUF;
5981
5982 /* Mark src and dst as dirty. */
5983 if ((rc = mdb_page_touch(csrc)) ||
5984 (rc = mdb_page_touch(cdst)))
5985 return rc;
5986
/* Collect key, data, child page number and flags of the source node.
 * On a LEAF2 page the key is read with LEAF2KEY() using md_pad as its
 * size, and there is no data, page number or flags.
 */
5987 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
5988 srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */
5989 key.mv_size = csrc->mc_db->md_pad;
5990 key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
5991 data.mv_size = 0;
5992 data.mv_data = NULL;
5993 srcpg = 0;
5994 flags = 0;
5995 } else {
5996 srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]);
5997 assert(!((long)srcnode&1));
5998 srcpg = NODEPGNO(srcnode);
5999 flags = srcnode->mn_flags;
6000 if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
6001 unsigned int snum = csrc->mc_snum;
6002 MDB_node *s2;
6003 /* must find the lowest key below src */
6004 mdb_page_search_root(csrc, NULL, 0);
6005 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6006 key.mv_size = csrc->mc_db->md_pad;
6007 key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
6008 } else {
6009 s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
6010 key.mv_size = NODEKSZ(s2);
6011 key.mv_data = NODEKEY(s2);
6012 }
/* Restore the stack depth saved before the search: mc_snum gets the
 * saved value and mc_top the saved value minus one.
 */
6013 csrc->mc_snum = snum--;
6014 csrc->mc_top = snum;
6015 } else {
6016 key.mv_size = NODEKSZ(srcnode);
6017 key.mv_data = NODEKEY(srcnode);
6018 }
6019 data.mv_size = NODEDSZ(srcnode);
6020 data.mv_data = NODEDATA(srcnode);
6021 }
/* When inserting at index 0 of a branch page, the node currently at
 * index 0 first gets an explicit key: the lowest key found below it.
 */
6022 if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) {
6023 unsigned int snum = cdst->mc_snum;
6024 MDB_node *s2;
6025 MDB_val bkey;
6026 /* must find the lowest key below dst */
6027 mdb_page_search_root(cdst, NULL, 0);
6028 if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) {
6029 bkey.mv_size = cdst->mc_db->md_pad;
6030 bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size);
6031 } else {
6032 s2 = NODEPTR(cdst->mc_pg[cdst->mc_top], 0);
6033 bkey.mv_size = NODEKSZ(s2);
6034 bkey.mv_data = NODEKEY(s2);
6035 }
/* Restore cdst's stack depth; after these two lines snum equals the
 * restored mc_top and is used as such below.
 */
6036 cdst->mc_snum = snum--;
6037 cdst->mc_top = snum;
6038 mdb_cursor_copy(cdst, &mn);
6039 mn.mc_ki[snum] = 0;
6040 rc = mdb_update_key(&mn, &bkey);
6041 if (rc)
6042 return rc;
6043 }
6044
6045 DPRINTF("moving %s node %u [%s] on page %zu to node %u on page %zu",
6046 IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch",
6047 csrc->mc_ki[csrc->mc_top],
6048 DKEY(&key),
6049 csrc->mc_pg[csrc->mc_top]->mp_pgno,
6050 cdst->mc_ki[cdst->mc_top], cdst->mc_pg[cdst->mc_top]->mp_pgno);
6051
6052 /* Add the node to the destination page.
6053 */
6054 rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags);
6055 if (rc != MDB_SUCCESS)
6056 return rc;
6057
6058 /* Delete the node from the source page.
6059 */
6060 mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
6061
6062 {
6063 /* Adjust other cursors pointing to mp */
6064 MDB_cursor *m2, *m3;
6065 MDB_dbi dbi = csrc->mc_dbi;
6066 MDB_page *mp = csrc->mc_pg[csrc->mc_top];
6067
/* For a sub-cursor (C_SUB) the tracked list is looked up at dbi-1 and
 * each tracked cursor's embedded mx_cursor is the one examined.
 */
6068 if (csrc->mc_flags & C_SUB)
6069 dbi--;
6070
6071 for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
6072 if (m2 == csrc) continue;
6073 if (csrc->mc_flags & C_SUB)
6074 m3 = &m2->mc_xcursor->mx_cursor;
6075 else
6076 m3 = m2;
6077 if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] ==
6078 csrc->mc_ki[csrc->mc_top]) {
6079 m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
6080 m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
6081 }
6082 }
6083 }
6084
6085 /* Update the parent separators.
6086 */
6087 if (csrc->mc_ki[csrc->mc_top] == 0) {
6088 if (csrc->mc_ki[csrc->mc_top-1] != 0) {
6089 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6090 key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
6091 } else {
6092 srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
6093 key.mv_size = NODEKSZ(srcnode);
6094 key.mv_data = NODEKEY(srcnode);
6095 }
6096 DPRINTF("update separator for source page %zu to [%s]",
6097 csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key));
/* Work on a copy of the cursor moved up one level, so the update is
 * applied to the parent page's node.
 */
6098 mdb_cursor_copy(csrc, &mn);
6099 mn.mc_snum--;
6100 mn.mc_top--;
6101 if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS)
6102 return rc;
6103 }
/* On a branch page the node now at index 0 gets a zero-length key.
 * The result of this update is only checked through assert().
 */
6104 if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
6105 MDB_val nullkey;
6106 indx_t ix = csrc->mc_ki[csrc->mc_top];
6107 nullkey.mv_size = 0;
6108 csrc->mc_ki[csrc->mc_top] = 0;
6109 rc = mdb_update_key(csrc, &nullkey);
6110 csrc->mc_ki[csrc->mc_top] = ix;
6111 assert(rc == MDB_SUCCESS);
6112 }
6113 }
6114
6115 if (cdst->mc_ki[cdst->mc_top] == 0) {
6116 if (cdst->mc_ki[cdst->mc_top-1] != 0) {
/* NOTE(review): the LEAF2 test looks at csrc's page while the key is
 * read from cdst's page; presumably both pages always have the same
 * type -- confirm.
 */
6117 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6118 key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size);
6119 } else {
6120 srcnode = NODEPTR(cdst->mc_pg[cdst->mc_top], 0);
6121 key.mv_size = NODEKSZ(srcnode);
6122 key.mv_data = NODEKEY(srcnode);
6123 }
6124 DPRINTF("update separator for destination page %zu to [%s]",
6125 cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key));
6126 mdb_cursor_copy(cdst, &mn);
6127 mn.mc_snum--;
6128 mn.mc_top--;
6129 if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS)
6130 return rc;
6131 }
/* Same as for the source page: zero-length key at index 0 of a branch
 * page, result checked only through assert().
 */
6132 if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) {
6133 MDB_val nullkey;
6134 indx_t ix = cdst->mc_ki[cdst->mc_top];
6135 nullkey.mv_size = 0;
6136 cdst->mc_ki[cdst->mc_top] = 0;
6137 rc = mdb_update_key(cdst, &nullkey);
6138 cdst->mc_ki[cdst->mc_top] = ix;
6139 assert(rc == MDB_SUCCESS);
6140 }
6141 }
6142
6143 return MDB_SUCCESS;
6144 }
6145
6146 /** Merge one page into another.
6147 * The nodes from the page pointed to by \b csrc will
6148 * be copied to the page pointed to by \b cdst and then
6149 * the \b csrc page will be freed.
 * Afterwards \b csrc is popped one level and the tree is rebalanced
 * starting from there.
6150 * @param[in] csrc Cursor pointing to the source page.
6151 * @param[in] cdst Cursor pointing to the destination page.
 * @return 0 on success, non-zero on failure.
6152 */
6153 static int
6154 mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
6155 {
6156 int rc;
6157 indx_t i, j;
6158 MDB_node *srcnode;
6159 MDB_val key, data;
6160 unsigned nkeys;
6161
6162 DPRINTF("merging page %zu into %zu", csrc->mc_pg[csrc->mc_top]->mp_pgno,
6163 cdst->mc_pg[cdst->mc_top]->mp_pgno);
6164
6165 assert(csrc->mc_snum > 1); /* can't merge root page */
6166 assert(cdst->mc_snum > 1);
6167
6168 /* Mark dst as dirty. */
6169 if ((rc = mdb_page_touch(cdst)))
6170 return rc;
6171
6172 /* Move all nodes from src to dst.
6173 */
/* nkeys remembers how many keys dst held before the merge; it is the
 * offset later added to the index of other cursors that sat on src.
 * j is the running insertion index in dst.
 */
6174 j = nkeys = NUMKEYS(cdst->mc_pg[cdst->mc_top]);
6175 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
/* LEAF2 page: keys are md_pad bytes each, walked by advancing the key
 * pointer by one key size per iteration.
 */
6176 key.mv_size = csrc->mc_db->md_pad;
6177 key.mv_data = METADATA(csrc->mc_pg[csrc->mc_top]);
6178 for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
6179 rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
6180 if (rc != MDB_SUCCESS)
6181 return rc;
6182 key.mv_data = (char *)key.mv_data + key.mv_size;
6183 }
6184 } else {
6185 for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
6186 srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], i);
6187 if (i == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
6188 unsigned int snum = csrc->mc_snum;
6189 MDB_node *s2;
6190 /* must find the lowest key below src */
6191 mdb_page_search_root(csrc, NULL, 0);
6192 if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6193 key.mv_size = csrc->mc_db->md_pad;
6194 key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
6195 } else {
6196 s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
6197 key.mv_size = NODEKSZ(s2);
6198 key.mv_data = NODEKEY(s2);
6199 }
/* Restore the stack depth saved before the search: mc_snum gets the
 * saved value and mc_top the saved value minus one.
 */
6200 csrc->mc_snum = snum--;
6201 csrc->mc_top = snum;
6202 } else {
6203 key.mv_size = srcnode->mn_ksize;
6204 key.mv_data = NODEKEY(srcnode);
6205 }
6206
6207 data.mv_size = NODEDSZ(srcnode);
6208 data.mv_data = NODEDATA(srcnode);
6209 rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags);
6210 if (rc != MDB_SUCCESS)
6211 return rc;
6212 }
6213 }
6214
6215 DPRINTF("dst page %zu now has %u keys (%.1f%% filled)",
6216 cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10);
6217
6218 /* Unlink the src page from parent and add to free list.
6219 */
6220 mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
/* If the removed parent entry was at index 0, the node that is now at
 * index 0 of the parent gets a zero-length key.
 */
6221 if (csrc->mc_ki[csrc->mc_top-1] == 0) {
6222 key.mv_size = 0;
6223 csrc->mc_top--;
6224 rc = mdb_update_key(csrc, &key);
6225 csrc->mc_top++;
6226 if (rc)
6227 return rc;
6228 }
6229
/* NOTE(review): the result of mdb_midl_append() is not examined here;
 * confirm whether it can fail.
 */
6230 mdb_midl_append(&csrc->mc_txn->mt_free_pgs, csrc->mc_pg[csrc->mc_top]->mp_pgno);
6231 if (IS_LEAF(csrc->mc_pg[csrc->mc_top]))
6232 csrc->mc_db->md_leaf_pages--;
6233 else
6234 csrc->mc_db->md_branch_pages--;
6235 {
6236 /* Adjust other cursors pointing to mp */
6237 MDB_cursor *m2, *m3;
6238 MDB_dbi dbi = csrc->mc_dbi;
6239 MDB_page *mp = cdst->mc_pg[cdst->mc_top];
6240
6241 if (csrc->mc_flags & C_SUB)
6242 dbi--;
6243
/* Cursors that were on the src page are redirected to the dst page,
 * with their index shifted by the number of keys dst had before.
 */
6244 for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
6245 if (csrc->mc_flags & C_SUB)
6246 m3 = &m2->mc_xcursor->mx_cursor;
6247 else
6248 m3 = m2;
6249 if (m3 == csrc) continue;
6250 if (m3->mc_snum < csrc->mc_snum) continue;
6251 if (m3->mc_pg[csrc->mc_top] == csrc->mc_pg[csrc->mc_top]) {
6252 m3->mc_pg[csrc->mc_top] = mp;
6253 m3->mc_ki[csrc->mc_top] += nkeys;
6254 }
6255 }
6256 }
/* presumably moves csrc up to the parent level, so the rebalance below
 * starts at the parent page -- confirm against mdb_cursor_pop()
 */
6257 mdb_cursor_pop(csrc);
6258
6259 return mdb_rebalance(csrc);
6260 }
6261
6262 /** Copy the contents of a cursor.
6263 * @param[in] csrc The cursor to copy from.
6264 * @param[out] cdst The cursor to copy to.
6265 */
6266 static void
6267 mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst)
6268 {
6269 unsigned int i;
6270
6271 cdst->mc_txn = csrc->mc_txn;
6272 cdst->mc_dbi = csrc->mc_dbi;
6273 cdst->mc_db = csrc->mc_db;
6274 cdst->mc_dbx = csrc->mc_dbx;
6275 cdst->mc_snum = csrc->mc_snum;
6276 cdst->mc_top = csrc->mc_top;
6277 cdst->mc_flags = csrc->mc_flags;
6278
6279 for (i=0; i<csrc->mc_snum; i++) {
6280 cdst->mc_pg[i] = csrc->mc_pg[i];
6281 cdst->mc_ki[i] = csrc->mc_ki[i];
6282 }
6283 }
6284
6285 /** Rebalance the tree after a delete operation.
 * Nothing is done if the page is at or above the fill threshold and
 * holds at least the minimum number of keys. A root page is emptied or
 * collapsed when possible. Otherwise one node is moved in from a
 * sibling, or the page is merged with that sibling.
6286 * @param[in] mc Cursor pointing to the page where rebalancing
6287 * should begin.
6288 * @return 0 on success, non-zero on failure.
6289 */
6290 static int
6291 mdb_rebalance(MDB_cursor *mc)
6292 {
6293 MDB_node *node;
6294 int rc;
6295 unsigned int ptop, minkeys;
6296 MDB_cursor mn;
6297
/* One key minimum, plus one more for a branch page (assuming
 * IS_BRANCH() evaluates to 0 or 1).
 */
6298 minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top]));
6299 #if MDB_DEBUG
6300 {
6301 pgno_t pgno;
6302 COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
6303 DPRINTF("rebalancing %s page %zu (has %u keys, %.1f%% full)",
6304 IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
6305 pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10);
6306 }
6307 #endif
6308
6309 if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD &&
6310 NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
6311 #if MDB_DEBUG
6312 pgno_t pgno;
6313 COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
6314 DPRINTF("no need to rebalance page %zu, above fill threshold",
6315 pgno);
6316 #endif
6317 return MDB_SUCCESS;
6318 }
6319
/* A stack depth below 2 means the cursor is on the root page, which
 * has no sibling to borrow from or merge with.
 */
6320 if (mc->mc_snum < 2) {
6321 MDB_page *mp = mc->mc_pg[0];
6322 if (NUMKEYS(mp) == 0) {
6323 DPUTS("tree is completely empty");
6324 mc->mc_db->md_root = P_INVALID;
6325 mc->mc_db->md_depth = 0;
6326 mc->mc_db->md_leaf_pages = 0;
6327 mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
6328 mc->mc_snum = 0;
6329 mc->mc_top = 0;
6330 {
6331 /* Adjust other cursors pointing to mp */
6332 MDB_cursor *m2, *m3;
6333 MDB_dbi dbi = mc->mc_dbi;
6334
6335 if (mc->mc_flags & C_SUB)
6336 dbi--;
6337
6338 for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
6339 if (m2 == mc) continue;
6340 if (mc->mc_flags & C_SUB)
6341 m3 = &m2->mc_xcursor->mx_cursor;
6342 else
6343 m3 = m2;
6344 if (m3->mc_snum < mc->mc_snum) continue;
6345 if (m3->mc_pg[0] == mp) {
6346 m3->mc_snum = 0;
6347 m3->mc_top = 0;
6348 }
6349 }
6350 }
6351 } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
/* A branch root with a single child: free it and make that child the
 * new root, reducing the tree depth by one.
 */
6352 DPUTS("collapsing root page!");
6353 mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
6354 mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0));
6355 if ((rc = mdb_page_get(mc->mc_txn, mc->mc_db->md_root,
6356 &mc->mc_pg[0])))
6357 return rc;
6358 mc->mc_db->md_depth--;
6359 mc->mc_db->md_branch_pages--;
6360 {
6361 /* Adjust other cursors pointing to mp */
6362 MDB_cursor *m2, *m3;
6363 MDB_dbi dbi = mc->mc_dbi;
6364
6365 if (mc->mc_flags & C_SUB)
6366 dbi--;
6367
6368 for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
6369 if (m2 == mc) continue;
6370 if (mc->mc_flags & C_SUB)
6371 m3 = &m2->mc_xcursor->mx_cursor;
6372 else
6373 m3 = m2;
6374 if (m3->mc_snum < mc->mc_snum) continue;
6375 if (m3->mc_pg[0] == mp) {
6376 m3->mc_pg[0] = mc->mc_pg[0];
6377 m3->mc_snum = 1;
6378 m3->mc_top = 0;
6379 }
6380 }
6381 }
6382 } else
6383 DPUTS("root page doesn't need rebalancing");
6384 return MDB_SUCCESS;
6385 }
6386
6387 /* The parent (branch page) must have at least 2 pointers,
6388 * otherwise the tree is invalid.
6389 */
6390 ptop = mc->mc_top-1;
6391 assert(NUMKEYS(mc->mc_pg[ptop]) > 1);
6392
6393 /* Leaf page fill factor is below the threshold.
6394 * Try to move keys from left or right neighbor, or
6395 * merge with a neighbor page.
6396 */
6397
6398 /* Find neighbors.
6399 */
/* mn becomes a cursor on the neighbor page. mdb_cursor_copy() does not
 * write mc_xcursor, so it is cleared explicitly here.
 */
6400 mdb_cursor_copy(mc, &mn);
6401 mn.mc_xcursor = NULL;
6402
6403 if (mc->mc_ki[ptop] == 0) {
6404 /* We're the leftmost leaf in our parent.
6405 */
6406 DPUTS("reading right neighbor");
6407 mn.mc_ki[ptop]++;
6408 node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]);
6409 if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mn.mc_pg[mn.mc_top])))
6410 return rc;
/* Source is the first node of the right neighbor; destination index is
 * one past the last node of this page.
 */
6411 mn.mc_ki[mn.mc_top] = 0;
6412 mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]);
6413 } else {
6414 /* There is at least one neighbor to the left.
6415 */
6416 DPUTS("reading left neighbor");
6417 mn.mc_ki[ptop]--;
6418 node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]);
6419 if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mn.mc_pg[mn.mc_top])))
6420 return rc;
/* Source is the last node of the left neighbor; destination index is
 * the front of this page.
 */
6421 mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1;
6422 mc->mc_ki[mc->mc_top] = 0;
6423 }
6424
6425 DPRINTF("found neighbor page %zu (%u keys, %.1f%% full)",
6426 mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10);
6427
6428 /* If the neighbor page is above threshold and has enough keys,
6429 * move one key from it. Otherwise we should try to merge them.
6430 * (A branch page must never have less than 2 keys.)
6431 */
6432 minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top]));
6433 if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys)
6434 return mdb_node_move(&mn, mc);
6435 else {
/* The merge always runs from the right-hand page into the left-hand
 * one: the right neighbor into this page when this page is leftmost,
 * otherwise this page into its left neighbor.
 */
6436 if (mc->mc_ki[ptop] == 0)
6437 rc = mdb_page_merge(&mn, mc);
6438 else
6439 rc = mdb_page_merge(mc, &mn);
/* After a merge the cursor is marked as not initialized. */
6440 mc->mc_flags &= ~C_INITIALIZED;
6441 }
6442 return rc;
6443 }
6444
6445 /** Complete a delete operation started by #mdb_cursor_del(). */
6446 static int
6447 mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf)
6448 {
6449 int rc;
6450
6451 /* add overflow pages to free list */
6452 if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_BIGDATA)) {
6453 int i, ovpages;
6454 pgno_t pg;
6455
6456 memcpy(&pg, NODEDATA(leaf), sizeof(pg));
6457 ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize);
6458 mc->mc_db->md_overflow_pages -= ovpages;
6459 for (i=0; i<ovpages; i++) {
6460 DPRINTF("freed ov page %zu", pg);
6461 mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg);
6462 pg++;
6463 }
6464 }
6465 mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], mc->mc_db->md_pad);
6466 mc->mc_db->md_entries--;
6467 rc = mdb_rebalance(mc);
6468 if (rc != MDB_SUCCESS)
6469 mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6470 /* if mc points past last node in page, invalidate */
6471 else if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top]))
6472 mc->mc_flags &= ~C_INITIALIZED;
6473
6474 return rc;
6475 }
6476
6477 int
6478 mdb_del(MDB_txn *txn, MDB_dbi dbi,
6479 MDB_val *key, MDB_val *data)
6480 {
6481 MDB_cursor mc;
6482 MDB_xcursor mx;
6483 MDB_cursor_op op;
6484 MDB_val rdata, *xdata;
6485 int rc, exact;
6486 DKBUF;
6487
6488 assert(key != NULL);
6489
6490 DPRINTF("====> delete db %u key [%s]", dbi, DKEY(key));
6491
6492 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
6493 return EINVAL;
6494
6495 if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
6496 return EACCES;
6497 }
6498
6499 if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
6500 return EINVAL;
6501 }
6502
6503 mdb_cursor_init(&mc, txn, dbi, &mx);
6504
6505 exact = 0;
6506 if (data) {
6507 op = MDB_GET_BOTH;
6508 rdata = *data;
6509 xdata = &rdata;
6510 } else {
6511 op = MDB_SET;
6512 xdata = NULL;
6513 }
6514 rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
6515 if (rc == 0) {
6516 /* let mdb_page_split know about this cursor if needed:
6517 * delete will trigger a rebalance; if it needs to move
6518 * a node from one page to another, it will have to
6519 * update the parent's separator key(s). If the new sepkey
6520 * is larger than the current one, the parent page may
6521 * run out of space, triggering a split. We need this
6522 * cursor to be consistent until the end of the rebalance.
6523 */
6524 mc.mc_next = txn->mt_cursors[dbi];
6525 txn->mt_cursors[dbi] = &mc;
6526 rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
6527 txn->mt_cursors[dbi] = mc.mc_next;
6528 }
6529 return rc;
6530 }
6531
6532 /** Split a page and insert a new node.
6533 * @param[in,out] mc Cursor pointing to the page and desired insertion index.
6534 * The cursor will be updated to point to the actual page and index where
6535 * the node got inserted after the split.
6536 * @param[in] newkey The key for the newly inserted node.
6537 * @param[in] newdata The data for the newly inserted node.
6538 * @param[in] newpgno The page number, if the new node is a branch node.
6539 * @param[in] nflags The #NODE_ADD_FLAGS for the new node.
6540 * @return 0 on success, non-zero on failure.
6541 */
6542 static int
6543 mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno,
6544 unsigned int nflags)
6545 {
6546 unsigned int flags;
6547 int rc = MDB_SUCCESS, ins_new = 0, new_root = 0, newpos = 1, did_split = 0;
6548 indx_t newindx;
6549 pgno_t pgno = 0;
6550 unsigned int i, j, split_indx, nkeys, pmax;
6551 MDB_node *node;
6552 MDB_val sepkey, rkey, xdata, *rdata = &xdata;
6553 MDB_page *copy;
6554 MDB_page *mp, *rp, *pp;
6555 unsigned int ptop;
6556 MDB_cursor mn;
6557 DKBUF;
6558
/* mp is the page being split; newindx is where the new node belongs. */
6559 mp = mc->mc_pg[mc->mc_top];
6560 newindx = mc->mc_ki[mc->mc_top];
6561
6562 DPRINTF("-----> splitting %s page %zu and adding [%s] at index %i",
6563 IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno,
6564 DKEY(newkey), mc->mc_ki[mc->mc_top]);
6565
6566 /* Create a right sibling. */
6567 if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp)))
6568 return rc;
6569 DPRINTF("new right sibling: page %zu", rp->mp_pgno);
6570
/* A stack depth below 2 means the root itself is being split: a new
 * branch page becomes the root and the old root moves down one level.
 */
6571 if (mc->mc_snum < 2) {
6572 if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
6573 return rc;
6574 /* shift current top to make room for new parent */
6575 mc->mc_pg[1] = mc->mc_pg[0];
6576 mc->mc_ki[1] = mc->mc_ki[0];
6577 mc->mc_pg[0] = pp;
6578 mc->mc_ki[0] = 0;
6579 mc->mc_db->md_root = pp->mp_pgno;
6580 DPRINTF("root split! new root = %zu", pp->mp_pgno);
6581 mc->mc_db->md_depth++;
6582 new_root = 1;
6583
6584 /* Add left (implicit) pointer. */
6585 if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) {
6586 /* undo the pre-push */
6587 mc->mc_pg[0] = mc->mc_pg[1];
6588 mc->mc_ki[0] = mc->mc_ki[1];
6589 mc->mc_db->md_root = mp->mp_pgno;
6590 mc->mc_db->md_depth--;
6591 return rc;
6592 }
6593 mc->mc_snum = 2;
6594 mc->mc_top = 1;
6595 ptop = 0;
6596 } else {
6597 ptop = mc->mc_top-1;
6598 DPRINTF("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno);
6599 }
6600
/* Flag this cursor as mid-split: the cursor fixup loop at the end of
 * this function skips cursors carrying C_SPLITTING. mn is a copy of the
 * cursor that points at the new right sibling, one slot further along
 * in the parent.
 */
6601 mc->mc_flags |= C_SPLITTING;
6602 mdb_cursor_copy(mc, &mn);
6603 mn.mc_pg[mn.mc_top] = rp;
6604 mn.mc_ki[ptop] = mc->mc_ki[ptop]+1;
6605
/* With MDB_APPEND no existing node is moved: the new key is the
 * separator and becomes the only node on the right sibling.
 */
6606 if (nflags & MDB_APPEND) {
6607 mn.mc_ki[mn.mc_top] = 0;
6608 sepkey = *newkey;
6609 split_indx = newindx;
6610 nkeys = 0;
6611 goto newsep;
6612 }
6613
6614 nkeys = NUMKEYS(mp);
6615 split_indx = nkeys / 2;
6616 if (newindx < split_indx)
6617 newpos = 0;
6618
6619 if (IS_LEAF2(rp)) {
6620 char *split, *ins;
6621 int x;
6622 unsigned int lsize, rsize, ksize;
6623 /* Move half of the keys to the right sibling */
6624 copy = NULL;
/* x is the new key's position relative to the split point: negative
 * means it stays on the left page, otherwise it is its index on the
 * right page.
 */
6625 x = mc->mc_ki[mc->mc_top] - split_indx;
6626 ksize = mc->mc_db->md_pad;
6627 split = LEAF2KEY(mp, split_indx, ksize);
6628 rsize = (nkeys - split_indx) * ksize;
6629 lsize = (nkeys - split_indx) * sizeof(indx_t);
6630 mp->mp_lower -= lsize;
6631 rp->mp_lower += lsize;
6632 mp->mp_upper += rsize - lsize;
6633 rp->mp_upper -= rsize - lsize;
6634 sepkey.mv_size = ksize;
6635 if (newindx == split_indx) {
6636 sepkey.mv_data = newkey->mv_data;
6637 } else {
6638 sepkey.mv_data = split;
6639 }
6640 if (x<0) {
6641 ins = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], ksize);
6642 memcpy(rp->mp_ptrs, split, rsize);
6643 sepkey.mv_data = rp->mp_ptrs;
6644 memmove(ins+ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize);
6645 memcpy(ins, newkey->mv_data, ksize);
6646 mp->mp_lower += sizeof(indx_t);
6647 mp->mp_upper -= ksize - sizeof(indx_t);
6648 } else {
6649 if (x)
6650 memcpy(rp->mp_ptrs, split, x * ksize);
6651 ins = LEAF2KEY(rp, x, ksize);
6652 memcpy(ins, newkey->mv_data, ksize);
6653 memcpy(ins+ksize, split + x * ksize, rsize - x * ksize);
6654 rp->mp_lower += sizeof(indx_t);
6655 rp->mp_upper -= ksize - sizeof(indx_t);
6656 mc->mc_ki[mc->mc_top] = x;
6657 mc->mc_pg[mc->mc_top] = rp;
6658 }
6659 goto newsep;
6660 }
6661
6662 /* For leaf pages, check the split point based on what
6663 * fits where, since otherwise mdb_node_add can fail.
6664 *
6665 * This check is only needed when the data items are
6666 * relatively large, such that being off by one will
6667 * make the difference between success or failure.
6668 *
6669 * It's also relevant if a page happens to be laid out
6670 * such that one half of its nodes are all "small" and
6671 * the other half of its nodes are "large." If the new
6672 * item is also "large" and falls on the half with
6673 * "large" nodes, it also may not fit.
6674 */
6675 if (IS_LEAF(mp)) {
6676 unsigned int psize, nsize;
6677 /* Maximum free space in an empty page */
6678 pmax = mc->mc_txn->mt_env->me_psize - PAGEHDRSZ;
6679 nsize = mdb_leaf_size(mc->mc_txn->mt_env, newkey, newdata);
6680 if ((nkeys < 20) || (nsize > pmax/16)) {
6681 if (newindx <= split_indx) {
/* New node falls in the left half: add up node sizes from the front
 * and pull the split point back if the left page would overflow.
 */
6682 psize = nsize;
6683 newpos = 0;
6684 for (i=0; i<split_indx; i++) {
6685 node = NODEPTR(mp, i);
6686 psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
6687 if (F_ISSET(node->mn_flags, F_BIGDATA))
6688 psize += sizeof(pgno_t);
6689 else
6690 psize += NODEDSZ(node);
6691 psize += psize & 1;
6692 if (psize > pmax) {
6693 if (i <= newindx) {
6694 split_indx = newindx;
6695 if (i < newindx)
6696 newpos = 1;
6697 }
6698 else
6699 split_indx = i;
6700 break;
6701 }
6702 }
6703 } else {
/* New node falls in the right half: add up node sizes from the back
 * and push the split point forward if the right page would overflow.
 * NOTE(review): i and split_indx are unsigned, so if split_indx were 0
 * the test i>=split_indx could never fail and i-- would wrap around;
 * confirm split_indx is always > 0 on this path.
 */
6704 psize = nsize;
6705 for (i=nkeys-1; i>=split_indx; i--) {
6706 node = NODEPTR(mp, i);
6707 psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
6708 if (F_ISSET(node->mn_flags, F_BIGDATA))
6709 psize += sizeof(pgno_t);
6710 else
6711 psize += NODEDSZ(node);
6712 psize += psize & 1;
6713 if (psize > pmax) {
6714 if (i >= newindx) {
6715 split_indx = newindx;
6716 newpos = 0;
6717 } else
6718 split_indx = i+1;
6719 break;
6720 }
6721 }
6722 }
6723 }
6724 }
6725
6726 /* First find the separating key between the split pages.
6727 * The case where newindx == split_indx is ambiguous; the
6728 * new item could go to the new page or stay on the original
6729 * page. If newpos == 1 it goes to the new page.
6730 */
6731 if (newindx == split_indx && newpos) {
6732 sepkey.mv_size = newkey->mv_size;
6733 sepkey.mv_data = newkey->mv_data;
6734 } else {
6735 node = NODEPTR(mp, split_indx);
6736 sepkey.mv_size = node->mn_ksize;
6737 sepkey.mv_data = NODEKEY(node);
6738 }
6739
6740 newsep:
6741 DPRINTF("separator is [%s]", DKEY(&sepkey));
6742
6743 /* Copy separator key to the parent.
6744 */
6745 if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(mc->mc_txn->mt_env, &sepkey)) {
/* The parent has no room for the separator: split the parent too, by
 * recursing with mn moved up one level.
 */
6746 mn.mc_snum--;
6747 mn.mc_top--;
6748 did_split = 1;
6749 rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
6750
6751 /* root split? */
6752 if (mn.mc_snum == mc->mc_snum) {
6753 mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top];
6754 mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top];
6755 mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop];
6756 mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop];
6757 mc->mc_snum++;
6758 mc->mc_top++;
6759 ptop++;
6760 }
6761 /* Right page might now have changed parent.
6762 * Check if left page also changed parent.
6763 */
6764 if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
6765 mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
6766 for (i=0; i<ptop; i++) {
6767 mc->mc_pg[i] = mn.mc_pg[i];
6768 mc->mc_ki[i] = mn.mc_ki[i];
6769 }
6770 mc->mc_pg[ptop] = mn.mc_pg[ptop];
6771 mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
6772 }
6773 } else {
/* The separator fits: add it to the parent via mn, temporarily moved
 * up one level.
 */
6774 mn.mc_top--;
6775 rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0);
6776 mn.mc_top++;
6777 }
/* Toggle off the C_SPLITTING flag that was set above. */
6778 mc->mc_flags ^= C_SPLITTING;
6779 if (rc != MDB_SUCCESS) {
6780 return rc;
6781 }
6782 if (nflags & MDB_APPEND) {
6783 mc->mc_pg[mc->mc_top] = rp;
6784 mc->mc_ki[mc->mc_top] = 0;
6785 rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
6786 if (rc)
6787 return rc;
6788 for (i=0; i<mc->mc_top; i++)
6789 mc->mc_ki[i] = mn.mc_ki[i];
6790 goto done;
6791 }
/* LEAF2 keys were already distributed before the newsep label. */
6792 if (IS_LEAF2(rp)) {
6793 goto done;
6794 }
6795
6796 /* Move half of the keys to the right sibling. */
6797
6798 /* grab a page to hold a temporary copy */
6799 copy = mdb_page_malloc(mc);
6800 if (copy == NULL)
6801 return ENOMEM;
6802
6803 copy->mp_pgno = mp->mp_pgno;
6804 copy->mp_flags = mp->mp_flags;
6805 copy->mp_lower = PAGEHDRSZ;
6806 copy->mp_upper = mc->mc_txn->mt_env->me_psize;
6807 mc->mc_pg[mc->mc_top] = copy;
/* i walks the nodes of the original page, j is the insertion index on
 * the page currently being filled (copy first, then rp). The new node
 * is slotted in once, when i reaches newindx.
 */
6808 for (i = j = 0; i <= nkeys; j++) {
6809 if (i == split_indx) {
6810 /* Insert in right sibling. */
6811 /* Reset insert index for right sibling. */
6812 if (i != newindx || (newpos ^ ins_new)) {
6813 j = 0;
6814 mc->mc_pg[mc->mc_top] = rp;
6815 }
6816 }
6817
6818 if (i == newindx && !ins_new) {
6819 /* Insert the original entry that caused the split. */
6820 rkey.mv_data = newkey->mv_data;
6821 rkey.mv_size = newkey->mv_size;
6822 if (IS_LEAF(mp)) {
6823 rdata = newdata;
6824 } else
6825 pgno = newpgno;
6826 flags = nflags;
6827
6828 ins_new = 1;
6829
6830 /* Update index for the new key. */
6831 mc->mc_ki[mc->mc_top] = j;
6832 } else if (i == nkeys) {
6833 break;
6834 } else {
6835 node = NODEPTR(mp, i);
6836 rkey.mv_data = NODEKEY(node);
6837 rkey.mv_size = node->mn_ksize;
6838 if (IS_LEAF(mp)) {
6839 xdata.mv_data = NODEDATA(node);
6840 xdata.mv_size = NODEDSZ(node);
6841 rdata = &xdata;
6842 } else
6843 pgno = NODEPGNO(node);
6844 flags = node->mn_flags;
6845
6846 i++;
6847 }
6848
6849 if (!IS_LEAF(mp) && j == 0) {
6850 /* First branch index doesn't need key data. */
6851 rkey.mv_size = 0;
6852 }
6853
/* On failure the loop stops, but the code below still copies the
 * temporary page back and frees it; rc is returned at the end.
 */
6854 rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
6855 if (rc) break;
6856 }
6857
/* Copy the rebuilt left half from the temporary page back into mp:
 * the pointer array, the lower/upper bounds, and the node area from
 * mp_upper to the end of the page.
 */
6858 nkeys = NUMKEYS(copy);
6859 for (i=0; i<nkeys; i++)
6860 mp->mp_ptrs[i] = copy->mp_ptrs[i];
6861 mp->mp_lower = copy->mp_lower;
6862 mp->mp_upper = copy->mp_upper;
6863 memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
6864 mc->mc_txn->mt_env->me_psize - copy->mp_upper);
6865
6866 /* reset back to original page */
6867 if (newindx < split_indx || (!newpos && newindx == split_indx)) {
6868 mc->mc_pg[mc->mc_top] = mp;
/* With MDB_RESERVE, point newdata at the node's data area in mp
 * (unless the node is stored as F_BIGDATA).
 */
6869 if (nflags & MDB_RESERVE) {
6870 node = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
6871 if (!(node->mn_flags & F_BIGDATA))
6872 newdata->mv_data = NODEDATA(node);
6873 }
6874 } else {
6875 mc->mc_ki[ptop]++;
6876 }
6877
6878 /* return tmp page to freelist */
6879 mdb_page_free(mc->mc_txn->mt_env, copy);
6880 done:
6881 {
6882 /* Adjust other cursors pointing to mp */
6883 MDB_cursor *m2, *m3;
6884 MDB_dbi dbi = mc->mc_dbi;
/* fixup is the number of keys left on mp; an index at or beyond it now
 * belongs to the right sibling.
 */
6885 int fixup = NUMKEYS(mp);
6886
6887 if (mc->mc_flags & C_SUB)
6888 dbi--;
6889
6890 for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
6891 if (m2 == mc) continue;
6892 if (mc->mc_flags & C_SUB)
6893 m3 = &m2->mc_xcursor->mx_cursor;
6894 else
6895 m3 = m2;
6896 if (!(m3->mc_flags & C_INITIALIZED))
6897 continue;
6898 if (m3->mc_flags & C_SPLITTING)
6899 continue;
6900 if (new_root) {
6901 int k;
6902 /* root split */
6903 for (k=m3->mc_top; k>=0; k--) {
6904 m3->mc_ki[k+1] = m3->mc_ki[k];
6905 m3->mc_pg[k+1] = m3->mc_pg[k];
6906 }
6907 if (m3->mc_ki[0] >= split_indx) {
6908 m3->mc_ki[0] = 1;
6909 } else {
6910 m3->mc_ki[0] = 0;
6911 }
6912 m3->mc_pg[0] = mc->mc_pg[0];
6913 m3->mc_snum++;
6914 m3->mc_top++;
6915 }
6916 if (m3->mc_pg[mc->mc_top] == mp) {
6917 if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE))
6918 m3->mc_ki[mc->mc_top]++;
6919 if (m3->mc_ki[mc->mc_top] >= fixup) {
6920 m3->mc_pg[mc->mc_top] = rp;
6921 m3->mc_ki[mc->mc_top] -= fixup;
6922 m3->mc_ki[ptop] = mn.mc_ki[ptop];
6923 }
6924 } else if (!did_split && m3->mc_pg[ptop] == mc->mc_pg[ptop] &&
6925 m3->mc_ki[ptop] >= mc->mc_ki[ptop]) {
6926 m3->mc_ki[ptop]++;
6927 }
6928 }
6929 }
6930 return rc;
6931 }
6932
6933 int
6934 mdb_put(MDB_txn *txn, MDB_dbi dbi,
6935 MDB_val *key, MDB_val *data, unsigned int flags)
6936 {
6937 MDB_cursor mc;
6938 MDB_xcursor mx;
6939
6940 assert(key != NULL);
6941 assert(data != NULL);
6942
6943 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
6944 return EINVAL;
6945
6946 if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
6947 return EACCES;
6948 }
6949
6950 if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
6951 return EINVAL;
6952 }
6953
6954 if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
6955 return EINVAL;
6956
6957 mdb_cursor_init(&mc, txn, dbi, &mx);
6958 return mdb_cursor_put(&mc, key, data, flags);
6959 }
6960
6961 int
6962 mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
6963 {
6964 if ((flag & CHANGEABLE) != flag)
6965 return EINVAL;
6966 if (onoff)
6967 env->me_flags |= flag;
6968 else
6969 env->me_flags &= ~flag;
6970 return MDB_SUCCESS;
6971 }
6972
6973 int
6974 mdb_env_get_flags(MDB_env *env, unsigned int *arg)
6975 {
6976 if (!env || !arg)
6977 return EINVAL;
6978
6979 *arg = env->me_flags;
6980 return MDB_SUCCESS;
6981 }
6982
6983 int
6984 mdb_env_get_path(MDB_env *env, const char **arg)
6985 {
6986 if (!env || !arg)
6987 return EINVAL;
6988
6989 *arg = env->me_path;
6990 return MDB_SUCCESS;
6991 }
6992
6993 /** Common code for #mdb_stat() and #mdb_env_stat().
6994 * @param[in] env the environment to operate in.
6995 * @param[in] db the #MDB_db record containing the stats to return.
6996 * @param[out] arg the address of an #MDB_stat structure to receive the stats.
6997 * @return 0, this function always succeeds.
6998 */
6999 static int
7000 mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
7001 {
7002 arg->ms_psize = env->me_psize;
7003 arg->ms_depth = db->md_depth;
7004 arg->ms_branch_pages = db->md_branch_pages;
7005 arg->ms_leaf_pages = db->md_leaf_pages;
7006 arg->ms_overflow_pages = db->md_overflow_pages;
7007 arg->ms_entries = db->md_entries;
7008
7009 return MDB_SUCCESS;
7010 }
7011 int
7012 mdb_env_stat(MDB_env *env, MDB_stat *arg)
7013 {
7014 int toggle;
7015
7016 if (env == NULL || arg == NULL)
7017 return EINVAL;
7018
7019 toggle = mdb_env_pick_meta(env);
7020
7021 return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg);
7022 }
7023
7024 int
7025 mdb_env_info(MDB_env *env, MDB_envinfo *arg)
7026 {
7027 int toggle;
7028
7029 if (env == NULL || arg == NULL)
7030 return EINVAL;
7031
7032 toggle = mdb_env_pick_meta(env);
7033 arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0;
7034 arg->me_mapsize = env->me_mapsize;
7035 arg->me_maxreaders = env->me_maxreaders;
7036 arg->me_numreaders = env->me_numreaders;
7037 arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg;
7038 arg->me_last_txnid = env->me_metas[toggle]->mm_txnid;
7039 return MDB_SUCCESS;
7040 }
7041
7042 /** Set the default comparison functions for a database.
7043 * Called immediately after a database is opened to set the defaults.
7044 * The user can then override them with #mdb_set_compare() or
7045 * #mdb_set_dupsort().
7046 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
7047 * @param[in] dbi A database handle returned by #mdb_dbi_open()
7048 */
7049 static void
7050 mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi)
7051 {
7052 uint16_t f = txn->mt_dbs[dbi].md_flags;
7053
7054 txn->mt_dbxs[dbi].md_cmp =
7055 (f & MDB_REVERSEKEY) ? mdb_cmp_memnr :
7056 (f & MDB_INTEGERKEY) ? mdb_cmp_cint : mdb_cmp_memn;
7057
7058 txn->mt_dbxs[dbi].md_dcmp =
7059 !(f & MDB_DUPSORT) ? 0 :
7060 ((f & MDB_INTEGERDUP)
7061 ? ((f & MDB_DUPFIXED) ? mdb_cmp_int : mdb_cmp_cint)
7062 : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn));
7063 }
7064
7065 int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
7066 {
7067 MDB_val key, data;
7068 MDB_dbi i;
7069 MDB_cursor mc;
7070 int rc, dbflag, exact;
7071 unsigned int unused = 0;
7072 size_t len;
7073
7074 if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
7075 mdb_default_cmp(txn, FREE_DBI);
7076 }
7077
7078 if ((flags & VALID_FLAGS) != flags)
7079 return EINVAL;
7080
7081 /* main DB? */
7082 if (!name) {
7083 *dbi = MAIN_DBI;
7084 if (flags & PERSISTENT_FLAGS) {
7085 uint16_t f2 = flags & PERSISTENT_FLAGS;
7086 /* make sure flag changes get committed */
7087 if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) {
7088 txn->mt_dbs[MAIN_DBI].md_flags |= f2;
7089 txn->mt_flags |= MDB_TXN_DIRTY;
7090 }
7091 }
7092 mdb_default_cmp(txn, MAIN_DBI);
7093 return MDB_SUCCESS;
7094 }
7095
7096 if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
7097 mdb_default_cmp(txn, MAIN_DBI);
7098 }
7099
7100 /* Is the DB already open? */
7101 len = strlen(name);
7102 for (i=2; i<txn->mt_numdbs; i++) {
7103 if (!txn->mt_dbxs[i].md_name.mv_size) {
7104 /* Remember this free slot */
7105 if (!unused) unused = i;
7106 continue;
7107 }
7108 if (len == txn->mt_dbxs[i].md_name.mv_size &&
7109 !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
7110 *dbi = i;
7111 return MDB_SUCCESS;
7112 }
7113 }
7114
7115 /* If no free slot and max hit, fail */
7116 if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs)
7117 return MDB_DBS_FULL;
7118
7119 /* Find the DB info */
7120 dbflag = DB_NEW|DB_VALID;
7121 exact = 0;
7122 key.mv_size = len;
7123 key.mv_data = (void *)name;
7124 mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
7125 rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
7126 if (rc == MDB_SUCCESS) {
7127 /* make sure this is actually a DB */
7128 MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
7129 if (!(node->mn_flags & F_SUBDATA))
7130 return EINVAL;
7131 } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
7132 /* Create if requested */
7133 MDB_db dummy;
7134 data.mv_size = sizeof(MDB_db);
7135 data.mv_data = &dummy;
7136 memset(&dummy, 0, sizeof(dummy));
7137 dummy.md_root = P_INVALID;
7138 dummy.md_flags = flags & PERSISTENT_FLAGS;
7139 rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
7140 dbflag |= DB_DIRTY;
7141 }
7142
7143 /* OK, got info, add to table */
7144 if (rc == MDB_SUCCESS) {
7145 unsigned int slot = unused ? unused : txn->mt_numdbs;
7146 txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
7147 txn->mt_dbxs[slot].md_name.mv_size = len;
7148 txn->mt_dbxs[slot].md_rel = NULL;
7149 txn->mt_dbflags[slot] = dbflag;
7150 memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
7151 *dbi = slot;
7152 txn->mt_env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags;
7153 mdb_default_cmp(txn, slot);
7154 if (!unused) {
7155 txn->mt_numdbs++;
7156 }
7157 }
7158
7159 return rc;
7160 }
7161
7162 int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
7163 {
7164 if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
7165 return EINVAL;
7166
7167 return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
7168 }
7169
7170 void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
7171 {
7172 char *ptr;
7173 if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
7174 return;
7175 ptr = env->me_dbxs[dbi].md_name.mv_data;
7176 env->me_dbxs[dbi].md_name.mv_data = NULL;
7177 env->me_dbxs[dbi].md_name.mv_size = 0;
7178 free(ptr);
7179 }
7180
7181 /** Add all the DB's pages to the free list.
7182 * @param[in] mc Cursor on the DB to free.
7183 * @param[in] subs non-Zero to check for sub-DBs in this DB.
7184 * @return 0 on success, non-zero on failure.
7185 */
7186 static int
7187 mdb_drop0(MDB_cursor *mc, int subs)
7188 {
7189 int rc;
7190
7191 rc = mdb_page_search(mc, NULL, 0);
7192 if (rc == MDB_SUCCESS) {
7193 MDB_node *ni;
7194 MDB_cursor mx;
7195 unsigned int i;
7196
7197 /* LEAF2 pages have no nodes, cannot have sub-DBs */
7198 if (IS_LEAF2(mc->mc_pg[mc->mc_top]))
7199 mdb_cursor_pop(mc);
7200
7201 mdb_cursor_copy(mc, &mx);
7202 while (mc->mc_snum > 0) {
7203 if (IS_LEAF(mc->mc_pg[mc->mc_top])) {
7204 for (i=0; i<NUMKEYS(mc->mc_pg[mc->mc_top]); i++) {
7205 ni = NODEPTR(mc->mc_pg[mc->mc_top], i);
7206 if (ni->mn_flags & F_BIGDATA) {
7207 int j, ovpages = OVPAGES(NODEDSZ(ni), mc->mc_txn->mt_env->me_psize);
7208 pgno_t pg;
7209 memcpy(&pg, NODEDATA(ni), sizeof(pg));
7210 for (j=0; j<ovpages; j++) {
7211 mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg);
7212 pg++;
7213 }
7214 } else if (subs && (ni->mn_flags & F_SUBDATA)) {
7215 mdb_xcursor_init1(mc, ni);
7216 rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
7217 if (rc)
7218 return rc;
7219 }
7220 }
7221 } else {
7222 for (i=0; i<NUMKEYS(mc->mc_pg[mc->mc_top]); i++) {
7223 pgno_t pg;
7224 ni = NODEPTR(mc->mc_pg[mc->mc_top], i);
7225 pg = NODEPGNO(ni);
7226 /* free it */
7227 mdb_midl_append(&mc->mc_txn->mt_free_pgs, pg);
7228 }
7229 }
7230 if (!mc->mc_top)
7231 break;
7232 mc->mc_ki[mc->mc_top] = i;
7233 rc = mdb_cursor_sibling(mc, 1);
7234 if (rc) {
7235 /* no more siblings, go back to beginning
7236 * of previous level.
7237 */
7238 mdb_cursor_pop(mc);
7239 mc->mc_ki[0] = 0;
7240 for (i=1; i<mc->mc_snum; i++) {
7241 mc->mc_ki[i] = 0;
7242 mc->mc_pg[i] = mx.mc_pg[i];
7243 }
7244 }
7245 }
7246 /* free it */
7247 mdb_midl_append(&mc->mc_txn->mt_free_pgs,
7248 mc->mc_db->md_root);
7249 }
7250 return 0;
7251 }
7252
7253 int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
7254 {
7255 MDB_cursor *mc;
7256 int rc;
7257
7258 if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
7259 return EINVAL;
7260
7261 if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
7262 return EACCES;
7263
7264 rc = mdb_cursor_open(txn, dbi, &mc);
7265 if (rc)
7266 return rc;
7267
7268 rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
7269 if (rc)
7270 goto leave;
7271
7272 /* Can't delete the main DB */
7273 if (del && dbi > MAIN_DBI) {
7274 rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
7275 if (!rc) {
7276 txn->mt_dbflags[dbi] = DB_STALE;
7277 mdb_dbi_close(txn->mt_env, dbi);
7278 }
7279 } else {
7280 /* reset the DB record, mark it dirty */
7281 txn->mt_dbflags[dbi] |= DB_DIRTY;
7282 txn->mt_dbs[dbi].md_depth = 0;
7283 txn->mt_dbs[dbi].md_branch_pages = 0;
7284 txn->mt_dbs[dbi].md_leaf_pages = 0;
7285 txn->mt_dbs[dbi].md_overflow_pages = 0;
7286 txn->mt_dbs[dbi].md_entries = 0;
7287 txn->mt_dbs[dbi].md_root = P_INVALID;
7288
7289 txn->mt_flags |= MDB_TXN_DIRTY;
7290 }
7291 leave:
7292 mdb_cursor_close(mc);
7293 return rc;
7294 }
7295
7296 int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
7297 {
7298 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7299 return EINVAL;
7300
7301 txn->mt_dbxs[dbi].md_cmp = cmp;
7302 return MDB_SUCCESS;
7303 }
7304
7305 int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
7306 {
7307 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7308 return EINVAL;
7309
7310 txn->mt_dbxs[dbi].md_dcmp = cmp;
7311 return MDB_SUCCESS;
7312 }
7313
7314 int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
7315 {
7316 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7317 return EINVAL;
7318
7319 txn->mt_dbxs[dbi].md_rel = rel;
7320 return MDB_SUCCESS;
7321 }
7322
7323 int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
7324 {
7325 if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7326 return EINVAL;
7327
7328 txn->mt_dbxs[dbi].md_relctx = ctx;
7329 return MDB_SUCCESS;
7330 }
7331
7332 /** @} */
0 .TH MDB_COPY 1 "2012/12/12" "LMDB 0.9.5"
1 .\" Copyright 2012 Howard Chu, Symas Corp. All Rights Reserved.
2 .\" Copying restrictions apply. See COPYRIGHT/LICENSE.
3 .SH NAME
4 mdb_copy \- LMDB environment copy tool
5 .SH SYNOPSIS
6 .B mdb_copy
7 .I srcpath\ dstpath
8 .SH DESCRIPTION
9 The
10 .B mdb_copy
11 utility copies an LMDB environment. The environment can
12 be copied regardless of whether it is currently in use.
13 .SH DIAGNOSTICS
14 Exit status is zero if no errors occur.
15 Errors result in a non-zero exit status and
16 a diagnostic message being written to standard error.
17 .SH "SEE ALSO"
18 .BR mdb_stat (1)
19 .SH AUTHOR
20 Howard Chu of Symas Corporation <http://www.symas.com>
0 /* mdb_copy.c - memory-mapped database backup tool */
1 /*
2 * Copyright 2012 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include "lmdb.h"
16
17 int main(int argc,char * argv[])
18 {
19 int rc;
20 MDB_env *env;
21 char *envname = argv[1];
22
23 if (argc != 3) {
24 fprintf(stderr, "usage: %s srcpath dstpath\n", argv[0]);
25 exit(EXIT_FAILURE);
26 }
27
28 rc = mdb_env_create(&env);
29
30 rc = mdb_env_open(env, envname, MDB_RDONLY, 0);
31 if (rc) {
32 printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
33 } else {
34 rc = mdb_env_copy(env, argv[2]);
35 if (rc)
36 printf("mdb_env_copy failed, error %d %s\n", rc, mdb_strerror(rc));
37 }
38 mdb_env_close(env);
39
40 return rc ? EXIT_FAILURE : EXIT_SUCCESS;
41 }
0 .TH MDB_STAT 1 "2012/12/12" "LMDB 0.9.5"
1 .\" Copyright 2012 Howard Chu, Symas Corp. All Rights Reserved.
2 .\" Copying restrictions apply. See COPYRIGHT/LICENSE.
3 .SH NAME
4 mdb_stat \- LMDB environment status tool
5 .SH SYNOPSIS
6 .B mdb_stat
7 .BR \ envpath
8 [\c
9 .BR \-e ]
10 [\c
11 .BR \-f [ f [ f ]]]
12 [\c
13 .BR \-n ]
14 [\c
15 .BR \-a \ |
16 .BI \-s \ subdb\fR]
17 .SH DESCRIPTION
18 The
19 .B mdb_stat
20 utility displays the status of an LMDB environment.
21 .SH OPTIONS
22 .TP
23 .BR \-e
24 Display information about the database environment.
25 .TP
26 .BR \-f
27 Display information about the environment freelist.
28 If \fB\-ff\fP is given, summarize each freelist entry.
29 If \fB\-fff\fP is given, display the full list of page IDs in the freelist.
30 .TP
31 .BR \-n
32 Display the status of an LMDB database which does not use subdirectories.
33 .TP
34 .BR \-a
35 Display the status of all of the subdatabases in the environment.
36 .TP
37 .BR \-s \ subdb
38 Display the status of a specific subdatabase.
39 .SH DIAGNOSTICS
40 Exit status is zero if no errors occur.
41 Errors result in a non-zero exit status and
42 a diagnostic message being written to standard error.
43 .SH "SEE ALSO"
44 .BR mdb_copy (1)
45 .SH AUTHOR
46 Howard Chu of Symas Corporation <http://www.symas.com>
0 /* mdb_stat.c - memory-mapped database status tool */
1 /*
2 * Copyright 2011-2013 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "lmdb.h"
18
19 static void prstat(MDB_stat *ms)
20 {
21 #if 0
22 printf(" Page size: %u\n", ms->ms_psize);
23 #endif
24 printf(" Tree depth: %u\n", ms->ms_depth);
25 printf(" Branch pages: %zu\n", ms->ms_branch_pages);
26 printf(" Leaf pages: %zu\n", ms->ms_leaf_pages);
27 printf(" Overflow pages: %zu\n", ms->ms_overflow_pages);
28 printf(" Entries: %zu\n", ms->ms_entries);
29 }
30
/* Write the command synopsis to standard error and terminate the
 * program with a failure status. Does not return.
 */
static void usage(char *prog)
{
	fprintf(stderr,
		"usage: %s dbpath "
		"[-e] [-f[f[f]]] [-n] [-a|-s subdb]\n",
		prog);
	exit(EXIT_FAILURE);
}
36
37 int main(int argc, char *argv[])
38 {
39 int i, rc;
40 MDB_env *env;
41 MDB_txn *txn;
42 MDB_dbi dbi;
43 MDB_stat mst;
44 MDB_envinfo mei;
45 char *prog = argv[0];
46 char *envname;
47 char *subname = NULL;
48 int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0;
49
50 if (argc < 2) {
51 usage(prog);
52 }
53
54 /* -a: print stat of main DB and all subDBs
55 * -s: print stat of only the named subDB
56 * -e: print env info
57 * -f: print freelist info
58 * -n: use NOSUBDIR flag on env_open
59 * (default) print stat of only the main DB
60 */
61 while ((i = getopt(argc, argv, "aefns:")) != EOF) {
62 switch(i) {
63 case 'a':
64 if (subname)
65 usage(prog);
66 alldbs++;
67 break;
68 case 'e':
69 envinfo++;
70 break;
71 case 'f':
72 freinfo++;
73 break;
74 case 'n':
75 envflags |= MDB_NOSUBDIR;
76 break;
77 case 's':
78 if (alldbs)
79 usage(prog);
80 subname = optarg;
81 break;
82 default:
83 usage(prog);
84 }
85 }
86
87 if (optind != argc - 1)
88 usage(prog);
89
90 envname = argv[optind];
91 rc = mdb_env_create(&env);
92
93 if (alldbs || subname) {
94 mdb_env_set_maxdbs(env, 4);
95 }
96
97 rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664);
98 if (rc) {
99 printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
100 goto env_close;
101 }
102 rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
103 if (rc) {
104 printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
105 goto env_close;
106 }
107
108 if (envinfo) {
109 rc = mdb_env_stat(env, &mst);
110 rc = mdb_env_info(env, &mei);
111 printf("Environment Info\n");
112 printf(" Map address: %p\n", mei.me_mapaddr);
113 printf(" Map size: %zu\n", mei.me_mapsize);
114 printf(" Page size: %u\n", mst.ms_psize);
115 printf(" Max pages: %zu\n", mei.me_mapsize / mst.ms_psize);
116 printf(" Number of pages used: %zu\n", mei.me_last_pgno+1);
117 printf(" Last transaction ID: %zu\n", mei.me_last_txnid);
118 printf(" Max readers: %u\n", mei.me_maxreaders);
119 printf(" Number of readers used: %u\n", mei.me_numreaders);
120 }
121
122 if (freinfo) {
123 MDB_cursor *cursor;
124 MDB_val key, data;
125 size_t pages = 0, *iptr;
126
127 printf("Freelist Status\n");
128 dbi = 0;
129 rc = mdb_cursor_open(txn, dbi, &cursor);
130 if (rc) {
131 printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
132 goto txn_abort;
133 }
134 rc = mdb_stat(txn, dbi, &mst);
135 if (rc) {
136 printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc));
137 goto txn_abort;
138 }
139 prstat(&mst);
140 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
141 iptr = data.mv_data;
142 pages += *iptr;
143 if (freinfo > 1) {
144 char *bad = "";
145 size_t pg, prev;
146 ssize_t i, j, span = 0;
147 j = *iptr++;
148 for (i = j, prev = 1; --i >= 0; ) {
149 pg = iptr[i];
150 if (pg <= prev)
151 bad = " [bad sequence]";
152 prev = pg;
153 pg += span;
154 for (; i >= span && iptr[i-span] == pg; span++, pg++) ;
155 }
156 printf(" Transaction %zu, %zd pages, maxspan %zd%s\n",
157 *(size_t *)key.mv_data, j, span, bad);
158 if (freinfo > 2) {
159 for (--j; j >= 0; ) {
160 pg = iptr[j];
161 for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ;
162 printf(span>1 ? " %9zu[%zd]\n" : " %9zu\n",
163 pg, span);
164 }
165 }
166 }
167 }
168 mdb_cursor_close(cursor);
169 printf(" Free pages: %zu\n", pages);
170 }
171
172 rc = mdb_open(txn, subname, 0, &dbi);
173 if (rc) {
174 printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
175 goto txn_abort;
176 }
177
178 rc = mdb_stat(txn, dbi, &mst);
179 if (rc) {
180 printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc));
181 goto txn_abort;
182 }
183 printf("Status of %s\n", subname ? subname : "Main DB");
184 prstat(&mst);
185
186 if (alldbs) {
187 MDB_cursor *cursor;
188 MDB_val key;
189
190 rc = mdb_cursor_open(txn, dbi, &cursor);
191 if (rc) {
192 printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
193 goto txn_abort;
194 }
195 while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT)) == 0) {
196 char *str = malloc(key.mv_size+1);
197 MDB_dbi db2;
198 memcpy(str, key.mv_data, key.mv_size);
199 str[key.mv_size] = '\0';
200 rc = mdb_open(txn, str, 0, &db2);
201 if (rc == MDB_SUCCESS)
202 printf("Status of %s\n", str);
203 free(str);
204 if (rc) continue;
205 rc = mdb_stat(txn, db2, &mst);
206 if (rc) {
207 printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc));
208 goto txn_abort;
209 }
210 prstat(&mst);
211 mdb_close(env, db2);
212 }
213 mdb_cursor_close(cursor);
214 }
215
216 mdb_close(env, dbi);
217 txn_abort:
218 mdb_txn_abort(txn);
219 env_close:
220 mdb_env_close(env);
221
222 return rc ? EXIT_FAILURE : EXIT_SUCCESS;
223 }
0 /** @file midl.c
1 * @brief ldap bdb back-end ID List functions */
2 /* $OpenLDAP$ */
3 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 *
5 * Copyright 2000-2013 The OpenLDAP Foundation.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
10 * Public License.
11 *
12 * A copy of this license is available in the file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
15 */
16
17 #include <limits.h>
18 #include <string.h>
19 #include <stdlib.h>
20 #include <sys/types.h>
21 #include <assert.h>
22 #include "midl.h"
23
24 /** @defgroup internal MDB Internals
25 * @{
26 */
27 /** @defgroup idls ID List Management
28 * @{
29 */
30 #define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) )
31
32 #if 0 /* superseded by append/sort */
33 static unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
34 {
35 /*
36 * binary search of id in ids
37 * if found, returns position of id
38 * if not found, returns first position greater than id
39 */
40 unsigned base = 0;
41 unsigned cursor = 1;
42 int val = 0;
43 unsigned n = ids[0];
44
45 while( 0 < n ) {
46 unsigned pivot = n >> 1;
47 cursor = base + pivot + 1;
48 val = CMP( ids[cursor], id );
49
50 if( val < 0 ) {
51 n = pivot;
52
53 } else if ( val > 0 ) {
54 base = cursor;
55 n -= pivot + 1;
56
57 } else {
58 return cursor;
59 }
60 }
61
62 if( val > 0 ) {
63 ++cursor;
64 }
65 return cursor;
66 }
67
/* Insert id into an IDL (compiled out: this code sits inside #if 0).
 * Returns 0 on success, -1 if the id is already present (or lies inside
 * a range IDL), -2 if the search returned an impossible position.
 * When the list would reach MDB_IDL_DB_MAX entries it is converted into
 * a range: ids[0] becomes MDB_NOID and ids[1]/ids[2] hold the bounds.
 */
68 int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
69 {
70 unsigned x, i;
71
/* A range IDL only needs its bounds widened */
72 if (MDB_IDL_IS_RANGE( ids )) {
73 /* if already in range, treat as a dup */
74 if (id >= MDB_IDL_RANGE_FIRST(ids) && id <= MDB_IDL_RANGE_LAST(ids))
75 return -1;
76 if (id < MDB_IDL_RANGE_FIRST(ids))
77 ids[1] = id;
78 else if (id > MDB_IDL_RANGE_LAST(ids))
79 ids[2] = id;
80 return 0;
81 }
82
/* x is the slot where id belongs (or already sits) */
83 x = mdb_midl_search( ids, id );
84 assert( x > 0 );
85
86 if( x < 1 ) {
87 /* internal error */
88 return -2;
89 }
90
91 if ( x <= ids[0] && ids[x] == id ) {
92 /* duplicate */
93 assert(0);
94 return -1;
95 }
96
/* Count the new entry; on reaching the limit collapse the list to a range */
97 if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
98 if( id < ids[1] ) {
99 ids[1] = id;
100 ids[2] = ids[ids[0]-1];
101 } else if ( ids[ids[0]-1] < id ) {
102 ids[2] = id;
103 } else {
104 ids[2] = ids[ids[0]-1];
105 }
106 ids[0] = MDB_NOID;
107
108 } else {
109 /* insert id */
/* shift entries x..end up by one to open the slot */
110 for (i=ids[0]; i>x; i--)
111 ids[i] = ids[i-1];
112 ids[x] = id;
113 }
114
115 return 0;
116 }
117 #endif
118
119 MDB_IDL mdb_midl_alloc(void)
120 {
121 MDB_IDL ids = malloc((MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
122 *ids++ = MDB_IDL_UM_MAX;
123 return ids;
124 }
125
126 void mdb_midl_free(MDB_IDL ids)
127 {
128 free(ids-1);
129 }
130
131 int mdb_midl_shrink( MDB_IDL *idp )
132 {
133 MDB_IDL ids = *idp;
134 if (*(--ids) > MDB_IDL_UM_MAX) {
135 ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
136 *ids++ = MDB_IDL_UM_MAX;
137 *idp = ids;
138 return 1;
139 }
140 return 0;
141 }
142
143 int mdb_midl_append( MDB_IDL *idp, MDB_ID id )
144 {
145 MDB_IDL ids = *idp;
146 /* Too big? */
147 if (ids[0] >= ids[-1]) {
148 MDB_IDL idn = ids-1;
149 /* grow it */
150 idn = realloc(idn, (*idn + MDB_IDL_UM_MAX + 1) * sizeof(MDB_ID));
151 if (!idn)
152 return -1;
153 *idn++ += MDB_IDL_UM_MAX;
154 ids = idn;
155 *idp = ids;
156 }
157 ids[0]++;
158 ids[ids[0]] = id;
159 return 0;
160 }
161
162 int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app )
163 {
164 MDB_IDL ids = *idp;
165 /* Too big? */
166 if (ids[0] + app[0] >= ids[-1]) {
167 MDB_IDL idn = ids-1;
168 /* grow it */
169 idn = realloc(idn, (*idn + app[-1]) * sizeof(MDB_ID));
170 if (!idn)
171 return -1;
172 *idn++ += app[-1];
173 ids = idn;
174 *idp = ids;
175 }
176 memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID));
177 ids[0] += app[0];
178 return 0;
179 }
180
181 /* Quicksort + Insertion sort for small arrays */
182
183 #define SMALL 8
184 #define SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; }
185
/* Sort the entries ids[1]..ids[ids[0]] of an IDL in place, largest first.
 * Non-recursive quicksort: pending sub-ranges are kept as (left, right)
 * index pairs on the local array istack. Ranges shorter than SMALL are
 * finished with an insertion sort.
 */
186 void
187 mdb_midl_sort( MDB_IDL ids )
188 {
189 /* Max possible depth of int-indexed tree * 2 items/level */
190 int istack[sizeof(int)*CHAR_BIT * 2];
191 int i,j,k,l,ir,jstack;
192 MDB_ID a, itmp;
193
/* l and ir are the left and right bounds of the range being sorted */
194 ir = (int)ids[0];
195 l = 1;
196 jstack = 0;
197 for(;;) {
198 if (ir - l < SMALL) { /* Insertion sort */
199 for (j=l+1;j<=ir;j++) {
200 a = ids[j];
/* shift smaller entries right until one >= a is found */
201 for (i=j-1;i>=1;i--) {
202 if (ids[i] >= a) break;
203 ids[i+1] = ids[i];
204 }
205 ids[i+1] = a;
206 }
/* nothing pending: done; otherwise pop the next range */
207 if (jstack == 0) break;
208 ir = istack[jstack--];
209 l = istack[jstack--];
210 } else {
211 k = (l + ir) >> 1; /* Choose median of left, center, right */
212 SWAP(ids[k], ids[l+1]);
213 if (ids[l] < ids[ir]) {
214 SWAP(ids[l], ids[ir]);
215 }
216 if (ids[l+1] < ids[ir]) {
217 SWAP(ids[l+1], ids[ir]);
218 }
219 if (ids[l] < ids[l+1]) {
220 SWAP(ids[l], ids[l+1]);
221 }
/* now ids[l] >= ids[l+1] >= ids[ir]; ids[l+1] is the pivot */
222 i = l+1;
223 j = ir;
224 a = ids[l+1];
/* partition: i scans right past larger entries, j scans left past smaller ones */
225 for(;;) {
226 do i++; while(ids[i] > a);
227 do j--; while(ids[j] < a);
228 if (j < i) break;
229 SWAP(ids[i],ids[j]);
230 }
/* put the pivot into its final place at j */
231 ids[l+1] = ids[j];
232 ids[j] = a;
/* push the larger side, continue with the smaller one */
233 jstack += 2;
234 if (ir-i+1 >= j-l) {
235 istack[jstack] = ir;
236 istack[jstack-1] = i;
237 ir = j-1;
238 } else {
239 istack[jstack] = j-1;
240 istack[jstack-1] = l;
241 l = i;
242 }
243 }
244 }
245 }
246
247 unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id )
248 {
249 /*
250 * binary search of id in ids
251 * if found, returns position of id
252 * if not found, returns first position greater than id
253 */
254 unsigned base = 0;
255 unsigned cursor = 1;
256 int val = 0;
257 unsigned n = (unsigned)ids[0].mid;
258
259 while( 0 < n ) {
260 unsigned pivot = n >> 1;
261 cursor = base + pivot + 1;
262 val = CMP( id, ids[cursor].mid );
263
264 if( val < 0 ) {
265 n = pivot;
266
267 } else if ( val > 0 ) {
268 base = cursor;
269 n -= pivot + 1;
270
271 } else {
272 return cursor;
273 }
274 }
275
276 if( val > 0 ) {
277 ++cursor;
278 }
279 return cursor;
280 }
281
282 int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id )
283 {
284 unsigned x, i;
285
286 x = mdb_mid2l_search( ids, id->mid );
287 assert( x > 0 );
288
289 if( x < 1 ) {
290 /* internal error */
291 return -2;
292 }
293
294 if ( x <= ids[0].mid && ids[x].mid == id->mid ) {
295 /* duplicate */
296 return -1;
297 }
298
299 if ( ids[0].mid >= MDB_IDL_UM_MAX ) {
300 /* too big */
301 return -2;
302
303 } else {
304 /* insert id */
305 ids[0].mid++;
306 for (i=(unsigned)ids[0].mid; i>x; i--)
307 ids[i] = ids[i-1];
308 ids[x] = *id;
309 }
310
311 return 0;
312 }
313
314 int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id )
315 {
316 /* Too big? */
317 if (ids[0].mid >= MDB_IDL_UM_MAX) {
318 return -2;
319 }
320 ids[0].mid++;
321 ids[ids[0].mid] = *id;
322 return 0;
323 }
324
325 /** @} */
326 /** @} */
0 /** @file midl.h
1 * @brief mdb ID List header file.
2 *
3 * This file was originally part of back-bdb but has been
4 * modified for use in libmdb. Most of the macros defined
5 * in this file are unused, just left over from the original.
6 *
7 * This file is only used internally in libmdb and its definitions
8 * are not exposed publicly.
9 */
10 /* $OpenLDAP$ */
11 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
12 *
13 * Copyright 2000-2013 The OpenLDAP Foundation.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted only as authorized by the OpenLDAP
18 * Public License.
19 *
20 * A copy of this license is available in the file LICENSE in the
21 * top-level directory of the distribution or, alternatively, at
22 * <http://www.OpenLDAP.org/license.html>.
23 */
24
25 #ifndef _MDB_MIDL_H_
26 #define _MDB_MIDL_H_
27
28 #include <stddef.h>
29
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33
34 /** @defgroup internal MDB Internals
35 * @{
36 */
37
38 /** @defgroup idls ID List Management
39 * @{
40 */
41 /** A generic ID number. These were entryIDs in back-bdb.
42 * Preferably it should have the same size as a pointer.
43 */
44 typedef size_t MDB_ID;
45
46 /** An IDL is an ID List, a sorted array of IDs. The first
47 * element of the array is a counter for how many actual
48 * IDs are in the list. In the original back-bdb code, IDLs are
49 * sorted in ascending order. For libmdb IDLs are sorted in
50 * descending order.
51 */
52 typedef MDB_ID *MDB_IDL;
53
54 #define MDB_NOID (~(MDB_ID)0)
55
56 /* IDL sizes - likely should be even bigger
57 * limiting factors: sizeof(ID), thread stack size
58 */
59 #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
60 #define MDB_IDL_DB_SIZE (1<<MDB_IDL_LOGN)
61 #define MDB_IDL_UM_SIZE (1<<(MDB_IDL_LOGN+1))
62 #define MDB_IDL_UM_SIZEOF (MDB_IDL_UM_SIZE * sizeof(MDB_ID))
63
64 #define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE-1)
65
66 #define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE-1)
67
68 #define MDB_IDL_IS_RANGE(ids) ((ids)[0] == MDB_NOID)
69 #define MDB_IDL_RANGE_SIZE (3)
70 #define MDB_IDL_RANGE_SIZEOF (MDB_IDL_RANGE_SIZE * sizeof(MDB_ID))
71 #define MDB_IDL_SIZEOF(ids) ((MDB_IDL_IS_RANGE(ids) \
72 ? MDB_IDL_RANGE_SIZE : ((ids)[0]+1)) * sizeof(MDB_ID))
73
74 #define MDB_IDL_RANGE_FIRST(ids) ((ids)[1])
75 #define MDB_IDL_RANGE_LAST(ids) ((ids)[2])
76
77 #define MDB_IDL_RANGE( ids, f, l ) \
78 do { \
79 (ids)[0] = MDB_NOID; \
80 (ids)[1] = (f); \
81 (ids)[2] = (l); \
82 } while(0)
83
84 #define MDB_IDL_ZERO(ids) \
85 do { \
86 (ids)[0] = 0; \
87 (ids)[1] = 0; \
88 (ids)[2] = 0; \
89 } while(0)
90
91 #define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 )
92 #define MDB_IDL_IS_ALL( range, ids ) ( (ids)[0] == MDB_NOID \
93 && (ids)[1] <= (range)[1] && (range)[2] <= (ids)[2] )
94
95 #define MDB_IDL_CPY( dst, src ) (memcpy( dst, src, MDB_IDL_SIZEOF( src ) ))
96
97 #define MDB_IDL_ID( bdb, ids, id ) MDB_IDL_RANGE( ids, id, ((bdb)->bi_lastid) )
98 #define MDB_IDL_ALL( bdb, ids ) MDB_IDL_RANGE( ids, 1, ((bdb)->bi_lastid) )
99
100 #define MDB_IDL_FIRST( ids ) ( (ids)[1] )
101 #define MDB_IDL_LAST( ids ) ( MDB_IDL_IS_RANGE(ids) \
102 ? (ids)[2] : (ids)[(ids)[0]] )
103
104 #define MDB_IDL_N( ids ) ( MDB_IDL_IS_RANGE(ids) \
105 ? ((ids)[2]-(ids)[1])+1 : (ids)[0] )
106
107 #if 0 /* superseded by append/sort */
108 /** Insert an ID into an IDL.
109 * @param[in,out] ids The IDL to insert into.
110 * @param[in] id The ID to insert.
111 * @return 0 on success, -1 if the ID was already present in the IDL.
112 */
113 int mdb_midl_insert( MDB_IDL ids, MDB_ID id );
114 #endif
115
116 /** Allocate an IDL.
117 * Allocates memory for an IDL of a default size.
118 * @return IDL on success, NULL on failure.
119 */
120 MDB_IDL mdb_midl_alloc(void);
121
122 /** Free an IDL.
123 * @param[in] ids The IDL to free.
124 */
125 void mdb_midl_free(MDB_IDL ids);
126
127 /** Shrink an IDL.
128 * Return the IDL to the default size if it has grown larger.
129 * @param[in,out] idp Address of the IDL to shrink.
130 * @return 0 on no change, non-zero if shrunk.
131 */
132 int mdb_midl_shrink(MDB_IDL *idp);
133
134 /** Append an ID onto an IDL.
135 * @param[in,out] idp Address of the IDL to append to.
136 * @param[in] id The ID to append.
137 * @return 0 on success, -1 if the IDL was full and could not be grown (out of memory).
138 */
139 int mdb_midl_append( MDB_IDL *idp, MDB_ID id );
140
141 /** Append an IDL onto an IDL.
142 * @param[in,out] idp Address of the IDL to append to.
143 * @param[in] app The IDL to append.
144 * @return 0 on success, -1 if the IDL needed to grow and could not (out of memory).
145 */
146 int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app );
147
148 /** Sort an IDL.
149 * @param[in,out] ids The IDL to sort.
150 */
151 void mdb_midl_sort( MDB_IDL ids );
152
153 /** An ID2 is an ID/pointer pair.
154 */
155 typedef struct MDB_ID2 {
156 MDB_ID mid; /**< The ID */
157 void *mptr; /**< The pointer */
158 } MDB_ID2;
159
160 /** An ID2L is an ID2 List, a sorted array of ID2s.
161 * The first element's \b mid member is a count of how many actual
162 * elements are in the array. The \b mptr member of the first element is unused.
163 * The array is sorted in ascending order by \b mid.
164 */
165 typedef MDB_ID2 *MDB_ID2L;
166
167 /** Search for an ID in an ID2L.
168 * @param[in] ids The ID2L to search.
169 * @param[in] id The ID to search for.
170 * @return The index of the first ID2 whose \b mid member is greater than or equal to \b id.
171 */
172 unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id );
173
174
175 /** Insert an ID2 into a ID2L.
176 * @param[in,out] ids The ID2L to insert into.
177 * @param[in] id The ID2 to insert.
178 * @return 0 on success, -1 if the ID was already present in the ID2L, -2 if the ID2L is too big.
179 */
180 int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id );
181
182 /** Append an ID2 into a ID2L.
183 * @param[in,out] ids The ID2L to append into.
184 * @param[in] id The ID2 to append.
185 * @return 0 on success, -2 if the ID2L is too big.
186 */
187 int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id );
188
189 /** @} */
190 /** @} */
191 #ifdef __cplusplus
192 }
193 #endif
194 #endif /* _MDB_MIDL_H_ */
0 /* mtest.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13 #define _XOPEN_SOURCE 500 /* srandom(), random() */
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <time.h>
17 #include "lmdb.h"
18
19 int main(int argc,char * argv[])
20 {
21 int i = 0, j = 0, rc;
22 MDB_env *env;
23 MDB_dbi dbi;
24 MDB_val key, data;
25 MDB_txn *txn;
26 MDB_stat mst;
27 MDB_cursor *cursor, *cur2;
28 int count;
29 int *values;
30 char sval[32];
31
32 srandom(time(NULL));
33
34 count = (random()%384) + 64;
35 values = (int *)malloc(count*sizeof(int));
36
37 for(i = 0;i<count;i++) {
38 values[i] = random()%1024;
39 }
40
41 rc = mdb_env_create(&env);
42 rc = mdb_env_set_mapsize(env, 10485760);
43 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP /*|MDB_NOSYNC*/, 0664);
44 rc = mdb_txn_begin(env, NULL, 0, &txn);
45 rc = mdb_open(txn, NULL, 0, &dbi);
46
47 key.mv_size = sizeof(int);
48 key.mv_data = sval;
49 data.mv_size = sizeof(sval);
50 data.mv_data = sval;
51
52 printf("Adding %d values\n", count);
53 for (i=0;i<count;i++) {
54 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
55 rc = mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE);
56 if (rc) {
57 j++;
58 data.mv_size = sizeof(sval);
59 data.mv_data = sval;
60 }
61 }
62 if (j) printf("%d duplicates skipped\n", j);
63 rc = mdb_txn_commit(txn);
64 rc = mdb_env_stat(env, &mst);
65
66 rc = mdb_txn_begin(env, NULL, 1, &txn);
67 rc = mdb_cursor_open(txn, dbi, &cursor);
68 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
69 printf("key: %p %.*s, data: %p %.*s\n",
70 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
71 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
72 }
73 mdb_cursor_close(cursor);
74 mdb_txn_abort(txn);
75
76 j=0;
77 key.mv_data = sval;
78 for (i= count - 1; i > -1; i-= (random()%5)) {
79 j++;
80 txn=NULL;
81 rc = mdb_txn_begin(env, NULL, 0, &txn);
82 sprintf(sval, "%03x ", values[i]);
83 rc = mdb_del(txn, dbi, &key, NULL);
84 if (rc) {
85 j--;
86 mdb_txn_abort(txn);
87 } else {
88 rc = mdb_txn_commit(txn);
89 }
90 }
91 free(values);
92 printf("Deleted %d values\n", j);
93
94 rc = mdb_env_stat(env, &mst);
95 rc = mdb_txn_begin(env, NULL, 1, &txn);
96 rc = mdb_cursor_open(txn, dbi, &cursor);
97 printf("Cursor next\n");
98 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
99 printf("key: %.*s, data: %.*s\n",
100 (int) key.mv_size, (char *) key.mv_data,
101 (int) data.mv_size, (char *) data.mv_data);
102 }
103 printf("Cursor last\n");
104 rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST);
105 printf("key: %.*s, data: %.*s\n",
106 (int) key.mv_size, (char *) key.mv_data,
107 (int) data.mv_size, (char *) data.mv_data);
108 printf("Cursor prev\n");
109 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
110 printf("key: %.*s, data: %.*s\n",
111 (int) key.mv_size, (char *) key.mv_data,
112 (int) data.mv_size, (char *) data.mv_data);
113 }
114 printf("Cursor last/prev\n");
115 rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST);
116 printf("key: %.*s, data: %.*s\n",
117 (int) key.mv_size, (char *) key.mv_data,
118 (int) data.mv_size, (char *) data.mv_data);
119 rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV);
120 printf("key: %.*s, data: %.*s\n",
121 (int) key.mv_size, (char *) key.mv_data,
122 (int) data.mv_size, (char *) data.mv_data);
123
124 mdb_txn_abort(txn);
125
126 printf("Deleting with cursor\n");
127 rc = mdb_txn_begin(env, NULL, 0, &txn);
128 rc = mdb_cursor_open(txn, dbi, &cur2);
129 for (i=0; i<50; i++) {
130 rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT);
131 printf("key: %p %.*s, data: %p %.*s\n",
132 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
133 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
134 rc = mdb_del(txn, dbi, &key, NULL);
135 }
136
137 printf("Restarting cursor in txn\n");
138 rc = mdb_cursor_get(cur2, &key, &data, MDB_FIRST);
139 printf("key: %p %.*s, data: %p %.*s\n",
140 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
141 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
142 for (i=0; i<32; i++) {
143 rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT);
144 printf("key: %p %.*s, data: %p %.*s\n",
145 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
146 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
147 }
148 mdb_cursor_close(cur2);
149 rc = mdb_txn_commit(txn);
150
151 printf("Restarting cursor outside txn\n");
152 rc = mdb_txn_begin(env, NULL, 0, &txn);
153 rc = mdb_cursor_open(txn, dbi, &cursor);
154 rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST);
155 printf("key: %p %.*s, data: %p %.*s\n",
156 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
157 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
158 for (i=0; i<32; i++) {
159 rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT);
160 printf("key: %p %.*s, data: %p %.*s\n",
161 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
162 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
163 }
164 mdb_cursor_close(cursor);
165 mdb_close(env, dbi);
166
167 mdb_txn_abort(txn);
168 mdb_env_close(env);
169
170 return 0;
171 }
0 /* mtest2.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13
14 /* Just like mtest.c, but using a subDB instead of the main DB */
15
16 #define _XOPEN_SOURCE 500 /* srandom(), random() */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <time.h>
20 #include "lmdb.h"
21
22 int main(int argc,char * argv[])
23 {
24 int i = 0, j = 0, rc;
25 MDB_env *env;
26 MDB_dbi dbi;
27 MDB_val key, data;
28 MDB_txn *txn;
29 MDB_stat mst;
30 MDB_cursor *cursor;
31 int count;
32 int *values;
33 char sval[32];
34
35 srandom(time(NULL));
36
37 count = (random()%384) + 64;
38 values = (int *)malloc(count*sizeof(int));
39
40 for(i = 0;i<count;i++) {
41 values[i] = random()%1024;
42 }
43
44 rc = mdb_env_create(&env);
45 rc = mdb_env_set_mapsize(env, 10485760);
46 rc = mdb_env_set_maxdbs(env, 4);
47 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664);
48 rc = mdb_txn_begin(env, NULL, 0, &txn);
49 rc = mdb_open(txn, "id1", MDB_CREATE, &dbi);
50
51 key.mv_size = sizeof(int);
52 key.mv_data = sval;
53 data.mv_size = sizeof(sval);
54 data.mv_data = sval;
55
56 printf("Adding %d values\n", count);
57 for (i=0;i<count;i++) {
58 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
59 rc = mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE);
60 if (rc) j++;
61 }
62 if (j) printf("%d duplicates skipped\n", j);
63 rc = mdb_txn_commit(txn);
64 rc = mdb_env_stat(env, &mst);
65
66 rc = mdb_txn_begin(env, NULL, 1, &txn);
67 rc = mdb_cursor_open(txn, dbi, &cursor);
68 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
69 printf("key: %p %.*s, data: %p %.*s\n",
70 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
71 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
72 }
73 mdb_cursor_close(cursor);
74 mdb_txn_abort(txn);
75
76 j=0;
77 key.mv_data = sval;
78 for (i= count - 1; i > -1; i-= (random()%5)) {
79 j++;
80 txn=NULL;
81 rc = mdb_txn_begin(env, NULL, 0, &txn);
82 sprintf(sval, "%03x ", values[i]);
83 rc = mdb_del(txn, dbi, &key, NULL);
84 if (rc) {
85 j--;
86 mdb_txn_abort(txn);
87 } else {
88 rc = mdb_txn_commit(txn);
89 }
90 }
91 free(values);
92 printf("Deleted %d values\n", j);
93
94 rc = mdb_env_stat(env, &mst);
95 rc = mdb_txn_begin(env, NULL, 1, &txn);
96 rc = mdb_cursor_open(txn, dbi, &cursor);
97 printf("Cursor next\n");
98 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
99 printf("key: %.*s, data: %.*s\n",
100 (int) key.mv_size, (char *) key.mv_data,
101 (int) data.mv_size, (char *) data.mv_data);
102 }
103 printf("Cursor prev\n");
104 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
105 printf("key: %.*s, data: %.*s\n",
106 (int) key.mv_size, (char *) key.mv_data,
107 (int) data.mv_size, (char *) data.mv_data);
108 }
109 mdb_cursor_close(cursor);
110 mdb_close(env, dbi);
111
112 mdb_txn_abort(txn);
113 mdb_env_close(env);
114
115 return 0;
116 }
0 /* mtest3.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13
14 /* Tests for sorted duplicate DBs */
15 #define _XOPEN_SOURCE 500 /* srandom(), random() */
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include "lmdb.h"
21
22 int main(int argc,char * argv[])
23 {
24 int i = 0, j = 0, rc;
25 MDB_env *env;
26 MDB_dbi dbi;
27 MDB_val key, data;
28 MDB_txn *txn;
29 MDB_stat mst;
30 MDB_cursor *cursor;
31 int count;
32 int *values;
33 char sval[32];
34 char kval[sizeof(int)];
35
36 srandom(time(NULL));
37
38 memset(sval, 0, sizeof(sval));
39
40 count = (random()%384) + 64;
41 values = (int *)malloc(count*sizeof(int));
42
43 for(i = 0;i<count;i++) {
44 values[i] = random()%1024;
45 }
46
47 rc = mdb_env_create(&env);
48 rc = mdb_env_set_mapsize(env, 10485760);
49 rc = mdb_env_set_maxdbs(env, 4);
50 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664);
51 rc = mdb_txn_begin(env, NULL, 0, &txn);
52 rc = mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi);
53
54 key.mv_size = sizeof(int);
55 key.mv_data = kval;
56 data.mv_size = sizeof(sval);
57 data.mv_data = sval;
58
59 printf("Adding %d values\n", count);
60 for (i=0;i<count;i++) {
61 if (!(i & 0x0f))
62 sprintf(kval, "%03x", values[i]);
63 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
64 rc = mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA);
65 if (rc) j++;
66 }
67 if (j) printf("%d duplicates skipped\n", j);
68 rc = mdb_txn_commit(txn);
69 rc = mdb_env_stat(env, &mst);
70
71 rc = mdb_txn_begin(env, NULL, 1, &txn);
72 rc = mdb_cursor_open(txn, dbi, &cursor);
73 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
74 printf("key: %p %.*s, data: %p %.*s\n",
75 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
76 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
77 }
78 mdb_cursor_close(cursor);
79 mdb_txn_abort(txn);
80
81 j=0;
82
83 for (i= count - 1; i > -1; i-= (random()%5)) {
84 j++;
85 txn=NULL;
86 rc = mdb_txn_begin(env, NULL, 0, &txn);
87 sprintf(kval, "%03x", values[i & ~0x0f]);
88 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
89 key.mv_size = sizeof(int);
90 key.mv_data = kval;
91 data.mv_size = sizeof(sval);
92 data.mv_data = sval;
93 rc = mdb_del(txn, dbi, &key, &data);
94 if (rc) {
95 j--;
96 mdb_txn_abort(txn);
97 } else {
98 rc = mdb_txn_commit(txn);
99 }
100 }
101 free(values);
102 printf("Deleted %d values\n", j);
103
104 rc = mdb_env_stat(env, &mst);
105 rc = mdb_txn_begin(env, NULL, 1, &txn);
106 rc = mdb_cursor_open(txn, dbi, &cursor);
107 printf("Cursor next\n");
108 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
109 printf("key: %.*s, data: %.*s\n",
110 (int) key.mv_size, (char *) key.mv_data,
111 (int) data.mv_size, (char *) data.mv_data);
112 }
113 printf("Cursor prev\n");
114 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
115 printf("key: %.*s, data: %.*s\n",
116 (int) key.mv_size, (char *) key.mv_data,
117 (int) data.mv_size, (char *) data.mv_data);
118 }
119 mdb_cursor_close(cursor);
120 mdb_close(env, dbi);
121
122 mdb_txn_abort(txn);
123 mdb_env_close(env);
124
125 return 0;
126 }
0 /* mtest4.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13
14 /* Tests for sorted duplicate DBs with fixed-size keys */
15 #define _XOPEN_SOURCE 500 /* srandom(), random() */
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include "lmdb.h"
21
22 int main(int argc,char * argv[])
23 {
24 int i = 0, j = 0, rc;
25 MDB_env *env;
26 MDB_dbi dbi;
27 MDB_val key, data;
28 MDB_txn *txn;
29 MDB_stat mst;
30 MDB_cursor *cursor;
31 int count;
32 int *values;
33 char sval[8];
34 char kval[sizeof(int)];
35
36 memset(sval, 0, sizeof(sval));
37
38 count = 510;
39 values = (int *)malloc(count*sizeof(int));
40
41 for(i = 0;i<count;i++) {
42 values[i] = i*5;
43 }
44
45 rc = mdb_env_create(&env);
46 rc = mdb_env_set_mapsize(env, 10485760);
47 rc = mdb_env_set_maxdbs(env, 4);
48 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664);
49 rc = mdb_txn_begin(env, NULL, 0, &txn);
50 rc = mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT|MDB_DUPFIXED, &dbi);
51
52 key.mv_size = sizeof(int);
53 key.mv_data = kval;
54 data.mv_size = sizeof(sval);
55 data.mv_data = sval;
56
57 printf("Adding %d values\n", count);
58 strcpy(kval, "001");
59 for (i=0;i<count;i++) {
60 sprintf(sval, "%07x", values[i]);
61 rc = mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA);
62 if (rc) j++;
63 }
64 if (j) printf("%d duplicates skipped\n", j);
65 rc = mdb_txn_commit(txn);
66 rc = mdb_env_stat(env, &mst);
67
68 /* there should be one full page of dups now.
69 */
70 rc = mdb_txn_begin(env, NULL, 1, &txn);
71 rc = mdb_cursor_open(txn, dbi, &cursor);
72 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
73 printf("key: %p %.*s, data: %p %.*s\n",
74 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
75 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
76 }
77 mdb_cursor_close(cursor);
78 mdb_txn_abort(txn);
79
80 /* test all 3 branches of split code:
81 * 1: new key in lower half
82 * 2: new key at split point
83 * 3: new key in upper half
84 */
85
86 key.mv_size = sizeof(int);
87 key.mv_data = kval;
88 data.mv_size = sizeof(sval);
89 data.mv_data = sval;
90
91 sprintf(sval, "%07x", values[3]+1);
92 rc = mdb_txn_begin(env, NULL, 0, &txn);
93 rc = mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA);
94 mdb_txn_abort(txn);
95
96 sprintf(sval, "%07x", values[255]+1);
97 rc = mdb_txn_begin(env, NULL, 0, &txn);
98 rc = mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA);
99 mdb_txn_abort(txn);
100
101 sprintf(sval, "%07x", values[500]+1);
102 rc = mdb_txn_begin(env, NULL, 0, &txn);
103 rc = mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA);
104 rc = mdb_txn_commit(txn);
105
106 /* Try MDB_NEXT_MULTIPLE */
107 rc = mdb_txn_begin(env, NULL, 0, &txn);
108 rc = mdb_cursor_open(txn, dbi, &cursor);
109 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_MULTIPLE)) == 0) {
110 printf("key: %.*s, data: %.*s\n",
111 (int) key.mv_size, (char *) key.mv_data,
112 (int) data.mv_size, (char *) data.mv_data);
113 }
114 mdb_cursor_close(cursor);
115 mdb_txn_abort(txn);
116 j=0;
117
118 for (i= count - 1; i > -1; i-= (random()%3)) {
119 j++;
120 txn=NULL;
121 rc = mdb_txn_begin(env, NULL, 0, &txn);
122 sprintf(sval, "%07x", values[i]);
123 key.mv_size = sizeof(int);
124 key.mv_data = kval;
125 data.mv_size = sizeof(sval);
126 data.mv_data = sval;
127 rc = mdb_del(txn, dbi, &key, &data);
128 if (rc) {
129 j--;
130 mdb_txn_abort(txn);
131 } else {
132 rc = mdb_txn_commit(txn);
133 }
134 }
135 free(values);
136 printf("Deleted %d values\n", j);
137
138 rc = mdb_env_stat(env, &mst);
139 rc = mdb_txn_begin(env, NULL, 1, &txn);
140 rc = mdb_cursor_open(txn, dbi, &cursor);
141 printf("Cursor next\n");
142 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
143 printf("key: %.*s, data: %.*s\n",
144 (int) key.mv_size, (char *) key.mv_data,
145 (int) data.mv_size, (char *) data.mv_data);
146 }
147 printf("Cursor prev\n");
148 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
149 printf("key: %.*s, data: %.*s\n",
150 (int) key.mv_size, (char *) key.mv_data,
151 (int) data.mv_size, (char *) data.mv_data);
152 }
153 mdb_cursor_close(cursor);
154 mdb_close(env, dbi);
155
156 mdb_txn_abort(txn);
157 mdb_env_close(env);
158
159 return 0;
160 }
0 /* mtest5.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13
14 /* Tests for sorted duplicate DBs using cursor_put */
15 #define _XOPEN_SOURCE 500 /* srandom(), random() */
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include "lmdb.h"
21
22 int main(int argc,char * argv[])
23 {
24 int i = 0, j = 0, rc;
25 MDB_env *env;
26 MDB_dbi dbi;
27 MDB_val key, data;
28 MDB_txn *txn;
29 MDB_stat mst;
30 MDB_cursor *cursor;
31 int count;
32 int *values;
33 char sval[32];
34 char kval[sizeof(int)];
35
36 srandom(time(NULL));
37
38 memset(sval, 0, sizeof(sval));
39
40 count = (random()%384) + 64;
41 values = (int *)malloc(count*sizeof(int));
42
43 for(i = 0;i<count;i++) {
44 values[i] = random()%1024;
45 }
46
47 rc = mdb_env_create(&env);
48 rc = mdb_env_set_mapsize(env, 10485760);
49 rc = mdb_env_set_maxdbs(env, 4);
50 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664);
51 rc = mdb_txn_begin(env, NULL, 0, &txn);
52 rc = mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi);
53 rc = mdb_cursor_open(txn, dbi, &cursor);
54
55 key.mv_size = sizeof(int);
56 key.mv_data = kval;
57 data.mv_size = sizeof(sval);
58 data.mv_data = sval;
59
60 printf("Adding %d values\n", count);
61 for (i=0;i<count;i++) {
62 if (!(i & 0x0f))
63 sprintf(kval, "%03x", values[i]);
64 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
65 rc = mdb_cursor_put(cursor, &key, &data, MDB_NODUPDATA);
66 if (rc) j++;
67 }
68 if (j) printf("%d duplicates skipped\n", j);
69 mdb_cursor_close(cursor);
70 rc = mdb_txn_commit(txn);
71 rc = mdb_env_stat(env, &mst);
72
73 rc = mdb_txn_begin(env, NULL, 1, &txn);
74 rc = mdb_cursor_open(txn, dbi, &cursor);
75 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
76 printf("key: %p %.*s, data: %p %.*s\n",
77 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
78 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
79 }
80 mdb_cursor_close(cursor);
81 mdb_txn_abort(txn);
82
83 j=0;
84
85 for (i= count - 1; i > -1; i-= (random()%5)) {
86 j++;
87 txn=NULL;
88 rc = mdb_txn_begin(env, NULL, 0, &txn);
89 sprintf(kval, "%03x", values[i & ~0x0f]);
90 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
91 key.mv_size = sizeof(int);
92 key.mv_data = kval;
93 data.mv_size = sizeof(sval);
94 data.mv_data = sval;
95 rc = mdb_del(txn, dbi, &key, &data);
96 if (rc) {
97 j--;
98 mdb_txn_abort(txn);
99 } else {
100 rc = mdb_txn_commit(txn);
101 }
102 }
103 free(values);
104 printf("Deleted %d values\n", j);
105
106 rc = mdb_env_stat(env, &mst);
107 rc = mdb_txn_begin(env, NULL, 1, &txn);
108 rc = mdb_cursor_open(txn, dbi, &cursor);
109 printf("Cursor next\n");
110 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
111 printf("key: %.*s, data: %.*s\n",
112 (int) key.mv_size, (char *) key.mv_data,
113 (int) data.mv_size, (char *) data.mv_data);
114 }
115 printf("Cursor prev\n");
116 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
117 printf("key: %.*s, data: %.*s\n",
118 (int) key.mv_size, (char *) key.mv_data,
119 (int) data.mv_size, (char *) data.mv_data);
120 }
121 mdb_cursor_close(cursor);
122 mdb_close(env, dbi);
123
124 mdb_txn_abort(txn);
125 mdb_env_close(env);
126
127 return 0;
128 }
0 /* mtest6.c - memory-mapped database tester/toy */
1 /*
2 * Copyright 2011 Howard Chu, Symas Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in the file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <http://www.OpenLDAP.org/license.html>.
12 */
13
14 /* Tests for DB splits and merges */
15 #define _XOPEN_SOURCE 500 /* srandom(), random() */
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include "lmdb.h"
21
22 char dkbuf[1024];
23
24 int main(int argc,char * argv[])
25 {
26 int i = 0, j = 0, rc;
27 MDB_env *env;
28 MDB_dbi dbi;
29 MDB_val key, data;
30 MDB_txn *txn;
31 MDB_stat mst;
32 MDB_cursor *cursor;
33 int count;
34 int *values;
35 long kval;
36 char *sval;
37
38 srandom(time(NULL));
39
40 rc = mdb_env_create(&env);
41 rc = mdb_env_set_mapsize(env, 10485760);
42 rc = mdb_env_set_maxdbs(env, 4);
43 rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664);
44 rc = mdb_txn_begin(env, NULL, 0, &txn);
45 rc = mdb_open(txn, "id2", MDB_CREATE|MDB_INTEGERKEY, &dbi);
46 rc = mdb_cursor_open(txn, dbi, &cursor);
47 rc = mdb_stat(txn, dbi, &mst);
48
49 sval = calloc(1, mst.ms_psize / 4);
50 key.mv_size = sizeof(long);
51 key.mv_data = &kval;
52 data.mv_size = mst.ms_psize / 4 - 30;
53 data.mv_data = sval;
54
55 printf("Adding 12 values, should yield 3 splits\n");
56 for (i=0;i<12;i++) {
57 kval = i*5;
58 sprintf(sval, "%08x", kval);
59 rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE);
60 }
61 printf("Adding 12 more values, should yield 3 splits\n");
62 for (i=0;i<12;i++) {
63 kval = i*5+4;
64 sprintf(sval, "%08x", kval);
65 rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE);
66 }
67 printf("Adding 12 more values, should yield 3 splits\n");
68 for (i=0;i<12;i++) {
69 kval = i*5+1;
70 sprintf(sval, "%08x", kval);
71 rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE);
72 }
73 rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST);
74
75 do {
76 printf("key: %p %s, data: %p %.*s\n",
77 key.mv_data, mdb_dkey(&key, dkbuf),
78 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
79 } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0);
80 mdb_cursor_close(cursor);
81 mdb_txn_commit(txn);
82
83 #if 0
84 j=0;
85
86 for (i= count - 1; i > -1; i-= (random()%5)) {
87 j++;
88 txn=NULL;
89 rc = mdb_txn_begin(env, NULL, 0, &txn);
90 sprintf(kval, "%03x", values[i & ~0x0f]);
91 sprintf(sval, "%03x %d foo bar", values[i], values[i]);
92 key.mv_size = sizeof(int);
93 key.mv_data = kval;
94 data.mv_size = sizeof(sval);
95 data.mv_data = sval;
96 rc = mdb_del(txn, dbi, &key, &data);
97 if (rc) {
98 j--;
99 mdb_txn_abort(txn);
100 } else {
101 rc = mdb_txn_commit(txn);
102 }
103 }
104 free(values);
105 printf("Deleted %d values\n", j);
106
107 rc = mdb_env_stat(env, &mst);
108 rc = mdb_txn_begin(env, NULL, 1, &txn);
109 rc = mdb_cursor_open(txn, dbi, &cursor);
110 printf("Cursor next\n");
111 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
112 printf("key: %.*s, data: %.*s\n",
113 (int) key.mv_size, (char *) key.mv_data,
114 (int) data.mv_size, (char *) data.mv_data);
115 }
116 printf("Cursor prev\n");
117 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) {
118 printf("key: %.*s, data: %.*s\n",
119 (int) key.mv_size, (char *) key.mv_data,
120 (int) data.mv_size, (char *) data.mv_data);
121 }
122 mdb_cursor_close(cursor);
123 mdb_close(txn, dbi);
124
125 mdb_txn_abort(txn);
126 #endif
127 mdb_env_close(env);
128
129 return 0;
130 }
0 /* sample-bdb.c - BerkeleyDB toy/sample
1 *
2 * Do a line-by-line comparison of this and sample-mdb.c
3 */
4 /*
5 * Copyright 2012 Howard Chu, Symas Corp.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
10 * Public License.
11 *
12 * A copy of this license is available in the file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
15 */
16 #include <stdio.h>
17 #include <string.h>
18 #include <db.h>
19
20 int main(int argc,char * argv[])
21 {
22 int rc;
23 DB_ENV *env;
24 DB *dbi;
25 DBT key, data;
26 DB_TXN *txn;
27 DBC *cursor;
28 char sval[32], kval[32];
29
30 #define FLAGS (DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_INIT_MPOOL|DB_CREATE|DB_THREAD)
31 rc = db_env_create(&env, 0);
32 rc = env->open(env, "./testdb", FLAGS, 0664);
33 rc = db_create(&dbi, env, 0);
34 rc = env->txn_begin(env, NULL, &txn, 0);
35 rc = dbi->open(dbi, txn, "test.bdb", NULL, DB_BTREE, DB_CREATE, 0664);
36
37 memset(&key, 0, sizeof(DBT));
38 memset(&data, 0, sizeof(DBT));
39 key.size = sizeof(int);
40 key.data = sval;
41 data.size = sizeof(sval);
42 data.data = sval;
43
44 sprintf(sval, "%03x %d foo bar", 32, 3141592);
45 rc = dbi->put(dbi, txn, &key, &data, 0);
46 rc = txn->commit(txn, 0);
47 if (rc) {
48 fprintf(stderr, "txn->commit: (%d) %s\n", rc, db_strerror(rc));
49 goto leave;
50 }
51 rc = env->txn_begin(env, NULL, &txn, 0);
52 rc = dbi->cursor(dbi, txn, &cursor, 0);
53 key.flags = DB_DBT_USERMEM;
54 key.data = kval;
55 key.ulen = sizeof(kval);
56 data.flags = DB_DBT_USERMEM;
57 data.data = sval;
58 data.ulen = sizeof(sval);
59 while ((rc = cursor->c_get(cursor, &key, &data, DB_NEXT)) == 0) {
60 printf("key: %p %.*s, data: %p %.*s\n",
61 key.data, (int) key.size, (char *) key.data,
62 data.data, (int) data.size, (char *) data.data);
63 }
64 rc = cursor->c_close(cursor);
65 rc = txn->abort(txn);
66 leave:
67 rc = dbi->close(dbi, 0);
68 rc = env->close(env, 0);
69 return rc;
70 }
0 /* sample-mdb.c - MDB toy/sample
1 *
2 * Do a line-by-line comparison of this and sample-bdb.c
3 */
4 /*
5 * Copyright 2012 Howard Chu, Symas Corp.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
10 * Public License.
11 *
12 * A copy of this license is available in the file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
15 */
16 #include <stdio.h>
17 #include "lmdb.h"
18
19 int main(int argc,char * argv[])
20 {
21 int rc;
22 MDB_env *env;
23 MDB_dbi dbi;
24 MDB_val key, data;
25 MDB_txn *txn;
26 MDB_cursor *cursor;
27 char sval[32];
28
29 rc = mdb_env_create(&env);
30 rc = mdb_env_open(env, "./testdb", 0, 0664);
31 rc = mdb_txn_begin(env, NULL, 0, &txn);
32 rc = mdb_open(txn, NULL, 0, &dbi);
33
34 key.mv_size = sizeof(int);
35 key.mv_data = sval;
36 data.mv_size = sizeof(sval);
37 data.mv_data = sval;
38
39 sprintf(sval, "%03x %d foo bar", 32, 3141592);
40 rc = mdb_put(txn, dbi, &key, &data, 0);
41 rc = mdb_txn_commit(txn);
42 if (rc) {
43 fprintf(stderr, "mdb_txn_commit: (%d) %s\n", rc, mdb_strerror(rc));
44 goto leave;
45 }
46 rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
47 rc = mdb_cursor_open(txn, dbi, &cursor);
48 while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
49 printf("key: %p %.*s, data: %p %.*s\n",
50 key.mv_data, (int) key.mv_size, (char *) key.mv_data,
51 data.mv_data, (int) data.mv_size, (char *) data.mv_data);
52 }
53 mdb_cursor_close(cursor);
54 mdb_txn_abort(txn);
55 leave:
56 mdb_close(env, dbi);
57 mdb_env_close(env);
58 return 0;
59 }