Commit f5bc08010bcd4352d0c4df4b255e4f7d866af06b - pybigwig

New upstream release. Debian Janitor 2 years ago

23 changed file(s) with 4636 addition(s) and 97 deletion(s). Raw diff Collapse all Expand all

-62

~~.gitignore~~ less more

0		# Byte-compiled / optimized / DLL files
1		__pycache__/
2		*.py[cod]
3
4		# C extensions
5		*.so
6
7		# Distribution / packaging
8		.Python
9		env/
10		build/
11		develop-eggs/
12		dist/
13		downloads/
14		eggs/
15		.eggs/
16		lib64/
17		parts/
18		sdist/
19		var/
20		*.egg-info/
21		.installed.cfg
22		*.egg
23
24		# PyInstaller
25		# Usually these files are written by a python script from a template
26		# before PyInstaller builds the exe, so as to inject date/other infos into it.
27		*.manifest
28		*.spec
29
30		# Installer logs
31		pip-log.txt
32		pip-delete-this-directory.txt
33
34		# Unit test / coverage reports
35		htmlcov/
36		.tox/
37		.coverage
38		.coverage.*
39		.cache
40		nosetests.xml
41		coverage.xml
42		*,cover
43
44		# Translations
45		*.mo
46		*.pot
47
48		# Django stuff:
49		*.log
50
51		# Sphinx documentation
52		docs/_build/
53
54		# PyBuilder
55		target/
56
57		*.o
58		#./setup.py sdist creates this
59		MANIFEST
60
61		*.swp

-0

~~.gitmodules~~ less more

(Empty file)

-11

~~.travis.yml~~ less more

0		language: python
1		python:
2		- "3.5"
3		- "3.6"
4		install:
5		- pip install numpy && python ./setup.py install
6		matrix:
7		include:
8		- python: 3.7
9		dist: xenial
10		script: nosetests -sv

+13

-0

PKG-INFO less more

	0	Metadata-Version: 2.1
	1	Name: pyBigWig
	2	Version: 0.3.18
	3	Summary: A package for accessing bigWig files using libBigWig
	4	Home-page: https://github.com/dpryan79/pyBigWig
	5	Author: Devon P. Ryan
	6	Author-email: ryan@ie-freiburg.mpg.de
	7	License: UNKNOWN
	8	Download-URL: https://github.com/dpryan79/pyBigWig/tarball/0.3.13
	9	Description: UNKNOWN
	10	Keywords: bioinformatics,bigWig,bigBed
	11	Platform: UNKNOWN
	12	Provides-Extra: numpy input

+11

-3

README.md less more

6	6	=================
7	7
8	8	* [Installation](#installation)
	9	* [Requirements](#requirements)
9	10	* [Usage](#usage)
10	11	* [Load the extension](#load-the-extension)
11	12	* [Open a bigWig or bigBed file](#open-a-bigwig-or-bigbed-file)

33	34
34	35	or with conda
35	36
36		conda install pybigwig -c bioconda
37
38		Note that libcurl (and the `curl-config` command) are required for installation. This is typically already installed on many Linux and OSX systems (if you install with conda then this will happen automatically).
	37	conda install pybigwig -c conda-forge -c bioconda
	38
	39	## Requirements
	40
	41	The follow non-python requirements must be installed:
	42
	43	- libcurl (and the `curl-config` config)
	44	- zlib
	45
	46	The headers and libraries for these are required.
39	47
40	48	# Usage
41	49	Basic usage is as follows:

-0

debian/changelog less more

	0	pybigwig (0.3.18-1) UNRELEASED; urgency=low
	1
	2	* New upstream release.
	3
	4	-- Debian Janitor <janitor@jelmer.uk> Sun, 06 Jun 2021 10:02:01 -0000
	5
0	6	pybigwig (0.3.17-1) unstable; urgency=medium
1	7
2	8	[ Steffen Möller ]

+22

-0

libBigWig/LICENSE less more

	0	The MIT License (MIT)
	1
	2	Copyright (c) 2015 Devon Ryan
	3
	4	Permission is hereby granted, free of charge, to any person obtaining a copy
	5	of this software and associated documentation files (the "Software"), to deal
	6	in the Software without restriction, including without limitation the rights
	7	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	8	copies of the Software, and to permit persons to whom the Software is
	9	furnished to do so, subject to the following conditions:
	10
	11	The above copyright notice and this permission notice shall be included in all
	12	copies or substantial portions of the Software.
	13
	14	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	15	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	16	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	17	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	18	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	19	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	20	SOFTWARE.
	21

+236

-0

libBigWig/README.md less more

	0	![Master build status](https://travis-ci.org/dpryan79/libBigWig.svg?branch=master) [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.45278.svg)](http://dx.doi.org/10.5281/zenodo.45278)
	1
	2	A C library for reading/parsing local and remote bigWig and bigBed files. While Kent's source code is free to use for these purposes, it's really inappropriate as library code since it has the unfortunate habit of calling `exit()` whenever there's an error. If that's then used inside of something like python then the python interpreter gets killed. This library is aimed at resolving these sorts of issues and should also use more standard things like curl and has a friendlier license to boot.
	3
	4	Documentation is automatically generated by doxygen and can be found under `docs/html` or online [here](https://cdn.rawgit.com/dpryan79/libBigWig/master/docs/html/index.html).
	5
	6	# Example
	7
	8	The only functions and structures that end users need to care about are in "bigWig.h". Below is a commented example. You can see the files under `test/` for further examples.
	9
	10	#include "bigWig.h"
	11	int main(int argc, char *argv[]) {
	12	bigWigFile_t *fp = NULL;
	13	bwOverlappingIntervals_t *intervals = NULL;
	14	double *stats = NULL;
	15	if(argc != 2) {
	16	fprintf(stderr, "Usage: %s {file.bw\|URL://path/file.bw}\n", argv[0]);
	17	return 1;
	18	}
	19
	20	//Initialize enough space to hold 128KiB (1<<17) of data at a time
	21	if(bwInit(1<<17) != 0) {
	22	fprintf(stderr, "Received an error in bwInit\n");
	23	return 1;
	24	}
	25
	26	//Open the local/remote file
	27	fp = bwOpen(argv[1], NULL, "r");
	28	if(!fp) {
	29	fprintf(stderr, "An error occurred while opening %s\n", argv[1]);
	30	return 1;
	31	}
	32
	33	//Get values in a range (0-based, half open) without NAs
	34	intervals = bwGetValues(fp, "chr1", 10000000, 10000100, 0);
	35	bwDestroyOverlappingIntervals(intervals); //Free allocated memory
	36
	37	//Get values in a range (0-based, half open) with NAs
	38	intervals = bwGetValues(fp, "chr1", 10000000, 10000100, 1);
	39	bwDestroyOverlappingIntervals(intervals); //Free allocated memory
	40
	41	//Get the full intervals that overlap
	42	intervals = bwGetOverlappingIntervals(fp, "chr1", 10000000, 10000100);
	43	bwDestroyOverlappingIntervals(intervals);
	44
	45	//Get an example statistic - standard deviation
	46	//We want ~4 bins in the range
	47	stats = bwStats(fp, "chr1", 10000000, 10000100, 4, dev);
	48	if(stats) {
	49	printf("chr1:10000000-10000100 std. dev.: %f %f %f %f\n", stats[0], stats[1], stats[2], stats[3]);
	50	free(stats);
	51	}
	52
	53	bwClose(fp);
	54	bwCleanup();
	55	return 0;
	56	}
	57
	58	##Writing example
	59
	60	N.B., creation of bigBed files is not supported (there are no plans to change this).
	61
	62	Below is an example of how to write bigWig files. You can also find this file under `test/exampleWrite.c`. Unlike with Kent's tools, you can create bigWig files entry by entry without needing an intermediate wiggle or bedGraph file. Entries in bigWig files are stored in blocks with each entry in a block referring to the same chromosome and having the same type, of which there are three (see the [wiggle specification](http://genome.ucsc.edu/goldenpath/help/wiggle.html) for more information on this).
	63
	64	#include "bigWig.h"
	65
	66	int main(int argc, char *argv[]) {
	67	bigWigFile_t *fp = NULL;
	68	char *chroms[] = {"1", "2"};
	69	char *chromsUse[] = {"1", "1", "1"};
	70	uint32_t chrLens[] = {1000000, 1500000};
	71	uint32_t starts[] = {0, 100, 125,
	72	200, 220, 230,
	73	500, 600, 625,
	74	700, 800, 850};
	75	uint32_t ends[] = {5, 120, 126,
	76	205, 226, 231};
	77	float values[] = {0.0f, 1.0f, 200.0f,
	78	-2.0f, 150.0f, 25.0f,
	79	0.0f, 1.0f, 200.0f,
	80	-2.0f, 150.0f, 25.0f,
	81	-5.0f, -20.0f, 25.0f,
	82	-5.0f, -20.0f, 25.0f};
	83
	84	if(bwInit(1<<17) != 0) {
	85	fprintf(stderr, "Received an error in bwInit\n");
	86	return 1;
	87	}
	88
	89	fp = bwOpen("example_output.bw", NULL, "w");
	90	if(!fp) {
	91	fprintf(stderr, "An error occurred while opening example_output.bw for writingn\n");
	92	return 1;
	93	}
	94
	95	//Allow up to 10 zoom levels, though fewer will be used in practice
	96	if(bwCreateHdr(fp, 10)) goto error;
	97
	98	//Create the chromosome lists
	99	fp->cl = bwCreateChromList(chroms, chrLens, 2);
	100	if(!fp->cl) goto error;
	101
	102	//Write the header
	103	if(bwWriteHdr(fp)) goto error;
	104
	105	//Some example bedGraph-like entries
	106	if(bwAddIntervals(fp, chromsUse, starts, ends, values, 3)) goto error;
	107	//We can continue appending similarly formatted entries
	108	//N.B. you can't append a different chromosome (those always go into different
	109	if(bwAppendIntervals(fp, starts+3, ends+3, values+3, 3)) goto error;
	110
	111	//Add a new block of entries with a span. Since bwAdd/AppendIntervals was just used we MUST create a new block
	112	if(bwAddIntervalSpans(fp, "1", starts+6, 20, values+6, 3)) goto error;
	113	//We can continue appending similarly formatted entries
	114	if(bwAppendIntervalSpans(fp, starts+9, values+9, 3)) goto error;
	115
	116	//Add a new block of fixed-step entries
	117	if(bwAddIntervalSpanSteps(fp, "1", 900, 20, 30, values+12, 3)) goto error;
	118	//The start is then 760, since that's where the previous step ended
	119	if(bwAppendIntervalSpanSteps(fp, values+15, 3)) goto error;
	120
	121	//Add a new chromosome
	122	chromsUse[0] = "2";
	123	chromsUse[1] = "2";
	124	chromsUse[2] = "2";
	125	if(bwAddIntervals(fp, chromsUse, starts, ends, values, 3)) goto error;
	126
	127	//Closing the file causes the zoom levels to be created
	128	bwClose(fp);
	129	bwCleanup();
	130
	131	return 0;
	132
	133	error:
	134	fprintf(stderr, "Received an error somewhere!\n");
	135	bwClose(fp);
	136	bwCleanup();
	137	return 1;
	138	}
	139
	140	# Testing file types
	141
	142	As of version 0.3.0, this library supports accessing bigBed files, which are related to bigWig files. Applications that need to support both bigWig and bigBed input can use the `bwIsBigWig` and `bbIsBigBed` functions to determine if their inputs are bigWig/bigBed files:
	143
	144	...code...
	145	if(bwIsBigWig(input_file_name, NULL)) {
	146	//do something
	147	} else if(bbIsBigBed(input_file_name, NULL)) {
	148	//do something else
	149	} else {
	150	//handle unknown input
	151	}
	152
	153	Note that these two functions rely on the "magic number" at the beginning of each file, which differs between bigWig and bigBed files.
	154
	155	# bigBed support
	156
	157	Support for accessing bigBed files was added in version 0.3.0. The function names used for accessing bigBed files are similar to those used for bigWig files.
	158
	159	Function \| Use
	160	--- \| ---
	161	bbOpen \| Opens a bigBed file
	162	bbGetSQL \| Returns the SQL string (if it exists) in a bigBed file
	163	bbGetOverlappingEntries \| Returns all entries overlapping an interval (either with or without their associated strings
	164	bbDestroyOverlappingEntries \| Free memory allocated by the above command
	165
	166	Other functions, such as `bwClose` and `bwInit`, are shared between bigWig and bigBed files. See `test/testBigBed.c` for a full example.
	167
	168	# A note on bigBed entries
	169
	170	Inside bigBed files, entries are stored as chromosome, start, and end coordinates with an (optional) associated string. For example, a "bedRNAElements" file from Encode has name, score, strand, "level", "significance", and "score2" values associated with each entry. These are stored inside the bigBed files as a single tab-separated character vector (char \*), which makes parsing difficult. The names of the various fields inside of bigBed files is stored as an SQL string, for example:
	171
	172	table RnaElements
	173	"BED6 + 3 scores for RNA Elements data "
	174	(
	175	string chrom; "Reference sequence chromosome or scaffold"
	176	uint chromStart; "Start position in chromosome"
	177	uint chromEnd; "End position in chromosome"
	178	string name; "Name of item"
	179	uint score; "Normalized score from 0-1000"
	180	char[1] strand; "+ or - or . for unknown"
	181	float level; "Expression level such as RPKM or FPKM. Set to -1 for no data."
	182	float signif; "Statistical significance such as IDR. Set to -1 for no data."
	183	uint score2; "Additional measurement/count e.g. number of reads. Set to 0 for no data."
	184	)
	185
	186	Entries will then be of the form (one per line):
	187
	188	59426 115 - 0.021 0.48 218
	189	51 209 + 0.071 0.74 130
	190	52 170 + 0.045 0.61 171
	191	59433 178 - 0.049 0.34 296
	192	53 156 + 0.038 0.19 593
	193	59436 186 - 0.054 0.15 1010
	194	59437 506 - 1.560 0.00 430611
	195
	196	Note that chromosome and start/end intervals are stored separately, so there's no need to parse them out of string. libBigWig can return these entries, either with or without the above associated strings. Parsing these string is left to the application requiring them and is currently outside the scope of this library.
	197
	198	# Interval/Entry iterators
	199
	200	Sometimes it is desirable to request a large number of intervals from a bigWig file or entries from a bigBed file, but not hold them all in memory at once (e.g., due to saving memory). To support this, libBigWig (since version 0.3.0) supports two kinds of iterators. The general process of using iterators is: (1) iterator creation, (2) traversal, and finally (3) iterator destruction. Only iterator creation differs between bigWig and bigBed files.
	201
	202	Importantly, iterators return results by one or more blocks. This is for convenience, since bigWig intervals and bigBed entries are stored in together in fixed-size groups, called blocks. The number of blocks of entries returned, therefore, is an option that can be specified to balance performance and memory usage.
	203
	204	## Iterator creation
	205
	206	For bigwig files, iterators are created with the `bwOverlappingIntervalsIterator()`. This function takes chromosomal bounds (chromosome name, start, and end position) as well as a number of blocks. The equivalent function for bigBed files is `bbOverlappingEntriesIterator()`, which additionally takes a `withString` argutment, which dictates whether the returned entries include the associated string values or not.
	207
	208	Each of the aforementioned files returns a pointer to a `bwOverlapIterator_t` object. The only important parts of this structure for end users are the following members: `entries`, `intervals`, and `data`. `entries` is a pointer to a `bbOverlappingEntries_t` object, or `NULL` if a bigWig file is being used. Likewise, `intervals` is a pointer to a `bwOverlappingIntervals_t` object, or `NULL` if a bigBed file is being used. `data` is a special pointer, used to signify the end of iteration. Thus, when `data` is a `NULL` pointer, iteration has ended.
	209
	210	## Iterator traversal
	211
	212	Regardless of whether a bigWig or bigBed file is being used, the `bwIteratorNext()` function will free currently used memory and load the appropriate intervals or entries for the next block(s). On error, this will return a NULL pointer (memory is already internally freed in this case).
	213
	214	## Iterator destruction
	215
	216	`bwOverlapIterator_t` objects MUST be destroyed after use. This can be done with the `bwIteratorDestroy()` function.
	217
	218	## Example
	219
	220	A full example is provided in `tests/testIterator.c`, but a small example of iterating over all bigWig intervals in `chr1:0-10000000` in chunks of 5 blocks follows:
	221
	222	iter = bwOverlappingIntervalsIterator(fp, "chr1", 0, 10000000, 5);
	223	while(iter->data) {
	224	//Do stuff with iter->intervals
	225	iter = bwIteratorNext(iter);
	226	}
	227	bwIteratorDestroy(iter);
	228
	229	# A note on bigWig statistics
	230
	231	The results of `min`, `max`, and `mean` should be the same as those from `BigWigSummary`. `stdev` and `coverage`, however, may differ due to Kent's tools producing incorrect results (at least for `coverage`, though the same appears to be the case for `stdev`).
	232
	233	# Python interface
	234
	235	There are currently two python interfaces that make use of libBigWig: [pyBigWig](https://github.com/dpryan79/pyBigWig) by me and [bw-python](https://github.com/brentp/bw-python) by Brent Pederson. Those interested are encouraged to give both a try!

+606

-0

libBigWig/bigWig.h less more

	0	#ifndef LIBBIGWIG_H
	1	#define LIBBIGWIG_H
	2
	3	#include "bigWigIO.h"
	4	#include "bwValues.h"
	5	#include <inttypes.h>
	6	#include <zlib.h>
	7
	8	#ifdef __cplusplus
	9	extern "C" {
	10	#endif
	11
	12	/*! \mainpage libBigWig
	13	*
	14	* \section Introduction
	15	*
	16	* libBigWig is a C library for parsing local/remote bigWig and bigBed files. This is similar to Kent's library from UCSC, except
	17	* * The license is much more liberal
	18	* * This code doesn't call `exit()` on error, thereby killing the calling application.
	19	*
	20	* External files are accessed using [curl](http://curl.haxx.se/).
	21	*
	22	* Please submit issues and pull requests [here](https://github.com/dpryan79/libBigWig).
	23	*
	24	* \section Compilation
	25	*
	26	* Assuming you already have the curl libraries installed (not just the curl binary!):
	27	*
	28	* make install prefix=/some/path
	29	*
	30	* \section Writing bigWig files
	31	*
	32	* There are three methods for storing values in a bigWig file, further described in the [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html). The entries within the file are grouped into "blocks" and each such block is limited to storing entries of a single type. So, it is unwise to use a single bedGraph-like endtry followed by a single fixed-step entry followed by a variable-step entry, as that would require three separate blocks, with additional space required for each.
	33	*
	34	* \section Testing file types
	35	*
	36	* As of version 0.3.0, libBigWig supports reading bigBed files. If an application needs to support both bigBed and bigWig input, then the `bwIsBigWig` and `bbIsBigBed` functions can be used to determine the file type. These both use the "magic" number at the beginning of the file to determine the file type.
	37	*
	38	* \section Interval and entry iterators
	39	*
	40	* As of version 0.3.0, libBigWig supports iterating over intervals in bigWig files and entries in bigBed files. The number of intervals/entries returned with each iteration can be controlled by setting the number of blocks processed in each iteration (intervals and entries are group inside of bigWig and bigBed files into blocks of entries). See `test/testIterator.c` for an example.
	41	*
	42	* \section Examples
	43	*
	44	* Please see [README.md](README.md) and the files under `test/` for examples.
	45	*/
	46
	47
	48	/*! \file bigWig.h
	49	*
	50	* These are the functions and structured that should be used by external users. While I don't particularly recommend dealing with some of the structures (e.g., a bigWigHdr_t), they're described here in case you need them.
	51	*
	52	* BTW, this library doesn't switch endianness as appropriate, since I kind of assume that there's only one type produced these days.
	53	*/
	54
	55	/*!
	56	* The library version number
	57	*/
	58	#define LIBBIGWIG_VERSION 0.4.6
	59
	60	/*!
	61	* If 1, then this library was compiled with remote file support.
	62	*/
	63	#ifdef NOCURL
	64	#define LIBBIGWIG_CURL 0
	65	#ifndef CURLTYPE_DEFINED
	66	#define CURLTYPE_DEFINED
	67	typedef int CURLcode;
	68	typedef void CURL;
	69	#endif
	70	#else
	71	#define LIBBIGWIG_CURL 1
	72	#endif
	73
	74	/*!
	75	* The magic number of a bigWig file.
	76	*/
	77	#define BIGWIG_MAGIC 0x888FFC26
	78	/*!
	79	* The magic number of a bigBed file.
	80	*/
	81	#define BIGBED_MAGIC 0x8789F2EB
	82	/*!
	83	* The magic number of a "cirTree" block in a file.
	84	*/
	85	#define CIRTREE_MAGIC 0x78ca8c91
	86	/*!
	87	* The magic number of an index block in a file.
	88	*/
	89	#define IDX_MAGIC 0x2468ace0
	90	/*!
	91	* The default number of children per block.
	92	*/
	93	#define DEFAULT_nCHILDREN 64
	94	/*!
	95	* The default decompression buffer size in bytes. This is used to determin
	96	*/
	97	#define DEFAULT_BLOCKSIZE 32768
	98
	99	/*!
	100	* An enum that dictates the type of statistic to fetch for a given interval
	101	*/
	102	enum bwStatsType {
	103	doesNotExist = -1, /!< This does nothing /
	104	mean = 0, /!< The mean value /
	105	average = 0, /!< The mean value /
	106	stdev = 1, /!< The standard deviation of the values /
	107	dev = 1, /!< The standard deviation of the values /
	108	max = 2, /!< The maximum value /
	109	min = 3, /!< The minimum value /
	110	cov = 4, /!< The number of bases covered /
	111	coverage = 4, /!<The number of bases covered /
	112	sum = 5 /!< The sum of per-base values /
	113	};
	114
	115	//Should hide this from end users
	116	/*!
	117	* @brief BigWig files have multiple "zoom" levels, each of which has its own header. This hold those headers
	118	*
	119	* N.B., there's 4 bytes of padding in the on disk representation of level and dataOffset.
	120	*/
	121	typedef struct {
	122	uint32_t level; /<The zoom level, which is an integer starting with 0./
	123	//There's 4 bytes of padding between these
	124	uint64_t dataOffset; /<The offset to the on-disk start of the data. This isn't used currently./
	125	uint64_t indexOffset; /<The offset to the on-disk start of the index. This is* used.*/
	126	bwRTree_t idx; /<Index for each zoom level. Represented as a tree*/
	127	} bwZoomHdr_t;
	128
	129	/*!
	130	* @brief The header section of a bigWig file.
	131	*
	132	* Some of the values aren't currently used for anything. Others may optionally not exist.
	133	*/
	134	typedef struct {
	135	uint16_t version; /*<The version information of the file./
	136	uint16_t nLevels; /*<The number of "zoom" levels./
	137	uint64_t ctOffset; /*<The offset to the on-disk chromosome tree list./
	138	uint64_t dataOffset; /*<The on-disk offset to the first block of data./
	139	uint64_t indexOffset; /*<The on-disk offset to the data index./
	140	uint16_t fieldCount; /*<Total number of fields./
	141	uint16_t definedFieldCount; /*<Number of fixed-format BED fields./
	142	uint64_t sqlOffset; /*<The on-disk offset to an SQL string. This is unused./
	143	uint64_t summaryOffset; /*<If there's a summary, this is the offset to it on the disk./
	144	uint32_t bufSize; /*<The compression buffer size (if the data is compressed)./
	145	uint64_t extensionOffset; /*<Unused/
	146	bwZoomHdr_t zoomHdrs; /<Pointers to the header for each zoom level./
	147	//total Summary
	148	uint64_t nBasesCovered; /*<The total bases covered in the file./
	149	double minVal; /*<The minimum value in the file./
	150	double maxVal; /*<The maximum value in the file./
	151	double sumData; /*<The sum of all values in the file./
	152	double sumSquared; /*<The sum of the squared values in the file./
	153	} bigWigHdr_t;
	154
	155	//Should probably replace this with a hash
	156	/*!
	157	* @brief Holds the chromosomes and their lengths
	158	*/
	159	typedef struct {
	160	int64_t nKeys; /*<The number of chromosomes /
	161	char chrom; /<A list of null terminated chromosomes */
	162	uint32_t len; /<The lengths of each chromosome /
	163	} chromList_t;
	164
	165	//TODO remove from bigWig.h
	166	/// @cond SKIP
	167	typedef struct bwLL bwLL;
	168	struct bwLL {
	169	bwRTreeNode_t *node;
	170	struct bwLL *next;
	171	};
	172	typedef struct bwZoomBuffer_t bwZoomBuffer_t;
	173	struct bwZoomBuffer_t { //each individual entry takes 32 bytes
	174	void *p;
	175	uint32_t l, m;
	176	struct bwZoomBuffer_t *next;
	177	};
	178	/// @endcond
	179
	180	/*!
	181	* @brief This is only needed for writing bigWig files (and won't be created otherwise)
	182	* This should be removed from bigWig.h
	183	*/
	184	typedef struct {
	185	uint64_t nBlocks; /*<The number of blocks written/
	186	uint32_t blockSize; /*<The maximum number of children/
	187	uint64_t nEntries; /*<The number of entries processed. This is used for the first contig and determining how the zoom levels are computed/
	188	uint64_t runningWidthSum; /*<The running sum of the entry widths for the first contig (again, used for the first contig and computing zoom levels)/
	189	uint32_t tid; /*<The current TID that's being processed/
	190	uint32_t start; /*<The start position of the block/
	191	uint32_t end; /*<The end position of the block/
	192	uint32_t span; /*<The span of each entry, if applicable/
	193	uint32_t step; /*<The step size, if applicable/
	194	uint8_t ltype; /*<The type of the last entry added/
	195	uint32_t l; /*<The current size of p. This and the type determine the number of items held/
	196	void p; /<A buffer of size hdr->bufSize/
	197	bwLL firstIndexNode; /<The first index node in the linked list/
	198	bwLL currentIndexNode; /<The last index node in a linked list/
	199	bwZoomBuffer_t firstZoomBuffer; /<The first node in a linked list of leaf nodes*/
	200	bwZoomBuffer_t lastZoomBuffer; /<The last node in a linked list of leaf nodes*/
	201	uint64_t nNodes; /<The number of leaf nodes per zoom level, useful for determining duplicate levels/
	202	uLongf compressPsz; /*<The size of the compression buffer/
	203	void compressP; /<A compressed buffer of size compressPsz/
	204	} bwWriteBuffer_t;
	205
	206	/*!
	207	* @brief A structure that holds everything needed to access a bigWig file.
	208	*/
	209	typedef struct {
	210	URL_t URL; /<A pointer that can handle both local and remote files (including a buffer if needed)./
	211	bigWigHdr_t hdr; /<The file header./
	212	chromList_t cl; /<A list of chromosome names (the order is the ID)./
	213	bwRTree_t idx; /<The index for the full dataset./
	214	bwWriteBuffer_t writeBuffer; /<The buffer used for writing./
	215	int isWrite; /*<0: Opened for reading, 1: Opened for writing./
	216	int type; /*<0: bigWig, 1: bigBed./
	217	} bigWigFile_t;
	218
	219	/*!
	220	* @brief Holds interval:value associations
	221	*/
	222	typedef struct {
	223	uint32_t l; /*<Number of intervals held/
	224	uint32_t m; /*<Maximum number of values/intervals the struct can hold/
	225	uint32_t start; /<The start positions (0-based half open)/
	226	uint32_t end; /<The end positions (0-based half open)/
	227	float value; /<The value associated with each position/
	228	} bwOverlappingIntervals_t;
	229
	230	/*!
	231	* @brief Holds interval:str associations
	232	*/
	233	typedef struct {
	234	uint32_t l; /*<Number of intervals held/
	235	uint32_t m; /*<Maximum number of values/intervals the struct can hold/
	236	uint32_t start; /<The start positions (0-based half open)/
	237	uint32_t end; /<The end positions (0-based half open)/
	238	char str; /<The strings associated with a given entry.*/
	239	} bbOverlappingEntries_t;
	240
	241	/*!
	242	* @brief A structure to hold iterations
	243	* One of intervals and entries should be used to access records from bigWig or bigBed files, respectively.
	244	*/
	245	typedef struct {
	246	bigWigFile_t bw; /<Pointer to the bigWig/bigBed file./
	247	uint32_t tid; /*<The contig/chromosome ID./
	248	uint32_t start; /*<Start position of the query interval./
	249	uint32_t end; /*<End position of the query interval./
	250	uint64_t offset; /*<Offset into the blocks./
	251	uint32_t blocksPerIteration; /*<Number of blocks to use per iteration./
	252	int withString; /*<For bigBed entries, whether to return the string with the entries./
	253	void blocks; /<Overlapping blocks./
	254	bwOverlappingIntervals_t intervals; /<Overlapping intervals (or NULL)./
	255	bbOverlappingEntries_t entries; /<Overlapping entries (or NULL)./
	256	void data; /<Points to either intervals or entries. If there are no further intervals/entries, then this is NULL. Use this to test for whether to continue iterating./
	257	} bwOverlapIterator_t;
	258
	259	/*!
	260	* @brief Initializes curl and global variables. This MUST be called before other functions (at least if you want to connect to remote files).
	261	* For remote file, curl must be initialized and regions of a file read into an internal buffer. If the buffer is too small then an excessive number of connections will be made. If the buffer is too large than more data than required is fetched. 128KiB is likely sufficient for most needs.
	262	* @param bufSize The internal buffer size used for remote connection.
	263	* @see bwCleanup
	264	* @return 0 on success and 1 on error.
	265	*/
	266	int bwInit(size_t bufSize);
	267
	268	/*!
	269	* @brief The counterpart to bwInit, this cleans up curl.
	270	* @see bwInit
	271	*/
	272	void bwCleanup(void);
	273
	274	/*!
	275	* @brief Determine if a file is a bigWig file.
	276	* This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file.
	277	* @param fname The file name or URL (http, https, and ftp are supported)
	278	* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
	279	* @return 1 if the file appears to be bigWig, otherwise 0.
	280	*/
	281	int bwIsBigWig(char fname, CURLcode (callBack)(CURL*));
	282
	283	/*!
	284	* @brief Determine is a file is a bigBed file.
	285	* This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file.
	286	* @param fname The file name or URL (http, https, and ftp are supported)
	287	* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
	288	* @return 1 if the file appears to be bigWig, otherwise 0.
	289	*/
	290	int bbIsBigBed(char fname, CURLcode (callBack)(CURL*));
	291
	292	/*!
	293	* @brief Opens a local or remote bigWig file.
	294	* This will open a local or remote bigWig file. Writing of local bigWig files is also supported.
	295	* @param fname The file name or URL (http, https, and ftp are supported)
	296	* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
	297	* @param mode The mode, by default "r". Both local and remote files can be read, but only local files can be written. For files being written the callback function is ignored. If and only if the mode contains "w" will the file be opened for writing (in all other cases the file will be opened for reading.
	298	* @return A bigWigFile_t * on success and NULL on error.
	299	*/
	300	bigWigFile_t bwOpen(char fname, CURLcode (callBack)(CURL), const char* mode);
	301
	302	/*!
	303	* @brief Opens a local or remote bigBed file.
	304	* This will open a local or remote bigBed file. Note that this file format can only be read and NOT written!
	305	* @param fname The file name or URL (http, https, and ftp are supported)
	306	* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
	307	* @return A bigWigFile_t * on success and NULL on error.
	308	*/
	309	bigWigFile_t bbOpen(char fname, CURLcode (callBack)(CURL));
	310
	311	/*!
	312	* @brief Returns a string containing the SQL entry (or NULL).
	313	* The "auto SQL" field contains the names and value types of the entries in
	314	* each bigBed entry. If you need to parse a particular value out of each entry,
	315	* then you'll need to first parse this.
	316	* @param fp The file pointer to a valid bigWigFile_t
	317	* @return A char *, which you MUST free!
	318	*/
	319	char bbGetSQL(bigWigFile_t fp);
	320
	321	/*!
	322	* @brief Closes a bigWigFile_t and frees up allocated memory
	323	* This closes both bigWig and bigBed files.
	324	* @param fp The file pointer.
	325	*/
	326	void bwClose(bigWigFile_t *fp);
	327
	328	/*******************************************************************************
	329	*
	330	* The following are in bwStats.c
	331	*
	332	*******************************************************************************/
	333
	334	/*!
	335	* @brief Converts between chromosome name and ID
	336	*
	337	* @param fp A valid bigWigFile_t pointer
	338	* @param chrom A chromosome name
	339	* @return An ID, -1 will be returned on error (note that this is an unsigned value, so that's ~4 billion. bigWig/bigBed files can't store that many chromosomes anyway.
	340	*/
	341	uint32_t bwGetTid(bigWigFile_t fp, char chrom);
	342
	343	/*!
	344	* @brief Frees space allocated by `bwGetOverlappingIntervals`
	345	* @param o A valid `bwOverlappingIntervals_t` pointer.
	346	* @see bwGetOverlappingIntervals
	347	*/
	348	void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o);
	349
	350	/*!
	351	* @brief Frees space allocated by `bbGetOverlappingEntries`
	352	* @param o A valid `bbOverlappingEntries_t` pointer.
	353	* @see bbGetOverlappingEntries
	354	*/
	355	void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o);
	356
	357	/*!
	358	* @brief Return bigWig entries overlapping an interval.
	359	* Find all bigWig entries overlapping a range and returns them, including their associated values.
	360	* @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
	361	* @param chrom A valid chromosome name.
	362	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	363	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	364	* @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and intervals.
	365	* @see bwOverlappingIntervals_t
	366	* @see bwDestroyOverlappingIntervals
	367	* @see bwGetValues
	368	*/
	369	bwOverlappingIntervals_t bwGetOverlappingIntervals(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end);
	370
	371	/*!
	372	* @brief Return bigBed entries overlapping an interval.
	373	* Find all bigBed entries overlapping a range and returns them.
	374	* @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
	375	* @param chrom A valid chromosome name.
	376	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	377	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	378	* @param withString If not 0, return the string associated with each entry in the output. If 0, there are no associated strings returned. This is useful if the only information needed are the locations of the entries, which require significantly less memory.
	379	* @return NULL on error or no overlapping values, otherwise a `bbOverlappingEntries_t *` holding the intervals and (optionally) the associated string.
	380	* @see bbOverlappingEntries_t
	381	* @see bbDestroyOverlappingEntries
	382	*/
	383	bbOverlappingEntries_t bbGetOverlappingEntries(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, int withString);
	384
	385	/*!
	386	* @brief Creates an iterator over intervals in a bigWig file
	387	* Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
	388	* Intervals are in the `intervals` member and `data` can be used to determine when to end iteration.
	389	* @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
	390	* @param chrom A valid chromosome name.
	391	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	392	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	393	* @param blocksPerIteration The number of blocks (internal groupings of intervals in bigWig files) to return per iteration.
	394	* @return NULL on error, otherwise a bwOverlapIterator_t pointer
	395	* @see bwOverlapIterator_t
	396	* @see bwIteratorNext
	397	* @see bwIteratorDestroy
	398	*/
	399	bwOverlapIterator_t bwOverlappingIntervalsIterator(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration);
	400
	401	/*!
	402	* @brief Creates an iterator over entries in a bigBed file
	403	* Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
	404	* Entries are in the `entries` member and `data` can be used to determine when to end iteration.
	405	* @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
	406	* @param chrom A valid chromosome name.
	407	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	408	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	409	* @param withString Whether the returned entries should include their associated strings.
	410	* @param blocksPerIteration The number of blocks (internal groupings of entries in bigBed files) to return per iteration.
	411	* @return NULL on error, otherwise a bwOverlapIterator_t pointer
	412	* @see bbGetOverlappingEntries
	413	* @see bwOverlapIterator_t
	414	* @see bwIteratorNext
	415	* @see bwIteratorDestroy
	416	*/
	417	bwOverlapIterator_t bbOverlappingEntriesIterator(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration);
	418
	419	/*!
	420	* @brief Traverses to the entries/intervals in the next group of blocks.
	421	* @param iter A bwOverlapIterator_t pointer that is updated (or destroyed on error)
	422	* @return NULL on error, otherwise a bwOverlapIterator_t pointer with the intervals or entries from the next set of blocks.
	423	* @see bwOverlapIterator_t
	424	* @see bwIteratorDestroy
	425	*/
	426	bwOverlapIterator_t bwIteratorNext(bwOverlapIterator_t iter);
	427
	428	/*!
	429	* @brief Destroys a bwOverlapIterator_t
	430	* @param iter The bwOverlapIterator_t that should be destroyed
	431	*/
	432	void bwIteratorDestroy(bwOverlapIterator_t *iter);
	433
	434	/*!
	435	* @brief Return all per-base bigWig values in a given interval.
	436	* Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed by default, but may be returned if `includeNA` is not 0.
	437	* @param fp A valid bigWigFile_t pointer.
	438	* @param chrom A valid chromosome name.
	439	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	440	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	441	* @param includeNA If not 0, report NA values as well (as NA).
	442	* @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and positions.
	443	* @see bwOverlappingIntervals_t
	444	* @see bwDestroyOverlappingIntervals
	445	* @see bwGetOverlappingIntervals
	446	*/
	447	bwOverlappingIntervals_t bwGetValues(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, int includeNA);
	448
	449	/*!
	450	* @brief Determines per-interval bigWig statistics
	451	* Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals.
	452	* @param fp The file from which to extract statistics.
	453	* @param chrom A valid chromosome name.
	454	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	455	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	456	* @param nBins The number of bins within the interval to calculate statistics for.
	457	* @param type The type of statistic.
	458	* @see bwStatsType
	459	* @return A pointer to an array of double precission floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
	460	*/
	461	double bwStats(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
	462
	463	/*!
	464	* @brief Determines per-interval bigWig statistics
	465	* Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. The difference with bwStats is that zoom levels are never used.
	466	* @param fp The file from which to extract statistics.
	467	* @param chrom A valid chromosome name.
	468	* @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
	469	* @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
	470	* @param nBins The number of bins within the interval to calculate statistics for.
	471	* @param type The type of statistic.
	472	* @see bwStatsType
	473	* @return A pointer to an array of double precission floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
	474	*/
	475	double bwStatsFromFull(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
	476
	477	//Writer functions
	478
	479	/*!
	480	* @brief Create a largely empty bigWig header
	481	* Every bigWig file has a header, this creates the template for one. It also takes care of space allocation in the output write buffer.
	482	* @param fp The bigWigFile_t* that you want to write to.
	483	* @param maxZooms The maximum number of zoom levels. If you specify 0 then there will be no zoom levels. A value <0 or > 65535 will result in a maximum of 10.
	484	* @return 0 on success.
	485	*/
	486	int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms);
	487
	488	/*!
	489	* @brief Take a list of chromosome names and lengths and return a pointer to a chromList_t
	490	* This MUST be run before `bwWriteHdr()`. Note that the input is NOT free()d!
	491	* @param chroms A list of chromosomes.
	492	* @param lengths The length of each chromosome.
	493	* @param n The number of chromosomes (thus, the length of `chroms` and `lengths`)
	494	* @return A pointer to a chromList_t or NULL on error.
	495	*/
	496	chromList_t bwCreateChromList(char chroms, uint32_t lengths, int64_t n);
	497
	498	/*!
	499	* @brief Write a the header to a bigWig file.
	500	* You must have already opened the output file, created a header and a chromosome list.
	501	* @param bw The output bigWigFile_t pointer.
	502	* @see bwCreateHdr
	503	* @see bwCreateChromList
	504	*/
	505	int bwWriteHdr(bigWigFile_t *bw);
	506
	507	/*!
	508	* @brief Write a new block of bedGraph-like intervals to a bigWig file
	509	* Adds entries of the form:
	510	* chromosome start end value
	511	* to the file. These will always be added in a new block, so you may have previously used a different storage type.
	512	*
	513	* In general it's more efficient to use the bwAppend* functions, but then you MUST know that the previously written block is of the same type. In other words, you can only use bwAppendIntervals() after bwAddIntervals() or a previous bwAppendIntervals().
	514	* @param fp The output file pointer.
	515	* @param chrom A list of chromosomes, of length `n`.
	516	* @param start A list of start positions of length`n`.
	517	* @param end A list of end positions of length`n`.
	518	* @param values A list of values of length`n`.
	519	* @param n The length of the aforementioned lists.
	520	* @return 0 on success and another value on error.
	521	* @see bwAppendIntervals
	522	*/
	523	int bwAddIntervals(bigWigFile_t fp, char chrom, uint32_t start, uint32_t end, float values, uint32_t n);
	524
	525	/*!
	526	* @brief Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file.
	527	* If you have previously used bwAddIntervals() then this will append additional entries into the previous block (or start a new one if needed).
	528	* @param fp The output file pointer.
	529	* @param start A list of start positions of length`n`.
	530	* @param end A list of end positions of length`n`.
	531	* @param values A list of values of length`n`.
	532	* @param n The length of the aforementioned lists.
	533	* @return 0 on success and another value on error.
	534	* @warning Do NOT use this after `bwAddIntervalSpanSteps()`, `bwAppendIntervalSpanSteps()`, `bwAddIntervalSpanSteps()`, or `bwAppendIntervalSpanSteps()`.
	535	* @see bwAddIntervals
	536	*/
	537	int bwAppendIntervals(bigWigFile_t fp, uint32_t start, uint32_t end, float values, uint32_t n);
	538
	539	/*!
	540	* @brief Add a new block of variable-step entries to a bigWig file
	541	* Adds entries for the form
	542	* chromosome start value
	543	* to the file. Each block of such entries has an associated "span", so each value describes the region chromosome:start-(start+span)
	544	*
	545	* This will always start a new block of values.
	546	* @param fp The output file pointer.
	547	* @param chrom A list of chromosomes, of length `n`.
	548	* @param start A list of start positions of length`n`.
	549	* @param span The span of each entry (the must all be the same).
	550	* @param values A list of values of length`n`.
	551	* @param n The length of the aforementioned lists.
	552	* @return 0 on success and another value on error.
	553	* @see bwAppendIntervalSpans
	554	*/
	555	int bwAddIntervalSpans(bigWigFile_t fp, char chrom, uint32_t start, uint32_t span, float values, uint32_t n);
	556
	557	/*!
	558	* @brief Append to a previous block of variable-step entries.
	559	* If you previously used `bwAddIntervalSpans()`, this will continue appending more values to the block(s) it created.
	560	* @param fp The output file pointer.
	561	* @param start A list of start positions of length`n`.
	562	* @param values A list of values of length`n`.
	563	* @param n The length of the aforementioned lists.
	564	* @return 0 on success and another value on error.
	565	* @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpanSteps()` or `bwAppendIntervalSpanSteps()`
	566	* @see bwAddIntervalSpans
	567	*/
	568	int bwAppendIntervalSpans(bigWigFile_t fp, uint32_t start, float *values, uint32_t n);
	569
	570	/*!
	571	* @brief Add a new block of fixed-step entries to a bigWig file
	572	* Adds entries for the form
	573	* value
	574	* to the file. Each block of such entries has an associated "span", "step", chromosome and start position. See the wiggle format for more details.
	575	*
	576	* This will always start a new block of values.
	577	* @param fp The output file pointer.
	578	* @param chrom The chromosome that the entries describe.
	579	* @param start The starting position of the block of entries.
	580	* @param span The span of each entry (i.e., the number of bases it describes).
	581	* @param step The step between entry start positions.
	582	* @param values A list of values of length`n`.
	583	* @param n The length of the aforementioned lists.
	584	* @return 0 on success and another value on error.
	585	* @see bwAddIntervalSpanSteps
	586	*/
	587	int bwAddIntervalSpanSteps(bigWigFile_t fp, char chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n);
	588
	589	/*!
	590	* @brief Append to a previous block of fixed-step entries.
	591	* If you previously used `bwAddIntervalSpanSteps()`, this will continue appending more values to the block(s) it created.
	592	* @param fp The output file pointer.
	593	* @param values A list of values of length`n`.
	594	* @param n The length of the aforementioned lists.
	595	* @return 0 on success and another value on error.
	596	* @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpans()` or `bwAppendIntervalSpans()`
	597	* @see bwAddIntervalSpanSteps
	598	*/
	599	int bwAppendIntervalSpanSteps(bigWigFile_t fp, float values, uint32_t n);
	600
	601	#ifdef __cplusplus
	602	}
	603	#endif
	604
	605	#endif // LIBBIGWIG_H

+110

-0

libBigWig/bigWigIO.h less more

	0	#ifndef LIBBIGWIG_IO_H
	1	#define LIBBIGWIG_IO_H
	2
	3	#ifndef NOCURL
	4	#include <curl/curl.h>
	5	#else
	6	#include <stdio.h>
	7	#ifndef CURLTYPE_DEFINED
	8	#define CURLTYPE_DEFINED
	9	typedef int CURLcode;
	10	typedef void CURL;
	11	#endif
	12	#define CURLE_OK 0
	13	#define CURLE_FAILED_INIT 1
	14	#endif
	15	/*! \file bigWigIO.h
	16	* These are (typically internal) IO functions, so there's generally no need for you to directly use them!
	17	*/
	18
	19	/*!
	20	* The size of the buffer used for remote files.
	21	*/
	22	extern size_t GLOBAL_DEFAULTBUFFERSIZE;
	23
	24	/*!
	25	* The enumerated values that indicate the connection type used to access a file.
	26	*/
	27	enum bigWigFile_type_enum {
	28	BWG_FILE = 0,
	29	BWG_HTTP = 1,
	30	BWG_HTTPS = 2,
	31	BWG_FTP = 3
	32	};
	33
	34	/*!
	35	* @brief This structure holds the file pointers and buffers needed for raw access to local and remote files.
	36	*/
	37	typedef struct {
	38	union {
	39	#ifndef NOCURL
	40	CURL curl; /<The CURL file pointer for remote files.*/
	41	#endif
	42	FILE fp; /<The FILE file pointer for local files.**/
	43	} x; /*<A union holding curl and fp./
	44	void memBuf; /<A void pointing to memory of size bufSize.*/
	45	size_t filePos; /*<Current position inside the file./
	46	size_t bufPos; /*<Curent position inside the buffer./
	47	size_t bufSize; /*<The size of the buffer./
	48	size_t bufLen; /*<The actual size of the buffer used./
	49	enum bigWigFile_type_enum type; /*<The connection type/
	50	int isCompressed; /*<1 if the file is compressed, otherwise 0/
	51	char fname; /<Only needed for remote connections. The original URL/filename requested, since we need to make multiple connections./
	52	} URL_t;
	53
	54	/*!
	55	* @brief Reads data into the given buffer.
	56	*
	57	* This function will store bufSize data into buf for both local and remote files. For remote files an internal buffer is used to store a (typically larger) segment of the remote file.
	58	*
	59	* @param URL A URL_t * pointing to a valid opened file or remote URL.
	60	* @param buf The buffer in memory that you would like filled. It must be able to hold bufSize bytes!
	61	* @param bufSize The number of bytes to transfer to buf.
	62	*
	63	* @return Returns the number of bytes stored in buf, which should be bufSize on success and something else on error.
	64	*
	65	* @warning Note that on error, URL for remote files is left in an unusable state. You can get around this by running urlSeek() to a position outside of the range held by the internal buffer.
	66	*/
	67	size_t urlRead(URL_t URL, void buf, size_t bufSize);
	68
	69	/*!
	70	* @brief Seeks to a given position in a local or remote file.
	71	*
	72	* For local files, this will set the file position indicator for the file pointer to the desired position. For remote files, it sets the position to start downloading data for the next urlRead(). Note that for remote files that running urlSeek() with a pos within the current buffer will simply modify the internal offset.
	73	*
	74	* @param URL A URL_t * pointing to a valid opened file or remote URL.
	75	* @param pos The position to seek to.
	76	*
	77	* @return CURLE_OK on success and a different CURLE_XXX on error. For local files, the error return value is always CURLE_FAILED_INIT
	78	*/
	79	CURLcode urlSeek(URL_t *URL, size_t pos);
	80
	81	/*!
	82	* @brief Open a local or remote file
	83	*
	84	* Opens a local or remote file. Currently, http, https, and ftp are the only supported protocols and the URL must then begin with "http://", "https://", or "ftp://" as appropriate.
	85	*
	86	* For remote files, an internal buffer is used to hold file contents, to avoid downloading entire files before starting. The size of this buffer and various variable related to connection timeout are set with bwInit().
	87	*
	88	* Note that you must run urlClose() on this when finished. However, you would typically just use bwOpen() rather than directly calling this function.
	89	*
	90	* @param fname The file name or URL to open.
	91	* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information.
	92	* @param mode "r", "w" or NULL. If and only if the mode contains the character "w" will the file be opened for writing.
	93	*
	94	* @return A URL_t * or NULL on error.
	95	*/
	96	URL_t urlOpen(char fname, CURLcode (callBack)(CURL), const char* mode);
	97
	98	/*!
	99	* @brief Close a local/remote file
	100	*
	101	* This will perform the cleanup required on a URL_t*, releasing memory as needed.
	102	*
	103	* @param URL A URL_t * pointing to a valid opened file or remote URL.
	104	*
	105	* @warning URL will no longer point to a valid location in memory!
	106	*/
	107	void urlClose(URL_t *URL);
	108
	109	#endif // LIBBIGWIG_IO_H

+70

-0

libBigWig/bwCommon.h less more

	0	/*! \file bwCommon.h
	1	*
	2	* You have no reason to use these functions. They may change without warning because there's no reason for them to be used outside of libBigWig's internals.
	3	*
	4	* These are structures and functions from a variety of files that are used across files internally but don't need to be see by libBigWig users.
	5	*/
	6
	7	/*!
	8	* @brief Like fsetpos, but for local or remote bigWig files.
	9	* This will set the file position indicator to the specified point. For local files this literally is `fsetpos`, while for remote files it fills a memory buffer with data starting at the desired position.
	10	* @param fp A valid opened bigWigFile_t.
	11	* @param pos The position within the file to seek to.
	12	* @return 0 on success and -1 on error.
	13	*/
	14	int bwSetPos(bigWigFile_t *fp, size_t pos);
	15
	16	/*!
	17	* @brief A local/remote version of `fread`.
	18	* Reads data from either local or remote bigWig files.
	19	* @param data An allocated memory block big enough to hold the data.
	20	* @param sz The size of each member that should be copied.
	21	* @param nmemb The number of members to copy.
	22	* @param fp The bigWigFile_t * from which to copy the data.
	23	* @see bwSetPos
	24	* @return For nmemb==1, the size of the copied data. For nmemb>1, the number of members fully copied (this is equivalent to `fread`).
	25	*/
	26	size_t bwRead(void data, size_t sz, size_t nmemb, bigWigFile_t fp);
	27
	28	/*!
	29	* @brief Determine what the file position indicator say.
	30	* This is equivalent to `ftell` for local or remote files.
	31	* @param fp The file.
	32	* @return The position in the file.
	33	*/
	34	long bwTell(bigWigFile_t *fp);
	35
	36	/*!
	37	* @brief Reads a data index (either full data or a zoom level) from a bigWig file.
	38	* There is little reason for end users to use this function. This must be freed with `bwDestroyIndex`
	39	* @param fp A valid bigWigFile_t pointer
	40	* @param offset The file offset where the index begins
	41	* @return A bwRTree_t pointer or NULL on error.
	42	*/
	43	bwRTree_t bwReadIndex(bigWigFile_t fp, uint64_t offset);
	44
	45	/*!
	46	* @brief Destroy an bwRTreeNode_t and all of its children.
	47	* @param node The node to destroy.
	48	*/
	49	void bwDestroyIndexNode(bwRTreeNode_t *node);
	50
	51	/*!
	52	* @brief Frees space allocated by `bwReadIndex`
	53	* There is generally little reason to use this, since end users should typically not need to run `bwReadIndex` themselves.
	54	* @param idx A bwRTree_t pointer allocated by `bwReadIndex`.
	55	*/
	56	void bwDestroyIndex(bwRTree_t *idx);
	57
	58	/// @cond SKIP
	59	bwOverlapBlock_t walkRTreeNodes(bigWigFile_t bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end);
	60	void destroyBWOverlapBlock(bwOverlapBlock_t *b);
	61	/// @endcond
	62
	63	/*!
	64	* @brief Finishes what's needed to write a bigWigFile
	65	* Flushes the buffer, converts the index linked list to a tree, writes that to disk, handles zoom level stuff, writes magic at the end
	66	* @param fp A valid bigWigFile_t pointer
	67	* @return 0 on success
	68	*/
	69	int bwFinalize(bigWigFile_t *fp);

+427

-0

libBigWig/bwRead.c less more

	0	#include "bigWig.h"
	1	#include "bwCommon.h"
	2	#include <stdlib.h>
	3	#include <math.h>
	4	#include <string.h>
	5	#include <stdio.h>
	6
	7	static uint64_t readChromBlock(bigWigFile_t bw, chromList_t cl, uint32_t keySize);
	8
	9	//Return the position in the file
	10	long bwTell(bigWigFile_t *fp) {
	11	if(fp->URL->type == BWG_FILE) return ftell(fp->URL->x.fp);
	12	return (long) (fp->URL->filePos + fp->URL->bufPos);
	13	}
	14
	15	//Seek to a given position, always from the beginning of the file
	16	//Return 0 on success and -1 on error
	17	//To do, use the return code of urlSeek() in a more useful way.
	18	int bwSetPos(bigWigFile_t *fp, size_t pos) {
	19	CURLcode rv = urlSeek(fp->URL, pos);
	20	if(rv == CURLE_OK) return 0;
	21	return -1;
	22	}
	23
	24	//returns the number of full members read (nmemb on success, something less on error)
	25	size_t bwRead(void data, size_t sz, size_t nmemb, bigWigFile_t fp) {
	26	size_t i, rv;
	27	for(i=0; i<nmemb; i++) {
	28	rv = urlRead(fp->URL, data+i*sz, sz);
	29	if(rv != sz) return i;
	30	}
	31	return nmemb;
	32	}
	33
	34	//Initializes curl and sets global variables
	35	//Returns 0 on success and 1 on error
	36	//This should be called only once and bwCleanup() must be called when finished.
	37	int bwInit(size_t defaultBufSize) {
	38	//set the buffer size, number of iterations, sleep time between iterations, etc.
	39	GLOBAL_DEFAULTBUFFERSIZE = defaultBufSize;
	40
	41	//call curl_global_init()
	42	#ifndef NOCURL
	43	CURLcode rv;
	44	rv = curl_global_init(CURL_GLOBAL_ALL);
	45	if(rv != CURLE_OK) return 1;
	46	#endif
	47	return 0;
	48	}
	49
	50	//This should be called before quiting, to release memory acquired by curl
	51	void bwCleanup() {
	52	#ifndef NOCURL
	53	curl_global_cleanup();
	54	#endif
	55	}
	56
	57	static bwZoomHdr_t bwReadZoomHdrs(bigWigFile_t bw) {
	58	if(bw->isWrite) return NULL;
	59	uint16_t i;
	60	bwZoomHdr_t *zhdr = malloc(sizeof(bwZoomHdr_t));
	61	if(!zhdr) return NULL;
	62	uint32_t level = malloc(bw->hdr->nLevels sizeof(uint64_t));
	63	if(!level) {
	64	free(zhdr);
	65	return NULL;
	66	}
	67	uint32_t padding = 0;
	68	uint64_t dataOffset = malloc(sizeof(uint64_t) bw->hdr->nLevels);
	69	if(!dataOffset) {
	70	free(zhdr);
	71	free(level);
	72	return NULL;
	73	}
	74	uint64_t indexOffset = malloc(sizeof(uint64_t) bw->hdr->nLevels);
	75	if(!indexOffset) {
	76	free(zhdr);
	77	free(level);
	78	free(dataOffset);
	79	return NULL;
	80	}
	81
	82	for(i=0; i<bw->hdr->nLevels; i++) {
	83	if(bwRead((void*) &(level[i]), sizeof(uint32_t), 1, bw) != 1) goto error;
	84	if(bwRead((void*) &padding, sizeof(uint32_t), 1, bw) != 1) goto error;
	85	if(bwRead((void*) &(dataOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
	86	if(bwRead((void*) &(indexOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
	87	}
	88
	89	zhdr->level = level;
	90	zhdr->dataOffset = dataOffset;
	91	zhdr->indexOffset = indexOffset;
	92	zhdr->idx = calloc(bw->hdr->nLevels, sizeof(bwRTree_t*));
	93	if(!zhdr->idx) goto error;
	94
	95	return zhdr;
	96
	97	error:
	98	for(i=0; i<bw->hdr->nLevels; i++) {
	99	if(zhdr->idx[i]) bwDestroyIndex(zhdr->idx[i]);
	100	}
	101	free(zhdr);
	102	free(level);
	103	free(dataOffset);
	104	free(indexOffset);
	105	return NULL;
	106	}
	107
	108	static void bwHdrDestroy(bigWigHdr_t *hdr) {
	109	int i;
	110	if(hdr->zoomHdrs) {
	111	free(hdr->zoomHdrs->level);
	112	free(hdr->zoomHdrs->dataOffset);
	113	free(hdr->zoomHdrs->indexOffset);
	114	for(i=0; i<hdr->nLevels; i++) {
	115	if(hdr->zoomHdrs->idx[i]) bwDestroyIndex(hdr->zoomHdrs->idx[i]);
	116	}
	117	free(hdr->zoomHdrs->idx);
	118	free(hdr->zoomHdrs);
	119	}
	120	free(hdr);
	121	}
	122
	123	static void bwHdrRead(bigWigFile_t *bw) {
	124	uint32_t magic;
	125	if(bw->isWrite) return;
	126	bw->hdr = calloc(1, sizeof(bigWigHdr_t));
	127	if(!bw->hdr) return;
	128
	129	if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error; //0x0
	130	if(magic != BIGWIG_MAGIC && magic != BIGBED_MAGIC) goto error;
	131
	132	if(bwRead((void*) &(bw->hdr->version), sizeof(uint16_t), 1, bw) != 1) goto error; //0x4
	133	if(bwRead((void*) &(bw->hdr->nLevels), sizeof(uint16_t), 1, bw) != 1) goto error; //0x6
	134	if(bwRead((void*) &(bw->hdr->ctOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x8
	135	if(bwRead((void*) &(bw->hdr->dataOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x10
	136	if(bwRead((void*) &(bw->hdr->indexOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x18
	137	if(bwRead((void*) &(bw->hdr->fieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x20
	138	if(bwRead((void*) &(bw->hdr->definedFieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x22
	139	if(bwRead((void*) &(bw->hdr->sqlOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x24
	140	if(bwRead((void*) &(bw->hdr->summaryOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x2c
	141	if(bwRead((void*) &(bw->hdr->bufSize), sizeof(uint32_t), 1, bw) != 1) goto error; //0x34
	142	if(bwRead((void*) &(bw->hdr->extensionOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x38
	143
	144	//zoom headers
	145	if(bw->hdr->nLevels) {
	146	if(!(bw->hdr->zoomHdrs = bwReadZoomHdrs(bw))) goto error;
	147	}
	148
	149	//File summary information
	150	if(bw->hdr->summaryOffset) {
	151	if(urlSeek(bw->URL, bw->hdr->summaryOffset) != CURLE_OK) goto error;
	152	if(bwRead((void*) &(bw->hdr->nBasesCovered), sizeof(uint64_t), 1, bw) != 1) goto error;
	153	if(bwRead((void*) &(bw->hdr->minVal), sizeof(uint64_t), 1, bw) != 1) goto error;
	154	if(bwRead((void*) &(bw->hdr->maxVal), sizeof(uint64_t), 1, bw) != 1) goto error;
	155	if(bwRead((void*) &(bw->hdr->sumData), sizeof(uint64_t), 1, bw) != 1) goto error;
	156	if(bwRead((void*) &(bw->hdr->sumSquared), sizeof(uint64_t), 1, bw) != 1) goto error;
	157	}
	158
	159	//In case of uncompressed remote files, let the IO functions know to request larger chunks
	160	bw->URL->isCompressed = (bw->hdr->bufSize > 0)?1:0;
	161
	162	return;
	163
	164	error:
	165	bwHdrDestroy(bw->hdr);
	166	fprintf(stderr, "[bwHdrRead] There was an error while reading in the header!\n");
	167	bw->hdr = NULL;
	168	}
	169
	170	static void destroyChromList(chromList_t *cl) {
	171	uint32_t i;
	172	if(!cl) return;
	173	if(cl->nKeys && cl->chrom) {
	174	for(i=0; i<cl->nKeys; i++) {
	175	if(cl->chrom[i]) free(cl->chrom[i]);
	176	}
	177	}
	178	if(cl->chrom) free(cl->chrom);
	179	if(cl->len) free(cl->len);
	180	free(cl);
	181	}
	182
	183	static uint64_t readChromLeaf(bigWigFile_t bw, chromList_t cl, uint32_t valueSize) {
	184	uint16_t nVals, i;
	185	uint32_t idx;
	186	char *chrom = NULL;
	187
	188	if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
	189	chrom = calloc(valueSize+1, sizeof(char));
	190	if(!chrom) return -1;
	191
	192	for(i=0; i<nVals; i++) {
	193	if(bwRead((void*) chrom, sizeof(char), valueSize, bw) != valueSize) goto error;
	194	if(bwRead((void*) &idx, sizeof(uint32_t), 1, bw) != 1) goto error;
	195	if(bwRead((void*) &(cl->len[idx]), sizeof(uint32_t), 1, bw) != 1) goto error;
	196	cl->chrom[idx] = strdup(chrom);
	197	if(!(cl->chrom[idx])) goto error;
	198	}
	199
	200	free(chrom);
	201	return nVals;
	202
	203	error:
	204	free(chrom);
	205	return -1;
	206	}
	207
	208	static uint64_t readChromNonLeaf(bigWigFile_t bw, chromList_t cl, uint32_t keySize) {
	209	uint64_t offset , rv = 0, previous;
	210	uint16_t nVals, i;
	211
	212	if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
	213
	214	previous = bwTell(bw) + keySize;
	215	for(i=0; i<nVals; i++) {
	216	if(bwSetPos(bw, previous)) return -1;
	217	if(bwRead((void*) &offset, sizeof(uint64_t), 1, bw) != 1) return -1;
	218	if(bwSetPos(bw, offset)) return -1;
	219	rv += readChromBlock(bw, cl, keySize);
	220	previous += 8 + keySize;
	221	}
	222
	223	return rv;
	224	}
	225
	226	static uint64_t readChromBlock(bigWigFile_t bw, chromList_t cl, uint32_t keySize) {
	227	uint8_t isLeaf, padding;
	228
	229	if(bwRead((void*) &isLeaf, sizeof(uint8_t), 1, bw) != 1) return -1;
	230	if(bwRead((void*) &padding, sizeof(uint8_t), 1, bw) != 1) return -1;
	231
	232	if(isLeaf) {
	233	return readChromLeaf(bw, cl, keySize);
	234	} else { //I've never actually observed one of these, which is good since they're pointless
	235	return readChromNonLeaf(bw, cl, keySize);
	236	}
	237	}
	238
	239	static chromList_t bwReadChromList(bigWigFile_t bw) {
	240	chromList_t *cl = NULL;
	241	uint32_t magic, keySize, valueSize, itemsPerBlock;
	242	uint64_t rv, itemCount;
	243	if(bw->isWrite) return NULL;
	244	if(bwSetPos(bw, bw->hdr->ctOffset)) return NULL;
	245
	246	cl = calloc(1, sizeof(chromList_t));
	247	if(!cl) return NULL;
	248
	249	if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
	250	if(magic != CIRTREE_MAGIC) goto error;
	251
	252	if(bwRead((void*) &itemsPerBlock, sizeof(uint32_t), 1, bw) != 1) goto error;
	253	if(bwRead((void*) &keySize, sizeof(uint32_t), 1, bw) != 1) goto error;
	254	if(bwRead((void*) &valueSize, sizeof(uint32_t), 1, bw) != 1) goto error;
	255	if(bwRead((void*) &itemCount, sizeof(uint64_t), 1, bw) != 1) goto error;
	256
	257	cl->nKeys = itemCount;
	258	cl->chrom = calloc(itemCount, sizeof(char*));
	259	cl->len = calloc(itemCount, sizeof(uint32_t));
	260	if(!cl->chrom) goto error;
	261	if(!cl->len) goto error;
	262
	263	if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
	264	if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
	265
	266	//Read in the blocks
	267	rv = readChromBlock(bw, cl, keySize);
	268	if(rv == (uint64_t) -1) goto error;
	269	if(rv != itemCount) goto error;
	270
	271	return cl;
	272
	273	error:
	274	destroyChromList(cl);
	275	return NULL;
	276	}
	277
	278	//This is here mostly for convenience
	279	static void bwDestroyWriteBuffer(bwWriteBuffer_t *wb) {
	280	if(wb->p) free(wb->p);
	281	if(wb->compressP) free(wb->compressP);
	282	if(wb->firstZoomBuffer) free(wb->firstZoomBuffer);
	283	if(wb->lastZoomBuffer) free(wb->lastZoomBuffer);
	284	if(wb->nNodes) free(wb->nNodes);
	285	free(wb);
	286	}
	287
	288	void bwClose(bigWigFile_t *fp) {
	289	if(!fp) return;
	290	if(bwFinalize(fp)) {
	291	fprintf(stderr, "[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.\n");
	292	}
	293	if(fp->URL) urlClose(fp->URL);
	294	if(fp->hdr) bwHdrDestroy(fp->hdr);
	295	if(fp->cl) destroyChromList(fp->cl);
	296	if(fp->idx) bwDestroyIndex(fp->idx);
	297	if(fp->writeBuffer) bwDestroyWriteBuffer(fp->writeBuffer);
	298	free(fp);
	299	}
	300
	301	int bwIsBigWig(char fname, CURLcode (callBack) (CURL*)) {
	302	uint32_t magic = 0;
	303	URL_t *URL = NULL;
	304
	305	URL = urlOpen(fname, *callBack, NULL);
	306
	307	if(!URL) return 0;
	308	if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
	309	urlClose(URL);
	310	if(magic == BIGWIG_MAGIC) return 1;
	311	return 0;
	312	}
	313
	314	char bbGetSQL(bigWigFile_t bw) {
	315	char *o = NULL;
	316	uint64_t len;
	317	if(!bw->hdr->sqlOffset) return NULL;
	318	len = bw->hdr->summaryOffset - bw->hdr->sqlOffset; //This includes the NULL terminator
	319	o = malloc(sizeof(char) * len);
	320	if(!o) goto error;
	321	if(bwSetPos(bw, bw->hdr->sqlOffset)) goto error;
	322	if(bwRead((void*) o, len, 1, bw) != 1) goto error;
	323	return o;
	324
	325	error:
	326	if(o) free(o);
	327	printf("Got an error in bbGetSQL!\n");
	328	return NULL;
	329	}
	330
	331	int bbIsBigBed(char fname, CURLcode (callBack) (CURL*)) {
	332	uint32_t magic = 0;
	333	URL_t *URL = NULL;
	334
	335	URL = urlOpen(fname, *callBack, NULL);
	336
	337	if(!URL) return 0;
	338	if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
	339	urlClose(URL);
	340	if(magic == BIGBED_MAGIC) return 1;
	341	return 0;
	342	}
	343
	344	bigWigFile_t bwOpen(char fname, CURLcode (callBack) (CURL), const char *mode) {
	345	bigWigFile_t *bwg = calloc(1, sizeof(bigWigFile_t));
	346	if(!bwg) {
	347	fprintf(stderr, "[bwOpen] Couldn't allocate space to create the output object!\n");
	348	return NULL;
	349	}
	350	if((!mode) \|\| (strchr(mode, 'w') == NULL)) {
	351	bwg->isWrite = 0;
	352	bwg->URL = urlOpen(fname, *callBack, NULL);
	353	if(!bwg->URL) {
	354	fprintf(stderr, "[bwOpen] urlOpen is NULL!\n");
	355	goto error;
	356	}
	357
	358	//Attempt to read in the fixed header
	359	bwHdrRead(bwg);
	360	if(!bwg->hdr) {
	361	fprintf(stderr, "[bwOpen] bwg->hdr is NULL!\n");
	362	goto error;
	363	}
	364
	365	//Read in the chromosome list
	366	bwg->cl = bwReadChromList(bwg);
	367	if(!bwg->cl) {
	368	fprintf(stderr, "[bwOpen] bwg->cl is NULL (%s)!\n", fname);
	369	goto error;
	370	}
	371
	372	//Read in the index
	373	if(bwg->hdr->indexOffset) {
	374	bwg->idx = bwReadIndex(bwg, 0);
	375	if(!bwg->idx) {
	376	fprintf(stderr, "[bwOpen] bwg->idx is NULL bwg->hdr->dataOffset 0x%"PRIx64"!\n", bwg->hdr->dataOffset);
	377	goto error;
	378	}
	379	}
	380	} else {
	381	bwg->isWrite = 1;
	382	bwg->URL = urlOpen(fname, NULL, "w+");
	383	if(!bwg->URL) goto error;
	384	bwg->writeBuffer = calloc(1,sizeof(bwWriteBuffer_t));
	385	if(!bwg->writeBuffer) goto error;
	386	bwg->writeBuffer->l = 24;
	387	}
	388
	389	return bwg;
	390
	391	error:
	392	bwClose(bwg);
	393	return NULL;
	394	}
	395
	396	bigWigFile_t bbOpen(char fname, CURLcode (callBack) (CURL)) {
	397	bigWigFile_t *bb = calloc(1, sizeof(bigWigFile_t));
	398	if(!bb) {
	399	fprintf(stderr, "[bbOpen] Couldn't allocate space to create the output object!\n");
	400	return NULL;
	401	}
	402
	403	//Set the type to 1 for bigBed
	404	bb->type = 1;
	405
	406	bb->URL = urlOpen(fname, *callBack, NULL);
	407	if(!bb->URL) goto error;
	408
	409	//Attempt to read in the fixed header
	410	bwHdrRead(bb);
	411	if(!bb->hdr) goto error;
	412
	413	//Read in the chromosome list
	414	bb->cl = bwReadChromList(bb);
	415	if(!bb->cl) goto error;
	416
	417	//Read in the index
	418	bb->idx = bwReadIndex(bb, 0);
	419	if(!bb->idx) goto error;
	420
	421	return bb;
	422
	423	error:
	424	bwClose(bb);
	425	return NULL;
	426	}

+537

-0

libBigWig/bwStats.c less more

	0	#include "bigWig.h"
	1	#include "bwCommon.h"
	2	#include <errno.h>
	3	#include <stdlib.h>
	4	#include <zlib.h>
	5	#include <math.h>
	6	#include <string.h>
	7
	8	//Returns -1 if there are no applicable levels, otherwise an integer indicating the most appropriate level.
	9	//Like Kent's library, this divides the desired bin size by 2 to minimize the effect of blocks overlapping multiple bins
	10	static int32_t determineZoomLevel(bigWigFile_t *fp, int basesPerBin) {
	11	int32_t out = -1;
	12	int64_t diff;
	13	uint32_t bestDiff = -1;
	14	uint16_t i;
	15
	16	basesPerBin/=2;
	17	for(i=0; i<fp->hdr->nLevels; i++) {
	18	diff = basesPerBin - (int64_t) fp->hdr->zoomHdrs->level[i];
	19	if(diff >= 0 && diff < bestDiff) {
	20	bestDiff = diff;
	21	out = i;
	22	}
	23	}
	24	return out;
	25	}
	26
	27	/// @cond SKIP
	28	struct val_t {
	29	uint32_t nBases;
	30	float min, max, sum, sumsq;
	31	double scalar;
	32	};
	33
	34	struct vals_t {
	35	uint32_t n;
	36	struct val_t **vals;
	37	};
	38	/// @endcond
	39
	40	void destroyVals_t(struct vals_t *v) {
	41	uint32_t i;
	42	if(!v) return;
	43	for(i=0; i<v->n; i++) free(v->vals[i]);
	44	if(v->vals) free(v->vals);
	45	free(v);
	46	}
	47
	48	//Determine the base-pair overlap between an interval and a block
	49	double getScalar(uint32_t i_start, uint32_t i_end, uint32_t b_start, uint32_t b_end) {
	50	double rv = 0.0;
	51	if(b_start <= i_start) {
	52	if(b_end > i_start) rv = ((double)(b_end - i_start))/(b_end-b_start);
	53	} else if(b_start < i_end) {
	54	if(b_end < i_end) rv = ((double)(b_end - b_start))/(b_end-b_start);
	55	else rv = ((double)(i_end - b_start))/(b_end-b_start);
	56	}
	57
	58	return rv;
	59	}
	60
	61	//Returns NULL on error
	62	static struct vals_t getVals(bigWigFile_t fp, bwOverlapBlock_t *o, int i, uint32_t tid, uint32_t start, uint32_t end) {
	63	void buf = NULL, compBuf = NULL;
	64	uLongf sz = fp->hdr->bufSize;
	65	int compressed = 0, rv;
	66	uint32_t *p, vtid, vstart, vend;
	67	struct vals_t *vals = NULL;
	68	struct val_t *v = NULL;
	69
	70	if(sz) {
	71	compressed = 1;
	72	buf = malloc(sz);
	73	}
	74	sz = 0; //This is now the size of the compressed buffer
	75
	76	if(bwSetPos(fp, o->offset[i])) goto error;
	77
	78	vals = calloc(1,sizeof(struct vals_t));
	79	if(!vals) goto error;
	80
	81	v = malloc(sizeof(struct val_t));
	82	if(!v) goto error;
	83
	84	if(sz < o->size[i]) compBuf = malloc(o->size[i]);
	85	if(!compBuf) goto error;
	86
	87	if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
	88	if(compressed) {
	89	sz = fp->hdr->bufSize;
	90	rv = uncompress(buf, &sz, compBuf, o->size[i]);
	91	if(rv != Z_OK) goto error;
	92	} else {
	93	buf = compBuf;
	94	sz = o->size[i];
	95	}
	96
	97	p = buf;
	98	while(((uLongf) ((void*)p-buf)) < sz) {
	99	vtid = p[0];
	100	vstart = p[1];
	101	vend = p[2];
	102	v->nBases = p[3];
	103	v->min = ((float*) p)[4];
	104	v->max = ((float*) p)[5];
	105	v->sum = ((float*) p)[6];
	106	v->sumsq = ((float*) p)[7];
	107	v->scalar = getScalar(start, end, vstart, vend);
	108
	109	if(tid == vtid) {
	110	if((start <= vstart && end > vstart) \|\| (start < vend && start >= vstart)) {
	111	vals->vals = realloc(vals->vals, sizeof(struct val_t)(vals->n+1));
	112	if(!vals->vals) goto error;
	113	vals->vals[vals->n++] = v;
	114	v = malloc(sizeof(struct val_t));
	115	if(!v) goto error;
	116	}
	117	if(vstart > end) break;
	118	} else if(vtid > tid) {
	119	break;
	120	}
	121	p+=8;
	122	}
	123
	124	free(v);
	125	free(buf);
	126	if(compressed) free(compBuf);
	127	return vals;
	128
	129	error:
	130	if(buf) free(buf);
	131	if(compBuf && compressed) free(compBuf);
	132	if(v) free(v);
	133	destroyVals_t(vals);
	134	return NULL;
	135	}
	136
	137	//On error, errno is set to ENOMEM and NaN is returned (though NaN can be returned normally)
	138	static double blockMean(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	139	uint32_t i, j;
	140	double output = 0.0, coverage = 0.0;
	141	struct vals_t *v = NULL;
	142
	143	if(!blocks->n) return strtod("NaN", NULL);
	144
	145	//Iterate over the blocks
	146	for(i=0; i<blocks->n; i++) {
	147	v = getVals(fp, blocks, i, tid, start, end);
	148	if(!v) goto error;
	149	for(j=0; j<v->n; j++) {
	150	output += v->vals[j]->sum * v->vals[j]->scalar;
	151	coverage += v->vals[j]->nBases * v->vals[j]->scalar;
	152	}
	153	destroyVals_t(v);
	154	}
	155
	156
	157	if(!coverage) return strtod("NaN", NULL);
	158
	159	return output/coverage;
	160
	161	error:
	162	if(v) free(v);
	163	errno = ENOMEM;
	164	return strtod("NaN", NULL);
	165	}
	166
	167	static double intMean(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
	168	double sum = 0.0;
	169	uint32_t nBases = 0, i, start_use, end_use;
	170
	171	if(!ints->l) return strtod("NaN", NULL);
	172
	173	for(i=0; i<ints->l; i++) {
	174	start_use = ints->start[i];
	175	end_use = ints->end[i];
	176	if(ints->start[i] < start) start_use = start;
	177	if(ints->end[i] > end) end_use = end;
	178	nBases += end_use-start_use;
	179	sum += (end_use-start_use)*((double) ints->value[i]);
	180	}
	181
	182	return sum/nBases;
	183	}
	184
	185	//Does UCSC compensate for partial block/range overlap?
	186	static double blockDev(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	187	uint32_t i, j;
	188	double mean = 0.0, ssq = 0.0, coverage = 0.0, diff;
	189	struct vals_t *v = NULL;
	190
	191	if(!blocks->n) return strtod("NaN", NULL);
	192
	193	//Iterate over the blocks
	194	for(i=0; i<blocks->n; i++) {
	195	v = getVals(fp, blocks, i, tid, start, end);
	196	if(!v) goto error;
	197	for(j=0; j<v->n; j++) {
	198	coverage += v->vals[j]->nBases * v->vals[j]->scalar;
	199	mean += v->vals[j]->sum * v->vals[j]->scalar;
	200	ssq += v->vals[j]->sumsq * v->vals[j]->scalar;
	201	}
	202	destroyVals_t(v);
	203	v = NULL;
	204	}
	205
	206	if(coverage<=1.0) return strtod("NaN", NULL);
	207	diff = ssq-mean*mean/coverage;
	208	if(coverage > 1.0) diff /= coverage-1;
	209	if(fabs(diff) > 1e-8) { //Ignore floating point differences
	210	return sqrt(diff);
	211	} else {
	212	return 0.0;
	213	}
	214
	215	error:
	216	if(v) destroyVals_t(v);
	217	errno = ENOMEM;
	218	return strtod("NaN", NULL);
	219	}
	220
	221	//This uses compensated summation to account for finite precision math
	222	static double intDev(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
	223	double v1 = 0.0, mean, rv;
	224	uint32_t nBases = 0, i, start_use, end_use;
	225
	226	if(!ints->l) return strtod("NaN", NULL);
	227	mean = intMean(ints, start, end);
	228
	229	for(i=0; i<ints->l; i++) {
	230	start_use = ints->start[i];
	231	end_use = ints->end[i];
	232	if(ints->start[i] < start) start_use = start;
	233	if(ints->end[i] > end) end_use = end;
	234	nBases += end_use-start_use;
	235	v1 += (end_use-start_use) * pow(ints->value[i]-mean, 2.0); //running sum of squared difference
	236	}
	237
	238	if(nBases>=2) rv = sqrt(v1/(nBases-1));
	239	else if(nBases==1) rv = sqrt(v1);
	240	else rv = strtod("NaN", NULL);
	241
	242	return rv;
	243	}
	244
	245	static double blockMax(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	246	uint32_t i, j, isNA = 1;
	247	double o = strtod("NaN", NULL);
	248	struct vals_t *v = NULL;
	249
	250	if(!blocks->n) return o;
	251
	252	//Iterate the blocks
	253	for(i=0; i<blocks->n; i++) {
	254	v = getVals(fp, blocks, i, tid, start, end);
	255	if(!v) goto error;
	256	for(j=0; j<v->n; j++) {
	257	if(isNA) {
	258	o = v->vals[j]->max;
	259	isNA = 0;
	260	} else if(v->vals[j]->max > o) {
	261	o = v->vals[j]->max;
	262	}
	263	}
	264	destroyVals_t(v);
	265	}
	266
	267	return o;
	268
	269	error:
	270	destroyVals_t(v);
	271	errno = ENOMEM;
	272	return strtod("NaN", NULL);
	273	}
	274
	275	static double intMax(bwOverlappingIntervals_t* ints) {
	276	uint32_t i;
	277	double o;
	278
	279	if(ints->l < 1) return strtod("NaN", NULL);
	280
	281	o = ints->value[0];
	282	for(i=1; i<ints->l; i++) {
	283	if(ints->value[i] > o) o = ints->value[i];
	284	}
	285
	286	return o;
	287	}
	288
	289	static double blockMin(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	290	uint32_t i, j, isNA = 1;
	291	double o = strtod("NaN", NULL);
	292	struct vals_t *v = NULL;
	293
	294	if(!blocks->n) return o;
	295
	296	//Iterate the blocks
	297	for(i=0; i<blocks->n; i++) {
	298	v = getVals(fp, blocks, i, tid, start, end);
	299	if(!v) goto error;
	300	for(j=0; j<v->n; j++) {
	301	if(isNA) {
	302	o = v->vals[j]->min;
	303	isNA = 0;
	304	} else if(v->vals[j]->min < o) o = v->vals[j]->min;
	305	}
	306	destroyVals_t(v);
	307	}
	308
	309	return o;
	310
	311	error:
	312	destroyVals_t(v);
	313	errno = ENOMEM;
	314	return strtod("NaN", NULL);
	315	}
	316
	317	static double intMin(bwOverlappingIntervals_t* ints) {
	318	uint32_t i;
	319	double o;
	320
	321	if(ints->l < 1) return strtod("NaN", NULL);
	322
	323	o = ints->value[0];
	324	for(i=1; i<ints->l; i++) {
	325	if(ints->value[i] < o) o = ints->value[i];
	326	}
	327
	328	return o;
	329	}
	330
	331	//Does UCSC compensate for only partial block/interval overlap?
	332	static double blockCoverage(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	333	uint32_t i, j;
	334	double o = 0.0;
	335	struct vals_t *v = NULL;
	336
	337	if(!blocks->n) return strtod("NaN", NULL);
	338
	339	//Iterate over the blocks
	340	for(i=0; i<blocks->n; i++) {
	341	v = getVals(fp, blocks, i, tid, start, end);
	342	if(!v) goto error;
	343	for(j=0; j<v->n; j++) {
	344	o+= v->vals[j]->nBases * v->vals[j]->scalar;
	345	}
	346	destroyVals_t(v);
	347	}
	348
	349	if(o == 0.0) return strtod("NaN", NULL);
	350	return o;
	351
	352	error:
	353	destroyVals_t(v);
	354	errno = ENOMEM;
	355	return strtod("NaN", NULL);
	356	}
	357
	358	static double intCoverage(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
	359	uint32_t i, start_use, end_use;
	360	double o = 0.0;
	361
	362	if(!ints->l) return strtod("NaN", NULL);
	363
	364	for(i=0; i<ints->l; i++) {
	365	start_use = ints->start[i];
	366	end_use = ints->end[i];
	367	if(start_use < start) start_use = start;
	368	if(end_use > end) end_use = end;
	369	o += end_use - start_use;
	370	}
	371
	372	return o/(end-start);
	373	}
	374
	375	static double blockSum(bigWigFile_t fp, bwOverlapBlock_t blocks, uint32_t tid, uint32_t start, uint32_t end) {
	376	uint32_t i, j, sizeUse;
	377	double o = 0.0;
	378	struct vals_t *v = NULL;
	379
	380	if(!blocks->n) return strtod("NaN", NULL);
	381
	382	//Iterate over the blocks
	383	for(i=0; i<blocks->n; i++) {
	384	v = getVals(fp, blocks, i, tid, start, end);
	385	if(!v) goto error;
	386	for(j=0; j<v->n; j++) {
	387	//Multiply the block average by min(bases covered, block overlap with interval)
	388	sizeUse = v->vals[j]->scalar;
	389	if(sizeUse > v->vals[j]->nBases) sizeUse = v->vals[j]->nBases;
	390	o+= (v->vals[j]->sum * sizeUse) / v->vals[j]->nBases;
	391	}
	392	destroyVals_t(v);
	393	}
	394
	395	if(o == 0.0) return strtod("NaN", NULL);
	396	return o;
	397
	398	error:
	399	destroyVals_t(v);
	400	errno = ENOMEM;
	401	return strtod("NaN", NULL);
	402	}
	403
	404	static double intSum(bwOverlappingIntervals_t* ints, uint32_t start, uint32_t end) {
	405	uint32_t i, start_use, end_use;
	406	double o = 0.0;
	407
	408	if(!ints->l) return strtod("NaN", NULL);
	409
	410	for(i=0; i<ints->l; i++) {
	411	start_use = ints->start[i];
	412	end_use = ints->end[i];
	413	if(start_use < start) start_use = start;
	414	if(end_use > end) end_use = end;
	415	o += (end_use - start_use) * ints->value[i];
	416	}
	417
	418	return o;
	419	}
	420
	421	//Returns NULL on error, otherwise a double* that needs to be free()d
	422	double bwStatsFromZoom(bigWigFile_t fp, int32_t level, uint32_t tid, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
	423	bwOverlapBlock_t *blocks = NULL;
	424	double *output = NULL;
	425	uint32_t pos = start, i, end2;
	426
	427	if(!fp->hdr->zoomHdrs->idx[level]) {
	428	fp->hdr->zoomHdrs->idx[level] = bwReadIndex(fp, fp->hdr->zoomHdrs->indexOffset[level]);
	429	if(!fp->hdr->zoomHdrs->idx[level]) return NULL;
	430	}
	431	errno = 0; //Sometimes libCurls sets and then doesn't unset errno on errors
	432
	433	output = malloc(sizeof(double)*nBins);
	434	if(!output) return NULL;
	435
	436	for(i=0, pos=start; i<nBins; i++) {
	437	end2 = start + ((double)(end-start)*(i+1))/((int) nBins);
	438	blocks = walkRTreeNodes(fp, fp->hdr->zoomHdrs->idx[level]->root, tid, pos, end2);
	439	if(!blocks) goto error;
	440
	441	switch(type) {
	442	case 0:
	443	//mean
	444	output[i] = blockMean(fp, blocks, tid, pos, end2);
	445	break;
	446	case 1:
	447	//stdev
	448	output[i] = blockDev(fp, blocks, tid, pos, end2);
	449	break;
	450	case 2:
	451	//max
	452	output[i] = blockMax(fp, blocks, tid, pos, end2);
	453	break;
	454	case 3:
	455	//min
	456	output[i] = blockMin(fp, blocks, tid, pos, end2);
	457	break;
	458	case 4:
	459	//cov
	460	output[i] = blockCoverage(fp, blocks, tid, pos, end2)/(end2-pos);
	461	break;
	462	case 5:
	463	//sum
	464	output[i] = blockSum(fp, blocks, tid, pos, end2);
	465	break;
	466	default:
	467	goto error;
	468	break;
	469	}
	470	if(errno) goto error;
	471	destroyBWOverlapBlock(blocks);
	472	pos = end2;
	473	}
	474
	475	return output;
	476
	477	error:
	478	fprintf(stderr, "got an error in bwStatsFromZoom in the range %"PRIu32"-%"PRIu32": %s\n", pos, end2, strerror(errno));
	479	if(blocks) destroyBWOverlapBlock(blocks);
	480	if(output) free(output);
	481	return NULL;
	482	}
	483
	484	double bwStatsFromFull(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
	485	bwOverlappingIntervals_t *ints = NULL;
	486	double output = malloc(sizeof(double)nBins);
	487	uint32_t i, pos = start, end2;
	488	if(!output) return NULL;
	489
	490	for(i=0; i<nBins; i++) {
	491	end2 = start + ((double)(end-start)*(i+1))/((int) nBins);
	492	ints = bwGetOverlappingIntervals(fp, chrom, pos, end2);
	493
	494	if(!ints) {
	495	output[i] = strtod("NaN", NULL);
	496	continue;
	497	}
	498
	499	switch(type) {
	500	default :
	501	case 0:
	502	output[i] = intMean(ints, pos, end2);
	503	break;
	504	case 1:
	505	output[i] = intDev(ints, pos, end2);
	506	break;
	507	case 2:
	508	output[i] = intMax(ints);
	509	break;
	510	case 3:
	511	output[i] = intMin(ints);
	512	break;
	513	case 4:
	514	output[i] = intCoverage(ints, pos, end2);
	515	break;
	516	case 5:
	517	output[i] = intSum(ints, pos, end2);
	518	break;
	519	}
	520	bwDestroyOverlappingIntervals(ints);
	521	pos = end2;
	522	}
	523
	524	return output;
	525	}
	526
	527	//Returns a list of floats of length nBins that must be free()d
	528	//On error, NULL is returned
	529	double bwStats(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type) {
	530	int32_t level = determineZoomLevel(fp, ((double)(end-start))/((int) nBins));
	531	uint32_t tid = bwGetTid(fp, chrom);
	532	if(tid == (uint32_t) -1) return NULL;
	533
	534	if(level == -1) return bwStatsFromFull(fp, chrom, start, end, nBins, type);
	535	return bwStatsFromZoom(fp, level, tid, start, end, nBins, type);
	536	}

+802

-0

libBigWig/bwValues.c less more

	0	#include "bigWig.h"
	1	#include "bwCommon.h"
	2	#include <stdlib.h>
	3	#include <string.h>
	4	#include <zlib.h>
	5	#include <errno.h>
	6
	7	static uint32_t roundup(uint32_t v) {
	8	v--;
	9	v \|= v >> 1;
	10	v \|= v >> 2;
	11	v \|= v >> 4;
	12	v \|= v >> 8;
	13	v \|= v >> 16;
	14	v++;
	15	return v;
	16	}
	17
	18	//Returns the root node on success and NULL on error
	19	static bwRTree_t readRTreeIdx(bigWigFile_t fp, uint64_t offset) {
	20	uint32_t magic;
	21	bwRTree_t *node;
	22
	23	if(!offset) {
	24	if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
	25	} else {
	26	if(bwSetPos(fp, offset)) return NULL;
	27	}
	28
	29	if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
	30	if(magic != IDX_MAGIC) {
	31	fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
	32	return NULL;
	33	}
	34
	35	node = calloc(1, sizeof(bwRTree_t));
	36	if(!node) return NULL;
	37
	38	if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
	39	if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
	40	if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
	41	if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
	42	if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
	43	if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
	44	if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
	45	if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
	46	//Padding
	47	if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
	48	node->rootOffset = bwTell(fp);
	49
	50	//For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
	51	errno = 0;
	52
	53	return node;
	54
	55	error:
	56	free(node);
	57	return NULL;
	58	}
	59
	60	//Returns a bwRTreeNode_t on success and NULL on an error
	61	//For the root node, set offset to 0
	62	static bwRTreeNode_t bwGetRTreeNode(bigWigFile_t fp, uint64_t offset) {
	63	bwRTreeNode_t *node = NULL;
	64	uint8_t padding;
	65	uint16_t i;
	66	if(offset) {
	67	if(bwSetPos(fp, offset)) return NULL;
	68	} else {
	69	//seek
	70	if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
	71	}
	72
	73	node = calloc(1, sizeof(bwRTreeNode_t));
	74	if(!node) return NULL;
	75
	76	if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
	77	if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
	78	if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
	79
	80	node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
	81	if(!node->chrIdxStart) goto error;
	82	node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
	83	if(!node->baseStart) goto error;
	84	node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
	85	if(!node->chrIdxEnd) goto error;
	86	node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
	87	if(!node->baseEnd) goto error;
	88	node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
	89	if(!node->dataOffset) goto error;
	90	if(node->isLeaf) {
	91	node->x.size = malloc(node->nChildren * sizeof(uint64_t));
	92	if(!node->x.size) goto error;
	93	} else {
	94	node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
	95	if(!node->x.child) goto error;
	96	}
	97	for(i=0; i<node->nChildren; i++) {
	98	if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
	99	if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
	100	if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
	101	if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
	102	if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
	103	if(node->isLeaf) {
	104	if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
	105	}
	106	}
	107
	108	return node;
	109
	110	error:
	111	if(node->chrIdxStart) free(node->chrIdxStart);
	112	if(node->baseStart) free(node->baseStart);
	113	if(node->chrIdxEnd) free(node->chrIdxEnd);
	114	if(node->baseEnd) free(node->baseEnd);
	115	if(node->dataOffset) free(node->dataOffset);
	116	if(node->isLeaf && node->x.size) free(node->x.size);
	117	else if((!node->isLeaf) && node->x.child) free(node->x.child);
	118	free(node);
	119	return NULL;
	120	}
	121
	122	void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
	123	if(!b) return;
	124	if(b->size) free(b->size);
	125	if(b->offset) free(b->offset);
	126	free(b);
	127	}
	128
	129	//Returns a bwOverlapBlock_t * object or NULL on error.
	130	static bwOverlapBlock_t overlapsLeaf(bwRTreeNode_t node, uint32_t tid, uint32_t start, uint32_t end) {
	131	uint16_t i, idx = 0;
	132	bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
	133	if(!o) return NULL;
	134
	135	for(i=0; i<node->nChildren; i++) {
	136	if(tid < node->chrIdxStart[i]) break;
	137	if(tid > node->chrIdxEnd[i]) continue;
	138
	139	/*
	140	The individual blocks can theoretically span multiple contigs.
	141	So if we treat the first/last contig in the range as special
	142	but anything in the middle is a guaranteed match
	143	*/
	144	if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
	145	if(tid == node->chrIdxStart[i]) {
	146	if(node->baseStart[i] >= end) break;
	147	} else if(tid == node->chrIdxEnd[i]) {
	148	if(node->baseEnd[i] <= start) continue;
	149	}
	150	} else {
	151	if(node->baseStart[i] >= end \|\| node->baseEnd[i] <= start) continue;
	152	}
	153	o->n++;
	154	}
	155
	156	if(o->n) {
	157	o->offset = malloc(sizeof(uint64_t) * (o->n));
	158	if(!o->offset) goto error;
	159	o->size = malloc(sizeof(uint64_t) * (o->n));
	160	if(!o->size) goto error;
	161
	162	for(i=0; i<node->nChildren; i++) {
	163	if(tid < node->chrIdxStart[i]) break;
	164	if(tid < node->chrIdxStart[i] \|\| tid > node->chrIdxEnd[i]) continue;
	165	if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
	166	if(tid == node->chrIdxStart[i]) {
	167	if(node->baseStart[i] >= end) continue;
	168	} else if(tid == node->chrIdxEnd[i]) {
	169	if(node->baseEnd[i] <= start) continue;
	170	}
	171	} else {
	172	if(node->baseStart[i] >= end \|\| node->baseEnd[i] <= start) continue;
	173	}
	174	o->offset[idx] = node->dataOffset[i];
	175	o->size[idx++] = node->x.size[i];
	176	if(idx >= o->n) break;
	177	}
	178	}
	179
	180	if(idx != o->n) { //This should never happen
	181	fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
	182	goto error;
	183	}
	184
	185	return o;
	186
	187	error:
	188	if(o) destroyBWOverlapBlock(o);
	189	return NULL;
	190	}
	191
	192	//This will free l2 unless there's an error!
	193	//Returns NULL on error, otherwise the merged lists
	194	static bwOverlapBlock_t mergeOverlapBlocks(bwOverlapBlock_t b1, bwOverlapBlock_t *b2) {
	195	uint64_t i,j;
	196	if(!b2) return b1;
	197	if(!b2->n) {
	198	destroyBWOverlapBlock(b2);
	199	return b1;
	200	}
	201	if(!b1->n) {
	202	destroyBWOverlapBlock(b1);
	203	return b2;
	204	}
	205	j = b1->n;
	206	b1->n += b2->n;
	207	b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
	208	if(!b1->offset) goto error;
	209	b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
	210	if(!b1->size) goto error;
	211
	212	for(i=0; i<b2->n; i++) {
	213	b1->offset[j+i] = b2->offset[i];
	214	b1->size[j+i] = b2->size[i];
	215	}
	216	destroyBWOverlapBlock(b2);
	217	return b1;
	218
	219	error:
	220	destroyBWOverlapBlock(b1);
	221	return NULL;
	222	}
	223
	224	//Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
	225	//The output needs to be free()d if not NULL (likewise with *sizes)
	226	static bwOverlapBlock_t overlapsNonLeaf(bigWigFile_t fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
	227	uint16_t i;
	228	bwOverlapBlock_t nodeBlocks, output = calloc(1, sizeof(bwOverlapBlock_t));
	229	if(!output) return NULL;
	230
	231	for(i=0; i<node->nChildren; i++) {
	232	if(tid < node->chrIdxStart[i]) break;
	233	if(tid < node->chrIdxStart[i] \|\| tid > node->chrIdxEnd[i]) continue;
	234	if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
	235	if(tid == node->chrIdxStart[i]) {
	236	if(node->baseStart[i] >= end) continue;
	237	} else if(tid == node->chrIdxEnd[i]) {
	238	if(node->baseEnd[i] <= start) continue;
	239	}
	240	} else {
	241	if(end <= node->baseStart[i] \|\| start >= node->baseEnd[i]) continue;
	242	}
	243
	244	//We have an overlap!
	245	if(!node->x.child[i])
	246	node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
	247	if(!node->x.child[i]) goto error;
	248
	249	if(node->x.child[i]->isLeaf) { //leaf
	250	nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
	251	} else { //non-leaf
	252	nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
	253	}
	254
	255	//The output is processed the same regardless of leaf/non-leaf
	256	if(!nodeBlocks) goto error;
	257	else {
	258	output = mergeOverlapBlocks(output, nodeBlocks);
	259	if(!output) {
	260	destroyBWOverlapBlock(nodeBlocks);
	261	goto error;
	262	}
	263	}
	264	}
	265
	266	return output;
	267
	268	error:
	269	destroyBWOverlapBlock(output);
	270	return NULL;
	271	}
	272
	273	//Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
	274	//The output must be free()d
	275	bwOverlapBlock_t walkRTreeNodes(bigWigFile_t bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
	276	if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
	277	return overlapsNonLeaf(bw, root, tid, start, end);
	278	}
	279
	280	//In reality, a hash or some sort of tree structure is probably faster...
	281	//Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
	282	uint32_t bwGetTid(bigWigFile_t fp, char chrom) {
	283	uint32_t i;
	284	if(!chrom) return -1;
	285	for(i=0; i<fp->cl->nKeys; i++) {
	286	if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
	287	}
	288	return -1;
	289	}
	290
	291	static bwOverlapBlock_t bwGetOverlappingBlocks(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end) {
	292	uint32_t tid = bwGetTid(fp, chrom);
	293
	294	if(tid == (uint32_t) -1) {
	295	fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
	296	return NULL;
	297	}
	298
	299	//Get the info if needed
	300	if(!fp->idx) {
	301	fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
	302	if(!fp->idx) {
	303	return NULL;
	304	}
	305	}
	306
	307	if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
	308	if(!fp->idx->root) return NULL;
	309
	310	return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
	311	}
	312
	313	void bwFillDataHdr(bwDataHeader_t hdr, void b) {
	314	hdr->tid = ((uint32_t*)b)[0];
	315	hdr->start = ((uint32_t*)b)[1];
	316	hdr->end = ((uint32_t*)b)[2];
	317	hdr->step = ((uint32_t*)b)[3];
	318	hdr->span = ((uint32_t*)b)[4];
	319	hdr->type = ((uint8_t*)b)[20];
	320	hdr->nItems = ((uint16_t*)b)[11];
	321	}
	322
	323	void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
	324	if(!o) return;
	325	if(o->start) free(o->start);
	326	if(o->end) free(o->end);
	327	if(o->value) free(o->value);
	328	free(o);
	329	}
	330
	331	void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
	332	uint32_t i;
	333	if(!o) return;
	334	if(o->start) free(o->start);
	335	if(o->end) free(o->end);
	336	if(o->str) {
	337	for(i=0; i<o->l; i++) {
	338	if(o->str[i]) free(o->str[i]);
	339	}
	340	free(o->str);
	341	}
	342	free(o);
	343	}
	344
	345	//Returns NULL on error, in which case o has been free()d
	346	static bwOverlappingIntervals_t pushIntervals(bwOverlappingIntervals_t o, uint32_t start, uint32_t end, float value) {
	347	if(o->l+1 >= o->m) {
	348	o->m = roundup(o->l+1);
	349	o->start = realloc(o->start, o->m * sizeof(uint32_t));
	350	if(!o->start) goto error;
	351	o->end = realloc(o->end, o->m * sizeof(uint32_t));
	352	if(!o->end) goto error;
	353	o->value = realloc(o->value, o->m * sizeof(float));
	354	if(!o->value) goto error;
	355	}
	356	o->start[o->l] = start;
	357	o->end[o->l] = end;
	358	o->value[o->l++] = value;
	359	return o;
	360
	361	error:
	362	bwDestroyOverlappingIntervals(o);
	363	return NULL;
	364	}
	365
	366	static bbOverlappingEntries_t pushBBIntervals(bbOverlappingEntries_t o, uint32_t start, uint32_t end, char *str, int withString) {
	367	if(o->l+1 >= o->m) {
	368	o->m = roundup(o->l+1);
	369	o->start = realloc(o->start, o->m * sizeof(uint32_t));
	370	if(!o->start) goto error;
	371	o->end = realloc(o->end, o->m * sizeof(uint32_t));
	372	if(!o->end) goto error;
	373	if(withString) {
	374	o->str = realloc(o->str, o->m * sizeof(char**));
	375	if(!o->str) goto error;
	376	}
	377	}
	378	o->start[o->l] = start;
	379	o->end[o->l] = end;
	380	if(withString) o->str[o->l] = strdup(str);
	381	o->l++;
	382	return o;
	383
	384	error:
	385	bbDestroyOverlappingEntries(o);
	386	return NULL;
	387	}
	388
	389	//Returns NULL on error
	390	bwOverlappingIntervals_t bwGetOverlappingIntervalsCore(bigWigFile_t fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
	391	uint64_t i;
	392	uint16_t j;
	393	int compressed = 0, rv;
	394	uLongf sz = fp->hdr->bufSize, tmp;
	395	void buf = NULL, compBuf = NULL;
	396	uint32_t start = 0, end , *p;
	397	float value;
	398	bwDataHeader_t hdr;
	399	bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
	400
	401	if(!output) goto error;
	402
	403	if(!o) return output;
	404	if(!o->n) return output;
	405
	406	if(sz) {
	407	compressed = 1;
	408	buf = malloc(sz);
	409	}
	410	sz = 0; //This is now the size of the compressed buffer
	411
	412	for(i=0; i<o->n; i++) {
	413	if(bwSetPos(fp, o->offset[i])) goto error;
	414
	415	if(sz < o->size[i]) {
	416	compBuf = realloc(compBuf, o->size[i]);
	417	sz = o->size[i];
	418	}
	419	if(!compBuf) goto error;
	420
	421	if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
	422	if(compressed) {
	423	tmp = fp->hdr->bufSize; //This gets over-written by uncompress
	424	rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
	425	if(rv != Z_OK) goto error;
	426	} else {
	427	buf = compBuf;
	428	}
	429
	430	//TODO: ensure that tmp is large enough!
	431	bwFillDataHdr(&hdr, buf);
	432
	433	p = ((uint32_t*) buf);
	434	p += 6;
	435	if(hdr.tid != tid) continue;
	436
	437	if(hdr.type == 3) start = hdr.start - hdr.step;
	438
	439	//FIXME: We should ensure that sz is large enough to hold nItems of the given type
	440	for(j=0; j<hdr.nItems; j++) {
	441	switch(hdr.type) {
	442	case 1:
	443	start = *p;
	444	p++;
	445	end = *p;
	446	p++;
	447	value = ((float )p);
	448	p++;
	449	break;
	450	case 2:
	451	start = *p;
	452	p++;
	453	end = start + hdr.span;
	454	value = ((float )p);
	455	p++;
	456	break;
	457	case 3:
	458	start += hdr.step;
	459	end = start+hdr.span;
	460	value = ((float )p);
	461	p++;
	462	break;
	463	default :
	464	goto error;
	465	break;
	466	}
	467
	468	if(end <= ostart \|\| start >= oend) continue;
	469	//Push the overlap
	470	if(!pushIntervals(output, start, end, value)) goto error;
	471	}
	472	}
	473
	474	if(compressed && buf) free(buf);
	475	if(compBuf) free(compBuf);
	476	return output;
	477
	478	error:
	479	fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
	480	if(output) bwDestroyOverlappingIntervals(output);
	481	if(compressed && buf) free(buf);
	482	if(compBuf) free(compBuf);
	483	return NULL;
	484	}
	485
	486	bbOverlappingEntries_t bbGetOverlappingEntriesCore(bigWigFile_t fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
	487	uint64_t i;
	488	int compressed = 0, rv, slen;
	489	uLongf sz = fp->hdr->bufSize, tmp = 0;
	490	void buf = NULL, bufEnd = NULL, *compBuf = NULL;
	491	uint32_t entryTid = 0, start = 0, end;
	492	char *str;
	493	bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
	494
	495	if(!output) goto error;
	496
	497	if(!o) return output;
	498	if(!o->n) return output;
	499
	500	if(sz) {
	501	compressed = 1;
	502	buf = malloc(sz);
	503	}
	504	sz = 0; //This is now the size of the compressed buffer
	505
	506	for(i=0; i<o->n; i++) {
	507	if(bwSetPos(fp, o->offset[i])) goto error;
	508
	509	if(sz < o->size[i]) {
	510	compBuf = realloc(compBuf, o->size[i]);
	511	sz = o->size[i];
	512	}
	513	if(!compBuf) goto error;
	514
	515	if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
	516	if(compressed) {
	517	tmp = fp->hdr->bufSize; //This gets over-written by uncompress
	518	rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
	519	if(rv != Z_OK) goto error;
	520	} else {
	521	buf = compBuf;
	522	tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
	523	}
	524
	525	bufEnd = buf + tmp;
	526	while(buf < bufEnd) {
	527	entryTid = ((uint32_t*)buf)[0];
	528	start = ((uint32_t*)buf)[1];
	529	end = ((uint32_t*)buf)[2];
	530	buf += 12;
	531	str = (char*)buf;
	532	slen = strlen(str) + 1;
	533	buf += slen;
	534
	535	if(entryTid < tid) continue;
	536	if(entryTid > tid) break;
	537	if(end <= ostart) continue;
	538	if(start >= oend) break;
	539
	540	//Push the overlap
	541	if(!pushBBIntervals(output, start, end, str, withString)) goto error;
	542	}
	543
	544	buf = bufEnd - tmp; //reset the buffer pointer
	545	}
	546
	547	if(compressed && buf) free(buf);
	548	if(compBuf) free(compBuf);
	549	return output;
	550
	551	error:
	552	fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
	553	buf = bufEnd - tmp;
	554	if(output) bbDestroyOverlappingEntries(output);
	555	if(compressed && buf) free(buf);
	556	if(compBuf) free(compBuf);
	557	return NULL;
	558	}
	559
	560	//Returns NULL on error OR no intervals, which is a bad design...
	561	bwOverlappingIntervals_t bwGetOverlappingIntervals(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end) {
	562	bwOverlappingIntervals_t *output;
	563	uint32_t tid = bwGetTid(fp, chrom);
	564	if(tid == (uint32_t) -1) return NULL;
	565	bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
	566	if(!blocks) return NULL;
	567	output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
	568	destroyBWOverlapBlock(blocks);
	569	return output;
	570	}
	571
	572	//Like above, but for bigBed files
	573	bbOverlappingEntries_t bbGetOverlappingEntries(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, int withString) {
	574	bbOverlappingEntries_t *output;
	575	uint32_t tid = bwGetTid(fp, chrom);
	576	if(tid == (uint32_t) -1) return NULL;
	577	bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
	578	if(!blocks) return NULL;
	579	output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
	580	destroyBWOverlapBlock(blocks);
	581	return output;
	582	}
	583
	584	//Returns NULL on error
	585	bwOverlapIterator_t bwOverlappingIntervalsIterator(bigWigFile_t bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
	586	bwOverlapIterator_t *output = NULL;
	587	uint64_t n;
	588	uint32_t tid = bwGetTid(bw, chrom);
	589	if(tid == (uint32_t) -1) return output;
	590	output = calloc(1, sizeof(bwOverlapIterator_t));
	591	if(!output) return output;
	592	bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
	593
	594	output->bw = bw;
	595	output->tid = tid;
	596	output->start = start;
	597	output->end = end;
	598	output->blocks = blocks;
	599	output->blocksPerIteration = blocksPerIteration;
	600
	601	if(blocks) {
	602	n = blocks->n;
	603	if(n>blocksPerIteration) blocks->n = blocksPerIteration;
	604	output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
	605	blocks->n = n;
	606	output->offset = blocksPerIteration;
	607	}
	608	output->data = output->intervals;
	609	return output;
	610	}
	611
	612	//Returns NULL on error
	613	bwOverlapIterator_t bbOverlappingEntriesIterator(bigWigFile_t bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
	614	bwOverlapIterator_t *output = NULL;
	615	uint64_t n;
	616	uint32_t tid = bwGetTid(bw, chrom);
	617	if(tid == (uint32_t) -1) return output;
	618	output = calloc(1, sizeof(bwOverlapIterator_t));
	619	if(!output) return output;
	620	bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
	621
	622	output->bw = bw;
	623	output->tid = tid;
	624	output->start = start;
	625	output->end = end;
	626	output->blocks = blocks;
	627	output->blocksPerIteration = blocksPerIteration;
	628	output->withString = withString;
	629
	630	if(blocks) {
	631	n = blocks->n;
	632	if(n>blocksPerIteration) blocks->n = blocksPerIteration;
	633	output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
	634	blocks->n = n;
	635	output->offset = blocksPerIteration;
	636	}
	637	output->data = output->entries;
	638	return output;
	639	}
	640
	641	void bwIteratorDestroy(bwOverlapIterator_t *iter) {
	642	if(!iter) return;
	643	if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
	644	if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
	645	if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
	646	free(iter);
	647	}
	648
	649	//On error, points to NULL and destroys the input
	650	bwOverlapIterator_t bwIteratorNext(bwOverlapIterator_t iter) {
	651	uint64_t n, offset, size;
	652	bwOverlapBlock_t *blocks = iter->blocks;
	653
	654	if(iter->intervals) {
	655	bwDestroyOverlappingIntervals(iter->intervals);
	656	iter->intervals = NULL;
	657	}
	658	if(iter->entries) {
	659	bbDestroyOverlappingEntries(iter->entries);
	660	iter->entries = NULL;
	661	}
	662	iter->data = NULL;
	663
	664	if(iter->offset < blocks->n) {
	665	//store the previous values
	666	n = blocks->n;
	667	offset = blocks->offset;
	668	size = blocks->size;
	669
	670	//Move the start of the blocks
	671	blocks->offset += iter->offset;
	672	blocks->size += iter->offset;
	673	if(iter->offset + iter->blocksPerIteration > n) {
	674	blocks->n = blocks->n - iter->offset;
	675	} else {
	676	blocks->n = iter->blocksPerIteration;
	677	}
	678
	679	//Get the intervals or entries, as appropriate
	680	if(iter->bw->type == 0) {
	681	//bigWig
	682	iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
	683	iter->data = iter->intervals;
	684	} else {
	685	//bigBed
	686	iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
	687	iter->data = iter->entries;
	688	}
	689	iter->offset += iter->blocksPerIteration;
	690
	691	//reset the values in iter->blocks
	692	blocks->n = n;
	693	blocks->offset = offset;
	694	blocks->size = size;
	695
	696	//Check for error
	697	if(!iter->intervals && !iter->entries) goto error;
	698	}
	699
	700	return iter;
	701
	702	error:
	703	bwIteratorDestroy(iter);
	704	return NULL;
	705	}
	706
	707	//This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
	708	//The ->end member is NULL
	709	//If includeNA is not 0 then ->start is also NULL, since it's implied
	710	//Note that bwDestroyOverlappingIntervals() will work in either case
	711	bwOverlappingIntervals_t bwGetValues(bigWigFile_t fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
	712	uint32_t i, j, n;
	713	bwOverlappingIntervals_t *output = NULL;
	714	bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
	715	if(!intermediate) return NULL;
	716
	717	output = calloc(1, sizeof(bwOverlappingIntervals_t));
	718	if(!output) goto error;
	719	if(includeNA) {
	720	output->l = end-start;
	721	output->value = malloc((end-start)*sizeof(float));
	722	if(!output->value) goto error;
	723	for(i=0; i<end-start; i++) output->value[i] = strtod("NaN", NULL);
	724	for(i=0; i<intermediate->l; i++) {
	725	for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
	726	if(j < start \|\| j >= end) continue;
	727	output->value[j-start] = intermediate->value[i];
	728	}
	729	}
	730	} else {
	731	n = 0;
	732	for(i=0; i<intermediate->l; i++) {
	733	if(intermediate->start[i] < start) intermediate->start[i] = start;
	734	if(intermediate->end[i] > end) intermediate->end[i] = end;
	735	n += intermediate->end[i]-intermediate->start[i];
	736	}
	737	output->l = n;
	738	output->start = malloc(sizeof(uint32_t)*n);
	739	if(!output->start) goto error;
	740	output->value = malloc(sizeof(float)*n);
	741	if(!output->value) goto error;
	742	n = 0; //this is now the index
	743	for(i=0; i<intermediate->l; i++) {
	744	for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
	745	if(j < start \|\| j >= end) continue;
	746	output->start[n] = j;
	747	output->value[n++] = intermediate->value[i];
	748	}
	749	}
	750	}
	751
	752	bwDestroyOverlappingIntervals(intermediate);
	753	return output;
	754
	755	error:
	756	if(intermediate) bwDestroyOverlappingIntervals(intermediate);
	757	if(output) bwDestroyOverlappingIntervals(output);
	758	return NULL;
	759	}
	760
	761	void bwDestroyIndexNode(bwRTreeNode_t *node) {
	762	uint16_t i;
	763
	764	if(!node) return;
	765
	766	free(node->chrIdxStart);
	767	free(node->baseStart);
	768	free(node->chrIdxEnd);
	769	free(node->baseEnd);
	770	free(node->dataOffset);
	771	if(!node->isLeaf) {
	772	for(i=0; i<node->nChildren; i++) {
	773	bwDestroyIndexNode(node->x.child[i]);
	774	}
	775	free(node->x.child);
	776	} else {
	777	free(node->x.size);
	778	}
	779	free(node);
	780	}
	781
	782	void bwDestroyIndex(bwRTree_t *idx) {
	783	bwDestroyIndexNode(idx->root);
	784	free(idx);
	785	}
	786
	787	//Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
	788	//Returns NULL on error
	789	bwRTree_t bwReadIndex(bigWigFile_t fp, uint64_t offset) {
	790	bwRTree_t *idx = readRTreeIdx(fp, offset);
	791	if(!idx) return NULL;
	792
	793	//Read in the root node
	794	idx->root = bwGetRTreeNode(fp, idx->rootOffset);
	795
	796	if(!idx->root) {
	797	bwDestroyIndex(idx);
	798	return NULL;
	799	}
	800	return idx;
	801	}

+77

-0

libBigWig/bwValues.h less more

	0	#ifndef LIBBIGWIG_VALUES_H
	1	#define LIBBIGWIG_VALUES_H
	2
	3	#include <inttypes.h>
	4	/*! \file bwValues.h
	5	*
	6	* You should not directly use functions and structures defined here. They're really meant for internal use only.
	7	*
	8	* All of the structures here need to be destroyed or you'll leak memory! There are methods available to destroy anything that you need to take care of yourself.
	9	*/
	10
	11	//N.B., coordinates are still 0-based half open!
	12	/*!
	13	* @brief A node within an R-tree holding the index for data.
	14	*
	15	* Note that there are two types of nodes: leaf and twig. Leaf nodes point to where data actually is. Twig nodes point to additional index nodes, which may or may not be leaves. Each of these nodes has additional children, which may span multiple chromosomes/contigs.
	16	*
	17	* With the start/end position, these positions refer specifically to the chromosomes specified in chrIdxStart/chrIdxEnd. Any chromosomes between these are completely spanned by a given child.
	18	*/
	19	typedef struct bwRTreeNode_t {
	20	uint8_t isLeaf; /*<Is this node a leaf?/
	21	//1 byte of padding
	22	uint16_t nChildren; /*<The number of children of this node, all lists have this length./
	23	uint32_t chrIdxStart; /<A list of the starting chromosome indices of each child./
	24	uint32_t baseStart; /<A list of the start position of each child./
	25	uint32_t chrIdxEnd; /<A list of the end chromosome indices of each child./
	26	uint32_t baseEnd; /<A list of the end position of each child./
	27	uint64_t dataOffset; /<For leaves, the offset to the on-disk data. For twigs, the offset to the child node./
	28	union {
	29	uint64_t size; /<Leaves only: The size of the data block./
	30	struct bwRTreeNode_t child; /<Twigs only: The child node(s).*/
	31	} x; /*<A union holding either size or child/
	32	} bwRTreeNode_t;
	33
	34	/*!
	35	* A header and index that points to an R-tree that in turn points to data blocks.
	36	*/
	37	//TODO rootOffset is pointless, it's 48bytes after the indexOffset
	38	typedef struct {
	39	uint32_t blockSize; /*<The maximum number of children a node can have/
	40	uint64_t nItems; /*<The total number of data blocks pointed to by the tree. This is completely redundant./
	41	uint32_t chrIdxStart; /*<The index to the first chromosome described./
	42	uint32_t baseStart; /*<The first position on chrIdxStart with a value./
	43	uint32_t chrIdxEnd; /*<The index of the last chromosome with an entry./
	44	uint32_t baseEnd; /*<The last position on chrIdxEnd with an entry./
	45	uint64_t idxSize; /*<This is actually the offset of the index rather than the size?!? Yes, it's completely redundant./
	46	uint32_t nItemsPerSlot; /*<This is always 1!/
	47	//There's 4 bytes of padding in the file here
	48	uint64_t rootOffset; /*<The offset to the root node of the R-Tree (on disk). Yes, this is redundant./
	49	bwRTreeNode_t root; /<A pointer to the root node./
	50	} bwRTree_t;
	51
	52	/*!
	53	* @brief This structure holds the data blocks that overlap a given interval.
	54	*/
	55	typedef struct {
	56	uint64_t n; /*<The number of blocks that overlap. This MAY* be 0!.*/
	57	uint64_t offset; /<The offset to the on-disk position of the block./
	58	uint64_t size; /<The size of each block on disk (in bytes)./
	59	} bwOverlapBlock_t;
	60
	61	/*!
	62	* @brief The header section of a given data block.
	63	*
	64	* There are 3 types of data blocks in bigWig files, each with slightly different needs. This is all taken care of internally.
	65	*/
	66	typedef struct {
	67	uint32_t tid; /*<The chromosome ID./
	68	uint32_t start; /*<The start position of a block/
	69	uint32_t end; /*<The end position of a block/
	70	uint32_t step; /*<The step size of the values/
	71	uint32_t span; /*<The span of each data value/
	72	uint8_t type; /*<The block type: 1, bedGraph; 2, variable step; 3, fixed step./
	73	uint16_t nItems; /*<The number of values in a given block./
	74	} bwDataHeader_t;
	75
	76	#endif // LIBBIGWIG_VALUES_H

+1328

-0

libBigWig/bwWrite.c less more

	0	#include <limits.h>
	1	#include <float.h>
	2	#include <stdlib.h>
	3	#include <string.h>
	4	#include <math.h>
	5	#include "bigWig.h"
	6	#include "bwCommon.h"
	7
	8	/// @cond SKIP
	9	struct val_t {
	10	uint32_t tid;
	11	uint32_t start;
	12	uint32_t nBases;
	13	float min, max, sum, sumsq;
	14	double scalar;
	15	struct val_t *next;
	16	};
	17	/// @endcond
	18
	19	//Create a chromList_t and attach it to a bigWigFile_t *. Returns NULL on error
	20	//Note that chroms and lengths are duplicated, so you MUST free the input
	21	chromList_t bwCreateChromList(char chroms, uint32_t lengths, int64_t n) {
	22	int64_t i = 0;
	23	chromList_t *cl = calloc(1, sizeof(chromList_t));
	24	if(!cl) return NULL;
	25
	26	cl->nKeys = n;
	27	cl->chrom = malloc(sizeof(char)n);
	28	cl->len = malloc(sizeof(uint32_t)*n);
	29	if(!cl->chrom) goto error;
	30	if(!cl->len) goto error;
	31
	32	for(i=0; i<n; i++) {
	33	cl->len[i] = lengths[i];
	34	cl->chrom[i] = strdup(chroms[i]);
	35	if(!cl->chrom[i]) goto error;
	36	}
	37
	38	return cl;
	39
	40	error:
	41	if(i) {
	42	int64_t j;
	43	for(j=0; j<i; j++) free(cl->chrom[j]);
	44	}
	45	if(cl) {
	46	if(cl->chrom) free(cl->chrom);
	47	if(cl->len) free(cl->len);
	48	free(cl);
	49	}
	50	return NULL;
	51	}
	52
	53	//If maxZooms == 0, then 0 is used (i.e., there are no zoom levels). If maxZooms < 0 or > 65535 then 10 is used.
	54	//TODO allow changing bufSize and blockSize
	55	int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms) {
	56	if(!fp->isWrite) return 1;
	57	bigWigHdr_t *hdr = calloc(1, sizeof(bigWigHdr_t));
	58	if(!hdr) return 2;
	59
	60	hdr->version = 4;
	61	if(maxZooms < 0 \|\| maxZooms > 65535) {
	62	hdr->nLevels = 10;
	63	} else {
	64	hdr->nLevels = maxZooms;
	65	}
	66
	67	hdr->bufSize = 32768; //When the file is finalized this is reset if fp->writeBuffer->compressPsz is 0!
	68	hdr->minVal = DBL_MAX;
	69	hdr->maxVal = DBL_MIN;
	70	fp->hdr = hdr;
	71	fp->writeBuffer->blockSize = 64;
	72
	73	//Allocate the writeBuffer buffers
	74	fp->writeBuffer->compressPsz = compressBound(hdr->bufSize);
	75	fp->writeBuffer->compressP = malloc(fp->writeBuffer->compressPsz);
	76	if(!fp->writeBuffer->compressP) return 3;
	77	fp->writeBuffer->p = calloc(1,hdr->bufSize);
	78	if(!fp->writeBuffer->p) return 4;
	79
	80	return 0;
	81	}
	82
	83	//return 0 on success
	84	static int writeAtPos(void ptr, size_t sz, size_t nmemb, size_t pos, FILE fp) {
	85	size_t curpos = ftell(fp);
	86	if(fseek(fp, pos, SEEK_SET)) return 1;
	87	if(fwrite(ptr, sz, nmemb, fp) != nmemb) return 2;
	88	if(fseek(fp, curpos, SEEK_SET)) return 3;
	89	return 0;
	90	}
	91
	92	//We lose keySize bytes on error
	93	static int writeChromList(FILE fp, chromList_t cl) {
	94	uint16_t k;
	95	uint32_t j, magic = CIRTREE_MAGIC;
	96	uint32_t nperblock = (cl->nKeys > 0x7FFF) ? 0x7FFF : cl->nKeys; //Items per leaf/non-leaf, there are no unsigned ints in java :(
	97	uint32_t nblocks, keySize = 0, valSize = 8; //In theory valSize could be optimized, in practice that'd be annoying
	98	uint64_t i, nonLeafEnd, leafSize, nextLeaf;
	99	uint8_t eight;
	100	int64_t i64;
	101	char *chrom;
	102	size_t l;
	103
	104	if(cl->nKeys > 1073676289) {
	105	fprintf(stderr, "[writeChromList] Error: Currently only 1,073,676,289 contigs are supported. If you really need more then please post a request on github.\n");
	106	return 1;
	107	}
	108	nblocks = cl->nKeys/nperblock;
	109	nblocks += ((cl->nKeys % nperblock) > 0)?1:0;
	110
	111	for(i64=0; i64<cl->nKeys; i64++) {
	112	l = strlen(cl->chrom[i64]);
	113	if(l>keySize) keySize = l;
	114	}
	115	l--; //We don't null terminate strings, because schiess mich tot
	116	chrom = calloc(keySize, sizeof(char));
	117
	118	//Write the root node of a largely pointless tree
	119	if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 1;
	120	if(fwrite(&nperblock, sizeof(uint32_t), 1, fp) != 1) return 2;
	121	if(fwrite(&keySize, sizeof(uint32_t), 1, fp) != 1) return 3;
	122	if(fwrite(&valSize, sizeof(uint32_t), 1, fp) != 1) return 4;
	123	if(fwrite(&(cl->nKeys), sizeof(uint64_t), 1, fp) != 1) return 5;
	124
	125	//Padding?
	126	i=0;
	127	if(fwrite(&i, sizeof(uint64_t), 1, fp) != 1) return 6;
	128
	129	//Do we need a non-leaf node?
	130	if(nblocks > 1) {
	131	eight = 0;
	132	if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 7;
	133	if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 8; //padding
	134	if(fwrite(&nblocks, sizeof(uint16_t), 1, fp) != 1) return 8;
	135	nonLeafEnd = ftell(fp) + nperblock * (keySize + 8);
	136	leafSize = nperblock * (keySize + 8) + 4;
	137	for(i=0; i<nblocks; i++) { //Why yes, this is pointless
	138	chrom = strncpy(chrom, cl->chrom[i * nperblock], keySize);
	139	nextLeaf = nonLeafEnd + i * leafSize;
	140	if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
	141	if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
	142	}
	143	for(i=0; i<keySize; i++) chrom[i] = '\0';
	144	nextLeaf = 0;
	145	for(i=nblocks; i<nperblock; i++) {
	146	if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
	147	if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
	148	}
	149	}
	150
	151	//Write the leaves
	152	nextLeaf = 0;
	153	for(i=0, j=0; i<nblocks; i++) {
	154	eight = 1;
	155	if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 11;
	156	eight = 0;
	157	if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 12;
	158	if(cl->nKeys - j < nperblock) {
	159	k = cl->nKeys - j;
	160	if(fwrite(&k, sizeof(uint16_t), 1, fp) != 1) return 13;
	161	} else {
	162	if(fwrite(&nperblock, sizeof(uint16_t), 1, fp) != 1) return 13;
	163	}
	164	for(k=0; k<nperblock; k++) {
	165	if(j>=cl->nKeys) {
	166	if(chrom[0]) {
	167	for(l=0; l<keySize; l++) chrom[l] = '\0';
	168	}
	169	if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
	170	if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 16;
	171	} else {
	172	chrom = strncpy(chrom, cl->chrom[j], keySize);
	173	if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
	174	if(fwrite(&j, sizeof(uint32_t), 1, fp) != 1) return 16;
	175	if(fwrite(&(cl->len[j++]), sizeof(uint32_t), 1, fp) != 1) return 17;
	176	}
	177	}
	178	}
	179
	180	free(chrom);
	181	return 0;
	182	}
	183
	184	//returns 0 on success
	185	//Still need to fill in indexOffset
	186	int bwWriteHdr(bigWigFile_t *bw) {
	187	uint32_t magic = BIGWIG_MAGIC;
	188	uint16_t two = 4;
	189	FILE *fp;
	190	void *p = calloc(58, sizeof(uint8_t)); //58 bytes of nothing
	191	if(!bw->isWrite) return 1;
	192
	193	//The header itself, largely just reserving space...
	194	fp = bw->URL->x.fp;
	195	if(!fp) return 2;
	196	if(fseek(fp, 0, SEEK_SET)) return 3;
	197	if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 4;
	198	if(fwrite(&two, sizeof(uint16_t), 1, fp) != 1) return 5;
	199	if(fwrite(p, sizeof(uint8_t), 58, fp) != 58) return 6;
	200
	201	//Empty zoom headers
	202	if(bw->hdr->nLevels) {
	203	for(two=0; two<bw->hdr->nLevels; two++) {
	204	if(fwrite(p, sizeof(uint8_t), 24, fp) != 24) return 9;
	205	}
	206	}
	207
	208	//Update summaryOffset and write an empty summary block
	209	bw->hdr->summaryOffset = ftell(fp);
	210	if(fwrite(p, sizeof(uint8_t), 40, fp) != 40) return 10;
	211	if(writeAtPos(&(bw->hdr->summaryOffset), sizeof(uint64_t), 1, 0x2c, fp)) return 11;
	212
	213	//Write the chromosome list as a stupid freaking tree (because let's TREE ALL THE THINGS!!!)
	214	bw->hdr->ctOffset = ftell(fp);
	215	if(writeChromList(fp, bw->cl)) return 7;
	216	if(writeAtPos(&(bw->hdr->ctOffset), sizeof(uint64_t), 1, 0x8, fp)) return 8;
	217
	218	//Update the dataOffset
	219	bw->hdr->dataOffset = ftell(fp);
	220	if(writeAtPos(&bw->hdr->dataOffset, sizeof(uint64_t), 1, 0x10, fp)) return 12;
	221
	222	//Save space for the number of blocks
	223	if(fwrite(p, sizeof(uint8_t), 8, fp) != 8) return 13;
	224
	225	free(p);
	226	return 0;
	227	}
	228
	229	static int insertIndexNode(bigWigFile_t fp, bwRTreeNode_t leaf) {
	230	bwLL *l = malloc(sizeof(bwLL));
	231	if(!l) return 1;
	232	l->node = leaf;
	233	l->next = NULL;
	234
	235	if(!fp->writeBuffer->firstIndexNode) {
	236	fp->writeBuffer->firstIndexNode = l;
	237	} else {
	238	fp->writeBuffer->currentIndexNode->next = l;
	239	}
	240	fp->writeBuffer->currentIndexNode = l;
	241	return 0;
	242	}
	243
	244	//0 on success
	245	static int appendIndexNodeEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
	246	bwLL *n = fp->writeBuffer->currentIndexNode;
	247	if(!n) return 1;
	248	if(n->node->nChildren >= fp->writeBuffer->blockSize) return 2;
	249
	250	n->node->chrIdxStart[n->node->nChildren] = tid0;
	251	n->node->baseStart[n->node->nChildren] = start;
	252	n->node->chrIdxEnd[n->node->nChildren] = tid1;
	253	n->node->baseEnd[n->node->nChildren] = end;
	254	n->node->dataOffset[n->node->nChildren] = offset;
	255	n->node->x.size[n->node->nChildren] = size;
	256	n->node->nChildren++;
	257	return 0;
	258	}
	259
	260	//Returns 0 on success
	261	static int addIndexEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
	262	bwRTreeNode_t *node;
	263
	264	if(appendIndexNodeEntry(fp, tid0, tid1, start, end, offset, size)) {
	265	//The last index node is full, we need to add a new one
	266	node = calloc(1, sizeof(bwRTreeNode_t));
	267	if(!node) return 1;
	268
	269	//Allocate and set the fields
	270	node->isLeaf = 1;
	271	node->nChildren = 1;
	272	node->chrIdxStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
	273	if(!node->chrIdxStart) goto error;
	274	node->baseStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
	275	if(!node->baseStart) goto error;
	276	node->chrIdxEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
	277	if(!node->chrIdxEnd) goto error;
	278	node->baseEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
	279	if(!node->baseEnd) goto error;
	280	node->dataOffset = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
	281	if(!node->dataOffset) goto error;
	282	node->x.size = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
	283	if(!node->x.size) goto error;
	284
	285	node->chrIdxStart[0] = tid0;
	286	node->baseStart[0] = start;
	287	node->chrIdxEnd[0] = tid1;
	288	node->baseEnd[0] = end;
	289	node->dataOffset[0] = offset;
	290	node->x.size[0] = size;
	291
	292	if(insertIndexNode(fp, node)) goto error;
	293	}
	294
	295	return 0;
	296
	297	error:
	298	if(node->chrIdxStart) free(node->chrIdxStart);
	299	if(node->baseStart) free(node->baseStart);
	300	if(node->chrIdxEnd) free(node->chrIdxEnd);
	301	if(node->baseEnd) free(node->baseEnd);
	302	if(node->dataOffset) free(node->dataOffset);
	303	if(node->x.size) free(node->x.size);
	304	return 2;
	305	}
	306
	307	/*
	308	* TODO:
	309	* The buffer size and compression sz need to be determined elsewhere (and p and compressP filled in!)
	310	*/
	311	static int flushBuffer(bigWigFile_t *fp) {
	312	bwWriteBuffer_t *wb = fp->writeBuffer;
	313	uLongf sz = wb->compressPsz;
	314	uint16_t nItems;
	315	if(!fp->writeBuffer->l) return 0;
	316	if(!wb->ltype) return 0;
	317
	318	//Fill in the header
	319	if(!memcpy(wb->p, &(wb->tid), sizeof(uint32_t))) return 1;
	320	if(!memcpy(wb->p+4, &(wb->start), sizeof(uint32_t))) return 2;
	321	if(!memcpy(wb->p+8, &(wb->end), sizeof(uint32_t))) return 3;
	322	if(!memcpy(wb->p+12, &(wb->step), sizeof(uint32_t))) return 4;
	323	if(!memcpy(wb->p+16, &(wb->span), sizeof(uint32_t))) return 5;
	324	if(!memcpy(wb->p+20, &(wb->ltype), sizeof(uint8_t))) return 6;
	325	//1 byte padding
	326	//Determine the number of items
	327	switch(wb->ltype) {
	328	case 1:
	329	nItems = (wb->l-24)/12;
	330	break;
	331	case 2:
	332	nItems = (wb->l-24)/8;
	333	break;
	334	case 3:
	335	nItems = (wb->l-24)/4;
	336	break;
	337	default:
	338	return 7;
	339	}
	340	if(!memcpy(wb->p+22, &nItems, sizeof(uint16_t))) return 8;
	341
	342	if(sz) {
	343	//compress
	344	if(compress(wb->compressP, &sz, wb->p, wb->l) != Z_OK) return 9;
	345
	346	//write the data to disk
	347	if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 10;
	348	} else {
	349	sz = wb->l;
	350	if(fwrite(wb->p, sizeof(uint8_t), wb->l, fp->URL->x.fp) != wb->l) return 10;
	351	}
	352
	353	//Add an entry into the index
	354	if(addIndexEntry(fp, wb->tid, wb->tid, wb->start, wb->end, bwTell(fp)-sz, sz)) return 11;
	355
	356	wb->nBlocks++;
	357	wb->l = 24;
	358	return 0;
	359	}
	360
	361	static void updateStats(bigWigFile_t *fp, uint32_t span, float val) {
	362	if(val < fp->hdr->minVal) fp->hdr->minVal = val;
	363	else if(val > fp->hdr->maxVal) fp->hdr->maxVal = val;
	364	fp->hdr->nBasesCovered += span;
	365	fp->hdr->sumData += span*val;
	366	fp->hdr->sumSquared += span*pow(val,2);
	367
	368	fp->writeBuffer->nEntries++;
	369	fp->writeBuffer->runningWidthSum += span;
	370	}
	371
	372	//12 bytes per entry
	373	int bwAddIntervals(bigWigFile_t fp, char chrom, uint32_t start, uint32_t end, float values, uint32_t n) {
	374	uint32_t tid = 0, i;
	375	char *lastChrom = NULL;
	376	bwWriteBuffer_t *wb = fp->writeBuffer;
	377	if(!n) return 0; //Not an error per se
	378	if(!fp->isWrite) return 1;
	379	if(!wb) return 2;
	380
	381	//Flush if needed
	382	if(wb->ltype != 1) if(flushBuffer(fp)) return 3;
	383	if(wb->l+36 > fp->hdr->bufSize) if(flushBuffer(fp)) return 4;
	384	lastChrom = chrom[0];
	385	tid = bwGetTid(fp, chrom[0]);
	386	if(tid == (uint32_t) -1) return 5;
	387	if(tid != wb->tid) {
	388	if(flushBuffer(fp)) return 6;
	389	wb->tid = tid;
	390	wb->start = start[0];
	391	wb->end = end[0];
	392	}
	393
	394	//Ensure that everything is set correctly
	395	wb->ltype = 1;
	396	if(wb->l <= 24) {
	397	wb->start = start[0];
	398	wb->span = 0;
	399	wb->step = 0;
	400	}
	401	if(!memcpy(wb->p+wb->l, start, sizeof(uint32_t))) return 7;
	402	if(!memcpy(wb->p+wb->l+4, end, sizeof(uint32_t))) return 8;
	403	if(!memcpy(wb->p+wb->l+8, values, sizeof(float))) return 9;
	404	updateStats(fp, end[0]-start[0], values[0]);
	405	wb->l += 12;
	406
	407	for(i=1; i<n; i++) {
	408	if(strcmp(chrom[i],lastChrom) != 0) {
	409	wb->end = end[i-1];
	410	flushBuffer(fp);
	411	lastChrom = chrom[i];
	412	tid = bwGetTid(fp, chrom[i]);
	413	if(tid == (uint32_t) -1) return 10;
	414	wb->tid = tid;
	415	wb->start = start[i];
	416	}
	417	if(wb->l+12 > fp->hdr->bufSize) { //12 bytes/entry
	418	wb->end = end[i-1];
	419	flushBuffer(fp);
	420	wb->start = start[i];
	421	}
	422	if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 11;
	423	if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 12;
	424	if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 13;
	425	updateStats(fp, end[i]-start[i], values[i]);
	426	wb->l += 12;
	427	}
	428	wb->end = end[i-1];
	429
	430	return 0;
	431	}
	432
	433	int bwAppendIntervals(bigWigFile_t fp, uint32_t start, uint32_t end, float values, uint32_t n) {
	434	uint32_t i;
	435	bwWriteBuffer_t *wb = fp->writeBuffer;
	436	if(!n) return 0;
	437	if(!fp->isWrite) return 1;
	438	if(!wb) return 2;
	439	if(wb->ltype != 1) return 3;
	440
	441	for(i=0; i<n; i++) {
	442	if(wb->l+12 > fp->hdr->bufSize) {
	443	if(i>0) { //otherwise it's already set
	444	wb->end = end[i-1];
	445	}
	446	flushBuffer(fp);
	447	wb->start = start[i];
	448	}
	449	if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
	450	if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 5;
	451	if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 6;
	452	updateStats(fp, end[i]-start[i], values[i]);
	453	wb->l += 12;
	454	}
	455	wb->end = end[i-1];
	456
	457	return 0;
	458	}
	459
	460	//8 bytes per entry
	461	int bwAddIntervalSpans(bigWigFile_t fp, char chrom, uint32_t start, uint32_t span, float values, uint32_t n) {
	462	uint32_t i, tid;
	463	bwWriteBuffer_t *wb = fp->writeBuffer;
	464	if(!n) return 0;
	465	if(!fp->isWrite) return 1;
	466	if(!wb) return 2;
	467	if(wb->ltype != 2) if(flushBuffer(fp)) return 3;
	468	if(flushBuffer(fp)) return 4;
	469
	470	tid = bwGetTid(fp, chrom);
	471	if(tid == (uint32_t) -1) return 5;
	472	wb->tid = tid;
	473	wb->start = start[0];
	474	wb->step = 0;
	475	wb->span = span;
	476	wb->ltype = 2;
	477
	478	for(i=0; i<n; i++) {
	479	if(wb->l + 8 >= fp->hdr->bufSize) { //8 bytes/entry
	480	if(i) wb->end = start[i-1]+span;
	481	flushBuffer(fp);
	482	wb->start = start[i];
	483	}
	484	if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 5;
	485	if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 6;
	486	updateStats(fp, span, values[i]);
	487	wb->l += 8;
	488	}
	489	wb->end = start[n-1] + span;
	490
	491	return 0;
	492	}
	493
	494	int bwAppendIntervalSpans(bigWigFile_t fp, uint32_t start, float *values, uint32_t n) {
	495	uint32_t i;
	496	bwWriteBuffer_t *wb = fp->writeBuffer;
	497	if(!n) return 0;
	498	if(!fp->isWrite) return 1;
	499	if(!wb) return 2;
	500	if(wb->ltype != 2) return 3;
	501
	502	for(i=0; i<n; i++) {
	503	if(wb->l + 8 >= fp->hdr->bufSize) {
	504	if(i) wb->end = start[i-1]+wb->span;
	505	flushBuffer(fp);
	506	wb->start = start[i];
	507	}
	508	if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
	509	if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 5;
	510	updateStats(fp, wb->span, values[i]);
	511	wb->l += 8;
	512	}
	513	wb->end = start[n-1] + wb->span;
	514
	515	return 0;
	516	}
	517
	518	//4 bytes per entry
	519	int bwAddIntervalSpanSteps(bigWigFile_t fp, char chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n) {
	520	uint32_t i, tid;
	521	bwWriteBuffer_t *wb = fp->writeBuffer;
	522	if(!n) return 0;
	523	if(!fp->isWrite) return 1;
	524	if(!wb) return 2;
	525	if(wb->ltype != 3) flushBuffer(fp);
	526	if(flushBuffer(fp)) return 3;
	527
	528	tid = bwGetTid(fp, chrom);
	529	if(tid == (uint32_t) -1) return 4;
	530	wb->tid = tid;
	531	wb->start = start;
	532	wb->step = step;
	533	wb->span = span;
	534	wb->ltype = 3;
	535
	536	for(i=0; i<n; i++) {
	537	if(wb->l + 4 >= fp->hdr->bufSize) {
	538	wb->end = wb->start + ((wb->l-24)>>2) * step;
	539	flushBuffer(fp);
	540	wb->start = wb->end;
	541	}
	542	if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 5;
	543	updateStats(fp, wb->span, values[i]);
	544	wb->l += 4;
	545	}
	546	wb->end = wb->start + (wb->l>>2) * step;
	547
	548	return 0;
	549	}
	550
	551	int bwAppendIntervalSpanSteps(bigWigFile_t fp, float values, uint32_t n) {
	552	uint32_t i;
	553	bwWriteBuffer_t *wb = fp->writeBuffer;
	554	if(!n) return 0;
	555	if(!fp->isWrite) return 1;
	556	if(!wb) return 2;
	557	if(wb->ltype != 3) return 3;
	558
	559	for(i=0; i<n; i++) {
	560	if(wb->l + 4 >= fp->hdr->bufSize) {
	561	wb->end = wb->start + ((wb->l-24)>>2) * wb->step;
	562	flushBuffer(fp);
	563	wb->start = wb->end;
	564	}
	565	if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 4;
	566	updateStats(fp, wb->span, values[i]);
	567	wb->l += 4;
	568	}
	569	wb->end = wb->start + (wb->l>>2) * wb->step;
	570
	571	return 0;
	572	}
	573
	574	//0 on success
	575	int writeSummary(bigWigFile_t *fp) {
	576	if(writeAtPos(&(fp->hdr->nBasesCovered), sizeof(uint64_t), 1, fp->hdr->summaryOffset, fp->URL->x.fp)) return 1;
	577	if(writeAtPos(&(fp->hdr->minVal), sizeof(double), 1, fp->hdr->summaryOffset+8, fp->URL->x.fp)) return 2;
	578	if(writeAtPos(&(fp->hdr->maxVal), sizeof(double), 1, fp->hdr->summaryOffset+16, fp->URL->x.fp)) return 3;
	579	if(writeAtPos(&(fp->hdr->sumData), sizeof(double), 1, fp->hdr->summaryOffset+24, fp->URL->x.fp)) return 4;
	580	if(writeAtPos(&(fp->hdr->sumSquared), sizeof(double), 1, fp->hdr->summaryOffset+32, fp->URL->x.fp)) return 5;
	581	return 0;
	582	}
	583
	584	static bwRTreeNode_t *makeEmptyNode(uint32_t blockSize) {
	585	bwRTreeNode_t *n = calloc(1, sizeof(bwRTreeNode_t));
	586	if(!n) return NULL;
	587
	588	n->chrIdxStart = malloc(blockSize*sizeof(uint32_t));
	589	if(!n->chrIdxStart) goto error;
	590	n->baseStart = malloc(blockSize*sizeof(uint32_t));
	591	if(!n->baseStart) goto error;
	592	n->chrIdxEnd = malloc(blockSize*sizeof(uint32_t));
	593	if(!n->chrIdxEnd) goto error;
	594	n->baseEnd = malloc(blockSize*sizeof(uint32_t));
	595	if(!n->baseEnd) goto error;
	596	n->dataOffset = calloc(blockSize,sizeof(uint64_t)); //This MUST be 0 for node writing!
	597	if(!n->dataOffset) goto error;
	598	n->x.child = malloc(blockSize*sizeof(uint64_t));
	599	if(!n->x.child) goto error;
	600
	601	return n;
	602
	603	error:
	604	if(n->chrIdxStart) free(n->chrIdxStart);
	605	if(n->baseStart) free(n->baseStart);
	606	if(n->chrIdxEnd) free(n->chrIdxEnd);
	607	if(n->baseEnd) free(n->baseEnd);
	608	if(n->dataOffset) free(n->dataOffset);
	609	if(n->x.child) free(n->x.child);
	610	free(n);
	611	return NULL;
	612	}
	613
	614	//Returns 0 on success. This doesn't attempt to clean up!
	615	static bwRTreeNode_t addLeaves(bwLL ll, uint64_t sz, uint64_t toProcess, uint32_t blockSize) {
	616	uint32_t i;
	617	uint64_t foo;
	618	bwRTreeNode_t *n = makeEmptyNode(blockSize);
	619	if(!n) return NULL;
	620
	621	if(toProcess <= blockSize) {
	622	for(i=0; i<toProcess; i++) {
	623	n->chrIdxStart[i] = (*ll)->node->chrIdxStart[0];
	624	n->baseStart[i] = (*ll)->node->baseStart[0];
	625	n->chrIdxEnd[i] = (ll)->node->chrIdxEnd[(ll)->node->nChildren-1];
	626	n->baseEnd[i] = (ll)->node->baseEnd[(ll)->node->nChildren-1];
	627	n->x.child[i] = (*ll)->node;
	628	sz += 4 + 32(*ll)->node->nChildren;
	629	ll = (ll)->next;
	630	n->nChildren++;
	631	}
	632	} else {
	633	for(i=0; i<blockSize; i++) {
	634	foo = ceil(((double) toProcess)/((double) blockSize-i));
	635	if(!ll) break;
	636	n->x.child[i] = addLeaves(ll, sz, foo, blockSize);
	637	if(!n->x.child[i]) goto error;
	638	n->chrIdxStart[i] = n->x.child[i]->chrIdxStart[0];
	639	n->baseStart[i] = n->x.child[i]->baseStart[0];
	640	n->chrIdxEnd[i] = n->x.child[i]->chrIdxEnd[n->x.child[i]->nChildren-1];
	641	n->baseEnd[i] = n->x.child[i]->baseEnd[n->x.child[i]->nChildren-1];
	642	n->nChildren++;
	643	toProcess -= foo;
	644	}
	645	}
	646
	647	sz += 4 + 24n->nChildren;
	648	return n;
	649
	650	error:
	651	bwDestroyIndexNode(n);
	652	return NULL;
	653	}
	654
	655	//Returns 1 on error
	656	int writeIndexTreeNode(FILE fp, bwRTreeNode_t n, uint8_t *wrote, int level) {
	657	uint8_t one = 0;
	658	uint32_t i, j, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
	659
	660	if(n->isLeaf) return 0;
	661
	662	for(i=0; i<n->nChildren; i++) {
	663	if(n->dataOffset[i]) { //traverse into child
	664	if(n->isLeaf) return 0; //Only write leaves once!
	665	if(writeIndexTreeNode(fp, n->x.child[i], wrote, level+1)) return 1;
	666	} else {
	667	n->dataOffset[i] = ftell(fp);
	668	if(fwrite(&(n->x.child[i]->isLeaf), sizeof(uint8_t), 1, fp) != 1) return 1;
	669	if(fwrite(&one, sizeof(uint8_t), 1, fp) != 1) return 1; //one byte of padding
	670	if(fwrite(&(n->x.child[i]->nChildren), sizeof(uint16_t), 1, fp) != 1) return 1;
	671	for(j=0; j<n->x.child[i]->nChildren; j++) {
	672	vector[0] = n->x.child[i]->chrIdxStart[j];
	673	vector[1] = n->x.child[i]->baseStart[j];
	674	vector[2] = n->x.child[i]->chrIdxEnd[j];
	675	vector[3] = n->x.child[i]->baseEnd[j];
	676	if(n->x.child[i]->isLeaf) {
	677	//Include the offset and size
	678	if(fwrite(vector, sizeof(uint32_t), 4, fp) != 4) return 1;
	679	if(fwrite(&(n->x.child[i]->dataOffset[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
	680	if(fwrite(&(n->x.child[i]->x.size[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
	681	} else {
	682	if(fwrite(vector, sizeof(uint32_t), 6, fp) != 6) return 1;
	683	}
	684	}
	685	*wrote = 1;
	686	}
	687	}
	688
	689	return 0;
	690	}
	691
	692	//returns 1 on success
	693	int writeIndexOffsets(FILE fp, bwRTreeNode_t n, uint64_t offset) {
	694	uint32_t i;
	695
	696	if(n->isLeaf) return 0;
	697	for(i=0; i<n->nChildren; i++) {
	698	if(writeIndexOffsets(fp, n->x.child[i], n->dataOffset[i])) return 1;
	699	if(writeAtPos(&(n->dataOffset[i]), sizeof(uint64_t), 1, offset+20+24*i, fp)) return 2;
	700	}
	701	return 0;
	702	}
	703
	704	//Returns 0 on success
	705	int writeIndexTree(bigWigFile_t *fp) {
	706	uint64_t offset;
	707	uint8_t wrote = 0;
	708	int rv;
	709
	710	while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->idx->root, &wrote, 0)) == 0) {
	711	if(!wrote) break;
	712	wrote = 0;
	713	}
	714
	715	if(rv \|\| wrote) return 1;
	716
	717	//Save the file position
	718	offset = bwTell(fp);
	719
	720	//Write the offsets
	721	if(writeIndexOffsets(fp->URL->x.fp, fp->idx->root, fp->idx->rootOffset)) return 2;
	722
	723	//Move the file pointer back to the end
	724	bwSetPos(fp, offset);
	725
	726	return 0;
	727	}
	728
	729	//Returns 0 on success. The original state SHOULD be preserved on error
	730	int writeIndex(bigWigFile_t *fp) {
	731	uint32_t four = IDX_MAGIC;
	732	uint64_t idxSize = 0, foo;
	733	uint8_t one = 0;
	734	uint32_t i, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
	735	bwLL ll = fp->writeBuffer->firstIndexNode, p;
	736	bwRTreeNode_t *root = NULL;
	737
	738	if(!fp->writeBuffer->nBlocks) return 0;
	739	fp->idx = malloc(sizeof(bwRTree_t));
	740	if(!fp->idx) return 2;
	741	fp->idx->root = root;
	742
	743	//Update the file header to indicate the proper index position
	744	foo = bwTell(fp);
	745	if(writeAtPos(&foo, sizeof(uint64_t), 1, 0x18, fp->URL->x.fp)) return 3;
	746
	747	//Make the tree
	748	if(ll == fp->writeBuffer->currentIndexNode) {
	749	root = ll->node;
	750	idxSize = 4 + 24*root->nChildren;
	751	} else {
	752	root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
	753	}
	754	if(!root) return 4;
	755	fp->idx->root = root;
	756
	757	ll = fp->writeBuffer->firstIndexNode;
	758	while(ll) {
	759	p = ll->next;
	760	free(ll);
	761	ll=p;
	762	}
	763
	764	//write the header
	765	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 5;
	766	if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
	767	if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
	768	if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
	769	if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
	770	if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
	771	if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
	772	if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
	773	four = 1;
	774	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
	775	four = 0;
	776	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
	777	fp->idx->rootOffset = bwTell(fp);
	778
	779	//Write the root node, since writeIndexTree writes the children and fills in the offset
	780	if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
	781	if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
	782	if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
	783	for(i=0; i<root->nChildren; i++) {
	784	vector[0] = root->chrIdxStart[i];
	785	vector[1] = root->baseStart[i];
	786	vector[2] = root->chrIdxEnd[i];
	787	vector[3] = root->baseEnd[i];
	788	if(root->isLeaf) {
	789	//Include the offset and size
	790	if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
	791	if(fwrite(&(root->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
	792	if(fwrite(&(root->x.size[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
	793	} else {
	794	root->dataOffset[i] = 0; //FIXME: Something upstream is setting this to impossible values (e.g., 0x21?!?!?)
	795	if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
	796	}
	797	}
	798
	799	//Write each level
	800	if(writeIndexTree(fp)) return 23;
	801
	802	return 0;
	803	}
	804
	805	//The first zoom level has a resolution of 4x mean entry size
	806	//This may or may not produce the requested number of zoom levels
	807	int makeZoomLevels(bigWigFile_t *fp) {
	808	uint32_t meanBinSize, i;
	809	uint32_t multiplier = 4, zoom = 10, maxZoom = 0;
	810	uint16_t nLevels = 0;
	811
	812	meanBinSize = ((double) fp->writeBuffer->runningWidthSum)/(fp->writeBuffer->nEntries);
	813	//In reality, one level is skipped
	814	meanBinSize *= 4;
	815	//N.B., we must ALWAYS check that the zoom doesn't overflow a uint32_t!
	816	if(((uint32_t)-1)>>2 < meanBinSize) return 0; //No zoom levels!
	817	if(meanBinSize4 > zoom) zoom = multipliermeanBinSize;
	818
	819	fp->hdr->zoomHdrs = calloc(1, sizeof(bwZoomHdr_t));
	820	if(!fp->hdr->zoomHdrs) return 1;
	821	fp->hdr->zoomHdrs->level = malloc(fp->hdr->nLevels * sizeof(uint32_t));
	822	fp->hdr->zoomHdrs->dataOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
	823	fp->hdr->zoomHdrs->indexOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
	824	fp->hdr->zoomHdrs->idx = calloc(fp->hdr->nLevels, sizeof(bwRTree_t*));
	825	if(!fp->hdr->zoomHdrs->level) return 2;
	826	if(!fp->hdr->zoomHdrs->dataOffset) return 3;
	827	if(!fp->hdr->zoomHdrs->indexOffset) return 4;
	828	if(!fp->hdr->zoomHdrs->idx) return 5;
	829
	830	//There's no point in having a zoom level larger than the largest chromosome
	831	//This will none the less allow at least one zoom level, which is generally needed for IGV et al.
	832	for(i=0; i<fp->cl->nKeys; i++) {
	833	if(fp->cl->len[i] > maxZoom) maxZoom = fp->cl->len[i];
	834	}
	835	if(zoom > maxZoom) zoom = maxZoom;
	836
	837	for(i=0; i<fp->hdr->nLevels; i++) {
	838	if(zoom > maxZoom) break; //prevent absurdly large zoom levels
	839	fp->hdr->zoomHdrs->level[i] = zoom;
	840	nLevels++;
	841	if(((uint32_t)-1)/multiplier < zoom) break;
	842	zoom *= multiplier;
	843	}
	844	fp->hdr->nLevels = nLevels;
	845
	846	fp->writeBuffer->firstZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
	847	if(!fp->writeBuffer->firstZoomBuffer) goto error;
	848	fp->writeBuffer->lastZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
	849	if(!fp->writeBuffer->lastZoomBuffer) goto error;
	850	fp->writeBuffer->nNodes = calloc(nLevels, sizeof(uint64_t));
	851
	852	for(i=0; i<fp->hdr->nLevels; i++) {
	853	fp->writeBuffer->firstZoomBuffer[i] = calloc(1, sizeof(bwZoomBuffer_t));
	854	if(!fp->writeBuffer->firstZoomBuffer[i]) goto error;
	855	fp->writeBuffer->firstZoomBuffer[i]->p = calloc(fp->hdr->bufSize/32, 32);
	856	if(!fp->writeBuffer->firstZoomBuffer[i]->p) goto error;
	857	fp->writeBuffer->firstZoomBuffer[i]->m = fp->hdr->bufSize;
	858	((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[0] = 0;
	859	((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[1] = 0;
	860	((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->hdr->zoomHdrs->level[i];
	861	if(fp->hdr->zoomHdrs->level[i] > fp->cl->len[0]) ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->cl->len[0];
	862	fp->writeBuffer->lastZoomBuffer[i] = fp->writeBuffer->firstZoomBuffer[i];
	863	}
	864
	865	return 0;
	866
	867	error:
	868	if(fp->writeBuffer->firstZoomBuffer) {
	869	for(i=0; i<fp->hdr->nLevels; i++) {
	870	if(fp->writeBuffer->firstZoomBuffer[i]) {
	871	if(fp->writeBuffer->firstZoomBuffer[i]->p) free(fp->writeBuffer->firstZoomBuffer[i]->p);
	872	free(fp->writeBuffer->firstZoomBuffer[i]);
	873	}
	874	}
	875	free(fp->writeBuffer->firstZoomBuffer);
	876	}
	877	if(fp->writeBuffer->lastZoomBuffer) free(fp->writeBuffer->lastZoomBuffer);
	878	if(fp->writeBuffer->nNodes) free(fp->writeBuffer->lastZoomBuffer);
	879	return 6;
	880	}
	881
	882	//Given an interval start, calculate the next one at a zoom level
	883	void nextPos(bigWigFile_t fp, uint32_t size, uint32_t pos, uint32_t desiredTid) {
	884	uint32_t *tid = pos;
	885	uint32_t *start = pos+1;
	886	uint32_t *end = pos+2;
	887	*start += size;
	888	if(start >= fp->cl->len[tid]) {
	889	(*start) = 0;
	890	(*tid)++;
	891	}
	892
	893	//prevent needless iteration when changing chromosomes
	894	if(*tid < desiredTid) {
	895	*tid = desiredTid;
	896	*start = 0;
	897	}
	898
	899	(end) = start+size;
	900	if(end > fp->cl->len[tid]) (end) = fp->cl->len[tid];
	901	}
	902
	903	//Return the number of bases two intervals overlap
	904	uint32_t overlapsInterval(uint32_t tid0, uint32_t start0, uint32_t end0, uint32_t tid1, uint32_t start1, uint32_t end1) {
	905	if(tid0 != tid1) return 0;
	906	if(end0 <= start1) return 0;
	907	if(end1 <= start0) return 0;
	908	if(end0 <= end1) {
	909	if(start1 > start0) return end0-start1;
	910	return end0-start0;
	911	} else {
	912	if(start1 > start0) return end1-start1;
	913	return end1-start0;
	914	}
	915	}
	916
	917	//Returns the number of bases of the interval written
	918	uint32_t updateInterval(bigWigFile_t fp, bwZoomBuffer_t buffer, double sum, double sumsq, uint32_t size, uint32_t tid, uint32_t start, uint32_t end, float value) {
	919	uint32_t p2 = (uint32_t) buffer->p;
	920	float fp2 = (float) p2;
	921	uint32_t rv = 0, offset = 0;
	922	if(!buffer) return 0;
	923	if(buffer->l+32 >= buffer->m) return 0;
	924
	925	//Make sure that we don't overflow a uint32_t by adding some huge value to start
	926	if(start + size < start) size = ((uint32_t) -1) - start;
	927
	928	if(buffer->l) {
	929	offset = buffer->l/32;
	930	} else {
	931	p2[0] = tid;
	932	p2[1] = start;
	933	if(start+size < end) p2[2] = start+size;
	934	else p2[2] = end;
	935	}
	936
	937	//Do we have any overlap with the previously added interval?
	938	if(offset) {
	939	rv = overlapsInterval(p2[8(offset-1)], p2[8(offset-1)+1], p2[8*(offset-1)+1] + size, tid, start, end);
	940	if(rv) {
	941	p2[8*(offset-1)+2] = start + rv;
	942	p2[8*(offset-1)+3] += rv;
	943	if(fp2[8(offset-1)+4] > value) fp2[8(offset-1)+4] = value;
	944	if(fp2[8(offset-1)+5] < value) fp2[8(offset-1)+5] = value;
	945	sum += rvvalue;
	946	sumsq += rvpow(value, 2.0);
	947	return rv;
	948	} else {
	949	fp2[8(offset-1)+6] = sum;
	950	fp2[8(offset-1)+7] = sumsq;
	951	*sum = 0.0;
	952	*sumsq = 0.0;
	953	}
	954	}
	955
	956	//If we move to a new interval then skip iterating over a bunch of obviously non-overlapping intervals
	957	if(offset && p2[8*offset+2] == 0) {
	958	p2[8*offset] = tid;
	959	p2[8*offset+1] = start;
	960	if(start+size < end) p2[8*offset+2] = start+size;
	961	else p2[8*offset+2] = end;
	962	//nextPos(fp, size, p2+8*offset, tid); //We can actually skip uncovered intervals
	963	}
	964
	965	//Add a new entry
	966	while(!(rv = overlapsInterval(p2[8offset], p2[8offset+1], p2[8*offset+1] + size, tid, start, end))) {
	967	p2[8*offset] = tid;
	968	p2[8*offset+1] = start;
	969	if(start+size < end) p2[8*offset+2] = start+size;
	970	else p2[8*offset+2] = end;
	971	//nextPos(fp, size, p2+8*offset, tid);
	972	}
	973	p2[8*offset+3] = rv;
	974	fp2[8*offset+4] = value; //min
	975	fp2[8*offset+5] = value; //max
	976	sum += rv value;
	977	sumsq += rv pow(value,2.0);
	978	buffer->l += 32;
	979	return rv;
	980	}
	981
	982	//Returns 0 on success
	983	int addIntervalValue(bigWigFile_t fp, uint64_t nEntries, double sum, double sumsq, bwZoomBuffer_t *buffer, uint32_t itemsPerSlot, uint32_t zoom, uint32_t tid, uint32_t start, uint32_t end, float value) {
	984	bwZoomBuffer_t *newBuffer = NULL;
	985	uint32_t rv;
	986
	987	while(start < end) {
	988	rv = updateInterval(fp, buffer, sum, sumsq, zoom, tid, start, end, value);
	989	if(!rv) {
	990	//Allocate a new buffer
	991	newBuffer = calloc(1, sizeof(bwZoomBuffer_t));
	992	if(!newBuffer) return 1;
	993	newBuffer->p = calloc(itemsPerSlot, 32);
	994	if(!newBuffer->p) goto error;
	995	newBuffer->m = itemsPerSlot*32;
	996	memcpy(newBuffer->p, buffer->p+buffer->l-32, 4);
	997	memcpy(newBuffer->p+4, buffer->p+buffer->l-28, 4);
	998	((uint32_t) newBuffer->p)[2] = ((uint32_t) newBuffer->p)[1] + zoom;
	999	sum = sumsq = 0.0;
	1000	rv = updateInterval(fp, newBuffer, sum, sumsq, zoom, tid, start, end, value);
	1001	if(!rv) goto error;
	1002	buffer->next = newBuffer;
	1003	buffer = buffer->next;
	1004	*nEntries += 1;
	1005	}
	1006	start += rv;
	1007	}
	1008
	1009	return 0;
	1010
	1011	error:
	1012	if(newBuffer) {
	1013	if(newBuffer->m) free(newBuffer->p);
	1014	free(newBuffer);
	1015	}
	1016	return 2;
	1017	}
	1018
	1019	//Get all of the intervals and add them to the appropriate zoomBuffer
	1020	int constructZoomLevels(bigWigFile_t *fp) {
	1021	bwOverlappingIntervals_t *intervals = NULL;
	1022	double sum = NULL, sumsq = NULL;
	1023	uint32_t i, j, k;
	1024
	1025	sum = calloc(fp->hdr->nLevels, sizeof(double));
	1026	sumsq = calloc(fp->hdr->nLevels, sizeof(double));
	1027	if(!sum \|\| !sumsq) goto error;
	1028
	1029	for(i=0; i<fp->cl->nKeys; i++) {
	1030	intervals = bwGetOverlappingIntervals(fp, fp->cl->chrom[i], 0, fp->cl->len[i]);
	1031	if(!intervals) goto error;
	1032	for(j=0; j<intervals->l; j++) {
	1033	for(k=0; k<fp->hdr->nLevels; k++) {
	1034	if(addIntervalValue(fp, &(fp->writeBuffer->nNodes[k]), sum+k, sumsq+k, fp->writeBuffer->lastZoomBuffer[k], fp->hdr->bufSize/32, fp->hdr->zoomHdrs->level[k], i, intervals->start[j], intervals->end[j], intervals->value[j])) goto error;
	1035	while(fp->writeBuffer->lastZoomBuffer[k]->next) fp->writeBuffer->lastZoomBuffer[k] = fp->writeBuffer->lastZoomBuffer[k]->next;
	1036	}
	1037	}
	1038	bwDestroyOverlappingIntervals(intervals);
	1039	}
	1040
	1041	//Make an index for each zoom level
	1042	for(i=0; i<fp->hdr->nLevels; i++) {
	1043	fp->hdr->zoomHdrs->idx[i] = calloc(1, sizeof(bwRTree_t));
	1044	if(!fp->hdr->zoomHdrs->idx[i]) return 1;
	1045	fp->hdr->zoomHdrs->idx[i]->blockSize = fp->writeBuffer->blockSize;
	1046	}
	1047
	1048	free(sum);
	1049	free(sumsq);
	1050
	1051	return 0;
	1052
	1053	error:
	1054	if(intervals) bwDestroyOverlappingIntervals(intervals);
	1055	if(sum) free(sum);
	1056	if(sumsq) free(sumsq);
	1057	return 1;
	1058	}
	1059
	1060	int writeZoomLevels(bigWigFile_t *fp) {
	1061	uint64_t offset1, offset2, idxSize = 0;
	1062	uint32_t i, j, four = 0, last, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0;
	1063	uint8_t wrote, one = 0;
	1064	uint16_t actualNLevels = 0;
	1065	int rv;
	1066	bwLL ll, p;
	1067	bwRTreeNode_t *root;
	1068	bwZoomBuffer_t zb, zb2;
	1069	bwWriteBuffer_t *wb = fp->writeBuffer;
	1070	uLongf sz;
	1071
	1072	for(i=0; i<fp->hdr->nLevels; i++) {
	1073	if(i) {
	1074	//Is this a duplicate level?
	1075	if(fp->writeBuffer->nNodes[i] == fp->writeBuffer->nNodes[i-1]) break;
	1076	}
	1077	actualNLevels++;
	1078
	1079	//reserve a uint32_t for the number of blocks
	1080	fp->hdr->zoomHdrs->dataOffset[i] = bwTell(fp);
	1081	fp->writeBuffer->nBlocks = 0;
	1082	fp->writeBuffer->l = 24;
	1083	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
	1084	zb = fp->writeBuffer->firstZoomBuffer[i];
	1085	fp->writeBuffer->firstIndexNode = NULL;
	1086	fp->writeBuffer->currentIndexNode = NULL;
	1087	while(zb) {
	1088	sz = fp->hdr->bufSize;
	1089	if(compress(wb->compressP, &sz, zb->p, zb->l) != Z_OK) return 2;
	1090
	1091	//write the data to disk
	1092	if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 3;
	1093
	1094	//Add an entry into the index
	1095	last = (zb->l - 32)>>2;
	1096	if(addIndexEntry(fp, ((uint32_t)zb->p)[0], ((uint32_t)zb->p)[last], ((uint32_t)zb->p)[1], ((uint32_t)zb->p)[last+2], bwTell(fp)-sz, sz)) return 4;
	1097
	1098	wb->nBlocks++;
	1099	wb->l = 24;
	1100	zb = zb->next;
	1101	}
	1102	if(writeAtPos(&(wb->nBlocks), sizeof(uint32_t), 1, fp->hdr->zoomHdrs->dataOffset[i], fp->URL->x.fp)) return 5;
	1103
	1104	//Make the tree
	1105	ll = fp->writeBuffer->firstIndexNode;
	1106	if(ll == fp->writeBuffer->currentIndexNode) {
	1107	root = ll->node;
	1108	idxSize = 4 + 24*root->nChildren;
	1109	} else {
	1110	root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
	1111	}
	1112	if(!root) return 4;
	1113	fp->hdr->zoomHdrs->idx[i]->root = root;
	1114
	1115	ll = fp->writeBuffer->firstIndexNode;
	1116	while(ll) {
	1117	p = ll->next;
	1118	free(ll);
	1119	ll=p;
	1120	}
	1121
	1122
	1123	//write the index
	1124	wrote = 0;
	1125	fp->hdr->zoomHdrs->indexOffset[i] = bwTell(fp);
	1126	four = IDX_MAGIC;
	1127	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
	1128	root = fp->hdr->zoomHdrs->idx[i]->root;
	1129	if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
	1130	if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
	1131	if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
	1132	if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
	1133	if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
	1134	if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
	1135	if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
	1136	four = fp->hdr->bufSize/32;
	1137	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
	1138	four = 0;
	1139	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
	1140	fp->hdr->zoomHdrs->idx[i]->rootOffset = bwTell(fp);
	1141
	1142	//Write the root node, since writeIndexTree writes the children and fills in the offset
	1143	offset1 = bwTell(fp);
	1144	if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
	1145	if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
	1146	if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
	1147	for(j=0; j<root->nChildren; j++) {
	1148	vector[0] = root->chrIdxStart[j];
	1149	vector[1] = root->baseStart[j];
	1150	vector[2] = root->chrIdxEnd[j];
	1151	vector[3] = root->baseEnd[j];
	1152	if(root->isLeaf) {
	1153	//Include the offset and size
	1154	if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
	1155	if(fwrite(&(root->dataOffset[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
	1156	if(fwrite(&(root->x.size[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
	1157	} else {
	1158	if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
	1159	}
	1160	}
	1161
	1162	while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->hdr->zoomHdrs->idx[i]->root, &wrote, 0)) == 0) {
	1163	if(!wrote) break;
	1164	wrote = 0;
	1165	}
	1166
	1167	if(rv \|\| wrote) return 6;
	1168
	1169	//Save the file position
	1170	offset2 = bwTell(fp);
	1171
	1172	//Write the offsets
	1173	if(writeIndexOffsets(fp->URL->x.fp, root, offset1)) return 2;
	1174
	1175	//Move the file pointer back to the end
	1176	bwSetPos(fp, offset2);
	1177
	1178
	1179	//Free the linked list
	1180	zb = fp->writeBuffer->firstZoomBuffer[i];
	1181	while(zb) {
	1182	if(zb->p) free(zb->p);
	1183	zb2 = zb->next;
	1184	free(zb);
	1185	zb = zb2;
	1186	}
	1187	fp->writeBuffer->firstZoomBuffer[i] = NULL;
	1188	}
	1189
	1190	//Free unused zoom levels
	1191	for(i=actualNLevels; i<fp->hdr->nLevels; i++) {
	1192	zb = fp->writeBuffer->firstZoomBuffer[i];
	1193	while(zb) {
	1194	if(zb->p) free(zb->p);
	1195	zb2 = zb->next;
	1196	free(zb);
	1197	zb = zb2;
	1198	}
	1199	fp->writeBuffer->firstZoomBuffer[i] = NULL;
	1200	}
	1201
	1202	//Write the zoom headers to disk
	1203	offset1 = bwTell(fp);
	1204	if(bwSetPos(fp, 0x40)) return 7;
	1205	four = 0;
	1206	for(i=0; i<actualNLevels; i++) {
	1207	if(fwrite(&(fp->hdr->zoomHdrs->level[i]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
	1208	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
	1209	if(fwrite(&(fp->hdr->zoomHdrs->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 10;
	1210	if(fwrite(&(fp->hdr->zoomHdrs->indexOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 11;
	1211	}
	1212
	1213	//Write the number of levels if needed
	1214	if(bwSetPos(fp, 0x6)) return 12;
	1215	if(fwrite(&actualNLevels, sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 13;
	1216
	1217	if(bwSetPos(fp, offset1)) return 14;
	1218
	1219	return 0;
	1220	}
	1221
	1222	//0 on success
	1223	int bwFinalize(bigWigFile_t *fp) {
	1224	uint32_t four;
	1225	uint64_t offset;
	1226	if(!fp->isWrite) return 0;
	1227
	1228	//Flush the buffer
	1229	if(flushBuffer(fp)) return 1; //Valgrind reports a problem here!
	1230
	1231	//Update the data section with the number of blocks written
	1232	if(fp->hdr) {
	1233	if(writeAtPos(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->hdr->dataOffset, fp->URL->x.fp)) return 2;
	1234	} else {
	1235	//The header wasn't written!
	1236	return 1;
	1237	}
	1238
	1239	//write the bufferSize
	1240	if(fp->hdr->bufSize) {
	1241	if(writeAtPos(&(fp->hdr->bufSize), sizeof(uint32_t), 1, 0x34, fp->URL->x.fp)) return 2;
	1242	}
	1243
	1244	//write the summary information
	1245	if(writeSummary(fp)) return 3;
	1246
	1247	//Convert the linked-list to a tree and write to disk
	1248	if(writeIndex(fp)) return 4;
	1249
	1250	//Zoom level stuff here?
	1251	if(fp->hdr->nLevels && fp->writeBuffer->nBlocks) {
	1252	offset = bwTell(fp);
	1253	if(makeZoomLevels(fp)) return 5;
	1254	if(constructZoomLevels(fp)) return 6;
	1255	bwSetPos(fp, offset);
	1256	if(writeZoomLevels(fp)) return 7; //This write nLevels as well
	1257	}
	1258
	1259	//write magic at the end of the file
	1260	four = BIGWIG_MAGIC;
	1261	if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
	1262
	1263	return 0;
	1264	}
	1265
	1266	/*
	1267	data chunk:
	1268	uint64_t number of blocks (2 / 110851)
	1269	some blocks
	1270
	1271	an uncompressed data block (24 byte header)
	1272	uint32_t Tid 0-4
	1273	uint32_t start 4-8
	1274	uint32_t end 8-12
	1275	uint32_t step 12-16
	1276	uint32_t span 16-20
	1277	uint8_t type 20
	1278	uint8_t padding
	1279	uint16_t nItems 22
	1280	nItems of:
	1281	type 1: //12 bytes
	1282	uint32_t start
	1283	uint32_t end
	1284	float value
	1285	type 2: //8 bytes
	1286	uint32_t start
	1287	float value
	1288	type 3: //4 bytes
	1289	float value
	1290
	1291	data block index header
	1292	uint32_t magic
	1293	uint32_t blockSize (256 in the example) maximum number of children
	1294	uint64_t number of blocks (2 / 110851)
	1295	uint32_t startTid
	1296	uint32_t startPos
	1297	uint32_t endTid
	1298	uint32_t endPos
	1299	uint64_t index size? (0x1E7 / 0x1AF0401F) index address?
	1300	uint32_t itemsPerBlock (1 / 1) 1024 for zoom headers 1024 for zoom headers
	1301	uint32_t padding
	1302
	1303	data block index node non-leaf (4 bytes + 24*nChildren)
	1304	uint8_t isLeaf
	1305	uint8_t padding
	1306	uint16_t nChildren (2, 256)
	1307	uint32_t startTid
	1308	uint32_t startPos
	1309	uint32_t endTid
	1310	uint32_t endPos
	1311	uint64_t dataOffset (0x1AF05853, 0x1AF07057)
	1312
	1313	data block index node leaf (4 bytes + 32*nChildren)
	1314	uint8_t isLeaf
	1315	uint8_t padding
	1316	uint16_t nChildren (2)
	1317	uint32_t startTid
	1318	uint32_t startPos
	1319	uint32_t endTid
	1320	uint32_t endPos
	1321	uint64_t dataOffset (0x198, 0x1CF)
	1322	uint64_t dataSize (55, 24)
	1323
	1324	zoom data block
	1325	uint32_t number of blocks (10519766)
	1326	some data blocks
	1327	*/

+296

-0

libBigWig/io.c less more

	0	#ifndef NOCURL
	1	#include <curl/curl.h>
	2	#endif
	3	#include <stdio.h>
	4	#include <stdlib.h>
	5	#include <string.h>
	6	#include <unistd.h>
	7	#include "bigWigIO.h"
	8	#include <inttypes.h>
	9	#include <errno.h>
	10
	11	size_t GLOBAL_DEFAULTBUFFERSIZE;
	12
	13	#ifndef NOCURL
	14	uint64_t getContentLength(URL_t *URL) {
	15	double size;
	16	if(curl_easy_getinfo(URL->x.curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &size) != CURLE_OK) {
	17	return 0;
	18	}
	19	if(size== -1.0) return 0;
	20	return (uint64_t) size;
	21	}
	22
	23	//Fill the buffer, note that URL may be left in an unusable state on error!
	24	CURLcode urlFetchData(URL_t *URL, unsigned long bufSize) {
	25	CURLcode rv;
	26	char range[1024];
	27
	28	if(URL->filePos != (size_t) -1) URL->filePos += URL->bufLen;
	29	else URL->filePos = 0;
	30
	31	URL->bufPos = URL->bufLen = 0; //Otherwise, we can't copy anything into the buffer!
	32	sprintf(range,"%lu-%lu", URL->filePos, URL->filePos+bufSize-1);
	33	rv = curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range);
	34	if(rv != CURLE_OK) {
	35	fprintf(stderr, "[urlFetchData] Couldn't set the range (%s)\n", range);
	36	return rv;
	37	}
	38
	39	rv = curl_easy_perform(URL->x.curl);
	40	errno = 0; //Sometimes curl_easy_perform leaves a random errno remnant
	41	return rv;
	42	}
	43
	44	//Read data into a buffer, ideally from a buffer already in memory
	45	//The loop is likely no longer needed.
	46	size_t url_fread(void obuf, size_t obufSize, URL_t URL) {
	47	size_t remaining = obufSize, fetchSize;
	48	void *p = obuf;
	49	CURLcode rv;
	50
	51	while(remaining) {
	52	if(!URL->bufLen) {
	53	rv = urlFetchData(URL, URL->bufSize);
	54	if(rv != CURLE_OK) {
	55	fprintf(stderr, "[url_fread] urlFetchData (A) returned %s\n", curl_easy_strerror(rv));
	56	return 0;
	57	}
	58	} else if(URL->bufLen < URL->bufPos + remaining) { //Copy the remaining buffer and reload the buffer as needed
	59	p = memcpy(p, URL->memBuf+URL->bufPos, URL->bufLen - URL->bufPos);
	60	if(!p) return 0;
	61	p += URL->bufLen - URL->bufPos;
	62	remaining -= URL->bufLen - URL->bufPos;
	63	if(remaining) {
	64	if(!URL->isCompressed) {
	65	fetchSize = URL->bufSize;
	66	} else {
	67	fetchSize = (remaining<URL->bufSize)?remaining:URL->bufSize;
	68	}
	69	rv = urlFetchData(URL, fetchSize);
	70	if(rv != CURLE_OK) {
	71	fprintf(stderr, "[url_fread] urlFetchData (B) returned %s\n", curl_easy_strerror(rv));
	72	return 0;
	73	}
	74	}
	75	} else {
	76	p = memcpy(p, URL->memBuf+URL->bufPos, remaining);
	77	if(!p) return 0;
	78	URL->bufPos += remaining;
	79	remaining = 0;
	80	}
	81	}
	82	return obufSize;
	83	}
	84	#endif
	85
	86	//Returns the number of bytes requested or a smaller number on error
	87	//Note that in the case of remote files, the actual amount read may be less than the return value!
	88	size_t urlRead(URL_t URL, void buf, size_t bufSize) {
	89	#ifndef NOCURL
	90	if(URL->type==0) {
	91	return fread(buf, bufSize, 1, URL->x.fp)*bufSize;
	92	} else {
	93	return url_fread(buf, bufSize, URL);
	94	}
	95	#else
	96	return fread(buf, bufSize, 1, URL->x.fp)*bufSize;
	97	#endif
	98	}
	99
	100	size_t bwFillBuffer(void inBuf, size_t l, size_t nmemb, void pURL) {
	101	URL_t URL = (URL_t) pURL;
	102	void *p = URL->memBuf;
	103	size_t copied = l*nmemb;
	104	if(!p) return 0;
	105
	106	p += URL->bufLen;
	107	if(l*nmemb > URL->bufSize - URL->bufPos) { //We received more than we can store!
	108	copied = URL->bufSize - URL->bufLen;
	109	}
	110	memcpy(p, inBuf, copied);
	111	URL->bufLen += copied;
	112
	113	if(!URL->memBuf) return 0; //signal error
	114	return copied;
	115	}
	116
	117	//Seek to an arbitrary location, returning a CURLcode
	118	//Note that a local file returns CURLE_OK on success or CURLE_FAILED_INIT on any error;
	119	CURLcode urlSeek(URL_t *URL, size_t pos) {
	120	#ifndef NOCURL
	121	char range[1024];
	122	CURLcode rv;
	123
	124	if(URL->type == BWG_FILE) {
	125	#endif
	126	if(fseek(URL->x.fp, pos, SEEK_SET) == 0) {
	127	errno = 0;
	128	return CURLE_OK;
	129	} else {
	130	return CURLE_FAILED_INIT; //This is arbitrary
	131	}
	132	#ifndef NOCURL
	133	} else {
	134	//If the location is covered by the buffer then don't seek!
	135	if(pos < URL->filePos \|\| pos >= URL->filePos+URL->bufLen) {
	136	URL->filePos = pos;
	137	URL->bufLen = 0; //Otherwise, filePos will get incremented on the next read!
	138	URL->bufPos = 0;
	139	//Maybe this works for FTP?
	140	sprintf(range,"%lu-%lu", pos, pos+URL->bufSize-1);
	141	rv = curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range);
	142	if(rv != CURLE_OK) {
	143	fprintf(stderr, "[urlSeek] Couldn't set the range (%s)\n", range);
	144	return rv;
	145	}
	146	rv = curl_easy_perform(URL->x.curl);
	147	if(rv != CURLE_OK) {
	148	fprintf(stderr, "[urlSeek] curl_easy_perform received an error!\n");
	149	}
	150	errno = 0; //Don't propogate remnant resolved libCurl errors
	151	return rv;
	152	} else {
	153	URL->bufPos = pos-URL->filePos;
	154	return CURLE_OK;
	155	}
	156	}
	157	#endif
	158	}
	159
	160	URL_t urlOpen(char fname, CURLcode (callBack)(CURL), const char *mode) {
	161	URL_t *URL = calloc(1, sizeof(URL_t));
	162	if(!URL) return NULL;
	163	char url = NULL, req = NULL;
	164	#ifndef NOCURL
	165	CURLcode code;
	166	char range[1024];
	167	#endif
	168
	169	URL->fname = fname;
	170
	171	if((!mode) \|\| (strchr(mode, 'w') == 0)) {
	172	//Set the protocol
	173	#ifndef NOCURL
	174	if(strncmp(fname, "http://", 7) == 0) URL->type = BWG_HTTP;
	175	else if(strncmp(fname, "https://", 8) == 0) URL->type = BWG_HTTPS;
	176	else if(strncmp(fname, "ftp://", 6) == 0) URL->type = BWG_FTP;
	177	else URL->type = BWG_FILE;
	178	#else
	179	URL->type = BWG_FILE;
	180	#endif
	181
	182	//local file?
	183	if(URL->type == BWG_FILE) {
	184	URL->filePos = -1; //This signals that nothing has been read
	185	URL->x.fp = fopen(fname, "rb");
	186	if(!(URL->x.fp)) {
	187	free(URL);
	188	fprintf(stderr, "[urlOpen] Couldn't open %s for reading\n", fname);
	189	return NULL;
	190	}
	191	#ifndef NOCURL
	192	} else {
	193	//Remote file, set up the memory buffer and get CURL ready
	194	URL->memBuf = malloc(GLOBAL_DEFAULTBUFFERSIZE);
	195	if(!(URL->memBuf)) {
	196	free(URL);
	197	fprintf(stderr, "[urlOpen] Couldn't allocate enough space for the file buffer!\n");
	198	return NULL;
	199	}
	200	URL->bufSize = GLOBAL_DEFAULTBUFFERSIZE;
	201	URL->x.curl = curl_easy_init();
	202	if(!(URL->x.curl)) {
	203	fprintf(stderr, "[urlOpen] curl_easy_init() failed!\n");
	204	goto error;
	205	}
	206	//Negotiate a reasonable HTTP authentication method
	207	if(curl_easy_setopt(URL->x.curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY) != CURLE_OK) {
	208	fprintf(stderr, "[urlOpen] Failed instructing curl to use any HTTP authentication it finds to be suitable!\n");
	209	goto error;
	210	}
	211	//Follow redirects
	212	if(curl_easy_setopt(URL->x.curl, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
	213	fprintf(stderr, "[urlOpen] Failed instructing curl to follow redirects!\n");
	214	goto error;
	215	}
	216	//Set the URL
	217	if(curl_easy_setopt(URL->x.curl, CURLOPT_URL, fname) != CURLE_OK) {
	218	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_URL!\n");
	219	goto error;
	220	}
	221	//Set the range, which doesn't do anything for HTTP
	222	sprintf(range, "0-%lu", URL->bufSize-1);
	223	if(curl_easy_setopt(URL->x.curl, CURLOPT_RANGE, range) != CURLE_OK) {
	224	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_RANGE (%s)!\n", range);
	225	goto error;
	226	}
	227	//Set the callback info, which means we no longer need to directly deal with sockets and header!
	228	if(curl_easy_setopt(URL->x.curl, CURLOPT_WRITEFUNCTION, bwFillBuffer) != CURLE_OK) {
	229	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_WRITEFUNCTION!\n");
	230	goto error;
	231	}
	232	if(curl_easy_setopt(URL->x.curl, CURLOPT_WRITEDATA, (void*)URL) != CURLE_OK) {
	233	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_WRITEDATA!\n");
	234	goto error;
	235	}
	236	//Ignore certificate errors with https, libcurl just isn't reliable enough with conda
	237	if(curl_easy_setopt(URL->x.curl, CURLOPT_SSL_VERIFYPEER, 0) != CURLE_OK) {
	238	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_SSL_VERIFYPEER to 0!\n");
	239	goto error;
	240	}
	241	if(curl_easy_setopt(URL->x.curl, CURLOPT_SSL_VERIFYHOST, 0) != CURLE_OK) {
	242	fprintf(stderr, "[urlOpen] Couldn't set CURLOPT_SSL_VERIFYHOST to 0!\n");
	243	goto error;
	244	}
	245	if(callBack) {
	246	code = callBack(URL->x.curl);
	247	if(code != CURLE_OK) {
	248	fprintf(stderr, "[urlOpen] The user-supplied call back function returned an error: %s\n", curl_easy_strerror(code));
	249	goto error;
	250	}
	251	}
	252	code = curl_easy_perform(URL->x.curl);
	253	errno = 0; //Sometimes curl_easy_perform leaves a random errno remnant
	254	if(code != CURLE_OK) {
	255	fprintf(stderr, "[urlOpen] curl_easy_perform received an error: %s\n", curl_easy_strerror(code));
	256	goto error;
	257	}
	258	#endif
	259	}
	260	} else {
	261	URL->type = BWG_FILE;
	262	URL->x.fp = fopen(fname, mode);
	263	if(!(URL->x.fp)) {
	264	free(URL);
	265	fprintf(stderr, "[urlOpen] Couldn't open %s for writing\n", fname);
	266	return NULL;
	267	}
	268	}
	269	if(url) free(url);
	270	if(req) free(req);
	271	return URL;
	272
	273	#ifndef NOCURL
	274	error:
	275	if(url) free(url);
	276	if(req) free(req);
	277	free(URL->memBuf);
	278	curl_easy_cleanup(URL->x.curl);
	279	free(URL);
	280	return NULL;
	281	#endif
	282	}
	283
	284	//Performs the necessary free() operations and handles cleaning up curl
	285	void urlClose(URL_t *URL) {
	286	if(URL->type == BWG_FILE) {
	287	fclose(URL->x.fp);
	288	#ifndef NOCURL
	289	} else {
	290	free(URL->memBuf);
	291	curl_easy_cleanup(URL->x.curl);
	292	#endif
	293	}
	294	free(URL);
	295	}

+46

-19

pyBigWig.c less more

196	196
197	197	//Return 1 if there are any entries at all
198	198	int hasEntries(bigWigFile_t *bw) {
199		if(bw->hdr->nBasesCovered > 0) return 1;
	199	if(bw->hdr->indexOffset != 0) return 1; // No index, no entries pyBigWig issue #111
	200	//if(bw->hdr->nBasesCovered > 0) return 1; // Sometimes headers are broken
200	201	return 0;
201	202	}
202	203

377	378	double *val;
378	379	uint32_t start, end = -1, tid;
379	380	unsigned long startl = 0, endl = -1;
380		static char *kwd_list[] = {"chrom", "start", "end", "type", "nBins", "exact", NULL};
	381	static char *kwd_list[] = {"chrom", "start", "end", "type", "nBins", "exact", "numpy", NULL};
381	382	char chrom, type = "mean";
382	383	PyObject ret, exact = Py_False, starto = NULL, endo = NULL;
	384	PyObject *outputNumpy = Py_False;
383	385	int i, nBins = 1;
384	386	errno = 0; //In the off-chance that something elsewhere got an error and didn't clear it...
385	387

398	400	return NULL;
399	401	}
400	402
401		if(!PyArg_ParseTupleAndKeywords(args, kwds, "s\|OOsiO", kwd_list, &chrom, &starto, &endo, &type, &nBins, &exact)) {
	403	if(!PyArg_ParseTupleAndKeywords(args, kwds, "s\|OOsiOO", kwd_list, &chrom, &starto, &endo, &type, &nBins, &exact, &outputNumpy)) {
402	404	PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!");
403	405	return NULL;
404	406	}

463	465
464	466	//Return a list of None if there are no entries at all
465	467	if(!hasEntries(bw)) {
466		ret = PyList_New(nBins);
467		for(i=0; i<nBins; i++) {
468		Py_INCREF(Py_None);
469		PyList_SetItem(ret, i, Py_None);
470		}
	468	#ifdef WITHNUMPY
	469	if(outputNumpy == Py_True) {
	470	val = malloc(sizeof(double)*nBins);
	471	for(i=0; i<nBins; i++) {
	472	val[i] = NPY_NAN;
	473	}
	474	npy_intp len = nBins;
	475	ret = PyArray_SimpleNewFromData(1, &len, NPY_FLOAT64, (void *) val);
	476	//This will break if numpy ever stops using malloc!
	477	PyArray_ENABLEFLAGS((PyArrayObject*) ret, NPY_ARRAY_OWNDATA);
	478	} else {
	479	#endif
	480	ret = PyList_New(nBins);
	481	for(i=0; i<nBins; i++) {
	482	Py_INCREF(Py_None);
	483	PyList_SetItem(ret, i, Py_None);
	484	}
	485	#ifdef WITHNUMPY
	486	}
	487	#endif
471	488	return ret;
472	489	}
473	490

483	500	return NULL;
484	501	}
485	502
486		ret = PyList_New(nBins);
487		for(i=0; i<nBins; i++) {
488		if(isnan(val[i])) {
489		Py_INCREF(Py_None);
490		PyList_SetItem(ret, i, Py_None);
491		} else {
492		PyList_SetItem(ret, i, PyFloat_FromDouble(val[i]));
493		}
494		}
495		free(val);
496
	503	#ifdef WITHNUMPY
	504	if(outputNumpy == Py_True) {
	505	npy_intp len = nBins;
	506	ret = PyArray_SimpleNewFromData(1, &len, NPY_FLOAT64, (void *) val);
	507	//This will break if numpy ever stops using malloc!
	508	PyArray_ENABLEFLAGS((PyArrayObject*) ret, NPY_ARRAY_OWNDATA);
	509	} else {
	510	#endif
	511	ret = PyList_New(nBins);
	512	for(i=0; i<nBins; i++) {
	513	if(isnan(val[i])) {
	514	Py_INCREF(Py_None);
	515	PyList_SetItem(ret, i, Py_None);
	516	} else {
	517	PyList_SetItem(ret, i, PyFloat_FromDouble(val[i]));
	518	}
	519	}
	520	free(val);
	521	#ifdef WITHNUMPY
	522	}
	523	#endif
497	524	return ret;
498	525	}
499	526

+22

-0

pyBigWig.egg-info/SOURCES.txt less more

	0	LICENSE.txt
	1	MANIFEST.in
	2	README.md
	3	pyBigWig.c
	4	pyBigWig.h
	5	setup.cfg
	6	setup.py
	7	libBigWig/LICENSE
	8	libBigWig/README.md
	9	libBigWig/bigWig.h
	10	libBigWig/bigWigIO.h
	11	libBigWig/bwCommon.h
	12	libBigWig/bwRead.c
	13	libBigWig/bwStats.c
	14	libBigWig/bwValues.c
	15	libBigWig/bwValues.h
	16	libBigWig/bwWrite.c
	17	libBigWig/io.c
	18	pyBigWigTest/__init__.py
	19	pyBigWigTest/test.bigBed
	20	pyBigWigTest/test.bw
	21	pyBigWigTest/test.py⏎

-1

pyBigWig.h less more

1	1	#include <structmember.h>
2	2	#include "bigWig.h"
3	3
4		#define pyBigWigVersion "0.3.17"
	4	#define pyBigWigVersion "0.3.18"
5	5
6	6	typedef struct {
7	7	PyObject_HEAD

+20

-0

pyBigWigTest/test.py less more

307	307
308	308	bw.close()
309	309	os.remove("/tmp/delete.bw")
	310
	311	def testNumpyValues(self):
	312	if pyBigWig.numpy == 0:
	313	return 0
	314	import numpy as np
	315
	316	fname = "http://raw.githubusercontent.com/dpryan79/pyBigWig/master/pyBigWigTest/test.bw"
	317	bw = pyBigWig.open(fname, "r")
	318
	319	assert np.allclose(
	320	bw.values("1", 0, 20, numpy=True),
	321	np.array(bw.values("1", 0, 20), dtype=np.float32),
	322	equal_nan=True
	323	)
	324
	325	assert np.allclose(
	326	bw.stats("1", 0, 20, "mean", 5, numpy=True),
	327	np.array(bw.stats("1", 0, 20, "mean", 5), dtype=np.float64),
	328	equal_nan=True
	329	)

-0

setup.cfg less more

0	0	[metadata]
1	1	description-file = README.md
	2
	3	[egg_info]
	4	tag_build =
	5	tag_date = 0
	6

-1

setup.py less more

61	61	include_dirs = include_dirs)
62	62
63	63	setup(name = 'pyBigWig',
64		version = '0.3.17',
	64	version = '0.3.18',
65	65	description = 'A package for accessing bigWig files using libBigWig',
66	66	author = "Devon P. Ryan",
67	67	author_email = "ryan@ie-freiburg.mpg.de",