Codebase list fermi-lite / HEAD htab.c
HEAD

Tree @HEAD (Download .tar.gz)

htab.c @HEADraw · history · blame

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "htab.h"
#include "khash.h"

#define _cnt_eq(a, b) ((a)>>14 == (b)>>14)
#define _cnt_hash(a) ((a)>>14)
KHASH_INIT(cnt, uint64_t, char, 0, _cnt_hash, _cnt_eq)
typedef khash_t(cnt) cnthash_t;

struct bfc_ch_s {
	int k;
	cnthash_t **h;
	// private
	int l_pre;
};

bfc_ch_t *bfc_ch_init(int k, int l_pre)
{
	bfc_ch_t *ch;
	int i;
	assert(k <= 63);
	if (k * 2 - l_pre > BFC_CH_KEYBITS)
		l_pre = k * 2 - BFC_CH_KEYBITS;
	if (l_pre > BFC_CH_MAXPRE) l_pre = BFC_CH_MAXPRE;
	assert(k - l_pre < BFC_CH_KEYBITS);
	ch = calloc(1, sizeof(bfc_ch_t));
	ch->k = k, ch->l_pre = l_pre;
	ch->h = calloc(1<<ch->l_pre, sizeof(void*));
	for (i = 0; i < 1<<ch->l_pre; ++i)
		ch->h[i] = kh_init(cnt);
	return ch;
}

void bfc_ch_destroy(bfc_ch_t *ch)
{
	int i;
	if (ch == 0) return;
	for (i = 0; i < 1<<ch->l_pre; ++i)
		kh_destroy(cnt, ch->h[i]);
	free(ch->h); free(ch);
}

static inline cnthash_t *get_subhash(const bfc_ch_t *ch, const uint64_t x[2], uint64_t *key)
{
	if (ch->k <= 32) {
		int t = ch->k * 2 - ch->l_pre;
		uint64_t z = x[0] << ch->k | x[1];
		*key = (z & ((1ULL<<t) - 1)) << 14 | 1;
		return ch->h[z>>t];
	} else {
		int t = ch->k - ch->l_pre;
		int shift = t + ch->k < BFC_CH_KEYBITS? ch->k : BFC_CH_KEYBITS - t;
		*key = ((x[0] & ((1ULL<<t) - 1)) << shift ^ x[1]) << 14 | 1;
		return ch->h[x[0]>>t];
	}
}

int bfc_ch_insert(bfc_ch_t *ch, const uint64_t x[2], int is_high, int forced)
{
	int absent;
	uint64_t key;
	cnthash_t *h;
	khint_t k;
	h = get_subhash(ch, x, &key);
	if (__sync_lock_test_and_set(&h->lock, 1)) {
		if (forced) // then wait until the hash table is unlocked by the thread using it
			while (__sync_lock_test_and_set(&h->lock, 1))
				while (h->lock); // lock
		else return -1;
	}
	k = kh_put(cnt, h, key, &absent);
	if (absent) {
		if (is_high) kh_key(h, k) |= 1<<8;
	} else {
		if ((kh_key(h, k) & 0xff) != 0xff) ++kh_key(h, k);
		if (is_high && (kh_key(h, k) >> 8 & 0x3f) != 0x3f) kh_key(h, k) += 1<<8;
	}
	__sync_lock_release(&h->lock); // unlock
	return 0;
}

int bfc_ch_get(const bfc_ch_t *ch, const uint64_t x[2])
{
	uint64_t key;
	cnthash_t *h;
	khint_t itr;
	h = get_subhash(ch, x, &key);
	itr = kh_get(cnt, h, key);
	return itr == kh_end(h)? -1 : kh_key(h, itr) & 0x3fff;
}

int bfc_ch_kmer_occ(const bfc_ch_t *ch, const bfc_kmer_t *z)
{
	uint64_t x[2];
	bfc_kmer_hash(ch->k, z->x, x);
	return bfc_ch_get(ch, x);
}

uint64_t bfc_ch_count(const bfc_ch_t *ch)
{
	int i;
	uint64_t cnt = 0;
	for (i = 0; i < 1<<ch->l_pre; ++i)
		cnt += kh_size(ch->h[i]);
	return cnt;
}

int bfc_ch_hist(const bfc_ch_t *ch, uint64_t cnt[256], uint64_t high[64])
{
	int i, max_i = -1;
	uint64_t max;
	memset(cnt, 0, 256 * 8);
	memset(high, 0, 64 * 8);
	for (i = 0; i < 1<<ch->l_pre; ++i) {
		khint_t k;
		cnthash_t *h = ch->h[i];
		for (k = 0; k != kh_end(h); ++k)
			if (kh_exist(h, k))
				++cnt[kh_key(h, k) & 0xff], ++high[kh_key(h, k)>>8 & 0x3f];
	}
	for (i = 3, max = 0; i < 256; ++i)
		if (cnt[i] > max)
			max = cnt[i], max_i = i;
	return max_i;
}

int bfc_ch_get_k(const bfc_ch_t *ch)
{
	return ch->k;
}