/*
* index.c: create and collate index data structures
*/
#include <stdio.h>
#include <stdlib.h>
#include "halibut.h"
static int compare_tags(const void *av, const void *bv, void *cmpctx);
static int compare_entries(const void *av, const void *bv, void *cmpctx);
indexdata *make_index(void) {
indexdata *ret = snew(indexdata);
ret->tags = newtree234(compare_tags, NULL);
ret->entries = newtree234(compare_entries, NULL);
return ret;
}
static indextag *make_indextag(void) {
indextag *ret = snew(indextag);
ret->name = NULL;
ret->implicit_text = NULL;
ret->explicit_texts = NULL;
ret->explicit_fpos = NULL;
ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
ret->refs = NULL;
return ret;
}
static int compare_tags(const void *av, const void *bv, void *cmpctx) {
const indextag *a = (const indextag *)av, *b = (const indextag *)bv;
return ustricmp(a->name, b->name);
}
static int compare_to_find_tag(const void *av, const void *bv, void *cmpctx) {
const wchar_t *a = (const wchar_t *)av;
const indextag *b = (const indextag *)bv;
return ustricmp(a, b->name);
}
static int compare_entries(const void *av, const void *bv, void *cmpctx) {
indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
return compare_wordlists(a->text, b->text);
}
/*
* Back-end utility: find the indextag with a given name.
*/
indextag *index_findtag(indexdata *idx, wchar_t *name) {
return findcmp234(idx->tags, name, compare_to_find_tag, NULL);
}
/*
* Add a \IM. `tags' points to a zero-terminated chain of
* zero-terminated strings ("first\0second\0thirdandlast\0\0").
* `text' points to a word list.
*
* Guarantee on calling sequence: all implicit merges are given
* before the explicit ones.
*/
void index_merge(indexdata *idx, bool is_explicit, wchar_t *tags, word *text,
filepos *fpos, errorstate *es) {
indextag *t, *existing;
/*
* For an implicit merge, we want to remove all emphasis,
* because the chances are that the user didn't really want to
* index the term as emphasised.
*/
{
word *w;
for (w = text; w; w = w->next) {
if (w->type == word_Emph || w->type == word_Strong)
w->type = word_Normal;
else if (w->type == word_EmphSpace || w->type == word_StrongSpace)
w->type = word_WhiteSpace;
else if (w->type == word_EmphQuote || w->type == word_StrongQuote)
w->type = word_Quote;
}
}
/*
* FIXME: want to warn on overlapping source sets.
*/
for (; *tags; tags = uadv(tags)) {
t = make_indextag();
t->name = tags;
existing = add234(idx->tags, t);
if (existing == t) {
/*
* Duplicate this so we can free it independently.
*/
t->name = ustrdup(tags);
/*
* Every tag has an implicit \IM. So if this tag
* doesn't exist and we're explicit, then we should
* warn (and drop it, since it won't be referenced).
*/
if (is_explicit) {
err_nosuchidxtag(es, fpos, tags);
continue;
}
/*
* Otherwise, this is a new tag with an implicit \IM.
*/
t->implicit_text = text;
t->implicit_fpos = *fpos;
} else {
if (!is_explicit) {
/*
* An implicit \IM for a tag that's had an implicit
* \IM before. FIXME: we should check the text
* against the existing text and warn on
* differences. And check the tag for case match
* against the existing tag, likewise.
*/
/*
* Check the tag against its previous occurrence to
* see if the cases match.
*/
if (ustrcmp(t->name, existing->name)) {
err_indexcase(es, fpos, t->name,
&existing->implicit_fpos, existing->name);
}
sfree(t);
} else {
/*
* An explicit \IM added to a valid tag. In
* particular, this removes the implicit \IM if
* present.
*/
sfree(t);
t = existing;
if (t->implicit_text) {
free_word_list(t->implicit_text);
t->implicit_text = NULL;
}
if (t->nexplicit >= t->explicit_size) {
t->explicit_size = t->nexplicit + 8;
t->explicit_texts = sresize(t->explicit_texts,
t->explicit_size, word *);
t->explicit_fpos = sresize(t->explicit_fpos,
t->explicit_size, filepos);
}
t->explicit_texts[t->nexplicit] = text;
t->explicit_fpos[t->nexplicit] = *fpos;
t->nexplicit++;
}
}
}
}
/*
* Build the final-form index. We now have every tag, with every
* \IM, set up in a 2-3 tree indexed by tag. We now want to collate
* the RHSes of the \IMs, and sort by final form, and decorate the
* entries in the original 2-3 tree with pointers to the RHS
* entries.
*/
void build_index(indexdata *i) {
indextag *t;
word **ta;
filepos *fa;
int ti;
int j;
for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
if (t->implicit_text) {
t->nrefs = 1;
ta = &t->implicit_text;
fa = &t->implicit_fpos;
} else {
t->nrefs = t->nexplicit;
ta = t->explicit_texts;
fa = t->explicit_fpos;
}
if (t->nrefs) {
t->refs = snewn(t->nrefs, indexentry *);
for (j = 0; j < t->nrefs; j++) {
indexentry *ent = snew(indexentry);
ent->text = *ta++;
ent->fpos = *fa++;
t->refs[j] = add234(i->entries, ent);
if (t->refs[j] != ent) /* duplicate */
sfree(ent);
}
}
}
}
void cleanup_index(indexdata *i) {
indextag *t;
indexentry *ent;
int ti;
for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
sfree(t->name);
free_word_list(t->implicit_text);
sfree(t->explicit_texts);
sfree(t->refs);
sfree(t);
}
freetree234(i->tags);
for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) {
sfree(ent);
}
freetree234(i->entries);
sfree(i);
}
static void dbg_prtwordlist(int level, word *w);
static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text);
void index_debug(indexdata *i) {
indextag *t;
indexentry *y;
int ti;
int j;
printf("\nINDEX TAGS\n==========\n\n");
for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
printf("\n");
if (t->implicit_text)
dbg_prtmerge(0, t->name, t->implicit_text);
for (j = 0; j < t->nexplicit; j++)
dbg_prtmerge(1, t->name, t->explicit_texts[j]);
}
printf("\nINDEX ENTRIES\n=============\n\n");
for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) {
printf("\n");
printf("{\n");
dbg_prtwordlist(1, y->text);
printf("}\n");
}
}
static void dbg_prtmerge(bool is_explicit, wchar_t *tag, word *text) {
printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
for (; *tag; tag++)
putchar(*tag);
printf("\" {\n");
dbg_prtwordlist(1, text);
printf("}\n");
}
static void dbg_prtwordlist(int level, word *w) {
for (; w; w = w->next) {
wchar_t *wp;
printf("%*sword %d ", level*4, "", w->type);
if (w->text) {
printf("\"");
for (wp = w->text; *wp; wp++)
putchar(*wp);
printf("\"");
} else
printf("(no text)");
if (w->alt) {
printf(" alt = {\n");
dbg_prtwordlist(level+1, w->alt);
printf("%*s}", level*4, "");
}
printf("\n");
}
}