Codebase list unrtf / debian/0.21.2-1 src / hash.c
debian/0.21.2-1

Tree @debian/0.21.2-1 (Download .tar.gz)

hash.c @debian/0.21.2-1

0c292bb
 
d5eaff9
0c292bb
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
d5eaff9
0c292bb
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
d5eaff9
 
 
0c292bb
 
d5eaff9
 
 
 
 
0c292bb
d5eaff9
 
 
0c292bb
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eaff9
 
 
0c292bb
d5eaff9
0c292bb
 
d5eaff9
0c292bb
 
 
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
 
/*=============================================================================
   GNU UnRTF, a command-line program to convert RTF documents to other formats.
   Copyright (C) 2000,2001,2004 by Zachary Smith

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

   The maintainer is reachable by electronic mail at daved@physiol.usyd.edu.au
=============================================================================*/


/*----------------------------------------------------------------------
 * Module name:    hash
 * Author name:    Zachary Smith
 * Create date:    01 Sep 00
 * Purpose:        Word-hash management. Words are put into a hash and an
 *                 identifier is returned. This is used to save us from
 *                 doing multiple mallocs for recurring strings such as
 *                 'the' and \par. This is not a big issue under Unix,
 *                 but it is under other OSes and anyway, waste not want not.
 *----------------------------------------------------------------------
 * Changes:
 * 08 Apr 01, tuorfa@yahoo.com: check for out of memory after malloc.
 * 21 Apr 01, tuorfa@yahoo.com: signed to conversion unsigned bug
 * 03 Aug 01, tuorfa@yahoo.com: fixes for using 16-bit compiler
 * 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks 
 * 08 Oct 03, daved@physiol.usyd.edu.au: some type fixes
 * 29 Mar 05, daved@physiol.usyd.edu.au: changes requsted by ZT Smith
 * 06 Jan 06, marcossamaral@terra.com.br: changes hash_stats function
 * 16 Dec 07, daved@physiol.usyd.edu.au: updated to GPL v3
 *--------------------------------------------------------------------*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "error.h"
#include "main.h"
#include "malloc.h"


typedef struct _hi {
	struct _hi *next;
	char *str;
	unsigned long value;
} HashItem;


/* Index by first char of string */
static HashItem *hash[256];
static unsigned long hash_length[256];
static unsigned long hash_value=0;



/*========================================================================
 * Name:	hash_init
 * Purpose:	Clear the hash table.
 * Args:	None.
 * Returns:	None.
 *=======================================================================*/

void
hash_init ()
{
	int i;
	for (i=0; i<256; i++) {
		hash[i]=NULL;
		hash_length[i]=0;
	}
}



/*========================================================================
 * Name:	hash_stats
 * Purpose:	Return the number of words stored. This is all words,
 * 			including commands to RTF, NOT the number of printed words in
 * 			a given document.
 * Args:	None.
 * Returns:	Number of words stored.
 *=======================================================================*/

unsigned long
hash_stats ()
{
	int i;
	unsigned long total=0;
	for (i=0; i<256; i++) {
		total += hash_length[i];
	}
	return(total);
}



/*========================================================================
 * Name:	hashitem_new
 * Purpose:	Creates a new linked list item for the hash table.
 * Args:	String.
 * Returns:	HashItem.
 *=======================================================================*/

static HashItem *
hashitem_new (char *str)
{
	HashItem *hi;
	unsigned long i;

	hi=(HashItem*) my_malloc(sizeof(HashItem));
	if (!hi)
		error_handler("Out of memory");
	memset ((void*)hi, 0, sizeof (HashItem));

	hi->str = my_strdup(str);

	i = *str;
	if (i=='\\') i=str[1];
	i <<= 24;
	hi->value = i | (hash_value++ & 0xffffff);
	hi->next = NULL;

#if 0
	if (debug_mode) {
		printf ("<!-- storing val %08lx str %s -->\n",
			hi->value, hi->str);
	}
#endif

	return hi;
}


/*========================================================================
 * Name:	hash_get_index
 * Purpose:	Given a string, returns the "index" i.e. the word identifier.
 * Args:	String.
 * Returns:	Index.
 *=======================================================================*/

unsigned long
hash_get_index (char *str)
{
#if 1 /* daved - 0.19.1 */
	unsigned short index;
	unsigned char ch;
#else
	int index;
	char ch;
#endif
	HashItem *hi;

#if 1 /* daved - 0.19.1 */
	ch = (unsigned char)*str;
#else
	ch = *str;
#endif
	if (ch=='\\' && *(str+1))
		ch = *(str+1); 
	index = ch;
	hi = hash[index];
	while (hi) {
		if (!strcmp(hi->str,str))
			return hi->value;
		hi=hi->next;
	}
	/* not in hash */
	hi = hashitem_new (str);
	hi->next = hash[index];
	hash [index] = hi;
	++hash_length [index];
	return hi->value;
}


/*========================================================================
 * Name:	hash_get_string

 * Purpose:	Given the index (word identifier) returns the word string.
 * Args:	Index.
 * Returns:	String, or NULL if not found.
 *=======================================================================*/

char*
hash_get_string (unsigned long value)
{
	int index;
	HashItem *hi;

	index = value >> 24;
	hi = hash[index];
	while (hi) {
		if (hi->value == value)
			return hi->str;
		hi=hi->next;
	}
	warning_handler("Word not in hash");
	return NULL;
}