src/word.c - unrtf (upstream/0.21.5)

Tree @upstream/0.21.5 (Download .tar.gz)

word.c @upstream/0.21.5 — raw · history · blame

/*=============================================================================
   GNU UnRTF, a command-line program to convert RTF documents to other formats.
   Copyright (C) 2000,2001,2004 by Zachary Smith

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

   The maintainer is reachable by electronic mail at daved@physiol.usyd.edu.au
=============================================================================*/


/*----------------------------------------------------------------------
 * Module name:    word
 * Author name:    Zachary Smith
 * Create date:    01 Sep 00
 * Purpose:        Management of Word objects, which contain strings
 *                 as well as other Words.
 *----------------------------------------------------------------------
 * Changes:
 * 14 Oct 00, tuorfa@yahoo.com: fixed \fs bug (# is 2X the point size).
 * 14 Oct 00, tuorfa@yahoo.com: fixed table data printing.
 * 14 Oct 00, tuorfa@yahoo.com: protection against null entries in \info
 * 14 Oct 00, tuorfa@yahoo.com: fixed printing of <body> again
 * 14 Oct 00, tuorfa@yahoo.com: fixed closure of tables
 * 15 Oct 00, tuorfa@yahoo.com: fixed font attributes preceding <tr><td>
 * 15 Oct 00, tuorfa@yahoo.com: attributes now continue if >1 \cell in group
 * 15 Oct 00, tuorfa@yahoo.com: fixed font-size bug, lack of </head>
 *  7 Nov 00, tuorfa@yahoo.com: fixed \'## translatin bug
 *  8 Apr 01, tuorfa@yahoo.com: added check for out of memory after malloc
 * 21 Apr 01, tuorfa@yahoo.com: bug fixes regarding author, date
 * 21 Apr 01, tuorfa@yahoo.com: added paragraph alignment
 * 21 Apr 01, tuorfa@yahoo.com: fix for words getting lost after \par
 * 24 Jul 01, tuorfa@yahoo.com: moved conversion code to convert.c
 * 22 Sep 01, tuorfa@yahoo.com: moved word_dump to here from parse.c
 * 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks 
 * 29 Mar 05, daved@physiol.usyd.edu.au: changes requested by ZT Smith
 * 16 Dec 07, daved@physiol.usyd.edu.au: updated to GPL v3
 *--------------------------------------------------------------------*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "defs.h"
#include "parse.h"
#include "malloc.h"
#include "main.h"
#include "error.h"
#include "word.h"
#include "hash.h"


/* For word_dump */
static int indent_level=0;


/*========================================================================
 * Name:	word_string
 * Purpose:	Obtains the string of a Word object. This involves accessing
 *			the Word hash.
 * Args:	Word*.
 * Returns:	String.
 *=======================================================================*/

char *
word_string (Word *w) {
	char *str;
	CHECK_PARAM_NOT_NULL(w);
	if (w->hash_index) str = hash_get_string (w->hash_index);
	else str = NULL;
	return str;
}



/*========================================================================
 * Name:	word_new
 * Purpose:	Instantiates a new Word object.
 * Args:	String.
 * Returns:	Word*.
 *=======================================================================*/

Word * 
word_new (char *str) {
	Word * w;

	w = (Word *) my_malloc(sizeof(Word));
	if (!w)
		error_handler ("out of memory");
	memset ((void*) w, 0, sizeof(Word));
	if (!w) error_handler ("cannot allocate a Word");

	if (str)  w->hash_index = hash_get_index (str);
	else w->hash_index = 0;

	return w;
}




/*========================================================================
 * Name:	word_free
 * Purpose:	Deallocates a Word object. This is only called at the end of
 * 			main(), after everything is processed and output complete.
 * Args:	Word.
 * Returns:	None.
 *=======================================================================*/

void word_free (Word *w) {
	Word *prev;
	Word *w2;

	CHECK_PARAM_NOT_NULL(w);

	while (w) {
		w2 = w->child;
		if (w2)
			word_free(w2);

		prev = w;
		w = w->next;
		my_free((char*) prev);
	}
}





/*========================================================================
 * Name:	print_indentation
 * Purpose:	Prints padding for the word_dump routine.
 * Args:	Identation level.
 * Returns:	None.
 *=======================================================================*/

static void
print_indentation (int level)
{
	int i;

	if (level) {
		for (i=0;i<level;i+=2)
			printf (". ");
	} else {
		printf ("\n-----------------------------------------------------------------------\n\n");
	}
}




/*========================================================================
 * Name:	word_dump
 * Purpose:	Recursive diagnostic routine to print out a tree of words.
 * Args:	Word tree.
 * Returns:	None.
 *=======================================================================*/

void
word_dump (Word *w)
{
	char *s;

	CHECK_PARAM_NOT_NULL(w);

	printf ("\n");
	indent_level += 2;
	print_indentation (indent_level);

	while (w) {
		s = word_string (w);
		if (s) { 
			printf ("\"%s\" ", s);
		} else { 
			if (w->child) {
				word_dump (w->child); 
				printf ("\n");
				print_indentation (indent_level);
			}
			else
				warning_handler ("Word object has no string and no children");
		}
		w = w->next;
	}

	indent_level -= 2;
}

/*========================================================================
 * Name:	optimize_word
 * Purpose:	Function tries to optimize Word.
 * Args:	Word to optimize.
 * Returns:	Optimized word.
 *=======================================================================*/
Word *
optimize_word(Word *w)
{
	char *s, *s1;
	int i = 0, len;
	Collection *c = NULL;
	Tag tags_to_opt[] = OPT_ARRAY;
	Word *root = w, *w2;

	for (; w != NULL; w = w->next)
	{

		if (s = word_string(w))
		{
			for (i = 0; tags_to_opt[i].name[0] != '\0'; i++)
			{
				if (tags_to_opt[i].has_param)
				{
					len = strlen(tags_to_opt[i].name);
					if (!strncmp(tags_to_opt[i].name, s, len) && (isdigit(s[len]) || s[len] == '-'))
						break;
				}
				else
					if (!strcmp(tags_to_opt[i].name, s))
						break;					
			}

			if (tags_to_opt[i].name[0] != '\0')
			{
				s1 = get_from_collection(c, i);

				if (s != NULL && s1 != NULL && !strcmp(s1, s))
				{
					w2->next = w->next;
					my_free((char *)w);
					w = w2;
				}
				else
					c = add_to_collection(c, i, s);
			}
		}

		if (w->child != NULL)
			w->child = optimize_word(w->child);

		w2 = w;
	}

	free_collection(c);

	return root;
}