src/word.c - unrtf (bd13ae3a-94e6-40bf-b472-38179327dae8/main)

Tree @bd13ae3a-94e6-40bf-b472-38179327dae8/main (Download .tar.gz)

word.c @bd13ae3a-94e6-40bf-b472-38179327dae8/main

0c292bb
 
d5eaff9
0c292bb
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
d5eaff9
0c292bb
d5eaff9
0c292bb
 
 
 
 
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dff09ba
d5eaff9
 
0c292bb
 
d5eaff9
 
 
0c292bb
d5eaff9
0c292bb
d5eaff9
 
 
0c292bb
d5eaff9
 
 
0c292bb
d5eaff9
 
 
0c292bb
d5eaff9
0c292bb
 
 
 
 
 
 
 
 
 
 
dff09ba
0c292bb
 
 
 
 
d5eaff9
0c292bb
 
 
 
641be09
dff09ba
 
0c292bb
641be09
0c292bb
 
 
 
 
 
 
 
 
 
 
dff09ba
 
 
 
0c292bb
 
 
dff09ba
 
 
 
 
 
 
 
0c292bb
dff09ba
 
 
 
0c292bb
 
 
 
 
 
d5eaff9
 
0c292bb
 
 
 
dff09ba
 
0c292bb
 
 
 
 
dff09ba
 
0c292bb
 
dff09ba
0c292bb
dff09ba
0c292bb
 
 
dff09ba
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dff09ba
0c292bb
 
 
dff09ba
 
 
 
 
 
 
 
 
 
0c292bb
 
 
 
 
 
 
 
 
 
 
 
 
 
dff09ba
0c292bb
641be09
0c292bb
 
 
dff09ba
0c292bb
dff09ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c292bb
 
dff09ba
 
 
0c292bb
 
 
 
 
 
 
d5eaff9
 
dff09ba
 
d5eaff9
 
 
 
641be09
d5eaff9
641be09
d5eaff9
 
 
641be09
d5eaff9
dff09ba
 
641be09
dff09ba
641be09
dff09ba
d5eaff9
 
 
641be09
d5eaff9
 
 
 
 
 
 
dff09ba
d5eaff9
dff09ba
 
 
 
 
d5eaff9
 
 
 
 
 
 
 
 
 
 
 
 
 
dff09ba
d5eaff9
dff09ba
d5eaff9
 
 
 
dff09ba
 
 
d5eaff9
 
 
 
4329b02
d5eaff9
 
 
0c292bb

/*=============================================================================
   GNU UnRTF, a command-line program to convert RTF documents to other formats.
   Copyright (C) 2000,2001,2004 by Zachary Smith

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

   The maintainer is reachable by electronic mail at daved@physiol.usyd.edu.au
=============================================================================*/


/*----------------------------------------------------------------------
 * Module name:    word
 * Author name:    Zachary Smith
 * Create date:    01 Sep 00
 * Purpose:        Management of Word objects, which contain strings
 *                 as well as other Words.
 *----------------------------------------------------------------------
 * Changes:
 * 14 Oct 00, tuorfa@yahoo.com: fixed \fs bug (# is 2X the point size).
 * 14 Oct 00, tuorfa@yahoo.com: fixed table data printing.
 * 14 Oct 00, tuorfa@yahoo.com: protection against null entries in \info
 * 14 Oct 00, tuorfa@yahoo.com: fixed printing of <body> again
 * 14 Oct 00, tuorfa@yahoo.com: fixed closure of tables
 * 15 Oct 00, tuorfa@yahoo.com: fixed font attributes preceding <tr><td>
 * 15 Oct 00, tuorfa@yahoo.com: attributes now continue if >1 \cell in group
 * 15 Oct 00, tuorfa@yahoo.com: fixed font-size bug, lack of </head>
 *  7 Nov 00, tuorfa@yahoo.com: fixed \'## translation bug
 *  8 Apr 01, tuorfa@yahoo.com: added check for out of memory after malloc
 * 21 Apr 01, tuorfa@yahoo.com: bug fixes regarding author, date
 * 21 Apr 01, tuorfa@yahoo.com: added paragraph alignment
 * 21 Apr 01, tuorfa@yahoo.com: fix for words getting lost after \par
 * 24 Jul 01, tuorfa@yahoo.com: moved conversion code to convert.c
 * 22 Sep 01, tuorfa@yahoo.com: moved word_dump to here from parse.c
 * 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
 * 29 Mar 05, daved@physiol.usyd.edu.au: changes requested by ZT Smith
 * 16 Dec 07, daved@physiol.usyd.edu.au: updated to GPL v3
 *--------------------------------------------------------------------*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "defs.h"
#include "parse.h"
#include "malloc.h"
#include "main.h"
#include "error.h"
#include "word.h"
#include "hash.h"


/* For word_dump: current indentation level. word_dump() adds 2 on entry and
 * subtracts 2 before returning; the value is handed to print_indentation(). */
static int indent_level = 0;


/*========================================================================
 * Name:	word_string
 * Purpose:	Returns the string attached to a Word object, i.e. the
 *			current value of its str field.
 * Args:	Word* (checked with CHECK_PARAM_NOT_NULL).
 * Returns:	The Word's string; callers such as word_dump() treat a NULL
 *			result as "this Word has no string".
 *=======================================================================*/

const char *
word_string(Word *w)
{
	const char *text;

	CHECK_PARAM_NOT_NULL(w);

	text = w->str;
	return text;
}



/*========================================================================
 * Name:	word_new
 * Purpose:	Instantiates a new Word object with all of its bytes zeroed.
 * Args:	String to attach to the Word, or NULL for a Word without a
 *			string. A non-NULL string is passed through hash_store() and
 *			the value hash_store() returns is what the Word keeps.
 * Returns:	Word*.
 *=======================================================================*/

Word *
word_new(char *str)
{
	Word *w;

	w = (Word *) my_malloc(sizeof(Word));
	if (!w)
	{
		/* NOTE(review): error_handler() is presumed not to return -- confirm.
		 * This is the only place an allocation failure can be reported: the
		 * memset() below already writes through w, so any later NULL test
		 * would be unreachable. */
		error_handler("out of memory");
	}

	/* Zero the whole structure before selectively filling it in. */
	memset(w, 0, sizeof *w);

	if (str)
	{
		w->str = hash_store(str);
	}

	return w;
}

/*========================================================================
 * Name:	word_free
 * Purpose:	Deallocates a list of Word objects: every Word reachable
 *			through the next links and, recursively, everything hanging
 *			off each child link. Only the Word structures are passed to
 *			my_free(); the strings they point at are not released here.
 *			Per the original author's note this is meant to be called
 *			at the end of main(), after all output is complete.
 * Args:	Word* heading the list (checked with CHECK_PARAM_NOT_NULL).
 * Returns:	None.
 *=======================================================================*/

void word_free(Word *w)
{
	CHECK_PARAM_NOT_NULL(w);

	while (w != NULL)
	{
		/* Remember the sibling now; w itself is released below. */
		Word *following = w->next;

		if (w->child != NULL)
		{
			word_free(w->child);
		}

		my_free((char *) w);
		w = following;
	}
}





/*========================================================================
 * Name:	print_indentation
 * Purpose:	Prints padding for the word_dump routine: one ". " marker
 *			for every two units of a positive level, or a horizontal
 *			separator line when the level is zero.
 * Args:	Indentation level.
 * Returns:	None.
 *=======================================================================*/

static void
print_indentation(int level)
{
	int remaining;

	if (level == 0)
	{
		printf("\n-----------------------------------------------------------------------\n\n");
		return;
	}

	/* A negative level prints nothing at all. */
	for (remaining = level; remaining > 0; remaining -= 2)
	{
		printf(". ");
	}
}




/*========================================================================
 * Name:	word_dump
 * Purpose:	Recursive diagnostic routine to print out a tree of words.
 *			Each Word with a string is printed in double quotes; a Word
 *			without a string is expected to have a child list, which is
 *			dumped recursively one indentation step (2) deeper. A Word
 *			with neither string nor child triggers a warning.
 * Args:	Word tree (checked with CHECK_PARAM_NOT_NULL).
 * Returns:	None.
 *=======================================================================*/

void
word_dump(Word *w)
{
	Word *cur;

	CHECK_PARAM_NOT_NULL(w);

	printf("\n");
	indent_level += 2;
	print_indentation(indent_level);

	for (cur = w; cur != NULL; cur = cur->next)
	{
		const char *text = word_string(cur);

		if (text != NULL)
		{
			printf("\"%s\" ", text);
			continue;
		}

		if (cur->child == NULL)
		{
			warning_handler("Word object has no string and no children");
			continue;
		}

		/* Nested group: dump it, then resume this level on a fresh line. */
		word_dump(cur->child);
		printf("\n");
		print_indentation(indent_level);
	}

	indent_level -= 2;
}

/*========================================================================
 * Name:	optimize_word
 * Purpose:	Function tries to optimize group by eliminating redundant commands
 *   consecutively setting same attribute to same value. For every tag
 *   listed in OPT_ARRAY the most recently seen command string is remembered
 *   in a Collection; a later Word in the same list carrying exactly the
 *   same string is unlinked and freed. Child lists are optimized
 *   recursively, each with its own Collection.
 * Args:	Word to optimize (head of a sibling list; may be NULL).
 *   depth: current nesting depth; once it exceeds MAX_GROUP_DEPTH the list
 *   is returned untouched after a warning.
 * Returns:	Optimized word. This is always the Word that was passed in,
 *   because the head of a list is never removed.
 *=======================================================================*/
Word *
optimize_word(Word *w, int depth)
{
	const char *s, *s1;
	int i = 0;
	size_t len;
	Collection *c = NULL;
	Tag tags_to_opt[] = OPT_ARRAY;
	Word *root = w, *w2 = NULL;

	if (depth > MAX_GROUP_DEPTH)
	{
		/* Have to be reasonable at some point */
		warning_handler("Max group depth reached");
		return w;
	}

	for (; w != NULL; w = w->next)
	{
		if ((s = word_string(w)))
		{
			/* Find the first optimizable tag matching this Word; i ends on
			 * the terminating empty-name entry when nothing matches. */
			for (i = 0; tags_to_opt[i].name[0] != '\0'; i++)
			{
				if (tags_to_opt[i].has_param)
				{
					/* Parameterised tag: the name must be followed by a
					 * digit or a minus sign. The cast keeps isdigit() well
					 * defined for bytes that are negative as plain char. */
					len = strlen(tags_to_opt[i].name);
					if (!strncmp(tags_to_opt[i].name, s, len)
					    && (isdigit((unsigned char) s[len]) || s[len] == '-'))
					{
						break;
					}
				}
				else if (!strcmp(tags_to_opt[i].name, s))
				{
					break;
				}
			}

			if (tags_to_opt[i].name[0] != '\0')
			{
				s1 = get_from_collection(c, i);

				/* w2 is the previously kept Word; it is NULL only for the
				 * head of the list, which therefore is never unlinked. */
				if (w2 != NULL && s1 != NULL && !strcmp(s1, s))
				{
					/* Same tag, same value as last time: drop this Word.
					 * Only the node is released here.
					 * NOTE(review): if a tag Word could ever own a child,
					 * that subtree would be leaked -- confirm it cannot. */
					w2->next = w->next;
					my_free((char *)w);
					w = w2;

					/* Resume from the predecessor without falling through:
					 * its child list has already been optimized and must
					 * not be processed again for every removed duplicate. */
					continue;
				}

				c = add_to_collection(c, i, s);
			}
		}

		if (w->child != NULL)
		{
			w->child = optimize_word(w->child, depth + 1);
		}

		w2 = w;
	}

	free_collection(c);

	return root;
}