good.c - ispell (d8401b87-e679-4da2-a163-3bb1fda51c7a/main)

Tree @d8401b87-e679-4da2-a163-3bb1fda51c7a/main (Download .tar.gz)

good.c @d8401b87-e679-4da2-a163-3bb1fda51c7a/main — raw · history · blame

#ifndef lint
static char Rcs_Id[] =
    "$Id: good.c,v 1.51 2005/04/14 14:38:23 geoff Exp $";
#endif

/*
 * good.c - see if a word or its root word
 * is in the dictionary.
 *
 * Pace Willisson, 1983
 *
 * Copyright 1992, 1993, 1999, 2001, Geoff Kuenning, Claremont, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. The code that causes the 'ispell -v' command to display a prominent
 *    link to the official ispell Web site may not be removed.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Log: good.c,v $
 * Revision 1.51  2005/04/14 14:38:23  geoff
 * Update license.
 *
 * Revision 1.50  2001/07/25 21:51:45  geoff
 * Minor license update.
 *
 * Revision 1.49  2001/07/23 20:24:03  geoff
 * Update the copyright and the license.
 *
 * Revision 1.48  2000/08/22 10:52:25  geoff
 * Fix some compiler warnings.
 *
 * Revision 1.47  1999/01/18 03:28:33  geoff
 * Turn some char declarations into unsigned char, so that we won't have
 * sign-extension problems.
 *
 * Revision 1.46  1999/01/07  01:22:36  geoff
 * Update the copyright.
 *
 * Revision 1.45  1997/12/03  06:08:38  geoff
 * In crunch mode, print the root capitalized if a prefix was stripped
 * from a capitalized word.  For example, if the input word was
 * "Uncapitalized" in English, print "Capitalized/U".
 *
 * Revision 1.44  1997/12/02  06:24:42  geoff
 * Get rid of some compile options that really shouldn't be optional.
 *
 * Revision 1.43  1994/11/02  06:56:05  geoff
 * Remove the anyword feature, which I've decided is a bad idea.
 *
 * Revision 1.42  1994/10/25  05:45:59  geoff
 * Add support for an affix that will work with any word, even if there's
 * no explicit flag.
 *
 * Revision 1.41  1994/05/24  06:23:06  geoff
 * Let tgood decide capitalization questions, rather than doing it ourselves.
 *
 * Revision 1.40  1994/05/17  06:44:10  geoff
 * Add support for controlled compound formation and the COMPOUNDONLY
 * option to affix flags.
 *
 * Revision 1.39  1994/01/25  07:11:31  geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
 *
 */

#include <ctype.h>
#include "config.h"
#include "ispell.h"
#include "proto.h"

int		good P ((ichar_t * word, int ignoreflagbits, int allhits,
		  int pfxopts, int sfxopts));
int		cap_ok P ((ichar_t * word, struct success * hit, int len));
static int	entryhasaffixes P ((struct dent * dent, struct success * hit));
void		flagpr P ((ichar_t * word, int preflag, int prestrip,
		  int preadd, int sufflag, int sufadd));

static ichar_t *	orig_word;

int good (w, ignoreflagbits, allhits, pfxopts, sfxopts)
    ichar_t *		w;		/* Word to look up */
    int			ignoreflagbits;	/* NZ to ignore affix flags in dict */
    int			allhits;	/* NZ to ignore case, get every hit */
    int			pfxopts;	/* Options to apply to prefixes */
    int			sfxopts;	/* Options to apply to suffixes */
    {
    ichar_t		nword[INPUTWORDLEN + MAXAFFIXLEN];
    register ichar_t *	p;
    register ichar_t *	q;
    register int	n;
    register struct dent * dp;

    /*
    ** Make an uppercase copy of the word we are checking.
    */
    for (p = w, q = nword;  *p;  )
	*q++ = mytoupper (*p++);
    *q = 0;
    n = q - nword;

    numhits = 0;

    if (cflag)
	{
	(void) printf ("%s", (char *) ichartosstr (w, 0));
	orig_word = w;
	}
    else if ((dp = lookup (nword, 1)) != NULL)
	{
	hits[0].dictent = dp;
	hits[0].prefix = NULL;
	hits[0].suffix = NULL;
	if (allhits  ||  cap_ok (w, &hits[0], n))
	    numhits = 1;
	/*
	 * If we're looking for compounds, and this root doesn't
	 * participate in compound formation, undo the hit.
	 */
	if (compoundflag == COMPOUND_CONTROLLED
	  &&  ((pfxopts | sfxopts) & FF_COMPOUNDONLY) != 0
	  &&  hashheader.compoundbit >= 0
	  &&  TSTMASKBIT (dp->mask, hashheader.compoundbit) == 0)
	    numhits = 0;
	}
    if (numhits  &&  !allhits)
	return 1;

    /* try stripping off affixes */

    chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);

    if (cflag)
	(void) putchar ('\n');

    return numhits;
    }

int cap_ok (word, hit, len)
    register ichar_t *		word;
    register struct success *	hit;
    int				len;
    {
    register ichar_t *		dword;
    register ichar_t *		w;
    register struct dent *	dent;
    ichar_t			dentword[INPUTWORDLEN + MAXAFFIXLEN];
    int				preadd;
    int				prestrip;
    int				sufadd;
    ichar_t *			limit;
    long			thiscap;
    long			dentcap;

    thiscap = whatcap (word);
    /*
    ** All caps is always legal, regardless of affixes.
    */
    preadd = prestrip = sufadd = 0;
    if (thiscap == ALLCAPS)
	return 1;
    else if (thiscap == FOLLOWCASE)
	{
	/* Set up some constants for the while(1) loop below */
	if (hit->prefix)
	    {
	    preadd = hit->prefix->affl;
	    prestrip = hit->prefix->stripl;
	    }
	else
	    preadd = prestrip = 0;
	sufadd = hit->suffix ? hit->suffix->affl : 0;
	}
    /*
    ** Search the variants for one that matches what we have.  Note
    ** that thiscap can't be ALLCAPS, since we already returned
    ** for that case.
    */
    dent = hit->dictent;
    for (  ;  ;  )
	{
	dentcap = captype (dent->flagfield);
	if (dentcap != thiscap)
	    {
	    if (dentcap == ANYCASE  &&  thiscap == CAPITALIZED
	     &&  entryhasaffixes (dent, hit))
		return 1;
	    }
	else				/* captypes match */
	    {
	    if (thiscap != FOLLOWCASE)
		{
		if (entryhasaffixes (dent, hit))
		    return 1;
		}
	    else
		{
		/*
		** Make sure followcase matches exactly.
		** Life is made more difficult by the
		** possibility of affixes.  Start with
		** the prefix.
		*/
		(void) strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
		dword = dentword;
		limit = word + preadd;
		if (myupper (dword[prestrip]))
		    {
		    for (w = word;  w < limit;  w++)
			{
			if (mylower (*w))
			    goto doublecontinue;
			}
		    }
		else
		    {
		    for (w = word;  w < limit;  w++)
			{
			if (myupper (*w))
			    goto doublecontinue;
			}
		    }
		dword += prestrip;
		/* Do root part of word */
		limit = dword + len - preadd - sufadd;
		while (dword < limit)
		    {
		    if (*dword++ != *w++)
		      goto doublecontinue;
		    }
		/* Do suffix */
		dword = limit - 1;
		if (myupper (*dword))
		    {
		    for (  ;  *w;  w++)
			{
			if (mylower (*w))
			    goto doublecontinue;
			}
		    }
		else
		    {
		    for (  ;  *w;  w++)
			{
			if (myupper (*w))
			    goto doublecontinue;
			}
		    }
		/*
		** All failure paths go to "doublecontinue,"
		** so if we get here it must match.
		*/
		if (entryhasaffixes (dent, hit))
		    return 1;
doublecontinue:	;
		}
	    }
	if ((dent->flagfield & MOREVARIANTS) == 0)
	    break;
	dent = dent->next;
	}

    /* No matches found */
    return 0;
    }

/*
** See if this particular capitalization (dent) is legal with these
** particular affixes.
*/
static int entryhasaffixes (dent, hit)
    register struct dent *	dent;
    register struct success *	hit;
    {

    if (hit->prefix  &&  !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
	return 0;
    if (hit->suffix  &&  !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
	return 0;
    return 1;			/* Yes, these affixes are legal */
    }

/*
 * Print a word and its flag, making sure the case of the output matches
 * the case of the original found in "orig_word".
 */
void flagpr (word, preflag, prestrip, preadd, sufflag, sufadd)
    register ichar_t *	word;		/* (Modified) word to print */
    int			preflag;	/* Prefix flag (if any) */
    int			prestrip;	/* Lth of pfx stripped off orig_word */
    int			preadd;		/* Length of prefix added to w */
    int			sufflag;	/* Suffix flag (if any) */
    int			sufadd;		/* Length of suffix added to w */
    {
    register int	i;		/* Handy loop counter */
    register ichar_t *	origp;		/* Pointer into orig_word */
    int			orig_len;	/* Length of orig_word */

    orig_len = icharlen (orig_word);
    /*
     * We refuse to print if the cases outside the modification
     * points don't match those just inside.  This prevents things
     * like "OEM's" from being turned into "OEM/S" which expands
     * only to "OEM'S".  But for prefix flags, we ignore the case of
     * the first character of the word, because capitalization of the
     * root is legal.
     */
    if (preflag > 0)
	{
	origp = orig_word + preadd;
	if (myupper (*origp))
	    {
	    for (origp = orig_word + 1;  origp < orig_word + preadd;  origp++)
		{
		if (mylower (*origp))
		    return;
		}
	    }
	else
	    {
	    for (origp = orig_word + 1;  origp < orig_word + preadd;  origp++)
		{
		if (myupper (*origp))
		    return;
		}
	    }
	}
    if (sufflag > 0)
	{
	origp = orig_word + orig_len - sufadd;
	if (myupper (origp[-1]))
	    {
	    for (  ;  *origp != 0;  origp++)
		{
		if (mylower (*origp))
		    return;
		}
	    }
	else
	    {
	    origp = orig_word + orig_len - sufadd;
	    for (  ;  *origp != 0;  origp++)
		{
		if (myupper (*origp))
		    return;
		}
	    }
	}
    /*
     * The cases are ok.  Put out the word, being careful that the
     * prefix/suffix cases match those in the original, and that the
     * unchanged characters from the original actually match it.
     */
    (void) putchar (' ');
    origp = orig_word + preadd;
    if (myupper (*origp))
	{
	for (i = prestrip;  --i >= 0;  )
	    (void) fputs (printichar ((int) *word++), stdout);
	}
    else
	{
	i = prestrip;
	if (i > 0  &&  myupper (orig_word[0]))
	    {
	    --i;
	    (void) fputs (printichar ((int) mytoupper (*word++)), stdout);
	    }
	while (--i >= 0)
	    (void) fputs (printichar ((int) mytolower (*word++)), stdout);
	}
    i = orig_len - preadd - sufadd;
    if (prestrip == 0  &&  myupper (orig_word[0]))
	{
	--i;
	(void) fputs (printichar ((int) mytoupper (*origp++)), stdout);
	word++;
	}
    for (  ;  --i >= 0;  word++)
	(void) fputs (printichar ((int) *origp++), stdout);
    if (origp > orig_word)
	origp--;
    if (myupper (*origp))
	(void) fputs ((char *) ichartosstr (word, 0), stdout);
    else
	{
	while (*word)
	    {
	    (void) fputs (printichar ((int) mytolower (*word++)), stdout);
	    }
	}
    /*
     * Now put out the flags
     */
    (void) putchar (hashheader.flagmarker);
    if (preflag > 0)
	(void) putchar (preflag);
    if (sufflag > 0)
	(void) putchar (sufflag);
    }