/*	BIC:	Bickel's Algorithm test program
		Spell Checker and Corrector

		(C)opyright May 1990, 1991 by Daniel Lawrence
		All Rights Reserved

	Revision History:
	29-mar-91	Trying some filters to improve this
*/

#define	maindef	1

#include	<stdio.h>
#include	<ctype.h>
#include	"dopt.h"
#include	"dstruct.h"
#include	"ddef.h"


#define	MWSIZE	40		/* maximum # chars in a word */

/*	letter weighing table	*/

char lweight[26] = {
	/* a */	3,
	/* b */	6,
	/* c */	5,
	/* d */	4,
	/* e */	3,
	/* f */	5,
	/* g */	5,
	/* h */	4,
	/* i */	3,
	/* j */	8,
	/* k */	7,
	/* l */	4,
	/* m */	5,
	/* n */	3,
	/* o */	3,
	/* p */	5,
	/* q */	7,
	/* r */	4,
	/* s */	3,
	/* t */	3,
	/* u */	4,
	/* v */	6,
	/* w */	5,
	/* x */	8,
	/* y */	8,
	/* z */	9
};

/*	some limits	*/
#define	MAXMATCHES	50

/*	local prototypes	*/

#if	TURBO
char *cleanword(char *word);
char *sortword(char *word);
char *intersect(char *word1, char *word2);
int weight(char *cletters);
int mismatch(char *, char *);
#else
char *cleanword();
char *sortword();
char *intersect();
int weight();
int mismatch();
#endif

/*	GLOBALS for BIC	*/

char testwrd[MWSIZE];		/* word to test against dictionary */
char sortwrd[MWSIZE];		/* sorted letters of testwrd */
char dictwrd[MWSIZE];		/* dictionary word to compare */
int likeness;			/* likeness score of current word */
int threshold;			/* threshold to display likeness */
int max_matches;		/* maximum number of matches */
char matches[MAXMATCHES][128];	/* list of current matches */
int match_scores[MAXMATCHES];	/* difference scores for these */
int swterse = FALSE;		/* called from EMACS? */
int swverbose = FALSE;		/* display extra info? */
int exact_match = FALSE;	/* is this word in the dictionary? */

main(argc, argv)

int argc;	/* command line argument count */
char **argv;	/*              argument vector */

{
	register int dif_value;	/* difference value of current word */
	register char *sortptr;	/* ptr to the sorted cleaned dictionary word */
	register char first_char; /* first character in word to test */

	max_matches = 12;	/* default list length! */

	if (argc < 2) {
		usage();
		exit(EXBADOPT);
	}

	/* deal with the command options */
	while (argc > 1) {

		/* grab the next option */
		--argc;
		argv++;
		strcpy(testwrd, argv[0]);

		if (*testwrd == '-') {

			switch (testwrd[1]) {

				case 'm':
					--argc;
					argv++;
					max_matches = atoi(argv[0]);
					break;

				case 't':
					swterse = TRUE;
					break;

				case 'v':
					swverbose = TRUE;
					break;

				default:
					usage();
					exit(EXBADOPT);
			}
		}

	}

	/* announce us */
	if (swterse == FALSE)
		printf("BIC %s	Bickel's Test Algorithm\n", VERSION);

	/* initialize the lcase array */
	init_lcase();

	/* grab the test word */
	strcpy(sortwrd, sortword(cleanword(testwrd)));
	threshold = weight(intersect(sortwrd, sortwrd)) - 4;
	if (swverbose)
		printf("threshold = %u\n", threshold);

	/* initailize the list of matches */
	init_match();

	/* open the main dictionary */
	if (mopen() == FALSE)
		exit(EXMDICT);

	first_char = lcase[*testwrd];
	skipdict(first_char);
	strcpy(dictwrd, nxtmword());

	/* scan the dictionary */
	while (lcase[*dictwrd] == first_char) {

		/* dump them if off by more than 3 chars! */
		if (abs(strlen(dictwrd) - strlen(testwrd)) < 4) {

			/* clean and sort the word */
			sortptr = sortword(cleanword(dictwrd));

			/* calculate the likeness score */
			likeness = weight(intersect(sortwrd, sortptr));

			/* tell us about it */
			if (likeness > threshold) {

				/* are these nearly alike? */
				if ((dif_value = mismatch(sortwrd, sortptr)) < 20) {
					add_match(dictwrd, dif_value);

					/* is this the word? */
					if (strcmp(testwrd, dictwrd) == 0) {
						exact_match = TRUE;
						if (swterse) {
							printf("EXACT MATCH\n");
							mclose();
							exit(EXGOOD);
						}
					}
	
				}
			}
		}

		/* get a dictionary word */
nextword:	strcpy(dictwrd, nxtmword());

		/* at the end... skip out */
		if (strcmp(dictwrd, hivalue) == 0)
			break;
	}

	/* close the main dictionary */
	mclose();

	/* show us the results */
	show_match();

	/* error code of EXMISS if word is not in dictionary */
	if (exact_match) {
		exit(EXGOOD);
	} else {
		exit(EXMISS);
	}
}

usage()		/* print the command line usage */

{
	printf("BIC:	Bickel's Algorithm Test Program\n");
	printf("	for MicroSPELL %s\n",VERSION);
	puts("\nUsage\n");
	puts("	BIC {-t|-v} {-m <n>} <test word>\n");
	puts("		-m <n>	return top <n> suggestions / default = 12");
	puts("		-t	terse mode");
	puts("		-v	verbose mode");
}


#if	RAMSIZE & LATTICE & MSDOS
/*	These routines will allow me to track memory usage by placing
	a layer on top of the standard system malloc() and free() calls.
	with this code defined, the number of allocated bytes is displayed
	in the upper right corner of the screen
*/

#undef	malloc
#undef	free

char *allocate(nbytes)	/* allocate nbytes and track */

unsigned nbytes;	/* # of bytes to allocate */

{
	char *mp;	/* ptr returned from malloc */
	char *malloc();

	mp = malloc(nbytes);
	if (mp) {
		envram += nbytes;
#if	RAMSHOW
		dspram();
#endif
	}

	return(mp);
}

release(mp)	/* release malloced memory and track */

char *mp;	/* chunk of RAM to release */

{
	unsigned *lp;	/* ptr to the long containing the block size */

	if (mp) {
		lp = ((unsigned *)mp) - 1;

		/* update amount of ram currently malloced */
		envram -= (long)*lp - 2;
		free(mp);
#if	RAMSHOW
		dspram();
#endif
	}
}

#if	RAMSHOW
dspram()	/* display the amount of RAM currently malloced */

{
	char mbuf[20];
	char *sp;

/*	TTmove(term.t_nrow - 1, 70);*/
	sprintf(mbuf, "[%lu]", envram);
	sp = &mbuf[0];
	puts(sp);
}
#endif
#endif

#if	AZTEC & MSDOS
#undef	fgetc
/*	a1getc:		Get an ascii char from the file input stream
			but DO NOT strip the high bit
*/

int a1getc(fp)

FILE *fp;

{
	int c;		/* translated character */

	c = getc(fp);	/* get the character */

	/* if its a <LF> char, throw it out  */
	while (c == 10)
		c = getc(fp);

	/* if its a <RETURN> char, change it to a LF */
	if (c == '\r')
		c = '\n';

	/* if its a ^Z, its an EOF */
	if (c == 26)
		c = EOF;

	return(c);
}
#endif

#if	CMS
#undef	fopen
/*	The IBM 30xx likes to tell us when file opens
	fail...it's too chatty....I like to handle these myself	*/

FILE *cmsopen(file, mode)

char *file;	/* name of file to open */
char *mode;	/* mode to open it in */

{
	quiet(1);
	return(fopen(file,mode));
}
#endif

/**********************************************************************/

/*	clean word:	dump all the non alpha chars, and make them
			lowercase only
*/

char *cleanword(word)

char *word;	/* word to clean */

{
	register char *sp;		/* ptr into return buffer */
	static char rbuf[MWSIZE];	/* return buffer */

	/* scan through the word */
	sp = rbuf;
	while (*word) {
		if (isalpha(*word)) {
			if (islower(*word))
				*sp++ = *word;
			else
				*sp++ = *word + 32;
		}

		++word;
	}

	*sp = 0;
	return(rbuf);
}

/*	Sort word:  bubble sort the letters in a word....
		    average word size is 4.5 chars. Bubble sort
		    is most efficient at that size
*/

char *sortword(word)

char *word;	/* word to sort letters in */

{
	register int i, j;		/* bubble sort indexes */
	register int temp;		/* temp for swap */
	register int swapflag;		/* did a swap occur? */
	static char sword[MWSIZE];	/* return buffer */

	/* copy the word to the return buffer */
	strcpy(sword, word);

	/* bubble sort it */
	for (i = strlen(sword) - 1; i > 0; i--) {

		/* scan through the list once */
		swapflag = FALSE;
		for (j = 0; j < i; j++) {

			/* compare */
			if (sword[j] > sword[j + 1]) {

				temp = sword[j];
				sword[j] = sword[j + 1];
				sword[j + 1] = temp;
				swapflag = TRUE;
			}
		}

		/* if no movement occurred, we are done early */
		if (swapflag == FALSE)
			break;
	}

	return(sword);
}

/*	intersect:	find all letters occuring in 2 words	*/

char *intersect(word1, word2)

char *word1;	/* first word to intersect with */
char *word2;	/*     second word */

{
	register char *iptr;		/* ptr into return buffer */
	static char rbuf[MWSIZE];	/* return buffer */

	/* scan through both words */
	iptr = rbuf;
	while (*word1 && *word2) {

		/* if they are the same, record it */
		if (*word1 == *word2) {
			*iptr = *word1++;
			++word2;

			/* skip duplicates */
			while (*word1 == *iptr)
				++word1;
			while (*word2 == *iptr)
				++word2;

			++iptr;

			continue;
		}

		/* advance the appropriate word */
		if (*word1 < *word2)
			++word1;
		else
			++word2;
	}

	/* terminate and return the intersection */
	*iptr = 0;
	return(rbuf);
}

/*	weight:	using the common letters between two words, apply
		bickels weighing function to produce a sameness score
*/

int weight(cletters)

char *cletters;		/* common letters between two words */

{
	int result;	/* resulting weighing factor */

	/* scan the letters, add up the result */
	result = 0;
	while (*cletters)
		result += lweight[*cletters++ - 'a'];

	return(result);
}

/* How many characters are different between these words? */

int mismatch(word1, word2)

char *word1;	/* first word to compare */
char *word2;	/* second word to compare */

{
	int count;	/* count of mismatched characters */

	count = 0;
	while (*word1) {

		/* at the end of word2? */
		if (*word2 == 0) {
			count += lweight[*word1-'a'];
			++word1;
		} else {

			/* a match, advance */
			if (*word1 == *word2) {
				++word1;
				++word2;
			} else {
				if (*word1 < *word2) {
					count += lweight[*word1-'a'];
					++word1;
				} else {
					count += lweight[*word2-'a'];
					++word2;
				}
			}
		}
	}

	/* past the rest */
	while (*word2) {
		++count;
		count += lweight[*word2-'a'];
		++word2;
	}

	return(count);
}

init_match()

{
	register int index;

	for (index = 0; index < max_matches; index++) {
		matches[index][0] = 0;
		match_scores[index] = 1000;
	}
}

add_match(word, score)

char *word;	/* word to add to match list */
int score;	/* score of that word */

{
	register int index;	/* ptr into match list */
	register int rest;	/* ptr into rest of list to move */

	for (index = 0; index < max_matches; index++) {
		if (match_scores[index] > score) {
			for (rest = max_matches; rest > index; --rest) {
				match_scores[rest] = match_scores[rest-1];
				strcpy(matches[rest], matches[rest-1]);
			}
			match_scores[index] = score;
			strcpy(matches[index], word);
			return;
		}
	}
}

show_match()

{
	register int index;
	register int split;

	if (swterse == FALSE) {
		for (index = 0; index < max_matches; index++) {
			if (match_scores[index] > 999)
				return;
			if (swverbose)
				printf("[%u] %s\n", match_scores[index], matches[index]);
			else
				printf("%s\n", matches[index]);
		}
	} else {
		split = (max_matches + 2) / 3;
		for (index = 0; index < split; index++) {
			printf("%20s %20s %20s \n",
				matches[index],
				matches[index + split],
				matches[index + split * 2]);
		}
	}
}	
