The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.02 on page 143

Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters but different somewhere thereafter. Don't count words within strings and comments. Make 6 a parameter that can be set from the command line.

Solution by Barrett Drawdy

/*
 * Exercise 6-2 in K&R2 on page 143.  By: Barrett Drawdy
 * Write a program that reads a C program and prints in alphabetical
 * order each group of variable names that are identical in the first
 * 6 characters, but different somewhere thereafter.  Don't count words
 * within strings and comments.  Make 6 a parameter that can be set
 * from the command line.
 */
 
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>

#define MAXWORD 1000     /* longest word that can be read by getword */
#define DEFAULT_COMP_LEN 6  /* default length to compare */

/*
 * tnode: Tree node from K&R2 page 140.  Words are initially read into
 * the tree by getword.
 */
struct tnode
{
	char *word;
	int count;
	struct tnode *left;
	struct tnode *right;
};

/*
 * simroot: Part of a linked list of pointers to simword lists with
 * a common root.
 */
struct simroot
{
	struct simword *firstword; /* points to the list of words */
	struct simroot *nextroot;  /* points to the next node in the list */
};

/*
 * simword: Part of a linked list of words with a common root.  Points
 * to the word in the tnodes.
 */
struct simword
{
	char *word;   /* points to the word in the tree */
	int count;    /* copied from the tree */
	struct simword *nextword; /* next node */
};

struct tnode *addtree(struct tnode *, char *);
void treeprint(const struct tnode *);
int getword(char *, int);
struct simroot *addroot(struct simroot *, struct tnode *, int);
struct simroot *parse(struct tnode *, int);
void printlist(struct simroot *, int);
void printwords(struct simword *);

int main(int argc, char *argv[])
{
	struct tnode *root;
	char word[MAXWORD];
	struct simroot *listroot;
	int len;
	
	/* get all the words */
	root = NULL;
	while(getword(word, MAXWORD) != EOF)
		if(isalpha(word[0]))
			root = addtree(root, word);
	
	if(argc == 1)
		len = DEFAULT_COMP_LEN;
	else if(argc == 2)
		len = atoi(argv[1]);
	else {
		printf("Incorrect number of arguments.\n");
		return 1;
	}
	
	/* parse the tree to find similar words and populate list */
	listroot = NULL;
	listroot = parse(root, len);
	treeprint(root);               /* prints all the words */
	printf("\nDuplicate list:\n\n");
	printlist(listroot, len);      /* prints the similar words */
	
	return 0;
} /* end of main() */

/*
 * parse: Reads the tree created by getword.  It finds words that are
 * similar in the first len letters and places them in the list structure
 * that it creates.
 */
struct simroot *parse(struct tnode *node, int len)
{
	struct tnode *temp; /* pointer to the children of the node */
	int this_len;       /* length of the word in the current node */
	static struct simroot *root = NULL; /* points to the root of the tree */
	
	if(node == NULL)
		return NULL;
	
	this_len = strlen(node->word);
	
	parse(node->left, len); /* start in the left subtree */
	
	temp = node->left;      /* find the closest left child and compare */
	if(temp != NULL) {
		while(temp->right != NULL)
			temp = temp->right;
		/* if the word matches, put both words in the list */
		if(this_len >= len && strncmp(temp->word, node->word, len) == 0) {
			root = addroot(root, temp, len);
			addroot(root, node, len);
		}
	}
	
	temp = node->right;    /* find the closest right child and compare */
	if(temp != NULL) {
		while(temp->left != NULL)
			temp = temp->left;
		/* if the word matches, put both words in the list */
		if(this_len >= len && strncmp(temp->word, node->word, len) == 0) {
			root = addroot(root, node, len);
			addroot(root, temp, len);
		}
	}
	
	parse(node->right, len);  /* continue on to right subtree */
	
	return root;
}

/*
 * printlist: Prints the roots that were similar, followed by the list
 * of words.
 */
void printlist(struct simroot *p, int len)
{
	int i;
	if(p == NULL)
		return;
	for(i = 0; i < len; ++i)    /* print the root */
		putchar(p->firstword->word[i]);
	putchar('\n');
	printwords(p->firstword);   /* print the list of words */
	printlist(p->nextroot, len); /* print the next root/list */
}

/*
 * printword: Prints the list of words with the same roots.
 */
void printwords(struct simword *p)
{
	printf("  %4d %s\n", p->count, p->word);
	if(p->nextword != NULL)
		printwords(p->nextword);
}

struct tnode *talloc(void);
char *strdup(char *);
struct simword *walloc(struct simword *, struct tnode *);
void addword(struct simword *, struct tnode *);

/*
 * addroot: When a node n is passed to addroot, n->word is compared to the
 * first word in ps list.  If they have are the same for the first 'len'
 * characters, then the word is added to that list.  Otherwise, it is passed
 * along the simroots until it reaches the end, where a new simroot is 
 * created if the word has a new root.
 */
struct simroot *addroot(struct simroot *p, struct tnode *n, int len)
{
	/* end of list, create a new root */
	if(p == NULL) {
		p = (struct simroot *) malloc(sizeof(struct simroot));
		p->nextroot = NULL;
		p->firstword = walloc(p->firstword, n);
	}
	/* word belongs to this list, add it */
	else if(strncmp(p->firstword->word, n->word, len) == 0)
		addword(p->firstword, n);
	/* haven't found the right root or end yet */
	else
		p->nextroot = addroot(p->nextroot, n, len);
	return p;
}

/*
 * addword: Compares the word from n to the word in p.  If they are the same,
 * the word was already added and it returns.  This continues down the list
 * until the end, where a new node is created if the word is new.
 */
void addword(struct simword *p, struct tnode *n)
{
	/* word was already added */
	if(strcmp(p->word, n->word) == 0)
		return;
	/* end of list. create a new node */
	if(p->nextword == NULL)
		p->nextword = walloc(p->nextword, n);
	/* haven't reached the end yet */
	else
		addword(p->nextword, n);
}

/* walloc: Creates a new simword node. */
struct simword *walloc(struct simword *p, struct tnode *n)
{
	p = (struct simword *) malloc(sizeof(struct simword));
	if(p != NULL) {
		p->word = n->word;
		p->count = n->count;
		p->nextword = NULL;
	}
	return p;
}

/*
 * getword:  Modified from the original on page 136 of K&R2 for exercise
 * 6-1.  Get next word or character from input.
 * Returns EOF, first character of word for strings, or other non-alpha
 * characters.  The function ignores pre-processor directives, ANSI C
 * style comments, and string literals.
 */
int getword(char *word, int lim)
{
	int c, getch(void);
	void ungetch(int);
	char *w = word;
	static int line_beg = 1;    /* 1 at beginning of a new line */
	static int after_slash = 0; /* 1 after '\' */
	int after_star = 0;         /* 1 after '*' */
	
	if(isspace(c = getch()))
		after_slash = 0;
	while(isspace(c)) {
		if(c == '\n')
			line_beg = 1;
		c = getch();
	}
	
	if(c != EOF)
		*w++ = c;
	
	if(c == '#' && line_beg == 1) {            /* preprocessor directive */
		while((c = getch()) != '\n' && c != EOF)    /* go to end of line */
			;
		return getword(word, lim);              /* start over */
	}
	line_beg = 0;
	
	if(c == '\\')                              /* set after_slash flag */
		after_slash = after_slash ? 0 : 1;         /*ignore \\ */
	
	else if(c == '/' ) {
		if((c = getch()) == '*' && !after_slash) {  /* comment beginning */
			while((c = getch()) != EOF) {
				if(c == '/') {
					if(after_star)                 /* end of comment */
						return getword(word, lim); /* start over */
				}
				else if(c == '*' && !after_slash)
					after_star = 1;
				else if(c == '\\')
					after_slash = after_slash ? 0 : 1;    /*ignore \\ */
				else {
					after_star = 0;
					after_slash = 0;
				}
			}
		}                                     /* end comment block */
		
		after_slash = 0;       /* not after slash anymore */
		if(c != EOF)
			ungetch(c);
	}
	
	else if(c == '\"') {
		if(!after_slash) {                  /* string literal begin */
			--w;                            /* reset w */
			while((c = getch()) != EOF) {
				if(c == '\"' && !after_slash)
					break;
				else if(c == '\\')
					after_slash = after_slash ? 0 : 1;    /*ignore \\ */
				else
					after_slash = 0;
				*w++ = c;
			}
			*w = '\0';
			if(c == EOF)
				return EOF;
			else
				return getword(word, lim);            /* start over */
		}
		after_slash = 0;                  /* not after a slash anymore */
	}
	
	if(!isalpha(c) && c != '_') {           /* it's a symbol */
		*w = '\0';
		if(c != '\\')
			after_slash = 0;
		return c;
	}
	
	/* reset this flag since a slash would have just returned */
	after_slash = 0;
	
	for( ; --lim > 0; ++w)                  /* it's a word or letter */
		if(!isalnum(*w = getch()) && *w != '_') {
			ungetch(*w);
			break;
		}
	*w = '\0';
	return word[0];
} /* end getword() */


/***************************************************************************
 *                    All code below here is from K&R2.                    *
 ***************************************************************************/

/*
 * addtree: From K&R2 page 141.
 * Adds a node containing w, at or below node p.
 */
struct tnode *addtree(struct tnode *p, char *w)
{
	int cond;
	
	if(p == NULL) { /* new word */
		p = talloc();
		p->word = strdup(w);
		p->count = 1;
		p->left = p->right = NULL;
	}
	else if((cond = strcmp(w, p->word)) == 0)
		p->count++;
	else if(cond < 0)
		p->left = addtree(p->left, w);
	else
		p->right = addtree(p->right, w);
	return p;
}

/* treeprint: From K&R2 page 142. Prints tree p in-order. */
void treeprint(const struct tnode *p)
{
	if(p != NULL) {
		treeprint(p->left);
		printf("%4d %s\n", p->count, p->word);
		treeprint(p->right);
	}
}

/* talloc: From K&R2 page 142. Makes a tnode. */
struct tnode *talloc(void)
{
	return (struct tnode *) malloc(sizeof(struct tnode));
}

/* strdup: From K&R2 page 143. Makes a duplicate of s. */
char *strdup(char *s)
{
	char *p;
	p = (char *) malloc(strlen(s) + 1);
	if(p != NULL)
		strcpy(p, s);
	return p;
}

/*
 * getch and ungetch are from K&R2, page 79
 */
#define BUFSIZE 100

char buf[BUFSIZE];  /* buffer for ungetch() */
int bufp = 0;       /* next free position in buf */

int getch(void) { /* get a (possibly pushed back) character */
	return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) {  /* push character back on input */
	if(bufp >= BUFSIZE)
		printf("ungetch: too many characters\n");
	else
		buf[bufp++] = c;
	return;
}
Personal tools
Tidy_icons
not logged in