Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.06 on page 145

Implement a simple version of the #define processor (i.e., no arguments) suitable for use with C programs, based on the routines of this section. You may also find getch and ungetch helpful.



Solution by Akil Adeshwar

/*
   Implement a simple version of the #define processor (i.e., no arguments)
   suitable for use with C programs, based on the routines of this section.
   You may also find getch and ungetch helpful.
*/

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define HASHSIZE 101
#define MAXLEN 200
#define BUFSIZE 1000
#define STATE_OUT 321
#define STATE_IN_NO_NAME 322
#define STATE_IN_WITH_NAME 323


static int state = STATE_OUT; // Initial state


char buf[BUFSIZE];
int bufp = 0;


int getch(FILE *fp){
	return (bufp > 0)? buf[--bufp] : fgetc(fp);
}

void ungetch(int c){
	if(bufp >= BUFSIZE)
		printf("\nUngetch: Too many characters");
	else
		buf[bufp++] = c;
}

// Data structure to store the definitions
typedef struct list{
	struct list *next;
	char *name;
	char *defn;
}nlist;

static nlist *hashtab[HASHSIZE]; // Pointer Table


// Hash
unsigned hash(char *s){

	unsigned hashval;
	
	for(hashval=0; *s!='\0'; s++)
		hashval = *s + 31 * hashval;

	return hashval%HASHSIZE;
}


//Lookup

nlist *lookup(char *s){

	nlist *np;
	
	for(np = hashtab[hash(s)];np!=NULL;np=np->next)
		if(strcmp(np->name,s) == 0)
			return np;
	return NULL;
}

// Install

nlist *install(char *name,char *defn){

	nlist *np;
	unsigned hashval;
	hashval = hash(name);

	if((np = lookup(name))== NULL){

		np = (nlist *) malloc(sizeof(nlist));
		if((np->name=strdup(name))==NULL)
			return NULL;
		np->next = NULL;
		if(hashtab[hashval]==NULL){
			hashtab[hashval] = np;
		} else {
			np->next = hashtab[hashval];
			hashtab[hashval] = np;
		}
	}
	else
		free((void *) np->defn);

	if((np->defn = strdup(defn))==NULL)
		return NULL;
	return np;
}


// Print the Lookup Table
void print_all_def(){
	int i=0;
	for(;i<HASHSIZE;i++){
		if(hashtab[i]!=NULL){
			nlist *p = hashtab[i];
			while(p!=NULL){
				printf("LABEL: %s DEFN: %s\n",p->name,p->defn);
				p = p->next;
			}
		}
	}
}

int getword(char *word,int lim,FILE *fp){
	
	int c;
	char *w = word;

	while(isspace(c = getch(fp)));
	if(c==EOF){
		if(state == STATE_OUT)
			return -1;
		else{
			puts("Error: Incorrect definition\n");
			return -1;
		}
	}
	// Word should be identifier in STATE_IN_WITH_NAME to name the definition
	if(isalpha(c) || c=='_' || (state == STATE_OUT) || state==STATE_IN_WITH_NAME)
		*w++=c;
	if(!isalpha(c) && c!='_' && state!=STATE_OUT && state!=STATE_IN_WITH_NAME){
		*w = '\0';
		return w-word;
	}
	for(; --lim>0; w++){
		*w = getch(fp);
		if(state!=STATE_IN_WITH_NAME){
			// Name of definition must be a valid identifier
			if((!isalnum(*w) && *w!='_')){
				ungetch(*w);
				break;
			}
		}
		else
			if(isspace(*w)) // Definition can be any character
				break;
	}
	*w = '\0';
	return w-word;
}

int main(void){
	puts("\nCheck t6.txt to understand the below output:\n");	
	char line[MAXLEN];
	FILE *fp = fopen("t6.txt","r");
	char *name,*defn,*p;
	int len;
	if(fp!=NULL){
		while((len = getword(line,MAXLEN,fp)>0)){
			switch(state){

				case STATE_OUT:
					if(strcmp(line,"#define")==0)
						state = STATE_IN_NO_NAME;
					else{
						/*
						   Check if line is present in lookup table,
						   if yes, substitute the definition, else print as it is.
						   */
						nlist *np;
						if((np=lookup(line))==NULL)
							printf("%s ",line);
						else
							printf("%s ",np->defn);
					}	
					break;
				case STATE_IN_NO_NAME:
					// Received name for definition
					name = (char *) malloc(len);
					strcpy(name,line);
					state = STATE_IN_WITH_NAME;
					break;
				case STATE_IN_WITH_NAME:
					// Received defn for name
					defn = (char *) malloc(len);
					strcpy(defn,line);
					// Update the lookup table
					if(install(name,defn)==NULL){
						puts("Insert Error");
						return -1;
					}
					state = STATE_OUT;
					break;
			}
		}
		// Print the Lookup Table
		printf("\nLookup Table Now: \n");
		print_all_def();
		fclose(fp);
	}
	return 0;
}

Solution by Barrett Drawdy

/*
 * K&R2 exercise 6-6.  By: Barrett Drawdy
 * "Implement a simple version of the #define processor (i.e., no arguments)
 * suitable for use with C programs, based on the routines of this section.
 * You may also find getch and ungetch helpful."
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define HASHSIZE 101
#define MAXWORD 1000

/* nlist:  From K&R2 page 144.  Points to a search text and
 * a replacement text */
struct nlist
{
	struct nlist *next;
	char *name;
	char *defn;
};

static struct nlist *hashtab[HASHSIZE];

struct nlist *lookup(char *);
struct nlist *install(char *, char *);
int process(void);
int getch(void);
void ungetch(int);
int preproc(void);
int backslash(void);
int comment(void);
int literal(void);
int readword(void);

int main(void)
{
	int c;
	const int done = 0;     /* just a dummy for the infinite loop */
	int status = 1; /* 1 at line begin, 2 for failure, 0 otherwise */
	
	while(!done) {
		while(isspace(c = getch())) {
			putchar(c);
			if(c == '\n')
				status = 1;
		}
		
		if(c == '#' && status == 1)
			status = preproc();
		
		else if(c == '\\')
			status = backslash();
		
		else if(c == '/')
			status = comment();
		
		else if(c == '\"')
			status = literal();
		
		else if(c == EOF)
			return 0;
		
		else if(!isalpha(c) && c != '_') {
			putchar(c);
			status = 0;
		}
		
		else {
			ungetch(c);
			status = readword();
		}
		
		if(status == 2)
			return 1;
	}
	return 0;
} /* end main() */

/*
 * preproc:  Called when a '#' is found at the beginning of a line.
 * If a #define statement is found, it stores add the search and replacement
 * text to the hash table.  Otherwise it just prints what it reads.
 * The line a #define statement is found on is replaced with a blank line.
 */
int preproc(void)
{
	int c;
	char name[MAXWORD+1], defn[MAXWORD+1];
	char *n, *d;
	
	/* Read what comes after the '#' */
	for(n = name; isalpha(c = getch()) && n - name < MAXWORD; ++n)
		*n = c;
	*n = '\0';
	
	/* If it's a #define... */
	if(strcmp(name, "define") == 0) {
		
		/* ignore whitespace */
		while(isspace(c)) {
			if(c == '\n') { /* a #define followed by a '\n' is discarded */
				putchar(c);
				return 1;
			}
			c = getch();
		}
		
		/* Read search text */
		for(n = name; (isalnum(c) || c == '_') && n - name < MAXWORD; ++n) {
			*n = c;
			c = getch();
		}
		*n = '\0';
		
		/* Ignore whitespace */
		while(isspace(c)) {
			if(c == '\n')
				*defn = '\0'; /* If there is no replacement text. */
			c = getch();
		}
		
		/* Read replacement text. */
		for(d = defn; (isalnum(c) || c == '_') && d - defn < MAXWORD; ++d) {
			*d = c;
			c = getch();
		}
		*d = '\0';
		
		/* Add to hash table. */
		if(install(name, defn) == NULL)
			return 2;
	}
	else {                    /* not a #define statement */
		putchar('#');
		printf("%s", name);
	}
	
	/* finish line reading/printing line */
	while(c != '\n') {
		if(c == EOF)
			return EOF;
		putchar(c);
		c = getch();
	}
	putchar(c);
	return 1;
} /* end preproc() */

/*
 * backslash:  Makes sure that if a /, *, or " is preceded by a \ it is
 * just printed, not used in a comment or string literal.
 */
int backslash(void)
{
	int c, slash = 1;
	putchar('\\');
	while((c = getch()) == '\\') {
		slash = !slash;
		putchar(c);
	}
	
	if(slash)
		putchar(c);
	else
		ungetch(c);
	
	return 0;
} /* end backslash() */

/*
 * comment:  Prints comments without changes.
 */
int comment(void)
{
	int c;
	int after_star = 0;
	
	putchar('/');
	
	if((c = getch()) == '*') {   /* comment begin */
		putchar(c);
		c = getch();
		
		while(c != EOF) {
			if(c == '\\') {
				backslash();
				after_star = 0;
			}
			else if(c == '*') {
				after_star = 1;
				putchar(c);
			}
			else if(c == '/' && after_star) {
				putchar(c);
				return 0;
			}
			else {
				after_star = 0;
				putchar(c);
			}
			c = getch();
		}
		
		if(c == EOF)
			return EOF;
		
		putchar(c);
		return 0;
	}
	else {
		ungetch(c);
		return 0;
	}
} /* end comment() */

/*
 * literal:  Prints string literals without changes.
 */
int literal(void)
{
	int c;
	putchar('\"');
	c = getch();
	
	while(c != '\"' && c != EOF) {
		if(c == '\\')
			backslash();
		else
			putchar(c);
		c = getch();
	}
	
	if(c == EOF)
		return EOF;
	
	putchar(c);
	return 0;
} /* end literal() */

/*
 * readwood:  Reads an identifier and looks for it in the hash table.  If
 * it's found, it's replaced with the replacement text.  Otherwise, it is
 * printed as is.
 */
int readword(void)
{
	int c;
	char word[MAXWORD];
	char *w;
	struct nlist *node;
	
	c = getch();
	for(w = word; (isalnum(c) || c == '_') && c != EOF; ++w) {
		*w = c;
		c = getch();
	}
	*w = '\0';
	node = lookup(word);
	if(node == NULL)
		printf("%s", word);
	else
		printf("%s", node->defn);
	
	if(c == EOF)
		return EOF;
	
	ungetch(c);
	return 0;
} /* end readword() */

/***************************************************************************
 *                              From K&R2                                  *
 ***************************************************************************/

unsigned hash(char *);
char *strdup(char *);

/* lookup: From K&R2 page 145. Look for s in hashtab. */
struct nlist *lookup(char *s)
{
	struct nlist *np;
	for(np = hashtab[hash(s)]; np != NULL; np = np->next)
		if(strcmp(s, np->name) == 0)
			return np;
	return NULL;
}

/* install: From K&R2 page 145. Put (name, defn) in hashtab. */
struct nlist *install(char *name, char *defn)
{
	struct nlist *np;
	unsigned hashval;
	
	if((np = lookup(name)) == NULL) {
		np = (struct nlist *) malloc(sizeof(*np));
		if(np == NULL || (np->name = strdup(name)) == NULL)
			return NULL;
		hashval = hash(name);
		np->next = hashtab[hashval];
		hashtab[hashval] = np;
	}
	else
		free((void *) np->defn);
	if((np->defn = strdup(defn)) == NULL)
		return NULL;
	return np;
}

/* hash: From K&R2 page 144.  Form hash value for string s. */
unsigned hash(char *s)
{
	unsigned hashval;
	for(hashval = 0; *s != '\0'; ++s)
		hashval = *s + 31 * hashval;
	return hashval % HASHSIZE;
}

/* strdup: From K&R2 page 143. Makes a duplicate of s. */
char *strdup(char *s)
{
	char *p;
	p = (char *) malloc(strlen(s) + 1);
	if(p != NULL)
		strcpy(p, s);
	return p;
}

#define BUFSIZE 100

char buf[BUFSIZE];  /* buffer for ungetch() */
int bufp = 0;       /* next free position in buf */

int getch(void) { /* get a (possibly pushed back) character */
	return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) {  /* push character back on input */
	if(bufp >= BUFSIZE)
		printf("ungetch: too many characters\n");
	else
		buf[bufp++] = c;
	return;
}


Solution by codybartfast

Category 0

getparsed() (via filter-code.h) uses the code from 6-1 and 6-2 to provide the next char with a filtermode enum to indicate if the char is from regular code, a preprocessor statement, comment, string or char literal.

table.h has the code from 6-5 but with an updated lookup to return just the definition instead of an nlist struct.

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "filter-code.h"
#include "table.h"

#define MAXWORD 1000

static struct charinfo *stored;
char *gettoken(void);
char *code(char c);
char *checkdefines(char *t);
char *preproc(char c);
int asalpha(char c);
int asalnum(char c);

char token[MAXWORD];
char *ERROR = "ERROR";

int main(void)
{
   char *t;
   while ((t = gettoken()) != NULL && t != ERROR)
      printf("%s", t);
   return 0;
}

char *gettoken(void)
{
   struct charinfo *ci;
   char *t;

   if (stored != NULL) {
      ci = stored;
      stored = NULL;
   } else {
      ci = getparsed();
   }

   if (ci->ch == EOF) {
      freeci(ci);
      return NULL;
   }

   switch (ci->mode) {
   case CODE:
      t = code(ci->ch);
      break;
   case PREPROC:
      t = preproc(ci->ch);
      break;
   default:
      token[0] = ci->ch;
      token[1] = '\0';
      break;
   }
   freeci(ci);
   return (t == NULL) ? gettoken() : t;
}

char *code(char c)
{
   struct charinfo *ci;
   char *t = token;

   *t++ = c;
   if (asalpha(c)) {
      while ((ci = getparsed())->mode == CODE &&
             asalnum(c = ci->ch)) {
         *t++ = c;
         freeci(ci);
      }
      stored = ci;
   }
   *t = '\0';
   return checkdefines(token);
}

char *checkdefines(char *name)
{
   char *defn = lookup(name);
   return (defn == NULL) ? name : defn;
}

char *preproc(char c)
{
   struct charinfo *ci;
   char *name, *d, *t = token;

   if (c != '#') {
      printf("error: Expected '#' at start of preproc, got %d\n", c);
      return ERROR;
   }
   *(t++) = c;

   /* copy preprocessor statement to token */
   while ((ci = getparsed())->mode == PREPROC) {
      *(t++) = ci->ch;
      freeci(ci);
   }
   stored = ci;
   *t = '\0';

   /* if not #define return as is */
   if (strncmp(token, "#define", 7) != 0 || !isspace(token[7]))
      return token;

   /* trim trailing space */
   while (t > token && isspace(*(t - 1)))
      *--t = '\0';

   /* trim whitespace before name */
   d = token + 7;
   while (isspace(*++d))
      ;

   /* set name */
   name = d;
   if (!asalpha(*name)) {
      printf("error: Expected #define identifier (%s).\n", token);
      return ERROR;
   }
   while (asalnum(*++d))
      ;

   /* advance to definition */
   if (*d != '\0') {
      if (!isspace(*d)) {
         /* gcc doesn't require this */
         printf("error: Expected space after identifier: (%s)\n",
                token);
         return ERROR;
      }
      *d = '\0';
      while (isspace(*++d))
         ;
   }
   install(name, d);
   return NULL;
}

int asalpha(char c)
{
   return isalpha(c) || (c == '_');
}

#define SORT_OF_LETTER asalpha(c)
#define NUMERAL isdigit(c)

int asalnum(char c)
{
   return SORT_OF_LETTER || NUMERAL;
}

Full code on github

Solution by anonymous

I reused functions from the book and some of my solutions. The code I needed for this exercise is all in main. I considered using getword and other systems, but I found it easier to just process each line at a time. Even though this approach led to lengthy code, it is simple in the end.

My feedback for the other solutions:

  • Akil Adeshwar's output is in an odd format (extra spaces, no newlines), but it mostly works.
  • Barrett Drawdy's solution worked on my numeric #defines, but not on my string ones
  • codybartfast's solution replaced all of the identifiers with their values, but it printed strange characters inside of my printf function call for some reason.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/*
    Exercise 6-6. Implement a simple version of the #define processor (i.e., no arguments) suitable for use with C programs, based on the routines of this section. You may
    also find getch and ungetch helpful.
*/

#define HASHSIZE 101
#define MAXLEN 1000
#define MAXWORD 100

struct nlist // table entry
{
    struct nlist *next; // next entry in chain
    char *name; // defined name
    char *defn; // replacement text
};

static struct nlist *hashtab[HASHSIZE]; // pointer table

struct nlist *lookup(char *s);
struct nlist *install(char *name, char *defn);
char *mystrdup(char *s);
unsigned hash(char *s);
int getline(char *s, int lim);

int main()
{
    char line[MAXLEN], name[MAXWORD], defn[MAXWORD], *s;
    int i, len;
    while ((len = getline(line, MAXLEN)) > 0)
    {
        for (int j = 0; j < len; )
            if (*(s = &line[j]) == '#' && strncmp(s, "#define", 7) == 0) // found a #define statement
            {
                j += 7; // skip it
                while (isspace(line[j]) && j < len) // skip spaces
                    j++;
                i = 0;
                while ((isalnum(line[j]) || line[j] == '_') && i < MAXWORD && j < len) // get name
                    name[i++] = line[j++];
                name[i] = '\0';
                while (isspace(line[j]) && j < len) // skip spaces
                    j++;
                i = 0;
                while (!isspace(line[j]) && i < MAXWORD && j < len) // get defn
                    defn[i++] = line[j++];
                defn[i] = '\0';
                install(name, defn); // add to hashtab
                break; // skip rest of line
            }
            else if (isalpha(line[j]) || line[j] == '_') // possibly the #define identifier, so check!
            {
                i = 0;
                while ((isalnum(line[j]) || line[j] == '_') && i < MAXWORD && j < len) // get the next word
                    name[i++] = line[j++];
                name[i] = '\0';
                struct nlist *result = lookup(name); // check to see if in hashtab
                if (result != NULL) // if it is, print defn
                    printf("%s", result->defn);
                else // otherwise, print word
                    printf("%s", name);
            }
            else // print everything else
                putchar(line[j++]);
    }
    return 0;
}

unsigned hash(char *s)
{
    unsigned hashval;
    for (hashval = 0; *s != '\0'; s++)
        hashval = *s + 31 * hashval;
    return hashval % HASHSIZE;
}

struct nlist *lookup(char *s)
{
    for (struct nlist *np = hashtab[hash(s)]; np != NULL; np = np->next)
        if (strcmp(s, np->name) == 0)
            return np; // found
    return NULL; // not found
}

// put (name, defn) in hashtab
struct nlist *install(char *name, char *defn)
{
    struct nlist *np;
    unsigned hashval;

    if ((np = lookup(name)) == NULL) // not found
    {
        np = (struct nlist *) malloc(sizeof(*np));
        if (np == NULL || (np->name = mystrdup(name)) == NULL)
            return NULL;
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    }
    else // already there
        free((void *) np->defn); // free memory used by previous defn
    if ((np->defn = mystrdup(defn)) == NULL)
        return NULL;
    return np;
}

char *mystrdup(char *s) // make a duplicate of s
{
    char *p = (char *) malloc(strlen(s) + 1); // + 1 for '\0'
    if (p != NULL)
        strcpy(p, s);
    return p;
}

// get line into s, return length
int getline(char *s, int lim)
{
    int c;
    char *original = s;
    while (--lim > 0 && (c = getchar()) != EOF && c != '\n')
        *s++ = c;
    if (c == '\n')
        *s++ = c;
    *s = '\0';
    return s - original; // returns length
}
Personal tools