Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.19 on page 126

Modify undcl so that it does not add redundant parentheses to declarations.



Solution by Robert Taylor

undcl source

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "getch.h"
#define MAXTOKEN 100
#define MAXPOINTERS 10
/* Author: Robert Taylor
 * Date: June, 2014
 * Exercise 5-19. Modify undcl so that it does not add redundant parentheses
 * to declarations. 
 *
 * From the input syntax stipulated for undcl, namely:
 * x () * [] * () char
 * to generate:
 * char (*(*x())[])()
 *
 * We can see that...
 * 1. The only time that parentheses are seen in the input are if they 
 * indicate a function.
 * 2. Other than when they indicate a function, parentheses in the output are
 * only added when a pointer '*' is seen.
 *
 * Parentheses used to represent a function are never redundant. So the item
 * of code that we need to focus on to solve the exercise is the pointer 
 * section.
 *
 * Sample: x () * * * char
 * output before changes: char(*(*(*x())))
 * output after changes: char(***x())
 */
enum { NAME, PARENS, BRACKETS };


int gettoken(void);
int tokentype; /* type of last token */
char token[MAXTOKEN]; /* last token string */
char out[1000]; /* output string */

/* undcl: convert word description to declaration */
int main(void)
{
	int type;
	int i, c;
	int pcount;
	char temp[MAXTOKEN];
	char p[MAXPOINTERS]; /* space for 9 pointers */
	while (gettoken() != EOF){
		strcpy(out, token);		
		pcount = 0;
		while ((type = gettoken()) != '\n')
			if (type == PARENS || type == BRACKETS){
				strcat(out, token);
			} else if (type == '*'){
				pcount++;
				while ((c = getch()) == '*' || c == ' '){
					if (c == '*'){
						if (pcount < (MAXPOINTERS - 1))
							pcount++;
						else
							break;
					}
				}
				ungetch();
				for (i = 0; i < pcount; i++){
					p[i] = '*';
				}
				p[i] = '\0';
				pcount = 0;
				sprintf(temp, "(%s%s)", p, out);
				strcpy(out, temp);
			} else if (type == NAME){
				sprintf(temp, "%s%s", token, out);
				strcpy(out, temp);
			} else {
				printf("invalid input at %s\n", token);
			}
		printf("%s\n", out);
	}
	return 0;
}

/* gettoken: return next token.
 * I have getch() and ungetch() included from the header
 * file getch.h , note that ungetch() in my implementation
 * requires no prameter.
 */
int gettoken(void)
{
	int c;
	char *p = token;
	while ((c = getch()) == ' ' || c == '\t')
		;
	if (c == '('){
		if ((c = getch()) == ')'){
			strcpy(token, "()");
			return tokentype = PARENS;
		} else {
			ungetch();
			return tokentype = '(';
		}
	} else if (c == '['){
		for (*p++ = c; (*p++ = getch()) != ']'; )
			;
		*p = '\0';
		return tokentype = BRACKETS;
	} else if (isalpha(c)) {
		for (*p++ = c; isalnum(c = getch()); )
			*p++ = c;
		*p = '\0';
		ungetch();
		return tokentype = NAME;
	} else {
		return tokentype = c;
	}
}

getch.c source

#include <stdio.h>
#define BUFSIZE 1000
static char line[BUFSIZE]; /* buffer for line */
static int bufp = 0; /* position in buf */
static int readflag = 1;
static int get_line(char *s, int max_length);
/* getch: read a character */
int getch(void)
{
	int length = 0;
	do {
		if (readflag == 1){
			/* prime the line array */
			if ((length = get_line(line,BUFSIZE)) > 0){
				if(length >= (BUFSIZE - 1)){
					printf("ERROR: line buffer exceeded\n");
					return EOF;
				}
			}else{
				return EOF;
			}
			bufp = 0;
			readflag = 0;
		}
		if (line[bufp] == '\0')
			readflag = 1; /* need to read a new line */	
	} while (readflag);
	return line[bufp++];
}
/* ungetch: push character back on input */
void ungetch(void)
{
	if (bufp > 0)
		--bufp;
}
/*
 * print_source: print out the contents of the line read
 */
void print_source(void)
{
	printf("%s", line);
}
/*
 * getline: 
 * Author: Robert Taylor
 * This version will read a line up to a maximum number of characters
 * (max_length - 1) and store a '\0' at the end. The number of characters
 * read is returned, if that is a 0 then we are done reading lines.
 */
int get_line(char *s, int max_length)
{
	int c;
	char *start = s; /* save pointer to start of buffer s */
	char *end = s + (max_length - 2); /* point near to end of buffer s */
	while((c = getchar()) != EOF && c != '\n' && s < end){
		*s++ = c;	
	}
	/* store last character read */
	if(c != EOF)
		*s++ = c;
	*s = '\0';/* terminate the line */
	return s - start; /* number of characters read */
}

getch.h source

#ifndef GETCH
#define GETCH
int getch(void);
void ungetch(void);
void print_source(void);
#endif


Solution by Revc

int undcl()
{
	int type;
	int await = 0;
	char temp[MAXTOKEN];
	char pointers[MAXTOKEN];

	while (gettoken() != EOF) {
		strcpy(out, token);
		while ((type = gettoken()) != '\n')
		{

			if (type == PARENS || type == BRACKETS)
			{
				if (await)
				{
					pointers[await] = '\0';
					while (await)
						pointers[--await] = '*';
					sprintf(temp, "(%s%s)", pointers, out);
					strcpy(out, temp);
				}
				strcat(out, token);
			}
			else if (type == '*')
			{
				await++; /* defer processing to the next token*/
			}
			else if (type == NAME)
			{
				if (await)
				{
					pointers[await] = '\0';
					while (await)
						pointers[--await] = '*';
					sprintf(temp, "%s%s", pointers, out);
					strcpy(out, temp);
				}
				sprintf(temp, "%s %s", token, out);
				strcpy(out, temp);
			}
			else
				printf("invalid input at %s\n", token);
		}
		printf("%s\n", out);
	}
	return 0;
}


Solution by codybartfast

Category 0

I think we only need parantheses when * is followed by () or [] because these have higher precedence than *. So with the expression:

x [3] * * () * * char

we only need one pair of parantheses for grouping *:

char **(**x[3])()

But the books version of undcl produces:

char (*(*(*(*x[3]))()))

We can verify that these two are equivalent by checking they produce the same result with dcl:

x:  array[3] of pointer to pointer to function returning pointer to pointer to char

The function needparens looks ahead to see if * is followed by ( or [

int main(void)
{
	int type;
	char temp[MAXTOKEN];

	while (gettoken() != EOF) {
		strcpy(out, token);
		while ((type = gettoken()) != '\n')
			if (type == PARENS || type == BRACKETS)
				strcat(out, token);
			else if (type == '*') {
				if (needparens())
					sprintf(temp, "(*%s)", out);
				else
					sprintf(temp, "*%s", out);
				strcpy(out, temp);
			} else if (type == NAME) {
				sprintf(temp, "%s %s", token, out);
				strcpy(out, temp);
			} else
				printf("invaid input at %s\n", token);
		printf("%s\n", out);
	}
	return 0;
}

int needparens(void)
{
	int c1, c2, rslt;

	rslt = 0;
	if ((c1 = getch()) == ' ') {
		if ((c2 = getch()) == '(' || c2 == '[')
			rslt = 1;
		ungetch(c2);
	}
	ungetch(c1);
	return rslt;
}

Full code on github

Solution by anonymous

I noticed the book gave insecure code, which my compiler quickly warned me about. As such, I updated sprint(temp, ...) to snprintf(temp, sizeof(temp), ...) to prevent a buffer overflow if the output was over 100 characters long. An example of this is:

[1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] char

This is 101 characters long when it gets copied from out to temp, and, consequently, overflows the buffer. I considered adding error handling to let the user know that the input was too long, but I suppose that improvement is for another day.

I based my solution on the characters in declarations/non-function definitions. Based on my research and reviewing the C17 standard, these are the only 9 special characters show up in them

*,.()[]{}

When you look at the C operator precedence table, ignore the braces and periods, and understand how the remaining special characters are used in declarations/non-function definitions, you will understand that the only special character that would need parentheses to preserve the order of operation is the asterisk, as known as the dereference operator.

I realized that if I checked the next token type to see if it was parentheses or brackets, I would need to put the deference operator in parentheses since the other the two operators, parentheses and brackets, have a higher precedence. Fortunately, I could just rely on gettoken to check the next operator type since proper input would never end with an asterisk. So I just added a delay boolean flag to delay printing out the dereferencing operator until the next loop iteration. I then either added parentheses or not depending on the operators that followed the asterisk.

I noticed that codybartfast's code is based on the same idea as mine, but he incorrectly handles the whitespace in-between operators and assumes that the user would input things correctly. However, if the user tab delimited the arguments or had more than one space in-between them, then his code would fail to detect that parentheses are necessary and not add them printing an incorrect result.

My only changes from the book's code are in main and the enum boolean at the top

Here is my code

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
    Exercise 5-19. Modify undcl so that it does not add redundant parentheses to declarations.
*/

#define MAXTOKEN 100
#define BUFSIZE 100

enum tokentype { NAME, PARENS, BRACKETS };
enum boolean { FALSE, TRUE };

void dcl(void);
void dirdcl(void);
int gettoken(void);
int getch(void);
void ungetch(int);

int tokentype;              // type of last token
char token[MAXTOKEN];       // last token string
char name[MAXTOKEN];        // identifier name
char datatype[MAXTOKEN];    // data type = char, int, etc.
char out[1000];             // output string
char buf[BUFSIZE];          // buffer for ungetch
int bufp = 0;               // next free position in buf

// convert word description to declaration
int main()
{
    int type, delay = FALSE;
    char temp[MAXTOKEN];

    while (gettoken() != EOF)
    {
        strcpy(out, token);
        while ((type = gettoken()) != '\n')
        {
            if (delay)
            {
                if (type == PARENS || type == BRACKETS)
                    snprintf(temp, sizeof(temp), "(*%s)", out);
                else
                    snprintf(temp, sizeof(temp), "*%s", out);
                strcpy(out, temp);
                delay = FALSE;
            }
            if (type == PARENS || type == BRACKETS)
                strcat(out, token);
            else if (type == '*')
                delay = TRUE;
            else if (type == NAME)
            {
                snprintf(temp, sizeof(temp), "%s %s", token, out);
                strcpy(out, temp);
            }
            else
                printf("invalid input at %s\n", token);
        }
        printf("%s\n", out);
    }
    return 0;
}

// skips blanks and tabs, then finds the next token in the input; a "token" is a name, a pair of parentheses, a pair of brackets perhaps including a number, or any other single character.
int gettoken(void) // return next token
{
    int c;
    char *p = token;

    while ((c = getch()) == ' ' || c == '\t')
        ;
    if (c == '(')
    {
        if ((c = getch()) == ')')
        {
            strcpy(token, "()");
            return tokentype = PARENS;
        }
        else
        {
            ungetch(c);
            return tokentype = '(';
        }
    }
    else if (c == '[')
    {
        for (*p++ = c; (*p++ = getch()) != ']'; )
            ;
        *p = '\0';
        return tokentype = BRACKETS;
    }
    else if (isalpha(c))
    {
        for (*p++ = c; isalnum(c = getch()); )
            *p++ = c;
        *p = '\0';
        ungetch(c);
        return tokentype = NAME;
    }
    else
        return tokentype = c;
}

// parse a declarator
void dcl(void)
{
    int ns;
    for (ns = 0; gettoken() == '*'; ) // count *'s
        ns++;
    dirdcl();
    while (ns-- > 0)
        strcat(out, " pointer to");
}

// parse a direct declarator
void dirdcl(void)
{
    int type;
    if (tokentype == '(') // ( dcl )
    {
        dcl();
        if (tokentype != ')')
            printf("error: missing )\n");
    }
    else if (tokentype == NAME) // variable name
        strcpy(name, token);
    else
        printf("error: expected name or (dcl)\n");
    while ((type = gettoken()) == PARENS || type == BRACKETS)
        if (type == PARENS)
            strcat(out, " function returning");
        else
        {
            strcat(out, " array");
            strcat(out, token);
            strcat(out, " of");
        }
}

// get a (possibly pushed back) character
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

// push character back on input
void ungetch(int c)
{
    if (bufp >= BUFSIZE)
        printf("ungetch: too many characters\n");
    else
        buf[bufp++] = c;
}
Personal tools