The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.19 on page 126
Modify undcl so that it does not add redundant parentheses to declarations.
Solution by Robert Taylor
undcl source
#include <stdio.h> #include <string.h> #include <ctype.h> #include "getch.h" #define MAXTOKEN 100 #define MAXPOINTERS 10 /* Author: Robert Taylor * Date: June, 2014 * Exercise 5-19. Modify undcl so that it does not add redundant parentheses * to declarations. * * From the input syntax stipulated for undcl, namely: * x () * [] * () char * to generate: * char (*(*x())[])() * * We can see that... * 1. The only time that parentheses are seen in the input are if they * indicate a function. * 2. Other than when they indicate a function, parentheses in the output are * only added when a pointer '*' is seen. * * Parentheses used to represent a function are never redundant. So the item * of code that we need to focus on to solve the exercise is the pointer * section. * * Sample: x () * * * char * output before changes: char(*(*(*x()))) * output after changes: char(***x()) */ enum { NAME, PARENS, BRACKETS }; int gettoken(void); int tokentype; /* type of last token */ char token[MAXTOKEN]; /* last token string */ char out[1000]; /* output string */ /* undcl: convert word description to declaration */ int main(void) { int type; int i, c; int pcount; char temp[MAXTOKEN]; char p[MAXPOINTERS]; /* space for 9 pointers */ while (gettoken() != EOF){ strcpy(out, token); pcount = 0; while ((type = gettoken()) != '\n') if (type == PARENS || type == BRACKETS){ strcat(out, token); } else if (type == '*'){ pcount++; while ((c = getch()) == '*' || c == ' '){ if (c == '*'){ if (pcount < (MAXPOINTERS - 1)) pcount++; else break; } } ungetch(); for (i = 0; i < pcount; i++){ p[i] = '*'; } p[i] = '\0'; pcount = 0; sprintf(temp, "(%s%s)", p, out); strcpy(out, temp); } else if (type == NAME){ sprintf(temp, "%s%s", token, out); strcpy(out, temp); } else { printf("invalid input at %s\n", token); } printf("%s\n", out); } return 0; } /* gettoken: return next token. * I have getch() and ungetch() included from the header * file getch.h , note that ungetch() in my implementation * requires no prameter. */ int gettoken(void) { int c; char *p = token; while ((c = getch()) == ' ' || c == '\t') ; if (c == '('){ if ((c = getch()) == ')'){ strcpy(token, "()"); return tokentype = PARENS; } else { ungetch(); return tokentype = '('; } } else if (c == '['){ for (*p++ = c; (*p++ = getch()) != ']'; ) ; *p = '\0'; return tokentype = BRACKETS; } else if (isalpha(c)) { for (*p++ = c; isalnum(c = getch()); ) *p++ = c; *p = '\0'; ungetch(); return tokentype = NAME; } else { return tokentype = c; } }
getch.c source
#include <stdio.h> #define BUFSIZE 1000 static char line[BUFSIZE]; /* buffer for line */ static int bufp = 0; /* position in buf */ static int readflag = 1; static int get_line(char *s, int max_length); /* getch: read a character */ int getch(void) { int length = 0; do { if (readflag == 1){ /* prime the line array */ if ((length = get_line(line,BUFSIZE)) > 0){ if(length >= (BUFSIZE - 1)){ printf("ERROR: line buffer exceeded\n"); return EOF; } }else{ return EOF; } bufp = 0; readflag = 0; } if (line[bufp] == '\0') readflag = 1; /* need to read a new line */ } while (readflag); return line[bufp++]; } /* ungetch: push character back on input */ void ungetch(void) { if (bufp > 0) --bufp; } /* * print_source: print out the contents of the line read */ void print_source(void) { printf("%s", line); } /* * getline: * Author: Robert Taylor * This version will read a line up to a maximum number of characters * (max_length - 1) and store a '\0' at the end. The number of characters * read is returned, if that is a 0 then we are done reading lines. */ int get_line(char *s, int max_length) { int c; char *start = s; /* save pointer to start of buffer s */ char *end = s + (max_length - 2); /* point near to end of buffer s */ while((c = getchar()) != EOF && c != '\n' && s < end){ *s++ = c; } /* store last character read */ if(c != EOF) *s++ = c; *s = '\0';/* terminate the line */ return s - start; /* number of characters read */ }
getch.h source
#ifndef GETCH #define GETCH int getch(void); void ungetch(void); void print_source(void); #endif
Solution by Revc
int undcl() { int type; int await = 0; char temp[MAXTOKEN]; char pointers[MAXTOKEN]; while (gettoken() != EOF) { strcpy(out, token); while ((type = gettoken()) != '\n') { if (type == PARENS || type == BRACKETS) { if (await) { pointers[await] = '\0'; while (await) pointers[--await] = '*'; sprintf(temp, "(%s%s)", pointers, out); strcpy(out, temp); } strcat(out, token); } else if (type == '*') { await++; /* defer processing to the next token*/ } else if (type == NAME) { if (await) { pointers[await] = '\0'; while (await) pointers[--await] = '*'; sprintf(temp, "%s%s", pointers, out); strcpy(out, temp); } sprintf(temp, "%s %s", token, out); strcpy(out, temp); } else printf("invalid input at %s\n", token); } printf("%s\n", out); } return 0; }
Solution by codybartfast
I think we only need parantheses when *
is followed by ()
or []
because these have higher precedence than *
. So with the expression:
x [3] * * () * * char
we only need one pair of parantheses for grouping *:
char **(**x[3])()
But the books version of undcl
produces:
char (*(*(*(*x[3]))()))
We can verify that these two are equivalent by checking they produce the same result with dcl
:
x: array[3] of pointer to pointer to function returning pointer to pointer to char
The function needparens
looks ahead to see if * is followed by (
or [
int main(void) { int type; char temp[MAXTOKEN]; while (gettoken() != EOF) { strcpy(out, token); while ((type = gettoken()) != '\n') if (type == PARENS || type == BRACKETS) strcat(out, token); else if (type == '*') { if (needparens()) sprintf(temp, "(*%s)", out); else sprintf(temp, "*%s", out); strcpy(out, temp); } else if (type == NAME) { sprintf(temp, "%s %s", token, out); strcpy(out, temp); } else printf("invaid input at %s\n", token); printf("%s\n", out); } return 0; } int needparens(void) { int c1, c2, rslt; rslt = 0; if ((c1 = getch()) == ' ') { if ((c2 = getch()) == '(' || c2 == '[') rslt = 1; ungetch(c2); } ungetch(c1); return rslt; }
Full code on github
Solution by anonymous
I noticed the book gave insecure code, which my compiler quickly warned me about. As such, I updated sprint(temp, ...) to snprintf(temp, sizeof(temp), ...) to prevent a buffer overflow if the output was over 100 characters long. An example of this is:
[1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] [1234567890] char
This is 101 characters long when it gets copied from out to temp, and, consequently, overflows the buffer. I considered adding error handling to let the user know that the input was too long, but I suppose that improvement is for another day.
I based my solution on the characters in declarations/non-function definitions. Based on my research and reviewing the C17 standard, these are the only 9 special characters show up in them
*,.()[]{}
When you look at the C operator precedence table, ignore the braces and periods, and understand how the remaining special characters are used in declarations/non-function definitions, you will understand that the only special character that would need parentheses to preserve the order of operation is the asterisk, as known as the dereference operator.
I realized that if I checked the next token type to see if it was parentheses or brackets, I would need to put the deference operator in parentheses since the other the two operators, parentheses and brackets, have a higher precedence. Fortunately, I could just rely on gettoken to check the next operator type since proper input would never end with an asterisk. So I just added a delay boolean flag to delay printing out the dereferencing operator until the next loop iteration. I then either added parentheses or not depending on the operators that followed the asterisk.
I noticed that codybartfast's code is based on the same idea as mine, but he incorrectly handles the whitespace in-between operators and assumes that the user would input things correctly. However, if the user tab delimited the arguments or had more than one space in-between them, then his code would fail to detect that parentheses are necessary and not add them printing an incorrect result.
My only changes from the book's code are in main and the enum boolean at the top
Here is my code
#include <stdio.h> #include <string.h> #include <ctype.h> /* Exercise 5-19. Modify undcl so that it does not add redundant parentheses to declarations. */ #define MAXTOKEN 100 #define BUFSIZE 100 enum tokentype { NAME, PARENS, BRACKETS }; enum boolean { FALSE, TRUE }; void dcl(void); void dirdcl(void); int gettoken(void); int getch(void); void ungetch(int); int tokentype; // type of last token char token[MAXTOKEN]; // last token string char name[MAXTOKEN]; // identifier name char datatype[MAXTOKEN]; // data type = char, int, etc. char out[1000]; // output string char buf[BUFSIZE]; // buffer for ungetch int bufp = 0; // next free position in buf // convert word description to declaration int main() { int type, delay = FALSE; char temp[MAXTOKEN]; while (gettoken() != EOF) { strcpy(out, token); while ((type = gettoken()) != '\n') { if (delay) { if (type == PARENS || type == BRACKETS) snprintf(temp, sizeof(temp), "(*%s)", out); else snprintf(temp, sizeof(temp), "*%s", out); strcpy(out, temp); delay = FALSE; } if (type == PARENS || type == BRACKETS) strcat(out, token); else if (type == '*') delay = TRUE; else if (type == NAME) { snprintf(temp, sizeof(temp), "%s %s", token, out); strcpy(out, temp); } else printf("invalid input at %s\n", token); } printf("%s\n", out); } return 0; } // skips blanks and tabs, then finds the next token in the input; a "token" is a name, a pair of parentheses, a pair of brackets perhaps including a number, or any other single character. int gettoken(void) // return next token { int c; char *p = token; while ((c = getch()) == ' ' || c == '\t') ; if (c == '(') { if ((c = getch()) == ')') { strcpy(token, "()"); return tokentype = PARENS; } else { ungetch(c); return tokentype = '('; } } else if (c == '[') { for (*p++ = c; (*p++ = getch()) != ']'; ) ; *p = '\0'; return tokentype = BRACKETS; } else if (isalpha(c)) { for (*p++ = c; isalnum(c = getch()); ) *p++ = c; *p = '\0'; ungetch(c); return tokentype = NAME; } else return tokentype = c; } // parse a declarator void dcl(void) { int ns; for (ns = 0; gettoken() == '*'; ) // count *'s ns++; dirdcl(); while (ns-- > 0) strcat(out, " pointer to"); } // parse a direct declarator void dirdcl(void) { int type; if (tokentype == '(') // ( dcl ) { dcl(); if (tokentype != ')') printf("error: missing )\n"); } else if (tokentype == NAME) // variable name strcpy(name, token); else printf("error: expected name or (dcl)\n"); while ((type = gettoken()) == PARENS || type == BRACKETS) if (type == PARENS) strcat(out, " function returning"); else { strcat(out, " array"); strcat(out, token); strcat(out, " of"); } } // get a (possibly pushed back) character int getch(void) { return (bufp > 0) ? buf[--bufp] : getchar(); } // push character back on input void ungetch(int c) { if (bufp >= BUFSIZE) printf("ungetch: too many characters\n"); else buf[bufp++] = c; }