Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 1.23 on page 34

Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments do not nest.



This was the first exercise to be posted as a fun "competition" on comp.lang.c, on 1 June 2000. As a result, there was a small flurry of submissions. Not all of them are completely working solutions. See the critique further down this page for a test program which breaks most of them. :-)

Solution by Charles Faisandier

Works fine on the test program. I think it is correct.

// Exercise 1-23. Write a program to remove all comments from a C program. Don't forget to 
// handle quoted strings and character constants properly. C comments don't nest. 
//
// Solution by Charles Faisandier.

#include <stdio.h>

// Copies stdin to stdout with all comments removed.
//
// Block comments (/* ... */) disappear entirely; a line comment (// ...) is
// removed but its terminating newline is kept so the following line does not
// get joined to the current one.  String literals and character constants are
// copied verbatim, including anything inside them that looks like a comment.
//
// Always returns 0.  If the input ends inside a literal or a comment the
// output simply ends there.
int main() {
	int ch; // int, not char: EOF must be distinguishable from every data byte

	while ((ch = getchar()) != EOF) {
		if (ch == '\'' || ch == '"') {
			// Quoted text: copy up to the matching unescaped quote.  Strings
			// and character constants share this path so that multi-character
			// escapes such as '\377' are handled as well.
			int quote = ch;
			putchar(ch);
			while ((ch = getchar()) != EOF) {
				putchar(ch);
				if (ch == '\\') {
					// A backslash always pairs with the character after it,
					// so runs like \\\" are split into the right pairs.
					if ((ch = getchar()) == EOF)
						break;
					putchar(ch);
				} else if (ch == quote) {
					break;
				}
			}
		} else if (ch == '/') {
			// Possible comment: the next character decides.
			int next = getchar();
			if (next == '*') {
				// Block comment: discard through the closing "*/" (or EOF).
				// prev starts at 0 so the opening star cannot also act as the
				// closing star in "/*/".
				int prev = 0;
				while ((ch = getchar()) != EOF && !(prev == '*' && ch == '/'))
					prev = ch;
			} else if (next == '/') {
				// Line comment: discard to end of line, then keep the newline
				// so we get an empty line instead of a joined one.
				while ((ch = getchar()) != EOF && ch != '\n')
					;
				if (ch == '\n')
					putchar(ch);
			} else {
				// Not a comment.  Emit the slash and hand the lookahead back
				// to the main loop: it may itself open a literal (6/'\2') or
				// be another slash.
				putchar(ch);
				if (next != EOF)
					ungetc(next, stdin);
			}
		} else {
			putchar(ch);
		}
	}
	return 0;
}

Solution by Rick Dearman (Category 0)

Now handles "/* comment in string */" correctly, but does not remove the comment from

  return /* comment inside return statement */ 0;



/******************************************************
"Write a program to remove all comments from a C program. 
Don't forget to handle quoted strings and character 
constants properly. C comments do not nest."

Author: Rick Dearman (rick@ricken.demon.co.uk) 
******************************************************/

#include <stdio.h>

#define MAXLINE 1000 /* max input line size */
char line[MAXLINE];  /* current input line (or MAXLINE-1 byte piece of it) */

/*
 * Named get_line rather than getline: POSIX <stdio.h> declares its own
 * getline() with a different signature, which makes the K&R name fail to
 * compile on many current systems.
 */
int get_line(void);  /* taken from the KnR book. */


/*
 * Copy stdin to stdout one buffered line at a time, removing block comments
 * and leaving string literals and character constants untouched.
 *
 * The comment and quote state is carried from one line to the next, so
 * multi-line comments work.  A comment is replaced by nothing at all.
 *
 * Limitation: a line longer than MAXLINE-1 bytes is processed in pieces, so
 * a comment delimiter or an escape sequence split exactly at a piece
 * boundary is not recognised.
 */
int
main()
{
  int in_comment = 0; /* non-zero while inside a block comment */
  int quote = 0;      /* 0 outside literals, else the quote that opened one */
  int len;
  int t;

  while ((len = get_line()) > 0)
    {
      t = 0;
      while (t < len)
        {
          /* line[t + 1] is always readable: line[len] is the '\0'. */
          if (in_comment)
            {
              if (line[t] == '*' && line[t + 1] == '/')
                {
                  in_comment = 0;
                  t = t + 2;
                }
              else
                {
                  t++;
                }
            }
          else if (quote)
            {
              if (line[t] == '\\' && t + 1 < len)
                {
                  /* copy the backslash; the escaped character is copied
                     below without being examined */
                  putchar(line[t]);
                  t++;
                }
              else if (line[t] == quote)
                {
                  quote = 0;
                }
              putchar(line[t]);
              t++;
            }
          else if (line[t] == '/' && line[t + 1] == '*')
            {
              in_comment = 1;
              t = t + 2;
            }
          else
            {
              if (line[t] == '"' || line[t] == '\'')
                quote = line[t];
              putchar(line[t]);
              t++;
            }
        }
    }
  return 0;
}


/*
 * get_line: read the next input line into the global line[], keeping the
 * newline if one was read and always terminating with '\0'.  At most
 * MAXLINE-1 characters are stored.  Returns the number of characters
 * stored, which is 0 only at end of input.
 */
int get_line(void)
{
  int c, i;
  extern char line[];

  c = EOF;
  for (i = 0; i < MAXLINE - 1 && (c = getchar()) != EOF && c != '\n'; ++i)
    line[i] = c;
  if (c == '\n')
    {
      line[i] = c;
      ++i;
    }
  line[i] = '\0';
  return i;
}

Solution by Daniel Schmidt da Silva

This program passed all the tests on this page and addresses all the points Rick made in his critique.
My goal here was to make the program run the loop once for each character, without too much extra cases.
It has the cases matched in the ifs, with the tests suited to exactly one of each case.

#include <stdio.h>

/*  author: Daniel Schmidt Silva  */
/* remove comments from C sources */

#define YES 1
#define NO  (!YES)

/*
 * Remove comments from C source read on stdin, writing the result to stdout.
 *
 * The loop body runs once per input character.  A '/' seen in program text
 * is never printed immediately: it is held back and printed when the next
 * character shows that it did not open a comment.
 *
 * c_prev normally holds the previous character, but is set to '\0' whenever
 * that character has already been "used up" and must not combine with the
 * next one:
 *   - the '*' of an opening slash-star (so slash-star-slash is not a
 *     complete comment),
 *   - the '/' of a closing star-slash (so a following '*' does not open a
 *     new comment and no stray slash is printed),
 *   - the second backslash of an escaped backslash inside a literal (so the
 *     quote after it still closes the literal).
 */
int main()
{
	/* c is the current character, c_prev the previous one (see above) */
	int c, c_prev = '\0', is_comment = NO, is_string = NO, closing_symbol = '\0';
	/* YES when c has been used up and must not be remembered in c_prev */
	int consumed;

	while ((c = getchar()) != EOF)
	{
		consumed = NO;
		if (!is_comment)
		{
			/* print the held-back slash if it did not start a comment */
			if (!is_string && c_prev == '/' && c != '*')
				putchar('/');
			/* print the char unless it is (or may be) the beginning of a comment */
			if (is_string || (c != '/' && (c != '*' || c_prev != '/')))
				putchar(c);
		}
		/* closing the comment */
		if (is_comment && c == '/' && c_prev == '*')
		{
			is_comment = NO;
			consumed = YES;
		}
		/* beginning the comment */
		else if (!is_comment && !is_string && c == '*' && c_prev == '/')
		{
			is_comment = YES;
			consumed = YES;
		}
		/* closing the string or character constant, unless the quote is escaped */
		else if (is_string && c == closing_symbol && c_prev != '\\')
			is_string = NO;
		/* beginning the string or character constant */
		else if (!is_string && !is_comment && (c == '"' || c == '\''))
		{
			is_string = YES;
			closing_symbol = c;
		}
		/* second half of an escaped backslash: it escapes nothing itself */
		else if (is_string && c == '\\' && c_prev == '\\')
			consumed = YES;

		if (consumed)
			c_prev = '\0';
		else
			c_prev = c;
	}

	/* a slash still held back when the input ends was ordinary text */
	if (!is_comment && !is_string && c_prev == '/')
		putchar('/');

	return 0;
}

Solution by Ben Pfaff

This version is a bugfix for the code var/'\2'

/* K&R2 1-23: Write a program to remove all comments from a C program.
   Don't forget to handle quoted strings and character constants
   properly.  C comments do not nest.

   This solution does not deal with other special cases, such as
   trigraphs, line continuation with \, or <> quoting on #include,
   since these aren't mentioned up 'til then in K&R2.  Perhaps this is
   cheating.

   Note that this program contains both comments and quoted strings of
   text that looks like comments, so running it on itself is a
   reasonable test.  It also contains examples of a comment that ends
   in a star and a comment preceded by a slash.  Note that the latter
   will break C99 compilers and C89 compilers with // comment
   extensions.

   Interface: The C source file is read from stdin and the
   comment-less output is written to stdout. **/

#include <stdio.h>

/* Filter: copies stdin to stdout with comments removed.  Implemented as
   a character-at-a-time state machine; string literals and character
   constants are copied unchanged.  Always returns 0. */
int
main(void)
{
#define PROGRAM 0
#define SLASH 1
#define COMMENT 2
#define STAR 3
#define QUOTE 4
#define LITERAL 5

    /* State machine's current state, one of the above values. */
    int state;

    /* If state == QUOTE or LITERAL, then ' or ".  Otherwise unused. */
    int quote = 0;

    /* Input character. */
    int c;

    state = PROGRAM;
    while ((c = getchar()) != EOF) {
        /* The following cases are in guesstimated order from most common
           to least common. */
        if (state == PROGRAM || state == SLASH) {
            if (state == SLASH) {
                /* Program text following a slash. */
                if (c == '*')
                    state = COMMENT;
                else {
                    /* The held-back slash was ordinary text; print it and
                       let the PROGRAM case below handle c, which may
                       itself be a quote or another slash. */
                    putchar('/');
                    state = PROGRAM;
                }
            }

            if (state == PROGRAM) {
                /* Program text. */
                if (c == '\'' || c == '"') {
                    quote = c;
                    state = QUOTE;
                    putchar(c);
                }
                else if (c == "/*"[0])
                    state = SLASH;
                else
                    putchar(c);
            }
        }
        else if (state == COMMENT) {
            /* Comment. */
            if (c == "/*"[1])
                state = STAR;
        }
        else if (state == QUOTE) {
            /* Within quoted string or character constant. */
            putchar(c);
            if (c == '\\')
                state = LITERAL;
            else if (c == quote)
                state = PROGRAM;
        }
        else if (state == STAR) {
            /* Comment following a star. */
            if (c == '/')
                state = PROGRAM;
            else if (c != '*')
                state = COMMENT;
        }
        else /* state == LITERAL */ {
            /* Within quoted string or character constant, following \. */
            putchar(c);
            state = QUOTE;
        }
    }

    /* Input ended right after a slash that was being held back.  The
       expression below is the deliberate "comment preceded by a slash"
       self-test described in the file header: it is valid C89 only. */
    if (state == SLASH)
        putchar('/' //**/
                1);

    return 0;
}

/* 
   Local variables:
   compile-command: "checkergcc -W -Wall -ansi -pedantic knr123-0.c -o knr123-0"
   End: 
*/



Solution by Lew Pitcher



/* Lew Pitcher <lpitcher@yesic.com> */

/*/
** derem - remove C comments
**
** (attempt to solve K&R Exercise 1-23)
**
** As I only have v1 copy of K&R, I cannot
** be sure what is covered in K&R ANSI chapter 1.
** So, I restrict myself to the components covered
** in K&R v1 chapter 1, but modified for requisite ANSI
** features (int main() and return value).
**
** Components covered in v1 K&R chapter 1 include:
**  while (), for (), if () else
**  getchar(), putchar(), EOF
**  character constants, character escapes
**  strings
**  array subscripting
**
** Not directly covered are
**  string subscripting ( "/*"[0] )
**  initializers ( int state = PROGRAM; )
**/

/*/*/

#include <stdio.h>

#define	PROGRAM		0
#define	BEGIN_COMMENT	1
#define	COMMENT		2
#define	END_COMMENT	3
#define	QUOTE		4

/*
** Copy stdin to stdout with comments removed.
**
** States:
**  PROGRAM        ordinary program text
**  BEGIN_COMMENT  a '/' has been read and is being held back
**  COMMENT        inside a comment
**  END_COMMENT    inside a comment, the previous character was '*'
**  QUOTE          inside a string or character constant opened by quote_char
**
** Always returns 0.
*/
int main(void)
{
	int this_char, quote_char;
	int state;

	state = PROGRAM;
	quote_char = 0;

	while ((this_char = getchar()) != EOF)
	{
		if (state == PROGRAM)
		{
			if (this_char == '/')
				state = BEGIN_COMMENT;
			else if ((this_char == '"') || (this_char == '\''))
			{
				state = QUOTE;
				putchar(quote_char = this_char);
			}
			else	putchar(this_char);
		}
		else if (state == BEGIN_COMMENT)
		{
			if (this_char == '*')
				state = COMMENT;
			else
			{
				putchar('/'); /* the held-back '/' was program text */
				if ((this_char == '"') || (this_char == '\''))
				{
					/* a literal may start right after a division slash */
					state = QUOTE;
					putchar(quote_char = this_char);
				}
				else if (this_char != '/')
				{
					state = PROGRAM;
					putchar(this_char);
				}
				/* else stuttered: this '/' is now the one held back,
				   so the state stays BEGIN_COMMENT */
			}
		}
		else if (state == QUOTE)
		{
			putchar(this_char);
			if (this_char == '\\')
			{
				/* escaped character: copy it without looking at it,
				   but never write EOF as if it were data */
				this_char = getchar();
				if (this_char != EOF)
					putchar(this_char);
			}
			else if (this_char == quote_char)
				state = PROGRAM;
		}
		else if (state == COMMENT)
		{
			if (this_char == '*')
				state = END_COMMENT;
		}
		else if (state == END_COMMENT)
		{
			if (this_char == '/')
				state = PROGRAM;
			else if (this_char != '*')	/* stuttered */
				state = COMMENT;
		}
	}

	/* input ended while a '/' was still being held back */
	if (state == BEGIN_COMMENT)
		putchar('/');

	return 0;
}

Solution by Gregory Pietsch



/* Gregory Pietsch <gkp1@flash.net> */

#include <stdio.h>

/*
 * Transition table, four characters per rule:
 *   [0] current state   '0' program text, '1' after '/', '2' in comment,
 *                       '3' in comment after '*', '4' in string,
 *                       '5' in character constant, '6'/'7' after a
 *                       backslash inside a string / character constant
 *   [1] input character, or '.' to match any character
 *   [2] action          '0' print the input, '/' print '/' then the input,
 *                       ' ' print a blank, anything else print nothing
 *   [3] next state
 * Rules are tried in order and the first match wins.
 */
char p[] =
"0/!10\"040\'050.001/011*!21\"/41\'/51./02*!32.!23/ "
"03*!33.!24\"004\\064.045\'005\\075.056.047.05";

/* Table-driven comment remover: stdin to stdout, one rule per character. */
int main()
{
	int ch, rule;
	char state;

	state = '0';
	while ((ch = getchar()) != EOF) {
		for (rule = 0; p[rule] != '\0'; rule = rule + 4) {
			if (p[rule] != state)
				continue;
			if (p[rule + 1] != ch && p[rule + 1] != '.')
				continue;

			switch (p[rule + 2]) {
			case '0':
				putchar(ch);
				break;
			case '/':
				putchar('/');
				putchar(ch);
				break;
			case ' ':
				putchar(' ');
				break;
			default:
				break;
			}
			state = p[rule + 3];
			break;	/* first matching rule wins */
		}
	}
	return 0;
}

Solution by Scopych Viktor

#include <stdio.h>

/* Author: Scopych
   Date:   11.06.2018
   Purpose: remove all comments from a C
      program.
*/

/* Helpers used by main; each works on the shared character c below. */
int isComment (void);
int isEndComment (void);
void toQuot (void);

/* Character most recently read with getchar(); shared by main and the
   helpers, which declare it extern and both read and overwrite it. */
int c;

/* Reads stdin one character at a time into the shared c.  Quotes are
   handed to toQuot(), comment openings to isComment(); everything else
   is echoed.  A recognised comment is replaced by one blank per
   unsuccessful isEndComment() call plus four more blanks. */
int main(void) {
   extern int c;

   for (c = getchar(); c != EOF; c = getchar()) {
      if (c == '"' || c == '\'') {
         toQuot();
         continue;
      }
      if (!isComment()) {
         putchar(c);
         continue;
      }
      while (!isEndComment())
         putchar(' ');
      printf("    ");
   }
   return 0;
}


/* Decides whether the '/' currently held in c opens a comment.
 *
 * Returns 1 when c is '/' and the next input character is '*'; both
 * characters are then consumed and c is left holding the '*'.
 *
 * Returns 0 otherwise.  If c was a '/', the character read as lookahead
 * is pushed back onto stdin and c still holds the '/', so the caller
 * prints the slash itself and then sees the lookahead as ordinary input.
 * This lets a quote or another slash directly after a division slash be
 * handled normally, and keeps EOF from being printed as a character.
 */
int isComment (void) {
   extern int c;
   int next;

   if (c != '/') {
      return 0;
   }

   next = getchar();
   if (next == '*') {
      c = next;
      return 1;
   }

   if (next != EOF) {
      ungetc(next, stdin);
   }
   return 0;
}

/* Consumes the next piece of a comment body and reports whether the
 * comment is over.
 *
 * Returns 1 when the closing star-slash has just been consumed, and also
 * when the input ended, so that a caller looping until this returns
 * non-zero cannot spin forever on an unterminated comment.  Returns 0
 * after consuming one character that is not a star, or a run of stars
 * plus the non-slash character that follows it.
 */
int isEndComment (void) {
   extern int c;

   c = getchar();
   /* Keep reading while we see stars: in a run of several stars followed
      by a slash, it is the last star that pairs with the slash. */
   while (c == '*') {
      c = getchar();
      if (c == '/') {
         return 1;
      }
   }
   return c == EOF;
}

/* Copies a string literal or character constant to stdout unchanged.
 *
 * On entry c holds the opening quote.  Characters are copied up to and
 * including the matching closing quote; a backslash and the character
 * after it are copied as a pair so an escaped quote does not end the
 * literal.  Copying also stops if the input ends, in which case c is
 * left holding EOF.
 */
void toQuot (void){
    extern int c;
    int quote = c;

    putchar(c);
    while ((c = getchar()) != EOF) {
        putchar(c);
        if (c == '\\') {
            c = getchar();
            if (c == EOF) {
                break;
            }
            putchar(c);
        } else if (c == quote) {
            break;
        }
    }
}






Solution by Ben Pfaff (Category 1)

This version has the var/'\2' bug fix.

/* K&R2 1-23: Write a program to remove all comments from a C program.
   Don't forget to handle quoted strings and character constants
   properly.  C comments do not nest.

   This solution does not deal with other special cases, such as
   trigraphs, line continuation with \, or <> quoting on #include,
   since these aren't mentioned up 'til then in K&R2.  Perhaps this is
   cheating.

   Note that this program contains both comments and quoted strings of
   text that looks like comments, so running it on itself is a
   reasonable test.  It also contains examples of a comment that ends
   in a star and a comment preceded by a slash.  Note that the latter
   will break C99 compilers and C89 compilers with // comment
   extensions.

   Interface: The C source file is read from stdin and the
   comment-less output is written to stdout. **/

#include <stdio.h>
#include <stdlib.h>

/* Filter: copies stdin to stdout with each comment replaced by a single
   space.  Implemented as a character-at-a-time state machine; string
   literals and character constants are copied unchanged.  Always
   returns 0. */
int
main(void)
{
    /* State machine's current state. */
    enum {
        PROGRAM,
        SLASH,
        COMMENT,
        STAR,
        QUOTE,
        LITERAL
    } state;

    /* If state == QUOTE or LITERAL, then ' or ".  Otherwise unused. */
    int quote = 0;

    state = PROGRAM;
    for (;;) {
        int c = getchar();
        if (c == EOF) {
            /* Input ended right after a held-back slash.  The expression
               is the deliberate "comment preceded by a slash" self-test
               described in the file header: it is valid C89 only. */
            if (state == SLASH)
                putchar('/' //**/
                        1 / 1 /'\1');
            break;
        }

        switch (state) {
        case SLASH:
            /* Program text following a slash. */
            if (c == "/*"[1]) {
                state = COMMENT;
                break;
            }
            putchar('/');
            state = PROGRAM;
            /* Fall through. */

        case PROGRAM:
            /* Program text. */
            if (c == '\'' || c == '"') {
                quote = c;
                state = QUOTE;
                putchar(c);
            }
            else if (c == "/*"[0])
                state = SLASH;
            else
                putchar(c);
            break;

        case COMMENT:
            /* Comment. */
            if (c == '*')
                state = STAR;
            break;

        case STAR:
            /* Comment following a star. */
            if (c == '/') {
                /* End of comment: emit the one space that stands in for
                   it, so that tokens on either side stay separated. */
                state = PROGRAM;
                putchar(' ');
            }
            else if (c != '*')
                state = COMMENT;
            break;

        case QUOTE:
            /* Within quoted string or character constant. */
            putchar(c);
            if (c == '\\')
                state = LITERAL;
            else if (c == quote)
                state = PROGRAM;
            break;

        case LITERAL:
            /* Within quoted string or character constant, following \. */
            putchar(c);
            state = QUOTE;
            break;

        default:
            abort();
        }
    }

    return 0;
}

/* 
   Local variables:
   compile-command: "checkergcc -W -Wall -ansi -pedantic knr123.c -o knr123"
   End: 
*/


Solution by Chris Torek



/* torek@elf.bsdi.com (Chris Torek) */

/*
"Write a program to remove all comments from a C program. Don't forget
to handle quoted strings and character constants properly. C comments do
not nest."

Well, what the heck.  I mailed this a day or two ago, but here is
the posted version.  I modified the problem a bit: it removes
comments from full ANSI C89 or C99 programs, handling trigraphs
and \-newline sequences.  It attempts to preserve any trigraphs in
the output, even while examining them in the "C code" as their
translated characters.  (I am not sure why I bothered doing all of
them, when only ??/ matters here.)  It keeps output line numbers in
sync with input line numbers, so that if the output is compiled,
any error messages will refer back to the proper input source line.

Lightly tested.
*/

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * This flag controls whether we do trigraph processing.
 */
int	trigraphs = 1;

/*
 * This flag controls whether a comment becomes "whitespace" (ANSI C)
 * or "nothing at all" (some pre-ANSI K&R C compilers).
 */
int	whitespace = 1;

/*
 * This flag controls whether we do C89 or C99.  (C99 also handles C++.)
 */
int	c99;

/*
 * These are global so that options() can get at them, and for later
 * error messages if needed.
 */
const char *inname, *outname;

/* Parses one "-xyz" option word; see the comment at its definition. */
int options(const char *, char **);
/* Prints the usage line to stderr and exits with EXIT_FAILURE. */
void usage(void);

/* Copies the first stream to the second with comments removed. */
void	process(FILE *, FILE *);

/* Prints the message to stderr and aborts; does not return. */
#ifdef __GNUC__
void	panic(const char *) __attribute__((noreturn));
#else
void	panic(const char *);
#endif

/*
 * Command-line driver.
 *
 * Arguments starting with '-' are handed to options(); the first other
 * argument names the input file and a second one is a usage error.
 * Without an input name stdin is read; without -o stdout is written.
 *
 * Exits with EXIT_SUCCESS when the whole input was processed and the
 * output was written and closed without error, EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv) {
	int i, failed;
	FILE *in, *out;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-')
			/* options() reports how many following arguments it used */
			i += options(argv[i] + 1, argv + i + 1);
		else if (inname == NULL)
			inname = argv[i];
		else
			usage();
	}
	if (inname != NULL) {
		if ((in = fopen(inname, "r")) == NULL) {
			fprintf(stderr, "cannot open %s for reading\n", inname);
			exit(EXIT_FAILURE);
		}
	} else {
		inname = "stdin";
		in = stdin;
	}
	if (outname != NULL) {
		if ((out = fopen(outname, "w")) == NULL) {
			fprintf(stderr, "cannot open %s for writing\n",
			    outname);
			exit(EXIT_FAILURE);
		}
	} else {
		outname = "stdout";
		out = stdout;
	}
	process(in, out);

	/*
	 * A read error looks like EOF to process(), and buffered output may
	 * only fail when it is flushed by fclose(), so check both streams
	 * before claiming success.
	 */
	failed = ferror(in) || ferror(out);
	fclose(in);
	if (fclose(out) == EOF)
		failed = 1;
	if (failed) {
		fprintf(stderr, "I/O error while copying %s to %s\n",
		    inname, outname);
		exit(EXIT_FAILURE);
	}
	exit(EXIT_SUCCESS);
}

/*
 * This scans for -o type options.  Options that have an argument
 * can either take it immediately or as a subsequent argument (e.g.,
 * -ofoo means the same thing as -o foo).  We return 0 for "handled
 * them normally", 1 for "handled them normally but needed more
 * arguments".
 *
 * Currently this function is more powerful than really needed, but
 * if we ever decide to have more arguments...
 */
int options(const char *afterdash, char **moreargs) {
	const char *p;		/* walks the letters of this option word */
	int used = 0;		/* entries of moreargs[] taken as arguments */

	for (p = afterdash; *p != '\0'; p++) {
		switch (*p) {
		case 'o':
			if (p[1] != '\0') {
				/* -ofoo: the rest of the word is the file name */
				outname = p + 1;
				return used;
			}
			/* -o foo: the name is the next unused argument */
			if (moreargs[used] != NULL)
				outname = moreargs[used++];
			else
				usage();
			break;
		case 't':
			trigraphs = 0;
			break;
		case 'w':
			whitespace = 0;
			break;
		case '9':
			c99 = 1;
			break;
		default:
			usage();
			break;
		}
	}
	return used;
}

/* Print the one-line synopsis on stderr and terminate unsuccessfully. */
void usage(void) {
	fputs("usage: uncomment [-9tw] [-o outfile] [infile]\n", stderr);
	exit(EXIT_FAILURE);	/* ??? */
}

/*
 * States, level 0:
 *	normal
 *	trigraph processing: Q1 Q2 (for ??x)
 *
 * States, level 1:
 *	backslash-newline processing: BACK (seen \, may consume NL)
 *
 * States, level 2:
 *	normal
 *	character constant: CC (seen '), CCBACK (seen \ inside CC)
 *	string constant: SC, SCBACK
 *	comment: SLASH, COMM, COMMSTAR (for /, in-comment, & seen-star)
 *	C99: SLASHSLASH
 */

/* Level 0 (trigraph recognition): number of '?' seen so far. */
enum l0state {
	L0_NORMAL,
	L0_Q1, L0_Q2
};
/* Level 1 (line splicing): whether the previous character was a backslash. */
enum l1state {
	L1_NORMAL,
	L1_BACK
};
/* Level 2 ("C proper"): which kind of token the scanner is inside. */
enum l2state {
	L2_NORMAL,
	L2_CC, L2_CCBACK,
	L2_SC, L2_SCBACK,
	L2_SLASH, L2_COMM, L2_COMMSTAR,
	L2_SLASHSLASH
};

/* Input-side state shared by process() and getl0char(). */
struct state {
	FILE *in;		/* stream being read */
	enum l0state l0state;	/* trigraph state kept between getl0char() calls */
	int npushback;		/* number of valid entries in the two arrays below */
	char pushback[4];	/* characters to re-read, last pushed first */
	char pushorig[4];	/* nonzero => trigraph pushback */
	int lastgetc;		/* previous result of getc(); a '\n' here bumps lineno */
	int lineno;		/* current input line, starting at 1; used by warn() */
};

/*
 * Set up "initial" state.
 */
static void state0(struct state *sp, FILE *in) {
	/* nothing pushed back, no trigraph in progress */
	sp->npushback = 0;
	sp->l0state = L0_NORMAL;

	/* reading starts on line 1 with no previous character */
	sp->lineno = 1;
	sp->lastgetc = 0;

	sp->in = in;
}

/*
 * Save character c, together with its trigraph spelling origc (0 if it
 * was not spelled as a trigraph), to be re-read before any new input.
 * The caller must not exceed the capacity of the pushback arrays.
 */
static void pushback(struct state *sp, int c, char origc) {
	int slot;

	assert(sp->npushback < sizeof sp->pushback);
	slot = sp->npushback++;
	sp->pushback[slot] = c;
	sp->pushorig[slot] = origc;
}

/*
 * Get a character, doing trigraph processing.  Set *origc to 0 for normal
 * characters, or the actual input character pre-trigraph-mapping
 * for trigraph input.
 *
 * As a side effect, this can wind up getting up to 3 characters, maybe
 * stuffing two of them into the pushback buffer sp->pushback[].  It also
 * bumps sp->lineno when a previously-read newline has been passed over.
 *
 * Returns EOF at end of input.  Question marks that were being held as a
 * possible trigraph prefix when the input ended are returned as ordinary
 * characters first, so nothing read from the stream is lost.
 */
static int getl0char(struct state *sp, char *origc) {
	int c, newc;
	enum l0state state;

	state = sp->l0state;
	*origc = 0;
	while ((c = getc(sp->in)) != EOF) {
		if (sp->lastgetc == '\n')
			sp->lineno++;
		sp->lastgetc = c;
		switch (state) {

		case L0_NORMAL:
			/* ? => get another character; otherwise we are ok */
			if (c == '?') {
				state = L0_Q1;
				continue;
			}
			assert(sp->l0state == L0_NORMAL);
			return c;

		case L0_Q1:
			/* ?? => get another character */
			if (c == '?') {
				state = L0_Q2;
				continue;
			}
			/* ?X => return ?, look at X later */
			pushback(sp, c, 0);
			sp->l0state = L0_NORMAL;
			return '?';

		case L0_Q2:
			/*
			 * ??X, where X is trigraph => map
			 * ??X, where X is non-trigraph => tricky
			 * ??? => also tricky
			 */
			switch (c) {
			case '=':
				newc = '#';
				break;
			case '(':
				newc = '[';
				break;
			case '/':
				newc = '\\';
				break;
			case ')':
				newc = ']';
				break;
			case '\'':
				newc = '^';
				break;
			case '<':
				newc = '{';
				break;
			case '!':
				newc = '|';
				break;
			case '>':
				newc = '}';
				break;
			case '?':
				/*
				 * This one is slightly tricky.  Three '?'s
				 * mean that the '?' we read two characters
				 * ago gets returned, and the two remaining
				 * '?'s leave us in Q2 state.
				 */
				sp->l0state = L0_Q2;
				return '?';
			default:
				/*
				 * This one returns the first ?, leaves
				 * the second ? to be re-examined, and
				 * leaves the last character to be re-examined.
				 * In any case we are back in "normal" state.
				 */
				pushback(sp, c, 0);
				pushback(sp, '?', 0);
				sp->l0state = L0_NORMAL;
				return '?';
			}
			/* mapped a trigraph char -- return new char */
			*origc = c;
			sp->l0state = L0_NORMAL;
			return newc;

		default:
			panic("getl0char state");
		}
	}
	sp->lastgetc = EOF;

	/*
	 * End of input.  Any '?' still held as a possible trigraph prefix
	 * is plain text after all: hand it back (one now, a second one via
	 * the pushback buffer) and report EOF on a later call.
	 */
	if (state == L0_Q2) {
		pushback(sp, '?', 0);
		sp->l0state = L0_NORMAL;
		return '?';
	}
	if (state == L0_Q1) {
		sp->l0state = L0_NORMAL;
		return '?';
	}
	return EOF;
}

void warn(struct state *, const char *);

/*
 * Copy `in' to `out' with comments removed.
 *
 * Each character passes through three layers: trigraph mapping (only if
 * the global `trigraphs' is set), backslash-newline splicing, and the
 * C-level scanner that tracks literals and comments.  Newlines swallowed
 * by splicing or by comments are counted in pendnls and written out
 * before the next real output, which keeps output line numbers equal to
 * input line numbers.  Problems such as EOF inside a literal or comment
 * are reported through warn().
 */
void process(FILE *in, FILE *out) {
	enum l1state l1state = L1_NORMAL;
	enum l2state l2state = L2_NORMAL;
	int c, pendnls;
	char origc, backc;
	struct state state;

	state0(&state, in);
	pendnls = 0;
	backc = 0;		/* defeat gcc warning */

	/*
	 * Slight sort-of-bug: files ending in \ cause two "final" getc()s.
	 */
	do {
		if (state.npushback) {
			c = state.pushback[--state.npushback];
			origc = state.pushorig[state.npushback];
		} else if (trigraphs) {
			c = getl0char(&state, &origc);
		} else {
			c = getc(in);
			origc = 0;
			if (state.lastgetc == '\n')
				state.lineno++;
			state.lastgetc = c;
		}

		/*
		 * Do backslash-newline processing.
		 */
		switch (l1state) {

		case L1_NORMAL:
			if (c == '\\') {
				l1state = L1_BACK;
				backc = origc;
				continue;
			}
			break;

		case L1_BACK:
			/*
			 * If backc is nonzero here, the backslash that
			 * got us into this state was spelled ??/ --
			 * if we eat a newline (and hence the backslash),
			 * we forget that the eaten newline was spelled
			 * this way.  This is sort of a bug, but so it goes.
			 */
			l1state = L1_NORMAL;
			if (c == '\n') {
				pendnls++;
				continue;
			}
			if (c != EOF)
				pushback(&state, c, origc);
			c = '\\';
			origc = backc;
			break;

		default:
			panic("bad l1state");
		}

		/*
		 * Now ready to do "C proper" processing.
		 */
#define	SYNCLINES()	while (pendnls) putc('\n', out), pendnls--
#define	OUTPUT(ch, tri) ((tri) ? fprintf(out, "??%c", tri) : putc(ch, out))
#define	COPY()		OUTPUT(c, origc)

		switch (l2state) {
		case L2_NORMAL:
			switch (c) {
			case '\'':
				l2state = L2_CC;
				break;
			case '"':
				l2state = L2_SC;
				break;
			case '/':
				/* hold the slash back until we know what follows */
				l2state = L2_SLASH;
				continue;
			default:
				break;
			}
			SYNCLINES();
			if (c != EOF)
				COPY();
			break;

		case L2_CC:
			switch (c) {
			case EOF:
				warn(&state, "EOF in character constant");
				break;
			case '\n':
				warn(&state, "newline in character constant");
				break;
			case '\\':
				l2state = L2_CCBACK;
				break;
			case '\'':
				l2state = L2_NORMAL;
				break;
			default:
				break;
			}
			if (c != EOF)
				COPY();
			break;

		case L2_CCBACK:
			switch (c) {
			case EOF:
				warn(&state, "EOF in character constant");
				break;
			case '\n':
				warn(&state, "newline in character constant");
				break;
			default:
				break;
			}
			l2state = L2_CC;
			if (c != EOF)
				COPY();
			break;

		case L2_SC:	/* much like CC */
			switch (c) {
			case EOF:
				warn(&state, "EOF in string constant");
				break;
			case '\n':
				warn(&state, "newline in string constant");
				break;
			case '\\':
				l2state = L2_SCBACK;
				break;
			case '"':
				l2state = L2_NORMAL;
				break;
			default:
				break;
			}
			if (c != EOF)
				COPY();
			break;

		case L2_SCBACK:
			switch (c) {
			case EOF:
				warn(&state, "EOF in string constant");
				break;
			case '\n':
				warn(&state, "newline in string constant");
				break;
			default:
				break;
			}
			l2state = L2_SC;
			if (c != EOF)
				COPY();
			break;

		case L2_SLASH:
			if (c == '*')
				l2state = L2_COMM;
			else if (c99 && c == '/')
				l2state = L2_SLASHSLASH;
			else {
				SYNCLINES();
				OUTPUT('/', 0);
				/*
				 * A second slash (non-C99) becomes the new
				 * held-back slash, so we stay in L2_SLASH.
				 */
				if (c != '/') {
					if (c != EOF)
						COPY();
					/*
					 * The character after a division
					 * slash can itself open a literal,
					 * as in 6/'\2' -- enter the matching
					 * state so that its closing quote is
					 * not mistaken for an opening one.
					 */
					if (c == '\'')
						l2state = L2_CC;
					else if (c == '"')
						l2state = L2_SC;
					else
						l2state = L2_NORMAL;
				}
			}
			break;

		case L2_COMM:
			switch (c) {
			case '*':
				l2state = L2_COMMSTAR;
				break;
			case '\n':
				pendnls++;
				break;
			case EOF:
				warn(&state, "EOF inside comment");
				break;
			}
			break;

		case L2_COMMSTAR:
			switch (c) {
			case '/':
				l2state = L2_NORMAL;
				/*
				 * If comments become whitespace,
				 * and we have no pending newlines,
				 * must emit a blank here.
				 *
				 * The comment text is now all eaten.
				 */
				if (whitespace && pendnls == 0)
					putc(' ', out);
				SYNCLINES();
				break;
			case '*':
				/* stay in L2_COMMSTAR state */
				break;
			case EOF:
				warn(&state, "EOF inside comment");
				break;
			case '\n':
				pendnls++;
				/* FALLTHROUGH */
			default:
				l2state = L2_COMM;
			}
			break;

		case L2_SLASHSLASH:
			switch (c) {
			case EOF:
				/* ??? do we really care? */
				warn(&state, "EOF inside //-comment");
				break;
			case '\n':
				l2state = L2_NORMAL;
				pendnls++;	/* cheesy, but... */
				SYNCLINES();
				/* FALLTHROUGH */
			default:
				break;
			}
			break;

		default:
			panic("bad l2state");
		}
	} while (c != EOF);
	SYNCLINES();
}

/* Report msg on stderr, prefixed with the input name and current line. */
void warn(struct state *sp, const char *msg) {
	int where = sp->lineno;

	fprintf(stderr, "uncomment: %s(%d): ", inname, where);
	fprintf(stderr, "%s\n", msg);
}

/* Internal-error exit: print "panic: <msg>" on stderr, then abort. */
void panic(const char *msg) {
	fputs("panic: ", stderr);
	fputs(msg, stderr);
	fputc('\n', stderr);
	abort();
	exit(EXIT_FAILURE);
}

Solution by Chris Mears

Here's Chris's updated version, without the bugs (says he).  :-)

/*
 * C comment stripper.
 *
 * Strips comments from C or C++ code.
 */

#include <stdio.h>

enum state_t { normal, string, character, block_comment, line_comment};

enum token_t { none, backslash, slash, star, tri1, tri2, tri_backslash};

/*
 * Tell whether input read in state s belongs in the output.
 * Returns 1 for program text and for the inside of string and character
 * literals, 0 for the inside of either kind of comment.
 */
static int print_mode(enum state_t s)
{
        switch (s) {
        case normal:
        case string:
        case character:
                return 1;
        default:
                return 0;
        }
}

/*
 * Copy infile to outfile with C and C++ comments removed.
 *
 * A block comment is replaced by a newline if it contained one and by a
 * single space otherwise; a line comment is dropped up to, but not
 * including, its newline.  String and character literals are copied
 * unchanged.  Backslash-newline (also when the backslash is spelled as
 * the trigraph ??/) is taken into account when looking for the end of a
 * comment.
 *
 * `state' records what kind of text we are in; `token' records the
 * previous character when it might combine with the next one, and
 * `last_token' saves `token' across a backslash or a possible trigraph.
 * A '/' in program text is not written when read: it is written by the
 * case for the following character once that shows it was not the start
 * of a comment.
 *
 * Does nothing if either stream is NULL or if both are the same stream.
 */
void cstrip(FILE *infile, FILE *outfile)
{
        int ch;
        int comment_newline = 0;
        enum state_t state = normal;
        enum token_t token = none;
        enum token_t last_token = none;

        if (!infile || !outfile || (infile == outfile)) {
                return;
        }


        while ((ch = fgetc(infile)) != EOF) {
                switch (ch) {
                case '/':
                        if (token == tri2) {
                                token = tri_backslash;
                                if (print_mode(state))
                                        fputc(ch, outfile);
                        } else if (state == string || state == character) {
                                fputc(ch, outfile);
                                token = slash;
                        } else if (state == block_comment && token == star) {
                                state = normal;
                                token = none;

                                /* Replace block comments with whitespace. */
                                if (comment_newline) {
                                        fputc('\n', outfile);
                                } else {
                                        fputc(' ', outfile);
                                }
                        } else if (state == normal && token == slash) {
                                state = line_comment;
                                token = slash;
                        } else {
                                token = slash;
                        }

                        break;

                case '\\':
                        if (state == normal && token == slash)
                                fputc('/', outfile);
                        if (print_mode(state))
                                fputc(ch, outfile);

                        if (token == backslash || token == tri_backslash) {
                                token = none;
                        } else {
                                last_token = token;
                                token = backslash;
                        }

                        break;

                case '"':
                        if (state == normal && token == slash)
                                fputc('/', outfile);
                        if (state == string && token != backslash)
                                state = normal;
                        else if (state == normal && token != backslash)
                                state = string;

                        if (print_mode(state))
                                fputc(ch, outfile);

                        token = none;

                        break;

                case '\'':
                        if (state == normal && token == slash)
                                fputc('/', outfile);
                        if (state == character && token != backslash)
                                state = normal;
                        else if (state == normal && token != backslash)
                                state = character;

                        if (print_mode(state))
                                fputc(ch, outfile);

                        token = none;

                        break;

                case '\n':
                        /* This test is independent of the others. */
                        if (state == block_comment)
                                comment_newline = 1;

                        if (state == normal && token == slash)
                                fputc('/', outfile);

                        if (token == backslash || token == tri_backslash)
                                token = last_token;
                        else if (state == line_comment &&
                                        token != backslash) {
                                state = normal;
                                token = none;
                        } else {
                                token = none;
                        }

                        if (print_mode(state))
                                fputc(ch, outfile);

                        break;

                case '*':
                        if (state == normal && token == slash) {
                                state = block_comment;
                                token = none;
                                comment_newline = 0;
                        } else {
                                token = star;
                        }

                        if (print_mode(state))
                                fputc(ch, outfile);

                        break;

                case '?':
                        if (state == normal && token == slash)
                                fputc('/', outfile);

                        if (token == tri1) {
                                token = tri2;
                        } else if (token == tri2) {
                                token = tri2;   /* retain state */
                        } else {
                                /* We might need the last token if this
                                 * trigraph turns out to be a backslash.
                                 */
                                last_token = token;
                                token = tri1;
                        }

                        if (print_mode(state))
                                fputc(ch, outfile);

                        break;

                default:
                        if (state == normal && token == slash)
                                fputc('/', outfile);

                        if (print_mode(state))
                                fputc(ch, outfile);

                        token = none;

                        break;
                } /* switch */

        } /* while */

        /* A slash still being withheld when the input ends was ordinary
         * program text; write it now so that it is not lost.
         */
        if (state == normal && token == slash)
                fputc('/', outfile);

        return;
}


/* Small driver program. */

/*
 * Entry point: hands stdin and stdout to cstrip() and then reports
 * success (0) to the caller.
 */
int main(void)
{
        cstrip(stdin, stdout);

        return 0;
}



Here's a critique of the above, sent in by Rick Litherland. (Please note: when Rick posted this, I hadn't yet posted Chris Mears's updated version of the code.)

(Since I find it hard to pick the solution number out of KRX12300.C at a glance, I'll refer to the solutions as uncomment00, uncomment01, and so on.)

[Rick - KR means K&R. X means eXercise. 1 means Chapter 1. 23 means exercise 23. The next digit is the category number - 0 == Cat 0 (ANSI C89, with code restricted to what K&R have discussed at this point in the book). The final digit is the solution number. 0 is the first I received in that category, 1 is the second, and so on. (RJH)]

Uncomment03 (Gregory Pietsch)

I can find only one possible flaw in this, namely that it does not allow for a slash in program text being immediately followed by a quotation mark. One could reasonably argue that this is not a flaw at all, because that would never happen in sensible code. On the other hand, it can happen in legal code, as demonstrated by the following complete (if useless) program.

#include <stdio.h>
int main(void)
{
    /* print the number three */
    printf("%d\n", 6/'\2');
    /* remember to return a value from main */
    return 0;
}


When this is fed to uncomment03, the output is

#include <stdio.h>
int main(void)
{
     
    printf("%d\n", 6/'\2');
    /* remember to return a value from main */
    return 0;
}


Clearly, uncomment03 realises that the second comment is too important to remove. Um, sorry, that was a feeble excuse for a joke. What's happening is that uncomment03 doesn't recognise the beginning of the character constant '\2', so it takes the closing quote as the start of a "character constant" that is never terminated. The peculiar idiom 6/'\2' for 3 can be replaced by the even more brain-damaged 6/"\2"[0] with the same effect. Since uncomment03 is table-driven, it's easy to make it recognise these situations by adding two new rules to the table.

/* modified krx12303.c */
#include <stdio.h>

char p[] =
"0/!10\"@40\'@50.@01/@11*!2"
"1\"/41\'/5"            /* added by RAL */
"1./02*!32.!23/ 03*!33.!24\"@04\\@64.@45\'@05\\@75.@56.@47.@5";

/*
 * Table-driven filter.  The string p is read as a list of 4-character
 * rules: [0] the state the rule applies to, [1] the input character it
 * matches ('.' matches anything), [2] the action, [3] the state to move to.
 * Actions: '@' echo the character, '/' emit a slash and then the
 * character, ' ' emit one blank; any other action character emits nothing.
 * For each input character the first matching rule wins.
 */
int main()
{
    int ch;         /* current input character */
    int rule;       /* offset of the rule being examined */
    char state;     /* current state, a character as stored in the table */

    state = '0';
    while ((ch = getchar()) != EOF) {
        for (rule = 0; p[rule] != '\0'; rule += 4) {
            if (p[rule] != state)
                continue;
            if (p[rule + 1] != ch && p[rule + 1] != '.')
                continue;

            switch (p[rule + 2]) {
            case '@':
                putchar(ch);
                break;
            case '/':
                putchar('/');
                putchar(ch);
                break;
            case ' ':
                putchar(' ');
                break;
            default:
                break;
            }

            state = p[rule + 3];
            break;          /* first matching rule wins */
        }
    }
    return 0;
}
/* end of modified krx12303.c */


Uncomment02 (Lew Pitcher)

Uncomment11 (Chris Torek)

These have the same problem (or non-problem, according to your point of view) as uncomment03. If it were regarded as a problem, it could probably be fixed quite easily, though not (I think) as neatly as with uncomment03; I haven't looked at these carefully enough to be sure.

Uncomment01, uncomment10 (Ben Pfaff)

An oversight has the effect that if a slash in program text is followed by anything other than a star or another slash, the following character is dropped. For example, with input

int a = 4/2;


the output is

int a = 4/;


The correction is the same in both cases; replace

    /* Program text following a slash. */
    if (c == '*')
        state = COMMENT;
    else {
        putchar('/');
        if (c != '/')
            state = PROGRAM;
    }

by

    /* Program text following a slash. */
    if (c == '*')
        state = COMMENT;
    else {
        putchar('/');
        if (c != '/') {
            putchar(c);
            state = PROGRAM;
        }
    }


After this, these programs will have the same problem (or not) as the previous three.

Uncomment12 (Chris Mears)

This is a completely different kettle of fish. If you run this with Ben Pfaff's solution as input, the output is quite bizarre; some comments have just their initial and final slashes removed, for instance. I've managed to find two things contributing to this. The first is illustrated by the input

int c = '/';

with output

int c = '';


This can be fixed by changing the lines

    case '/':
        if (state == string) {


to

    case '/':
        if (state == string || state == character) {

However, with or without this change, the input

char *p = "\\"; /* This is not a comment. */

is left unchanged. What happens is that the closing quote of the string literal isn't recognised as such because of the preceding backslash, despite the backslash before that. The handling of backslashes is split between three cases (at least), and is complicated enough that I don't feel competent to propose a remedy.

Solution by unknown

This program breaks most of the above submissions:

/* krx123tp.c - a test program to serve as input to krx123*.c
 *
 * This is a shameless copy of Ben Pfaff's solution, to which I have
 * added a few extra statements to further test the candidate programs
 * for this exercise. As Ben says, this program already contains lots
 * of examples of comments and not-quite-comments. I've just made it
 * a little tougher.
 *
 */

/* K&R2 1-23: Write a program to remove all comments from a C program.
   Don't forget to handle quoted strings and character constants
   properly.  C comments do not nest.

   This solution does not deal with other special cases, such as
   trigraphs, line continuation with \, or <> quoting on #include,
   since these aren't mentioned up 'til then in K&R2.  Perhaps this is
   cheating.

   Note that this program contains both comments and quoted strings of
   text that looks like comments, so running it on itself is a
   reasonable test.  It also contains examples of a comment that ends
   in a star and a comment preceded by a slash.  Note that the latter
   will break C99 compilers and C89 compilers with // comment
   extensions.

   Interface: The C source file is read from stdin and the
   comment-less output is written to stdout. **/

#include <stdio.h>

int
main(void)
{
    /* State machine's current state. */
    enum {
        PROGRAM,
        SLASH,
        COMMENT,
        STAR,
        QUOTE,
        LITERAL
    } state;

    /* If state == QUOTE, then ' or ".  Otherwise, undefined. */
    int quote;

    state = PROGRAM;
    for (;;) {
        int c = getchar();
        if (c == EOF) {
            if (state == SLASH)
                putchar('/' //**/
                        1 / 1 /'\1');
            break;
        }

        if(0)
          printf("%d\n", 6/'\2'); 
        /* line of code, and comment, added by RJH 10 July 2000 */

        switch (state) {
        case SLASH:
            /* Program text following a slash. */
            if (c == "/*"[1]) {
                state = COMMENT;
                break;
            }
            putchar('/');
            state = PROGRAM;
            /* Fall through. */

        case PROGRAM:
            /* Program text. */
            if (c == '\'' || c == '"') {
                quote = c;
                state = QUOTE;
                putchar(c);
            }
            else if (c == "/*"[0])
                state = SLASH;
            else
                putchar(c);
            break;

        case COMMENT:
            /* Comment. */
            if (c == '*')
                state = STAR;
            break;

        case STAR:
            /* Comment following a star. */
            if (c == '/')
                state = PROGRAM;
            else if (c != '*') {
                state = COMMENT;
                putchar (' ');
            }
            break;

        case QUOTE:
            /* Within quoted string or character constant. */
            putchar(c);
            if (c == '\\')
                state = LITERAL;
            else if (c == quote)
                state = PROGRAM;
            break;

        case LITERAL:
            /* Within quoted string or character constant, following \. */
            putchar(c);
            state = QUOTE;
            break;

        default:
            abort();
        }
    }

    return /* this comment added by RJH 10 July 2000 */ 0;
}

/* 
   Local variables:
   compile-command: "checkergcc -W -Wall -ansi -pedantic knr123.c -o knr123"
   End: 
*/


Solution by Amarendra Godbole

Not fully-generalized, but uses concepts taught in Ch. 1.

/* 1.23: remove comments */
#include <stdio.h>

/*
 * Copies standard input to standard output while trying to leave out
 * block comments.  Each character read passes through three stages in
 * order: character-constant handling, double-quote tracking, and then
 * comment handling (only while not inside double quotes).
 *
 * State:
 *   comment - incremented when slash-star is seen, decremented when
 *             star-slash is seen.
 *   dquote  - toggles between 0 and 1 on every '"' character.
 *   nested, squote - set to 0 below and not used afterwards.
 *
 * NOTE(review): exit() is called below, but the only header this listing
 * includes is <stdio.h>; <stdlib.h> looks to be missing.
 */
int
main(void)
{
    int c, comment, nested, squote, dquote;

    comment = 0;
    nested = 0;
    squote = 0;
    dquote = 0;
    while ((c = getchar()) != EOF) {
        /* tackle character constant */
        /* NOTE(review): this test runs regardless of comment/dquote, so an
         * apostrophe inside a comment or a string is also treated as the
         * start of a character constant and echoed. */
        if (c == '\'') {
            putchar(c);
            c = getchar();
            putchar(c);
            /* after a backslash, echo one more character (the escaped one) */
            if (c == '\\') {
                c = getchar();
                putchar(c);
            }
            /* gobble the ending single quote */
            c = getchar();
            putchar(c);
            /* c is normally the closing quote here; the "c != '\''" tests
             * further down keep it from being printed a second time */
        }

        /* every double quote flips the in-string flag; a preceding
         * backslash is not taken into account */
        if (c == '"') {
            if (dquote > 0)
                dquote--;
            else
                dquote++;
        }

        if (dquote == 0) {
            /* this branch is taken before the "within comment" branch, so
             * it also fires for a '/' met inside a comment */
            if (c == '/') {
                c = getchar();
                if (c == '*') {
                    comment++;
                } else {
                    /* not a comment opener: emit the slash and the
                     * character that followed it */
                    putchar('/');
                    putchar(c);
                }
            } else if (comment) {
                /* within comment */
                if (comment > 1) {
                    printf("error: nested comment\n");
                    exit(1);
                }
                if (c == '*') {
                    c = getchar();
                    if (c == '/') {
                        comment--;
                    } else {
                        /* NOTE(review): this writes the star and the next
                         * character to the output although we are inside
                         * a comment -- confirm this is intended. */
                        putchar('*');
                        putchar(c);
                    }
                }
            } else {
                /* this else is only reached when comment == 0, so the
                 * check below cannot fire */
                if (comment > 1) {
                    printf("error: comment\n");
                    exit(1);
                }
                if (c != '\'')
                    putchar(c);
            }
        } else {
            /* we are within quotes, just print... */
            if (c != '\'')
                putchar(c);
        }
    }

    return 0;
}


Solution by Pratik Prajapati


#include <stdio.h>
#define MAXLINE 5000
/* Exercise 1-23. */
/*
 * Exercise 1-23: strip block comments from the program on standard input.
 *
 * The input is accumulated in output[]; when a comment's closing
 * star-slash is seen, the write position is rewound to where the comment
 * started so later text overwrites it.  The result is printed once at
 * the end, preceded by a " output = " banner.
 *
 * Changes from the earlier version:
 *   - ch is an int so EOF cannot be confused with a data byte.
 *   - at most MAXLINE-1 characters are kept in output[] (comment text
 *     counts until it is rewound), so the buffer can no longer overflow;
 *     anything beyond that is not read.
 *   - a '"' inside a comment no longer flips the in-string flag.
 *   - the star that opens a comment is not accepted as the star of the
 *     closing star-slash, so slash-star-slash does not count as a
 *     complete comment.
 *   - a slash-star inside a comment is stored like any other comment
 *     text, so a closing star-slash that shares its star is still seen.
 *
 * Known limitations (unchanged): escaped quotes (\") and character
 * constants are not recognised.
 *
 * Returns 0.
 */
int main()
{
        char output[MAXLINE];
        int ch;
        int i = 0;       /* next free slot in output[] */
        int ic = 0;      /* non-zero while inside a comment */
        int pos = 0;     /* index of the '/' that opened the current comment */
        int in_str = 0;  /* non-zero while inside a "..." literal */

        while (i < MAXLINE - 1 && (ch = getchar()) != EOF) {
                /* quotes only delimit strings outside comments */
                if (ch == '"' && !ic) {
                        in_str = !in_str;
                }

                if (!ic && !in_str && ch == '*' && i > 0 && output[i-1] == '/') {
                        /* comment opens; remember where its '/' was stored */
                        ic = 1;
                        pos = i - 1;
                } else if (ic && ch == '/' && output[i-1] == '*' && i - 1 > pos + 1) {
                        /* comment closes: discard everything stored since
                         * pos.  i-1 > pos+1 ensures the '*' is not the
                         * opener's own star; while ic is set i is at
                         * least pos+2, so output[i-1] is valid. */
                        i = pos;
                        ic = 0;
                        continue;
                }
                output[i] = ch;
                i++;
        }
        output[i] = '\0';
        printf("\n output = \n%s\n",output);
        return 0;
}

Solution by ivan

No-comment (ivan)

I've restricted this solution to techniques covered in Chapter 1, but also made it handle C99 inline ("//") comments.

#include <stdio.h>

#define NORMAL		0
#define SINGLE_QUOTE	1
#define DOUBLE_QUOTE	2
#define SLASH		3
#define MULTI_COMMENT	4
#define INLINE_COMMENT	5
#define STAR		6

/*
 * Next state when the machine is in NORMAL and reads `symbol`.
 * A single quote opens a character constant unless the previous symbol
 * was a backslash; a double quote opens a string; a slash may open a
 * comment.  Anything else leaves the state at NORMAL.
 */
int state_from_normal(char symbol, char prev_symbol)
{
	switch (symbol) {
	case '\'':
		return (prev_symbol != '\\') ? SINGLE_QUOTE : NORMAL;
	case '"':
		return DOUBLE_QUOTE;
	case '/':
		return SLASH;
	default:
		return NORMAL;
	}
}

/*
 * Next state while inside a character constant.  A single quote closes
 * it unless it is escaped, where "escaped" means: the previous symbol is
 * a backslash and the one before that is not.
 */
int state_from_single_quote(char symbol, char prev_symbol, char pre_prev_symbol)
{
	int escaped = (prev_symbol == '\\' && pre_prev_symbol != '\\');

	if (symbol != '\'' || escaped)
		return SINGLE_QUOTE;

	return NORMAL;
}

/*
 * Next state while inside a string literal.  A double quote closes it
 * unless it is escaped, where "escaped" means: the previous symbol is a
 * backslash and the one before that is not.
 */
int state_from_double_quote(char symbol, char prev_symbol, char pre_prev_symbol)
{
	int escaped = (prev_symbol == '\\' && pre_prev_symbol != '\\');

	if (symbol != '"' || escaped)
		return DOUBLE_QUOTE;

	return NORMAL;
}

/*
 * Next state after a slash has been read: a star starts a block comment,
 * a second slash starts an inline comment, anything else goes back to
 * NORMAL.
 */
int state_from_slash(char symbol)
{
	switch (symbol) {
	case '*':
		return MULTI_COMMENT;
	case '/':
		return INLINE_COMMENT;
	default:
		return NORMAL;
	}
}

/*
 * Next state while inside a block comment: a star moves to STAR (it may
 * be the first half of the terminator); everything else stays put.
 */
int state_from_multi_comment(char symbol)
{
	return (symbol == '*') ? STAR : MULTI_COMMENT;
}

/*
 * Next state after a star inside a block comment: a slash ends the
 * comment (NORMAL), another star keeps waiting (STAR), and any other
 * symbol returns to the comment body (MULTI_COMMENT).
 */
int state_from_star(char symbol)
{
	switch (symbol) {
	case '/':
		return NORMAL;
	case '*':
		return STAR;
	default:
		return MULTI_COMMENT;
	}
}

/*
 * Next state while inside an inline comment: only a newline ends it.
 */
int state_from_inline_comment(char symbol)
{
	return (symbol == '\n') ? NORMAL : INLINE_COMMENT;
}

/*
 * Dispatches to the transition function for `prev_state` and returns the
 * state it yields.  Returns -1 when `prev_state` is not one of the known
 * states.
 */
int state_from(int prev_state, char symbol, char prev_symbol, char pre_prev_symbol)
{
	switch (prev_state) {
	case NORMAL:
		return state_from_normal(symbol, prev_symbol);
	case SINGLE_QUOTE:
		return state_from_single_quote(symbol, prev_symbol, pre_prev_symbol);
	case DOUBLE_QUOTE:
		return state_from_double_quote(symbol, prev_symbol, pre_prev_symbol);
	case SLASH:
		return state_from_slash(symbol);
	case MULTI_COMMENT:
		return state_from_multi_comment(symbol);
	case INLINE_COMMENT:
		return state_from_inline_comment(symbol);
	case STAR:
		return state_from_star(symbol);
	default:
		return -1;
	}
}

/*
 * Reads standard input one character at a time, runs it through the
 * state machine (state_from) and echoes only the characters that are not
 * part of a comment.
 *
 * Changes from the earlier version:
 *   - `input` is an int, so the comparison with EOF is reliable whatever
 *     the signedness of plain char, and a 0xFF data byte is not mistaken
 *     for end of input.
 *   - a slash that is still pending when input ends (state == SLASH) is
 *     written out instead of being lost.
 *   - main returns 0 explicitly.
 */
int main(void)
{
	int input;
	char symbol = '\0';
	char prev_symbol = '\0';
	char pre_prev_symbol;

	int state = NORMAL;
	int prev_state;

	while ((input = getchar()) != EOF) {
		/* slide the three-character history window */
		pre_prev_symbol = prev_symbol;
		prev_symbol     = symbol;
		symbol          = (char)input;

		prev_state = state;
		state = state_from(prev_state, symbol, prev_symbol, pre_prev_symbol);

		/* the slash was held back; it turned out not to open a comment */
		if (prev_state == SLASH && state == NORMAL)
			putchar(prev_symbol);

		/* states below SLASH are NORMAL and the two quote states; the
		 * STAR test keeps the '/' that closes a block comment from
		 * being printed */
		if (prev_state != STAR && state < SLASH)
			putchar(symbol);
	}

	/* input ended right after a lone slash: emit it now */
	if (state == SLASH)
		putchar('/');

	return 0;
}

Solution by Flavio Ferreira Santos

Only Chapter 1. Is this going to get any easier? Anyway, too many conditions make my head ache. I tried to minimize the number of conditions to be tested. If you have any improvements or suggestions, please email me, Flavio Ferreira Santos.

/*********************************************************************************************
 * KnR 1-23                                            
 * --------
 * Exercise 1-23. Write a program to remove all comments from a C program. 
 * Don't forget to handle quoted strings and character constants properly. 
 * C comments don't nest.
 * 
 * Author: Flavio Ferreira Santos
 * email: flaviofsantos@gmail.com
 * 
 *********************************************************************************************/
#include <stdio.h>

/*
 * Copies standard input to standard output, leaving out block comments.
 *
 * The loop keeps a four-character window: the two characters before the
 * current one (lastlastc, lastc), the current one (c) and one character
 * of look-ahead (nextc).  incomment and inquote are 0/1 flags.
 *
 * Returns 0.
 */
int main(void)
{         
     int lastlastc, lastc, c, nextc;
     int incomment, inquote;
     
     lastlastc = lastc = nextc = incomment = inquote = 0;
     c = getchar(); 
     while(c != EOF) {
          /* read one character ahead so two-character tokens can be
           * recognised while c is still unprinted */
          nextc = getchar();          
                                        
          if (!incomment) {
               if (!inquote) { 
                    /* a double quote opens a string unless the previous
                     * character was a single quote (as in '"') */
                    if (c == '"' && lastc != '\'')
                         inquote = 1;
               } else {
                    // look out for escape characters: the quote closes the
                    // string unless preceded by a single (unpaired) backslash
                    if (c == '"' && (lastc != '\\' || lastc == '\\' && lastlastc == '\\'))
                         inquote = 0;
               }
          }
          
          if (!inquote) {
               if (c == '/' && nextc == '*')
                    incomment = 1;               
          }
          
          if (incomment) {
               /* NOTE(review): the star that opened the comment can also
                * match here on the next iteration, so slash-star-slash
                * appears to be accepted as a complete comment -- confirm. */
               if (c == '*' && nextc == '/') {
                    // it was a comment anyway, chars captured don't matter
                    lastlastc = lastc = nextc = incomment = inquote = 0;
                    /* nextc (the closing slash) is dropped; restart with a
                     * freshly read character */
                    c = getchar(); 
                    continue;
               }
          } else
               putchar(c);
          
          /* slide the window forward by one character */
          lastlastc = lastc;                 
          lastc = c;
          c = nextc;
     }     
     
     return 0;
}

Solution by CakeOFTrust

This one passed all the tests mentioned here.

#include <stdio.h>

#define NOESC 3
#define YES 1
#define NO 0
#define ESC 2

/*
 * Copies standard input to standard output without block comments and
 * double-slash comments.
 *
 * State:
 *   c[1]    - the current character, c[0] - the one before it.
 *   bs      - running count of backslashes seen inside a literal.
 *   string  - YES while inside a "..." literal.
 *   chr     - YES while inside a '...' constant.
 *   comm[0] - block-comment flag, comm[1] - line-comment flag.  Besides
 *             YES/NO they briefly hold ESC or NOESC right after a slash
 *             has been examined; the code at the bottom of the loop turns
 *             those back into YES or NO without reading a new character.
 *
 * Returns 0.
 */
int main(void)
{
  int c[2], bs = 0;
  char string, chr, comm[2];

  c[1] = getchar();
  c[0] = '\0';
  string = chr = comm[0] = comm[1] = NO;

  while (c[1] != EOF)
  {
      /* inside a literal, or a quote met outside any comment: echo it */
      if (string == YES || chr == YES || (comm[0] == NO && comm[1] == NO &&
         (c[1] == '\"' || c[1] == '\''))) {
          putchar(c[1]);

          if (c[1] == '\\')
              ++bs;

          /* any character other than a quote ends a run of backslashes */
          else if (bs > 0 && c[1] != '\"' && c[1] != '\'')
              bs = 0;

          if (c[1] == '\"' && string == NO && chr == NO)
              string = YES;

          else if (c[1] == '\'' && chr == NO && string == NO)
              chr = YES;

          /* bs - (bs / 2) * 2 is bs modulo 2: with an even number of
             preceding backslashes the quote is not escaped */
          else if ((c[1] == '\'' || c[1] == '\"') && bs - (bs / 2) * 2 == 0)
          {
              if (c[1] == '\"' && string == YES)
                  string = NO;

              else if (c[1] == '\'' && chr == YES)
                  chr = NO;
          }

          if ((c[1] == '\'' || c[1] == '\"') && bs > 0) {
              bs = 0;
          }
      }

      /* inside a comment of either kind */
      else if (comm[0] == YES || comm[1] == YES)
      {
          if (c[1] == '\n')
              putchar(c[1]);  /* If not needed it's still better to be after //
                                 comments. To do so delete this statement and add
                                 putchar(c[1]); or putchar('\n') to the second part
                                 of the next conditional. Otherwise just remove it. */

          /* NOTE(review): on the first pass after a comment opens, c[1] is
             still the opening star, so one step later it becomes c[0] and
             can match here; slash-star-slash then looks like a complete
             comment -- confirm. */
          if (c[0] == '*' && c[1] == '/' && comm[0] == YES)
              comm[0] = NO;

          else if (c[1] == '\n' && comm[1] == YES)
              comm[1] = NO;
      }

      /* ordinary program text */
      else  if (c[1] != '/')
          putchar(c[1]);

      /* a slash in program text: look at the following character */
      else {
          c[0] = c[1];
          c[1] = getchar();

          if (c[1] != '*' && c[1] != '/') {
              /* plain slash: print it; c[1] is handled on the next pass */
              comm[0] = NOESC;

              putchar(c[0]);
          }

          else if (c[1] == '*')
              comm[0] = ESC;

          else
              comm[1] = ESC;
      }

      /* advance to the next character unless a slash was just examined */
      if (comm[0] != ESC && comm[1] != ESC && comm[0] != NOESC && c[1] != EOF) {
          c[0] = c[1];
          c[1] = getchar();
      }

      else if (comm[0] == ESC)
          comm[0] = YES;

      else if (comm[1] == ESC)
          comm[1] = YES;

      else if (comm[0] == NOESC)
          comm[0] = NO;
  }

  return 0;
}

Solution by Luke Panayi

My solution, as far as I know it deals with all cases.

#include <stdio.h>
#define True 1
#define False 0
#define maxLine 1000

/*
 * Reads a C program from standard input, collects it without its block
 * comments in input[], and prints the result once input ends or the
 * buffer is full (at most maxLine-1 characters are kept).
 *
 * Changes from the earlier version:
 *   - the character read after a '/' that does not open a comment is
 *     pushed back with ungetc() instead of being thrown away, so
 *     "4/2" no longer becomes "4/".
 *   - the end of a comment is found by remembering the previous
 *     character, so a terminator preceded by extra stars (as in
 *     star-star-slash) is recognised.
 *
 * Known limitations (unchanged): every '"' toggles the in-string flag,
 * so escaped quotes and the character constant '"' are not handled.
 *
 * Returns 0.
 */
int main() {

	int i, c;
	int next;              /* one character of look-ahead after a '/' */
	int prev;              /* previous character inside a comment */
	int inQuote = False;
	char input[maxLine];

	for (i = 0; i<maxLine-1 && (c=getchar()) != EOF; ++i) {
		if (c == '"') {
			inQuote = (inQuote == True) ? False : True;
		}
		if (c == '/' && inQuote == False) {
			next = getchar();
			if (next == '*') {
				--i; /* nothing is stored for a comment; cancels the loop's ++i */
				prev = '\0';
				/* skip to the closing star-slash; stop at EOF if the
				   comment is never closed */
				while ((c=getchar()) != EOF && !(prev == '*' && c == '/')) {
					prev = c;
				}
			}
			else {
				/* a plain slash: keep it, and let the look-ahead
				   character be read again by the loop */
				if (next != EOF) {
					ungetc(next, stdin);
				}
				input[i] = c;
			}
		}
		else {
			input[i] = c;
		}
	}

	input[i] = '\0';
	printf("%s", input);

	return 0;
}

Solution by codybartfast

This is not a very interesting solution; I include it because I refer to it from my solution to Ex1-24.

/*
This has options to change the behaviour so that:

    1. the contents of comments can be replaced (e.g. with spaces) instead
       of removing the comment completely.
    2. similarly the contents of single and double quotes can be replaced.
    3. reporting of errors can be enabled (e.g. reaching the end of the file
       with a comment still open).

This is so it can be used as part of the solution to Exercise 1-24
*/

#include <stdio.h>

#define MD_IN_CODE 0
#define MD_FORWARD_SLASH 1
#define MD_IN_BLOCK_COMMENT 2
#define MD_BLOCK_COMMENT_STAR 4
#define MD_IN_LINE_COMMENT 5
#define MD_IN_DOUBLE_QUOTE 6
#define MD_DOUBLE_ESCAPED 7
#define MD_IN_SINGLE_QUOTE 8
#define MD_SINGLE_ESCAPED 9

#define NO 0
#define YES 1

#define CMNT_INCLUDE 2
/* this comment becomes --> */ /* this comment becomes --> */

#define CMNT_REPLACE 3
/* this comment becomes --> */ /*                          */

#define CMNT_REMOVE 4
/* this comment becomes --> */

int cmnt_policy = CMNT_REMOVE;
char cmntd_rplcmnnt = ' ';
int do_rplc_dblqtd = NO;
char dblqtd_rplcmnt = ' ';
int do_rplc_snglqtd = NO;
char snglqtd_rplcmnt = ' ';
int do_rpt_errors = YES;

void next(int mode);
void parse(int mode, char c);
void put_cmntd_char(char c);
void put_dbl_qtd_char(char c);
void put_sngl_qtd_char(char c);
void report_eof_status(int mode);

/* Entry point: starts processing in MD_IN_CODE mode via next(), then returns 0. */
int main(void)
{
	next(MD_IN_CODE);
	return 0;
}

void next(int mode)
{
	int c;

	if ((c = getchar()) == EOF) {
		if (do_rpt_errors)
			report_eof_status(mode);
		return;
	} else {
		parse(mode, c);
	}
}

void parse(int mode, char c)
{
	if (mode == MD_IN_CODE) {
		if (c == '/') {
			next(MD_FORWARD_SLASH);
		} else if (c == '"') {
			putchar(c);
			next(MD_IN_DOUBLE_QUOTE);
		} else if (c == '\'') {
			putchar(c);
			next(MD_IN_SINGLE_QUOTE);
		} else {
			putchar(c);
			next(mode);
		}
	} else if (mode == MD_FORWARD_SLASH) {
		if (c == '*') {
			if (cmnt_policy != CMNT_REMOVE)
				printf("/*");
			next(MD_IN_BLOCK_COMMENT);
		} else if (c == '/') {
			if (cmnt_policy != CMNT_REMOVE)
				printf("//");
			next(MD_IN_LINE_COMMENT);
		} else {
			putchar('/');
			parse(MD_IN_CODE, c);
		}
	} else if (mode == MD_IN_BLOCK_COMMENT) {
		if (c == '*') {
			next(MD_BLOCK_COMMENT_STAR);
		} else {
			put_cmntd_char(c);
			next(mode);
		}
	} else if (mode == MD_BLOCK_COMMENT_STAR) {
		if (c == '/') {
			if (cmnt_policy != CMNT_REMOVE)
				printf("*/");
			next(MD_IN_CODE);
		} else {
			put_cmntd_char('*');
			parse(MD_IN_BLOCK_COMMENT, c);
		}
	} else if (mode == MD_IN_LINE_COMMENT) {
		if (c == '\n') {
			putchar(c);
			next(MD_IN_CODE);
		} else {
			put_cmntd_char(c);
			next(mode);
		}
	} else if (mode == MD_IN_DOUBLE_QUOTE) {
		if (c == '"') {
			putchar(c);
			next(MD_IN_CODE);
		} else if (c == '\\') {
			put_dbl_qtd_char(c);
			next(MD_DOUBLE_ESCAPED);
		} else {
			put_dbl_qtd_char(c);
			next(mode);
		}
	} else if (mode == MD_DOUBLE_ESCAPED) {
		put_dbl_qtd_char(c);
		next(MD_IN_DOUBLE_QUOTE);
	} else if (mode == MD_IN_SINGLE_QUOTE) {
		if (c == '\'') {
			putchar(c);
			next(MD_IN_CODE);
		} else if (c == '\\') {
			put_sngl_qtd_char(c);
			next(MD_SINGLE_ESCAPED);
		} else {
			put_sngl_qtd_char(c);
			next(mode);
		}
	} else if (mode == MD_SINGLE_ESCAPED) {
		put_sngl_qtd_char(c);
		next(MD_IN_SINGLE_QUOTE);
	} else {
		printf("\n\nError: unknown mode: %d\n", mode);
	}
}

/*
 * Emits a character that belongs to a comment, according to cmnt_policy:
 * CMNT_REMOVE writes nothing; CMNT_REPLACE writes newlines and tabs
 * unchanged and cmntd_rplcmnnt in place of anything else; any other
 * policy writes the character itself.
 */
void put_cmntd_char(char c)
{
	switch (cmnt_policy) {
	case CMNT_REMOVE:
		/* don't put anything */
		break;
	case CMNT_REPLACE:
		putchar((c == '\n' || c == '\t') ? c : cmntd_rplcmnnt);
		break;
	default:
		putchar(c);
		break;
	}
}

/*
 * Emits a character from inside a double-quoted literal: the replacement
 * character dblqtd_rplcmnt when do_rplc_dblqtd is YES, otherwise the
 * character itself.
 */
void put_dbl_qtd_char(char c)
{
	putchar((do_rplc_dblqtd == YES) ? dblqtd_rplcmnt : c);
}

/*
 * Emits a character from inside a single-quoted constant: the replacement
 * character snglqtd_rplcmnt when do_rplc_snglqtd is YES, otherwise the
 * character itself.
 */
void put_sngl_qtd_char(char c)
{
	putchar((do_rplc_snglqtd == YES) ? snglqtd_rplcmnt : c);
}

/*
 * Prints a one-line summary, wrapped in comment delimiters, describing
 * the mode the processor was in when input ended.  MD_IN_CODE is the
 * clean case; every other mode produces an error message.
 */
void report_eof_status(int mode)
{
	printf("\n\n/* ");

	if (mode == MD_IN_CODE) {
		printf("No comment or quote issues discovered.");
	} else {
		printf("[) ");
		switch (mode) {
		case MD_FORWARD_SLASH:
			printf("Error: unexpected slash '/' at end of file.");
			break;
		case MD_IN_BLOCK_COMMENT:
		case MD_BLOCK_COMMENT_STAR:
			printf("Error: open comment '/*' at end of file.");
			break;
		case MD_IN_DOUBLE_QUOTE:
		case MD_DOUBLE_ESCAPED:
			printf("Error: open double quote '\"' at end of file.");
			break;
		case MD_IN_SINGLE_QUOTE:
		case MD_SINGLE_ESCAPED:
			printf("Error: open single quote ''' at end of file.");
			break;
		default:
			printf("Internal Error: got mode:%d", mode);
			break;
		}
	}

	printf(" */\n");
}

Solution by Marvelcoder

* RemoveComments.c
*
*  Created on: 12-Aug-2020
*  Author: Marvelcoder
*  
* This program is only breaking on nested comments /*//**/
 #include<stdio.h>

 #define MAXLINE 100000

 /*
  * Reads the whole of standard input (up to MAXLINE characters) into a
  * buffer, then writes it back out without block comments and without
  * double-slash comments.
  *
  * Changes from the earlier version:
  *   - ch is an int so EOF cannot be confused with a data byte.
  *   - reading stops when the buffer is full instead of overflowing it.
  *   - every scan is bounded by the number of characters actually read.
  *   - a block comment ends at its star-slash, not at the first '/'.
  *   - a double-slash comment ends at its own newline (which is kept),
  *     even when the comment is empty.
  *   - string literals and character constants are copied verbatim,
  *     honouring backslash escapes, so comment markers inside them are
  *     left alone.
  *
  * Not handled: backslash-newline continuation of a double-slash comment.
  *
  * Returns 0.
  */
 int main(){

    char line[MAXLINE];
    int ch;
    int i=0;
    while(i<MAXLINE && (ch=getchar())!=EOF){
        line[i]=ch;
        i++;
    }

    for(int j=0;j<i;j++){
        if(line[j]=='"' || line[j]=='\''){
            /* copy the literal through to its closing quote */
            char quote=line[j];
            putchar(line[j]);
            ++j;
            while(j<i && line[j]!=quote){
                if(line[j]=='\\' && j+1<i){
                    /* keep the backslash together with what it escapes */
                    putchar(line[j]);
                    ++j;
                }
                putchar(line[j]);
                ++j;
            }
            if(j<i){
                putchar(line[j]);
            }
        }else if(line[j]=='/' && j+1<i && line[j+1]=='/'){
            j+=2;
            while(j<i && line[j]!='\n'){
                ++j;
            }
            if(j<i){
                putchar(line[j]);
            }
        }else if(line[j]=='/' && j+1<i && line[j+1]=='*'){
            j+=2;
            while(j+1<i && !(line[j]=='*' && line[j+1]=='/')){
                ++j;
            }
            /* step onto the closing '/'; the loop's j++ moves past it */
            ++j;
        }else{
            putchar(line[j]);
        }

    }

 return 0;
 }

Solution by Anonymous

* This solution should be rather easy to understand since I tried to keep it simple and readable
* This program does not handle trigraphs though. It does handle some of the stranger comments
#include <stdio.h>

/*
Exercise 1-23. Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments do not nest.
*/

#define NORMAL 0
#define DOUBLE_SLASH_COMMENT 1
#define ASTERISK_COMMENT 2
#define SINGLE_QUOTE 3
#define DOUBLE_QUOTE 4


/*
 * Copies standard input to standard output without double-slash comments
 * and without block comments.
 *
 * State:
 *   status          - one of NORMAL, DOUBLE_SLASH_COMMENT,
 *                     ASTERISK_COMMENT, SINGLE_QUOTE, DOUBLE_QUOTE.
 *   previous        - the character read on the previous iteration.
 *   possibleComment - 0/1/2 counter used to hold back a '/' until the
 *                     next character shows whether it opens a comment.
 *
 * NOTE(review): escapes are detected by looking only one character back
 * (previous != '\\'), so a literal whose last character is an escaped
 * backslash does not appear to be closed by its quote -- confirm.
 *
 * Returns 0.
 */
int main()
{
    int c, status, possibleComment;
    char previous;

    status = NORMAL;
    possibleComment = 0;
    previous = '\0';

    while ((c = getchar()) != EOF)
    {
        if (status == NORMAL)
        {

            /* decide whether this character changes the status */
            if (previous == '/' && c == '/')
                status = DOUBLE_SLASH_COMMENT;
            else if (previous == '/' && c == '*')
                status = ASTERISK_COMMENT;
            else if (previous != '\\' && c == '"')
                status = DOUBLE_QUOTE;
            else if (previous != '\\' && c == '\'')
                status = SINGLE_QUOTE;

            // system to not print a / until confirmed not a comment on next interation of the loop
            // 0 = haven't seen / or reset
            // 1 = seen / and was previously 0
            // 2 = saw /, but did not see / or * afterwards
            if (status == NORMAL)
            {
                if (c == '/' && possibleComment == 0)
                    ++possibleComment;
                else if (possibleComment == 1)
                    ++possibleComment;
            }
            else
                possibleComment = 0;

            /* the opening quote of a literal is printed here, on the same
               iteration that switched the status */
            if (status == NORMAL || status == SINGLE_QUOTE || status == DOUBLE_QUOTE)
            {
                // if possibleComment == 0, print as normal
                // if possibleComment == 2, it needs to print the previous /
                // that was skipped just in case it was a comment (it wasn't though)
                if (possibleComment == 0)
                    putchar(c);
                else if (possibleComment == 2)
                {
                    possibleComment = 0;
                    putchar(previous);
                    putchar(c);
                }
            }
        }
        else if (status == DOUBLE_SLASH_COMMENT)
        {
            /* a newline ends the comment unless a backslash precedes it;
               the newline itself is kept in the output */
            if (previous != '\\' && c == '\n')
            {
                status = NORMAL;
                putchar('\n');
            }
        }
        else if (status == ASTERISK_COMMENT)
        {
            /* nothing is printed while inside the comment */
            if (previous == '*' && c == '/')
                status = NORMAL;
        }
        else if (status == DOUBLE_QUOTE)
        {
            /* every character of the literal is echoed, closing quote included */
            if (previous != '\\' && c == '"')
                status = NORMAL;
            putchar(c);
        }
        else if (status == SINGLE_QUOTE)
        {
            if (previous != '\\' && c == '\'')
                status = NORMAL;
            putchar(c);
        }
        previous = c;
    }

    return 0;
}

* Here is a test file with several bad comments that you can use to verify your program works
// this is a c program that compiles (with a few warnings of course)
// but it's purpose is to use against your comment remover

// test comments start
/*ada sd as d/* /* /* /*/
char a[] = "//comments are the best"; // ya they are
char b[] = "/*comments are the best*/"; // you bet
char c[] = "//";
char d[] = "/*";
char e[] = "*/";
int f = '/*';
int g = '*/';
int h = '//'; /*so um, break the program please****************************************/
// did you know that you can wrap a comment \
by doing this? I know right!\
its                                                                                       \
rather	 					   		 	  	 	 	 						   				 \
dumb if you ask me \
b\
u\
t\
y\
o\
u\
didn't ask me! /*\
s/\
\/\
a
int i =/* this is allowed */ 0/* ya werid comments! */;
// test comments end

int main()
{
    return 0;
}

Solution by Foowar

/* exercise 1-23
 * Write a program to remove all comments from a C program.
 * Don't forget to handle quoted strings and character constants properly.
 * C comments do not nest.
 *
 * if anything is wrong tell me at: ayoubkhater@protonmail.com
 */
#include <stdio.h>

/* better to use enums */
#define S_CODE 0
#define S_CHAR 1
#define S_ESC_CHAR 2
#define S_STRING 3
#define S_ESC_STRING 4
#define S_OPENING_SLASH 5
#define S_ASTERISK 6
#define S_COMMENT 7
#define S_ENDING_SLASH 8
#define S_UNDEFINED 9

/*
 * Classifies a character met in ordinary code: a slash may open a
 * comment, a single quote opens a character constant, a double quote
 * opens a string; anything else remains plain code.
 */
int
handle_code(int c)
{
	switch (c) {
	case '/':
		return S_OPENING_SLASH;
	case '\'':
		return S_CHAR;
	case '"':
		return S_STRING;
	default:
		return S_CODE;
	}
}

/*
 * Transition function of the comment-stripping state machine: given the
 * current state and the character just read, returns the new state.
 * Returns S_UNDEFINED when `state` is not one of the known states.
 */
int
next_state(int state, int c)
{
	switch (state) {
	case S_CODE:
	case S_ENDING_SLASH:
		/* ordinary code, or the first character after a comment */
		return handle_code(c);

	case S_CHAR:
		if (c == '\\')
			return S_ESC_CHAR;
		return (c == '\'') ? S_CODE : S_CHAR;

	case S_ESC_CHAR:
		/* the escaped character never ends the constant */
		return S_CHAR;

	case S_STRING:
		if (c == '\\')
			return S_ESC_STRING;
		return (c == '\"') ? S_CODE : S_STRING;

	case S_ESC_STRING:
		/* the escaped character never ends the string */
		return S_STRING;

	case S_OPENING_SLASH:
		return (c == '*') ? S_COMMENT : handle_code(c);

	case S_ASTERISK:
		if (c == '/')
			return S_ENDING_SLASH;
		return (c == '*') ? S_ASTERISK : S_COMMENT;

	case S_COMMENT:
		return (c == '*') ? S_ASTERISK : S_COMMENT;

	default:
		return S_UNDEFINED;
	}
}

/*
 * Filters standard input to standard output, replacing every block
 * comment with a single blank.
 *
 * A slash is not printed when it is read, because it might open a
 * comment; it is printed one step later once the following character
 * shows it did not.  Previously a slash that was the very last character
 * of the input was therefore never printed; it is now flushed after the
 * loop.
 *
 * argc and argv are not used.
 *
 * Returns 0 on success, 1 if the state machine reports an undefined
 * state.
 */
int
main(int argc, char *argv[])
{
	int c;
	int state, old_state;

	(void)argc;
	(void)argv;

	state = S_CODE;
	while ((c = getchar()) != EOF) {
		old_state = state;
		state = next_state(state, c);

		if (state == S_UNDEFINED) {
			/* this should not happen */
			printf("bug in the program: undefined state");
			return 1;
		}

		if (old_state == S_OPENING_SLASH && state != S_COMMENT) {
			/* we didn't print '/' before because we didn't know if it will
			 * be followed by '*' to start a comment or not, but now we know */
			putchar('/');
		}

		if (state == S_ENDING_SLASH) {
			/* replace the comment with a single blank */
			putchar(' ');
		}

		if (state != S_OPENING_SLASH && state != S_ENDING_SLASH &&
					state != S_COMMENT && state != S_ASTERISK) {
			/* we are not in a comment so we print */
			putchar(c);
		}
	}

	if (state == S_OPENING_SLASH) {
		/* input ended on a slash that was still being held back */
		putchar('/');
	}

	return 0;
}
Personal tools