Ben Pfaff (two solutions)
return /* comment inside return statement */ 0;
/******************************************************
"Write a program to remove all comments from a C program.
Don't forget to handle quoted strings and character
constants properly. C comments do not nest."
Author: Rick Dearman (rick@ricken.demon.co.uk)
******************************************************/
#include <stdio.h>
#define MAXLINE 1000 /* capacity of line[], including the terminating '\0' */
char line[MAXLINE]; /* buffer holding the most recently read piece of input; filled by getline() */
/* Reads the next line into line[] and returns its length (taken from the K&R book).
   NOTE(review): POSIX <stdio.h> also declares a function named getline; this
   prototype may conflict when POSIX extensions are enabled -- confirm on the target. */
int getline(void);
/*
 * Copy standard input to standard output with C block comments removed.
 *
 * Input is consumed through getline(), one buffer-full at a time, and the
 * scanner state is carried from one buffer to the next so that comments
 * and literals may span lines:
 *
 *   in_comment - nonzero while between the comment delimiters
 *   in_quote   - 0 outside a literal, otherwise the delimiter (' or ")
 *                that opened the string or character constant
 *
 * Inside a literal a backslash protects the following character, so an
 * escaped quote does not close the literal.  Delimiters are recognised
 * with a one-character look-ahead inside line[]; a delimiter or escape
 * that happens to be split across two getline() buffers (only possible
 * for lines longer than the buffer) is not recognised.
 *
 * Always returns 0.
 */
int
main()
{
    int in_comment;
    int in_quote;
    int len;
    int t;

    in_comment = in_quote = 0;
    while ((len = getline()) > 0)
    {
        t = 0;
        while (t < len)
        {
            if (in_comment)
            {
                /* line[len] is '\0', so looking at line[t+1] is always safe. */
                if (line[t] == '*' && line[t+1] == '/')
                {
                    in_comment = 0;
                    t = t + 2;
                }
                else
                {
                    t++;
                }
            }
            else if (in_quote)
            {
                putchar(line[t]);
                if (line[t] == '\\' && t + 1 < len)
                {
                    /* Escaped character: copy it verbatim, it cannot end the literal. */
                    putchar(line[t+1]);
                    t = t + 2;
                }
                else
                {
                    if (line[t] == in_quote)
                        in_quote = 0;
                    t++;
                }
            }
            else if (line[t] == '/' && line[t+1] == '*')
            {
                in_comment = 1;
                t = t + 2;
            }
            else
            {
                if (line[t] == '"' || line[t] == '\'')
                    in_quote = line[t];
                putchar(line[t]);
                t++;
            }
        }
    }
    return 0;
}
/* getline: read the next input line into the global buffer line[].
   At most MAXLINE-1 characters are stored; the newline, when one was
   read, is kept.  The buffer is always '\0'-terminated.  Returns the
   number of characters stored, which is 0 only at end of input. */
int getline(void)
{
    extern char line[];
    int ch = 0;
    int n = 0;

    while (n < MAXLINE - 1)
    {
        ch = getchar();
        if (ch == EOF || ch == '\n')
            break;
        line[n++] = ch;
    }
    if (ch == '\n')
        line[n++] = ch;
    line[n] = '\0';
    return n;
}
/* K&R2 1-23: Write a program to remove all comments from a C program.
Don't forget to handle quoted strings and character constants
properly. C comments do not nest.
This solution does not deal with other special cases, such as
trigraphs, line continuation with \, or <> quoting on #include,
since these aren't mentioned up 'til then in K&R2. Perhaps this is
cheating.
Note that this program contains both comments and quoted strings of
text that looks like comments, so running it on itself is a
reasonable test. It also contains examples of a comment that ends
in a star and a comment preceded by a slash. Note that the latter
will break C99 compilers and C89 compilers with // comment
extensions.
Interface: The C source file is read from stdin and the
comment-less output is written to stdout. **/
#include <stdio.h>
/*
 * Copy stdin to stdout with C comments removed (nothing is written in
 * their place).  Implemented as a character-at-a-time state machine.
 * Always returns 0.
 */
int
main(void)
{
#define PROGRAM 0   /* ordinary program text */
#define SLASH 1     /* program text, a '/' is being held back */
#define COMMENT 2   /* inside a comment */
#define STAR 3      /* inside a comment, previous character was '*' */
#define QUOTE 4     /* inside a string or character constant */
#define LITERAL 5   /* inside a constant, previous character was '\' */
    /* State machine's current state, one of the above values. */
    int state;
    /* If state == QUOTE or LITERAL, then ' or ". Otherwise meaningless. */
    int quote = 0;
    /* Input character. */
    int c;

    state = PROGRAM;
    while ((c = getchar()) != EOF) {
        /* The following cases are in guesstimated order from most common
           to least common. */
        if (state == PROGRAM || state == SLASH) {
            if (state == SLASH) {
                /* Program text following a slash. */
                if (c == '*')
                    state = COMMENT;
                else {
                    /* The held-back slash was real program text; emit it
                       and then treat c as ordinary program text below. */
                    putchar('/');
                    state = PROGRAM;
                }
            }
            if (state == PROGRAM) {
                /* Program text. */
                if (c == '\'' || c == '"') {
                    quote = c;
                    state = QUOTE;
                    putchar(c);
                }
                else if (c == "/*"[0])
                    state = SLASH;
                else
                    putchar(c);
            }
        }
        else if (state == COMMENT) {
            /* Comment. */
            if (c == "/*"[1])
                state = STAR;
        }
        else if (state == QUOTE) {
            /* Within quoted string or character constant. */
            putchar(c);
            if (c == '\\')
                state = LITERAL;
            else if (c == quote)
                state = PROGRAM;
        }
        else if (state == STAR) {
            /* Comment following a star. */
            if (c == '/')
                state = PROGRAM;
            else if (c != '*')
                state = COMMENT;
        }
        else /* state == LITERAL */ {
            /* Within quoted string or character constant, following \. */
            putchar(c);
            state = QUOTE;
        }
    }
    /* Input ended right after a slash: that slash is still owed.  The
       blank between the division operator and the comment keeps this
       self-test expression valid for compilers that accept // comments. */
    if (state == SLASH)
        putchar('/' / /**/ 1);
    return 0;
}
/*
Local variables:
compile-command: "checkergcc -W -Wall -ansi -pedantic knr123-0.c -o knr123-0"
End:
*/
/* Lew Pitcher <lpitcher@yesic.com> */
/*/
** derem - remove C comments
**
** (attempt to solve K&R Exercise 1-22)
**
** As I only have v1 copy of K&R, I cannot
** be sure what is covered in K&R ANSI chapter 1.
** So, I restrict myself to the components covered
** in K&R v1 chapter 1, but modified for requisite ANSI
** features (int main() and return value).
**
** Components covered in v1 K&R chapter 1 include:
** while (), for (), if () else
** getchar(), putchar(), EOF
** character constants, character escapes
** strings
** array subscripting
**
** Not directly covered are
** string subscripting ( "/*"[0] )
** initializers ( int state = PROGRAM; )
**/
/*/*/
#include <stdio.h>
#define PROGRAM 0        /* ordinary program text */
#define BEGIN_COMMENT 1  /* a '/' has been read and is being held back */
#define COMMENT 2        /* inside a comment */
#define END_COMMENT 3    /* inside a comment, previous character was '*' */
#define QUOTE 4          /* inside a string or character constant */
/*
** main - copy stdin to stdout, dropping C comments.
**
** A slash is held back (BEGIN_COMMENT) until the next character shows
** whether it opens a comment.  If it does not, the slash is written and
** the next character is handled exactly like ordinary program text, so
** a second slash is held back in turn and a quote opens a literal.
** A slash still pending at end of input is written out.
**
** Always returns 0.
*/
int main(void)
{
    int this_char, quote_char;
    int state;

    quote_char = 0;
    state = PROGRAM;
    while ((this_char = getchar()) != EOF)
    {
        if (state == PROGRAM)
        {
            if (this_char == '/')
                state = BEGIN_COMMENT;
            else if ((this_char == '"') || (this_char == '\''))
            {
                state = QUOTE;
                putchar(quote_char = this_char);
            }
            else putchar(this_char);
        }
        else if (state == BEGIN_COMMENT)
        {
            if (this_char == '*')
                state = COMMENT;
            else
            {
                putchar('/'); /* the held-back slash was program text */
                if (this_char == '/')
                    state = BEGIN_COMMENT; /* stuttered: hold the new slash */
                else if ((this_char == '"') || (this_char == '\''))
                {
                    state = QUOTE;
                    putchar(quote_char = this_char);
                }
                else
                {
                    state = PROGRAM;
                    putchar(this_char);
                }
            }
        }
        else if (state == QUOTE)
        {
            putchar(this_char);
            if (this_char == '\\')
            {
                /* escaped character: copy it without interpreting it */
                this_char = getchar();
                if (this_char == EOF)
                    break;
                putchar(this_char);
            }
            else if (this_char == quote_char)
                state = PROGRAM;
        }
        else if (state == COMMENT)
        {
            if (this_char == '*')
                state = END_COMMENT;
        }
        else if (state == END_COMMENT)
        {
            if (this_char == '/')
                state = PROGRAM;
            else if (this_char != '*') /* stuttered */
                state = COMMENT;
        }
    }
    if (state == BEGIN_COMMENT)
        putchar('/'); /* input ended on a slash that never became a comment */
    return 0;
}
/* Gregory Pietsch <gkp1@flash.net> */
#include <stdio.h>
/* Transition table for the scanner in main(), stored as consecutive
   four-character rules: current state, input character ('.' matches
   any character), action, next state.  Rules are tried in order and
   the first match wins.
   Actions: '0' echoes the input character, '/' writes a slash and then
   the input character, ' ' writes one blank, any other action
   character writes nothing.
   States as encoded by the rules: '0' program text, '1' after a slash,
   '2' inside a comment, '3' inside a comment after a star, '4' inside
   a string, '5' inside a character constant, '6' and '7' after a
   backslash inside a string / character constant. */
char p[] =
"0/!10\"040\'050.001/011*!21\"/41\'/51./02*!32.!23/ "
"03*!33.!24\"004\\064.045\'005\\075.056.047.05";
/* Copy stdin to stdout, driving a state machine from the rule table p[].
   For every input character the first rule whose state matches the
   current state and whose input field equals the character (or is the
   wildcard '.') is applied: its action is performed and its last field
   becomes the new state.  Always returns 0. */
int main()
{
    char state = '0';
    char next = '0';
    int ch;
    int at;

    while ((ch = getchar()) != EOF) {
        for (at = 0; p[at] != '\0'; at += 4) {
            if (p[at] != state)
                continue;
            if (p[at + 1] != ch && p[at + 1] != '.')
                continue;

            switch (p[at + 2]) {
            case '0':
                putchar(ch);
                break;
            case '/':
                putchar('/');
                putchar(ch);
                break;
            case ' ':
                putchar(' ');
                break;
            default:
                /* any other action character: emit nothing */
                break;
            }
            next = p[at + 3];
            break; /* first matching rule wins */
        }
        state = next;
    }
    return 0;
}
/* K&R2 1-23: Write a program to remove all comments from a C program.
Don't forget to handle quoted strings and character constants
properly. C comments do not nest.
This solution does not deal with other special cases, such as
trigraphs, line continuation with \, or <> quoting on #include,
since these aren't mentioned up 'til then in K&R2. Perhaps this is
cheating.
Note that this program contains both comments and quoted strings of
text that looks like comments, so running it on itself is a
reasonable test. It also contains examples of a comment that ends
in a star and a comment preceded by a slash. Note that the latter
will break C99 compilers and C89 compilers with // comment
extensions.
Interface: The C source file is read from stdin and the
comment-less output is written to stdout. **/
#include <stdio.h>
#include <stdlib.h>
/*
 * Copy stdin to stdout with every C comment replaced by a single blank,
 * the way the language itself treats a comment.  Implemented as a
 * character-at-a-time state machine.  Always returns 0.
 */
int
main(void)
{
    /* State machine's current state. */
    enum {
        PROGRAM,    /* ordinary program text */
        SLASH,      /* program text, a '/' is being held back */
        COMMENT,    /* inside a comment */
        STAR,       /* inside a comment, previous character was '*' */
        QUOTE,      /* inside a string or character constant */
        LITERAL     /* inside a constant, previous character was '\' */
    } state;
    /* If state == QUOTE or LITERAL, then ' or ". Otherwise meaningless. */
    int quote = 0;

    state = PROGRAM;
    for (;;) {
        int c = getchar();
        if (c == EOF) {
            /* Input ended right after a slash: that slash is still owed.
               The blank between the division operator and the comment
               keeps this self-test expression valid for compilers that
               accept // comments. */
            if (state == SLASH)
                putchar('/' / /**/ 1 / 1 / '\1');
            break;
        }
        switch (state) {
        case SLASH:
            /* Program text following a slash. */
            if (c == "/*"[1]) {
                state = COMMENT;
                break;
            }
            putchar('/');
            state = PROGRAM;
            /* Fall through. */
        case PROGRAM:
            /* Program text. */
            if (c == '\'' || c == '"') {
                quote = c;
                state = QUOTE;
                putchar(c);
            }
            else if (c == "/*"[0])
                state = SLASH;
            else
                putchar(c);
            break;
        case COMMENT:
            /* Comment. */
            if (c == '*')
                state = STAR;
            break;
        case STAR:
            /* Comment following a star. */
            if (c == '/') {
                /* End of comment: this is the one place the replacement
                   blank is written, so each comment yields exactly one. */
                state = PROGRAM;
                putchar(' ');
            }
            else if (c != '*')
                state = COMMENT;
            break;
        case QUOTE:
            /* Within quoted string or character constant. */
            putchar(c);
            if (c == '\\')
                state = LITERAL;
            else if (c == quote)
                state = PROGRAM;
            break;
        case LITERAL:
            /* Within quoted string or character constant, following \. */
            putchar(c);
            state = QUOTE;
            break;
        default:
            /* Not reachable unless state has been corrupted. */
            abort();
        }
    }
    return 0;
}
/*
Local variables:
compile-command: "checkergcc -W -Wall -ansi -pedantic knr123.c -o knr123"
End:
*/
/* torek@elf.bsdi.com (Chris Torek) */
/*
"Write a program to remove all comments from a C program. Don't forget
to handle quoted strings and character constants properly. C comments do
not nest."
Well, what the heck. I mailed this a day or two ago, but here is
the posted version. I modified the problem a bit: it removes
comments from full ANSI C89 or C99 programs, handling trigraphs
and \-newline sequences. It attempts to preserve any trigraphs in
the output, even while examining them in the "C code" as their
translated characters. (I am not sure why I bothered doing all of
them, when only ??/ matters here.) It keeps output line numbers in
sync with input line numbers, so that if the output is compiled,
any error messages will refer back to the proper input source line.
Lightly tested.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * This flag controls whether we do trigraph processing.
 * On by default; the -t option (see options()) turns it off.
 */
int trigraphs = 1;
/*
 * This flag controls whether a comment becomes "whitespace" (ANSI C)
 * or "nothing at all" (some pre-ANSI K&R C compilers).
 * On by default; the -w option (see options()) turns it off.
 */
int whitespace = 1;
/*
 * This flag controls whether we do C89 or C99. (C99 also handles C++.)
 * Off by default; the -9 option (see options()) turns it on, which makes
 * process() recognise //-comments.
 */
int c99;
/*
 * These are global so that options() can get at them, and for later
 * error messages if needed.  main() replaces a NULL name with "stdin"
 * or "stdout" once the corresponding stream has been chosen.
 */
const char *inname, *outname;
/* Parse one option cluster (the text after '-'); returns how many of the
   following arguments were consumed as option values. */
int options(const char *, char **);
/* Print the usage line to stderr and exit with EXIT_FAILURE. */
void usage(void);
/* Copy the first stream to the second with comments removed. */
void process(FILE *, FILE *);
/* Report an internal error on stderr and abort. */
#ifdef __GNUC__
void panic(const char *) __attribute__((noreturn));
#else
void panic(const char *);
#endif
/*
 * Entry point: parse the command line, open the input and output
 * streams (stdin / stdout when no name was given), run process() and
 * report the outcome through the exit status.
 *
 * Arguments starting with '-' are option clusters handled by options();
 * its return value is the number of following arguments it consumed,
 * which are skipped here.  At most one input file name is accepted.
 *
 * Exits with EXIT_FAILURE when a file cannot be opened, when reading the
 * input failed, or when the output could not be written completely
 * (including a failing fclose(), which is where buffered data is
 * flushed); otherwise exits with EXIT_SUCCESS.
 */
int main(int argc, char **argv) {
	int i;
	int read_failed, write_failed;
	FILE *in, *out;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-')
			i += options(argv[i] + 1, argv + i + 1);
		else if (inname == NULL)
			inname = argv[i];
		else
			usage();
	}
	if (inname != NULL) {
		if ((in = fopen(inname, "r")) == NULL) {
			fprintf(stderr, "cannot open %s for reading\n", inname);
			exit(EXIT_FAILURE);
		}
	} else {
		inname = "stdin";
		in = stdin;
	}
	if (outname != NULL) {
		if ((out = fopen(outname, "w")) == NULL) {
			fprintf(stderr, "cannot open %s for writing\n",
			    outname);
			exit(EXIT_FAILURE);
		}
	} else {
		outname = "stdout";
		out = stdout;
	}
	process(in, out);

	read_failed = ferror(in);
	fclose(in);
	/* The error indicator must be sampled before the stream is closed. */
	write_failed = ferror(out);
	if (fclose(out) != 0)
		write_failed = 1;

	if (read_failed)
		fprintf(stderr, "error reading %s\n", inname);
	if (write_failed)
		fprintf(stderr, "error writing %s\n", outname);
	if (read_failed || write_failed)
		exit(EXIT_FAILURE);
	exit(EXIT_SUCCESS);
}
/*
 * Handle one cluster of single-letter options.
 *
 * afterdash points just past the leading '-'; moreargs points at the
 * command-line arguments that follow the cluster (terminated by a NULL
 * pointer).  Recognised letters:
 *
 *	o	output file name; the name is either the remainder of the
 *		cluster (-ofoo) or the next unused entry of moreargs (-o foo)
 *	t	turn trigraph processing off
 *	w	turn comment-to-whitespace replacement off
 *	9	select C99 mode
 *
 * Anything else, or a missing name for -o, goes to usage().
 *
 * The return value is the number of moreargs entries that were used up
 * as option values, so the caller can skip over them.
 */
int options(const char *afterdash, char **moreargs) {
	int used = 0;
	int letter;

	for (;;) {
		letter = *afterdash++;
		if (letter == '\0')
			break;

		if (letter == 't') {
			trigraphs = 0;
		} else if (letter == 'w') {
			whitespace = 0;
		} else if (letter == '9') {
			c99 = 1;
		} else if (letter != 'o') {
			usage();
		} else if (*afterdash != '\0') {
			/* name is glued to the option: it ends the cluster */
			outname = afterdash;
			afterdash = "";
		} else if (moreargs[used] != NULL) {
			outname = moreargs[used];
			used++;
		} else {
			usage();
		}
	}
	return used;
}
/*
 * Write the one-line synopsis to stderr and terminate the program
 * with a failure status.  Never returns.
 */
void usage(void) {
	fputs("usage: uncomment [-9tw] [-o outfile] [infile]\n", stderr);
	exit(EXIT_FAILURE);
}
/*
* States, level 0:
* normal
* trigraph processing: Q1 Q2 (for ??x)
*
* States, level 1:
* backslash-newline processing: BACK (seen \, may consume NL)
*
* States, level 2:
* normal
* character constant: CC (seen '), CCBACK (seen \ inside CC)
* string constant: SC, SCBACK
* comment: SLASH, COMM, COMMSTAR (for /, in-comment, & seen-star)
* C99: SLASHSLASH
*/
/* Level 0: trigraph recognition, tracked by getl0char(). */
enum l0state {
L0_NORMAL, /* no '?' pending */
L0_Q1, L0_Q2 /* one / two consecutive '?' have been read */
};
/* Level 1: backslash-newline splicing, tracked by process(). */
enum l1state {
L1_NORMAL, /* previous character was not a backslash */
L1_BACK /* a backslash was read; a following newline is swallowed with it */
};
/* Level 2: the C-level scanner in process(). */
enum l2state {
L2_NORMAL, /* ordinary program text */
L2_CC, L2_CCBACK, /* in a character constant / just after a backslash inside one */
L2_SC, L2_SCBACK, /* in a string constant / just after a backslash inside one */
L2_SLASH, L2_COMM, L2_COMMSTAR, /* after '/', inside a comment, inside a comment after '*' */
L2_SLASHSLASH /* inside a //-comment (only entered when c99 is set) */
};
/* Input-side state shared by process(), getl0char() and pushback(). */
struct state {
FILE *in; /* stream getl0char() reads from */
enum l0state l0state; /* trigraph state carried over between getl0char() calls */
int npushback; /* number of valid entries in pushback[] / pushorig[] */
char pushback[4]; /* pushed-back characters; the last one pushed is re-read first */
char pushorig[4]; /* nonzero => trigraph pushback */
int lastgetc; /* value most recently read with getc(), or EOF; 0 initially */
int lineno; /* current input line, starting at 1; used by warn() */
};
/*
 * Put *sp into its starting configuration for reading from `in`:
 * first line, nothing read yet, empty pushback stack, and no trigraph
 * in progress.  The pushback arrays themselves are left untouched.
 */
static void state0(struct state *sp, FILE *in) {
	sp->lineno = 1;
	sp->lastgetc = 0;
	sp->npushback = 0;
	sp->l0state = L0_NORMAL;
	sp->in = in;
}
/*
 * Push character c onto sp's pushback stack so that it is the next one
 * to be re-read.  origc is stored alongside it: nonzero means c came
 * from a trigraph and origc is that trigraph's third character.
 * The stack must not already be full (checked with assert).
 */
static void pushback(struct state *sp, int c, char origc) {
	int slot;

	assert(sp->npushback < sizeof sp->pushback);
	slot = sp->npushback++;
	sp->pushback[slot] = c;
	sp->pushorig[slot] = origc;
}
/*
 * Get a character, doing trigraph processing. Set *origc to 0 for normal
 * characters, or the actual input character pre-trigraph-mapping
 * for trigraph input (i.e. the third character of the ??x sequence).
 *
 * As a side effect, this can wind up getting up to 3 characters, maybe
 * stuffing two of them into the pushback buffer sp->pushback[]. It also
 * bumps sp->lineno when a previously-read newline has been passed over.
 *
 * Returns the (possibly mapped) character, or EOF at end of input.
 * Question marks that were being held while looking for a trigraph are
 * not lost when the input ends: they are handed back (directly and via
 * the pushback buffer) before EOF is reported by a later call.
 */
static int getl0char(struct state *sp, char *origc) {
	int c, newc;
	enum l0state state;

	/*
	 * `state' is the working copy; sp->l0state only records what has
	 * to survive until the next call (L0_Q2 after "???").
	 */
	state = sp->l0state;
	*origc = 0;
	while ((c = getc(sp->in)) != EOF) {
		if (sp->lastgetc == '\n')
			sp->lineno++;
		sp->lastgetc = c;
		switch (state) {
		case L0_NORMAL:
			/* ? => get another character; otherwise we are ok */
			if (c == '?') {
				state = L0_Q1;
				continue;
			}
			assert(sp->l0state == L0_NORMAL);
			return c;
		case L0_Q1:
			/* ?? => get another character */
			if (c == '?') {
				state = L0_Q2;
				continue;
			}
			/* ?X => return ?, look at X later */
			pushback(sp, c, 0);
			sp->l0state = L0_NORMAL;
			return '?';
		case L0_Q2:
			/*
			 * ??X, where X is trigraph => map
			 * ??X, where X is non-trigraph => tricky
			 * ??? => also tricky
			 */
			switch (c) {
			case '=':
				newc = '#';
				break;
			case '(':
				newc = '[';
				break;
			case '/':
				newc = '\\';
				break;
			case ')':
				newc = ']';
				break;
			case '\'':
				newc = '^';
				break;
			case '<':
				newc = '{';
				break;
			case '!':
				newc = '|';
				break;
			case '>':
				newc = '}';
				break;
			case '?':
				/*
				 * This one is slightly tricky. Three '?'s
				 * mean that the '?' we read two characters
				 * ago gets returned, and the two remaining
				 * '?'s leave us in Q2 state.
				 */
				sp->l0state = L0_Q2;
				return '?';
			default:
				/*
				 * This one returns the first ?, leaves
				 * the second ? to be re-examined, and
				 * leaves the last character to be re-examined.
				 * In any case we are back in "normal" state.
				 */
				pushback(sp, c, 0);
				pushback(sp, '?', 0);
				sp->l0state = L0_NORMAL;
				return '?';
			}
			/* mapped a trigraph char -- return new char */
			*origc = c;
			sp->l0state = L0_NORMAL;
			return newc;
		default:
			panic("getl0char state");
		}
	}
	sp->lastgetc = EOF;
	/*
	 * Input ended while one or two '?' were still being held back.
	 * They are ordinary characters after all, so deliver them: the
	 * first one now, a second one (if any) through the pushback buffer.
	 * The caller then comes back here and gets EOF.
	 */
	if (state == L0_Q2) {
		pushback(sp, '?', 0);
		sp->l0state = L0_NORMAL;
		return '?';
	}
	if (state == L0_Q1) {
		sp->l0state = L0_NORMAL;
		return '?';
	}
	return EOF;
}
void warn(struct state *, const char *);
/*
 * Copy `in' to `out' with comments removed.
 *
 * Each character passes through three layers:
 *   level 0 - trigraph mapping (getl0char(), only when `trigraphs' is set);
 *   level 1 - backslash-newline splicing (l1state);
 *   level 2 - the C-level scanner that tracks character constants,
 *             string constants and comments (l2state).
 *
 * Newlines swallowed by splicing or contained in comments are counted in
 * pendnls and written out later by SYNCLINES(), so output line numbers
 * stay in step with the input.  A /-* comment becomes one blank when
 * `whitespace' is set and it contained no newline.  Characters that were
 * spelled as trigraphs in the input are written back as trigraphs.
 *
 * Unterminated constants and comments are reported through warn().
 */
void process(FILE *in, FILE *out) {
	enum l1state l1state = L1_NORMAL;
	enum l2state l2state = L2_NORMAL;
	int c, pendnls;
	char origc, backc;
	struct state state;

	state0(&state, in);
	pendnls = 0;
	backc = 0;		/* defeat gcc warning */
	/*
	 * Slight sort-of-bug: files ending in \ cause two "final" getc()s.
	 */
	do {
		/* Fetch the next character: pushed-back ones come first. */
		if (state.npushback) {
			c = state.pushback[--state.npushback];
			origc = state.pushorig[state.npushback];
		} else if (trigraphs) {
			c = getl0char(&state, &origc);
		} else {
			c = getc(in);
			origc = 0;
			if (state.lastgetc == '\n')
				state.lineno++;
			state.lastgetc = c;
		}
		/*
		 * Do backslash-newline processing.
		 */
		switch (l1state) {
		case L1_NORMAL:
			if (c == '\\') {
				/* hold the backslash until we see what follows */
				l1state = L1_BACK;
				backc = origc;
				continue;
			}
			break;
		case L1_BACK:
			/*
			 * If backc is nonzero here, the backslash that
			 * got us into this state was spelled ??/ --
			 * if we eat a newline (and hence the backslash),
			 * we forget that the eaten newline was spelled
			 * this way. This is sort of a bug, but so it goes.
			 */
			l1state = L1_NORMAL;
			if (c == '\n') {
				pendnls++;
				continue;
			}
			/*
			 * Not a splice: save the current character for the
			 * next round and process the held backslash now.
			 */
			if (c != EOF)
				pushback(&state, c, origc);
			c = '\\';
			origc = backc;
			break;
		default:
			panic("bad l1state");
		}
		/*
		 * Now ready to do "C proper" processing.
		 */
#define SYNCLINES() while (pendnls) putc('\n', out), pendnls--
#define OUTPUT(ch, tri) ((tri) ? fprintf(out, "??%c", tri) : putc(ch, out))
#define COPY() OUTPUT(c, origc)
		switch (l2state) {
		case L2_NORMAL:
			switch (c) {
			case '\'':
				l2state = L2_CC;
				break;
			case '"':
				l2state = L2_SC;
				break;
			case '/':
				/* hold the slash: it may open a comment */
				l2state = L2_SLASH;
				continue;
			default:
				break;
			}
			SYNCLINES();
			if (c != EOF)
				COPY();
			break;
		case L2_CC:
			switch (c) {
			case EOF:
				warn(&state, "EOF in character constant");
				break;
			case '\n':
				warn(&state, "newline in character constant");
				break;
			case '\\':
				l2state = L2_CCBACK;
				break;
			case '\'':
				l2state = L2_NORMAL;
				break;
			default:
				break;
			}
			if (c != EOF)
				COPY();
			break;
		case L2_CCBACK:
			switch (c) {
			case EOF:
				warn(&state, "EOF in character constant");
				break;
			case '\n':
				warn(&state, "newline in character constant");
				break;
			default:
				break;
			}
			l2state = L2_CC;
			if (c != EOF)
				COPY();
			break;
		case L2_SC:	/* much like CC */
			switch (c) {
			case EOF:
				warn(&state, "EOF in string constant");
				break;
			case '\n':
				warn(&state, "newline in string constant");
				break;
			case '\\':
				l2state = L2_SCBACK;
				break;
			case '"':
				l2state = L2_NORMAL;
				break;
			default:
				break;
			}
			if (c != EOF)
				COPY();
			break;
		case L2_SCBACK:
			switch (c) {
			case EOF:
				warn(&state, "EOF in string constant");
				break;
			case '\n':
				warn(&state, "newline in string constant");
				break;
			default:
				break;
			}
			l2state = L2_SC;
			if (c != EOF)
				COPY();
			break;
		case L2_SLASH:
			if (c == '*')
				l2state = L2_COMM;
			else if (c99 && c == '/')
				l2state = L2_SLASHSLASH;
			else {
				/* the held slash was ordinary program text */
				SYNCLINES();
				OUTPUT('/', 0);
				if (c != '/') {
					if (c != EOF)
						COPY();
					/*
					 * The character after the slash is
					 * program text like any other: a quote
					 * here opens a constant, which has to
					 * be tracked so that its contents are
					 * not scanned for comments.
					 */
					if (c == '\'')
						l2state = L2_CC;
					else if (c == '"')
						l2state = L2_SC;
					else
						l2state = L2_NORMAL;
				}
				/* else: a second slash; keep holding it */
			}
			break;
		case L2_COMM:
			switch (c) {
			case '*':
				l2state = L2_COMMSTAR;
				break;
			case '\n':
				pendnls++;
				break;
			case EOF:
				warn(&state, "EOF inside comment");
				break;
			}
			break;
		case L2_COMMSTAR:
			switch (c) {
			case '/':
				l2state = L2_NORMAL;
				/*
				 * If comments become whitespace,
				 * and we have no pending newlines,
				 * must emit a blank here.
				 *
				 * The comment text is now all eaten.
				 */
				if (whitespace && pendnls == 0)
					putc(' ', out);
				SYNCLINES();
				break;
			case '*':
				/* stay in L2_COMMSTAR state */
				break;
			case EOF:
				warn(&state, "EOF inside comment");
				break;
			case '\n':
				pendnls++;
				/* FALLTHROUGH */
			default:
				l2state = L2_COMM;
			}
			break;
		case L2_SLASHSLASH:
			switch (c) {
			case EOF:
				/* ??? do we really care? */
				warn(&state, "EOF inside //-comment");
				break;
			case '\n':
				l2state = L2_NORMAL;
				pendnls++;	/* cheesy, but... */
				SYNCLINES();
				/* FALLTHROUGH */
			default:
				break;
			}
			break;
		default:
			panic("bad l2state");
		}
	} while (c != EOF);
	SYNCLINES();
}
/*
 * Print a diagnostic on stderr, tagged with the input file name and
 * the line number currently recorded in *sp.
 */
void warn(struct state *sp, const char *msg) {
	const int where = sp->lineno;

	fprintf(stderr, "uncomment: %s(%d): %s\n", inname, where, msg);
}
/*
 * Internal-error exit: print msg on stderr with a "panic: " prefix,
 * then abort().  The trailing exit() is a backstop that is only
 * reached if abort() were ever to return.
 */
void panic(const char *msg) {
	const char *text = msg;

	fprintf(stderr, "panic: %s\n", text);
	abort();
	exit(EXIT_FAILURE);
}
/*
* C comment stripper.
*
* Strips comments from C or C++ code.
*/
#include <stdio.h>
/* Lexical context cstrip() is currently in. */
enum state_t {
    normal,
    string,
    character,
    block_comment,
    line_comment
};

/* The most recent significant character seen by cstrip(), used as a
   one-token look-behind.  tri2 followed by '/' becomes tri_backslash;
   tri1 / tri2 presumably count the '?' of a potential trigraph --
   confirm against the rest of cstrip(). */
enum token_t {
    none,
    backslash,
    slash,
    star,
    tri1,
    tri2,
    tri_backslash
};

/* Return 1 when input read in state s is copied to the output (program
   text, string constants, character constants) and 0 otherwise. */
static int print_mode(enum state_t s)
{
    switch (s) {
    case normal:
    case string:
    case character:
        return 1;
    default:
        return 0;
    }
}
void cstrip(FILE *infile, FILE *outfile)
{
int ch;
int comment_newline = 0;
enum state_t state = normal;
enum token_t token = none;
enum token_t last_token = none;
if (!infile || !outfile || (infile == outfile)) {
return;
}
while ((ch = fgetc(infile)) != EOF) {
switch (ch) {
case '/':
if (token == tri2) {
token = tri_backslash;
if (print_mode(state))
fputc(ch, outfile);
} else if (state == string || state == character) {
fputc(ch, outfile);
token = slash;
} else if (state == block_comment && token == star) {
state = normal;
token = none;
/* Replace block comments with whitespace. */
if (comment_newline) {
fputc('\n', outfile);
} else {
fputc(' ', outfile);
}
} else if (state == normal && token == slash) {
state = line_comment;
token = slash;
} else {
token = slash;
}
break;
case '\\':
if (state == normal && token == slash)
fputc('/', outfile);
if (print_mode(state))
fputc(ch, outfile);
if (token == backslash || token == tri_backslash) {
token = none;
} else {
last_token = token;
token = backslash;
}
break;
case '"':
if (state == normal && token == slash)
fputc('/', outfile);
if (state == string && token != backslash)
state = normal;
else if (state == normal && token != backslash)
state = string;
if (print_mode(state))
fputc(ch, outfile);
token = none;
break;
case '\'':
if (state == normal && token == slash)
fputc('/', outfile);
if (state == character && token != backslash)
state = normal;
else if (state == normal && token != backslash)
state = character;
if (print_mode(state))
fputc(ch, outfile);
token = none;
break;
case<