229 lines
6.4 KiB
C
229 lines
6.4 KiB
C
|
/**
|
||
|
Strips C- and C++-style comments from stdin, sending the results to
|
||
|
stdout. It assumes that its input is legal C-like code, and does
|
||
|
only little error handling.
|
||
|
|
||
|
It treats string literals as anything starting and ending with
|
||
|
matching double OR single quotes OR backticks (for use with
|
||
|
scripting languages which use those). It assumes that a quote
|
||
|
character within a string which uses the same quote type is escaped
|
||
|
by a backslash. It should not be used on any code which might
|
||
|
contain C/C++ comments inside heredocs, and similar constructs, as
|
||
|
it will strip those out.
|
||
|
|
||
|
Usage: $0 [--keep-first|-k] < input > output
|
||
|
|
||
|
The --keep-first (-k) flag tells it to retain the first comment in the
|
||
|
input stream (which is often a license or attribution block). It
|
||
|
may be given repeatedly, each one incrementing the number of
|
||
|
retained comments by one.
|
||
|
|
||
|
License: Public Domain
|
||
|
Author: Stephan Beal (stephan@wanderinghorse.net)
|
||
|
*/
|
||
|
#include <stdio.h>
|
||
|
#include <assert.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#if 1
|
||
|
#define MARKER(pfexp) \
|
||
|
do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__); \
|
||
|
printf pfexp; \
|
||
|
} while(0)
|
||
|
#else
|
||
|
#define MARKER(exp) if(0) printf
|
||
|
#endif
|
||
|
|
||
|
struct {
|
||
|
FILE * input;
|
||
|
FILE * output;
|
||
|
int rc;
|
||
|
int keepFirst;
|
||
|
} App = {
|
||
|
0/*input*/,
|
||
|
0/*output*/,
|
||
|
0/*rc*/,
|
||
|
0/*keepFirst*/
|
||
|
};
|
||
|
|
||
|
void do_it_all(void){
|
||
|
enum states {
|
||
|
S_NONE = 0 /* not in comment */,
|
||
|
S_SLASH1 = 1 /* slash - possibly comment prefix */,
|
||
|
S_CPP = 2 /* in C++ comment */,
|
||
|
S_C = 3 /* in C comment */
|
||
|
};
|
||
|
int ch, prev = EOF;
|
||
|
FILE * out = App.output;
|
||
|
int const slash = '/';
|
||
|
int const star = '*';
|
||
|
int line = 1;
|
||
|
int col = 0;
|
||
|
enum states state = S_NONE /* current state */;
|
||
|
int elide = 0 /* true if currently eliding output */;
|
||
|
int state3Col = -99
|
||
|
/* huge kludge for odd corner case: */
|
||
|
/*/ <--- here. state3Col marks the source column in which a C-style
|
||
|
comment starts, so that it can tell if star-slash inside a
|
||
|
C-style comment is the end of the comment or is the weird corner
|
||
|
case marked at the start of _this_ comment block. */;
|
||
|
for( ; EOF != (ch = fgetc(App.input)); prev = ch,
|
||
|
++col){
|
||
|
switch(state){
|
||
|
case S_NONE:
|
||
|
if('\''==ch || '"'==ch || '`'==ch){
|
||
|
/* Read string literal...
|
||
|
needed to properly catch comments in strings. */
|
||
|
int const quote = ch,
|
||
|
startLine = line, startCol = col;
|
||
|
int ch2, escaped = 0, endOfString = 0;
|
||
|
fputc(ch, out);
|
||
|
for( ++col; !endOfString && EOF != (ch2 = fgetc(App.input));
|
||
|
++col ){
|
||
|
switch(ch2){
|
||
|
case '\\': escaped = !escaped;
|
||
|
break;
|
||
|
case '`':
|
||
|
case '\'':
|
||
|
case '"':
|
||
|
if(!escaped && quote == ch2) endOfString = 1;
|
||
|
escaped = 0;
|
||
|
break;
|
||
|
default:
|
||
|
escaped = 0;
|
||
|
break;
|
||
|
}
|
||
|
if('\n'==ch2){
|
||
|
++line;
|
||
|
col = 0;
|
||
|
}
|
||
|
fputc(ch2, out);
|
||
|
}
|
||
|
if(EOF == ch2){
|
||
|
fprintf(stderr, "Unexpected EOF while reading %s literal "
|
||
|
"on line %d column %d.\n",
|
||
|
('\''==ch) ? "char" : "string",
|
||
|
startLine, startCol);
|
||
|
App.rc = 1;
|
||
|
return;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
else if(slash == ch){
|
||
|
/* MARKER(("state 0 ==> 1 @ %d:%d\n", line, col)); */
|
||
|
state = S_SLASH1;
|
||
|
break;
|
||
|
}
|
||
|
fputc(ch, out);
|
||
|
break;
|
||
|
case S_SLASH1: /* 1 slash */
|
||
|
/* MARKER(("SLASH1 @ %d:%d App.keepFirst=%d\n",
|
||
|
line, col, App.keepFirst)); */
|
||
|
switch(ch){
|
||
|
case '*':
|
||
|
/* Enter C comment */
|
||
|
if(App.keepFirst>0){
|
||
|
elide = 0;
|
||
|
--App.keepFirst;
|
||
|
}else{
|
||
|
elide = 1;
|
||
|
}
|
||
|
/*MARKER(("state 1 ==> 3 @ %d:%d\n", line, col));*/
|
||
|
state = S_C;
|
||
|
state3Col = col-1;
|
||
|
if(!elide){
|
||
|
fputc(prev, out);
|
||
|
fputc(ch, out);
|
||
|
}
|
||
|
break;
|
||
|
case '/':
|
||
|
/* Enter C++ comment */
|
||
|
if(App.keepFirst>0){
|
||
|
elide = 0;
|
||
|
--App.keepFirst;
|
||
|
}else{
|
||
|
elide = 1;
|
||
|
}
|
||
|
/*MARKER(("state 1 ==> 2 @ %d:%d\n", line, col));*/
|
||
|
state = S_CPP;
|
||
|
if(!elide){
|
||
|
fputc(prev, out);
|
||
|
fputc(ch, out);
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
/* It wasn't a comment after all. */
|
||
|
state = S_NONE;
|
||
|
if(!elide){
|
||
|
fputc(prev, out);
|
||
|
fputc(ch, out);
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case S_CPP: /* C++ comment */
|
||
|
if('\n' == ch){
|
||
|
/* MARKER(("state 2 ==> 0 @ %d:%d\n", line, col)); */
|
||
|
state = S_NONE;
|
||
|
elide = 0;
|
||
|
}
|
||
|
if(!elide){
|
||
|
fputc(ch, out);
|
||
|
}
|
||
|
break;
|
||
|
case S_C: /* C comment */
|
||
|
if(!elide){
|
||
|
fputc(ch, out);
|
||
|
}
|
||
|
if(slash == ch){
|
||
|
if(star == prev){
|
||
|
/* MARKER(("state 3 ==> 0 @ %d:%d\n", line, col)); */
|
||
|
/* Corner case which breaks this: */
|
||
|
/*/ <-- slash there */
|
||
|
/* That shows up twice in a piece of 3rd-party
|
||
|
code i use. */
|
||
|
/* And thus state3Col was introduced :/ */
|
||
|
if(col!=state3Col+2){
|
||
|
state = S_NONE;
|
||
|
elide = 0;
|
||
|
state3Col = -99;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
assert(!"impossible!");
|
||
|
break;
|
||
|
}
|
||
|
if('\n' == ch){
|
||
|
++line;
|
||
|
col = 0;
|
||
|
state3Col = -99;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void usage(char const *zAppName){
|
||
|
fprintf(stderr, "Strips C- and C++-style comments from stdin and sends "
|
||
|
"the results to stdout.\n");
|
||
|
fprintf(stderr, "Usage: %s [--keep-first|-k] < input > output\n", zAppName);
|
||
|
}
|
||
|
|
||
|
int main( int argc, char const * const * argv ){
|
||
|
int i;
|
||
|
for(i = 1; i < argc; ++i){
|
||
|
char const * zArg = argv[i];
|
||
|
while( '-'==*zArg ) ++zArg;
|
||
|
if( 0==strcmp(zArg,"k")
|
||
|
|| 0==strcmp(zArg,"keep-first") ){
|
||
|
++App.keepFirst;
|
||
|
}else{
|
||
|
usage(argv[0]);
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
App.input = stdin;
|
||
|
App.output = stdout;
|
||
|
do_it_all();
|
||
|
return App.rc ? 1 : 0;
|
||
|
}
|