本來這個blog是記錄開發輸入法的點滴的,後來越來越雜,現在什麼都記錄了。

2011年2月9日 星期三

C++ program to remove C/C++ style comments while keeping string contents intact and the line numbers of the code unchanged...

#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string>
#include <iostream>
#include <vector>
#include <string>

/**
 * Read one line from fp into str, optionally stripping C/C++ comments.
 *
 * A line ends at "\n", "\r" or "\r\n"; the terminator is not stored in str.
 * When to_remove_comments is non-zero, the text of "//" and block comments is
 * dropped while the contents of double-quoted string literals are kept.
 * A block comment that spans several lines yields one (possibly empty) output
 * line per input line, so line numbers are preserved.
 *
 * The line-break state and the comment/string state live in function-local
 * statics, so the function is meant to process one file at a time, from start
 * to end; both are reset when end of file is reached.
 *
 * @param str                 receives the line (previous contents are discarded)
 * @param fp                  stream to read from
 * @param to_remove_comments  non-zero to strip comments from the returned line
 * @return 1 if a line was read, 0 if end of file (or a read error) was hit
 *         before any character of a new line was read
 */
int my_fgets(std::string &str, FILE *fp, unsigned char to_remove_comments) {
    int ret=1;
    // Set to '\r' only while the very last character consumed was a CR line
    // break, so that an immediately following '\n' is recognized as the
    // second half of a CRLF pair.
    static unsigned char prev_linebreak;
    int ch;    // int, not char: EOF must be distinguishable from every byte value

    str.clear();

    while(1) {
        ch = fgetc(fp);

        if (ch==EOF) {
            // End of file or read error; either way there is nothing more to read.
            prev_linebreak='\0';
            if (str.empty())
                ret=0;
            break;
        }

        if (ch=='\r') {
            prev_linebreak='\r';
            break;
        }

        if (ch=='\n') {
            if (prev_linebreak=='\r') {
                // '\n' right after the '\r' that ended the previous line: same line break.
                prev_linebreak='\0';
                continue;
            }
            prev_linebreak='\n';
            break;
        }

        // An ordinary character: a later '\n' is a line break of its own,
        // not the tail of an earlier CR.
        prev_linebreak='\0';
        str+=static_cast<char>(ch);
    }

    if (to_remove_comments)
    {
        // Scanner state; static because block comments may span several lines.
        static enum IN_COMMENT_T {
            IN_COMMENT_NOT,
            IN_COMMENT_WAIT_FOR_STAR_SLASH,
            IN_COMMENT_DOUBLE_QUOTE,
        } in_comment = IN_COMMENT_NOT;

        // Move the raw line aside and rebuild str from it.
        std::string line;
        line.swap(str);
        const size_t len=line.length();

        for(size_t i=0; i<len; i++) {
            const char c=line[i];
            const char next=(i+1<len) ? line[i+1] : '\0';

            if (in_comment==IN_COMMENT_WAIT_FOR_STAR_SLASH) {
                if (c=='*' && next=='/') {
                    in_comment=IN_COMMENT_NOT;
                    i++;    // also consume the '/'; the character after it is scanned normally
                }
                continue;   // nothing inside a block comment is emitted, delimiters included
            }

            if (in_comment==IN_COMMENT_DOUBLE_QUOTE) {
                if (c=='"') {
                    // Count the backslashes directly in front of the quote:
                    // an even count means the quote is not escaped and closes the string.
                    size_t backslashes=0;
                    while (backslashes<i && line[i-1-backslashes]=='\\')
                        backslashes++;
                    if ((backslashes&1)==0)
                        in_comment=IN_COMMENT_NOT;
                }
                str+=c;
                continue;
            }

            // Ordinary code (IN_COMMENT_NOT).
            if (c=='/' && next=='/')
                break;      // line comment: drop the rest of the line

            if (c=='/' && next=='*') {
                in_comment=IN_COMMENT_WAIT_FOR_STAR_SLASH;
                i++;        // also consume the '*', so that "/*/" does not close the comment
                continue;
            }

            if (c=='"') {
                // A quote written as the character literal '"' or '\"' does not open a string.
                const bool after_apostrophe = i>=1 && line[i-1]=='\'';
                const bool after_escape     = i>=2 && line[i-1]=='\\' && line[i-2]=='\'';
                const bool is_char_literal  = next=='\'' && (after_apostrophe || after_escape);
                if (!is_char_literal)
                    in_comment=IN_COMMENT_DOUBLE_QUOTE;
            }
            str+=c;
        }

        // End of file: start the next file in a clean state.
        if (ret==0)
            in_comment = IN_COMMENT_NOT;
    }

    return ret;
}

int 
main(int argc, char **argv) {
    FILE *fp=NULL;
    std::string str;

    if (argc!=2) {
        fprintf(stderr, "remove C style comment in a text file and show in stdout\n");
        fprintf(stderr, "usage: %s file\n", argv[0]);
        goto MAIN_ERROR;
    }

    fp=fopen(argv[1], "rb");

    if (fp==NULL)  {
        fprintf(stderr, "error in open %s\n", argv[1]);
        goto MAIN_ERROR;
    }

    while(my_fgets(str, fp, 1))
        fprintf(stderr, "%s\n", str.c_str());

                    
MAIN_ERROR:
    if (fp)
        fclose(fp);

    return 0;

}