| |
| Posted by Matthew in reply to Stewart Gordon | PermalinkReply |
|
Matthew
Posted in reply to Stewart Gordon
| I just used this, for a lib I'm trying to get correctly documented, but it strips most of the comments, most important of which are the header comment blocks which include the Doxygen \file token.
Have there been any refinements of this since you published this version?
If not, any chance of a version that leaves *all* comments intact?
Cheers
Matthew
"Stewart Gordon" <smjg_1998@yahoo.com> wrote in message news:cel3tf$2vbr$1@digitaldaemon.com...
> Finally, a version of dfilter that's pretty much worth its weight.
>
> Fixes:
> - Decryptified some variable names in the code
> - Understands attribute blocks (at least those where the attribute is
> public, protected or private)
> - Removes the module and import statements, which create rubbish in the
> doxygen output
> - Converts linebreaks on input (if only to make the output more readable
> for debugging purposes)
>
> Suggested future developments (any takers?):
> - Finish off supporting attribute blocks
> - Support version/debug blocks (would filter for version specified on
> command line, just as DMD does)
> - Provide a means of hiding code from Doxygen (since Doxygen's own
> method relies on the C preprocessor)
>
> Stewart.
>
> --
> My e-mail is valid but not my primary mailbox. Please keep replies on the 'group where everyone may benefit.
>
>
--------------------------------------------------------------------------------
> import std.file, std.ctype, std.string, std.c.stdio;
>
> char [] data; /* Data. */
> char *current; /* Current point. */
> char *previous; /* Previous filter point. */
> char *end; /* End of the data. */
> char *ptoken; /* Start of this token. */
>
> /* Read in a token. */
> char [] token ()
> {
> restart:
> ptoken = current;
>
> if (current >= end)
> return null;
>
> if (isalpha (*current) || *current == '_')
> {
> for (current ++; current < end; current ++)
> if (!isalnum (*current) && *current != '_')
> break;
>
> return ptoken [0 .. cast(int) (current - ptoken)];
> }
>
> if (*current == ' ' || *current == '\r' || *current == '\n'
> || *current == '\t')
> {
> current ++;
> goto restart;
> }
>
> if (*current == '"')
> {
> for (current ++; current < end; current ++)
> if (*current == '\\')
> current ++;
> else if (*current == '"')
> {
> current ++;
> break;
> }
> goto restart;
> }
>
> if (*current == '\'')
> {
> for (current ++; current < end; current ++)
> if (*current == '\'')
> {
> current ++;
> break;
> }
> goto restart;
> }
>
> if (current < end - 1)
> {
> if (*current == '/' && current [1] == '/')
> {
> for (current += 2; ; current ++)
> if (current >= end || *current == '\n')
> {
> current ++;
> goto restart;
> }
> }
>
> if (*current == '/' && current [1] == '*')
> {
> for (current += 2; ; current ++)
> if (current >= end - 1
> || (*current == '*' && current [1] == '/'))
> {
> current += 2;
> goto restart;
> }
> }
>
> if (*current == '/' && current [1] == '+')
> {
> int depth = 1;
>
> for (current += 2; ; current ++)
> if (current >= end - 1)
> goto restart;
> else if (*current == '/' && current [1] == '+')
> {
> current += 2;
> depth ++;
> }
> else if (*current == '+' && current [1] == '/')
> {
> current += 2;
> depth --;
> if (!depth)
> goto restart;
> }
> }
> }
>
> current ++;
> return ptoken [0 .. 1];
> }
>
> /* Print all text to this point and set previous to the current point. */
> void flush (char *p)
> {
> fwrite (previous, cast(int) (p - previous), 1, stdout);
> previous = current;
> }
>
> /* Consume a "{ ... }" or "(xxx) { ... }" block. */
> void skipBlock (char *p)
> {
> char *o = previous;
>
> flush (p);
>
> int depth = 0;
> char [] t = token ();
>
> if (t == "(")
> {
> while (1)
> {
> t = token ();
> if (t == ")" || t == null)
> break;
> }
> t = token ();
> }
>
> if (t != "{")
> {
> previous = p;
> flush (current);
> return;
> }
>
> while (1)
> {
> if (t == null)
> break;
> if (t == "{")
> depth ++;
> if (t == "}")
> {
> depth --;
> if (depth == 0)
> break;
> }
>
> t = token ();
> }
>
> previous = current;
> }
>
> int main (char [] [] args)
> {
> if (args.length == 1)
> {
> printf ("%.*s FILENAME\n\nPreprocesses the file in preparation for Doxygen.\n", args [0]);
> return 1;
> }
>
> data = cast(char []) read (args [1]);
>
> // SG translate line breaks
> data = replace(data, "\r\n", "\n");
> data = replace(data, "\r", "\n");
>
> current = previous = data;
> end = previous + data.length;
>
> char [] t;
> char [] [] protectRecord;
> char [] protect = "public";
> char [] [] brackets;
> char [] nextOpenBracket;
> char [] nextSemiColon;
> bit insideBrackets;
> int blockLevel=0;
>
> while (1)
> {
> t = token ();
> if (t == null)
> {
> flush (current);
> return 0;
> }
>
> debug {
> fprintf(stderr, "Token '%.*s', blockLevel %d\n",
> t, blockLevel);
> }
>
> //remove private blocks if they are on the module level
> if(blockLevel==0 && t=="private")
> {
> debug {
> fputs("entered remove module-level private block\n", stderr);
> }
> flush (ptoken);
>
> //FIXME: this doesn't work for constructs of the form
> //private:
> //public XXX;
> //<more private data>
>
> bool endReached=false;
> bool breakAfterDecl=true;
> bool tokenCount=0;
>
> while(!endReached)
> {
> t = token ();
> if (t == null)
> return 0;
>
> tokenCount++;
>
> switch(t)
> {
> case ":":
> if(tokenCount==1)
> /* do not break after next declaration.
> * Instead we search for the next public statement
> */
> breakAfterDecl=false;
> break;
> case ";":
> if(blockLevel==0 && breakAfterDecl)
> endReached=true;
> break;
> case "{":
> blockLevel++;
> break;
> case "}":
> blockLevel--;
> if(blockLevel==0 && breakAfterDecl)
> endReached=true;
> break;
> case "public":
> if(blockLevel==0)
> {
> printf ("%.*s", t);
> endReached=true;
> }
> break;
> default:
> break;
> }
> }
>
> previous=current;
> debug {
> fputs("exited remove module-level private block\n", stderr);
> }
> }
>
> switch (t)
> {
> /* Remove these keywords. */
> case "body":
> flush (ptoken);
> previous = current;
> break;
>
> /* Remove these blocks. */
> case "unittest":
> case "invariant":
> case "in":
> case "out":
> skipBlock (ptoken);
> break;
>
> // SG remove these statements.
> case "module":
> case "import":
> while (token() != ";") {
> previous = current;
> }
> break;
>
> /* Remove "keyword:" but only if it is followed with a colon. */
> case "override":
> case "abstract":
> case "final":
> flush (ptoken);
> if ((t = token ()) == ":")
> previous = current;
> break;
>
> case ";":
> flush (current);
> printf ("%.*s", nextSemiColon);
> nextSemiColon = null;
> break;
>
> /* "keyword" without "keyword:" into "keyword: ... { ... } antikeyword:" */
> case "public":
> case "private":
> case "protected":
> flush (ptoken);
> /+
> if (token () == ":")
> {
> printf ("%.*s", t);
> protect = t;
> break;
> }
>
> if (t != protect)
> {
> printf ("%.*s: ", t);
> previous = ptoken;
> nextOpenBracket = protect ~ ":";
> nextSemiColon = protect ~ ":";
> }+/
>
> // SG added support for attribute block
> switch (token()) {
> case ":":
> printf("%.*s", t);
> protect = t;
> break;
>
> case "{":
> blockLevel++;
> printf("%.*s: ", t);
> protectRecord ~= protect;
> brackets ~= protect ~ ":";
> protect = t;
> previous = current;
> //nextOpenBracket = protect ~ ":";
> break;
>
> default:
> printf("%.*s: ", t);
> previous = ptoken;
> nextOpenBracket = protect ~ ":";
> nextSemiColon = protect ~ ":";
> }
> break;
>
> /* Modify into "package". */
> /*Not necessary anymore
> case "module":
> flush (ptoken);
> printf ("package ", nextSemiColon);
> previous = current;
> break;*/
>
> /* Modify into import X.Y.*. */
> /* Not necessary anymore
> case "import":
> flush (ptoken);
> printf ("import ", nextSemiColon);
>
> while ((t = token ()) != null)
> {
> if (t == ";")
> {
> printf (";");
> break;
> }
> else
> printf ("%.*s", t);
> }
> previous = current;
> break;*/
>
> /* Remove "extern (...)". */
> case "extern":
> flush (ptoken);
> if ((t = token ()) != "(")
> {
> current = ptoken;
> break;
> }
>
> while ((t = token ()) != null)
> if (t == ")")
> break;
> previous = current;
> break;
>
> /* "alias" into "typedef". */
> case "alias":
> flush (ptoken);
> printf ("typedef");
> previous = current;
> break;
>
> /* "instance" into "typedef". */
> case "instance":
> flush (ptoken);
> printf ("typedef");
> previous = current;
>
> while ((t = token ()) != null)
> if (t == "(")
> {
> flush (ptoken);
> printf ("<");
> previous = current;
> }
> else if (t == ")")
> {
> flush (ptoken);
> printf (">");
> previous = current;
> break;
> }
>
> break;
>
> case "{":
> // SG terminate bracket here
> brackets ~= "};" ~ nextOpenBracket;
> nextOpenBracket = null;
> blockLevel++;
> break;
>
> /* "}" into "};" */
> case "}":
> blockLevel--;
> if (protectRecord.length)
> {
> protect = protectRecord [protectRecord.length - 1];
> protectRecord.length = protectRecord.length - 1;
> }
>
> // SG removed extra stuff
> flush(ptoken);
> /+flush (current);
> printf (";");+/
> if (brackets.length && brackets [brackets.length - 1])
> {
> printf (" %.*s", brackets [brackets.length - 1]);
> brackets = brackets [0 .. brackets.length - 1];
> }
> break;
>
> /* "class ... {" into "class ... { public:". */
> /* Not necessary anymore
> case "class":
> case "interface":
> {
> bit colon = false;
>
> flush (ptoken);
>
> printf ("class");
>
> protectRecord ~= protect;
> protect = "public";
>
> while ((t = token ()) != null)
> {
> restart:
> if (t == ":" && !colon)
> {
> colon = true;
> t = token ();
> if (t != "public" && t != "private"
> && t != "protected")
> {
> flush (ptoken);
> previous = ptoken;
> printf ("public ");
> goto restart;
> }
> }
> else if (t == ";")
> break;
> else if (t == "{")
> {
> flush (current);
> printf (" public:");
> break;
> }
> }
> break;
> }*/
>
> /* "template name (x)" into "template namespace name <x>". */
> case "template":
> protectRecord ~= protect;
> protect = "public";
>
> flush (current);
> printf (" class");
> while ((t = token ()) != null)
> if (t == "(")
> {
> flush (ptoken);
> printf ("<");
> previous = current;
> }
> else if (t == ")")
> {
> flush (ptoken);
> printf (">");
> previous = current;
> break;
> }
>
> while ((t = token ()) != null)
> if (t == "{")
> {
> blockLevel++; // should this stay in?
>
> // SG add brackets here
> brackets ~= "};" ~ nextOpenBracket;
>
> flush (current);
> printf (" public:");
> break;
> }
> else if (t == ";")
> break;
> break;
>
> /* "delegate (...) name" into "(*name) (...)". */
> case "delegate":
> flush (ptoken);
> previous = current;
> while ((t = token ()) != null)
> if (t == ")")
> {
> t = token ();
> printf ("(*%.*s)", t);
> flush (ptoken);
> previous = current;
> break;
> }
> break;
>
> default:
> break;
> }
> }
> }
>
|