Another updated doxygen filter

Stewart Gordon wrote:

> J Anderson wrote:
>
> <snip>
>
>> Can't you just go?
>>
>> /+
>> #ifdef DOXYGEN_IGNORE_THIS
>> +/
>>
>> ...
>>
>> /+
>> #endif
>> +/
>
>
> I'd half thought of that (or rather, the 'correct' equivalent of that #ifdef) once I'd pretty much gone to bed, but guessed that Doxygen would choke on /+ ... +/ for the time being.  The filter wants updating to deal with these anyway, but I'd also have expected Doxygen to ignore #ifs within comments.  Hopefully I'll get round to experimenting tonight....
>
> There are two possibilities for updating dfilter to deal with /+ and +/:
> (a) remove them altogether.
> (b) convert them to /* and */.
>
> The only thing (b) would gain is the ability to use /+ ... +/ for documentation comments.  But it would be necessary to change all nested +/s and */s to something.  I suppose either solution is fairly easy to implement, though (a) would be marginally simpler.
>
> Then, your suggestion could be supported by a special case.  But then, might we just as well define our own notation, and actually filter the contents out at filter time?
>
> Stewart.
>
/++/ works (because Doxygen just ignores /++/); however, if you are going to filter these out with dfilter, then of course it wouldn't work.

-- 
-Anderson: http://badmama.com.au/~anderson/

September 29, 2004

Re: Another updated doxygen filter

Posted by Matthew
in reply to Stewart Gordon

Permalink

Matthew

Posted in reply to Stewart Gordon

Permalink

I just used this for a lib I'm trying to get correctly documented, but it strips most of the comments, the most important of which are the header comment blocks that include the Doxygen \file token.

Have there been any refinements of this since you published this version?

If not, any chance of a version that leaves *all* comments intact?

Cheers

Matthew

"Stewart Gordon" <smjg_1998@yahoo.com> wrote in message news:cel3tf$2vbr$1@digitaldaemon.com...
> Finally, a version of dfilter that's pretty much worth its weight.
>
> Fixes:
> - Decryptified some variable names in the code
> - Understands attribute blocks (at least those where the attribute is
> public, protected or private)
> - Removes the module and import statements, which create rubbish in the
> doxygen output
> - Converts linebreaks on input (if only to make the output more readable
> for debugging purposes)
>
> Suggested future developments (any takers?):
> - Finish off supporting attribute blocks
> - Support version/debug blocks (would filter for version specified on
> command line, just as DMD does)
> - Provide a means of hiding code from Doxygen (since Doxygen's own
> method relies on the C preprocessor)
>
> Stewart.
>
> -- 
> My e-mail is valid but not my primary mailbox.  Please keep replies on the 'group where everyone may benefit.
>
>


--------------------------------------------------------------------------------


> import std.file, std.ctype, std.string, std.c.stdio;
>
> char [] data; /* Whole input file; the pointers below all point into it. */
> char *current; /* Scan position: the next character token() will look at. */
> char *previous; /* Start of the input text not yet written out by flush(). */
> char *end; /* One past the last character of data. */
> char *ptoken; /* Start of the token most recently read by token(). */
>
> /* Read in the next token, starting at `current`.
>  *
>  * Whitespace, string literals, character literals and all three comment
>  * forms are skipped and never returned.  A run that starts with a letter
>  * or '_' and continues with letters, digits or '_' is returned whole; any
>  * other character is returned as a one-character token.  `ptoken` is left
>  * pointing at the start of the returned token.
>  *
>  * Returns: a slice of the input covering the token, or null at end of
>  * input.  `current` never ends up beyond `end`, so it can always be
>  * handed to flush().
>  */
> char [] token ()
> {
> restart:
>     ptoken = current;
>
>     if (current >= end)
>         return null;
>
>     /* Identifier or keyword. */
>     if (isalpha (*current) || *current == '_')
>     {
>         for (current ++; current < end; current ++)
>             if (!isalnum (*current) && *current != '_')
>                 break;
>
>         return ptoken [0 .. cast(int) (current - ptoken)];
>     }
>
>     /* Whitespace. */
>     if (*current == ' ' || *current == '\r' || *current == '\n'
>       || *current == '\t')
>     {
>         current ++;
>         goto restart;
>     }
>
>     /* String or character literal: skip to the matching quote.  A
>      * backslash hides the character after it, so an escaped quote does
>      * not end the literal. */
>     if (*current == '"' || *current == '\'')
>     {
>         char quote = *current;
>
>         for (current ++; current < end; current ++)
>             if (*current == '\\')
>                 current ++;
>             else if (*current == quote)
>             {
>                 current ++;
>                 break;
>             }
>
>         /* A backslash as the very last character steps one too far. */
>         if (current > end)
>             current = end;
>         goto restart;
>     }
>
>     if (current < end - 1)
>     {
>         /* Line comment: skip up to and including the newline, or to the
>          * end of the input if there is no newline. */
>         if (*current == '/' && current [1] == '/')
>         {
>             for (current += 2; current < end; current ++)
>                 if (*current == '\n')
>                 {
>                     current ++;
>                     break;
>                 }
>             goto restart;
>         }
>
>         /* Block comment. */
>         if (*current == '/' && current [1] == '*')
>         {
>             for (current += 2; ; current ++)
>                 if (current >= end - 1)
>                 {
>                     /* Unterminated: swallow the rest of the input. */
>                     current = end;
>                     goto restart;
>                 }
>                 else if (*current == '*' && current [1] == '/')
>                 {
>                     current += 2;
>                     goto restart;
>                 }
>         }
>
>         /* Nesting comment.  Each branch advances by exactly the number
>          * of characters it has examined, so a delimiter that directly
>          * follows another delimiter is not skipped over. */
>         if (*current == '/' && current [1] == '+')
>         {
>             int depth = 1;
>
>             current += 2;
>             while (depth)
>             {
>                 if (current >= end - 1)
>                 {
>                     /* Unterminated: swallow the rest of the input. */
>                     current = end;
>                     break;
>                 }
>
>                 if (*current == '/' && current [1] == '+')
>                 {
>                     current += 2;
>                     depth ++;
>                 }
>                 else if (*current == '+' && current [1] == '/')
>                 {
>                     current += 2;
>                     depth --;
>                 }
>                 else
>                     current ++;
>             }
>             goto restart;
>         }
>     }
>
>     /* Anything else is a one-character token. */
>     current ++;
>     return ptoken [0 .. 1];
> }
>
> /* Print the input text from `previous` up to (not including) `p`, then
>  * set `previous` to `current`.  Note that `previous` becomes `current`,
>  * not `p`: whatever lies between the two is skipped unless the caller
>  * moves `previous` back afterwards.
>  *
>  * Nothing is written when `p` does not lie beyond `previous`.  The length
>  * would then be zero or negative, and a negative length would turn into
>  * a huge unsigned size once handed to fwrite.
>  */
> void flush (char *p)
> {
>     if (p > previous)
>         fwrite (previous, cast(int) (p - previous), 1, stdout);
>     previous = current;
> }
>
> /* Consume a "{ ... }" or "(xxx) { ... }" block that follows the keyword
>  * starting at p.
>  *
>  * Everything before p is written out first.  If the keyword is followed
>  * (after an optional parenthesised part) by "{", the keyword and the
>  * whole brace-balanced block are dropped from the output.  Otherwise
>  * nothing is dropped: the text from p up to the current point is written
>  * out unchanged.
>  */
> void skipBlock (char *p)
> {
>     flush (p);
>
>     char [] t = token ();
>
>     /* Optional "(xxx)" part: skip to the closing parenthesis. */
>     if (t == "(")
>     {
>         while (1)
>         {
>             t = token ();
>             if (t == ")" || t == null)
>                 break;
>         }
>         t = token ();
>     }
>
>     /* Not a block after all: put the keyword, and whatever was scanned
>      * after it, back into the output. */
>     if (t != "{")
>     {
>         previous = p;
>         flush (current);
>         return;
>     }
>
>     /* t is the opening brace; scan to its matching close or end of input. */
>     int depth = 0;
>
>     while (t != null)
>     {
>         if (t == "{")
>             depth ++;
>         else if (t == "}")
>         {
>             depth --;
>             if (depth == 0)
>                 break;
>         }
>
>         t = token ();
>     }
>
>     /* Resume copying after the block. */
>     previous = current;
> }
>
> /* Entry point: read the D source file named by args [1], rewrite the
>  * constructs handled below and print the result on stdout.
>  *
>  * The input is scanned token by token.  Text is copied through by
>  * flush(); each case decides which stretch of the input to drop (by
>  * moving `previous`) and what replacement text, if any, to print.
>  *
>  * Returns: 0 once the end of the input has been reached, 1 when no file
>  * name was given.
>  */
> int main (char [] [] args)
> {
>     if (args.length == 1)
>     {
>         printf ("%.*s FILENAME\n\nPreprocesses the file in preparation for Doxygen.\n", args [0]);
>         return 1;
>     }
>
>     data = cast(char []) read (args [1]);
>
>     // SG translate line breaks
>     data = replace(data, "\r\n", "\n");
>     data = replace(data, "\r", "\n");
>
>     current = previous = data;
>     end = previous + data.length;
>
>     char [] t;
>     char [] [] protectRecord;   // protection levels saved on entering a block
>     char [] protect = "public"; // protection level currently in force
>     char [] [] brackets;        // text to print after each pending "}"
>     char [] nextOpenBracket;    // appended to the entry pushed at the next "{"
>     char [] nextSemiColon;      // printed after the next ";"
>     int blockLevel=0;
>
>     while (1)
>     {
>         t = token ();
>         if (t == null)
>         {
>             flush (current);
>             return 0;
>         }
>
>         debug {
>             fprintf(stderr, "Token '%.*s', blockLevel %d\n",
>               t, blockLevel);
>         }
>
>         //remove private blocks if they are on the module level
>         if(blockLevel==0 && t=="private")
>         {
>             debug {
>                 fputs("entered remove module-level private block\n", stderr);
>             }
>             flush (ptoken);
>
>             //FIXME: this doesn't work for constructs of the form
>             //private:
>             //public XXX;
>             //<more private data>
>
>             bool endReached=false;
>             bool breakAfterDecl=true;
>             // Must be an integer: it counts tokens and is compared with 1.
>             int tokenCount=0;
>
>             while(!endReached)
>             {
>                 t = token ();
>                 if (t == null)
>                     return 0;
>
>                 tokenCount++;
>
>                 switch(t)
>                 {
>                     case ":":
>                         if(tokenCount==1)
>                             /* do not break after next declaration.
>                              * Instead we search for the next public statement
>                              */
>                             breakAfterDecl=false;
>                         break;
>                     case ";":
>                         if(blockLevel==0 && breakAfterDecl)
>                             endReached=true;
>                         break;
>                     case "{":
>                         blockLevel++;
>                         break;
>                     case "}":
>                         blockLevel--;
>                         if(blockLevel==0 && breakAfterDecl)
>                             endReached=true;
>                         break;
>                     case "public":
>                         if(blockLevel==0)
>                         {
>                             printf ("%.*s", t);
>                             endReached=true;
>                         }
>                         break;
>                     default:
>                         break;
>                 }
>             }
>
>             previous=current;
>             debug {
>                 fputs("exited remove module-level private block\n", stderr);
>             }
>
>             // The terminating token has been dealt with above (blockLevel
>             // adjusted, "public" re-emitted).  Running it through the
>             // switch below as well would handle it a second time and
>             // flush from a point behind `previous`.
>             continue;
>         }
>
>         switch (t)
>         {
>             /* Remove these keywords. */
>             case "body":
>                 flush (ptoken);
>                 previous = current;
>                 break;
>
>             /* Remove these blocks. */
>             case "unittest":
>             case "invariant":
>             case "in":
>             case "out":
>                 skipBlock (ptoken);
>                 break;
>
>             // SG remove these statements.
>             case "module":
>             case "import":
>                 // Write out what precedes the statement first; otherwise
>                 // pending text (such as a file header comment) would be
>                 // dropped together with the statement.
>                 flush (ptoken);
>                 // Also stop at end of input so a missing ";" cannot hang.
>                 while ((t = token ()) != null)
>                     if (t == ";")
>                         break;
>                 previous = current;
>                 break;
>
>             /* Remove "keyword:" but only if it is followed with a colon. */
>             case "override":
>             case "abstract":
>             case "final":
>                 flush (ptoken);
>                 if ((t = token ()) == ":")
>                     previous = current;
>                 break;
>
>             case ";":
>                 flush (current);
>                 printf ("%.*s", nextSemiColon);
>                 nextSemiColon = null;
>                 break;
>
>             /* "keyword" without "keyword:" into "keyword: ... { ... } antikeyword:" */
>             case "public":
>             case "private":
>             case "protected":
>                 flush (ptoken);
>                 /+
>                 if (token () == ":")
>                 {
>                 printf ("%.*s", t);
>                 protect = t;
>                 break;
>                 }
>
>                 if (t != protect)
>                 {
>                 printf ("%.*s: ", t);
>                 previous = ptoken;
>                 nextOpenBracket = protect ~ ":";
>                 nextSemiColon = protect ~ ":";
>                 }+/
>
>                 // SG added support for attribute block
>                 switch (token()) {
>                     case ":":
>                         printf("%.*s", t);
>                         protect = t;
>                         break;
>
>                     case "{":
>                         blockLevel++;
>                         printf("%.*s: ", t);
>                         protectRecord ~= protect;
>                         brackets ~= protect ~ ":";
>                         protect = t;
>                         previous = current;
>                         //nextOpenBracket = protect ~ ":";
>                         break;
>
>                     default:
>                         printf("%.*s: ", t);
>                         // Resume copying at the token just read, so only
>                         // the keyword itself is replaced.
>                         previous = ptoken;
>                         nextOpenBracket = protect ~ ":";
>                         nextSemiColon = protect ~ ":";
>                 }
>                 break;
>
>             /* Modify into "package". */
>             /*Not necessary anymore
>             case "module":
>             flush (ptoken);
>             printf ("package ", nextSemiColon);
>             previous = current;
>             break;*/
>
>             /* Modify into import X.Y.*. */
>             /* Not necessary anymore
>             case "import":
>             flush (ptoken);
>             printf ("import ", nextSemiColon);
>
>             while ((t = token ()) != null)
>             {
>             if (t == ";")
>             {
>             printf (";");
>             break;
>             }
>             else
>             printf ("%.*s", t);
>             }
>             previous = current;
>             break;*/
>
>             /* Remove "extern (...)". */
>             case "extern":
>                 flush (ptoken);
>                 if ((t = token ()) != "(")
>                 {
>                     // Rewind so the token just read is scanned again.
>                     current = ptoken;
>                     break;
>                 }
>
>                 while ((t = token ()) != null)
>                     if (t == ")")
>                         break;
>                 previous = current;
>                 break;
>
>             /* "alias" into "typedef". */
>             case "alias":
>                 flush (ptoken);
>                 printf ("typedef");
>                 previous = current;
>                 break;
>
>             /* "instance" into "typedef". */
>             case "instance":
>                 flush (ptoken);
>                 printf ("typedef");
>                 previous = current;
>
>                 while ((t = token ()) != null)
>                     if (t == "(")
>                     {
>                         flush (ptoken);
>                         printf ("<");
>                         previous = current;
>                     }
>                     else if (t == ")")
>                     {
>                         flush (ptoken);
>                         printf (">");
>                         previous = current;
>                         break;
>                     }
>
>                 break;
>
>             case "{":
>                 // SG terminate bracket here
>                 brackets ~= "};" ~ nextOpenBracket;
>                 nextOpenBracket = null;
>                 blockLevel++;
>                 break;
>
>             /* "}" into "};" */
>             case "}":
>                 blockLevel--;
>                 if (protectRecord.length)
>                 {
>                     protect = protectRecord [protectRecord.length - 1];
>                     protectRecord.length = protectRecord.length - 1;
>                 }
>
>                 // SG removed extra stuff
>                 flush(ptoken);
>                 /+flush (current);
>                 printf (";");+/
>                 if (brackets.length && brackets [brackets.length - 1])
>                 {
>                     printf (" %.*s", brackets [brackets.length - 1]);
>                     brackets = brackets [0 .. brackets.length - 1];
>                 }
>                 break;
>
>             /* "class ... {" into "class ... { public:". */
>             /* Not necessary anymore
>             case "class":
>             case "interface":
>             {
>             bit colon = false;
>
>             flush (ptoken);
>
>             printf ("class");
>
>             protectRecord ~= protect;
>             protect = "public";
>
>             while ((t = token ()) != null)
>             {
>             restart:
>             if (t == ":" && !colon)
>             {
>             colon = true;
>             t = token ();
>             if (t != "public" && t != "private"
>               && t != "protected")
>             {
>             flush (ptoken);
>             previous = ptoken;
>             printf ("public ");
>             goto restart;
>             }
>             }
>             else if (t == ";")
>             break;
>             else if (t == "{")
>             {
>             flush (current);
>             printf (" public:");
>             break;
>             }
>             }
>             break;
>             }*/
>
>             /* "template name (x)" into "template namespace name <x>". */
>             case "template":
>                 protectRecord ~= protect;
>                 protect = "public";
>
>                 flush (current);
>                 printf (" class");
>                 while ((t = token ()) != null)
>                     if (t == "(")
>                     {
>                         flush (ptoken);
>                         printf ("<");
>                         previous = current;
>                     }
>                     else if (t == ")")
>                     {
>                         flush (ptoken);
>                         printf (">");
>                         previous = current;
>                         break;
>                     }
>
>                 while ((t = token ()) != null)
>                     if (t == "{")
>                     {
>                         blockLevel++; // should this stay in?
>
>                         // SG add brackets here
>                         brackets ~= "};" ~ nextOpenBracket;
>
>                         flush (current);
>                         printf (" public:");
>                         break;
>                     }
>                     else if (t == ";")
>                         break;
>                 break;
>
>             /* "delegate (...) name" into "(*name) (...)". */
>             case "delegate":
>                 flush (ptoken);
>                 previous = current;
>                 while ((t = token ()) != null)
>                     if (t == ")")
>                     {
>                         t = token ();
>                         printf ("(*%.*s)", t);
>                         flush (ptoken);
>                         previous = current;
>                         break;
>                     }
>                 break;
>
>             default:
>                 break;
>         }
>     }
> }
>

Forums