March 01, 2017
Is there a way to get the name of a named capture when iterating over the captures from a regular expression match?  I've looked at the std.regex code and, as far as I can tell, the answer is "no", but I wonder if others here have found a way.

My original problem is this: I need to populate an associative array (AA) with all named captures that successfully matched during a regex match (and none of the captures that failed).  I was wondering what the best way to do this might be.

Thanks!

Please see the comments in the program below for details and my current progress:

// Demo driver: prints the compiler identity, shows the pattern under test,
// then reports the named captures for one input where the optional `value`
// group matches and one where it does not.
void main()
{
        import std.compiler;
        import std.regex;
        import std.range;
        import std.stdio;

        // Compiler banner, so the output can be tied to a specific toolchain.
        writefln("Compiler name:    %s", std.compiler.name);
        writefln("Compiler version: %s.%s", version_major, version_minor);
        writeln();

        // `var` is mandatory; `value` is optional because of the trailing `?`.
        enum assignmentPattern = `(?P<var>\w+)\s*=\s*(?P<value>\d+)?;`;
        writefln("Regular expression: `%s`", assignmentPattern);
        writeln();

        auto compiled = regex(assignmentPattern);

        // First input supplies a value, second one leaves the `value` group
        // unmatched.
        foreach (sample; ["a = 42;", "a = ;"])
        {
                auto firstMatch = matchFirst(sample, compiled);
                reportCaptures(compiled, firstMatch);
        }
}

// Prints the state of every named capture group of `re` as found in
// `captures`, and collects the groups that produced non-empty text into an
// associative array keyed by group name, which is printed at the end.
//
//   re       - the compiled regular expression; only `re.namedCaptures`
//              is read from it.
//   captures - the match result; indexed by 0 (whole match) and by each
//              group name.
void reportCaptures(Regex, RegexCaptures)(Regex re, RegexCaptures captures)
{
        import std.range;
        import std.regex;
        import std.stdio;

        writefln("Captures from matched string '%s'", captures[0]);

        // Goal: fill this AA with exactly those named captures that
        // actually matched something.
        string[string] matched;

        // Current approach: walk the group names known to the regex and look
        // each one up by name in the match result.
        //
        // This kind of works, but it costs a string lookup per capture, and
        // using it in practice relies on undocumented behavior: what does
        // std.regex's Captures opIndex(string) return when the name is a
        // valid named capture that did not participate in the match (for
        // example, a group qualified with `?` or `*` that never appeared in
        // the matched string)?  Both "null" and "empty" are probed below to
        // find out.
        foreach (groupName; re.namedCaptures)
        {
                auto slice = captures[groupName];

                if (slice is null)
                {
                        writefln("  captures[%s] is null", groupName);
                        continue;
                }

                if (slice.empty)
                {
                        writefln("  captures[%s] is empty", groupName);
                        continue;
                }

                // Only groups with actual text end up in the result.
                writefln("  captures[%s] is '%s'", groupName, slice);
                matched[groupName] = slice;
        }

        writefln("Total captures: %s", matched);

        /+
        // What I would really like to write instead:
        foreach( capture; captures )
                matched[capture.name] = capture.value;

        // And, in reality, it might need to look more like this:
        foreach( capture; captures )
                foreach ( valueIndex, value; capture.values )
                        matched[format("%s-%s",capture.name,valueIndex)] = value;
        // Because, logically, named captures qualified with the
        // *, +, or {} operators in regular expressions may capture
        // multiple slices.

        writefln("Total captures: %s", matched);
        +/

        writeln("");
}


//Output, DMD64 D Compiler v2.073.1:
//---
//
//Compiler name:    Digital Mars D
//Compiler version: 2.73
//
//Regular expression: `(?P<var>\w+)\s*=\s*(?P<value>\d+)?;`
//
//Captures from matched string 'a = 42;'
//  captures[value] is '42'
//  captures[var] is 'a'
//Total captures: ["value":"42", "var":"a"]
//
//Captures from matched string 'a = ;'
//  captures[value] is empty
//  captures[var] is 'a'
//Total captures: ["var":"a"]