Thread overview
problems with ?/{0,1} in RegExp
Oct 25, 2002
Mike Wynn
Oct 26, 2002
Walter
Oct 26, 2002
Mike Wynn
Oct 26, 2002
Walter
October 25, 2002
I'm having grief from the RegExp libs.
I'm trying to match -- item [optional item] item
e.g. "(a)(b)?(c)"  (the ()'s are there so I can use replace( "$1" ); later)

This will match "acc", but I then cannot use $2 or $3 in a replace statement.

I also want to do ...
"(a)((b|c)?(d|e)*)(f)" and use 'a' and 'f' (which might be, say, '\S+\s*\{' and
'}\s*\S+'),
ideally with the following replace semantics (see the end of this message for the
current output):
$1 -> (a)
$2 -> ((b|c)?(d|e)*)
$3 -> (f)
even if $2 == ""
and either another regexp has to be run over $2 or
$21 -> (b|c)?
$22 -> (d|e)*

I'm not 100% used to regexps (I've always avoided them), so I am a bit unsure
what the regexp experts would expect; I would be happy if I could use
$1 -> (a)
$2 -> ((b|c)?(d|e)*)
$3 -> (b|c)?
$4 -> (d|e)*
$5 -> (f)

the code I've been trying is this

import regexp;
import string;

// Tests `form` against `inp` and reports the first three capture groups.
// When form.test( inp ) is non-zero, returns a "Transfromed ..." string built
// from the results of form.replace() for "$1", "$2" and "$3"; otherwise
// returns "NO TRANSFORM " followed by inp.
char[] convInner( RegExp form, char[] inp )
{
 if ( form.test( inp ) != 0 )
 {
  // Placeholder values: each is overwritten below, but "2" and "3" are what
  // gets reported if the $2/$3 replace() calls are commented out.
  char[] one = "1";
  char[] two = "2";
  char[] three = "3";
  one = form.replace( "$1" );
// comment out these lines to get this to not throw an exception
  two = form.replace( "$2" );
  three = form.replace( "$3" );
  return "Transfromed '"~one~"' + '"~two~"' + '"~three~"';";
 }
 return "NO TRANSFORM "~inp;
}

// Runs convInner() on inp with the pattern '^(a)(b){0,1}(c*)' (optional group
// written with the {0,1} quantifier) and prints "conv1) <input> -> <result>".
void myConvert1( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
 // D char[] values carry their own length and are not guaranteed to be
 // 0-terminated (rv is a concatenation result), so print them with %.*s
 // rather than casting to char* for %s.
 printf( "conv1) %.*s -> %.*s\n", inp, rv );
}

// Runs convInner() on inp with the pattern '^(a)((b){0,1})(c*)' (the {0,1}
// optional group wrapped in an extra capture group) and prints
// "conv2) <input> -> <result>".
void myConvert2( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
 // D char[] values carry their own length and are not guaranteed to be
 // 0-terminated (rv is a concatenation result), so print them with %.*s
 // rather than casting to char* for %s.
 printf( "conv2) %.*s -> %.*s\n", inp, rv );
}

// Runs convInner() on inp with the pattern '^(a)(b)?(c*)' (optional group
// written with the ? quantifier) and prints "conv3) <input> -> <result>".
void myConvert3( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
 // D char[] values carry their own length and are not guaranteed to be
 // 0-terminated (rv is a concatenation result), so print them with %.*s
 // rather than casting to char* for %s.
 printf( "conv3) %.*s -> %.*s\n", inp, rv );
}

// Runs convInner() on inp with the pattern '^(a)((b)?)(c*)' (the ? optional
// group wrapped in an extra capture group) and prints
// "conv4) <input> -> <result>".
void myConvert4( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
 // D char[] values carry their own length and are not guaranteed to be
 // 0-terminated (rv is a concatenation result), so print them with %.*s
 // rather than casting to char* for %s.
 printf( "conv4) %.*s -> %.*s\n", inp, rv );
}

// Driver: feeds two sample inputs through all four pattern variants, first
// the input that contains the optional 'b', then the one that omits it.
int main( char[][] args )
{
 char[] withB = "abcc";    // optional 'b' present
 char[] withoutB = "acc";  // optional 'b' absent

 // All four patterns against the input containing 'b'.
 myConvert1( withB );
 myConvert2( withB );
 myConvert3( withB );
 myConvert4( withB );

 // The same four patterns against the input without 'b'.
 myConvert1( withoutB );
 myConvert2( withoutB );
 myConvert3( withoutB );
 myConvert4( withoutB );
 return 0;
}

which outputs
conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
conv2) abcc -> Transfromed 'a' + 'b' + 'b';
conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
conv4) abcc -> Transfromed 'a' + 'b' + 'b';
Error: ArrayBoundsError regexp(2396)



October 26, 2002
I'll take a look. -Walter

"Mike Wynn" <mike.wynn@l8night.co.uk> wrote in message news:apc16h$c6r$1@digitaldaemon.com...
> I'm having grief from the RegExp libs,
> I'm tryng to match -- item [optional item] item
> eg "(a)(b)?(c)"  ()'s used so I can use replace( "$1" ); later
>
> but this will match to "acc" but I can not use $2 or $3 in a replace statement.
>
> also I want to do ..
> "(a)((b|c)?(d|e)*)(f) and use 'a' and 'f' (which might be say '\S+\s*\{' and
> '}\s*\S+')
> ideally with the following replace symantics (see end of message for the
> current o/p)
> $1 -> (a)
> $2 -> ((b|c)?(d|e)*)
> $3 -> (f)
> even if $2 == ""
> and either another regexp has to be run over $2 or
> $21 -> (b|c)?
> $22 -> (d|e)*
>
> I'm not 100% used to regexps I've always avoided them so am a bit unsure what the regexp experts would be expecting; I would be happy if I could use
> $1 -> (a)
> $2 -> ((b|c)?(d|e)*)
> $3 -> (b|c)?
> $4 -> (d|e)*
> $5 -> (f)
>
> the code I've been trying is this
>
> import regexp;
> import string;
>
> char[] convInner( RegExp form, char[] inp )
> {
>  if ( form.test( inp ) != 0 )
>  {
>   char[] one = "1";
>   char[] two = "2";
>   char[] three = "3";
>   one = form.replace( "$1" );
> // comment out theses lines to get this to not throw an exception
>   two = form.replace( "$2" );
>   three = form.replace( "$3" );
>   return "Transfromed '"~one~"' + '"~two~"' + '"~three~"';";
>  }
>  return "NO TRANSFORM "~inp;
> }
>
> void myConvert1( char[] inp )
> {
>  char[] rv;
>  rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
>  printf( "conv1) %s -> %s\n", (char *)inp, (char *)rv );
> }
>
> void myConvert2( char[] inp )
> {
>  char[] rv;
>  rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
>  printf( "conv2) %s -> %s\n", (char *)inp, (char *)rv );
> }
>
> void myConvert3( char[] inp )
> {
>  char[] rv;
>  rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
>  printf( "conv3) %s -> %s\n", (char *)inp, (char *)rv );
> }
>
> void myConvert4( char[] inp )
> {
>  char[] rv;
>  rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
>  printf( "conv4) %s -> %s\n", (char *)inp, (char *)rv );
> }
>
> int main( char[][] args )
> {
>  char[] str1 = "abcc";
>  char[] str2 = "acc";
>
>  myConvert1( str1 );
>  myConvert2( str1 );
>  myConvert3( str1 );
>  myConvert4( str1 );
>
>  myConvert1( str2 );
>  myConvert2( str2 );
>  myConvert3( str2 );
>  myConvert4( str2 );
>  return 0;
> }
>
> which outputs
> conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
> conv2) abcc -> Transfromed 'a' + 'b' + 'b';
> conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
> conv4) abcc -> Transfromed 'a' + 'b' + 'b';
> Error: ArrayBoundsError regexp(2396)
>
>
>


October 26, 2002
As an aside, I had a look around the Perl site for info on regexps (it's one
thing Perl is very good at)
and found http://dev.perl.org/rfc/360.html
Regexps are slow (in comparison to arithmetic ops), so returning objects rather
than char[] would not represent a major performance hit.
(Semi-C++ pseudo-code, to show ownership.)
i.e.
i.e.

RegExpElement [] Regexp::Eval( char[] str ); // runs the regexp over the
string, can reuse the Regexp object
char[] RegExpElement::toString(); // get the "value" of the () element
RegExpElement RegExpElement::next(); // get the next if the group had a
postfix of *,+,? or {}
RegExpElement[] RegExpElement::children(); // get the child groups from the
regexp

e.g
RegExp rex = new RegExp( "(a)*(b+)(c+(d+)(e)*)f" );

then
rex.Eval( "aabbbcddef" );
would return
[
    "a" :next-> "a" :next-> null
    "bbb"
    "cdde" <children> [
        "dd"
        "e" :next-> null;
    ]
]

then
rex.Eval( "bbcdf" );
would return
[
    MT :next-> null
    "bb"
    "cd" <children> [
        "d"
        MT :next-> null;
    ]
]

I'm not sure whether MT (i.e. "empty") should be "", null, or a special 'empty' Element.


"Walter" <walter@digitalmars.com> wrote in message news:apcrb4$17d1$1@digitaldaemon.com...
> I'll take a look. -Walter
>
> "Mike Wynn" <mike.wynn@l8night.co.uk> wrote in message news:apc16h$c6r$1@digitaldaemon.com...
> > I'm having grief from the RegExp libs,
> > I'm tryng to match -- item [optional item] item
> > eg "(a)(b)?(c)"  ()'s used so I can use replace( "$1" ); later
> >
> > but this will match to "acc" but I can not use $2 or $3 in a replace statement.
> >
> > also I want to do ..
> > "(a)((b|c)?(d|e)*)(f) and use 'a' and 'f' (which might be say '\S+\s*\{' and
> > '}\s*\S+')
> > ideally with the following replace symantics (see end of message for the
> > current o/p)
> > $1 -> (a)
> > $2 -> ((b|c)?(d|e)*)
> > $3 -> (f)
> > even if $2 == ""
> > and either another regexp has to be run over $2 or
> > $21 -> (b|c)?
> > $22 -> (d|e)*
> >
> > I'm not 100% used to regexps I've always avoided them so am a bit unsure what the regexp experts would be expecting; I would be happy if I could use
> > $1 -> (a)
> > $2 -> ((b|c)?(d|e)*)
> > $3 -> (b|c)?
> > $4 -> (d|e)*
> > $5 -> (f)
> >
> > the code I've been trying is this
> >
> > import regexp;
> > import string;
> >
> > char[] convInner( RegExp form, char[] inp )
> > {
> >  if ( form.test( inp ) != 0 )
> >  {
> >   char[] one = "1";
> >   char[] two = "2";
> >   char[] three = "3";
> >   one = form.replace( "$1" );
> > // comment out theses lines to get this to not throw an exception
> >   two = form.replace( "$2" );
> >   three = form.replace( "$3" );
> >   return "Transfromed '"~one~"' + '"~two~"' + '"~three~"';";
> >  }
> >  return "NO TRANSFORM "~inp;
> > }
> >
> > void myConvert1( char[] inp )
> > {
> >  char[] rv;
> >  rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
> >  printf( "conv1) %s -> %s\n", (char *)inp, (char *)rv );
> > }
> >
> > void myConvert2( char[] inp )
> > {
> >  char[] rv;
> >  rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
> >  printf( "conv2) %s -> %s\n", (char *)inp, (char *)rv );
> > }
> >
> > void myConvert3( char[] inp )
> > {
> >  char[] rv;
> >  rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
> >  printf( "conv3) %s -> %s\n", (char *)inp, (char *)rv );
> > }
> >
> > void myConvert4( char[] inp )
> > {
> >  char[] rv;
> >  rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
> >  printf( "conv4) %s -> %s\n", (char *)inp, (char *)rv );
> > }
> >
> > int main( char[][] args )
> > {
> >  char[] str1 = "abcc";
> >  char[] str2 = "acc";
> >
> >  myConvert1( str1 );
> >  myConvert2( str1 );
> >  myConvert3( str1 );
> >  myConvert4( str1 );
> >
> >  myConvert1( str2 );
> >  myConvert2( str2 );
> >  myConvert3( str2 );
> >  myConvert4( str2 );
> >  return 0;
> > }
> >
> > which outputs
> > conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
> > conv2) abcc -> Transfromed 'a' + 'b' + 'b';
> > conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
> > conv4) abcc -> Transfromed 'a' + 'b' + 'b';
> > Error: ArrayBoundsError regexp(2396)
> >
> >
> >
>
>


October 26, 2002
"Mike Wynn" <mike.wynn@l8night.co.uk> wrote in message news:apdtc6$2no8$1@digitaldaemon.com...
> as an aside, I had a look around the Perl site for info on Regexp's (its one
> thing Perl's very good at)
> and found http://dev.perl.org/rfc/360.html

D's regexps are equivalent to the JavaScript functionality, but I agree that Perl has gone way beyond that!