Thread overview | |||||
---|---|---|---|---|---|
|
February 01, 2003 A third example use of C macros that has not been discussed | ||||
---|---|---|---|---|
| ||||
There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is elegantly done with macros of course. -paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; } |
February 01, 2003 Re: A third example use of C macros that has not been discussed | ||||
---|---|---|---|---|
| ||||
Posted in reply to Paul Sheer | Maybe I'm missing something, but it seems to me that you could do that with templates pretty easily. In so doing, you get a bit of type safety, and you avoid having arguments evaluated more than once.
/**
 * Word-at-a-time copy routine, instantiated once per word type T
 * (e.g. CopyLoop!(uint).word_copy).
 */
template CopyLoop(T)
{
    /**
     * Copies `count` bytes from `src` to `dest`, moving whole T-sized words
     * wherever possible.
     *
     * The copy runs in four phases: single bytes until `dest` sits on a
     * T.sizeof boundary, then two words per iteration, then one word per
     * iteration, and finally any remaining tail bytes.
     *
     * Only `dest` is aligned explicitly; `src` is assumed to have the same
     * misalignment modulo T.sizeof, which the caller must have verified.
     *
     * Params:
     *   dest  = start of the destination buffer
     *   src   = start of the source buffer
     *   count = number of bytes to copy; a value <= 0 copies nothing
     */
    void word_copy(T* dest, T* src, int count)
    {
        // Kept as int so comparisons against the signed `count` stay signed.
        enum int wordSize = cast(int) T.sizeof;

        char* d8 = cast(char*) dest;
        char* s8 = cast(char*) src;

        // Phase 1: byte copy until the destination is word aligned.
        size_t misalign = cast(size_t) dest & (wordSize - 1);
        while (count > 0 && (misalign & (wordSize - 1)) != 0)
        {
            *d8++ = *s8++;
            count--;
            misalign++;
        }

        T* wordDst = cast(T*) d8;
        T* wordSrc = cast(T*) s8;

        // Phase 2: two words per iteration; both loads are issued before
        // either store, as in the original macro.
        while (count >= wordSize * 2)
        {
            T a0 = wordSrc[0];
            T a1 = wordSrc[1];
            count -= wordSize * 2;
            wordDst[0] = a0;
            wordDst[1] = a1;
            wordSrc += 2;
            wordDst += 2;
        }

        // Phase 3: at most one remaining whole word.
        while (count >= wordSize)
        {
            *wordDst++ = *wordSrc++;
            count -= wordSize;
        }

        // Phase 4: trailing bytes. `> 0` keeps a negative count from
        // turning into a runaway loop.
        d8 = cast(char*) wordDst;
        s8 = cast(char*) wordSrc;
        while (count-- > 0)
            *d8++ = *s8++;
    }
}
Paul Sheer wrote:
> There are many situations where you want to reimplement
> a piece of code several times, with a different type
> for each. This can happen for optimization, or for
> marshalling functions. This is the strongest reason
> for support of a macro preprocessor.
>
> This example shows an optimized memcpy function that
> copies in words at a time, properly accounting for
> possible alignment differences on processors that
> do not support non-aligned word stores.
>
> It is elegantly done with macros of course.
>
> -paul
>
> --------
>
> /* this must be set to the most efficient copying type - usually
> unsigned long: */
> typedef unsigned long cpy_t;
>
> #define word_copy(t,d,s,count) \
> do { \
> unsigned int c; \
> char *d8 = (char *) (d); \
> char *s8 = (char *) (s); \
> register t a0, a1, *dst, *src; \
> c = (unsigned long) (d) & (sizeof (t) - 1); \
> while (count && (c & (sizeof (t) - 1))) \
> (*d8++ = *s8++), count--, c++; \
> dst = (t *) d8; \
> src = (t *) s8; \
> while (count >= (sizeof (t)) * 2) { \
> a0 = src[0]; \
> a1 = src[1]; \
> count -= (sizeof (t)) * 2; \
> dst[0] = a0; \
> dst[1] = a1; \
> src += 2; \
> dst += 2; \
> } \
> while (count >= (sizeof (t))) { \
> *dst++ = *src++; \
> count -= sizeof (t); \
> } \
> d8 = (char *) dst; \
> s8 = (char *) src; \
> while (count--) \
> *d8++ = *s8++; \
> } while (0)
>
> #define byte_copy(t,d,s,count) \
> do { \
> char *d8 = (char *) (d); \
> char *s8 = (char *) (s); \
> while (count--) \
> *d8++ = *s8++; \
> } while (0)
>
> void *memcpy (void *_dest, const void *_src, size_t count)
> {
> unsigned int f;
> /* check alignment */
> f = sizeof (cpy_t);
> while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1))))
> f >>= 1;
> switch (f) {
> case 8:
> word_copy (u_int64_t, _dest, _src, count);
> break;
> case 4:
> word_copy (u_int32_t, _dest, _src, count);
> break;
> case 2:
> word_copy (u_int16_t, _dest, _src, count);
> break;
> case 1:
> byte_copy (u_int8_t, _dest, _src, count);
> break;
> }
> return (void *) _dest;
> }
>
>
|
February 02, 2003 Re: A third example use of C macros that has not been discussed | ||||
---|---|---|---|---|
| ||||
Posted in reply to Paul Sheer | IMHO: the compiler should generate an optimised memcpy from the src memcpy( foo, bar, len ); (inlined if optimised for speed; either way, the fastest for the platform and the CPU's supported instruction set). Also, you've not put a Duff's device in there (they're legal in D); tight loops kill performance. And on some architectures unaligned int reads are allowed and less expensive than 4 byte reads. I'm sure less expensive than 4 byte reads and 4 branches. Mike. "Paul Sheer" <psheer@icon.co.za> wrote in message news:b1gs7d$2i9c$1@digitaldaemon.com... > > There are many situations where you want to reimplement > a piece of code several times, with a different type > for each. This can happen for optimization, or for > marshalling functions. This is the strongest reason > for support of a macro preprocessor. > > This example shows an optimized memcpy function that > copies in words at a time, properly accounting for > possible alignment differences on processors that > do not support non-aligned word stores. > > It is elegantly done with macros of course. 
> > -paul > > -------- > > /* this must be set to the most efficient copying type - usually > unsigned long: */ > typedef unsigned long cpy_t; > > #define word_copy(t,d,s,count) \ > do { \ > unsigned int c; \ > char *d8 = (char *) (d); \ > char *s8 = (char *) (s); \ > register t a0, a1, *dst, *src; \ > c = (unsigned long) (d) & (sizeof (t) - 1); \ > while (count && (c & (sizeof (t) - 1))) \ > (*d8++ = *s8++), count--, c++; \ > dst = (t *) d8; \ > src = (t *) s8; \ > while (count >= (sizeof (t)) * 2) { \ > a0 = src[0]; \ > a1 = src[1]; \ > count -= (sizeof (t)) * 2; \ > dst[0] = a0; \ > dst[1] = a1; \ > src += 2; \ > dst += 2; \ > } \ > while (count >= (sizeof (t))) { \ > *dst++ = *src++; \ > count -= sizeof (t); \ > } \ > d8 = (char *) dst; \ > s8 = (char *) src; \ > while (count--) \ > *d8++ = *s8++; \ > } while (0) > > #define byte_copy(t,d,s,count) \ > do { \ > char *d8 = (char *) (d); \ > char *s8 = (char *) (s); \ > while (count--) \ > *d8++ = *s8++; \ > } while (0) > > void *memcpy (void *_dest, const void *_src, size_t count) > { > unsigned int f; > /* check alignment */ > f = sizeof (cpy_t); > while ((((unsigned long) _src) & (f - 1)) != > (((unsigned long) _dest & (f - 1)))) > f >>= 1; > switch (f) { > case 8: > word_copy (u_int64_t, _dest, _src, count); > break; > case 4: > word_copy (u_int32_t, _dest, _src, count); > break; > case 2: > word_copy (u_int16_t, _dest, _src, count); > break; > case 1: > byte_copy (u_int8_t, _dest, _src, count); > break; > } > return (void *) _dest; > } > > |
Copyright © 1999-2021 by the D Language Foundation