August 01, 2002
Federico wrote:

> In article <MPG.17b2f9f4a1b121699896ae@news.digitalmars.com>, Heinz Saathoff says...
> >Why not allocating a big buffer on startup? It shouldn't make much difference in access time if you change
>
> Sorry, it does.
> I was able to reduce the problem to a simple and stupid matrix multiplication
> of two NxN matrices into a third one (the original code is different and
> much more sophisticated, but exhibits the very same pathology).

Could you give an example in 'code' of what you are doing and how static v.s. dynamic makes such a difference?

Jan


August 01, 2002
1) Record type BC is a COMDAT record - perfectly valid. Bug in Alink.
2) These externs should be defined in whatever operating system .lib file
you're linking with - not a problem with SNN.LIB.
3) Identifying the problem as being with SNN.OBJ (when the name of the file
is SNN.LIB) indicates a bug in ilink32, not SNN.
4) Access violations when ilink32 is run is a bug in the linker, not SNN.


"Federico" <Federico_member@pathlink.com> wrote in message news:aic9qt$d1o$1@digitaldaemon.com...
> In article <ai9qud$27he$1@digitaldaemon.com>, Walter says...
> >
> >What problems with snn?
> >
>
> Please, find below outputs of Alink, Blinker, Tlink. The latter gives no information, but the problem happens as soon as SNN.LIB is accessed.
>
> Moreover, if the big arrays at file scope are not declared static, both alink and blinker complain about the .obj generated from my source code.
>
> Maybe I'm stuck in a stupid problem, I apologize but I'm not an expert in linking under Win32.
>
> Federico
>
>
> -----------------Alink------------------------------
>
> e:\eee\ramspeed\qq>alink -oPE -subsys con rs.obj USER32.LIB KERNEL32.LIB ALINK v1.6 (C) Copyright 1998-9 Anthony A.J. Williams.
>
> All Rights Reserved
>
>
> Loading file rs.obj
>
> Loading file USER32.LIB
>
> Loading file KERNEL32.LIB
>
> Loading file SNN.lib
>
>
> Error in file at 000000EE - unknown object module record type BC
>
> name count = 19
>
> seg count = 8
>
> extcount=10
>
> grpcount=2
>
> comcount=4
>
> fixcount=18
>
> impcount=0
>
> expcount=0
>
>
> e:\eee\ramspeed\qq>
>
> --------------------Blinker-----------------------------
>
> e:\eee\ramspeed\qq>blinker file rs.obj lib user32.lib lib kernel32.lib
>
> BLINKER : 1115 : SNN.LIB(BUILDENV) : '__IMP__MULTIBYTETOWIDECHAR@24' :
> unresolved external
> BLINKER : 1115 : SNN.LIB(CLOCK) : '__IMP__GETTICKCOUNT@0' : unresolved
external
> BLINKER : 1115 : SNN.LIB(GETENV) : '__IMP__GETENVIRONMENTVARIABLEA@12' :
> unresolved external
> BLINKER : 1115 : SNN.LIB(ISATTY) : '__IMP__GETFILETYPE@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(LOCTIME) : '__IMP__GETTIMEZONEINFORMATION@4' :
> unresolved external
> BLINKER : 1115 : SNN.LIB(SEMLOCK) : '__IMP__CLOSEHANDLE@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(SEMLOCK) : '__IMP__CREATESEMAPHOREA@16' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(SEMLOCK) : '__IMP__WAITFORSINGLEOBJECT@8' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(SEMLOCK) : '__IMP__RELEASESEMAPHORE@12' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(SETMBCP) : '__IMP__GETACP@0' : unresolved
external
> BLINKER : 1115 : SNN.LIB(SETMBCP) : '__IMP__GETOEMCP@0' : unresolved
external
> BLINKER : 1115 : SNN.LIB(SETMBCP) : '__IMP__GETCPINFO@8' : unresolved
external
> BLINKER : 1115 : SNN.LIB(SETNTERR) : '__IMP__GETLASTERROR@0' : unresolved
> external
> BLINKER : 1115 : SNN.LIB(TIME) : '__IMP__GETLOCALTIME@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(TOLOWER) : '__IMP__LCMAPSTRINGA@24' : unresolved
> external
> BLINKER : 1115 : SNN.LIB(XCFILTER) : '__IMP__UNHANDLEDEXCEPTIONFILTER@4' :
> unresolved external
> BLINKER : 1115 : SNN.LIB(WCTOMB) : '__IMP__WIDECHARTOMULTIBYTE@32' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(SBRK) : '__IMP__VIRTUALALLOC@16' : unresolved
external
> BLINKER : 1115 : SNN.LIB(SBRK) : '__IMP__VIRTUALFREE@12' : unresolved
external
> BLINKER : 1115 : SNN.LIB(ISCTYPE) : '__IMP__GETSTRINGTYPEA@20' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__COMPARESTRINGW@24' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__COMPARESTRINGA@24' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__GETLOCALEINFOW@16' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__GETLOCALEINFOA@16' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__GETSTRINGTYPEW@16' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(LCAPI32) : '__IMP__LCMAPSTRINGW@24' : unresolved
> external
> BLINKER : 1115 : SNN.LIB(FIND) : '__IMP__FINDNEXTFILEA@8' : unresolved
external
> BLINKER : 1115 : SNN.LIB(FIND) : '__IMP__FINDCLOSE@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(FIND) : '__IMP__FILETIMETODOSDATETIME@12' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(FIND) : '__IMP__FINDFIRSTFILEA@8' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__GETSTDHANDLE@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__CREATEFILEA@28' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__MOVEFILEA@8' : unresolved external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__DELETEFILEA@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__SETHANDLECOUNT@4' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__SETFILEPOINTER@16' : unresolved
external
> BLINKER : 1115 : SNN.LIB(IO) : '__IMP__GETFILEATTRIBUTESA@4' : unresolved
> external
> BLINKER : 1115 : SNN.LIB(SETARGV) : '__IMP__GETMODULEFILENAMEA@12' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(W32FATER) : '__IMP__WRITECONSOLEA@20' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(W32FATER) : '__IMP__MESSAGEBOXA@16' : unresolved
> external
> BLINKER : 1115 : SNN.LIB(CONSTART) : '__IMP__GETMODULEHANDLEA@4' :
unresolved
> external
> BLINKER : 1115 : SNN.LIB(CONSTART) : '__IMP__GETCOMMANDLINEA@0' :
unresolved
> external
>
> BLINKER : 0 Warning error(s), 42 Fatal error(s)
>
> RS.EXE (not created) (0.1 seconds)
>
> e:\eee\ramspeed\qq>
>
> -----------------------Tlink------------------------
>
> e:\eee\ramspeed\qq>ilink32 -Lc:\progra~1\dm\lib\
> rs.obj,,,user32.lib+kernel32.lib
> Turbo Incremental Link 5.00 Copyright (c) 1997, 2000 Borland
>
> Warning: Unable to perform incremental link - performing full link...
>
> Fatal: Unable to open file 'SNN.OBJ'
>
> e:\eee\ramspeed\qq>ilink32 -Lc:\progra~1\dm\lib
> rs.obj,,,snn.lib+user32.lib+kernel32.lib
> Turbo Incremental Link 5.00 Copyright (c) 1997, 2000 Borland
>
> Fatal: Error detected (IMP1807)
>
> Fatal: Access violation.  Link terminated.
>
> Warning: Unable to perform incremental link - performing full link...
>
> Fatal: Error detected (IMP1807)
>
> Fatal: Access violation.  Link terminated.
>
>
> e:\eee\ramspeed\qq>
>
>


August 01, 2002
"Federico" <Federico_member@pathlink.com> wrote in message news:aic9al$ceu$1@digitaldaemon.com...
> In FORTRAN 9X you can fine control the effect using the TARGET attribute
(if
> the compiler takes advantage of the information), C99 introduced the
restrict
> keyword with similar purposes, and using it can pay a lot with some
compilers.
> To my surprise, using __restrict with DMC had no effect.

__restrict is simply ignored by DMC at the moment, which is allowable behavior under C99. Nevertheless, it can be made use of to generate better code, and a future version of DMC may do this.


August 02, 2002
Federico schrieb...
> >Why not allocating a big buffer on startup? It shouldn't make much difference in access time if you change
> 
> Sorry, it does.
> I was able to reduce the problem to a simple and stupid matrix multiplication
> of two NxN matrices into a third one (the original code is different and
> much more sophisticated, but exhibits the very same pathology).

Do you do the matrix multiply in a function taking the matrices as 'pointer to double' parameters like

   void MatrixMul(int N, double *A, double *B, double *C)
   {  // multiply NxN matrix A*B resulting in C
   }

and call this with statically allocated matrices in one case and dynamically allocated ones in the other case? This should not result in different performance as the generated code for MatrixMul would be the same. What would make a difference is simulating a 2D array with pointers as in

  double **A = new double*[N];
  for(int i=0; i<N; ++i) A[i] = new double[N];
  // same for B and C

and writing MatrixMul as

  void MatrixMul(int N, double **A, double **B, double **C)
  { // do the multiply
  }

This would indeed involve an overhead.

Another point could be different alignment of static array and dynamic allocated array. We had this discussion in the group that 8-byte aligned doubles are faster than 4-byte aligned doubles.


> I had not time to look in detail to DMC generated assembler, but the one for
> the computational core is much more verbose and lengthy for the dynamic
> allocation version than for the static one.
> This usually comes from the optimizer being more aggressive with static data,
> as it can more easily check at compile time for aliasing problems, something
> cannot be done easily for dynamically allocated areas and pointers to them.

IMO this can only happen when the routines directly access the global static buffer and not taking the arguments as parameters.

But, as Jan said, a small example code for static and dynamic version may help to clarify where the difference is.

- Heinz
August 02, 2002
I never meant the problem was IN snn.lib, I said:

> BTW. If I declare the big arrays 'static', Blinker does not complain anymore on the .obj, but has problems with SNN.LIB. The same happens with Borland Tlink.

I never said "SNN.LIB causes problems".
Federico

In article <aicch8$ggd$1@digitaldaemon.com>, Walter says...
>
>1) Record type BC is a COMDAT record - perfectly valid. Bug in Alink.
>2) These externs should be defined in whatever operating system .lib file
>you're linking with - not a problem with SNN.LIB.
>3) Identifying the problem as being with SNN.OBJ (when the name of the file
>is SNN.LIB) indicates a bug in ilink32, not SNN.
>4) Access violations when ilink32 is run is a bug in the linker, not SNN.
>


August 02, 2002
In article <3D49ACB6.963DE854@smartsoft.cc>, Jan Knepper says...

>What about the DM Linker???

Look at the other messages, it fails.

>It seems to me like you have to throw an option for blinker to prevent it from capitalizing the function names, that might fix one problem.

Thanks Jan, you are obviously right (and I'm stupid :-( ). Your message prompted me to better study the horrible online documentation of the Blinker DEMO version, and I happened to write the correct script and definition files to produce a Win32 executable, just to discover that:

----------------------------blinker output--------------------------
e:\eee\ramspeed\qq>blinker @blrs
__   __

(®¯) (®¯)       BLINKER DOS Extender and Windows Linker 5.10



___                 Blink and you'll miss it !!


Copyright (c) Assembler Software Manufacturers, Inc. 1990-99

All Rights Reserved * Demo * www.blinkinc.com 1-804-784-2347


BLINKER : 1000 : This demo version will not create 32 bit Windows programs
---------------------------------------------------------------------

Maybe that explains Blinker complaints.

Federico


August 02, 2002
> >What about the DM Linker???
> Look at the other messages, it fails.

I meant, the messages the linker gives you. You give a report for all the other linkers I can not help you with.

> >It seems to me like you have to throw an option for blinker to prevent it from capitalizing the function names, that might fix one problem.
>
> Thanks Jan, you are obviously right (and I'm stupid :-( ). Your message prompted me to better study the horrible online documentation of the Blinker DEMO version, and I happened to write the correct script and definition files to produce a Win32 executable, just to discover that: [ ... ]

Well, I guess that answers your question about blinker...

Jan


August 02, 2002
"Federico" <Federico_member@pathlink.com> wrote in message news:aieu3p$5ki$1@digitaldaemon.com...
> I never said "SNN.LIB causes problems".

You're right, you did not. I apologize.


August 03, 2002
In article <3D49AD77.20CE8DA7@smartsoft.cc>, Jan Knepper says...
>
>Could you give an example in 'code' of what you are doing and how static v.s. dynamic makes such a difference?
>

Well, today I had some time to look at generated assembler and I happened to
solve the problem: now my code meets the deadline for the computation.
The dynamic allocation version was a poor performer because the pointers to the
data areas were allocated at file scope. If they are local to the function
using them (temporary variables or formal parameters) the resulting code is
more compact and faster as well (the codes are reported below, too bad that
I'm not allowed to publish the real code, just a different one exhibiting the
same problem).
The files:
rs.c           the original one, arrays defined at file scope
rsdyn.c        pointers defined at file scope, malloc'ed memory areas
rsdynb.c       pointers defined at function scope, malloc'ed memory areas
rsdynf.c       pointers defined at file scope, malloc'ed memory areas passed
as parameters to a function implementing the core computation
rsdynbf.c      pointers defined at function scope, malloc'ed memory areas
passed as parameters to a function implementing the core
computation

The fact that defining the pointers at function scope rather than at file scope makes such a difference is still puzzling to me; I'm not accustomed to that on RISC architectures and find it a little bit strange.

Federico

---------------------------------rs.c------------------------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 2000


/* NxN single-precision matrices with static storage at file scope.
 * a and b are the operands that main() fills with rand() values;
 * derpo receives the product a*b. */
float a[N][N];
float b[N][N];
float derpo[N][N];

int main() {
int i, j, k;
float *pa, *pb, *pc;
float tot = 0.0;
clock_t start, stop;

fprintf(stderr,"Using %f MB of memory.\n", 3.0*N*N*sizeof(float)/1048576.0);

start = clock();

for(pa=a[0], pb=b[0], i=0; i<(N*N); ++i)
*pa++ = rand(), *pb++ = rand();

for(i=0; i<N; ++i) {
for(j=0; j<N; ++j)
derpo[i][j] = 0.0;
for(k=0; k<N; ++k)
for(j=0; j<N; ++j)
derpo[i][j] += a[i][k]*b[k][j];
}

for(pc=derpo[0], i=0; i<(N*N); ++i)
tot += *pc++;

stop = clock();

fprintf(stderr,"Total: %f, time: %fs.\n", tot, (1.0*stop-start)/CLOCKS_PER_SEC);

return 0;
}
---------------------------------rsdyn.c---------------------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 2000


/* File-scope pointers to rows of N floats.  main() points each one at a
 * malloc'ed block; a and b are the operands, derpo receives a*b. */
float (*a)[N];
float (*b)[N];
float (*derpo)[N];

int main() {
    /*
     * Benchmark variant with malloc'ed matrices reached through the
     * file-scope pointers a, b and derpo.  Fills a and b with rand()
     * values, computes derpo = a*b, then prints the sum of all result
     * elements and the elapsed processor time.
     *
     * Returns 0 on success; exits with status 1 if an allocation fails.
     */
    int i, j, k;
    float *pa, *pb, *pc;
    float tot = 0.0;
    clock_t start, stop;

    /* N rows of sizeof *a bytes each: the size is computed in size_t,
     * so there is no intermediate int N*N product. */
    a = malloc(N * sizeof *a);
    b = malloc(N * sizeof *b);
    derpo = malloc(N * sizeof *derpo);

    if (!(a && b && derpo)) {
        fprintf(stderr, "Failed to allocate memory!\n");
        /* free(NULL) is a no-op, so release whichever blocks succeeded. */
        free(a);
        free(b);
        free(derpo);
        exit(1);
    }

    fprintf(stderr,"Using %f MB of memory.\n", 3.0*N*N*sizeof(float)/1048576.0);

    start = clock();

    /* Fill both operands; a's element is drawn before b's at every index. */
    for (pa = a[0], pb = b[0], i = 0; i < (N*N); ++i) {
        *pa++ = rand();
        *pb++ = rand();
    }

    /* The core loop indexes through the file-scope pointers directly;
     * that access pattern is the one this variant measures. */
    for (i = 0; i < N; ++i) {
        for (j = 0; j < N; ++j) {
            derpo[i][j] = 0.0;
        }
        for (k = 0; k < N; ++k) {
            for (j = 0; j < N; ++j) {
                derpo[i][j] += a[i][k]*b[k][j];
            }
        }
    }

    /* Sum all elements of the result; the total is printed below. */
    for (pc = derpo[0], i = 0; i < (N*N); ++i) {
        tot += *pc++;
    }

    stop = clock();

    fprintf(stderr,"Total: %f, time: %fs.\n", tot, (1.0*stop-start)/CLOCKS_PER_SEC);

    /* Release the matrices and clear the file-scope pointers so nothing
     * is left dangling. */
    free(a);
    free(b);
    free(derpo);
    a = NULL;
    b = NULL;
    derpo = NULL;

    return 0;
}
---------------------------------rsdynb.c--------------------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 2000

int main() {
    /*
     * Benchmark variant with malloc'ed matrices whose pointers are local
     * to main().  Fills a and b with rand() values, computes
     * derpo = a*b, then prints the sum of all result elements and the
     * elapsed processor time.
     *
     * Returns 0 on success; exits with status 1 if an allocation fails.
     */
    int i, j, k;

    float (*a)[N];
    float (*b)[N];
    float (*derpo)[N];

    float *pa, *pb, *pc;
    float tot = 0.0;
    clock_t start, stop;

    /* N rows of sizeof *a bytes each: the size is computed in size_t,
     * so there is no intermediate int N*N product. */
    a = malloc(N * sizeof *a);
    b = malloc(N * sizeof *b);
    derpo = malloc(N * sizeof *derpo);

    if (!(a && b && derpo)) {
        fprintf(stderr, "Failed to allocate memory!\n");
        /* free(NULL) is a no-op, so release whichever blocks succeeded. */
        free(a);
        free(b);
        free(derpo);
        exit(1);
    }

    fprintf(stderr,"Using %f MB of memory.\n", 3.0*N*N*sizeof(float)/1048576.0);

    start = clock();

    /* Fill both operands; a's element is drawn before b's at every index. */
    for (pa = a[0], pb = b[0], i = 0; i < (N*N); ++i) {
        *pa++ = rand();
        *pb++ = rand();
    }

    for (i = 0; i < N; ++i) {
        for (j = 0; j < N; ++j) {
            derpo[i][j] = 0.0;
        }
        for (k = 0; k < N; ++k) {
            for (j = 0; j < N; ++j) {
                derpo[i][j] += a[i][k]*b[k][j];
            }
        }
    }

    /* Sum all elements of the result; the total is printed below. */
    for (pc = derpo[0], i = 0; i < (N*N); ++i) {
        tot += *pc++;
    }

    stop = clock();

    fprintf(stderr,"Total: %f, time: %fs.\n", tot, (1.0*stop-start)/CLOCKS_PER_SEC);

    free(a);
    free(b);
    free(derpo);

    return 0;
}
---------------------------------rsdynf.c--------------------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 2000

/* File-scope pointers to rows of N floats.  main() points each one at a
 * malloc'ed block and hands the blocks to MatrixMul(); a and b are the
 * operands, derpo receives the product. */
float (*a)[N];
float (*b)[N];
float (*derpo)[N];

/*
 * Multiply two NxN matrices: C = A * B.
 *
 * A, B and C each point to N*N floats stored row after row.  C is
 * overwritten.  C is expected not to overlap A or B (the caller in this
 * file passes three separate allocations).
 */
void MatrixMul(float *A, float *B, float *C)
{
    int i, j, k;
    float *brow;   /* current row of B; named so it does not shadow the file-scope b */

    /* A and C advance one row per outer iteration. */
    for (i = 0; i < N; ++i, C += N, A += N) {
        for (j = 0; j < N; ++j) {
            C[j] = 0.0;
        }
        /* Accumulate A[i][k] * B[k][*] into the output row; the innermost
         * loop walks along rows of B and C. */
        for (k = 0, brow = B; k < N; ++k, brow += N) {
            for (j = 0; j < N; ++j) {
                C[j] += A[k]*brow[j];
            }
        }
    }
}


int main() {
    /*
     * Benchmark variant with malloc'ed matrices held in the file-scope
     * pointers a, b and derpo, with the multiplication delegated to
     * MatrixMul().  Fills a and b with rand() values, computes
     * derpo = a*b, then prints the sum of all result elements and the
     * elapsed processor time.
     *
     * Returns 0 on success; exits with status 1 if an allocation fails.
     */
    int i;

    float *pa, *pb, *pc;
    float tot = 0.0;
    clock_t start, stop;

    /* N rows of sizeof *a bytes each: the size is computed in size_t,
     * so there is no intermediate int N*N product. */
    a = malloc(N * sizeof *a);
    b = malloc(N * sizeof *b);
    derpo = malloc(N * sizeof *derpo);

    if (!(a && b && derpo)) {
        fprintf(stderr, "Failed to allocate memory!\n");
        /* free(NULL) is a no-op, so release whichever blocks succeeded. */
        free(a);
        free(b);
        free(derpo);
        exit(1);
    }

    fprintf(stderr,"Using %f MB of memory.\n", 3.0*N*N*sizeof(float)/1048576.0);

    start = clock();

    /* Fill both operands; a's element is drawn before b's at every index. */
    for (pa = a[0], pb = b[0], i = 0; i < (N*N); ++i) {
        *pa++ = rand();
        *pb++ = rand();
    }

    /* a[0] is the first row and decays to float *, so no cast is needed. */
    MatrixMul(a[0], b[0], derpo[0]);

    /* Sum all elements of the result; the total is printed below. */
    for (pc = derpo[0], i = 0; i < (N*N); ++i) {
        tot += *pc++;
    }

    stop = clock();

    fprintf(stderr,"Total: %f, time: %fs.\n", tot, (1.0*stop-start)/CLOCKS_PER_SEC);

    /* Release the matrices and clear the file-scope pointers so nothing
     * is left dangling. */
    free(a);
    free(b);
    free(derpo);
    a = NULL;
    b = NULL;
    derpo = NULL;

    return 0;
}
---------------------------------rsdynbf.c-------------------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 2000

/*
 * Compute the NxN matrix product C = A * B.
 *
 * Each argument points to N*N floats laid out row after row; the
 * contents of C are overwritten.
 */
void MatrixMul(float *A, float *B, float *C)
{
    int row, col, inner;

    for (row = 0; row < N; ++row) {
        float *out = C + row * N;   /* output row being produced */
        float *lhs = A + row * N;   /* matching row of A */
        float *rhs = B;             /* walks B one row per inner step */

        for (col = 0; col < N; ++col) {
            out[col] = 0.0;
        }

        for (inner = 0; inner < N; ++inner) {
            for (col = 0; col < N; ++col) {
                out[col] += lhs[inner]*rhs[col];
            }
            rhs += N;
        }
    }
}


int main() {
    /*
     * Benchmark variant with malloc'ed matrices whose pointers are local
     * to main(), with the multiplication delegated to MatrixMul().
     * Fills a and b with rand() values, computes derpo = a*b, then
     * prints the sum of all result elements and the elapsed processor
     * time.
     *
     * Returns 0 on success; exits with status 1 if an allocation fails.
     */
    int i;

    float (*a)[N];
    float (*b)[N];
    float (*derpo)[N];

    float *pa, *pb, *pc;
    float tot = 0.0;
    clock_t start, stop;

    /* N rows of sizeof *a bytes each: the size is computed in size_t,
     * so there is no intermediate int N*N product. */
    a = malloc(N * sizeof *a);
    b = malloc(N * sizeof *b);
    derpo = malloc(N * sizeof *derpo);

    if (!(a && b && derpo)) {
        fprintf(stderr, "Failed to allocate memory!\n");
        /* free(NULL) is a no-op, so release whichever blocks succeeded. */
        free(a);
        free(b);
        free(derpo);
        exit(1);
    }

    fprintf(stderr,"Using %f MB of memory.\n", 3.0*N*N*sizeof(float)/1048576.0);

    start = clock();

    /* Fill both operands; a's element is drawn before b's at every index. */
    for (pa = a[0], pb = b[0], i = 0; i < (N*N); ++i) {
        *pa++ = rand();
        *pb++ = rand();
    }

    /* a[0] is the first row and decays to float *, so no cast is needed. */
    MatrixMul(a[0], b[0], derpo[0]);

    /* Sum all elements of the result; the total is printed below. */
    for (pc = derpo[0], i = 0; i < (N*N); ++i) {
        tot += *pc++;
    }

    stop = clock();

    fprintf(stderr,"Total: %f, time: %fs.\n", tot, (1.0*stop-start)/CLOCKS_PER_SEC);

    free(a);
    free(b);
    free(derpo);

    return 0;
}


August 03, 2002
In article <3D4B044D.97C0BB2E@smartsoft.cc>, Jan Knepper says...
>
>> >What about the DM Linker???
>> Look at the other messages, it fails.
>
>I meant, the messages the linker gives you. You give a report for all the other linkers I can not help you with.
>
A dialog box appears. The title is:
"Unexpected OPTLINK termination at EIP=0043E29C"

The body contains a red circle with a white X in it, and says: "EAX=00000000 EBX=0046C7A0 ECX=00001000 EDX=000008DF ESI=00000000 EDI=00004056 EBP=006BFF78 ESP=006BFE10 First=00430000"

Federico