Jump to page: 1 2
Thread overview
an example of parallel calculation of metrics
Sep 30, 2015
Jay Norwood
Sep 30, 2015
Jay Norwood
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Ali Çehreli
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Ali Çehreli
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Jay Norwood
Oct 01, 2015
Jay Norwood
September 30, 2015
This is something I'm playing with for work. We do this a lot: capture counter events for some number of on-chip performance counters, compute some metrics, and display the outputs. A parallel map (taskPool.amap) seems ideal for this kind of application.

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// Record layouts: TI is one measured sample (five counters, addressed by
// position); TO is one output record with a slot for each of the four metrics.
alias TI = Tuple!(long, long, long, long, long);
alias TO = Tuple!(long, long, long, long);

// Metric definitions. Each one combines a few counters of a single sample.
// (The Tuples could instead name their members, and the metrics could then
// refer to the counters by name rather than by index.)

// counter0 + counter1 + counter2
long met1(TI m)
{
    return m[0] + m[1] + m[2];
}

// counter1 + counter2 + counter3
long met2(TI m)
{
    return m[1] + m[2] + m[3];
}

// counter0 - counter1 + counter2
long met3(TI m)
{
    return m[0] - m[1] + m[2];
}

// counter0 + counter1 - counter2
long met4(TI m)
{
    return m[0] + m[1] - m[2];
}

// All metrics bundled as one compile-time list, so they can be handed to a
// template in a single argument.
alias Metrics = AliasSeq!(met1, met2, met3, met4);

// Builds 1000 synthetic samples, evaluates every metric in Metrics over them
// with taskPool.amap, then prints the results and the elapsed milliseconds.
void main(string[] argv)
{
    auto samples = iota(1_000);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Sample number idx holds the consecutive values idx, idx+1, ..., idx+4.
    foreach (idx, ref sample; meas)
    {
        sample[0] = idx;
        sample[1] = idx + 1;
        sample[2] = idx + 2;
        sample[3] = idx + 3;
        sample[4] = idx + 4;
    }

    std.datetime.StopWatch timer;
    timer.start();

    // Maps a sample index to the corresponding measurement, by reference.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // Evaluate all metrics for every sample in parallel, writing into results.
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    // Read the timer before printing so output time is not included.
    long elapsedMs = timer.peek().msecs;
    writeln("results:", results);
    writeln("time:", elapsedMs);
}


September 30, 2015
On Wednesday, 30 September 2015 at 22:24:25 UTC, Jay Norwood wrote:
> // various metric definitions
> // the Tuples could also define names for each member and use the names here in the metrics.
> long met1( TI m){ return m[0] + m[1] + m[2]; }
> long met2( TI m){ return m[1] + m[2] + m[3]; }
> long met3( TI m){ return m[0] - m[1] + m[2]; }
> long met4( TI m){ return m[0] + m[1] - m[2]; }
>

should use reference parameters here:
// Same four metrics as before, but each takes its sample by reference.

// counter0 + counter1 + counter2
long met1(ref TI m)
{
    return m[0] + m[1] + m[2];
}

// counter1 + counter2 + counter3
long met2(ref TI m)
{
    return m[1] + m[2] + m[3];
}

// counter0 - counter1 + counter2
long met3(ref TI m)
{
    return m[0] - m[1] + m[2];
}

// counter0 + counter1 - counter2
long met4(ref TI m)
{
    return m[0] + m[1] - m[2];
}



October 01, 2015
This compiles and appears to execute correctly, but if I uncomment the taskPool line I get the compile error "Wrong buffer type." Am I breaking some rule for std.parallelism.amap?

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// Record layouts, all as Tuples with named members:
//   TR - one derived metric: per-second rate, per-cycle rate, raw count
//   TI - one measured sample of eight counters
//   TO - one output record holding a TR for each of the four metrics
alias TR = Tuple!(double,"per_sec", double, "per_cycle", long,"raw");
alias TI = Tuple!(long, "proc_cyc", long, "DATA_RD", long, "DATA_WR", long, "INST_FETCH", long, "L1I_MISS", long, "L1I_HIT", long,"L1D_HIT", long, "L1D_MISS");
alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC");
// Factor that converts a per-cycle rate into a per-second rate
// (presumably the clock frequency in Hz).
const double CYC_PER_SEC = 1_600_000_000;

// Fills a TR from a raw event count and the number of cycles it covers:
// per_cycle = raw / cycles, per_sec = per_cycle * CYC_PER_SEC.
private TR rateOf(long rawCount, long cycles)
{
    TR rv;
    rv.raw = rawCount;
    rv.per_cycle = cast(double) rv.raw / cycles;
    rv.per_sec = rv.per_cycle * CYC_PER_SEC;
    return rv;
}

// Metric definitions; counters are referred to by their member names.

// L1 misses: L1I_MISS + L1D_MISS.
TR met_l1_miss(ref TI m)
{
    return rateOf(m.L1I_MISS + m.L1D_MISS, m.proc_cyc);
}

// L1 hits: L1I_HIT + L1D_HIT.
TR met_l1_hit(ref TI m)
{
    return rateOf(m.L1I_HIT + m.L1D_HIT, m.proc_cyc);
}

// Data accesses: DATA_RD + DATA_WR.
TR met_data_acc(ref TI m)
{
    return rateOf(m.DATA_RD + m.DATA_WR, m.proc_cyc);
}

// All accesses: DATA_RD + DATA_WR + INST_FETCH.
TR met_all_acc(ref TI m)
{
    return rateOf(m.DATA_RD + m.DATA_WR + m.INST_FETCH, m.proc_cyc);
}

// All metrics bundled as one compile-time list.
alias Metrics = AliasSeq!(met_l1_miss, met_l1_hit, met_data_acc, met_all_acc);

// Builds 100 synthetic counter samples, evaluates the four metrics directly
// on sample 0, and prints them together with the results slot for that same
// sample so the two can be compared.
//
// The parallel amap call is left commented out: enabling it is what produces
// the "Wrong buffer type" compile error. With it disabled, results only holds
// default-initialized records.
void main(string[] argv)
{
	auto samples = iota(100);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples.
	// Order matters: INST_FETCH is derived from proc_cyc, and the hit counters
	// are derived from the fetch/read/write counters assigned before them.
	foreach(i, ref m; meas){
		with(m){
			proc_cyc = 1_000_000+i*2;
			DATA_RD = 1000+i;
			DATA_WR = 2000+i;
			INST_FETCH = proc_cyc/2;
			L1I_HIT = INST_FETCH-100;
			L1I_MISS = 100;
			L1D_HIT = DATA_RD+DATA_WR - 200;
			L1D_MISS = 200;
		}
	}

	std.datetime.StopWatch sw;
	sw.start();

	// Maps a sample index to the corresponding measurement, by reference.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	//taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	// Direct (serial) evaluation of each metric on sample 0.
	TR rv1 = met_l1_miss( meas[0]);
	TR rv2 = met_l1_hit( meas[0]);
	TR rv3 = met_data_acc( meas[0]);
	TR rv4 = met_all_acc( meas[0]);

	// how long did this take
	long exec_ms = sw.peek().msecs;
	writeln("measurements:", meas[0]);
	writeln("rv1:", rv1);
	writeln("rv2:", rv2);
	writeln("rv3:", rv3);
	writeln("rv4:", rv4);
	// Print the slot belonging to the same sample as rv1..rv4.
	writeln("results:", results[0]);
	writeln("time:", exec_ms);

}

October 01, 2015
On 09/30/2015 09:15 PM, Jay Norwood wrote:

> alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC");

Looks like a bug. Workaround: Get rid of member names there:

alias TO = Tuple!(TR, TR, TR, TR);

> //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

For some reason, having member names prevents 'results' passing one of amap's requirements. The following check in std.parallelism thinks that 'results' does not have random assignable elements if it is a Tuple with member names.

    else static if(randAssignable!(Args[$ - 1]) && Args.length > 1)
    {
        static assert(0, "Wrong buffer type.");
    }

Ali

October 01, 2015
On Thursday, 1 October 2015 at 07:03:40 UTC, Ali Çehreli wrote:
> Looks like a bug. Workaround: Get rid of member names

Thanks.  My particular use case, working with metric expressions, is easier to understand if I use the names.  I converted the use of Tuple to struct to see if I could get an easier error msg. Turns out the use of struct also results in much cleaner writeln text.

Still has the compile error, though.

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// Record layouts, here as plain structs rather than Tuples:
//   TR - one derived metric: per-second rate, per-cycle rate, raw count
//   TI - one measured sample of eight counters
//   TO - one output record holding a TR for each of the four metrics
struct TR
{
    double per_sec;
    double per_cycle;
    long raw;
}

struct TI
{
    long proc_cyc;
    long DATA_RD;
    long DATA_WR;
    long INST_FETCH;
    long L1I_MISS;
    long L1I_HIT;
    long L1D_HIT;
    long L1D_MISS;
}

struct TO
{
    TR L1_MISS;
    TR L1_HIT;
    TR DATA_ACC;
    TR ALL_ACC;
}

// Factor that converts a per-cycle rate into a per-second rate
// (presumably the clock frequency in Hz).
const double CYC_PER_SEC = 1_600_000_000;

// Fills a TR from a raw event count and the number of cycles it covers:
// per_cycle = raw / cycles, per_sec = per_cycle * CYC_PER_SEC.
private TR rateOf(long rawCount, long cycles)
{
    TR rv;
    rv.raw = rawCount;
    rv.per_cycle = cast(double) rv.raw / cycles;
    rv.per_sec = rv.per_cycle * CYC_PER_SEC;
    return rv;
}

// Metric definitions; counters are referred to by their field names.

// L1 misses: L1I_MISS + L1D_MISS.
TR met_l1_miss(ref TI m)
{
    return rateOf(m.L1I_MISS + m.L1D_MISS, m.proc_cyc);
}

// L1 hits: L1I_HIT + L1D_HIT.
TR met_l1_hit(ref TI m)
{
    return rateOf(m.L1I_HIT + m.L1D_HIT, m.proc_cyc);
}

// Data accesses: DATA_RD + DATA_WR.
TR met_data_acc(ref TI m)
{
    return rateOf(m.DATA_RD + m.DATA_WR, m.proc_cyc);
}

// All accesses: DATA_RD + DATA_WR + INST_FETCH.
TR met_all_acc(ref TI m)
{
    return rateOf(m.DATA_RD + m.DATA_WR + m.INST_FETCH, m.proc_cyc);
}

// All metrics bundled as one compile-time list.
alias Metrics = AliasSeq!(met_l1_miss, met_l1_hit, met_data_acc, met_all_acc);

// Builds 100 synthetic counter samples, asks taskPool.amap to evaluate all
// metrics into results, evaluates the same metrics directly on sample 0, and
// prints both for that same sample so they can be compared.
//
// NOTE: as posted, the amap call below is still rejected at compile time
// ("Wrong buffer type"), because results is an array of the struct TO rather
// than of a Tuple. It is kept unchanged as the reproduction case.
void main(string[] argv)
{
	auto samples = iota(100);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples.
	// Order matters: INST_FETCH is derived from proc_cyc, and the hit counters
	// are derived from the fetch/read/write counters assigned before them.
	foreach(i, ref m; meas){
		with(m){
			proc_cyc = 1_000_000+i*2;
			DATA_RD = 1000+i;
			DATA_WR = 2000+i;
			INST_FETCH = proc_cyc/2;
			L1I_HIT = INST_FETCH-100;
			L1I_MISS = 100;
			L1D_HIT = DATA_RD+DATA_WR - 200;
			L1D_MISS = 200;
		}
	}

	std.datetime.StopWatch sw;
	sw.start();

	// Maps a sample index to the corresponding measurement, by reference.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	// Direct (serial) evaluation of each metric on sample 0.
	TR rv1 = met_l1_miss( meas[0]);
	TR rv2 = met_l1_hit( meas[0]);
	TR rv3 = met_data_acc( meas[0]);
	TR rv4 = met_all_acc( meas[0]);

	// how long did this take
	long exec_ms = sw.peek().msecs;
	writeln("measurements:", meas[0]);
	writeln("rv1:", rv1);
	writeln("rv2:", rv2);
	writeln("rv3:", rv3);
	writeln("rv4:", rv4);
	// Print the slot belonging to the same sample as rv1..rv4.
	writeln("results:", results[0]);
	writeln("time:", exec_ms);

}


October 01, 2015
On 10/01/2015 08:56 AM, Jay Norwood wrote:

> Thanks.  My particular use case, working with metric expressions, is
> easier to understand if I use the names.

Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap.

> I converted the use of Tuple
> to struct to see if I could get an easier error msg. Turns out the use
> of struct also results in much cleaner writeln text.
>
> Still has the compile error, though.

We have to live with the fact that amap and friends produce a Tuple result if there are multiple functions. A struct won't work.

However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap:

// Element type for the buffer handed to amap: an unnamed Tuple with one TR
// per metric.
alias TO_for_amap_result = Tuple!(TR, TR, TR, TR);
// Named-field view of the same four TR values. The cast further down relies
// on this struct and the Tuple above having the same memory layout; that is
// an assumption the reader must verify.
struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;}

// ...

    // Allocate the output buffer with the Tuple element type rather than TO.
    auto results_for_amap = new TO_for_amap_result[samples.length];

// ...


// Same amap call as in the original program, but writing into the
// Tuple-typed buffer.
taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results_for_amap);

    // Reinterpret the filled Tuple slice as a slice of TO structs.
    auto results = cast(TO[])results_for_amap;

// Use 'results' from this point on...

Ali

October 01, 2015
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
> Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap.

ok, thanks.  I opened the issue.

https://issues.dlang.org/show_bug.cgi?id=15134

October 01, 2015
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
> However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap:

After re-reading your explanation, I see that the only problem is that the result needs to be a Tuple. In this example it works with a named-member Tuple as the result and an array of structs as the input. I'll re-check whether a multi-member result also works with named members, and I'll update the issue report.

import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// Record layouts: the input sample is a plain struct, the metric result is a
// Tuple with a single named member.

struct TI
{
    long L1I_MISS;
    long L1D_MISS;
}

alias TO = Tuple!(long, "raw");

// Metric definition.
// Total L1 misses of one sample: L1I_MISS + L1D_MISS, returned in TO.raw.
TO met_l1_miss(ref TI m)
{
    return TO(m.L1I_MISS + m.L1D_MISS);
}

// The metric list; it has a single entry in this example.
alias Metrics = AliasSeq!(met_l1_miss);

// Builds 100 synthetic samples, evaluates the single metric over all of them
// with taskPool.amap, and prints sample 1 together with its directly computed
// and its amap-computed result.
void main(string[] argv)
{
    auto samples = iota(100);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Give every sample miss counts derived from its index.
    foreach (idx, ref sample; meas)
    {
        sample.L1D_MISS = 100 + idx;
        sample.L1I_MISS = 100 - idx;
    }

    // Maps a sample index to the corresponding measurement, by reference.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // Evaluate the metric for every sample in parallel, writing into results.
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    // Direct (serial) evaluation on sample 1, for comparison.
    TO direct = met_l1_miss(meas[1]);

    writeln("measurements:", meas[1]);
    writeln("rv1:", direct);
    writeln("results:", results[1]);
}

October 01, 2015
So, this is a condensed version of the original problem. It looks like the problem is that the return value for taskPool.amap can't be a tuple of tuples or a tuple of structs. Either way, it fails with the "Wrong buffer type." error message if I uncomment the taskPool line.

import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// Record layouts:
//   TR - one derived metric (raw count and per-cycle rate), as a struct;
//        the commented-out alias is the equivalent named-Tuple form
//   TI - one measured sample, a Tuple with named members
//   TO - one output record holding a TR for each of the two metrics

struct TR
{
    long raw;
    double per_cyc;
}
//alias TR = Tuple!(long, "raw", double, "per_cyc");
alias TI = Tuple!(long, "L1I_MISS",long, "L1D_MISS", long, "L1D_READ", long, "L1D_WRITE", long, "cycles" );
alias TO = Tuple!(TR, "L1_MISS", TR, "L1D_ACCESS");

// Fills a TR from a raw event count and the number of cycles it covers:
// per_cyc = raw / cycles.
private TR perCycleOf(long rawCount, long cycles)
{
    TR rv;
    rv.raw = rawCount;
    rv.per_cyc = cast(double) rv.raw / cycles;
    return rv;
}

// Metric definitions; counters are referred to by their member names.

// L1 misses: L1I_MISS + L1D_MISS.
TR met_l1_miss(ref TI m)
{
    return perCycleOf(m.L1I_MISS + m.L1D_MISS, m.cycles);
}

// L1 data accesses: L1D_READ + L1D_WRITE.
TR met_l1_access(ref TI m)
{
    return perCycleOf(m.L1D_READ + m.L1D_WRITE, m.cycles);
}

// All metrics bundled as one compile-time list.
alias Metrics = AliasSeq!(met_l1_miss, met_l1_access);

// Builds 100 synthetic samples, evaluates both metrics directly on sample 1,
// and prints them together with results[1]. The amap call is left commented
// out, so results only holds default-initialized records.
void main(string[] argv)
{
    auto samples = iota(100);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Give every sample counter values derived from its index.
    foreach (idx, ref sample; meas)
    {
        sample.L1D_MISS = 100 + idx;
        sample.L1I_MISS = 100 - idx;
        sample.L1D_READ = 200 + idx;
        sample.L1D_WRITE = 200 - idx;
        sample.cycles = 10 + idx;
    }

    // Maps a sample index to the corresponding measurement, by reference.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // Parallel evaluation of all metrics; disabled because it does not compile.
    //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

    // Direct (serial) evaluation on sample 1.
    TR missDirect = met_l1_miss(meas[1]);
    TR accessDirect = met_l1_access(meas[1]);

    writeln("measurements:", meas[1]);
    writeln("rv1:", missDirect);
    writeln("rv2:", accessDirect);
    writeln("results:", results[1]);
}

October 01, 2015
I re-submitted this as:
https://issues.dlang.org/show_bug.cgi?id=15135

« First   ‹ Prev
1 2