January 19, 2022
On Thu, Jan 20, 2022 at 12:12:56AM +0000, forkit via Digitalmars-d-learn wrote: [...]
> createBoolAssociativeMatrix(mArrBool,3, 2);
> 
> [ [1000:[1, 0]], [1001:[1, 1]], [1001:[1, 0]]]
> 
> 
> where 1000 is some random id...

Do the id's have to be unique?  If not, std.random.uniform() would do
the job.

If they have to be unique, you can either use a sequential global counter (a 64-bit counter will suffice -- you won't exhaust it for 60+ years even when bumping the counter once per CPU tick at 8.4 GHz), or use an AA of ids already generated and just call uniform() to generate a new one until it doesn't collide anymore.


T

-- 
A mathematician learns more and more about less and less, until he knows everything about nothing; whereas a philosopher learns less and less about more and more, until he knows nothing about everything.
January 20, 2022
On Thursday, 20 January 2022 at 00:30:44 UTC, H. S. Teoh wrote:
>
> Do the id's have to be unique?

yep...

I'm almost there ;-)

// ---
module test;

import std.stdio : writeln;
import std.range : iota, isForwardRange, hasSlicing, hasLength, isInfinite;
import std.array : array, Appender;
import std.random : Random, unpredictableSeed, dice, choice;
import std.algorithm : map, uniq;

@safe:

Random rnd;

/// Module constructor: seeds the module-level generator `rnd` from an
/// unpredictable value before `main` runs.
static this()
{
    rnd.seed(unpredictableSeed);
}

/// Demo driver: prints a batch of unique ids, then builds and prints a
/// matrix of random 0/1 values with one row per requested record.
void main()
{
    enum size_t boolsPerTuple = 3;
    int wanted = 5;

    // Ids are generated (and printed) before the matrix is built.
    uint[] ids;
    makeUniqueIDs(ids, wanted);
    writeln(ids);

    // e.g: a matrix consisting of 5 tuples,
    // with each tuple containing 3 random bools (0 or 1)
    uint[][] matrix;
    createBoolMatrix(matrix, wanted, boolsPerTuple);

    // process just writeln's its argument at the moment
    process(matrix); // [[1, 1, 1], [0, 0, 1], [1, 1, 1], [1, 1, 1], [1, 1, 0]]

    // to do: integrate a single value taken from the ids so that each tuple
    // looks like this: [999575454:[1, 1, 1]]
    // e.g.
    // processRecords(records);
    // output from above should look like this below:
    // [ [999575454:[1, 1, 1]], [999704246:[0, 0, 1]], [999969331:[1, 1, 1]], [999678591:[1, 1, 1]], [999691754:[1, 1, 0]] ]
}

/// Fills `m` with `numberOfTuples` rows of `numberOfBoolsInTuple` random
/// 0/1 values each.
///
/// Every cell is the index returned by `rnd.dice(0.6, 1.4)`, i.e. 0 or 1
/// chosen with relative weights 0.6 and 1.4. Any previous contents of `m`
/// are replaced.
void createBoolMatrix(ref uint[][] m, size_t numberOfTuples, size_t numberOfBoolsInTuple)
{
    auto rows = new uint[][](numberOfTuples);

    // Rows are filled front to back, cell by cell, so the generator is
    // consumed in row-major order.
    foreach (ref row; rows)
    {
        row = new uint[](numberOfBoolsInTuple);
        foreach (ref cell; row)
            cell = cast(uint) rnd.dice(0.6, 1.4);
    }

    m = rows;
}

/// Writes `t`, followed by a newline, to standard output.
///
/// Accepts only finite forward ranges that support slicing and have a length.
void process(T)(const ref T t)
if (isForwardRange!T && hasSlicing!T && hasLength!T && !isInfinite!T)
{
    writeln(t);
}

/// Writes the whole record collection `t`, followed by a newline, to
/// standard output.
///
/// Accepts only finite forward ranges that support slicing and have a length.
void processRecords(T)(const ref T t)
if (isForwardRange!T && hasSlicing!T && hasLength!T && !isInfinite!T)
{
    writeln(t);
}


/// Replaces `arr` with exactly `sz` distinct random ids.
///
/// Every id has 9 digits and starts with 999, i.e. lies in
/// [999_000_000, 1_000_000_000). Ids are drawn from the module-level
/// generator `rnd`.
///
/// Throws: Exception if `sz` exceeds the 1_000_000 ids available in that range.
void makeUniqueIDs(ref uint[] arr, size_t sz)
{
    import std.exception : enforce;
    import std.random : uniform;

    enum uint lowest = 999_000_000;
    enum uint limit = 1_000_000_000; // exclusive upper bound

    // Without this guard the rejection loop below could never terminate.
    enforce(sz <= limit - lowest,
            "cannot produce that many unique 9-digit ids starting with 999");

    // `uniq` only collapses *adjacent* duplicates, so it cannot guarantee
    // uniqueness of unsorted draws; track every id seen so far instead.
    bool[uint] seen;
    auto ids = new uint[](sz);

    size_t filled = 0;
    while (filled < sz)
    {
        immutable candidate = uniform(lowest, limit, rnd);
        if (candidate in seen)
            continue; // collision: draw again

        seen[candidate] = true;
        ids[filled++] = candidate;
    }

    arr = ids;
}
// ---

January 20, 2022
On Thursday, 20 January 2022 at 04:00:59 UTC, forkit wrote:
> void makeUniqueIDs(ref uint[] arr, size_t sz)
> {
>   ...
> }

arrg!

what was i thinking! ;-)

// ---
/// Appends exactly `sz` new random ids to `arr`, none of which is already
/// present in `arr`.
///
/// Every id has 9 digits and starts with 999, i.e. lies in
/// [999_000_000, 1_000_000_000). Ids are drawn from the module-level
/// generator `rnd`.
void makeUniqueIDs(ref uint[] arr, size_t sz)
{
    import std.algorithm.searching : canFind;
    import std.random : uniform;

    // Make room for the existing elements plus the ones about to be added.
    arr.reserve(arr.length + sz);

    // Counts successful insertions only. A collision must leave the counter
    // untouched: decrementing it would make the loop append more than `sz` ids.
    size_t added = 0;
    while (added < sz)
    {
        // Draw straight from the id range instead of materialising all
        // 1_000_000 candidates in an array first.
        immutable uint candidate = uniform(999_000_000u, 1_000_000_000u, rnd);

        // ensure every id added is unique.
        if (arr.canFind(candidate))
            continue;

        arr ~= candidate;
        ++added;
    }
}


//------


January 20, 2022
On Thursday, 20 January 2022 at 04:38:39 UTC, forkit wrote:
>

all done ;-)

// ---

module test;

import std.stdio : writeln;
import std.range : iota, isForwardRange, hasSlicing, hasLength, isInfinite;
import std.array : array, Appender;
import std.random : Random, unpredictableSeed, dice, choice;
import std.algorithm : map, uniq, canFind;

@safe:

Random rnd;

/// Module constructor: gives the module-level generator `rnd` an
/// unpredictable seed before `main` starts.
static this()
{
    rnd.seed(unpredictableSeed);
}

/// Demo driver: generates the ids, then the bool tuples, pairs them up into
/// records and prints the result.
void main()
{
    int recordCount = 2;
    int boolsPerRecord = 3;

    // Ids are drawn before the tuples.
    uint[] ids;
    makeUniqueIDs(ids, recordCount);

    uint[][] boolTuples;
    createBoolMatrix(boolTuples, recordCount, boolsPerRecord);

    // One single-entry associative array (id -> tuple) per record.
    auto records = CreateTupleDictionary(ids, boolTuples);
    processRecords(records);
}

/// Pairs each id with the tuple at the same index.
///
/// Params:
///   ids    = record ids; one record is produced per id
///   tuples = value tuples; must hold at least `ids.length` elements
///
/// Returns: an array with one single-entry associative array
/// (`id -> tuple`) per element of `ids`, in the same order. The tuples are
/// referenced, not copied.
auto CreateTupleDictionary(ref uint[] ids, ref uint[][] tuples)
{
    uint[][uint][] records;
    records.reserve(ids.length);

    foreach (i, id; ids)
        records ~= [ id : tuples[i] ];

    // `records` is a fresh local array, so it can be returned directly;
    // duplicating it first would only add a second allocation and copy.
    return records;
}

/// Writes the whole record collection `t`, followed by a newline, to
/// standard output.
///
/// Accepts only finite forward ranges that support slicing and have a length.
void processRecords(T)(const ref T t)
if (isForwardRange!T && hasSlicing!T && hasLength!T && !isInfinite!T)
{
    // expected shape of the output:
    // [[999583661:[1, 1, 0]], [999273256:[1, 1, 1]]]
    writeln(t);

    // hoping to explore parallel here too...
}

/// Replaces `m` with a `numberOfTuples` x `numberOfBoolsInTuple` matrix of
/// random 0/1 values.
///
/// Each cell is the index returned by `rnd.dice(0.6, 1.4)`, i.e. 0 or 1
/// chosen with relative weights 0.6 and 1.4.
void createBoolMatrix(ref uint[][] m, size_t numberOfTuples, size_t numberOfBoolsInTuple)
{
    auto matrix = new uint[][](numberOfTuples, numberOfBoolsInTuple);

    // Row-major fill: the generator is consumed one row at a time.
    foreach (ref tuple; matrix)
        foreach (ref flag; tuple)
            flag = cast(uint) rnd.dice(0.6, 1.4);

    m = matrix;
}


/// Appends exactly `sz` new random ids to `arr`, none of which is already
/// present in `arr`.
///
/// Every id has 9 digits and starts with 999, i.e. lies in
/// [999_000_000, 1_000_000_000). Ids are drawn from the module-level
/// generator `rnd`.
void makeUniqueIDs(ref uint[] arr, size_t sz)
{
    import std.algorithm.searching : canFind;
    import std.random : uniform;

    // Reserve room for what is already there plus the ids to be added.
    arr.reserve(arr.length + sz);

    // Draws until it finds an id that `arr` does not contain yet. Sampling
    // the range directly avoids building a 1_000_000-element candidate
    // array on every call.
    uint freshId()
    {
        uint candidate;
        do
        {
            candidate = uniform(999_000_000u, 1_000_000_000u, rnd);
        }
        while (arr.canFind(candidate));
        return candidate;
    }

    // size_t counter: same type as `sz`, so no signed/unsigned comparison.
    foreach (size_t n; 0 .. sz)
        arr ~= freshId();
}

// ---

January 20, 2022
On Thursday, 20 January 2022 at 04:00:59 UTC, forkit wrote:
> On Thursday, 20 January 2022 at 00:30:44 UTC, H. S. Teoh wrote:
>>
>> Do the id's have to be unique?
>
> yep...
>

Don't make them random then, but use an incrementor.

If you can have ids that aren't integers then you could use uuids too.

https://dlang.org/phobos/std_uuid.html

January 20, 2022
On Thursday, 20 January 2022 at 10:11:10 UTC, bauss wrote:

>
> Don't make them random then, but use an incrementor.
>
> If you can have ids that aren't integers then you could use uuids too.
>
> https://dlang.org/phobos/std_uuid.html

The 'uniqueness' of id would actually be created in the database.

I'm just creating a dataset to simulate an export.

I'm pretty much done, just wish -profile=gc was working in createUniqueIDArray(..)

// ---------------

module test;
@safe:

import std.stdio : write, writef, writeln, writefln;
import std.range : iota, isForwardRange, hasSlicing, hasLength, isInfinite;
import std.array : array, byPair;
import std.random : Random, unpredictableSeed, dice, choice;
import std.algorithm : map, uniq, canFind;

debug { import std; }

Random rnd;
/// Module constructor: gives the module-level generator `rnd` an
/// unpredictable seed before `main` starts.
static this()
{
    rnd.seed(unpredictableSeed);
}

/// Demo driver: builds 10 records of 8 random values each, keyed by unique
/// ids, and prints them.
void main()
{
    const int recordCount = 10;
    const int valuesEach = 8;

    // Ids are generated before the values.
    int[] ids;
    createUniqueIDArray(ids, recordCount);

    int[][] values;
    createValuesArray(values, recordCount, valuesEach);

    // One single-entry associative array (id -> values) per record.
    auto dataSet = CreateDataSet(ids, values, recordCount);
    ProcessRecords(dataSet);
}

// Prints every record in recArray to standard output.
// Each element of recArray is an associative array; all of its (id, values)
// pairs are written as "id<TAB>values", and a newline is written after each
// element.
void ProcessRecords(ref const(int[][int][]) recArray)
{
    // Writes a single "id<TAB>values" entry; no trailing newline.
    void processRecord(ref int id, ref const(int)[] result)
    {
        writef("%s\t%s", id, result);
    }

    foreach(ref record; recArray)
    {
        // byPair yields the (key, value) tuples of this associative array
        foreach (ref rp; record.byPair)
        {
            // expand passes the tuple's two fields as separate arguments
            processRecord(rp.expand);
        }
        writeln; // end the output line for this record
    }
}

/// Pairs each id with the value array at the same index.
///
/// Params:
///   idArray     = record ids; one record is produced per id
///   valuesArray = value arrays; must hold at least `idArray.length` elements
///   numRecords  = capacity hint for the result; non-positive values are ignored
///
/// Returns: an array with one single-entry associative array
/// (`id -> values`) per element of `idArray`, in the same order. The value
/// arrays are referenced, not copied.
int[][int][] CreateDataSet(ref int[] idArray, ref int[][] valuesArray, int numRecords)
{
    int[][int][] records;

    // A negative hint would be converted to an enormous unsigned request,
    // so only reserve for positive values.
    if (numRecords > 0)
        records.reserve(numRecords);
    debug { writefln("records.capacity is %s", records.capacity); }

    foreach (i, id; idArray)
        records ~= [ id : valuesArray[i] ]; // NOTE: does register with -profile=gc

    // Return the reserved buffer itself; a `.dup` here would allocate and
    // copy a second array and throw the reserved one away.
    return records;
}

/// Replaces `m` with `recordsNeeded` rows of `valuesPerRecord` random 0/1
/// values each.
///
/// Each value is the index returned by `rnd.dice(0.6, 1.4)`, i.e. 0 or 1
/// chosen with relative weights 0.6 and 1.4.
void createValuesArray(ref int[][] m, size_t recordsNeeded, size_t valuesPerRecord)
{
    // Builds one row; the argument is only the row number and is not used.
    int[] makeRecord(size_t recordIndex)
    {
        return iota(valuesPerRecord)
                .map!(slot => cast(int) rnd.dice(0.6, 1.4))
                .array;
    }

    m = iota(recordsNeeded)
            .map!(recordIndex => makeRecord(recordIndex))
            .array;  // NOTE: does register with -profile=gc
}


/// Appends `recordsNeeded` random ids to `idArray`, skipping any candidate
/// that `idArray` already contains.
///
/// Candidates are picked with the module-level generator `rnd` from a pool
/// holding every integer in [999_000_000, 1_000_000_000).
void createUniqueIDArray(ref int[] idArray, int recordsNeeded)
{
    idArray.reserve(recordsNeeded);
    debug { writefln("idArray.capacity is %s", idArray.capacity); }

    // id needs to be 9 digits, and needs to start with 999;
    // the pool below holds the 1_000_000 candidates we can choose from.
    int[] pool = iota(999_000_000, 1_000_000_000).array; // NOTE: does NOT register with -profile=gc

    for (int accepted = 0; accepted != recordsNeeded; )
    {
        immutable candidate = pool.choice(rnd);

        // ensure every id added is unique: redraw on a collision.
        if (idArray.canFind(candidate))
            continue;

        idArray ~= candidate; // NOTE: does NOT register with -profile=gc
        ++accepted;
    }
}

/+
sample output:

999623777	[0, 0, 1, 1, 1, 0, 0, 0]
999017078	[1, 0, 1, 1, 1, 1, 1, 1]
999269073	[1, 1, 0, 0, 1, 1, 0, 1]
999408504	[0, 1, 1, 1, 1, 1, 0, 0]
999752314	[1, 0, 0, 1, 1, 1, 1, 0]
999660730	[0, 1, 0, 0, 1, 1, 1, 1]
999709822	[1, 1, 1, 0, 1, 1, 0, 0]
999642248	[1, 1, 1, 0, 0, 1, 1, 0]
999533069	[1, 1, 1, 0, 0, 0, 0, 0]
999661591	[1, 1, 1, 1, 1, 0, 1, 1]

+/

// ---------------


January 20, 2022
On Thursday, 20 January 2022 at 12:15:56 UTC, forkit wrote:

> void createUniqueIDArray(ref int[] idArray, int recordsNeeded)
> {
>     idArray.reserve(recordsNeeded);
>     debug { writefln("idArray.capacity is %s", idArray.capacity); }
>
>     // id needs to be 9 digits, and needs to start with 999
>     // below will contain 1_000_000 records that we can choose from.
>     int[] ids = iota(999_000_000, 1_000_000_000).array; // NOTE: does NOT register with -profile=gc
>
>     int i = 0;
>     int x;
>     while(i != recordsNeeded)
>     {
>        x = ids.choice(rnd);
>
>        // ensure every id added is unique.
>        if (!idArray.canFind(x))
>        {
>            idArray ~= x; // NOTE: does NOT register with -profile=gc
>            i++;
>        }
>     }
> }

Allocating 4 megs to generate 10 numbers??? You can generate a random number between 999000000 and 1000000000.

```
/// Returns `recordsNeeded` distinct random ids, each a 9-digit number
/// starting with 999, i.e. in [999_000_000, 1_000_000_000).
///
/// The result buffer is allocated once up front and handed back as
/// immutable.
immutable(int)[] createUniqueIDArray(int recordsNeeded)
{
    import std.algorithm.searching : canFind;
    import std.exception : assumeUnique;
    import std.random : uniform;

    enum lowest = 999_000_000;   // smallest 9-digit id starting with 999
    enum limit = 1_000_000_000;  // exclusive upper bound

    auto ids = new int[recordsNeeded];

    for (int filled = 0; filled != recordsNeeded; )
    {
        immutable candidate = uniform(lowest, limit);

        // Only the already-filled prefix holds real ids; redraw on a collision.
        if (ids[0 .. filled].canFind(candidate))
            continue;

        ids[filled] = candidate;
        ++filled;
    }

    // No other reference to the buffer exists, so it can be handed out as
    // immutable.
    return ids.assumeUnique;
}

/// Prints ten freshly generated unique ids.
void main()
{
    import std.stdio : writeln;

    auto ids = createUniqueIDArray(10);
    writeln(ids);
}
```

Only one allocation, and it would be tracked with -profile=gc...
January 20, 2022
On Thursday, 20 January 2022 at 12:40:09 UTC, Stanislav Blinov wrote:
>
> Allocating 4 megs to generate 10 numbers??? You can generate a random number between 999000000 and 1000000000.
>
> ...
>         // id needs to be 9 digits, and needs to start with 999
>        x = uniform(999*10^^6, 10^^9);
>
>        // ensure every id added is unique.
>        if (!result[0 .. i].canFind(x))
>            result[i++] = x;
>     }
>     import std.exception : assumeUnique;
>     return result.assumeUnique;
> ...

Nice. Thanks. I had to compromise a little though, as assumeUnique is @system and all my code is @safe (and I'm trying to avoid the need for an inline @system wrapper) ;-)

//---

/// Appends `recordsNeeded` random ids to `idArray`, skipping any candidate
/// that `idArray` already contains.
///
/// Each id is a 9-digit number starting with 999, i.e. in
/// [999_000_000, 1_000_000_000).
void createUniqueIDArray(ref int[] idArray, int recordsNeeded)
{
    idArray.reserve(recordsNeeded);
    debug { writefln("idArray.capacity is %s", idArray.capacity); }

    // Keeps drawing until it hits an id that is not in idArray yet.
    int freshId()
    {
        int candidate;
        do
        {
            // generate a random 9 digit id that starts with 999
            candidate = uniform(999*10^^6, 10^^9); // thanks Stanislav!
        }
        while (idArray.canFind(candidate)); // ensure every id added is unique.
        return candidate;
    }

    for (int added = 0; added != recordsNeeded; ++added)
        idArray ~= freshId(); // NOTE: does NOT register with -profile=gc
}

//---
January 20, 2022
On Thursday, 20 January 2022 at 21:16:46 UTC, forkit wrote:
>

Cannot work out why I cannot pass valuesArray in as ref const??

get error: Error: cannot append type `const(int[])[const(int)]` to type `int[][int][]`


// --

// NOTE: as posted, this version is rejected by the compiler with
// "cannot append type `const(int[])[const(int)]` to type `int[][int][]`".
// Intent: build one single-entry associative array (id -> values) per
// element of idArray and return them as an array.
int[][int][] CreateDataSet(ref const int[] idArray, ref const(int[][]) valuesArray, const int numRecords)
{
    int[][int][] records;
    records.reserve(numRecords); // capacity hint for the result

    foreach(i, id; idArray)
        // idArray[i] and valuesArray[i] are const here, so this literal is a
        // const-qualified associative array; appending it to the mutable
        // `records` is the statement the error refers to.
        records ~= [ idArray[i] : valuesArray[i] ];

    return records.dup;
}

// ---
January 20, 2022

On 1/20/22 5:07 PM, forkit wrote:
> On Thursday, 20 January 2022 at 21:16:46 UTC, forkit wrote:
>>
>
> Cannot work out why I cannot pass valuesArray in as ref const??
>
> get error: Error: cannot append type `const(int[])[const(int)]` to type `int[][int][]`

Because it would allow altering const data.

e.g.:

const(int[])[const(int)] v = [1: [1, 2, 3]];
int[][int][] arr = [v]; // assume this works
arr[0][1][0] = 5; // oops, just set v[1][0]

General rule of thumb is that you can convert the HEAD of a structure to mutable from const, but not the TAIL (the stuff it points at).

An associative array is a pointer-to-implementation construct, so it's a reference.

-Steve