forkit
Posted in reply to forkit
| On Friday, 21 January 2022 at 09:10:56 UTC, forkit wrote:
>
ok... in the interest of correcting the code I posted previously...
... here is a version that actually works in secs (for a million records), as opposed to hours!
// ---------------
/+
=====================================================================
This program creates a sample dataset consisting of 'random' records,
and then outputs that dataset to a file.
Arguments can be passed on the command line,
or otherwise default values are used instead.
Example of that output can be seen at the end of this code.
=====================================================================
+/
module test;
@safe:
import std.stdio : write, writef, writeln, writefln;
import std.range : iota, takeExactly;
import std.array : array, byPair, Appender, appender;
import std.random : Random, unpredictableSeed, dice, choice, uniform;
import std.algorithm : map, uniq, canFind, among;
import std.conv : to;
import std.format;
import std.stdio : File;
import std.file : exists;
import std.exception : enforce;
debug { import std; }
/// Module-level random number generator used by the value-generation code.
Random rnd;

/// Module constructor: seeds `rnd` with an unpredictable seed. (thanks Ali)
static this()
{
    rnd = Random(unpredictableSeed);
}
/**
 * Program entry point: builds a dataset of 'random' records and writes it to a file.
 *
 * Command line: `prog recordsNeeded valuesPerRecord fileName`.
 * When fewer than three arguments are supplied, built-in defaults are used.
 *
 * Throws: Exception when a count is out of range, or (non-debug builds only)
 *         when the output file already exists; `to!int` throws on non-numeric input.
 */
void main(string[] args)
{
    // Ids are 9 digits and start with 999, i.e. 999_000_000 .. 999_999_999.
    enum int firstId = 999 * 10^^6;
    enum int idLimit = 10^^9;
    enum int maxRecords = idLimit - firstId;

    int recordsNeeded, valuesPerRecord;
    string fname;
    if (args.length < 4)
    {
        //recordsNeeded = 1_000_000;
        //recordsNeeded = 100_000;
        recordsNeeded = 10;
        valuesPerRecord = 8;
        //fname = "D:/rnd_records.txt";
        fname = "./rnd_records.txt";
    }
    else
    {
        recordsNeeded = to!int(args[1]);
        valuesPerRecord = to!int(args[2]);
        fname = args[3];
    }

    // Reject counts the rest of the program cannot handle: takeExactly must not be
    // asked for more ids than the id range holds, and a negative count would be
    // implicitly converted to a huge unsigned value.
    enforce(recordsNeeded >= 0 && recordsNeeded <= maxRecords,
        format("recordsNeeded must be between 0 and %s, got %s", maxRecords, recordsNeeded));
    enforce(valuesPerRecord >= 0,
        format("valuesPerRecord must not be negative, got %s", valuesPerRecord));

    // Debug builds report the settings and overwrite an existing file;
    // other builds refuse to overwrite.
    debug
    { writefln("%s records, %s values for record, will be written to file: %s", recordsNeeded, valuesPerRecord, fname); }
    else
    { enforce(!exists(fname), "Oop! That file already exists!"); }

    // id needs to be 9 digits, and needs to start with 999
    int[] idArray = takeExactly(iota(firstId, idLimit), recordsNeeded).array;
    debug { writefln("idArray.length = %s", idArray.length); }

    int[][] valuesArray;
    createValuesArray(valuesArray, recordsNeeded, valuesPerRecord);

    int[][int][] records = CreateDataSet(idArray, valuesArray, recordsNeeded);
    ProcessRecords(records, fname);

    writefln("All done. Check if records written to %s", fname);
}
/**
 * Fills `valuesArray` with `recordsNeeded` rows of `valuesPerRecord` values each.
 *
 * Every value is the index returned by `dice` for the weights 0.6 and 1.4,
 * drawn from the module-level generator `rnd`.
 *
 * Params:
 *   valuesArray     = receives the generated rows (previous contents are replaced)
 *   recordsNeeded   = number of rows to generate
 *   valuesPerRecord = number of values in each row
 */
void createValuesArray
(ref int[][] valuesArray, const(int) recordsNeeded, const(int) valuesPerRecord)
{
    // One weighted draw; the argument (the column index) is ignored.
    int drawValue(int) { return cast(int) dice(rnd, 0.6, 1.4); }

    // One complete row; the argument (the row index) is ignored.
    // NOTE: does register with -profile=gc
    int[] buildRow(int) { return iota(valuesPerRecord).map!drawValue.array; }

    valuesArray = iota(recordsNeeded).map!buildRow.array;

    debug { writefln("valuesArray.length = %s", valuesArray.length); }
}
/**
 * Pairs every id with its row of values, producing one single-entry
 * associative array (`[id : values]`) per record.
 *
 * Params:
 *   idArray     = record ids; one record is produced per id, in order
 *   valuesArray = value rows; row `i` is attached to `idArray[i]`, so at least
 *                 `idArray.length` rows are required (extra rows are ignored)
 *   numRecords  = expected number of records; used only as a capacity hint
 *
 * Returns: the records, in the order of `idArray`. The value rows are
 *          referenced, not copied.
 * Throws: Exception if `valuesArray` has fewer rows than `idArray` has ids.
 */
int[][int][] CreateDataSet
(const(int)[] idArray, int[][] valuesArray, const(int) numRecords)
{
    enforce(valuesArray.length >= idArray.length,
        "CreateDataSet: fewer value rows than ids");

    int[][int][] records;

    // Capacity hint only. Skip non-positive values: a negative int would be
    // implicitly converted to a huge unsigned size.
    if (numRecords > 0)
        records.reserve(numRecords);
    debug { writefln("records.capacity is %s", records.capacity); }

    foreach (i, const id; idArray)
    {
        // NOTE: below does register with -profile=gc
        records ~= [ id : valuesArray[i] ];
    }
    debug { writefln("records.length = %s", records.length); }

    // `records` is a freshly built local array, so it is returned directly;
    // duplicating it would only copy the whole array a second time.
    return records;
}
/**
 * Writes every record to `fname`, one line per `id : values` pair, in the form
 * `id,v1,v2,...,vN` followed by a newline.
 *
 * The whole output is assembled in memory first and then written with a
 * single `write` call.
 *
 * Params:
 *   recArray = records to write; each element is an associative array
 *              mapping an id to its values
 *   fname    = path of the file to create or overwrite (opened in "w" mode)
 */
void ProcessRecords
(in int[][int][] recArray, const(string) fname)
{
    auto file = File(fname, "w");
    scope(exit) file.close;

    Appender!string bigString = appender!string;
    // Rough starting hint only: this reserves one character per record,
    // the appender grows as needed beyond that.
    bigString.reserve(recArray.length);
    debug { writefln("bigString.capacity is %s", bigString.capacity); }

    // NOTE: a nested function must be defined before the point where it is used.
    void processRecord(const(int) id, const(int)[] values)
    {
        // Format straight into the appender instead of building several
        // temporary strings per record and concatenating them.
        formattedWrite(bigString, "%s,%(%s,%)\n", id, values);
    }

    foreach (ref const record; recArray)
    {
        foreach (ref rp; record.byPair)
        {
            processRecord(rp.expand);
        }
    }

    file.write(bigString[]);
}
/+
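sample file output:
(NOTE: these lines appear to come from an earlier revision of the program;
the code above writes a 9-digit id, then a comma, then the comma-separated values)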
9992511730,1,0,1,0,1,0,1
9995369731,1,1,1,1,1,1,1
9993136031,1,0,0,0,1,0,0
9998979051,1,1,1,1,0,1,1
9998438090,1,1,0,1,1,0,0
9995132750,0,0,1,0,1,1,1
9997123630,0,1,1,1,0,1,1
9998351590,1,0,0,1,1,1,1
9991454121,1,1,1,1,1,0,1
9997673520,1,1,1,1,1,1,1
+/
// ---------------
|