As we all know, std.regex slows down our builds even if all you're doing is importing it.
So on Discord we were chatting and I got annoyed about it enough to look into it (which as we all know is a good way to make me do something about it).
To start off with, let's do some base timings with dmd.
Here is my test module, disable the regex call as required.
import std.regex;
/// Entry point of the timing test module; it only exists to give dmd something to compile.
void main()
{
    // Delete the following line to measure the import-only baseline.
    auto compiledPattern = regex(`[a-z]`);
}
Compiling this takes 2.2s; compiling it without the regex call takes 1.2s.
Okay that's quite a big jump but at least we're using it. Now on to modifying std.regex or should I say std.uni.
That's right, we will be modifying std.uni not std.regex!
All we need to do is add -version=std_uni_bootstrap
to our call to dmd to get this working and apply the changes at the end of this post.
Now the times are 1.2s and 0.9s.
Why does turning on the bootstrap version in std.uni decrease compile times so significantly? This is almost certainly because the Unicode tables are compressed. std.regex triggers decompression and brings in a whole pile of logic that wouldn't be required otherwise, which costs an awful lot of CPU and RAM during CTFE. newCTFE anyone?
If you want to reproduce this, you'll need the changes below to std.uni (just add them at the bottom of the file).
public:
version(std_uni_bootstrap) {
/// Bootstrap stub for `icmp`: both arguments are ignored and the result is always 0.
int icmp(S1, S2)(S1 r1, S2 r2)
{
    return 0;
}
/// Bootstrap stub for `toLower`: performs no case mapping and hands `c` back unchanged.
dchar toLower()(dchar c)
{
    return c;
}
/// Bootstrap stub for `toUpper`: performs no case mapping and hands `c` back unchanged.
dchar toUpper()(dchar c)
{
    return c;
}
/// Bootstrap stub for `toLowerInPlace`: a no-op, `s` is left exactly as it was passed in.
void toLowerInPlace(C)(ref C[] s)
{
}
/// Bootstrap stub for `toUpperInPlace`: a no-op, `s` is left exactly as it was passed in.
void toUpperInPlace(C)(ref C[] s)
{
}
/// Bootstrap stub for `graphemeStride`: neither `input` nor `index` is inspected;
/// the reported stride is always 0.
size_t graphemeStride(C)(const scope C[] input, size_t index)
{
    return 0;
}
/// Bootstrap stub for `isGraphical`: answers `false` for every code point.
bool isGraphical()(dchar c)
{
    return false;
}
/// Bootstrap stub of the `unicode` helper struct: every member ignores its
/// arguments and yields a default value, so no table data is touched.
struct unicode
{
    /// Resolves any `unicode.<name>` access to `CodepointSet.init`.
    static @property auto opDispatch(string name)()
    {
        return CodepointSet.init;
    }

    /// Yields `CodepointSet.init`; `range` is neither read nor advanced.
    static CodepointSet parseSet(Range)(ref Range range, bool casefold = false)
    {
        return CodepointSet.init;
    }

    /// Yields `CodepointSet.init`; `p` is neither read nor advanced and both
    /// flags are ignored.
    static CodepointSet parsePropertySpec(Range)(ref Range p, bool negated, bool casefold)
    {
        return CodepointSet.init;
    }

    /// Yields code point 0; `p` is neither read nor advanced.
    static dchar parseControlCode(Parser)(ref Parser p)
    {
        return 0;
    }
}
// Compile-time sequence of 26 punctuation characters, kept in this exact order.
// NOTE(review): presumably the characters a regex pattern may backslash-escape — confirm against the users of this alias.
alias Escapables = AliasSeq!(
    '[', ']', '\\', '^', '$', '.', '|', '?', ',', '-', ';', ':', '#',
    '&', '%', '/', '<', '>', '`', '*', '+', '(', ')', '{', '}', '~');
/// Minimal LIFO container layered over a dynamic array; the last array
/// element is the top of the stack.
struct Stack(T)
{
@safe:
    T[] data; /// backing storage, bottom of the stack first

    /// True when nothing is stored.
    @property bool empty()
    {
        return data.length == 0;
    }

    /// Number of stored elements.
    @property size_t length()
    {
        return data.length;
    }

    /// Appends `val` as the new top element.
    void push(T val)
    {
        data ~= val;
    }

    /// Removes the top element and returns it. The stack must not be empty.
    @trusted T pop()
    {
        assert(!empty);
        immutable lastIndex = data.length - 1;
        auto popped = data[lastIndex];
        data = data[0 .. lastIndex];
        // Outside CTFE, tell the runtime the shrunken slice may be appended to in place.
        if (!__ctfe)
            cast(void) data.assumeSafeAppend();
        return popped;
    }

    /// Reference to the top element. The stack must not be empty.
    @property ref T top()
    {
        assert(!empty);
        return data[data.length - 1];
    }
}
/// Bootstrap stub for `isAlpha`: answers `false` for every code point.
bool isAlpha()(dchar c)
{
    return false;
}
/// Bootstrap stub for `wordCharacter`: always yields `CodepointSet.init`.
CodepointSet wordCharacter()()
{
    return CodepointSet.init;
}
/// Bootstrap stub for `parseUniHex`: `str` is neither read nor advanced,
/// `maxDigit` is ignored, and the result is always code point 0.
dchar parseUniHex(Range)(ref Range str, size_t maxDigit)
{
    return 0;
}
/**
 * Bootstrap stand-in for `simpleCaseFoldings`.
 *
 * No case-folding table is consulted here, so the result is the identity
 * folding: a range that holds just `ch`. This matches the `toLower` /
 * `toUpper` stubs in this block, which also return their argument unchanged.
 *
 * The previous `return Range.init;` did not produce an empty range: a union
 * is default-initialised from its first member and `dchar.init` is 0xFFFF,
 * so `Range.init` had `idx == 0` and `len == 0xFFFF`, i.e. 65535 elements
 * whose `front` was always 0.
 *
 * Params:
 *   ch = the code point to fold
 * Returns: a range of length 1 yielding `ch`. For `ch == 0` the range is
 *          empty, because 0 is the "empty" marker of the small form.
 */
auto simpleCaseFoldings()(dchar ch)
{
    static struct Range
    {
    @safe pure nothrow:
        uint idx; // uint.max selects the "small" form, where the only element is `c`
        union
        {
            dchar c;  // small form: the element itself; 0 means the range is empty
            uint len; // indexed form: number of elements still to be consumed
        }

        /// True when the range is in the single-element ("small") form.
        @property bool isSmall() const { return idx == uint.max; }

        /// Builds the small form holding only `ch`.
        this(dchar ch)
        {
            idx = uint.max;
            c = ch;
        }

        /// Builds the indexed form. The stub never constructs it, because
        /// there is no table to index into.
        this(uint start, uint size)
        {
            idx = start;
            len = size;
        }

        /// Current element: the stored code point in the small form; 0 in the
        /// indexed form, which has no backing table in this stub.
        @property dchar front() const
        {
            assert(!empty);
            if (isSmall)
            {
                return c;
            }
            return 0;
        }

        @property bool empty() const
        {
            if (isSmall)
            {
                return c == 0;
            }
            return len == 0;
        }

        @property size_t length() const
        {
            if (isSmall)
            {
                return c == 0 ? 0 : 1;
            }
            return len;
        }

        void popFront()
        {
            if (isSmall)
                c = 0; // consuming the single element leaves the empty marker behind
            else
            {
                idx++;
                len--;
            }
        }
    }
    // Identity folding: the only "fold" of ch is ch itself.
    return Range(ch);
}
}