Thread overview
Strange CTFE issue, string replacement
Apr 09, 2017
Jethro
Apr 09, 2017
Stefan Koch
Apr 09, 2017
Jethro
Apr 09, 2017
Stefan Koch
April 09, 2017
I tried to make a string like replacement called fstring which uses buffering to avoid lots of little allocations. The problem is, that it actually causes the compiler to use 10x the memory and about 10 times slower.


It doesn't do anything special but tries to emulate string(so it can be a drop in replacement, more or less)



Why would it be so ineffective in CTFE? I know strings are built in and the compiler probably has some optimizations, but an order of magnitude more memory usage is was not expected(should be lower)? I could understand a small speed slowdown, but not 10x.

Any ideas?

class fstring(T = char)
{
	import std.traits;
	T[] buf;	
	int length = 0;
	int Inc = 65535;

	this(int inc = 65535)
	{
		Inc = inc;
		buf.length = Inc;
		for(int i = 0; i < buf.length; i++)	buf[i] = 0;
	}

	// foreach
	ReturnType!(D) opApply(D)(scope D dg)
    {
		ReturnType!(D) result;
		for (int i = 0; i < length; i++)
		{
			result = dg(buf[i]);
			if (result) break;
		}
		return result;
    }

	// Append string to end
	auto Append(S)(S s)
	{
		while (length + s.length >= buf.length) buf.length += Inc;
		for (int i = 0; i < s.length; i++) buf[length + i] = cast(T)(s[i]);
		length += s.length;
	}

	// Prepends string to start
	auto Prepend(S)(S s)
	{
		while (length + s.length >= buf.length) buf.length += Inc;
		for (int i = 0; i < length; i++) buf[s.length + length - i - 1] = buf[length - i - 1];
		for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
		length += s.length;
	}


	auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s); }	
	void opAssign(S)(S s) if (!is(S == typeof(this))) { length = 0; Append(s); }
	auto opBinary(string op, S)(S s) if (op == "~") { Append(s); return this; }
	auto opBinaryRight(string op, S)(S s) if (op == "~") { Prepend(s); return this; }

	bool opEquals(S)(S s)
	{		
		if (buf.length != s.length) return false;
		for(int i = 0; i < buf.length; i++)	if (buf[i] != s[i]) return false;
		return true;
	}

	// Forward replaces string a with string b in place(no memory allocations) and greedily(as they are found from left to right)
	auto replace(A, B)(A a, B b)
	{
		if (length < a.length || a.length == 0 || b.length == 0) return this;
		auto small = (a.length >= b.length);
		int idx = 0;
		int count = 0; // number of matches, used when b.length > a.length to determine how much to overallocate(which depends on the number of matches)
		for(int i = 0; i <= length - a.length; i++)
		{
			if (buf[i] == a[0])
			{
				auto found = true;
				for(int j = 1; j < a.length; j++)	// Work backwards as more likely to be a mismatch(faster)
					if (buf[i + a.length - j] != a[a.length - j])
					{
						found = false;
						break;
					}

				if (found)
				{
					count++;
					i += a.length - 1;	// dec by 1 because for loop will inc
					if (small) for(int j = 0; j < b.length; j++) buf[idx++] = b[j];
					continue;
				}
			}

			if (small) buf[idx++] = buf[i]; 			
		}

		int extra = -count*(a.length - b.length);
		if (small) { length += extra; if (buf.length > length) buf[length] = 0; return this; }

		// We now know the count so the (a.length - b.length)*count is the amount to expand/contract buf		
		auto end = length + extra;
		if (end >= buf.length) buf.length += Inc;


		// shift the string to make space at the head, this allows us to use essentially the same algorithm as above, the difference is that we must first know the number of matches to overallocate the buffer if necessary(could do dynamically) and to shift the string ahead
		for(int i = 0; i < length; i++)	
			buf[end - 1 - i] = buf[length - i - 1];

		idx = 0;		
		for(int i = extra; i <= length - a.length + extra; i++)
		{
			if (buf[i] == a[0])
			{
				auto found = true;
				for(int j = 1; j < a.length; j++)	// Work backwards as more likely to be a mismatch(faster)
					if (buf[i + a.length - j] != a[a.length - j])
					{
						found = false;
						break;
					}

				if (found)
				{
					count--;
					i += a.length - 1;	// dec by 1 because for loop will inc
					for(int j = 0; j < b.length; j++)
						buf[idx++] = b[j];
					if (count == 0) break;
					continue;
				}
			}

			buf[idx++] = buf[i]; 			
		}

		length = length + extra;
		assert(count == 0, "fstring: Programmatic Bug - Replace String Error!");
		return this;
	}

	// Strips away the begining(dir = -1), the end(dir = 1), or both(dir = 0) that match s
	auto strip(int dir = 0, Args...)(Args s)
	{

		int start = 0, end = length;
                if (dir <= 0)
		for(int i = 0; i <= length/2 + 1; i++)
		{
			bool anyFound = false;
			foreach(ss; s)
			{			
				
				auto found = true;
				for(int j = 0; j < ss.length; j++)
				{
					if (i + j >= length) break;
					if (buf[i + j] != ss[j])
						found = false;
				}
				anyFound = anyFound | found;

				if (found) i += ss.length - 1;
			}

			if (anyFound)
				start = i+1;
			else
				break;
		}

	        if (dir >= 0)	
		for(int i = length/2; i < length; i++)
		{
			bool anyFound = false;
			foreach(ss; s)
			{			
				auto found = true;
				for(int j = 0; j < ss.length; j++)
				{
					if (i + j >= length) break;
					if (buf[i + j] != ss[j])
						found = false;
				}
				anyFound = anyFound | found;

				if (found) i += ss.length - 1;
			}

			if (!anyFound)
				end = i+1;
		}
		
		for(int i = start; i < end; i++)
			buf[i - start] = buf[i];
		length = end - start;
		return this;
	}

	auto opSlice(int start, int end) { return buf[start..end]; }
	@property int opDollar(size_t dim : 0)() { return length; }
	auto opIndex() { return buf[0..length]; }
	auto opIndex(size_t i) { return buf[i]; }
	auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
	override string toString() { return to!string(buf[0..length]); }

}
April 09, 2017
On Sunday, 9 April 2017 at 19:38:33 UTC, Jethro wrote:
> I tried to make a string like replacement called fstring which uses buffering to avoid lots of little allocations. The problem is, that it actually causes the compiler to use 10x the memory and about 10 times slower.
>
>
> It doesn't do anything special but tries to emulate string(so it can be a drop in replacement, more or less)
>
>
>
> Why would it be so ineffective in CTFE? I know strings are built in and the compiler probably has some optimizations, but an order of magnitude more memory usage is was not expected(should be lower)? I could understand a small speed slowdown, but not 10x.
>
> Any ideas?
>
> class fstring(T = char)
> {
> 	import std.traits;
> 	T[] buf;	
> 	int length = 0;
> 	int Inc = 65535;
>
> 	this(int inc = 65535)
> 	{
> 		Inc = inc;
> 		buf.length = Inc;
> 		for(int i = 0; i < buf.length; i++)	buf[i] = 0;
> 	}
>
> 	// foreach
> 	ReturnType!(D) opApply(D)(scope D dg)
>     {
> 		ReturnType!(D) result;
> 		for (int i = 0; i < length; i++)
> 		{
> 			result = dg(buf[i]);
> 			if (result) break;
> 		}
> 		return result;
>     }
>
> 	// Append string to end
> 	auto Append(S)(S s)
> 	{
> 		while (length + s.length >= buf.length) buf.length += Inc;
> 		for (int i = 0; i < s.length; i++) buf[length + i] = cast(T)(s[i]);
> 		length += s.length;
> 	}
>
> 	// Prepends string to start
> 	auto Prepend(S)(S s)
> 	{
> 		while (length + s.length >= buf.length) buf.length += Inc;
> 		for (int i = 0; i < length; i++) buf[s.length + length - i - 1] = buf[length - i - 1];
> 		for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
> 		length += s.length;
> 	}
>
>
> 	auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s); }	
> 	void opAssign(S)(S s) if (!is(S == typeof(this))) { length = 0; Append(s); }
> 	auto opBinary(string op, S)(S s) if (op == "~") { Append(s); return this; }
> 	auto opBinaryRight(string op, S)(S s) if (op == "~") { Prepend(s); return this; }
>
> 	bool opEquals(S)(S s)
> 	{		
> 		if (buf.length != s.length) return false;
> 		for(int i = 0; i < buf.length; i++)	if (buf[i] != s[i]) return false;
> 		return true;
> 	}
>
> 	// Forward replaces string a with string b in place(no memory allocations) and greedily(as they are found from left to right)
> 	auto replace(A, B)(A a, B b)
> 	{
> 		if (length < a.length || a.length == 0 || b.length == 0) return this;
> 		auto small = (a.length >= b.length);
> 		int idx = 0;
> 		int count = 0; // number of matches, used when b.length > a.length to determine how much to overallocate(which depends on the number of matches)
> 		for(int i = 0; i <= length - a.length; i++)
> 		{
> 			if (buf[i] == a[0])
> 			{
> 				auto found = true;
> 				for(int j = 1; j < a.length; j++)	// Work backwards as more likely to be a mismatch(faster)
> 					if (buf[i + a.length - j] != a[a.length - j])
> 					{
> 						found = false;
> 						break;
> 					}
>
> 				if (found)
> 				{
> 					count++;
> 					i += a.length - 1;	// dec by 1 because for loop will inc
> 					if (small) for(int j = 0; j < b.length; j++) buf[idx++] = b[j];
> 					continue;
> 				}
> 			}
>
> 			if (small) buf[idx++] = buf[i]; 			
> 		}
>
> 		int extra = -count*(a.length - b.length);
> 		if (small) { length += extra; if (buf.length > length) buf[length] = 0; return this; }
>
> 		// We now know the count so the (a.length - b.length)*count is the amount to expand/contract buf		
> 		auto end = length + extra;
> 		if (end >= buf.length) buf.length += Inc;
>
>
> 		// shift the string to make space at the head, this allows us to use essentially the same algorithm as above, the difference is that we must first know the number of matches to overallocate the buffer if necessary(could do dynamically) and to shift the string ahead
> 		for(int i = 0; i < length; i++)	
> 			buf[end - 1 - i] = buf[length - i - 1];
>
> 		idx = 0;		
> 		for(int i = extra; i <= length - a.length + extra; i++)
> 		{
> 			if (buf[i] == a[0])
> 			{
> 				auto found = true;
> 				for(int j = 1; j < a.length; j++)	// Work backwards as more likely to be a mismatch(faster)
> 					if (buf[i + a.length - j] != a[a.length - j])
> 					{
> 						found = false;
> 						break;
> 					}
>
> 				if (found)
> 				{
> 					count--;
> 					i += a.length - 1;	// dec by 1 because for loop will inc
> 					for(int j = 0; j < b.length; j++)
> 						buf[idx++] = b[j];
> 					if (count == 0) break;
> 					continue;
> 				}
> 			}
>
> 			buf[idx++] = buf[i]; 			
> 		}
>
> 		length = length + extra;
> 		assert(count == 0, "fstring: Programmatic Bug - Replace String Error!");
> 		return this;
> 	}
>
> 	// Strips away the begining(dir = -1), the end(dir = 1), or both(dir = 0) that match s
> 	auto strip(int dir = 0, Args...)(Args s)
> 	{
>
> 		int start = 0, end = length;
>                 if (dir <= 0)
> 		for(int i = 0; i <= length/2 + 1; i++)
> 		{
> 			bool anyFound = false;
> 			foreach(ss; s)
> 			{			
> 				
> 				auto found = true;
> 				for(int j = 0; j < ss.length; j++)
> 				{
> 					if (i + j >= length) break;
> 					if (buf[i + j] != ss[j])
> 						found = false;
> 				}
> 				anyFound = anyFound | found;
>
> 				if (found) i += ss.length - 1;
> 			}
>
> 			if (anyFound)
> 				start = i+1;
> 			else
> 				break;
> 		}
>
> 	        if (dir >= 0)	
> 		for(int i = length/2; i < length; i++)
> 		{
> 			bool anyFound = false;
> 			foreach(ss; s)
> 			{			
> 				auto found = true;
> 				for(int j = 0; j < ss.length; j++)
> 				{
> 					if (i + j >= length) break;
> 					if (buf[i + j] != ss[j])
> 						found = false;
> 				}
> 				anyFound = anyFound | found;
>
> 				if (found) i += ss.length - 1;
> 			}
>
> 			if (!anyFound)
> 				end = i+1;
> 		}
> 		
> 		for(int i = start; i < end; i++)
> 			buf[i - start] = buf[i];
> 		length = end - start;
> 		return this;
> 	}
>
> 	auto opSlice(int start, int end) { return buf[start..end]; }
> 	@property int opDollar(size_t dim : 0)() { return length; }
> 	auto opIndex() { return buf[0..length]; }
> 	auto opIndex(size_t i) { return buf[i]; }
> 	auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
> 	override string toString() { return to!string(buf[0..length]); }
>
> }

The constructor is nuts.
You do not need to zero the string!

Also avoid templates if you can.
April 09, 2017
On Sunday, 9 April 2017 at 19:55:57 UTC, Stefan Koch wrote:
> On Sunday, 9 April 2017 at 19:38:33 UTC, Jethro wrote:
>> [...]
>
> The constructor is nuts.
> You do not need to zero the string!
>
> Also avoid templates if you can.

Please don't criticize banal stuff by choosing one meaningless line and calling the whole thing "nuts". It's a waste of time. I zero'ed it because it helps in debugging and for other errors.(if used in C terminated like string usage it helps to have the string actually terminate at some point)


My questions still stand though. Why is it 10x the speed and memory?!?!
April 09, 2017
On Sunday, 9 April 2017 at 20:20:55 UTC, Jethro wrote:
> On Sunday, 9 April 2017 at 19:55:57 UTC, Stefan Koch wrote:
>> On Sunday, 9 April 2017 at 19:38:33 UTC, Jethro wrote:
>>> [...]
>>
>> The constructor is nuts.
>> You do not need to zero the string!
>>
>> Also avoid templates if you can.
>
> Please don't criticize banal stuff by choosing one meaningless line and calling the whole thing "nuts". It's a waste of time. I zero'ed it because it helps in debugging and for other errors.(if used in C terminated like string usage it helps to have the string actually terminate at some point)
>
>
> My questions still stand though. Why is it 10x the speed and memory?!?!

The short answer is because the current (soon to be replaced) CTFE implementation has a number of issues.