import std.stdio;
import std.array: appender, array;
import std.algorithm : findSplit, splitter, joiner, canFind, map;
import std.typecons : tuple, Tuple;
import std.conv : to;
import std.range : dropOne, dropExactly, takeExactly, chain;

// Record built by read_log from a " SYNC_PUSH: " line. Fields, in order:
//   int    - first space-separated token after the marker
//   char[] - third and fourth tokens after the marker, joined with a space
//   int    - sixth token after the marker
//   bool   - whether the text after the marker contains "PA-SOC_POP"
//   bool   - whether the text after the marker contains "CU-SOC_POP"
alias push_type = Tuple!(int, char[], int, bool, bool);
// Record built by read_log from a " SOC_NOT_PUSHED: " line. Fields, in order:
//   char[] - third and fourth tokens before the marker, concatenated
//   int    - second token after the marker
//   char[] - first token after the marker
alias npush_type = Tuple!(char[], int, char[]);

/**
 * Scans a log file line by line, collecting one record per line that
 * contains " SYNC_PUSH: " and one per line that contains
 * " SOC_NOT_PUSHED: ", then prints the number of SOC_NOT_PUSHED records
 * followed by the number of SYNC_PUSH records.
 *
 * A line containing both markers is treated as a SYNC_PUSH line only.
 * Lines containing neither marker are ignored.
 *
 * Params:
 *     filename = path of the log file, opened for reading.
 *
 * Throws: whatever `File` throws if the file cannot be opened, and
 *         `ConvException` if a token expected to be an integer is not one.
 *
 * NOTE(review): `dropExactly` / `takeExactly` require the requested tokens
 * to exist, so matching lines are assumed to be well-formed.
 */
void read_log(string filename) {
    File file = File(filename, "r");
    auto npushed = appender!(npush_type[])();
    auto pushed = appender!(push_type[])();
    // byLine hands out a slice of a buffer it reuses on the next iteration,
    // so anything stored past the current iteration must be an owned copy.
    foreach (line; file.byLine) {
        if (auto findResult = line.findSplit(" SYNC_PUSH: ")) {
            // Everything after the marker, split lazily on single spaces.
            auto rel = findResult[2];
            auto att = rel.splitter(" ");

            auto firstVal = att.front.to!int;
            // joiner is lazy, so to!(char[]) already materialises a freshly
            // allocated array here; a further .dup would only copy it again.
            auto secondVal = att.dropExactly(2).takeExactly(2).joiner(" ").to!(char[]);
            auto thirdVal = att.dropExactly(5).front.to!int;
            auto fourthVal = rel.canFind("PA-SOC_POP");
            auto fifthVal = rel.canFind("CU-SOC_POP");
            pushed.put(tuple(firstVal, secondVal, thirdVal, fourthVal, fifthVal));
            continue;
        }
        if (auto findResult = line.findSplit(" SOC_NOT_PUSHED: ")) {
            // Third and fourth tokens of the text before the marker.
            auto leftPart = findResult[0].splitter(" ").dropExactly(2)
                                                       .takeExactly(2);
            // First two tokens of the text after the marker.
            auto rightPart = findResult[2].splitter(" ").takeExactly(2);
            // chain is lazy, so this conversion allocates a new array
            // (the two tokens are concatenated without a separator).
            auto firstVal = chain(leftPart.front, leftPart.dropOne.front).to!(char[]);
            // to!(char[]) on a char[] returns the same slice into the
            // byLine buffer, so the .dup is required here.
            auto thirdVal = rightPart.front.to!(char[]).dup;
            auto secondVal = rightPart.dropOne.front.to!int;
            npushed.put(tuple(firstVal, secondVal, thirdVal));
            continue;
        }
    }
    // Doing more stuff with these arrays later. For now, just printing lengths
    writeln(npushed.data.length);
    writeln(pushed.data.length);
}

On Fri, Jun 9, 2017 at 12:01 PM, uncorroded via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:
On Friday, 9 June 2017 at 08:58:38 UTC, Daniel Kozak wrote:

There is no difference in speed because you do not process your data
lazily; the many allocations this causes are the main reason it is so slow. I could improve that, but I would need to see some example data that you are trying to parse.

But some rules:
1.) instead of ~= you should use std.array.appender
2.) instead of std.string.split you could use std.algorithm.splitter or
std.algorithm.findSplit
3.) instead of indexOf I would use std.algorithm.startsWith (in case it is
at the beginning of the line)

Thanks everyone for the tips.
The log file itself is 52 MB, but I have added a sample on pastebin ( https://pastebin.com/vj778PK4 ). I will try the suggestions this evening.