November 02, 2007
Paul Findlay Wrote:
> That's exactly what I was doing when trying Tim Bray's "Wide Finder" thing in D.

The D version below is still much slower than the Psyco version, I presume because I know Python better than D (I have used functional-style coding in D 1.x, and with the new closures of D 2.x it may improve) and because Python's AAs (dicts) are considerably more optimized:


import std.stream, d.func, d.time, std.string;
import std.regexp: search;

void main() {
  // Start of the timed region: the file scan plus the first sort below.
  auto started = clock();
  string pattern = `GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) `;

  // Number of matching requests, keyed by the regex's first capture group.
  int[string] hits;
  foreach(string row; new BufferedFile("o1000k.ap")) {
    // Plain substring test first, so the regex only runs on candidate lines.
    if (row.find("GET /ongoing/When") != -1) {
      if (auto found = search(row, pattern)) {
        hits[found.match(1)]++;
      }
    }
  }

  // The sort and the 10-element slice are kept inside the timed region even
  // though the per-entry output is disabled.
  // NOTE(review): sortedAA/Vgetter come from d.func (not shown here);
  // presumably this yields the keys ordered by their counts -- confirm.
  foreach(name; sortedAA(hits, &Vgetter!(string, int))[0 .. 10]) {
    // writefln("%40s = %s", name, hits[name]);
  }

  // Elapsed time since `started`, as reported by clock().
  writefln(clock()-started, " s");

  // Sanity check, outside the timed region: print the last ten entries of the
  // sorted result together with their counts.
  foreach(name; sortedAA(hits, &Vgetter!(string, int))[$-10 .. $]) {
    writefln("%40s = %s", name, hits[name]);
  }
}


Psyco:

import sys, time, re, collections, psyco
# Choose the timing function by platform: time.clock on Windows
# ("win32"), time.time everywhere else.
if sys.platform == "win32":
  timer = time.clock
else:
  timer = time.time

def main(filenamein):
  t0 = timer()
  search = re.compile(r"GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) ").search

  count = collections.defaultdict(int)
  for line in open(filenamein, "rb"):
    if "GET /ongoing/When" in line:
      match = search(line)
      if match:
        count[match.group(1)] += 1

  for key in sorted(count, key=count.get)[:10]:
    pass # print "%40s = %s" % (key, count[key])

  print round(timer() - t0, 2), "s"

  # sanity check
  for key in sorted(count, key=count.get)[-10:]:
    print "%40s = %s" % (key, count[key])

# Turn on Psyco for the whole program before the benchmark starts, so that
# main() runs under it.
psyco.full()
# Run the benchmark on the hard-coded log file name.
main("o1000k.ap")

Bear hugs,
bearophile