| |
| Posted by Andy Valencia | PermalinkReply |
|
Andy Valencia
| I wrote a "count newlines" program based on memory-mapped files. It used about twice the CPU of the version which just reads 1 MB at a time. I thought something was amiss (needless slice indirection or something), so I rewrote the code in C. It had the same CPU usage as the D version. So... mapped files: not so much. Not D's fault. And writing it in C made me realize how much easier it is to code in D!
The D version:
import std.stdio : writeln;
import std.mmfile : MmFile;
// Chunk size in bytes (64 KiB). Not referenced by any code visible in this
// listing; presumably left over from the read-based variant -- TODO confirm.
const uint CHUNKSZ = 65536;
/**
 * Count the newline characters in a character slice.
 *
 * Params:
 *     data = characters to scan; taken by reference and only read here
 *
 * Returns: the number of elements of data equal to '\n'
 */
size_t
countnl(ref shared char[] data)
{
    size_t newlines = 0;

    for (size_t i = 0; i < data.length; ++i) {
        if (data[i] != '\n') {
            continue;
        }
        ++newlines;
    }
    return newlines;
}
/**
 * Print a usage summary to stderr and terminate the process with exit
 * status 1.
 *
 * Params:
 *     progname = name shown as the command in the usage line
 */
void
usage(in string progname)
{
    import core.stdc.stdlib : exit;
    import std.stdio : stderr;

    // writeln performs no format expansion, so the message must not carry a
    // printf-style "%s" placeholder; progname is already passed as its own
    // argument.
    stderr.writeln("Usage is: ", progname, " <file> ...");
    exit(1);
}
public:
/**
 * For each file named on the command line, map it into memory, count its
 * newlines with countnl() and print "<name>: <count>".
 *
 * Params:
 *     argv = program name followed by the files to scan
 */
void
main(string[] argv)
{
    // No file operands given: report usage (usage() calls exit(1)).
    if (argv.length < 2) {
        usage(argv[0]);
    }

    foreach (path; argv[1 .. $]) {
        auto mapping = new MmFile(path);
        // View the whole mapping as the shared char[] that countnl() takes.
        auto bytes = cast(shared char[]) mapping[];
        const size_t newlines = countnl(bytes);
        writeln(path, ": ", newlines);
    }
}
And the C one (no performance gain over D):
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
/*
 * Count the newline ('\n') bytes in the file open on descriptor fd.
 *
 * The file is sized with fstat(), mapped read-only in a single piece,
 * scanned byte by byte and unmapped again.  nm is used only to label
 * diagnostics.
 *
 * Returns the number of newlines found.  On failure a message is written
 * to stderr and 0 is returned, so a failure cannot be told apart from a
 * file that contains no newlines by the return value alone.
 */
static unsigned long
countnl(int fd, char *nm)
{
    struct stat st;
    size_t len, left;
    unsigned long res = 0;
    char *buf;
    const char *p;

    if (fstat(fd, &st) < 0) {
        perror(nm);
        return 0;
    }

    /*
     * mmap() rejects a zero length with EINVAL.  An empty regular file
     * simply has no newlines, so answer without mapping anything.
     */
    if (st.st_size == 0 && S_ISREG(st.st_mode)) {
        return 0;
    }

    /*
     * Keep the length in a size_t: an unsigned int would silently drop the
     * high bits of sizes of 4 GiB and above.  Refuse sizes that cannot be
     * represented instead of mapping a truncated prefix.
     */
    if (st.st_size < 0 || (unsigned long long)st.st_size > (size_t)-1) {
        fprintf(stderr, "%s: file too large to map\n", nm);
        return 0;
    }
    len = (size_t)st.st_size;

    buf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
    if (buf == MAP_FAILED) {
        perror(nm);
        return 0;
    }

    for (p = buf, left = len; left != 0; --left) {
        if (*p++ == '\n') {
            res += 1;
        }
    }

    /* Unmap exactly the length that was mapped. */
    munmap(buf, len);
    return res;
}
/*
 * Print "<name>: <newline count>" for every file named on the command
 * line.  A file that cannot be opened is reported with perror() and
 * skipped; the remaining files are still processed.
 */
int
main(int argc, char **argv)
{
    int x;

    for (x = 1; x < argc; ++x) {
        char *nm = argv[x];
        unsigned long res;
        int fd = open(nm, O_RDONLY);

        if (fd < 0) {
            perror(nm);
            continue;
        }
        res = countnl(fd, nm);
        close(fd);
        /*
         * %lu is the conversion for unsigned long.  "%uld" would consume
         * the argument as unsigned int and then print a literal "ld".
         */
        printf("%s: %lu\n", nm, res);
    }
    return 0;
}
|