Jump to page: 1 2
Thread overview
[Performance] shootout.binarytrees when implemented with gc is 10x slower than C# on .NET 2.0
Nov 11, 2006
Andrey Khropov
Nov 12, 2006
Kyle Furlong
Nov 12, 2006
Andrey Khropov
Nov 12, 2006
Kyle Furlong
Nov 12, 2006
Craig Black
Nov 12, 2006
Craig Black
Nov 12, 2006
Chris Miller
Nov 12, 2006
Craig Black
Nov 13, 2006
David Medlock
Nov 12, 2006
Dave
Nov 12, 2006
Andrey Khropov
Nov 12, 2006
Dave
November 11, 2006
Here is the D code for shootout.binarytrees with GC. Absolutely equal to the C# code (I also embedded a timer):

------------------------------------------------------------------------
import std.c.stdlib, std.stdio, std.perf, std.gc;

int main(char[][] args)
{
    int N = args.length > 1 ? atoi(args[1]) : 1;

    auto t = new HighPerformanceCounter();
    t.start();

    int minDepth = 4;
    int maxDepth = (minDepth + 2) > N ? minDepth + 2 : N;
    int stretchDepth = maxDepth + 1;

    TreeNode stretchTree = TreeNode.BottomUpTree(0, stretchDepth);
    writefln("stretch tree of depth ",stretchDepth,"\t check:
",stretchTree.ItemCheck);

    TreeNode longLivedTree = TreeNode.BottomUpTree(0, maxDepth);

    int depth;
    for(depth = minDepth; depth <= maxDepth; depth += 2)
    {
        int check, iterations = 1 << (maxDepth - depth + minDepth);

        for(int i = 1; i <= iterations; i++)
        {
            auto tempTree = TreeNode.BottomUpTree(i, depth);
            check += tempTree.ItemCheck;
            //delete tempTree;

            tempTree = TreeNode.BottomUpTree(-i, depth);
            check += tempTree.ItemCheck;
            //delete tempTree;
        }

        writefln(iterations * 2,"\t trees of depth ",depth,"\t check: ",check);
    }

    writefln("long lived tree of depth ",maxDepth,"\t check:
",longLivedTree.ItemCheck);

    t.stop();

    writefln(t.milliseconds()," ms elapsed.");

    return 0;
}

class TreeNode
{
public:
    this(int item, TreeNode left = null, TreeNode right = null)
    {
      this.item  = item;
      this.left  = left;
      this.right = right;
    }

    static TreeNode BottomUpTree(int item, int depth)
    {
        if(depth > 0)
            return new TreeNode(item
                                ,BottomUpTree(2 * item - 1, depth - 1)
                                ,BottomUpTree(2 * item, depth - 1));
        return new TreeNode(item);
    }

    int ItemCheck()
    {
        if(left)
            return item + left.ItemCheck() - right.ItemCheck();
        return item;
    }
private:
    TreeNode            left, right;
    int                 item;
}
------------------------------------------------------------------------
Here's the C# code (also with embedded timer):
------------------------------------------------------------------------
using System;
using System.Diagnostics;

class BinaryTrees
{
   const int minDepth = 4;

   public static void Main(String[] args)
   {
      int n = 0;
      if (args.Length > 0) n = Int32.Parse(args[0]);

      Stopwatch t = new Stopwatch();
      t.Start();

      int maxDepth = Math.Max(minDepth + 2, n);
      int stretchDepth = maxDepth + 1;

      int check = (TreeNode.bottomUpTree(0,stretchDepth)).itemCheck();
      Console.WriteLine("stretch tree of depth {0}\t check: {1}", stretchDepth,
check);

      TreeNode longLivedTree = TreeNode.bottomUpTree(0,maxDepth);

      for (int depth=minDepth; depth<=maxDepth; depth+=2){
         int iterations = 1 << (maxDepth - depth + minDepth);

         check = 0;
         for (int i=1; i<=iterations; i++)
         {
            check += (TreeNode.bottomUpTree(i,depth)).itemCheck();
            check += (TreeNode.bottomUpTree(-i,depth)).itemCheck();
         }

         Console.WriteLine("{0}\t trees of depth {1}\t check: {2}",
            iterations*2, depth, check);
      }

      Console.WriteLine("long lived tree of depth {0}\t check: {1}",
         maxDepth, longLivedTree.itemCheck());

      t.Stop();
      Console.WriteLine(t.ElapsedMilliseconds + "ms elapsed.");
   }


   class TreeNode
   {
      private TreeNode left, right;
      private int item;

      TreeNode(int item){
         this.item = item;
      }

      TreeNode(int item,TreeNode left, TreeNode right){
         this.left = left;
         this.right = right;
         this.item = item;
      }

      internal static TreeNode bottomUpTree(int item, int depth){
         if (depth>0){
            return new TreeNode(
                 item,
                 bottomUpTree(2*item-1, depth-1),
                 bottomUpTree(2*item, depth-1)
               );
         }
         else {
            return new TreeNode(item);
         }
      }

      internal int itemCheck(){
         // if necessary deallocate here
         if (left==null) return item;
         else return item + left.itemCheck() - right.itemCheck();
      }
   }
}
------------------------------------------------------------------------

So the results are (with cmdline parameter = 16):

C# on .NET 2.0	    				 :  1.902 sec
DMD 0.173 - malloc(original version)  :  5.189 sec
C# on Mono 1.1.18				 :  6.630 sec
DMD 0.173 - full GC 				 : 19.720 sec

-- 
AKhropov
November 12, 2006
Andrey Khropov wrote:
> Here is the D code for shootout.binarytrees with GC. Absolutely equal to the C#
> code (I also embedded a timer):
> 
> ------------------------------------------------------------------------
> import std.c.stdlib, std.stdio, std.perf, std.gc;
> 
> int main(char[][] args)
> {
>     int N = args.length > 1 ? atoi(args[1]) : 1;
>         auto t = new HighPerformanceCounter();
>     t.start();
> 
>     int minDepth = 4;
>     int maxDepth = (minDepth + 2) > N ? minDepth + 2 : N;
>     int stretchDepth = maxDepth + 1;
> 
>     TreeNode stretchTree = TreeNode.BottomUpTree(0, stretchDepth);
>     writefln("stretch tree of depth ",stretchDepth,"\t check:
> ",stretchTree.ItemCheck);
> 
>     TreeNode longLivedTree = TreeNode.BottomUpTree(0, maxDepth);
> 
>     int depth;
>     for(depth = minDepth; depth <= maxDepth; depth += 2)
>     {
>         int check, iterations = 1 << (maxDepth - depth + minDepth);
> 
>         for(int i = 1; i <= iterations; i++)
>         {
>             auto tempTree = TreeNode.BottomUpTree(i, depth);
>             check += tempTree.ItemCheck;
>             //delete tempTree;
> 
>             tempTree = TreeNode.BottomUpTree(-i, depth);
>             check += tempTree.ItemCheck;
>             //delete tempTree;
>         }
> 
>         writefln(iterations * 2,"\t trees of depth ",depth,"\t check: ",check);
>     }
> 
>     writefln("long lived tree of depth ",maxDepth,"\t check:
> ",longLivedTree.ItemCheck);
>         t.stop();
>         writefln(t.milliseconds()," ms elapsed.");
> 
>     return 0;
> }
> 
> class TreeNode
> {
> public:
>     this(int item, TreeNode left = null, TreeNode right = null)
>     {
>       this.item  = item;
>       this.left  = left;
>       this.right = right;
>     }
> 
>     static TreeNode BottomUpTree(int item, int depth)
>     {
>         if(depth > 0)
>             return new TreeNode(item
>                                 ,BottomUpTree(2 * item - 1, depth - 1)
>                                 ,BottomUpTree(2 * item, depth - 1));
>         return new TreeNode(item);
>     }
> 
>     int ItemCheck()
>     {
>         if(left)
>             return item + left.ItemCheck() - right.ItemCheck();
>         return item;
>     }
> private:
>     TreeNode            left, right;
>     int                 item;
> }
> ------------------------------------------------------------------------
> Here's the C# code (also with embedded timer):
> ------------------------------------------------------------------------
> using System;
> using System.Diagnostics;
> 
> class BinaryTrees
> {
>    const int minDepth = 4;
> 
>    public static void Main(String[] args)    {              int n = 0;
>       if (args.Length > 0) n = Int32.Parse(args[0]);
>             Stopwatch t = new Stopwatch();
>       t.Start();            int maxDepth = Math.Max(minDepth + 2, n);
>       int stretchDepth = maxDepth + 1;
> 
>       int check = (TreeNode.bottomUpTree(0,stretchDepth)).itemCheck();
>       Console.WriteLine("stretch tree of depth {0}\t check: {1}", stretchDepth,
> check);
> 
>       TreeNode longLivedTree = TreeNode.bottomUpTree(0,maxDepth);
> 
>       for (int depth=minDepth; depth<=maxDepth; depth+=2){
>          int iterations = 1 << (maxDepth - depth + minDepth);
> 
>          check = 0;
>          for (int i=1; i<=iterations; i++)
>          {
>             check += (TreeNode.bottomUpTree(i,depth)).itemCheck();              check += (TreeNode.bottomUpTree(-i,depth)).itemCheck();          }
> 
>          Console.WriteLine("{0}\t trees of depth {1}\t check: {2}",             iterations*2, depth, check);
>       }
>        Console.WriteLine("long lived tree of depth {0}\t check: {1}",          maxDepth, longLivedTree.itemCheck());
>                t.Stop();
>       Console.WriteLine(t.ElapsedMilliseconds + "ms elapsed.");         }
> 
> 
>    class TreeNode    {           private TreeNode left, right;
>       private int item;
> 
>       TreeNode(int item){
>          this.item = item;
>       }
>             TreeNode(int item,TreeNode left, TreeNode right){
>          this.left = left;          this.right = right;
>          this.item = item;
>       }
> 
>       internal static TreeNode bottomUpTree(int item, int depth){
>          if (depth>0){
>             return new TreeNode(
>                  item,
>                  bottomUpTree(2*item-1, depth-1),
>                  bottomUpTree(2*item, depth-1)
>                );
>          }
>          else {
>             return new TreeNode(item);
>          }
>       }
> 
>       internal int itemCheck(){
>          // if necessary deallocate here          if (left==null) return item;
>          else return item + left.itemCheck() - right.itemCheck();       }
>    }
> }
> ------------------------------------------------------------------------
> 
> So the results are (with cmdline parameter = 16):
> 
> C# on .NET 2.0	    				 :  1.902 sec
> DMD 0.173 - malloc(original version)  :  5.189 sec
> C# on Mono 1.1.18				 :  6.630 sec
> DMD 0.173 - full GC 				 : 19.720 sec
> 

Andrey, its no secret that the D GC is not implemented to be the fastest around. C# I believe uses at the least a generational collector, which I think is also copying.

D currently uses a conservative mark and sweep collector. Rock solid, but slow.
November 12, 2006
That really sucks.

Eventually, I know that it is Walter's intention to  optimize D's GC.  In the mean time, when performance is critical, we can do heap allocation in the traditional manner using malloc and free.  However, IMO, there should be a more simple way to do this than overloading new and delete.

-Craig


November 12, 2006
> C# on .NET 2.0     :  1.902 sec
> DMD 0.173 - malloc(original version)  :  5.189 sec
> C# on Mono 1.1.18 :  6.630 sec
> DMD 0.173 - full GC : 19.720 sec

I see that C# GC is faster than DMD's malloc.  How is this possible?  Did Microsoft figure out how to make the GC really fast, or is D's malloc slow? I wonder how D's malloc compares to nedmalloc.  It is a very fast cross-platform allocator.  If it is the case that D's malloc is slow, why don't we adopt a public domain allocator that is real fast?  It would be very easy to do.

-Craig


November 12, 2006
Andrey Khropov wrote:
> So the results are (with cmdline parameter = 16):
> 
> C# on .NET 2.0	    				 :  1.902 sec
> DMD 0.173 - malloc(original version)  :  5.189 sec
> C# on Mono 1.1.18				 :  6.630 sec
> DMD 0.173 - full GC 				 : 19.720 sec
> 

How about this (diff of original version)?

--- binarytrees.d       2006-06-02 09:34:38.000000000 -0500
+++ binarytrees_list.d  2006-11-11 21:30:23.000000000 -0600
@@ -41,7 +41,6 @@
     }

     writefln("long lived tree of depth ",maxDepth,"\t check: ",longLivedTree.ItemCheck);
-    TreeNode.DeleteTree(longLivedTree);

     return 0;
 }
@@ -80,6 +79,9 @@
     TreeNode*           left, right;
     int                 item;

+    static TreeNode*    list;
+    TreeNode*           next;
+
     static TreeNode* opCall(int item, TreeNode* left = null, TreeNode* right = null)
     {
         TreeNode* t = new TreeNode;
@@ -91,11 +93,26 @@

     new(uint sz)
     {
-        return malloc(sz);
+        TreeNode* tn;
+        if(list)
+        {
+            tn = list;
+            list = tn.next;
+        }
+        else
+        {
+            tn = cast(TreeNode*)malloc(sz);
+        }
+        return tn;
     }

     delete(void* p)
     {
-        free(p);
+        if(p)
+        {
+            TreeNode* tn = cast(TreeNode*)p;
+            tn.next = list;
+            list = tn;
+        }
     }
 }
November 12, 2006
On Sat, 11 Nov 2006 23:01:50 -0500, Craig Black <cblack@ara.com> wrote:

>> C# on .NET 2.0     :  1.902 sec
>> DMD 0.173 - malloc(original version)  :  5.189 sec
>> C# on Mono 1.1.18 :  6.630 sec
>> DMD 0.173 - full GC : 19.720 sec
>
> I see that C# GC is faster than DMD's malloc.  How is this possible?  Did
> Microsoft figure out how to make the GC really fast, or is D's malloc slow?

The type of GC used by .NET is just about as fast as stack allocation; a simple pointer is advanced. It's an upside to relocating data (copying collector).
November 12, 2006
Kyle Furlong wrote:

> Andrey, its no secret that the D GC is not implemented to be the fastest around. C# I believe uses at the least a generational collector, which I think is also copying.
> 
> D currently uses a conservative mark and sweep collector. Rock solid, but slow.

My point here is that if we want to successfully compete with .NET we should adapt a better (copying,generational) collector.

Of course it poses some problems (pointers to GC data for example), but if we have to go back to new/delete/malloc/free to get fast code (and it's actually very likely that C#'s GC with the fast allocation will be faster anyway (as in this case) unless we employ some special trickery like allocation lists) it spoils the whole idea of GC as a primary memory management mechanism for D.

-- 
AKhropov
November 12, 2006
Dave wrote:

Much better :) (you should post it to shootout maybe) :

1) DMD 0.173 - allocation lists             :  1.605 sec
2) C# on .NET 2.0                           :  1.902 sec
3) DMD 0.173 - malloc(original version)     :  5.189 sec
4) C# on Mono 1.1.18                        :  6.630 sec
5) DMD 0.173 - full GC                      : 19.720 sec

But anyway I'd like D's GC to work reasonably well out of the box.

-- 
AKhropov
November 12, 2006
Andrey Khropov wrote:
> Kyle Furlong wrote:
> 
>> Andrey, its no secret that the D GC is not implemented to be the fastest
>> around. C# I believe uses at the least a generational collector, which I
>> think is also copying.
>>
>> D currently uses a conservative mark and sweep collector. Rock solid, but
>> slow.
> 
> My point here is that if we want to successfully compete with .NET we should
> adapt a better (copying,generational) collector.
> 
> Of course it poses some problems (pointers to GC data for example), but if we
> have to go back to new/delete/malloc/free to get fast code (and it's actually
> very likely that C#'s GC with the fast allocation will be faster anyway (as in
> this case) unless we employ some special trickery like allocation lists) it
> spoils the whole idea of GC as a primary memory management mechanism for D.
> 

One step ahead of you... ;)
November 12, 2006
Andrey Khropov wrote:
> Dave wrote:
> 
> Much better :) (you should post it to shootout maybe) :
> 

It doesn't qualify <g>

> 1) DMD 0.173 - allocation lists             :  1.605 sec
> 2) C# on .NET 2.0                           :  1.902 sec
> 3) DMD 0.173 - malloc(original version)     :  5.189 sec
> 4) C# on Mono 1.1.18                        :  6.630 sec
> 5) DMD 0.173 - full GC                      : 19.720 sec
> 
> But anyway I'd like D's GC to work reasonably well out of the box.
> 

Ditto.
« First   ‹ Prev
1 2