2016-02-29 59 views
2

繼續我的F#性能測試。想了解更多背景請看這裏:F# NativePtr.stackalloc 比 C# stackalloc 慢 - 包含反編譯代碼

f# NativePtr.stackalloc in Struct Constructor

F# NativePtr.stackalloc Unexpected Stack Overflow

現在我已經讓棧上陣列(stack array)在F#中正常工作了。但是,由於某些原因,等效的C#代碼大約快50倍。我在下面附上了ILSpy反編譯的版本,看起來只有一行是真正不同的(在stackAlloc內)。

這是怎麼回事?未檢查(unchecked)的算術運算真的是造成如此巨大差異的原因嗎?我不確定該如何驗證這一點。

https://msdn.microsoft.com/en-us/library/a569z7k8.aspx

F#代碼

#nowarn "9" 

open Microsoft.FSharp.NativeInterop 
open System 
open System.Diagnostics  
open System.Runtime.CompilerServices   

// Stack-allocates x elements of type byte via NativePtr.stackalloc and returns unit.
// The NoInlining attribute asks the JIT not to inline this method into its caller.
[<MethodImpl(MethodImplOptions.NoInlining)>] 
let stackAlloc x = 
    // The pointer is bound to a mutable local but is never read or written through.
    let mutable ints:nativeptr<byte> = NativePtr.stackalloc x 
    () 

// Benchmark entry point: prints argv, makes one untimed call to stackAlloc, then
// times `reps` further calls with a Stopwatch and prints the elapsed milliseconds.
// Returns 0 as the process exit code after waiting for a key press.
[<EntryPoint>] 
let main argv = 
    printfn "%A" argv 

    // size is the argument passed to every stackAlloc call; reps is the number of timed calls.
    let size = 8192    
    let reps = 10000 

    stackAlloc size // untimed first call so JIT compilation happens before the clock starts 
    let clock = Stopwatch() 
    clock.Start() 
    for i = 1 to reps do    
     stackAlloc size 
    clock.Stop() 

    // elapsed stays a float here, so the {3:#,##0.####} specifier below formats it.
    let elapsed = clock.Elapsed.TotalMilliseconds 
    let description = "F# NativePtr.stackalloc" 
    Console.WriteLine("{0} ({1} bytes, {2} reps): {3:#,##0.####}ms", description, size, reps, elapsed) 

    // Block until a key is pressed, discarding the key info.
    Console.ReadKey() |> ignore 
    0 

C#代碼

using System; 
using System.Diagnostics; 

namespace CSharpLanguageFeatures 
{ 
    /// <summary>
    /// Console micro-benchmark that times repeated calls to <see cref="stackAlloc"/>.
    /// </summary>
    class CSharpStackArray 
    { 
     /// <summary>
     /// Makes one untimed call, then times <c>reps</c> calls of <see cref="stackAlloc"/>
     /// and prints the elapsed milliseconds before waiting for a key press.
     /// </summary>
     static void Main(string[] args) 
     { 
      int size = 8192; 
      int reps = 10000; 

      stackAlloc(size); // untimed first call so JIT compilation happens before the clock starts 
      Stopwatch clock = new Stopwatch(); 
      clock.Start(); 
      for (int i = 0; i < reps; i++) 
      { 
       stackAlloc(size); 
      } 
      clock.Stop(); 

      // elapsed is already formatted into a string here, so the {3:#,##0.####}
      // specifier in the WriteLine call below has nothing left to format.
      string elapsed = clock.Elapsed.TotalMilliseconds.ToString("#,##0.####"); 
      string description = "C# stackalloc"; 
      Console.WriteLine("{0} ({1} bytes, {2} reps): {3:#,##0.####}ms", description, size, reps, elapsed); 
      Console.ReadKey(); 
     } 

     /// <summary>
     /// Stack-allocates <paramref name="arraySize"/> bytes; the pointer is never used afterwards.
     /// </summary>
     /// <param name="arraySize">Number of bytes to allocate.</param>
     public unsafe static void stackAlloc(int arraySize) 
     { 
      byte* pArr = stackalloc byte[arraySize]; 
     } 
    } 
} 

F#版本反編譯

using Microsoft.FSharp.Core; 
using System; 
using System.Diagnostics; 
using System.IO; 
using System.Runtime.CompilerServices; 

// ILSpy decompilation of the compiled F# benchmark module, shown as C#.
[CompilationMapping(SourceConstructFlags.Module)] 
public static class FSharpStackArray 
{ 
    // The allocation size is x * sizeof(byte) with no overflow check, and the
    // resulting pointer is assigned to the local `ints`.
    [MethodImpl(MethodImplOptions.NoInlining)] 
    public unsafe static void stackAlloc(int x) 
    { 
     IntPtr ints = stackalloc byte[x * sizeof(byte)]; 
    } 

    // Entry point: size and reps appear as the literals 8192 and 10000, and the
    // timed loop runs i from 1 up to (but excluding) 10001.
    [EntryPoint] 
    public static int main(string[] argv) 
    { 
     // Expansion of the F# `printfn "%A" argv` call.
     PrintfFormat<FSharpFunc<string[], Unit>, TextWriter, Unit, Unit> format = new PrintfFormat<FSharpFunc<string[], Unit>, TextWriter, Unit, Unit, string[]>("%A"); 
     PrintfModule.PrintFormatLineToTextWriter<FSharpFunc<string[], Unit>>(Console.Out, format).Invoke(argv); 
     FSharpStackArray.stackAlloc(8192); 
     Stopwatch clock = new Stopwatch(); 
     clock.Start(); 
     for (int i = 1; i < 10001; i++) 
     { 
      FSharpStackArray.stackAlloc(8192); 
     } 
     clock.Stop(); 
     double elapsed = clock.Elapsed.TotalMilliseconds; 
     Console.WriteLine("{0} ({1} bytes, {2} reps): {3:#,##0.####}ms", "F# NativePtr.stackalloc", 8192, 10000, elapsed); 
     ConsoleKeyInfo consoleKeyInfo = Console.ReadKey(); 
     return 0; 
    } 
} 

C#版反編譯

using System; 
using System.Diagnostics; 

// ILSpy decompilation of the compiled C# benchmark.
namespace CSharpLanguageFeatures 
{ 
    internal class CSharpStackArray 
    { 
     // Makes one untimed call, times `reps` calls of stackAlloc, prints the result
     // and waits for a key press.
     private static void Main(string[] args) 
     { 
      int size = 8192; 
      int reps = 10000; 
      CSharpStackArray.stackAlloc(size); 
      Stopwatch clock = new Stopwatch(); 
      clock.Start(); 
      for (int i = 0; i < reps; i++) 
      { 
       CSharpStackArray.stackAlloc(size); 
      } 
      clock.Stop(); 
      // elapsed is a pre-formatted string at this point.
      string elapsed = clock.Elapsed.TotalMilliseconds.ToString("#,##0.####"); 
      string description = "C# stackalloc"; 
      Console.WriteLine("{0} ({1} bytes, {2} reps): {3:#,##0.####}ms", new object[] 
      { 
       description, 
       size, 
       reps, 
       elapsed 
      }); 
      Console.ReadKey(); 
     } 

     // The size is converted to an unsigned native integer and multiplied by 1 in a
     // checked context; the result of the allocation is held only in a temporary.
     public unsafe static void stackAlloc(int arraySize) 
     { 
      IntPtr arg_06_0 = stackalloc byte[checked(unchecked((UIntPtr)arraySize) * 1)]; 
     } 
    } 
} 

F#版IL - 字節分配

// IL of the F# stackAlloc (byte allocation): computes x * sizeof(Byte), calls
// localloc, and stores the resulting address in local 0.
.method public static 
    void stackAlloc (
     int32 x 
    ) cil managed noinlining 
{ 
    // Method begins at RVA 0x2050 
    // Code size 13 (0xd) 
    .maxstack 4 
    // NOTE(review): per ECMA-335, when a method's `init` flag is set the block returned
    // by localloc is zero-initialized as well - possibly relevant to the timing; confirm.
    .locals init (
     [0] native int ints 
    ) 

    IL_0000: nop 
    IL_0001: ldarg.0 // push x
    IL_0002: sizeof [mscorlib]System.Byte // push the size of System.Byte
    IL_0008: mul // x * sizeof(Byte), no overflow check
    IL_0009: localloc // allocate that many bytes; pushes the address
    IL_000b: stloc.0 // store the address in local `ints`
    IL_000c: ret 
} // end of method FSharpStackArray::stackAlloc 

C#版IL - 字節分配

// IL of the C# stackAlloc (byte allocation): converts arraySize to an unsigned native
// integer, multiplies it by the constant 1 with overflow checking, calls localloc,
// and discards the resulting address. The method declares no locals.
.method public hidebysig static 
    void stackAlloc (
     int32 arraySize 
    ) cil managed 
{ 
    // Method begins at RVA 0x2094 
    // Code size 8 (0x8) 
    .maxstack 8 

    IL_0000: ldarg.0 // push arraySize
    IL_0001: conv.u // convert to unsigned native int
    IL_0002: ldc.i4.1 // push the constant 1
    IL_0003: mul.ovf.un // unsigned multiply with overflow check
    IL_0004: localloc // allocate that many bytes; pushes the address
    IL_0006: pop // discard the address
    IL_0007: ret 
} // end of method CSharpStackArray::stackAlloc 

更新F#IL - IntPtr的分配

// IL of the F# stackAlloc (IntPtr allocation): same shape as the byte version, but
// the element size comes from sizeof System.IntPtr.
.method public static 
    void stackAlloc (
     int32 x 
    ) cil managed noinlining 
{ 
    // Method begins at RVA 0x2050 
    // Code size 13 (0xd) 
    .maxstack 4 
    // NOTE(review): per ECMA-335, when a method's `init` flag is set the block returned
    // by localloc is zero-initialized as well - possibly relevant to the timing; confirm.
    .locals init (
     [0] native int ints 
    ) 

    IL_0000: nop 
    IL_0001: ldarg.0 // push x
    IL_0002: sizeof [mscorlib]System.IntPtr // push the size of System.IntPtr
    IL_0008: mul // x * sizeof(IntPtr), no overflow check
    IL_0009: localloc // allocate that many bytes; pushes the address
    IL_000b: stloc.0 // store the address in local `ints`
    IL_000c: ret 
} // end of method FSharpStackArray::stackAlloc 

更新C#IL - IntPtr的分配

// IL of the C# stackAlloc (IntPtr allocation): here the C# compiler also emits a
// sizeof instruction, but still uses the overflow-checked multiply and discards the address.
.method public hidebysig static 
    void stackAlloc (
     int32 arraySize 
    ) cil managed 
{ 
    // Method begins at RVA 0x2415 
    // Code size 13 (0xd) 
    .maxstack 8 

    IL_0000: ldarg.0 // push arraySize
    IL_0001: conv.u // convert to unsigned native int
    IL_0002: sizeof [mscorlib]System.IntPtr // push the size of System.IntPtr
    IL_0008: mul.ovf.un // unsigned multiply with overflow check
    IL_0009: localloc // allocate that many bytes; pushes the address
    IL_000b: pop // discard the address
    IL_000c: ret 
} // end of method CSharpStackArray::stackAlloc 
+2

你可以顯示所有的'stackAlloc'方法的實際IL? –

+0

嗯,可能是 sizeof [mscorlib]System.Byte 或 nop 造成的?循環中是否還有一個缺陷? – Researcher

+0

'sizeof'肯定起作用,但我覺得'mul'更重要。 –

回答

2

感謝大家的這種幫助。

答案是C#編譯器沒有把指針存儲到局部變量中,因爲分配的內存從未被使用。此外,沒有「sizeof」以及使用不同的「mul」指令,又讓C#多了一點小優勢。

F#彙編(IL)- 差異已用註釋標出

// IL of the F# stackAlloc (byte allocation). The starred comments mark every point
// where this listing differs from the C# version.
.method public static 
    void stackAlloc (
     int32 x 
    ) cil managed noinlining 
{ 
    // Method begins at RVA 0x2050 
    // Code size 13 (0xd) 
    .maxstack 4 
    // NOTE(review): per ECMA-335, when a method's `init` flag is set the block returned
    // by localloc is zero-initialized as well - possibly relevant to the timing; confirm.
    .locals init (//***** Not in C# Version *****// 
     [0] native int ints 
    ) 

    IL_0000: nop 
    IL_0001: ldarg.0 
    IL_0002: sizeof [mscorlib]System.Byte //***** C# just uses "1" *****// 
    IL_0008: mul //***** C# uses "mul.ovf.un" *****// 
    IL_0009: localloc 
    IL_000b: stloc.0 //***** Not in C# Version *****// 
    IL_000c: ret 
} // end of method FSharpStackArray::stackAlloc 

C#彙編(IL)- 差異已用註釋標出

// IL of the C# stackAlloc (byte allocation). The starred comments mark the points
// where this listing differs from the F# version; the address produced by localloc
// is popped rather than stored.
.method public hidebysig static 
    void stackAlloc (
     int32 arraySize 
    ) cil managed 
{ 
    // Method begins at RVA 0x2094 
    // Code size 8 (0x8) 
    .maxstack 8 

    IL_0000: ldarg.0 
    IL_0001: conv.u 
    IL_0002: ldc.i4.1 //***** F# uses sizeof [mscorlib]System.Byte *****// 
    IL_0003: mul.ovf.un //***** F# uses "mul" *****// 
    IL_0004: localloc 
    IL_0006: pop 
    IL_0007: ret 
} // end of method CSharpStackArray::stackAlloc 

這項工作教會了我幾件事情:

  1. 編譯器會進行優化。顯然,不同語言中相同的高級代碼可能會編譯出完全不同的機器指令。
  2. 在對 .NET 語言進行基準測試時,您可以閱讀中間語言(IL)代碼以瞭解實際發生了什麼。可以使用ILSpy來查看。
  3. 您可以修改IL代碼,並使用ilasm.exe重新編譯。
  4. C#編譯器在刪除不必要的代碼方面做得更好。一旦你給分配的內存中的每個字節都賦了值,兩者的性能就會像最初預期的那樣非常接近。

最終的F#代碼

#nowarn "9" 

open Microsoft.FSharp.NativeInterop 
open System 
open System.Diagnostics  
open System.Runtime.CompilerServices   

/// Stack-allocates `x` bytes and writes `byte i` into slot `i` for every index
/// from 0 to x - 1, so that the allocated memory is actually touched.
/// Marked NoInlining so the JIT does not inline it into the timing loop.
[<MethodImpl(MethodImplOptions.NoInlining)>]
let stackAlloc x =
    let buffer : nativeptr<byte> = NativePtr.stackalloc x
    for offset in 0 .. x - 1 do
        // Same store as NativePtr.set: advance the pointer, then write one byte.
        NativePtr.write (NativePtr.add buffer offset) (byte offset)

/// Benchmark driver: echoes the command-line arguments, performs one untimed
/// call to stackAlloc, times a fixed number of further calls and prints the
/// elapsed milliseconds. Waits for a key press and returns exit code 0.
[<EntryPoint>]
let main argv =
    printfn "%A" argv

    let byteCount = 8192
    let iterations = 10000

    // Untimed first call so that JIT compilation is excluded from the measurement.
    stackAlloc byteCount

    let timer = Stopwatch.StartNew()
    for _ in 1 .. iterations do
        stackAlloc byteCount
    timer.Stop()

    Console.WriteLine(
        "{0} ({1} bytes, {2} reps): {3:#,##0.####}ms",
        "F# NativePtr.stackalloc",
        byteCount,
        iterations,
        timer.Elapsed.TotalMilliseconds)

    Console.ReadKey() |> ignore
    0

最後的C#代碼

using System; 
using System.Diagnostics; 

namespace CSharpStackArray
{
    // Imported inside the namespace so this listing stays self-contained.
    using System.Runtime.CompilerServices;

    /// <summary>
    /// Console micro-benchmark that times repeated stack allocations in which
    /// every allocated byte is written.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Makes one untimed call, then times <c>reps</c> calls of
        /// <see cref="stackAlloc"/> and prints the elapsed milliseconds.
        /// </summary>
        static void Main(string[] args)
        {
            int size = 8192;
            int reps = 10000;

            stackAlloc(size); // untimed first call so JIT compilation is not measured
            Stopwatch clock = Stopwatch.StartNew();
            for (int i = 0; i < reps; i++)
            {
                stackAlloc(size);
            }
            clock.Stop();

            // Keep the value numeric so the {3:#,##0.####} specifier below does the
            // formatting; pre-formatting it into a string made that specifier dead.
            double elapsed = clock.Elapsed.TotalMilliseconds;
            string description = "C# stackalloc";
            Console.WriteLine("{0} ({1} bytes, {2} reps): {3:#,##0.####}ms", description, size, reps, elapsed);
            Console.ReadKey();
        }

        /// <summary>
        /// Stack-allocates <paramref name="arraySize"/> bytes and writes
        /// <c>(byte)i</c> into each slot so the allocation is actually used.
        /// </summary>
        /// <param name="arraySize">Number of bytes to allocate and fill.</param>
        /// <remarks>
        /// Marked NoInlining to match the F# benchmark, whose stackAlloc carries
        /// the same attribute, so both measurements include a real method call.
        /// </remarks>
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static unsafe void stackAlloc(int arraySize)
        {
            byte* pArr = stackalloc byte[arraySize];
            for (int i = 0; i < arraySize; i++)
            {
                pArr[i] = (byte)i;
            }
        }
    }
}