蠻力旅行推銷員：爲什麼Haskell比C慢得多？

我最初編寫了一個功能強力搜索（城市的ADT表示，城市的元組作爲距離數組的索引，懶洋洋地用Data.List.permutations產生排列並以嚴格的左邊摺疊消耗它們），但那是很痛苦的緩慢。我設法通過以更緊迫的方式重寫它，並使用Heap算法進行排列，將其速度提高了3倍，但直接將結果轉換爲（寫得不好）C的速度更快了10倍！到底是怎麼回事？蠻力旅行推銷員：爲什麼Haskell比C慢得多？

{-# LANGUAGE TupleSections, BangPatterns, ForeignFunctionInterface #-} 

import Data.Array.Unboxed 
import Data.Array.Base (unsafeAt) 
import qualified Data.Vector.Unboxed.Mutable as M 
import qualified Data.Vector.Unboxed as V 
import Data.Word (Word) 
import Data.IORef 
import GHC.Arr (Ix(..)) 
import Control.Arrow 
import Control.Monad 

import Data.Array.Storable 
import Foreign.Ptr (Ptr) 
import Foreign.C (CInt) 

import Criterion.Main 

distances :: UArray Int Word 
distances = listArray (0, 255) [0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629, 
           0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556, 
           0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861, 
           0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196, 
           0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384, 
           0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103, 
           0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743, 
           0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708, 
           0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253, 
           0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138, 
           0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502, 
           0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335, 
           0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741, 
           0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543, 
           0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0] 

tsp_mut :: [Int] -> IO ([Int], Word) 
tsp_mut [] = error "empty list" 
tsp_mut cs = do 
    route <- V.thaw . V.fromList $ cs 
    let l = length cs -1 
     dis a b= unsafeAt distances $ 16*a + b 
     f (prev, !acc) c = (c, acc + dis prev c) 
     len = V.unsafeFreeze route >>= return . snd . (\v -> V.foldl' f (V.unsafeLast v, 0) v) 
    ref <- newIORef . (cs,) =<< len 
    let permut 0 = do 
       !l <- len 
       (_, ol) <- readIORef ref 
       when (l < ol) (writeIORef ref . (,l) . V.toList =<< V.freeze route) 
     permut n = let op = if odd n then const 0 else id 
        in forM_ [0..n] (\ i -> permut (n-1) >> M.unsafeSwap route (op i) n)    
    permut l >> readIORef ref 


foreign import ccall unsafe "tsp_c" 
     c_tsp_c :: Int -> Ptr CInt -> IO Word 
tsp_c :: [Int] -> IO ([Int], Word) 
tsp_c cs = do 
     let l=length cs 
     marr <- newListArray (0, l-1) $ map fromIntegral cs 
     r <- withStorableArray marr $ c_tsp_c l 
     list <-getElems marr 
     return (map fromIntegral list, fromIntegral r) 

main = defaultMain [ pertestIO "tsp_mut" tsp_mut, 
         pertestIO "tsp_c" tsp_c ]    
     where 
      pertestIO str f = bgroup str $ map (uncurry bench . (show &&& nfIO . (f . (`take` [0..15])))) [6..11]

這裏的C代碼：

#include <stdlib.h> 
#include <string.h> 

typedef unsigned int word; 

word distances[] = { 0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629, 
        0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556, 
        0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861, 
        0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196, 
        0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384, 
        0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103, 
        0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743, 
        0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708, 
        0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253, 
        0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138, 
        0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502, 
        0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335, 
        0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741, 
        0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543, 
        0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0}; 

word dist (int a, int b) { 
     return distances[16*a+b]; 
} 

int len (int cities[], int length) { 
    int l = dist(cities[length-1], cities[0]); 
    int i; 
    for (i=0; i < length-1; i++) { 
     l +=dist(cities[i], cities[i+1]); 
    } 
    return l; 
} 

int *route, *bestRoute; 
word minL; 
int sz, size, cntr; 

void permut (int n){ 
    if (n==0) { 
     cntr++; 
     int l=len(route, sz); 
     if (l<minL) { 
      memcpy(bestRoute, route,size); 
      minL=l; 
      } 
     return;  
     } 
    int i, swap, temp; 
    for (i=0; i<=n; i++) { 
     permut(n-1); 
     swap=n% 2 ? i : 0; 
     temp=route[swap]; 
     route[swap]=route[n]; 
     route[n]=temp; 
    }   
} 

word tsp_c (int length, int cities[]) { 
    size= length * sizeof(int); 
    cntr=0; 
    sz=length; 
    route = malloc(size); 
    bestRoute = malloc(size); 
    memcpy(route, cities, size); 
    memcpy(bestRoute, cities, size); 
    minL = len (cities, length); 
    permut(length-1); 
    memcpy(cities, bestRoute, length * sizeof(int)); 
    free(route); 
    free(bestRoute); 
    /* printf("permutations: %d", cntr); */ 
    return minL; 
}

我用GHC 7.8.3的Windows上的64位版本的-O2

來源

2014-11-25 willy-s

這似乎是一個有趣的（即使簡短）閱讀：http://www.quora.com/Is-Haskell-as-fast-as-C++-If-not-why-not – enhzflep 2014-11-25 07:49:10

+26

您的Haskell代碼不是相當於C代碼。它使用不同的數據結構，不同的控制結構，並分配很多。如果你想要相同的性能，請使用相同的代碼。或者用慣用的Haskell編寫（使用Vector） – 2014-11-25 09:07:30

剛剛用'Data.List.permutations'而不是'tsp_mut'來測試你的標準基準。結果？ Haskell的實現在〜2ns內完成，例如'tsp_c'需要大約10 ^（i-5）μs。你使用優化標誌嗎？ – Zeta 2014-11-25 12:50:42

引述哈斯克爾網站：

在不惜任何代價要求性能的應用程序中，或者當目標是詳細調整低級別算法，命令式語言如C很可能比哈斯克爾一個更好的選擇，正是因爲它提供了在計算進行具體的方式更親密控制 ...

另一種方式來看待在這裏：C'指令'更接近硬件，因此它本身具有優勢。當你從'高'級語言直接翻譯爲C時，這一點尤其明顯。

另一方面，高級語言的優勢在於您專注於更大的圖景，您可以（通常）更快地完成更多優化。

來源

2015-04-07 05:14:46

蠻力旅行推銷員：爲什麼Haskell比C慢得多？

回答

相關問題