10
我最初編寫了一個功能強力搜索(城市的ADT表示,城市的元組作爲距離數組的索引,懶洋洋地用Data.List.permutations
產生排列並以嚴格的左邊摺疊消耗它們),但那是很痛苦的緩慢。我設法通過以更緊迫的方式重寫它,並使用Heap算法進行排列,將其速度提高了3倍,但直接將結果轉換爲(寫得不好)C的速度更快了10倍!到底是怎麼回事?蠻力旅行推銷員:爲什麼Haskell比C慢得多?
{-# LANGUAGE TupleSections, BangPatterns, ForeignFunctionInterface #-}
import Data.Array.Unboxed
import Data.Array.Base (unsafeAt)
import qualified Data.Vector.Unboxed.Mutable as M
import qualified Data.Vector.Unboxed as V
import Data.Word (Word)
import Data.IORef
import GHC.Arr (Ix(..))
import Control.Arrow
import Control.Monad
import Data.Array.Storable
import Foreign.Ptr (Ptr)
import Foreign.C (CInt)
import Criterion.Main
distances :: UArray Int Word
distances = listArray (0, 255) [0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0]
tsp_mut :: [Int] -> IO ([Int], Word)
tsp_mut [] = error "empty list"
tsp_mut cs = do
route <- V.thaw . V.fromList $ cs
let l = length cs -1
dis a b= unsafeAt distances $ 16*a + b
f (prev, !acc) c = (c, acc + dis prev c)
len = V.unsafeFreeze route >>= return . snd . (\v -> V.foldl' f (V.unsafeLast v, 0) v)
ref <- newIORef . (cs,) =<< len
let permut 0 = do
!l <- len
(_, ol) <- readIORef ref
when (l < ol) (writeIORef ref . (,l) . V.toList =<< V.freeze route)
permut n = let op = if odd n then const 0 else id
in forM_ [0..n] (\ i -> permut (n-1) >> M.unsafeSwap route (op i) n)
permut l >> readIORef ref
foreign import ccall unsafe "tsp_c"
c_tsp_c :: Int -> Ptr CInt -> IO Word
tsp_c :: [Int] -> IO ([Int], Word)
tsp_c cs = do
let l=length cs
marr <- newListArray (0, l-1) $ map fromIntegral cs
r <- withStorableArray marr $ c_tsp_c l
list <-getElems marr
return (map fromIntegral list, fromIntegral r)
main = defaultMain [ pertestIO "tsp_mut" tsp_mut,
pertestIO "tsp_c" tsp_c ]
where
pertestIO str f = bgroup str $ map (uncurry bench . (show &&& nfIO . (f . (`take` [0..15])))) [6..11]
這裏的C代碼:
#include <stdlib.h>
#include <string.h>
typedef unsigned int word;
word distances[] = { 0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0};
word dist (int a, int b) {
return distances[16*a+b];
}
int len (int cities[], int length) {
int l = dist(cities[length-1], cities[0]);
int i;
for (i=0; i < length-1; i++) {
l +=dist(cities[i], cities[i+1]);
}
return l;
}
int *route, *bestRoute;
word minL;
int sz, size, cntr;
void permut (int n){
if (n==0) {
cntr++;
int l=len(route, sz);
if (l<minL) {
memcpy(bestRoute, route,size);
minL=l;
}
return;
}
int i, swap, temp;
for (i=0; i<=n; i++) {
permut(n-1);
swap=n% 2 ? i : 0;
temp=route[swap];
route[swap]=route[n];
route[n]=temp;
}
}
word tsp_c (int length, int cities[]) {
size= length * sizeof(int);
cntr=0;
sz=length;
route = malloc(size);
bestRoute = malloc(size);
memcpy(route, cities, size);
memcpy(bestRoute, cities, size);
minL = len (cities, length);
permut(length-1);
memcpy(cities, bestRoute, length * sizeof(int));
free(route);
free(bestRoute);
/* printf("permutations: %d", cntr); */
return minL;
}
我用GHC 7.8.3的Windows上的64位版本的-O2
這似乎是一個有趣的(即使簡短)閱讀:http://www.quora.com/Is-Haskell-as-fast-as-C++-If-not-why-not – enhzflep 2014-11-25 07:49:10
您的Haskell代碼不是相當於C代碼。它使用不同的數據結構,不同的控制結構,並分配很多。如果你想要相同的性能,請使用相同的代碼。或者用慣用的Haskell編寫(使用Vector) – 2014-11-25 09:07:30
剛剛用'Data.List.permutations'而不是'tsp_mut'來測試你的標準基準。結果? Haskell的實現在〜2ns內完成,例如'tsp_c'需要大約10 ^(i-5)μs。你使用優化標誌嗎? – Zeta 2014-11-25 12:50:42