2014-11-08 36 views
4

我想加快用Python和NumPy編寫的代碼。我使用Gray-Skott算法(http://mrob.com/pub/comp/xmorphia/index.html)作爲反應擴散模型,但使用Numba和Cython的算法更慢!它可以加速嗎?提前致謝!反應擴散算法中的Numba或Cython加速

的Python + NumPy的

def GrayScott(counts, Du, Dv, F, k): 
    n = 300 
    U = np.zeros((n+2,n+2), dtype=np.float_) 
    V = np.zeros((n+2,n+2), dtype=np.float_) 
    u, v = U[1:-1,1:-1], V[1:-1,1:-1] 

    r = 20 
    u[:] = 1.0 
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50 
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25 
    u += 0.15*np.random.random((n,n)) 
    v += 0.15*np.random.random((n,n)) 

    for i in range(counts): 
     Lu = (    U[0:-2,1:-1] + 
       U[1:-1,0:-2] - 4*U[1:-1,1:-1] + U[1:-1,2:] + 
           U[2: ,1:-1]) 
     Lv = (    V[0:-2,1:-1] + 
       V[1:-1,0:-2] - 4*V[1:-1,1:-1] + V[1:-1,2:] + 
           V[2: ,1:-1]) 
     uvv = u*v*v 
     u += Du*Lu - uvv + F*(1 - u) 
     v += Dv*Lv + uvv - (F + k)*v 

    return V 

Numba

from numba import jit, autojit 

@autojit 
def numbaGrayScott(counts, Du, Dv, F, k): 
    n = 300 
    U = np.zeros((n+2,n+2), dtype=np.float_) 
    V = np.zeros((n+2,n+2), dtype=np.float_) 
    u, v = U[1:-1,1:-1], V[1:-1,1:-1] 

    r = 20 
    u[:] = 1.0 
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50 
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25 
    u += 0.15*np.random.random((n,n)) 
    v += 0.15*np.random.random((n,n)) 

    Lu = np.zeros_like(u) 
    Lv = np.zeros_like(v) 

    for i in range(counts): 
     for row in range(n): 
      for col in range(n): 
       Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1] 
       Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1] 

     uvv = u*v*v 
     u += Du*Lu - uvv + F*(1 - u) 
     v += Dv*Lv + uvv - (F + k)*v 

    return V 

用Cython

%%cython 
cimport cython 
import numpy as np 
cimport numpy as np 

cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k): 
    cdef int n = 300 
    cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_) 
    cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_) 
    cdef np.ndarray u = U[1:-1,1:-1] 
    cdef np.ndarray v = V[1:-1,1:-1] 

    cdef int r = 20 
    u[:] = 1.0 
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50 
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25 
    u += 0.15*np.random.random((n,n)) 
    v += 0.15*np.random.random((n,n)) 

    cdef np.ndarray Lu = np.zeros_like(u) 
    cdef np.ndarray Lv = np.zeros_like(v) 
    cdef int i, row, col 
    cdef np.ndarray uvv 

    for i in range(counts): 
     for row in range(n): 
      for col in range(n): 
       Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1] 
       Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1] 

     uvv = u*v*v 
     u += Du*Lu - uvv + F*(1 - u) 
     v += Dv*Lv + uvv - (F + k)*v 

    return V 

用例:

GrayScott(4000, 0.16, 0.08, 0.04, 0.06) 
+0

Numba能夠顯着加速代碼嗎? – Ohm 2015-09-12 00:40:02

回答

6

下面是步驟,用來加快用Cython版本:

  • cdef np.ndarray doen't讓元素訪問速度更快,你需要用Cython使用memoryview:cdef double[:, ::1] bU = U

  • 關閉boundscheckwraparound

  • 在for-loop中執行所有計算。

下面是修改的代碼用Cython:

%%cython 
#cython: boundscheck=False 
#cython: wraparound=False 
cimport cython 
import numpy as np 
cimport numpy as np 

cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k): 
    cdef int n = 300 
    cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_) 
    cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_) 
    cdef np.ndarray u = U[1:-1,1:-1] 
    cdef np.ndarray v = V[1:-1,1:-1] 

    cdef int r = 20 
    u[:] = 1.0 
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50 
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25 
    u += 0.15*np.random.random((n,n)) 
    v += 0.15*np.random.random((n,n)) 

    cdef np.ndarray Lu = np.zeros_like(u) 
    cdef np.ndarray Lv = np.zeros_like(v) 
    cdef int i, c, r1, c1, r2, c2 
    cdef double uvv 

    cdef double[:, ::1] bU = U 
    cdef double[:, ::1] bV = V 
    cdef double[:, ::1] bLu = Lu 
    cdef double[:, ::1] bLv = Lv 

    for i in range(counts): 
     for r in range(n): 
      r1 = r + 1 
      r2 = r + 2 
      for c in range(n): 
       c1 = c + 1 
       c2 = c + 2 
       bLu[r,c] = bU[r1,c2] + bU[r1,c] + bU[r2,c1] + bU[r,c1] - 4*bU[r1,c1] 
       bLv[r,c] = bV[r1,c2] + bV[r1,c] + bV[r2,c1] + bV[r,c1] - 4*bV[r1,c1] 

     for r in range(n): 
      r1 = r + 1 
      for c in range(n): 
       c1 = c + 1 
       uvv = bU[r1,c1]*bV[r1,c1]*bV[r1,c1] 
       bU[r1,c1] += Du*bLu[r,c] - uvv + F*(1 - bU[r1,c1]) 
       bV[r1,c1] += Dv*bLv[r,c] + uvv - (F + k)*bV[r1,c1] 

    return V 

它是11倍比numpy的版本快。

+0

不錯的一個,我的+1。 – doug 2014-11-09 05:31:32

+0

非常感謝!你的幫助使我的計算更加美好:) – snotna 2014-11-09 14:58:18

5

除了循環和龐大的操作量,在你的情況下最有可能的殺死性能是數組分配。我不知道爲什麼你的Numba和Cython版本不符合你的期望,但是你可以通過在原地進行所有操作(例如替換當前的循環)來更快地讓你的numpy代碼快兩倍(以一些可讀性爲代價)搭配:

Lu, Lv, uvv = np.empty_like(u), np.empty_like(v), np.empty_like(u) 

for i in range(counts): 
    Lu[:] = u 
    Lu *= -4 
    Lu += U[:-2,1:-1] 
    Lu += U[1:-1,:-2] 
    Lu += U[1:-1,2:] 
    Lu += U[2:,1:-1] 
    Lu *= Du 

    Lv[:] = v 
    Lv *= -4 
    Lv += V[:-2,1:-1] 
    Lv += V[1:-1,:-2] 
    Lv += V[1:-1,2:] 
    Lv += V[2:,1:-1] 
    Lv *= Dv 

    uvv[:] = u 
    uvv *= v 
    uvv *= v 
    Lu -= uvv 
    Lv += uvv 

    u *= 1 - F 
    u += F 
    u += Lu 

    v *= 1 - F - k 
    v += Lv 
+0

非常有用的技巧 - 在我的機器上,它提供了大約40-50%的加速。但當然可讀性會丟失... – snotna 2014-11-09 15:08:46