Replacing multiprocessing pool.map with mpi4py

2017-03-27

I am a beginner with MPI and I am still working through the documentation, but there is very little material to go on when it comes to mpi4py. I have written code that currently uses the multiprocessing module to run on many cores, and I need to replace it with mpi4py so that I can run it on more than one node. My code is given below, both with the multiprocessing module and without it.

With multiprocessing:

import numpy as np
import multiprocessing
import random
import time


start_time = time.time()

E = 0.1 
M = 5 
n = 1000 
G = 1 
c = 1 
stretch = [10, 1] 


#Point-Distribution Generator Function 
def CDF_inv(x, e, m): 
    A = 1/(1 + np.log(m/e)) 
    if x == 1:
        return m
    elif 0 <= x <= A:
        return e * x/A
    elif A < x < 1:
        return e * np.exp((x/A) - 1)

#Elliptical point distribution Generator Function 

def get_coor_ellip(dist=CDF_inv, params=[E, M], stretch=stretch): 
    R = dist(random.random(), *params) 
    theta = random.random() * 2 * np.pi 
    return (R * np.cos(theta) * stretch[0], R * np.sin(theta) * stretch[1]) 


def get_dist_sq(x_array, y_array): 
    return x_array**2 + y_array**2 


#Function to obtain alpha 

def get_alpha(args): 
    zeta_list_part, M_list_part, X, Y = args 
    alpha_x = 0 
    alpha_y = 0 
    for key in range(len(M_list_part)):
        z_m_z_x = X - zeta_list_part[key][0]
        z_m_z_y = Y - zeta_list_part[key][1]
        dist_z_m_z = get_dist_sq(z_m_z_x, z_m_z_y)
        alpha_x += M_list_part[key] * z_m_z_x/dist_z_m_z
        alpha_y += M_list_part[key] * z_m_z_y/dist_z_m_z
    return (alpha_x, alpha_y) 

#The part of the process containing the loop that needs to be parallelised, where I use pool.map() 

if __name__ == '__main__': 
    # n processes, scale accordingly 
    num_processes = 10 
    pool = multiprocessing.Pool(processes=num_processes) 
    random_sample = [CDF_inv(x, E, M) 
        for x in [random.random() for e in range(n)]] 
    zeta_list = [get_coor_ellip() for e in range(n)] 
    x1, y1 = zip(*zeta_list) 
    zeta_list = np.column_stack((np.array(x1), np.array(y1))) 
    x = np.linspace(-3, 3, 100) 
    y = np.linspace(-3, 3, 100) 
    X, Y = np.meshgrid(x, y) 
    print(len(x)*len(y)*n, 'calculations to be carried out.')
    M_list = np.array([.001 for i in range(n)]) 
    # split zeta_list, M_list, X, and Y 
    zeta_list_split = np.array_split(zeta_list, num_processes, axis=0) 
    M_list_split = np.array_split(M_list, num_processes) 
    X_list = [X for e in range(num_processes)] 
    Y_list = [Y for e in range(num_processes)] 

    alpha_list = pool.map(
        get_alpha, zip(zeta_list_split, M_list_split, X_list, Y_list))
    alpha_x = 0
    alpha_y = 0
    for e in alpha_list:
        alpha_x += e[0] * 4 * G/(c**2)
        alpha_y += e[1] * 4 * G/(c**2)

print("%f seconds" % (time.time() - start_time)) 

Without multiprocessing:

import numpy as np
import random


E = 0.1
M = 5
G = 1
c = 1
n = 1000
M_list = [.1 for i in range(n)]

#Point-Distribution Generator Function 

def CDF_inv(x, e, m): 
    A = 1/(1 + np.log(m/e)) 
    if x == 1:
        return m
    elif 0 <= x <= A:
        return e * x/A
    elif A < x < 1:
        return e * np.exp((x/A) - 1)



random_sample = [CDF_inv(x, E, M) 
       for x in [random.random() for e in range(n)]] 
stretch = [5, 2] 

#Elliptical point distribution Generator Function 

def get_coor_ellip(dist=CDF_inv, params=[E, M], stretch=stretch): 
    R = dist(random.random(), *params) 
    theta = random.random() * 2 * np.pi 
    return (R * np.cos(theta) * stretch[0], R * np.sin(theta) * stretch[1]) 

#zeta_list is the list of coordinates of a distribution of points 
zeta_list = [get_coor_ellip() for e in range(n)] 
x1, y1 = zip(*zeta_list) 
zeta_list = np.column_stack((np.array(x1), np.array(y1))) 

#Creation of a X-Y Grid 
x = np.linspace(-3, 3, 100) 
y = np.linspace(-3, 3, 100) 
X, Y = np.meshgrid(x, y) 

def get_dist_sq(x_array, y_array): 
    return x_array**2 + y_array**2 


#Calculation of alpha, containing the loop that needs to be parallelised. 

alpha_x = 0 
alpha_y = 0 
for key in range(len(M_list)): 
    z_m_z_x = X - zeta_list[key][0] 
    z_m_z_y = Y - zeta_list[key][1] 
    dist_z_m_z = get_dist_sq(z_m_z_x, z_m_z_y) 
    alpha_x += M_list[key] * z_m_z_x/dist_z_m_z 
    alpha_y += M_list[key] * z_m_z_y/dist_z_m_z 
alpha_x *= 4 * G/(c**2) 
alpha_y *= 4 * G/(c**2) 

Basically, what my code does is first generate a list of points that follow a certain distribution. Then I apply an equation to obtain the quantity 'alpha', using the relations between the distances of the points. The part that needs to be parallelised is the single for loop involved in calculating alpha. What I want to do is to use mpi4py instead of multiprocessing for this, and I am not sure how to go about it.
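(To spell out why this loop is the natural thing to farm out: alpha is just a sum of per-point contributions, so partial sums over any split of the points add up to the full result. A small self-contained illustration of that property, using made-up toy arrays rather than the actual data above:)

import numpy as np

# Toy illustration only: splitting the points into chunks and adding the
# partial sums gives the same alpha as one big loop over all points.
M_toy = np.array([0.001] * 6)                     # made-up point masses
zeta_toy = np.random.rand(6, 2)                   # made-up point positions
X_toy, Y_toy = np.meshgrid(np.linspace(-1, 1, 4), np.linspace(-1, 1, 4))

def partial_alpha(zeta_chunk, m_chunk):
    ax = np.zeros_like(X_toy)
    ay = np.zeros_like(Y_toy)
    for (zx, zy), m in zip(zeta_chunk, m_chunk):
        dx, dy = X_toy - zx, Y_toy - zy
        d2 = dx**2 + dy**2
        ax += m * dx / d2
        ay += m * dy / d2
    return ax, ay

full_x, full_y = partial_alpha(zeta_toy, M_toy)            # all points at once
a_x, a_y = partial_alpha(zeta_toy[:3], M_toy[:3])          # first chunk
b_x, b_y = partial_alpha(zeta_toy[3:], M_toy[3:])          # second chunk
assert np.allclose(full_x, a_x + b_x) and np.allclose(full_y, a_y + b_y)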

Answer


Transforming the multiprocessing.map version to MPI can be done using scatter/gather. In your case it helps that you already prepare the input list with exactly one chunk per rank. The main difference is that, to begin with, all of the code gets executed by all ranks, so you must make everything that should be done only by the master rank 0 conditional.
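For reference, stripped of the physics, the bare scatter/gather pattern looks something like this minimal standalone sketch (not your script; the file name and numbers are made up, and it would be launched with e.g. mpiexec -n 4 python scatter_demo.py):

from mpi4py import MPI

comm = MPI.COMM_WORLD

# Only the master rank builds the list of work items, one item per rank.
if comm.rank == 0:
    work_list = [i * 10 for i in range(comm.size)]
else:
    work_list = None

my_work = comm.scatter(work_list, root=0)   # each rank receives one item
my_result = my_work + 1                     # each rank does its share

results = comm.gather(my_result, root=0)    # rank 0 collects all results
if comm.rank == 0:
    print(results)                          # [1, 11, 21, 31] with 4 ranks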

from mpi4py import MPI

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    if comm.rank == 0:
        random_sample = [CDF_inv(x, E, M)
                         for x in [random.random() for e in range(n)]]
        zeta_list = [get_coor_ellip() for e in range(n)]
        x1, y1 = zip(*zeta_list)
        zeta_list = np.column_stack((np.array(x1), np.array(y1)))
        x = np.linspace(-3, 3, 100)
        y = np.linspace(-3, 3, 100)
        X, Y = np.meshgrid(x, y)
        print(len(x)*len(y)*n, 'calculations to be carried out.')
        M_list = np.array([.001 for i in range(n)])
        # split zeta_list, M_list, X, and Y
        zeta_list_split = np.array_split(zeta_list, comm.size, axis=0)
        M_list_split = np.array_split(M_list, comm.size)
        X_list = [X for e in range(comm.size)]
        Y_list = [Y for e in range(comm.size)]
        work_list = list(zip(zeta_list_split, M_list_split, X_list, Y_list))
    else:
        work_list = None

    my_work = comm.scatter(work_list)
    my_alpha = get_alpha(my_work)

    alpha_list = comm.gather(my_alpha)
    if comm.rank == 0:
        alpha_x = 0
        alpha_y = 0
        for e in alpha_list:
            alpha_x += e[0] * 4 * G/(c**2)
            alpha_y += e[1] * 4 * G/(c**2)
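One practical difference from the multiprocessing version: the number of workers is no longer set inside the script (num_processes is gone, the splits use comm.size), it is chosen at launch time, e.g. mpiexec -n 10 python your_script.py, where your_script.py is just a placeholder for whatever the file is called. Every rank executes the whole script; only the parts guarded by comm.rank == 0 run on the master alone.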

This works fine as long as each processor gets a similar amount of work. If communication becomes an issue, you might want to split up the data generation among the processors instead of doing it all on the master rank 0, as sketched below.
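A rough sketch of that variant, assuming the constants and the helper functions get_coor_ellip and get_alpha from your script are defined above this block on every rank (the chunk-size arithmetic is just one way to split n evenly):

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD

# Every rank builds the (small) grid itself instead of receiving copies of it.
x = np.linspace(-3, 3, 100)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)

# Each rank generates only its own share of the n points locally; Python's
# random module seeds itself from OS entropy, so the ranks normally draw
# independent samples without any explicit coordination.
my_n = n // comm.size + (1 if comm.rank < n % comm.size else 0)
my_zeta = [get_coor_ellip() for _ in range(my_n)]
my_M = np.array([.001 for _ in range(my_n)])

my_alpha = get_alpha((my_zeta, my_M, X, Y))
alpha_list = comm.gather(my_alpha)          # only rank 0 receives the list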

Note: there is something slightly fishy about the code, e.g. alpha_[xy] ends up being an np.ndarray, and the serial version runs into an error.


Wow! This works perfectly, thank you! For now the data generation doesn't seem to be too time-consuming, but I will look into distributing it to the other processors as well. Also, alpha_[xy] isn't an actual variable; I use alpha_x and alpha_y to derive another quantity called the gradient. It seems to work correctly when I run it... – ThunderFlash