2017-06-19 121 views
0

在尖點,還有一個乘法來計算SPMV(稀疏矩陣向量乘法),其採用一降低和一個結合:對於任何矩陣真正縮放在Cusp中稀疏矩陣矢量乘法?

template <typename LinearOperator, 
      typename MatrixOrVector1, 
      typename MatrixOrVector2, 
      typename UnaryFunction, 
      typename BinaryFunction1, 
      typename BinaryFunction2> 
    void multiply(const LinearOperator& A, 
        const MatrixOrVector1& B, 
        MatrixOrVector2& C, 
        UnaryFunction initialize, 
        BinaryFunction1 combine, 
        BinaryFunction2 reduce); 

從好像定製的界面結合,減少應該是可能的/矢量乘法。我認爲cusp支持使用其他的聯合和減少推力/ functional.h中定義的函數除了乘法和加計算spmv。例如,我可以使用thrust :: plus來代替乘法的原始組合函數(即乘法)嗎? 我想,這個縮放spmv也支持coo,csr,dia,hyb格式的稀疏矩陣。

但是,當我在a.cu中測試矩陣A處於coo格式的以下示例時,我得到了錯誤的答案。 它使用plus運算符進行組合。我用cmd編譯它:nvcc a.cu -o a到。

#include <cusp/csr_matrix.h> 
#include <cusp/monitor.h> 
#include <cusp/multiply.h> 
#include <cusp/print.h> 
#include <cusp/krylov/cg.h> 

int main(void) 
{ 
    // COO format in host memory 
    int host_I[13] = {0,0,1,1,2,2,2,3,3,3,4,5,5}; // COO row indices 
    int host_J[13] = {0,1,1,2,2,4,6,3,4,5,5,5,6}; // COO column indices 
    int host_V[13] = {1,1,1,1,1,1,1,1,1,1,1,1,1}; 
    // x and y arrays in host memory 
    int host_x[7] = {1,1,1,1,1,1,1}; 
    int host_y[6] = {0,0,0,0,0,0}; 

    // allocate device memory for COO format 
    int * device_I; 
    cudaMalloc(&device_I, 13 * sizeof(int)); 
    int * device_J; 
    cudaMalloc(&device_J, 13 * sizeof(int)); 
    int * device_V; 
    cudaMalloc(&device_V, 13 * sizeof(int)); 

    // allocate device memory for x and y arrays 
    int * device_x; 
    cudaMalloc(&device_x, 7 * sizeof(int)); 
    int * device_y; 
    cudaMalloc(&device_y, 6 * sizeof(int)); 

    // copy raw data from host to device 
    cudaMemcpy(device_I, host_I, 13 * sizeof(int), cudaMemcpyHostToDevice); 
    cudaMemcpy(device_J, host_J, 13 * sizeof(int), cudaMemcpyHostToDevice); 
    cudaMemcpy(device_V, host_V, 13 * sizeof(int), cudaMemcpyHostToDevice); 
    cudaMemcpy(device_x, host_x, 7 * sizeof(int), cudaMemcpyHostToDevice); 
    cudaMemcpy(device_y, host_y, 6 * sizeof(int), cudaMemcpyHostToDevice); 

    // matrices and vectors now reside on the device 

    // *NOTE* raw pointers must be wrapped with thrust::device_ptr! 
    thrust::device_ptr<int> wrapped_device_I(device_I); 
    thrust::device_ptr<int> wrapped_device_J(device_J); 
    thrust::device_ptr<int> wrapped_device_V(device_V); 
    thrust::device_ptr<int> wrapped_device_x(device_x); 
    thrust::device_ptr<int> wrapped_device_y(device_y); 

    // use array1d_view to wrap the individual arrays 
    typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceIndexArrayView; 
    typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceValueArrayView; 

    DeviceIndexArrayView row_indices (wrapped_device_I, wrapped_device_I + 13); 
    DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + 13); 
    DeviceValueArrayView values  (wrapped_device_V, wrapped_device_V + 13); 
    DeviceValueArrayView x    (wrapped_device_x, wrapped_device_x + 7); 
    DeviceValueArrayView y    (wrapped_device_y, wrapped_device_y + 6); 

    // combine the three array1d_views into a coo_matrix_view 
    typedef cusp::coo_matrix_view<DeviceIndexArrayView, 
      DeviceIndexArrayView, 
      DeviceValueArrayView> DeviceView; 

    // construct a coo_matrix_view from the array1d_views 
    DeviceView A(6, 7, 13, row_indices, column_indices, values); 

    std::cout << "\ndevice coo_matrix_view" << std::endl; 
    cusp::print(A); 
    cusp::constant_functor<int> initialize; 
    thrust::plus<int> combine; 
    thrust::plus<int> reduce; 
    cusp::multiply(A , x , y , initialize, combine, reduce); 
    std::cout << "\nx array" << std::endl; 
    cusp::print(x); 
    std::cout << "\n y array, y = A * x" << std::endl; 
    cusp::print(y); 

    cudaMemcpy(host_y, device_y, 6 * sizeof(int), cudaMemcpyDeviceToHost); 

    // free device arrays 
    cudaFree(device_I); 
    cudaFree(device_J); 
    cudaFree(device_V); 
    cudaFree(device_x); 
    cudaFree(device_y); 

    return 0; 
} 

我得到了下面的答案。

device coo_matrix_view 
sparse matrix <6, 7> with 13 entries 
       0    0  (1) 
       0    1  (1) 
       1    1  (1) 
       1    2  (1) 
       2    2  (1) 
       2    4  (1) 
       2    6  (1) 
       3    3  (1) 
       3    4  (1) 
       3    5  (1) 
       4    5  (1) 
       5    5  (1) 
       5    6  (1) 
x array 
array1d <7> 

     (1) 
     (1) 
     (1) 
     (1) 
     (1) 
     (1) 
     (1) 
y array, y = A * x 
array1d <6> 
     (4) 
     (4) 
     (6) 
     (6) 
     (2) 
     (631) 

向量y我很奇怪,我認爲正確的回答y應該是:

[9, 
9, 
10, 
10, 
8, 
9] 

因此,我不知道是否該替換合併和減少的可以適用於其他稀疏矩陣格式,如coo。或者,也許我上面寫的代碼是不正確的調用乘法。 你能給我一些幫助嗎?任何信息都會有幫助。

謝謝!

回答

1

從您的示例的代碼和工具的非常簡短的閱讀中,這似乎是在CUSP中嚴重破壞導致此用法案例的問題。該代碼只是偶然地對於組合運算符是乘法的情況無法正常工作,因爲它使用零元素執行的虛假操作不影響還原操作(即,它只是求和大量的附加零)。