2016-01-21 243 views
0

我想寫一些c代碼來使用scalapack中的pzheevd例程來查找大型矩陣的所有特徵值。我有以下簡單的例子,它已經硬編碼了一個簡單的4x4矩陣。使用單個進程,2個進程或4個我可以得到正確的特徵值(-2.0396,-2,2,2.0396)。然而,使用不相稱的數字(如3)返回的特徵值是不正確的,即使它看起來像所有矩陣元素被正確分配。Scalapack返回錯誤的答案

要構建的代碼使用:

mpicc -g test.c -llapack -o test -lblacs-openmpi -lblacsCinit-openmpi -L/usr/local/lib -lscalapack -lgfortran -lm -llapack -lblas 

實例的作品:

$ mpirun -n 1 ./test 
Info: 0 
Eigenvalues: -2.039608 -2.000000 2.000000 2.039608 

並沒有一個:

$ mpirun -n 3 ./test 
Info: 0 
Eigenvalues: -2.223729 -1.805190 2.003994 2.024926 

,代碼:

#include <stdio.h> 
#include <math.h> 
#include <stdlib.h> 
#include "mpi.h" 

typedef struct complex16{double dr,di;} complex16; 

extern void Cblacs_get(int context, int what, int *val); 
extern void Cblacs_gridinit(int* context, char* order, 
          int nproc_rows, int nproc_cols); 
extern void Cblacs_pcoord(int context, int p, 
          int* my_proc_row, int* my_proc_col); 
extern void Cblacs_exit(int doneflag); 
extern void descinit_(int* descrip, int* m, int* n, 
         int* row_block_size, int* col_block_size, 
         int* first_proc_row, int* first_proc_col, 
         int* blacs_grid, int* leading_dim, 
         int* error_info); 
extern int numroc_(int* order, int* block_size, 
        int* my_process_row_or_col, int* first_process_row_or_col, 
        int* nproc_rows_or_cols); 
extern void pzheevd_(char *jobz, char *uplo, int *n, complex16 *a, int *ia, int *ja, int *desca, double *w, complex16 *z, int *iz, int *jz, int *descz, complex16 *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info); 

main(int argc, char** argv) { 
    int my_rank, size, m, n; 
    int row_block_size=1, col_block_size=1; 
    int nproc_rows, nproc_cols; 
    int my_process_row, my_process_col; 
    int blacs_grid; 
    int first_proc_row = 0, first_proc_col = 0; 
    int descrip[9], info, nlocal_rows, nlocal_cols; 
    int i,j; 
    int leading_dim; 
    m=4; n=4; 

    MPI_Init(&argc, &argv); 
    MPI_Comm_size(MPI_COMM_WORLD, &size); 
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 

    nproc_rows = sqrt(size); 
    nproc_cols = size/nproc_rows; 

    Cblacs_get(0, 0, &blacs_grid); 
    Cblacs_gridinit(&blacs_grid, "R", nproc_rows, nproc_cols); 
    Cblacs_pcoord(blacs_grid, my_rank, &my_process_row,&my_process_col); 

    nlocal_rows = numroc_(&m, &row_block_size, &my_process_row, &first_proc_row, &nproc_rows); 
    nlocal_cols = numroc_(&n, &col_block_size, &my_process_col, &first_proc_col, &nproc_cols); 
    leading_dim = numroc_(&m, &col_block_size, &my_process_row, &first_proc_col, &nproc_rows); 
    descinit_(descrip, &m, &n, &row_block_size, &col_block_size, &first_proc_row, &first_proc_col, &blacs_grid, &leading_dim, &info); 

    complex16 *a, *z; 
    double *w; 
    a = (complex16*)malloc(nlocal_rows * nlocal_cols * sizeof(complex16)); 
    z = (complex16*)malloc(nlocal_rows * nlocal_cols * sizeof(complex16)); 
    w = (double*)malloc(n * sizeof(double)); 

    double *mat_els; 
    mat_els = (double *)malloc(n*m * sizeof(double)); 
    mat_els[0] = -2.0;mat_els[1]=-0.2; mat_els[2] = -0.2; mat_els[3] = 0.0; 
    mat_els[4] = -0.2;mat_els[5]=2.0; mat_els[6] = 0.0; mat_els[7] = -0.2; 
    mat_els[8] = -0.2;mat_els[9]=0.0; mat_els[10] = 2.0; mat_els[11] = -0.2; 
    mat_els[12] = 0.0;mat_els[13]=-0.2; mat_els[14] = -0.2; mat_els[15] = -2.0; 

    int full_row, full_col; 
    for(i = 0; i < nlocal_rows; i++) 
    { 
     for(j = 0; j < nlocal_cols; j++) 
     { 
      full_row = i * nproc_rows + my_process_row; 
      full_col = j * nproc_cols + my_process_col; 
      a[(i*nlocal_cols + j)].dr = mat_els[full_row * m + full_col]; 
      a[(i*nlocal_cols + j)].di = 0.0; 
     } 
    } 
    char jobz = 'V'; // N not implemented yet. 
    char uplo = 'U'; 
    int ai = 1, aj = 1, zi = 1, zj = 1; 

    double *rwork; 
    complex16 *work; 
    int *iwork; 
    int lwork, lrwork, liwork; 

    rwork = (double*)malloc(2 * sizeof(double)); 
    work = (complex16*)malloc(2 * sizeof(complex16)); 
    iwork = (int*)malloc(2 * sizeof(int)); 

    lwork = -1; lrwork = -1; liwork = -1; 
    pzheevd_(&jobz, &uplo, &n, a, &ai, &aj, descrip, w, z, &zi, &zj, descrip, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); 
    lwork = work[0].dr; lrwork = rwork[0]; liwork = iwork[0]; 
    free(work); free(rwork); free(iwork); 

    rwork = (double*)malloc(lrwork * sizeof(double)); 
    work = (complex16*)malloc(lwork * sizeof(complex16)); 
    iwork = (int*)malloc(liwork * sizeof(int)); 
    pzheevd_(&jobz, &uplo, &n, a, &ai, &aj, descrip, w, z, &zi, &zj, descrip, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); 

    if (my_rank == 0) 
    { 
     printf("Info: %d\n", info); 
     printf("Eigenvalues: "); 
     for(i = 0; i < n;i++) 
      { 
      printf("%lf ", w[i]); 
      } 
     printf("\n"); 
    } 
    free(w);free(z);free(a); 
    free(work);free(iwork);free(rwork); 
    Cblacs_exit(1); 
    MPI_Finalize(); 
} 

回答

1

我發現了我應該早點猜到的解決方案。矩陣元素應該使用Fortran列順序格式而不是C樣式行順序格式來提供。將for循環中的元素分配更改爲以下內容可以解決問題,並且現在可以爲所有數量的進程(可以形成有效的網格)找到相同的特徵值。

a[(j*nlocal_rows + i)].dr = mat_els[full_row * m + full_col]; 
a[(j*nlocal_rows + i)].di = 0.0;