2014-08-28 227 views
-1

我有一個包含 300,000 個點的數組,我想對其中每 600 個點做一次 FFT。我試圖用 cufftPlanMany 來執行,但得到了一個未知錯誤(CUFFT unknown error):

// Failing call quoted from the question: output stride and output distance are
// passed as the literals 1, 1 and the batch count as the literal 500.
cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500)); 

retrevialfft.cu(82) : cufftSafeCall() CUFFT error: <unknown> 

以下是該調用的上下文代碼:

// Question snippet (body of a function that is not shown): allocates a host
// and a device buffer of SIGNAL_SIZE complex samples, creates a batched 1-D
// C2C plan, runs an in-place forward transform and copies the result back.
// SIGNAL_SIZE is defined outside this snippet.
cudaSetDevice(0); 

    // Allocate host memory for the signal 
    cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE); 


    // Initialize the host signal: random real part in [0, 1], zero imaginary part
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { 
    h_signal[i].x = rand()/(float)RAND_MAX; 
    h_signal[i].y = 0; 

    // printf("Orignal: %f %f \n", h_signal[i].x, h_signal[i].y); 
    } 




    // Size in bytes of the whole signal (host and device buffers use the same size)
    int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE; 

    // Allocate device memory for signal 
    // NOTE(review): the cudaMalloc return code is not checked, and nothing in
    // this snippet copies h_signal into d_signal before the transform.
    cufftComplex* d_signal; 
    cudaMalloc((void**)&d_signal, mem_size); 

    int rank = 1; //1d plan
    // NOTE(review): numCols is used as the length of each transform (n[0]).
    // The question text asks for 600-point FFTs over 300000 samples, which
    // would presumably be n = 600 with batch = 500 -- confirm with the author.
    int numCols = 300000; 
    int n[] = {numCols}; 

    // NOTE(review): batch and ostride are declared but the plan call below
    // passes the literals 500 and 1 instead of these variables.
    int batch = 500; 
    int istride = 1; 
    int ostride = 1; 
    int idist = numCols; 

    // CUFFT plan: 500 transforms of n[0] points each; output distance is passed as 1
    cufftHandle plan; 
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500)); 

    // Transform signal (in place: input and output are both d_signal)
    printf("Transforming signal cufftExecC2C\n"); 
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD)); 



    // Copy device memory to host 
    cufftComplex* h_transformed = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);; 
    cudaMemcpy(h_transformed, d_signal, mem_size, 
          cudaMemcpyDeviceToHost); 



//Destroy CUFFT context 
    cufftDestroy(plan); 

    // cleanup memory 
    free(h_signal); 

    free(h_transformed); 
    cudaFree(d_signal); 
    cudaDeviceReset(); 

有人知道這段代碼的錯誤究竟出在哪裏嗎?

+1

您設置了多少「SIGNAL_SIZE」?請發佈完整的代碼,讓其他人可以複製,粘貼,編譯和運行,而無需添加任何其他代碼行。 – JackOLantern 2014-08-28 21:08:05

+0

對不起,我下次肯定會這樣做。謝謝你的幫助! – 2014-08-31 02:31:24

回答

1

您沒有提供關於問題的更多細節。下面我給出一份使用 cufftPlanMany() 執行批量 1D FFT 的完整可運行代碼,希望對您有幫助。

#include <stdio.h> 
#include <stdlib.h> 
#include <cufft.h> 
#include <assert.h> 

/********************/ 
/* CUDA ERROR CHECK */ 
/********************/ 
// Wraps a CUDA runtime call and reports the call site if it did not succeed.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

/**
 * Checks the status returned by a CUDA runtime call.
 *
 * @param code   Status returned by the runtime call.
 * @param file   Source file of the call site (normally __FILE__). Declared
 *               const because a string literal is passed here.
 * @param line   Source line of the call site (normally __LINE__).
 * @param abort  When true (default), wait for a key press on stdin and then
 *               exit the process with `code` as the exit status.
 *
 * On any status other than cudaSuccess the error string, file and line are
 * written to stderr. Nothing happens on cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    // getchar() keeps the message on screen until the user presses a key.
    if (abort) { getchar(); exit(code); }
}

/*********************/ 
/* CUFFT ERROR CHECK */ 
/*********************/ 
/**
 * Maps a cuFFT status code to the name of its enumerator.
 *
 * @param error  Status returned by a cuFFT call.
 * @return       Pointer to a static string with the enumerator name, or
 *               "<unknown>" for any code that is not listed in the table.
 */
static const char *_cudaGetErrorEnum(cufftResult error)
{
    struct StatusName
    {
        cufftResult code;
        const char *text;
    };

    // One entry per status code this helper knows how to name.
    static const StatusName knownStatuses[] = {
        { CUFFT_SUCCESS,        "CUFFT_SUCCESS"        },
        { CUFFT_INVALID_PLAN,   "CUFFT_INVALID_PLAN"   },
        { CUFFT_ALLOC_FAILED,   "CUFFT_ALLOC_FAILED"   },
        { CUFFT_INVALID_TYPE,   "CUFFT_INVALID_TYPE"   },
        { CUFFT_INVALID_VALUE,  "CUFFT_INVALID_VALUE"  },
        { CUFFT_INTERNAL_ERROR, "CUFFT_INTERNAL_ERROR" },
        { CUFFT_EXEC_FAILED,    "CUFFT_EXEC_FAILED"    },
        { CUFFT_SETUP_FAILED,   "CUFFT_SETUP_FAILED"   },
        { CUFFT_INVALID_SIZE,   "CUFFT_INVALID_SIZE"   },
        { CUFFT_UNALIGNED_DATA, "CUFFT_UNALIGNED_DATA" }
    };
    const size_t knownCount = sizeof(knownStatuses) / sizeof(knownStatuses[0]);

    for (size_t k = 0; k < knownCount; ++k)
    {
        if (knownStatuses[k].code == error)
            return knownStatuses[k].text;
    }

    // Any status not present in the table.
    return "<unknown>";
}

// Wraps a cuFFT call and reports the call site if it did not succeed.
#define cufftSafeCall(err)  __cufftSafeCall(err, __FILE__, __LINE__)

/**
 * Checks the status returned by a cuFFT call and terminates on failure.
 *
 * @param err   Status returned by the cuFFT call.
 * @param file  Source file of the call site (normally __FILE__).
 * @param line  Source line of the call site (normally __LINE__).
 *
 * On any status other than CUFFT_SUCCESS the call site, the numeric status
 * and its name are written to stderr, the device is reset and the process
 * is terminated. Nothing happens on CUFFT_SUCCESS.
 */
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
    if (CUFFT_SUCCESS == err) return;

    // Report the caller's location (file/line), not this helper's, and keep
    // the number of arguments in step with the format conversions.
    fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
            file, line, (int)err, _cudaGetErrorEnum(err));

    cudaDeviceReset();
    assert(0);
    // assert() is compiled out when NDEBUG is defined; still terminate then.
    exit(EXIT_FAILURE);
}

/********/ 
/* MAIN */ 
/********/ 
/**
 * Runs `batch` independent 1-D complex-to-complex forward FFTs of `numCols`
 * points each with a single cufftPlanMany() plan, in place on the device,
 * and prints every output sample.
 *
 * The input is `batch * numCols` samples stored back to back, each set to
 * 1 + 0i.
 *
 * @return 0 on success, EXIT_FAILURE if the host buffer cannot be allocated.
 *         CUDA and cuFFT failures are handled by gpuErrchk / cufftSafeCall.
 */
int main() {

    const int batch   = 3;                      // --- How many transforms to be performed
    const int numCols = 16;                     // --- Size of each transform

    const int SIGNAL_SIZE = batch * numCols;    // --- Overall size for all the signals
    const size_t mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;

    // --- Allocate host memory for all the signals
    cufftComplex* h_signal = (cufftComplex*)malloc(mem_size);
    if (h_signal == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)mem_size);
        return EXIT_FAILURE;
    }

    // --- Initialize host memory for all the signals
    for (int i = 0; i < SIGNAL_SIZE; ++i) {
        h_signal[i].x = 1.f;
        h_signal[i].y = 0.f;
    }

    // --- Allocate device memory for all the signals
    cufftComplex* d_signal = NULL;
    gpuErrchk(cudaMalloc((void**)&d_signal, mem_size));

    // --- Host to Device memcopy
    gpuErrchk(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));

    int rank = 1;           // --- 1d plan
    int n[] = {numCols};    // --- Length of each transform

    // --- Consecutive samples of one signal are adjacent (stride 1) and
    //     consecutive signals start numCols samples apart.
    int istride = 1;
    int ostride = 1;
    int idist = numCols;
    int odist = numCols;

    // --- CUFFT plan. The batch count must match the number of signals that
    //     actually fit in d_signal; a larger value makes the in-place
    //     transform read and write past the end of the allocation.
    cufftHandle plan;
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, ostride, odist, CUFFT_C2C, batch));

    // --- Signals transformations (in place)
    cufftSafeCall(cufftExecC2C(plan, d_signal, d_signal, CUFFT_FORWARD));

    // --- Device to Host memcopy (blocking, so the results are complete afterwards)
    gpuErrchk(cudaMemcpy(h_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));

    for (int i = 0; i < SIGNAL_SIZE; ++i) printf("Real part = %f; Imaginar part = %f\n", h_signal[i].x, h_signal[i].y);

    // --- Destroy CUFFT context
    cufftSafeCall(cufftDestroy(plan));

    // --- Memory cleanup
    free(h_signal);
    gpuErrchk(cudaFree(d_signal));

    gpuErrchk(cudaDeviceReset());

    return 0;
}