2017-04-01 65 views
-1

我試圖創建一個結構,在同一個地方同時容納主機陣列和設備陣列,而該結構本身駐留在主機上(也就是在主機端的結構內部保存指向設備陣列的指針)。我之後打算將它擴展爲鏈表的一個元素。其基本結構如下:

/* Keeps a host array and a device array together in one host-side record. */
typedef struct Data{ 
    double *h;  /* points to an array of doubles on the host */
    double *d;  /* points to an array of doubles on the device */
} Data; 

其中 *h 指向主機上的 double 陣列,*d 指向設備上的 double 陣列。

關於將整個結構體複製到設備上有各種各樣的答案(CUDA cudaMemcpy Struct of Arrays),但沒有一個完全符合我的需要。我有以下代碼,但不斷收到非法內存訪問錯誤。

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Abort the program when a CUDA runtime call reports failure.
 *
 * err  - status returned by the CUDA call being checked
 * file - source file name to show in the message
 * line - source line number to show in the message
 *
 * Does nothing when err is cudaSuccess. Otherwise prints the CUDA error
 * string together with the given location to stdout and exits with
 * EXIT_FAILURE.
 */
static void HandleError(cudaError_t err, const char *file, int line)
{
    if (err == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}

/* Check a CUDA call's status, tagging any failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Abort with a message if a host pointer is NULL.
 *
 * a - pointer expression to test; evaluated exactly once.
 *
 * On NULL, prints the file and line of the use site to stdout and exits
 * with EXIT_FAILURE.
 *
 * The argument is parenthesised so that an expression such as
 * `p = malloc(n)` is compared as a whole, and the body is wrapped in
 * do { } while (0) so that `HANDLE_NULL(p);` behaves as a single statement
 * (for example inside an unbraced if/else).
 */
#define HANDLE_NULL(a) do { if ((a) == NULL) { \
          printf("Host memory failed in %s at line %d\n", \
            __FILE__, __LINE__); \
          exit(EXIT_FAILURE);} } while (0)

/*
 * Print a message on its own line and pass an error code through.
 *
 * message   - NUL-terminated text written to stdout, followed by a newline
 * errorCode - value handed back unchanged
 *
 * Returns errorCode.
 *
 * NOTE(review): the original comment labels this "malloc error code", so it
 * is presumably meant for reporting allocation failures; no caller is
 * visible in this listing.
 */
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Keeps a host array and a device array together in one record. */
typedef struct Data{ 
    double *h;  /* host array; main allocates it with malloc */
    double *d;  /* device array; main allocates it with cudaMalloc */
} Data; 

/*
 * Store 2 into each of the first 100 elements of the array d->d.
 *
 * d - pointer to a Data record. It is dereferenced in device code, so both
 *     the record and the array d->d must be accessible from the device.
 *
 * Each thread handles one element, selected by its flat 1D global thread
 * index; threads whose index is 100 or more do nothing.
 */
__global__ void kernel(Data *d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d->d[idx] = 2;
}

/*
 * Question's repro program: builds a Data record holding one host array and
 * one device array of 100 doubles, launches kernel on it, and copies the
 * device array back to the host.
 *
 * NOTE(review): this is the version that fails. d comes from malloc (host
 * memory) and is passed unchanged to the kernel, which dereferences it in
 * device code; the output quoted after this listing shows the resulting
 * "illegal memory access" error. The listing is intentionally left as-is.
 */
int main() 
{ 
    Data *d; 
    /* The record itself is allocated in host memory. */
    d = (Data*)malloc(sizeof(Data)); 

    /* Host array and device array, 100 doubles each. */
    d->h = (double*)malloc(sizeof(double)*100); 
    HANDLE_ERROR(cudaMalloc((void**) &(d->d), 100*sizeof(double))); 

    /* Fill the host array with 0..99. */
    for(int i=0; i<100; i++){ 
     d->h[i] = i; 
    } 

    /* Copy the host array into the device array. */
    HANDLE_ERROR(cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice)); 

    printf("%f\n", d->h[1]); 

    /* Passes the host pointer d itself to the kernel (the reported problem). */
    kernel<<<1, 102>>>(d); 

    printf("done\n"); 

    { 
    /* Wait for the kernel; a failure is only printed and execution continues. */
    cudaError_t cudaerr = cudaDeviceSynchronize(); 
    if (cudaerr != cudaSuccess) 
     printf("kernel launch failed with error \"%s\"->\n", 
       cudaGetErrorString(cudaerr)); 
    } 

    /* Copy the device array back into the host array and print the last element. */
    HANDLE_ERROR(cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost)); 
    printf("%f\n", d->h[99]); 


    return 0; 
} 

我得到的輸出是:

1.000000 
done 
kernel launch failed with error "an illegal memory access was encountered"-> 
an illegal memory access was encountered in linkedListGPU.cu at line 77 

我懷疑只是我把指針弄錯了一點。錯誤處理代碼來自 Wiley 出版的 CUDA 入門書籍,如果這裏不允許貼出該代碼,我會將它刪除。

謝謝。

+0

您正在向設備傳遞主機指針並嘗試在內核中訪問它。這顯然是不會工作的 – talonmies

+0

謝謝,只需將內核調用改爲(d-> d)並調整內核代碼就可以修復它。對不起,我現在一直在設備結構上混淆。 –

回答

1

問題在於 d 本身是一個指向主機端分配的結構的指針(d 和 h 這兩個指針就包含在該結構中)。您像這樣把 d 這個結構指針傳給了內核:

kernel<<<1, 102>>>(d); 
       ^
        this is a pointer to memory on the host 

然後又在設備代碼中嘗試解引用這個指針,如下所示:

d->...; 
    ^
    This operator dereferences the pointer to the left of it 

你會得到一個非法的內存訪問

至少有2種明顯的方式來解決這個問題:

  1. 按值傳遞結構,而不是通過指針傳遞。

下面是一個例子:

$ cat t1311.cu 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Abort the program when a CUDA runtime call reports failure.
 *
 * err  - status returned by the CUDA call being checked
 * file - source file name to show in the message
 * line - source line number to show in the message
 *
 * Does nothing when err is cudaSuccess. Otherwise prints the CUDA error
 * string together with the given location to stdout and exits with
 * EXIT_FAILURE.
 */
static void HandleError(cudaError_t err, const char *file, int line)
{
    if (err == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}

/* Check a CUDA call's status, tagging any failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Abort with a message if a host pointer is NULL.
 *
 * a - pointer expression to test; evaluated exactly once.
 *
 * On NULL, prints the file and line of the use site to stdout and exits
 * with EXIT_FAILURE.
 *
 * The argument is parenthesised so that an expression such as
 * `p = malloc(n)` is compared as a whole, and the body is wrapped in
 * do { } while (0) so that `HANDLE_NULL(p);` behaves as a single statement
 * (for example inside an unbraced if/else).
 */
#define HANDLE_NULL(a) do { if ((a) == NULL) { \
          printf("Host memory failed in %s at line %d\n", \
            __FILE__, __LINE__); \
          exit(EXIT_FAILURE);} } while (0)

/*
 * Print a message on its own line and pass an error code through.
 *
 * message   - NUL-terminated text written to stdout, followed by a newline
 * errorCode - value handed back unchanged
 *
 * Returns errorCode.
 *
 * NOTE(review): the original comment labels this "malloc error code", so it
 * is presumably meant for reporting allocation failures; no caller is
 * visible in this listing.
 */
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Keeps a host array and a device array together in one record. */
typedef struct Data{ 
    double *h;  /* host array; main allocates it with malloc */
    double *d;  /* device array; main allocates it with cudaMalloc */
} Data; 

/*
 * Store 2 into each of the first 100 elements of the array d.d.
 *
 * d - Data record received by value as a kernel argument. Only its d member
 *     is used, and that pointer is indexed from device code.
 *
 * Each thread handles one element, selected by its flat 1D global thread
 * index; threads whose index is 100 or more do nothing.
 */
__global__ void kernel(Data d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d.d[idx] = 2;
}

/*
 * Fix 1: hand the Data record to the kernel by value.
 *
 * Allocates a host array and a device array of N doubles, fills the host
 * array with 0..N-1, copies it to the device, runs kernel over it, copies
 * the result back and prints the last element.
 *
 * Every allocation, CUDA call and the kernel launch itself is checked, and
 * all buffers are released before returning.
 *
 * Returns 0 on success; any failure terminates the process with
 * EXIT_FAILURE.
 */
int main()
{
    /* Element count; must match the bound hard-coded in kernel. */
    const int N = 100;
    const size_t bytes = N * sizeof(double);

    Data d;

    d.h = (double*)malloc(bytes);
    HANDLE_NULL(d.h);
    HANDLE_ERROR(cudaMalloc((void**) &(d.d), bytes));

    for (int i = 0; i < N; i++) {
        d.h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d.d, d.h, bytes, cudaMemcpyHostToDevice));

    printf("%f\n", d.h[1]);

    /* The struct (two pointers) is copied as a kernel argument, so the
     * kernel never dereferences a host address. 102 threads are launched
     * for N elements; the kernel's own bounds check idles the extra two. */
    kernel<<<1, 102>>>(d);
    /* A launch does not return a status; bad launch configurations are
     * reported through cudaGetLastError(). */
    HANDLE_ERROR(cudaGetLastError());

    printf("done\n");

    {
        /* Faults raised while the kernel runs surface at this sync. Stop
         * here instead of continuing on a failed context. */
        cudaError_t cudaerr = cudaDeviceSynchronize();
        if (cudaerr != cudaSuccess) {
            printf("kernel launch failed with error \"%s\"->\n",
                   cudaGetErrorString(cudaerr));
            exit(EXIT_FAILURE);
        }
    }

    HANDLE_ERROR(cudaMemcpy(d.h, d.d, bytes, cudaMemcpyDeviceToHost));
    printf("%f\n", d.h[N - 1]);

    /* Release device and host buffers. */
    HANDLE_ERROR(cudaFree(d.d));
    free(d.h);

    return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu 
$ cuda-memcheck ./t1311 
========= CUDA-MEMCHECK 
1.000000 
done 
2.000000 
========= ERROR SUMMARY: 0 errors 
$ 
  2. 在設備上爲 d 這個主機指針所指向的結構建立一份副本:

     這裏是一個例子:

    $ cat t1311.cu 
    #include <stdio.h> 
    #include <stdlib.h> 
    #include <math.h> 
    #include "cuda.h" 
    
    /* 
    * CUDA Error stuff 
    */ 
    
    /*
     * Abort the program when a CUDA runtime call reports failure.
     *
     * err  - status returned by the CUDA call being checked
     * file - source file name to show in the message
     * line - source line number to show in the message
     *
     * Does nothing when err is cudaSuccess. Otherwise prints the CUDA error
     * string together with the given location to stdout and exits with
     * EXIT_FAILURE.
     */
    static void HandleError(cudaError_t err, const char *file, int line)
    {
        if (err == cudaSuccess) {
            return;
        }

        const char *description = cudaGetErrorString(err);
        printf("%s in %s at line %d\n", description, file, line);
        exit(EXIT_FAILURE);
    }

    /* Check a CUDA call's status, tagging any failure with the call site. */
    #define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
    
    
    /*
     * Abort with a message if a host pointer is NULL.
     *
     * a - pointer expression to test; evaluated exactly once.
     *
     * On NULL, prints the file and line of the use site to stdout and exits
     * with EXIT_FAILURE.
     *
     * The argument is parenthesised so that an expression such as
     * `p = malloc(n)` is compared as a whole, and the body is wrapped in
     * do { } while (0) so that `HANDLE_NULL(p);` behaves as a single
     * statement (for example inside an unbraced if/else).
     */
    #define HANDLE_NULL(a) do { if ((a) == NULL) { \
              printf("Host memory failed in %s at line %d\n", \
                __FILE__, __LINE__); \
              exit(EXIT_FAILURE);} } while (0)
    
    /*
     * Print a message on its own line and pass an error code through.
     *
     * message   - NUL-terminated text written to stdout, followed by a newline
     * errorCode - value handed back unchanged
     *
     * Returns errorCode.
     *
     * NOTE(review): the original comment labels this "malloc error code", so
     * it is presumably meant for reporting allocation failures; no caller is
     * visible in this listing.
     */
    int errMsg(const char *message, int errorCode)
    {
        fprintf(stdout, "%s\n", message);
        return errorCode;
    }
    
    /* Keeps a host array and a device array together in one record. */
    typedef struct Data{ 
        double *h;  /* host array; main allocates it with malloc */
        double *d;  /* device array; main allocates it with cudaMalloc */
    } Data; 
    
    /*
     * Store 2 into each of the first 100 elements of the array d->d.
     *
     * d - pointer to a Data record. It is dereferenced in device code, so
     *     both the record and the array d->d must be accessible from the
     *     device.
     *
     * Each thread handles one element, selected by its flat 1D global thread
     * index; threads whose index is 100 or more do nothing.
     */
    __global__ void kernel(Data *d)
    {
        const int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx >= 100) {
            return;
        }
        d->d[idx] = 2;
    }
    
    /*
     * Fix 2: give the kernel a device-side copy of the Data record.
     *
     * Builds the record on the host (host array plus device array of N
     * doubles), copies the array contents to the device, then copies the
     * record itself into device memory (dev_d) and launches kernel with
     * that device pointer. The result is copied back and the last element
     * is printed.
     *
     * Every allocation, CUDA call and the kernel launch itself is checked,
     * and all buffers are released before returning.
     *
     * Returns 0 on success; any failure terminates the process with
     * EXIT_FAILURE.
     */
    int main()
    {
        /* Element count; must match the bound hard-coded in kernel. */
        const int N = 100;
        const size_t bytes = N * sizeof(double);

        Data *d, *dev_d;
        d = (Data*)malloc(sizeof(Data));
        HANDLE_NULL(d);
        HANDLE_ERROR(cudaMalloc((void**) &dev_d, sizeof(Data)));
        d->h = (double*)malloc(bytes);
        HANDLE_NULL(d->h);
        HANDLE_ERROR(cudaMalloc((void**) &(d->d), bytes));

        for (int i = 0; i < N; i++) {
            d->h[i] = i;
        }

        HANDLE_ERROR(cudaMemcpy(d->d, d->h, bytes, cudaMemcpyHostToDevice));
        /* Copy the record after d->d has been set, so the device copy
         * carries the valid device array pointer. */
        HANDLE_ERROR(cudaMemcpy(dev_d, d, sizeof(Data), cudaMemcpyHostToDevice));
        printf("%f\n", d->h[1]);

        /* Launch with the device copy of the record, not the host pointer d.
         * 102 threads are launched for N elements; the kernel's own bounds
         * check idles the extra two. */
        kernel<<<1, 102>>>(dev_d);
        /* A launch does not return a status; bad launch configurations are
         * reported through cudaGetLastError(). */
        HANDLE_ERROR(cudaGetLastError());

        printf("done\n");

        {
            /* Faults raised while the kernel runs surface at this sync.
             * Stop here instead of continuing on a failed context. */
            cudaError_t cudaerr = cudaDeviceSynchronize();
            if (cudaerr != cudaSuccess) {
                printf("kernel launch failed with error \"%s\"->\n",
                       cudaGetErrorString(cudaerr));
                exit(EXIT_FAILURE);
            }
        }

        HANDLE_ERROR(cudaMemcpy(d->h, d->d, bytes, cudaMemcpyDeviceToHost));
        printf("%f\n", d->h[N - 1]);

        /* Release device allocations first, then the host ones. */
        HANDLE_ERROR(cudaFree(d->d));
        HANDLE_ERROR(cudaFree(dev_d));
        free(d->h);
        free(d);

        return 0;
    }
    $ nvcc -arch=sm_35 -o t1311 t1311.cu 
    $ cuda-memcheck ./t1311 
    ========= CUDA-MEMCHECK 
    1.000000 
    done 
    2.000000 
    ========= ERROR SUMMARY: 0 errors 
    $ 
    

    順便一提,您可以依照 here 所連結的說明中概述的方法,進一步推進您的調試過程。