指向主機內部的設備陣列結構

-1

我試圖創建一個結構，在同一個地方同時容納主機陣列和設備陣列，而結構本身駐留在主機上。我打算之後把它擴展爲鏈表的一個元素。其基本結構如下：

/* Pairs a host-side array with its device-side counterpart. */
struct Data {
    double *h;  /* array of doubles on the host */
    double *d;  /* array of doubles on the device */
};
typedef struct Data Data;

其中 *h 指向主機上的 double 陣列，*d 指向設備上的 double 陣列。

關於將整個結構體複製到設備上有各種各樣的答案（CUDA cudaMemcpy Struct of Arrays），但沒有一個完全符合我的需要。我有以下代碼，但不斷收到非法內存訪問錯誤。

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Reports a failed CUDA runtime call and terminates the program.
 *
 * err  - status returned by the CUDA call
 * file - source file of the call site
 * line - source line of the call site
 *
 * Does nothing when err is cudaSuccess; otherwise prints the error string
 * with the call site and exits with EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA runtime status, tagging any failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program if the host pointer `a` is NULL, printing the file
 * and line of the check.
 *
 * The body is wrapped in do { } while (0) so that `HANDLE_NULL(p);` is a
 * single statement and can be used in an unbraced if/else. The argument is
 * parenthesized so that an expression argument is compared as a whole.
 */
#define HANDLE_NULL(a) do { \
    if ((a) == NULL) { \
        printf("Host memory failed in %s at line %d\n", \
               __FILE__, __LINE__); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

//malloc error code 
/*
 * Prints `message` followed by a newline on standard output and hands
 * `errorCode` back to the caller unchanged.
 */
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Pairs a host-side array with its device-side counterpart. */
struct Data {
    double *h;  /* array of doubles allocated with malloc (host) */
    double *d;  /* array of doubles allocated with cudaMalloc (device) */
};
typedef struct Data Data;

/*
 * Stores 2 into each of the first 100 elements of the array d->d.
 *
 * Both `d` and `d->d` are dereferenced in device code, so both must be
 * pointers the device can access. Threads whose global index is 100 or
 * more do nothing.
 */
__global__ void kernel(Data *d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d->d[idx] = 2;
}

/*
 * Reproduction case from the question: builds a Data struct on the host,
 * uploads its 100-element array, launches `kernel`, then copies the array
 * back and prints the last element.
 */
int main()
{
    Data *d;
    /* The struct itself is allocated with malloc, i.e. in host memory. */
    d = (Data*)malloc(sizeof(Data));

    d->h = (double*)malloc(sizeof(double)*100);
    /* The device allocation's address is stored in the host-resident struct. */
    HANDLE_ERROR(cudaMalloc((void**) &(d->d), 100*sizeof(double)));

    for(int i=0; i<100; i++){
     d->h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice));

    printf("%f\n", d->h[1]);

    /*
     * NOTE(review): `d` is the host pointer returned by malloc above. The
     * kernel dereferences it (`d->d`) in device code, which is the reported
     * "illegal memory access".
     */
    kernel<<<1, 102>>>(d);

    printf("done\n");

    /* Reports a kernel execution error but does not stop the program. */
    {
    cudaError_t cudaerr = cudaDeviceSynchronize();
    if (cudaerr != cudaSuccess)
     printf("kernel launch failed with error \"%s\"->\n",
       cudaGetErrorString(cudaerr));
    }

    /* Still attempted after a failed synchronize; HANDLE_ERROR exits if this copy fails. */
    HANDLE_ERROR(cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost));
    printf("%f\n", d->h[99]);


    return 0;
}

我得到的輸出是：

1.000000 
done 
kernel launch failed with error "an illegal memory access was encountered"-> 
an illegal memory access was encountered in linkedListGPU.cu at line 77

我懷疑我只是把指針弄亂了一點。錯誤處理代碼取自 Wiley 出版的 CUDA 入門書籍，如果這裏不允許張貼該代碼，我會將其刪除。

謝謝。

來源

2017-04-01 Tom Wilshaw

您正在向設備傳遞主機指針並嘗試在內核中訪問它。這顯然是不會工作的 – talonmies

謝謝，只需將內核調用改爲（d-> d）並調整內核代碼就可以修復它。對不起，我現在一直在設備結構上混淆。 –

問題在於 d 本身是一個指向主機上分配的結構的指針（該結構包含 d 和 h 兩個指針）。您把這個結構指針 d 傳遞給內核，像這樣：

kernel<<<1, 102>>>(d); 
       ^
        this is a pointer to memory on the host

然後嘗試在設備代碼中解引用該指針：

d->...; 
    ^
    This operator dereferences the pointer to the left of it

於是你會得到非法內存訪問錯誤。

至少有2種明顯的方式來解決這個問題：

1. 按值傳遞結構，而不是通過指針傳遞。

下面是一個例子：

$ cat t1311.cu 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Reports a failed CUDA runtime call and terminates the program.
 *
 * err  - status returned by the CUDA call
 * file - source file of the call site
 * line - source line of the call site
 *
 * Does nothing when err is cudaSuccess; otherwise prints the error string
 * with the call site and exits with EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA runtime status, tagging any failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program if the host pointer `a` is NULL, printing the file
 * and line of the check.
 *
 * The body is wrapped in do { } while (0) so that `HANDLE_NULL(p);` is a
 * single statement and can be used in an unbraced if/else. The argument is
 * parenthesized so that an expression argument is compared as a whole.
 */
#define HANDLE_NULL(a) do { \
    if ((a) == NULL) { \
        printf("Host memory failed in %s at line %d\n", \
               __FILE__, __LINE__); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

//malloc error code 
/*
 * Prints `message` followed by a newline on standard output and hands
 * `errorCode` back to the caller unchanged.
 */
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Pairs a host-side array with its device-side counterpart. */
struct Data {
    double *h;  /* array of doubles allocated with malloc (host) */
    double *d;  /* array of doubles allocated with cudaMalloc (device) */
};
typedef struct Data Data;

/*
 * Stores 2 into each of the first 100 elements of the array d.d.
 *
 * The struct is received by value, so only the member pointer d.d is
 * dereferenced in device code. Threads whose global index is 100 or more
 * do nothing.
 */
__global__ void kernel(Data d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d.d[idx] = 2;
}

/*
 * Fills a 100-element host array, copies it to the device, lets `kernel`
 * overwrite it, copies it back and prints the last element.
 *
 * The Data struct lives on the host stack and is passed to the kernel by
 * value, so the kernel receives its own copy of the two pointers.
 *
 * Returns 0 on success; any allocation or CUDA failure terminates the
 * program with EXIT_FAILURE.
 */
int main()
{
    /* Must match the element bound hard-coded in `kernel`. */
    const int count = 100;
    const size_t bytes = count * sizeof(double);

    Data d;

    d.h = (double*)malloc(bytes);
    HANDLE_NULL(d.h);
    HANDLE_ERROR(cudaMalloc((void**) &(d.d), bytes));

    for (int i = 0; i < count; i++) {
        d.h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d.d, d.h, bytes, cudaMemcpyHostToDevice));

    printf("%f\n", d.h[1]);

    kernel<<<1, 102>>>(d);
    /* A launch does not return a status; invalid launch configurations show up here. */
    HANDLE_ERROR(cudaGetLastError());

    printf("done\n");

    {
        /* Errors raised while the kernel runs are reported by the synchronize. */
        cudaError_t cudaerr = cudaDeviceSynchronize();
        if (cudaerr != cudaSuccess) {
            printf("kernel launch failed with error \"%s\"->\n",
                   cudaGetErrorString(cudaerr));
            /* Stop here instead of copying back results of a failed kernel. */
            exit(EXIT_FAILURE);
        }
    }

    HANDLE_ERROR(cudaMemcpy(d.h, d.d, bytes, cudaMemcpyDeviceToHost));
    printf("%f\n", d.h[count - 1]);

    /* Release the device and host buffers. */
    HANDLE_ERROR(cudaFree(d.d));
    free(d.h);

    return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu 
$ cuda-memcheck ./t1311 
========= CUDA-MEMCHECK 
1.000000 
done 
2.000000 
========= ERROR SUMMARY: 0 errors 
$

2. 在設備上創建主機指針 d 所指向的結構的副本，並把該設備副本的指針傳給內核：

這裏是一個例子：

$ cat t1311.cu 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Reports a failed CUDA runtime call and terminates the program.
 *
 * err  - status returned by the CUDA call
 * file - source file of the call site
 * line - source line of the call site
 *
 * Does nothing when err is cudaSuccess; otherwise prints the error string
 * with the call site and exits with EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *description = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA runtime status, tagging any failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program if the host pointer `a` is NULL, printing the file
 * and line of the check.
 *
 * The body is wrapped in do { } while (0) so that `HANDLE_NULL(p);` is a
 * single statement and can be used in an unbraced if/else. The argument is
 * parenthesized so that an expression argument is compared as a whole.
 */
#define HANDLE_NULL(a) do { \
    if ((a) == NULL) { \
        printf("Host memory failed in %s at line %d\n", \
               __FILE__, __LINE__); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

//malloc error code 
/*
 * Prints `message` followed by a newline on standard output and hands
 * `errorCode` back to the caller unchanged.
 */
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Pairs a host-side array with its device-side counterpart. */
struct Data {
    double *h;  /* array of doubles allocated with malloc (host) */
    double *d;  /* array of doubles allocated with cudaMalloc (device) */
};
typedef struct Data Data;

/*
 * Stores 2 into each of the first 100 elements of the array d->d.
 *
 * Both `d` and `d->d` are dereferenced in device code, so `d` must point to
 * a Data object the device can access (main passes a cudaMalloc'ed copy).
 * Threads whose global index is 100 or more do nothing.
 */
__global__ void kernel(Data *d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d->d[idx] = 2;
}

/*
 * Fills a 100-element host array, copies it to the device, lets `kernel`
 * overwrite it, copies it back and prints the last element.
 *
 * The host-side struct `d` holds both array pointers. `dev_d` is a copy of
 * that struct in device memory, and it is this copy that the kernel receives.
 *
 * Returns 0 on success; any allocation or CUDA failure terminates the
 * program with EXIT_FAILURE.
 */
int main()
{
    /* Must match the element bound hard-coded in `kernel`. */
    const int count = 100;
    const size_t bytes = count * sizeof(double);

    Data *d, *dev_d;

    d = (Data*)malloc(sizeof(Data));
    HANDLE_NULL(d);
    HANDLE_ERROR(cudaMalloc(&dev_d, sizeof(Data)));

    d->h = (double*)malloc(bytes);
    HANDLE_NULL(d->h);
    HANDLE_ERROR(cudaMalloc((void**) &(d->d), bytes));

    for (int i = 0; i < count; i++) {
        d->h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d->d, d->h, bytes, cudaMemcpyHostToDevice));
    /* Copy the struct only after d->d is set, so the device copy holds the device array pointer. */
    HANDLE_ERROR(cudaMemcpy(dev_d, d, sizeof(Data), cudaMemcpyHostToDevice));
    printf("%f\n", d->h[1]);

    kernel<<<1, 102>>>(dev_d);
    /* A launch does not return a status; invalid launch configurations show up here. */
    HANDLE_ERROR(cudaGetLastError());

    printf("done\n");

    {
        /* Errors raised while the kernel runs are reported by the synchronize. */
        cudaError_t cudaerr = cudaDeviceSynchronize();
        if (cudaerr != cudaSuccess) {
            printf("kernel launch failed with error \"%s\"->\n",
                   cudaGetErrorString(cudaerr));
            /* Stop here instead of copying back results of a failed kernel. */
            exit(EXIT_FAILURE);
        }
    }

    HANDLE_ERROR(cudaMemcpy(d->h, d->d, bytes, cudaMemcpyDeviceToHost));
    printf("%f\n", d->h[count - 1]);

    /* Release the device allocations, then the host ones. */
    HANDLE_ERROR(cudaFree(d->d));
    HANDLE_ERROR(cudaFree(dev_d));
    free(d->h);
    free(d);

    return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu 
$ cuda-memcheck ./t1311 
========= CUDA-MEMCHECK 
1.000000 
done 
2.000000 
========= ERROR SUMMARY: 0 errors 
$

順便說一句，你可以按照此處（here）概述的方法進一步進行調試。

來源

2017-04-01 19:49:14

指向主機內部的設備陣列結構

回答

相關問題