cuda程序的輸出結果不是預期的

#include<cuda_runtime.h> 
#include<stdio.h> 
#include<cuda.h> 
#include<stdlib.h> 


__global__ void setVal(char **c){ 

c[(blockIdx.y * gridDim.x) + blockIdx.x] = "hello\0"; 

} 


int main(){ 

char **gpu = NULL; 
cudaMalloc((void**)&gpu, 6 * sizeof(char *)); 
int i; 
/* 
I cannot access second level directly 
for(i =0 ; i < 6 ;i++){ 
    cudaMalloc((void**)&gpu[i], 10 * sizeof(char)); 
}*/ 


dim3 grid(3,2); 
setVal<<<grid, 1>>>(gpu); 
char *p = (char*)malloc(10 * sizeof(char)); 
char *x[6]; 

cudaMemcpy(x, gpu, 6*sizeof(char*), cudaMemcpyDeviceToHost); 
for(i =0 ; i< 6; i++){ 
    cudaMemcpy(p, x[i], 10*sizeof(char), cudaMemcpyDeviceToHost); 
    //put synchronize here if problem 
    printf("%s\n",p); 

} 


getchar(); 
return 0; 
}

根據所有建議，我修改了我的代碼以使我的概念正確無誤。但是，代碼仍然沒有工作:(任何幫助將不勝感激cuda程序的輸出結果不是預期的

來源

2011-07-03 Programmer

當我添加一個手錶網格，它說網格沒有找到。 – Programmer

任何人都可以使用cuda機器爲我運行並檢查？ – Programmer

你甚至沒有問題了。 – tkerwin

試試這個 - 我測試了它在GTX 285 CUDA 3.2下 - 所以這是一個有點比目前的版本更加嚴格，但有用。

#include<stdio.h> 
#include<string.h> 

__global__ void setValues(char** word) 
{ 
    volatile char* myWord = word[blockIdx.x]; 

    myWord[0] = 'H'; 
    myWord[1] = 'o'; 
    myWord[2] = 'l'; 
    myWord[3] = 'a'; 
    myWord[4] = '\0'; 
} 

int main() 
{ 
    const size_t bufferSize = 32; 
    const int nObjects = 10; 

    char* h_x[nObjects]; 
    char** d_x = 0; 

    cudaMalloc((void**)(&d_x), nObjects * sizeof(char*)); 

    for (int i=0; i < nObjects; i++) 
    { 
     h_x[i] = NULL; 
     cudaMalloc((void**)(&h_x[i]), bufferSize * sizeof(char)); 
     printf("h_x[%d] = %lx\n",i,(unsigned long)h_x[i]); 
    } 

    cudaMemcpy(d_x, h_x, nObjects*sizeof(char*), cudaMemcpyHostToDevice); 
    printf("Copied h_x[] to d_x[]\n"); 

    char msg[] = "Hello World!"; 
    cudaMemcpy(h_x[0], msg, 13*sizeof(char), cudaMemcpyHostToDevice); 

    /* Force Thread Synchronization */ 
    cudaError err = cudaThreadSynchronize(); 

    /* Check for and display Error */ 
    if (cudaSuccess != err) 
    { 
     fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", 
       __FILE__, __LINE__, cudaGetErrorString(err)); 
    } 

    setValues<<<nObjects,1>>>(d_x); 

    /* Force Thread Synchronization */ 
    err = cudaThreadSynchronize(); 

    /* Check for and display Error */ 
    if (cudaSuccess != err) 
    { 
     fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", 
       __FILE__, __LINE__, cudaGetErrorString(err)); 
    } 

    printf("Kernel Completed Successfully. Woot.\n\n"); 

    char p[bufferSize]; 

    printf("d_x = %lx\n", (unsigned long)d_x); 
    printf("h_x = %lx\n", (unsigned long)h_x); 

    cudaMemcpy(h_x, d_x, nObjects*sizeof(char*), cudaMemcpyDeviceToHost); 

    printf("d_x = %lx\n", (unsigned long)d_x); 
    printf("h_x = %lx\n", (unsigned long)h_x); 

    for (int i=0; i < nObjects; i++) 
    { 
     cudaMemcpy(&p, h_x[i], bufferSize*sizeof(char), cudaMemcpyDeviceToHost); 
     printf("%d p[] = %s\n",i,p); 
    } 

    /* Force Thread Synchronization */ 
    err = cudaThreadSynchronize(); 

    /* Check for and display Error */ 
    if (cudaSuccess != err) 
    { 
     fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", 
       __FILE__, __LINE__, cudaGetErrorString(err)); 
    } 

    getchar(); 

    return 0; 
}

正如@喬恩筆記，你不能將X（因爲你已經聲明）它的GPU，因爲它是該生活在CPU上的地址。在上面的代碼中，我創建了一個char *的數組，並將它們傳遞給我也在GPU上分配的char **。希望這可以幫助！

來源

2011-07-03 06:04:19

char **是什麼意思？請舉個例子嗎？ – Programmer

它是一個指向數組的指針。 –

這裏有一個非常棒的頁面（http://www.cplusplus.com/doc/tutorial/pointers/），它描述了指向**底部的指針**的指針。注意：它甚至在其示例中使用'char **'。 –

有我在這裏看到的一些問題下面是一些最明顯的的：。

首先，我的猜測是，字符串常量「4」存儲在主機（CPU）存儲器中，因此您必須將其明確複製到設備（全局）存儲器。一旦字符串「4」位於設備存儲器中，然後您可以將指針「4」中的一個設備內存值，比如一個元素的數組arr

二，將你傳給的數組內核是也在主機內存中。請記住，您需要使用cudaMalloc來分配設備內核可以指向的（全局）設備內存區域。

來源

2011-07-03 04:24:22

您的代碼的主要問題是您沒有爲setValues調用分配任何設備內存。你不能將它傳遞給主機內存的指針（char * x [6]），並期望它工作; CUDA內核必須在CUDA內存上運行。您創建的內存，然後在其上進行操作，然後將它複製回：

#include <stdio.h> 
#include <string.h> 
#include <cuda.h> 
#include <cuda_runtime.h> 

__global__ void setValues(char *arr){ 
    arr[blockIdx.y * gridDim.x + blockIdx.x] = '4'; 
} 

int main() { 
    const int NCHARS=6; 
    char *xd; 

    cudaMalloc(&xd, NCHARS); 
    dim3 grid(3,2); 
    setValues<<<grid,1>>>(xd); 

    char *p; 
    p = (char*) malloc(20*sizeof(char)); 
    strcpy(p,""); 

    cudaMemcpy(p, xd, NCHARS, cudaMemcpyDeviceToHost); 
    p[NCHARS]='\0'; 

    printf("<%s>\n", p); 
    getchar(); 

    cudaFree(xd); 

    return 0; 
}

來源

2011-07-03 04:25:15

注意：char * xd是一個指向char數組的指針。我需要一個char指針數組，因爲我想初始化一個字符串給數組的每個索引。你能改變你的代碼來適應這個嗎？ – Programmer

cuda程序的輸出結果不是預期的

回答

相關問題