內部結構的數組的分配我已經這些結構:CUDA:一個struct
typedef struct neuron
{
float* weights;
int n_weights;
}Neuron;
typedef struct neurallayer
{
Neuron *neurons;
int n_neurons;
int act_function;
}NLayer;
「n圖層」結構可以包含「神經元」的任意數量的
我試圖分配5「神經元」一「n圖層」結構從以這種方式主機:
NLayer* nL;
int i;
int tmp=9;
cudaMalloc((void**)&nL,sizeof(NLayer));
cudaMalloc((void**)&nL->neurons,6*sizeof(Neuron));
for(i=0;i<5;i++)
cudaMemcpy(&nL->neurons[i].n_weights,&tmp,sizeof(int),cudaMemcpyHostToDevice);
...然後我試圖修改「NL->神經元[0] .n_weights」變量與內核:
__global__ void test(NLayer* n)
{
n->neurons[0].n_weights=121;
}
,但在編譯時NVCC返回該「警告」與內核無關的唯一行:
Warning: Cannot tell what pointer points to, assuming global memory space
當內核完成其工作的結構開始無法訪問。
這很可能是我在分配過程中做錯了什麼事......有人可以幫助我嗎? 非常感謝,對不起我的英語! :)
UPDATE:
感謝奧蘭我修改我的代碼創建這個函數應該分配結構「n圖層」的一個實例:
NLayer* setNLayer(int numNeurons,int weightsPerNeuron,int act_fun)
{
int i;
NLayer h_layer;
NLayer* d_layer;
float* d_weights;
//SET THE LAYER VARIABLE OF THE HOST NLAYER
h_layer.act_function=act_fun;
h_layer.n_neurons=numNeurons;
//ALLOCATING THE DEVICE NLAYER
if(cudaMalloc((void**)&d_layer,sizeof(NLayer))!=cudaSuccess)
puts("ERROR: Unable to allocate the Layer");
//ALLOCATING THE NEURONS ON THE DEVICE
if(cudaMalloc((void**)&h_layer.neurons,numNeurons*sizeof(Neuron))!=cudaSuccess)
puts("ERROR: Unable to allocate the Neurons of the Layer");
//COPING THE HOST NLAYER ON THE DEVICE
if(cudaMemcpy(d_layer,&h_layer,sizeof(NLayer),cudaMemcpyHostToDevice)!=cudaSuccess)
puts("ERROR: Unable to copy the data layer onto the device");
for(i=0;i<numNeurons;i++)
{
//ALLOCATING THE WEIGHTS' ARRAY ON THE DEVICE
cudaMalloc((void**)&d_weights,weightsPerNeuron*sizeof(float));
//COPING ITS POINTER AS PART OF THE i-TH NEURONS STRUCT
if(cudaMemcpy(&d_layer->neurons[i].weights,&d_weights,sizeof(float*),cudaMemcpyHostToDevice)!=cudaSuccess)
puts("Error: unable to copy weights' pointer to the device");
}
//RETURN THE DEVICE POINTER
return d_layer;
}
,我調用該函數(內核「測試」是以前聲明的):
int main()
{
NLayer* nL;
int h_tmp1;
float h_tmp2;
nL=setNLayer(10,12,13);
test<<<1,1>>>(nL);
if(cudaMemcpy(&h_tmp1,&nL->neurons[0].n_weights,sizeof(float),cudaMemcpyDeviceToHost)!=cudaSuccess);
puts("ERROR!!");
printf("RESULT:%d",h_tmp1);
}
當我編譯該代碼編譯器顯示我的警告,當我執行該程序時,它在屏幕上打印:
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
Error: unable to copy weights' pointer to the device
ERROR!!
RESULT:1
如果我評論內核調用,最後一個錯誤不會比較。
我在哪裏錯了? 我不知道該怎麼辦 感謝您的幫助!
雖然你對這個警告是正確的,但我懷疑是什麼導致了程序的異常行爲。畢竟,編譯器關於位於全局Emory空間中的結構的假設是正確的... – aland 2012-08-09 01:52:52
我使用的是具有1.2功能的NVIDIA GeForce 320M 256 MB,所以我不認爲它是「費米」卡 – 2012-08-09 09:29:32