我想知道爲什麼 cudaMemcpyToSymbol 對我不起作用(而 cudaMemcpy 卻可以)。cudaMemcpyToSymbol vs cudaMemcpy
// symbols:
__constant__ float flt[480]; // 1920 bytes
__constant__ int ints[160]; // 640 bytes
// func code follows:
float* pFlts;
cudaMalloc((void**)&pFlts, 1920+640); // chunk of gpu mem (floats & ints)
// This does NOT work properly:
// NOTE(review): the 4th argument of cudaMemcpyToSymbol is a byte offset from
// the start of the DESTINATION symbol, not an offset into the source pointer.
// The second call below therefore reads from the start of pFlts (the float
// data) and writes 640 bytes beginning 1920 bytes past the start of `ints`,
// which is only 640 bytes long. The intended call is presumably
//   cudaMemcpyToSymbol(ints, pFlts+480, 640, 0, cudaMemcpyDeviceToDevice);
// (pFlts is a float*, so +480 elements == +1920 bytes).
cudaMemcpyToSymbol(flt,pFlts,1920,0,cudaMemcpyDeviceToDevice); // first copy
cudaMemcpyToSymbol(ints,pFlts,640,1920,cudaMemcpyDeviceToDevice); // second copy
第二次複製破壞了第一次複製(flt)的內容,而第二次複製本身也沒有生效。(如果我刪除第二次複製,第一次複製就能正常工作。)
結果:
GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320) TotThrds=1 ** Source of 1st copy
0x500500500: float[320]= 1.000
0x500500504: float[321]= 0.866
0x500500508: float[322]= 0.500
0x50050050c: float[323]= -0.000
0x500500510: float[324]= -0.500
0x500500514: float[325]= -0.866
0x500500518: float[326]= -1.000
0x50050051c: float[327]= -0.866
0x500500520: float[328]= -0.500
0x500500524: float[329]= 0.000
0x500500528: float[330]= 0.500
0x50050052c: float[331]= 0.866
0x500500530: float[332]= 1.000
GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320) TotThrds=1 ** Dest of 1st copy
0x500100f98: float[320]= 0.000
0x500100f9c: float[321]= 0.500
0x500100fa0: float[322]= 0.866
0x500100fa4: float[323]= 1.000
0x500100fa8: float[324]= 0.866
0x500100fac: float[325]= 0.500
0x500100fb0: float[326]= -0.000
0x500100fb4: float[327]= -0.500
0x500100fb8: float[328]= -0.866
0x500100fbc: float[329]= -1.000
0x500100fc0: float[330]= -0.866
0x500100fc4: float[331]= -0.500
0x500100fc8: float[332]= 0.000
GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0) TotThrds=1 ** Source of 2nd copy
0x500500780: int[0]= 1
0x500500784: int[1]= 1
0x500500788: int[2]= 1
0x50050078c: int[3]= 1
0x500500790: int[4]= 1
0x500500794: int[5]= 1
0x500500798: int[6]= 1
0x50050079c: int[7]= 1
0x5005007a0: int[8]= 1
0x5005007a4: int[9]= 1
0x5005007a8: int[10]= 1
0x5005007ac: int[11]= 1
0x5005007b0: int[12]= 0
GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0) TotThrds=1 ** Dest of 2nd copy
0x500100818: int[0]= 0
0x50010081c: int[1]= 0
0x500100820: int[2]= 0
0x500100824: int[3]= 0
0x500100828: int[4]= 0
0x50010082c: int[5]= 0
0x500100830: int[6]= 0
0x500100834: int[7]= 0
0x500100838: int[8]= 0
0x50010083c: int[9]= 0
0x500100840: int[10]= 0
0x500100844: int[11]= 0
0x500100848: int[12]= 0
以下代碼可以正常工作:
cudaMemcpyToSymbol(flt,pFlts,1920,0,cudaMemcpyDeviceToDevice); // first copy
// cudaMemcpy takes a device pointer, so resolve the device address of the
// __constant__ symbol first and copy to that address rather than passing
// the symbol itself.
int* pTemp;
cudaGetSymbolAddress((void**) &pTemp, ints);
// pFlts is a float*, so +480 skips the 480 floats (1920 bytes) that precede
// the int data in the staging buffer.
cudaMemcpy(pTemp,pFlts+480,640,cudaMemcpyDeviceToDevice); // second copy
結果:
GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320) TotThrds=1 ** Source of first copy
0x500500500: float[320]= 1.000
0x500500504: float[321]= 0.866
0x500500508: float[322]= 0.500
0x50050050c: float[323]= -0.000
0x500500510: float[324]= -0.500
0x500500514: float[325]= -0.866
0x500500518: float[326]= -1.000
0x50050051c: float[327]= -0.866
0x500500520: float[328]= -0.500
0x500500524: float[329]= 0.000
0x500500528: float[330]= 0.500
0x50050052c: float[331]= 0.866
0x500500530: float[332]= 1.000
GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320) TotThrds=1 ** Dest of first copy
0x500100f98: float[320]= 1.000
0x500100f9c: float[321]= 0.866
0x500100fa0: float[322]= 0.500
0x500100fa4: float[323]= -0.000
0x500100fa8: float[324]= -0.500
0x500100fac: float[325]= -0.866
0x500100fb0: float[326]= -1.000
0x500100fb4: float[327]= -0.866
0x500100fb8: float[328]= -0.500
0x500100fbc: float[329]= 0.000
0x500100fc0: float[330]= 0.500
0x500100fc4: float[331]= 0.866
0x500100fc8: float[332]= 1.000
GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0) TotThrds=1 ** Source of 2nd copy
0x500500780: int[0]= 1
0x500500784: int[1]= 1
0x500500788: int[2]= 1
0x50050078c: int[3]= 1
0x500500790: int[4]= 1
0x500500794: int[5]= 1
0x500500798: int[6]= 1
0x50050079c: int[7]= 1
0x5005007a0: int[8]= 1
0x5005007a4: int[9]= 1
0x5005007a8: int[10]= 1
0x5005007ac: int[11]= 1
0x5005007b0: int[12]= 0
GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0) TotThrds=1 ** Destination of 2nd copy
0x500100818: int[0]= 1
0x50010081c: int[1]= 1
0x500100820: int[2]= 1
0x500100824: int[3]= 1
0x500100828: int[4]= 1
0x50010082c: int[5]= 1
0x500100830: int[6]= 1
0x500100834: int[7]= 1
0x500100838: int[8]= 1
0x50010083c: int[9]= 1
0x500100840: int[10]= 1
0x500100844: int[11]= 1
0x500100848: int[12]= 0
當我查看出錯的情況時,看起來好像符號表出了問題。也就是說,第一次複製的目標中的數據看起來很眼熟:不像是被覆蓋了,而只是被移動了,就好像指針是錯的。
你有對 CUDA 調用進行錯誤檢查嗎?[這裏](http://stackoverflow.com/questions/14968293/copy-symbol-address-to-symbol)有人給過你一個例子。 – 2013-03-13 17:03:32
是的,我有做錯誤檢查,只是沒有把檢查用的宏貼出來。沒有報告任何錯誤(都是 cudaSuccess)。 – Doug 2013-03-13 17:18:59
偏移量是相對於目標符號的,而不是相對於源指針的。這就是你的問題所在。 – talonmies 2013-03-13 17:26:44