2
我的程序使用了CUDA基數排序類。從CUDA 4.0升級到4.2之後,該類的輔助初始化函數崩潰,並顯示消息「變量'devprop'周圍的堆棧已損壞」。我通過逐段註釋掉函數代碼來定位問題,發現是cudaGetDeviceProperties破壞了devprop。我不知道爲什麼會發生這種情況,也不知道如何解決。我的環境是CUDA 4.2,開發驅動程序301.32,Nsight 2.2,Windows 7 64位,編譯目標爲Win32。下面是發生崩潰的initDeviceParameters()輔助函數的代碼片段。問題:cudaGetDeviceProperties是否返回了損壞的信息?
// Headers are included at file scope, before the namespace is opened.
// Including them inside `namespace nvRadixSort { ... }` would place every
// declaration they contain (C library, CUDPP, project types) inside the
// namespace, which the C++ standard does not permit for standard headers.
#include "radixsort.h"
#include "cudpp/cudpp.h"
#include <stdio.h>
#include <assert.h>

namespace nvRadixSort
{
    // Set when the current device's compute capability is below 1.2.
    // sm_12 and later devices don't need help with coalescing in the
    // reorderData kernel.
    bool bManualCoalesce = false;

    // Set when the current device's compute capability is below 2.0.
    bool bUsePersistentCTAs = false;

    // Queries the current CUDA device and derives bManualCoalesce and
    // bUsePersistentCTAs from its compute capability.
    //
    // keysOnly: not used by the portion of the function visible here.
    //
    // If either the device id or its properties cannot be obtained, both
    // flags keep their previous values; a failed property query is reported
    // on stderr instead of silently reading an unfilled structure.
    //
    // NOTE(review): a "stack around 'devprop' corrupted" report at this point
    // presumably means the cudaDeviceProp definition seen by the compiler does
    // not match the CUDA runtime library actually loaded (e.g. headers from a
    // different toolkit version) -- TODO confirm include/library paths.
    void initDeviceParameters(bool keysOnly)
    {
        int deviceID = -1;
        if (cudaGetDevice(&deviceID) != cudaSuccess)
        {
            return;
        }

        cudaDeviceProp devprop;
        const cudaError_t propStatus = cudaGetDeviceProperties(&devprop, deviceID);
        if (propStatus != cudaSuccess)
        {
            fprintf(stderr,
                    "initDeviceParameters: cudaGetDeviceProperties failed for device %d: %s\n",
                    deviceID, cudaGetErrorString(propStatus));
            return;
        }

        // Encode the compute capability as a two-digit number, e.g. 2.1 -> 21.
        const int smVersion = devprop.major * 10 + devprop.minor;

        // sm_12 and later devices don't need help with coalesce in reorderData kernel
        bManualCoalesce = (smVersion < 12);
        bUsePersistentCTAs = (smVersion < 20);

        if (bUsePersistentCTAs)
        {
            //Irrelevant. My setup is 2.1
        }
    }
}
這是相關類代碼:
#include <stdio.h>
#include <cuda_runtime_api.h>
#include "cudpp/cudpp.h"
namespace nvRadixSort
{
class RadixSort
{
public:
// Constructs a sorter and immediately sets it up via initialize().
//
// maxElements: forwarded to initialize() as the element count for which
//              temporary storage is allocated.
// keysOnly:    forwarded to initialize(); when true, no temporary value
//              buffer is allocated there.
//
// Every handle and pointer member starts at zero. mCudppContext is included
// so that it holds a defined value even if cudppCreate() in initialize()
// fails to write it.
RadixSort(unsigned int maxElements, bool keysOnly = false)
    : mCudppContext(0),
      mScanPlan(0),
      mNumElements(0),
      mTempKeys(0),
      mTempValues(0),
      mCounters(0),
      mCountersSum(0),
      mBlockOffsets(0)
{
    // Allocate temporary storage
    initialize(maxElements, keysOnly);
}
protected: // data
// CUDPP handle filled in by cudppCreate() and passed to cudppPlan() in initialize()
CUDPPHandle mCudppContext;
CUDPPHandle mScanPlan; // CUDPP plan handle for prefix sum
unsigned int mNumElements; // Number of elements of temp storage allocated
unsigned int *mTempKeys; // Intermediate storage for keys
unsigned int *mTempValues; // Intermediate storage for values (left at 0 when keysOnly is true)
unsigned int *mCounters; // Counter for each radix
unsigned int *mCountersSum; // Prefix sum of radix counters
unsigned int *mBlockOffsets; // Global offsets of each radix in each block
protected: // methods
void initialize(unsigned int numElements, bool keysOnly)
{
// initialize parameters based on present CUDA device
initDeviceParameters(keysOnly);
// Allocate temporary storage
mNumElements = numElements;
unsigned int numBlocks = ((numElements % (CTA_SIZE * 4)) == 0) ?
(numElements/(CTA_SIZE * 4)) : (numElements/(CTA_SIZE * 4) + 1);
unsigned int numBlocks2 = ((numElements % (CTA_SIZE * 2)) == 0) ?
(numElements/(CTA_SIZE * 2)) : (numElements/(CTA_SIZE * 2) + 1);
// Initialize scan
cudppCreate(&mCudppContext);
CUDPPConfiguration scanConfig;
scanConfig.algorithm = CUDPP_SCAN;
scanConfig.datatype = CUDPP_UINT;
scanConfig.op = CUDPP_ADD;
scanConfig.options = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;
cudppPlan(mCudppContext , &mScanPlan, scanConfig, 16 * numBlocks2, 1, 0);
cudaMalloc((void **)&mTempKeys, numElements * sizeof(unsigned int));
if(!keysOnly)
cudaMalloc((void **)&mTempValues, numElements * sizeof(unsigned int));
cudaMalloc((void **)&mCounters, WARP_SIZE_ * numBlocks * sizeof(unsigned int));
cudaMalloc((void **)&mCountersSum, WARP_SIZE_ * numBlocks * sizeof(unsigned int));
cudaMalloc((void **)&mBlockOffsets, WARP_SIZE_ * numBlocks * sizeof(unsigned int));
checkCudaError("RadixSort::initialize()");
}
}
cudaGetDeviceProperties()的返回值是什麼?你的驅動程序是最新的嗎? (如果出現問題,您應該提前收到錯誤消息,但只是詢問) – Tom 2012-07-25 17:19:38
返回值爲cudaSuccess。我修改了代碼來驗證這一點。我的驅動程序版本是301.32。 – 2012-07-25 17:36:29
它可能是CUDA運行時庫(cudart)的問題嗎?我真的被困在這個問題上... – 2012-07-26 14:25:12