你好,當我使用Mac OS + OpenCL Framework時,這段代碼工作正常,但是當操作系統改爲openSUSE 11.4 +(來自AMD的OpenCL實現)時,代碼就會出現這樣的錯誤。看來typedef float clfft_complex [2];造成這個錯誤。你可以說什麼呢?OpenCL內核編譯錯誤
錯誤:
Err: "/tmp/OCLRS2tPp.cl", line 4: error: kernel pointer arguments must point to
addrSpace global, local, or constant
__kernel void linear_interp(__global clfft_complex *input,
^
1 error detected in the compilation of "/tmp/OCLRS2tPp.cl".
Internal error: clc compiler invocation failed.
內核代碼:
typedef float clfft_complex[2];
__kernel void linear_interp(__global clfft_complex *input,
__global clfft_complex *output)
{
int global_id = get_global_id(0);
input[global_id][0] = 1.5f;
input[global_id][1] = 5.5f;
}
主機代碼:
//////////////////////////////////
/* Preparing OpenCL Environment */
//////////////////////////////////
cl_uint cl_platformsN = 0;
cl_platform_id *cl_platformIDs = NULL;
clGetPlatformIDs (0, NULL, &cl_platformsN);
cl_platformIDs = (cl_platform_id*)malloc(cl_platformsN * sizeof(cl_platform_id));
clGetPlatformIDs(cl_platformsN, cl_platformIDs, NULL);
cl_int status = CL_SUCCESS;
cl_device_id device; // Compute device
cl_context context; // Compute context
CL_CHECK_ERROR(clGetDeviceIDs(cl_platformIDs[0], DEVICE_TYPE, 1, &device, NULL));
context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
////////////
/* Device */
////////////
cl_uint wavefronts_per_SIMD = 7;
cl_int device_max_cu;
size_t wg_count;
size_t global_work_size;
#if DEVICE_TYPE == CL_DEVICE_TYPE_GPU
size_t local_work_size = 64;
#else
size_t local_work_size = 1;
#endif
// Get info about the compute units on the device
CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &device_max_cu, NULL));
wg_count = device_max_cu * wavefronts_per_SIMD;
global_work_size = wg_count * local_work_size;
/////////////////////
/* Input Data Part */
/////////////////////
/* Input a slice properties */
int bits_per_sample;
int samples_per_pixel;
int theta_size;
int slice_size;
/* Read the slice */
clfft_complex *data_tiff = tiff_read_complex(tiff_input,
&bits_per_sample,
&samples_per_pixel,
&slice_size,
&theta_size);
////////////////////////
/* OpenCL - DFI Part */
////////////////////////
/* Sync events */
const int events_num = 5;
cl_event event_list[events_num];
/* Command Queue */
cl_command_queue command_queue = clCreateCommandQueue(context, device, 0, &status);
/* Program */
const char* programSource = load_program_source(KERNELS_FILE_PATH);
if(programSource == NULL) {
fprintf(stderr, "Programm '%s' can not be created. File was not found.", KERNELS_FILE_PATH);
return;
}
cl_program program = clCreateProgramWithSource(context, 1,
(const char**)&programSource, NULL,
&status);
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
size_t paramValueSize = 1024 * 1024, param_value_size_ret;
char *paramValue;
paramValue = (char*)calloc(paramValueSize, sizeof(char));
status = clGetProgramBuildInfo(program,
device,
CL_PROGRAM_BUILD_LOG,
paramValueSize,
paramValue,
¶m_value_size_ret);
printf("Err: %s", paramValue);
char buf[0x10000];
clGetProgramBuildInfo(program,
device,
CL_PROGRAM_BUILD_LOG,
0x10000,
buf,
NULL);
if(status != CL_SUCCESS) {
fprintf(stderr, "Programm '%s' can not be build. (%s)", KERNELS_FILE_PATH, opencl_map_error(status));
return;
}
/* Kernels */
cl_kernel kernel_linear_interp = clCreateKernel(program, "linear_interp", &status);
在內核的輸入有clfft_complex(2個浮點數組)的數組,在主機上定義爲typedef float clfft_complex [2] ;.由於內核必須知道這種類型,我在內核中定義了它。目前內核的內容絕對不重要,主要問題是「爲什麼它不能識別typedef float clfft_complex [2];作爲類型來投射」。我沒有顯示clSetKernelArg(),因爲在編譯cl_program時執行失敗。無論如何感謝您的幫助。 –
編譯器確認並編譯了typedef行,沒有任何問題。看看'http://www.khronos.org/message_boards/viewtopic.php?t = 4446'與你有同樣的問題。 – ardiyu07
很明顯,謝謝。 –