標題:在多個共享對象中同時使用 CUDA RDC 與 Thrust,導致 registerEntryFunction 中出現 SIGSEGV。我試圖在兩個共享庫中運行可重定位裝置代碼(RDC),這兩個庫都使用 CUDA Thrust。如果我在 kernel.cu 中停止使用 Thrust,一切運行良好,但這對我來說不是一個選項。
編輯:如果禁用 RDC,程序也可以正常工作,但這對我來說同樣不是一個選項。
它編譯正常,但在運行時停止並出現段錯誤。 gdb告訴我這個:
Program received signal SIGSEGV, Segmentation fault.
0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)()
(cuda-gdb) bt
#0 0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)()
#1 0x000000000040876c in __cudaRegisterFunction()
#2 0x0000000000402b58 in __nv_cudaEntityRegisterCallback(void**)()
#3 0x00007ffff75051a3 in __cudaRegisterLinkedBinary(__fatBinC_Wrapper_t const*, void (*)(void**), void*)()
from /home/mindoms/rdctestmcsimple/libkernel.so
#4 0x00007ffff75050b1 in __cudaRegisterLinkedBinary_66_tmpxft_00007a5f_00000000_16_cuda_device_runtime_ compute_52_cpp1_ii_8b1a5d37() from /home/user/rdctestmcsimple/libkernel.so
#5 0x000000000045285d in __libc_csu_init()
#6 0x00007ffff65ea50f in __libc_start_main() from /lib64/libc.so.6
下面是我精簡後的示例(使用 cmake),可以重現這個錯誤。
main.cpp中:
#include "kernel.cuh"
#include "kernel2.cuh"
int main(){
Kernel k;
k.callKernel();
Kernel2 k2;
k2.callKernel2();
}
kernel.cuh:
// Include guard. Identifiers containing a double underscore are reserved for
// the implementation, so the guard uses a trailing-underscore name instead.
#ifndef KERNEL_CUH_
#define KERNEL_CUH_
// Host-side wrapper whose single method launches the kernel implemented in
// kernel.cu. Declares no data members and no device code, so the header can be
// included from plain C++ translation units such as main.cpp.
class Kernel{
public:
    // Runs the kernel and prints the resulting buffer; defined in kernel.cu.
    void callKernel();
};
#endif
kernel.cu:
#include "kernel.cuh"
#include <stdio.h>
#include <iostream>
#include <thrust/device_vector.h>
// Demo kernel: each thread stores twice its threadIdx.x into data[threadIdx.x];
// the thread with threadIdx.x == 0 additionally prints a greeting.
//
// The index uses threadIdx.x only and is not bounds-checked, so `data` must
// hold at least blockDim.x ints.
__global__
void thekernel(int *data){
    const unsigned int tid = threadIdx.x;
    if (tid == 0) {
        printf("the kernel says hello\n");
    }
    data[tid] = tid * 2;
}
// Launches thekernel on a thrust-managed device buffer and prints the result.
//
// Resizes a thrust::device_vector<int> to 11 elements, launches thekernel with
// one block of 10 threads on the vector's raw pointer, waits for the device,
// reports any CUDA error to stdout, and then prints every element.
void Kernel::callKernel(){
    thrust::device_vector<int> D2;
    D2.resize(11);
    int * raw_ptr = thrust::raw_pointer_cast(&D2[0]);
    printf("Kernel::callKernel called\n");
    thekernel <<< 1, 10 >>> (raw_ptr);
    // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize();
    // its status is kept so an execution failure is not silently discarded.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    // Always read (and thereby clear) the last error so launch failures are
    // reported as well.
    const cudaError_t lastStatus = cudaGetLastError();
    const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
    if (code != cudaSuccess) {
        std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl;
    }
    // size_t index avoids the signed/unsigned comparison against size().
    for (size_t i = 0; i < D2.size(); ++i)
        std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl;
}
kernel2.cuh:
// Include guard. Identifiers containing a double underscore are reserved for
// the implementation, so the guard uses a trailing-underscore name instead.
#ifndef KERNEL2_CUH_
#define KERNEL2_CUH_
// Host-side wrapper whose single method launches the kernel implemented in
// kernel2.cu. Declares no data members and no device code, so the header can be
// included from plain C++ translation units such as main.cpp.
class Kernel2{
public:
    // Runs the kernel and prints the resulting buffer; defined in kernel2.cu.
    void callKernel2();
};
#endif
kernel2.cu
#include "kernel2.cuh"
#include <stdio.h>
#include <iostream>
#include <thrust/device_vector.h>
// Demo kernel: each thread stores twice its threadIdx.x into
// data2[threadIdx.x]; the thread with threadIdx.x == 0 additionally prints a
// greeting.
//
// The index uses threadIdx.x only and is not bounds-checked, so `data2` must
// hold at least blockDim.x ints.
__global__
void thekernel2(int *data2){
    const unsigned int tid = threadIdx.x;
    if (tid == 0) {
        printf("the kernel2 says hello\n");
    }
    data2[tid] = tid * 2;
}
// Launches thekernel2 on a thrust-managed device buffer and prints the result.
//
// Resizes a thrust::device_vector<int> to 11 elements, launches thekernel2 with
// one block of 10 threads on the vector's raw pointer, waits for the device,
// reports any CUDA error to stdout, and then prints every element.
void Kernel2::callKernel2(){
    thrust::device_vector<int> D;
    D.resize(11);
    int * raw_ptr = thrust::raw_pointer_cast(&D[0]);
    printf("Kernel2::callKernel2 called\n");
    thekernel2 <<< 1, 10 >>> (raw_ptr);
    // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize();
    // its status is kept so an execution failure is not silently discarded.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    // Always read (and thereby clear) the last error so launch failures are
    // reported as well.
    const cudaError_t lastStatus = cudaGetLastError();
    const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
    if (code != cudaSuccess) {
        std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl;
    }
    // size_t index avoids the signed/unsigned comparison against size().
    for (size_t i = 0; i < D.size(); ++i)
        std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl;
}
我最初使用的是下面的 CMake 文件,但當我「手動」編譯時也會遇到同樣的問題:
# Compile kernel2.cu into kernel2.o with relocatable device code (-dc); -fPIC is
# forwarded to the host compiler so the object can go into a shared library.
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu
# Link kernel2.o into the shared library libkernel2.so.
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel2.o -o libkernel2.so
# Same two steps for kernel.cu -> libkernel.so.
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel.o -o libkernel.so
# Build the host executable with g++ against both shared libraries; the CUDA
# lib64 directory is only added to the library search path here.
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64
按照某處的建議,在每次 nvcc 調用中添加 `-cudart shared` 後,會出現另一個不同的錯誤:
warning: Cuda API error detected: cudaFuncGetAttributes returned (0x8)
terminate called after throwing an instance of 'thrust::system::system_error'
what(): function_attributes(): after cudaFuncGetAttributes: invalid device function
Program received signal SIGABRT, Aborted.
0x000000313c432625 in raise() from /lib64/libc.so.6
(cuda-gdb) bt
#0 0x000000313c432625 in raise() from /lib64/libc.so.6
#1 0x000000313c433e05 in abort() from /lib64/libc.so.6
#2 0x00000031430bea7d in __gnu_cxx::__verbose_terminate_handler()() from /usr/lib64/libstdc++.so.6
#3 0x00000031430bcbd6 in std::set_unexpected(void (*)())() from /usr/lib64/libstdc++.so.6
#4 0x00000031430bcc03 in std::terminate()() from /usr/lib64/libstdc++.so.6
#5 0x00000031430bcc86 in __cxa_rethrow() from /usr/lib64/libstdc++.so.6
#6 0x00007ffff7d600eb in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::append(unsigned long)() from ./libkernel.so
#7 0x00007ffff7d5f740 in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::resize(unsigned long)() from ./libkernel.so
#8 0x00007ffff7d5b19a in Kernel::callKernel()() from ./libkernel.so
#9 0x00000000004006f8 in main()
CMakeLists.txt(請根據你的環境進行調整):
# Build script for the reproduction: two CUDA libraries (kernel, kernel2) built
# with separable compilation enabled, plus the rdctest executable.
cmake_minimum_required(VERSION 2.6.2)
project(Cuda-project)

# Search CMake/cuda under the source tree for modules before the defaults.
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH})

# Toolkit location and target architecture; adjust to the local installation.
set(CUDA_TOOLKIT_ROOT_DIR "/opt/cuda/current")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52)
find_package(CUDA REQUIRED)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)

# Separable compilation, shared libraries, and -fPIC for the host compiler.
set(CUDA_SEPARABLE_COMPILATION ON)
set(BUILD_SHARED_LIBS ON)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)

# One library per .cu file.
cuda_add_library(kernel kernel.cu)
cuda_add_library(kernel2 kernel2.cu)

# Host executable linked against both libraries and cudadevrt.
cuda_add_executable(rdctest main.cpp)
target_link_libraries(rdctest kernel kernel2 cudadevrt)
關於我的系統:
Fedora 23
kernel: 4.4.2-301.fc23.x86_64
Nvidia Driver: 361.28
Nvidia Toolkit: 7.5.18
g++: g++ (GCC) 5.3.1 20151207 (Red Hat 5.3.1-2)
已在以下環境中重現:
CentOS release 6.7 (Final)
Kernel: 2.6.32-573.8.1.el6.x86_64
Nvidia Driver: 352.55
Nvidia Toolkit: 7.5.18
g++ (GCC) 4.4.7 20120313 (Red Hat 4.4.7-16)
glibc 2.12
cmake to 3.5
Fedora 23 和 g++ 5.3.1 [不是 CUDA 7.5 正式支持的環境](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) –
@RobertCrovella感謝您的輸入。我在一個支持的系統上重現了這個問題並更新了這個問題。 – estefan
相關討論[這裏](https://groups.google.com/forum/#!topic/thrust-users/LJ8vPiY6-78)。 –