
CUDA & Thrust in multiple shared objects results in SIGSEGV in registerEntryFunction

I am trying to get relocatable device code running in two shared libraries, both of which use CUDA, RDC, and Thrust. Everything runs fine if I stop using Thrust in kernel.cu, but that is not an option.

Edit: The program also works if RDC is disabled. That is not an option for me either.

It compiles fine, but at runtime it stops with a segmentation fault. gdb tells me this:

Program received signal SIGSEGV, Segmentation fault. 
0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
(cuda-gdb) bt 
#0 0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
#1 0x000000000040876c in __cudaRegisterFunction() 
#2 0x0000000000402b58 in __nv_cudaEntityRegisterCallback(void**)() 
#3 0x00007ffff75051a3 in __cudaRegisterLinkedBinary(__fatBinC_Wrapper_t const*, void (*)(void**), void*)() 
from /home/mindoms/rdctestmcsimple/libkernel.so 
#4 0x00007ffff75050b1 in __cudaRegisterLinkedBinary_66_tmpxft_00007a5f_00000000_16_cuda_device_runtime_compute_52_cpp1_ii_8b1a5d37() from /home/user/rdctestmcsimple/libkernel.so 
#5 0x000000000045285d in __libc_csu_init() 
#6 0x00007ffff65ea50f in __libc_start_main() from /lib64/libc.so.6 

Here is my stripped-down example (using cmake) that shows the error.

main.cpp:

#include "kernel.cuh" 
#include "kernel2.cuh" 

int main(){ 
    Kernel k; 
    k.callKernel(); 

    Kernel2 k2; 
    k2.callKernel2(); 
} 

kernel.cuh:

#ifndef __KERNEL_CUH__ 
#define __KERNEL_CUH__ 
    class Kernel{ 
    public: 
    void callKernel(); 
    }; 
#endif 

kernel.cu:

#include "kernel.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

__global__ 
void thekernel(int *data){ 
    if (threadIdx.x == 0) 
    printf("the kernel says hello\n"); 

    data[threadIdx.x] = threadIdx.x * 2; 
} 

void Kernel::callKernel(){ 

    thrust::device_vector<int> D2; 
    D2.resize(11); 
    int * raw_ptr = thrust::raw_pointer_cast(&D2[0]); 

    printf("Kernel::callKernel called\n"); 
    thekernel <<< 1, 10 >>> (raw_ptr); 

    cudaThreadSynchronize(); 
    cudaError_t code = cudaGetLastError(); 
    if (code != cudaSuccess) { 
    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl; 
    } 

    for (int i = 0; i < D2.size(); i++) 
    std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl; 
} 

kernel2.cuh:

#ifndef __KERNEL2_CUH__ 
#define __KERNEL2_CUH__ 
    class Kernel2{ 
    public: 
    void callKernel2(); 
    }; 
#endif 

kernel2.cu:

#include "kernel2.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

__global__ 
void thekernel2(int *data2){ 
    if (threadIdx.x == 0) 
    printf("the kernel2 says hello\n"); 

    data2[threadIdx.x] = threadIdx.x * 2; 
} 

void Kernel2::callKernel2(){ 
    thrust::device_vector<int> D; 
    D.resize(11); 
    int * raw_ptr = thrust::raw_pointer_cast(&D[0]); 
    printf("Kernel2::callKernel2 called\n"); 
    thekernel2 <<< 1, 10 >>> (raw_ptr); 

    cudaThreadSynchronize(); 
    cudaError_t code = cudaGetLastError(); 
    if (code != cudaSuccess) { 
    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl; 
    } 

    for (int i = 0; i < D.size(); i++) 
    std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl; 
} 

The CMake file below was used originally, but I get the same problem when I compile "manually":

nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel2.o -o libkernel2.so 
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel.o -o libkernel.so 
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64 

Adding -cudart shared to every nvcc call, as suggested somewhere, results in a different error:

warning: Cuda API error detected: cudaFuncGetAttributes returned (0x8) 

terminate called after throwing an instance of 'thrust::system::system_error' 
    what(): function_attributes(): after cudaFuncGetAttributes: invalid device function 

Program received signal SIGABRT, Aborted. 
0x000000313c432625 in raise() from /lib64/libc.so.6 
(cuda-gdb) bt 
#0 0x000000313c432625 in raise() from /lib64/libc.so.6 
#1 0x000000313c433e05 in abort() from /lib64/libc.so.6 
#2 0x00000031430bea7d in __gnu_cxx::__verbose_terminate_handler()() from /usr/lib64/libstdc++.so.6 
#3 0x00000031430bcbd6 in std::set_unexpected(void (*)())() from /usr/lib64/libstdc++.so.6 
#4 0x00000031430bcc03 in std::terminate()() from /usr/lib64/libstdc++.so.6 
#5 0x00000031430bcc86 in __cxa_rethrow() from /usr/lib64/libstdc++.so.6 
#6 0x00007ffff7d600eb in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::append(unsigned long)() from ./libkernel.so 
#7 0x00007ffff7d5f740 in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::resize(unsigned long)() from ./libkernel.so 
#8 0x00007ffff7d5b19a in Kernel::callKernel()() from ./libkernel.so 
#9 0x00000000004006f8 in main() 
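(A sketch of how the flag might be applied to the commands above; the exact modified invocations were not shown, so this is only an illustration:)

nvcc -arch=sm_35 -Xcompiler -fPIC -cudart shared -dc kernel2.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC -cudart shared kernel2.o -o libkernel2.so 
nvcc -arch=sm_35 -Xcompiler -fPIC -cudart shared -dc kernel.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC -cudart shared kernel.o -o libkernel.so 
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64 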

CMakeLists.txt (please adjust for your environment):

cmake_minimum_required(VERSION 2.6.2) 

project(Cuda-project) 

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH}) 

SET(CUDA_TOOLKIT_ROOT_DIR "/opt/cuda/current") 

SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52) 

find_package(CUDA REQUIRED) 
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64) 

set(CUDA_SEPARABLE_COMPILATION ON) 

set(BUILD_SHARED_LIBS ON) 

list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) 

CUDA_ADD_LIBRARY(kernel 
    kernel.cu 
) 

CUDA_ADD_LIBRARY(kernel2 
    kernel2.cu 
) 

cuda_add_executable(rdctest main.cpp) 
TARGET_LINK_LIBRARIES(rdctest kernel kernel2 cudadevrt) 

About my system:

Fedora 23 
kernel: 4.4.2-301.fc23.x86_64 
Nvidia Driver: 361.28 
Nvidia Toolkit: 7.5.18 
g++: g++ (GCC) 5.3.1 20151207 (Red Hat 5.3.1-2) 

Reproduced on:

CentOS release 6.7 (Final) 
Kernel: 2.6.32-573.8.1.el6.x86_64 
Nvidia Driver: 352.55 
Nvidia Toolkit: 7.5.18 
g++ (GCC) 4.4.7 20120313 (Red Hat 4.4.7-16) 
glibc 2.12 
cmake 3.5 

Fedora 23 and g++ 5.3.1 are [not an officially supported environment for CUDA 7.5](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) –


@RobertCrovella Thanks for your input. I have reproduced the problem on a supported system and updated the question. – estefan


Related discussion [here](https://groups.google.com/forum/#!topic/thrust-users/LJ8vPiY6-78). –

Answer


Apparently this has to do with which CUDA runtime is used: the shared one or the static one.

I modified your example slightly: instead of building two shared libraries and linking them individually to the executable, I build two static libraries that are linked together into one shared library, and that single shared library is linked to the executable.

Also, here is an updated CMake file that uses the new (>= 3.8) native CUDA language support:

cmake_minimum_required(VERSION 3.8) 
project (CudaSharedThrust CXX CUDA) 

string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_61,code=compute_61") 

if(BUILD_SHARED_LIBS) 
    set(CMAKE_POSITION_INDEPENDENT_CODE ON) 
endif() 

add_library(kernel STATIC kernel.cu) 
set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

add_library(kernel2 STATIC kernel2.cu) 
set_target_properties(kernel2 PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

add_library(allkernels empty.cu) # empty.cu is an empty file 
set_target_properties(allkernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(allkernels kernel kernel2) 


add_executable(rdctest main.cpp) 
set_target_properties(rdctest PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(rdctest allkernels) 

Without any CMake flags (i.e., a static build), the build succeeds and the program works.

With -DBUILD_SHARED_LIBS=ON, the program compiles but crashes with the same error as yours.

However, building with

cmake .. -DBUILD_SHARED_LIBS=ON -DCMAKE_CUDA_FLAGS:STRING="--cudart shared" 

makes it compile and actually run! So, for some reason, the shared CUDA runtime is required for this kind of setup.

Also note that the step from 2 shared objects -> 2 static libraries inside 1 shared object was necessary; otherwise the program crashed with a thrust::system::system_error.

This is to be expected, however, because NVCC actually ignores shared object files during device linking: http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#libraries
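
For completeness, a rough sketch of what an equivalent manual (non-CMake) build could look like, assuming explicit device linking with -dlink and the shared CUDA runtime. The architecture flag and library path are taken from the question and would need adjusting; these exact commands were not part of the tested setup:

# compile both translation units as relocatable device code, position-independent for the shared library
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu -o kernel.o
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu -o kernel2.o

# device-link both objects in one step, so the device code from both files ends up in a single link unit
nvcc -arch=sm_35 -Xcompiler -fPIC -dlink kernel.o kernel2.o -o device_link.o

# bundle the host objects and the device-link object into one shared library,
# linking against the shared CUDA runtime and the device runtime
g++ -shared -fPIC kernel.o kernel2.o device_link.o -o liballkernels.so \
    -L/opt/cuda/current/lib64 -lcudart -lcudadevrt

# link the executable against that single shared library and the shared CUDA runtime
g++ -o main main.cpp -L. -lallkernels -L/opt/cuda/current/lib64 -lcudart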