
CUDA & Thrust in multiple shared objects results in SIGSEGV in registerEntryFunction

I am trying to get relocatable device code running in two shared libraries, both of which use CUDA, RDC, and Thrust. Everything runs fine if I stop using Thrust in kernel.cu, but that is not an option.

Edit: The program also works if RDC is disabled. That is not an option for me either.

It compiles fine, but at runtime it stops with a segmentation fault. gdb tells me this:

Program received signal SIGSEGV, Segmentation fault. 
0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
(cuda-gdb) bt 
#0 0x0000000000422cc8 in cudart::globalState::registerEntryFunction(void**, char const*, char*, char const*, int, uint3*, uint3*, dim3*, dim3*, int*)() 
#1 0x000000000040876c in __cudaRegisterFunction() 
#2 0x0000000000402b58 in __nv_cudaEntityRegisterCallback(void**)() 
#3 0x00007ffff75051a3 in __cudaRegisterLinkedBinary(__fatBinC_Wrapper_t const*, void (*)(void**), void*)() 
from /home/mindoms/rdctestmcsimple/libkernel.so 
#4 0x00007ffff75050b1 in __cudaRegisterLinkedBinary_66_tmpxft_00007a5f_00000000_16_cuda_device_runtime_compute_52_cpp1_ii_8b1a5d37() from /home/user/rdctestmcsimple/libkernel.so 
#5 0x000000000045285d in __libc_csu_init() 
#6 0x00007ffff65ea50f in __libc_start_main() from /lib64/libc.so.6 

Here is my stripped-down example (using cmake) that shows the error.

main.cpp:

#include "kernel.cuh" 
#include "kernel2.cuh" 

int main(){ 
    Kernel k; 
    k.callKernel(); 

    Kernel2 k2; 
    k2.callKernel2(); 
} 

kernel.cuh:

#ifndef __KERNEL_CUH__ 
#define __KERNEL_CUH__ 
    class Kernel{ 
    public: 
    void callKernel(); 
    }; 
#endif 

kernel.cu:

#include "kernel.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

__global__ 
void thekernel(int *data){ 
    if (threadIdx.x == 0) 
    printf("the kernel says hello\n"); 

    data[threadIdx.x] = threadIdx.x * 2; 
} 

void Kernel::callKernel(){ 

    thrust::device_vector<int> D2; 
    D2.resize(11); 
    int * raw_ptr = thrust::raw_pointer_cast(&D2[0]); 

    printf("Kernel::callKernel called\n"); 
    thekernel <<< 1, 10 >>> (raw_ptr); 

    cudaThreadSynchronize(); 
    cudaError_t code = cudaGetLastError(); 
    if (code != cudaSuccess) { 
    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl; 
    } 

    for (int i = 0; i < D2.size(); i++) 
    std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl; 
} 

kernel2.cuh:

#ifndef __KERNEL2_CUH__ 
#define __KERNEL2_CUH__ 
    class Kernel2{ 
    public: 
    void callKernel2(); 
    }; 
#endif 

kernel2.cu:

#include "kernel2.cuh" 
#include <stdio.h> 
#include <iostream> 
#include <thrust/device_vector.h> 

__global__ 
void thekernel2(int *data2){ 
    if (threadIdx.x == 0) 
    printf("the kernel2 says hello\n"); 

    data2[threadIdx.x] = threadIdx.x * 2; 
} 

void Kernel2::callKernel2(){ 
    thrust::device_vector<int> D; 
    D.resize(11); 
    int * raw_ptr = thrust::raw_pointer_cast(&D[0]); 
    printf("Kernel2::callKernel2 called\n"); 
    thekernel2 <<< 1, 10 >>> (raw_ptr); 

    cudaThreadSynchronize(); 
    cudaError_t code = cudaGetLastError(); 
    if (code != cudaSuccess) { 
    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl; 
    } 

    for (int i = 0; i < D.size(); i++) 
    std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl; 
} 

The CMake file below was used originally, but I get the same problem when I compile "manually":

nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel2.o -o libkernel2.so 
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC kernel.o -o libkernel.so 
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64 

Adding -cudart shared to every nvcc call, as suggested somewhere, results in a different error:

warning: Cuda API error detected: cudaFuncGetAttributes returned (0x8) 

terminate called after throwing an instance of 'thrust::system::system_error' 
    what(): function_attributes(): after cudaFuncGetAttributes: invalid device function 

Program received signal SIGABRT, Aborted. 
0x000000313c432625 in raise() from /lib64/libc.so.6 
(cuda-gdb) bt 
#0 0x000000313c432625 in raise() from /lib64/libc.so.6 
#1 0x000000313c433e05 in abort() from /lib64/libc.so.6 
#2 0x00000031430bea7d in __gnu_cxx::__verbose_terminate_handler()() from /usr/lib64/libstdc++.so.6 
#3 0x00000031430bcbd6 in std::set_unexpected(void (*)())() from /usr/lib64/libstdc++.so.6 
#4 0x00000031430bcc03 in std::terminate()() from /usr/lib64/libstdc++.so.6 
#5 0x00000031430bcc86 in __cxa_rethrow() from /usr/lib64/libstdc++.so.6 
#6 0x00007ffff7d600eb in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::append(unsigned long)() from ./libkernel.so 
#7 0x00007ffff7d5f740 in thrust::detail::vector_base<int, thrust::device_malloc_allocator<int> >::resize(unsigned long)() from ./libkernel.so 
#8 0x00007ffff7d5b19a in Kernel::callKernel()() from ./libkernel.so 
#9 0x00000000004006f8 in main() 
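(A sketch of how the flag might be applied to the commands above; the exact modified invocations were not shown, so this is only an illustration:)

nvcc -arch=sm_35 -Xcompiler -fPIC -cudart shared -dc kernel2.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC -cudart shared kernel2.o -o libkernel2.so 
nvcc -arch=sm_35 -Xcompiler -fPIC -cudart shared -dc kernel.cu 
nvcc -arch=sm_35 -shared -Xcompiler -fPIC -cudart shared kernel.o -o libkernel.so 
g++ -o main main.cpp libkernel.so libkernel2.so -L/opt/cuda/current/lib64 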

CMakeLists.txt (please adjust for your environment):

cmake_minimum_required(VERSION 2.6.2) 

project(Cuda-project) 

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH}) 

SET(CUDA_TOOLKIT_ROOT_DIR "/opt/cuda/current") 

SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52) 

find_package(CUDA REQUIRED) 
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64) 

set(CUDA_SEPARABLE_COMPILATION ON) 

set(BUILD_SHARED_LIBS ON) 

list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) 

CUDA_ADD_LIBRARY(kernel 
    kernel.cu 
) 

CUDA_ADD_LIBRARY(kernel2 
    kernel2.cu 
) 

cuda_add_executable(rdctest main.cpp) 
TARGET_LINK_LIBRARIES(rdctest kernel kernel2 cudadevrt) 

About my system:

Fedora 23 
kernel: 4.4.2-301.fc23.x86_64 
Nvidia Driver: 361.28 
Nvidia Toolkit: 7.5.18 
g++: g++ (GCC) 5.3.1 20151207 (Red Hat 5.3.1-2) 

Reproduced on:

CentOS release 6.7 (Final) 
Kernel: 2.6.32-573.8.1.el6.x86_64 
Nvidia Driver: 352.55 
Nvidia Toolkit: 7.5.18 
g++ (GCC) 4.4.7 20120313 (Red Hat 4.4.7-16) 
glibc 2.12 
cmake 3.5 

Fedora 23 and g++ 5.3.1 are [not an officially supported environment for CUDA 7.5](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) –


@RobertCrovella Thanks for your input. I have reproduced the problem on a supported system and updated the question. – estefan


Related discussion [here](https://groups.google.com/forum/#!topic/thrust-users/LJ8vPiY6-78). –

Answer


Apparently this has to do with which CUDA runtime is used: the shared one or the static one.

I modified your example slightly: instead of building two shared libraries and linking them individually to the executable, I build two static libraries that are linked together into one shared library, and that single shared library is linked to the executable.

Also, here is an updated CMake file that uses the new (>= 3.8) native CUDA language support:

cmake_minimum_required(VERSION 3.8) 
project (CudaSharedThrust CXX CUDA) 

string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_61,code=compute_61") 

if(BUILD_SHARED_LIBS) 
    set(CMAKE_POSITION_INDEPENDENT_CODE ON) 
endif() 

add_library(kernel STATIC kernel.cu) 
set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

add_library(kernel2 STATIC kernel2.cu) 
set_target_properties(kernel2 PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 

add_library(allkernels empty.cu) # empty.cu is an empty file 
set_target_properties(allkernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(allkernels kernel kernel2) 


add_executable(rdctest main.cpp) 
set_target_properties(rdctest PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 
target_link_libraries(rdctest allkernels) 

Without any CMake flags (i.e., a static build), the build succeeds and the program works.

With -DBUILD_SHARED_LIBS=ON, the program compiles but crashes with the same error as yours.

However, building with

cmake .. -DBUILD_SHARED_LIBS=ON -DCMAKE_CUDA_FLAGS:STRING="--cudart shared" 

makes it compile and actually run! So, for some reason, the shared CUDA runtime is required for this kind of setup.

Also note that the step from 2 shared objects -> 2 static libraries inside 1 shared object was necessary; otherwise the program crashed with a thrust::system::system_error.

This is to be expected, however, because NVCC actually ignores shared object files during device linking: http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#libraries
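
For completeness, a rough sketch of what an equivalent manual (non-CMake) build could look like, assuming explicit device linking with -dlink and the shared CUDA runtime. The architecture flag and library path are taken from the question and would need adjusting; these exact commands were not part of the tested setup:

# compile both translation units as relocatable device code, position-independent for the shared library
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel.cu -o kernel.o
nvcc -arch=sm_35 -Xcompiler -fPIC -dc kernel2.cu -o kernel2.o

# device-link both objects in one step, so the device code from both files ends up in a single link unit
nvcc -arch=sm_35 -Xcompiler -fPIC -dlink kernel.o kernel2.o -o device_link.o

# bundle the host objects and the device-link object into one shared library,
# linking against the shared CUDA runtime and the device runtime
g++ -shared -fPIC kernel.o kernel2.o device_link.o -o liballkernels.so \
    -L/opt/cuda/current/lib64 -lcudart -lcudadevrt

# link the executable against that single shared library and the shared CUDA runtime
g++ -o main main.cpp -L. -lallkernels -L/opt/cuda/current/lib64 -lcudart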