2013-03-20 128 views
1

我試圖編譯一個基本的CUDA矩陣乘法程序,但執行make時遇到了以下nvcc錯誤:

nvcc -I. -I/usr/local/cuda/include -c matrixMult1.cu -o matrixMult1.o 
make: nvcc: Command not found 
make: *** [matrixMult1.o] Error 127 

我最初遇到的是另一個錯誤,有人建議我改用nvcc來編譯;唯一的問題是我對nvcc一無所知。有人知道該怎麼解決嗎?提前致謝!

Makefile:

​​

內核:

//******************************************************************** 
// matrixMul_kernel.cu 
// 
// Kernel for a basic matrix multiplication program. 
//******************************************************************** 

#ifndef _MATRIXMUL_KERNEL_H_ 
#define _MATRIXMUL_KERNEL_H_ 

#include <stdio.h> 

/* Thread block size */ 
#define BLOCK_SIZE 3 

#define WA 3 
#define HA 3 
#define WB 3 
#define HB WA 
#define WC WB 
#define HC HA 

/*
 * CUDA kernel: computes C = A * B for row-major matrices.
 *
 * Parameters:
 *   C  - output matrix, wB columns wide
 *   A  - left operand, wA columns wide
 *   B  - right operand, wB columns wide (and wA rows tall)
 *   wA - width of A (also the length of each dot product)
 *   wB - width of B and of C
 *
 * Launch layout: 2D grid of 2D blocks, one thread per element of C.
 * The x dimension selects the column and the y dimension the row.
 *
 * Precondition: the kernel is not told the height of A/C, so rows are NOT
 * bounds-checked; the grid's y extent must cover exactly the rows of C.
 */
__global__ void matrixMul (float * C, float * A, float * B, int wA,
       int wB) {

    /* Global coordinates of the element of C owned by this thread */
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    int row = blockIdx.y * blockDim.y + threadIdx.y;

    /* Threads past the right edge of C have nothing to compute */
    if (col >= wB) {
        return;
    }

    /* Dot product of row `row` of A with column `col` of B */
    float value = 0.0f;
    for (int i = 0; i < wA; ++i) {
        value += A[row * wA + i] * B[i * wB + col];
    }

    /* C has wB columns, so its row stride is wB (not wA) */
    C[row * wB + col] = value;
}

#endif 

主程式:

//******************************************************************** 
// matrixMult1.c 
// 
// A basic matrix multiplication program. 
//******************************************************************** 

#include <stdlib.h> 
#include <stdio.h> 
#include <math.h> 
#include <matrixMul_kernel.cu> 

#define WA 3 
#define HA 3 
#define WB 3 
#define HB WA 
#define WC WB 
#define HC HA 

void initMatrix(float * matrix, int numIndices); 

/* Abort with a diagnostic if a CUDA runtime call did not succeed. */
static void checkCuda(cudaError_t status, const char * what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Print a row-major matrix: `width` values per line, " | " between cells. */
static void printMatrix(const char * label, const float * matrix,
        unsigned int numIndices, unsigned int width) {
    printf("\nMatrix %s:\n", label);
    for (unsigned int i = 0; i < numIndices; i++) {
        printf("%f ", matrix[i]);

        if (((i + 1) % width) == 0) {
            printf("\n");
        } else {
            printf(" | ");
        }
    }
}

//*************
// Main Program
//*************
// Fills A and B with pseudo-random values, multiplies them on the GPU with
// matrixMul, and prints A, B and the product C. Returns 0 on success; exits
// with EXIT_FAILURE if a host allocation or any CUDA call fails.
int main(int argc, char** argv) {

    /* Set random seed (fixed, so runs are reproducible) */
    srand(2013);

    /* Compute element counts and byte sizes for matrices A, B, and C */
    unsigned int sizeA = WA * HA;
    unsigned int sizeB = WB * HB;
    unsigned int sizeC = WC * HC;
    size_t memoryA = sizeof(float) * sizeA;
    size_t memoryB = sizeof(float) * sizeB;
    size_t memoryC = sizeof(float) * sizeC;

    /* Allocate host memory for matrices A, B, and C */
    float * matrixA = (float *) malloc(memoryA);
    float * matrixB = (float *) malloc(memoryB);
    float * matrixC = (float *) malloc(memoryC);
    if (matrixA == NULL || matrixB == NULL || matrixC == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(matrixA);
        free(matrixB);
        free(matrixC);
        return EXIT_FAILURE;
    }

    /* Initialize matrices A and B */
    initMatrix(matrixA, sizeA);
    initMatrix(matrixB, sizeB);

    /* Print the inputs; each matrix wraps on its own width */
    printMatrix("A", matrixA, sizeA, WA);
    printMatrix("B", matrixB, sizeB, WB);

    /* Allocate device memory */
    float* deviceMemA = NULL;
    float* deviceMemB = NULL;
    float* deviceMemC = NULL;
    checkCuda(cudaMalloc((void**) &deviceMemA, memoryA), "cudaMalloc A");
    checkCuda(cudaMalloc((void**) &deviceMemB, memoryB), "cudaMalloc B");
    checkCuda(cudaMalloc((void**) &deviceMemC, memoryC), "cudaMalloc C");

    /* Copy the input matrices from host to device */
    checkCuda(cudaMemcpy(deviceMemA, matrixA, memoryA,
            cudaMemcpyHostToDevice), "copy A to device");
    checkCuda(cudaMemcpy(deviceMemB, matrixB, memoryB,
            cudaMemcpyHostToDevice), "copy B to device");

    /*
    One thread per element of C. The integer division assumes WC and HC
    are multiples of BLOCK_SIZE (true for the 3x3 configuration here).
    */
    dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
    dim3 grid(WC/threads.x, HC/threads.y);

    /* Execute kernel */
    matrixMul<<< grid, threads >>>(deviceMemC, deviceMemA,
            deviceMemB, WA, WB);

    /* A launch reports configuration errors only through cudaGetLastError */
    checkCuda(cudaGetLastError(), "matrixMul launch");

    /*
    Copy the result back from device to host. This blocking copy also
    waits for the kernel to finish and surfaces any execution error.
    */
    checkCuda(cudaMemcpy(matrixC, deviceMemC, memoryC,
            cudaMemcpyDeviceToHost), "copy C to host");

    /* Print matrix C */
    printMatrix("C", matrixC, sizeC, WC);
    printf("\n");

    /* Free up memory */
    free(matrixA);
    free(matrixB);
    free(matrixC);
    checkCuda(cudaFree(deviceMemA), "cudaFree A");
    checkCuda(cudaFree(deviceMemB), "cudaFree B");
    checkCuda(cudaFree(deviceMemC), "cudaFree C");

    return 0;
}

//--------------------------------------------------------------------
// initMatrix - Fills a float buffer with pseudo-random values.
//
// PRE: matrix points to storage for at least numIndices floats;
//  numIndices is the number of elements to fill.
// POST: Elements 0 .. numIndices-1 each hold rand() divided by
//  RAND_MAX, i.e. a value in [0, 1]. Nothing is written when
//  numIndices is zero or negative.
//--------------------------------------------------------------------
void initMatrix(float * matrix, int numIndices) {

    /* Largest value rand() can return, as a float divisor */
    const float scale = (float)RAND_MAX;

    /* Walk the buffer front to back, one rand() draw per element */
    float * cursor = matrix;
    for (int remaining = numIndices; remaining > 0; --remaining) {
        *cursor++ = rand() / scale;
    }
}

回答

1

此錯誤:

nvcc: Command not found 

表示nvcc不在您的shell的PATH中。

要解決這個問題(假設您使用的是bash或類似的shell):

PATH=$PATH:/usr/local/cuda/bin 
make 

...或者將這一行加入系統或使用者的shell設定檔(例如~/.bashrc),使其永久生效。