大的差異取決於地方的內存分配

我有一個Matlab MEX功能，這使得重複調用名爲calculate(). C函數我做的函數的兩個版本：大的差異取決於地方的內存分配

版本A：每次mex()調用calculate()，它只傳遞輸入參數，並且calculate()所需的所有內存在calculate()內分配並釋放 - 每次！

版本B：calculate()所需的內存分配在mex()的開頭，並且指針傳遞給calculate()。內存僅在mex()結束時釋放。換句話說，分配/釋放只對整個業務進行一次。

根據內存分配需要時間的觀念，我根據天真的假設創建了B版，這應該會提高速度。但它實際上增加了約5倍的執行時間！那裏發生了什麼？

下面是版本B--實際的代碼，你可以從那裏我本來的版本A.

/* MEX business*/ 
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ 
    /* mexPrintf("Inside the mex function of CalcMz2.\n"); 
mexEvalString("drawnow;");*/ 

/*declare local variables*/ 
struct InputParameters voxelPars; 
InputParameters *point2pars; 
struct Zcontrast contrast; 
struct Zcontrast* ptr2contrast; 
ptr2contrast = &contrast; 
mxArray *in, *out; 
mxArray *temp; 
double *output; 
int nFields; 
int i,j,k; 

in = mxDuplicateArray(prhs[0]); /*'in' now is a copy of the input struct*/ 
nFields = mxGetNumberOfFields(prhs[0]); 
if (nFields != 39) mexPrintf("Error: the number of fields in the input struct is incorrect.\n"); 

//associate outputs 
    out = plhs[0] = mxCreateDoubleScalar(0.0); 
    output = mxGetPr(out); 

    /*Passed variables memory allocation*/ 
double **pntrA; 
pntrA = (double **)mxCalloc(18, sizeof(double*)); 
for (i=0; i <18; i++){ 
    pntrA[i] = (double *) mxCalloc(18, sizeof(double)); 
} 
double *pntrAinvB; 
pntrAinvB = (double *) mxCalloc(18, sizeof(double)); 


/*used by MatrInv()*/ 
double **CopyOfMatrix; 
CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*)); 
for (i=0; i<18; i++){ 
    CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double)); 
    } 

double *vector; 
double *col; 
int *indx; 
vector = (double *)mxCalloc(18, sizeof(double)); 
indx = (int *)mxCalloc(18, sizeof(int)); 
col = (double *)mxCalloc(18, sizeof(double)); 

/*used by CalculateY()*/ 
double *Aat; 
Aat = (double*)mxCalloc(18*18, sizeof(double)); 

double *Aate; 
Aate = (double*)mxCalloc(18*18, sizeof(double)); 

double *sum; 
sum = (double*)mxCalloc(18, sizeof(double)); 
double *product; 
product = (double*)mxCalloc(18, sizeof(double)); 

/*Reassign values passed from input struct*/ 
temp = mxGetFieldByNumber(prhs[0],0,0); 
voxelPars.ampl = mxGetPr(temp); 
temp = mxGetFieldByNumber(prhs[0],0,1); 
voxelPars.phi = mxGetPr(temp); 
temp = mxGetFieldByNumber(prhs[0],0,2); 
voxelPars.count1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,3); 
voxelPars.pw1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,4); 
voxelPars.b1 = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,5); 
voxelPars.puloffsetppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,6); 
voxelPars.pw1dc = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,7); 
voxelPars.cf = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,8); 
voxelPars.M0w = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,9); 
voxelPars.T1a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,10); 
voxelPars.T2a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,11); 
voxelPars.offsetappm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,12); 
voxelPars.bwfraction = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,13); 
voxelPars.M0a = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,14); 
voxelPars.M0bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,15); 
voxelPars.T1bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,16); 
voxelPars.T2bw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,17); 
voxelPars.offsetbwppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,18); 
voxelPars.exratebw = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,19); 
voxelPars.M0b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,20); 
voxelPars.T1b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,21); 
voxelPars.T2b = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,22); 
voxelPars.offsetbppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,23); 
voxelPars.exrateb = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,24); 
voxelPars.M0c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,25); 
voxelPars.T1c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,26); 
voxelPars.T2c = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,27); 
voxelPars.offsetcppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,28); 
voxelPars.exratec = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,29); 
voxelPars.M0d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,30); 
voxelPars.T1d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,31); 
voxelPars.T2d = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,32); 
voxelPars.offsetdppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,33); 
voxelPars.exrated = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,34); 
voxelPars.M0e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,35); 
voxelPars.T1e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,36); 
voxelPars.T2e = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,37); 
voxelPars.offseteppm = mxGetScalar(temp); 
temp = mxGetFieldByNumber(prhs[0],0,38); 
voxelPars.exratee = mxGetScalar(temp); 

point2pars = &voxelPars; 


/*Initialize contrast's values to zero*/ 
ptr2contrast->SimMza = ptr2contrast->SimMzb = ptr2contrast->contrastz = 1.0; 

/* call calculate()*/ 
calculate(point2pars, ptr2contrast, pntrA, pntrAinvB, CopyOfMatrix,vector, col, indx, Aat, Aate, sum, product); 
*output = ptr2contrast->contrastz; 
//mexPrintf("The contrast has been calculated as :%f.\n",  ptr2contrast->contrastz); 


/*Free memory 
mxFree(pntrA); mxFree(pntrAinvB);*/ 
mxFree(CopyOfMatrix); mxFree(Aat); mxFree(Aate); 
mxFree(vector);mxFree(indx);mxFree(col); 
mxFree(sum); mxFree(product); 
}//end mex function 



void calculate (struct InputParameters *voxelPars1, struct Zcontrast  *contrast, double **pntrA,double *pntrAinvB, double **CopyOfMatrix, double *vector, double *col,int *indx, double *Aat, double *Aate, double *sum, double *product){ 

/*Local variables*/ 
int i, j, k, n1, m; 
double pwms, pw1delay; 
int npul; 
double timeStepSize; 
double W; 
double Wa, Wbw, Wb, Wc, Wd, We; 
double Cbw, Cb, Cc, Cd, Ce; 
double M0a, M0bw, M0b, M0c, M0d, M0e; //declaring these as locals just to make life easier 
double pa, pbw, pb, pc, pd, pe; 
double Cabw, Cab, Cac, Cad, Cae; 
double k1a, k1bw, k1b, k1c, k1d, k1e; 
double k2a, k2bw, k2b, k2c, k2d, k2e; 

//  double **pntrA; 
//  pntrA = (double **)mxCalloc(18, sizeof(double*)); 
//  for (i=0; i <18; i++){ 
//   pntrA[i] = (double *) mxCalloc(18, sizeof(double)); 


    //  } 
    //   
    double AdinvB[18]; 
    double AinvB[18]; 
    for(i=0; i<18; i++){ 
      AdinvB[i] = 0.0; 
     AinvB[i] = 0.0; 
    } 
//  
// /*Passed variables memory allocation*/ 
//  double **CopyOfMatrix; /*used by MatriInv()*/ 
//  CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*)); 
//  for (i=0; i<18; i++){ 
//   CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double)); 
//  } 
//  
//  double *vector; 
//  double *col; 
//  int *indx; 
//  vector = (double *)mxCalloc(18, sizeof(double)); 
//  indx = (int *)mxCalloc(18, sizeof(int)); 
//  col = (double *)mxCalloc(18, sizeof(double)); 
//  
//  /*used by CalculateY()*/ 
//  double *Aat; 
//  Aat = (double*)mxCalloc(18*18, sizeof(double)); 
// /* for (i=0; i<18; i++){ 
//   Aat[i] = (double*)mxCalloc(18, sizeof(double)); 
//  }*/ 
//  
//  double *Aate; 
//  Aate = (double*)mxCalloc(18*18, sizeof(double)); 
// /* for (i=0; i<18; i++){ 
//   Aate[i] = (double*)mxCalloc(18, sizeof(double)); 
//  } */ 
//  
//  double *sum; 
//  sum = (double*)mxCalloc(18, sizeof(double)); 
//  double *product; 
//  product = (double*)mxCalloc(18, sizeof(double)); 

    double W1[voxelPars1->count1]; //Now THIS might require a pointer, since count1 is only passed in the argument struct. We'll see. 
    double W1x[voxelPars1->count1]; 
    double W1y[voxelPars1->count1]; 
    /* mexPrintf("Declared all local variables in calculate().\n"); 
    mexEvalString("drawnow;");*/ 

    /*Fill local variables with values from input struct voxelPars*/

內存分配....計算保留後

來源

2017-09-15 Abigail Cember

你認爲這裏的代碼不相關嗎？ –

在這裏顯示你的工作，你的代碼可能有一些問題。 – krpra

去評論見我最近遇到類似的問題。雖然我對matlab內存管理設施沒有任何洞察力，但我可以說，對於我來說，切換到C本地malloc/free給了x5的大概加速。

看來，在你的情況下，也不需要管理Matlab堆上的內存（使用mxCalloc和類似的） - 因爲內存分配僅用於mex函數內部使用。建議您嘗試一下：只需用malloc，mxFree替換mxCalloc即可。

來源

2017-10-18 12:44:39

大的差異取決於地方的內存分配

回答

相關問題