2017-02-19 364 views
1

我寫了一個程序(希望)可以計算兩個信號之間的互相關。雖然我已經完成了如何執行計算的很好的搜索,但我無法弄清楚一些重要的細節。我特別關心平均計算。似乎有些算法利用整個數據集的平均值來執行每次移位(或延遲)的相關計算。換句話說,他們使用不變的平均值。我甚至發現了一些只計算一次分母的算法,將它用作其餘延遲的常量值。但是,我認爲平均值和分母都應該迭代計算,只考慮疊加範圍內的數據。因此,我爲這個程序寫了兩個版本。他們似乎在較小的延遲中產生非常相似的結果。我想知道哪一個是正確的。如何正確計算互相關?

迭代平均:

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 

FILE *input1, *input2, *output; 
int m = 0, n = 0, t; 
float *VarA, *VarB, *Results, *Results2; 

void open_inputs_and_output(); 

void count_and_check_lines(); 

void allocate_memory(); 

void read_data(); 

void allocate_memory2(); 

void write_output(); 

int main() 
{ 
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl; 
    int p = 0, i, delay; 

    open_inputs_and_output(); 

    count_and_check_lines(); 

    rewind(input1); 
    rewind(input2); 

    allocate_memory(); 

    read_data(); 

    fclose(input1); 
    fclose(input2); 

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n"); 
    scanf("%i", &p); 

    if(p < -1) 
    { 
     printf("Bad number!\n"); 
     exit(1); 
    } 
    else if(p == -1) 
     t = n; 
    else 
     t = p; 

    allocate_memory2(); 

    printf("\nWait...\n\n"); 

    for(delay = 0; delay < t; delay++) 
    { 
     for(i = delay; i < n; i ++) 
     { 
      SumAverageA += VarA[i]; 
      SumAverageB += VarB[(i - delay)]; 
     } 

     AverageA = SumAverageA/(n - delay); 
     AverageB = SumAverageB/(n - delay); 

     for(i = delay; i < n; i++) 
     { 
      SubA = VarA[i] - AverageA; 
      SubB = VarB[(i - delay)] - AverageB; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 

     for(i = delay; i < n; i++) 
     { 
      SubB = VarB[i] - AverageB; 
      SubA = VarA[(i - delay)] - AverageA; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results2[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 
    } 

    printf("Calculations performed.\n"); 

    free(VarA); 
    free(VarB); 

    write_output(); 

    free(Results); 
    free(Results2); 

    fclose(output); 

    return 0; 
} 

void open_inputs_and_output() 
{ 
    input1 = fopen("C:\\...\\test.txt","r"); 

    if (input1 == NULL) 
    { 
     printf("Error! Could not open input 1.\n"); 
     exit(1); 
    } 
    else 
     printf("Input1 opening: OK.\n"); 

    input2 = fopen("C:\\...\\test2.txt","r"); 

    if (input2 == NULL) 
    { 
     printf("Error! Could not open input 2.\n"); 
     exit(1); 
    } 
    else 
     printf("Input2 opening: OK.\n"); 

    output = fopen("C:\\...\\out.txt","w"); 

    if (output == NULL) 
    { 
     printf("Error! Could not open output.\n"); 
     exit(1); 
    } 
    else 
     printf("Output opening: OK.\n"); 
} 

void count_and_check_lines() 
{ 
    float k; 

    while(fscanf(input1,"%f",&k) == 1) 
     n++; 

    printf("n = %i\n", n); 

    while(fscanf(input2,"%f",&k) == 1) 
     m++; 

    printf("m = %i\n", m); 

    if(m != n) 
    { 
     printf("Error: Number of rows does not match!\n"); 
     exit(1); 
    } 
    else 
     printf("Number of rows matches.\n"); 
} 

void allocate_memory() 
{ 
    VarA = calloc(n, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarA.\n"); 
     exit(1); 
    } 

    VarB = calloc(m, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarB.\n"); 
     exit(1); 
    } 
} 

void read_data() 
{ 
    int i; 

    for(i = 0; i < n; i++) 
     fscanf(input1,"%f",&VarA[i]); 

    printf("Data A successfully read.\n"); 

    for(i = 0; i < m; i++) 
     fscanf(input2,"%f",&VarB[i]); 

    printf("Data B successfully read.\n"); 
} 

void allocate_memory2() 
{ 
    Results = calloc(t, sizeof(float)); 

    if(Results == NULL) 
    { 
     printf("Could not allocate memory for Results.\n"); 
     exit(1); 
    } 

    Results2 = calloc(t, sizeof(float)); 

    if(Results2 == NULL) 
    { 
     printf("Could not allocate memory for Results2.\n"); 
     exit(1); 
    } 
} 

void write_output() 
{ 
    int i; 

    for(i = t - 1; i > 0; i--) 
     fprintf(output,"-%i %f\n", i , Results2[i]); 

    for(i = 0; i < t; i++) 
     fprintf(output,"%i %f\n", i , Results[i]); 

    printf("Results written.\n"); 
} 

恆定平均:

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 

FILE *input1, *input2, *output; 
int m = 0, n = 0, t; 
float *VarA, *VarB, *Results, *Results2; 

void open_inputs_and_output(); 

void count_and_check_lines(); 

void allocate_memory(); 

void read_data(); 

void allocate_memory2(); 

void write_output(); 

int main() 
{ 
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl; 
    int p = 0, i, delay; 

    open_inputs_and_output(); 

    count_and_check_lines(); 

    rewind(input1); 
    rewind(input2); 

    allocate_memory(); 

    read_data(); 

    fclose(input1); 
    fclose(input2); 

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n"); 
    scanf("%i", &p); 

    if(p < -1) 
    { 
     printf("Bad number!\n"); 
     exit(1); 
    } 
    else if(p == -1) 
     t = n; 
    else 
     t = p; 

    allocate_memory2(); 

    printf("\nWait...\n\n"); 

    for(i = 0; i < n; i ++) 
    { 
     SumAverageA += VarA[i]; 
     SumAverageB += VarB[i]; 
    } 

    AverageA = SumAverageA/n; 
    AverageB = SumAverageB/n; 

    for(delay = 0; delay < t; delay++) 
    { 
     for(i = delay; i < n; i++) 
     { 
      SubA = VarA[i] - AverageA; 
      SubB = VarB[(i - delay)] - AverageB; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 

     for(i = delay; i < n; i++) 
     { 
      SubB = VarB[i] - AverageB; 
      SubA = VarA[(i - delay)] - AverageA; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results2[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 
    } 

    printf("Calculations performed.\n"); 

    free(VarA); 
    free(VarB); 

    write_output(); 

    free(Results); 
    free(Results2); 

    fclose(output); 

    return 0; 
} 

void open_inputs_and_output() 
{ 
    input1 = fopen("C:\\...\\test.txt","r"); 

    if (input1 == NULL) 
    { 
     printf("Error! Could not open input 1.\n"); 
     exit(1); 
    } 
    else 
     printf("Input1 opening: OK.\n"); 

    input2 = fopen("C:\\...\\test2.txt","r"); 

    if (input2 == NULL) 
    { 
     printf("Error! Could not open input 2.\n"); 
     exit(1); 
    } 
    else 
     printf("Input2 opening: OK.\n"); 

    output = fopen("C:\\...\\out.txt","w"); 

    if (output == NULL) 
    { 
     printf("Error! Could not open output.\n"); 
     exit(1); 
    } 
    else 
     printf("Output opening: OK.\n"); 
} 

void count_and_check_lines() 
{ 
    float k; 

    while(fscanf(input1,"%f",&k) == 1) 
     n++; 

    printf("n = %i\n", n); 

    while(fscanf(input2,"%f",&k) == 1) 
     m++; 

    printf("m = %i\n", m); 

    if(m != n) 
    { 
     printf("Error: Number of rows does not match!\n"); 
     exit(1); 
    } 
    else 
     printf("Number of rows matches.\n"); 
} 

void allocate_memory() 
{ 
    VarA = calloc(n, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarA.\n"); 
     exit(1); 
    } 

    VarB = calloc(m, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarB.\n"); 
     exit(1); 
    } 
} 

void read_data() 
{ 
    int i; 

    for(i = 0; i < n; i++) 
     fscanf(input1,"%f",&VarA[i]); 

    printf("Data A successfully read.\n"); 

    for(i = 0; i < m; i++) 
     fscanf(input2,"%f",&VarB[i]); 

    printf("Data B successfully read.\n"); 
} 

void allocate_memory2() 
{ 
    Results = calloc(t, sizeof(float)); 

    if(Results == NULL) 
    { 
     printf("Could not allocate memory for Results.\n"); 
     exit(1); 
    } 

    Results2 = calloc(t, sizeof(float)); 

    if(Results2 == NULL) 
    { 
     printf("Could not allocate memory for Results2.\n"); 
     exit(1); 
    } 
} 

void write_output() 
{ 
    int i; 

    for(i = t - 1; i > 0; i--) 
     fprintf(output,"-%i %f\n", i , Results2[i]); 

    for(i = 0; i < t; i++) 
     fprintf(output,"%i %f\n", i , Results[i]); 

    printf("Results written.\n"); 
} 

參考文獻:

http://www.jot.fm/issues/issue_2010_03/column2.pdf

http://paulbourke.net/miscellaneous/correlate/

+0

歡迎來到Stack Overflow。對不起,你在5個多小時內沒有得到任何答覆;這是相對不尋常的。既然你似乎已經找到了一些可以用各種方法做事情的算法,也許答案是「當算法是應該使用的算法時,每個算法都是正確的」,而你的問題是你不確定你應該在你的算法中使用哪種算法情況。你讀過哪些參考資料?當算法適用時他們說什麼?他們是否說過其他選擇,以及爲什麼不應該使用其他選項? (請將其他信息添加到您的問題中,請不要作爲評論。) –

+0

他們對替代方法沒有多說。我的方法只是基於對相關方程和互相關概念的分析的邏輯推論。這可能是正確的,但也可能不正確。數據處理和統計不完全是我的領域。 – user3277482

回答

0

如果您的進程是wide sense stationary,那麼平均值不會隨着時間而改變。如果過程也是ergodic,那麼可以通過計算單個時間序列的平均值來獲得這些平均值。在這種情況下,您最好使用所有可用的樣本來儘可能精確地估計平均值。這自然會導致您的「恆常平均」實施。

另一方面,如果你的過程不是廣義的平穩和遍歷的,那麼獲得他們當地方法的一個好的估計可能被證明是一個更大的挑戰。估計過程近似平穩的較小時間窗內的平均值可能是一種合理的方法(這與您的「迭代平均值」實現類似)。請注意,還有其他方法,它們的適用性取決於具體的應用和特定信號的屬性。

然後,您可能會想知道如何知道您的流程是否具有廣義靜態。不幸的是,除非你對這些過程如何產生了解很多,否則通常最好的做法是在假設過程是廣義靜止的假設下工作,然後嘗試反駁該假設(通過觀察超出預期範圍的結果;參見statistical hypothesis testing)。

+0

謝謝。它使事情更清晰。我正在處理一種蛋白質的分子動力學軌跡,它在開放和閉合狀態之間波動(壽命範圍從5到幾十納秒)。考慮到在一個很大(但不足以導致蛋白質去摺疊)時間尺度的平均值應該是恆定的,我相信測量的變量可以被認爲是遍歷的,但我不確定WSS。因爲在較短的時間尺度上,平均值會發生變化,但如果數據在較大的時間尺度上累積,則平均值趨於恆定。 – user3277482