2017-02-12 34 views
0

我正在嘗試使用仿射間隙成本實現全局對齊算法。我首先在Java中實現它,然後在Python中實現它。但是我在Java中的輸出與我在Python中獲得的輸出不同。我在Python中實現了與我在Java中完全相同的代碼。仿射間隙序列對齊輸出不正確

我在Python代碼如下:

import numpy as np 

def deletionMatrix(D,S,i,j): 
    res = 0 
    if i==0 and j>=0: 
     res = S[i-1][j] - (gapOpen + gapExt) 
     D[i][j] = res 

    if i > 1 and j >= 0: 
     res_S = S[i-1][j] - (gapOpen + gapExt) 
     res_D = D[i-1][j] - gapOpen 

     res = max(res_S, res_D) 
     D[i][j] = res 

    return(res) 

def insertionMatrix(I, S, i, j): 
    res = 0 

    if i >= 0 and j == 1: 
     res = S[i][j-1] - (gapOpen + gapExt) 
     I[i][j] = res 

    if i >= 0 and j > 1: 
     res_S = S[i][j-1] - (gapOpen + gapExt) 
     res_I = I[i][j-1] - gapOpen 

     res = max(res_S, res_I) 
     I[i][j] = res 

    return(res) 

def matrix(S,D,I,m,n,match,mismatch): 
    for i in range(0,len(m)): 
     for j in range(0,len(n)): 
      if i == 0 and j == 0: 
       S[i][j] = 0 

      if i == 0 and j > 0: 
       S[i][j] = insertionMatrix(I,S,i,j) 

      if i > 0 and j == 0: 
       S[i][j] = deletionMatrix(D,S,i,j) 

      if i > 0 and j > 0: 
       if m[i-1] == n[j-1]: 
        res_S = S[i-1][j-1] + match 
        res_D = deletionMatrix(D,S,i,j) 
        res_I = insertionMatrix(I,S,i,j) 

        S[i][j] = max(res_S,res_D,res_I) 

       elif m[i-1] != n[j-1]: 
        res_S = S[i-1][j-1] + mismatch 
        res_D = deletionMatrix(D,S,i,j) 
        res_I = insertionMatrix(I,S,i,j) 

        S[i][j] = max(res_S,res_D,res_I) 

    return(S) 


gapOpen = 5 
gapExt = 2 

m = "GAATTCAGTTA" 
n = "GGATCGA" 

mLen = len(m) + 1 
nLen = len(n) + 1 

S = np.zeros([mLen,nLen]) 
D = np.zeros([mLen,nLen]) 
I = np.zeros([mLen,nLen]) 

match = 1 
mismatch = -3 

S = matrix(S,D,I,m,n,match,mismatch) 

for i in range(0,len(m)): 
    for j in range(0,len(n)): 
     print(S[i][j],) 

    print() 

我隨信附上了我想實現算法的圖像。有人可以告訴我我要去哪裏嗎? This is the algorithm that I'm trying to implement. The alpha is the gapOpen variable in my code and beta is the gapExt variable. And A[i],B[j] simply is match or mismatch variable. That is the two characters being read at a given time (from String m and n), if they are the same then its a match otherwise it is a mismatch

以下是我的java代碼。

public class AffineGapCost { static int gapOpen = 5; static int gapExt = 2; public static int deletionMatrix(int[][] D, int[][] S ,int i, int j) { int res = 0; if(i == 1 && j>=0) { res = S[i-1][j] - (gapOpen + gapExt); D[i][j] = res; } if(i > 1 && j >= 0) { int res_S = S[i-1][j] - (gapOpen + gapExt); int res_D = D[i-1][j] - gapOpen; res = Math.max(res_S, res_D); D[i][j] = res; } return res; } public static int insertionMatrix(int[][] I, int[][] S, int i, int j) { int res = 0; if(i >= 0 && j==1) { res = S[i][j-1] - (gapOpen + gapExt); I[i][j] = res; } if(i >= 0 && j > 1) { int res_S = S[i][j-1] - (gapOpen + gapExt); int res_I = I[i][j-1] - gapOpen; res = Math.max(res_S,res_I); I[i][j] = res; } return res; } public static int[][] matrix(int[][] S, int[][] D, int[][] I,String m, String n,int match, int mismatch) { for(int i=0;i<=m.length();i++) { for(int j=0;j<=n.length();j++) { if(i == 0 && j == 0) { S[i][j] = 0; } if(i == 0 && j > 0) { S[i][j] = insertionMatrix(I, S, i, j); } if(i > 0 && j == 0) { S[i][j] = deletionMatrix(D,S,i,j); } if(i >0 && j>0) { int res_S = S[i-1][j-1] + (m.charAt(i-1) == n.charAt(j-1) ? match:mismatch); int res_D = deletionMatrix(D,S,i,j); int res_I = insertionMatrix(I,S,i,j); S[i][j] = Math.max(Math.max(res_S,res_D),res_I); } } } return S; } <pre><code> public static void main(String[] args) { // Initializing two strings String m = "GAATTCAGTTA"; String n = "GGATCGA"; //String m = "ctaca"; //String n = "cttca"; //String m = "ACGGCT"; //String n = "ACGT"; int[][] S = new int[m.length() + 1][n.length() + 1]; int[][] D = new int[m.length() + 1][n.length() + 1]; int[][] I = new int[m.length() + 1][n.length() + 1]; int match = 1; int mismatch = -3; S = matrix(S,D,I,m,n,match,mismatch); for(int i=0;i<=m.length();i++) { for(int j=0;j<=n.length();j++) { System.out.print(S[i][j] + "\t"); } System.out.println(); } traceBack(S, D, I, m, n,match,mismatch); } }

我真的很感激,如果有人能告訴我我要去哪裏錯了。我一直在試圖找出最近2天的問題,但似乎無法弄清楚我做錯了什麼。

回答

0

這裏的一些潛在的問題:

deletionMatrix,Java的說:

if(i == 1 && j>=0) 

但是Python說:

if i==0 and j>=0: 

matrix,Java的說:

for(int i=0;i<=m.length();i++) { 
    for(int j=0;j<=n.length();j++) { 

但Python說:

for i in range(0,len(m)): 
    for j in range(0,len(n)): 

這是每個循環中較短的一個。 range(m,n)mn-1去,所以你可能想:

for i in range(len(m) + 1): 
    for j in range(len(n) + 1): 

這在main代碼的嵌套循環打印再次發生。

下面是你的Python代碼返工產生相同的輸出你的Java代碼(如果你在Java代碼中註釋掉調用未定義的方法traceBack()):

import numpy as np 

gapOpen = 5 
gapExt = 2 

def deletionMatrix(D, S, i, j): 
    res = 0 
    if i == 1 and j >= 0: 
     res = S[i - 1][j] - (gapOpen + gapExt) 
     D[i][j] = res 

    if i > 1 and j >= 0: 
     res_S = S[i - 1][j] - (gapOpen + gapExt) 
     res_D = D[i - 1][j] - gapOpen 

     res = max(res_S, res_D) 
     D[i][j] = res 

    return res 

def insertionMatrix(I, S, i, j): 
    res = 0 

    if i >= 0 and j == 1: 
     res = S[i][j-1] - (gapOpen + gapExt) 
     I[i][j] = res 

    if i >= 0 and j > 1: 
     res_S = S[i][j-1] - (gapOpen + gapExt) 
     res_I = I[i][j-1] - gapOpen 

     res = max(res_S, res_I) 
     I[i][j] = res 

    return res 

def matrix(S, D, I, m, n, match, mismatch): 

    for i in range(len(m) + 1): 

     for j in range(len(n) + 1): 

      if i == 0 and j == 0: 
       S[i][j] = 0 

      if i == 0 and j > 0: 
       S[i][j] = insertionMatrix(I, S, i, j) 

      if i > 0 and j == 0: 
       S[i][j] = deletionMatrix(D, S, i, j) 

      if i > 0 and j > 0: 
       res_S = S[i-1][j-1] + (match if m[i-1] == n[j-1] else mismatch) 
       res_D = deletionMatrix(D, S, i, j) 
       res_I = insertionMatrix(I, S, i, j) 

       S[i][j] = max(res_S, res_D, res_I) 

    return S 

if __name__ == '__main__': 

    # Initializing two strings 
    m = "GAATTCAGTTA" 
    n = "GGATCGA" 

    mLen = len(m) + 1 
    nLen = len(n) + 1 

    S = np.zeros([mLen, nLen]) 
    D = np.zeros([mLen, nLen]) 
    I = np.zeros([mLen, nLen]) 

    match = 1 
    mismatch = -3 

    S = matrix(S, D, I, m, n, match, mismatch) 

    for i in range(mLen): 
     for j in range(nLen): 
      print(S[i][j], end='\t') 
     print()