#Genomics Homework4 #Use DP algorithm to find the highest scored alignment #1. Calculate the alignment score of seq1&seq2 #2. find the best alignment #3. Calculate the best alignment score
defwater(seq1, seq2): m, n = len(seq1), len(seq2) # length of two sequences
# Generate DP table and traceback path pointer matrix score = zeros((m+1, n+1)) # the DP table pointer = zeros((m+1, n+1)) # to store the traceback path
max_score = 0# initial maximum score in DP table
# Calculate DP table and mark pointers for i in range(1, m + 1): for j in range(1, n + 1):#第一行第一列都是0,所以循环从1开始。 score_diagonal = score[i-1][j-1] + match_score(seq1[i-1], seq2[j-1]) #填充的格子从1开始,填充格子的1对应序列的0,所以match_score的下表要减去1 score_up = score[i][j-1] + gap_penalty score_left = score[i-1][j] + gap_penalty
if score[i][j] == 0: pointer[i][j] = 0# 0 means end of the path if score[i][j] == score_left: pointer[i][j] = 1# 1 means trace up if score[i][j] == score_up: pointer[i][j] = 2# 2 means trace left if score[i][j] == score_diagonal: pointer[i][j] = 3# 3 means trace diagonal if score[i][j] >= max_score: max_i = i max_j = j max_score = score[i][j]
align1, align2 = '', ''# initial sequences
i,j = max_i,max_j # indices of path starting point
#输出结果: alignment symbol = '' identity = 0 for i in range(0,len(align1)): # if two AAs are the same, then output the letter if (align1[i] == align2[i]): symbol += "*" # if one of them is N elif (align1[i] =="N"or align2[i]=="N"): symbol += "N" # if they are not identical and none of them is gap elif (align1[i] != align2[i] and align1[i] != '-'and align2[i] != '-'): symbol += 'M' #if one of them is a gap, output a - elif (align1[i] == '-'or align2[i] == '-'): symbol += '-'