I’m trying to make a Hirschberg algorithm with python. I’ve written the code but I don’t know how to compose all alignments in the recursion. Please help me if anyone knows.
import sys
class Assignment2022:
def __init__(self, A, B, m, d, g):
self.F = None
self.A = A
self.B = B
self.m = m
self.d = d if d < 0 else -d
self.g = g if g < 0 else -g
self.WWW = []
self.ZZZ = []
def compare(self, Ai, Bj):
if Ai == Bj:
return self.m
else:
return self.d
def NeedlemanWunsch(self, A, B):
nA = len(A)
nB = len(B)
F = [[_ * self.g for _ in range(0, nA 1)]]
for _ in range(1, nB 1):
row = [0] * (nA 1)
row[0] = _ * self.g
F.append(row)
for j in range(nB 1):
for i in range(nA 1):
if i == 0 and j == 0:
F[j][i] = 0
elif j == 0:
F[j][i] = self.g * i
elif i == 0:
F[j][i] = self.g * j
else:
dm = F[j - 1][i - 1] self.m if A[i - 1] == B[j - 1] else F[j - 1][i - 1] self.d
F[j][i] = max(F[j - 1][i] self.g, F[j][i - 1] self.g, dm)
self.F = F
self.WWW=[]
self.ZZZ=[]
self.EnumerateAlignments(A, B, F, W='', Z='')
return self.WWW, self.ZZZ
def EnumerateAlignments(self, A, B, F, W, Z):
i = len(A)
j = len(B)
if i == 0 and j == 0:
self.WWW.append(W)
self.ZZZ.append(Z)
elif i > 0 and j > 0:
mm = self.compare(A[i - 1], B[j - 1])
if F[j][i] == F[j - 1][i - 1] mm:
self.EnumerateAlignments(A[:i - 1], B[:j - 1], F, A[i-1] W, B[j-1] Z)
if i > 0 and F[j][i] == F[j][i - 1] self.g:
self.EnumerateAlignments(A[:i - 1], B, F, A[i-1] W, '-' Z)
if j > 0 and F[j][i] == F[j - 1][i] self.g:
self.EnumerateAlignments(A, B[:j - 1], F, '-' W, B[j-1] Z)
def ComputeAlignmentScore(self, A, B):
L = [0] * (len(B) 1)
for j in range(len(L)):
L[j] = j * g
K = [0] * (len(B) 1)
for i in range(1, len(A) 1):
L, K = K, L
L[0] = i * g
for j in range(1, len(B) 1):
md = self.compare(A[i - 1], B[j - 1])
L[j] = max(L[j - 1] g, K[j] g, K[j - 1] md)
return L
def Hirschberg(self, A, B):
if len(A) == 0:
WW = ['-'] * len(B)
ZZ = B
elif len(B) == 0:
WW = A
ZZ = ['-'] * len(A)
elif len(A) == 1 or len(B) == 1:
WW, ZZ = self.NeedlemanWunsch(A, B)
else:
i = len(A) // 2
Sl = self.ComputeAlignmentScore(A[:i], B)
B_ = B[::-1]
Sr = self.ComputeAlignmentScore(A[i:][::-1], B_)
Sr.reverse()
S = [Sl[i] Sr[i] for i in range(len(Sl))]
J = [i for i, _ in enumerate(S) if _ == max(S)]
WW, ZZ = [], []
for j in J:
WWl, ZZl = self.Hirschberg(A[:i], B[:j])
WWr, ZZr = self.Hirschberg(A[i:], B[j:], lr='r')
WWn = WWl WWr
ZZn = ZZl ZZr
WW.append(WWn)
ZZ.append(ZZn)
return WW, ZZ
if __name__ == '__main__':
# args = ['-2','1' ,'-1','GACGC','ACTGACG']
# args = ['-1','1' ,'-1','GATTACA','GCATGCG']
args = ['-1', ' 1', '-1', "deep end", 'depend']
solver = Assignment2022(A, B, m, d, g, t=t)
WW, ZZ = solver.Hirschberg(A, B)
print(WW)
print(ZZ)
In the task I read the pseudocode is:
Hirschberg(A, B)
Input: A, the first sequence to be aligned
B, the second sequence to be aligned
Output: WW, all the alignments of A with B
ZZ, all the alignments of B with A corresponding to WW
1 if |A| = 0 then
2 WW ← “ − ” × |B|
3 ZZ ← B
4 else if |B| = 0 then
5 WW ← A
6 ZZ ← “ − ” × |A|
7 else if |A| = 1 or |B| = 1 then
8 (WW , ZZ) ← NeedlemanWunsch(A, B)
9 else
10 i ← ⌊|A|/2⌋
11 Sl ← ComputeAlignmentScore(A[0 . . . i], B)
12 Sr ← ComputeAlignmentScore(A ̃[i . . . |A|], B ̃)
13 S ← Sl ̃Sr
14 J ← Max(S)
15 WW ← CreateList()
16 ZZ ← CreateList()
17 foreach j in J do
18 (WWl, ZZl) ← Hirschberg(A[0 . . . i], B[0 . . . j])
19 (WWr, ZZr) ← Hirschberg(A[i . . . |A|], B[j . . . |B|])
20 UpdateAlignments(WW, ZZ, WWl WWr, ZZl ZZr)
21 return (WW, ZZ)
My problem is how to implement the UpdateAligments function to collect all the Alignments. Please if someone has any Idea let's help me with that because I'm really stuck on that for days.
the example Inputs and outputs:
for input
args = ['-1', ' 1', '-1', "deep end", 'depend']
The alignments must be:
WW = ["deep end"]
ZZ = ["d-ep-end"]
for input
args = ['-1','1' ,'-1','GATTACA','GCATGCG']
The aligments must be:
WW = ["G-ATTACA", "G-ATTACA", "G-ATTACA"]
ZZ = ["GCA-TGCG", "GCAT-GCG", "GCATG-CG"]
CodePudding user response:
I removed the code part so that your professor won't have hard feelings, and i present only the explanation of the algorithm, witch i hope is fine.
Explanation: Every [WWl[i], ZZl[i]] is a possible alignment between a[0:i] and b[0:j]. Similarly, WWr and ZZr are a bunch of ways to align a[i:end] with b[j:end].
If you think it, an alignment of a[0:i] with b[0:j], concatenated with an alignment of a[i:end] with b[j:end], creates an alignment of a with b.
So, for every pair [ WWl[random1] WWr[random2] , ZZl[random1] ZZr[random2] ] you have created an alignment of a with b. (I used " " as concatenation symbol.)
So you must take all the possible alignments that can be created from WWl,WWr,ZZl,ZZr and store them to WW, ZZ, making sure they are unique pairs.
CodePudding user response:
Ta leme Septemvri magkes.
P.L.