mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Support EMBOSS needle/water alignments to the reverse strand (#4142)
* update * update * update * update * update * update * add tests
This commit is contained in:
@ -11,7 +11,7 @@ for example from the needle, water, and stretcher tools.
|
|||||||
"""
|
"""
|
||||||
from Bio.Align import Alignment
|
from Bio.Align import Alignment
|
||||||
from Bio.Align import interfaces
|
from Bio.Align import interfaces
|
||||||
from Bio.Seq import Seq
|
from Bio.Seq import Seq, reverse_complement
|
||||||
from Bio.SeqRecord import SeqRecord
|
from Bio.SeqRecord import SeqRecord
|
||||||
|
|
||||||
|
|
||||||
@ -99,6 +99,7 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
aligned_sequences = [""] * number_of_sequences
|
aligned_sequences = [""] * number_of_sequences
|
||||||
consensus = ""
|
consensus = ""
|
||||||
starts = [0] * number_of_sequences
|
starts = [0] * number_of_sequences
|
||||||
|
ends = [0] * number_of_sequences
|
||||||
column = 0
|
column = 0
|
||||||
index = 0
|
index = 0
|
||||||
continue
|
continue
|
||||||
@ -171,14 +172,19 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
n = len(sequences)
|
n = len(sequences)
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
start = starts[i]
|
start = starts[i]
|
||||||
if start == 0:
|
end = ends[i]
|
||||||
sequence = Seq(sequences[i])
|
if start < end:
|
||||||
else:
|
|
||||||
coordinates[i, :] += start
|
coordinates[i, :] += start
|
||||||
|
data = sequences[i]
|
||||||
|
else:
|
||||||
|
start, end = end, start
|
||||||
|
coordinates[i, :] = end - coordinates[i, :]
|
||||||
|
data = reverse_complement(sequences[i])
|
||||||
|
if start == 0:
|
||||||
|
sequence = Seq(data)
|
||||||
|
else:
|
||||||
# create a partially defined sequence
|
# create a partially defined sequence
|
||||||
length = start + len(sequences[i])
|
sequence = Seq({start: data}, length=end)
|
||||||
data = {start: sequences[i]}
|
|
||||||
sequence = Seq(data, length=length)
|
|
||||||
record = SeqRecord(sequence, identifiers[i])
|
record = SeqRecord(sequence, identifiers[i])
|
||||||
records.append(record)
|
records.append(record)
|
||||||
alignment = Alignment(records, coordinates)
|
alignment = Alignment(records, coordinates)
|
||||||
@ -198,21 +204,43 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
identifier, start = prefix.split(None, 1)
|
identifier, start = prefix.split(None, 1)
|
||||||
assert identifiers[index].startswith(identifier)
|
assert identifiers[index].startswith(identifier)
|
||||||
aligned_sequence, end = line[21:].split(None, 1)
|
aligned_sequence, end = line[21:].split(None, 1)
|
||||||
start = int(start) - 1 # Python counting
|
start = int(start)
|
||||||
end = int(end)
|
end = int(end)
|
||||||
length = len(sequences[index])
|
length = len(sequences[index])
|
||||||
sequence = aligned_sequence.replace("-", "")
|
sequence = aligned_sequence.replace("-", "")
|
||||||
if length == 0 and len(sequence) > 0:
|
if length == 0 and len(sequence) > 0:
|
||||||
|
if start < end:
|
||||||
|
start -= 1 # Python counting
|
||||||
|
assert end == start + len(sequence)
|
||||||
|
else:
|
||||||
|
end -= 1 # Python counting
|
||||||
|
assert end == start - len(sequence)
|
||||||
# Record the start
|
# Record the start
|
||||||
starts[index] = start
|
starts[index] = start
|
||||||
else:
|
else:
|
||||||
if (
|
if starts[index] <= ends[index]:
|
||||||
self.metadata["Align_format"] == "srspair"
|
# forward strand
|
||||||
and len(sequence) == 0
|
if (
|
||||||
):
|
self.metadata["Align_format"] == "srspair"
|
||||||
start += 1
|
and len(sequence) == 0
|
||||||
assert start == starts[index] + length
|
):
|
||||||
assert end == start + len(sequence)
|
assert start == ends[index]
|
||||||
|
assert end == start
|
||||||
|
else:
|
||||||
|
start -= 1
|
||||||
|
assert end == start + len(sequence)
|
||||||
|
else:
|
||||||
|
if (
|
||||||
|
self.metadata["Align_format"] == "srspair"
|
||||||
|
and len(sequence) == 0
|
||||||
|
):
|
||||||
|
assert start - 1 == ends[index]
|
||||||
|
assert end == start
|
||||||
|
else:
|
||||||
|
end -= 1
|
||||||
|
assert end == start - len(sequence)
|
||||||
|
# Record the end
|
||||||
|
ends[index] = end
|
||||||
sequences[index] += sequence
|
sequences[index] += sequence
|
||||||
aligned_sequences[index] += aligned_sequence
|
aligned_sequences[index] += aligned_sequence
|
||||||
if index == 0:
|
if index == 0:
|
||||||
|
47
Tests/Emboss/water_reverse1.txt
Normal file
47
Tests/Emboss/water_reverse1.txt
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
########################################
|
||||||
|
# Program: water
|
||||||
|
# Rundate: Sat 22 Oct 2022 23:47:41
|
||||||
|
# Commandline: water
|
||||||
|
# -asequence seqA.fa
|
||||||
|
# -bsequence seqB.fa
|
||||||
|
# -gapopen 0.001
|
||||||
|
# -gapextend 0.001
|
||||||
|
# -sreverse1
|
||||||
|
# -outfile water_reverse1.txt
|
||||||
|
# Align_format: srspair
|
||||||
|
# Report_file: water_reverse1.txt
|
||||||
|
########################################
|
||||||
|
|
||||||
|
#=======================================
|
||||||
|
#
|
||||||
|
# Aligned_sequences: 2
|
||||||
|
# 1: seqA
|
||||||
|
# 2: seqB
|
||||||
|
# Matrix: EDNAFULL
|
||||||
|
# Gap_penalty: 0.001
|
||||||
|
# Extend_penalty: 0.001
|
||||||
|
#
|
||||||
|
# Length: 121
|
||||||
|
# Identity: 32/121 (26.4%)
|
||||||
|
# Similarity: 32/121 (26.4%)
|
||||||
|
# Gaps: 89/121 (73.6%)
|
||||||
|
# Score: 159.911
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#=======================================
|
||||||
|
|
||||||
|
seqA 121 GGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT 72
|
||||||
|
|||||||||||||||||||
|
||||||
|
seqB 1 GGGGGGGGGGGGGGGGGGG------------------------------- 19
|
||||||
|
|
||||||
|
seqA 71 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT 22
|
||||||
|
|
||||||
|
seqB 19 -------------------------------------------------- 19
|
||||||
|
|
||||||
|
seqA 21 TTTTTTTTCCCCCCCCCCCCC 1
|
||||||
|
|||||||||||||
|
||||||
|
seqB 20 --------CCCCCCCCCCCCC 32
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------
|
||||||
|
#---------------------------------------
|
47
Tests/Emboss/water_reverse2.txt
Normal file
47
Tests/Emboss/water_reverse2.txt
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
########################################
|
||||||
|
# Program: water
|
||||||
|
# Rundate: Sun 23 Oct 2022 00:06:18
|
||||||
|
# Commandline: water
|
||||||
|
# -asequence seqA.fa
|
||||||
|
# -bsequence seqB.fa
|
||||||
|
# -gapopen 0.001
|
||||||
|
# -gapextend 0.001
|
||||||
|
# -sreverse2
|
||||||
|
# -outfile water_reverse2.txt
|
||||||
|
# Align_format: srspair
|
||||||
|
# Report_file: water_reverse2.txt
|
||||||
|
########################################
|
||||||
|
|
||||||
|
#=======================================
|
||||||
|
#
|
||||||
|
# Aligned_sequences: 2
|
||||||
|
# 1: seqA
|
||||||
|
# 2: seqB
|
||||||
|
# Matrix: EDNAFULL
|
||||||
|
# Gap_penalty: 0.001
|
||||||
|
# Extend_penalty: 0.001
|
||||||
|
#
|
||||||
|
# Length: 121
|
||||||
|
# Identity: 32/121 (26.4%)
|
||||||
|
# Similarity: 32/121 (26.4%)
|
||||||
|
# Gaps: 89/121 (73.6%)
|
||||||
|
# Score: 159.911
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#=======================================
|
||||||
|
|
||||||
|
seqA 1 GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 50
|
||||||
|
|||||||||||||
|
||||||
|
seqB 32 GGGGGGGGGGGGG------------------------------------- 20
|
||||||
|
|
||||||
|
seqA 51 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 100
|
||||||
|
|
||||||
|
seqB 20 -------------------------------------------------- 20
|
||||||
|
|
||||||
|
seqA 101 AACCCCCCCCCCCCCCCCCCC 121
|
||||||
|
|||||||||||||||||||
|
||||||
|
seqB 19 --CCCCCCCCCCCCCCCCCCC 1
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------
|
||||||
|
#---------------------------------------
|
39
Tests/Emboss/water_reverse3.txt
Normal file
39
Tests/Emboss/water_reverse3.txt
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
########################################
|
||||||
|
# Program: water
|
||||||
|
# Rundate: Sat 22 Oct 2022 22:56:03
|
||||||
|
# Commandline: water
|
||||||
|
# -asequence seqA.fa
|
||||||
|
# -bsequence seqB.fa
|
||||||
|
# -gapopen 1
|
||||||
|
# -gapextend 0.5
|
||||||
|
# -sreverse1
|
||||||
|
# -outfile water_reverse3.txt
|
||||||
|
# Align_format: srspair
|
||||||
|
# Report_file: water_reverse3.txt
|
||||||
|
########################################
|
||||||
|
|
||||||
|
#=======================================
|
||||||
|
#
|
||||||
|
# Aligned_sequences: 2
|
||||||
|
# 1: seqA
|
||||||
|
# 2: seqB
|
||||||
|
# Matrix: EDNAFULL
|
||||||
|
# Gap_penalty: 1.0
|
||||||
|
# Extend_penalty: 0.5
|
||||||
|
#
|
||||||
|
# Length: 19
|
||||||
|
# Identity: 16/19 (84.2%)
|
||||||
|
# Similarity: 16/19 (84.2%)
|
||||||
|
# Gaps: 3/19 (15.8%)
|
||||||
|
# Score: 77.5
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#=======================================
|
||||||
|
|
||||||
|
seqA 20 TTTTTTTAAA-CCGGGCCC 3
|
||||||
|
||||||| | ||||||||
|
||||||
|
seqB 3 TTTTTTT--ACCCGGGCCC 19
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------
|
||||||
|
#---------------------------------------
|
39
Tests/Emboss/water_reverse4.txt
Normal file
39
Tests/Emboss/water_reverse4.txt
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
########################################
|
||||||
|
# Program: water
|
||||||
|
# Rundate: Sat 22 Oct 2022 22:56:15
|
||||||
|
# Commandline: water
|
||||||
|
# -asequence seqA.fa
|
||||||
|
# -bsequence seqB.fa
|
||||||
|
# -gapopen 1
|
||||||
|
# -gapextend 0.5
|
||||||
|
# -sreverse2
|
||||||
|
# -outfile water_reverse4.txt
|
||||||
|
# Align_format: srspair
|
||||||
|
# Report_file: water_reverse4.txt
|
||||||
|
########################################
|
||||||
|
|
||||||
|
#=======================================
|
||||||
|
#
|
||||||
|
# Aligned_sequences: 2
|
||||||
|
# 1: seqA
|
||||||
|
# 2: seqB
|
||||||
|
# Matrix: EDNAFULL
|
||||||
|
# Gap_penalty: 1.0
|
||||||
|
# Extend_penalty: 0.5
|
||||||
|
#
|
||||||
|
# Length: 19
|
||||||
|
# Identity: 16/19 (84.2%)
|
||||||
|
# Similarity: 16/19 (84.2%)
|
||||||
|
# Gaps: 3/19 (15.8%)
|
||||||
|
# Score: 77.5
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#=======================================
|
||||||
|
|
||||||
|
seqA 3 GGGCCCGGTT-TAAAAAAA 20
|
||||||
|
|||||||| ||||||||
|
||||||
|
seqB 19 GGGCCCGG--GTAAAAAAA 3
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------
|
||||||
|
#---------------------------------------
|
@ -1214,6 +1214,223 @@ numpy.array([['K', 'I', 'L', 'I', 'V', 'D', 'D', 'Q', 'Y', 'G', 'I', 'R', 'I',
|
|||||||
with self.assertRaises(StopIteration):
|
with self.assertRaises(StopIteration):
|
||||||
next(alignments)
|
next(alignments)
|
||||||
|
|
||||||
|
def test_water_reverse1(self):
|
||||||
|
# water -asequence seqA.fa -bsequence seqB.fa -gapopen 10 -gapextend 0.5 -sreverse1 -outfile water_reverse1.txt
|
||||||
|
path = "Emboss/water_reverse1.txt"
|
||||||
|
alignments = Align.parse(path, "emboss")
|
||||||
|
self.assertEqual(alignments.metadata["Program"], "water")
|
||||||
|
self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 23:47:41")
|
||||||
|
self.assertEqual(
|
||||||
|
alignments.metadata["Command line"],
|
||||||
|
"water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse1 -outfile water_reverse1.txt",
|
||||||
|
)
|
||||||
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
||||||
|
self.assertEqual(alignments.metadata["Report_file"], "water_reverse1.txt")
|
||||||
|
|
||||||
|
alignment = next(alignments)
|
||||||
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 0.001)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.001)
|
||||||
|
self.assertEqual(alignment.annotations["Identity"], 32)
|
||||||
|
self.assertEqual(alignment.annotations["Similarity"], 32)
|
||||||
|
self.assertEqual(alignment.annotations["Gaps"], 89)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Score"], 159.911)
|
||||||
|
self.assertEqual(len(alignment), 2)
|
||||||
|
self.assertEqual(alignment.shape, (2, 121))
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "seqA")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "seqB")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
"GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[1].seq,
|
||||||
|
"GGGGGGGGGGGGGGGGGGGCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertTrue(
|
||||||
|
numpy.array_equal(
|
||||||
|
alignment.coordinates,
|
||||||
|
# fmt: off
|
||||||
|
# flake8: noqa
|
||||||
|
numpy.array([[121, 102, 13, 0],
|
||||||
|
[ 0, 19, 19, 32]])
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[0],
|
||||||
|
"GGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1],
|
||||||
|
"GGGGGGGGGGGGGGGGGGG-----------------------------------------------------------------------------------------CCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["emboss_consensus"],
|
||||||
|
"||||||||||||||||||| |||||||||||||",
|
||||||
|
)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
|
||||||
|
def test_water_reverse2(self):
|
||||||
|
# water -asequence seqA.fa -bsequence seqB.fa -gapopen 10 -gapextend 0.5 -sreverse2 -outfile water_reverse2.txt
|
||||||
|
path = "Emboss/water_reverse2.txt"
|
||||||
|
alignments = Align.parse(path, "emboss")
|
||||||
|
self.assertEqual(alignments.metadata["Program"], "water")
|
||||||
|
self.assertEqual(alignments.metadata["Rundate"], "Sun 23 Oct 2022 00:06:18")
|
||||||
|
self.assertEqual(
|
||||||
|
alignments.metadata["Command line"],
|
||||||
|
"water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse2 -outfile water_reverse2.txt",
|
||||||
|
)
|
||||||
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
||||||
|
self.assertEqual(alignments.metadata["Report_file"], "water_reverse2.txt")
|
||||||
|
alignment = next(alignments)
|
||||||
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 0.001)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.001)
|
||||||
|
self.assertEqual(alignment.annotations["Identity"], 32)
|
||||||
|
self.assertEqual(alignment.annotations["Similarity"], 32)
|
||||||
|
self.assertEqual(alignment.annotations["Gaps"], 89)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Score"], 159.911)
|
||||||
|
self.assertEqual(len(alignment), 2)
|
||||||
|
self.assertEqual(alignment.shape, (2, 121))
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "seqA")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "seqB")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
"GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[1].seq,
|
||||||
|
"GGGGGGGGGGGGGGGGGGGCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertTrue(
|
||||||
|
numpy.array_equal(
|
||||||
|
alignment.coordinates,
|
||||||
|
# fmt: off
|
||||||
|
numpy.array([[ 0, 13, 102, 121],
|
||||||
|
[32, 19, 19, 0]])
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[0],
|
||||||
|
"GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1],
|
||||||
|
"GGGGGGGGGGGGG-----------------------------------------------------------------------------------------CCCCCCCCCCCCCCCCCCC",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["emboss_consensus"],
|
||||||
|
"||||||||||||| |||||||||||||||||||",
|
||||||
|
)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
|
||||||
|
def test_water_reverse3(self):
|
||||||
|
# water -asequence seqA.fa -bsequence seqB.fa -gapopen 10 -gapextend 0.5 -sreverse1 -outfile water_reverse3.txt
|
||||||
|
path = "Emboss/water_reverse3.txt"
|
||||||
|
alignments = Align.parse(path, "emboss")
|
||||||
|
self.assertEqual(alignments.metadata["Program"], "water")
|
||||||
|
self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 22:56:03")
|
||||||
|
self.assertEqual(
|
||||||
|
alignments.metadata["Command line"],
|
||||||
|
"water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse1 -outfile water_reverse3.txt",
|
||||||
|
)
|
||||||
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
||||||
|
self.assertEqual(alignments.metadata["Report_file"], "water_reverse3.txt")
|
||||||
|
|
||||||
|
alignment = next(alignments)
|
||||||
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 1.0)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
||||||
|
self.assertEqual(alignment.annotations["Identity"], 16)
|
||||||
|
self.assertEqual(alignment.annotations["Similarity"], 16)
|
||||||
|
self.assertEqual(alignment.annotations["Gaps"], 3)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Score"], 77.5)
|
||||||
|
self.assertEqual(len(alignment), 2)
|
||||||
|
self.assertEqual(alignment.shape, (2, 19))
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "seqA")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "seqB")
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment.sequences[0].seq),
|
||||||
|
"Seq({2: 'GGGCCCGGTTTAAAAAAA'}, length=20)",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment.sequences[1].seq),
|
||||||
|
"Seq({2: 'TTTTTTTACCCGGGCCC'}, length=19)",
|
||||||
|
)
|
||||||
|
self.assertTrue(
|
||||||
|
numpy.array_equal(
|
||||||
|
alignment.coordinates,
|
||||||
|
# fmt: off
|
||||||
|
# flake8: noqa
|
||||||
|
numpy.array([[20, 13, 11, 10, 10, 2],
|
||||||
|
[ 2, 9, 9, 10, 11, 19]])
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.assertEqual(alignment[0], "TTTTTTTAAA-CCGGGCCC")
|
||||||
|
self.assertEqual(alignment[1], "TTTTTTT--ACCCGGGCCC")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["emboss_consensus"],
|
||||||
|
"||||||| | ||||||||",
|
||||||
|
)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
|
||||||
|
def test_water_reverse4(self):
|
||||||
|
# water -asequence seqA.fa -bsequence seqB.fa -gapopen 10 -gapextend 0.5 -sreverse2 -outfile water_reverse4.txt
|
||||||
|
path = "Emboss/water_reverse4.txt"
|
||||||
|
alignments = Align.parse(path, "emboss")
|
||||||
|
self.assertEqual(alignments.metadata["Program"], "water")
|
||||||
|
self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 22:56:15")
|
||||||
|
self.assertEqual(
|
||||||
|
alignments.metadata["Command line"],
|
||||||
|
"water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse2 -outfile water_reverse4.txt",
|
||||||
|
)
|
||||||
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
||||||
|
self.assertEqual(alignments.metadata["Report_file"], "water_reverse4.txt")
|
||||||
|
alignment = next(alignments)
|
||||||
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 1.0)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
||||||
|
self.assertEqual(alignment.annotations["Identity"], 16)
|
||||||
|
self.assertEqual(alignment.annotations["Similarity"], 16)
|
||||||
|
self.assertEqual(alignment.annotations["Gaps"], 3)
|
||||||
|
self.assertAlmostEqual(alignment.annotations["Score"], 77.5)
|
||||||
|
self.assertEqual(len(alignment), 2)
|
||||||
|
self.assertEqual(alignment.shape, (2, 19))
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "seqA")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "seqB")
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment.sequences[0].seq),
|
||||||
|
"Seq({2: 'GGGCCCGGTTTAAAAAAA'}, length=20)",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment.sequences[1].seq),
|
||||||
|
"Seq({2: 'TTTTTTTACCCGGGCCC'}, length=19)",
|
||||||
|
)
|
||||||
|
self.assertTrue(
|
||||||
|
numpy.array_equal(
|
||||||
|
alignment.coordinates,
|
||||||
|
# fmt: off
|
||||||
|
# flake8: noqa
|
||||||
|
numpy.array([[ 2, 10, 12, 12, 20],
|
||||||
|
[19, 11, 11, 10, 2]])
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.assertEqual(alignment[0], "GGGCCCGGTT-TAAAAAAA")
|
||||||
|
self.assertEqual(alignment[1], "GGGCCCGG--GTAAAAAAA")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["emboss_consensus"],
|
||||||
|
"|||||||| ||||||||",
|
||||||
|
)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
|
||||||
def test_pair_aln_full_blank_line(self):
|
def test_pair_aln_full_blank_line(self):
|
||||||
path = "Emboss/emboss_pair_aln_full_blank_line.txt"
|
path = "Emboss/emboss_pair_aln_full_blank_line.txt"
|
||||||
alignments = Align.parse(path, "emboss")
|
alignments = Align.parse(path, "emboss")
|
||||||
|
Reference in New Issue
Block a user