mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
* update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * updat * update * update * update * update * update * update * update * update * update * update * update * update * halfway finished * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * pdate * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * replace precompiler #defines by inline functions * update * update * add tests * update * update * update * update * update * update * update * update * documentation * update * avoid stpcpy * pointer printing * compiler warning * testing without codonalign * testing without codonalign and pairwisealigner * compiler warning * adding codonalign and pairwisealigner back in * remove inline from check_indices * add inline to check_indices * update * add test line 12287 test_pairwise_aligner.py * update * update * update * update * change dtype from int32 to "i" * all done * testing * testing * testing * testing * done * done * fix how pointers are printed on Windows * update * update * update * update * fix id printing on pypy * style change only * Use Py_uintptr_t instead of uintptr_t * fix memory leak * remove double 
semicolon * check if GitHub actions are now pickup up Python version 3.13.5 without hardcoding it --------- Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local> Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp> Co-authored-by: Michiel de Hoon <michiel.dehoon@riken.jp>
3428 lines
189 KiB
Python
3428 lines
189 KiB
Python
# Copyright 2008-2014 by Peter Cock. All rights reserved.
|
|
#
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Tests for Bio.Align.emboss module."""
|
|
import unittest
|
|
from io import StringIO
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
from Bio import Align
|
|
from Bio.Align import substitution_matrices
|
|
|
|
substitution_matrix = substitution_matrices.load("BLOSUM62")
|
|
|
|
# numpy is needed by the array comparisons in these tests; when it is missing,
# raise Biopython's MissingPythonDependencyError instead of a bare ImportError.
try:
    import numpy as np
except ImportError:
    from Bio import MissingPythonDependencyError

    # "from None" suppresses the chained ImportError traceback.
    raise MissingPythonDependencyError(
        "Install numpy if you want to use Bio.Align.emboss."
    ) from None
|
|
|
|
|
|
class TestEmboss(unittest.TestCase):
|
|
def test_pair_example(self):
    """Parse Emboss/water.txt and check the single alignment it contains.

    Verifies the file-level metadata, the per-alignment annotations, the
    sequences and coordinates, the printed and array representations, and
    the AlignmentCounts computed with the BLOSUM62 substitution matrix.
    Finally checks that the iterator is exhausted after one alignment.
    """
    # Alignment file obtained from EMBOSS:
    # http://emboss.sourceforge.net/docs/themes/alnformats/align.pair
    path = "Emboss/water.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Wed Jan 16 17:23:19 2002")
    self.assertEqual(alignments.metadata["Report_file"], "stdout")
    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
    self.assertEqual(alignment.annotations["Identity"], 112)
    self.assertEqual(alignment.annotations["Similarity"], 112)
    self.assertEqual(alignment.annotations["Gaps"], 19)
    self.assertAlmostEqual(alignment.annotations["Score"], 591.5)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 131))
    self.assertEqual(alignment.sequences[0].id, "IXI_234")
    self.assertEqual(alignment.sequences[1].id, "IXI_235")
    self.assertEqual(
        alignment.sequences[0].seq,
        "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
    )
    self.assertEqual(
        alignment.sequences[1].seq,
        "TSPASIRPPAGPSSRRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
    )
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            np.array([[0, 15, 24, 74, 84, 131], [0, 15, 15, 65, 65, 112]]),
        )
    )
    self.assertEqual(
        alignment[0],
        "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
    )
    self.assertEqual(
        alignment[1],
        "TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
    )
    # NOTE(review): this literal is shorter than the 131 columns asserted
    # above (the gap runs of 9 and 10 columns each show as one space), so
    # whitespace appears to have been collapsed in this copy of the file --
    # restore from the upstream source before relying on it.
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"],
        "||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||",
    )
    # NOTE(review): column padding inside the expected text below may have
    # been collapsed the same way -- confirm against upstream.
    self.assertEqual(
        str(alignment),
        """\
IXI_234 0 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
0 |||||||||||||||---------||||||||||||||||||||||||||||||||||||
IXI_235 0 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC

IXI_234 60 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
60 ||||||||||||||----------||||||||||||||||||||||||||||||||||||
IXI_235 51 TTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG

IXI_234 120 PPAWAGDRSHE 131
120 ||||||||||| 131
IXI_235 101 PPAWAGDRSHE 112
""",
    )
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S', 'S',
                       'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', 'R', 'T', 'R', 'P', 'S', 'P',
                       'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C', 'C', 'S', 'A',
                       'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G', 'W', 'K', 'T', 'C',
                       'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S', 'T', 'R', 'H', 'R', 'G',
                       'R', 'S', 'G', 'W', 'S', 'A', 'R', 'T', 'T', 'T', 'A', 'A', 'C', 'L',
                       'R', 'A', 'S', 'R', 'K', 'S', 'M', 'R', 'A', 'A', 'C', 'S', 'R', 'S',
                       'A', 'G', 'S', 'R', 'P', 'N', 'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S',
                       'S', 'C', 'I', 'T', 'S', 'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G',
                       'D', 'R', 'S', 'H', 'E'],
                      ['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S', 'S',
                       'R', '-', '-', '-', '-', '-', '-', '-', '-', '-', 'R', 'P', 'S', 'P',
                       'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C', 'C', 'S', 'A',
                       'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G', 'W', 'K', 'T', 'C',
                       'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S', 'T', 'R', 'H', 'R', 'G',
                       'R', 'S', 'G', 'W', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-',
                       'R', 'A', 'S', 'R', 'K', 'S', 'M', 'R', 'A', 'A', 'C', 'S', 'R', 'S',
                       'A', 'G', 'S', 'R', 'P', 'N', 'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S',
                       'S', 'C', 'I', 'T', 'S', 'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G',
                       'D', 'R', 'S', 'H', 'E']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts(substitution_matrix)
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (substitution score = 620.0; 112 aligned letters; 112 identities; 0 mismatches; 112 positives; 19 gaps) at 0x%x>"
        % id(counts),
    )
    # NOTE(review): any leading indentation of the lines in the expected text
    # below is not visible in this copy -- confirm against upstream.
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
substitution_score = 620.0,
aligned = 112:
identities = 112,
positives = 112,
mismatches = 0.
gaps = 19:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 19:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 19:
open_internal_deletions = 2,
extend_internal_deletions = 17;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 19)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 19)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 19)
    self.assertEqual(counts.gaps, 19)
    self.assertEqual(counts.aligned, 112)
    self.assertEqual(counts.identities, 112)
    self.assertEqual(counts.mismatches, 0)
    self.assertEqual(counts.positives, 112)
    # The file holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_local_water2(self):
    """Test parsing a local alignment.

    Reads Emboss/water2.txt, checks the metadata and annotations, the
    partially defined target sequence (only positions 78-95 are known),
    the coordinates, the printed and array representations, and the
    AlignmentCounts computed without a substitution matrix.
    """
    path = "Emboss/water2.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Sat Apr 04 2009 22:08:44")
    self.assertEqual(
        alignments.metadata["Command line"],
        "water -asequence asis:ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG -bsequence asis:CGTTTGAGTACTGGGATG -gapopen 10 -gapextend 0.5 -filter",
    )
    self.assertEqual(alignments.metadata["Align_format"], "srspair")
    self.assertEqual(alignments.metadata["Report_file"], "stdout")
    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
    self.assertEqual(alignment.annotations["Identity"], 17)
    self.assertEqual(alignment.annotations["Similarity"], 17)
    self.assertEqual(alignment.annotations["Gaps"], 1)
    self.assertAlmostEqual(alignment.annotations["Score"], 75.0)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 18))
    self.assertEqual(alignment.sequences[0].id, "asis")
    self.assertEqual(alignment.sequences[1].id, "asis")
    self.assertEqual(
        repr(alignment.sequences[0].seq),
        "Seq({78: 'CGTTTGAGTCTGGGATG'}, length=95)",
    )
    self.assertEqual(alignment.sequences[0].seq[78:95], "CGTTTGAGTCTGGGATG")
    self.assertEqual(alignment.sequences[1].seq[0:18], "CGTTTGAGTACTGGGATG")
    self.assertTrue(
        np.array_equal(
            alignment.coordinates, np.array([[78, 87, 87, 95], [0, 9, 10, 18]])
        )
    )
    self.assertEqual(alignment[0], "CGTTTGAGT-CTGGGATG")
    self.assertEqual(alignment[1], "CGTTTGAGTACTGGGATG")
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"], "||||||||| ||||||||"
    )
    # NOTE(review): column padding inside the expected text below may have
    # been collapsed in this copy of the file -- confirm against upstream.
    self.assertEqual(
        str(alignment),
        """\
asis 78 CGTTTGAGT-CTGGGATG 95
0 |||||||||-|||||||| 18
asis 0 CGTTTGAGTACTGGGATG 18
""",
    )
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['C', 'G', 'T', 'T', 'T', 'G', 'A', 'G', 'T', '-', 'C', 'T', 'G',
                       'G', 'G', 'A', 'T', 'G'],
                      ['C', 'G', 'T', 'T', 'T', 'G', 'A', 'G', 'T', 'A', 'C', 'T', 'G',
                       'G', 'G', 'A', 'T', 'G']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (17 aligned letters; 17 identities; 0 mismatches; 1 gaps) at 0x%x>"
        % id(counts),
    )
    # NOTE(review): any leading indentation of the lines in the expected text
    # below is not visible in this copy -- confirm against upstream.
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 17:
identities = 17,
mismatches = 0.
gaps = 1:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1:
internal_insertions = 1:
open_internal_insertions = 1,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 1)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 1)
    self.assertEqual(counts.insertions, 1)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 1)
    self.assertEqual(counts.aligned, 17)
    self.assertEqual(counts.identities, 17)
    self.assertEqual(counts.mismatches, 0)
    # The file holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_matcher_simple(self):
    """Parse Emboss/matcher_simple.txt and check the single alignment in it.

    Verifies the metadata, annotations, partially defined sequences,
    coordinates, printed and array representations, and the
    AlignmentCounts computed with the BLOSUM62 substitution matrix.
    """
    path = "Emboss/matcher_simple.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "matcher")
    self.assertEqual(alignments.metadata["Rundate"], "Tue 8 Dec 2009 11:48:35")
    self.assertEqual(
        alignments.metadata["Command line"],
        "matcher [-asequence] rose.pro [-bsequence] rosemary.pro [-outfile] matcher_simple.txt -auto -sprotein -aformat simple",
    )
    self.assertEqual(alignments.metadata["Align_format"], "simple")
    self.assertEqual(alignments.metadata["Report_file"], "matcher_simple.txt")
    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
    self.assertEqual(alignment.annotations["Identity"], 7)
    self.assertEqual(alignment.annotations["Similarity"], 8)
    self.assertEqual(alignment.annotations["Gaps"], 0)
    self.assertAlmostEqual(alignment.annotations["Score"], 29)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 16))
    self.assertEqual(alignment.sequences[0].id, "AF069992_1")
    self.assertEqual(alignment.sequences[1].id, "CAA85685.1")
    self.assertEqual(
        repr(alignment.sequences[0].seq),
        "Seq({72: 'GPPPQSPDENRAGESS'}, length=88)",
    )
    self.assertEqual(
        repr(alignment.sequences[1].seq),
        "Seq({46: 'GVPPEEAGAAVAAESS'}, length=62)",
    )
    self.assertEqual(alignment.sequences[0].seq[72:88], "GPPPQSPDENRAGESS")
    self.assertEqual(alignment.sequences[1].seq[46:62], "GVPPEEAGAAVAAESS")
    self.assertTrue(
        np.array_equal(alignment.coordinates, np.array([[72, 88], [46, 62]]))
    )
    self.assertEqual(alignment[0], "GPPPQSPDENRAGESS")
    self.assertEqual(alignment[1], "GVPPEEAGAAVAAESS")
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"], "|.||:......|.|||"
    )
    # NOTE(review): column padding inside the expected text below may have
    # been collapsed in this copy of the file -- confirm against upstream.
    self.assertEqual(
        str(alignment),
        """\
AF069992_ 72 GPPPQSPDENRAGESS 88
0 |.||.......|.||| 16
CAA85685. 46 GVPPEEAGAAVAAESS 62
""",
    )
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['G', 'P', 'P', 'P', 'Q', 'S', 'P', 'D', 'E', 'N', 'R', 'A', 'G',
                       'E', 'S', 'S'],
                      ['G', 'V', 'P', 'P', 'E', 'E', 'A', 'G', 'A', 'A', 'V', 'A', 'A',
                       'E', 'S', 'S']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts(substitution_matrix)
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (substitution score = 29.0; 16 aligned letters; 7 identities; 9 mismatches; 8 positives; 0 gaps) at 0x%x>"
        % id(counts),
    )
    # NOTE(review): any leading indentation of the lines in the expected text
    # below is not visible in this copy -- confirm against upstream.
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
substitution_score = 29.0,
aligned = 16:
identities = 7,
positives = 8,
mismatches = 9.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 0)
    self.assertEqual(counts.aligned, 16)
    self.assertEqual(counts.identities, 7)
    self.assertEqual(counts.mismatches, 9)
    self.assertEqual(counts.positives, 8)
    # The file holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_matcher_pair(self):
    """Run check_matcher_pair on Emboss/matcher_pair.txt via several entry points.

    The same checks are applied to: the object returned by Align.parse for
    a path, the result of calling iter() on that object, a parser used as a
    context manager, and a parser reading from an already-open temporary
    file. After each ``with Align.parse(...)`` block, accessing ``_stream``
    on the parser is expected to raise AttributeError.
    """
    path = "Emboss/matcher_pair.txt"
    alignments = Align.parse(path, "emboss")
    self.check_matcher_pair(alignments)
    alignments = iter(alignments)
    self.check_matcher_pair(alignments)
    # NOTE(review): indentation is missing in this copy of the file; the
    # assertRaises blocks are placed after their ``with Align.parse`` blocks
    # because the second such block has only ``pass`` as its body -- confirm.
    with Align.parse(path, "emboss") as alignments:
        self.check_matcher_pair(alignments)
    with self.assertRaises(AttributeError):
        alignments._stream
    with Align.parse(path, "emboss") as alignments:
        pass
    with self.assertRaises(AttributeError):
        alignments._stream
    with open(path) as stream:
        data = stream.read()
    # Use the temporary file as a context manager so that it is closed (and
    # removed) even when one of the checks fails.
    with NamedTemporaryFile("w+t") as stream:
        stream.write(data)
        stream.seek(0)
        alignments = Align.parse(stream, "emboss")
        self.check_matcher_pair(alignments)
|
|
|
|
def check_matcher_pair(self, alignments):
|
|
self.assertEqual(alignments.metadata["Program"], "matcher")
|
|
self.assertEqual(alignments.metadata["Rundate"], "Tue 8 Dec 2009 12:01:34")
|
|
self.assertEqual(
|
|
alignments.metadata["Command line"],
|
|
"matcher [-asequence] hba_human.fasta [-bsequence] hbb_human.fasta [-outfile] matcher_pair.txt -alternatives 5 -aformat pair -sprotein",
|
|
)
|
|
self.assertEqual(alignments.metadata["Align_format"], "pair")
|
|
self.assertEqual(alignments.metadata["Report_file"], "matcher_pair.txt")
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 63)
|
|
self.assertEqual(alignment.annotations["Similarity"], 88)
|
|
self.assertEqual(alignment.annotations["Gaps"], 8)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 264)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 145))
|
|
self.assertEqual(alignment.sequences[0].id, "HBA_HUMAN")
|
|
self.assertEqual(alignment.sequences[1].id, "HBB_HUMAN")
|
|
self.assertEqual(
|
|
repr(alignment.sequences[0].seq),
|
|
"Seq({2: 'LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQV...SKY'}, length=141)",
|
|
)
|
|
self.assertEqual(
|
|
repr(alignment.sequences[1].seq),
|
|
"Seq({3: 'LTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMG...HKY'}, length=146)",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq[2:141],
|
|
"LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq[3:146],
|
|
"LTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKY",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
np.array(
|
|
[[2, 18, 20, 47, 47, 51, 51, 141], [3, 19, 19, 46, 47, 51, 56, 146]]
|
|
),
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
alignment[0],
|
|
"LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLSH-----GSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|
"LTPEEKSAVTALWGKV--NVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKY",
|
|
)
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
"|:|.:|:.|.|.|||| :..|.|.|||.|:.:.:|.|:.:|..| |||. |:.:||.|||||..|.::.:||:|::....:.||:||..||.|||.||:||.:.|:..||.|...||||.|.|:..|.:|.|:..|..||",
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
HBA_HUMAN 2 LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLSH-----GSAQV
|
|
0 |.|..|..|.|.||||--...|.|.|||.|.....|.|...|..|-|||.-----|...|
|
|
HBB_HUMAN 3 LTPEEKSAVTALWGKV--NVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKV
|
|
|
|
HBA_HUMAN 56 KGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPA
|
|
60 |.|||||..|.....||.|........||.||..||.|||.||.||...|...||.|...
|
|
HBB_HUMAN 61 KAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGK
|
|
|
|
HBA_HUMAN 116 EFTPAVHASLDKFLASVSTVLTSKY 141
|
|
120 ||||.|.|...|..|.|...|..|| 145
|
|
HBB_HUMAN 121 EFTPPVQAAYQKVVAGVANALAHKY 146
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['L', 'S', 'P', 'A', 'D', 'K', 'T', 'N', 'V', 'K', 'A', 'A', 'W',
|
|
'G', 'K', 'V', 'G', 'A', 'H', 'A', 'G', 'E', 'Y', 'G', 'A', 'E',
|
|
'A', 'L', 'E', 'R', 'M', 'F', 'L', 'S', 'F', 'P', 'T', 'T', 'K',
|
|
'T', 'Y', 'F', 'P', 'H', 'F', '-', 'D', 'L', 'S', 'H', '-', '-',
|
|
'-', '-', '-', 'G', 'S', 'A', 'Q', 'V', 'K', 'G', 'H', 'G', 'K',
|
|
'K', 'V', 'A', 'D', 'A', 'L', 'T', 'N', 'A', 'V', 'A', 'H', 'V',
|
|
'D', 'D', 'M', 'P', 'N', 'A', 'L', 'S', 'A', 'L', 'S', 'D', 'L',
|
|
'H', 'A', 'H', 'K', 'L', 'R', 'V', 'D', 'P', 'V', 'N', 'F', 'K',
|
|
'L', 'L', 'S', 'H', 'C', 'L', 'L', 'V', 'T', 'L', 'A', 'A', 'H',
|
|
'L', 'P', 'A', 'E', 'F', 'T', 'P', 'A', 'V', 'H', 'A', 'S', 'L',
|
|
'D', 'K', 'F', 'L', 'A', 'S', 'V', 'S', 'T', 'V', 'L', 'T', 'S',
|
|
'K', 'Y'],
|
|
['L', 'T', 'P', 'E', 'E', 'K', 'S', 'A', 'V', 'T', 'A', 'L', 'W',
|
|
'G', 'K', 'V', '-', '-', 'N', 'V', 'D', 'E', 'V', 'G', 'G', 'E',
|
|
'A', 'L', 'G', 'R', 'L', 'L', 'V', 'V', 'Y', 'P', 'W', 'T', 'Q',
|
|
'R', 'F', 'F', 'E', 'S', 'F', 'G', 'D', 'L', 'S', 'T', 'P', 'D',
|
|
'A', 'V', 'M', 'G', 'N', 'P', 'K', 'V', 'K', 'A', 'H', 'G', 'K',
|
|
'K', 'V', 'L', 'G', 'A', 'F', 'S', 'D', 'G', 'L', 'A', 'H', 'L',
|
|
'D', 'N', 'L', 'K', 'G', 'T', 'F', 'A', 'T', 'L', 'S', 'E', 'L',
|
|
'H', 'C', 'D', 'K', 'L', 'H', 'V', 'D', 'P', 'E', 'N', 'F', 'R',
|
|
'L', 'L', 'G', 'N', 'V', 'L', 'V', 'C', 'V', 'L', 'A', 'H', 'H',
|
|
'F', 'G', 'K', 'E', 'F', 'T', 'P', 'P', 'V', 'Q', 'A', 'A', 'Y',
|
|
'Q', 'K', 'V', 'V', 'A', 'G', 'V', 'A', 'N', 'A', 'L', 'A', 'H',
|
|
'K', 'Y']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 326.0; 137 aligned letters; 63 identities; 74 mismatches; 88 positives; 8 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 326.0,
|
|
aligned = 137:
|
|
identities = 63,
|
|
positives = 88,
|
|
mismatches = 74.
|
|
gaps = 8:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 8:
|
|
internal_insertions = 6:
|
|
open_internal_insertions = 2,
|
|
extend_internal_insertions = 4;
|
|
internal_deletions = 2:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 1;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 6)
|
|
self.assertEqual(counts.internal_deletions, 2)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 8)
|
|
self.assertEqual(counts.insertions, 6)
|
|
self.assertEqual(counts.deletions, 2)
|
|
self.assertEqual(counts.gaps, 8)
|
|
self.assertEqual(counts.aligned, 137)
|
|
self.assertEqual(counts.identities, 63)
|
|
self.assertEqual(counts.mismatches, 74)
|
|
self.assertEqual(counts.positives, 88)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 6)
|
|
self.assertEqual(alignment.annotations["Similarity"], 9)
|
|
self.assertEqual(alignment.annotations["Gaps"], 0)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 32)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 13))
|
|
self.assertEqual(alignment.sequences[0].id, "HBA_HUMAN")
|
|
self.assertEqual(alignment.sequences[1].id, "HBB_HUMAN")
|
|
self.assertEqual(
|
|
repr(alignment.sequences[0].seq),
|
|
"Seq({60: 'KKVADALTNAVAH'}, length=73)",
|
|
)
|
|
self.assertEqual(
|
|
repr(alignment.sequences[1].seq),
|
|
"Seq({131: 'QKVVAGVANALAH'}, length=144)",
|
|
)
|
|
self.assertEqual(alignment.sequences[0].seq[60:73], "KKVADALTNAVAH")
|
|
self.assertEqual(alignment.sequences[1].seq[131:144], "QKVVAGVANALAH")
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[60, 73], [131, 144]]))
|
|
)
|
|
self.assertEqual(alignment[0], "KKVADALTNAVAH")
|
|
self.assertEqual(alignment[1], "QKVVAGVANALAH")
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"], ":||...:.||:||"
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
HBA_HUMAN 60 KKVADALTNAVAH 73
|
|
0 .||.....||.|| 13
|
|
HBB_HUMAN 131 QKVVAGVANALAH 144
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['K', 'K', 'V', 'A', 'D', 'A', 'L', 'T', 'N', 'A', 'V', 'A', 'H'],
|
|
['Q', 'K', 'V', 'V', 'A', 'G', 'V', 'A', 'N', 'A', 'L', 'A', 'H']],
|
|
dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 32.0; 13 aligned letters; 6 identities; 7 mismatches; 9 positives; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 32.0,
|
|
aligned = 13:
|
|
identities = 6,
|
|
positives = 9,
|
|
mismatches = 7.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 13)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 7)
|
|
self.assertEqual(counts.positives, 9)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 7)
|
|
self.assertEqual(alignment.annotations["Similarity"], 10)
|
|
self.assertEqual(alignment.annotations["Gaps"], 0)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 28)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 18))
|
|
self.assertEqual(alignment.sequences[0].id, "HBA_HUMAN")
|
|
self.assertEqual(alignment.sequences[1].id, "HBB_HUMAN")
|
|
self.assertEqual(
|
|
repr(alignment.sequences[0].seq),
|
|
"Seq({90: 'KLRVDPVNFKLLSHCLLV'}, length=108)",
|
|
)
|
|
self.assertEqual(
|
|
repr(alignment.sequences[1].seq),
|
|
"Seq({17: 'KVNVDEVGGEALGRLLVV'}, length=35)",
|
|
)
|
|
self.assertEqual(alignment.sequences[0].seq[90:108], "KLRVDPVNFKLLSHCLLV")
|
|
self.assertEqual(alignment.sequences[1].seq[17:35], "KVNVDEVGGEALGRLLVV")
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[90, 108], [17, 35]]))
|
|
)
|
|
self.assertEqual(alignment[0], "KLRVDPVNFKLLSHCLLV")
|
|
self.assertEqual(alignment[1], "KVNVDEVGGEALGRLLVV")
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"], "|:.||.|..:.|...|:|"
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
HBA_HUMAN 90 KLRVDPVNFKLLSHCLLV 108
|
|
0 |..||.|....|...|.| 18
|
|
HBB_HUMAN 17 KVNVDEVGGEALGRLLVV 35
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['K', 'L', 'R', 'V', 'D', 'P', 'V', 'N', 'F', 'K', 'L', 'L', 'S',
|
|
'H', 'C', 'L', 'L', 'V'],
|
|
['K', 'V', 'N', 'V', 'D', 'E', 'V', 'G', 'G', 'E', 'A', 'L', 'G',
|
|
'R', 'L', 'L', 'V', 'V']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 28.0; 18 aligned letters; 7 identities; 11 mismatches; 10 positives; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 28.0,
|
|
aligned = 18:
|
|
identities = 7,
|
|
positives = 10,
|
|
mismatches = 11.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 18)
|
|
self.assertEqual(counts.identities, 7)
|
|
self.assertEqual(counts.mismatches, 11)
|
|
self.assertEqual(counts.positives, 10)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 6)
|
|
self.assertEqual(alignment.annotations["Similarity"], 6)
|
|
self.assertEqual(alignment.annotations["Gaps"], 0)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 23)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 10))
|
|
self.assertEqual(alignment.sequences[0].id, "HBA_HUMAN")
|
|
self.assertEqual(alignment.sequences[1].id, "HBB_HUMAN")
|
|
self.assertEqual(
|
|
repr(alignment.sequences[0].seq),
|
|
"Seq({80: 'LSALSDLHAH'}, length=90)",
|
|
)
|
|
self.assertEqual(
|
|
repr(alignment.sequences[1].seq),
|
|
"Seq({68: 'LGAFSDGLAH'}, length=78)",
|
|
)
|
|
self.assertEqual(alignment.sequences[0].seq[80:90], "LSALSDLHAH")
|
|
self.assertEqual(alignment.sequences[1].seq[68:78], "LGAFSDGLAH")
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[80, 90], [68, 78]]))
|
|
)
|
|
self.assertEqual(alignment[0], "LSALSDLHAH")
|
|
self.assertEqual(alignment[1], "LGAFSDGLAH")
|
|
self.assertEqual(alignment.column_annotations["emboss_consensus"], "|.|.||..||")
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
HBA_HUMAN 80 LSALSDLHAH 90
|
|
0 |.|.||..|| 10
|
|
HBB_HUMAN 68 LGAFSDGLAH 78
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['L', 'S', 'A', 'L', 'S', 'D', 'L', 'H', 'A', 'H'],
|
|
['L', 'G', 'A', 'F', 'S', 'D', 'G', 'L', 'A', 'H']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 23.0; 10 aligned letters; 6 identities; 4 mismatches; 6 positives; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 23.0,
|
|
aligned = 10:
|
|
identities = 6,
|
|
positives = 6,
|
|
mismatches = 4.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 10)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 4)
|
|
self.assertEqual(counts.positives, 6)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 14)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 6)
|
|
self.assertEqual(alignment.annotations["Similarity"], 8)
|
|
self.assertEqual(alignment.annotations["Gaps"], 0)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 23)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 10))
|
|
self.assertEqual(alignment.sequences[0].id, "HBA_HUMAN")
|
|
self.assertEqual(alignment.sequences[1].id, "HBB_HUMAN")
|
|
self.assertEqual(
|
|
repr(alignment.sequences[0].seq),
|
|
"Seq({10: 'VKAAWGKVGA'}, length=20)",
|
|
)
|
|
self.assertEqual(
|
|
repr(alignment.sequences[1].seq),
|
|
"Seq({126: 'VQAAYQKVVA'}, length=136)",
|
|
)
|
|
self.assertEqual(alignment.sequences[0].seq[10:20], "VKAAWGKVGA")
|
|
self.assertEqual(alignment.sequences[1].seq[126:136], "VQAAYQKVVA")
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[10, 20], [126, 136]]))
|
|
)
|
|
self.assertEqual(alignment[0], "VKAAWGKVGA")
|
|
self.assertEqual(alignment[1], "VQAAYQKVVA")
|
|
self.assertEqual(alignment.column_annotations["emboss_consensus"], "|:||:.||.|")
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
HBA_HUMAN 10 VKAAWGKVGA 20
|
|
0 |.||..||.| 10
|
|
HBB_HUMAN 126 VQAAYQKVVA 136
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['V', 'K', 'A', 'A', 'W', 'G', 'K', 'V', 'G', 'A'],
|
|
['V', 'Q', 'A', 'A', 'Y', 'Q', 'K', 'V', 'V', 'A']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 23.0; 10 aligned letters; 6 identities; 4 mismatches; 8 positives; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 23.0,
|
|
aligned = 10:
|
|
identities = 6,
|
|
positives = 8,
|
|
mismatches = 4.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 10)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 4)
|
|
self.assertEqual(counts.positives, 8)
|
|
with self.assertRaises(StopIteration):
|
|
next(alignments)
|
|
|
|
def test_pair_example_nobrief(self):
|
|
# Variation on the alignment file obtained from EMBOSS
|
|
# (http://emboss.sourceforge.net/docs/themes/alnformats/align.pair)
|
|
# if we include 3 sequences to align against, and we use the -nobrief
|
|
# command line option.
|
|
# This test parses the file named below, checks the file-level metadata,
# then steps through three pairwise alignments (IXI_234 against IXI_235,
# IXI_236 and IXI_237) and finally checks that the iterator is exhausted.
# NOTE(review): the lone "|" lines interleaved with the code look like
# extraction artifacts rather than part of the test, and runs of spaces
# inside the expected strings may have been collapsed - confirm against
# the upstream file.
path = "Emboss/needle_nobrief_multiple.pair"
|
|
alignments = Align.parse(path, "emboss")
|
|
# Metadata exposed on the object returned by Align.parse.
self.assertEqual(alignments.metadata["Program"], "needle")
|
|
self.assertEqual(alignments.metadata["Rundate"], "Fri 23 Jul 2021 22:45:41")
|
|
self.assertEqual(
|
|
alignments.metadata["Command line"],
|
|
"needle -asequence seqa.fa -bsequence seqb.fa -datafile EBLOSUM62 -gapopen 10 -gapextend 0.5 -nobrief -outfile stdout",
|
|
)
|
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
|
self.assertEqual(alignments.metadata["Report_file"], "stdout")
|
|
# First alignment: IXI_234 against IXI_235.
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
|
self.assertEqual(alignment.annotations["Identity"], 112)
|
|
self.assertEqual(alignment.annotations["Similarity"], 112)
|
|
self.assertEqual(alignment.annotations["Gaps"], 19)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 591.5)
|
|
# The longest/shortest identity and similarity annotations are compared
# as percentage strings, not as numbers.
self.assertEqual(alignment.annotations["Longest_Identity"], "100.00%")
|
|
self.assertEqual(alignment.annotations["Longest_Similarity"], "100.00%")
|
|
self.assertEqual(alignment.annotations["Shortest_Identity"], "85.50%")
|
|
self.assertEqual(alignment.annotations["Shortest_Similarity"], "85.50%")
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 131))
|
|
self.assertEqual(alignment.sequences[0].id, "IXI_234")
|
|
self.assertEqual(alignment.sequences[1].id, "IXI_235")
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq,
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TSPASIRPPAGPSSRRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
# Row 0 advances over 15-24 and 74-84 while row 1 stands still: two
# gaps of 9 and 10 columns in IXI_235, i.e. the 19 gaps annotated above.
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
np.array([[0, 15, 24, 74, 84, 131], [0, 15, 15, 65, 65, 112]]),
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
alignment[0],
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|
"TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
"||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||",
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
IXI_234 0 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
|
|
0 |||||||||||||||---------||||||||||||||||||||||||||||||||||||
|
|
IXI_235 0 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
|
|
|
|
IXI_234 60 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
|
|
60 ||||||||||||||----------||||||||||||||||||||||||||||||||||||
|
|
IXI_235 51 TTSTSTRHRGRSGW----------RASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
|
|
|
|
IXI_234 120 PPAWAGDRSHE 131
|
|
120 ||||||||||| 131
|
|
IXI_235 101 PPAWAGDRSHE 112
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', 'R', 'T', 'R', 'P',
|
|
'S', 'P', 'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C',
|
|
'C', 'S', 'A', 'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'W', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'W', 'S', 'A', 'R', 'T',
|
|
'T', 'T', 'A', 'A', 'C', 'L', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', 'S', 'A', 'G', 'S', 'R', 'P', 'N',
|
|
'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S', 'S', 'C', 'I', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E'],
|
|
['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', '-', '-', '-', '-', '-', '-', '-', '-', '-', 'R', 'P',
|
|
'S', 'P', 'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C',
|
|
'C', 'S', 'A', 'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'W', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'W', '-', '-', '-', '-',
|
|
'-', '-', '-', '-', '-', '-', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', 'S', 'A', 'G', 'S', 'R', 'P', 'N',
|
|
'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S', 'S', 'C', 'I', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# substitution_matrix is defined outside this method.
# The expected substitution score is consistent with the Score annotation
# checked above: 620.0 - (2 * 10.0 + 17 * 0.5) = 591.5.
counts = alignment.counts(substitution_matrix)
|
|
# The expected repr ends with id(counts) formatted in hexadecimal.
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 620.0; 112 aligned letters; 112 identities; 0 mismatches; 112 positives; 19 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 620.0,
|
|
aligned = 112:
|
|
identities = 112,
|
|
positives = 112,
|
|
mismatches = 0.
|
|
gaps = 19:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 19:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 19:
|
|
open_internal_deletions = 2,
|
|
extend_internal_deletions = 17;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
# The individual attributes repeat the totals from the summary above.
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 19)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 19)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 19)
|
|
self.assertEqual(counts.gaps, 19)
|
|
self.assertEqual(counts.aligned, 112)
|
|
self.assertEqual(counts.identities, 112)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
self.assertEqual(counts.positives, 112)
|
|
# Second alignment: IXI_234 against IXI_236.
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
|
self.assertEqual(alignment.annotations["Identity"], 120)
|
|
self.assertEqual(alignment.annotations["Similarity"], 120)
|
|
self.assertEqual(alignment.annotations["Gaps"], 4)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 618.0)
|
|
self.assertEqual(alignment.annotations["Longest_Identity"], "94.49%")
|
|
self.assertEqual(alignment.annotations["Longest_Similarity"], "94.49%")
|
|
self.assertEqual(alignment.annotations["Shortest_Identity"], "91.60%")
|
|
self.assertEqual(alignment.annotations["Shortest_Similarity"], "91.60%")
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 131))
|
|
self.assertEqual(alignment.sequences[0].id, "IXI_234")
|
|
self.assertEqual(alignment.sequences[1].id, "IXI_236")
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq,
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TSPASIRPPAGPSSRPAMVSSRRPSPPPPRRPPGRPCCSAAPPRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRGSRPPRFAPPLMSSCITSTTGPPPPAGDRSHE",
|
|
)
|
|
# Two gaps of 2 columns each in IXI_236 (columns 22-24 and 97-99 of
# IXI_234), giving the 4 gaps annotated above.
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
np.array([[0, 22, 24, 97, 99, 131], [0, 22, 22, 95, 95, 127]]),
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
alignment[0],
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|
"TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--GSRPPRFAPPLMSSCITSTTGPPPPAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
"|||||||||||||||||||||| |||||.||||.|||||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| ||||.||||.|||||||||||||..|||||||",
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
IXI_234 0 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
|
|
0 ||||||||||||||||||||||--|||||.||||.|||||||||.|||||||||||||||
|
|
IXI_236 0 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQATGGWKTCSGTC
|
|
|
|
IXI_234 60 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
|
|
60 |||||||||||||||||||||||||||||||||||||--||||.||||.|||||||||||
|
|
IXI_236 58 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--GSRPPRFAPPLMSSCITSTTG
|
|
|
|
IXI_234 120 PPAWAGDRSHE 131
|
|
120 ||..||||||| 131
|
|
IXI_236 116 PPPPAGDRSHE 127
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', 'R', 'T', 'R', 'P',
|
|
'S', 'P', 'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C',
|
|
'C', 'S', 'A', 'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'W', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'W', 'S', 'A', 'R', 'T',
|
|
'T', 'T', 'A', 'A', 'C', 'L', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', 'S', 'A', 'G', 'S', 'R', 'P', 'N',
|
|
'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S', 'S', 'C', 'I', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E'],
|
|
['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', '-', '-', 'R', 'P',
|
|
'S', 'P', 'P', 'P', 'P', 'R', 'R', 'P', 'P', 'G', 'R', 'P', 'C',
|
|
'C', 'S', 'A', 'A', 'P', 'P', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'W', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'W', 'S', 'A', 'R', 'T',
|
|
'T', 'T', 'A', 'A', 'C', 'L', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', '-', '-', 'G', 'S', 'R', 'P', 'P',
|
|
'R', 'F', 'A', 'P', 'P', 'L', 'M', 'S', 'S', 'C', 'I', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'P', 'P', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Consistent with the Score annotation checked above:
# 639.0 - (2 * 10.0 + 2 * 0.5) = 618.0.
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 639.0; 127 aligned letters; 120 identities; 7 mismatches; 120 positives; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 639.0,
|
|
aligned = 127:
|
|
identities = 120,
|
|
positives = 120,
|
|
mismatches = 7.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 4:
|
|
open_internal_deletions = 2,
|
|
extend_internal_deletions = 2;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 4)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 4)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 127)
|
|
self.assertEqual(counts.identities, 120)
|
|
self.assertEqual(counts.mismatches, 7)
|
|
self.assertEqual(counts.positives, 120)
|
|
# Third alignment: IXI_234 against IXI_237.
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EBLOSUM62")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
|
self.assertEqual(alignment.annotations["Identity"], 119)
|
|
self.assertEqual(alignment.annotations["Similarity"], 124)
|
|
self.assertEqual(alignment.annotations["Gaps"], 7)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 609.0)
|
|
self.assertEqual(alignment.annotations["Longest_Identity"], "95.97%")
|
|
self.assertEqual(alignment.annotations["Longest_Similarity"], "100.00%")
|
|
self.assertEqual(alignment.annotations["Shortest_Identity"], "90.84%")
|
|
self.assertEqual(alignment.annotations["Shortest_Similarity"], "94.66%")
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 131))
|
|
self.assertEqual(alignment.sequences[0].id, "IXI_234")
|
|
self.assertEqual(alignment.sequences[1].id, "IXI_237")
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq,
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TSPASLRPPAGPSSRPAMVSSRRRPSPPGPRRPTCSAAPRRPQATGGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSRGSRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE",
|
|
)
|
|
# Three gaps of 1, 4 and 2 columns in IXI_237 (columns 23-24, 35-39 and
# 97-99 of IXI_234), giving the 7 gaps annotated above.
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
np.array(
|
|
[[0, 23, 24, 35, 39, 97, 99, 131], [0, 23, 23, 34, 34, 92, 92, 124]]
|
|
),
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
alignment[0],
|
|
"TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTGPPAWAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|
"TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQATGGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--GSRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE",
|
|
)
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
"|||||:||||||||||||||||| ||||||||||| |||||||||||||:||||||||||||||||||||:||||||||||||||||||||||| |||||||||||||||:||||||||:|||||||",
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
IXI_234 0 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQATGGWKTCSGTC
|
|
0 |||||.|||||||||||||||||-|||||||||||----|||||||||||||.|||||||
|
|
IXI_237 0 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQATGGYKTCSGTC
|
|
|
|
IXI_234 60 TTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAGSRPNRFAPTLMSSCITSTTG
|
|
60 |||||||||||||.|||||||||||||||||||||||--|||||||||||||||.|||||
|
|
IXI_237 55 TTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--GSRPNRFAPTLMSSCLTSTTG
|
|
|
|
IXI_234 120 PPAWAGDRSHE 131
|
|
120 |||.||||||| 131
|
|
IXI_237 113 PPAYAGDRSHE 124
|
|
""",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['T', 'S', 'P', 'A', 'S', 'I', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', 'R', 'T', 'R', 'P',
|
|
'S', 'P', 'P', 'G', 'P', 'R', 'R', 'P', 'T', 'G', 'R', 'P', 'C',
|
|
'C', 'S', 'A', 'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'W', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'W', 'S', 'A', 'R', 'T',
|
|
'T', 'T', 'A', 'A', 'C', 'L', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', 'S', 'A', 'G', 'S', 'R', 'P', 'N',
|
|
'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S', 'S', 'C', 'I', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'A', 'W', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E'],
|
|
['T', 'S', 'P', 'A', 'S', 'L', 'R', 'P', 'P', 'A', 'G', 'P', 'S',
|
|
'S', 'R', 'P', 'A', 'M', 'V', 'S', 'S', 'R', 'R', '-', 'R', 'P',
|
|
'S', 'P', 'P', 'G', 'P', 'R', 'R', 'P', 'T', '-', '-', '-', '-',
|
|
'C', 'S', 'A', 'A', 'P', 'R', 'R', 'P', 'Q', 'A', 'T', 'G', 'G',
|
|
'Y', 'K', 'T', 'C', 'S', 'G', 'T', 'C', 'T', 'T', 'S', 'T', 'S',
|
|
'T', 'R', 'H', 'R', 'G', 'R', 'S', 'G', 'Y', 'S', 'A', 'R', 'T',
|
|
'T', 'T', 'A', 'A', 'C', 'L', 'R', 'A', 'S', 'R', 'K', 'S', 'M',
|
|
'R', 'A', 'A', 'C', 'S', 'R', '-', '-', 'G', 'S', 'R', 'P', 'N',
|
|
'R', 'F', 'A', 'P', 'T', 'L', 'M', 'S', 'S', 'C', 'L', 'T', 'S',
|
|
'T', 'T', 'G', 'P', 'P', 'A', 'Y', 'A', 'G', 'D', 'R', 'S', 'H',
|
|
'E']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Consistent with the Score annotation checked above:
# 641.0 - (3 * 10.0 + 4 * 0.5) = 609.0.
# All 5 mismatches are expected to count as positives (119 + 5 = 124).
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 641.0; 124 aligned letters; 119 identities; 5 mismatches; 124 positives; 7 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 641.0,
|
|
aligned = 124:
|
|
identities = 119,
|
|
positives = 124,
|
|
mismatches = 5.
|
|
gaps = 7:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 7:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 7:
|
|
open_internal_deletions = 3,
|
|
extend_internal_deletions = 4;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 7)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 7)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 7)
|
|
self.assertEqual(counts.gaps, 7)
|
|
self.assertEqual(counts.aligned, 124)
|
|
self.assertEqual(counts.identities, 119)
|
|
self.assertEqual(counts.mismatches, 5)
|
|
self.assertEqual(counts.positives, 124)
|
|
# No fourth alignment is expected: the iterator must be exhausted.
with self.assertRaises(StopIteration):
|
|
next(alignments)
|
|
|
|
def test_pair_example2(self):
    """Check every pairwise alignment parsed from Emboss/needle.txt.

    The file is read with the "emboss" format.  The header metadata is
    verified first; then five alignments are consumed one by one and each
    is compared against a table of expected values (annotations, shape,
    sequence ids and contents, coordinates, aligned rows, consensus line,
    printed form, letter array, and the AlignmentCounts summary computed
    with the module-level ``substitution_matrix``).  After the fifth
    alignment the iterator must raise StopIteration.
    """
    # NOTE(review): all expected literals are carried over verbatim from the
    # previous version of this test; if a comparison fails on spacing only,
    # confirm the padding against the actual formatter output.
    alignments = Align.parse("Emboss/needle.txt", "emboss")
    expected_metadata = (
        ("Program", "needle"),
        ("Rundate", "Sun 27 Apr 2007 17:20:35"),
        (
            "Command line",
            "needle [-asequence] Spo0F.faa [-bsequence] paired_r.faa -sformat2 pearson",
        ),
        ("Align_format", "srspair"),
        ("Report_file", "ref_rec .needle"),
    )
    for key, value in expected_metadata:
        self.assertEqual(alignments.metadata[key], value)

    # The first sequence is the same in all five alignments.
    target_seq = "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAKPFDIDEIRDAV"

    # Templates for repr(counts) and str(counts); the placeholders are
    # filled from each case's "counts" dictionary.
    repr_template = (
        "<AlignmentCounts object (substitution score = {substitution_score}; "
        "{aligned} aligned letters; {identities} identities; "
        "{mismatches} mismatches; {positives} positives; {gaps} gaps) at 0x%x>"
    )
    str_template = (
        "AlignmentCounts object with\n"
        "substitution_score = {substitution_score},\n"
        "aligned = {aligned}:\n"
        "identities = {identities},\n"
        "positives = {positives},\n"
        "mismatches = {mismatches}.\n"
        "gaps = {gaps}:\n"
        "left_gaps = {left_gaps}:\n"
        "left_insertions = {left_insertions}:\n"
        "open_left_insertions = {open_left_insertions},\n"
        "extend_left_insertions = {extend_left_insertions};\n"
        "left_deletions = {left_deletions}:\n"
        "open_left_deletions = {open_left_deletions},\n"
        "extend_left_deletions = {extend_left_deletions};\n"
        "internal_gaps = {internal_gaps}:\n"
        "internal_insertions = {internal_insertions}:\n"
        "open_internal_insertions = {open_internal_insertions},\n"
        "extend_internal_insertions = {extend_internal_insertions};\n"
        "internal_deletions = {internal_deletions}:\n"
        "open_internal_deletions = {open_internal_deletions},\n"
        "extend_internal_deletions = {extend_internal_deletions};\n"
        "right_gaps = {right_gaps}:\n"
        "right_insertions = {right_insertions}:\n"
        "open_right_insertions = {open_right_insertions},\n"
        "extend_right_insertions = {extend_right_insertions};\n"
        "right_deletions = {right_deletions}:\n"
        "open_right_deletions = {open_right_deletions},\n"
        "extend_right_deletions = {extend_right_deletions}.\n"
    )
    # Attributes of the counts object compared individually, in this order.
    checked_attributes = (
        "left_insertions",
        "left_deletions",
        "right_insertions",
        "right_deletions",
        "internal_insertions",
        "internal_deletions",
        "left_gaps",
        "right_gaps",
        "internal_gaps",
        "insertions",
        "deletions",
        "gaps",
        "aligned",
        "identities",
        "mismatches",
        "positives",
    )

    cases = [
        {
            "identity": 32,
            "similarity": 64,
            "gaps": 17,
            "score": 112.0,
            "columns": 124,
            "query_id": "gi|94968718|receiver",
            "query_seq": "VLLADDHALVRRGFRLMLEDDPEIEIVAEAGDGAQAVKLAGELHPRVVVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANGYILKSAIDLDLIQAVRRVANGET",
            "coordinates": [
                [0, 1, 7, 7, 17, 19, 100, 100, 108, 109, 111, 111],
                [0, 0, 6, 10, 20, 20, 101, 102, 110, 110, 112, 120],
            ],
            "rows": (
                "KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK-PFDIDEIRDAV--------",
                "-VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRVVVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANGYILKSAIDLDLIQ-AVRRVANGET",
            ),
            "consensus": " :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.:|::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||..:..| ..|:|.|: || ",
            "text": (
                "ref_rec 0 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGM\n"
                "0 -.|..||----..|.|..|..--..|......|..|.||........|..|..|...|||\n"
                "gi|949687 0 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRVVVMDCAMPGM\n"
                "\n"
                "ref_rec 56 DGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK-PFDIDEIRDAV----\n"
                "60 .|....|........|.|...|...|........|.||.....|-..|.|.|.-||----\n"
                "gi|949687 57 SGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANGYILKSAIDLDLIQ-AVRRVA\n"
                "\n"
                "ref_rec 111 ---- 111\n"
                "120 ---- 124\n"
                "gi|949687 116 NGET 120\n"
            ),
            "counts": {
                "substitution_score": 154.0,
                "aligned": 107,
                "identities": 32,
                "positives": 64,
                "mismatches": 75,
                "gaps": 17,
                "insertions": 13,
                "deletions": 4,
                "left_gaps": 1,
                "left_insertions": 0,
                "open_left_insertions": 0,
                "extend_left_insertions": 0,
                "left_deletions": 1,
                "open_left_deletions": 1,
                "extend_left_deletions": 0,
                "internal_gaps": 8,
                "internal_insertions": 5,
                "open_internal_insertions": 2,
                "extend_internal_insertions": 3,
                "internal_deletions": 3,
                "open_internal_deletions": 2,
                "extend_internal_deletions": 1,
                "right_gaps": 8,
                "right_insertions": 8,
                "open_right_insertions": 1,
                "extend_right_insertions": 7,
                "right_deletions": 0,
                "open_right_deletions": 0,
                "extend_right_deletions": 0,
            },
        },
        {
            "identity": 34,
            "similarity": 58,
            "gaps": 9,
            "score": 154.0,
            "columns": 119,
            "query_id": "gi|94968761|receiver",
            "query_seq": "ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSDVVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEKPLSTDKLLLTVENALKLKR",
            "coordinates": [[0, 1, 111, 111], [0, 0, 110, 118]],
            "rows": (
                "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAKPFDIDEIRDAV--------",
                "-ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSDVVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEKPLSTDKLLLTVENALKLKR",
            ),
            "consensus": " ||||||:......|:..|...|::.....|.::||:|...:..||:|.|:.:||.||:.:|:.:|.......|::|:....::|..::..||||....||...|::...| ",
            "text": (
                "ref_rec 0 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGIE\n"
                "0 -||||||.......|...|...|.......|...||.|......||.|.|...||.||..\n"
                "gi|949687 0 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSDVVMPGRDGLT\n"
                "\n"
                "ref_rec 60 ILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAKPFDIDEIRDAV--------\n"
                "60 .|...|.......|..|.......|......||||....||...|.....|--------\n"
                "gi|949687 59 LLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEKPLSTDKLLLTVENALKLKR\n"
                "\n"
                "ref_rec 111\n"
                "119\n"
                "gi|949687 118\n"
            ),
            "counts": {
                "substitution_score": 154.0,
                "aligned": 110,
                "identities": 34,
                "positives": 58,
                "mismatches": 76,
                "gaps": 9,
                "insertions": 8,
                "deletions": 1,
                "left_gaps": 1,
                "left_insertions": 0,
                "open_left_insertions": 0,
                "extend_left_insertions": 0,
                "left_deletions": 1,
                "open_left_deletions": 1,
                "extend_left_deletions": 0,
                "internal_gaps": 0,
                "internal_insertions": 0,
                "open_internal_insertions": 0,
                "extend_internal_insertions": 0,
                "internal_deletions": 0,
                "open_internal_deletions": 0,
                "extend_internal_deletions": 0,
                "right_gaps": 8,
                "right_insertions": 8,
                "open_right_insertions": 1,
                "extend_right_insertions": 7,
                "right_deletions": 0,
                "open_right_deletions": 0,
                "extend_right_deletions": 0,
            },
        },
        {
            "identity": 29,
            "similarity": 53,
            "gaps": 9,
            "score": 121.0,
            "columns": 120,
            "query_id": "gi|94967506|receiver",
            "query_seq": "LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIVDVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQKPIDIDALLNIAERALEHKE",
            "coordinates": [[0, 0, 111, 111], [0, 1, 112, 120]],
            "rows": (
                "-KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAKPFDIDEIRDAV--------",
                "LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIVDVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQKPIDIDALLNIAERALEHKE",
            ),
            "consensus": " .|::|||..|..:.:..||.:.|:..........|.:.:.....||.::|:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:..||.|||.:.:.. ",
            "text": (
                "ref_rec 0 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLDMKIPGMDGI\n"
                "0 -.|..|||..|.......||...|...........|.........||...|.......|.\n"
                "gi|949675 0 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIVDVYLGSTTGV\n"
                "\n"
                "ref_rec 59 EILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAKPFDIDEIRDAV--------\n"
                "60 |.|.|..|........|.|....|.|...|...||.....||.|||......--------\n"
                "gi|949675 60 EVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQKPIDIDALLNIAERALEHKE\n"
                "\n"
                "ref_rec 111\n"
                "120\n"
                "gi|949675 120\n"
            ),
            "counts": {
                "substitution_score": 121.0,
                "aligned": 111,
                "identities": 29,
                "positives": 53,
                "mismatches": 82,
                "gaps": 9,
                "insertions": 9,
                "deletions": 0,
                "left_gaps": 1,
                "left_insertions": 1,
                "open_left_insertions": 1,
                "extend_left_insertions": 0,
                "left_deletions": 0,
                "open_left_deletions": 0,
                "extend_left_deletions": 0,
                "internal_gaps": 0,
                "internal_insertions": 0,
                "open_internal_insertions": 0,
                "extend_internal_insertions": 0,
                "internal_deletions": 0,
                "open_internal_deletions": 0,
                "extend_internal_deletions": 0,
                "right_gaps": 8,
                "right_insertions": 8,
                "open_right_insertions": 1,
                "extend_right_insertions": 7,
                "right_deletions": 0,
                "open_right_deletions": 0,
                "extend_right_deletions": 0,
            },
        },
        {
            "identity": 30,
            "similarity": 64,
            "gaps": 9,
            "score": 126.0,
            "columns": 118,
            "query_id": "gi|94970045|receiver",
            "query_seq": "VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLITDLVMPGISGRVLAQELVKIHPETKVMYMSGYDDETVMVNGEIDSSSAFLRKPFRMDALSAKIREVL",
            "coordinates": [
                [0, 1, 41, 41, 82, 83, 98, 98, 105, 105, 111],
                [0, 0, 40, 42, 83, 83, 98, 99, 106, 110, 116],
            ],
            "rows": (
                "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF-AKPFDID----EIRDAV",
                "-VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLITDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAFLRKPFRMDALSAKIREVL",
            ),
            "consensus": " :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|:::.|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| .|||.:| :||:.:",
            "text": (
                "ref_rec 0 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVLLDMKIPGMDG\n"
                "0 -.|.|.|....|..........||....|..|..||....|--||.|....|...||..|\n"
                "gi|949700 0 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLITDLVMPGISG\n"
                "\n"
                "ref_rec 58 IEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF-AKPFDID----EIRDAV\n"
                "60 .........|.....|..|..|..-.......|......|-.|||..|----.||...\n"
                "gi|949700 59 RVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAFLRKPFRMDALSAKIREVL\n"
                "\n"
                "ref_rec 111\n"
                "118\n"
                "gi|949700 116\n"
            ),
            "counts": {
                "substitution_score": 168.0,
                "aligned": 109,
                "identities": 30,
                "positives": 64,
                "mismatches": 79,
                "gaps": 9,
                "insertions": 7,
                "deletions": 2,
                "left_gaps": 1,
                "left_insertions": 0,
                "open_left_insertions": 0,
                "extend_left_insertions": 0,
                "left_deletions": 1,
                "open_left_deletions": 1,
                "extend_left_deletions": 0,
                "internal_gaps": 8,
                "internal_insertions": 7,
                "open_internal_insertions": 3,
                "extend_internal_insertions": 4,
                "internal_deletions": 1,
                "open_internal_deletions": 1,
                "extend_internal_deletions": 0,
                "right_gaps": 0,
                "right_insertions": 0,
                "open_right_insertions": 0,
                "extend_right_insertions": 0,
                "right_deletions": 0,
                "open_right_deletions": 0,
                "extend_right_deletions": 0,
            },
        },
        {
            "identity": 35,
            "similarity": 70,
            "gaps": 18,
            "score": 156.5,
            "columns": 125,
            "query_id": "gi|94970041|receiver",
            "query_seq": "TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLLSDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRHGVLTESAEFLQKPFTSDSLLRKVRAVLQKRQ",
            "coordinates": [
                [0, 39, 39, 88, 92, 99, 99, 111, 111],
                [0, 39, 41, 90, 90, 97, 101, 113, 121],
            ],
            "rows": (
                "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA----KPFDIDEIRDAV--------",
                "TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLLSDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTESAEFLQKPFTSDSLLRKVRAVLQKRQ",
            ),
            "consensus": ".:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::|.|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||..| |||..|.:...| ",
            "text": (
                "ref_rec 0 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVLLDMKIPGMDG\n"
                "0 ..|.|.|..|.|.|........||....|..|..||.||--.....|..|.|....||.|\n"
                "gi|949700 0 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLLSDVVLVGMSG\n"
                "\n"
                "ref_rec 58 IEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA----KPFDIDEIRDAV---\n"
                "60 .|...|.........||.|..|........----|.||..|----|||..|.....|---\n"
                "gi|949700 60 RELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTESAEFLQKPFTSDSLLRKVRAV\n"
                "\n"
                "ref_rec 111 ----- 111\n"
                "120 ----- 125\n"
                "gi|949700 116 LQKRQ 121\n"
            ),
            "counts": {
                "substitution_score": 190.0,
                "aligned": 107,
                "identities": 35,
                "positives": 70,
                "mismatches": 72,
                "gaps": 18,
                "insertions": 14,
                "deletions": 4,
                "left_gaps": 0,
                "left_insertions": 0,
                "open_left_insertions": 0,
                "extend_left_insertions": 0,
                "left_deletions": 0,
                "open_left_deletions": 0,
                "extend_left_deletions": 0,
                "internal_gaps": 10,
                "internal_insertions": 6,
                "open_internal_insertions": 2,
                "extend_internal_insertions": 4,
                "internal_deletions": 4,
                "open_internal_deletions": 1,
                "extend_internal_deletions": 3,
                "right_gaps": 8,
                "right_insertions": 8,
                "open_right_insertions": 1,
                "extend_right_insertions": 7,
                "right_deletions": 0,
                "open_right_deletions": 0,
                "extend_right_deletions": 0,
            },
        },
    ]

    for expected in cases:
        alignment = next(alignments)

        # Annotations reported for this alignment.
        annotations = alignment.annotations
        self.assertEqual(annotations["Matrix"], "EBLOSUM62")
        self.assertAlmostEqual(annotations["Gap_penalty"], 10.0)
        self.assertAlmostEqual(annotations["Extend_penalty"], 0.5)
        self.assertEqual(annotations["Identity"], expected["identity"])
        self.assertEqual(annotations["Similarity"], expected["similarity"])
        self.assertEqual(annotations["Gaps"], expected["gaps"])
        self.assertAlmostEqual(annotations["Score"], expected["score"])

        # Dimensions and the two underlying sequences.
        self.assertEqual(len(alignment), 2)
        self.assertEqual(alignment.shape, (2, expected["columns"]))
        self.assertEqual(alignment.sequences[0].id, "ref_rec")
        self.assertEqual(alignment.sequences[1].id, expected["query_id"])
        self.assertEqual(alignment.sequences[0].seq, target_seq)
        self.assertEqual(alignment.sequences[1].seq, expected["query_seq"])

        # Coordinates, gapped rows, consensus line and printed form.
        self.assertTrue(
            np.array_equal(
                alignment.coordinates, np.array(expected["coordinates"])
            )
        )
        first_row, second_row = expected["rows"]
        self.assertEqual(alignment[0], first_row)
        self.assertEqual(alignment[1], second_row)
        self.assertEqual(
            alignment.column_annotations["emboss_consensus"],
            expected["consensus"],
        )
        self.assertEqual(str(alignment), expected["text"])

        # The letter array holds one character per column of each row.
        letters = np.array([list(first_row), list(second_row)], dtype="U")
        self.assertTrue(np.array_equal(np.array(alignment, "U"), letters))

        # Summary statistics computed with the substitution matrix.
        counts = alignment.counts(substitution_matrix)
        tally = expected["counts"]
        self.assertEqual(
            repr(counts), repr_template.format(**tally) % id(counts)
        )
        self.assertEqual(str(counts), str_template.format(**tally))
        for attribute in checked_attributes:
            self.assertEqual(getattr(counts, attribute), tally[attribute])

    # No further alignments are expected in the file.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_pair_example3(self):
    """Check the needle alignment stored in Emboss/needle_overhang.txt.

    The file is parsed with the "emboss" format; the test verifies the
    file-level metadata, the annotations, sequences, coordinates and
    printed form of the first alignment, the gap/identity statistics
    returned by ``alignment.counts()``, and that no second alignment
    follows.
    """
    emboss_path = "Emboss/needle_overhang.txt"
    alignments = Align.parse(emboss_path, "emboss")

    # File-level metadata.
    metadata = alignments.metadata
    self.assertEqual(metadata["Program"], "needle")
    self.assertEqual(metadata["Rundate"], "Mon 14 Jul 2008 11:45:42")
    self.assertEqual(
        metadata["Command line"],
        "needle [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT -filter",
    )
    self.assertEqual(metadata["Align_format"], "srspair")
    self.assertEqual(metadata["Report_file"], "stdout")

    # The first (and only) alignment in the file.
    alignment = next(alignments)
    annotations = alignment.annotations
    self.assertEqual(annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(annotations["Gap_penalty"], 10.0)
    self.assertAlmostEqual(annotations["Extend_penalty"], 0.5)
    self.assertEqual(annotations["Identity"], 210)
    self.assertEqual(annotations["Similarity"], 210)
    self.assertEqual(annotations["Gaps"], 408)
    self.assertAlmostEqual(annotations["Score"], 561.0)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 667))
    self.assertEqual(alignment.sequences[0].id, "asis")
    self.assertEqual(alignment.sequences[1].id, "asis")
    self.assertEqual(
        alignment.sequences[0].seq,
        "TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC",
    )
    self.assertEqual(
        alignment.sequences[1].seq,
        "TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT",
    )

    # Row 0 holds the coordinates in the first sequence, row 1 those in
    # the second; a repeated value in a row marks a gap in that sequence.
    # fmt: off
    expected_coordinates = np.array([
        [0, 162, 169, 201, 210, 210, 220, 222, 236, 240,
         244, 253, 277, 277, 278, 279, 300, 300, 310, 310,
         314, 320, 334, 351, 357, 357, 379, 379, 390, 403,
         405, 407, 418, 418, 442, 442, 447, 447, 448, 452,
         455, 455, 460, 465, 478, 479, 509, 510, 518, 615],
        [0, 0, 7, 7, 16, 22, 32, 32, 46, 46,
         50, 50, 74, 80, 81, 81, 102, 107, 117, 119,
         123, 123, 137, 137, 143, 147, 169, 170, 181, 181,
         183, 183, 194, 215, 239, 241, 246, 250, 251, 251,
         254, 255, 260, 260, 273, 273, 303, 303, 311, 311],
    ])
    # fmt: on
    self.assertTrue(np.array_equal(alignment.coordinates, expected_coordinates))

    self.assertEqual(
        alignment[0],
        "TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAGGTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTCGT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA---------------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GTTTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC",
    )
    self.assertEqual(
        alignment[1],
        "------------------------------------------------------------------------------------------------------------------------------------------------------------------TTATTAA--------------------------------TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTATTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG----AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTTTTTTTATTAAG-AAAGGTTT-------------------------------------------------------------------------------------------------",
    )

    # The EMBOSS markup line has one character per alignment column:
    # "|" for an identity, "." for a mismatch, and a blank for a gap.
    # It is spelled out as (blank run length, symbol run) pairs followed
    # by the trailing blanks, so that the long blank runs are explicit.
    # NOTE(review): the run lengths were derived from the coordinates
    # asserted above (259 symbols + 408 blanks = 667 columns) - confirm
    # against the literal in the file on disk.
    consensus_runs = (
        (162, ".||||||"),
        (32, ".|||||.||"),
        (6, "|||..|..||"),
        (2, "||||.||||.||.|"),
        (4, "||.|"),
        (9, "||.|.|||||.|||.||||.||||"),
        (6, "|"),
        (1, "|||||||||||.|.|||||||"),
        (5, "||||||||.|"),
        (2, "||.|"),
        (6, "|||.|.||||||||"),
        (17, "||||||"),
        (4, ".||||...||||..|||||..|"),
        (1, "|||||||||||"),
        (13, "||"),
        (2, "||.||.||.||"),
        (21, "||..||.||.|.|||..||||.||"),
        (2, "|||||"),
        (4, "|"),
        (4, "|||"),
        (1, "|.|||"),
        (5, "|||||||||.|||"),
        (1, ".||||||...|||||||||||||||||..|"),
        (1, "||||||||"),
    )
    expected_consensus = (
        "".join(" " * blanks + symbols for blanks, symbols in consensus_runs)
        + " " * 97
    )
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"], expected_consensus
    )

    # NOTE(review): the column padding inside the two multi-line literals
    # below is whitespace-sensitive - confirm it against the file on disk.
    self.assertEqual(
        str(alignment),
        """\
asis              0 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGG
                  0 ------------------------------------------------------------
asis              0 ------------------------------------------------------------

asis             60 GGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCT
                 60 ------------------------------------------------------------
asis              0 ------------------------------------------------------------

asis            120 ACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGT
                120 ------------------------------------------.||||||-----------
asis              0 ------------------------------------------TTATTAA-----------

asis            180 CCGGATGTGGCTCTCCATGATACTTATTGT------GTAGTAGCTCATTTTCATTATGTT
                180 ---------------------.|||||.||------|||..|..||--||||.||||.||
asis              7 ---------------------TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTT

asis            234 CTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG
                240 .|----||.|---------||.|.|||||.|||.||||.||||------|-|||||||||
asis             44 TT----ATTG---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG

asis            288 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTATTAGCCTTTG
                300 ||.|.|||||||-----||||||||.|--||.|------|||.|.||||||||-------
asis             90 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTATTA-------

asis            341 GTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTCGT-TGAATTAGTTTAGCAAA
                360 ----------||||||----.||||...||||..|||||..|-|||||||||||------
asis            137 ----------ATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTT------

asis            396 ATACTGCTTAAGGTAGGCTATA---------------------GGATCTACCTTTTATCT
                420 -------||--||.||.||.||---------------------||..||.||.|.|||..
asis            181 -------TT--GGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAG

asis            435 TTCTAAT--CTTTT----GTTTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCT
                480 ||||.||--|||||----|----|||-|.|||-----|||||||||.|||-.||||||..
asis            232 TTCTTATTCCTTTTAAGGG----AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTC

asis            488 TCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT
                540 .|||||||||||||||||..|-||||||||------------------------------
asis            282 ACAGTCTTTTTTTTTATTAAG-AAAGGTTT------------------------------

asis            548 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGT
                600 ------------------------------------------------------------
asis            311 ------------------------------------------------------------

asis            608 TAATAGC 615
                660 ------- 667
asis            311 ------- 311
""",
    )

    # Statistics computed from the alignment itself (no substitution
    # matrix is passed, so no score or positives are reported).
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (259 aligned letters; 210 identities; 49 mismatches; 408 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
    aligned = 259:
        identities = 210,
        mismatches = 49.
    gaps = 408:
        left_gaps = 162:
            left_insertions = 0:
                open_left_insertions = 0,
                extend_left_insertions = 0;
            left_deletions = 162:
                open_left_deletions = 1,
                extend_left_deletions = 161;
        internal_gaps = 149:
            internal_insertions = 52:
                open_internal_insertions = 10,
                extend_internal_insertions = 42;
            internal_deletions = 97:
                open_internal_deletions = 13,
                extend_internal_deletions = 84;
        right_gaps = 97:
            right_insertions = 0:
                open_right_insertions = 0,
                extend_right_insertions = 0;
            right_deletions = 97:
                open_right_deletions = 1,
                extend_right_deletions = 96.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 162),
        ("right_insertions", 0),
        ("right_deletions", 97),
        ("internal_insertions", 52),
        ("internal_deletions", 97),
        ("left_gaps", 162),
        ("right_gaps", 97),
        ("internal_gaps", 149),
        ("insertions", 52),
        ("deletions", 356),
        ("gaps", 408),
        ("aligned", 259),
        ("identities", 210),
        ("mismatches", 49),
    )
    for attribute, expected_value in expected_counts:
        self.assertEqual(getattr(counts, attribute), expected_value)

    # The file holds exactly one alignment, so the iterator is exhausted.
    self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_needle_asis(self):
|
|
path = "Emboss/needle_asis.txt"
|
|
alignments = Align.parse(path, "emboss")
|
|
self.assertEqual(alignments.metadata["Program"], "needle")
|
|
self.assertEqual(alignments.metadata["Rundate"], "Mon 14 Jul 2008 11:37:15")
|
|
self.assertEqual(
|
|
alignments.metadata["Command line"],
|
|
"needle [-asequence] asis:TATTTTTTGGATTTTTTTCTAGATTTTCTAGGTTATTTAAACCGTTTTTTTTTAATTTAGTGTTTGAGTTTTGACAGGTCTCCACTTTGGGGGCTCCATCGCAAGGAAATTAGAATTCTTATACTTGGTTCTCTTTCCCAGGGACTCCAAGGATCTTTTCATTAGTTTGGATTTTGGTGTTTTCTTTAATTTTGTTAAGAAACAAATCCTTTCTAGAGTTTTTTCTAGCATTATGTTTTTTTTTCTCCTTATCTAAGGGGGTTTGTCGAGGTTTCTTAAATCTTTTTTTCTCTGGGTTTTAAAATTGTTTAAATTTTTTTGACCGAGGGGTTGGGGTGGTTTTCTCATGATAACAGGGGCTGGTGCTTTAGATCCTACCTCTACTGACCCGGGGTCTGCTACTGTGGCTTCTGATGAAGATCCACAGTATGCGCCTACGGAARCTCGGCAGTTTGGTGTTCGAAATCCAGCCCCTCGAATTAATACTCTTGTGCAGGTGGTTGACGAGCGCGGTATCGAATTGCAAAATTTGGGGCGGGACCCCGCTGTTCCGCCTGTTGCTCCGGGGGGGGCAGGTTAATCCTCCAGTCGTCTCCTTTTGGGGGCGTCTTTGACGGGGGTTTAAATCTTTCTTTGGTTGTGGATAGGATTTTTTTTCTAATATCGATCCTACCTGTTTTGGCGGGGCTATTACTTTGTTACTTTTGACCGAAATTTTAATGGAAATTTCTTTGATTCAAATGAATCCCTTAGTTTTCCAACACTTTTTTTTGGTTTTTTTAGGGATAGTCTACGCTGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGCAGGATTTGCTTTTTTAACTTATACTGGTTCGTAACGCATTAGCTCAACTCTCTCTTGTAGTTCTAGCAGCCGCCTTTTCTTTGTTGGGGGAGGGTTTAGGAGGAGTCTTTTTTTTCCTAACCCAAGGTGTTTCTTTCTTTTTTTCTTTAAAGTTCTTGACTGTTGGCACTTGTCTCCATAAATTTTCTTTCTTGTAAAGGGCTCCTAAGGCTTCTTGTTTCTGAATTCCTCTTTTCTTTTATTCTGTTTTGAGCTTATTTTTCTTGTTAGCTATTACGTAGGCATAGGGCAAATAATTTTTTTTTCTGCTCTCATTATTCCTTCTCCCTGCTTGTTTCACCCTGTGGGCTCTTTGAGCCCCACTAAGTGAGCGGGGCTCCTGCTTCCGCTCAATTAAATTTTGGTGGGTATTGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAATCAATCAATTGTATCGCTATACAATCTAGTACCCCTACCAGGGTACCAAGAGAGAGATAACTAGGGTGAATACTACGACTTAGATGTAGTGTTTAAGTTTCTACGG
GCTACAGAGAAGCTACCCGCAGGGTAYATATTTGTTCATTACATATTTGTTGACTTTTCTATCTCTGCTTTTACTTTTTTATTTATTTTTAAATCTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGTTAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAATAGGTAAATTCCATAAGTTCATTATAGTTTATCAGTTGAGAGGAATTTAGTATAAGAAGGCCCATTGGGGCTCTTGTCTTATCCAAGAACTGGTAAGATTTAATTCTACCGGGACGGTAGAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGCTATAGGGACTAGGTGCTAGGGAGGTATTAGGGCACCGCTCTTTATACAATCTCCATAGATACAACCAGGTCAACTAGGACAACGGAGGACGTTGACAGAGCATAAATAGCGATAGCGTACAAGATAWAATAGGGGCAGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTAGTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAAGGGTGTGGCAAAGAGAAATGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACCTTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGGGGTAGAACGTTTCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGGAACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCCCCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAACCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGCGAGACGTGGTCAGTCGATTCAGCGTTAGGTTTAAACTCCTTTGGCAAAGATTGACTCTAGCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATWAACACCAGTAGGTTCAATAAGGTAGTAATCCAATAGAATGGAAAACTCAAGATCTAATCTCTCGAYTTCCTAGTGTCATGGAAATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGACTAGCGAGAGAGTCTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTAGAGTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCATACTTTGTTACACGGAGTGTATCTAGGGGGAATAATACTAACTTACTTAGCACTATCGCGATGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAATAACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTTATTTTGTTTTTTTTTTGCTCTTAATTTTAGWGGGRGTGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA [-bsequence] 
asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTTTAATATTCTTGTGGTTTTGAACCTTTAGGTTTCTTTCTTTACCTTCGAGGGATTGGGCACTAGAATGAGTTTTAAGAGTGTGTGAAAGGGGGCTTGATAGCAGGGGAATGCTTTTTTAACTTATACTGGCTCGTAACGCATCAGTTCAACTCTCTCTTGCAGTTCTAGCAGCCGCCTTTTTTTTGTTGGGGGGGGGTTAAGAGAGTGTTTTTTTTCTAATCCAAGGGTCTTACTTTCTTTCTTTCTTTAAAAATTCTTTGGCTGTCGACACCTTTCTCTCCCGTCAGTCTCATGGTTTCTGGCTCTCTTGGGCTTTTTTTGTTTGTGAATGCCTCTTTTTTTTATTCTGTTTTGAGCTTATTTTTCTTGTTTACTATTACGTAGGTATAGGGCAAATAATTTTTTTTTCGCGTCTCTTGGCATGCCCATTACTCTAGTTTTATTCCCGGGCTTCTTCTCTCACCCTAGAGGGCTCTTTGAGCCCACACTCAAGTGAGCGGGGCTCCCGCTTCCGCTCAATTAAATTTGGTGGGTATTGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAGTCAATCAATTGTATCGCTATACAATCTAGTACCCCTACCAGGGTACCAGGAGAGAGATAACTAGGGTGAATACTACGACTTAGATGTACTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGTATATATTTGCTCATTACATATTTGTTGATTTTTCTATGTCCGCTTTACTTTTTATATTTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGTTAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAATAGGTAAATTCCATAAGTTCATTATAGTCTATCAGTTGAGAGGAATTTAGTATAAGAAAGCCTGTCAGGGCTCTTGCCTTATCCAAGAACTGGTAAGGATTTCTTGACAGAGGGACTCTGTCAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGTTATAAGGCTTAGGTGCTTGGAGGGTATTAGGGCACCGCTCTTAATACAGTCTCCATAGGTGTAACCAGGTCAACTAGGACAACGGAGGACGTTGACAAAGCATGGATAGCGATAGCGTAGAAGATAAAATGGGGCAGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTAGTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAGGGGAGTGGCAAAGAGAAGTGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACCTTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGGGGTAGAACGTATCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGGAACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCCCCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAACCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGCGAGACGTGGTCAGTCGATTCAGCGTTA
GGTTTTAAACTCCTTTGGCAAAGATTGATTCTAGCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATTAGCACCAGTAGGTTCAATAAGGTAGTAGTCCAATAGAATGGAAAACTCGAGATCTAATCTCTCGATTTCCTAGTGTCATGGAAATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGGCTAGCGAGAGAGTCTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTTGAGTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCATACTTTGTTACACGGAGTGTATCTGGGGGGAATAATACTAACTTACTTAGCACTATCGCGATGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAATAACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTATTTGTTTTTTTTTTTGCTCTTAATTTTAGAGGATGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA -filter",
|
|
)
|
|
self.assertEqual(alignments.metadata["Align_format"], "srspair")
|
|
self.assertEqual(alignments.metadata["Report_file"], "stdout")
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 10.0)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
|
|
self.assertEqual(alignment.annotations["Identity"], 2296)
|
|
self.assertEqual(alignment.annotations["Similarity"], 2301)
|
|
self.assertEqual(alignment.annotations["Gaps"], 1202)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], 10155.0)
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 3653))
|
|
self.assertEqual(alignment.sequences[0].id, "asis")
|
|
self.assertEqual(alignment.sequences[1].id, "asis")
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq,
|
|
"TATTTTTTGGATTTTTTTCTAGATTTTCTAGGTTATTTAAACCGTTTTTTTTTAATTTAGTGTTTGAGTTTTGACAGGTCTCCACTTTGGGGGCTCCATCGCAAGGAAATTAGAATTCTTATACTTGGTTCTCTTTCCCAGGGACTCCAAGGATCTTTTCATTAGTTTGGATTTTGGTGTTTTCTTTAATTTTGTTAAGAAACAAATCCTTTCTAGAGTTTTTTCTAGCATTATGTTTTTTTTTCTCCTTATCTAAGGGGGTTTGTCGAGGTTTCTTAAATCTTTTTTTCTCTGGGTTTTAAAATTGTTTAAATTTTTTTGACCGAGGGGTTGGGGTGGTTTTCTCATGATAACAGGGGCTGGTGCTTTAGATCCTACCTCTACTGACCCGGGGTCTGCTACTGTGGCTTCTGATGAAGATCCACAGTATGCGCCTACGGAARCTCGGCAGTTTGGTGTTCGAAATCCAGCCCCTCGAATTAATACTCTTGTGCAGGTGGTTGACGAGCGCGGTATCGAATTGCAAAATTTGGGGCGGGACCCCGCTGTTCCGCCTGTTGCTCCGGGGGGGGCAGGTTAATCCTCCAGTCGTCTCCTTTTGGGGGCGTCTTTGACGGGGGTTTAAATCTTTCTTTGGTTGTGGATAGGATTTTTTTTCTAATATCGATCCTACCTGTTTTGGCGGGGCTATTACTTTGTTACTTTTGACCGAAATTTTAATGGAAATTTCTTTGATTCAAATGAATCCCTTAGTTTTCCAACACTTTTTTTTGGTTTTTTTAGGGATAGTCTACGCTGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGCAGGATTTGCTTTTTTAACTTATACTGGTTCGTAACGCATTAGCTCAACTCTCTCTTGTAGTTCTAGCAGCCGCCTTTTCTTTGTTGGGGGAGGGTTTAGGAGGAGTCTTTTTTTTCCTAACCCAAGGTGTTTCTTTCTTTTTTTCTTTAAAGTTCTTGACTGTTGGCACTTGTCTCCATAAATTTTCTTTCTTGTAAAGGGCTCCTAAGGCTTCTTGTTTCTGAATTCCTCTTTTCTTTTATTCTGTTTTGAGCTTATTTTTCTTGTTAGCTATTACGTAGGCATAGGGCAAATAATTTTTTTTTCTGCTCTCATTATTCCTTCTCCCTGCTTGTTTCACCCTGTGGGCTCTTTGAGCCCCACTAAGTGAGCGGGGCTCCTGCTTCCGCTCAATTAAATTTTGGTGGGTATTGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAATCAATCAATTGTATCGCTATACAATCTAGTACCCCTACCAGGGTACCAAGAGAGAGATAACTAGGGTGAATACTACGACTTAGATGTAGTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGT
AYATATTTGTTCATTACATATTTGTTGACTTTTCTATCTCTGCTTTTACTTTTTTATTTATTTTTAAATCTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGTTAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAATAGGTAAATTCCATAAGTTCATTATAGTTTATCAGTTGAGAGGAATTTAGTATAAGAAGGCCCATTGGGGCTCTTGTCTTATCCAAGAACTGGTAAGATTTAATTCTACCGGGACGGTAGAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGCTATAGGGACTAGGTGCTAGGGAGGTATTAGGGCACCGCTCTTTATACAATCTCCATAGATACAACCAGGTCAACTAGGACAACGGAGGACGTTGACAGAGCATAAATAGCGATAGCGTACAAGATAWAATAGGGGCAGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTAGTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAAGGGTGTGGCAAAGAGAAATGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACCTTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGGGGTAGAACGTTTCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGGAACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCCCCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAACCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGCGAGACGTGGTCAGTCGATTCAGCGTTAGGTTTAAACTCCTTTGGCAAAGATTGACTCTAGCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATWAACACCAGTAGGTTCAATAAGGTAGTAATCCAATAGAATGGAAAACTCAAGATCTAATCTCTCGAYTTCCTAGTGTCATGGAAATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGACTAGCGAGAGAGTCTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTAGAGTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCATACTTTGTTACACGGAGTGTATCTAGGGGGAATAATACTAACTTACTTAGCACTATCGCGATGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAATAACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTTATTTTGTTTTTTTTTTGCTCTTAATTTTAGWGGGRGTGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTTTAATATTCTTGTGGTTTTGAACCTTTAGGTTTCTTTCTTTACCTTCGAGGGATTGGGCACTAGAATGAGTTTTAAGAGTGTGTGAAAGGGGGCTTGATAGCAGGGGAATGCTTTTTTAACTTATACTGGCTCGTAACGCATCAGTTCAACTCTCTCTTGCAGTTCTAGCAGCCGCCTTTTTTTTGTTGGGGGGGGGTTAAGAGAGTGTTTTTTTTCTAATCCAAGGGTCTTACTTTCTTTCTTTCTTTAAAAATTCTTTGGCTGTCGACACCTTTCTCTCCCGTCAGTCTCATGGTTTCTGGCTCTCTTGGGCTTTTTTTGTTTGTGAATGCCTCTTTTTTTTATTCTGTTTTGAGCTTATTTTTCTTGTTTACTATTACGTAGGTATAGGGCAAATAATTTTTTTTTCGCGTCTCTTGGCATGCCCATTACTCTAGTTTTATTCCCGGGCTTCTTCTCTCACCCTAGAGGGCTCTTTGAGCCCACACTCAAGTGAGCGGGGCTCCCGCTTCCGCTCAATTAAATTTGGTGGGTATTGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAGTCAATCAATTGTATCGCTATACAATCTAGTACCCCTACCAGGGTACCAGGAGAGAGATAACTAGGGTGAATACTACGACTTAGATGTACTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGTATATATTTGCTCATTACATATTTGTTGATTTTTCTATGTCCGCTTTACTTTTTATATTTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGTTAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAATAGGTAAATTCCATAAGTTCATTATAGTCTATCAGTTGAGAGGAATTTAGTATAAGAAAGCCTGTCAGGGCTCTTGCCTTATCCAAGAACTGGTAAGGATTTCTTGACAGAGGGACTCTGTCAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGTTATAAGGCTTAGGTGCTTGGAGGGTATTAGGGCACCGCTCTTAATACAGTCTCCATAGGTGTAACCAGGTCAACTAGGACAACGGAGGACGTTGACAAAGCATGGATAGCGATAGCGTAGAAGATAAAATGGGGCAGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTAGTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAGGGGAGTGGCAAAGAGAAGTGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACCTTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGGGGTAGAACGTATCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGGAACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCCCCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAACCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGCGAGACGTGGTCAGTCGATTCAGCGTTAGGTT
TTAAACTCCTTTGGCAAAGATTGATTCTAGCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATTAGCACCAGTAGGTTCAATAAGGTAGTAGTCCAATAGAATGGAAAACTCGAGATCTAATCTCTCGATTTCCTAGTGTCATGGAAATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGGCTAGCGAGAGAGTCTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTTGAGTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCATACTTTGTTACACGGAGTGTATCTGGGGGGAATAATACTAACTTACTTAGCACTATCGCGATGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAATAACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTATTTGTTTTTTTTTTTGCTCTTAATTTTAGAGGATGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA",
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 958, 965, 997, 1006, 1006, 1016, 1018,
|
|
1032, 1036, 1040, 1049, 1073, 1073, 1074, 1075,
|
|
1096, 1096, 1106, 1106, 1110, 1116, 1130, 1147,
|
|
1153, 1153, 1175, 1175, 1186, 1199, 1201, 1203,
|
|
1214, 1214, 1238, 1238, 1243, 1243, 1244, 1248,
|
|
1251, 1251, 1256, 1261, 1274, 1275, 1305, 1306,
|
|
1323, 1323, 1330, 1331, 1348, 1348, 1353, 1354,
|
|
1364, 1364, 1403, 1403, 1414, 1414, 1415, 1417,
|
|
1510, 1512, 1526, 1527, 1536, 1536, 1559, 1559,
|
|
1566, 1566, 1580, 1580, 1598, 1598, 1603, 1610,
|
|
1615, 1615, 1622, 1622, 1646, 1647, 1717, 1718,
|
|
1719, 1719, 1723, 1723, 1728, 1728, 1735, 1736,
|
|
1738, 1738, 1754, 1754, 1771, 1771, 1775, 1775,
|
|
1809, 1810, 2042, 2043, 2053, 2053, 2057, 2069,
|
|
2276, 2276, 2279, 2283, 2287, 2287, 2294, 2294,
|
|
2298, 2299, 2468, 2469, 2983, 2983, 3467, 3468,
|
|
3507, 3509, 3546],
|
|
[ 0, 0, 7, 7, 16, 22, 32, 32,
|
|
46, 46, 50, 50, 74, 80, 81, 81,
|
|
102, 107, 117, 119, 123, 123, 137, 137,
|
|
143, 147, 169, 170, 181, 181, 183, 183,
|
|
194, 215, 239, 241, 246, 250, 251, 251,
|
|
254, 255, 260, 260, 273, 273, 303, 303,
|
|
320, 322, 329, 329, 346, 348, 353, 353,
|
|
363, 364, 403, 404, 415, 418, 419, 419,
|
|
512, 512, 526, 526, 535, 536, 559, 560,
|
|
567, 568, 582, 584, 602, 606, 611, 611,
|
|
616, 617, 624, 626, 650, 650, 720, 720,
|
|
721, 724, 728, 737, 742, 749, 756, 756,
|
|
758, 761, 777, 778, 795, 796, 800, 801,
|
|
835, 835, 1067, 1067, 1077, 1078, 1082, 1082,
|
|
1289, 1290, 1293, 1293, 1297, 1301, 1308, 1310,
|
|
1314, 1314, 1483, 1483, 1997, 1998, 2482, 2482,
|
|
2521, 2521, 2558]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
alignment[0],
|
|
"TATTTTTTGGATTTTTTTCTAGATTTTCTAGGTTATTTAAACCGTTTTTTTTTAATTTAGTGTTTGAGTTTTGACAGGTCTCCACTTTGGGGGCTCCATCGCAAGGAAATTAGAATTCTTATACTTGGTTCTCTTTCCCAGGGACTCCAAGGATCTTTTCATTAGTTTGGATTTTGGTGTTTTCTTTAATTTTGTTAAGAAACAAATCCTTTCTAGAGTTTTTTCTAGCATTATGTTTTTTTTTCTCCTTATCTAAGGGGGTTTGTCGAGGTTTCTTAAATCTTTTTTTCTCTGGGTTTTAAAATTGTTTAAATTTTTTTGACCGAGGGGTTGGGGTGGTTTTCTCATGATAACAGGGGCTGGTGCTTTAGATCCTACCTCTACTGACCCGGGGTCTGCTACTGTGGCTTCTGATGAAGATCCACAGTATGCGCCTACGGAARCTCGGCAGTTTGGTGTTCGAAATCCAGCCCCTCGAATTAATACTCTTGTGCAGGTGGTTGACGAGCGCGGTATCGAATTGCAAAATTTGGGGCGGGACCCCGCTGTTCCGCCTGTTGCTCCGGGGGGGGCAGGTTAATCCTCCAGTCGTCTCCTTTTGGGGGCGTCTTTGACGGGGGTTTAAATCTTTCTTTGGTTGTGGATAGGATTTTTTTTCTAATATCGATCCTACCTGTTTTGGCGGGGCTATTACTTTGTTACTTTTGACCGAAATTTTAATGGAAATTTCTTTGATTCAAATGAATCCCTTAGTTTTCCAACACTTTTTTTTGGTTTTTTTAGGGATAGTCTACGCTGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAGGTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTCGT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA---------------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GTTTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCT--TGGTTTTGGAGGCTTTGGCTTTCTT--CTTACTCTTAGGAGGA-TGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGG-TTAATAGCAGG---ATTTGCTTTTTTAACTTATACTGGTTCGTAACGCATTAGCTCAACTCTCTCTTGTAGTTCTAGCAGCCGCCTTTTCTTTGTTGGGGGAGGGTTTAGGAGGAGTCTTTTTTTTCCTAACCCAA-GGTGTTTCTTTCTTTTTTTCTTT-AAAGTTC-TTGACTGTTGGCAC--TTGTCTCCATAAATTTTC----TTTCTTGTAAAGGGCTC-CTAAGGC--TTCTTGTTTCTGAATTCCTCTTTTCTTTTATTCTGTTTTGAGCTTATTTTTCTTGTTAGCTATTACGTAGGCATAGGGCAAATAATTTTTTTTTCTG---CTCT---------CATTA-------TTCCTTCTCC---CTGCTTGTTTCACCCT-GTGGGCTCTTTGAGCCC-CACT-AAGTGAGCGGGGCTCCTGCTTCCGCTCAATTAAATTTTGGTGGGTATTGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAATCAATCAATTGTATCGCTATACAATCTAGTACC
CCTACCAGGGTACCAAGAGAGAGATAACTAGGGTGAATACTACGACTTAGATGTAGTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGTAYATATTTGTTCATTACATATTTGTTGACTTTTCTATCTCTGCTTTTACTTTTT-TATTTATTTTTAAATCTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGTTAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAATAGGTAAATTCCATAAGTTCATTATAGTTTATCAGTTGAGAGGAATTTAGTATAAGAAGGCCCATTGGGGCTCTTGTCTTATCCAAGAACTGGTAA-GATTTAATTCT----ACCGGGA--CGGTAGAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGCTATAGGGACTAGGTGCTAGGGAGGTATTAGGGCACCGCTCTTTATACAATCTCCATAGATACAACCAGGTCAACTAGGACAACGGAGGACGTTGACAGAGCATAAATAGCGATAGCGTACAAGATAWAATAGGGGCAGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTAGTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAAGGGTGTGGCAAAGAGAAATGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACCTTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGGGGTAGAACGTTTCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGGAACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCCCCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAACCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGCGAGACGTGGTCAGTCGATTCAGCGTTAGG-TTTAAACTCCTTTGGCAAAGATTGACTCTAGCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATWAACACCAGTAGGTTCAATAAGGTAGTAATCCAATAGAATGGAAAACTCAAGATCTAATCTCTCGAYTTCCTAGTGTCATGGAAATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGACTAGCGAGAGAGTCTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTAGAGTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCATACTTTGTTACACGGAGTGTATCTAGGGGGAATAATACTAACTTACTTAGCACTATCGCGATGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAATAACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTTATTTTGTTTTTTTTTTGCTCTTAATTTTAGWGGGRGTGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|

|
|
)
|
|
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
" .|||||| .|||||.|| |||..|..|| ||||.||||.||.| ||.| ||.|.|||||.|||.||||.|||| | |||||||||||.|.||||||| ||||||||.| ||.| |||.|.|||||||| |||||| .||||...||||..|||||..| ||||||||||| || ||.||.||.|| ||..||.||.|.|||..||||.|| ||||| | ||| |.||| |||||||||.||| .||||||...|||||||||||||||||..| |||||||||||.|.||| ||||||| ||..||||.|.|||||| .|||| |||.|..||| |||||.||||||.||||||||||||.||||||||||||| ||.|||||||| | |||||||||||||||||||||.|||||||||||.||.||||||||||||||.||||||||||||||||||||.|||||||||||.|||||.|| .||||.|||||||| ||||.|||| |||.||.||||||||.||||||| |||.||| |||.||||.|.||| ||.|||||....|.|.|| ||||| ||||| ||..||| ||.||||||.|||||.|||||||| ||||||||||||||||||||||||||||||||..||||||||||||.||||||||||||||||||||||| | |||| ||||| ||..||| || ||.|||.|.||||||| |.||||||||||||||| |||| ||||||||||||||||.||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||.|||||||.||||||||||||||||||.||||||||.||.|| |||||||||| |||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||.|||..|..|||||||||.||||||||||||||||||| ||| |||| |..|||| |.|| .|||||||||||||||||||||||||||||||||||||.||||.||..||||||||.||..||||||||||||||||||||.|||||.|||||||||.|..|||||||||||||||||||||||||||||||||||.|||||..||||||||||||||.||||||.||| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||.|||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 
|||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||.|.|||||||||||||||||||||||||.||||||||||||||||||||.||||||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||..||||||||||||||||||||||||.|| .||||||||||||||||||||||||||||||||||||",
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
asis 0 TATTTTTTGGATTTTTTTCTAGATTTTCTAGGTTATTTAAACCGTTTTTTTTTAATTTAG
|
|
0 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 60 TGTTTGAGTTTTGACAGGTCTCCACTTTGGGGGCTCCATCGCAAGGAAATTAGAATTCTT
|
|
60 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 120 ATACTTGGTTCTCTTTCCCAGGGACTCCAAGGATCTTTTCATTAGTTTGGATTTTGGTGT
|
|
120 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 180 TTTCTTTAATTTTGTTAAGAAACAAATCCTTTCTAGAGTTTTTTCTAGCATTATGTTTTT
|
|
180 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 240 TTTTCTCCTTATCTAAGGGGGTTTGTCGAGGTTTCTTAAATCTTTTTTTCTCTGGGTTTT
|
|
240 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 300 AAAATTGTTTAAATTTTTTTGACCGAGGGGTTGGGGTGGTTTTCTCATGATAACAGGGGC
|
|
300 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 360 TGGTGCTTTAGATCCTACCTCTACTGACCCGGGGTCTGCTACTGTGGCTTCTGATGAAGA
|
|
360 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 420 TCCACAGTATGCGCCTACGGAARCTCGGCAGTTTGGTGTTCGAAATCCAGCCCCTCGAAT
|
|
420 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 480 TAATACTCTTGTGCAGGTGGTTGACGAGCGCGGTATCGAATTGCAAAATTTGGGGCGGGA
|
|
480 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 540 CCCCGCTGTTCCGCCTGTTGCTCCGGGGGGGGCAGGTTAATCCTCCAGTCGTCTCCTTTT
|
|
540 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 600 GGGGGCGTCTTTGACGGGGGTTTAAATCTTTCTTTGGTTGTGGATAGGATTTTTTTTCTA
|
|
600 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 660 ATATCGATCCTACCTGTTTTGGCGGGGCTATTACTTTGTTACTTTTGACCGAAATTTTAA
|
|
660 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 720 TGGAAATTTCTTTGATTCAAATGAATCCCTTAGTTTTCCAACACTTTTTTTTGGTTTTTT
|
|
720 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 780 TAGGGATAGTCTACGCTGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACC
|
|
780 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 840 CCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTAT
|
|
840 ------------------------------------------------------------
|
|
asis 0 ------------------------------------------------------------
|
|
|
|
asis 900 AATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGT
|
|
900 ----------------------------------------------------------.|
|
|
asis 0 ----------------------------------------------------------TT
|
|
|
|
asis 960 ATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGT------GTAGTAGC
|
|
960 |||||--------------------------------.|||||.||------|||..|..
|
|
asis 2 ATTAA--------------------------------TCTTATGGTTTTGCCGTAAAATT
|
|
|
|
asis 1014 TCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT-
|
|
1020 ||--||||.||||.||.|----||.|---------||.|.|||||.|||.||||.||||-
|
|
asis 30 TC--TTTCTTTATTTTTT----ATTG---------TTAGGATTTTGTTGATTTTATTTTT
|
|
|
|
asis 1073 -----GAAATTTTTAGGTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGA
|
|
1080 -----|-|||||||||||.|.|||||||-----||||||||.|--||.|------|||.|
|
|
asis 75 CTCAAG-AATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAA
|
|
|
|
asis 1121 ATTTTATTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTCGT-T
|
|
1140 .||||||||-----------------||||||----.||||...||||..|||||..|-|
|
|
asis 128 GTTTTATTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCT
|
|
|
|
asis 1176 GAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA---------------------G
|
|
1200 ||||||||||-------------||--||.||.||.||---------------------|
|
|
asis 171 GAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTCATGGGTTGGCCGGG
|
|
|
|
asis 1215 GATCTACCTTTTATCTTTCTAAT--CTTTT----GTTTTAGT-ATAATTGGTCTTCGATT
|
|
1260 |..||.||.|.|||..||||.||--|||||----|----|||-|.|||-----|||||||
|
|
asis 216 GGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG----AGTAAGAAT-----TTCGATT
|
|
|
|
asis 1268 CAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCT--TGG
|
|
1320 ||.|||-.||||||...|||||||||||||||||..|-|||||||||||.|.|||--|||
|
|
asis 267 CAGCAA-CTTTAGTTCACAGTCTTTTTTTTTATTAAG-AAAGGTTTTAATATTCTTGTGG
|
|
|
|
asis 1326 TTTTGGAGGCTTTGGCTTTCTT--CTTACTCTTAGGAGGA-TGGGCGCTAGAAAGAGTTT
|
|
1380 ||||-||..||||.|.||||||--.||||-|||.|..|||-|||||.||||||.||||||
|
|
asis 325 TTTT-GAACCTTTAGGTTTCTTTCTTTAC-CTTCGAGGGATTGGGCACTAGAATGAGTTT
|
|
|
|
asis 1383 TAAGAGGGTGTGAAAGGGGG-TTAATAGCAGG---ATTTGCTTTTTTAACTTATACTGGT
|
|
1440 ||||||.|||||||||||||-||.||||||||---|--|||||||||||||||||||||.
|
|
asis 383 TAAGAGTGTGTGAAAGGGGGCTTGATAGCAGGGGAA--TGCTTTTTTAACTTATACTGGC
|
|
|
|
asis 1439 TCGTAACGCATTAGCTCAACTCTCTCTTGTAGTTCTAGCAGCCGCCTTTTCTTTGTTGGG
|
|
1500 |||||||||||.||.||||||||||||||.||||||||||||||||||||.|||||||||
|
|
asis 441 TCGTAACGCATCAGTTCAACTCTCTCTTGCAGTTCTAGCAGCCGCCTTTTTTTTGTTGGG
|
|
|
|
asis 1499 GGAGGGTTTAGGAGGAGTCTTTTTTTTCCTAACCCAA-GGTGTTTCTTTCTTTTTTTCTT
|
|
1560 ||.|||||.||--.||||.||||||||-||||.||||-|||.||.||||||||.||||||
|
|
asis 501 GGGGGGTTAAG--AGAGTGTTTTTTTT-CTAATCCAAGGGTCTTACTTTCTTTCTTTCTT
|
|
|
|
asis 1558 T-AAAGTTC-TTGACTGTTGGCAC--TTGTCTCCATAAATTTTC----TTTCTTGTAAAG
|
|
1620 |-|||.|||-|||.||||.|.|||--||.|||||....|.|.||----|||||-------
|
|
asis 558 TAAAAATTCTTTGGCTGTCGACACCTTTCTCTCCCGTCAGTCTCATGGTTTCT-------
|
|
|
|
asis 1610 GGCTC-CTAAGGC--TTCTTGTTTCTGAATTCCTCTTTTCTTTTATTCTGTTTTGAGCTT
|
|
1680 |||||-||..|||--||.||||||.|||||.||||||||-||||||||||||||||||||
|
|
asis 611 GGCTCTCTTGGGCTTTTTTTGTTTGTGAATGCCTCTTTT-TTTTATTCTGTTTTGAGCTT
|
|
|
|
asis 1667 ATTTTTCTTGTTAGCTATTACGTAGGCATAGGGCAAATAATTTTTTTTTCTG---CTCT-
|
|
1740 ||||||||||||..||||||||||||.|||||||||||||||||||||||-|---||||-
|
|
asis 670 ATTTTTCTTGTTTACTATTACGTAGGTATAGGGCAAATAATTTTTTTTTC-GCGTCTCTT
|
|
|
|
asis 1723 --------CATTA-------TTCCTTCTCC---CTGCTTGTTTCACCCT-GTGGGCTCTT
|
|
1800 --------|||||-------||..|||-||---||.|||.|.|||||||-|.||||||||
|
|
asis 729 GGCATGCCCATTACTCTAGTTTTATTC-CCGGGCTTCTTCTCTCACCCTAGAGGGCTCTT
|
|
|
|
asis 1764 TGAGCCC-CACT-AAGTGAGCGGGGCTCCTGCTTCCGCTCAATTAAATTTTGGTGGGTAT
|
|
1860 |||||||-||||-||||||||||||||||.|||||||||||||||||-||||||||||||
|
|
asis 788 TGAGCCCACACTCAAGTGAGCGGGGCTCCCGCTTCCGCTCAATTAAA-TTTGGTGGGTAT
|
|
|
|
asis 1822 TGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAATCAATCAATTGTAT
|
|
1920 |||||||||||||||||||||||||||||||||||||||||||||.||||||||||||||
|
|
asis 847 TGAGTCTCAGAGGGACTATGATATAGGTTCAGATTGATGGACCTAGTCAATCAATTGTAT
|
|
|
|
asis 1882 CGCTATACAATCTAGTACCCCTACCAGGGTACCAAGAGAGAGATAACTAGGGTGAATACT
|
|
1980 ||||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||
|
|
asis 907 CGCTATACAATCTAGTACCCCTACCAGGGTACCAGGAGAGAGATAACTAGGGTGAATACT
|
|
|
|
asis 1942 ACGACTTAGATGTAGTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGTAYA
|
|
2040 ||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||.|
|
|
asis 967 ACGACTTAGATGTACTGTTTAAGTTTCTACGGGCTACAGAGAAGCTACCCGCAGGGTATA
|
|
|
|
asis 2002 TATTTGTTCATTACATATTTGTTGACTTTTCTATCTCTGCTTTTACTTTTT-TATTTATT
|
|
2100 ||||||.||||||||||||||||||.||||||||.||.||-||||||||||-||||----
|
|
asis 1027 TATTTGCTCATTACATATTTGTTGATTTTTCTATGTCCGC-TTTACTTTTTATATT----
|
|
|
|
asis 2061 TTTAAATCTTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGT
|
|
2160 --------||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1082 --------TTTTTAACTTCAGCTGTTTTTCCTTATCTATTTGACGTAGGCATAGGAAAGT
|
|
|
|
asis 2121 TAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAAT
|
|
2220 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1134 TAACGAATTTTGTAATATTTTTAATTATTTTGTATAGTATACAGGGTAGTGGTATGTAAT
|
|
|
|
asis 2181 AGGTAAATTCCATAAGTTCATTATAGTTTATCAGTTGAGAGGAATTTAGTATAAGAAGGC
|
|
2280 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||.||
|
|
asis 1194 AGGTAAATTCCATAAGTTCATTATAGTCTATCAGTTGAGAGGAATTTAGTATAAGAAAGC
|
|
|
|
asis 2241 CCATTGGGGCTCTTGTCTTATCCAAGAACTGGTAA-GATTTAATTCT----ACCGGGA--
|
|
2340 |..|..|||||||||.|||||||||||||||||||-|||----||||----|..||||--
|
|
asis 1254 CTGTCAGGGCTCTTGCCTTATCCAAGAACTGGTAAGGAT----TTCTTGACAGAGGGACT
|
|
|
|
asis 2294 CGGTAGAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGCTATAGGGACTAGGTGC
|
|
2400 |.||-.|||||||||||||||||||||||||||||||||||||.||||.||..|||||||
|
|
asis 1310 CTGT-CAAATCGGGCAGAGCATGATCTATTTCTTCGGGTATGGTTATAAGGCTTAGGTGC
|
|
|
|
asis 2354 TAGGGAGGTATTAGGGCACCGCTCTTTATACAATCTCCATAGATACAACCAGGTCAACTA
|
|
2460 |.||..||||||||||||||||||||.|||||.|||||||||.|..||||||||||||||
|
|
asis 1369 TTGGAGGGTATTAGGGCACCGCTCTTAATACAGTCTCCATAGGTGTAACCAGGTCAACTA
|
|
|
|
asis 2414 GGACAACGGAGGACGTTGACAGAGCATAAATAGCGATAGCGTACAAGATAWAATAGGGGC
|
|
2520 |||||||||||||||||||||.|||||..||||||||||||||.||||||.|||-|||||
|
|
asis 1429 GGACAACGGAGGACGTTGACAAAGCATGGATAGCGATAGCGTAGAAGATAAAAT-GGGGC
|
|
|
|
asis 2474 AGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTA
|
|
2580 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1488 AGTGGTAGCGAAGCGTAGAAGAAAAAATAAGAGTATTGTTTGTAAATAATTCTTTTTTTA
|
|
|
|
asis 2534 GTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAAGGGTGTGGCA
|
|
2640 |||||||||||||||||||||||||||||||||||||||||||||||||.|||.||||||
|
|
asis 1548 GTTTTTAAATATTCTTTTTTTAGGTGGTGTGTGGTTAGGTATGGGGTTAGGGGAGTGGCA
|
|
|
|
asis 2594 AAGAGAAATGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACC
|
|
2700 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1608 AAGAGAAGTGTTTATTAAACATTCTTATGGCCGTAGATAGCATATCGATTATACGAGACC
|
|
|
|
asis 2654 TTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGG
|
|
2760 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1668 TTCGTAAGATCAATCCCCACTAGCATTGCTCATACAGGTTAACTCAATAGGAGGAGCTGG
|
|
|
|
asis 2714 GGTAGAACGTTTCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGG
|
|
2820 ||||||||||.|||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1728 GGTAGAACGTATCTAGTTCGGGGGTAACCGCAGTTCAATGAAAGTGACGACGTCGGATGG
|
|
|
|
asis 2774 AACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCC
|
|
2880 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1788 AACAAACTTAATACCACCAGTTGTGCTAACGATTGTTATCTCAATCTATCCCAACAGGCC
|
|
|
|
asis 2834 CCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAA
|
|
2940 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1848 CCCAGGTAGTGATGAGTGGTGGAATGGTACAGGGTACCAGTGGGTGAAGAGCGTCACGAA
|
|
|
|
asis 2894 CCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGC
|
|
3000 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 1908 CCAGGGAATACGGAGTACAGAGTTGAGCGCCCGGGGCTCCGCCCCCGGCTTTTATAGCGC
|
|
|
|
asis 2954 GAGACGTGGTCAGTCGATTCAGCGTTAGG-TTTAAACTCCTTTGGCAAAGATTGACTCTA
|
|
3060 |||||||||||||||||||||||||||||-|||||||||||||||||||||||||.||||
|
|
asis 1968 GAGACGTGGTCAGTCGATTCAGCGTTAGGTTTTAAACTCCTTTGGCAAAGATTGATTCTA
|
|
|
|
asis 3013 GCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATWAACACCAGTAGGTTCAATAAGG
|
|
3120 |||||||||||||||||||||||||||||||||||||.|.||||||||||||||||||||
|
|
asis 2028 GCGATCCAGAGACCCTGCCTGGCATAAAAGTCTTTATTAGCACCAGTAGGTTCAATAAGG
|
|
|
|
asis 3073 TAGTAATCCAATAGAATGGAAAACTCAAGATCTAATCTCTCGAYTTCCTAGTGTCATGGA
|
|
3180 |||||.||||||||||||||||||||.||||||||||||||||.||||||||||||||||
|
|
asis 2088 TAGTAGTCCAATAGAATGGAAAACTCGAGATCTAATCTCTCGATTTCCTAGTGTCATGGA
|
|
|
|
asis 3133 AATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGACTAGCGAGAGAGT
|
|
3240 ||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||||
|
|
asis 2148 AATCAGCCAGGTTCTCTTCATCTGCAACAGTAGAAGAAGAAGAGAGGCTAGCGAGAGAGT
|
|
|
|
asis 3193 CTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTAGA
|
|
3300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.||
|
|
asis 2208 CTTATGGCGGAGACGCTAAGGCTTAAATGTAATGTAGATAACCCCTTACGGAACACTTGA
|
|
|
|
asis 3253 GTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCAT
|
|
3360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 2268 GTGCGACGTAGACTACATAATCCCTCAGGGATATTAGCTCTGCTCGATTAACAATAGCAT
|
|
|
|
asis 3313 ACTTTGTTACACGGAGTGTATCTAGGGGGAATAATACTAACTTACTTAGCACTATCGCGA
|
|
3420 |||||||||||||||||||||||.||||||||||||||||||||||||||||||||||||
|
|
asis 2328 ACTTTGTTACACGGAGTGTATCTGGGGGGAATAATACTAACTTACTTAGCACTATCGCGA
|
|
|
|
asis 3373 TGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAAT
|
|
3480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
asis 2388 TGCTACGCATTCGCTCTTTCGCTAAATAAGATACGACGATGAGTGGTTGGTGGAGAGAAT
|
|
|
|
asis 3433 AACCGATTCTAACTTGATAATTCGCATGAAATAATTTTTTTATTTTGTTTTTTTTTTGCT
|
|
3540 ||||||||||||||||||||||||||||||||||-||||||||||..|||||||||||||
|
|
asis 2448 AACCGATTCTAACTTGATAATTCGCATGAAATAA-TTTTTTATTTGTTTTTTTTTTTGCT
|
|
|
|
asis 3493 CTTAATTTTAGWGGGRGTGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA 3546
|
|
3600 |||||||||||.||--.|||||||||||||||||||||||||||||||||||| 3653
|
|
asis 2507 CTTAATTTTAGAGG--ATGTTTATTTTTATTCTAATAAAAAGGATCCGTTGAA 2558
|
|
""",
|
|
)
|
|
counts = alignment.counts(substitution_matrix)
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (substitution score = 12769.0; 2451 aligned letters; 2296 identities; 155 mismatches; 2296 positives; 1202 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
substitution_score = 12769.0,
|
|
aligned = 2451:
|
|
identities = 2296,
|
|
positives = 2296,
|
|
mismatches = 155.
|
|
gaps = 1202:
|
|
left_gaps = 958:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 958:
|
|
open_left_deletions = 1,
|
|
extend_left_deletions = 957;
|
|
internal_gaps = 244:
|
|
internal_insertions = 107:
|
|
open_internal_insertions = 34,
|
|
extend_internal_insertions = 73;
|
|
internal_deletions = 137:
|
|
open_internal_deletions = 30,
|
|
extend_internal_deletions = 107;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 958)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 107)
|
|
self.assertEqual(counts.internal_deletions, 137)
|
|
self.assertEqual(counts.left_gaps, 958)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 244)
|
|
self.assertEqual(counts.insertions, 107)
|
|
self.assertEqual(counts.deletions, 1095)
|
|
self.assertEqual(counts.gaps, 1202)
|
|
self.assertEqual(counts.aligned, 2451)
|
|
self.assertEqual(counts.identities, 2296)
|
|
self.assertEqual(counts.mismatches, 155)
|
|
self.assertEqual(counts.positives, 2296)
|
|
with self.assertRaises(StopIteration):
|
|
next(alignments)
|
|
|
|
def test_water_reverse1(self):
    # Parse a single EMBOSS water alignment in which seqA is aligned as its
    # reverse complement (row 0 of the coordinates runs from 121 down to 0).
    # The fixture was generated with the command asserted below:
    # water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse1 -outfile water_reverse1.txt
    # NOTE(review): column padding inside the expected multi-line strings was
    # rebuilt from the asserted shape/coordinates - confirm against the
    # actual str(alignment) and str(counts) output.
    path = "Emboss/water_reverse1.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 23:47:41")
    self.assertEqual(
        alignments.metadata["Command line"],
        "water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse1 -outfile water_reverse1.txt",
    )
    self.assertEqual(alignments.metadata["Align_format"], "srspair")
    self.assertEqual(alignments.metadata["Report_file"], "water_reverse1.txt")

    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 0.001)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.001)
    self.assertEqual(alignment.annotations["Identity"], 32)
    self.assertEqual(alignment.annotations["Similarity"], 32)
    self.assertEqual(alignment.annotations["Gaps"], 89)
    self.assertAlmostEqual(alignment.annotations["Score"], 159.911)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 121))
    self.assertEqual(alignment.sequences[0].id, "seqA")
    self.assertEqual(alignment.sequences[1].id, "seqB")
    self.assertEqual(
        alignment.sequences[0].seq,
        "GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
    )
    self.assertEqual(
        alignment.sequences[1].seq,
        "GGGGGGGGGGGGGGGGGGGCCCCCCCCCCCCC",
    )
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[121, 102, 13,  0],
                      [  0,  19, 19, 32]])
            # fmt: on
        )
    )
    self.assertEqual(
        alignment[0],
        "GGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCC",
    )
    self.assertEqual(
        alignment[1],
        "GGGGGGGGGGGGGGGGGGG-----------------------------------------------------------------------------------------CCCCCCCCCCCCC",
    )
    # One consensus character per alignment column: 19 matches, a blank
    # for each of the 89 gap columns, then 13 matches (19 + 89 + 13 = 121).
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"],
        "|" * 19 + " " * 89 + "|" * 13,
    )
    self.assertEqual(
        str(alignment),
        """\
seqA            121 GGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
                  0 |||||||||||||||||||-----------------------------------------
seqB              0 GGGGGGGGGGGGGGGGGGG-----------------------------------------

seqA             61 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCC
                 60 ------------------------------------------------||||||||||||
seqB             19 ------------------------------------------------CCCCCCCCCCCC

seqA              1 C   0
                120 | 121
seqB             31 C  32
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (32 aligned letters; 32 identities; 0 mismatches; 89 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
    aligned = 32:
        identities = 32,
        mismatches = 0.
    gaps = 89:
        left_gaps = 0:
            left_insertions = 0:
                open_left_insertions = 0,
                extend_left_insertions = 0;
            left_deletions = 0:
                open_left_deletions = 0,
                extend_left_deletions = 0;
        internal_gaps = 89:
            internal_insertions = 0:
                open_internal_insertions = 0,
                extend_internal_insertions = 0;
            internal_deletions = 89:
                open_internal_deletions = 1,
                extend_internal_deletions = 88;
        right_gaps = 0:
            right_insertions = 0:
                open_right_insertions = 0,
                extend_right_insertions = 0;
            right_deletions = 0:
                open_right_deletions = 0,
                extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 89)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 89)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 89)
    self.assertEqual(counts.gaps, 89)
    self.assertEqual(counts.aligned, 32)
    self.assertEqual(counts.identities, 32)
    self.assertEqual(counts.mismatches, 0)
    # The report holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_water_reverse2(self):
    # Parse a single EMBOSS water alignment in which seqB is aligned as its
    # reverse complement (row 1 of the coordinates runs from 32 down to 0).
    # The fixture was generated with the command asserted below:
    # water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse2 -outfile water_reverse2.txt
    # NOTE(review): column padding inside the expected multi-line strings was
    # rebuilt from the asserted shape/coordinates - confirm against the
    # actual str(alignment) and str(counts) output.
    path = "Emboss/water_reverse2.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Sun 23 Oct 2022 00:06:18")
    self.assertEqual(
        alignments.metadata["Command line"],
        "water -asequence seqA.fa -bsequence seqB.fa -gapopen 0.001 -gapextend 0.001 -sreverse2 -outfile water_reverse2.txt",
    )
    self.assertEqual(alignments.metadata["Align_format"], "srspair")
    self.assertEqual(alignments.metadata["Report_file"], "water_reverse2.txt")
    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 0.001)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.001)
    self.assertEqual(alignment.annotations["Identity"], 32)
    self.assertEqual(alignment.annotations["Similarity"], 32)
    self.assertEqual(alignment.annotations["Gaps"], 89)
    self.assertAlmostEqual(alignment.annotations["Score"], 159.911)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 121))
    self.assertEqual(alignment.sequences[0].id, "seqA")
    self.assertEqual(alignment.sequences[1].id, "seqB")
    self.assertEqual(
        alignment.sequences[0].seq,
        "GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
    )
    self.assertEqual(
        alignment.sequences[1].seq,
        "GGGGGGGGGGGGGGGGGGGCCCCCCCCCCCCC",
    )
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[ 0, 13, 102, 121],
                      [32, 19,  19,   0]])
            # fmt: on
        )
    )
    self.assertEqual(
        alignment[0],
        "GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCC",
    )
    self.assertEqual(
        alignment[1],
        "GGGGGGGGGGGGG-----------------------------------------------------------------------------------------CCCCCCCCCCCCCCCCCCC",
    )
    # One consensus character per alignment column: 13 matches, a blank
    # for each of the 89 gap columns, then 19 matches (13 + 89 + 19 = 121).
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"],
        "|" * 13 + " " * 89 + "|" * 19,
    )
    self.assertEqual(
        str(alignment),
        """\
seqA              0 GGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
                  0 |||||||||||||-----------------------------------------------
seqB             32 GGGGGGGGGGGGG-----------------------------------------------

seqA             60 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCC
                 60 ------------------------------------------||||||||||||||||||
seqB             19 ------------------------------------------CCCCCCCCCCCCCCCCCC

seqA            120 C 121
                120 | 121
seqB              1 C   0
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (32 aligned letters; 32 identities; 0 mismatches; 89 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
    aligned = 32:
        identities = 32,
        mismatches = 0.
    gaps = 89:
        left_gaps = 0:
            left_insertions = 0:
                open_left_insertions = 0,
                extend_left_insertions = 0;
            left_deletions = 0:
                open_left_deletions = 0,
                extend_left_deletions = 0;
        internal_gaps = 89:
            internal_insertions = 0:
                open_internal_insertions = 0,
                extend_internal_insertions = 0;
            internal_deletions = 89:
                open_internal_deletions = 1,
                extend_internal_deletions = 88;
        right_gaps = 0:
            right_insertions = 0:
                open_right_insertions = 0,
                extend_right_insertions = 0;
            right_deletions = 0:
                open_right_deletions = 0,
                extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 89)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 89)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 89)
    self.assertEqual(counts.gaps, 89)
    self.assertEqual(counts.aligned, 32)
    self.assertEqual(counts.identities, 32)
    self.assertEqual(counts.mismatches, 0)
    # The report holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_water_reverse3(self):
    # Parse a single EMBOSS water local alignment in which seqA is aligned
    # as its reverse complement (row 0 of the coordinates runs from 20 down
    # to 2); only the aligned region of each sequence is defined.
    # The fixture was generated with the command asserted below:
    # water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse1 -outfile water_reverse3.txt
    # NOTE(review): column padding inside the expected multi-line strings was
    # rebuilt from the asserted shape/coordinates - confirm against the
    # actual str(alignment) and str(counts) output.
    path = "Emboss/water_reverse3.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 22:56:03")
    self.assertEqual(
        alignments.metadata["Command line"],
        "water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse1 -outfile water_reverse3.txt",
    )
    self.assertEqual(alignments.metadata["Align_format"], "srspair")
    self.assertEqual(alignments.metadata["Report_file"], "water_reverse3.txt")

    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 1.0)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
    self.assertEqual(alignment.annotations["Identity"], 16)
    self.assertEqual(alignment.annotations["Similarity"], 16)
    self.assertEqual(alignment.annotations["Gaps"], 3)
    self.assertAlmostEqual(alignment.annotations["Score"], 77.5)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 19))
    self.assertEqual(alignment.sequences[0].id, "seqA")
    self.assertEqual(alignment.sequences[1].id, "seqB")
    # Both sequences are partially defined: letters are known from index 2 on.
    self.assertEqual(
        repr(alignment.sequences[0].seq),
        "Seq({2: 'GGGCCCGGTTTAAAAAAA'}, length=20)",
    )
    self.assertEqual(
        repr(alignment.sequences[1].seq),
        "Seq({2: 'TTTTTTTACCCGGGCCC'}, length=19)",
    )
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[20, 13, 11, 10, 10,  2],
                      [ 2,  9,  9, 10, 11, 19]])
            # fmt: on
        )
    )
    self.assertEqual(alignment[0], "TTTTTTTAAA-CCGGGCCC")
    self.assertEqual(alignment[1], "TTTTTTT--ACCCGGGCCC")
    # One consensus character per column: blanks at the three gap columns
    # (7 matches, 2 gaps, 1 match, 1 gap, 8 matches = 19 columns).
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"],
        "|||||||  | ||||||||",
    )
    self.assertEqual(
        str(alignment),
        """\
seqA             20 TTTTTTTAAA-CCGGGCCC  2
                  0 |||||||--|-|||||||| 19
seqB              2 TTTTTTT--ACCCGGGCCC 19
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (16 aligned letters; 16 identities; 0 mismatches; 3 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
    aligned = 16:
        identities = 16,
        mismatches = 0.
    gaps = 3:
        left_gaps = 0:
            left_insertions = 0:
                open_left_insertions = 0,
                extend_left_insertions = 0;
            left_deletions = 0:
                open_left_deletions = 0,
                extend_left_deletions = 0;
        internal_gaps = 3:
            internal_insertions = 1:
                open_internal_insertions = 1,
                extend_internal_insertions = 0;
            internal_deletions = 2:
                open_internal_deletions = 1,
                extend_internal_deletions = 1;
        right_gaps = 0:
            right_insertions = 0:
                open_right_insertions = 0,
                extend_right_insertions = 0;
            right_deletions = 0:
                open_right_deletions = 0,
                extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 1)
    self.assertEqual(counts.internal_deletions, 2)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 3)
    self.assertEqual(counts.insertions, 1)
    self.assertEqual(counts.deletions, 2)
    self.assertEqual(counts.gaps, 3)
    self.assertEqual(counts.aligned, 16)
    self.assertEqual(counts.identities, 16)
    self.assertEqual(counts.mismatches, 0)
    # The report holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_water_reverse4(self):
    # Parse a single EMBOSS water local alignment in which seqB is aligned
    # as its reverse complement (row 1 of the coordinates runs from 19 down
    # to 2); only the aligned region of each sequence is defined.
    # The fixture was generated with the command asserted below:
    # water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse2 -outfile water_reverse4.txt
    # NOTE(review): column padding inside the expected multi-line strings was
    # rebuilt from the asserted shape/coordinates - confirm against the
    # actual str(alignment) and str(counts) output.
    path = "Emboss/water_reverse4.txt"
    alignments = Align.parse(path, "emboss")
    self.assertEqual(alignments.metadata["Program"], "water")
    self.assertEqual(alignments.metadata["Rundate"], "Sat 22 Oct 2022 22:56:15")
    self.assertEqual(
        alignments.metadata["Command line"],
        "water -asequence seqA.fa -bsequence seqB.fa -gapopen 1 -gapextend 0.5 -sreverse2 -outfile water_reverse4.txt",
    )
    self.assertEqual(alignments.metadata["Align_format"], "srspair")
    self.assertEqual(alignments.metadata["Report_file"], "water_reverse4.txt")
    alignment = next(alignments)
    self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
    self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 1.0)
    self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 0.5)
    self.assertEqual(alignment.annotations["Identity"], 16)
    self.assertEqual(alignment.annotations["Similarity"], 16)
    self.assertEqual(alignment.annotations["Gaps"], 3)
    self.assertAlmostEqual(alignment.annotations["Score"], 77.5)
    self.assertEqual(len(alignment), 2)
    self.assertEqual(alignment.shape, (2, 19))
    self.assertEqual(alignment.sequences[0].id, "seqA")
    self.assertEqual(alignment.sequences[1].id, "seqB")
    # Both sequences are partially defined: letters are known from index 2 on.
    self.assertEqual(
        repr(alignment.sequences[0].seq),
        "Seq({2: 'GGGCCCGGTTTAAAAAAA'}, length=20)",
    )
    self.assertEqual(
        repr(alignment.sequences[1].seq),
        "Seq({2: 'TTTTTTTACCCGGGCCC'}, length=19)",
    )
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[ 2, 10, 12, 12, 20],
                      [19, 11, 11, 10,  2]])
            # fmt: on
        )
    )
    self.assertEqual(alignment[0], "GGGCCCGGTT-TAAAAAAA")
    self.assertEqual(alignment[1], "GGGCCCGG--GTAAAAAAA")
    # One consensus character per column: blanks at the three gap columns
    # (8 matches, 3 gaps, 8 matches = 19 columns).
    self.assertEqual(
        alignment.column_annotations["emboss_consensus"],
        "||||||||   ||||||||",
    )
    self.assertEqual(
        str(alignment),
        """\
seqA              2 GGGCCCGGTT-TAAAAAAA 20
                  0 ||||||||---|||||||| 19
seqB             19 GGGCCCGG--GTAAAAAAA  2
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (16 aligned letters; 16 identities; 0 mismatches; 3 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
    aligned = 16:
        identities = 16,
        mismatches = 0.
    gaps = 3:
        left_gaps = 0:
            left_insertions = 0:
                open_left_insertions = 0,
                extend_left_insertions = 0;
            left_deletions = 0:
                open_left_deletions = 0,
                extend_left_deletions = 0;
        internal_gaps = 3:
            internal_insertions = 1:
                open_internal_insertions = 1,
                extend_internal_insertions = 0;
            internal_deletions = 2:
                open_internal_deletions = 1,
                extend_internal_deletions = 1;
        right_gaps = 0:
            right_insertions = 0:
                open_right_insertions = 0,
                extend_right_insertions = 0;
            right_deletions = 0:
                open_right_deletions = 0,
                extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 1)
    self.assertEqual(counts.internal_deletions, 2)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 3)
    self.assertEqual(counts.insertions, 1)
    self.assertEqual(counts.deletions, 2)
    self.assertEqual(counts.gaps, 3)
    self.assertEqual(counts.aligned, 16)
    self.assertEqual(counts.identities, 16)
    self.assertEqual(counts.mismatches, 0)
    # The report holds exactly one alignment.
    with self.assertRaises(StopIteration):
        next(alignments)
|
|
|
|
def test_pair_aln_full_blank_line(self):
# Parse the EMBOSS fixture "Emboss/emboss_pair_aln_full_blank_line.txt" with
# Align.parse(..., "emboss") and check the file metadata, the single alignment
# it yields (annotations, sequences, coordinates, rows, consensus, printed
# form) and the gap/identity tallies returned by alignment.counts().
# Presumably the fixture exercises a block whose consensus line is entirely
# blank (columns 300-359 below are all gaps) -- confirm against the fixture.
# NOTE(review): the lone "|" lines and the missing indentation throughout this
# listing look like extraction artifacts, and runs of spaces inside the
# expected strings may have been collapsed -- verify against the upstream file.
|
|
path = "Emboss/emboss_pair_aln_full_blank_line.txt"
|
|
alignments = Align.parse(path, "emboss")
|
|
# File-level header fields exposed through alignments.metadata.
self.assertEqual(alignments.metadata["Program"], "stretcher")
|
|
self.assertEqual(alignments.metadata["Rundate"], "Tue 15 May 2018 17:01:31")
|
|
self.assertEqual(
|
|
alignments.metadata["Command line"],
|
|
"stretcher -auto -stdout -asequence emboss_stretcher-I20180515-170128-0371-22292969-p1m.aupfile -bsequence emboss_stretcher-I20180515-170128-0371-22292969-p1m.bupfile -datafile EDNAFULL -gapopen 16 -gapextend 4 -aformat3 pair -snucleotide1 -snucleotide2",
|
|
)
|
|
self.assertEqual(alignments.metadata["Align_format"], "pair")
|
|
self.assertEqual(alignments.metadata["Report_file"], "stdout")
|
|
# First alignment from the iterator and its per-alignment annotations.
alignment = next(alignments)
|
|
self.assertEqual(alignment.annotations["Matrix"], "EDNAFULL")
|
|
self.assertAlmostEqual(alignment.annotations["Gap_penalty"], 16)
|
|
self.assertAlmostEqual(alignment.annotations["Extend_penalty"], 4)
|
|
self.assertEqual(alignment.annotations["Identity"], 441)
|
|
self.assertEqual(alignment.annotations["Similarity"], 441)
|
|
self.assertEqual(alignment.annotations["Gaps"], 847)
|
|
self.assertAlmostEqual(alignment.annotations["Score"], -2623)
|
|
# Two rows and 1450 columns; 1450 = 603 aligned letters + 847 gap columns
# (the totals asserted on the counts object further down).
self.assertEqual(len(alignment), 2)
|
|
self.assertEqual(alignment.shape, (2, 1450))
|
|
self.assertEqual(
|
|
alignment.sequences[0].id, "hg38_chrX_131691529_131830643_47210_48660"
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].id, "mm10_chrX_50555743_50635321_27140_27743"
|
|
)
|
|
# Ungapped sequence text of both records.
self.assertEqual(
|
|
alignment.sequences[0].seq,
|
|
"GGCAGGTGCATAGCTTGAGCCTAGGAGTTCAAGTCCAGCCCTGACAATGTAGAGAGACCCCGTCTCTTCAAAAAATACAAAAAATAGCCAGGCATGGTGACCTACAATGGAAGCCCTAGCTACGTAGGAGGCGGAAATGGGAGGATCACCTCAGCCCAGGGAGGCTGATGTTGCAGTGAGCCATGATCATGCCTCTACACTCCACCCTGGGCAACAGAGTAAGATGCTGTCTAAAATATATATATATGCATATCTGTGTGTATATATATATATATATATGTGTGTGTGTGTGTGTGTATATACATATGTGTGTGTATATACATATATGTGTATATATATATGTGTGTATATATACATATACATATTCAGCATCACCTTATATTCTTTGAATATATCTACATCAATACATACTTTTGAGTGCTTGAAATTTTTTATATTTTACTCTAGAAGAACTGTAAGAAATTATAAAGTAGAAAACTTGTGGTAGGTCAAACATAGTAAGAAGAAATAATCACTTTTTAAAGGTCTGTGCTAGGTACTATGATCTGTTCCCTATATATACATAATATGGACTTTTATAAACTAATGTTCAAATTCCCCTGTAGTATAACTTCTTGTTGTTGTTTATTTTTTTTTTTTTGTATTTTTCATTTTAGATATGGGGTTTCACTCTGTTGACCAGGCTGATCTCGAACCACTGGTCTCAAGCGATCCTCCCATCTTGGACTCCCAAAGTGCTAGGATTACAGGCACGAGGCACCTTGACTGGCCACCATGTACTATAGCTGTTAAAACAAGTTTGTTTCACTGATAACTGGAGTACTTTTCAAATATAATTAATAATTCATGGAAATAATGATAGCTTTAAAAGTATTGGCACTTTTAAAAACTGAGTTTGTAAACTTCATATAACATAAAATTAACCATTAAAATGTATTAATTTCAATGGCATTTAGGACACTCACAATGCAGTGCAAGCATTACCACTATGTAGTGGCAAATCATTTTCACTACCACAAAAGAAAATCCTGGACCCATTAGTTAGTCATTCCCCATTCCACTCTCTGCCCAGCCCCTGGCAAACACTCATCTGATTTCCCTCACTACTGATCATCACAACAAGTGGCCTTGTTCATCTTGTTGTGGGAACCAGGAGACCAGAGAGACCAATGGGTGGAACAGGAGGATTTTACTAGGTGGTCACCGACTCAGCAGATTAACATCCAAAGGCTGAGCCCCAAACCAAGAGAGGGCTTGACTTTTATACATATATCTGAAAAGGGCCCAAAACCTGTAAGGCCGGTAAGCAAGCTTACAGCAGAACAAAGGCAGTTTATCAAACAGTGACAGGTTTTACAGTTCAGGCATGTCTTGTGACCTTTGCCATAACTGCACAGCTGGAAAACAGGAACTTACAAAATCCTTACAAGCTTGCAGAAACAGTTACAAA",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"GTTCAAGGCCATCCGGGATTAAAGGTGTGGTAGAACTCTTCTGATGGAGACAATATAAGGACATTGGAAGAAGGGAGTCTTGCCCTTGCTCCTTCGCCTACTTGCTGTGTAAGACTGAGTAACTCCTAGACCCTTGGACTTCCATTTCAGCCACTACTGAACCATTGTTGGGAATTGGGCTGCAGACTGTAAGTCATCAATAAATTCCTTTACTATATAGAGACTATCCATAAATTCTGTGACTCTAGAGAACCCTGACAATACAACTGGGAAGCACGGACATCCTCTTTGAGATATAATTATCAACTGGCAAGTGTTTGTTTATTGATATTTTACTTAAGACAAAGTTAAACCTACTCCTGTCCTCTGGGCATGGTAGCATGGACTTATTCTGGAACTACCAGAGGAAAAGACAGAAGCCTACTGGAAAGGCCCAGGCCATCCTGCCTCTTGTAGTTCACTAGGACCAGGGCTCAGCATAGTCCTTGGCTTCTAAATCTGCTACCATATCTTTATCATGTAAAACTGACACAAAATTAAACATATCAAAATTTTATGAAAACCATTAAGTATCTGGAAAAGAAAAAAATCAACAGTTATAAA",
|
|
)
|
|
# Expected coordinates: a two-row integer array. The first row runs from 0
# to 1450 and the second from 0 to 603; the second row repeats its value on
# alternating segments, consistent with every gap lying in the second sequence.
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 1, 27, 45, 49, 54, 61, 68,
|
|
79, 91, 93, 101, 105, 113, 133, 145,
|
|
157, 163, 168, 174, 180, 199, 224, 236,
|
|
253, 259, 370, 377, 378, 391, 408, 420,
|
|
440, 466, 469, 479, 483, 497, 509, 518,
|
|
537, 543, 576, 585, 595, 598, 606, 616,
|
|
647, 659, 668, 673, 676, 682, 693, 701,
|
|
753, 760, 767, 778, 825, 839, 844, 847,
|
|
854, 858, 861, 863, 868, 877, 881, 885,
|
|
895, 898, 899, 927, 939, 944, 946, 957,
|
|
962, 964, 972, 983, 985, 988, 989, 997,
|
|
1008, 1019, 1021, 1036, 1042, 1044, 1064, 1080,
|
|
1087, 1091, 1106, 1109, 1133, 1144, 1189, 1197,
|
|
1210, 1215, 1228, 1236, 1247, 1263, 1269, 1279,
|
|
1285, 1297, 1299, 1305, 1308, 1327, 1329, 1331,
|
|
1334, 1341, 1357, 1368, 1376, 1381, 1386, 1391,
|
|
1395, 1407, 1414, 1422, 1438, 1450],
|
|
[ 0, 1, 1, 19, 19, 24, 24, 31,
|
|
31, 43, 43, 51, 51, 59, 59, 71,
|
|
71, 77, 77, 83, 83, 102, 102, 114,
|
|
114, 120, 120, 127, 127, 140, 140, 152,
|
|
152, 178, 178, 188, 188, 202, 202, 211,
|
|
211, 217, 217, 226, 226, 229, 229, 239,
|
|
239, 251, 251, 256, 256, 262, 262, 270,
|
|
270, 277, 277, 288, 288, 302, 302, 305,
|
|
305, 309, 309, 311, 311, 320, 320, 324,
|
|
324, 327, 327, 355, 355, 360, 360, 371,
|
|
371, 373, 373, 384, 384, 387, 387, 395,
|
|
395, 406, 406, 421, 421, 423, 423, 439,
|
|
439, 443, 443, 446, 446, 457, 457, 465,
|
|
465, 470, 470, 478, 478, 494, 494, 504,
|
|
504, 516, 516, 522, 522, 541, 541, 543,
|
|
543, 550, 550, 561, 561, 566, 566, 571,
|
|
571, 583, 583, 591, 591, 603]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Aligned rows: the expected first row contains no "-" characters, while the
# expected second row uses "-" in the gap columns.
self.assertEqual(
|
|
alignment[0],
|
|
"GGCAGGTGCATAGCTTGAGCCTAGGAGTTCAAGTCCAGCCCTGACAATGTAGAGAGACCCCGTCTCTTCAAAAAATACAAAAAATAGCCAGGCATGGTGACCTACAATGGAAGCCCTAGCTACGTAGGAGGCGGAAATGGGAGGATCACCTCAGCCCAGGGAGGCTGATGTTGCAGTGAGCCATGATCATGCCTCTACACTCCACCCTGGGCAACAGAGTAAGATGCTGTCTAAAATATATATATATGCATATCTGTGTGTATATATATATATATATATGTGTGTGTGTGTGTGTGTATATACATATGTGTGTGTATATACATATATGTGTATATATATATGTGTGTATATATACATATACATATTCAGCATCACCTTATATTCTTTGAATATATCTACATCAATACATACTTTTGAGTGCTTGAAATTTTTTATATTTTACTCTAGAAGAACTGTAAGAAATTATAAAGTAGAAAACTTGTGGTAGGTCAAACATAGTAAGAAGAAATAATCACTTTTTAAAGGTCTGTGCTAGGTACTATGATCTGTTCCCTATATATACATAATATGGACTTTTATAAACTAATGTTCAAATTCCCCTGTAGTATAACTTCTTGTTGTTGTTTATTTTTTTTTTTTTGTATTTTTCATTTTAGATATGGGGTTTCACTCTGTTGACCAGGCTGATCTCGAACCACTGGTCTCAAGCGATCCTCCCATCTTGGACTCCCAAAGTGCTAGGATTACAGGCACGAGGCACCTTGACTGGCCACCATGTACTATAGCTGTTAAAACAAGTTTGTTTCACTGATAACTGGAGTACTTTTCAAATATAATTAATAATTCATGGAAATAATGATAGCTTTAAAAGTATTGGCACTTTTAAAAACTGAGTTTGTAAACTTCATATAACATAAAATTAACCATTAAAATGTATTAATTTCAATGGCATTTAGGACACTCACAATGCAGTGCAAGCATTACCACTATGTAGTGGCAAATCATTTTCACTACCACAAAAGAAAATCCTGGACCCATTAGTTAGTCATTCCCCATTCCACTCTCTGCCCAGCCCCTGGCAAACACTCATCTGATTTCCCTCACTACTGATCATCACAACAAGTGGCCTTGTTCATCTTGTTGTGGGAACCAGGAGACCAGAGAGACCAATGGGTGGAACAGGAGGATTTTACTAGGTGGTCACCGACTCAGCAGATTAACATCCAAAGGCTGAGCCCCAAACCAAGAGAGGGCTTGACTTTTATACATATATCTGAAAAGGGCCCAAAACCTGTAAGGCCGGTAAGCAAGCTTACAGCAGAACAAAGGCAGTTTATCAAACAGTGACAGGTTTTACAGTTCAGGCATGTCTTGTGACCTTTGCCATAACTGCACAGCTGGAAAACAGGAACTTACAAAATCCTTACAAGCTTGCAGAAACAGTTACAAA",
|
|
)
|
|
self.assertEqual(
|
|
alignment[1],
|
|
"G--------------------------TTCAAGGCCATCCGGGAT----TAAAG-------GTGTGGT-----------AGAACTCTTCTG--ATGGAGAC----AATATAAG--------------------GACATTGGAAGA------------AGGGAG-----TCTTGC------CCTTGCTCCTTCGCCTACT-------------------------TGCTGTGTAAGA-----------------CTGAGT---------------------------------------------------------------------------------------------------------------AACTCCT-AGACCCTTGGACT-----------------TCCATTTCAGCC--------------------ACTACTGAACCATTGTTGGGAATTGG---GCTGCAGACT----GTAAGTCATCAATA------------AATTCCTTT-------------------ACTATA---------------------------------TAGAGACTA----------TCC--------ATAAATTCTG-------------------------------TGACTCTAGAGA---------ACCCT---GACAAT-----------ACAACTGG----------------------------------------------------GAAGCAC-------GGACATCCTCT-----------------------------------------------TTGAGATATAATTA-----TCA-------ACTG---GC-----AAGTGTTTG----TTTA----------TTG-ATATTTTACTTAAGACAAAGTTAAACCT------------ACTCC--TGTCCTCTGGG-----CA--------TGGTAGCATGG--ACT-TATTCTGG-----------AACTACCAGAG--GAAAAGACAGAAGCC------TA--------------------CTGGAAAGGCCCAGGC-------CATC---------------CTG------------------------CCTCTTGTAGT---------------------------------------------TCACTAGG-------------ACCAG-------------GGCTCAGC-----------ATAGTCCTTGGCTTCT------AAATCTGCTA------CCATATCTTTAT--CATGTA---AAACTGACACAAAATTAAA--CA---TATCAAA----------------ATTTTATGAAA--------ACCAT-----TAAGT----ATCTGGAAAAGA-------AAAAAATC----------------AACAGTTATAAA",
|
|
)
|
|
# Consensus line stored as the "emboss_consensus" column annotation.
self.assertEqual(
|
|
alignment.column_annotations["emboss_consensus"],
|
|
"| ||||||.|||.||..||. ||.|| ||.|..| |.||.|...|.| ||||.||| |||..||| ||.||.|||.|| |||||| |.|||| ||.||.||.|.|..||||. ||||||.|||.| |||.|| |.|.||| |.|..|||.||.| |.|.|||.||.. |||...|||..|.|||..|.||||.. |..|.|.||| |||.||||...||| |||..|||| |||||. ||.|.|||| ||| ||||.||||. |.|.|.||||.| ||.|| |||.|. ||.||||| ||.|||| ||.||.|.|.| ||.|.||||||||| ||| |.|| || ||||.||.| |||| ||| |.|.||.|..|||.|.|||.||||.|.| |.|.| ||.|.|.|.|| || ||..|||||.. ||| |.|..||| .|||||||.|. |||||..|.|.|.|| || |||...||.|||.||| |||| ||| |.||||||.|| |.|||||| |.||| ||||.||| |.||..||||.|||.| |.|||||..| |.|.|.||.||. |..||| ||.||.|||..|.|..||| || ||||||| |.||.|.|.|. |||.| |||.| |.||||||||.| |.|||||| ||||||||.|||",
|
|
)
|
|
# Printed form of the alignment: blocks of 60 columns, each with the first
# sequence, a match/mismatch/gap line and the second sequence, labelled
# "hg38_chrX" and "mm10_chrX".
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
hg38_chrX 0 GGCAGGTGCATAGCTTGAGCCTAGGAGTTCAAGTCCAGCCCTGACAATGTAGAGAGACCC
|
|
0 |--------------------------||||||.|||.||..||.----||.||------
|
|
mm10_chrX 0 G--------------------------TTCAAGGCCATCCGGGAT----TAAAG------
|
|
|
|
hg38_chrX 60 CGTCTCTTCAAAAAATACAAAAAATAGCCAGGCATGGTGACCTACAATGGAAGCCCTAGC
|
|
60 -||.|..|-----------|.||.|...|.|--||||.|||----|||..|||-------
|
|
mm10_chrX 24 -GTGTGGT-----------AGAACTCTTCTG--ATGGAGAC----AATATAAG-------
|
|
|
|
hg38_chrX 120 TACGTAGGAGGCGGAAATGGGAGGATCACCTCAGCCCAGGGAGGCTGATGTTGCAGTGAG
|
|
120 -------------||.||.|||.||------------||||||-----|.||||------
|
|
mm10_chrX 59 -------------GACATTGGAAGA------------AGGGAG-----TCTTGC------
|
|
|
|
hg38_chrX 180 CCATGATCATGCCTCTACACTCCACCCTGGGCAACAGAGTAAGATGCTGTCTAAAATATA
|
|
180 ||.||.||.|.|..||||.-------------------------||||||.|||.|----
|
|
mm10_chrX 83 CCTTGCTCCTTCGCCTACT-------------------------TGCTGTGTAAGA----
|
|
|
|
hg38_chrX 240 TATATATGCATATCTGTGTGTATATATATATATATATATGTGTGTGTGTGTGTGTGTATA
|
|
240 -------------|||.||-----------------------------------------
|
|
mm10_chrX 114 -------------CTGAGT-----------------------------------------
|
|
|
|
hg38_chrX 300 TACATATGTGTGTGTATATACATATATGTGTATATATATATGTGTGTATATATACATATA
|
|
300 ------------------------------------------------------------
|
|
mm10_chrX 120 ------------------------------------------------------------
|
|
|
|
hg38_chrX 360 CATATTCAGCATCACCTTATATTCTTTGAATATATCTACATCAATACATACTTTTGAGTG
|
|
360 ----------|.|.|||-|.|..|||.||.|-----------------|.|.|||.||..
|
|
mm10_chrX 120 ----------AACTCCT-AGACCCTTGGACT-----------------TCCATTTCAGCC
|
|
|
|
hg38_chrX 420 CTTGAAATTTTTTATATTTTACTCTAGAAGAACTGTAAGAAATTATAAAGTAGAAAACTT
|
|
420 --------------------|||...|||..|.|||..|.||||..---|..|.|.|||-
|
|
mm10_chrX 152 --------------------ACTACTGAACCATTGTTGGGAATTGG---GCTGCAGACT-
|
|
|
|
hg38_chrX 480 GTGGTAGGTCAAACATAGTAAGAAGAAATAATCACTTTTTAAAGGTCTGTGCTAGGTACT
|
|
480 ---|||.||||...|||------------|||..||||-------------------|||
|
|
mm10_chrX 188 ---GTAAGTCATCAATA------------AATTCCTTT-------------------ACT
|
|
|
|
hg38_chrX 540 ATGATCTGTTCCCTATATATACATAATATGGACTTTTATAAACTAATGTTCAAATTCCCC
|
|
540 ||.---------------------------------||.|.||||----------|||--
|
|
mm10_chrX 214 ATA---------------------------------TAGAGACTA----------TCC--
|
|
|
|
hg38_chrX 600 TGTAGTATAACTTCTTGTTGTTGTTTATTTTTTTTTTTTTGTATTTTTCATTTTAGATAT
|
|
600 ------||||.||||.-------------------------------|.|.|.||||.|-
|
|
mm10_chrX 229 ------ATAAATTCTG-------------------------------TGACTCTAGAGA-
|
|
|
|
hg38_chrX 660 GGGGTTTCACTCTGTTGACCAGGCTGATCTCGAACCACTGGTCTCAAGCGATCCTCCCAT
|
|
660 --------||.||---|||.|.-----------||.|||||-------------------
|
|
mm10_chrX 251 --------ACCCT---GACAAT-----------ACAACTGG-------------------
|
|
|
|
hg38_chrX 720 CTTGGACTCCCAAAGTGCTAGGATTACAGGCACGAGGCACCTTGACTGGCCACCATGTAC
|
|
720 ---------------------------------||.||||-------||.||.|.|.|--
|
|
mm10_chrX 270 ---------------------------------GAAGCAC-------GGACATCCTCT--
|
|
|
|
hg38_chrX 780 TATAGCTGTTAAAACAAGTTTGTTTCACTGATAACTGGAGTACTTTTCAAATATAATTAA
|
|
780 ---------------------------------------------||.|.|||||||||-
|
|
mm10_chrX 288 ---------------------------------------------TTGAGATATAATTA-
|
|
|
|
hg38_chrX 840 TAATTCATGGAAATAATGATAGCTTTAAAAGTATTGGCACTTTTAAAAACTGAGTTTGTA
|
|
840 ----|||-------|.||---||-----||||.||.|----||||----------|||-|
|
|
mm10_chrX 302 ----TCA-------ACTG---GC-----AAGTGTTTG----TTTA----------TTG-A
|
|
|
|
hg38_chrX 900 AACTTCATATAACATAAAATTAACCATTAAAATGTATTAATTTCAATGGCATTTAGGACA
|
|
900 .|.||.|..|||.|.|||.||||.|.|------------|.|.|--||.|.|.|.||---
|
|
mm10_chrX 328 TATTTTACTTAAGACAAAGTTAAACCT------------ACTCC--TGTCCTCTGGG---
|
|
|
|
hg38_chrX 960 CTCACAATGCAGTGCAAGCATTACCACTATGTAGTGGCAAATCATTTTCACTACCACAAA
|
|
960 --||--------||..|||||..--|||-|.|..|||-----------.|||||||.|.-
|
|
mm10_chrX 371 --CA--------TGGTAGCATGG--ACT-TATTCTGG-----------AACTACCAGAG-
|
|
|
|
hg38_chrX 1020 AGAAAATCCTGGACCCATTAGTTAGTCATTCCCCATTCCACTCTCTGCCCAGCCCCTGGC
|
|
1020 -|||||..|.|.|.||------||--------------------|||...||.|||.|||
|
|
mm10_chrX 406 -GAAAAGACAGAAGCC------TA--------------------CTGGAAAGGCCCAGGC
|
|
|
|
hg38_chrX 1080 AAACACTCATCTGATTTCCCTCACTACTGATCATCACAACAAGTGGCCTTGTTCATCTTG
|
|
1080 -------||||---------------|||------------------------|.|||||
|
|
mm10_chrX 439 -------CATC---------------CTG------------------------CCTCTTG
|
|
|
|
hg38_chrX 1140 TTGTGGGAACCAGGAGACCAGAGAGACCAATGGGTGGAACAGGAGGATTTTACTAGGTGG
|
|
1140 |.||---------------------------------------------|.||||||---
|
|
mm10_chrX 453 TAGT---------------------------------------------TCACTAGG---
|
|
|
|
hg38_chrX 1200 TCACCGACTCAGCAGATTAACATCCAAAGGCTGAGCCCCAAACCAAGAGAGGGCTTGACT
|
|
1200 ----------|.|||-------------||||.|||-----------|.||..||||.||
|
|
mm10_chrX 465 ----------ACCAG-------------GGCTCAGC-----------ATAGTCCTTGGCT
|
|
|
|
hg38_chrX 1260 TTTATACATATATCTGAAAAGGGCCCAAAACCTGTAAGGCCGGTAAGCAAGCTTACAGCA
|
|
1260 |.|------|.|||||..|------|.|.|.||.||.--|..|||---||.||.|||..|
|
|
mm10_chrX 491 TCT------AAATCTGCTA------CCATATCTTTAT--CATGTA---AAACTGACACAA
|
|
|
|
hg38_chrX 1320 GAACAAAGGCAGTTTATCAAACAGTGACAGGTTTTACAGTTCAGGCATGTCTTGTGACCT
|
|
1320 .|..|||--||---|||||||----------------|.||.|.|.|.--------|||.
|
|
mm10_chrX 534 AATTAAA--CA---TATCAAA----------------ATTTTATGAAA--------ACCA
|
|
|
|
hg38_chrX 1380 TTGCCATAACTGCACAGCTGGAAAACAGGAACTTACAAAATCCTTACAAGCTTGCAGAAA
|
|
1380 |-----|||.|----|.||||||||.|-------|.||||||----------------||
|
|
mm10_chrX 565 T-----TAAGT----ATCTGGAAAAGA-------AAAAAATC----------------AA
|
|
|
|
hg38_chrX 1440 CAGTTACAAA 1450
|
|
1440 ||||||.||| 1450
|
|
mm10_chrX 593 CAGTTATAAA 603
|
|
""",
|
|
)
|
|
# Tallies from alignment.counts(); the expected repr embeds id(counts) in hex.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (603 aligned letters; 441 identities; 162 mismatches; 847 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 603:
|
|
identities = 441,
|
|
mismatches = 162.
|
|
gaps = 847:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 847:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 847:
|
|
open_internal_deletions = 66,
|
|
extend_internal_deletions = 781;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
# The same totals read back through individual attributes: all 847 gaps are
# internal deletions, and 441 identities + 162 mismatches = 603 aligned.
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 847)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 847)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 847)
|
|
self.assertEqual(counts.gaps, 847)
|
|
self.assertEqual(counts.aligned, 603)
|
|
self.assertEqual(counts.identities, 441)
|
|
self.assertEqual(counts.mismatches, 162)
|
|
# The iterator must be exhausted after the single alignment.
with self.assertRaises(StopIteration):
|
|
next(alignments)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run this module's tests through a text runner at
    # verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|