mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
1419 lines
59 KiB
Python
1419 lines
59 KiB
Python
# Copyright 2008 by Peter Cock. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Tests for AlignIO module."""
|
|
import string
|
|
import unittest
|
|
import warnings
|
|
from io import StringIO
|
|
|
|
from Bio import AlignIO
|
|
from Bio import BiopythonDeprecationWarning
|
|
from Bio import SeqIO
|
|
from Bio.Align import AlignInfo
|
|
from Bio.Align import MultipleSeqAlignment
|
|
from Bio.Data import IUPACData
|
|
from Bio.motifs import Motif
|
|
from Bio.Seq import Seq
|
|
from Bio.SeqRecord import SeqRecord
|
|
|
|
test_write_read_alignment_formats = sorted(AlignIO._FormatToWriter)
|
|
test_write_read_align_with_seq_count = test_write_read_alignment_formats + [
|
|
"fasta",
|
|
"tab",
|
|
]
|
|
|
|
|
|
class TestAlignIO_exceptions(unittest.TestCase):
|
|
t_formats = list(AlignIO._FormatToWriter) + list(SeqIO._FormatToWriter)
|
|
|
|
def test_phylip_reject_duplicate(self):
|
|
"""Check that writing duplicated IDs after truncation fails for PHYLIP."""
|
|
handle = StringIO()
|
|
sequences = [
|
|
SeqRecord(Seq("AAAA"), id="longsequencename1"),
|
|
SeqRecord(Seq("AAAA"), id="longsequencename2"),
|
|
SeqRecord(Seq("AAAA"), id="other_sequence"),
|
|
]
|
|
alignment = MultipleSeqAlignment(sequences)
|
|
with self.assertRaises(ValueError) as cm:
|
|
AlignIO.write(alignment, handle, "phylip")
|
|
self.assertEqual(
|
|
"Repeated name 'longsequen' (originally 'longsequencename2'), possibly due to truncation",
|
|
str(cm.exception),
|
|
)
|
|
|
|
def test_parsing_empty_files(self):
|
|
"""Check that parsing an empty file returns an empty list."""
|
|
for t_format in AlignIO._FormatToIterator:
|
|
handle = StringIO()
|
|
alignments = list(AlignIO.parse(handle, t_format))
|
|
self.assertEqual(alignments, [])
|
|
|
|
def test_writing_empty_files(self):
|
|
"""Check that writers can cope with no alignments."""
|
|
for t_format in self.t_formats:
|
|
handle = StringIO()
|
|
number = AlignIO.write([], handle, t_format)
|
|
self.assertEqual(number, 0)
|
|
|
|
def test_writing_not_alignments(self):
|
|
"""Check that writers reject records that are not alignments."""
|
|
path = "Clustalw/opuntia.aln"
|
|
records = list(AlignIO.read(path, "clustal"))
|
|
for t_format in self.t_formats:
|
|
handle = StringIO()
|
|
self.assertRaises(Exception, AlignIO.write, [records], handle, t_format)
|
|
|
|
|
|
class TestAlignIO_reading(unittest.TestCase):
|
|
def simple_alignment_comparison(self, alignments, alignments2, fmt):
|
|
self.assertEqual(len(alignments), len(alignments2))
|
|
for a1, a2 in zip(alignments, alignments2):
|
|
self.assertEqual(a1.get_alignment_length(), a2.get_alignment_length())
|
|
self.assertEqual(len(a1), len(a2))
|
|
for r1, r2 in zip(a1, a2):
|
|
# Check the bare minimum (ID and sequence) as
|
|
# many formats can't store more than that.
|
|
# Check the sequence:
|
|
self.assertEqual(r1.seq, r2.seq)
|
|
# Beware of different quirks and limitations in the
|
|
# valid character sets and the identifier lengths!
|
|
if fmt in ["phylip", "phylip-sequential"]:
|
|
id1 = r1.id.replace("[", "").replace("]", "")[:10]
|
|
elif fmt == "phylip-relaxed":
|
|
id1 = r1.id.replace(" ", "").replace(":", "|")
|
|
elif fmt == "clustal":
|
|
id1 = r1.id.replace(" ", "_")[:30]
|
|
elif fmt in ["stockholm", "maf"]:
|
|
id1 = r1.id.replace(" ", "_")
|
|
elif fmt == "fasta":
|
|
id1 = r1.id.split()[0]
|
|
else:
|
|
id1 = r1.id
|
|
id2 = r2.id
|
|
self.assertEqual(id1, id2)
|
|
|
|
def check_reverse_write_read(self, alignments, indent=" "):
|
|
alignments.reverse()
|
|
for fmt in test_write_read_align_with_seq_count:
|
|
records_per_alignment = len(alignments[0])
|
|
for a in alignments:
|
|
if records_per_alignment != len(a):
|
|
records_per_alignment = None
|
|
# Can we expect this format to work?
|
|
if (
|
|
not records_per_alignment
|
|
and fmt not in test_write_read_alignment_formats
|
|
):
|
|
continue
|
|
|
|
# Going to write to a handle...
|
|
handle = StringIO()
|
|
|
|
if fmt == "nexus":
|
|
with self.assertRaises(ValueError) as cm:
|
|
c = AlignIO.write(alignments, handle=handle, format=fmt)
|
|
self.assertEqual(
|
|
"We can only write one Alignment to a Nexus file.",
|
|
str(cm.exception),
|
|
)
|
|
continue
|
|
c = AlignIO.write(alignments, handle=handle, format=fmt)
|
|
self.assertEqual(c, len(alignments))
|
|
|
|
# First, try with the seq_count
|
|
if records_per_alignment:
|
|
handle.flush()
|
|
handle.seek(0)
|
|
alignments2 = list(
|
|
AlignIO.parse(
|
|
handle=handle, format=fmt, seq_count=records_per_alignment
|
|
)
|
|
)
|
|
self.simple_alignment_comparison(alignments, alignments2, fmt)
|
|
|
|
if fmt in test_write_read_alignment_formats:
|
|
# Don't need the seq_count
|
|
handle.flush()
|
|
handle.seek(0)
|
|
alignments2 = list(AlignIO.parse(handle=handle, format=fmt))
|
|
self.simple_alignment_comparison(alignments, alignments2, fmt)
|
|
|
|
# Try writing just one Alignment (not a list)
|
|
handle = StringIO()
|
|
AlignIO.write(alignments[0:1], handle, fmt)
|
|
self.assertEqual(handle.getvalue(), format(alignments[0], fmt))
|
|
|
|
def check_iterator_for_loop_handle(self, path, fmt, length, m=None):
|
|
# Try using the iterator with a for loop and a handle
|
|
with open(path) as handle:
|
|
alignments = list(AlignIO.parse(handle, format=fmt))
|
|
self.assertEqual(len(alignments), length)
|
|
if m is not None:
|
|
for alignment in alignments:
|
|
self.assertEqual(len(alignment), m)
|
|
return alignments
|
|
|
|
def check_iterator_for_loop_filename(self, path, fmt, length):
|
|
# Try using the iterator with a for loop and a filename not handle
|
|
counter = 0
|
|
for record in AlignIO.parse(path, format=fmt):
|
|
counter += 1
|
|
self.assertEqual(counter, length)
|
|
|
|
def check_iterator_next(self, path, fmt, length):
|
|
# Try using the iterator with the next() method
|
|
counter = 0
|
|
alignments = AlignIO.parse(path, format=fmt)
|
|
while True:
|
|
try:
|
|
alignment = next(alignments)
|
|
except StopIteration:
|
|
break
|
|
self.assertIsNotNone(alignment)
|
|
counter += 1
|
|
self.assertEqual(counter, length)
|
|
|
|
def check_iterator_next_and_list(self, path, fmt, length):
|
|
# Try a mixture of next() and list
|
|
counter = 0
|
|
alignments = AlignIO.parse(path, format=fmt)
|
|
alignment = next(alignments)
|
|
counter = 1
|
|
counter += len(list(alignments))
|
|
self.assertEqual(counter, length)
|
|
|
|
def check_iterator_next_for_loop(self, path, fmt, length):
|
|
# Try a mixture of next() and for loop
|
|
alignments = AlignIO.parse(path, format=fmt)
|
|
alignment = next(alignments)
|
|
counter = 1
|
|
for alignment in alignments:
|
|
counter += 1
|
|
self.assertEqual(counter, length)
|
|
|
|
def check_write_three_times_and_read(self, path, fmt, m):
|
|
with open(path) as handle:
|
|
data = handle.read()
|
|
handle = StringIO()
|
|
handle.write(data + "\n\n" + data + "\n\n" + data)
|
|
handle.seek(0)
|
|
self.assertEqual(
|
|
len(list(AlignIO.parse(handle=handle, format=fmt, seq_count=m))), 3
|
|
)
|
|
handle.close()
|
|
|
|
def check_read(self, path, fmt, m, k):
|
|
# Check Bio.AlignIO.read(...)
|
|
with open(path) as handle:
|
|
alignment = AlignIO.read(handle, format=fmt)
|
|
self.assertIsInstance(alignment, MultipleSeqAlignment)
|
|
self.assertEqual(len(alignment), m)
|
|
self.assertEqual(alignment.get_alignment_length(), k)
|
|
return alignment
|
|
|
|
def check_read_fails(self, path, fmt):
|
|
with open(path) as handle:
|
|
self.assertRaises(ValueError, AlignIO.read, handle, format=fmt)
|
|
|
|
def check_alignment_rows(self, alignment, sequences, column_annotations=None):
|
|
max_len = 40
|
|
items = []
|
|
for record in alignment:
|
|
name = record.id
|
|
sequence = record.seq
|
|
if len(sequence) > max_len:
|
|
sequence = sequence[: max_len - 6] + "..." + sequence[-3:]
|
|
item = (name, sequence)
|
|
items.append(item)
|
|
self.assertEqual(sequences, sorted(items))
|
|
if column_annotations is None:
|
|
self.assertEqual(alignment.column_annotations, {})
|
|
else:
|
|
self.assertEqual(alignment.column_annotations, column_annotations)
|
|
|
|
def check_alignment_columns(self, alignment, columns):
|
|
alignment_len = alignment.get_alignment_length()
|
|
# Compare each sequence column
|
|
for index in range(min(5, alignment_len)):
|
|
self.assertEqual(alignment[:, index], columns[index])
|
|
if alignment_len > 5:
|
|
self.assertEqual(alignment[:, -1], columns[-1])
|
|
|
|
def test_reading_alignments_clustal1(self):
|
|
path = "Clustalw/clustalw.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 2)
|
|
alignment = self.check_read(path, "clustal", 2, 601)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"gi|4959044|gb|AAD34209.1|AF069",
|
|
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQF...SVV",
|
|
),
|
|
(
|
|
"gi|671626|emb|CAA85685.1|",
|
|
"---------MSPQTETKASVGFKAGVKEYKLTYY...---",
|
|
),
|
|
],
|
|
{
|
|
"clustal_consensus": " * *: :: :. :* : :. : . :* :: .: ** **:... *.*** .. .:* * *: .* :* : :* .* *::. . .:: :*..* :* .* .. . : . : *. .:: : . .* . : *.: ..:: * . :: : .*. :. :. . . .* **.*.. :.. *.. . . ::* :.: .*: : * :: *** . * :. . . : *: .:: ::: .. . : : :: * * : .. :.* . ::. :: * : : * * :.. * .. * :** . .:. .. :*. ..: :. . .:* * : : * . ..*:. .** *.*... : :: :* .* ::* : :. :. : "
|
|
},
|
|
)
|
|
|
|
def test_reading_alignments_clustal2(self):
|
|
path = "Clustalw/opuntia.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 7)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 7)
|
|
alignment = self.check_read(path, "clustal", 7, 156)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
["TTTTTTT", "AAAAAAA", "TTTTTTT", "AAAAAAA", "CCCCCCC", "AAAAAAA"],
|
|
)
|
|
|
|
def test_reading_alignments_clustal3(self):
|
|
path = "Clustalw/hedgehog.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 5)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 5)
|
|
alignment = self.check_read(path, "clustal", 5, 447)
|
|
self.check_alignment_columns(
|
|
alignment, ["M----", "F----", "N----", "L----", "V----", "---SS"]
|
|
)
|
|
|
|
def test_reading_alignments_clustal4(self):
|
|
path = "Clustalw/odd_consensus.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 2)
|
|
alignment = self.check_read(path, "clustal", 2, 687)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("AT3G20900.1-CDS", "----------------------------------...TAG"),
|
|
("AT3G20900.1-SEQ", "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAG...TAG"),
|
|
],
|
|
{
|
|
"clustal_consensus": " * * *** ***** * * ** *******************************************************************************************************************************************************************************"
|
|
},
|
|
)
|
|
|
|
def test_reading_alignments_clustal5(self):
|
|
path = "Clustalw/protein.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 20)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 20)
|
|
alignment = self.check_read(path, "clustal", 20, 411)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"-M------------------",
|
|
"-T------------------",
|
|
"-V------------------",
|
|
"-L-----------------M",
|
|
"-E---------------MMS",
|
|
"-------------------T",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_clustal6(self):
|
|
path = "Clustalw/promals3d.aln"
|
|
self.check_iterator_for_loop_handle(path, "clustal", 1, 20)
|
|
self.check_iterator_for_loop_filename(path, "clustal", 1)
|
|
self.check_iterator_next(path, "clustal", 1)
|
|
self.check_iterator_next_and_list(path, "clustal", 1)
|
|
self.check_iterator_next_for_loop(path, "clustal", 1)
|
|
self.check_write_three_times_and_read(path, "clustal", 20)
|
|
alignment = self.check_read(path, "clustal", 20, 414)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"MMMMMMMMMMMMMMMM-M--",
|
|
"-----------------T--",
|
|
"-----------------V--",
|
|
"-----------------L--",
|
|
"-S---------------E--",
|
|
"-T------------------",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_fasta(self):
|
|
path = "GFF/multi.fna" # Trivial nucleotide alignment
|
|
self.check_iterator_for_loop_handle(path, "fasta", 1, 3)
|
|
self.check_iterator_for_loop_filename(path, "fasta", 1)
|
|
self.check_iterator_next(path, "fasta", 1)
|
|
self.check_iterator_next_and_list(path, "fasta", 1)
|
|
self.check_iterator_next_for_loop(path, "fasta", 1)
|
|
self.check_write_three_times_and_read(path, "fasta", 3)
|
|
alignment = self.check_read(path, "fasta", 3, 8)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[("test1", "ACGTCGCG"), ("test2", "GGGGCCCC"), ("test3", "AAACACAC")],
|
|
)
|
|
|
|
def test_reading_alignments_nexus1(self):
|
|
path = "Nexus/test_Nexus_input.nex"
|
|
self.check_iterator_for_loop_handle(path, "nexus", 1, 9)
|
|
self.check_iterator_for_loop_filename(path, "nexus", 1)
|
|
self.check_iterator_next(path, "nexus", 1)
|
|
self.check_iterator_next_and_list(path, "nexus", 1)
|
|
self.check_iterator_next_for_loop(path, "nexus", 1)
|
|
alignment = self.check_read(path, "nexus", 9, 48)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"AAAAAAAAc",
|
|
"-----c?tc",
|
|
"CCCCCCCCc",
|
|
"--c-?a-tc",
|
|
"GGGGGGGGc",
|
|
"tt--?ag?c",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_nexus2(self):
|
|
path = "Nexus/codonposset.nex"
|
|
self.check_iterator_for_loop_handle(path, "nexus", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "nexus", 1)
|
|
self.check_iterator_next(path, "nexus", 1)
|
|
self.check_iterator_next_and_list(path, "nexus", 1)
|
|
self.check_iterator_next_for_loop(path, "nexus", 1)
|
|
alignment = self.check_read(path, "nexus", 2, 22)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("Aegotheles", "AAAAAGGCATTGTGGTGGGAAT"),
|
|
("Aerodramus", "?????????TTGTGGTGGGAAT"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_msf1(self):
|
|
path = "msf/DOA_prot.msf"
|
|
with self.assertRaisesRegex(
|
|
ValueError,
|
|
"GCG MSF header said alignment length 62, "
|
|
"but 11 of 12 sequences said Len: 250",
|
|
):
|
|
AlignIO.read(path, "msf")
|
|
|
|
def test_reading_alignments_msf2(self):
|
|
path = "msf/W_prot.msf"
|
|
with warnings.catch_warnings(record=True) as w:
|
|
self.check_iterator_for_loop_handle(path, "msf", 1, 11)
|
|
self.check_iterator_for_loop_filename(path, "msf", 1)
|
|
self.check_iterator_next(path, "msf", 1)
|
|
self.check_iterator_next_and_list(path, "msf", 1)
|
|
self.check_iterator_next_for_loop(path, "msf", 1)
|
|
alignment = self.check_read(path, "msf", 11, 99)
|
|
warning_msgs = {str(_.message) for _ in w}
|
|
self.assertIn(
|
|
"One of more alignment sequences were truncated and have been gap padded",
|
|
warning_msgs,
|
|
)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"GGGGGGGGGGG",
|
|
"LLLLLLLLLLL",
|
|
"TTTTTTTTTTT",
|
|
"PPPPPPPPPPP",
|
|
"FFFFFFSSSSS",
|
|
# ...
|
|
"LLLLLL----L",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_stockholm1(self):
|
|
path = "Stockholm/simple.sth"
|
|
self.check_iterator_for_loop_handle(path, "stockholm", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "stockholm", 1)
|
|
self.check_iterator_next(path, "stockholm", 1)
|
|
self.check_iterator_next_and_list(path, "stockholm", 1)
|
|
self.check_iterator_next_for_loop(path, "stockholm", 1)
|
|
self.check_write_three_times_and_read(path, "stockholm", 2)
|
|
alignment = self.check_read(path, "stockholm", 2, 104)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("AE007476.1", "AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGU...GAU"),
|
|
("AP001509.1", "UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-U...UGU"),
|
|
],
|
|
{
|
|
"secondary_structure": ".................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>..............."
|
|
},
|
|
)
|
|
|
|
def test_reading_alignments_stockholm2(self):
|
|
path = "Stockholm/funny.sth"
|
|
self.check_iterator_for_loop_handle(path, "stockholm", 1, 6)
|
|
self.check_iterator_for_loop_filename(path, "stockholm", 1)
|
|
self.check_iterator_next(path, "stockholm", 1)
|
|
self.check_iterator_next_and_list(path, "stockholm", 1)
|
|
self.check_iterator_next_for_loop(path, "stockholm", 1)
|
|
self.check_write_three_times_and_read(path, "stockholm", 6)
|
|
alignment = self.check_read(path, "stockholm", 6, 43)
|
|
self.check_alignment_columns(
|
|
alignment, ["MMMEEE", "TQIVVV", "CHEMMM", "RVALLL", "ASDTTT", "SYSEEE"]
|
|
)
|
|
|
|
def test_reading_alignments_phylip1(self):
|
|
path = "Phylip/reference_dna.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 6)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 6)
|
|
alignment = self.check_read(path, "phylip", 6, 13)
|
|
self.check_alignment_columns(
|
|
alignment, ["CCTTCG", "GGAAAG", "ATAAAC", "TTTTAA", "GAGGAG", "CTTTTC"]
|
|
)
|
|
|
|
def test_reading_alignments_phylip2(self):
|
|
path = "Phylip/reference_dna2.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 6)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 6)
|
|
alignment = self.check_read(path, "phylip", 6, 39)
|
|
self.check_alignment_columns(
|
|
alignment, ["CCTTCG", "GGAAAG", "ATAAAC", "TTTTAA", "GAGGAG", "CTTTTC"]
|
|
)
|
|
|
|
def test_reading_alignments_phylip3(self):
|
|
path = "Phylip/hennigian.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 10)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 10)
|
|
alignment = self.check_read(path, "phylip", 10, 40)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"CCCCCAAAAA",
|
|
"AAAAACCCCC",
|
|
"CCCAAAAAAA",
|
|
"AAACCAAAAA",
|
|
"CCAAAAAAAA",
|
|
"AAAAAAAAAA",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip4(self):
|
|
path = "Phylip/horses.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 10)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 10)
|
|
alignment = self.check_read(path, "phylip", 10, 40)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"AACCCCCCCC",
|
|
"AAAACCCCCC",
|
|
"AAAAAAAAAC",
|
|
"ACAAAAAAAA",
|
|
"ACACCCCCCC",
|
|
"AAAAAAAAAA",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip5(self):
|
|
path = "Phylip/random.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 10)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 10)
|
|
alignment = self.check_read(path, "phylip", 10, 40)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"CAAAACAAAC",
|
|
"AACAACCACC",
|
|
"CAAAACAAAA",
|
|
"ACAACACACA",
|
|
"CCAAAACCAA",
|
|
"AAAAAAAAAA",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip6(self):
|
|
path = "Phylip/interlaced.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 3)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 3)
|
|
alignment = self.check_read(path, "phylip", 3, 384)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("ALEU_HORVU", "MAHARVLLLALAVLATAAVAVASSSSFADSNPIR...VAA"),
|
|
("CATH_HUMAN", "------MWATLPLLCAGAWLLGV--------PVC...PLV"),
|
|
("CYS1_DICDI", "-----MKVILLFVLAVFTVFVSS-----------...I--"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip7(self):
|
|
path = "Phylip/interlaced2.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip", 1, 4)
|
|
self.check_iterator_for_loop_filename(path, "phylip", 1)
|
|
self.check_iterator_next(path, "phylip", 1)
|
|
self.check_iterator_next_and_list(path, "phylip", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip", 1)
|
|
self.check_write_three_times_and_read(path, "phylip", 4)
|
|
alignment = self.check_read(path, "phylip", 4, 131)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("IXI_234", "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRP...SHE"),
|
|
("IXI_235", "TSPASIRPPAGPSSR---------RPSPPGPRRP...SHE"),
|
|
("IXI_236", "TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRP...SHE"),
|
|
("IXI_237", "TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRP...SHE"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip8(self):
|
|
path = "ExtendedPhylip/primates.phyx"
|
|
self.check_iterator_for_loop_handle(path, "phylip-relaxed", 1, 12)
|
|
self.check_iterator_for_loop_filename(path, "phylip-relaxed", 1)
|
|
self.check_iterator_next(path, "phylip-relaxed", 1)
|
|
self.check_iterator_next_and_list(path, "phylip-relaxed", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip-relaxed", 1)
|
|
self.check_write_three_times_and_read(path, "phylip-relaxed", 12)
|
|
alignment = self.check_read(path, "phylip-relaxed", 12, 898)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"AAAAAAAAAAAA",
|
|
"AAAAAAAAAAAA",
|
|
"GGGGGGGGGGGG",
|
|
"TCCCCCCCCCCC",
|
|
"TTTTTTTTTTTT",
|
|
"TTTTTTTTTTTT",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip9(self):
|
|
path = "Phylip/sequential.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip-sequential", 1, 3)
|
|
self.check_iterator_for_loop_filename(path, "phylip-sequential", 1)
|
|
self.check_iterator_next(path, "phylip-sequential", 1)
|
|
self.check_iterator_next_and_list(path, "phylip-sequential", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip-sequential", 1)
|
|
self.check_write_three_times_and_read(path, "phylip-sequential", 3)
|
|
alignment = self.check_read(path, "phylip-sequential", 3, 384)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("ALEU_HORVU", "MAHARVLLLALAVLATAAVAVASSSSFADSNPIR...VAA"),
|
|
("CATH_HUMAN", "------MWATLPLLCAGAWLLGV--------PVC...PLV"),
|
|
("CYS1_DICDI", "-----MKVILLFVLAVFTVFVSS-----------...I--"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_phylip10(self):
|
|
path = "Phylip/sequential2.phy"
|
|
self.check_iterator_for_loop_handle(path, "phylip-sequential", 1, 4)
|
|
self.check_iterator_for_loop_filename(path, "phylip-sequential", 1)
|
|
self.check_iterator_next(path, "phylip-sequential", 1)
|
|
self.check_iterator_next_and_list(path, "phylip-sequential", 1)
|
|
self.check_iterator_next_for_loop(path, "phylip-sequential", 1)
|
|
self.check_write_three_times_and_read(path, "phylip-sequential", 4)
|
|
alignment = self.check_read(path, "phylip-sequential", 4, 131)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("IXI_234", "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRP...SHE"),
|
|
("IXI_235", "TSPASIRPPAGPSSR---------RPSPPGPRRP...SHE"),
|
|
("IXI_236", "TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRP...SHE"),
|
|
("IXI_237", "TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRP...SHE"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_emboss1(self):
|
|
path = "Emboss/alignret.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 4)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 4, 131)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("IXI_234", "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRP...SHE"),
|
|
("IXI_235", "TSPASIRPPAGPSSR---------RPSPPGPRRP...SHE"),
|
|
("IXI_236", "TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRP...SHE"),
|
|
("IXI_237", "TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRP...SHE"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_emboss2(self):
|
|
path = "Emboss/needle.txt"
|
|
alignments = self.check_iterator_for_loop_handle(path, "emboss", 5, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 5)
|
|
self.check_iterator_next(path, "emboss", 5)
|
|
self.check_iterator_next_and_list(path, "emboss", 5)
|
|
self.check_iterator_next_for_loop(path, "emboss", 5)
|
|
self.check_read_fails(path, "emboss")
|
|
# Show the alignment
|
|
self.assertEqual(alignments[0].get_alignment_length(), 124)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 119)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 120)
|
|
self.assertEqual(alignments[3].get_alignment_length(), 118)
|
|
self.assertEqual(alignments[4].get_alignment_length(), 125)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
("gi|94968718|receiver", "-VLLADDHALVRRGFRLMLED--DPEIEIVAEAG...GET"),
|
|
("ref_rec", "KILIVDD----QYGIRILLNEVFNKEGYQTFQAA...---"),
|
|
],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
("gi|94968761|receiver", "-ILIVDDEANTLASLSRAFRLAGHEATVCDNAVR...LKR"),
|
|
("ref_rec", "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQ...---"),
|
|
],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
("gi|94967506|receiver", "LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPE...HKE"),
|
|
("ref_rec", "-KILIVDDQYGIRILLNEVFNKEGYQTFQAANGL...---"),
|
|
],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[3],
|
|
[
|
|
("gi|94970045|receiver", "-VLLVEDEEALRAAAGDFLETRGYKIMTARDGTE...EVL"),
|
|
("ref_rec", "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQ...DAV"),
|
|
],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[4],
|
|
[
|
|
("gi|94970041|receiver", "TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEE...KRQ"),
|
|
("ref_rec", "KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQ...---"),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_emboss3(self):
|
|
path = "Emboss/needle_asis.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 2, 3653)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("asis", "----------------------------------...GAA"),
|
|
("asis", "TATTTTTTGGATTTTTTTCTAGATTTTCTAGGTT...GAA"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_emboss4(self):
|
|
path = "Emboss/water.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 2, 131)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
("IXI_234", "TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRP...SHE"),
|
|
("IXI_235", "TSPASIRPPAGPSSR---------RPSPPGPRRP...SHE"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_emboss5(self):
|
|
path = "Emboss/water2.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 2, 18)
|
|
self.check_alignment_rows(
|
|
alignment, [("asis", "CGTTTGAGT-CTGGGATG"), ("asis", "CGTTTGAGTACTGGGATG")]
|
|
)
|
|
|
|
def test_reading_alignments_emboss6(self):
|
|
path = "Emboss/matcher_simple.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 2, 16)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[("AF069992_1", "GPPPQSPDENRAGESS"), ("CAA85685.1", "GVPPEEAGAAVAAESS")],
|
|
)
|
|
|
|
def test_reading_alignments_emboss7(self):
|
|
path = "Emboss/matcher_pair.txt"
|
|
alignments = self.check_iterator_for_loop_handle(path, "emboss", 5, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 5)
|
|
self.check_iterator_next(path, "emboss", 5)
|
|
self.check_iterator_next_and_list(path, "emboss", 5)
|
|
self.check_iterator_next_for_loop(path, "emboss", 5)
|
|
self.check_read_fails(path, "emboss")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 145)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 13)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 18)
|
|
self.assertEqual(alignments[3].get_alignment_length(), 10)
|
|
self.assertEqual(alignments[4].get_alignment_length(), 10)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
("HBA_HUMAN", "LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLS...SKY"),
|
|
("HBB_HUMAN", "LTPEEKSAVTALWGKV--NVDEVGGEALGRLLVV...HKY"),
|
|
],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[("HBA_HUMAN", "KKVADALTNAVAH"), ("HBB_HUMAN", "QKVVAGVANALAH")],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[("HBA_HUMAN", "KLRVDPVNFKLLSHCLLV"), ("HBB_HUMAN", "KVNVDEVGGEALGRLLVV")],
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[3], [("HBA_HUMAN", "LSALSDLHAH"), ("HBB_HUMAN", "LGAFSDGLAH")]
|
|
)
|
|
self.check_alignment_rows(
|
|
alignments[4], [("HBA_HUMAN", "VKAAWGKVGA"), ("HBB_HUMAN", "VQAAYQKVVA")]
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_emboss8(self):
|
|
path = "Emboss/emboss_pair_aln_full_blank_line.txt"
|
|
self.check_iterator_for_loop_handle(path, "emboss", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "emboss", 1)
|
|
self.check_iterator_next(path, "emboss", 1)
|
|
self.check_iterator_next_and_list(path, "emboss", 1)
|
|
self.check_iterator_next_for_loop(path, "emboss", 1)
|
|
alignment = self.check_read(path, "emboss", 2, 1450)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"hg38_chrX_131691529_131830643_47210_48660",
|
|
"GGCAGGTGCATAGCTTGAGCCTAGGAGTTCAAGT...AAA",
|
|
),
|
|
(
|
|
"mm10_chrX_50555743_50635321_27140_27743",
|
|
"G--------------------------TTCAAGG...AAA",
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_fasta_m10_1(self):
|
|
path = "Fasta/output001.m10"
|
|
alignments = self.check_iterator_for_loop_handle(path, "fasta-m10", 4, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 4)
|
|
self.check_iterator_next(path, "fasta-m10", 4)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 4)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 4)
|
|
self.check_read_fails(path, "fasta-m10")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 108)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"SGSNT-RRRAISRPVRLTAEED---QEIRKRAAE...LSR",
|
|
),
|
|
(
|
|
"gi|152973457|ref|YP_001338508.1|",
|
|
"AGSGAPRRRGSGLASRISEQSEALLQEAAKHAAE...LSR",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 64)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"AAECGKTVSGFLRAAALGKKVNSLTDDRVLKEV-...AIT",
|
|
),
|
|
(
|
|
"gi|152973588|ref|YP_001338639.1|",
|
|
"ASRQGCTVGG--KMDSVQDKASDKDKERVMKNIN...TLT",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 38)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
(
|
|
"gi|10955264|ref|NP_052605.1|",
|
|
"MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIE",
|
|
),
|
|
(
|
|
"gi|152973462|ref|YP_001338513.1|",
|
|
"IKKDLGVSFLKLKNREKTLIVDALKKKYPVAELLSVLQ",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[3].get_alignment_length(), 43)
|
|
self.check_alignment_rows(
|
|
alignments[3],
|
|
[
|
|
(
|
|
"gi|10955265|ref|NP_052606.1|",
|
|
"SELHSKLPKSIDKIHEDIKKQLSC-SLIMKKIDV...TYC",
|
|
),
|
|
(
|
|
"gi|152973545|ref|YP_001338596.1|",
|
|
"SRINSDVARRIPGIHRDPKDRLSSLKQVEEALDM...EYC",
|
|
),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_fasta_m10_2(self):
|
|
path = "Fasta/output002.m10"
|
|
alignments = self.check_iterator_for_loop_handle(path, "fasta-m10", 6, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 6)
|
|
self.check_iterator_next(path, "fasta-m10", 6)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 6)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 6)
|
|
self.check_read_fails(path, "fasta-m10")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 88)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"SGSNTRRRAISRPVR--LTAEEDQEIRKRAAECG...AEV",
|
|
),
|
|
(
|
|
"gi|162139799|ref|NP_309634.2|",
|
|
"SQRSTRRKPENQPTRVILFNKPYDVLPQFTDEAG...VQV",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 53)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"EIRKRAAECGKTVSGFLRAAA-LGKKV----NSL...KKL",
|
|
),
|
|
(
|
|
"gi|15831859|ref|NP_310632.1|",
|
|
"EIKPRGTSKGEAIAAFMQEAPFIGRTPVFLGDDL...VKI",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 92)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
(
|
|
"gi|10955264|ref|NP_052605.1|",
|
|
"SEFFSKIESDLKKKKSKGDVFFDLIIPNG-----...ATS",
|
|
),
|
|
(
|
|
"gi|15829419|ref|NP_308192.1|",
|
|
"TELNSELAKAMKVDAQRG-AFVSQVLPNSSAAKA...QSS",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[5].get_alignment_length(), 157)
|
|
self.check_alignment_rows(
|
|
alignments[5],
|
|
[
|
|
(
|
|
"gi|10955265|ref|NP_052606.1|",
|
|
"QYIMTTSNGDRVRAKIYKRGSIQFQGKYLQIASL...REI",
|
|
),
|
|
(
|
|
"gi|15833861|ref|NP_312634.1|",
|
|
"EFIRLLSDHDQFEKDQISELTVAANALKLEVAK-...KKV",
|
|
),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_fasta_m10_3(self):
|
|
path = "Fasta/output003.m10"
|
|
alignments = self.check_iterator_for_loop_handle(path, "fasta-m10", 3, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 3)
|
|
self.check_iterator_next(path, "fasta-m10", 3)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 3)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 3)
|
|
self.check_read_fails(path, "fasta-m10")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 55)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"VRLTAEEDQ--EIRKRAAECG-KTVSGFLRAAAL...LGA",
|
|
),
|
|
(
|
|
"gi|152973837|ref|YP_001338874.1|",
|
|
"ISISNNKDQYEELQKEQGERDLKTVDQLVRIAAA...IAA",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 22)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
("gi|10955265|ref|NP_052606.1|", "DDRANLFEFLSEEGITITEDNN"),
|
|
("gi|152973840|ref|YP_001338877.1|", "DDAEHLFRTLSSR-LDALQDGN"),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 63)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
(
|
|
"gi|10955264|ref|NP_052605.1|",
|
|
"VYTSFN---GEKFSSYTLNKVTKTDEYNDLSELS...KGI",
|
|
),
|
|
(
|
|
"gi|152973841|ref|YP_001338878.1|",
|
|
"VFGSFEQPKGEHLSGQVSEQ--RDTAFADQNEQV...QAM",
|
|
),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_fasta_m10_4(self):
|
|
path = "Fasta/output004.m10"
|
|
self.check_iterator_for_loop_handle(path, "fasta-m10", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 1)
|
|
self.check_iterator_next(path, "fasta-m10", 1)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 1)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 1)
|
|
alignment = self.check_read(path, "fasta-m10", 2, 102)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"ref|NC_002127.1|:c1351-971",
|
|
"AAAAAAGATAAAAAATATCAAATAGAAGCAATAA...TCA",
|
|
),
|
|
(
|
|
"ref|NC_002695.1|:1970775-1971404",
|
|
"AGAGAAAATAAAACAAGTAATAAAATATTAATGG...ACA",
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_fasta_m10_5(self):
|
|
path = "Fasta/output005.m10"
|
|
self.check_iterator_for_loop_handle(path, "fasta-m10", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 1)
|
|
self.check_iterator_next(path, "fasta-m10", 1)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 1)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 1)
|
|
alignment = self.check_read(path, "fasta-m10", 2, 110)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"gi|10955264|ref|NP_052605.1|",
|
|
"IKNKDKTLFIVYAT-DIYSPSEFFSKIESDLKKK...LSK",
|
|
),
|
|
(
|
|
"gi|10955282|ref|NP_052623.1|",
|
|
"IKDELPVAFCSWASLDLECEVKYINDVTSLYAKD...MSE",
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_fasta_m10_6(self):
|
|
path = "Fasta/output006.m10"
|
|
self.check_iterator_for_loop_handle(path, "fasta-m10", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 1)
|
|
self.check_iterator_next(path, "fasta-m10", 1)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 1)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 1)
|
|
alignment = self.check_read(path, "fasta-m10", 2, 131)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"gi|116660610|gb|EG558221.1|EG558221",
|
|
"GCAACGCTTCAAGAACTGGAATTAGGAACCGTGA...CAT",
|
|
),
|
|
("query", "GCAACGCTTCAAGAACTGGAATTAGGAACCGTGA...CAT"),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_fasta_m10_7(self):
|
|
path = "Fasta/output007.m10"
|
|
alignments = self.check_iterator_for_loop_handle(path, "fasta-m10", 9, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 9)
|
|
self.check_iterator_next(path, "fasta-m10", 9)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 9)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 9)
|
|
self.check_read_fails(path, "fasta-m10")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 108)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"SGSNT-RRRAISRPVRLTAEED---QEIRKRAAE...LSR",
|
|
),
|
|
(
|
|
"gi|152973457|ref|YP_001338508.1|",
|
|
"AGSGAPRRRGSGLASRISEQSEALLQEAAKHAAE...LSR",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 64)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"AAECGKTVSGFLRAAALGKKVNSLTDDRVLKEV-...AIT",
|
|
),
|
|
(
|
|
"gi|152973588|ref|YP_001338639.1|",
|
|
"ASRQGCTVGG--KMDSVQDKASDKDKERVMKNIN...TLT",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 45)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
(
|
|
"gi|10955263|ref|NP_052604.1|",
|
|
"EIRKRAAECGKTVSGFLRAAA-----LGKKVNSL...VMR",
|
|
),
|
|
(
|
|
"gi|152973480|ref|YP_001338531.1|",
|
|
"ELVKLIADMGISVRALLRKNVEPYEELGLEEDKF...MLQ",
|
|
),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[8].get_alignment_length(), 64)
|
|
self.check_alignment_rows(
|
|
alignments[8],
|
|
[
|
|
(
|
|
"gi|10955265|ref|NP_052606.1|",
|
|
"ISGTYKGIDFLIKLMPSGGNTTIGRASGQNNTYF...FSD",
|
|
),
|
|
(
|
|
"gi|152973505|ref|YP_001338556.1|",
|
|
"IDGVITAFD-LRTGMNISKDKVVAQIQGMDPVW-...YPD",
|
|
),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_fasta_m10_8(self):
|
|
path = "Fasta/output008.m10"
|
|
alignments = self.check_iterator_for_loop_handle(path, "fasta-m10", 12, 2)
|
|
self.check_iterator_for_loop_filename(path, "fasta-m10", 12)
|
|
self.check_iterator_next(path, "fasta-m10", 12)
|
|
self.check_iterator_next_and_list(path, "fasta-m10", 12)
|
|
self.check_iterator_next_for_loop(path, "fasta-m10", 12)
|
|
self.check_read_fails(path, "fasta-m10")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 65)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
(
|
|
"gi|283855822|gb|GQ290312.1|",
|
|
"IPHQLPHALRHRPAQEAAHASQLHPAQPGCGQPL...GLL",
|
|
),
|
|
("sp|Q9NSY1|BMP2K_HUMAN", "LQHRHPHQQQQQQQQQQQQQQQQQQQQQQQQQQQ...QML"),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 201)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
(
|
|
"gi|57163782|ref|NM_001009242.1|",
|
|
"GPELLRALLQQNGCGTQPLRVPTVLPG*AMAVLH...QKS",
|
|
),
|
|
("sp|Q9NSY1|BMP2K_HUMAN", "GPEIL---LGQ-GPPQQPPQQHRVLQQLQQGDWR...NRS"),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 348)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
(
|
|
"gi|57163782|ref|NM_001009242.1|",
|
|
"MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEP...APA",
|
|
),
|
|
("sp|P08100|OPSD_HUMAN", "MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEP...APA"),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[11].get_alignment_length(), 31)
|
|
self.check_alignment_rows(
|
|
alignments[11],
|
|
[
|
|
("gi|283855822|gb|GQ290312.1|", "SQQIRNATTMMMTMRVTSFSAFWVVADSCCW"),
|
|
("sp|P08100|OPSD_HUMAN", "AQQQESATTQKAEKEVTRMVIIMVIAFLICW"),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_ig(self):
|
|
path = "IntelliGenetics/VIF_mase-pro.txt"
|
|
self.check_iterator_for_loop_handle(path, "ig", 1, 16)
|
|
self.check_iterator_for_loop_filename(path, "ig", 1)
|
|
self.check_iterator_next(path, "ig", 1)
|
|
self.check_iterator_next_and_list(path, "ig", 1)
|
|
self.check_iterator_next_for_loop(path, "ig", 1)
|
|
self.check_write_three_times_and_read(path, "ig", 16)
|
|
alignment = self.check_read(path, "ig", 16, 298)
|
|
self.check_alignment_columns(
|
|
alignment,
|
|
[
|
|
"MMMMMMMMMMMMMMMM",
|
|
"EEEEEEETEEEENEEE",
|
|
"NNNNNNNAEEEEQRKK",
|
|
"--------DEEEEE--",
|
|
"--------KKKKKK--",
|
|
"HHHHHHH-AAAAL-R-",
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_pir(self):
|
|
path = "NBRF/clustalw.pir"
|
|
self.check_iterator_for_loop_handle(path, "pir", 1, 2)
|
|
self.check_iterator_for_loop_filename(path, "pir", 1)
|
|
self.check_iterator_next(path, "pir", 1)
|
|
self.check_iterator_next_and_list(path, "pir", 1)
|
|
self.check_iterator_next_for_loop(path, "pir", 1)
|
|
self.check_write_three_times_and_read(path, "pir", 2)
|
|
alignment = self.check_read(path, "pir", 2, 2527)
|
|
self.check_alignment_rows(
|
|
alignment,
|
|
[
|
|
(
|
|
"804Angiostrongylus_cantonensis",
|
|
"----------------------------------...---",
|
|
),
|
|
(
|
|
"815Parelaphostrongylus_odocoil",
|
|
"----------------------------------...---",
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_reading_alignments_maf1(self):
|
|
path = "MAF/humor.maf"
|
|
alignments = self.check_iterator_for_loop_handle(path, "maf", 2, 3)
|
|
self.check_iterator_for_loop_filename(path, "maf", 2)
|
|
self.check_iterator_next(path, "maf", 2)
|
|
self.check_iterator_next_and_list(path, "maf", 2)
|
|
self.check_iterator_next_for_loop(path, "maf", 2)
|
|
self.check_read_fails(path, "maf")
|
|
self.assertEqual(alignments[0].get_alignment_length(), 5486)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
("NM_006987", "gcacagcctttactccctgactgcgtttatattc...CCG"),
|
|
("mm3", "gcacagcctttactccctgactgcgtttatattc...TTG"),
|
|
("rn3", "gcacagcctttactccctgactgcgtttatattc...CCG"),
|
|
],
|
|
)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 5753)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
("NM_018289", "tttgtccatgttggtcaggctggtctcgaactcc...GGT"),
|
|
("mm3", "tttgtccatgttggtcaggctggtctcgaactcc...GGT"),
|
|
("rn3", "tttgtccatgttggtcaggctggtctcgaactcc...GGT"),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_maf2(self):
|
|
path = "MAF/bug2453.maf"
|
|
alignments = self.check_iterator_for_loop_handle(path, "maf", 3)
|
|
self.check_iterator_for_loop_filename(path, "maf", 3)
|
|
self.check_iterator_next(path, "maf", 3)
|
|
self.check_iterator_next_and_list(path, "maf", 3)
|
|
self.check_iterator_next_for_loop(path, "maf", 3)
|
|
self.check_read_fails(path, "maf")
|
|
self.assertEqual(len(alignments[0]), 5)
|
|
self.assertEqual(alignments[0].get_alignment_length(), 42)
|
|
self.check_alignment_columns(
|
|
alignments[0], ["AAA--", "AAAAA", "AAAAA", "---T-", "GGGGG", "GGGGG"]
|
|
)
|
|
self.assertEqual(len(alignments[1]), 5)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 6)
|
|
self.check_alignment_columns(
|
|
alignments[1], ["TTTTt", "AAAAa", "AAAAa", "AAAAg", "GGGGg", "AAAAa"]
|
|
)
|
|
self.assertEqual(len(alignments[2]), 4)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 13)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[
|
|
("baboon", "gcagctgaaaaca"),
|
|
("hg16.chr7", "gcagctgaaaaca"),
|
|
("mm4.chr6", "ACAGCTGAAAATA"),
|
|
("panTro1.chr6", "gcagctgaaaaca"),
|
|
],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_maf3(self):
|
|
path = "MAF/ucsc_test.maf"
|
|
alignments = self.check_iterator_for_loop_handle(path, "maf", 3)
|
|
self.check_iterator_for_loop_filename(path, "maf", 3)
|
|
self.check_iterator_next(path, "maf", 3)
|
|
self.check_iterator_next_and_list(path, "maf", 3)
|
|
self.check_iterator_next_for_loop(path, "maf", 3)
|
|
self.check_read_fails(path, "maf")
|
|
self.assertEqual(len(alignments[0]), 5)
|
|
self.assertEqual(alignments[0].get_alignment_length(), 42)
|
|
self.check_alignment_columns(
|
|
alignments[0], ["AAA--", "AAAAA", "AAAAA", "---T-", "GGGGG", "GGGGG"]
|
|
)
|
|
self.assertEqual(len(alignments[1]), 5)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 6)
|
|
self.check_alignment_columns(
|
|
alignments[1], ["TTTTt", "AAAAa", "AAAAa", "AAAAg", "GGGGg", "AAAAa"]
|
|
)
|
|
self.assertEqual(len(alignments[2]), 4)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 13)
|
|
self.check_alignment_columns(
|
|
alignments[2], ["gggA", "cccC", "aaaA", "gggG", "cccC", "aaaA"]
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_maf4(self):
|
|
path = "MAF/ucsc_mm9_chr10.maf"
|
|
alignments = self.check_iterator_for_loop_handle(path, "maf", 48)
|
|
self.check_iterator_for_loop_filename(path, "maf", 48)
|
|
self.check_iterator_next(path, "maf", 48)
|
|
self.check_iterator_next_and_list(path, "maf", 48)
|
|
self.check_iterator_next_for_loop(path, "maf", 48)
|
|
self.check_read_fails(path, "maf")
|
|
self.assertEqual(len(alignments[0]), 2)
|
|
self.assertEqual(alignments[0].get_alignment_length(), 164)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
("mm9.chr10", "TCATAGGTATTTATTTTTAAATATGGTTTGCTTT...GTT"),
|
|
("oryCun1.scaffold_133159", "TCACAGATATTTACTATTAAATATGGTTTGTTAT...GTT"),
|
|
],
|
|
)
|
|
self.assertEqual(len(alignments[1]), 4)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 466)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
("hg18.chr6", "AGTCTTCATAAGTGGAAATATAAGTTTTAATTAT...TTC"),
|
|
("mm9.chr10", "AGTCTTTCCAATGGGACCTGTGAGTCCTAACTAT...CTG"),
|
|
("panTro2.chr6", "AGTCTTCATAAGTGGAAATATAAGTTTTAATTAT...TTC"),
|
|
("ponAbe2.chr6", "AGTCTTCATAAGTGGAAATATAAGTTTTAATTAT...TTC"),
|
|
],
|
|
)
|
|
self.assertEqual(len(alignments[2]), 5)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 127)
|
|
self.check_alignment_columns(
|
|
alignments[2], ["TTTTT", "GGGGG", "GGGGG", "GGGGG", "TTTTC", "CCCCC"]
|
|
)
|
|
self.assertEqual(len(alignments[47]), 6)
|
|
self.assertEqual(alignments[47].get_alignment_length(), 46)
|
|
self.check_alignment_columns(
|
|
alignments[47], ["TTTTTT", "GGGGGG", "TTTTTT", "TTTTTT", "TGGGAT", "tTTTT-"]
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
def test_reading_alignments_mauve(self):
|
|
path = "Mauve/simple.xmfa"
|
|
alignments = self.check_iterator_for_loop_handle(path, "mauve", 5)
|
|
self.check_iterator_for_loop_filename(path, "mauve", 5)
|
|
self.check_iterator_next(path, "mauve", 5)
|
|
self.check_iterator_next_and_list(path, "mauve", 5)
|
|
self.check_iterator_next_for_loop(path, "mauve", 5)
|
|
self.check_read_fails(path, "mauve")
|
|
self.assertEqual(len(alignments[0]), 2)
|
|
self.assertEqual(alignments[0].get_alignment_length(), 5670)
|
|
self.check_alignment_rows(
|
|
alignments[0],
|
|
[
|
|
("1/0-5670", "ATATTAGGTTTTTACCTACCCAGGAAAAGCCAAC...AAT"),
|
|
("2/0-5670", "ATATTAGGTTTTTACCTACCCAGGAAAAGCCAAC...AAT"),
|
|
],
|
|
)
|
|
self.assertEqual(len(alignments[1]), 2)
|
|
self.assertEqual(alignments[1].get_alignment_length(), 4420)
|
|
self.check_alignment_rows(
|
|
alignments[1],
|
|
[
|
|
("1/5670-9940", "GAACATCAGCACCTGAGTTGCTAAAGTCATTTAG...CTC"),
|
|
("2/7140-11410", "GAACATCAGCACCTGAGTTGCTAAAGTCATTTAG...CTC"),
|
|
],
|
|
)
|
|
self.assertEqual(len(alignments[2]), 1)
|
|
self.assertEqual(alignments[2].get_alignment_length(), 4970)
|
|
self.check_alignment_rows(
|
|
alignments[2],
|
|
[("1/9940-14910", "TCTACCAACCACCACAGACATCAATCACTTCTGC...GAC")],
|
|
)
|
|
self.assertEqual(len(alignments[3]), 1)
|
|
self.assertEqual(alignments[3].get_alignment_length(), 1470)
|
|
self.assertEqual(len(alignments[4]), 1)
|
|
self.assertEqual(alignments[4].get_alignment_length(), 1470)
|
|
self.check_alignment_rows(
|
|
alignments[4],
|
|
[("2/11410-12880", "ATTCGCACATAAGAATGTACCTTGCTGTAATTTA...ATA")],
|
|
)
|
|
self.check_reverse_write_read(alignments)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
runner = unittest.TextTestRunner(verbosity=2)
|
|
unittest.main(testRunner=runner)
|