Bio.Align parser/writer for A2M alignment files (#3966)

* test files

* test script

* add module

* add state column annotation

* update
This commit is contained in:
mdehoon
2022-06-30 23:29:58 +09:00
committed by GitHub
parent a67e899126
commit 77bf915518
7 changed files with 542 additions and 0 deletions

130
Bio/Align/a2m.py Normal file
View File

@ -0,0 +1,130 @@
# Copyright 2022 by Michiel de Hoon. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.Align support for A2M files.
A2M files are alignment files created by align2model or hmmscore in the SAM
Sequence Alignment and Modeling Software System.
"""
from Bio.Align import Alignment
from Bio.Align import interfaces
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import BiopythonExperimentalWarning
import warnings
warnings.warn(
"Bio.Align.a2m is an experimental module which may undergo "
"significant changes prior to its future official release.",
BiopythonExperimentalWarning,
)
class AlignmentWriter(interfaces.AlignmentWriter):
"""Alignment file writer for the A2M file format."""
def __init__(self, target):
"""Create an AlignmentWriter object.
Arguments:
- target - output stream or file name
"""
super().__init__(target, mode="w")
def format_alignment(self, alignment):
"""Return a string with the alignment in the A2M file format."""
if not isinstance(alignment, Alignment):
raise TypeError("Expected an Alignment object")
lines = []
state = alignment.column_annotations["state"]
for sequence, line in zip(alignment.sequences, alignment):
lines.append(f">{sequence.id} {sequence.description}")
s = ""
for c, m in zip(line, state):
if m == "D":
s += c.upper()
elif m == "I":
if c == "-":
s += "."
else:
s += c.lower()
lines.append(s)
return "\n".join(lines)
class AlignmentIterator(interfaces.AlignmentIterator):
"""Alignment iterator for files in the A2M file format.
An A2M file contains one multiple alignment. Matches are represented by
upper case letters and deletions by dashes in alignment columns containing
matches or deletions only. Insertions are represented by lower case letters,
with gaps aligned to the insertion shown as periods. Header lines start
with '>' followed by the name of the sequence, and optionally a description.
"""
def __init__(self, source):
"""Create an AlignmentIterator object.
Arguments:
- source - input data or file name
"""
super().__init__(source, mode="t", fmt="A2M")
def parse(self, stream):
"""Parse the next alignment from the stream."""
names = []
descriptions = []
lines = []
for line in stream:
if line.startswith(">"):
parts = line[1:].rstrip().split(None, 1)
try:
name, description = parts
except ValueError:
name = parts[0]
description = None
names.append(name)
descriptions.append(description)
lines.append("")
else:
lines[-1] += line.strip()
if not lines:
raise ValueError("Empty file.")
state = ""
for c in lines[0]:
if c == "-" or c.isupper():
state += "D" # Match/deletion state
elif c == "." or c.islower():
state += "I" # Insertion state
else:
raise Exception("Unexpected letter '%s' in alignment" % c)
for line in lines[1:]:
for c, m in zip(line, state):
if m == "D": # Match/deletion state
assert c == "-" or c.isupper()
elif m == "I": # Insertion state
assert c == "." or c.islower()
else:
raise Exception("Unexpected letter '%s' in alignment" % c)
for i, line in enumerate(lines):
lines[i] = line.upper().replace(".", "-")
coordinates = Alignment.infer_coordinates(lines)
records = []
for name, description, line in zip(names, descriptions, lines):
line = line.replace("-", "")
sequence = Seq(line)
if description is None:
record = SeqRecord(sequence, name)
else:
record = SeqRecord(sequence, name, description=description)
records.append(record)
alignment = Alignment(records, coordinates)
alignment.column_annotations = {}
alignment.column_annotations["state"] = state
yield alignment

View File

@ -0,0 +1,16 @@
>gi|4959044|gb|AAD34209.1|AF069
MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW
LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE
VPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS
ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLR
QIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESG
SLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE.NSTCPICRRAVLSSGNRESV
V
>gi|671626|emb|CAA85685.1|
---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------
WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP
VAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN
ATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKAL
RLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPA
LTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPElAAACEVWKEIKFEFPAMD--
-

View File

@ -0,0 +1,4 @@
>Test1seq
GCTGGGGATGGAGAGGGAACAGAGT.T
>AT3G20900
GCTGGGGATGGAGAGGGAACAGAGTaG

View File

@ -0,0 +1,32 @@
>V_Harveyi_PATH
MKNW........IKV....AVAAI.A..LSAA...................TVQAATEVKVGMSGRYFPFTFVK..QDKLQGFEVDMWDEIGKRNDYKIE
YVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK.GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT..G
IEHDVALGRADAFIMDRLSALE.LIKKTG.LPLQLAGEPFE.....TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK....
>B_subtilis_YXEM
MKMKkw......TVL....VVAALlA.vLSACgn............g.nssSKEDDNVLHVGATGQSYPFAYKE..NGKLTGFDVEVMEAVAKKIDMKLD
WKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK.DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETqeG
TLKDVAYGRVDAYVNSRTVLIA.QIKKTG.LPLKLAGDPIV.....YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVeqkh
>FLIY_ECOLI
MKLAhlgrqalmGVM....AVALVaG..MSVKsf.........adeg.llnKVKERGTLLVGLEGTYPPFSFQGd.DGKLTGFEVEFAQQLAKHLGVEAS
LKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKgNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDdpT
KYQDLRVGRIDAILVDRLAALD.LVKKTN.DTLAVTGEAFS.....RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK....
>Deinococcus_radiodurans
MKKSll......SLKlsglLVPSVlAlsLSACss...............psSTLNQGTLKIAMEGTYPPFTSKNe.QGELVGFDVDIAKAVAQKLNLKPE
FVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKnNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGapE
ILADLVAGRIDAAYNDRLVVNY.IIND-QkLPVRGAGQIGD.....AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ...p
>B_subtilis_GlnH_homo_YCKK
MKKAll......ALF....MVVSIaA..LAACgagndnqskdnakdgdlwaSIKKKGVLTVGTEGTYEPFTYHDkdTDKLTGYDVEVITEVAKRLGLKVD
FKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK.DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGmaQ
ALQMIQQARVDMTYNDKLAVLN.YLKTSGnKNVKIAFETGE.....PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK....
>YA80_HAEIN
MKKLlf......TTA....LLTGAiA..FSTFs...........hageiadRVEKTKTLLVGTEGTYAPFTFHDk.SGKLTGFDVEVIRKVAEKLGLKVE
FKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS.SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGlaQ
SLELIKQGRAEATINDKLAVLD.YFKQHPnSGLKIAYDRGD.....KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ....
>E_coli_GlnH
MKSVl.......KVS....LAALTlA..FAVSsh.........a.......---ADKKLVVATDTAFVPFEFKQ..GDKYVGFDVDLWAAIAKELKLDYE
LKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAn-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNidN
AYMELGTNRADAVLHDTPNILY.FIKTAGnGQFKAVGDSLE.....AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K....
>HISJ_E_COLI
MKKLvl......SLS....LV---lA..FSSAta...............a.FAAIPQNIRIGTDPTYAPFESKNs.QGELVGFDIDLAKELCKRINTQCT
FVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK.NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGqdN
IYSDLTAGRIDAAFQDEVAASEgFLKQPVgKDYKFGGPSVKdeklfGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG...g

24
Tests/Clustalw/muscle.a2m Normal file
View File

@ -0,0 +1,24 @@
>Test1seq
.................................................................AGTTACAATAACTGACGAAGCTAAGTAGGCTACTA
ATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATC
GTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTA
ATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTC
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT.
>AT3G20900.1-SEQ
atgaacaaagtagcgaggaagaacaaaacatcaggtgaacaaaaaaaaaactcaatccacatcaaAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAA
ATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATA
GTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTA
ATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTC
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg
>AT3G20900.1-CDS
.................................................................-----------------------------------
----------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGA
A------------------------------CAAAACATC------------------------------------------------------------
----------------------------------------------------------------------------------------------------
------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg

View File

@ -0,0 +1,15 @@
>plas_horvu
D.VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG.VD.VSKISQEEYLTAPGETFSVTLTV...PGTYGFYCEPHAGAGMVGKVT
V
>plas_chlre
-.VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG.VN.ADAISRDDYLNAPGETYSVKLTA...AGEYGYYCEPHQGAGMVGKII
V
>plas_anava
-.VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKsADlAKSLSHKQLLMSPGQSTSTTFPAdapAGEYTFYCEPHRGAGMVGKIT
V
>plas_proho
VqIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG.ES.APALSNTKLRIAPGSFYSVTLGT...PGTYSFYCTPHRGAGMVGTIT
V
>azup_achcy
VhMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG.AE.A-------FKSKINENYKVTFTA...PGVYGVKCTPHYGMGMVGVVE
V

321
Tests/test_Align_a2m.py Normal file
View File

@ -0,0 +1,321 @@
# Copyright 2006-2014 by Peter Cock. All rights reserved.
# Copyright 2022 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Bio.Align.a2m module."""
import unittest
import warnings
from io import StringIO
from Bio import BiopythonExperimentalWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore", BiopythonExperimentalWarning)
from Bio.Align.a2m import AlignmentIterator
from Bio.Align.a2m import AlignmentWriter
class TestA2MReadingWriting(unittest.TestCase):
def check_reading_writing(self, path):
alignments = AlignmentIterator(path)
stream = StringIO()
writer = AlignmentWriter(stream)
n = writer.write_file(alignments, mincount=1, maxcount=1)
self.assertEqual(n, 1)
alignments = AlignmentIterator(path)
alignment = next(alignments)
stream.seek(0)
saved_alignments = AlignmentIterator(stream)
saved_alignment = next(saved_alignments)
with self.assertRaises(StopIteration):
next(saved_alignments)
self.assertEqual(len(alignment), len(saved_alignment))
for i, (sequence, saved_sequence) in enumerate(
zip(alignment.sequences, saved_alignment.sequences)
):
self.assertEqual(sequence.id, saved_sequence.id)
self.assertEqual(sequence.seq, saved_sequence.seq)
self.assertEqual(alignment[i], saved_alignment[i])
def test_clustalw(self):
path = "Clustalw/clustalw.a2m"
with open(path) as stream:
alignments = AlignmentIterator(stream)
alignment = next(alignments)
with self.assertRaises(StopIteration):
next(alignments)
self.assertEqual(
repr(alignment),
"<Bio.Align.Alignment object (2 rows x 601 columns) at 0x%x>"
% id(alignment),
)
self.assertEqual(len(alignment), 2)
self.assertEqual(alignment.sequences[0].id, "gi|4959044|gb|AAD34209.1|AF069")
self.assertEqual(alignment.sequences[1].id, "gi|671626|emb|CAA85685.1|")
self.assertEqual(
alignment.sequences[0].seq,
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENSTCPICRRAVLSSGNRESVV",
)
self.assertEqual(
alignment.sequences[1].seq,
"MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIEPVPGEKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAGTCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVKARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD",
)
self.assertEqual(
alignment[0],
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESVV",
)
self.assertEqual(
alignment[1],
"---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD---",
)
self.assertEqual(
alignment.column_annotations["state"],
"DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDD",
)
self.check_reading_writing(path)
def test_msaprobs(self):
path = "Clustalw/msaprobs.a2m"
# This example was obtained from
# http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
# and converted to A2M format.
with open(path) as stream:
alignments = AlignmentIterator(stream)
alignment = next(alignments)
with self.assertRaises(StopIteration):
next(alignments)
self.assertEqual(
repr(alignment),
"<Bio.Align.Alignment object (8 rows x 298 columns) at 0x%x>"
% id(alignment),
)
self.assertEqual(len(alignment), 8)
self.assertEqual(alignment.shape, (8, 298))
self.assertEqual(alignment.sequences[0].id, "V_Harveyi_PATH")
self.assertEqual(alignment.sequences[1].id, "B_subtilis_YXEM")
self.assertEqual(alignment.sequences[2].id, "FLIY_ECOLI")
self.assertEqual(alignment.sequences[3].id, "Deinococcus_radiodurans")
self.assertEqual(alignment.sequences[4].id, "B_subtilis_GlnH_homo_YCKK")
self.assertEqual(alignment.sequences[5].id, "YA80_HAEIN")
self.assertEqual(alignment.sequences[6].id, "E_coli_GlnH")
self.assertEqual(alignment.sequences[7].id, "HISJ_E_COLI")
self.assertEqual(
alignment.sequences[0].seq,
"MKNWIKVAVAAIALSAATVQAATEVKVGMSGRYFPFTFVKQDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRKGNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDTGIEHDVALGRADAFIMDRLSALELIKKTGLPLQLAGEPFETIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK",
)
self.assertEqual(
alignment.sequences[1].seq,
"MKMKKWTVLVVAALLAVLSACGNGNSSSKEDDNVLHVGATGQSYPFAYKENGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKKDNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIAQIKKTGLPLKLAGDPIVYEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
)
self.assertEqual(
alignment.sequences[2].seq,
"MKLAHLGRQALMGVMAVALVAGMSVKSFADEGLLNKVKERGTLLVGLEGTYPPFSFQGDDGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQNVQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALDLVKKTNDTLAVTGEAFSRQESGVALRKGNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK",
)
self.assertEqual(
alignment.sequences[3].seq,
"MKKSLLSLKLSGLLVPSVLALSLSACSSPSSTLNQGTLKIAMEGTYPPFTSKNEQGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLIDTGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNYIINDQKLPVRGAGQIGDAAPVGIALKKGNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQP",
)
self.assertEqual(
alignment.sequences[4].seq,
"MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTDREDKYDFSDKYTTSRAVVVTKKDNNDIKSEADVKGKTSAQSLTSNYNKLATNAGAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLNYLKTSGNKNVKIAFETGEPQSTYFTFRKGSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK",
)
self.assertEqual(
alignment.sequences[5].seq,
"MKKLLFTTALLTGAIAFSTFSHAGEIADRVEKTKTLLVGTEGTYAPFTFHDKSGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKSSDNSIKSFEDLKGRKSAQSATSNWGKDAKAAGAQILVVDGLAQSLELIKQGRAEATINDKLAVLDYFKQHPNSGLKIAYDRGDKTPTAFAFLQGEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ",
)
self.assertEqual(
alignment.sequences[6].seq,
"MKSVLKVSLAALTLAFAVSSHAADKKLVVATDTAFVPFEFKQGDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKANNNDVKSVKDLDGKVVAVKSGTGSVDYAKANIKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILYFIKTAGNGQFKAVGDSLEAQQYGIAFPKGSDELRDKVNGALKTLRENGTYNEIYKKWFGTEPK",
)
self.assertEqual(
alignment.sequences[7].seq,
"MKKLVLSLSLVLAFSSATAAFAAIPQNIRIGTDPTYAPFESKNSQGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAKNSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRKEDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYGG",
)
self.assertEqual(
alignment[0],
"MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----",
)
self.assertEqual(
alignment[1],
"MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
)
self.assertEqual(
alignment[2],
"MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----",
)
self.assertEqual(
alignment[3],
"MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P",
)
self.assertEqual(
alignment[4],
"MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----",
)
self.assertEqual(
alignment[5],
"MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----",
)
self.assertEqual(
alignment[6],
"MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----",
)
self.assertEqual(
alignment[7],
"MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G",
)
self.assertEqual(
alignment.column_annotations["state"],
"DDDDIIIIIIIIDDDIIIIDDDDDIDIIDDDDIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDIDDDDDDDDDDDIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIII",
)
self.check_reading_writing(path)
def test_muscle(self):
path = "Clustalw/muscle.a2m"
with open(path) as stream:
alignments = AlignmentIterator(stream)
alignment = next(alignments)
with self.assertRaises(StopIteration):
next(alignments)
self.assertEqual(
repr(alignment),
"<Bio.Align.Alignment object (3 rows x 687 columns) at 0x%x>"
% id(alignment),
)
self.assertEqual(len(alignment), 3)
self.assertEqual(alignment.sequences[0].id, "Test1seq")
self.assertEqual(alignment.sequences[1].id, "AT3G20900.1-SEQ")
self.assertEqual(alignment.sequences[2].id, "AT3G20900.1-CDS")
self.assertEqual(
alignment.sequences[0].seq,
"AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT",
)
self.assertEqual(
alignment.sequences[1].seq,
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
)
self.assertEqual(
alignment.sequences[2].seq,
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
)
self.assertEqual(
alignment[0],
"-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-",
)
self.assertEqual(
alignment[1],
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
)
self.assertEqual(
alignment[2],
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
)
self.assertEqual(
alignment.column_annotations["state"],
"IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDI",
)
self.check_reading_writing(path)
def test_kalign(self):
path = "Clustalw/kalign.a2m"
with open(path) as stream:
alignments = AlignmentIterator(stream)
alignment = next(alignments)
with self.assertRaises(StopIteration):
next(alignments)
self.assertEqual(
repr(alignment),
"<Bio.Align.Alignment object (2 rows x 27 columns) at 0x%x>"
% id(alignment),
)
self.assertEqual(len(alignment), 2)
self.assertEqual(alignment.sequences[0].id, "Test1seq")
self.assertEqual(alignment.sequences[1].id, "AT3G20900")
self.assertEqual(alignment.sequences[0].seq, "GCTGGGGATGGAGAGGGAACAGAGTT")
self.assertEqual(alignment.sequences[1].seq, "GCTGGGGATGGAGAGGGAACAGAGTAG")
self.assertEqual(alignment[0], "GCTGGGGATGGAGAGGGAACAGAGT-T")
self.assertEqual(alignment[1], "GCTGGGGATGGAGAGGGAACAGAGTAG")
self.assertEqual(
alignment.column_annotations["state"],
"DDDDDDDDDDDDDDDDDDDDDDDDDID",
)
self.check_reading_writing(path)
def test_probcons(self):
path = "Clustalw/probcons.a2m"
# example taken from the PROBCONS documentation,
# and converted to aligned A2M format.
with open(path) as stream:
alignments = AlignmentIterator(stream)
alignment = next(alignments)
with self.assertRaises(StopIteration):
next(alignments)
self.assertEqual(
repr(alignment),
"<Bio.Align.Alignment object (5 rows x 101 columns) at 0x%x>"
% id(alignment),
)
self.assertEqual(len(alignment), 5)
self.assertEqual(alignment.sequences[0].id, "plas_horvu")
self.assertEqual(alignment.sequences[1].id, "plas_chlre")
self.assertEqual(alignment.sequences[2].id, "plas_anava")
self.assertEqual(alignment.sequences[3].id, "plas_proho")
self.assertEqual(alignment.sequences[4].id, "azup_achcy")
self.assertEqual(
alignment.sequences[0].seq,
"DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKISQEEYLTAPGETFSVTLTVPGTYGFYCEPHAGAGMVGKVTV",
)
self.assertEqual(
alignment.sequences[1].seq,
"VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAISRDDYLNAPGETYSVKLTAAGEYGYYCEPHQGAGMVGKIIV",
)
self.assertEqual(
alignment.sequences[2].seq,
"VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
)
self.assertEqual(
alignment.sequences[3].seq,
"VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALSNTKLRIAPGSFYSVTLGTPGTYSFYCTPHRGAGMVGTITV",
)
self.assertEqual(
alignment.sequences[4].seq,
"VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKSKINENYKVTFTAPGVYGVKCTPHYGMGMVGVVEV",
)
self.assertEqual(
alignment[0],
"D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV",
)
self.assertEqual(
alignment[1],
"--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV",
)
self.assertEqual(
alignment[2],
"--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
)
self.assertEqual(
alignment[3],
"VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV",
)
self.assertEqual(
alignment[4],
"VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV",
)
self.assertEqual(
alignment.column_annotations["state"],
"DIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDIDDDDDDDDDDDDDDDDDDDDDDDIIIDDDDDDDDDDDDDDDDDDDDDD",
)
self.check_reading_writing(path)
def test_empty(self):
"""Checking empty file."""
stream = StringIO()
alignments = AlignmentIterator(stream)
with self.assertRaises(ValueError):
next(alignments)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)